diff --git a/cmake/operators.cmake b/cmake/operators.cmake index 59c40a0e5d18b753038f2b9301d1c9494e3901be..c2d04828564e69d7ac965881057f185194aa0475 100644 --- a/cmake/operators.cmake +++ b/cmake/operators.cmake @@ -52,8 +52,8 @@ function(op_library TARGET) endif() if(WITH_MKLDNN) string(REPLACE "_op" "_mkldnn_op" MKLDNN_FILE "${TARGET}") - if (EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/${MKLDNN_FILE}.cc) - list(APPEND mkldnn_cc_srcs ${MKLDNN_FILE}.cc) + if (EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/mkldnn/${MKLDNN_FILE}.cc) + list(APPEND mkldnn_cc_srcs mkldnn/${MKLDNN_FILE}.cc) endif() endif() else() diff --git a/paddle/fluid/API.spec b/paddle/fluid/API.spec index ad759c2eda5f5901608aa18667739c20a1e8fb60..041e5d95eb4fb777e997089bd8b6973c8a44be35 100644 --- a/paddle/fluid/API.spec +++ b/paddle/fluid/API.spec @@ -122,7 +122,7 @@ paddle.fluid.layers.transpose ArgSpec(args=['x', 'perm', 'name'], varargs=None, paddle.fluid.layers.im2sequence ArgSpec(args=['input', 'filter_size', 'stride', 'padding', 'input_image_size', 'out_stride', 'name'], varargs=None, keywords=None, defaults=(1, 1, 0, None, 1, None)) paddle.fluid.layers.nce ArgSpec(args=['input', 'label', 'num_total_classes', 'sample_weight', 'param_attr', 'bias_attr', 'num_neg_samples', 'name', 'sampler', 'custom_dist', 'seed', 'is_sparse'], varargs=None, keywords=None, defaults=(None, None, None, None, None, 'uniform', None, 0, False)) paddle.fluid.layers.hsigmoid ArgSpec(args=['input', 'label', 'num_classes', 'param_attr', 'bias_attr', 'name', 'path_table', 'path_code', 'is_custom', 'is_sparse'], varargs=None, keywords=None, defaults=(None, None, None, None, None, False, False)) -paddle.fluid.layers.beam_search ArgSpec(args=['pre_ids', 'pre_scores', 'ids', 'scores', 'beam_size', 'end_id', 'level', 'is_accumulated', 'name'], varargs=None, keywords=None, defaults=(0, True, None)) +paddle.fluid.layers.beam_search ArgSpec(args=['pre_ids', 'pre_scores', 'ids', 'scores', 'beam_size', 'end_id', 'level', 'is_accumulated', 'name', 'return_parent_idx'], varargs=None, keywords=None, defaults=(0, True, None, False)) paddle.fluid.layers.row_conv ArgSpec(args=['input', 'future_context_size', 'param_attr', 'act'], varargs=None, keywords=None, defaults=(None, None)) paddle.fluid.layers.multiplex ArgSpec(args=['inputs', 'index'], varargs=None, keywords=None, defaults=None) paddle.fluid.layers.layer_norm ArgSpec(args=['input', 'scale', 'shift', 'begin_norm_axis', 'epsilon', 'param_attr', 'bias_attr', 'act', 'name'], varargs=None, keywords=None, defaults=(True, True, 1, 1e-05, None, None, None, None)) @@ -322,10 +322,10 @@ paddle.fluid.layers.generate_proposal_labels ArgSpec(args=['rpn_rois', 'gt_class paddle.fluid.layers.generate_proposals ArgSpec(args=['scores', 'bbox_deltas', 'im_info', 'anchors', 'variances', 'pre_nms_top_n', 'post_nms_top_n', 'nms_thresh', 'min_size', 'eta', 'name'], varargs=None, keywords=None, defaults=(6000, 1000, 0.5, 0.1, 1.0, None)) paddle.fluid.layers.generate_mask_labels ArgSpec(args=['im_info', 'gt_classes', 'is_crowd', 'gt_segms', 'rois', 'labels_int32', 'num_classes', 'resolution'], varargs=None, keywords=None, defaults=None) paddle.fluid.layers.iou_similarity ArgSpec(args=['x', 'y', 'name'], varargs=None, keywords=None, defaults=(None,)) -paddle.fluid.layers.box_coder ArgSpec(args=['prior_box', 'prior_box_var', 'target_box', 'code_type', 'box_normalized', 'name'], varargs=None, keywords=None, defaults=('encode_center_size', True, None)) +paddle.fluid.layers.box_coder ArgSpec(args=['prior_box', 'prior_box_var', 'target_box', 'code_type', 'box_normalized', 'name', 'axis'], varargs=None, keywords=None, defaults=('encode_center_size', True, None, 0)) paddle.fluid.layers.polygon_box_transform ArgSpec(args=['input', 'name'], varargs=None, keywords=None, defaults=(None,)) -paddle.fluid.layers.yolov3_loss ArgSpec(args=['x', 'gtbox', 'gtlabel', 'anchors', 'class_num', 'ignore_thresh', 'loss_weight_xy', 'loss_weight_wh', 'loss_weight_conf_target', 'loss_weight_conf_notarget', 'loss_weight_class', 'name'], varargs=None, keywords=None, defaults=(None, None, None, None, None, None)) paddle.fluid.layers.box_clip ArgSpec(args=['input', 'im_info', 'name'], varargs=None, keywords=None, defaults=(None,)) +paddle.fluid.layers.yolov3_loss ArgSpec(args=['x', 'gtbox', 'gtlabel', 'anchors', 'anchor_mask', 'class_num', 'ignore_thresh', 'downsample_ratio', 'name'], varargs=None, keywords=None, defaults=(None,)) paddle.fluid.layers.multiclass_nms ArgSpec(args=['bboxes', 'scores', 'score_threshold', 'nms_top_k', 'keep_top_k', 'nms_threshold', 'normalized', 'nms_eta', 'background_label', 'name'], varargs=None, keywords=None, defaults=(0.3, True, 1.0, 0, None)) paddle.fluid.layers.accuracy ArgSpec(args=['input', 'label', 'k', 'correct', 'total'], varargs=None, keywords=None, defaults=(1, None, None)) paddle.fluid.layers.auc ArgSpec(args=['input', 'label', 'curve', 'num_thresholds', 'topk', 'slide_steps'], varargs=None, keywords=None, defaults=('ROC', 4095, 1, 1)) @@ -362,6 +362,9 @@ paddle.fluid.contrib.QuantizeTranspiler.__init__ ArgSpec(args=['self', 'weight_b paddle.fluid.contrib.QuantizeTranspiler.convert_to_int8 ArgSpec(args=['self', 'program', 'place', 'scope'], varargs=None, keywords=None, defaults=(None,)) paddle.fluid.contrib.QuantizeTranspiler.freeze_program ArgSpec(args=['self', 'program', 'place', 'fuse_bn', 'scope'], varargs=None, keywords=None, defaults=(False, None)) paddle.fluid.contrib.QuantizeTranspiler.training_transpile ArgSpec(args=['self', 'program', 'startup_program'], varargs=None, keywords=None, defaults=(None, None)) +paddle.fluid.contrib.Calibrator.__init__ ArgSpec(args=['self'], varargs='args', keywords='kwargs', defaults=None) +paddle.fluid.contrib.Calibrator.sample_data ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None) +paddle.fluid.contrib.Calibrator.save_int8_model ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None) paddle.fluid.contrib.reader.ctr_reader.ctr_reader ArgSpec(args=['feed_dict', 'file_type', 'file_format', 'dense_slot_index', 'sparse_slot_index', 'capacity', 'thread_num', 'batch_size', 'file_list', 'slots', 'name'], varargs=None, keywords=None, defaults=(None,)) paddle.fluid.contrib.build_compressor ArgSpec(args=['place', 'data_reader', 'data_feeder', 'scope', 'metrics', 'epoch', 'config'], varargs=None, keywords=None, defaults=(None, None, None, None, None, None, None)) paddle.fluid.contrib.CompressPass.__init__ ArgSpec(args=['self', 'place', 'data_reader', 'data_feeder', 'scope', 'metrics', 'epoch', 'program_exe'], varargs=None, keywords=None, defaults=(None, None, None, None, None, None, None)) diff --git a/paddle/fluid/framework/ir/CMakeLists.txt b/paddle/fluid/framework/ir/CMakeLists.txt index b118dccd1b3de881b4791bff6cd331726c8e05da..914bcce7755bcf0651da29dd669f5d6d14e081d2 100644 --- a/paddle/fluid/framework/ir/CMakeLists.txt +++ b/paddle/fluid/framework/ir/CMakeLists.txt @@ -10,8 +10,22 @@ function(pass_library TARGET DEST) set(options "") set(oneValueArgs "") set(multiValueArgs SRCS DEPS) + set(targetPrefix "") + + # Get optional argument + set(extraMacroArgs ${ARGN}) + list(LENGTH extraMacroArgs numExtraMacroArgs) + if(numExtraMacroArgs GREATER 0) + list(GET extraMacroArgs 0 targetPrefix) + endif() + cmake_parse_arguments(op_library "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN}) - cc_library(${TARGET} SRCS ${TARGET}.cc DEPS graph_pattern_detector pass fuse_pass_base ${op_library_DEPS}) + if(targetPrefix) + cc_library(${TARGET} SRCS ${targetPrefix}/${TARGET}.cc DEPS graph_pattern_detector pass fuse_pass_base ${op_library_DEPS}) + else() + cc_library(${TARGET} SRCS ${TARGET}.cc DEPS graph_pattern_detector pass fuse_pass_base ${op_library_DEPS}) + endif() + # add more DEST here, such as train, dist and collect USE_PASS into a file automatically. if (${DEST} STREQUAL "base" OR ${DEST} STREQUAL "inference") message(STATUS "add pass ${TARGET} ${DEST}") @@ -62,11 +76,11 @@ foreach (index RANGE 3 6) endforeach() if(WITH_MKLDNN) - pass_library(mkldnn_placement_pass base) - pass_library(depthwise_conv_mkldnn_pass base) - pass_library(conv_bias_mkldnn_fuse_pass inference) - pass_library(conv_relu_mkldnn_fuse_pass inference) - pass_library(conv_elementwise_add_mkldnn_fuse_pass inference) + pass_library(mkldnn_placement_pass base mkldnn) + pass_library(depthwise_conv_mkldnn_pass base mkldnn) + pass_library(conv_bias_mkldnn_fuse_pass inference mkldnn) + pass_library(conv_relu_mkldnn_fuse_pass inference mkldnn) + pass_library(conv_elementwise_add_mkldnn_fuse_pass inference mkldnn) endif() cc_library(fuse_elewise_add_act_pass SRCS fuse_elewise_add_act_pass.cc DEPS pass graph_pattern_detector ) @@ -86,7 +100,7 @@ cc_test(test_fc_fuse_pass SRCS fc_fuse_pass_tester.cc DEPS fc_fuse_pass framewor cc_test(test_seqpool_concat_fuse_pass SRCS seqpool_concat_fuse_pass_tester.cc DEPS seqpool_concat_fuse_pass framework_proto) cc_test(test_is_test_pass SRCS is_test_pass_tester.cc DEPS is_test_pass) if (WITH_MKLDNN) - cc_test(test_depthwise_conv_mkldnn_pass SRCS depthwise_conv_mkldnn_pass_tester.cc DEPS depthwise_conv_mkldnn_pass) - cc_test(test_conv_relu_mkldnn_fuse_pass SRCS conv_relu_mkldnn_fuse_pass_tester.cc DEPS conv_relu_mkldnn_fuse_pass) - cc_test(test_conv_elementwise_add_mkldnn_fuse_pass SRCS conv_elementwise_add_mkldnn_fuse_pass_tester.cc DEPS conv_elementwise_add_mkldnn_fuse_pass) + cc_test(test_depthwise_conv_mkldnn_pass SRCS mkldnn/depthwise_conv_mkldnn_pass_tester.cc DEPS depthwise_conv_mkldnn_pass) + cc_test(test_conv_relu_mkldnn_fuse_pass SRCS mkldnn/conv_relu_mkldnn_fuse_pass_tester.cc DEPS conv_relu_mkldnn_fuse_pass) + cc_test(test_conv_elementwise_add_mkldnn_fuse_pass SRCS mkldnn/conv_elementwise_add_mkldnn_fuse_pass_tester.cc DEPS conv_elementwise_add_mkldnn_fuse_pass) endif () diff --git a/paddle/fluid/framework/ir/conv_bias_mkldnn_fuse_pass.cc b/paddle/fluid/framework/ir/mkldnn/conv_bias_mkldnn_fuse_pass.cc similarity index 98% rename from paddle/fluid/framework/ir/conv_bias_mkldnn_fuse_pass.cc rename to paddle/fluid/framework/ir/mkldnn/conv_bias_mkldnn_fuse_pass.cc index d4a701e0b173a96d8605dff308fee7007a0ecc0c..5d0b294f6fec5f14dcddb91f8ceffb27fc833d4e 100644 --- a/paddle/fluid/framework/ir/conv_bias_mkldnn_fuse_pass.cc +++ b/paddle/fluid/framework/ir/mkldnn/conv_bias_mkldnn_fuse_pass.cc @@ -12,7 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. -#include "paddle/fluid/framework/ir/conv_bias_mkldnn_fuse_pass.h" +#include "paddle/fluid/framework/ir/mkldnn/conv_bias_mkldnn_fuse_pass.h" #include #include #include diff --git a/paddle/fluid/framework/ir/conv_bias_mkldnn_fuse_pass.h b/paddle/fluid/framework/ir/mkldnn/conv_bias_mkldnn_fuse_pass.h similarity index 100% rename from paddle/fluid/framework/ir/conv_bias_mkldnn_fuse_pass.h rename to paddle/fluid/framework/ir/mkldnn/conv_bias_mkldnn_fuse_pass.h diff --git a/paddle/fluid/framework/ir/conv_elementwise_add_mkldnn_fuse_pass.cc b/paddle/fluid/framework/ir/mkldnn/conv_elementwise_add_mkldnn_fuse_pass.cc similarity index 99% rename from paddle/fluid/framework/ir/conv_elementwise_add_mkldnn_fuse_pass.cc rename to paddle/fluid/framework/ir/mkldnn/conv_elementwise_add_mkldnn_fuse_pass.cc index a8029e67e659a269f8492cf6e2f1f09040144283..fb3db81347b102cfa264082b36a2e22ea8c22982 100644 --- a/paddle/fluid/framework/ir/conv_elementwise_add_mkldnn_fuse_pass.cc +++ b/paddle/fluid/framework/ir/mkldnn/conv_elementwise_add_mkldnn_fuse_pass.cc @@ -12,7 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. -#include "paddle/fluid/framework/ir/conv_elementwise_add_mkldnn_fuse_pass.h" +#include "paddle/fluid/framework/ir/mkldnn/conv_elementwise_add_mkldnn_fuse_pass.h" #include #include #include diff --git a/paddle/fluid/framework/ir/conv_elementwise_add_mkldnn_fuse_pass.h b/paddle/fluid/framework/ir/mkldnn/conv_elementwise_add_mkldnn_fuse_pass.h similarity index 100% rename from paddle/fluid/framework/ir/conv_elementwise_add_mkldnn_fuse_pass.h rename to paddle/fluid/framework/ir/mkldnn/conv_elementwise_add_mkldnn_fuse_pass.h diff --git a/paddle/fluid/framework/ir/conv_elementwise_add_mkldnn_fuse_pass_tester.cc b/paddle/fluid/framework/ir/mkldnn/conv_elementwise_add_mkldnn_fuse_pass_tester.cc similarity index 98% rename from paddle/fluid/framework/ir/conv_elementwise_add_mkldnn_fuse_pass_tester.cc rename to paddle/fluid/framework/ir/mkldnn/conv_elementwise_add_mkldnn_fuse_pass_tester.cc index 61ba097fd8cb55e25bda1947ea97d53308c55bd3..9ef5c298b8cddfec094e9544dc6da9afdcaf0dab 100644 --- a/paddle/fluid/framework/ir/conv_elementwise_add_mkldnn_fuse_pass_tester.cc +++ b/paddle/fluid/framework/ir/mkldnn/conv_elementwise_add_mkldnn_fuse_pass_tester.cc @@ -15,8 +15,8 @@ #include #include -#include "paddle/fluid/framework/ir/conv_elementwise_add_mkldnn_fuse_pass.h" #include "paddle/fluid/framework/ir/graph_traits.h" +#include "paddle/fluid/framework/ir/mkldnn/conv_elementwise_add_mkldnn_fuse_pass.h" namespace paddle { namespace framework { diff --git a/paddle/fluid/framework/ir/conv_relu_mkldnn_fuse_pass.cc b/paddle/fluid/framework/ir/mkldnn/conv_relu_mkldnn_fuse_pass.cc similarity index 97% rename from paddle/fluid/framework/ir/conv_relu_mkldnn_fuse_pass.cc rename to paddle/fluid/framework/ir/mkldnn/conv_relu_mkldnn_fuse_pass.cc index e359a3832ee8d549f8c58d63bc1cc6564ecadede..4f4605398a665e63662a64a3a925c32d48f10952 100644 --- a/paddle/fluid/framework/ir/conv_relu_mkldnn_fuse_pass.cc +++ b/paddle/fluid/framework/ir/mkldnn/conv_relu_mkldnn_fuse_pass.cc @@ -12,7 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. -#include "paddle/fluid/framework/ir/conv_relu_mkldnn_fuse_pass.h" +#include "paddle/fluid/framework/ir/mkldnn/conv_relu_mkldnn_fuse_pass.h" #include #include #include "paddle/fluid/platform/enforce.h" diff --git a/paddle/fluid/framework/ir/conv_relu_mkldnn_fuse_pass.h b/paddle/fluid/framework/ir/mkldnn/conv_relu_mkldnn_fuse_pass.h similarity index 100% rename from paddle/fluid/framework/ir/conv_relu_mkldnn_fuse_pass.h rename to paddle/fluid/framework/ir/mkldnn/conv_relu_mkldnn_fuse_pass.h diff --git a/paddle/fluid/framework/ir/conv_relu_mkldnn_fuse_pass_tester.cc b/paddle/fluid/framework/ir/mkldnn/conv_relu_mkldnn_fuse_pass_tester.cc similarity index 98% rename from paddle/fluid/framework/ir/conv_relu_mkldnn_fuse_pass_tester.cc rename to paddle/fluid/framework/ir/mkldnn/conv_relu_mkldnn_fuse_pass_tester.cc index 19248b4dfee1da81d18cd2effac08ba68dde80fb..06d56f6222e4bb9a9969d4ab2d260c97d1ce6c72 100644 --- a/paddle/fluid/framework/ir/conv_relu_mkldnn_fuse_pass_tester.cc +++ b/paddle/fluid/framework/ir/mkldnn/conv_relu_mkldnn_fuse_pass_tester.cc @@ -12,7 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. -#include "paddle/fluid/framework/ir/conv_relu_mkldnn_fuse_pass.h" +#include "paddle/fluid/framework/ir/mkldnn/conv_relu_mkldnn_fuse_pass.h" #include #include "paddle/fluid/framework/op_proto_maker.h" diff --git a/paddle/fluid/framework/ir/depthwise_conv_mkldnn_pass.cc b/paddle/fluid/framework/ir/mkldnn/depthwise_conv_mkldnn_pass.cc similarity index 96% rename from paddle/fluid/framework/ir/depthwise_conv_mkldnn_pass.cc rename to paddle/fluid/framework/ir/mkldnn/depthwise_conv_mkldnn_pass.cc index 19056e18aa892dbc83dfbf7305b6ad8b6b6bc51c..7851e8c84bca2e3b05d3b1603eaa4c0ca5909e10 100644 --- a/paddle/fluid/framework/ir/depthwise_conv_mkldnn_pass.cc +++ b/paddle/fluid/framework/ir/mkldnn/depthwise_conv_mkldnn_pass.cc @@ -12,7 +12,7 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. */ -#include "paddle/fluid/framework/ir/depthwise_conv_mkldnn_pass.h" +#include "paddle/fluid/framework/ir/mkldnn/depthwise_conv_mkldnn_pass.h" #include "paddle/fluid/framework/ir/graph_pattern_detector.h" namespace paddle { diff --git a/paddle/fluid/framework/ir/depthwise_conv_mkldnn_pass.h b/paddle/fluid/framework/ir/mkldnn/depthwise_conv_mkldnn_pass.h similarity index 100% rename from paddle/fluid/framework/ir/depthwise_conv_mkldnn_pass.h rename to paddle/fluid/framework/ir/mkldnn/depthwise_conv_mkldnn_pass.h diff --git a/paddle/fluid/framework/ir/depthwise_conv_mkldnn_pass_tester.cc b/paddle/fluid/framework/ir/mkldnn/depthwise_conv_mkldnn_pass_tester.cc similarity index 98% rename from paddle/fluid/framework/ir/depthwise_conv_mkldnn_pass_tester.cc rename to paddle/fluid/framework/ir/mkldnn/depthwise_conv_mkldnn_pass_tester.cc index 09d0b15f46a7e50afb6aea46383013ce6a6c6118..1783e3322b1df8125f580f09a12aefe64d246c1a 100644 --- a/paddle/fluid/framework/ir/depthwise_conv_mkldnn_pass_tester.cc +++ b/paddle/fluid/framework/ir/mkldnn/depthwise_conv_mkldnn_pass_tester.cc @@ -12,7 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. -#include "paddle/fluid/framework/ir/depthwise_conv_mkldnn_pass.h" +#include "paddle/fluid/framework/ir/mkldnn/depthwise_conv_mkldnn_pass.h" #include diff --git a/paddle/fluid/framework/ir/mkldnn_placement_pass.cc b/paddle/fluid/framework/ir/mkldnn/mkldnn_placement_pass.cc similarity index 95% rename from paddle/fluid/framework/ir/mkldnn_placement_pass.cc rename to paddle/fluid/framework/ir/mkldnn/mkldnn_placement_pass.cc index 951fcb066ce759ebfec0182e1e9dca887e343170..20e52410ffe3caa86450bc05bf3aabf5a5bce374 100644 --- a/paddle/fluid/framework/ir/mkldnn_placement_pass.cc +++ b/paddle/fluid/framework/ir/mkldnn/mkldnn_placement_pass.cc @@ -12,7 +12,7 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. */ -#include "paddle/fluid/framework/ir/mkldnn_placement_pass.h" +#include "paddle/fluid/framework/ir/mkldnn/mkldnn_placement_pass.h" #include namespace paddle { diff --git a/paddle/fluid/framework/ir/mkldnn_placement_pass.h b/paddle/fluid/framework/ir/mkldnn/mkldnn_placement_pass.h similarity index 100% rename from paddle/fluid/framework/ir/mkldnn_placement_pass.h rename to paddle/fluid/framework/ir/mkldnn/mkldnn_placement_pass.h diff --git a/paddle/fluid/inference/tests/api/CMakeLists.txt b/paddle/fluid/inference/tests/api/CMakeLists.txt index aa3da397ff67dd06dd750d336a49056baedaaab6..7ecd9e35332843e3a391cdad5ce32220d890abd1 100644 --- a/paddle/fluid/inference/tests/api/CMakeLists.txt +++ b/paddle/fluid/inference/tests/api/CMakeLists.txt @@ -128,9 +128,9 @@ inference_analysis_api_test_with_fake_data(test_analyzer_resnet50 inference_analysis_api_test_with_fake_data(test_analyzer_mobilenet_depthwise_conv "${INFERENCE_DEMO_INSTALL_DIR}/mobilenet_depthwise_conv" analyzer_resnet50_tester.cc "mobilenet_model.tar.gz" SERIAL) -# bert, max_len=20 -set(BERT_INSTALL_DIR "${INFERENCE_DEMO_INSTALL_DIR}/bert20") -download_model_and_data(${BERT_INSTALL_DIR} "bert_model.tar.gz" "bert_data_len20.txt.tar.gz") +# bert, max_len=20, embedding_dim=128 +set(BERT_INSTALL_DIR "${INFERENCE_DEMO_INSTALL_DIR}/bert_emb128") +download_model_and_data(${BERT_INSTALL_DIR} "bert_emb128_model.tar.gz" "bert_data_len20.txt.tar.gz") inference_analysis_api_test(test_analyzer_bert ${BERT_INSTALL_DIR} analyzer_bert_tester.cc SERIAL) # anakin diff --git a/paddle/fluid/inference/utils/CMakeLists.txt b/paddle/fluid/inference/utils/CMakeLists.txt index a7b239731b9a2e876c16d9ff84dfb8ac3df7b82e..c43eaf7f9849ee4a88ed95bdb8b6966da8760435 100644 --- a/paddle/fluid/inference/utils/CMakeLists.txt +++ b/paddle/fluid/inference/utils/CMakeLists.txt @@ -1,4 +1,4 @@ cc_library(benchmark SRCS benchmark.cc DEPS enforce) cc_test(test_benchmark SRCS benchmark_tester.cc DEPS benchmark) -#cc_binary(visualizer SRCS visualizer.cc DEPS analysis -# paddle_pass_builder ir_pass_manager pass graph_viz_pass analysis_passes) +cc_binary(visualizer SRCS visualizer.cc DEPS analysis + paddle_pass_builder ir_pass_manager pass graph_viz_pass analysis_passes) diff --git a/paddle/fluid/operators/activation_op.cc b/paddle/fluid/operators/activation_op.cc index 9c5b8604f40ae56c463b54c71623feb61bd8d297..7ec9d2fed53c9c73952db7dcdfc2d8e634f3f84e 100644 --- a/paddle/fluid/operators/activation_op.cc +++ b/paddle/fluid/operators/activation_op.cc @@ -14,7 +14,7 @@ limitations under the License. */ #include "paddle/fluid/operators/activation_op.h" #include -#include "paddle/fluid/operators/mkldnn_activation_op.h" +#include "paddle/fluid/operators/mkldnn/mkldnn_activation_op.h" #include "paddle/fluid/platform/port.h" namespace paddle { diff --git a/paddle/fluid/operators/beam_search_op.cc b/paddle/fluid/operators/beam_search_op.cc index e78ecc1a12309fe084a4165e5bb0d8bfb1dcf957..e93cd8615e052e4dfc6255549bf7a9b84b7dd657 100644 --- a/paddle/fluid/operators/beam_search_op.cc +++ b/paddle/fluid/operators/beam_search_op.cc @@ -51,6 +51,9 @@ class BeamSearchOpMaker : public framework::OpProtoAndCheckerMaker { AddOutput("selected_scores", "A LoDTensor containing the accumulated scores corresponding to " "Output(selected_ids)."); + AddOutput( + "parent_idx", + "A Tensor preserving the selected_ids' parent indice in pre_ids."); // Attributes stored in AttributeMap AddAttr("level", "the level of LoDTensor"); diff --git a/paddle/fluid/operators/beam_search_op.h b/paddle/fluid/operators/beam_search_op.h index 1b939e742de06aedf187d25d002d19e0a4fafc9d..f808020cc765585d1633c6c3bf528080a7e83f07 100644 --- a/paddle/fluid/operators/beam_search_op.h +++ b/paddle/fluid/operators/beam_search_op.h @@ -41,13 +41,15 @@ class BeamSearchOpKernel : public framework::OpKernel { auto selected_ids = context.Output("selected_ids"); auto selected_scores = context.Output("selected_scores"); + auto* parent_idx = context.Output("parent_idx"); PADDLE_ENFORCE_NOT_NULL(selected_ids); PADDLE_ENFORCE_NOT_NULL(selected_scores); + PADDLE_ENFORCE_NOT_NULL(parent_idx); math::BeamSearchFunctor alg; alg(context.template device_context(), pre_ids, pre_scores, - ids, scores, selected_ids, selected_scores, level, beam_size, end_id, - is_accumulated); + ids, scores, selected_ids, selected_scores, parent_idx, level, + beam_size, end_id, is_accumulated); } }; diff --git a/paddle/fluid/operators/detection/CMakeLists.txt b/paddle/fluid/operators/detection/CMakeLists.txt index 4ea5a70730cfdbd0d31eb4c719ed5b4abf9ff2ce..f6fbe97565c43c306ea885c765c0a665492fa317 100644 --- a/paddle/fluid/operators/detection/CMakeLists.txt +++ b/paddle/fluid/operators/detection/CMakeLists.txt @@ -32,6 +32,7 @@ polygon_box_transform_op.cu) detection_library(rpn_target_assign_op SRCS rpn_target_assign_op.cc) detection_library(generate_proposal_labels_op SRCS generate_proposal_labels_op.cc) detection_library(box_clip_op SRCS box_clip_op.cc box_clip_op.cu) +detection_library(yolov3_loss_op SRCS yolov3_loss_op.cc) if(WITH_GPU) detection_library(generate_proposals_op SRCS generate_proposals_op.cc generate_proposals_op.cu DEPS memory cub) diff --git a/paddle/fluid/operators/detection/box_coder_op.cc b/paddle/fluid/operators/detection/box_coder_op.cc index 06fbb9815c52ea69e3aa9e893512e039853b9514..fdcff62e1fe59b3a2f4925bdff98632f71220abb 100644 --- a/paddle/fluid/operators/detection/box_coder_op.cc +++ b/paddle/fluid/operators/detection/box_coder_op.cc @@ -10,6 +10,7 @@ See the License for the specific language governing permissions and limitations under the License. */ #include "paddle/fluid/operators/detection/box_coder_op.h" +#include namespace paddle { namespace operators { @@ -32,32 +33,57 @@ class BoxCoderOp : public framework::OperatorWithKernel { if (ctx->IsRuntime()) { PADDLE_ENFORCE_EQ(prior_box_dims.size(), 2, - "The rank of Input of PriorBoxVar must be 2"); + "The rank of Input PriorBox must be 2"); PADDLE_ENFORCE_EQ(prior_box_dims[1], 4, "The shape of PriorBox is [N, 4]"); if (ctx->HasInput("PriorBoxVar")) { auto prior_box_var_dims = ctx->GetInputDim("PriorBoxVar"); - PADDLE_ENFORCE_EQ(prior_box_dims, prior_box_var_dims); + PADDLE_ENFORCE( + prior_box_var_dims.size() == 1 || prior_box_var_dims.size() == 2, + "Input(PriorBoxVar) of BoxCoderOp should be 1 or 2."); + if (prior_box_var_dims.size() == 1) { + PADDLE_ENFORCE_EQ( + prior_box_var_dims[0], 4, + "The 1st dimension of Input(PriorBoxVar) should be 4" + "when the rank is 1."); + } else { + PADDLE_ENFORCE_EQ( + prior_box_dims, prior_box_var_dims, + "The dimension of Input(PriorBoxVar) should be equal to" + "the dimension of Input(PriorBox when the rank is 2.)"); + } } + } - auto code_type = - GetBoxCodeType(ctx->Attrs().Get("code_type")); - if (code_type == BoxCodeType::kEncodeCenterSize) { - PADDLE_ENFORCE_EQ(target_box_dims.size(), 2, - "The rank of Input of TargetBox must be 2"); - PADDLE_ENFORCE_EQ(target_box_dims[1], 4, - "The shape of TargetBox is [M, 4]"); - } else if (code_type == BoxCodeType::kDecodeCenterSize) { - PADDLE_ENFORCE_EQ(target_box_dims.size(), 3, - "The rank of Input of TargetBox must be 3"); + auto code_type = GetBoxCodeType(ctx->Attrs().Get("code_type")); + int axis = ctx->Attrs().Get("axis"); + if (code_type == BoxCodeType::kEncodeCenterSize) { + PADDLE_ENFORCE_EQ(target_box_dims.size(), 2, + "The rank of Input TargetBox must be 2"); + PADDLE_ENFORCE_EQ(target_box_dims[1], 4, + "The shape of TargetBox is [M, 4]"); + ctx->SetOutputDim( + "OutputBox", + framework::make_ddim({target_box_dims[0], prior_box_dims[0], 4})); + } else if (code_type == BoxCodeType::kDecodeCenterSize) { + PADDLE_ENFORCE_EQ(target_box_dims.size(), 3, + "The rank of Input TargetBox must be 3"); + if (axis == 0) { PADDLE_ENFORCE_EQ(target_box_dims[1], prior_box_dims[0]); - PADDLE_ENFORCE_EQ(target_box_dims[2], prior_box_dims[1]); + } else if (axis == 1) { + PADDLE_ENFORCE_EQ(target_box_dims[0], prior_box_dims[0]); + } else { + PADDLE_THROW("axis must be 0 or 1."); } + PADDLE_ENFORCE_EQ(target_box_dims[2], prior_box_dims[1]); + ctx->ShareDim("TargetBox", /*->*/ "OutputBox"); + } + + if (code_type == BoxCodeType::kDecodeCenterSize && axis == 1) { + ctx->ShareLoD("PriorBox", /*->*/ "OutputBox"); + } else { + ctx->ShareLoD("TargetBox", /*->*/ "OutputBox"); } - ctx->SetOutputDim( - "OutputBox", - framework::make_ddim({target_box_dims[0], prior_box_dims[0], 4})); - ctx->ShareLoD("TargetBox", /*->*/ "OutputBox"); } }; @@ -100,6 +126,21 @@ class BoxCoderOpMaker : public framework::OpProtoAndCheckerMaker { "(bool, default true) " "whether treat the priorbox as a noramlized box") .SetDefault(true); + AddAttr("axis", + "(int, default 0)" + "which axis in PriorBox to broadcast for box decode," + "for example, if axis is 0 and TargetBox has shape" + "[N, M, 4] and PriorBox has shape [M, 4], then PriorBox " + "will broadcast to [N, M, 4] for decoding. It is only valid" + "when code type is decode_center_size") + .SetDefault(0) + .InEnum({0, 1}); + AddAttr>( + "variance", + "(vector, default {})," + "variance of prior box with shape [4]. PriorBoxVar and variance can" + "not be provided at the same time.") + .SetDefault(std::vector{}); AddOutput("OutputBox", "(LoDTensor or Tensor) " "When code_type is 'encode_center_size', the output tensor of " @@ -138,7 +179,11 @@ where `tx`, `ty`, `tw`, `th` denote the target box's center coordinates, width and height respectively. Similarly, `px`, `py`, `pw`, `ph` denote the priorbox's (anchor) center coordinates, width and height. `pxv`, `pyv`, `pwv`, `phv` denote the variance of the priorbox and `ox`, `oy`, `ow`, `oh` denote the -encoded/decoded coordinates, width and height. +encoded/decoded coordinates, width and height. + +During Box Decoding, two modes for broadcast are supported. Say target box has +shape [N, M, 4], and the shape of prior box can be [N, 4] or [M, 4]. Then prior +box will broadcast to target box along the assigned axis. )DOC"); } }; diff --git a/paddle/fluid/operators/detection/box_coder_op.cu b/paddle/fluid/operators/detection/box_coder_op.cu index a7af111f63d654319dd1d90d2032956951dfe49e..e078af3eb478a8bebc6a7fc6460d169d803a3c4b 100644 --- a/paddle/fluid/operators/detection/box_coder_op.cu +++ b/paddle/fluid/operators/detection/box_coder_op.cu @@ -9,6 +9,9 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. */ +#include +#include +#include "paddle/fluid/memory/memcpy.h" #include "paddle/fluid/operators/detection/box_coder_op.h" #include "paddle/fluid/platform/cuda_primitives.h" @@ -16,11 +19,11 @@ namespace paddle { namespace operators { template -__global__ void EncodeCenterSizeKernel(const T* prior_box_data, - const T* prior_box_var_data, - const T* target_box_data, const int row, - const int col, const int len, - const bool normalized, T* output) { +__global__ void EncodeCenterSizeKernel( + const T* prior_box_data, const T* prior_box_var_data, + const T* target_box_data, const int row, const int col, const int len, + const bool normalized, const T prior_box_var_size, const float* variance, + const int var_size, T* output) { const int idx = threadIdx.x + blockIdx.x * blockDim.x; if (idx < row * col) { const int row_idx = idx / col; @@ -30,11 +33,9 @@ __global__ void EncodeCenterSizeKernel(const T* prior_box_data, T prior_box_height = prior_box_data[col_idx * len + 3] - prior_box_data[col_idx * len + 1] + (normalized == false); - T prior_box_center_x = - (prior_box_data[col_idx * len + 2] + prior_box_data[col_idx * len]) / 2; - T prior_box_center_y = (prior_box_data[col_idx * len + 3] + - prior_box_data[col_idx * len + 1]) / - 2; + T prior_box_center_x = prior_box_data[col_idx * len] + prior_box_width / 2; + T prior_box_center_y = + prior_box_data[col_idx * len + 1] + prior_box_height / 2; T target_box_center_x = (target_box_data[row_idx * len + 2] + target_box_data[row_idx * len]) / @@ -55,58 +56,73 @@ __global__ void EncodeCenterSizeKernel(const T* prior_box_data, output[idx * len + 2] = log(fabs(target_box_width / prior_box_width)); output[idx * len + 3] = log(fabs(target_box_height / prior_box_height)); if (prior_box_var_data) { - output[idx * len] /= prior_box_var_data[col_idx * len]; - output[idx * len + 1] /= prior_box_var_data[col_idx * len + 1]; - output[idx * len + 2] /= prior_box_var_data[col_idx * len + 2]; - output[idx * len + 3] /= prior_box_var_data[col_idx * len + 3]; + int prior_var_offset = 0; + if (prior_box_var_size == 2) { + prior_var_offset = col_idx * len; + } + output[idx * len] /= prior_box_var_data[prior_var_offset]; + output[idx * len + 1] /= prior_box_var_data[prior_var_offset + 1]; + output[idx * len + 2] /= prior_box_var_data[prior_var_offset + 2]; + output[idx * len + 3] /= prior_box_var_data[prior_var_offset + 3]; + } else if (var_size == 4) { + for (int k = 0; k < 4; ++k) { + output[idx * len + k] /= static_cast(variance[k]); + } } } } template -__global__ void DecodeCenterSizeKernel(const T* prior_box_data, - const T* prior_box_var_data, - const T* target_box_data, const int row, - const int col, const int len, - const bool normalized, T* output) { +__global__ void DecodeCenterSizeKernel( + const T* prior_box_data, const T* prior_box_var_data, + const T* target_box_data, const int row, const int col, const int len, + const bool normalized, const T prior_box_var_size, const float* variance, + const int var_size, const int axis, T* output) { const int idx = threadIdx.x + blockIdx.x * blockDim.x; + int prior_box_offset = 0; if (idx < row * col) { const int col_idx = idx % col; - T prior_box_width = prior_box_data[col_idx * len + 2] - - prior_box_data[col_idx * len] + (normalized == false); - T prior_box_height = prior_box_data[col_idx * len + 3] - - prior_box_data[col_idx * len + 1] + + const int row_idx = idx / col; + prior_box_offset = axis == 0 ? col_idx * len : row_idx * len; + T prior_box_width = prior_box_data[prior_box_offset + 2] - + prior_box_data[prior_box_offset] + + (normalized == false); + T prior_box_height = prior_box_data[prior_box_offset + 3] - + prior_box_data[prior_box_offset + 1] + (normalized == false); T prior_box_center_x = - (prior_box_data[col_idx * len + 2] + prior_box_data[col_idx * len]) / 2; - T prior_box_center_y = (prior_box_data[col_idx * len + 3] + - prior_box_data[col_idx * len + 1]) / - 2; + prior_box_data[prior_box_offset] + prior_box_width / 2; + T prior_box_center_y = + prior_box_data[prior_box_offset + 1] + prior_box_height / 2; T target_box_width, target_box_height; T target_box_center_x, target_box_center_y; + T box_var_x = T(1), box_var_y = T(1); + T box_var_w = T(1), box_var_h = T(1); if (prior_box_var_data) { - target_box_width = exp(prior_box_var_data[col_idx * len + 2] * - target_box_data[idx * len + 2]) * - prior_box_width; - target_box_height = exp(prior_box_var_data[col_idx * len + 3] * - target_box_data[idx * len + 3]) * - prior_box_height; - target_box_center_x = prior_box_var_data[col_idx * len] * - target_box_data[idx * len] * prior_box_width + - prior_box_center_x; - target_box_center_y = prior_box_var_data[col_idx * len + 1] * - target_box_data[idx * len + 1] * - prior_box_height + - prior_box_center_y; - } else { - target_box_width = exp(target_box_data[idx * len + 2]) * prior_box_width; - target_box_height = - exp(target_box_data[idx * len + 3]) * prior_box_height; - target_box_center_x = - target_box_data[idx * len] * prior_box_width + prior_box_center_x; - target_box_center_y = target_box_data[idx * len + 1] * prior_box_height + - prior_box_center_y; + int prior_var_offset = 0; + if (prior_box_var_size == 2) { + prior_var_offset = axis == 0 ? col_idx * len : row_idx * len; + } + box_var_x = prior_box_var_data[prior_var_offset]; + box_var_y = prior_box_var_data[prior_var_offset + 1]; + box_var_w = prior_box_var_data[prior_var_offset + 2]; + box_var_h = prior_box_var_data[prior_var_offset + 3]; + } else if (var_size == 4) { + box_var_x = static_cast(variance[0]); + box_var_y = static_cast(variance[1]); + box_var_w = static_cast(variance[2]); + box_var_h = static_cast(variance[3]); } + target_box_width = + exp(box_var_w * target_box_data[idx * len + 2]) * prior_box_width; + target_box_height = + exp(box_var_h * target_box_data[idx * len + 3]) * prior_box_height; + target_box_center_x = + box_var_x * target_box_data[idx * len] * prior_box_width + + prior_box_center_x; + target_box_center_y = + box_var_y * target_box_data[idx * len + 1] * prior_box_height + + prior_box_center_y; output[idx * len] = target_box_center_x - target_box_width / 2; output[idx * len + 1] = target_box_center_y - target_box_height / 2; @@ -127,36 +143,64 @@ class BoxCoderCUDAKernel : public framework::OpKernel { auto* prior_box_var = context.Input("PriorBoxVar"); auto* target_box = context.Input("TargetBox"); auto* output_box = context.Output("OutputBox"); - + std::vector variance = context.Attr>("variance"); const T* prior_box_data = prior_box->data(); const T* target_box_data = target_box->data(); const T* prior_box_var_data = nullptr; - if (prior_box_var) prior_box_var_data = prior_box_var->data(); + auto prior_box_var_size = 0; + if (prior_box_var) { + PADDLE_ENFORCE(variance.empty(), + "Input 'PriorBoxVar' and attribute 'variance' should not" + "be used at the same time."); + prior_box_var_data = prior_box_var->data(); + prior_box_var_size = prior_box_var->dims().size(); + } + if (!(variance.empty())) { + PADDLE_ENFORCE(static_cast(variance.size()) == 4, + "Size of attribute 'variance' should be 4"); + } if (target_box->lod().size()) { PADDLE_ENFORCE_EQ(target_box->lod().size(), 1, "Only support 1 level of LoD."); } + const int var_size = static_cast(variance.size()); + + auto code_type = GetBoxCodeType(context.Attr("code_type")); + bool normalized = context.Attr("box_normalized"); + int axis = context.Attr("axis"); + auto row = target_box->dims()[0]; auto col = prior_box->dims()[0]; + if (code_type == BoxCodeType::kDecodeCenterSize) { + col = target_box->dims()[1]; + } auto len = prior_box->dims()[1]; int block = 512; int grid = (row * col + block - 1) / block; auto& device_ctx = context.cuda_device_context(); + auto& allocator = + platform::DeviceTemporaryAllocator::Instance().Get(device_ctx); + int bytes = var_size * sizeof(float); + auto dev_var = allocator.Allocate(bytes); + float* dev_var_data = reinterpret_cast(dev_var->ptr()); + auto cplace = platform::CPUPlace(); + const auto gplace = boost::get(context.GetPlace()); + memory::Copy(gplace, dev_var_data, cplace, &variance[0], bytes, + device_ctx.stream()); + output_box->mutable_data({row, col, len}, context.GetPlace()); T* output = output_box->data(); - auto code_type = GetBoxCodeType(context.Attr("code_type")); - bool normalized = context.Attr("box_normalized"); if (code_type == BoxCodeType::kEncodeCenterSize) { EncodeCenterSizeKernel<<>>( prior_box_data, prior_box_var_data, target_box_data, row, col, len, - normalized, output); + normalized, prior_box_var_size, dev_var_data, var_size, output); } else if (code_type == BoxCodeType::kDecodeCenterSize) { DecodeCenterSizeKernel<<>>( prior_box_data, prior_box_var_data, target_box_data, row, col, len, - normalized, output); + normalized, prior_box_var_size, dev_var_data, var_size, axis, output); } } }; diff --git a/paddle/fluid/operators/detection/box_coder_op.h b/paddle/fluid/operators/detection/box_coder_op.h index b2a2bcdce932032a761a1fc064fe622f7629f9bf..a0b1faf7bdc7001eba2d92b4d03fbaf9feb7bcbb 100644 --- a/paddle/fluid/operators/detection/box_coder_op.h +++ b/paddle/fluid/operators/detection/box_coder_op.h @@ -11,6 +11,7 @@ limitations under the License. */ #pragma once #include +#include #include "paddle/fluid/framework/op_registry.h" #include "paddle/fluid/operators/math/math_function.h" @@ -34,7 +35,8 @@ class BoxCoderKernel : public framework::OpKernel { void EncodeCenterSize(const framework::Tensor* target_box, const framework::Tensor* prior_box, const framework::Tensor* prior_box_var, - const bool normalized, T* output) const { + const bool normalized, + const std::vector variance, T* output) const { int64_t row = target_box->dims()[0]; int64_t col = prior_box->dims()[0]; int64_t len = prior_box->dims()[1]; @@ -53,10 +55,9 @@ class BoxCoderKernel : public framework::OpKernel { T prior_box_height = prior_box_data[j * len + 3] - prior_box_data[j * len + 1] + (normalized == false); - T prior_box_center_x = - (prior_box_data[j * len + 2] + prior_box_data[j * len]) / 2; + T prior_box_center_x = prior_box_data[j * len] + prior_box_width / 2; T prior_box_center_y = - (prior_box_data[j * len + 3] + prior_box_data[j * len + 1]) / 2; + prior_box_data[j * len + 1] + prior_box_height / 2; T target_box_center_x = (target_box_data[i * len + 2] + target_box_data[i * len]) / 2; @@ -78,10 +79,18 @@ class BoxCoderKernel : public framework::OpKernel { output[offset + 3] = std::log(std::fabs(target_box_height / prior_box_height)); if (prior_box_var) { - output[offset] /= prior_box_var_data[j * len]; - output[offset + 1] /= prior_box_var_data[j * len + 1]; - output[offset + 2] /= prior_box_var_data[j * len + 2]; - output[offset + 3] /= prior_box_var_data[j * len + 3]; + int prior_var_offset = 0; + if (prior_box_var->dims().size() == 2) { + prior_var_offset = j * len; + } + output[offset] /= prior_box_var_data[prior_var_offset]; + output[offset + 1] /= prior_box_var_data[prior_var_offset + 1]; + output[offset + 2] /= prior_box_var_data[prior_var_offset + 2]; + output[offset + 3] /= prior_box_var_data[prior_var_offset + 3]; + } else if (!(variance.empty())) { + for (int k = 0; k < 4; ++k) { + output[offset + k] /= static_cast(variance[k]); + } } } } @@ -89,58 +98,71 @@ class BoxCoderKernel : public framework::OpKernel { void DecodeCenterSize(const framework::Tensor* target_box, const framework::Tensor* prior_box, const framework::Tensor* prior_box_var, - const bool normalized, T* output) const { + const bool normalized, const int axis, + const std::vector variance, T* output) const { int64_t row = target_box->dims()[0]; - int64_t col = prior_box->dims()[0]; - int64_t len = prior_box->dims()[1]; + int64_t col = target_box->dims()[1]; + int64_t len = target_box->dims()[2]; auto* target_box_data = target_box->data(); auto* prior_box_data = prior_box->data(); const T* prior_box_var_data = nullptr; if (prior_box_var) prior_box_var_data = prior_box_var->data(); - + int prior_box_offset = 0; #ifdef PADDLE_WITH_MKLML #pragma omp parallel for collapse(2) #endif for (int64_t i = 0; i < row; ++i) { for (int64_t j = 0; j < col; ++j) { size_t offset = i * col * len + j * len; - T prior_box_width = prior_box_data[j * len + 2] - - prior_box_data[j * len] + (normalized == false); - T prior_box_height = prior_box_data[j * len + 3] - - prior_box_data[j * len + 1] + + if (axis == 0) { + prior_box_offset = j * len; + } else if (axis == 1) { + prior_box_offset = i * len; + } + T prior_box_width = prior_box_data[prior_box_offset + 2] - + prior_box_data[prior_box_offset] + + (normalized == false); + T prior_box_height = prior_box_data[prior_box_offset + 3] - + prior_box_data[prior_box_offset + 1] + (normalized == false); T prior_box_center_x = - (prior_box_data[j * len + 2] + prior_box_data[j * len]) / 2; + prior_box_data[prior_box_offset] + prior_box_width / 2; T prior_box_center_y = - (prior_box_data[j * len + 3] + prior_box_data[j * len + 1]) / 2; + prior_box_data[prior_box_offset + 1] + prior_box_height / 2; T target_box_center_x = 0, target_box_center_y = 0; T target_box_width = 0, target_box_height = 0; + T box_var_x = T(1), box_var_y = T(1); + T box_var_w = T(1), box_var_h = T(1); if (prior_box_var) { - target_box_center_x = prior_box_var_data[j * len] * - target_box_data[offset] * prior_box_width + - prior_box_center_x; - target_box_center_y = prior_box_var_data[j * len + 1] * - target_box_data[offset + 1] * - prior_box_height + - prior_box_center_y; - target_box_width = std::exp(prior_box_var_data[j * len + 2] * - target_box_data[offset + 2]) * - prior_box_width; - target_box_height = std::exp(prior_box_var_data[j * len + 3] * - target_box_data[offset + 3]) * - prior_box_height; - } else { - target_box_center_x = - target_box_data[offset] * prior_box_width + prior_box_center_x; - target_box_center_y = target_box_data[offset + 1] * prior_box_height + - prior_box_center_y; - target_box_width = - std::exp(target_box_data[offset + 2]) * prior_box_width; - target_box_height = - std::exp(target_box_data[offset + 3]) * prior_box_height; + int prior_var_offset = 0; + if (prior_box_var->dims().size() == 2) { + if (axis == 0) + prior_var_offset = j * len; + else if (axis == 1) + prior_var_offset = i * len; + } + box_var_x = prior_box_var_data[prior_var_offset]; + box_var_y = prior_box_var_data[prior_var_offset + 1]; + box_var_w = prior_box_var_data[prior_var_offset + 2]; + box_var_h = prior_box_var_data[prior_var_offset + 3]; + } else if (!(variance.empty())) { + box_var_x = static_cast(variance[0]); + box_var_y = static_cast(variance[1]); + box_var_w = static_cast(variance[2]); + box_var_h = static_cast(variance[3]); } + target_box_center_x = + box_var_x * target_box_data[offset] * prior_box_width + + prior_box_center_x; + target_box_center_y = + box_var_y * target_box_data[offset + 1] * prior_box_height + + prior_box_center_y; + target_box_width = + std::exp(box_var_w * target_box_data[offset + 2]) * prior_box_width; + target_box_height = std::exp(box_var_h * target_box_data[offset + 3]) * + prior_box_height; output[offset] = target_box_center_x - target_box_width / 2; output[offset + 1] = target_box_center_y - target_box_height / 2; @@ -157,26 +179,40 @@ class BoxCoderKernel : public framework::OpKernel { auto* prior_box_var = context.Input("PriorBoxVar"); auto* target_box = context.Input("TargetBox"); auto* output_box = context.Output("OutputBox"); - + std::vector variance = context.Attr>("variance"); + const int axis = context.Attr("axis"); if (target_box->lod().size()) { PADDLE_ENFORCE_EQ(target_box->lod().size(), 1UL, "Only support 1 level of LoD."); } + if (prior_box_var) { + PADDLE_ENFORCE(variance.empty(), + "Input 'PriorBoxVar' and attribute 'variance' should not" + "be used at the same time."); + } + if (!(variance.empty())) { + PADDLE_ENFORCE(static_cast(variance.size()) == 4, + "Size of attribute 'variance' should be 4"); + } + auto code_type = GetBoxCodeType(context.Attr("code_type")); + bool normalized = context.Attr("box_normalized"); + auto row = target_box->dims()[0]; auto col = prior_box->dims()[0]; + if (code_type == BoxCodeType::kDecodeCenterSize) { + col = target_box->dims()[1]; + } auto len = prior_box->dims()[1]; output_box->mutable_data({row, col, len}, context.GetPlace()); - auto code_type = GetBoxCodeType(context.Attr("code_type")); - bool normalized = context.Attr("box_normalized"); T* output = output_box->data(); if (code_type == BoxCodeType::kEncodeCenterSize) { EncodeCenterSize(target_box, prior_box, prior_box_var, normalized, - output); + variance, output); } else if (code_type == BoxCodeType::kDecodeCenterSize) { - DecodeCenterSize(target_box, prior_box, prior_box_var, normalized, - output); + DecodeCenterSize(target_box, prior_box, prior_box_var, normalized, axis, + variance, output); } } }; diff --git a/paddle/fluid/operators/yolov3_loss_op.cc b/paddle/fluid/operators/detection/yolov3_loss_op.cc similarity index 69% rename from paddle/fluid/operators/yolov3_loss_op.cc rename to paddle/fluid/operators/detection/yolov3_loss_op.cc index 60508f7ab871910c38f1e4aa04c2035075d37df5..2a69ad4b53c26f5e2e0547e75e0d9c6518a8bcba 100644 --- a/paddle/fluid/operators/yolov3_loss_op.cc +++ b/paddle/fluid/operators/detection/yolov3_loss_op.cc @@ -9,7 +9,7 @@ See the License for the specific language governing permissions and limitations under the License. */ -#include "paddle/fluid/operators/yolov3_loss_op.h" +#include "paddle/fluid/operators/detection/yolov3_loss_op.h" #include "paddle/fluid/framework/op_registry.h" namespace paddle { @@ -29,23 +29,33 @@ class Yolov3LossOp : public framework::OperatorWithKernel { "Input(GTLabel) of Yolov3LossOp should not be null."); PADDLE_ENFORCE(ctx->HasOutput("Loss"), "Output(Loss) of Yolov3LossOp should not be null."); + PADDLE_ENFORCE( + ctx->HasOutput("ObjectnessMask"), + "Output(ObjectnessMask) of Yolov3LossOp should not be null."); + PADDLE_ENFORCE(ctx->HasOutput("GTMatchMask"), + "Output(GTMatchMask) of Yolov3LossOp should not be null."); auto dim_x = ctx->GetInputDim("X"); auto dim_gtbox = ctx->GetInputDim("GTBox"); auto dim_gtlabel = ctx->GetInputDim("GTLabel"); auto anchors = ctx->Attrs().Get>("anchors"); + int anchor_num = anchors.size() / 2; + auto anchor_mask = ctx->Attrs().Get>("anchor_mask"); + int mask_num = anchor_mask.size(); auto class_num = ctx->Attrs().Get("class_num"); + PADDLE_ENFORCE_EQ(dim_x.size(), 4, "Input(X) should be a 4-D tensor."); PADDLE_ENFORCE_EQ(dim_x[2], dim_x[3], "Input(X) dim[3] and dim[4] should be euqal."); - PADDLE_ENFORCE_EQ(dim_x[1], anchors.size() / 2 * (5 + class_num), - "Input(X) dim[1] should be equal to (anchor_number * (5 " - "+ class_num))."); + PADDLE_ENFORCE_EQ( + dim_x[1], mask_num * (5 + class_num), + "Input(X) dim[1] should be equal to (anchor_mask_number * (5 " + "+ class_num))."); PADDLE_ENFORCE_EQ(dim_gtbox.size(), 3, "Input(GTBox) should be a 3-D tensor"); PADDLE_ENFORCE_EQ(dim_gtbox[2], 4, "Input(GTBox) dim[2] should be 5"); PADDLE_ENFORCE_EQ(dim_gtlabel.size(), 2, - "Input(GTBox) should be a 2-D tensor"); + "Input(GTLabel) should be a 2-D tensor"); PADDLE_ENFORCE_EQ(dim_gtlabel[0], dim_gtbox[0], "Input(GTBox) and Input(GTLabel) dim[0] should be same"); PADDLE_ENFORCE_EQ(dim_gtlabel[1], dim_gtbox[1], @@ -54,11 +64,22 @@ class Yolov3LossOp : public framework::OperatorWithKernel { "Attr(anchors) length should be greater then 0."); PADDLE_ENFORCE_EQ(anchors.size() % 2, 0, "Attr(anchors) length should be even integer."); + for (size_t i = 0; i < anchor_mask.size(); i++) { + PADDLE_ENFORCE_LT( + anchor_mask[i], anchor_num, + "Attr(anchor_mask) should not crossover Attr(anchors)."); + } PADDLE_ENFORCE_GT(class_num, 0, "Attr(class_num) should be an integer greater then 0."); - std::vector dim_out({1}); + std::vector dim_out({dim_x[0]}); ctx->SetOutputDim("Loss", framework::make_ddim(dim_out)); + + std::vector dim_obj_mask({dim_x[0], mask_num, dim_x[2], dim_x[3]}); + ctx->SetOutputDim("ObjectnessMask", framework::make_ddim(dim_obj_mask)); + + std::vector dim_gt_match_mask({dim_gtbox[0], dim_gtbox[1]}); + ctx->SetOutputDim("GTMatchMask", framework::make_ddim(dim_gt_match_mask)); } protected: @@ -73,11 +94,11 @@ class Yolov3LossOpMaker : public framework::OpProtoAndCheckerMaker { public: void Make() override { AddInput("X", - "The input tensor of YOLO v3 loss operator, " + "The input tensor of YOLOv3 loss operator, " "This is a 4-D tensor with shape of [N, C, H, W]." "H and W should be same, and the second dimention(C) stores" "box locations, confidence score and classification one-hot" - "key of each anchor box"); + "keys of each anchor box"); AddInput("GTBox", "The input tensor of ground truth boxes, " "This is a 3-D tensor with shape of [N, max_box_num, 5], " @@ -89,32 +110,39 @@ class Yolov3LossOpMaker : public framework::OpProtoAndCheckerMaker { AddInput("GTLabel", "The input tensor of ground truth label, " "This is a 2-D tensor with shape of [N, max_box_num], " - "and each element shoudl be an integer to indicate the " + "and each element should be an integer to indicate the " "box class id."); AddOutput("Loss", "The output yolov3 loss tensor, " - "This is a 1-D tensor with shape of [1]"); + "This is a 1-D tensor with shape of [N]"); + AddOutput("ObjectnessMask", + "This is an intermediate tensor with shape of [N, M, H, W], " + "M is the number of anchor masks. This parameter caches the " + "mask for calculate objectness loss in gradient kernel.") + .AsIntermediate(); + AddOutput("GTMatchMask", + "This is an intermediate tensor with shape of [N, B], " + "B is the max box number of GT boxes. This parameter caches " + "matched mask index of each GT boxes for gradient calculate.") + .AsIntermediate(); AddAttr("class_num", "The number of classes to predict."); AddAttr>("anchors", "The anchor width and height, " - "it will be parsed pair by pair."); + "it will be parsed pair by pair.") + .SetDefault(std::vector{}); + AddAttr>("anchor_mask", + "The mask index of anchors used in " + "current YOLOv3 loss calculation.") + .SetDefault(std::vector{}); + AddAttr("downsample_ratio", + "The downsample ratio from network input to YOLOv3 loss " + "input, so 32, 16, 8 should be set for the first, second, " + "and thrid YOLOv3 loss operators.") + .SetDefault(32); AddAttr("ignore_thresh", - "The ignore threshold to ignore confidence loss."); - AddAttr("loss_weight_xy", "The weight of x, y location loss.") - .SetDefault(1.0); - AddAttr("loss_weight_wh", "The weight of w, h location loss.") - .SetDefault(1.0); - AddAttr( - "loss_weight_conf_target", - "The weight of confidence score loss in locations with target object.") - .SetDefault(1.0); - AddAttr("loss_weight_conf_notarget", - "The weight of confidence score loss in locations without " - "target object.") - .SetDefault(1.0); - AddAttr("loss_weight_class", "The weight of classification loss.") - .SetDefault(1.0); + "The ignore threshold to ignore confidence loss.") + .SetDefault(0.7); AddComment(R"DOC( This operator generate yolov3 loss by given predict result and ground truth boxes. @@ -147,17 +175,28 @@ class Yolov3LossOpMaker : public framework::OpProtoAndCheckerMaker { thresh, the confidence score loss of this anchor box will be ignored. Therefore, the yolov3 loss consist of three major parts, box location loss, - confidence score loss, and classification loss. The MSE loss is used for - box location, and binary cross entropy loss is used for confidence score - loss and classification loss. + confidence score loss, and classification loss. The L2 loss is used for + box coordinates (w, h), and sigmoid cross entropy loss is used for box + coordinates (x, y), confidence score loss and classification loss. + + Each groud truth box find a best matching anchor box in all anchors, + prediction of this anchor box will incur all three parts of losses, and + prediction of anchor boxes with no GT box matched will only incur objectness + loss. + + In order to trade off box coordinate losses between big boxes and small + boxes, box coordinate losses will be mutiplied by scale weight, which is + calculated as follow. + + $$ + weight_{box} = 2.0 - t_w * t_h + $$ Final loss will be represented as follow. $$ - loss = \loss_weight_{xy} * loss_{xy} + \loss_weight_{wh} * loss_{wh} - + \loss_weight_{conf_target} * loss_{conf_target} - + \loss_weight_{conf_notarget} * loss_{conf_notarget} - + \loss_weight_{class} * loss_{class} + loss = (loss_{xy} + loss_{wh}) * weight_{box} + + loss_{conf} + loss_{class} $$ )DOC"); } @@ -196,6 +235,8 @@ class Yolov3LossGradMaker : public framework::SingleGradOpDescMaker { op->SetInput("GTBox", Input("GTBox")); op->SetInput("GTLabel", Input("GTLabel")); op->SetInput(framework::GradVarName("Loss"), OutputGrad("Loss")); + op->SetInput("ObjectnessMask", Output("ObjectnessMask")); + op->SetInput("GTMatchMask", Output("GTMatchMask")); op->SetAttrMap(Attrs()); diff --git a/paddle/fluid/operators/detection/yolov3_loss_op.h b/paddle/fluid/operators/detection/yolov3_loss_op.h new file mode 100644 index 0000000000000000000000000000000000000000..8407d4e6e8f87a2e8d073c4fbda5691abe1bba68 --- /dev/null +++ b/paddle/fluid/operators/detection/yolov3_loss_op.h @@ -0,0 +1,447 @@ +/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve. + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + http://www.apache.org/licenses/LICENSE-2.0 + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. */ + +#pragma once +#include +#include +#include "paddle/fluid/framework/op_registry.h" +#include "paddle/fluid/operators/math/math_function.h" + +namespace paddle { +namespace operators { + +using Tensor = framework::Tensor; +template +using EigenTensor = framework::EigenTensor; +template +using EigenVector = framework::EigenVector; + +template +static inline bool LessEqualZero(T x) { + return x < 1e-6; +} + +template +static T SigmoidCrossEntropy(T x, T label) { + return (x > 0 ? x : 0.0) - x * label + std::log(1.0 + std::exp(-std::abs(x))); +} + +template +static T L2Loss(T x, T y) { + return 0.5 * (y - x) * (y - x); +} + +template +static T SigmoidCrossEntropyGrad(T x, T label) { + return 1.0 / (1.0 + std::exp(-x)) - label; +} + +template +static T L2LossGrad(T x, T y) { + return x - y; +} + +static int GetMaskIndex(std::vector mask, int val) { + for (size_t i = 0; i < mask.size(); i++) { + if (mask[i] == val) { + return i; + } + } + return -1; +} + +template +struct Box { + T x, y, w, h; +}; + +template +static inline T sigmoid(T x) { + return 1.0 / (1.0 + std::exp(-x)); +} + +template +static inline Box GetYoloBox(const T* x, std::vector anchors, int i, + int j, int an_idx, int grid_size, + int input_size, int index, int stride) { + Box b; + b.x = (i + sigmoid(x[index])) / grid_size; + b.y = (j + sigmoid(x[index + stride])) / grid_size; + b.w = std::exp(x[index + 2 * stride]) * anchors[2 * an_idx] / input_size; + b.h = std::exp(x[index + 3 * stride]) * anchors[2 * an_idx + 1] / input_size; + return b; +} + +template +static inline Box GetGtBox(const T* gt, int batch, int max_boxes, int idx) { + Box b; + b.x = gt[(batch * max_boxes + idx) * 4]; + b.y = gt[(batch * max_boxes + idx) * 4 + 1]; + b.w = gt[(batch * max_boxes + idx) * 4 + 2]; + b.h = gt[(batch * max_boxes + idx) * 4 + 3]; + return b; +} + +template +static inline T BoxOverlap(T c1, T w1, T c2, T w2) { + T l1 = c1 - w1 / 2.0; + T l2 = c2 - w2 / 2.0; + T left = l1 > l2 ? l1 : l2; + T r1 = c1 + w1 / 2.0; + T r2 = c2 + w2 / 2.0; + T right = r1 < r2 ? r1 : r2; + return right - left; +} + +template +static inline T CalcBoxIoU(Box b1, Box b2) { + T w = BoxOverlap(b1.x, b1.w, b2.x, b2.w); + T h = BoxOverlap(b1.y, b1.h, b2.y, b2.h); + T inter_area = (w < 0 || h < 0) ? 0.0 : w * h; + T union_area = b1.w * b1.h + b2.w * b2.h - inter_area; + return inter_area / union_area; +} + +static inline int GetEntryIndex(int batch, int an_idx, int hw_idx, int an_num, + int an_stride, int stride, int entry) { + return (batch * an_num + an_idx) * an_stride + entry * stride + hw_idx; +} + +template +static void CalcBoxLocationLoss(T* loss, const T* input, Box gt, + std::vector anchors, int an_idx, + int box_idx, int gi, int gj, int grid_size, + int input_size, int stride) { + T tx = gt.x * grid_size - gi; + T ty = gt.y * grid_size - gj; + T tw = std::log(gt.w * input_size / anchors[2 * an_idx]); + T th = std::log(gt.h * input_size / anchors[2 * an_idx + 1]); + + T scale = (2.0 - gt.w * gt.h); + loss[0] += SigmoidCrossEntropy(input[box_idx], tx) * scale; + loss[0] += SigmoidCrossEntropy(input[box_idx + stride], ty) * scale; + loss[0] += L2Loss(input[box_idx + 2 * stride], tw) * scale; + loss[0] += L2Loss(input[box_idx + 3 * stride], th) * scale; +} + +template +static void CalcBoxLocationLossGrad(T* input_grad, const T loss, const T* input, + Box gt, std::vector anchors, + int an_idx, int box_idx, int gi, int gj, + int grid_size, int input_size, int stride) { + T tx = gt.x * grid_size - gi; + T ty = gt.y * grid_size - gj; + T tw = std::log(gt.w * input_size / anchors[2 * an_idx]); + T th = std::log(gt.h * input_size / anchors[2 * an_idx + 1]); + + T scale = (2.0 - gt.w * gt.h); + input_grad[box_idx] = + SigmoidCrossEntropyGrad(input[box_idx], tx) * scale * loss; + input_grad[box_idx + stride] = + SigmoidCrossEntropyGrad(input[box_idx + stride], ty) * scale * loss; + input_grad[box_idx + 2 * stride] = + L2LossGrad(input[box_idx + 2 * stride], tw) * scale * loss; + input_grad[box_idx + 3 * stride] = + L2LossGrad(input[box_idx + 3 * stride], th) * scale * loss; +} + +template +static inline void CalcLabelLoss(T* loss, const T* input, const int index, + const int label, const int class_num, + const int stride) { + for (int i = 0; i < class_num; i++) { + T pred = input[index + i * stride]; + loss[0] += SigmoidCrossEntropy(pred, (i == label) ? 1.0 : 0.0); + } +} + +template +static inline void CalcLabelLossGrad(T* input_grad, const T loss, + const T* input, const int index, + const int label, const int class_num, + const int stride) { + for (int i = 0; i < class_num; i++) { + T pred = input[index + i * stride]; + input_grad[index + i * stride] = + SigmoidCrossEntropyGrad(pred, (i == label) ? 1.0 : 0.0) * loss; + } +} + +template +static inline void CalcObjnessLoss(T* loss, const T* input, const T* objness, + const int n, const int an_num, const int h, + const int w, const int stride, + const int an_stride) { + for (int i = 0; i < n; i++) { + for (int j = 0; j < an_num; j++) { + for (int k = 0; k < h; k++) { + for (int l = 0; l < w; l++) { + T obj = objness[k * w + l]; + if (obj > 1e-5) { + // positive sample: obj = 1 + loss[i] += SigmoidCrossEntropy(input[k * w + l], 1.0); + } else if (obj > -0.5) { + // negetive sample: obj = 0 + loss[i] += SigmoidCrossEntropy(input[k * w + l], 0.0); + } + } + } + objness += stride; + input += an_stride; + } + } +} + +template +static inline void CalcObjnessLossGrad(T* input_grad, const T* loss, + const T* input, const T* objness, + const int n, const int an_num, + const int h, const int w, + const int stride, const int an_stride) { + for (int i = 0; i < n; i++) { + for (int j = 0; j < an_num; j++) { + for (int k = 0; k < h; k++) { + for (int l = 0; l < w; l++) { + T obj = objness[k * w + l]; + if (obj > 1e-5) { + input_grad[k * w + l] = + SigmoidCrossEntropyGrad(input[k * w + l], 1.0) * loss[i]; + } else if (obj > -0.5) { + input_grad[k * w + l] = + SigmoidCrossEntropyGrad(input[k * w + l], 0.0) * loss[i]; + } + } + } + objness += stride; + input += an_stride; + input_grad += an_stride; + } + } +} + +template +static void inline GtValid(bool* valid, const T* gtbox, const int n, + const int b) { + for (int i = 0; i < n; i++) { + for (int j = 0; j < b; j++) { + if (LessEqualZero(gtbox[j * 4 + 2]) || LessEqualZero(gtbox[j * 4 + 3])) { + valid[j] = false; + } else { + valid[j] = true; + } + } + valid += b; + gtbox += b * 4; + } +} + +template +class Yolov3LossKernel : public framework::OpKernel { + public: + void Compute(const framework::ExecutionContext& ctx) const override { + auto* input = ctx.Input("X"); + auto* gt_box = ctx.Input("GTBox"); + auto* gt_label = ctx.Input("GTLabel"); + auto* loss = ctx.Output("Loss"); + auto* objness_mask = ctx.Output("ObjectnessMask"); + auto* gt_match_mask = ctx.Output("GTMatchMask"); + auto anchors = ctx.Attr>("anchors"); + auto anchor_mask = ctx.Attr>("anchor_mask"); + int class_num = ctx.Attr("class_num"); + float ignore_thresh = ctx.Attr("ignore_thresh"); + int downsample_ratio = ctx.Attr("downsample_ratio"); + + const int n = input->dims()[0]; + const int h = input->dims()[2]; + const int w = input->dims()[3]; + const int an_num = anchors.size() / 2; + const int mask_num = anchor_mask.size(); + const int b = gt_box->dims()[1]; + int input_size = downsample_ratio * h; + + const int stride = h * w; + const int an_stride = (class_num + 5) * stride; + + const T* input_data = input->data(); + const T* gt_box_data = gt_box->data(); + const int* gt_label_data = gt_label->data(); + T* loss_data = loss->mutable_data({n}, ctx.GetPlace()); + memset(loss_data, 0, loss->numel() * sizeof(T)); + T* obj_mask_data = + objness_mask->mutable_data({n, mask_num, h, w}, ctx.GetPlace()); + memset(obj_mask_data, 0, objness_mask->numel() * sizeof(T)); + int* gt_match_mask_data = + gt_match_mask->mutable_data({n, b}, ctx.GetPlace()); + + // calc valid gt box mask, avoid calc duplicately in following code + Tensor gt_valid_mask; + bool* gt_valid_mask_data = + gt_valid_mask.mutable_data({n, b}, ctx.GetPlace()); + GtValid(gt_valid_mask_data, gt_box_data, n, b); + + for (int i = 0; i < n; i++) { + for (int j = 0; j < mask_num; j++) { + for (int k = 0; k < h; k++) { + for (int l = 0; l < w; l++) { + // each predict box find a best match gt box, if overlap is bigger + // then ignore_thresh, ignore the objectness loss. + int box_idx = + GetEntryIndex(i, j, k * w + l, mask_num, an_stride, stride, 0); + Box pred = GetYoloBox(input_data, anchors, l, k, anchor_mask[j], + h, input_size, box_idx, stride); + T best_iou = 0; + for (int t = 0; t < b; t++) { + if (!gt_valid_mask_data[i * b + t]) { + continue; + } + Box gt = GetGtBox(gt_box_data, i, b, t); + T iou = CalcBoxIoU(pred, gt); + if (iou > best_iou) { + best_iou = iou; + } + } + + // If best IoU is bigger then ignore_thresh, + // ignore the objectness loss. + if (best_iou > ignore_thresh) { + int obj_idx = (i * mask_num + j) * stride + k * w + l; + obj_mask_data[obj_idx] = static_cast(-1); + } + // all losses should be calculated if best IoU + // is bigger then truth thresh, but currently, + // truth thresh is an unreachable value as 1.0. + } + } + } + for (int t = 0; t < b; t++) { + if (!gt_valid_mask_data[i * b + t]) { + gt_match_mask_data[i * b + t] = -1; + continue; + } + Box gt = GetGtBox(gt_box_data, i, b, t); + int gi = static_cast(gt.x * w); + int gj = static_cast(gt.y * h); + Box gt_shift = gt; + gt_shift.x = 0.0; + gt_shift.y = 0.0; + T best_iou = 0.0; + int best_n = 0; + // each gt box find a best match anchor box as positive sample, + // for positive sample, all losses should be calculated, and for + // other samples, only objectness loss is required. + for (int an_idx = 0; an_idx < an_num; an_idx++) { + Box an_box; + an_box.x = 0.0; + an_box.y = 0.0; + an_box.w = anchors[2 * an_idx] / static_cast(input_size); + an_box.h = anchors[2 * an_idx + 1] / static_cast(input_size); + float iou = CalcBoxIoU(an_box, gt_shift); + if (iou > best_iou) { + best_iou = iou; + best_n = an_idx; + } + } + + int mask_idx = GetMaskIndex(anchor_mask, best_n); + gt_match_mask_data[i * b + t] = mask_idx; + if (mask_idx >= 0) { + int box_idx = GetEntryIndex(i, mask_idx, gj * w + gi, mask_num, + an_stride, stride, 0); + CalcBoxLocationLoss(loss_data + i, input_data, gt, anchors, best_n, + box_idx, gi, gj, h, input_size, stride); + + int obj_idx = (i * mask_num + mask_idx) * stride + gj * w + gi; + obj_mask_data[obj_idx] = 1.0; + + int label = gt_label_data[i * b + t]; + int label_idx = GetEntryIndex(i, mask_idx, gj * w + gi, mask_num, + an_stride, stride, 5); + CalcLabelLoss(loss_data + i, input_data, label_idx, label, + class_num, stride); + } + } + } + + CalcObjnessLoss(loss_data, input_data + 4 * stride, obj_mask_data, n, + mask_num, h, w, stride, an_stride); + } +}; + +template +class Yolov3LossGradKernel : public framework::OpKernel { + public: + void Compute(const framework::ExecutionContext& ctx) const override { + auto* input = ctx.Input("X"); + auto* gt_box = ctx.Input("GTBox"); + auto* gt_label = ctx.Input("GTLabel"); + auto* input_grad = ctx.Output(framework::GradVarName("X")); + auto* loss_grad = ctx.Input(framework::GradVarName("Loss")); + auto* objness_mask = ctx.Input("ObjectnessMask"); + auto* gt_match_mask = ctx.Input("GTMatchMask"); + auto anchors = ctx.Attr>("anchors"); + auto anchor_mask = ctx.Attr>("anchor_mask"); + int class_num = ctx.Attr("class_num"); + int downsample_ratio = ctx.Attr("downsample_ratio"); + + const int n = input_grad->dims()[0]; + const int c = input_grad->dims()[1]; + const int h = input_grad->dims()[2]; + const int w = input_grad->dims()[3]; + const int mask_num = anchor_mask.size(); + const int b = gt_match_mask->dims()[1]; + int input_size = downsample_ratio * h; + + const int stride = h * w; + const int an_stride = (class_num + 5) * stride; + + const T* input_data = input->data(); + const T* gt_box_data = gt_box->data(); + const int* gt_label_data = gt_label->data(); + const T* loss_grad_data = loss_grad->data(); + const T* obj_mask_data = objness_mask->data(); + const int* gt_match_mask_data = gt_match_mask->data(); + T* input_grad_data = + input_grad->mutable_data({n, c, h, w}, ctx.GetPlace()); + memset(input_grad_data, 0, input_grad->numel() * sizeof(T)); + + for (int i = 0; i < n; i++) { + for (int t = 0; t < b; t++) { + int mask_idx = gt_match_mask_data[i * b + t]; + if (mask_idx >= 0) { + Box gt = GetGtBox(gt_box_data, i, b, t); + int gi = static_cast(gt.x * w); + int gj = static_cast(gt.y * h); + + int box_idx = GetEntryIndex(i, mask_idx, gj * w + gi, mask_num, + an_stride, stride, 0); + CalcBoxLocationLossGrad( + input_grad_data, loss_grad_data[i], input_data, gt, anchors, + anchor_mask[mask_idx], box_idx, gi, gj, h, input_size, stride); + + int label = gt_label_data[i * b + t]; + int label_idx = GetEntryIndex(i, mask_idx, gj * w + gi, mask_num, + an_stride, stride, 5); + CalcLabelLossGrad(input_grad_data, loss_grad_data[i], input_data, + label_idx, label, class_num, stride); + } + } + } + + CalcObjnessLossGrad(input_grad_data + 4 * stride, loss_grad_data, + input_data + 4 * stride, obj_mask_data, n, mask_num, + h, w, stride, an_stride); + } +}; + +} // namespace operators +} // namespace paddle diff --git a/paddle/fluid/operators/dropout_op.cu b/paddle/fluid/operators/dropout_op.cu index d65491267de1ce3495d8b8250cf0cff570dfcc6a..7a6927d3e54b4ece8f17d7a1e7e431ba836edff9 100644 --- a/paddle/fluid/operators/dropout_op.cu +++ b/paddle/fluid/operators/dropout_op.cu @@ -114,4 +114,5 @@ REGISTER_OP_CUDA_KERNEL( ops::GPUDropoutKernel); REGISTER_OP_CUDA_KERNEL( dropout_grad, ops::DropoutGradKernel, + ops::DropoutGradKernel, ops::DropoutGradKernel); diff --git a/paddle/fluid/operators/elementwise/elementwise_add_mkldnn_op.cc b/paddle/fluid/operators/elementwise/mkldnn/elementwise_add_mkldnn_op.cc similarity index 100% rename from paddle/fluid/operators/elementwise/elementwise_add_mkldnn_op.cc rename to paddle/fluid/operators/elementwise/mkldnn/elementwise_add_mkldnn_op.cc diff --git a/paddle/fluid/operators/elementwise/elementwise_mul_mkldnn_op.cc b/paddle/fluid/operators/elementwise/mkldnn/elementwise_mul_mkldnn_op.cc similarity index 100% rename from paddle/fluid/operators/elementwise/elementwise_mul_mkldnn_op.cc rename to paddle/fluid/operators/elementwise/mkldnn/elementwise_mul_mkldnn_op.cc diff --git a/paddle/fluid/operators/gather_op.cu b/paddle/fluid/operators/gather_op.cu index 9f4aef08cd58e72ce344a640e6564b9e360ce169..490ba9a585ee8fac82a9e1178f506a6d39e5fd1c 100644 --- a/paddle/fluid/operators/gather_op.cu +++ b/paddle/fluid/operators/gather_op.cu @@ -31,7 +31,7 @@ class GatherOpCUDAKernel : public framework::OpKernel { auto *output = ctx.Output("Out"); output->mutable_data(ctx.GetPlace()); - + if (x->numel() == 0) return; GPUGather(ctx.device_context(), *x, *index, output); } }; @@ -45,14 +45,13 @@ class GatherGradOpCUDAKernel : public framework::OpKernel { auto *Index = ctx.Input("Index"); auto *dX = ctx.Output(framework::GradVarName("X")); auto *dO = ctx.Input(framework::GradVarName("Out")); - auto *x = ctx.Input("X"); dX->mutable_data(ctx.GetPlace()); auto dxt = framework::EigenVector::Flatten(*dX); auto &place = *ctx.template device_context() .eigen_device(); dxt.device(place) = dxt.constant(static_cast(0)); - + if (dO->numel() == 0) return; GPUScatterAssign(ctx.device_context(), *dO, *Index, dX); } }; @@ -61,11 +60,14 @@ class GatherGradOpCUDAKernel : public framework::OpKernel { } // namespace paddle namespace ops = paddle::operators; +namespace plat = paddle::platform; REGISTER_OP_CUDA_KERNEL(gather, ops::GatherOpCUDAKernel, ops::GatherOpCUDAKernel, ops::GatherOpCUDAKernel, - ops::GatherOpCUDAKernel); + ops::GatherOpCUDAKernel, + ops::GatherOpCUDAKernel); REGISTER_OP_CUDA_KERNEL(gather_grad, ops::GatherGradOpCUDAKernel, ops::GatherGradOpCUDAKernel, ops::GatherGradOpCUDAKernel, - ops::GatherGradOpCUDAKernel); + ops::GatherGradOpCUDAKernel, + ops::GatherGradOpCUDAKernel); diff --git a/paddle/fluid/operators/gather_op.h b/paddle/fluid/operators/gather_op.h index 2dd726bebb1bc2e4d83844c0b98df01c390e622f..2e18298cf8e34d5f70369c89b3b3b2a9ced0ce62 100644 --- a/paddle/fluid/operators/gather_op.h +++ b/paddle/fluid/operators/gather_op.h @@ -35,7 +35,7 @@ class GatherOpKernel : public framework::OpKernel { auto *output = ctx.Output("Out"); output->mutable_data(ctx.GetPlace()); - + if (x->numel() == 0) return; CPUGather(ctx.device_context(), *x, *index, output); } }; @@ -56,7 +56,7 @@ class GatherGradientOpKernel : public framework::OpKernel { auto &place = *ctx.template device_context() .eigen_device(); dxt.device(place) = dxt.constant(static_cast(0)); - + if (dO->numel() == 0) return; ScatterAssign(ctx.device_context(), *dO, *Index, dX); } }; diff --git a/paddle/fluid/operators/lookup_table_op.cu b/paddle/fluid/operators/lookup_table_op.cu index fd15539f7b6727496988c9b13d0d2551659a420a..0af8b9e69cfe09890f28ef2028baa19319a5c379 100644 --- a/paddle/fluid/operators/lookup_table_op.cu +++ b/paddle/fluid/operators/lookup_table_op.cu @@ -17,6 +17,7 @@ limitations under the License. */ #include "paddle/fluid/operators/lookup_table_op.h" #include "paddle/fluid/platform/assert.h" #include "paddle/fluid/platform/cuda_primitives.h" +#include "paddle/fluid/platform/float16.h" namespace paddle { namespace operators { @@ -193,8 +194,11 @@ class LookupTableGradCUDAKernel : public framework::OpKernel { } // namespace paddle namespace ops = paddle::operators; +namespace plat = paddle::platform; REGISTER_OP_CUDA_KERNEL(lookup_table, ops::LookupTableCUDAKernel, - ops::LookupTableCUDAKernel); + ops::LookupTableCUDAKernel, + ops::LookupTableCUDAKernel); REGISTER_OP_CUDA_KERNEL(lookup_table_grad, ops::LookupTableGradCUDAKernel, - ops::LookupTableGradCUDAKernel); + ops::LookupTableGradCUDAKernel, + ops::LookupTableGradCUDAKernel); diff --git a/paddle/fluid/operators/math/beam_search.cc b/paddle/fluid/operators/math/beam_search.cc index fb7119273a734feba870fdabade6a4faa1d5e9a3..69971ef7423eff6bc3f8543a491edb6b0bbd00ca 100644 --- a/paddle/fluid/operators/math/beam_search.cc +++ b/paddle/fluid/operators/math/beam_search.cc @@ -29,8 +29,9 @@ class BeamSearchFunctor { const framework::LoDTensor *ids, const framework::LoDTensor *scores, framework::LoDTensor *selected_ids, - framework::LoDTensor *selected_scores, size_t level, - size_t beam_size, int end_id, bool is_accumulated) { + framework::LoDTensor *selected_scores, + framework::Tensor *parent_idx, size_t level, size_t beam_size, + int end_id, bool is_accumulated) { auto abs_lod = framework::ToAbsOffset(scores->lod()); auto &high_level = abs_lod[level]; @@ -57,11 +58,13 @@ class BeamSearchFunctor { std::vector({static_cast(num_instances), 1})); selected_ids->Resize(dims); selected_scores->Resize(dims); + parent_idx->Resize({static_cast(num_instances)}); auto *selected_ids_data = selected_ids->mutable_data(platform::CPUPlace()); auto *selected_scores_data = selected_scores->mutable_data(platform::CPUPlace()); + auto *parent_idx_data = parent_idx->mutable_data(platform::CPUPlace()); // fill in data std::vector low_level; @@ -69,6 +72,7 @@ class BeamSearchFunctor { for (auto &items : selected_items) { low_level.push_back(low_offset); for (auto &item : items) { + parent_idx_data[low_offset] = static_cast(low_level.size() - 1); selected_ids_data[low_offset] = item.id; selected_scores_data[low_offset] = item.score; low_offset++; diff --git a/paddle/fluid/operators/math/beam_search.cu b/paddle/fluid/operators/math/beam_search.cu index d94e3023ce537cb9fa456e079c4fa3cf57fb954d..61d021ef627f1ccd90b992c2078a7f3ca879422d 100644 --- a/paddle/fluid/operators/math/beam_search.cu +++ b/paddle/fluid/operators/math/beam_search.cu @@ -157,10 +157,10 @@ __device__ __forceinline__ bool PruneEndBeams(Triple* top_beam_local, } __device__ __forceinline__ void WriteBack( - int64_t* selected_ids, float* selected_scores, size_t* selected_offsets, - Triple* top_beam_local, const int seq_offset_start, - const int seq_offset_end, const int selected_seq_start, - const int selected_seq_length) { + int64_t* selected_ids, float* selected_scores, int* parent_idx, + size_t* selected_offsets, Triple* top_beam_local, + const int seq_offset_start, const int seq_offset_end, + const int selected_seq_start, const int selected_seq_length) { const int tid = threadIdx.x; // use 1 thread only for each sequence int global_index = selected_seq_start; for (int global_offset = seq_offset_start; global_offset < seq_offset_end; @@ -171,6 +171,7 @@ __device__ __forceinline__ void WriteBack( selected_ids[global_index] = static_cast(top_beam_local[local_index].id); selected_scores[global_index] = top_beam_local[local_index].score; + parent_idx[global_index] = static_cast(global_offset); global_index++; } } @@ -180,11 +181,11 @@ __device__ __forceinline__ void WriteBack( template __device__ void BeamSearchDetails( - int64_t* selected_ids, float* selected_scores, size_t* selected_offsets, - const int64_t* pre_ids, const float* pre_scores, const int64_t* ids, - const float* scores, const int seq_offset_start, const int seq_offset_end, - const int seq_width, int beam_size, int end_id, bool is_accumulated, - int num_used_threads) { + int64_t* selected_ids, float* selected_scores, int* parent_idx, + size_t* selected_offsets, const int64_t* pre_ids, const float* pre_scores, + const int64_t* ids, const float* scores, const int seq_offset_start, + const int seq_offset_end, const int seq_width, int beam_size, int end_id, + bool is_accumulated, int num_used_threads) { __shared__ Triple top_beam[MaxLength]; int num_items = 0; @@ -228,15 +229,15 @@ __device__ void BeamSearchDetails( selected_offsets[0] = 0; } - WriteBack(selected_ids, selected_scores, selected_offsets, top_beam_local, - seq_offset_start, seq_offset_end, selected_seq_start, - selected_seq_length); + WriteBack(selected_ids, selected_scores, parent_idx, selected_offsets, + top_beam_local, seq_offset_start, seq_offset_end, + selected_seq_start, selected_seq_length); } } template __global__ void BeamSearchKernel(int64_t* selected_ids, float* selected_scores, - size_t* selected_offsets, + int* parent_idx, size_t* selected_offsets, const int64_t* pre_ids, const float* pre_scores, const int64_t* ids, const float* scores, const size_t* seq_offsets, @@ -250,24 +251,25 @@ __global__ void BeamSearchKernel(int64_t* selected_ids, float* selected_scores, int seq_offset_end = static_cast(seq_offsets[seq_id + 1]); BeamSearchDetails( - selected_ids, selected_scores, selected_offsets, pre_ids, pre_scores, ids, - scores, seq_offset_start, seq_offset_end, seq_width, beam_size, end_id, - is_accumulated, num_used_threads); + selected_ids, selected_scores, parent_idx, selected_offsets, pre_ids, + pre_scores, ids, scores, seq_offset_start, seq_offset_end, seq_width, + beam_size, end_id, is_accumulated, num_used_threads); } template __global__ void BeamSearchKernelSingle( - int64_t* selected_ids, float* selected_scores, size_t* selected_offsets, - const int64_t* pre_ids, const float* pre_scores, const int64_t* ids, - const float* scores, const int seq_length, const int seq_width, - int beam_size, int end_id, bool is_accumulated, int num_used_threads) { + int64_t* selected_ids, float* selected_scores, int* parent_idx, + size_t* selected_offsets, const int64_t* pre_ids, const float* pre_scores, + const int64_t* ids, const float* scores, const int seq_length, + const int seq_width, int beam_size, int end_id, bool is_accumulated, + int num_used_threads) { const int seq_offset_start = 0; const int seq_offset_end = seq_length; BeamSearchDetails( - selected_ids, selected_scores, selected_offsets, pre_ids, pre_scores, ids, - scores, seq_offset_start, seq_offset_end, seq_width, beam_size, end_id, - is_accumulated, num_used_threads); + selected_ids, selected_scores, parent_idx, selected_offsets, pre_ids, + pre_scores, ids, scores, seq_offset_start, seq_offset_end, seq_width, + beam_size, end_id, is_accumulated, num_used_threads); } static inline int GetNumUsedThreads(const int max_threads_per_seq, @@ -300,8 +302,9 @@ class BeamSearchFunctor { const framework::LoDTensor* ids, const framework::LoDTensor* scores, framework::LoDTensor* selected_ids, - framework::LoDTensor* selected_scores, size_t level, - size_t beam_size, int end_id, bool is_accumulated) { + framework::LoDTensor* selected_scores, + framework::Tensor* parent_idx, size_t level, size_t beam_size, + int end_id, bool is_accumulated) { auto abs_lod = framework::ToAbsOffset(scores->lod()); const int64_t* pre_ids_data = pre_ids->data(); @@ -322,6 +325,8 @@ class BeamSearchFunctor { selected_ids->mutable_data(selected_dims, context.GetPlace()); float* selected_scores_data = selected_scores->mutable_data(selected_dims, context.GetPlace()); + int* parent_idx_data = parent_idx->mutable_data( + {static_cast(num_seqs * beam_size)}, context.GetPlace()); framework::LoD selected_lod(2); selected_lod[0].assign(abs_lod[level].begin(), abs_lod[level].end()); @@ -339,9 +344,9 @@ class BeamSearchFunctor { CUDA_LAUNCH_KERNEL_HELPER( BeamSearchKernelSingle<<< 1, kMaxThreadsPerSeq, 0, context.stream()>>>( - selected_ids_data, selected_scores_data, selected_offsets, - pre_ids_data, pre_scores_data, ids_data, scores_data, - seq_length, static_cast(seq_width), + selected_ids_data, selected_scores_data, parent_idx_data, + selected_offsets, pre_ids_data, pre_scores_data, ids_data, + scores_data, seq_length, static_cast(seq_width), static_cast(beam_size), static_cast(end_id), is_accumulated, num_used_threads)); } @@ -357,9 +362,9 @@ class BeamSearchFunctor { CUDA_LAUNCH_KERNEL_HELPER( BeamSearchKernel<<< 1, num_seqs * kMaxThreadsPerSeq, 0, context.stream()>>>( - selected_ids_data, selected_scores_data, selected_offsets, - pre_ids_data, pre_scores_data, ids_data, scores_data, - seq_offsets, static_cast(num_seqs), + selected_ids_data, selected_scores_data, parent_idx_data, + selected_offsets, pre_ids_data, pre_scores_data, ids_data, + scores_data, seq_offsets, static_cast(num_seqs), static_cast(seq_width), static_cast(beam_size), end_id, is_accumulated, num_used_threads)); } @@ -379,6 +384,7 @@ class BeamSearchFunctor { {static_cast(selected_lod[1].back()), 1}); selected_ids->Resize(final_selected_dims); selected_scores->Resize(final_selected_dims); + parent_idx->Resize({static_cast(selected_lod[1].back())}); } } }; diff --git a/paddle/fluid/operators/math/beam_search.h b/paddle/fluid/operators/math/beam_search.h index 3cd17f426c5596582c91f2b3f0cc5ba513e3aa4b..4474e7ea52affed792572d02202ec2577c471e50 100644 --- a/paddle/fluid/operators/math/beam_search.h +++ b/paddle/fluid/operators/math/beam_search.h @@ -104,14 +104,12 @@ class BeamSearchFunctor { * Return false if all the input tensor is empty, in machine translation task * that means no candidates is provided, and the task will stop running. */ - void operator()(const DeviceContext& context, - const framework::LoDTensor* pre_ids, - const framework::LoDTensor* pre_scores, - const framework::LoDTensor* ids, - const framework::LoDTensor* scores, - framework::LoDTensor* selected_ids, - framework::LoDTensor* selected_scores, size_t level, - size_t beam_size, int end_id, bool is_accumulated); + void operator()( + const DeviceContext& context, const framework::LoDTensor* pre_ids, + const framework::LoDTensor* pre_scores, const framework::LoDTensor* ids, + const framework::LoDTensor* scores, framework::LoDTensor* selected_ids, + framework::LoDTensor* selected_scores, framework::Tensor* parent_idx, + size_t level, size_t beam_size, int end_id, bool is_accumulated); }; } // namespace math diff --git a/paddle/fluid/operators/math/beam_search_test.cc b/paddle/fluid/operators/math/beam_search_test.cc index 1c29ee95f6b109209316e4e8c8f3cda37eac62ae..7ea8eb8b00db328ca13d3d33d751aca4eac66dae 100644 --- a/paddle/fluid/operators/math/beam_search_test.cc +++ b/paddle/fluid/operators/math/beam_search_test.cc @@ -93,13 +93,14 @@ void TestBeamSearch() { paddle::framework::LoDTensor selected_ids; paddle::framework::LoDTensor selected_scores; + paddle::framework::LoDTensor parent_idx; size_t level = 0; size_t beam_size = 2; int end_id = 0; paddle::operators::math::BeamSearchFunctor beamsearch; beamsearch(*context, &pre_ids, &pre_scores, &ids, &scores, &selected_ids, - &selected_scores, level, beam_size, end_id, true); + &selected_scores, &parent_idx, level, beam_size, end_id, true); ASSERT_EQ(selected_ids.lod(), selected_scores.lod()); diff --git a/paddle/fluid/operators/activation_mkldnn_op.cc b/paddle/fluid/operators/mkldnn/activation_mkldnn_op.cc similarity index 100% rename from paddle/fluid/operators/activation_mkldnn_op.cc rename to paddle/fluid/operators/mkldnn/activation_mkldnn_op.cc diff --git a/paddle/fluid/operators/batch_norm_mkldnn_op.cc b/paddle/fluid/operators/mkldnn/batch_norm_mkldnn_op.cc similarity index 100% rename from paddle/fluid/operators/batch_norm_mkldnn_op.cc rename to paddle/fluid/operators/mkldnn/batch_norm_mkldnn_op.cc diff --git a/paddle/fluid/operators/concat_mkldnn_op.cc b/paddle/fluid/operators/mkldnn/concat_mkldnn_op.cc similarity index 100% rename from paddle/fluid/operators/concat_mkldnn_op.cc rename to paddle/fluid/operators/mkldnn/concat_mkldnn_op.cc diff --git a/paddle/fluid/operators/conv_mkldnn_op.cc b/paddle/fluid/operators/mkldnn/conv_mkldnn_op.cc similarity index 100% rename from paddle/fluid/operators/conv_mkldnn_op.cc rename to paddle/fluid/operators/mkldnn/conv_mkldnn_op.cc diff --git a/paddle/fluid/operators/conv_transpose_mkldnn_op.cc b/paddle/fluid/operators/mkldnn/conv_transpose_mkldnn_op.cc similarity index 100% rename from paddle/fluid/operators/conv_transpose_mkldnn_op.cc rename to paddle/fluid/operators/mkldnn/conv_transpose_mkldnn_op.cc diff --git a/paddle/fluid/operators/dequantize_mkldnn_op.cc b/paddle/fluid/operators/mkldnn/dequantize_mkldnn_op.cc similarity index 100% rename from paddle/fluid/operators/dequantize_mkldnn_op.cc rename to paddle/fluid/operators/mkldnn/dequantize_mkldnn_op.cc diff --git a/paddle/fluid/operators/fc_mkldnn_op.cc b/paddle/fluid/operators/mkldnn/fc_mkldnn_op.cc similarity index 100% rename from paddle/fluid/operators/fc_mkldnn_op.cc rename to paddle/fluid/operators/mkldnn/fc_mkldnn_op.cc diff --git a/paddle/fluid/operators/gaussian_random_mkldnn_op.cc b/paddle/fluid/operators/mkldnn/gaussian_random_mkldnn_op.cc similarity index 100% rename from paddle/fluid/operators/gaussian_random_mkldnn_op.cc rename to paddle/fluid/operators/mkldnn/gaussian_random_mkldnn_op.cc diff --git a/paddle/fluid/operators/lrn_mkldnn_op.cc b/paddle/fluid/operators/mkldnn/lrn_mkldnn_op.cc similarity index 100% rename from paddle/fluid/operators/lrn_mkldnn_op.cc rename to paddle/fluid/operators/mkldnn/lrn_mkldnn_op.cc diff --git a/paddle/fluid/operators/mkldnn_activation_op.h b/paddle/fluid/operators/mkldnn/mkldnn_activation_op.h similarity index 100% rename from paddle/fluid/operators/mkldnn_activation_op.h rename to paddle/fluid/operators/mkldnn/mkldnn_activation_op.h diff --git a/paddle/fluid/operators/pool_mkldnn_op.cc b/paddle/fluid/operators/mkldnn/pool_mkldnn_op.cc similarity index 100% rename from paddle/fluid/operators/pool_mkldnn_op.cc rename to paddle/fluid/operators/mkldnn/pool_mkldnn_op.cc diff --git a/paddle/fluid/operators/quantize_mkldnn_op.cc b/paddle/fluid/operators/mkldnn/quantize_mkldnn_op.cc similarity index 100% rename from paddle/fluid/operators/quantize_mkldnn_op.cc rename to paddle/fluid/operators/mkldnn/quantize_mkldnn_op.cc diff --git a/paddle/fluid/operators/softmax_mkldnn_op.cc b/paddle/fluid/operators/mkldnn/softmax_mkldnn_op.cc similarity index 100% rename from paddle/fluid/operators/softmax_mkldnn_op.cc rename to paddle/fluid/operators/mkldnn/softmax_mkldnn_op.cc diff --git a/paddle/fluid/operators/sum_mkldnn_op.cc b/paddle/fluid/operators/mkldnn/sum_mkldnn_op.cc similarity index 100% rename from paddle/fluid/operators/sum_mkldnn_op.cc rename to paddle/fluid/operators/mkldnn/sum_mkldnn_op.cc diff --git a/paddle/fluid/operators/transpose_mkldnn_op.cc b/paddle/fluid/operators/mkldnn/transpose_mkldnn_op.cc similarity index 100% rename from paddle/fluid/operators/transpose_mkldnn_op.cc rename to paddle/fluid/operators/mkldnn/transpose_mkldnn_op.cc diff --git a/paddle/fluid/operators/ngraph/ngraph_bridge.cc b/paddle/fluid/operators/ngraph/ngraph_bridge.cc index d6e897ed4666261cdd0bd6565f61abb218d971e5..13b168ce4553c3377a62d9781b185fa7303c1136 100644 --- a/paddle/fluid/operators/ngraph/ngraph_bridge.cc +++ b/paddle/fluid/operators/ngraph/ngraph_bridge.cc @@ -38,6 +38,8 @@ std::map +#include + +#include "ngraph/ngraph.hpp" +#include "paddle/fluid/platform/ngraph_helper.h" + +namespace paddle { +namespace operators { +namespace ngraphs { + +void BuildPool2dNode( + const std::shared_ptr& op, + std::shared_ptr< + std::unordered_map>> + ngb_node_map) { + auto op_attrs = paddle::framework::AttrReader(op->Attrs()); + auto x = paddle::platform::GetInputNode(op, "X", ngb_node_map); + auto x_shape = x->get_shape(); + + std::string pooling_type = op_attrs.Get("pooling_type"); + std::vector ksize = op_attrs.Get>("ksize"); + std::vector strides = op_attrs.Get>("strides"); + std::vector paddings = op_attrs.Get>("paddings"); + + PADDLE_ENFORCE_EQ(x_shape.size() - 2, ksize.size(), + "Handling 2d pooling only"); + + if (op_attrs.Get("global_pooling")) { + for (size_t i = 0; i < ksize.size(); ++i) { + paddings[i] = 0; + ksize[i] = static_cast(x_shape.at(i + 2)); + } + } + + ngraph::Shape ng_padding_below{static_cast(paddings.at(0)), + static_cast(paddings.at(1))}; + ngraph::Shape ng_padding_above{static_cast(paddings.at(0)), + static_cast(paddings.at(1))}; + ngraph::Shape ng_ksize_shape{static_cast(ksize.at(0)), + static_cast(ksize.at(1))}; + ngraph::Strides ng_strides{static_cast(strides.at(0)), + static_cast(strides.at(1))}; + + auto ComputeCeiledOutput = [](size_t in, size_t k, size_t p, size_t s) { + return (in - k + 2 * p) / s + 1; + }; + + if (op_attrs.Get("ceil_mode")) { + auto dummy_out = paddle::platform::GetOutputNode(op, "Out", ngb_node_map); + auto dummpy_shape = dummy_out->get_shape(); + for (size_t i = 0; i < ng_padding_above.size(); ++i) { + auto desired_size = ComputeCeiledOutput(x_shape[i + 2], ksize[i], + paddings[i], strides[i]); + if (desired_size != dummpy_shape[i + 2]) { + ng_padding_above[i] += strides[i]; + } + } + } + + bool padding_exclusive = op_attrs.Get("exclusive"); + if (pooling_type == "max") { + auto pool2d = std::make_shared( + x, ng_ksize_shape, ng_strides, ng_padding_below, ng_padding_above); + paddle::platform::SetOutputNode(op, "Out", pool2d, ngb_node_map); + } else if (pooling_type == "avg") { + std::shared_ptr pool2d; + if (op_attrs.Get("adaptive")) { + auto ComputeAdaptive = [](size_t in, size_t k) { + return std::floor(in / k); + }; + ng_strides[0] = x_shape.size() == 4 + ? ComputeAdaptive(x_shape[3], ksize[0]) + : ng_strides[0]; + ng_strides[1] = x_shape.size() == 4 + ? ComputeAdaptive(x_shape[3], ksize[0]) + : ng_strides[1]; + pool2d = + std::make_shared(x, ng_ksize_shape, ng_strides); + } else { + pool2d = std::make_shared( + x, ng_ksize_shape, ng_strides, ng_padding_below, ng_padding_above, + !padding_exclusive); + } + paddle::platform::SetOutputNode(op, "Out", pool2d, ngb_node_map); + } else { + PADDLE_THROW("Support max and avg pooling only"); + } +} + +void BuildPool2dGradNode( + const std::shared_ptr& op, + std::shared_ptr< + std::unordered_map>> + ngb_node_map) { + auto op_attrs = paddle::framework::AttrReader(op->Attrs()); + auto out = paddle::platform::GetInputNode(op, "Out", ngb_node_map); + auto dout = paddle::platform::GetInputNode(op, "Out@GRAD", ngb_node_map); + auto x = paddle::platform::GetInputNode(op, "X", ngb_node_map); + auto x_shape = x->get_shape(); + + std::string pooling_type = op_attrs.Get("pooling_type"); + std::vector ksize = op_attrs.Get>("ksize"); + std::vector strides = op_attrs.Get>("strides"); + std::vector paddings = op_attrs.Get>("paddings"); + + PADDLE_ENFORCE_EQ(x_shape.size() - 2, ksize.size(), + "Handling 2d pooling only"); + + if (op_attrs.Get("global_pooling")) { + for (size_t i = 0; i < ksize.size(); ++i) { + paddings[i] = 0; + ksize[i] = static_cast(x_shape.at(i + 2)); + } + } + + ngraph::Shape ng_padding_below{static_cast(paddings.at(0)), + static_cast(paddings.at(1))}; + ngraph::Shape ng_padding_above{static_cast(paddings.at(0)), + static_cast(paddings.at(1))}; + ngraph::Shape ng_ksize_shape{static_cast(ksize.at(0)), + static_cast(ksize.at(1))}; + ngraph::Strides ng_strides{static_cast(strides.at(0)), + static_cast(strides.at(1))}; + + bool padding_exclusive = op_attrs.Get("exclusive"); + if (pooling_type == "max") { + auto pool2d_grad = std::make_shared( + x, dout, out, ng_ksize_shape, ng_strides, ng_padding_below, + ng_padding_above); + paddle::platform::SetOutputNode(op, "X@GRAD", pool2d_grad, ngb_node_map); + } else if (pooling_type == "avg") { + std::shared_ptr pool2d_grad; + if (op_attrs.Get("adaptive")) { + auto ComputeAdaptive = [](size_t in, size_t k) { + return std::floor(in / k); + }; + ng_strides[0] = x_shape.size() == 4 + ? ComputeAdaptive(x_shape[3], ksize[0]) + : ng_strides[0]; + ng_strides[1] = x_shape.size() == 4 + ? ComputeAdaptive(x_shape[3], ksize[0]) + : ng_strides[1]; + pool2d_grad = std::make_shared( + x->get_shape(), dout, ng_ksize_shape, ng_strides, ng_padding_below, + ng_padding_above, !padding_exclusive); + } else { + pool2d_grad = std::make_shared( + x->get_shape(), dout, ng_ksize_shape, ng_strides, ng_padding_below, + ng_padding_above, !padding_exclusive); + } + paddle::platform::SetOutputNode(op, "X@GRAD", pool2d_grad, ngb_node_map); + } else { + PADDLE_THROW("Support max and avg pooling only"); + } +} +} // namespace ngraphs +} // namespace operators +} // namespace paddle diff --git a/paddle/fluid/operators/reshape_op.cc b/paddle/fluid/operators/reshape_op.cc index 8eab3a6f891f1dfa91c5ce316f1419df2cd42248..32365d6a9602fa8ad2c01b59c8cd361d52ed973f 100644 --- a/paddle/fluid/operators/reshape_op.cc +++ b/paddle/fluid/operators/reshape_op.cc @@ -330,6 +330,7 @@ class Reshape2GradOp : public framework::OperatorWithKernel { } // namespace operators } // namespace paddle namespace ops = paddle::operators; +namespace plat = paddle::platform; REGISTER_OPERATOR(reshape, ops::ReshapeOp, ops::ReshapeOpMaker, paddle::framework::DefaultGradOpDescMaker); @@ -356,16 +357,20 @@ REGISTER_OP_CPU_KERNEL_FUNCTOR(reshape2_grad, float, ops::ReshapeGradKernel, #ifdef PADDLE_WITH_CUDA REGISTER_OP_CUDA_KERNEL_FUNCTOR(reshape, float, ops::ReshapeKernel, double, ops::ReshapeKernel, int, ops::ReshapeKernel, - int64_t, ops::ReshapeKernel); + int64_t, ops::ReshapeKernel, plat::float16, + ops::ReshapeKernel); REGISTER_OP_CUDA_KERNEL_FUNCTOR(reshape_grad, float, ops::ReshapeGradKernel, double, ops::ReshapeGradKernel, int, ops::ReshapeGradKernel, int64_t, + ops::ReshapeGradKernel, plat::float16, ops::ReshapeGradKernel); REGISTER_OP_CUDA_KERNEL_FUNCTOR(reshape2, float, ops::ReshapeKernel, double, ops::ReshapeKernel, int, ops::ReshapeKernel, - int64_t, ops::ReshapeKernel); + int64_t, ops::ReshapeKernel, plat::float16, + ops::ReshapeKernel); REGISTER_OP_CUDA_KERNEL_FUNCTOR(reshape2_grad, float, ops::ReshapeGradKernel, double, ops::ReshapeGradKernel, int, ops::ReshapeGradKernel, int64_t, + ops::ReshapeGradKernel, plat::float16, ops::ReshapeGradKernel); #endif diff --git a/paddle/fluid/operators/slice_op.cc b/paddle/fluid/operators/slice_op.cc index 789e61b2d332b9391ef45a8ebe58ad0f1a4d2bf0..94995fc99612adb1164e60f1a51747f74eacfb73 100644 --- a/paddle/fluid/operators/slice_op.cc +++ b/paddle/fluid/operators/slice_op.cc @@ -54,6 +54,9 @@ class SliceOp : public framework::OperatorWithKernel { out_dims[axes[i]] = end - start; } ctx->SetOutputDim("Out", out_dims); + if (axes[0] != 0) { + ctx->ShareLoD("Input", /*->*/ "Out"); + } } protected: diff --git a/paddle/fluid/operators/stack_op.cu b/paddle/fluid/operators/stack_op.cu index bf2a9e5b3d22996e688621727cb280dc9aed7859..24d0b2f906a8e0b360c3f477c9290ebe5d57a3ff 100644 --- a/paddle/fluid/operators/stack_op.cu +++ b/paddle/fluid/operators/stack_op.cu @@ -17,13 +17,16 @@ namespace plat = paddle::platform; namespace ops = paddle::operators; -REGISTER_OP_CUDA_KERNEL(stack, ops::StackKernel, - ops::StackKernel, - ops::StackKernel, - ops::StackKernel); +REGISTER_OP_CUDA_KERNEL( + stack, ops::StackKernel, + ops::StackKernel, + ops::StackKernel, + ops::StackKernel, + ops::StackKernel); -REGISTER_OP_CUDA_KERNEL(stack_grad, - ops::StackGradKernel, - ops::StackGradKernel, - ops::StackGradKernel, - ops::StackGradKernel); +REGISTER_OP_CUDA_KERNEL( + stack_grad, ops::StackGradKernel, + ops::StackGradKernel, + ops::StackGradKernel, + ops::StackGradKernel, + ops::StackGradKernel); diff --git a/paddle/fluid/operators/transpose_op.cu.cc b/paddle/fluid/operators/transpose_op.cu.cc index b4025350fa9f3610bde43eee91cd059f3063813f..915774e5f3624f26dbd1451a99d7bf0bf75a72c8 100644 --- a/paddle/fluid/operators/transpose_op.cu.cc +++ b/paddle/fluid/operators/transpose_op.cu.cc @@ -15,19 +15,27 @@ limitations under the License. */ #include "paddle/fluid/operators/transpose_op.h" namespace ops = paddle::operators; +namespace plat = paddle::platform; + REGISTER_OP_CUDA_KERNEL( transpose, ops::TransposeKernel, - ops::TransposeKernel); + ops::TransposeKernel, + ops::TransposeKernel); REGISTER_OP_CUDA_KERNEL( transpose_grad, ops::TransposeGradKernel, - ops::TransposeGradKernel); + ops::TransposeGradKernel, + ops::TransposeGradKernel); REGISTER_OP_CUDA_KERNEL( transpose2, ops::TransposeKernel, - ops::TransposeKernel); + ops::TransposeKernel, + ops::TransposeKernel); REGISTER_OP_CUDA_KERNEL( transpose2_grad, ops::TransposeGradKernel, - ops::TransposeGradKernel); + ops::TransposeGradKernel, + ops::TransposeGradKernel); diff --git a/paddle/fluid/operators/yolov3_loss_op.h b/paddle/fluid/operators/yolov3_loss_op.h deleted file mode 100644 index 0bb285722ddedf721d98237760ec9868e2134442..0000000000000000000000000000000000000000 --- a/paddle/fluid/operators/yolov3_loss_op.h +++ /dev/null @@ -1,483 +0,0 @@ -/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve. - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - http://www.apache.org/licenses/LICENSE-2.0 - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. */ - -#pragma once -#include -#include -#include "paddle/fluid/framework/op_registry.h" - -namespace paddle { -namespace operators { - -using Tensor = framework::Tensor; -template -using EigenTensor = framework::EigenTensor; -template -using EigenVector = framework::EigenVector; - -using Array5 = Eigen::DSizes; - -template -static inline bool isZero(T x) { - return fabs(x) < 1e-6; -} - -template -static inline T sigmoid(T x) { - return 1.0 / (exp(-1.0 * x) + 1.0); -} - -template -static inline T CalcMaskPointNum(const Tensor& mask) { - auto mask_t = EigenVector::Flatten(mask); - T count = 0.0; - for (int i = 0; i < mask_t.dimensions()[0]; i++) { - if (mask_t(i)) { - count += 1.0; - } - } - return count; -} - -template -static inline T CalcMSEWithMask(const Tensor& x, const Tensor& y, - const Tensor& mask) { - auto x_t = EigenVector::Flatten(x); - auto y_t = EigenVector::Flatten(y); - auto mask_t = EigenVector::Flatten(mask); - - T error_sum = 0.0; - T points = 0.0; - for (int i = 0; i < x_t.dimensions()[0]; i++) { - if (mask_t(i)) { - error_sum += pow(x_t(i) - y_t(i), 2); - points += 1; - } - } - return (error_sum / points); -} - -template -static void CalcMSEGradWithMask(Tensor* grad, const Tensor& x, const Tensor& y, - const Tensor& mask, T mf) { - auto grad_t = EigenVector::Flatten(*grad).setConstant(0.0); - auto x_t = EigenVector::Flatten(x); - auto y_t = EigenVector::Flatten(y); - auto mask_t = EigenVector::Flatten(mask); - - for (int i = 0; i < x_t.dimensions()[0]; i++) { - if (mask_t(i)) { - grad_t(i) = 2.0 * (x_t(i) - y_t(i)) / mf; - } - } -} - -template -static inline T CalcBCEWithMask(const Tensor& x, const Tensor& y, - const Tensor& mask) { - auto x_t = EigenVector::Flatten(x); - auto y_t = EigenVector::Flatten(y); - auto mask_t = EigenVector::Flatten(mask); - - T error_sum = 0.0; - T points = 0.0; - for (int i = 0; i < x_t.dimensions()[0]; i++) { - if (mask_t(i)) { - error_sum += - -1.0 * (y_t(i) * log(x_t(i)) + (1.0 - y_t(i)) * log(1.0 - x_t(i))); - points += 1; - } - } - return (error_sum / points); -} - -template -static inline void CalcBCEGradWithMask(Tensor* grad, const Tensor& x, - const Tensor& y, const Tensor& mask, - T mf) { - auto grad_t = EigenVector::Flatten(*grad).setConstant(0.0); - auto x_t = EigenVector::Flatten(x); - auto y_t = EigenVector::Flatten(y); - auto mask_t = EigenVector::Flatten(mask); - - for (int i = 0; i < x_t.dimensions()[0]; i++) { - if (mask_t(i)) { - grad_t(i) = ((1.0 - y_t(i)) / (1.0 - x_t(i)) - y_t(i) / x_t(i)) / mf; - } - } -} - -template -static void CalcPredResult(const Tensor& input, Tensor* pred_conf, - Tensor* pred_class, Tensor* pred_x, Tensor* pred_y, - Tensor* pred_w, Tensor* pred_h, const int anchor_num, - const int class_num) { - const int n = input.dims()[0]; - const int h = input.dims()[2]; - const int w = input.dims()[3]; - const int box_attr_num = 5 + class_num; - - auto input_t = EigenTensor::From(input); - auto pred_conf_t = EigenTensor::From(*pred_conf); - auto pred_class_t = EigenTensor::From(*pred_class); - auto pred_x_t = EigenTensor::From(*pred_x); - auto pred_y_t = EigenTensor::From(*pred_y); - auto pred_w_t = EigenTensor::From(*pred_w); - auto pred_h_t = EigenTensor::From(*pred_h); - - for (int i = 0; i < n; i++) { - for (int an_idx = 0; an_idx < anchor_num; an_idx++) { - for (int j = 0; j < h; j++) { - for (int k = 0; k < w; k++) { - pred_x_t(i, an_idx, j, k) = - sigmoid(input_t(i, box_attr_num * an_idx, j, k)); - pred_y_t(i, an_idx, j, k) = - sigmoid(input_t(i, box_attr_num * an_idx + 1, j, k)); - pred_w_t(i, an_idx, j, k) = - input_t(i, box_attr_num * an_idx + 2, j, k); - pred_h_t(i, an_idx, j, k) = - input_t(i, box_attr_num * an_idx + 3, j, k); - - pred_conf_t(i, an_idx, j, k) = - sigmoid(input_t(i, box_attr_num * an_idx + 4, j, k)); - - for (int c = 0; c < class_num; c++) { - pred_class_t(i, an_idx, j, k, c) = - sigmoid(input_t(i, box_attr_num * an_idx + 5 + c, j, k)); - } - } - } - } - } -} - -template -static T CalcBoxIoU(std::vector box1, std::vector box2) { - T b1_x1 = box1[0] - box1[2] / 2; - T b1_x2 = box1[0] + box1[2] / 2; - T b1_y1 = box1[1] - box1[3] / 2; - T b1_y2 = box1[1] + box1[3] / 2; - T b2_x1 = box2[0] - box2[2] / 2; - T b2_x2 = box2[0] + box2[2] / 2; - T b2_y1 = box2[1] - box2[3] / 2; - T b2_y2 = box2[1] + box2[3] / 2; - - T b1_area = (b1_x2 - b1_x1) * (b1_y2 - b1_y1); - T b2_area = (b2_x2 - b2_x1) * (b2_y2 - b2_y1); - - T inter_rect_x1 = std::max(b1_x1, b2_x1); - T inter_rect_y1 = std::max(b1_y1, b2_y1); - T inter_rect_x2 = std::min(b1_x2, b2_x2); - T inter_rect_y2 = std::min(b1_y2, b2_y2); - T inter_area = std::max(inter_rect_x2 - inter_rect_x1, static_cast(0.0)) * - std::max(inter_rect_y2 - inter_rect_y1, static_cast(0.0)); - - return inter_area / (b1_area + b2_area - inter_area); -} - -template -static void PreProcessGTBox(const Tensor& gt_box, const Tensor& gt_label, - const float ignore_thresh, std::vector anchors, - const int grid_size, Tensor* obj_mask, - Tensor* noobj_mask, Tensor* tx, Tensor* ty, - Tensor* tw, Tensor* th, Tensor* tconf, - Tensor* tclass) { - const int n = gt_box.dims()[0]; - const int b = gt_box.dims()[1]; - const int anchor_num = anchors.size() / 2; - auto gt_box_t = EigenTensor::From(gt_box); - auto gt_label_t = EigenTensor::From(gt_label); - auto obj_mask_t = EigenTensor::From(*obj_mask).setConstant(0); - auto noobj_mask_t = EigenTensor::From(*noobj_mask).setConstant(1); - auto tx_t = EigenTensor::From(*tx).setConstant(0.0); - auto ty_t = EigenTensor::From(*ty).setConstant(0.0); - auto tw_t = EigenTensor::From(*tw).setConstant(0.0); - auto th_t = EigenTensor::From(*th).setConstant(0.0); - auto tconf_t = EigenTensor::From(*tconf).setConstant(0.0); - auto tclass_t = EigenTensor::From(*tclass).setConstant(0.0); - - for (int i = 0; i < n; i++) { - for (int j = 0; j < b; j++) { - if (isZero(gt_box_t(i, j, 0)) && isZero(gt_box_t(i, j, 1)) && - isZero(gt_box_t(i, j, 2)) && isZero(gt_box_t(i, j, 3))) { - continue; - } - - int cur_label = gt_label_t(i, j); - T gx = gt_box_t(i, j, 0) * grid_size; - T gy = gt_box_t(i, j, 1) * grid_size; - T gw = gt_box_t(i, j, 2) * grid_size; - T gh = gt_box_t(i, j, 3) * grid_size; - int gi = static_cast(gx); - int gj = static_cast(gy); - - T max_iou = static_cast(0); - T iou; - int best_an_index = -1; - std::vector gt_box_shape({0, 0, gw, gh}); - for (int an_idx = 0; an_idx < anchor_num; an_idx++) { - std::vector anchor_shape({0, 0, static_cast(anchors[2 * an_idx]), - static_cast(anchors[2 * an_idx + 1])}); - iou = CalcBoxIoU(gt_box_shape, anchor_shape); - if (iou > max_iou) { - max_iou = iou; - best_an_index = an_idx; - } - if (iou > ignore_thresh) { - noobj_mask_t(i, an_idx, gj, gi) = 0; - } - } - obj_mask_t(i, best_an_index, gj, gi) = 1; - noobj_mask_t(i, best_an_index, gj, gi) = 0; - tx_t(i, best_an_index, gj, gi) = gx - gi; - ty_t(i, best_an_index, gj, gi) = gy - gj; - tw_t(i, best_an_index, gj, gi) = log(gw / anchors[2 * best_an_index]); - th_t(i, best_an_index, gj, gi) = log(gh / anchors[2 * best_an_index + 1]); - tclass_t(i, best_an_index, gj, gi, cur_label) = 1; - tconf_t(i, best_an_index, gj, gi) = 1; - } - } -} - -static void ExpandObjMaskByClassNum(Tensor* obj_mask_expand, - const Tensor& obj_mask) { - const int n = obj_mask_expand->dims()[0]; - const int an_num = obj_mask_expand->dims()[1]; - const int h = obj_mask_expand->dims()[2]; - const int w = obj_mask_expand->dims()[3]; - const int class_num = obj_mask_expand->dims()[4]; - auto obj_mask_expand_t = EigenTensor::From(*obj_mask_expand); - auto obj_mask_t = EigenTensor::From(obj_mask); - - obj_mask_expand_t = obj_mask_t.reshape(Array5(n, an_num, h, w, 1)) - .broadcast(Array5(1, 1, 1, 1, class_num)); -} - -template -static void AddAllGradToInputGrad( - Tensor* grad, T loss, const Tensor& pred_x, const Tensor& pred_y, - const Tensor& pred_conf, const Tensor& pred_class, const Tensor& grad_x, - const Tensor& grad_y, const Tensor& grad_w, const Tensor& grad_h, - const Tensor& grad_conf_target, const Tensor& grad_conf_notarget, - const Tensor& grad_class, const int class_num, const float loss_weight_xy, - const float loss_weight_wh, const float loss_weight_conf_target, - const float loss_weight_conf_notarget, const float loss_weight_class) { - const int n = pred_x.dims()[0]; - const int an_num = pred_x.dims()[1]; - const int h = pred_x.dims()[2]; - const int w = pred_x.dims()[3]; - const int attr_num = class_num + 5; - auto grad_t = EigenTensor::From(*grad).setConstant(0.0); - auto pred_x_t = EigenTensor::From(pred_x); - auto pred_y_t = EigenTensor::From(pred_y); - auto pred_conf_t = EigenTensor::From(pred_conf); - auto pred_class_t = EigenTensor::From(pred_class); - auto grad_x_t = EigenTensor::From(grad_x); - auto grad_y_t = EigenTensor::From(grad_y); - auto grad_w_t = EigenTensor::From(grad_w); - auto grad_h_t = EigenTensor::From(grad_h); - auto grad_conf_target_t = EigenTensor::From(grad_conf_target); - auto grad_conf_notarget_t = EigenTensor::From(grad_conf_notarget); - auto grad_class_t = EigenTensor::From(grad_class); - - for (int i = 0; i < n; i++) { - for (int j = 0; j < an_num; j++) { - for (int k = 0; k < h; k++) { - for (int l = 0; l < w; l++) { - grad_t(i, j * attr_num, k, l) = - grad_x_t(i, j, k, l) * pred_x_t(i, j, k, l) * - (1.0 - pred_x_t(i, j, k, l)) * loss * loss_weight_xy; - grad_t(i, j * attr_num + 1, k, l) = - grad_y_t(i, j, k, l) * pred_y_t(i, j, k, l) * - (1.0 - pred_y_t(i, j, k, l)) * loss * loss_weight_xy; - grad_t(i, j * attr_num + 2, k, l) = - grad_w_t(i, j, k, l) * loss * loss_weight_wh; - grad_t(i, j * attr_num + 3, k, l) = - grad_h_t(i, j, k, l) * loss * loss_weight_wh; - grad_t(i, j * attr_num + 4, k, l) = - grad_conf_target_t(i, j, k, l) * pred_conf_t(i, j, k, l) * - (1.0 - pred_conf_t(i, j, k, l)) * loss * loss_weight_conf_target; - grad_t(i, j * attr_num + 4, k, l) += - grad_conf_notarget_t(i, j, k, l) * pred_conf_t(i, j, k, l) * - (1.0 - pred_conf_t(i, j, k, l)) * loss * - loss_weight_conf_notarget; - - for (int c = 0; c < class_num; c++) { - grad_t(i, j * attr_num + 5 + c, k, l) = - grad_class_t(i, j, k, l, c) * pred_class_t(i, j, k, l, c) * - (1.0 - pred_class_t(i, j, k, l, c)) * loss * loss_weight_class; - } - } - } - } - } -} - -template -class Yolov3LossKernel : public framework::OpKernel { - public: - void Compute(const framework::ExecutionContext& ctx) const override { - auto* input = ctx.Input("X"); - auto* gt_box = ctx.Input("GTBox"); - auto* gt_label = ctx.Input("GTLabel"); - auto* loss = ctx.Output("Loss"); - auto anchors = ctx.Attr>("anchors"); - int class_num = ctx.Attr("class_num"); - float ignore_thresh = ctx.Attr("ignore_thresh"); - float loss_weight_xy = ctx.Attr("loss_weight_xy"); - float loss_weight_wh = ctx.Attr("loss_weight_wh"); - float loss_weight_conf_target = ctx.Attr("loss_weight_conf_target"); - float loss_weight_conf_notarget = - ctx.Attr("loss_weight_conf_notarget"); - float loss_weight_class = ctx.Attr("loss_weight_class"); - - const int n = input->dims()[0]; - const int h = input->dims()[2]; - const int w = input->dims()[3]; - const int an_num = anchors.size() / 2; - - Tensor pred_x, pred_y, pred_w, pred_h; - Tensor pred_conf, pred_class; - pred_x.mutable_data({n, an_num, h, w}, ctx.GetPlace()); - pred_y.mutable_data({n, an_num, h, w}, ctx.GetPlace()); - pred_w.mutable_data({n, an_num, h, w}, ctx.GetPlace()); - pred_h.mutable_data({n, an_num, h, w}, ctx.GetPlace()); - pred_conf.mutable_data({n, an_num, h, w}, ctx.GetPlace()); - pred_class.mutable_data({n, an_num, h, w, class_num}, ctx.GetPlace()); - CalcPredResult(*input, &pred_conf, &pred_class, &pred_x, &pred_y, - &pred_w, &pred_h, an_num, class_num); - - Tensor obj_mask, noobj_mask; - Tensor tx, ty, tw, th, tconf, tclass; - obj_mask.mutable_data({n, an_num, h, w}, ctx.GetPlace()); - noobj_mask.mutable_data({n, an_num, h, w}, ctx.GetPlace()); - tx.mutable_data({n, an_num, h, w}, ctx.GetPlace()); - ty.mutable_data({n, an_num, h, w}, ctx.GetPlace()); - tw.mutable_data({n, an_num, h, w}, ctx.GetPlace()); - th.mutable_data({n, an_num, h, w}, ctx.GetPlace()); - tconf.mutable_data({n, an_num, h, w}, ctx.GetPlace()); - tclass.mutable_data({n, an_num, h, w, class_num}, ctx.GetPlace()); - PreProcessGTBox(*gt_box, *gt_label, ignore_thresh, anchors, h, &obj_mask, - &noobj_mask, &tx, &ty, &tw, &th, &tconf, &tclass); - - Tensor obj_mask_expand; - obj_mask_expand.mutable_data({n, an_num, h, w, class_num}, - ctx.GetPlace()); - ExpandObjMaskByClassNum(&obj_mask_expand, obj_mask); - - T loss_x = CalcMSEWithMask(pred_x, tx, obj_mask); - T loss_y = CalcMSEWithMask(pred_y, ty, obj_mask); - T loss_w = CalcMSEWithMask(pred_w, tw, obj_mask); - T loss_h = CalcMSEWithMask(pred_h, th, obj_mask); - T loss_conf_target = CalcBCEWithMask(pred_conf, tconf, obj_mask); - T loss_conf_notarget = CalcBCEWithMask(pred_conf, tconf, noobj_mask); - T loss_class = CalcBCEWithMask(pred_class, tclass, obj_mask_expand); - - auto* loss_data = loss->mutable_data({1}, ctx.GetPlace()); - loss_data[0] = loss_weight_xy * (loss_x + loss_y) + - loss_weight_wh * (loss_w + loss_h) + - loss_weight_conf_target * loss_conf_target + - loss_weight_conf_notarget * loss_conf_notarget + - loss_weight_class * loss_class; - } -}; - -template -class Yolov3LossGradKernel : public framework::OpKernel { - public: - void Compute(const framework::ExecutionContext& ctx) const override { - auto* input = ctx.Input("X"); - auto* gt_box = ctx.Input("GTBox"); - auto* gt_label = ctx.Input("GTLabel"); - auto anchors = ctx.Attr>("anchors"); - int class_num = ctx.Attr("class_num"); - float ignore_thresh = ctx.Attr("ignore_thresh"); - auto* input_grad = ctx.Output(framework::GradVarName("X")); - auto* output_grad = ctx.Input(framework::GradVarName("Loss")); - const T loss = output_grad->data()[0]; - float loss_weight_xy = ctx.Attr("loss_weight_xy"); - float loss_weight_wh = ctx.Attr("loss_weight_wh"); - float loss_weight_conf_target = ctx.Attr("loss_weight_conf_target"); - float loss_weight_conf_notarget = - ctx.Attr("loss_weight_conf_notarget"); - float loss_weight_class = ctx.Attr("loss_weight_class"); - - const int n = input->dims()[0]; - const int c = input->dims()[1]; - const int h = input->dims()[2]; - const int w = input->dims()[3]; - const int an_num = anchors.size() / 2; - - Tensor pred_x, pred_y, pred_w, pred_h; - Tensor pred_conf, pred_class; - pred_x.mutable_data({n, an_num, h, w}, ctx.GetPlace()); - pred_y.mutable_data({n, an_num, h, w}, ctx.GetPlace()); - pred_w.mutable_data({n, an_num, h, w}, ctx.GetPlace()); - pred_h.mutable_data({n, an_num, h, w}, ctx.GetPlace()); - pred_conf.mutable_data({n, an_num, h, w}, ctx.GetPlace()); - pred_class.mutable_data({n, an_num, h, w, class_num}, ctx.GetPlace()); - CalcPredResult(*input, &pred_conf, &pred_class, &pred_x, &pred_y, - &pred_w, &pred_h, an_num, class_num); - - Tensor obj_mask, noobj_mask; - Tensor tx, ty, tw, th, tconf, tclass; - obj_mask.mutable_data({n, an_num, h, w}, ctx.GetPlace()); - noobj_mask.mutable_data({n, an_num, h, w}, ctx.GetPlace()); - tx.mutable_data({n, an_num, h, w}, ctx.GetPlace()); - ty.mutable_data({n, an_num, h, w}, ctx.GetPlace()); - tw.mutable_data({n, an_num, h, w}, ctx.GetPlace()); - th.mutable_data({n, an_num, h, w}, ctx.GetPlace()); - tconf.mutable_data({n, an_num, h, w}, ctx.GetPlace()); - tclass.mutable_data({n, an_num, h, w, class_num}, ctx.GetPlace()); - PreProcessGTBox(*gt_box, *gt_label, ignore_thresh, anchors, h, &obj_mask, - &noobj_mask, &tx, &ty, &tw, &th, &tconf, &tclass); - - Tensor obj_mask_expand; - obj_mask_expand.mutable_data({n, an_num, h, w, class_num}, - ctx.GetPlace()); - ExpandObjMaskByClassNum(&obj_mask_expand, obj_mask); - - Tensor grad_x, grad_y, grad_w, grad_h; - Tensor grad_conf_target, grad_conf_notarget, grad_class; - grad_x.mutable_data({n, an_num, h, w}, ctx.GetPlace()); - grad_y.mutable_data({n, an_num, h, w}, ctx.GetPlace()); - grad_w.mutable_data({n, an_num, h, w}, ctx.GetPlace()); - grad_h.mutable_data({n, an_num, h, w}, ctx.GetPlace()); - grad_conf_target.mutable_data({n, an_num, h, w}, ctx.GetPlace()); - grad_conf_notarget.mutable_data({n, an_num, h, w}, ctx.GetPlace()); - grad_class.mutable_data({n, an_num, h, w, class_num}, ctx.GetPlace()); - T obj_mf = CalcMaskPointNum(obj_mask); - T noobj_mf = CalcMaskPointNum(noobj_mask); - T obj_expand_mf = CalcMaskPointNum(obj_mask_expand); - CalcMSEGradWithMask(&grad_x, pred_x, tx, obj_mask, obj_mf); - CalcMSEGradWithMask(&grad_y, pred_y, ty, obj_mask, obj_mf); - CalcMSEGradWithMask(&grad_w, pred_w, tw, obj_mask, obj_mf); - CalcMSEGradWithMask(&grad_h, pred_h, th, obj_mask, obj_mf); - CalcBCEGradWithMask(&grad_conf_target, pred_conf, tconf, obj_mask, - obj_mf); - CalcBCEGradWithMask(&grad_conf_notarget, pred_conf, tconf, noobj_mask, - noobj_mf); - CalcBCEGradWithMask(&grad_class, pred_class, tclass, obj_mask_expand, - obj_expand_mf); - - input_grad->mutable_data({n, c, h, w}, ctx.GetPlace()); - AddAllGradToInputGrad( - input_grad, loss, pred_x, pred_y, pred_conf, pred_class, grad_x, grad_y, - grad_w, grad_h, grad_conf_target, grad_conf_notarget, grad_class, - class_num, loss_weight_xy, loss_weight_wh, loss_weight_conf_target, - loss_weight_conf_notarget, loss_weight_class); - } -}; - -} // namespace operators -} // namespace paddle diff --git a/python/paddle/fluid/contrib/__init__.py b/python/paddle/fluid/contrib/__init__.py index 6127ca8a3eacd013dd258a02b9f3cc792b634137..870c57e54011361caae5265201d19f58830a87bc 100644 --- a/python/paddle/fluid/contrib/__init__.py +++ b/python/paddle/fluid/contrib/__init__.py @@ -22,6 +22,8 @@ from . import op_frequence from .op_frequence import * from . import quantize from .quantize import * +from . import int8_inference +from .int8_inference import * from . import reader from .reader import * from . import slim @@ -34,6 +36,7 @@ __all__ += decoder.__all__ __all__ += memory_usage_calc.__all__ __all__ += op_frequence.__all__ __all__ += quantize.__all__ +__all__ += int8_inference.__all__ __all__ += reader.__all__ __all__ += slim.__all__ __all__ += utils.__all__ diff --git a/python/paddle/fluid/contrib/int8_inference/__init__.py b/python/paddle/fluid/contrib/int8_inference/__init__.py index eca2dce114b069bf9b455d77ce670d73b5047fd2..45547201d598c809f7dcf3a1a09103ae5de3e4c6 100644 --- a/python/paddle/fluid/contrib/int8_inference/__init__.py +++ b/python/paddle/fluid/contrib/int8_inference/__init__.py @@ -11,3 +11,10 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. + +from __future__ import print_function + +from . import utility +from .utility import * + +__all__ = utility.__all__ diff --git a/python/paddle/fluid/contrib/int8_inference/utility.py b/python/paddle/fluid/contrib/int8_inference/utility.py index 40de038f28a83738e6e6cd8c77c0a9916ce68b4f..b35d9f2424ccf093f70e75b13e23f6c5ad59e859 100644 --- a/python/paddle/fluid/contrib/int8_inference/utility.py +++ b/python/paddle/fluid/contrib/int8_inference/utility.py @@ -11,11 +11,15 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -import paddle.fluid.core as core + +from paddle.fluid import core import numpy as np import math import os -import paddle.fluid as fluid +from paddle.fluid.executor import global_scope +from paddle.fluid import io + +__all__ = ['Calibrator'] class Calibrator(object): @@ -76,8 +80,7 @@ class Calibrator(object): ''' for i in self.sampling_program.list_vars(): if i.name in self.sampling_vars: - np_data = np.array(fluid.global_scope().find_var(i.name) - .get_tensor()) + np_data = np.array(global_scope().find_var(i.name).get_tensor()) if i.name not in self._sampling_data: self._sampling_data[i.name] = [] self._sampling_data[i.name].append(np_data) @@ -86,9 +89,9 @@ class Calibrator(object): ''' Save the quantized model to the disk. ''' - fluid.io.save_inference_model(self.output, self.feed_var_names, - self.fetch_list, self.exe, - self.sampling_program) + io.save_inference_model(self.output, self.feed_var_names, + self.fetch_list, self.exe, + self.sampling_program) def __display_debug(self): if self.debug: diff --git a/python/paddle/fluid/contrib/tests/test_calibration.py b/python/paddle/fluid/contrib/tests/test_calibration.py index f07fefe7e097377a845193bb37b6e9aa42708948..424ea245a0f2dff0d437ace386f2e4e0fa6b517d 100644 --- a/python/paddle/fluid/contrib/tests/test_calibration.py +++ b/python/paddle/fluid/contrib/tests/test_calibration.py @@ -19,15 +19,12 @@ import sys import random import paddle import paddle.fluid as fluid -import argparse import functools import contextlib -import paddle.fluid.profiler as profiler from paddle.dataset.common import download from PIL import Image, ImageEnhance import math -sys.path.append('..') -import int8_inference.utility as int8_utility +import paddle.fluid.contrib.int8_inference.utility as int8_utility random.seed(0) np.random.seed(0) @@ -43,7 +40,7 @@ img_mean = np.array([0.485, 0.456, 0.406]).reshape((3, 1, 1)) img_std = np.array([0.229, 0.224, 0.225]).reshape((3, 1, 1)) -# TODO(guomingz): Remove duplicated code from line 45 ~ line 114 +# TODO(guomingz): Remove duplicated code from resize_short, crop_image, process_image, _reader_creator def resize_short(img, target_size): percent = float(target_size) / min(img.size[0], img.size[1]) resized_width = int(round(img.size[0] * percent)) @@ -123,16 +120,37 @@ class TestCalibrationForResnet50(unittest.TestCase): self.cache_folder = os.path.expanduser('~/.cache/paddle/dataset/' + self.int8_download) - data_url = 'http://paddle-inference-dist.cdn.bcebos.com/int8/calibration_test_data.tar.gz' - data_md5 = '1b6c1c434172cca1bf9ba1e4d7a3157d' - self.data_cache_folder = self.download_data(data_url, data_md5, "data") + data_urls = [] + data_md5s = [] + self.data_cache_folder = '' + if os.environ.get('DATASET') == 'full': + data_urls.append( + 'https://paddle-inference-dist.bj.bcebos.com/int8/ILSVRC2012_img_val.tar.gz.partaa' + ) + data_md5s.append('60f6525b0e1d127f345641d75d41f0a8') + data_urls.append( + 'https://paddle-inference-dist.bj.bcebos.com/int8/ILSVRC2012_img_val.tar.gz.partab' + ) + data_md5s.append('1e9f15f64e015e58d6f9ec3210ed18b5') + self.data_cache_folder = self.download_data(data_urls, data_md5s, + "full_data", False) + else: + data_urls.append( + 'http://paddle-inference-dist.cdn.bcebos.com/int8/calibration_test_data.tar.gz' + ) + data_md5s.append('1b6c1c434172cca1bf9ba1e4d7a3157d') + self.data_cache_folder = self.download_data(data_urls, data_md5s, + "small_data", False) # reader/decorator.py requires the relative path to the data folder cmd = 'rm -rf {0} && ln -s {1} {0}'.format("data", self.data_cache_folder) os.system(cmd) - self.iterations = 50 + self.batch_size = 1 + self.sample_iterations = 50 + self.infer_iterations = 50000 if os.environ.get( + 'DATASET') == 'full' else 50 def cache_unzipping(self, target_folder, zip_path): if not os.path.exists(target_folder): @@ -140,20 +158,44 @@ class TestCalibrationForResnet50(unittest.TestCase): zip_path) os.system(cmd) - def download_data(self, data_url, data_md5, folder_name): - download(data_url, self.int8_download, data_md5) + def download_data(self, data_urls, data_md5s, folder_name, is_model=True): data_cache_folder = os.path.join(self.cache_folder, folder_name) - file_name = data_url.split('/')[-1] - zip_path = os.path.join(self.cache_folder, file_name) + zip_path = '' + if os.environ.get('DATASET') == 'full': + file_names = [] + for i in range(0, len(data_urls)): + download(data_urls[i], self.int8_download, data_md5s[i]) + file_names.append(data_urls[i].split('/')[-1]) + + zip_path = os.path.join(self.cache_folder, + 'full_imagenet_val.tar.gz') + if not os.path.exists(zip_path): + cat_command = 'cat' + for file_name in file_names: + cat_command += ' ' + os.path.join(self.cache_folder, + file_name) + cat_command += ' > ' + zip_path + os.system(cat_command) + + if os.environ.get('DATASET') != 'full' or is_model: + download(data_urls[0], self.int8_download, data_md5s[0]) + file_name = data_urls[0].split('/')[-1] + zip_path = os.path.join(self.cache_folder, file_name) + + print('Data is downloaded at {0}').format(zip_path) self.cache_unzipping(data_cache_folder, zip_path) return data_cache_folder - def download_resnet50_model(self): + def download_model(self): # resnet50 fp32 data - data_url = 'http://paddle-inference-dist.cdn.bcebos.com/int8/resnet50_int8_model.tar.gz' - data_md5 = '4a5194524823d9b76da6e738e1367881' - self.model_cache_folder = self.download_data(data_url, data_md5, + data_urls = [ + 'http://paddle-inference-dist.cdn.bcebos.com/int8/resnet50_int8_model.tar.gz' + ] + data_md5s = ['4a5194524823d9b76da6e738e1367881'] + self.model_cache_folder = self.download_data(data_urls, data_md5s, "resnet50_fp32") + self.model = "ResNet-50" + self.algo = "direct" def run_program(self, model_path, generate_int8=False, algo='direct'): image_shape = [3, 224, 224] @@ -169,17 +211,17 @@ class TestCalibrationForResnet50(unittest.TestCase): t = fluid.transpiler.InferenceTranspiler() t.transpile(infer_program, fluid.CPUPlace()) - val_reader = paddle.batch(val(), batch_size=1) + val_reader = paddle.batch(val(), self.batch_size) + iterations = self.infer_iterations if generate_int8: int8_model = os.path.join(os.getcwd(), "calibration_out") + iterations = self.sample_iterations if os.path.exists(int8_model): os.system("rm -rf " + int8_model) os.system("mkdir " + int8_model) - print("Start calibration ...") - calibrator = int8_utility.Calibrator( program=infer_program, pretrained_model=model_path, @@ -191,6 +233,7 @@ class TestCalibrationForResnet50(unittest.TestCase): test_info = [] cnt = 0 + periods = [] for batch_id, data in enumerate(val_reader()): image = np.array( [x[0].reshape(image_shape) for x in data]).astype("float32") @@ -202,21 +245,28 @@ class TestCalibrationForResnet50(unittest.TestCase): if op.has_attr("use_mkldnn"): op._set_attr("use_mkldnn", True) + t1 = time.time() _, acc1, _ = exe.run( running_program, feed={feed_dict[0]: image, feed_dict[1]: label}, fetch_list=fetch_targets) + t2 = time.time() + period = t2 - t1 + periods.append(period) + if generate_int8: calibrator.sample_data() test_info.append(np.mean(acc1) * len(data)) cnt += len(data) - if batch_id != self.iterations - 1: - continue + if (batch_id + 1) % 100 == 0: + print("{0} images,".format(batch_id + 1)) + sys.stdout.flush() - break + if (batch_id + 1) == iterations: + break if generate_int8: calibrator.save_int8_model() @@ -225,32 +275,49 @@ class TestCalibrationForResnet50(unittest.TestCase): "Calibration is done and the corresponding files are generated at {}". format(os.path.abspath("calibration_out"))) else: - return np.sum(test_info) / cnt + throughput = cnt / np.sum(periods) + latency = np.average(periods) + acc1 = np.sum(test_info) / cnt + return (throughput, latency, acc1) def test_calibration(self): - self.download_resnet50_model() - fp32_acc1 = self.run_program(self.model_cache_folder + "/model") - self.run_program(self.model_cache_folder + "/model", True) - int8_acc1 = self.run_program("calibration_out") + self.download_model() + print("Start FP32 inference for {0} on {1} images ...").format( + self.model, self.infer_iterations) + (fp32_throughput, fp32_latency, + fp32_acc1) = self.run_program(self.model_cache_folder + "/model") + print("Start INT8 calibration for {0} on {1} images ...").format( + self.model, self.sample_iterations) + self.run_program( + self.model_cache_folder + "/model", True, algo=self.algo) + print("Start INT8 inference for {0} on {1} images ...").format( + self.model, self.infer_iterations) + (int8_throughput, int8_latency, + int8_acc1) = self.run_program("calibration_out") delta_value = np.abs(fp32_acc1 - int8_acc1) self.assertLess(delta_value, 0.01) + print( + "FP32 {0}: batch_size {1}, throughput {2} images/second, latency {3} second, accuracy {4}". + format(self.model, self.batch_size, fp32_throughput, fp32_latency, + fp32_acc1)) + print( + "INT8 {0}: batch_size {1}, throughput {2} images/second, latency {3} second, accuracy {4}". + format(self.model, self.batch_size, int8_throughput, int8_latency, + int8_acc1)) + sys.stdout.flush() class TestCalibrationForMobilenetv1(TestCalibrationForResnet50): - def download_mobilenetv1_model(self): + def download_model(self): # mobilenetv1 fp32 data - data_url = 'http://paddle-inference-dist.cdn.bcebos.com/int8/mobilenetv1_int8_model.tar.gz' - data_md5 = '13892b0716d26443a8cdea15b3c6438b' - self.model_cache_folder = self.download_data(data_url, data_md5, + data_urls = [ + 'http://paddle-inference-dist.cdn.bcebos.com/int8/mobilenetv1_int8_model.tar.gz' + ] + data_md5s = ['13892b0716d26443a8cdea15b3c6438b'] + self.model_cache_folder = self.download_data(data_urls, data_md5s, "mobilenetv1_fp32") - - def test_calibration(self): - self.download_mobilenetv1_model() - fp32_acc1 = self.run_program(self.model_cache_folder + "/model") - self.run_program(self.model_cache_folder + "/model", True, algo='KL') - int8_acc1 = self.run_program("calibration_out") - delta_value = np.abs(fp32_acc1 - int8_acc1) - self.assertLess(delta_value, 0.01) + self.model = "MobileNet-V1" + self.algo = "KL" if __name__ == '__main__': diff --git a/python/paddle/fluid/initializer.py b/python/paddle/fluid/initializer.py index 4f434328e47df4363b304ff55f587018d3157c5e..5be21ff7f7270f6ce950c069f61418c922bcedc5 100644 --- a/python/paddle/fluid/initializer.py +++ b/python/paddle/fluid/initializer.py @@ -366,17 +366,40 @@ class TruncatedNormalInitializer(Initializer): # Initialization Ops should be prepended and not appended if self._seed == 0: self._seed = block.program.random_seed + + # to be compatible of fp16 initalizers + if var.dtype == VarDesc.VarType.FP16: + out_dtype = VarDesc.VarType.FP32 + out_var = block.create_var( + name=unique_name.generate(".".join( + ['truncated_gaussian_random', 'tmp'])), + shape=var.shape, + dtype=out_dtype, + type=VarDesc.VarType.LOD_TENSOR, + persistable=False) + else: + out_dtype = var.dtype + out_var = var + op = block._prepend_op( type="truncated_gaussian_random", - outputs={"Out": var}, + outputs={"Out": out_var}, attrs={ "shape": var.shape, - "dtype": int(var.dtype), + "dtype": out_dtype, "mean": self._mean, "std": self._std_dev, "seed": self._seed }, stop_gradient=True) + + if var.dtype == VarDesc.VarType.FP16: + block.append_op( + type="cast", + inputs={"X": out_var}, + outputs={"Out": var}, + attrs={"in_dtype": out_var.dtype, + "out_dtype": var.dtype}) var.op = op return op diff --git a/python/paddle/fluid/layers/detection.py b/python/paddle/fluid/layers/detection.py index 63d8bd4dc7c9e51968036c357fe1cdfc9bfff60c..c983e2a44b25c5943df5e822e2e363b2557a6ac3 100644 --- a/python/paddle/fluid/layers/detection.py +++ b/python/paddle/fluid/layers/detection.py @@ -347,19 +347,107 @@ def box_coder(prior_box, target_box, code_type="encode_center_size", box_normalized=True, - name=None): + name=None, + axis=0): """ - ${comment} + **Box Coder Layer** + + Encode/Decode the target bounding box with the priorbox information. + + The Encoding schema described below: + + .. math:: + + ox = (tx - px) / pw / pxv + + oy = (ty - py) / ph / pyv + + ow = \log(\abs(tw / pw)) / pwv + + oh = \log(\abs(th / ph)) / phv + + The Decoding schema described below: + + .. math:: + + ox = (pw * pxv * tx * + px) - tw / 2 + + oy = (ph * pyv * ty * + py) - th / 2 + + ow = \exp(pwv * tw) * pw + tw / 2 + + oh = \exp(phv * th) * ph + th / 2 + + where `tx`, `ty`, `tw`, `th` denote the target box's center coordinates, + width and height respectively. Similarly, `px`, `py`, `pw`, `ph` denote + the priorbox's (anchor) center coordinates, width and height. `pxv`, + `pyv`, `pwv`, `phv` denote the variance of the priorbox and `ox`, `oy`, + `ow`, `oh` denote the encoded/decoded coordinates, width and height. + + During Box Decoding, two modes for broadcast are supported. Say target + box has shape [N, M, 4], and the shape of prior box can be [N, 4] or + [M, 4]. Then prior box will broadcast to target box along the + assigned axis. Args: - prior_box(${prior_box_type}): ${prior_box_comment} - prior_box_var(${prior_box_var_type}): ${prior_box_var_comment} - target_box(${target_box_type}): ${target_box_comment} - code_type(${code_type_type}): ${code_type_comment} - box_normalized(${box_normalized_type}): ${box_normalized_comment} + prior_box(Variable): Box list prior_box is a 2-D Tensor with shape + [M, 4] holds M boxes, each box is represented as + [xmin, ymin, xmax, ymax], [xmin, ymin] is the + left top coordinate of the anchor box, if the + input is image feature map, they are close to + the origin of the coordinate system. [xmax, ymax] + is the right bottom coordinate of the anchor box. + prior_box_var(Variable|list): prior_box_var supports two types of input. + One is variable with shape [M, 4] holds M group. + The other one is list consist of 4 elements + shared by all boxes. + target_box(Variable): This input can be a 2-D LoDTensor with shape + [N, 4] when code_type is 'encode_center_size'. + This input also can be a 3-D Tensor with shape + [N, M, 4] when code_type is 'decode_center_size'. + Each box is represented as + [xmin, ymin, xmax, ymax]. This tensor can + contain LoD information to represent a batch + of inputs. + code_type(string): The code type used with the target box. It can be + encode_center_size or decode_center_size + box_normalized(int): Whether treat the priorbox as a noramlized box. + Set true by default. + name(string): The name of box coder. + axis(int): Which axis in PriorBox to broadcast for box decode, + for example, if axis is 0 and TargetBox has shape + [N, M, 4] and PriorBox has shape [M, 4], then PriorBox + will broadcast to [N, M, 4] for decoding. It is only valid + when code type is decode_center_size. Set 0 by default. Returns: - output_box(${output_box_type}): ${output_box_comment} + output_box(Variable): When code_type is 'encode_center_size', the + output tensor of box_coder_op with shape + [N, M, 4] representing the result of N target + boxes encoded with M Prior boxes and variances. + When code_type is 'decode_center_size', + N represents the batch size and M represents + the number of deocded boxes. + + Examples: + + .. code-block:: python + + prior_box = fluid.layers.data(name='prior_box', + shape=[512, 4], + dtype='float32', + append_batch_size=False) + target_box = fluid.layers.data(name='target_box', + shape=[512,81,4], + dtype='float32', + append_batch_size=False) + output = fluid.layers.box_coder(prior_box=prior_box, + prior_box_var=[0.1,0.1,0.2,0.2], + target_box=target_box, + code_type="decode_center_size", + box_normalized=False, + axis=1) + """ helper = LayerHelper("box_coder", **locals()) @@ -370,15 +458,22 @@ def box_coder(prior_box, output_box = helper.create_variable( name=name, dtype=prior_box.dtype, persistable=False) + inputs = {"PriorBox": prior_box, "TargetBox": target_box} + attrs = { + "code_type": code_type, + "box_normalized": box_normalized, + "axis": axis + } + if isinstance(prior_box_var, Variable): + inputs['PriorBoxVar'] = prior_box_var + elif isinstance(prior_box_var, list): + attrs['variance'] = prior_box_var + else: + raise TypeError("Input variance of box_coder must be Variable or lisz") helper.append_op( type="box_coder", - inputs={ - "PriorBox": prior_box, - "PriorBoxVar": prior_box_var, - "TargetBox": target_box - }, - attrs={"code_type": code_type, - "box_normalized": box_normalized}, + inputs=inputs, + attrs=attrs, outputs={"OutputBox": output_box}) return output_box @@ -414,13 +509,10 @@ def yolov3_loss(x, gtbox, gtlabel, anchors, + anchor_mask, class_num, ignore_thresh, - loss_weight_xy=None, - loss_weight_wh=None, - loss_weight_conf_target=None, - loss_weight_conf_notarget=None, - loss_weight_class=None, + downsample_ratio, name=None): """ ${comment} @@ -432,16 +524,13 @@ def yolov3_loss(x, and x, y, w, h should be relative value of input image. N is the batch number and B is the max box number in an image. - gtlabel (Variable): class id of ground truth boxes, shoud be ins shape + gtlabel (Variable): class id of ground truth boxes, shoud be in shape of [N, B]. anchors (list|tuple): ${anchors_comment} + anchor_mask (list|tuple): ${anchor_mask_comment} class_num (int): ${class_num_comment} ignore_thresh (float): ${ignore_thresh_comment} - loss_weight_xy (float|None): ${loss_weight_xy_comment} - loss_weight_wh (float|None): ${loss_weight_wh_comment} - loss_weight_conf_target (float|None): ${loss_weight_conf_target_comment} - loss_weight_conf_notarget (float|None): ${loss_weight_conf_notarget_comment} - loss_weight_class (float|None): ${loss_weight_class_comment} + downsample_ratio (int): ${downsample_ratio_comment} name (string): the name of yolov3 loss Returns: @@ -461,9 +550,10 @@ def yolov3_loss(x, x = fluid.layers.data(name='x', shape=[255, 13, 13], dtype='float32') gtbox = fluid.layers.data(name='gtbox', shape=[6, 5], dtype='float32') gtlabel = fluid.layers.data(name='gtlabel', shape=[6, 1], dtype='int32') - anchors = [10, 13, 16, 30, 33, 23] - loss = fluid.layers.yolov3_loss(x=x, gtbox=gtbox, class_num=80 - anchors=anchors, ignore_thresh=0.5) + anchors = [10, 13, 16, 30, 33, 23, 30, 61, 62, 45, 59, 119, 116, 90, 156, 198, 373, 326] + anchors = [0, 1, 2] + loss = fluid.layers.yolov3_loss(x=x, gtbox=gtbox, class_num=80, anchors=anchors, + ignore_thresh=0.5, downsample_ratio=32) """ helper = LayerHelper('yolov3_loss', **locals()) @@ -475,6 +565,8 @@ def yolov3_loss(x, raise TypeError("Input gtlabel of yolov3_loss must be Variable") if not isinstance(anchors, list) and not isinstance(anchors, tuple): raise TypeError("Attr anchors of yolov3_loss must be list or tuple") + if not isinstance(anchor_mask, list) and not isinstance(anchor_mask, tuple): + raise TypeError("Attr anchor_mask of yolov3_loss must be list or tuple") if not isinstance(class_num, int): raise TypeError("Attr class_num of yolov3_loss must be an integer") if not isinstance(ignore_thresh, float): @@ -487,31 +579,29 @@ def yolov3_loss(x, loss = helper.create_variable( name=name, dtype=x.dtype, persistable=False) + objectness_mask = helper.create_variable_for_type_inference(dtype='int32') + gt_match_mask = helper.create_variable_for_type_inference(dtype='int32') + attrs = { "anchors": anchors, + "anchor_mask": anchor_mask, "class_num": class_num, "ignore_thresh": ignore_thresh, + "downsample_ratio": downsample_ratio, } - if loss_weight_xy is not None and isinstance(loss_weight_xy, float): - self.attrs['loss_weight_xy'] = loss_weight_xy - if loss_weight_wh is not None and isinstance(loss_weight_wh, float): - self.attrs['loss_weight_wh'] = loss_weight_wh - if loss_weight_conf_target is not None and isinstance( - loss_weight_conf_target, float): - self.attrs['loss_weight_conf_target'] = loss_weight_conf_target - if loss_weight_conf_notarget is not None and isinstance( - loss_weight_conf_notarget, float): - self.attrs['loss_weight_conf_notarget'] = loss_weight_conf_notarget - if loss_weight_class is not None and isinstance(loss_weight_class, float): - self.attrs['loss_weight_class'] = loss_weight_class - helper.append_op( type='yolov3_loss', - inputs={"X": x, - "GTBox": gtbox, - "GTLabel": gtlabel}, - outputs={'Loss': loss}, + inputs={ + "X": x, + "GTBox": gtbox, + "GTLabel": gtlabel, + }, + outputs={ + 'Loss': loss, + 'ObjectnessMask': objectness_mask, + 'GTMatchMask': gt_match_mask + }, attrs=attrs) return loss diff --git a/python/paddle/fluid/layers/nn.py b/python/paddle/fluid/layers/nn.py index 0dbcf442a3b098f60ec803152f23f4e65970d948..0e4b5aadc0b0d7e87ea1cfb8e18339fe211e1eef 100644 --- a/python/paddle/fluid/layers/nn.py +++ b/python/paddle/fluid/layers/nn.py @@ -3877,7 +3877,8 @@ def beam_search(pre_ids, end_id, level=0, is_accumulated=True, - name=None): + name=None, + return_parent_idx=False): """ Beam search is a classical algorithm for selecting candidate words in a machine translation task. @@ -3933,10 +3934,16 @@ def beam_search(pre_ids, accumulated scores. name(str|None): A name for this layer(optional). If set None, the layer will be named automatically. + return_parent_idx(bool): Whether to return an extra Tensor variable + preserving the selected_ids' parent indice in pre_ids + in output, which can be used to gather cell states at + the next time step. Returns: - Variable: The LodTensor pair containing the selected ids and the \ - corresponding scores. + Variable: The LodTensor tuple containing the selected ids and the \ + corresponding scores. If :attr:`return_parent_idx` is :attr:`True`, \ + an extra Tensor variable preserving the selected_ids' parent indice \ + is included. Examples: .. code-block:: python @@ -3969,6 +3976,11 @@ def beam_search(pre_ids, selected_scores = helper.create_variable_for_type_inference( dtype=score_type) selected_ids = helper.create_variable_for_type_inference(dtype=id_type) + # parent_idx is a tensor used to gather cell states at the next time + # step. Though lod in selected_ids can also be used to gather by + # sequence_expand, it is not efficient. + # gather_op's index input only supports int32 dtype currently + parent_idx = helper.create_variable_for_type_inference(dtype="int32") helper.append_op( type='beam_search', @@ -3976,6 +3988,7 @@ def beam_search(pre_ids, outputs={ 'selected_ids': selected_ids, 'selected_scores': selected_scores, + 'parent_idx': parent_idx }, attrs={ # TODO(ChunweiYan) to assure other value support @@ -3984,8 +3997,10 @@ def beam_search(pre_ids, 'end_id': end_id, 'is_accumulated': is_accumulated, }) - - return selected_ids, selected_scores + if return_parent_idx: + return selected_ids, selected_scores, parent_idx + else: + return selected_ids, selected_scores def beam_search_decode(ids, scores, beam_size, end_id, name=None): diff --git a/python/paddle/fluid/layers/ops.py b/python/paddle/fluid/layers/ops.py index 6c18af7283e19bd431c8d543255d900dc89cba09..3dcf9dc06998be9c38a48f18075cbf99f3dccb1a 100644 --- a/python/paddle/fluid/layers/ops.py +++ b/python/paddle/fluid/layers/ops.py @@ -135,7 +135,7 @@ def thresholded_relu(x, threshold=None): if val is not None: kwargs[name] = val - _thresholded_relu_(**kwargs) + return _thresholded_relu_(**kwargs) thresholded_relu.__doc__ = _thresholded_relu_.__doc__ + """ diff --git a/python/paddle/fluid/tests/test_detection.py b/python/paddle/fluid/tests/test_detection.py index 5e21dda9679eec3cffafe7d448ebb71cd5e06f57..0d39a139eed87f900b1f59fd0569b6acaec0962b 100644 --- a/python/paddle/fluid/tests/test_detection.py +++ b/python/paddle/fluid/tests/test_detection.py @@ -50,6 +50,19 @@ class TestDetection(unittest.TestCase): self.assertEqual(out.shape[-1], 6) print(str(program)) + def test_box_coder_api(self): + program = Program() + with program_guard(program): + x = layers.data(name='x', shape=[4], dtype='float32') + y = layers.data(name='z', shape=[4], dtype='float32', lod_level=1) + bcoder = layers.box_coder( + prior_box=x, + prior_box_var=[0.1, 0.2, 0.1, 0.2], + target_box=y, + code_type='encode_center_size') + self.assertIsNotNone(bcoder) + print(str(program)) + def test_detection_api(self): program = Program() with program_guard(program): @@ -463,8 +476,8 @@ class TestYoloDetection(unittest.TestCase): x = layers.data(name='x', shape=[30, 7, 7], dtype='float32') gtbox = layers.data(name='gtbox', shape=[10, 4], dtype='float32') gtlabel = layers.data(name='gtlabel', shape=[10], dtype='int32') - loss = layers.yolov3_loss(x, gtbox, gtlabel, [10, 13, 30, 13], 10, - 0.5) + loss = layers.yolov3_loss(x, gtbox, gtlabel, [10, 13, 30, 13], + [0, 1], 10, 0.7, 32) self.assertIsNotNone(loss) diff --git a/python/paddle/fluid/tests/unittests/CMakeLists.txt b/python/paddle/fluid/tests/unittests/CMakeLists.txt index 7e693c6a41f71f11fd702e2cfc26aa4a21cd2de7..699181d01da862dca72113e6c11630ae5693e41c 100644 --- a/python/paddle/fluid/tests/unittests/CMakeLists.txt +++ b/python/paddle/fluid/tests/unittests/CMakeLists.txt @@ -1,15 +1,6 @@ file(GLOB TEST_OPS RELATIVE "${CMAKE_CURRENT_SOURCE_DIR}" "test_*.py") string(REPLACE ".py" "" TEST_OPS "${TEST_OPS}") -# The MKLDNN tests are skiped when the MKLDNN flag is OFF -if(NOT WITH_MKLDNN) - foreach(src ${TEST_OPS}) - if(${src} MATCHES ".*_mkldnn_op$") - list(REMOVE_ITEM TEST_OPS ${src}) - endif() - endforeach() -endif(NOT WITH_MKLDNN) - if(NOT WITH_DISTRIBUTE) list(REMOVE_ITEM TEST_OPS test_recv_op) list(REMOVE_ITEM TEST_OPS test_dist_transpiler) @@ -123,3 +114,7 @@ endif() if (WITH_NGRAPH) add_subdirectory(ngraph) endif() + +if (WITH_MKLDNN) + add_subdirectory(mkldnn) +endif() diff --git a/python/paddle/fluid/tests/unittests/mkldnn/CMakeLists.txt b/python/paddle/fluid/tests/unittests/mkldnn/CMakeLists.txt new file mode 100644 index 0000000000000000000000000000000000000000..f71e04c09aa38b8cf7b3a167b84d4dc0e6cc3ec7 --- /dev/null +++ b/python/paddle/fluid/tests/unittests/mkldnn/CMakeLists.txt @@ -0,0 +1,6 @@ +file(GLOB TEST_OPS RELATIVE "${CMAKE_CURRENT_SOURCE_DIR}" "test_*.py") +string(REPLACE ".py" "" TEST_OPS "${TEST_OPS}") + +foreach(TEST_OP ${TEST_OPS}) + py_test_modules(${TEST_OP} MODULES ${TEST_OP}) +endforeach(TEST_OP) diff --git a/python/paddle/fluid/tests/unittests/mkldnn/__init__.py b/python/paddle/fluid/tests/unittests/mkldnn/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..b94a21a7e406b833797f8f521c62a2351c2bc30a --- /dev/null +++ b/python/paddle/fluid/tests/unittests/mkldnn/__init__.py @@ -0,0 +1,13 @@ +# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. diff --git a/python/paddle/fluid/tests/unittests/test_activation_mkldnn_op.py b/python/paddle/fluid/tests/unittests/mkldnn/test_activation_mkldnn_op.py similarity index 94% rename from python/paddle/fluid/tests/unittests/test_activation_mkldnn_op.py rename to python/paddle/fluid/tests/unittests/mkldnn/test_activation_mkldnn_op.py index 611d0dd076b827b0f528f2e3a31182cc4939d1f1..ad94a4b21c347c9a2782437948c20d3b3071c679 100644 --- a/python/paddle/fluid/tests/unittests/test_activation_mkldnn_op.py +++ b/python/paddle/fluid/tests/unittests/mkldnn/test_activation_mkldnn_op.py @@ -17,9 +17,9 @@ from __future__ import print_function import unittest import numpy as np import paddle.fluid.core as core -from op_test import OpTest +from paddle.fluid.tests.unittests.op_test import OpTest from scipy.special import expit -from test_activation_op import TestRelu, TestTanh, TestSqrt, TestAbs +from paddle.fluid.tests.unittests.test_activation_op import TestRelu, TestTanh, TestSqrt, TestAbs class TestMKLDNNReluDim2(TestRelu): diff --git a/python/paddle/fluid/tests/unittests/test_batch_norm_mkldnn_op.py b/python/paddle/fluid/tests/unittests/mkldnn/test_batch_norm_mkldnn_op.py similarity index 92% rename from python/paddle/fluid/tests/unittests/test_batch_norm_mkldnn_op.py rename to python/paddle/fluid/tests/unittests/mkldnn/test_batch_norm_mkldnn_op.py index 1286cee8dc1855c1b1695da46ae0b5222c065114..5fce90372d9beda9b04ab68d0a8ac5ef5c124421 100644 --- a/python/paddle/fluid/tests/unittests/test_batch_norm_mkldnn_op.py +++ b/python/paddle/fluid/tests/unittests/mkldnn/test_batch_norm_mkldnn_op.py @@ -19,9 +19,9 @@ import numpy as np import paddle.fluid.core as core from paddle.fluid.op import Operator import paddle.fluid as fluid -from op_test import OpTest +from paddle.fluid.tests.unittests.op_test import OpTest from paddle.fluid.framework import grad_var_name -from test_batch_norm_op import TestBatchNormOpInference, TestBatchNormOpTraining, _reference_training, _reference_grad +from paddle.fluid.tests.unittests.test_batch_norm_op import TestBatchNormOpInference, TestBatchNormOpTraining, _reference_training, _reference_grad class TestMKLDNNBatchNormOpTraining(TestBatchNormOpTraining): diff --git a/python/paddle/fluid/tests/unittests/test_concat_mkldnn_op.py b/python/paddle/fluid/tests/unittests/mkldnn/test_concat_mkldnn_op.py similarity index 94% rename from python/paddle/fluid/tests/unittests/test_concat_mkldnn_op.py rename to python/paddle/fluid/tests/unittests/mkldnn/test_concat_mkldnn_op.py index 0f2130f9049c7ee294444282e59c654551f76603..1a399740692eab8ccea0c984a1a4f2ac984eb045 100644 --- a/python/paddle/fluid/tests/unittests/test_concat_mkldnn_op.py +++ b/python/paddle/fluid/tests/unittests/mkldnn/test_concat_mkldnn_op.py @@ -15,7 +15,7 @@ from __future__ import print_function import unittest -from test_concat_op import TestConcatOp, TestConcatOp2, TestConcatOp3 +from paddle.fluid.tests.unittests.test_concat_op import TestConcatOp, TestConcatOp2, TestConcatOp3 class TestMKLDNNConcatOp(TestConcatOp): diff --git a/python/paddle/fluid/tests/unittests/test_conv2d_int8_mkldnn_op.py b/python/paddle/fluid/tests/unittests/mkldnn/test_conv2d_int8_mkldnn_op.py similarity index 98% rename from python/paddle/fluid/tests/unittests/test_conv2d_int8_mkldnn_op.py rename to python/paddle/fluid/tests/unittests/mkldnn/test_conv2d_int8_mkldnn_op.py index 5ad376cb08e488e85be6369a91d4e81031e9e9db..100a03cea0f740a615c4a08810d4ad9e8c974d7a 100644 --- a/python/paddle/fluid/tests/unittests/test_conv2d_int8_mkldnn_op.py +++ b/python/paddle/fluid/tests/unittests/mkldnn/test_conv2d_int8_mkldnn_op.py @@ -18,8 +18,8 @@ import unittest import numpy as np import paddle.fluid.core as core -from op_test import OpTest -from test_conv2d_op import conv2d_forward_naive, TestConv2dOp +from paddle.fluid.tests.unittests.op_test import OpTest +from paddle.fluid.tests.unittests.test_conv2d_op import conv2d_forward_naive, TestConv2dOp def conv2d_forward_refer(input, filter, group, conv_param): diff --git a/python/paddle/fluid/tests/unittests/test_conv2d_mkldnn_op.py b/python/paddle/fluid/tests/unittests/mkldnn/test_conv2d_mkldnn_op.py similarity index 91% rename from python/paddle/fluid/tests/unittests/test_conv2d_mkldnn_op.py rename to python/paddle/fluid/tests/unittests/mkldnn/test_conv2d_mkldnn_op.py index 438d45b84033b697c3210acc44392b93bf436df0..0542eef80070cbf281ee013c28b7092a2dd17eaa 100644 --- a/python/paddle/fluid/tests/unittests/test_conv2d_mkldnn_op.py +++ b/python/paddle/fluid/tests/unittests/mkldnn/test_conv2d_mkldnn_op.py @@ -16,7 +16,7 @@ from __future__ import print_function import unittest -from test_conv2d_op import TestConv2dOp, TestWithPad, TestWithStride, TestWithGroup, TestWith1x1, TestWithInput1x1Filter1x1 +from paddle.fluid.tests.unittests.test_conv2d_op import TestConv2dOp, TestWithPad, TestWithStride, TestWithGroup, TestWith1x1, TestWithInput1x1Filter1x1 class TestMKLDNN(TestConv2dOp): diff --git a/python/paddle/fluid/tests/unittests/test_conv2d_transpose_mkldnn_op.py b/python/paddle/fluid/tests/unittests/mkldnn/test_conv2d_transpose_mkldnn_op.py similarity index 94% rename from python/paddle/fluid/tests/unittests/test_conv2d_transpose_mkldnn_op.py rename to python/paddle/fluid/tests/unittests/mkldnn/test_conv2d_transpose_mkldnn_op.py index deefdd09abe6b9f9ca362654f21850f598337245..9bcdb7b2a975b648471714ab628caf91b6b6f3a9 100644 --- a/python/paddle/fluid/tests/unittests/test_conv2d_transpose_mkldnn_op.py +++ b/python/paddle/fluid/tests/unittests/mkldnn/test_conv2d_transpose_mkldnn_op.py @@ -16,7 +16,7 @@ from __future__ import print_function import unittest -from test_conv2d_transpose_op import TestConv2dTransposeOp, TestWithPad, TestWithStride +from paddle.fluid.tests.unittests.test_conv2d_transpose_op import TestConv2dTransposeOp, TestWithPad, TestWithStride class TestMKLDNN(TestConv2dTransposeOp): diff --git a/python/paddle/fluid/tests/unittests/test_conv3d_mkldnn_op.py b/python/paddle/fluid/tests/unittests/mkldnn/test_conv3d_mkldnn_op.py similarity index 91% rename from python/paddle/fluid/tests/unittests/test_conv3d_mkldnn_op.py rename to python/paddle/fluid/tests/unittests/mkldnn/test_conv3d_mkldnn_op.py index f0e1265e142b800587599783367eca2203033bf1..080b74502fbe83e97e88a65866e0d9b66b37033e 100644 --- a/python/paddle/fluid/tests/unittests/test_conv3d_mkldnn_op.py +++ b/python/paddle/fluid/tests/unittests/mkldnn/test_conv3d_mkldnn_op.py @@ -16,7 +16,7 @@ from __future__ import print_function import unittest -from test_conv3d_op import TestConv3dOp, TestCase1, TestWithGroup1, TestWithGroup2, TestWith1x1, TestWithInput1x1Filter1x1 +from paddle.fluid.tests.unittests.test_conv3d_op import TestConv3dOp, TestCase1, TestWithGroup1, TestWithGroup2, TestWith1x1, TestWithInput1x1Filter1x1 class TestMKLDNN(TestConv3dOp): diff --git a/python/paddle/fluid/tests/unittests/test_dequantize_mkldnn_op.py b/python/paddle/fluid/tests/unittests/mkldnn/test_dequantize_mkldnn_op.py similarity index 97% rename from python/paddle/fluid/tests/unittests/test_dequantize_mkldnn_op.py rename to python/paddle/fluid/tests/unittests/mkldnn/test_dequantize_mkldnn_op.py index 0c5e1abd7c8fb010357998c0ceaebaf21619fda9..9a54f927cbde648bbbb06d043bbc1391ee43c314 100644 --- a/python/paddle/fluid/tests/unittests/test_dequantize_mkldnn_op.py +++ b/python/paddle/fluid/tests/unittests/mkldnn/test_dequantize_mkldnn_op.py @@ -16,7 +16,7 @@ from __future__ import print_function import unittest import numpy as np -from op_test import OpTest +from paddle.fluid.tests.unittests.op_test import OpTest class TestDeQuantizeOp(OpTest): diff --git a/python/paddle/fluid/tests/unittests/test_elementwise_add_mkldnn_op.py b/python/paddle/fluid/tests/unittests/mkldnn/test_elementwise_add_mkldnn_op.py similarity index 97% rename from python/paddle/fluid/tests/unittests/test_elementwise_add_mkldnn_op.py rename to python/paddle/fluid/tests/unittests/mkldnn/test_elementwise_add_mkldnn_op.py index d85cc1f856df8eaa73cef318b48a292042488edf..c3a42656b71d09dbc22abf8ce2ddc243b43b422f 100644 --- a/python/paddle/fluid/tests/unittests/test_elementwise_add_mkldnn_op.py +++ b/python/paddle/fluid/tests/unittests/mkldnn/test_elementwise_add_mkldnn_op.py @@ -16,8 +16,8 @@ from __future__ import print_function import unittest import numpy as np import paddle.fluid.core as core -from op_test import OpTest -from test_elementwise_add_op import * +from paddle.fluid.tests.unittests.op_test import OpTest +from paddle.fluid.tests.unittests.test_elementwise_add_op import * ''' Some tests differ from the tests defined in test_elementwise_add_op.py because MKLDNN does not support tensors of number of dimensions 3. diff --git a/python/paddle/fluid/tests/unittests/test_elementwise_mul_mkldnn_op.py b/python/paddle/fluid/tests/unittests/mkldnn/test_elementwise_mul_mkldnn_op.py similarity index 98% rename from python/paddle/fluid/tests/unittests/test_elementwise_mul_mkldnn_op.py rename to python/paddle/fluid/tests/unittests/mkldnn/test_elementwise_mul_mkldnn_op.py index 536e9a1c58ec4a8b1b5a7c1d3a5fe737b38d24ab..738715dd70181988028adff1c50be3a52199c312 100644 --- a/python/paddle/fluid/tests/unittests/test_elementwise_mul_mkldnn_op.py +++ b/python/paddle/fluid/tests/unittests/mkldnn/test_elementwise_mul_mkldnn_op.py @@ -15,10 +15,10 @@ from __future__ import print_function import unittest import numpy as np -from op_test import OpTest +from paddle.fluid.tests.unittests.op_test import OpTest import paddle.fluid.core as core from paddle.fluid.op import Operator -from test_elementwise_mul_op import * +from paddle.fluid.tests.unittests.test_elementwise_mul_op import * class TestElementwiseMulMKLDNNOp_BroadcastNCHW16c(ElementwiseMulOp): diff --git a/python/paddle/fluid/tests/unittests/test_fc_mkldnn_op.py b/python/paddle/fluid/tests/unittests/mkldnn/test_fc_mkldnn_op.py similarity index 98% rename from python/paddle/fluid/tests/unittests/test_fc_mkldnn_op.py rename to python/paddle/fluid/tests/unittests/mkldnn/test_fc_mkldnn_op.py index 45951a34d6f61a242cb2dc004d6801a6c1c9dd92..84229a5cffbb466ef3c69cd997adacfb21f6aae2 100644 --- a/python/paddle/fluid/tests/unittests/test_fc_mkldnn_op.py +++ b/python/paddle/fluid/tests/unittests/mkldnn/test_fc_mkldnn_op.py @@ -16,7 +16,7 @@ from __future__ import print_function import unittest import numpy as np -from op_test import OpTest +from paddle.fluid.tests.unittests.op_test import OpTest def fully_connected_naive(input, weights, bias_data=None): diff --git a/python/paddle/fluid/tests/unittests/test_gaussian_random_mkldnn_op.py b/python/paddle/fluid/tests/unittests/mkldnn/test_gaussian_random_mkldnn_op.py similarity index 90% rename from python/paddle/fluid/tests/unittests/test_gaussian_random_mkldnn_op.py rename to python/paddle/fluid/tests/unittests/mkldnn/test_gaussian_random_mkldnn_op.py index 9777ec390656d3f6166bf9f5de7bbad8b6bd786d..c18bd77bd3e6de08283f3ac3a31c73453f3c9129 100644 --- a/python/paddle/fluid/tests/unittests/test_gaussian_random_mkldnn_op.py +++ b/python/paddle/fluid/tests/unittests/mkldnn/test_gaussian_random_mkldnn_op.py @@ -16,7 +16,7 @@ from __future__ import print_function import unittest -from test_gaussian_random_op import TestGaussianRandomOp +from paddle.fluid.tests.unittests.test_gaussian_random_op import TestGaussianRandomOp class TestMKLDNN(TestGaussianRandomOp): diff --git a/python/paddle/fluid/tests/unittests/test_lrn_mkldnn_op.py b/python/paddle/fluid/tests/unittests/mkldnn/test_lrn_mkldnn_op.py similarity index 96% rename from python/paddle/fluid/tests/unittests/test_lrn_mkldnn_op.py rename to python/paddle/fluid/tests/unittests/mkldnn/test_lrn_mkldnn_op.py index f6bb2ab7a696c40cb61dd5b38ca702b577fe7ea2..a5e6e116a5f1bc1e051ce3cfdac8cd1e5f3ed90e 100644 --- a/python/paddle/fluid/tests/unittests/test_lrn_mkldnn_op.py +++ b/python/paddle/fluid/tests/unittests/mkldnn/test_lrn_mkldnn_op.py @@ -15,7 +15,7 @@ from __future__ import print_function import unittest -from test_lrn_op import TestLRNOp +from paddle.fluid.tests.unittests.test_lrn_op import TestLRNOp class TestLRNMKLDNNOp(TestLRNOp): diff --git a/python/paddle/fluid/tests/unittests/test_pool2d_int8_mkldnn_op.py b/python/paddle/fluid/tests/unittests/mkldnn/test_pool2d_int8_mkldnn_op.py similarity index 94% rename from python/paddle/fluid/tests/unittests/test_pool2d_int8_mkldnn_op.py rename to python/paddle/fluid/tests/unittests/mkldnn/test_pool2d_int8_mkldnn_op.py index f4495d0bc8198189962d033ec18b8b67f1f47c84..fca906fecc5fe8d25b9251c886398f8df778043f 100644 --- a/python/paddle/fluid/tests/unittests/test_pool2d_int8_mkldnn_op.py +++ b/python/paddle/fluid/tests/unittests/mkldnn/test_pool2d_int8_mkldnn_op.py @@ -19,8 +19,8 @@ import unittest import numpy as np import paddle.fluid.core as core -from op_test import OpTest -from test_pool2d_op import TestPool2D_Op, avg_pool2D_forward_naive, max_pool2D_forward_naive +from paddle.fluid.tests.unittests.op_test import OpTest +from paddle.fluid.tests.unittests.test_pool2d_op import TestPool2D_Op, avg_pool2D_forward_naive, max_pool2D_forward_naive class TestPool2dMKLDNNInt8_Op(TestPool2D_Op): diff --git a/python/paddle/fluid/tests/unittests/test_pool2d_mkldnn_op.py b/python/paddle/fluid/tests/unittests/mkldnn/test_pool2d_mkldnn_op.py similarity index 90% rename from python/paddle/fluid/tests/unittests/test_pool2d_mkldnn_op.py rename to python/paddle/fluid/tests/unittests/mkldnn/test_pool2d_mkldnn_op.py index 7de5fefc148021d4109da2ac9f4b36c93a05a23f..6de43dd46e5d184ec934f2d85e0c87137e9702e0 100644 --- a/python/paddle/fluid/tests/unittests/test_pool2d_mkldnn_op.py +++ b/python/paddle/fluid/tests/unittests/mkldnn/test_pool2d_mkldnn_op.py @@ -15,7 +15,7 @@ from __future__ import print_function import unittest -from test_pool2d_op import TestPool2D_Op, TestCase1, TestCase2, TestCase3, TestCase4, TestCase5 +from paddle.fluid.tests.unittests.test_pool2d_op import TestPool2D_Op, TestCase1, TestCase2, TestCase3, TestCase4, TestCase5 def create_test_mkldnn_class(parent): diff --git a/python/paddle/fluid/tests/unittests/test_quantize_mkldnn_op.py b/python/paddle/fluid/tests/unittests/mkldnn/test_quantize_mkldnn_op.py similarity index 97% rename from python/paddle/fluid/tests/unittests/test_quantize_mkldnn_op.py rename to python/paddle/fluid/tests/unittests/mkldnn/test_quantize_mkldnn_op.py index 99607928648be437b7f944f86a0c28b99d1775c4..132f7bd039f7797fb0fc332d6f7b8c242af46535 100644 --- a/python/paddle/fluid/tests/unittests/test_quantize_mkldnn_op.py +++ b/python/paddle/fluid/tests/unittests/mkldnn/test_quantize_mkldnn_op.py @@ -16,7 +16,7 @@ from __future__ import print_function import unittest import numpy as np -from op_test import OpTest +from paddle.fluid.tests.unittests.op_test import OpTest class TestQuantizeOp(OpTest): diff --git a/python/paddle/fluid/tests/unittests/test_sum_mkldnn_op.py b/python/paddle/fluid/tests/unittests/mkldnn/test_sum_mkldnn_op.py similarity index 92% rename from python/paddle/fluid/tests/unittests/test_sum_mkldnn_op.py rename to python/paddle/fluid/tests/unittests/mkldnn/test_sum_mkldnn_op.py index 55820f31b81df9f3618d1004f6d21565564efa29..5928047b5171bcf33b024040ce79577b8aa0b53a 100644 --- a/python/paddle/fluid/tests/unittests/test_sum_mkldnn_op.py +++ b/python/paddle/fluid/tests/unittests/mkldnn/test_sum_mkldnn_op.py @@ -16,7 +16,7 @@ from __future__ import print_function import unittest -from test_sum_op import TestSumOp +from paddle.fluid.tests.unittests.test_sum_op import TestSumOp class TestMKLDNN(TestSumOp): diff --git a/python/paddle/fluid/tests/unittests/test_transpose_mkldnn_op.py b/python/paddle/fluid/tests/unittests/mkldnn/test_transpose_mkldnn_op.py similarity index 95% rename from python/paddle/fluid/tests/unittests/test_transpose_mkldnn_op.py rename to python/paddle/fluid/tests/unittests/mkldnn/test_transpose_mkldnn_op.py index 0c201b9e4f48df94924a248d820ae2cf73367560..4845eefe367f1ad6a2eb6ffd1f9b0598b1b4fbbd 100644 --- a/python/paddle/fluid/tests/unittests/test_transpose_mkldnn_op.py +++ b/python/paddle/fluid/tests/unittests/mkldnn/test_transpose_mkldnn_op.py @@ -16,7 +16,7 @@ from __future__ import print_function import unittest -from test_transpose_op import TestTransposeOp +from paddle.fluid.tests.unittests.test_transpose_op import TestTransposeOp class TestTransposeMKLDNN(TestTransposeOp): diff --git a/python/paddle/fluid/tests/unittests/ngraph/test_pool2d_ngraph_op.py b/python/paddle/fluid/tests/unittests/ngraph/test_pool2d_ngraph_op.py new file mode 100644 index 0000000000000000000000000000000000000000..95e592e8ec036ad231ed57ddbc706683cb7aa153 --- /dev/null +++ b/python/paddle/fluid/tests/unittests/ngraph/test_pool2d_ngraph_op.py @@ -0,0 +1,51 @@ +# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from __future__ import print_function + +from paddle.fluid.tests.unittests.test_pool2d_op import * + + +class TestNGRAPHPool2D_Op(TestPool2D_Op): + def init_test_case(self): + super(TestNGRAPHPool2D_Op, self).init_test_case() + + +class TestNGRAPHCase1(TestCase1): + def init_test_case(self): + super(TestNGRAPHCase1, self).init_test_case() + + +class TestNGRAPHCase2(TestCase2): + def init_test_case(self): + super(TestNGRAPHCase2, self).init_test_case() + + +class TestNGRAPHCase3(TestCase3): + def init_pool_type(self): + super(TestNGRAPHCase3, self).init_pool_type() + + +class TestNGRAPHCase4(TestCase4): + def init_pool_type(self): + super(TestNGRAPHCase4, self).init_pool_type() + + +class TestNGRAPHCase5(TestCase5): + def init_pool_type(self): + super(TestNGRAPHCase5, self).init_pool_type() + + +if __name__ == '__main__': + unittest.main() diff --git a/python/paddle/fluid/tests/unittests/test_beam_search_op.py b/python/paddle/fluid/tests/unittests/test_beam_search_op.py index c28dda4b53ce5d394ff11222e5df8d257b4e80da..1d9f4b78f30fefa21c189036c3731e0afe39ea9e 100644 --- a/python/paddle/fluid/tests/unittests/test_beam_search_op.py +++ b/python/paddle/fluid/tests/unittests/test_beam_search_op.py @@ -38,6 +38,7 @@ class BeamSearchOpTester(unittest.TestCase): self._create_pre_ids() self.scope.var('selected_ids') self.scope.var('selected_scores') + self.scope.var('parent_idx') def test_run(self): op = Operator( @@ -48,12 +49,14 @@ class BeamSearchOpTester(unittest.TestCase): scores='scores', selected_ids='selected_ids', selected_scores='selected_scores', + parent_idx='parent_idx', level=0, beam_size=2, end_id=0, ) op.run(self.scope, core.CPUPlace()) selected_ids = self.scope.find_var("selected_ids").get_tensor() selected_scores = self.scope.find_var("selected_scores").get_tensor() + parent_idx = self.scope.find_var("parent_idx").get_tensor() self.assertTrue( np.allclose( np.array(selected_ids), np.array([4, 2, 3, 8])[:, np.newaxis])) @@ -62,6 +65,8 @@ class BeamSearchOpTester(unittest.TestCase): np.array(selected_scores), np.array([0.5, 0.6, 0.9, 0.7])[:, np.newaxis])) self.assertEqual(selected_ids.lod(), [[0, 2, 4], [0, 1, 2, 3, 4]]) + self.assertTrue( + np.allclose(np.array(parent_idx), np.array([0, 1, 2, 3]))) def _create_pre_ids(self): np_data = np.array([[1, 2, 3, 4]], dtype='int64') diff --git a/python/paddle/fluid/tests/unittests/test_box_coder_op.py b/python/paddle/fluid/tests/unittests/test_box_coder_op.py index 2511c5c22e012babdeb71a71d3546456ea2ceaf3..6156268bf25ada310a3d22242ecff4b9cdf1759a 100644 --- a/python/paddle/fluid/tests/unittests/test_box_coder_op.py +++ b/python/paddle/fluid/tests/unittests/test_box_coder_op.py @@ -21,80 +21,80 @@ import math from op_test import OpTest -def box_coder(target_box, prior_box, prior_box_var, output_box, code_type, - box_normalized): - prior_box_x = ( - (prior_box[:, 2] + prior_box[:, 0]) / 2).reshape(1, prior_box.shape[0]) - prior_box_y = ( - (prior_box[:, 3] + prior_box[:, 1]) / 2).reshape(1, prior_box.shape[0]) - prior_box_width = ( - (prior_box[:, 2] - prior_box[:, 0])).reshape(1, prior_box.shape[0]) - prior_box_height = ( - (prior_box[:, 3] - prior_box[:, 1])).reshape(1, prior_box.shape[0]) - prior_box_var = prior_box_var.reshape(1, prior_box_var.shape[0], - prior_box_var.shape[1]) - if not box_normalized: - prior_box_height = prior_box_height + 1 - prior_box_width = prior_box_width + 1 - - if (code_type == "EncodeCenterSize"): - target_box_x = ((target_box[:, 2] + target_box[:, 0]) / 2).reshape( - target_box.shape[0], 1) - target_box_y = ((target_box[:, 3] + target_box[:, 1]) / 2).reshape( - target_box.shape[0], 1) - target_box_width = ((target_box[:, 2] - target_box[:, 0])).reshape( - target_box.shape[0], 1) - target_box_height = ((target_box[:, 3] - target_box[:, 1])).reshape( - target_box.shape[0], 1) - if not box_normalized: - target_box_height = target_box_height + 1 - target_box_width = target_box_width + 1 - - output_box[:,:,0] = (target_box_x - prior_box_x) / prior_box_width / \ - prior_box_var[:,:,0] - output_box[:,:,1] = (target_box_y - prior_box_y) / prior_box_height / \ - prior_box_var[:,:,1] - output_box[:,:,2] = np.log(np.fabs(target_box_width / prior_box_width)) / \ - prior_box_var[:,:,2] - output_box[:,:,3] = np.log(np.fabs(target_box_height / prior_box_height)) / \ - prior_box_var[:,:,3] - - elif (code_type == "DecodeCenterSize"): - target_box_x = prior_box_var[:,:,0] * target_box[:,:,0] * \ - prior_box_width + prior_box_x - target_box_y = prior_box_var[:,:,1] * target_box[:,:,1] * \ - prior_box_height + prior_box_y - target_box_width = np.exp(prior_box_var[:,:,2] * target_box[:,:,2]) * \ - prior_box_width - target_box_height = np.exp(prior_box_var[:,:,3] * target_box[:,:,3]) * \ - prior_box_height - - output_box[:, :, 0] = target_box_x - target_box_width / 2 - output_box[:, :, 1] = target_box_y - target_box_height / 2 - output_box[:, :, 2] = target_box_x + target_box_width / 2 - output_box[:, :, 3] = target_box_y + target_box_height / 2 - if not box_normalized: - output_box[:, :, 2] = output_box[:, :, 2] - 1 - output_box[:, :, 3] = output_box[:, :, 3] - 1 - - -def batch_box_coder(prior_box, prior_box_var, target_box, lod, code_type, - box_normalized): - n = target_box.shape[0] - m = prior_box.shape[0] +def box_decoder(t_box, p_box, pb_v, output_box, norm, axis=0): + pb_w = p_box[:, 2] - p_box[:, 0] + (norm == False) + pb_h = p_box[:, 3] - p_box[:, 1] + (norm == False) + pb_x = pb_w * 0.5 + p_box[:, 0] + pb_y = pb_h * 0.5 + p_box[:, 1] + shape = (1, p_box.shape[0]) if axis == 0 else (p_box.shape[0], 1) + + pb_w = pb_w.reshape(shape) + pb_h = pb_h.reshape(shape) + pb_x = pb_x.reshape(shape) + pb_y = pb_y.reshape(shape) + + if pb_v.ndim == 2: + pb_v = pb_v.reshape(1, pb_v.shape[0], pb_v.shape[1]) + if pb_v.ndim == 1: + tb_x = pb_v[0] * t_box[:, :, 0] * pb_w + pb_x + tb_y = pb_v[1] * t_box[:, :, 1] * pb_h + pb_y + tb_w = np.exp(pb_v[2] * t_box[:, :, 2]) * pb_w + tb_h = np.exp(pb_v[3] * t_box[:, :, 3]) * pb_h + else: + tb_x = pb_v[:, :, 0] * t_box[:, :, 0] * pb_w + pb_x + tb_y = pb_v[:, :, 1] * t_box[:, :, 1] * pb_h + pb_y + tb_w = np.exp(pb_v[:, :, 2] * t_box[:, :, 2]) * pb_w + tb_h = np.exp(pb_v[:, :, 3] * t_box[:, :, 3]) * pb_h + output_box[:, :, 0] = tb_x - tb_w / 2 + output_box[:, :, 1] = tb_y - tb_h / 2 + output_box[:, :, 2] = tb_x + tb_w / 2 - (not norm) + output_box[:, :, 3] = tb_y + tb_h / 2 - (not norm) + + +def box_encoder(t_box, p_box, pb_v, output_box, norm): + pb_w = p_box[:, 2] - p_box[:, 0] + (norm == False) + pb_h = p_box[:, 3] - p_box[:, 1] + (norm == False) + pb_x = pb_w * 0.5 + p_box[:, 0] + pb_y = pb_h * 0.5 + p_box[:, 1] + shape = (1, p_box.shape[0]) + + pb_w = pb_w.reshape(shape) + pb_h = pb_h.reshape(shape) + pb_x = pb_x.reshape(shape) + pb_y = pb_y.reshape(shape) + + if pb_v.ndim == 2: + pb_v = pb_v.reshape(1, pb_v.shape[0], pb_v.shape[1]) + tb_x = ((t_box[:, 2] + t_box[:, 0]) / 2).reshape(t_box.shape[0], 1) + tb_y = ((t_box[:, 3] + t_box[:, 1]) / 2).reshape(t_box.shape[0], 1) + tb_w = (t_box[:, 2] - t_box[:, 0]).reshape(t_box.shape[0], 1) + (not norm) + tb_h = (t_box[:, 3] - t_box[:, 1]).reshape(t_box.shape[0], 1) + (not norm) + if pb_v.ndim == 1: + output_box[:, :, 0] = (tb_x - pb_x) / pb_w / pb_v[0] + output_box[:, :, 1] = (tb_y - pb_y) / pb_h / pb_v[1] + output_box[:, :, 2] = np.log(np.fabs(tb_w / pb_w)) / pb_v[2] + output_box[:, :, 3] = np.log(np.fabs(tb_h / pb_h)) / pb_v[3] + else: + output_box[:, :, 0] = (tb_x - pb_x) / pb_w / pb_v[:, :, 0] + output_box[:, :, 1] = (tb_y - pb_y) / pb_h / pb_v[:, :, 1] + output_box[:, :, 2] = np.log(np.fabs(tb_w / pb_w)) / pb_v[:, :, 2] + output_box[:, :, 3] = np.log(np.fabs(tb_h / pb_h)) / pb_v[:, :, 3] + + +def batch_box_coder(p_box, pb_v, t_box, lod, code_type, norm, axis=0): + n = t_box.shape[0] + m = p_box.shape[0] + if code_type == "DecodeCenterSize": + m = t_box.shape[1] output_box = np.zeros((n, m, 4), dtype=np.float32) cur_offset = 0 for i in range(len(lod)): if (code_type == "EncodeCenterSize"): - box_coder(target_box[cur_offset:(cur_offset + lod[i]), :], - prior_box, prior_box_var, - output_box[cur_offset:(cur_offset + lod[i]), :, :], - code_type, box_normalized) + box_encoder(t_box[cur_offset:(cur_offset + lod[i]), :], p_box, pb_v, + output_box[cur_offset:(cur_offset + lod[i]), :, :], + norm) elif (code_type == "DecodeCenterSize"): - box_coder(target_box[cur_offset:(cur_offset + lod[i]), :, :], - prior_box, prior_box_var, - output_box[cur_offset:(cur_offset + lod[i]), :, :], - code_type, box_normalized) + box_decoder(t_box, p_box, pb_v, output_box, norm, axis) cur_offset += lod[i] return output_box @@ -106,9 +106,35 @@ class TestBoxCoderOp(OpTest): def setUp(self): self.op_type = "box_coder" lod = [[1, 1, 1, 1, 1]] - prior_box = np.random.random((10, 4)).astype('float32') - prior_box_var = np.random.random((10, 4)).astype('float32') - target_box = np.random.random((5, 10, 4)).astype('float32') + prior_box = np.random.random((81, 4)).astype('float32') + prior_box_var = np.random.random((81, 4)).astype('float32') + target_box = np.random.random((20, 81, 4)).astype('float32') + code_type = "DecodeCenterSize" + box_normalized = False + output_box = batch_box_coder(prior_box, prior_box_var, target_box, + lod[0], code_type, box_normalized) + self.inputs = { + 'PriorBox': prior_box, + 'PriorBoxVar': prior_box_var, + 'TargetBox': target_box, + } + self.attrs = { + 'code_type': 'decode_center_size', + 'box_normalized': False + } + self.outputs = {'OutputBox': output_box} + + +class TestBoxCoderOpWithOneRankVar(OpTest): + def test_check_output(self): + self.check_output() + + def setUp(self): + self.op_type = "box_coder" + lod = [[1, 1, 1, 1, 1]] + prior_box = np.random.random((81, 4)).astype('float32') + prior_box_var = np.random.random((4)).astype('float32') + target_box = np.random.random((20, 81, 4)).astype('float32') code_type = "DecodeCenterSize" box_normalized = False output_box = batch_box_coder(prior_box, prior_box_var, target_box, @@ -133,9 +159,9 @@ class TestBoxCoderOpWithoutBoxVar(OpTest): def setUp(self): self.op_type = "box_coder" lod = [[0, 1, 2, 3, 4, 5]] - prior_box = np.random.random((10, 4)).astype('float32') - prior_box_var = np.ones((10, 4)).astype('float32') - target_box = np.random.random((5, 10, 4)).astype('float32') + prior_box = np.random.random((81, 4)).astype('float32') + prior_box_var = np.ones((81, 4)).astype('float32') + target_box = np.random.random((20, 81, 4)).astype('float32') code_type = "DecodeCenterSize" box_normalized = False output_box = batch_box_coder(prior_box, prior_box_var, target_box, @@ -158,10 +184,10 @@ class TestBoxCoderOpWithLoD(OpTest): def setUp(self): self.op_type = "box_coder" - lod = [[4, 8, 8]] - prior_box = np.random.random((10, 4)).astype('float32') - prior_box_var = np.random.random((10, 4)).astype('float32') - target_box = np.random.random((20, 4)).astype('float32') + lod = [[10, 20, 20]] + prior_box = np.random.random((20, 4)).astype('float32') + prior_box_var = np.random.random((20, 4)).astype('float32') + target_box = np.random.random((50, 4)).astype('float32') code_type = "EncodeCenterSize" box_normalized = True output_box = batch_box_coder(prior_box, prior_box_var, target_box, @@ -176,5 +202,63 @@ class TestBoxCoderOpWithLoD(OpTest): self.outputs = {'OutputBox': output_box} +class TestBoxCoderOpWithAxis(OpTest): + def test_check_output(self): + self.check_output() + + def setUp(self): + self.op_type = "box_coder" + lod = [[1, 1, 1, 1, 1]] + prior_box = np.random.random((30, 4)).astype('float32') + prior_box_var = np.random.random((4)).astype('float32') + target_box = np.random.random((30, 81, 4)).astype('float32') + code_type = "DecodeCenterSize" + box_normalized = False + axis = 1 + output_box = batch_box_coder(prior_box, prior_box_var, target_box, + lod[0], code_type, box_normalized, axis) + + self.inputs = { + 'PriorBox': prior_box, + 'PriorBoxVar': prior_box_var, + 'TargetBox': target_box, + } + self.attrs = { + 'code_type': 'decode_center_size', + 'box_normalized': False, + 'axis': axis + } + self.outputs = {'OutputBox': output_box} + + +class TestBoxCoderOpWithVariance(OpTest): + def test_check_output(self): + self.check_output() + + def setUp(self): + self.op_type = "box_coder" + lod = [[1, 1, 1, 1, 1]] + prior_box = np.random.random((30, 4)).astype('float32') + prior_box_var = np.random.random((4)).astype('float32') + target_box = np.random.random((30, 81, 4)).astype('float32') + code_type = "DecodeCenterSize" + box_normalized = False + axis = 1 + output_box = batch_box_coder(prior_box, prior_box_var, target_box, + lod[0], code_type, box_normalized, axis) + + self.inputs = { + 'PriorBox': prior_box, + 'TargetBox': target_box, + } + self.attrs = { + 'code_type': 'decode_center_size', + 'box_normalized': False, + 'variance': prior_box_var.astype(np.float).flatten(), + 'axis': axis + } + self.outputs = {'OutputBox': output_box} + + if __name__ == '__main__': unittest.main() diff --git a/python/paddle/fluid/tests/unittests/test_imperative_ptb_rnn.py b/python/paddle/fluid/tests/unittests/test_imperative_ptb_rnn.py index 5877e91f92e642e69265104c6728cd9bd41c41cd..afe990e74ff96dfbca4f335b561f9bbe7d295246 100644 --- a/python/paddle/fluid/tests/unittests/test_imperative_ptb_rnn.py +++ b/python/paddle/fluid/tests/unittests/test_imperative_ptb_rnn.py @@ -268,9 +268,6 @@ class TestImperativePtbRnn(unittest.TestCase): sgd.minimize(dy_loss) for param in ptb_model.parameters(): dy_param_updated[param.name] = param._numpy() - # print("dy_loss is {}".format(dy_loss._numpy())) - # print("last_hidden is {}".format(last_hidden._numpy())) - # print("last_cell is {}".format(last_cell._numpy())) with new_program_scope(): fluid.default_startup_program().random_seed = seed diff --git a/python/paddle/fluid/tests/unittests/test_yolov3_loss_op.py b/python/paddle/fluid/tests/unittests/test_yolov3_loss_op.py index 544fe4b4f81909b69a05d9751316e3d3137fdc45..020c1139230a9177c4d7765367359d91839d7d46 100644 --- a/python/paddle/fluid/tests/unittests/test_yolov3_loss_op.py +++ b/python/paddle/fluid/tests/unittests/test_yolov3_loss_op.py @@ -16,174 +16,179 @@ from __future__ import division import unittest import numpy as np +from scipy.special import logit +from scipy.special import expit from op_test import OpTest from paddle.fluid import core -def sigmoid(x): - return 1.0 / (1.0 + np.exp(-1.0 * x)) +def l2loss(x, y): + return 0.5 * (y - x) * (y - x) -def mse(x, y, num): - return ((y - x)**2).sum() / num +def sce(x, label): + sigmoid_x = expit(x) + term1 = label * np.log(sigmoid_x) + term2 = (1.0 - label) * np.log(1.0 - sigmoid_x) + return -term1 - term2 -def bce(x, y, mask): - x = x.reshape((-1)) - y = y.reshape((-1)) - mask = mask.reshape((-1)) +def sigmoid(x): + return 1.0 / (1.0 + np.exp(-1.0 * x)) - error_sum = 0.0 - count = 0 - for i in range(x.shape[0]): - if mask[i] > 0: - error_sum += y[i] * np.log(x[i]) + (1 - y[i]) * np.log(1 - x[i]) - count += 1 - return error_sum / (-1.0 * count) +def batch_xywh_box_iou(box1, box2): + b1_left = box1[:, :, 0] - box1[:, :, 2] / 2 + b1_right = box1[:, :, 0] + box1[:, :, 2] / 2 + b1_top = box1[:, :, 1] - box1[:, :, 3] / 2 + b1_bottom = box1[:, :, 1] + box1[:, :, 3] / 2 -def box_iou(box1, box2): - b1_x1 = box1[0] - box1[2] / 2 - b1_x2 = box1[0] + box1[2] / 2 - b1_y1 = box1[1] - box1[3] / 2 - b1_y2 = box1[1] + box1[3] / 2 - b2_x1 = box2[0] - box2[2] / 2 - b2_x2 = box2[0] + box2[2] / 2 - b2_y1 = box2[1] - box2[3] / 2 - b2_y2 = box2[1] + box2[3] / 2 + b2_left = box2[:, :, 0] - box2[:, :, 2] / 2 + b2_right = box2[:, :, 0] + box2[:, :, 2] / 2 + b2_top = box2[:, :, 1] - box2[:, :, 3] / 2 + b2_bottom = box2[:, :, 1] + box2[:, :, 3] / 2 - b1_area = (b1_x2 - b1_x1) * (b1_y2 - b1_y1) - b2_area = (b2_x2 - b2_x1) * (b2_y2 - b2_y1) + left = np.maximum(b1_left[:, :, np.newaxis], b2_left[:, np.newaxis, :]) + right = np.minimum(b1_right[:, :, np.newaxis], b2_right[:, np.newaxis, :]) + top = np.maximum(b1_top[:, :, np.newaxis], b2_top[:, np.newaxis, :]) + bottom = np.minimum(b1_bottom[:, :, np.newaxis], + b2_bottom[:, np.newaxis, :]) - inter_rect_x1 = max(b1_x1, b2_x1) - inter_rect_y1 = max(b1_y1, b2_y1) - inter_rect_x2 = min(b1_x2, b2_x2) - inter_rect_y2 = min(b1_y2, b2_y2) - inter_area = max(inter_rect_x2 - inter_rect_x1, 0) * max( - inter_rect_y2 - inter_rect_y1, 0) + inter_w = np.clip(right - left, 0., 1.) + inter_h = np.clip(bottom - top, 0., 1.) + inter_area = inter_w * inter_h - return inter_area / (b1_area + b2_area + inter_area) + b1_area = (b1_right - b1_left) * (b1_bottom - b1_top) + b2_area = (b2_right - b2_left) * (b2_bottom - b2_top) + union = b1_area[:, :, np.newaxis] + b2_area[:, np.newaxis, :] - inter_area + return inter_area / union -def build_target(gtboxs, gtlabel, attrs, grid_size): - n, b, _ = gtboxs.shape - ignore_thresh = attrs["ignore_thresh"] - anchors = attrs["anchors"] - class_num = attrs["class_num"] - an_num = len(anchors) // 2 - obj_mask = np.zeros((n, an_num, grid_size, grid_size)).astype('float32') - noobj_mask = np.ones((n, an_num, grid_size, grid_size)).astype('float32') - tx = np.zeros((n, an_num, grid_size, grid_size)).astype('float32') - ty = np.zeros((n, an_num, grid_size, grid_size)).astype('float32') - tw = np.zeros((n, an_num, grid_size, grid_size)).astype('float32') - th = np.zeros((n, an_num, grid_size, grid_size)).astype('float32') - tconf = np.zeros((n, an_num, grid_size, grid_size)).astype('float32') - tcls = np.zeros( - (n, an_num, grid_size, grid_size, class_num)).astype('float32') +def YOLOv3Loss(x, gtbox, gtlabel, attrs): + n, c, h, w = x.shape + b = gtbox.shape[1] + anchors = attrs['anchors'] + an_num = len(anchors) // 2 + anchor_mask = attrs['anchor_mask'] + mask_num = len(anchor_mask) + class_num = attrs["class_num"] + ignore_thresh = attrs['ignore_thresh'] + downsample = attrs['downsample'] + input_size = downsample * h + x = x.reshape((n, mask_num, 5 + class_num, h, w)).transpose((0, 1, 3, 4, 2)) + loss = np.zeros((n)).astype('float32') + + pred_box = x[:, :, :, :, :4].copy() + grid_x = np.tile(np.arange(w).reshape((1, w)), (h, 1)) + grid_y = np.tile(np.arange(h).reshape((h, 1)), (1, w)) + pred_box[:, :, :, :, 0] = (grid_x + sigmoid(pred_box[:, :, :, :, 0])) / w + pred_box[:, :, :, :, 1] = (grid_y + sigmoid(pred_box[:, :, :, :, 1])) / h + + x[:, :, :, :, 5:] = np.where(x[:, :, :, :, 5:] < -0.5, x[:, :, :, :, 5:], + np.ones_like(x[:, :, :, :, 5:]) * 1.0 / + class_num) + + mask_anchors = [] + for m in anchor_mask: + mask_anchors.append((anchors[2 * m], anchors[2 * m + 1])) + anchors_s = np.array( + [(an_w / input_size, an_h / input_size) for an_w, an_h in mask_anchors]) + anchor_w = anchors_s[:, 0:1].reshape((1, mask_num, 1, 1)) + anchor_h = anchors_s[:, 1:2].reshape((1, mask_num, 1, 1)) + pred_box[:, :, :, :, 2] = np.exp(pred_box[:, :, :, :, 2]) * anchor_w + pred_box[:, :, :, :, 3] = np.exp(pred_box[:, :, :, :, 3]) * anchor_h + + pred_box = pred_box.reshape((n, -1, 4)) + pred_obj = x[:, :, :, :, 4].reshape((n, -1)) + objness = np.zeros(pred_box.shape[:2]).astype('float32') + ious = batch_xywh_box_iou(pred_box, gtbox) + ious_max = np.max(ious, axis=-1) + objness = np.where(ious_max > ignore_thresh, -np.ones_like(objness), + objness) + + gtbox_shift = gtbox.copy() + gtbox_shift[:, :, 0] = 0 + gtbox_shift[:, :, 1] = 0 + + anchors = [(anchors[2 * i], anchors[2 * i + 1]) for i in range(0, an_num)] + anchors_s = np.array( + [(an_w / input_size, an_h / input_size) for an_w, an_h in anchors]) + anchor_boxes = np.concatenate( + [np.zeros_like(anchors_s), anchors_s], axis=-1) + anchor_boxes = np.tile(anchor_boxes[np.newaxis, :, :], (n, 1, 1)) + ious = batch_xywh_box_iou(gtbox_shift, anchor_boxes) + iou_matches = np.argmax(ious, axis=-1) + gt_matches = iou_matches.copy() for i in range(n): for j in range(b): - if gtboxs[i, j, :].sum() == 0: + if gtbox[i, j, 2:].sum() == 0: + gt_matches[i, j] = -1 continue + if iou_matches[i, j] not in anchor_mask: + gt_matches[i, j] = -1 + continue + an_idx = anchor_mask.index(iou_matches[i, j]) + gt_matches[i, j] = an_idx + gi = int(gtbox[i, j, 0] * w) + gj = int(gtbox[i, j, 1] * h) - gt_label = gtlabel[i, j] - gx = gtboxs[i, j, 0] * grid_size - gy = gtboxs[i, j, 1] * grid_size - gw = gtboxs[i, j, 2] * grid_size - gh = gtboxs[i, j, 3] * grid_size - - gi = int(gx) - gj = int(gy) - - gtbox = [0, 0, gw, gh] - max_iou = 0 - for k in range(an_num): - anchor_box = [0, 0, anchors[2 * k], anchors[2 * k + 1]] - iou = box_iou(gtbox, anchor_box) - if iou > max_iou: - max_iou = iou - best_an_index = k - if iou > ignore_thresh: - noobj_mask[i, best_an_index, gj, gi] = 0 - - obj_mask[i, best_an_index, gj, gi] = 1 - noobj_mask[i, best_an_index, gj, gi] = 0 - tx[i, best_an_index, gj, gi] = gx - gi - ty[i, best_an_index, gj, gi] = gy - gj - tw[i, best_an_index, gj, gi] = np.log(gw / anchors[2 * - best_an_index]) - th[i, best_an_index, gj, gi] = np.log( - gh / anchors[2 * best_an_index + 1]) - tconf[i, best_an_index, gj, gi] = 1 - tcls[i, best_an_index, gj, gi, gt_label] = 1 - - return (tx, ty, tw, th, tconf, tcls, obj_mask, noobj_mask) - - -def YoloV3Loss(x, gtbox, gtlabel, attrs): - n, c, h, w = x.shape - an_num = len(attrs['anchors']) // 2 - class_num = attrs["class_num"] - x = x.reshape((n, an_num, 5 + class_num, h, w)).transpose((0, 1, 3, 4, 2)) - pred_x = sigmoid(x[:, :, :, :, 0]) - pred_y = sigmoid(x[:, :, :, :, 1]) - pred_w = x[:, :, :, :, 2] - pred_h = x[:, :, :, :, 3] - pred_conf = sigmoid(x[:, :, :, :, 4]) - pred_cls = sigmoid(x[:, :, :, :, 5:]) - - tx, ty, tw, th, tconf, tcls, obj_mask, noobj_mask = build_target( - gtbox, gtlabel, attrs, x.shape[2]) - - obj_mask_expand = np.tile( - np.expand_dims(obj_mask, 4), (1, 1, 1, 1, int(attrs['class_num']))) - loss_x = mse(pred_x * obj_mask, tx * obj_mask, obj_mask.sum()) - loss_y = mse(pred_y * obj_mask, ty * obj_mask, obj_mask.sum()) - loss_w = mse(pred_w * obj_mask, tw * obj_mask, obj_mask.sum()) - loss_h = mse(pred_h * obj_mask, th * obj_mask, obj_mask.sum()) - loss_conf_target = bce(pred_conf * obj_mask, tconf * obj_mask, obj_mask) - loss_conf_notarget = bce(pred_conf * noobj_mask, tconf * noobj_mask, - noobj_mask) - loss_class = bce(pred_cls * obj_mask_expand, tcls * obj_mask_expand, - obj_mask_expand) - - return attrs['loss_weight_xy'] * (loss_x + loss_y) \ - + attrs['loss_weight_wh'] * (loss_w + loss_h) \ - + attrs['loss_weight_conf_target'] * loss_conf_target \ - + attrs['loss_weight_conf_notarget'] * loss_conf_notarget \ - + attrs['loss_weight_class'] * loss_class + tx = gtbox[i, j, 0] * w - gi + ty = gtbox[i, j, 1] * w - gj + tw = np.log(gtbox[i, j, 2] * input_size / mask_anchors[an_idx][0]) + th = np.log(gtbox[i, j, 3] * input_size / mask_anchors[an_idx][1]) + scale = (2.0 - gtbox[i, j, 2] * gtbox[i, j, 3]) + loss[i] += sce(x[i, an_idx, gj, gi, 0], tx) * scale + loss[i] += sce(x[i, an_idx, gj, gi, 1], ty) * scale + loss[i] += l2loss(x[i, an_idx, gj, gi, 2], tw) * scale + loss[i] += l2loss(x[i, an_idx, gj, gi, 3], th) * scale + + objness[i, an_idx * h * w + gj * w + gi] = 1.0 + + for label_idx in range(class_num): + loss[i] += sce(x[i, an_idx, gj, gi, 5 + label_idx], + float(label_idx == gtlabel[i, j])) + + for j in range(mask_num * h * w): + if objness[i, j] > 0: + loss[i] += sce(pred_obj[i, j], 1.0) + elif objness[i, j] == 0: + loss[i] += sce(pred_obj[i, j], 0.0) + + return (loss, objness.reshape((n, mask_num, h, w)).astype('float32'), \ + gt_matches.astype('int32')) class TestYolov3LossOp(OpTest): def setUp(self): - self.loss_weight_xy = 1.0 - self.loss_weight_wh = 1.0 - self.loss_weight_conf_target = 1.0 - self.loss_weight_conf_notarget = 1.0 - self.loss_weight_class = 1.0 self.initTestCase() self.op_type = 'yolov3_loss' - x = np.random.random(size=self.x_shape).astype('float32') + x = logit(np.random.uniform(0, 1, self.x_shape).astype('float32')) gtbox = np.random.random(size=self.gtbox_shape).astype('float32') - gtlabel = np.random.randint(0, self.class_num, - self.gtbox_shape[:2]).astype('int32') + gtlabel = np.random.randint(0, self.class_num, self.gtbox_shape[:2]) + gtmask = np.random.randint(0, 2, self.gtbox_shape[:2]) + gtbox = gtbox * gtmask[:, :, np.newaxis] + gtlabel = gtlabel * gtmask self.attrs = { "anchors": self.anchors, + "anchor_mask": self.anchor_mask, "class_num": self.class_num, "ignore_thresh": self.ignore_thresh, - "loss_weight_xy": self.loss_weight_xy, - "loss_weight_wh": self.loss_weight_wh, - "loss_weight_conf_target": self.loss_weight_conf_target, - "loss_weight_conf_notarget": self.loss_weight_conf_notarget, - "loss_weight_class": self.loss_weight_class, + "downsample": self.downsample, } - self.inputs = {'X': x, 'GTBox': gtbox, 'GTLabel': gtlabel} + self.inputs = { + 'X': x, + 'GTBox': gtbox.astype('float32'), + 'GTLabel': gtlabel.astype('int32'), + } + loss, objness, gt_matches = YOLOv3Loss(x, gtbox, gtlabel, self.attrs) self.outputs = { - 'Loss': np.array( - [YoloV3Loss(x, gtbox, gtlabel, self.attrs)]).astype('float32') + 'Loss': loss, + 'ObjectnessMask': objness, + "GTMatchMask": gt_matches } def test_check_output(self): @@ -196,19 +201,16 @@ class TestYolov3LossOp(OpTest): place, ['X'], 'Loss', no_grad_set=set(["GTBox", "GTLabel"]), - max_relative_error=0.06) + max_relative_error=0.3) def initTestCase(self): - self.anchors = [10, 13, 12, 12] - self.class_num = 10 + self.anchors = [10, 13, 16, 30, 33, 23] + self.anchor_mask = [1, 2] + self.class_num = 5 self.ignore_thresh = 0.5 - self.x_shape = (5, len(self.anchors) // 2 * (5 + self.class_num), 7, 7) - self.gtbox_shape = (5, 10, 4) - self.loss_weight_xy = 2.5 - self.loss_weight_wh = 0.8 - self.loss_weight_conf_target = 1.5 - self.loss_weight_conf_notarget = 0.5 - self.loss_weight_class = 1.2 + self.downsample = 32 + self.x_shape = (3, len(self.anchor_mask) * (5 + self.class_num), 5, 5) + self.gtbox_shape = (3, 5, 4) if __name__ == "__main__": diff --git a/python/paddle/fluid/transpiler/details/__init__.py b/python/paddle/fluid/transpiler/details/__init__.py index f33c05ed2f48c2498b98fc486d6ff7471088d77e..82d0d336e523ec48c5ceca3b92ff0963c4499123 100644 --- a/python/paddle/fluid/transpiler/details/__init__.py +++ b/python/paddle/fluid/transpiler/details/__init__.py @@ -17,3 +17,4 @@ from __future__ import print_function from .program_utils import * from .ufind import * from .checkport import * +from .vars_distributed import * diff --git a/python/paddle/fluid/transpiler/details/vars_distributed.py b/python/paddle/fluid/transpiler/details/vars_distributed.py new file mode 100644 index 0000000000000000000000000000000000000000..05e7f6e3e706376efc8af870a780d96c45642514 --- /dev/null +++ b/python/paddle/fluid/transpiler/details/vars_distributed.py @@ -0,0 +1,269 @@ +# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +from __future__ import print_function +from paddle.fluid.framework import Variable + + +class VarStruct(object): + """ + record part properties of a Variable in python. + """ + + def __init__(self, name, shape, dtype, type, lod_level, persistable): + self.name = name + self.shape = shape + self.dtype = dtype + self.type = type + self.lod_level = lod_level + self.persistable = persistable + + +class VarDistributed(object): + """ + a class to record the var distributed on parameter servers. + the class will record the relationship between origin var and slice var. + the slice var's properties, such as type/shape/offset/endpoint. + """ + + def __init__(self, + origin_var, + slice_var, + is_slice=None, + block_id=None, + offset=None, + vtype=None, + endpoint=None): + """ + Args: + origin_var(Variable|VarStruct): origin var properties + slice_var(Variable|VarStruct): slice var properties + is_slice(bool|None): slice or not, slice_var=True/False and its block size > 8192 are the judgement standard. + block_id(int|None): the number about the slice var. + offset(int|None): if the slice var is sliced, offset is the numel before the var. + vtype(str|None): a tag, such as Optimizer/Param/RemoteProfetch. + endpoint(str|None): which parameter the slice var on, such as "127.0.0.1:1001" + """ + + if isinstance(origin_var, Variable): + self.origin = self.__create_var_struct(origin_var) + else: + self.origin = origin_var + + if isinstance(slice_var, Variable): + self.slice = self.__create_var_struct(slice_var) + else: + self.slice = slice_var + + if self.equal(self.origin, self.slice): + self.is_slice = False + self.block_id = 0 + self.offset = 0 + else: + self.is_slice = True + self.block_id = 0 + self.offset = 0 + + if is_slice is not None: + self.is_slice = is_slice + if block_id is not None: + self.block_id = block_id + if offset is not None: + self.offset = offset + + self.vtype = vtype + self.endpoint = endpoint + + @staticmethod + def __create_var_struct(var): + return VarStruct(var.name, var.shape, var.dtype, var.type, + var.lod_level, var.persistable) + + @staticmethod + def equal(var1, var2): + """ + the two var is equal or not. + Returns: + bool: equal will return True else False + """ + assert isinstance(var1, VarStruct) and isinstance(var2, VarStruct) + + return var1.name == var2.name and \ + var1.type == var2.type and \ + var1.shape == var2.shape and \ + var1.dtype == var2.dtype and \ + var1.lod_level == var2.lod_level and \ + var1.persistable == var2.persistable + + def __str__(self): + origin_var_str = "{name} : fluid.{type}.shape{shape}.astype({dtype})". \ + format(i="{", e="}", name=self.origin.name, type=self.origin.type, + shape=self.origin.shape, dtype=self.origin.dtype) + + slice_var_str = "{name} : fluid.{type}.shape{shape}.astype({dtype})" \ + ".slice({is_slice}).block({block_id}).offset({offset})". \ + format(i="{", e="}", name=self.slice.name, type=self.slice.type, + shape=self.slice.shape, dtype=self.slice.dtype, + is_slice=self.is_slice, block_id=self.block_id, offset=self.offset) + + return "var owned: {}, origin var: ( {} ), slice var: ( {} ), endpoint: {} ".format( + self.vtype, origin_var_str, slice_var_str, self.endpoint) + + +class VarsDistributed(object): + """ + a gather about VarDistributed with many methods to find distributed vars. + through the class, we can get overview about the distributed parameters on parameter servers. + this class may centralized and convenient for developer to manage and get variable's distribute. + other module can also use this to find variables such io.py. + """ + + def __init__(self): + self.distributed_vars = [] + + def add_distributed_var(self, + origin_var, + slice_var, + is_slice=None, + block_id=None, + offset=None, + vtype=None, + endpoint=None): + """ + add distributed var in this. + + Args: + origin_var(Variable|VarStruct): origin var properties + slice_var(Variable|VarStruct): slice var properties + is_slice(bool|None): slice or not, slice_var=True/False and its block size > 8192 are the judgement standard. + block_id(int|None): the number about the slice var. + offset(int|None): if the slice var is sliced, offset is the numel before the var. + vtype(str|None): a tag, such as Optimizer/Param/RemoteProfetch. + endpoint(str|None): which parameter the slice var on, such as "127.0.0.1:1001" + Returns: + None + """ + self.distributed_vars.append( + VarDistributed(origin_var, slice_var, is_slice, block_id, offset, + vtype, endpoint)) + + def get_distributed_var_by_slice(self, var_name): + """ + get distributed var by conditions. + + Args: + var_name(str): slice var name, such as "w.traier0.block1" + Returns: + VarDistributed: distributed var. + """ + for dist_var in self.distributed_vars: + if dist_var.slice.name == var_name: + return dist_var + return None + + @staticmethod + def equal(var1, var2): + """ + the two var is equal or not. + Returns: + bool: equal will return True else False + """ + return var1.name == var2.name and \ + var1.type == var2.type and \ + var1.shape == var2.shape and \ + var1.dtype == var2.dtype and \ + var1.lod_level == var2.lod_level and \ + var1.persistable == var2.persistable + + def get_distributed_var_by_origin_and_ep(self, origin_var_name, endpoint): + """ + get distributed var by conditions. + + Args: + origin_var_name(str): + endpoint(str): the parameter endpoint, such as "127.0.0.1:1001" + Returns: + VarDistributed: distributed var. + """ + for dist_var in self.distributed_vars: + if dist_var.origin.name == origin_var_name and dist_var.endpoint == endpoint: + return dist_var + return None + + def get_distributed_vars_by_vtypes(self, vtypes, groupby=False): + """ + get distributed vars by conditions. + + Args: + vtype(str|None): distributed var's vtype, such as "Optimizer", "RemotePrefetch" + groupby(bool|False): group by origin var or not. + + Returns: + list: distributed var list. + dict: distributed var map when groupby=True + """ + vtype_vars = [] + for var in self.distributed_vars: + if var.vtype in vtypes: + vtype_vars.append(var) + if not groupby: + return vtype_vars + + params_map = {} + for var in vtype_vars: + origin_var_name = var.origin.name + + if origin_var_name in params_map.keys(): + optimizers = params_map.get(origin_var_name) + else: + optimizers = [] + optimizers.append(var) + params_map[origin_var_name] = optimizers + return params_map + + def get_distributed_vars_by_ep(self, endpoint, vtype=None): + """ + get distributed vars by conditions. + + Args: + endpoint(str): the parameter server endpoint, such as "127.0.0.1:2001" + vtype(str|None): distributed var's vtype, such as "Optimizer", "RemotePrefetch" + + Returns: + list: distributed var list. + """ + endpoint_vars = [] + for var in self.distributed_vars: + if var.endpoint == endpoint: + endpoint_vars.append(var) + if not vtype: + return endpoint_vars + + vtype_vars = [] + for var in endpoint_vars: + if var.vtype == vtype: + vtype_vars.append(var) + return vtype_vars + + def overview(self): + """ + get the overview string about all params on all parameter servers. + + Returns: + Str: overview string. + + """ + vars_str = [] + for var in self.distributed_vars: + vars_str.append(str(var)) + return "\n".join(vars_str) diff --git a/python/paddle/fluid/transpiler/distribute_transpiler.py b/python/paddle/fluid/transpiler/distribute_transpiler.py index e58f34e3750803669149685003ea5858fa775ed7..a3293afbbd7cef8470c808e98ae88a05f2e492f4 100644 --- a/python/paddle/fluid/transpiler/distribute_transpiler.py +++ b/python/paddle/fluid/transpiler/distribute_transpiler.py @@ -30,19 +30,23 @@ Steps to transpile pserver: 5. add listen_and_serv op """ +import sys import math -import numpy as np +from functools import reduce + import collections +import six import logging +import numpy as np + from .ps_dispatcher import RoundRobin, PSDispatcher from .. import core, framework, unique_name from ..framework import Program, default_main_program, \ - default_startup_program, Block, \ - Parameter, Variable, grad_var_name -from .details import * + default_startup_program, Block, Parameter, grad_var_name +from .details import wait_server_ready, UnionFind, VarStruct, VarsDistributed +from .details import delete_ops, find_op_by_output_arg from ..distribute_lookup_table import find_distributed_lookup_table -from functools import reduce LOOKUP_TABLE_TYPE = "lookup_table" LOOKUP_TABLE_GRAD_TYPE = "lookup_table_grad" @@ -62,260 +66,6 @@ def log(*args): print(args) -class VarStruct(object): - """ - record part properties of a Variable in python. - """ - - def __init__(self, name, shape, dtype, type, lod_level, persistable): - self.name = name - self.shape = shape - self.dtype = dtype - self.type = type - self.lod_level = lod_level - self.persistable = persistable - - -class VarDistributed(object): - """ - a class to record the var distributed on parameter servers. - the class will record the relationship between origin var and slice var. - the slice var's properties, such as type/shape/offset/endpoint. - """ - - def __init__(self, - origin_var, - slice_var, - is_slice=None, - block_id=None, - offset=None, - vtype=None, - endpoint=None): - """ - Args: - origin_var(Variable|VarStruct): origin var properties - slice_var(Variable|VarStruct): slice var properties - is_slice(bool|None): slice or not, slice_var=True/False and its block size > 8192 are the judgement standard. - block_id(int|None): the number about the slice var. - offset(int|None): if the slice var is sliced, offset is the numel before the var. - vtype(str|None): a tag, such as Optimizer/Param/RemoteProfetch. - endpoint(str|None): which parameter the slice var on, such as "127.0.0.1:1001" - """ - - if isinstance(origin_var, Variable): - self.origin = self.__create_var_struct(origin_var) - else: - self.origin = origin_var - - if isinstance(slice_var, Variable): - self.slice = self.__create_var_struct(slice_var) - else: - self.slice = slice_var - - if self.equal(self.origin, self.slice): - self.is_slice = False - self.block_id = 0 - self.offset = 0 - else: - self.is_slice = True - self.block_id = 0 - self.offset = 0 - - if is_slice is not None: - self.is_slice = is_slice - if block_id is not None: - self.block_id = block_id - if offset is not None: - self.offset = offset - - self.vtype = vtype - self.endpoint = endpoint - - @staticmethod - def __create_var_struct(var): - return VarStruct(var.name, var.shape, var.dtype, var.type, - var.lod_level, var.persistable) - - @staticmethod - def equal(var1, var2): - """ - the two var is equal or not. - Returns: - bool: equal will return True else False - """ - assert isinstance(var1, VarStruct) and isinstance(var2, VarStruct) - - return var1.name == var2.name and \ - var1.type == var2.type and \ - var1.shape == var2.shape and \ - var1.dtype == var2.dtype and \ - var1.lod_level == var2.lod_level and \ - var1.persistable == var2.persistable - - def __str__(self): - origin_var_str = "{name} : fluid.{type}.shape{shape}.astype({dtype})". \ - format(i="{", e="}", name=self.origin.name, type=self.origin.type, - shape=self.origin.shape, dtype=self.origin.dtype) - - slice_var_str = "{name} : fluid.{type}.shape{shape}.astype({dtype})" \ - ".slice({is_slice}).block({block_id}).offset({offset})". \ - format(i="{", e="}", name=self.slice.name, type=self.slice.type, - shape=self.slice.shape, dtype=self.slice.dtype, - is_slice=self.is_slice, block_id=self.block_id, offset=self.offset) - - return "var owned: {}, origin var: ( {} ), slice var: ( {} ), endpoint: {} ".format( - self.vtype, origin_var_str, slice_var_str, self.endpoint) - - -class VarsDistributed(object): - """ - a gather about VarDistributed with many methods to find distributed vars. - through the class, we can get overview about the distributed parameters on parameter servers. - this class may centralized and convenient for developer to manage and get variable's distribute. - other module can also use this to find variables such io.py. - """ - - def __init__(self): - self.distributed_vars = [] - - def add_distributed_var(self, - origin_var, - slice_var, - is_slice=None, - block_id=None, - offset=None, - vtype=None, - endpoint=None): - """ - add distributed var in this. - - Args: - origin_var(Variable|VarStruct): origin var properties - slice_var(Variable|VarStruct): slice var properties - is_slice(bool|None): slice or not, slice_var=True/False and its block size > 8192 are the judgement standard. - block_id(int|None): the number about the slice var. - offset(int|None): if the slice var is sliced, offset is the numel before the var. - vtype(str|None): a tag, such as Optimizer/Param/RemoteProfetch. - endpoint(str|None): which parameter the slice var on, such as "127.0.0.1:1001" - Returns: - None - """ - self.distributed_vars.append( - VarDistributed(origin_var, slice_var, is_slice, block_id, offset, - vtype, endpoint)) - - def get_distributed_var_by_slice(self, var_name): - """ - get distributed var by conditions. - - Args: - var_name(str): slice var name, such as "w.traier0.block1" - Returns: - VarDistributed: distributed var. - """ - for dist_var in self.distributed_vars: - if dist_var.slice.name == var_name: - return dist_var - return None - - @staticmethod - def equal(var1, var2): - """ - the two var is equal or not. - Returns: - bool: equal will return True else False - """ - return var1.name == var2.name and \ - var1.type == var2.type and \ - var1.shape == var2.shape and \ - var1.dtype == var2.dtype and \ - var1.lod_level == var2.lod_level and \ - var1.persistable == var2.persistable - - def get_distributed_var_by_origin_and_ep(self, origin_var_name, endpoint): - """ - get distributed var by conditions. - - Args: - origin_var_name(str): - endpoint(str): the parameter endpoint, such as "127.0.0.1:1001" - Returns: - VarDistributed: distributed var. - """ - for dist_var in self.distributed_vars: - if dist_var.origin.name == origin_var_name and dist_var.endpoint == endpoint: - return dist_var - return None - - def get_distributed_vars_by_vtypes(self, vtypes, groupby=False): - """ - get distributed vars by conditions. - - Args: - vtype(str|None): distributed var's vtype, such as "Optimizer", "RemotePrefetch" - groupby(bool|False): group by origin var or not. - - Returns: - list: distributed var list. - dict: distributed var map when groupby=True - """ - vtype_vars = [] - for var in self.distributed_vars: - if var.vtype in vtypes: - vtype_vars.append(var) - if not groupby: - return vtype_vars - - params_map = {} - for var in vtype_vars: - origin_var_name = var.origin.name - - if origin_var_name in params_map.keys(): - optimizers = params_map.get(origin_var_name) - else: - optimizers = [] - optimizers.append(var) - params_map[origin_var_name] = optimizers - return params_map - - def get_distributed_vars_by_ep(self, endpoint, vtype=None): - """ - get distributed vars by conditions. - - Args: - endpoint(str): the parameter server endpoint, such as "127.0.0.1:2001" - vtype(str|None): distributed var's vtype, such as "Optimizer", "RemotePrefetch" - - Returns: - list: distributed var list. - """ - endpoint_vars = [] - for var in self.distributed_vars: - if var.endpoint == endpoint: - endpoint_vars.append(var) - if not vtype: - return endpoint_vars - - vtype_vars = [] - for var in endpoint_vars: - if var.vtype == vtype: - vtype_vars.append(var) - return vtype_vars - - def overview(self): - """ - get the overview string about all params on all parameter servers. - - Returns: - Str: overview string. - - """ - vars_str = [] - for var in self.distributed_vars: - vars_str.append(str(var)) - return "\n".join(vars_str) - - class VarBlock: def __init__(self, varname, offset, size): self.varname = varname diff --git a/python/setup.py.in b/python/setup.py.in index c947785cbf7517be56c3e43120db65284ab22d10..f93f0cd130e33311bade2b15726c3eff37546214 100644 --- a/python/setup.py.in +++ b/python/setup.py.in @@ -109,6 +109,7 @@ packages=['paddle', 'paddle.fluid.contrib', 'paddle.fluid.contrib.decoder', 'paddle.fluid.contrib.quantize', + 'paddle.fluid.contrib.int8_inference', 'paddle.fluid.contrib.reader', 'paddle.fluid.contrib.slim', 'paddle.fluid.contrib.slim.core',