diff --git a/paddle/fluid/framework/ir/fc_fuse_pass.cc b/paddle/fluid/framework/ir/fc_fuse_pass.cc index 3348abb19b3339b2b3e8b50485133b15a1973a32..7b6ce0da07309a0ed2a5c8bcd5f59d84105261d7 100644 --- a/paddle/fluid/framework/ir/fc_fuse_pass.cc +++ b/paddle/fluid/framework/ir/fc_fuse_pass.cc @@ -57,6 +57,7 @@ std::unique_ptr FCFusePass::ApplyImpl( desc.SetInput("W", std::vector({fc_Y_in})); desc.SetInput("Bias", std::vector({fc_bias_in})); desc.SetOutput("Out", std::vector({fc_out_out})); + desc.SetAttr("in_num_col_dims", mul->Op()->GetAttr("x_num_col_dims")); desc.SetType("fc"); auto fc_node = g->CreateOpNode(&desc); // OpDesc will be copied. GraphSafeRemoveNodes(graph.get(), {mul, elementwise_add, mul_out}); diff --git a/paddle/fluid/framework/ir/fc_fuse_pass_tester.cc b/paddle/fluid/framework/ir/fc_fuse_pass_tester.cc index 2db7d95cae1c8c59691fd642e2462e92ed58814f..4e1e4e27f9ba932b56ecc25e816a2aee9d42362e 100644 --- a/paddle/fluid/framework/ir/fc_fuse_pass_tester.cc +++ b/paddle/fluid/framework/ir/fc_fuse_pass_tester.cc @@ -29,6 +29,7 @@ void SetOp(ProgramDesc* prog, const std::string& type, if (type == "mul") { op->SetInput("X", {inputs[0]}); op->SetInput("Y", {inputs[1]}); + op->SetAttr("x_num_col_dims", {1}); } else if (type == "elementwise_add") { op->SetInput("X", inputs); } diff --git a/paddle/fluid/inference/analysis/ir_passes/subgraph_detector.cc b/paddle/fluid/inference/analysis/ir_passes/subgraph_detector.cc index e903ec54cc4ed25ab0648c8c19caa2c8bb00b94f..b6a5dfd087c95d0ccb0f5adfa4f754cfa5a44f14 100644 --- a/paddle/fluid/inference/analysis/ir_passes/subgraph_detector.cc +++ b/paddle/fluid/inference/analysis/ir_passes/subgraph_detector.cc @@ -412,7 +412,7 @@ void DetachDeletedNodes(framework::ir::Graph *graph) { void SubGraphFuser::ReplaceNodesWithSubGraphs() { auto subgraphs = SubgraphDetector(graph_, node_inside_subgraph_teller_)(); for (auto &subgraph : subgraphs) { - if (subgraph.size() <= min_subgraph_size_) continue; + if (subgraph.size() <= (size_t)min_subgraph_size_) continue; LOG(INFO) << "detect a subgraph size " << subgraph.size(); std::unordered_set subgraph_uniq(subgraph.begin(), subgraph.end()); // replace this sub-graph with the first node. Two steps: 1. Create a Block diff --git a/paddle/fluid/inference/analysis/ir_passes/tensorrt_subgraph_pass.cc b/paddle/fluid/inference/analysis/ir_passes/tensorrt_subgraph_pass.cc index f27347b9d176eae8fbd087a21bdedb9cb84085e6..21fd8d2df49698d7fa38d906f7921f092ca916a3 100644 --- a/paddle/fluid/inference/analysis/ir_passes/tensorrt_subgraph_pass.cc +++ b/paddle/fluid/inference/analysis/ir_passes/tensorrt_subgraph_pass.cc @@ -114,7 +114,7 @@ void TensorRtSubgraphPass::CreateTensorRTOp(framework::ir::Node *node, // it is either an OP's input or an OP's output. auto &subgraph_nodes = *Agent(node).subgraph(); - for (int index = 0; index < block_desc.OpSize(); index++) { + for (size_t index = 0; index < block_desc.OpSize(); index++) { framework::proto::OpDesc *op = block_desc.Op(index)->Proto(); auto correspond_node = subgraph_nodes[index]; PADDLE_ENFORCE_EQ(correspond_node->Name(), op->type()); diff --git a/paddle/fluid/inference/tests/api/CMakeLists.txt b/paddle/fluid/inference/tests/api/CMakeLists.txt index 4915f28f4349b5033cf63d438c3409315c7a8237..74569057913d1db9a7184ab61ba655b3a66e49bd 100644 --- a/paddle/fluid/inference/tests/api/CMakeLists.txt +++ b/paddle/fluid/inference/tests/api/CMakeLists.txt @@ -45,11 +45,7 @@ inference_analysis_api_test(test_analyzer_rnn2 ${RNN2_INSTALL_DIR} analyzer_rnn2 # DAM set(DAM_INSTALL_DIR "${INFERENCE_DEMO_INSTALL_DIR}/dam") download_model_and_data(${DAM_INSTALL_DIR} "DAM_model.tar.gz" "DAM_data.txt.tar.gz") -inference_analysis_test(test_analyzer_dam SRCS analyzer_dam_tester.cc - EXTRA_DEPS ${INFERENCE_EXTRA_DEPS} ARGS - --infer_model=${DAM_INSTALL_DIR}/model - --infer_data=${DAM_INSTALL_DIR}/data.txt - --use_analysis=0) +inference_analysis_api_test(test_analyzer_dam ${DAM_INSTALL_DIR} analyzer_dam_tester.cc) # chinese_ner set(CHINESE_NER_INSTALL_DIR "${INFERENCE_DEMO_INSTALL_DIR}/chinese_ner") diff --git a/paddle/fluid/inference/tests/api/analyzer_dam_tester.cc b/paddle/fluid/inference/tests/api/analyzer_dam_tester.cc index d1adc086673de93f3314412e7fca7e0a7aeb33a2..b369cba5c8b3f8aadd1123d6b7345fad6e47bd0f 100644 --- a/paddle/fluid/inference/tests/api/analyzer_dam_tester.cc +++ b/paddle/fluid/inference/tests/api/analyzer_dam_tester.cc @@ -69,7 +69,7 @@ struct DataRecord { num_lines++; std::vector data; split(line, ',', &data); - CHECK_EQ(data.size(), 2 * MAX_TURN_NUM + 3); + CHECK_EQ(data.size(), (size_t)(2 * MAX_TURN_NUM + 3)); // load turn data std::vector turns_tmp[MAX_TURN_NUM]; for (int i = 0; i < MAX_TURN_NUM; ++i) { @@ -197,15 +197,13 @@ TEST(Analyzer_dam, fuse_statis) { contrib::AnalysisConfig cfg; SetConfig(&cfg); - if (FLAGS_use_analysis) { - int num_ops; - auto predictor = CreatePaddlePredictor(cfg); - auto fuse_statis = GetFuseStatis( - static_cast(predictor.get()), &num_ops); - ASSERT_TRUE(fuse_statis.count("fc_fuse")); - EXPECT_EQ(fuse_statis.at("fc_fuse"), 317); - EXPECT_EQ(num_ops, 2020); - } + int num_ops; + auto predictor = CreatePaddlePredictor(cfg); + auto fuse_statis = GetFuseStatis( + static_cast(predictor.get()), &num_ops); + ASSERT_TRUE(fuse_statis.count("fc_fuse")); + EXPECT_EQ(fuse_statis.at("fc_fuse"), 317); + EXPECT_EQ(num_ops, 2020); } // Compare result of NativeConfig and AnalysisConfig @@ -216,11 +214,8 @@ TEST(Analyzer_dam, compare) { std::vector> input_slots_all; SetInput(&input_slots_all); - if (FLAGS_use_analysis) { - CompareNativeAndAnalysis( - reinterpret_cast(&cfg), - input_slots_all); - } + CompareNativeAndAnalysis( + reinterpret_cast(&cfg), input_slots_all); } } // namespace inference diff --git a/paddle/fluid/inference/tests/api/analyzer_vis_tester.cc b/paddle/fluid/inference/tests/api/analyzer_vis_tester.cc index 16e1011dda55700967024500a813b921a411af94..956a235edcefb7d688983c3b63b187e284efb02a 100644 --- a/paddle/fluid/inference/tests/api/analyzer_vis_tester.cc +++ b/paddle/fluid/inference/tests/api/analyzer_vis_tester.cc @@ -59,9 +59,6 @@ void SetConfig(AnalysisConfig *cfg) { cfg->specify_input_name = true; // TODO(TJ): fix fusion gru cfg->pass_builder()->DeletePass("fc_gru_fuse_pass"); -#ifdef PADDLE_WITH_MKLDNN - cfg->EnableMKLDNN(); -#endif } void SetInput(std::vector> *inputs) { diff --git a/paddle/fluid/operators/fc_op.cc b/paddle/fluid/operators/fc_op.cc index fa4dec9cf118cef9b836943fd4eae90d23e6218a..e80249fc87855311479b35af61f872182292795a 100644 --- a/paddle/fluid/operators/fc_op.cc +++ b/paddle/fluid/operators/fc_op.cc @@ -27,11 +27,9 @@ void FCOp::InferShape(framework::InferShapeContext* ctx) const { "Out(Output) of Fully Connected should not be null."); PADDLE_ENFORCE(ctx->HasInput("W"), "W(Input) of Fully Connected should not be null."); - // NCHW + auto in_dims = ctx->GetInputDim("Input"); - // IO, I=C*H*W auto w_dims = ctx->GetInputDim("W"); - std::vector output_shape({in_dims[0], w_dims[1]}); if (ctx->HasInput("Bias")) { auto bias_dims = ctx->GetInputDim("Bias"); @@ -44,14 +42,32 @@ void FCOp::InferShape(framework::InferShapeContext* ctx) const { "The shape of Bias must be [1, dim]."); } } - PADDLE_ENFORCE(in_dims.size() == 2 || in_dims.size() == 4, - "Fully Connected input should be 2-D or 4-D tensor."); + + if (ctx->Attrs().Get("use_mkldnn")) { + PADDLE_ENFORCE(in_dims.size() == 2 || in_dims.size() == 4, + "Fully Connected input should be 2-D or 4-D tensor."); + } PADDLE_ENFORCE_EQ(w_dims.size(), 2UL, "Fully Connected input should be 2-D tensor."); - PADDLE_ENFORCE_EQ(framework::product(in_dims) / in_dims[0], w_dims[0], - "Fully Connected input and weigth size do not match."); + int in_num_col_dims = ctx->Attrs().Get("in_num_col_dims"); + PADDLE_ENFORCE_GT( + in_dims.size(), in_num_col_dims, + "The input tensor Input's rank of FCOp should be larger than " + "in_num_col_dims."); + + auto in_mat_dims = framework::flatten_to_2d(in_dims, in_num_col_dims); + PADDLE_ENFORCE_EQ( + in_mat_dims[1], w_dims[0], + "Fully Connected input and weigth size do not match. %s, %s"); + + std::vector output_dims; + output_dims.reserve(static_cast(in_num_col_dims + 1)); + for (int i = 0; i < in_num_col_dims; ++i) { + output_dims.push_back(in_dims[i]); + } + output_dims.push_back(w_dims[1]); - ctx->SetOutputDim("Out", framework::make_ddim(output_shape)); + ctx->SetOutputDim("Out", framework::make_ddim(output_dims)); ctx->ShareLoD("Input", "Out"); } @@ -101,12 +117,15 @@ framework::OpKernelType FCOpGrad::GetExpectedKernelType( } void FCOpMaker::Make() { - AddInput("Input", - "(Tensor), The input tensor of fully connected operator with format " - "(NCHW). "); + AddInput("Input", "(Tensor), The input tensor of fully connected operator."); AddInput("W", "(Tensor), The weight fc op with shape (I, O)."); AddInput("Bias", "(Tensor, optional) Bias vector with shape (1 x O") .AsDispensable(); + AddAttr("in_num_col_dims", + "(int, default 1), The fc op can take tensors with more than " + "two dimensions as its inputs.") + .SetDefault(1) + .EqualGreaterThan(1); AddOutput("Out", "(Tensor) The output tensor of fully connected operator. "); AddAttr("use_mkldnn", "(bool, default false) Only used in mkldnn kernel") @@ -131,13 +150,15 @@ class FCOpKernel : public framework::OpKernel { auto output = ctx.Output("Out"); auto in_dims = input->dims(); auto w_dims = w->dims(); + auto out_dims = output->dims(); + int M = framework::product(out_dims) / out_dims[out_dims.size() - 1]; const T* input_data = input->data(); const T* w_data = w->data(); T* output_data = output->mutable_data(ctx.GetPlace()); auto blas = math::GetBlas(ctx); math::FCCompute( - blas, in_dims[0], w_dims[1], w_dims[0], input_data, w_data, output_data, + blas, M, w_dims[1], w_dims[0], input_data, w_data, output_data, bias ? bias->data() : NULL); // TODO(TJ): fuse act diff --git a/paddle/fluid/operators/hash_op.cc b/paddle/fluid/operators/hash_op.cc index b9ebe71a3d7ae270a10a45f4805652415078b363..b2c2c7954b79658e66f1524a81bcad0b7bf22c35 100644 --- a/paddle/fluid/operators/hash_op.cc +++ b/paddle/fluid/operators/hash_op.cc @@ -38,7 +38,7 @@ class HashOp : public framework::OperatorWithKernel { std::vector out_dims; out_dims.reserve(dims.size() + 1); // copy all dims except the last one - for (size_t i = 0u; i != dims.size() - 1; ++i) { + for (int i = 0u; i != dims.size() - 1; ++i) { out_dims.emplace_back(dims[i]); } int num_hash = ctx->Attrs().Get("num_hash"); diff --git a/paddle/fluid/operators/math/selected_rows_functor.cc b/paddle/fluid/operators/math/selected_rows_functor.cc index 9577a4cb9d275df9604b7578f8685e4d2938a5e9..5978c1d6056001142854583840b8bfcb54d475d1 100644 --- a/paddle/fluid/operators/math/selected_rows_functor.cc +++ b/paddle/fluid/operators/math/selected_rows_functor.cc @@ -244,7 +244,7 @@ typename std::enable_if< std::is_same::value>::type elementwise_add_to(const DeviceContext& ctx, BlasT* blas, size_t data_len, const T* in, T* out) { - for (int64_t i = 0; i < data_len; i++) { + for (size_t i = 0; i < data_len; i++) { out[i] += in[i]; } } diff --git a/paddle/fluid/operators/math/sequence_pooling_test.cc b/paddle/fluid/operators/math/sequence_pooling_test.cc index 2bc008dd34ffcfe93a00bd4a8cde61626d91e235..5535523e798912ff80eeb5d753914c7d8d70a05f 100644 --- a/paddle/fluid/operators/math/sequence_pooling_test.cc +++ b/paddle/fluid/operators/math/sequence_pooling_test.cc @@ -70,11 +70,11 @@ void TestSequencePoolingSum(const paddle::framework::LoD& lod) { EXPECT_EQ(in_grad.lod(), lod); if (paddle::platform::is_cpu_place(*place)) { - for (int64_t i = 0; i < in_grad.lod()[0].size() - 1; ++i) { + for (size_t i = 0; i < in_grad.lod()[0].size() - 1; ++i) { int64_t begin = in_grad.lod()[0][i]; int64_t end = in_grad.lod()[0][i + 1]; paddle::framework::Tensor tmp = in_grad.Slice(begin, end); - for (int64_t j = 0; j != tmp.numel() / second_dim; ++j) { + for (size_t j = 0; j != tmp.numel() / second_dim; ++j) { for (int64_t m = 0; m != second_dim; ++m) { EXPECT_EQ(tmp.data()[m + j * second_dim], out_grad.data()[m + i * second_dim]); @@ -82,11 +82,11 @@ void TestSequencePoolingSum(const paddle::framework::LoD& lod) { } } } else { - for (int64_t i = 0; i < cpu_in_grad.lod()[0].size() - 1; ++i) { + for (size_t i = 0; i < cpu_in_grad.lod()[0].size() - 1; ++i) { int64_t begin = cpu_in_grad.lod()[0][i]; int64_t end = cpu_in_grad.lod()[0][i + 1]; paddle::framework::Tensor tmp = cpu_in_grad.Slice(begin, end); - for (int64_t j = 0; j != tmp.numel() / second_dim; ++j) { + for (size_t j = 0; j != tmp.numel() / second_dim; ++j) { for (int64_t m = 0; m != second_dim; ++m) { EXPECT_EQ(tmp.data()[m + j * second_dim], cpu_out_grad.data()[m + i * second_dim]); diff --git a/paddle/fluid/operators/merge_ids_op.h b/paddle/fluid/operators/merge_ids_op.h index fef9e023d02f45e21ec409ad398ba7d9bdd36880..99c57590191d58a12760fb335df76037685d1ced 100644 --- a/paddle/fluid/operators/merge_ids_op.h +++ b/paddle/fluid/operators/merge_ids_op.h @@ -43,11 +43,11 @@ class MergeIdsOpKernel : public framework::OpKernel { PADDLE_ENFORCE_EQ(ids.size(), outs.size(), "the number of Ids and Out should be the same"); - int row_ids_size = 0; + size_t row_ids_size = 0; int row_size = 0; int embedding_size = 0; - for (int i = 0; i < x_tensors.size(); ++i) { + for (size_t i = 0; i < x_tensors.size(); ++i) { const auto *x_tensor = x_tensors[i]; const auto *row_id = row_ids[i]; @@ -66,7 +66,7 @@ class MergeIdsOpKernel : public framework::OpKernel { std::unordered_map> selected_rows_idx_map; - for (int i = 0; i < x_tensors.size(); ++i) { + for (size_t i = 0; i < x_tensors.size(); ++i) { const auto *row_id = row_ids[i]; for (int j = 0; j < row_id->numel(); ++j) { @@ -78,7 +78,7 @@ class MergeIdsOpKernel : public framework::OpKernel { PADDLE_ENFORCE_EQ(row_ids_size, selected_rows_idx_map.size(), "the rows and tensor map size should be the same"); - for (int i = 0; i < outs.size(); ++i) { + for (size_t i = 0; i < outs.size(); ++i) { auto *out_ids = ids[i]; auto *out = outs[i]; diff --git a/paddle/fluid/operators/ref_by_trainer_id_op.h b/paddle/fluid/operators/ref_by_trainer_id_op.h index 2ce577544ae2437b9297da2190fd09b435d5173c..34192278d84758d720e021215c14a54349ba0c62 100644 --- a/paddle/fluid/operators/ref_by_trainer_id_op.h +++ b/paddle/fluid/operators/ref_by_trainer_id_op.h @@ -38,7 +38,7 @@ class RefByTrainerIdKernel : public framework::OpKernel { } else { trainer_id = *trainer_id_data; } - PADDLE_ENFORCE_LT(trainer_id, in_list.size()); + PADDLE_ENFORCE_LT((size_t)trainer_id, in_list.size()); out->mutable_data(context.GetPlace()); out->ShareDataWith(*(in_list[trainer_id])); } diff --git a/paddle/fluid/operators/split_ids_op.h b/paddle/fluid/operators/split_ids_op.h index 6dbada3da8826f0e7cb07a9642d327e5ee38c309..f5d6d85d7d75507f82de212812ecee0a650d3aad 100644 --- a/paddle/fluid/operators/split_ids_op.h +++ b/paddle/fluid/operators/split_ids_op.h @@ -64,7 +64,7 @@ class SplitIdsOpKernel : public framework::OpKernel { out_ids.resize(outs.size()); // split id by their shard_num. - for (int i = 0; i < all_ids.size(); ++i) { + for (size_t i = 0; i < all_ids.size(); ++i) { T id = all_ids[i]; size_t shard_id = static_cast(id) % shard_num; out_ids[shard_id].push_back(id);