diff --git a/paddle/fluid/framework/ir/fc_fuse_pass.cc b/paddle/fluid/framework/ir/fc_fuse_pass.cc index 3348abb19b3339b2b3e8b50485133b15a1973a32..7b6ce0da07309a0ed2a5c8bcd5f59d84105261d7 100644 --- a/paddle/fluid/framework/ir/fc_fuse_pass.cc +++ b/paddle/fluid/framework/ir/fc_fuse_pass.cc @@ -57,6 +57,7 @@ std::unique_ptr FCFusePass::ApplyImpl( desc.SetInput("W", std::vector({fc_Y_in})); desc.SetInput("Bias", std::vector({fc_bias_in})); desc.SetOutput("Out", std::vector({fc_out_out})); + desc.SetAttr("in_num_col_dims", mul->Op()->GetAttr("x_num_col_dims")); desc.SetType("fc"); auto fc_node = g->CreateOpNode(&desc); // OpDesc will be copied. GraphSafeRemoveNodes(graph.get(), {mul, elementwise_add, mul_out}); diff --git a/paddle/fluid/inference/tests/api/CMakeLists.txt b/paddle/fluid/inference/tests/api/CMakeLists.txt index fc3e44ffd741cee5185e01c254d0c591f3c179a2..53b5f59088bccedc87d637f3880a0560cede7073 100644 --- a/paddle/fluid/inference/tests/api/CMakeLists.txt +++ b/paddle/fluid/inference/tests/api/CMakeLists.txt @@ -45,11 +45,7 @@ inference_analysis_api_test(test_analyzer_rnn2 ${RNN2_INSTALL_DIR} analyzer_rnn2 # DAM set(DAM_INSTALL_DIR "${INFERENCE_DEMO_INSTALL_DIR}/dam") download_model_and_data(${DAM_INSTALL_DIR} "DAM_model.tar.gz" "DAM_data.txt.tar.gz") -inference_analysis_test(test_analyzer_dam SRCS analyzer_dam_tester.cc - EXTRA_DEPS ${INFERENCE_EXTRA_DEPS} ARGS - --infer_model=${DAM_INSTALL_DIR}/model - --infer_data=${DAM_INSTALL_DIR}/data.txt - --use_analysis=0) +inference_analysis_api_test(test_analyzer_dam ${DAM_INSTALL_DIR} analyzer_dam_tester.cc) # chinese_ner set(CHINESE_NER_INSTALL_DIR "${INFERENCE_DEMO_INSTALL_DIR}/chinese_ner") diff --git a/paddle/fluid/inference/tests/api/analyzer_dam_tester.cc b/paddle/fluid/inference/tests/api/analyzer_dam_tester.cc index ceac5dc7e14365c77cf1cbbbc16e4bf3ebfced73..a60615758f341223c1fc40f42b14ecf7165741af 100644 --- a/paddle/fluid/inference/tests/api/analyzer_dam_tester.cc +++ b/paddle/fluid/inference/tests/api/analyzer_dam_tester.cc @@ -196,15 +196,13 @@ TEST(Analyzer_dam, fuse_statis) { contrib::AnalysisConfig cfg; SetConfig(&cfg); - if (FLAGS_use_analysis) { - int num_ops; - auto predictor = CreatePaddlePredictor(cfg); - auto fuse_statis = GetFuseStatis( - static_cast(predictor.get()), &num_ops); - ASSERT_TRUE(fuse_statis.count("fc_fuse")); - EXPECT_EQ(fuse_statis.at("fc_fuse"), 317); - EXPECT_EQ(num_ops, 2020); - } + int num_ops; + auto predictor = CreatePaddlePredictor(cfg); + auto fuse_statis = GetFuseStatis( + static_cast(predictor.get()), &num_ops); + ASSERT_TRUE(fuse_statis.count("fc_fuse")); + EXPECT_EQ(fuse_statis.at("fc_fuse"), 317); + EXPECT_EQ(num_ops, 2020); } // Compare result of NativeConfig and AnalysisConfig @@ -215,9 +213,7 @@ TEST(Analyzer_dam, compare) { std::vector> input_slots_all; SetInput(&input_slots_all); - if (FLAGS_use_analysis) { - CompareNativeAndAnalysis(cfg, input_slots_all); - } + CompareNativeAndAnalysis(cfg, input_slots_all); } } // namespace inference diff --git a/paddle/fluid/inference/tests/api/analyzer_seq_conv1_tester.cc b/paddle/fluid/inference/tests/api/analyzer_seq_conv1_tester.cc index f590ef27967e47ffcb3a97e80dd147efdd1906e6..abe93f1f398ef24d6c90bb34176dde7aaf54ba61 100644 --- a/paddle/fluid/inference/tests/api/analyzer_seq_conv1_tester.cc +++ b/paddle/fluid/inference/tests/api/analyzer_seq_conv1_tester.cc @@ -189,7 +189,6 @@ TEST(Analyzer_seq_conv1, fuse_statis) { ASSERT_TRUE(fuse_statis.count("seqconv_eltadd_relu_fuse")); EXPECT_EQ(fuse_statis.at("fc_fuse"), 2); EXPECT_EQ(fuse_statis.at("seqconv_eltadd_relu_fuse"), 6); - EXPECT_EQ(num_ops, 32); } // Compare result of NativeConfig and AnalysisConfig diff --git a/paddle/fluid/inference/tests/api/analyzer_vis_tester.cc b/paddle/fluid/inference/tests/api/analyzer_vis_tester.cc index 8fafd25b781a1755cce3d882e92b7ed018d3686c..ae846750177548b5af0f9243b2cbf5cf5c632fa3 100644 --- a/paddle/fluid/inference/tests/api/analyzer_vis_tester.cc +++ b/paddle/fluid/inference/tests/api/analyzer_vis_tester.cc @@ -59,9 +59,6 @@ void SetConfig(AnalysisConfig *cfg) { cfg->specify_input_name = true; // TODO(TJ): fix fusion gru cfg->pass_builder()->DeletePass("fc_gru_fuse_pass"); -#ifdef PADDLE_WITH_MKLDNN - cfg->EnableMKLDNN(); -#endif } void SetInput(std::vector> *inputs) { diff --git a/paddle/fluid/operators/fc_op.cc b/paddle/fluid/operators/fc_op.cc index fa4dec9cf118cef9b836943fd4eae90d23e6218a..1f1c5823df26b2fdfc6d8f4422a1c72fc2f0cd7c 100644 --- a/paddle/fluid/operators/fc_op.cc +++ b/paddle/fluid/operators/fc_op.cc @@ -27,11 +27,9 @@ void FCOp::InferShape(framework::InferShapeContext* ctx) const { "Out(Output) of Fully Connected should not be null."); PADDLE_ENFORCE(ctx->HasInput("W"), "W(Input) of Fully Connected should not be null."); - // NCHW + auto in_dims = ctx->GetInputDim("Input"); - // IO, I=C*H*W auto w_dims = ctx->GetInputDim("W"); - std::vector output_shape({in_dims[0], w_dims[1]}); if (ctx->HasInput("Bias")) { auto bias_dims = ctx->GetInputDim("Bias"); @@ -44,14 +42,32 @@ void FCOp::InferShape(framework::InferShapeContext* ctx) const { "The shape of Bias must be [1, dim]."); } } - PADDLE_ENFORCE(in_dims.size() == 2 || in_dims.size() == 4, - "Fully Connected input should be 2-D or 4-D tensor."); + + if (ctx->Attrs().Get("use_mkldnn")) { + PADDLE_ENFORCE(in_dims.size() == 2 || in_dims.size() == 4, + "Fully Connected input should be 2-D or 4-D tensor."); + } PADDLE_ENFORCE_EQ(w_dims.size(), 2UL, "Fully Connected input should be 2-D tensor."); - PADDLE_ENFORCE_EQ(framework::product(in_dims) / in_dims[0], w_dims[0], - "Fully Connected input and weigth size do not match."); + int in_num_col_dims = ctx->Attrs().Get("in_num_col_dims"); + PADDLE_ENFORCE_GT( + in_dims.size(), in_num_col_dims, + "The input tensor Input's rank of FCOp should be larger than " + "in_num_col_dims."); + + auto in_mat_dims = framework::flatten_to_2d(in_dims, in_num_col_dims); + PADDLE_ENFORCE_EQ( + in_mat_dims[1], w_dims[0], + "Fully Connected input and weigth size do not match. %s, %s"); + + std::vector output_dims; + output_dims.reserve(static_cast(in_num_col_dims + 1)); + for (int i = 0; i < in_num_col_dims; ++i) { + output_dims.push_back(in_dims[i]); + } + output_dims.push_back(w_dims[1]); - ctx->SetOutputDim("Out", framework::make_ddim(output_shape)); + ctx->SetOutputDim("Out", framework::make_ddim(output_dims)); ctx->ShareLoD("Input", "Out"); } @@ -101,12 +117,15 @@ framework::OpKernelType FCOpGrad::GetExpectedKernelType( } void FCOpMaker::Make() { - AddInput("Input", - "(Tensor), The input tensor of fully connected operator with format " - "(NCHW). "); + AddInput("Input", "(Tensor), The input tensor of fully connected operator."); AddInput("W", "(Tensor), The weight fc op with shape (I, O)."); AddInput("Bias", "(Tensor, optional) Bias vector with shape (1 x O") .AsDispensable(); + AddAttr("x_num_col_dims", + "(int, default 1), The fc op can take tensors with more than " + "two dimensions as its inputs.") + .SetDefault(1) + .EqualGreaterThan(1); AddOutput("Out", "(Tensor) The output tensor of fully connected operator. "); AddAttr("use_mkldnn", "(bool, default false) Only used in mkldnn kernel") @@ -131,13 +150,15 @@ class FCOpKernel : public framework::OpKernel { auto output = ctx.Output("Out"); auto in_dims = input->dims(); auto w_dims = w->dims(); + auto out_dims = output->dims(); + int M = framework::product(out_dims) / out_dims[out_dims.size() - 1]; const T* input_data = input->data(); const T* w_data = w->data(); T* output_data = output->mutable_data(ctx.GetPlace()); auto blas = math::GetBlas(ctx); math::FCCompute( - blas, in_dims[0], w_dims[1], w_dims[0], input_data, w_data, output_data, + blas, M, w_dims[1], w_dims[0], input_data, w_data, output_data, bias ? bias->data() : NULL); // TODO(TJ): fuse act