diff --git a/lite/kernels/mlu/bridges/graph.h b/lite/kernels/mlu/bridges/graph.h index fb49d406936d49253a09323bc437b32782ac11c3..6cf40675397eef2db96a51e21b76296e6695e317 100644 --- a/lite/kernels/mlu/bridges/graph.h +++ b/lite/kernels/mlu/bridges/graph.h @@ -103,10 +103,11 @@ class Graph { // return outputs_shape; // } - void AddInput(std::shared_ptr tensor) { + void AddInput(std::shared_ptr tensor, + bool disable_batch_size_changeable = true) { inputs_.push_back(tensor->mlu_tensor()); input_tensors_.push_back(tensor); - if (GetBoolFromEnv("BATCH_SIZE_CHANGEABLE")) { + if (!disable_batch_size_changeable) { constexpr int input_dimNb = 4; bool input_dim_mutable[4] = {true, false, false, false}; cnmlSetTensorDimMutable( diff --git a/lite/kernels/mlu/subgraph_compute.h b/lite/kernels/mlu/subgraph_compute.h index 11bd19d9b6c536f0ecabc6100ea55e2a1045a51f..72ab7cd2703d6834f1ab21082feaa71cdea8b27d 100644 --- a/lite/kernels/mlu/subgraph_compute.h +++ b/lite/kernels/mlu/subgraph_compute.h @@ -50,7 +50,15 @@ class SubgraphEngine : public subgraph::Engine { paddle::lite_api::PrecisionType type) : subgraph::Engine( ctx, block_idx, block_desc, input_names, output_names, scope), - fp_type_(type) {} + fp_type_(type) { + VLOG(4) << "[MLU] PADDLE_LITE_MLU_SAVE_OFFLINE_MODEL is " + << GetBoolFromEnv("PADDLE_LITE_MLU_SAVE_OFFLINE_MODEL"); + VLOG(4) << "[MLU] PADDLE_LITE_MLU_DISABLE_BATCH_SIZE_CHANGEABLE is " + << GetBoolFromEnv("PADDLE_LITE_MLU_DISABLE_BATCH_SIZE_CHANGEABLE"); + if (GetBoolFromEnv("PADDLE_LITE_MLU_DISABLE_BATCH_SIZE_CHANGEABLE")) { + disable_batch_size_changeable_ = true; + } + } int Build() { // In order to attach all of the ops of the block desc, we need to build @@ -83,7 +91,7 @@ class SubgraphEngine : public subgraph::Engine { // used in batch changable situation std::vector> all_shape; for (auto origin_itensor : origin_itensors_) { - if (GetBoolFromEnv("BATCH_SIZE_CHANGEABLE")) { + if (!disable_batch_size_changeable_) { auto iv = origin_itensor->dims().Vectorize(); all_shape.push_back(iv); iv.erase(iv.begin()); @@ -117,6 +125,21 @@ class SubgraphEngine : public subgraph::Engine { protected: int BuildDeviceProgram() override { + if (!error_compile_batch_size_changeable_ && + !disable_batch_size_changeable_) { + int status = BuildDeviceProgramImpl(); + if (subgraph::CHECK_SUCCESS(status)) { + return status; + } + VLOG(4) << "[MLU] build batch_size changeable subgraph op failed, " + "changed to input_shape changeable"; + } + error_compile_batch_size_changeable_ = true; + disable_batch_size_changeable_ = true; + return BuildDeviceProgramImpl(); + } + + int BuildDeviceProgramImpl() { int status = 0; auto graph = std::make_shared(); graph->SetFPType(fp_type_); @@ -131,7 +154,7 @@ class SubgraphEngine : public subgraph::Engine { auto data_type = input_tensor->precision(); cnmlDataType_t fp_type = PrecisionToDatatype(data_type); origin_itensors_.push_back(input_tensor); - if (GetBoolFromEnv("BATCH_SIZE_CHANGEABLE")) { + if (!disable_batch_size_changeable_) { auto iv = input_tensor->dims().Vectorize(); iv.erase(iv.begin()); new_shape.push_back(iv); @@ -156,6 +179,16 @@ class SubgraphEngine : public subgraph::Engine { auto op = inst.op(); CHECK(op); std::string op_type = op->op_info()->Type(); + if (!disable_batch_size_changeable_ && + std::find(unsupport_batch_size_changeable_op_type_.begin(), + unsupport_batch_size_changeable_op_type_.end(), + op_type) != + unsupport_batch_size_changeable_op_type_.end()) { + status |= subgraph::FAILED; + VLOG(4) << "[MLU] found unsupported batch_size changeable op type: " + << op_type; + return status; + } op->CheckShape(); const_cast(op)->InferShape(); if (!bridges.Exists(op_type, TARGET(kMLU))) { @@ -185,7 +218,8 @@ class SubgraphEngine : public subgraph::Engine { } } for (auto& input_name : input_names_) { - graph->AddInput(graph->GetNode(input_name)); + graph->AddInput(graph->GetNode(input_name), + disable_batch_size_changeable_); } CHECK(!origin_otensors_.empty()) << "[MLU] no valid output names"; @@ -194,7 +228,7 @@ class SubgraphEngine : public subgraph::Engine { auto core_number = mlu_context.MLUCoreNumber(); graph->Compile(core_version, core_number); shape_graph_map_[new_shape] = graph; - if (GetBoolFromEnv("SAVE_MLU_OFFLINE_MODEL")) { + if (GetBoolFromEnv("PADDLE_LITE_MLU_SAVE_OFFLINE_MODEL")) { graph->GenOfflineModel(GetOfflineModName()); } return status; @@ -289,7 +323,7 @@ class SubgraphEngine : public subgraph::Engine { CHECK_EQ(graph_input->size(), origin_itensors_.size()); CHECK_EQ(graph_output->size(), origin_otensors_.size()); - if (GetBoolFromEnv("BATCH_SIZE_CHANGEABLE")) { + if (!disable_batch_size_changeable_) { std::vector> graph_in; if (shape_tensor_map_in_.find(all_inputs_shape_) != @@ -405,6 +439,12 @@ class SubgraphEngine : public subgraph::Engine { std::map>, std::shared_ptr> shape_graph_map_{}; + // enable batch size changeable by default, this cound be changed by + // environment variable PADDLE_LITE_MLU_DISABLE_BATCH_SIZE_CHANGEABLE and + // whether the op can be compiled with batch size changeable way + bool disable_batch_size_changeable_{false}; + bool error_compile_batch_size_changeable_{false}; + std::vector unsupport_batch_size_changeable_op_type_{"concat"}; // search output runtime MLUTensor for certain output shape when enable // BATCH_SIZE_CHANGEABLE std::map>, @@ -419,7 +459,7 @@ class SubgraphEngine : public subgraph::Engine { // BATCH_SIZE_CHANGEABLE std::map>, std::vector>> in_out_shape_map_{}; -}; // namespace mlu +}; template class SubgraphCompute