diff --git a/lite/core/memory.h b/lite/core/memory.h
index bc382d8c36d87735bbd3028ff06b05f3394bf377..5a56f73b0de0fce64905f483ded88eda9ceffd52 100644
--- a/lite/core/memory.h
+++ b/lite/core/memory.h
@@ -90,11 +90,6 @@ void CopySync(void* dst, const void* src, size_t size, IoDirection dir) {
     case TARGET(kBM):
       TargetWrapper<TARGET(kBM)>::MemcpySync(dst, src, size, dir);
       break;
-#endif
-#ifdef LITE_WITH_MLU
-    case TARGET(kMLU):
-      TargetWrapperMlu::MemcpySync(dst, src, size, dir);
-      break;
 #endif
     default:
       LOG(FATAL)
diff --git a/lite/core/mir/mlu_postprocess_pass.cc b/lite/core/mir/mlu_postprocess_pass.cc
index f3658fe484822509eb2a9e3d3652bc9e7ed457a0..727189db77427a01a6f4d4477c053112e99e8103 100644
--- a/lite/core/mir/mlu_postprocess_pass.cc
+++ b/lite/core/mir/mlu_postprocess_pass.cc
@@ -413,6 +413,14 @@ void MLUPostprocessPass::InsertAfter(SSAGraph* graph,
     auto* sub_block_op_desc = sub_block_desc->GetOp<cpp::OpDesc>(i);
     UpdateOutputTo(
         sub_block_op_desc, tail_node->AsArg().name, cur_node->AsArg().name);
+    /* graph like this
+     *        subgraph_op_0
+     *          /       \
+     *         /         \
+     * subgraph_op_1   host_op
+     */
+    UpdateInputTo(
+        sub_block_op_desc, tail_node->AsArg().name, cur_node->AsArg().name);
   }
 
   // recreate the op
diff --git a/lite/core/mir/subgraph/subgraph_detector.cc b/lite/core/mir/subgraph/subgraph_detector.cc
index 91aa04d99505eac5fa9abc50a5008ec7b5de4fbf..454682043b5199a32e56ed7c8bbea1752942c212 100644
--- a/lite/core/mir/subgraph/subgraph_detector.cc
+++ b/lite/core/mir/subgraph/subgraph_detector.cc
@@ -450,6 +450,9 @@ void SubgraphFuser::InsertNewNode(SSAGraph *graph,
   for (auto &var_node : output_var_nodes) {
     output_var_names.push_back(var_node->AsArg().name);
   }
+  for (auto &var_node : local_var_nodes) {
+    output_var_names.push_back(var_node->AsArg().name);
+  }
   subgraph_op_desc.SetAttr<std::vector<std::string>>("input_data_names",
                                                      input_var_names);
   subgraph_op_desc.SetAttr<std::vector<std::string>>("output_data_names",
@@ -491,9 +494,6 @@ void SubgraphFuser::InsertNewNode(SSAGraph *graph,
   for (auto &var_node : weight_var_nodes) {
     input_var_names.push_back(var_node->AsArg().name);
   }
-  for (auto &var_node : local_var_nodes) {
-    output_var_names.push_back(var_node->AsArg().name);
-  }
   for (auto &var_node : unused_var_nodes) {
     output_var_names.push_back(var_node->AsArg().name);
   }
diff --git a/lite/kernels/mlu/bridges/act_op.cc b/lite/kernels/mlu/bridges/act_op.cc
index 5d6e2f9ca0932c03ade897b416420e41019d775a..286195d9d5f961288dd0156db31ff8aacae58227 100644
--- a/lite/kernels/mlu/bridges/act_op.cc
+++ b/lite/kernels/mlu/bridges/act_op.cc
@@ -37,7 +37,7 @@ int ActConverter(void* ctx, OpLite* op, KernelBase* kernel) {
   auto output = scope->FindVar(out_var_name)->GetMutable<Tensor>();
   auto output_dims = output->dims().Vectorize();
   auto output_tensor = graph->AddNode(
-      out_var_name, output_dims, CNML_TENSOR, CNML_NHWC, fp_type);
+      out_var_name, output_dims, CNML_TENSOR, CNML_NCHW, fp_type);
   CHECK(graph->HasNode(x_var_name));
   auto input_tensor = graph->GetNode(x_var_name);
   cnmlBaseOp_t activation_op;
diff --git a/lite/kernels/mlu/bridges/batch_norm_op.cc b/lite/kernels/mlu/bridges/batch_norm_op.cc
index d95a5115c96c10a8881f50c44fee9881c6a9e218..7353a685dd5fd3a5bcc8c88def8ffb8b96fdde55 100644
--- a/lite/kernels/mlu/bridges/batch_norm_op.cc
+++ b/lite/kernels/mlu/bridges/batch_norm_op.cc
@@ -42,7 +42,7 @@ int BatchNormConverter(void* ctx, OpLite* op, KernelBase* kernel) {
   auto output = scope->FindVar(y_var_name)->GetMutable<Tensor>();
   auto output_dims = output->dims().Vectorize();
   auto output_tensor = graph->AddNode(
-      y_var_name, output_dims, CNML_TENSOR, CNML_NHWC, graph->FPType());
+      y_var_name, output_dims, CNML_TENSOR, CNML_NCHW, graph->FPType());
   CHECK(graph->HasNode(x_var_name));
diff --git a/lite/kernels/mlu/bridges/batch_norm_op_test.cc b/lite/kernels/mlu/bridges/batch_norm_op_test.cc
index 706b0421b32e25a8ceaea465c57e60b52202f104..65b24a0a72a48a306b6a8976efd8839679d58038 100644
--- a/lite/kernels/mlu/bridges/batch_norm_op_test.cc
+++ b/lite/kernels/mlu/bridges/batch_norm_op_test.cc
@@ -137,9 +137,7 @@ void test_batch_norm(
             {bs, ic, ih, iw},
             {0, 2, 3, 1});
 
-  out->Resize({bs, ih, iw, ic});
   x->CopyDataFrom(input_trans);
-  x->Resize({bs, ih, iw, ic});
 
   LaunchOp(op, {x_var_name}, {out_var_name});
 
diff --git a/lite/kernels/mlu/bridges/concat_op.cc b/lite/kernels/mlu/bridges/concat_op.cc
index e2986b964853ab90e5d7317638ee8c2c2969a4d0..14f0da746a00c1ea10ffae824217dbb2df84df55 100644
--- a/lite/kernels/mlu/bridges/concat_op.cc
+++ b/lite/kernels/mlu/bridges/concat_op.cc
@@ -32,60 +32,33 @@ int ConcatConverter(void* ctx, OpLite* op, KernelBase* kernel) {
   auto x_var_name = op_info->Input("X");
   auto out_var_name = op_info->Output("Out").front();
+  auto output = scope->FindVar(out_var_name)->GetMutable<Tensor>();
+  auto output_dims = output->dims().Vectorize();
   auto param_axis = op_info->GetAttr<int>("axis");
 
-  // auto x = scope->FindVar(x_var_name[0])->GetMutable<Tensor>();
-
-  auto input_num = x_var_name.size();
   std::vector<cnmlTensor_t> input_tensor;
-  std::vector<std::vector<int64_t>> input_dims;
   for (auto x_name : x_var_name) {
     CHECK(graph->HasNode(x_name));
     input_tensor.push_back(graph->GetNode(x_name)->mlu_tensor());
-    auto x = scope->FindVar(x_name)->GetMutable<Tensor>();
-    input_dims.push_back(x->dims().Vectorize());
   }
 
-  auto dims = input_dims[0].size();
+  auto dims = output_dims.size();
   int axis = (param_axis < 0) ? (param_axis + dims) : param_axis;
-  int nhwc_axis = -1;
-  if (dims == 4) {
-    int nchw_to_nhwc_axis_map[4] = {0, 3, 1, 2};
-    nhwc_axis = nchw_to_nhwc_axis_map[axis];
-  } else if (dims == 3) {
-    int nchw_to_nhwc_axis_map[3] = {0, 2, 1};
-    nhwc_axis = nchw_to_nhwc_axis_map[axis];
-  } else {
-    CHECK(0) << "Unsupport dims in mlu concat";
-  }
-
-  std::vector<int64_t> output_dims;
-  output_dims.assign(dims, 0);
-
-  /* std::cout << string_format("concat axis: %d(NCHW), %d(NHWC)", axis,
-   * nhwc_axis) << std::endl; */
-
-  for (int i = 0; i < output_dims.size(); ++i) {
-    if (i == nhwc_axis) {
-      for (auto& dim : input_dims) output_dims[i] += dim[i];
-    } else {
-      output_dims[i] = input_dims[0][i];
-    }
-  }
-
-  /* std::cout << string_format("concat output dim: %ld, %ld, %ld, %ld") <<
-   * std::endl; */
+  CHECK_LE(axis, 4) << "Unsupported dims in mlu concat";
+  int nchw_to_nhwc_axis_map[4] = {0, 3, 1, 2};
+  int nhwc_axis = nchw_to_nhwc_axis_map[axis];
 
-  auto* output = scope->FindVar(out_var_name)->GetMutable<Tensor>();
-  output->Resize(output_dims);
   auto output_tensor = graph->AddNode(
-      out_var_name, output_dims, CNML_TENSOR, CNML_NHWC, graph->FPType());
+      out_var_name, output_dims, CNML_TENSOR, CNML_NCHW, graph->FPType());
 
   cnmlBaseOp_t concat_op;
-  cnmlTensor_t outputs[1];
-  outputs[0] = output_tensor->mlu_tensor();
-  CNML_CALL(cnmlCreateNdConcatOp(
-      &concat_op, nhwc_axis, input_tensor.data(), input_num, outputs, 1));
+  cnmlTensor_t outputs = output_tensor->mlu_tensor();
+  CNML_CALL(cnmlCreateNdConcatOp(&concat_op,
+                                 nhwc_axis,
+                                 input_tensor.data(),
+                                 x_var_name.size(),
+                                 &outputs,
+                                 1));
   graph->FuseOp(concat_op);
   return SUCCESS;
 }
diff --git a/lite/kernels/mlu/bridges/concat_op_test.cc b/lite/kernels/mlu/bridges/concat_op_test.cc
index b75ebc0951af91799f40ef8782a5bca43d0a87e1..c4b48a9ef45430ec5867d231bbc2d0a798ec66d0 100644
--- a/lite/kernels/mlu/bridges/concat_op_test.cc
+++ b/lite/kernels/mlu/bridges/concat_op_test.cc
@@ -113,21 +113,8 @@ void test_concat(std::vector<std::vector<int64_t>> input, int axis) {
              static_cast<int>(input[1][2]),
              static_cast<int>(input[1][3])},
             {0, 2, 3, 1});
-  auto os = out->dims();
-  out->Resize({static_cast<int64_t>(os[0]),
-               static_cast<int64_t>(os[2]),
-               static_cast<int64_t>(os[3]),
-               static_cast<int64_t>(os[1])});
   x->CopyDataFrom(input_x);
   y->CopyDataFrom(input_y);
-  x->Resize({static_cast<int64_t>(input[0][0]),
-             static_cast<int64_t>(input[0][2]),
-             static_cast<int64_t>(input[0][3]),
-             static_cast<int64_t>(input[0][1])});
-  y->Resize({static_cast<int64_t>(input[1][0]),
-             static_cast<int64_t>(input[1][2]),
-             static_cast<int64_t>(input[1][3]),
-             static_cast<int64_t>(input[1][1])});
 
   LaunchOp(op, {x_var_name, y_var_name}, {out_var_name});
 
@@ -136,6 +123,7 @@ void test_concat(std::vector<std::vector<int64_t>> input, int axis) {
 
   Tensor output_trans;
   output_trans.Resize(out->dims());
+  auto os = out->dims();
   transpose(out_data,
             output_trans.mutable_data<float>(),
             {static_cast<int>(os[0]),
diff --git a/lite/kernels/mlu/bridges/conv_op.cc b/lite/kernels/mlu/bridges/conv_op.cc
index e4f672e06e38c0212d1887de5cebed6a35bd0e0d..6a7ef408eb7432950d5a0985dd6e174236e937e0 100644
--- a/lite/kernels/mlu/bridges/conv_op.cc
+++ b/lite/kernels/mlu/bridges/conv_op.cc
@@ -33,13 +33,14 @@ int ConvConverter(void* ctx, OpLite* op, KernelBase* kernel) {
   // get input, filter and op attributes
   const auto input_var_name = op_info->Input("Input").front();
-  const auto& input_dims_nhwc =
+  const auto& input_dims =
       scope->FindVar(input_var_name)->GetMutable<Tensor>()->dims();
-  const auto input_dims = DimNHWC2NCHW(input_dims_nhwc);
   const auto filter_var_name = op_info->Input("Filter").front();
   auto* filter = scope->FindVar(filter_var_name)->GetMutable<Tensor>();
   const auto& filter_dims = filter->dims();
   const auto output_var_name = op_info->Output("Output").front();
+  auto* output = scope->FindVar(output_var_name)->GetMutable<Tensor>();
+  const auto output_shape = output->dims().Vectorize();
   const auto bs = input_dims[0];
   const auto oc = filter_dims[0];
   CHECK_EQ(input_dims.size(), 4);
@@ -70,24 +71,8 @@ int ConvConverter(void* ctx, OpLite* op, KernelBase* kernel) {
                                 input_dims,
                                 filter_dims);
 
-  std::vector<int64_t> output_shape({bs, oc});
-  for (size_t i = 0; i < 2; i++) {
-    const int dkernel = dilations[i] * (filter_dims[2 + i] - 1) + 1;
-    output_shape.push_back(
-        (input_dims[i + 2] + paddings[2 * i] + paddings[2 * i + 1] - dkernel) /
-            strides[i] +
-        1);
-  }
-
-  const auto output_shape_nhwc = DimNCHW2NHWC(output_shape);
-  const auto output_tensor = graph->AddNode(output_var_name,
-                                            output_shape_nhwc,
-                                            CNML_TENSOR,
-                                            CNML_NHWC,
-                                            graph->FPType());
-  scope->FindVar(output_var_name)
-      ->GetMutable<::paddle::lite::Tensor>()
-      ->Resize(output_shape_nhwc);
+  const auto output_tensor = graph->AddNode(
+      output_var_name, output_shape, CNML_TENSOR, CNML_NCHW, graph->FPType());
 
   // Create filter node
   const auto filter_tensor = graph->AddNode(filter_var_name,
@@ -156,7 +141,7 @@ int ConvConverter(void* ctx, OpLite* op, KernelBase* kernel) {
   const auto input_scale = op_info->GetAttr<float>("input_scale");
 
   bool use_first_conv = false;
-  if (lite::DeviceInfo::Global().UseFirstConv() && input_dims_nhwc[3] == 3) {
+  if (lite::DeviceInfo::Global().UseFirstConv() && input_dims[1] == 3) {
     use_first_conv = true;
   }
diff --git a/lite/kernels/mlu/bridges/conv_op_test.cc b/lite/kernels/mlu/bridges/conv_op_test.cc
index 6155a75018382482118ddbe8878bcdb69214bcd6..e34dd7c2a85dbda62596b6e82d820fc437bfd194 100644
--- a/lite/kernels/mlu/bridges/conv_op_test.cc
+++ b/lite/kernels/mlu/bridges/conv_op_test.cc
@@ -244,10 +244,6 @@ void test_conv(int bs,
     }
   }
-  input->Resize({bs, ih, iw, ic});
-  output->Resize(
-      {output_shape[0], output_shape[2], output_shape[3], output_shape[1]});
-
   // create and convert op to MLU model, then run it on MLU
   auto op = CreateOp<operators::ConvOpLite>(opdesc_mlu, &scope);
   LaunchOp(op, {input_var_name}, {output_var_name});
diff --git a/lite/kernels/mlu/bridges/elementwise_ops.cc b/lite/kernels/mlu/bridges/elementwise_ops.cc
index 4ef949925d20e0a2cb1c7f25d840e2041d79dd7a..41526a0100ba71be9eda25983cb96aa888d6cf4d 100644
--- a/lite/kernels/mlu/bridges/elementwise_ops.cc
+++ b/lite/kernels/mlu/bridges/elementwise_ops.cc
@@ -77,7 +77,7 @@ int ElementwiseConverter(void* ctx, OpLite* op, KernelBase* kernel) {
   auto output_tensor = graph->AddNode(out_var_name,
                                       x->dims().Vectorize(),
                                       CNML_TENSOR,
-                                      CNML_NHWC,
+                                      CNML_NCHW,
                                       graph->FPType());
 
   cnmlBaseOp_t elementwise_op;
@@ -90,7 +90,7 @@ int ElementwiseConverter(void* ctx, OpLite* op, KernelBase* kernel) {
     auto mid_tensor = graph->AddNode(out_var_name + "_mid",
                                      x->dims().Vectorize(),
                                      CNML_TENSOR,
-                                     CNML_NHWC,
+                                     CNML_NCHW,
                                      graph->FPType());
     CNML_CALL(cnmlCreateBroadcastAddOp(&elementwise_op,
                                        x_tensor->mlu_tensor(),
diff --git a/lite/kernels/mlu/bridges/fc_op.cc b/lite/kernels/mlu/bridges/fc_op.cc
index f480a9110790406ddb2aa7464221c7062b26268e..286feec8d4d44eaa025f333d559c32ca72f042ff 100644
--- a/lite/kernels/mlu/bridges/fc_op.cc
+++ b/lite/kernels/mlu/bridges/fc_op.cc
@@ -37,6 +37,7 @@ int FCConverter(void* ctx, OpLite* op, KernelBase* kernel) {
   // int in_num_col_dims = op_info->GetAttr<int>("in_num_col_dims");
   auto x = scope->FindVar(x_var_name)->GetMutable<Tensor>();
   auto w = scope->FindVar(w_var_name)->GetMutable<Tensor>();
+  auto output = scope->FindVar(output_var_name)->GetMutable<Tensor>();
   auto x_dims = x->dims();
   auto w_dims = w->dims();
 
@@ -50,15 +51,11 @@ int FCConverter(void* ctx, OpLite* op, KernelBase* kernel) {
 
   auto input_scale = op_info->GetAttr<float>("input_scale");
 
-  std::vector<int64_t> output_shape_nhwc({x_dims[0], 1, 1, w_dims[1]});
   auto output_tensor = graph->AddNode(output_var_name,
-                                      output_shape_nhwc,
+                                      output->dims().Vectorize(),
                                       CNML_TENSOR,
-                                      CNML_NHWC,
+                                      CNML_NCHW,
                                       graph->FPType());
-  scope->FindVar(output_var_name)
-      ->GetMutable<::paddle::lite::Tensor>()
-      ->Resize(output_shape_nhwc);
 
   std::string bias_var_name;
   std::shared_ptr<MLUTensor> bias_tensor;
diff --git a/lite/kernels/mlu/bridges/fc_op_test.cc b/lite/kernels/mlu/bridges/fc_op_test.cc
index 79bee35501051e7b46f9c3dd3fa8cea6222abec4..8f92b6abad97650100d0862d49550abaf62daac9 100644
--- a/lite/kernels/mlu/bridges/fc_op_test.cc
+++ b/lite/kernels/mlu/bridges/fc_op_test.cc
@@ -139,15 +139,34 @@ void test_fc(const std::vector<int64_t>& input_shape,
   }
 
   auto fc_op_mlu = CreateOp<operators::FcOpLite>(fc_op_desc_mlu, &scope);
-  input->Resize({static_cast<int64_t>(input_shape[0]),
-                 static_cast<int64_t>(input_shape[2]),
-                 static_cast<int64_t>(input_shape[3]),
-                 static_cast<int64_t>(input_shape[1])});
-  out->Resize({static_cast<int64_t>(input_shape[0]), static_cast<int64_t>(w_shape[1])});
+
+  Tensor input_tmp, out_tmp;
+  input_tmp.Resize(input_shape);
+  transpose(input->mutable_data<float>(),
+            input_tmp.mutable_data<float>(),
+            {static_cast<int>(input_shape[0]),
+             static_cast<int>(input_shape[1]),
+             static_cast<int>(input_shape[2]),
+             static_cast<int>(input_shape[3])},
+            {0, 2, 3, 1});
+  input->CopyDataFrom(input_tmp);
+
   LaunchOp(fc_op_mlu, {input_var_name}, {out_var_name});
 
-  // compare results
+  auto os = out->dims();
+  out_tmp.Resize(os);
   auto* out_data = out->mutable_data<float>();
+  // transpose(out_data,
+  //           out_tmp.mutable_data<float>(),
+  //           {static_cast<int>(os[0]),
+  //            static_cast<int>(os[2]),
+  //            static_cast<int>(os[3]),
+  //            static_cast<int>(os[1])},
+  //           {0, 3, 1, 2});
+  //
+  // out_data = out_tmp.mutable_data<float>();
+
+  // compare results
   auto* out_ref_data = out_ref->mutable_data<float>();
   for (int i = 0; i < out->dims().production(); i++) {
     EXPECT_NEAR(out_data[i], out_ref_data[i], 1e-5);
diff --git a/lite/kernels/mlu/bridges/graph.cc b/lite/kernels/mlu/bridges/graph.cc
index 27c6ab2597fa6930b14c4c4e34750030608167b6..65c2f8214c13ee8d004dbe4b2e706523d007469c 100644
--- a/lite/kernels/mlu/bridges/graph.cc
+++ b/lite/kernels/mlu/bridges/graph.cc
@@ -25,12 +25,12 @@ namespace mlu {
 std::shared_ptr<MLUTensor> Graph::AddNode(const std::string& name,
                                           std::vector<int64_t> shape,
                                           cnmlTensorType_t tensor_type,
-                                          cnmlDataOrder_t data_order,
+                                          cnmlDataOrder_t shape_order,
                                           cnmlDataType_t mlu_dtype,
                                           void* raw_ptr) {
   CHECK(!HasNode(name));
   auto node = std::shared_ptr<MLUTensor>(
-      new MLUTensor(shape, tensor_type, data_order, mlu_dtype));
+      new MLUTensor(shape, tensor_type, shape_order, mlu_dtype));
   node->set_mlu_ptr(raw_ptr);
   nodes_.insert(std::make_pair(name, node));
   return node;
diff --git a/lite/kernels/mlu/bridges/interpolate_op.cc b/lite/kernels/mlu/bridges/interpolate_op.cc
index 77a6722e03a63c4ee06a0159916509aa0ca36139..e201199824d8042abd6002ccbe5bb659a9ca2898 100644
--- a/lite/kernels/mlu/bridges/interpolate_op.cc
+++ b/lite/kernels/mlu/bridges/interpolate_op.cc
@@ -45,8 +45,8 @@ int InterpolateConverter(void* ctx, OpLite* op, KernelBase* kernel) {
   CHECK(graph->HasNode(x_var_name));
   auto input_tensor = graph->GetNode(x_var_name);
 
-  auto in_h = x_dims[1];
-  auto in_w = x_dims[2];
+  auto in_h = x_dims[2];
+  auto in_w = x_dims[3];
 
   // Priority: SizeTensor > OutSize > Scale > scale > out_h/out_w
   if (HasInputArg(op_info, scope, "SizeTensor")) {
@@ -69,25 +69,13 @@ int InterpolateConverter(void* ctx, OpLite* op, KernelBase* kernel) {
     }
   }
 
-  out->Resize({x_dims[0], out_h, out_w, x_dims[3]});
-
   auto output_tensor = graph->AddNode(out_var_name,
                                       out->dims().Vectorize(),
                                       CNML_TENSOR,
-                                      CNML_NHWC,
+                                      CNML_NCHW,
                                       graph->FPType());
 
   cnmlBaseOp_t interp_op;
-  /* if (interp_method == "bilinear") { */
-  /*   cnmlInterpOpParam_t interp_param; */
-  /*   CNML_CALL(cnmlCreateInterpOpParam(&interp_param, out_w, out_h,
-   * align_corners)); */
-  /*   CNML_CALL(cnmlCreateInterpOp(&interp_op, */
-  /*                                input_tensor->mlu_tensor(), */
-  /*                                output_tensor->mlu_tensor(), */
-  /*                                interp_param)); */
-  /*   CNML_CALL(cnmlDestroyInterpOpParam(&interp_param)); */
-  /* } else if (interp_method == "nearest") { */
   cnmlNearestNeighborOpParam_t nn_param;
   CNML_CALL(cnmlCreateNearestNeighborOpParam(&nn_param, out_w, out_h));
   CNML_CALL(cnmlSetNearestNeighborAlignCorner(&nn_param, align_corners));
@@ -96,11 +84,6 @@ int InterpolateConverter(void* ctx, OpLite* op, KernelBase* kernel) {
                                        output_tensor->mlu_tensor(),
                                        nn_param));
   CNML_CALL(cnmlDestroyNearestNeighborOpParam(&nn_param));
-  /* } else { */
-  /*   LOG(WARNING) << "[MLU] Unsupported interpolate method: " <<
-   * interp_method; */
-  /*   return FAILED; */
-  /* } */
 
   graph->FuseOp(interp_op);
   return SUCCESS;
diff --git a/lite/kernels/mlu/bridges/interpolate_op_test.cc b/lite/kernels/mlu/bridges/interpolate_op_test.cc
index 29abff819afed2471f301ed11582b3dabf708e21..0e99da64358e6590af0b8e57dc3ddec142c8d0f0 100644
--- a/lite/kernels/mlu/bridges/interpolate_op_test.cc
+++ b/lite/kernels/mlu/bridges/interpolate_op_test.cc
@@ -237,7 +237,6 @@ class InterpComputeTester {
     /* printf("----output tensor dims: %ld, %d, %d, %ld\n", dims_[0], out_h,
      * out_w, dims_[1]); */
     std::vector<int64_t> out_shape_nchw = {dims_[0], dims_[1], out_h, out_w};
-    out->Resize(DimNCHW2NHWC(out_shape_nchw));
     outref->Resize(out_shape_nchw);
     outsize->Resize({2});
 
@@ -283,7 +282,6 @@ class InterpComputeTester {
               {in, ic, ih, iw},
               {0, 2, 3, 1});
     x->CopyDataFrom(input_trans);
-    x->Resize(DimNCHW2NHWC(dims_.Vectorize()));
     if (use_outsize_) {
       LaunchOp(op, {x_var_name, outsize_var_name}, {out_var_name});
     } else {
diff --git a/lite/kernels/mlu/bridges/pool_op.cc b/lite/kernels/mlu/bridges/pool_op.cc
index 3119b6c77dca10641c7c7c32072969fedb1ecef6..f77c8084c76fc52c39938e723f02bde9b3cac41b 100644
--- a/lite/kernels/mlu/bridges/pool_op.cc
+++ b/lite/kernels/mlu/bridges/pool_op.cc
@@ -47,9 +47,8 @@ int PoolConverter(void* ctx, OpLite* op, KernelBase* kernel) {
   // Get input, and attributes
   auto x_var_name = op_info->Input("X").front();
   auto x = scope->FindTensor(x_var_name);
-  auto input_dims_nhwc = x->dims();
-  const auto input_dims = DimNHWC2NCHW(input_dims_nhwc);
   auto output_var_name = op_info->Output("Out").front();
+  auto output_shape = scope->FindTensor(output_var_name)->dims().Vectorize();
   auto pooling_type = op_info->GetAttr<std::string>("pooling_type");
   auto ceil_mode = op_info->GetAttr<bool>("ceil_mode");
   auto paddings = op_info->GetAttr<std::vector<int>>("paddings");
@@ -81,23 +80,17 @@ int PoolConverter(void* ctx, OpLite* op, KernelBase* kernel) {
                                 strides,
                                 ksize);
 
-  std::vector<int64_t> output_shape({input_dims[0], input_dims[1]});
-  for (size_t i = 0; i < 2; i++) {
-    output_shape.push_back(
-        (input_dims[i + 2] + paddings[2 * i] + paddings[2 * i + 1] - ksize[0]) /
-            strides[i] +
-        1);
-  }
+  // std::vector<int64_t> output_shape({input_dims[0], input_dims[1]});
+  // for (size_t i = 0; i < 2; i++) {
+  //   output_shape.push_back(
+  //       (input_dims[i + 2] + paddings[2 * i] + paddings[2 * i + 1] -
+  //        ksize[0]) /
+  //           strides[i] +
+  //       1);
+  // }
 
-  auto output_shape_nhwc = DimNCHW2NHWC(output_shape);
-  auto output_tensor = graph->AddNode(output_var_name,
-                                      output_shape_nhwc,
-                                      CNML_TENSOR,
-                                      CNML_NHWC,
-                                      graph->FPType());
-  scope->FindVar(output_var_name)
-      ->GetMutable<::paddle::lite::Tensor>()
-      ->Resize(output_shape_nhwc);
+  auto output_tensor = graph->AddNode(
+      output_var_name, output_shape, CNML_TENSOR, CNML_NCHW, graph->FPType());
 
   cnmlPoolOpParam_t pool_param;
   CNML_CALL(
diff --git a/lite/kernels/mlu/bridges/pool_op_test.cc b/lite/kernels/mlu/bridges/pool_op_test.cc
index 90e43987e481fdfcc22da847937aa18a5149568d..8cee8dbe86109b14cff49f329d71074a9b3bfb61 100644
--- a/lite/kernels/mlu/bridges/pool_op_test.cc
+++ b/lite/kernels/mlu/bridges/pool_op_test.cc
@@ -180,12 +180,7 @@ void test_pool(int bs,
             {0, 2, 3, 1});
 
   auto os = out->dims();
-  out->Resize({static_cast<int64_t>(os[0]),
-               static_cast<int64_t>(os[2]),
-               static_cast<int64_t>(os[3]),
-               static_cast<int64_t>(os[1])});
   x->CopyDataFrom(input_trans);
-  x->Resize({bs, ih, iw, ic});
 
   LaunchOp(op, {x_var_name}, {out_var_name});
 
diff --git a/lite/kernels/mlu/bridges/scale_op.cc b/lite/kernels/mlu/bridges/scale_op.cc
index d500786006286884af0843967410fbc907923e56..5557602bd7576ccd71c51f52a538a45fe27f7ada 100644
--- a/lite/kernels/mlu/bridges/scale_op.cc
+++ b/lite/kernels/mlu/bridges/scale_op.cc
@@ -36,7 +36,7 @@ int ScaleConverter(void* ctx, OpLite* op, KernelBase* kernel) {
   auto output = scope->FindVar(out_var_name)->GetMutable<Tensor>();
   auto output_dims = output->dims().Vectorize();
   auto output_tensor = graph->AddNode(
-      out_var_name, output_dims, CNML_TENSOR, CNML_NHWC, graph->FPType());
+      out_var_name, output_dims, CNML_TENSOR, CNML_NCHW, graph->FPType());
   auto bias_after_scale = op_info->GetAttr<bool>("bias_after_scale");
   auto scale = op_info->GetAttr<float>("scale");
   auto bias = op_info->GetAttr<float>("bias");
diff --git a/lite/kernels/mlu/bridges/softmax_op.cc b/lite/kernels/mlu/bridges/softmax_op.cc
index b9e2b1116dc95ec276f8d85a5669cec45d98ea39..17c911675718a15c7ede4888b268ffcd62b4d8ed 100644
--- a/lite/kernels/mlu/bridges/softmax_op.cc
+++ b/lite/kernels/mlu/bridges/softmax_op.cc
@@ -45,11 +45,10 @@ int SoftmaxConverter(void* ctx, OpLite* op, KernelBase* kernel) {
       axis = output_dims.size() + axis;
     }
   }
-
   int nhwc_axis = nchw_to_nhwc_aixs_map[axis];
   auto output_tensor = graph->AddNode(
-      out_var_name, output_dims, CNML_TENSOR, CNML_NHWC, graph->FPType());
+      out_var_name, output_dims, CNML_TENSOR, CNML_NCHW, graph->FPType());
 
   cnmlBaseOp_t softmax_op;
   CNML_CALL(cnmlCreateNdSoftmaxOp(&softmax_op,
                                   nhwc_axis,
diff --git a/lite/kernels/mlu/bridges/softmax_op_test.cc b/lite/kernels/mlu/bridges/softmax_op_test.cc
index 87f8f589bc6e610071235eac25554353122fa085..a5251ed43c9187fc2874f9b01853b45b8abf7f1c 100644
--- a/lite/kernels/mlu/bridges/softmax_op_test.cc
+++ b/lite/kernels/mlu/bridges/softmax_op_test.cc
@@ -110,9 +110,7 @@ void test_softmax(const std::vector<int64_t>& input_shape, int axis) {
             {bs, ic, ih, iw},
             {0, 2, 3, 1});
 
-  out->Resize({bs, ih, iw, ic});
   x->CopyDataFrom(input_trans);
-  x->Resize({bs, ih, iw, ic});
 
   LaunchOp(op, {x_var_name}, {out_var_name});
 
diff --git a/lite/kernels/mlu/bridges/test_helper.cc b/lite/kernels/mlu/bridges/test_helper.cc
index c6c87e42b40abcac49c41a35c95e893c6f70fb8c..377a00689ef3a27f78ae008072578ab3701cd337 100644
--- a/lite/kernels/mlu/bridges/test_helper.cc
+++ b/lite/kernels/mlu/bridges/test_helper.cc
@@ -58,7 +58,7 @@ void LaunchOp(const std::shared_ptr<lite::OpLite> op,
     graph.AddNode(input_name,
                   input_tensor->dims().Vectorize(),
                   CNML_TENSOR,
-                  CNML_NHWC,
+                  CNML_NCHW,
                   graph.FPType(),
                   reinterpret_cast<void*>(
                       input_tensor->mutable_data<float>(TARGET(kMLU))));
@@ -68,6 +68,8 @@ void LaunchOp(const std::shared_ptr<lite::OpLite> op,
                            sizeof(float) * input_tensor->dims().production(),
                            CNRT_MEM_TRANS_DIR_HOST2DEV));
   }
+  op->CheckShape();
+  op->InferShape();
   bridges.Select(op_type, TARGET(kMLU))(
       reinterpret_cast<void*>(&graph), const_cast<OpLite*>(op.get()), nullptr);
diff --git a/lite/kernels/mlu/bridges/transpose_op.cc b/lite/kernels/mlu/bridges/transpose_op.cc
index 74af692b6f5b8834e29b8e008a4e48801a1e8820..5e5c5b79ebff4e4ae06e99e4a18f22ebabd4ceb5 100644
--- a/lite/kernels/mlu/bridges/transpose_op.cc
+++ b/lite/kernels/mlu/bridges/transpose_op.cc
@@ -21,8 +21,8 @@ namespace lite {
 namespace subgraph {
 namespace mlu {
 
-std::vector<int> axis_to_nhwc4d(const std::vector<int>& axis) {
-  CHECK_EQ(axis.size(), 4);
+std::vector<int> axis_to_nhwc(const std::vector<int>& axis) {
+  CHECK_EQ(axis.size(), 4) << "Unsupported dim in mlu transpose";
   std::vector<int> new_axis(4, 0);
   const std::vector<int> axis_map1 = {0, 2, 3, 1};
   const std::vector<int> axis_map2 = {0, 3, 1, 2};
@@ -32,26 +32,6 @@ std::vector<int> axis_to_nhwc4d(const std::vector<int>& axis) {
   return new_axis;
 }
 
-std::vector<int> axis_to_nhw3d(const std::vector<int>& axis) {
-  CHECK_EQ(axis.size(), 3);
-  std::vector<int> new_axis(3, 0);
-  const std::vector<int> axis_map = {0, 2, 1};
-  for (size_t i = 0; i < new_axis.size(); ++i) {
-    new_axis[i] = axis_map[axis[axis_map[i]]];
-  }
-  new_axis.push_back(3);
-  return new_axis;
-}
-
-std::vector<int64_t> infer_shape(const std::vector<int64_t>& x_dims,
-                                 const std::vector<int>& axis_nhwc) {
-  std::vector<int64_t> out_dims(x_dims);
-  for (size_t i = 0; i < out_dims.size(); ++i) {
-    out_dims[i] = x_dims[axis_nhwc[i]];
-  }
-  return out_dims;
-}
-
 int TransposeConverter(void* ctx, OpLite* op, KernelBase* kernel) {
   CHECK(ctx != nullptr);
   CHECK(op != nullptr);
@@ -71,21 +51,13 @@ int TransposeConverter(void* ctx, OpLite* op, KernelBase* kernel) {
   auto output_dims = output->dims().Vectorize();
 
   auto axis = op_info->GetAttr<std::vector<int>>("axis");
-
-  std::vector<int> axis_nhwc;
-  if (axis.size() == 4) {
-    axis_nhwc = axis_to_nhwc4d(axis);
-  } else if (axis.size() == 3) {
-    axis_nhwc = axis_to_nhw3d(axis);
-  } else {
-    CHECK(0) << "Unsupport dim in mlu transpose";
+  while (axis.size() < 4) {
+    axis.push_back(axis.size());
   }
-
-  auto output_dims_nhwc = infer_shape(x_dims, axis_nhwc);
-  output->Resize(output_dims_nhwc);
+  std::vector<int> axis_nhwc = axis_to_nhwc(axis);
 
   auto output_tensor = graph->AddNode(
-      out_var_name, output_dims_nhwc, CNML_TENSOR, CNML_NHWC, graph->FPType());
+      out_var_name, output_dims, CNML_TENSOR, CNML_NCHW, graph->FPType());
 
   CHECK(graph->HasNode(x_var_name));
   auto input_tensor = graph->GetNode(x_var_name);
@@ -113,7 +85,6 @@ int TransposeConverter(void* ctx, OpLite* op, KernelBase* kernel) {
 REGISTER_SUBGRAPH_BRIDGE(transpose,
                          kMLU,
                          paddle::lite::subgraph::mlu::TransposeConverter);
-
 REGISTER_SUBGRAPH_BRIDGE(transpose2,
                          kMLU,
                          paddle::lite::subgraph::mlu::TransposeConverter);
diff --git a/lite/kernels/mlu/bridges/transpose_op_test.cc b/lite/kernels/mlu/bridges/transpose_op_test.cc
index 0b2c015975740eac7a3e07783292bc17c132ef58..f10801fbc6844769342223f9ab15da88e748e0c0 100644
--- a/lite/kernels/mlu/bridges/transpose_op_test.cc
+++ b/lite/kernels/mlu/bridges/transpose_op_test.cc
@@ -115,6 +115,7 @@ void test_transpose(const std::vector<int64_t>& input_shape,
   }
 }
 
+// TODO(pmshst): fix the transpose test
 TEST(MLUBridges, transpose) {
   std::vector<int64_t> input_shape = {2, 3, 4, 5};
   test_transpose(input_shape, std::vector<int>{0, 1, 3, 2});
diff --git a/lite/kernels/mlu/layout_compute.h b/lite/kernels/mlu/layout_compute.h
index 2d355e5ddf590b55c22a103d3d2a24ad4357da4c..5e87e3526417573f2e0f01280b1d86ccb5691093 100644
--- a/lite/kernels/mlu/layout_compute.h
+++ b/lite/kernels/mlu/layout_compute.h
@@ -67,6 +67,8 @@ class LayoutNchwToNhwcCompute
     auto x_dims = param.x->dims().size();
     auto& context = this->ctx_->template As<X86Context>();
 
+    const auto origin_dims = out->dims().Vectorize();
+
     std::vector<int> axis;
     switch (x_dims) {
       case 2:
@@ -88,6 +90,10 @@ class LayoutNchwToNhwcCompute
 
     LayoutTransCompute<lite::TargetType::kX86, float>(
         x_dims, context, *x, out, axis);
+
+    if (x_dims > 2) {
+      out->Resize(origin_dims);
+    }
   }
 
   std::string doc() const override {
@@ -109,20 +115,22 @@ class LayoutNhwcToNchwCompute
     auto x_dims = param.x->dims().size();
     auto& context = this->ctx_->template As<X86Context>();
 
+    const auto origin_dims = out->dims().Vectorize();
+
     std::vector<int> axis;
     switch (x_dims) {
       case 2:
         axis = {0, 1};
         break;
       case 3:
-        axis = {0, 2, 1};
         out->Resize(std::vector<int64_t>{
             out->dims()[0], out->dims()[2], out->dims()[1]});
+        axis = {0, 2, 1};
         break;
       case 4:
-        axis = {0, 3, 1, 2};
         out->Resize(std::vector<int64_t>{
            out->dims()[0], out->dims()[3], out->dims()[1], out->dims()[2]});
+        axis = {0, 3, 1, 2};
         break;
       default:
         CHECK(0) << "Unsupport dim in mlu layout nhwc to nchw";
@@ -130,6 +138,10 @@ class LayoutNhwcToNchwCompute
 
     LayoutTransCompute<lite::TargetType::kX86, float>(
         x_dims, context, *x, out, axis);
+
+    if (x_dims > 2) {
+      out->Resize(origin_dims);
+    }
   }
 
   std::string doc() const override {
diff --git a/lite/kernels/mlu/subgraph_compute.h b/lite/kernels/mlu/subgraph_compute.h
index 0e79e54eb2888fa9c2d6867d16de81c2f334af29..51a9c0ffe05232bd807017e79c490d947e26c0f7 100644
--- a/lite/kernels/mlu/subgraph_compute.h
+++ b/lite/kernels/mlu/subgraph_compute.h
@@ -83,7 +83,7 @@ class SubgraphEngine : public subgraph::Engine {
         graph_.AddNode(input_name,
                        input_tensor->dims().Vectorize(),
                        CNML_TENSOR,
-                       CNML_NHWC,
+                       CNML_NCHW,
                        graph_.FPType(),
                        const_cast<void*>(input_tensor->raw_data()));
     CHECK(input_node);
@@ -99,9 +99,7 @@
       CHECK(op);
       std::string op_type = op->op_info()->Type();
       op->CheckShape();
-      if (op_type != "concat") {
-        op->InferShape();
-      }
+      op->InferShape();
       if (!bridges.Exists(op_type, TARGET(kMLU))) {
         LOG(INFO) << "MLU bridges doesn't support op_type: " << op_type;
         return subgraph::FAILED;