提交 a5b73e42 编写于 作者: B baolei.an

[LITE][BM] fix reshape infer shape issue, test=develop

上级 40a31442
......@@ -190,7 +190,11 @@ if(WITH_TESTING)
lite_cc_test(test_classify_lite_bm SRCS test_classify_lite_bm.cc
DEPS mir_passes lite_api_test_helper paddle_api_full paddle_api_light gflags utils
${ops} ${host_kernels} ${bm_kernels} ${bm_bridges}
ARGS --model_dir=${LITE_MODEL_DIR}/resnet50)
ARGS --model_dir=${LITE_MODEL_DIR}/classify)
lite_cc_test(test_yolov3_lite_bm SRCS test_yolov3_lite_bm.cc
DEPS mir_passes lite_api_test_helper paddle_api_full paddle_api_light gflags utils
${ops} ${host_kernels} ${bm_kernels} ${bm_bridges}
ARGS --model_dir=${LITE_MODEL_DIR}/yolov3)
endif()
endif()
endif()
......
......@@ -63,6 +63,7 @@ USE_LITE_OP(swish)
USE_LITE_OP(log)
USE_LITE_OP(exp)
USE_LITE_OP(conv2d_transpose)
USE_LITE_OP(depthwise_conv2d_transpose)
USE_LITE_OP(negative)
USE_LITE_OP(pad2d)
USE_LITE_OP(power)
......
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include <gflags/gflags.h>
#include <gtest/gtest.h>
#include <fstream>
#include <vector>
#include "lite/api/cxx_api.h"
#include "lite/api/paddle_use_kernels.h"
#include "lite/api/paddle_use_ops.h"
#include "lite/api/paddle_use_passes.h"
#include "lite/api/test_helper.h"
#include "lite/core/op_registry.h"
// Command-line flag FLAGS_input_img_txt_path. When non-empty, TestModel opens
// this path as a text stream and reads one float per element of input tensor 0
// from it; when empty, that tensor is filled with ones instead.
DEFINE_string(input_img_txt_path,
"",
"if set input_img_txt_path, read the img filename as input.");
namespace paddle {
namespace lite {
void TestModel(const std::vector<Place>& valid_places) {
lite::Predictor predictor;
std::vector<std::string> passes;
predictor.Build(FLAGS_model_dir,
FLAGS_model_dir + "/model",
FLAGS_model_dir + "/params",
valid_places,
passes);
auto* input_tensor = predictor.GetInput(0);
input_tensor->Resize(DDim(
std::vector<DDim::value_type>({1, 3, FLAGS_im_height, FLAGS_im_width})));
auto* data = input_tensor->mutable_data<float>();
auto item_size = input_tensor->dims().production();
if (FLAGS_input_img_txt_path.empty()) {
for (int i = 0; i < item_size; i++) {
data[i] = 1;
}
} else {
std::fstream fs(FLAGS_input_img_txt_path, std::ios::in);
if (!fs.is_open()) {
LOG(FATAL) << "open input_img_txt error.";
}
for (int i = 0; i < item_size; i++) {
fs >> data[i];
}
}
auto* image_tensor = predictor.GetInput(1);
image_tensor->Resize(DDim(std::vector<DDim::value_type>({1, 2})));
data = image_tensor->mutable_data<float>();
data[0] = FLAGS_im_height;
data[1] = FLAGS_im_width;
for (int i = 0; i < FLAGS_warmup; ++i) {
predictor.Run();
}
auto start = GetCurrentUS();
for (int i = 0; i < FLAGS_repeats; ++i) {
predictor.Run();
}
LOG(INFO) << "================== Speed Report ===================";
LOG(INFO) << "Model: " << FLAGS_model_dir << ", threads num " << FLAGS_threads
<< ", warmup: " << FLAGS_warmup << ", repeats: " << FLAGS_repeats
<< ", spend " << (GetCurrentUS() - start) / FLAGS_repeats / 1000.0
<< " ms in average.";
auto out = predictor.GetOutputs();
FILE* fp = fopen("result.txt", "wb");
for (int i = 0; i < out.size(); i++) {
auto* out_data = out[i]->data<float>();
for (int j = 0; j < out[i]->numel(); j++) {
fprintf(fp, "%f\n", out_data[j]);
}
}
fclose(fp);
}
// Runs TestModel with two float-precision places, kBM first and kX86 second.
TEST(Yolov3, test_bm) {
  const Place bm_place{TARGET(kBM), PRECISION(kFloat)};
  const Place x86_place{TARGET(kX86), PRECISION(kFloat)};
  std::vector<Place> valid_places;
  valid_places.push_back(bm_place);
  valid_places.push_back(x86_place);
  TestModel(valid_places);
}
} // namespace lite
} // namespace paddle
......@@ -32,6 +32,9 @@ lite_cc_library(subgraph_bridge_squeeze_op_bm SRCS squeeze_op.cc DEPS ${bm_subgr
lite_cc_library(subgraph_bridge_cast_op_bm SRCS cast_op.cc DEPS ${bm_subgraph_bridge_deps})
lite_cc_library(subgraph_bridge_fill_constant_op_bm SRCS fill_constant_op.cc DEPS ${bm_subgraph_bridge_deps})
lite_cc_library(subgraph_bridge_assign_value_op_bm SRCS assign_value_op.cc DEPS ${bm_subgraph_bridge_deps})
lite_cc_library(subgraph_bridge_shape_op_bm SRCS shape_op.cc DEPS ${bm_subgraph_bridge_deps})
lite_cc_library(subgraph_bridge_split_op_bm SRCS split_op.cc DEPS ${bm_subgraph_bridge_deps})
lite_cc_library(subgraph_bridge_matmul_op_bm SRCS matmul_op.cc DEPS ${bm_subgraph_bridge_deps})
set(bm_subgraph_bridges
subgraph_bridge_registry
......@@ -62,4 +65,7 @@ set(bm_subgraph_bridges
subgraph_bridge_cast_op_bm
subgraph_bridge_fill_constant_op_bm
subgraph_bridge_assign_value_op_bm
subgraph_bridge_shape_op_bm
subgraph_bridge_split_op_bm
subgraph_bridge_matmul_op_bm
CACHE INTERNAL "bm_subgraph_bridges")
......@@ -40,17 +40,31 @@ int AssignValueConverter(void* ctx, OpLite* op, KernelBase* kernel) {
i_output_shape_data[i] = static_cast<int>(output_dims[i]);
buffer_size *= i_output_shape_data[i];
}
auto fp32_values = op_info->GetAttr<std::vector<float>>("fp32_values");
std::vector<float> fp32_values;
std::vector<int> int32_values;
float* assign_data =
reinterpret_cast<float*>(malloc(buffer_size * sizeof(float)));
CHECK(assign_data != nullptr);
CHECK_EQ(buffer_size, fp32_values.size());
bm_data_type_t data_type = static_cast<bm_data_type_t>(DTYPE_FP32);
fp32_values = op_info->GetAttr<std::vector<float>>("fp32_values");
if (0 != fp32_values.size()) {
for (int i = 0; i < fp32_values.size(); i++) {
assign_data[i] = fp32_values[i];
}
} else {
int32_values = op_info->GetAttr<std::vector<int>>("int32_values");
data_type = static_cast<bm_data_type_t>(DTYPE_INT32);
CHECK_EQ(buffer_size, int32_values.size());
for (int i = 0; i < int32_values.size(); i++) {
assign_data[i] = int32_values[i];
}
}
bm_add_const_tensor(graph->GetCompilerHandle(),
static_cast<const char*>(output_var_name.c_str()),
const_cast<const int*>(i_output_shape_data.data()),
output_dims.size(),
static_cast<bm_data_type_t>(DTYPE_FP32),
data_type,
reinterpret_cast<const void*>(assign_data));
graph->AddNode(output_var_name);
return SUCCESS;
......
......@@ -91,7 +91,6 @@ int ConvConverter(void* ctx, OpLite* op, KernelBase* kernel) {
dilations[1],
static_cast<int>(has_bias));
graph->AddNode(output_var_name);
LOG(INFO) << output_var_name << input_dims << " " << output_dims;
return SUCCESS;
}
......
......@@ -108,3 +108,6 @@ int ConvTransposeConverter(void* ctx, OpLite* op, KernelBase* kernel) {
REGISTER_SUBGRAPH_BRIDGE(conv2d_transpose,
kBM,
paddle::lite::subgraph::bm::ConvTransposeConverter);
REGISTER_SUBGRAPH_BRIDGE(depthwise_conv2d_transpose,
kBM,
paddle::lite::subgraph::bm::ConvTransposeConverter);
......@@ -65,7 +65,6 @@ int ElementwiseConverter(void* ctx, OpLite* op, KernelBase* kernel) {
auto output_dims = output->dims();
const int64_t* output_shape_data =
const_cast<const int64_t*>(&output_dims.data()[0]);
LOG(INFO) << x_dims << " " << output_dims;
std::vector<int32_t> i_output_shape_data(output_dims.size());
for (size_t i = 0; i < output_dims.size(); i++) {
i_output_shape_data[i] = static_cast<int>(output_shape_data[i]);
......
......@@ -54,6 +54,7 @@ int InterpolateConverter(void* ctx, OpLite* op, KernelBase* kernel) {
} else {
type = 0;
}
is_int = false;
if (type == 2 && is_int) {
add_upsample_layer(graph->GetCompilerHandle(),
const_cast<const int*>(&i_x_shape_data[0]),
......
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include <bmcompiler_if.h>
#include <bmcompiler_op_code.h>
#include "lite/kernels/bm/bridges/graph.h"
#include "lite/kernels/bm/bridges/utility.h"
#include "lite/kernels/npu/bridges/registry.h"
namespace paddle {
namespace lite {
namespace subgraph {
namespace bm {
// Subgraph bridge converter for the `matmul` op.
//
// Looks up the "X" and "Y" input tensors, reads the transpose_X, transpose_Y
// and alpha attributes, logs them together with both input shapes, and
// registers the "Out" variable name as a node of the bridge graph.
//
// TODO: no compiler layer is emitted for the op yet; only the output node is
// recorded, so the actual matmul lowering still has to be implemented.
//
// ctx:    opaque pointer to the bridge Graph; must not be null.
// op:     the op being converted; must not be null.
// kernel: not used.
// Returns SUCCESS.
int MatMulConverter(void* ctx, OpLite* op, KernelBase* kernel) {
  CHECK(ctx != nullptr);
  CHECK(op != nullptr);

  auto bridge_graph = static_cast<Graph*>(ctx);
  auto op_scope = op->scope();
  auto info = op->op_info();
  auto type_name = info->Type();
  // The generated name is not consumed yet; the call is kept so that whatever
  // bookkeeping UniqueName performs still happens.
  auto layer_name = lite::subgraph::bm::UniqueName(type_name);

  // Inputs: only their shapes are needed, for the diagnostic log below.
  auto x_var_name = info->Input("X").front();
  auto x_tensor = op_scope->FindVar(x_var_name)->GetMutable<lite::Tensor>();
  auto x_dims = x_tensor->dims();

  auto y_var_name = info->Input("Y").front();
  auto y_tensor = op_scope->FindVar(y_var_name)->GetMutable<lite::Tensor>();
  auto y_dims = y_tensor->dims();

  // Output name and attributes.
  auto output_var_name = info->Output("Out").front();
  const bool transpose_x = info->GetAttr<bool>("transpose_X");
  const bool transpose_y = info->GetAttr<bool>("transpose_Y");
  const float alpha = info->GetAttr<float>("alpha");

  LOG(INFO) << x_dims << " " << y_dims << " " << alpha << " " << transpose_x
            << " " << transpose_y;

  bridge_graph->AddNode(output_var_name);
  return SUCCESS;
}
} // namespace bm
} // namespace subgraph
} // namespace lite
} // namespace paddle
// Registers MatMulConverter as the subgraph bridge for op type `matmul` on the
// kBM target.
REGISTER_SUBGRAPH_BRIDGE(matmul,
kBM,
paddle::lite::subgraph::bm::MatMulConverter);
......@@ -45,14 +45,6 @@ int MultiClassNMSConverter(void* ctx, OpLite* op, KernelBase* kernel) {
i_score_shape_data[i] = static_cast<int32_t>(score_dims[i]);
}
auto out_var_name = op_info->Output("Out").front();
auto out = scope->FindVar(out_var_name)->GetMutable<lite::Tensor>();
auto out_dims = out->dims();
std::vector<int32_t> i_out_shape_data(out_dims.size());
for (size_t i = 0; i < out_dims.size(); i++) {
i_out_shape_data[i] = static_cast<int32_t>(out_dims[i]);
}
auto background_label = op_info->GetAttr<int>("background_label");
auto keep_top_k = op_info->GetAttr<int>("keep_top_k");
auto nms_top_k = op_info->GetAttr<int>("nms_top_k");
......@@ -64,6 +56,26 @@ int MultiClassNMSConverter(void* ctx, OpLite* op, KernelBase* kernel) {
normalized = op_info->GetAttr<bool>("normalized");
}
auto out_var_name = op_info->Output("Out").front();
auto out = scope->FindVar(out_var_name)->GetMutable<lite::Tensor>();
std::vector<int64_t> vec_out_dim(score_dims.size());
if (3 == score_dims.size()) {
vec_out_dim[0] = score_dims[0]; // batch_size
vec_out_dim[1] = keep_top_k;
vec_out_dim[2] = 6;
} else {
vec_out_dim[0] = keep_top_k;
vec_out_dim[1] = 6;
}
DDimLite out_dims(vec_out_dim);
out->Resize(out_dims);
out->mutable_data<float>();
std::vector<int32_t> i_out_shape_data(out_dims.size());
for (size_t i = 0; i < out_dims.size(); i++) {
i_out_shape_data[i] = static_cast<int32_t>(out_dims[i]);
}
user_cpu_param_t bm_param;
bm_param.op_type = USER_PADDLE_MULTICLASS_NMS;
bm_param.u.multiclass_nms_param.background_label = background_label;
......@@ -88,12 +100,9 @@ int MultiClassNMSConverter(void* ctx, OpLite* op, KernelBase* kernel) {
int32_t* out_shape[1];
int32_t out_dim[1];
const char* out_name[1];
i_out_shape_data[0] = keep_top_k;
i_out_shape_data[1] = 6;
out_shape[0] = &i_out_shape_data[0];
out_dim[0] = 2;
out_dim[0] = out_dims.size();
out_name[0] = static_cast<const char*>(out_var_name.c_str());
add_user_cpu_layer(graph->GetCompilerHandle(),
input_num,
in_shape,
......
......@@ -48,8 +48,13 @@ USE_SUBGRAPH_BRIDGE(slice, kBM);
USE_SUBGRAPH_BRIDGE(conv2d_transpose, kBM);
USE_SUBGRAPH_BRIDGE(reduce_sum, kBM);
USE_SUBGRAPH_BRIDGE(reduce_mean, kBM);
USE_SUBGRAPH_BRIDGE(reduce_max, kBM);
USE_SUBGRAPH_BRIDGE(squeeze, kBM);
USE_SUBGRAPH_BRIDGE(squeeze2, kBM);
USE_SUBGRAPH_BRIDGE(cast, kBM);
USE_SUBGRAPH_BRIDGE(fill_constant, kBM);
USE_SUBGRAPH_BRIDGE(assign_value, kBM);
USE_SUBGRAPH_BRIDGE(depthwise_conv2d_transpose, kBM);
USE_SUBGRAPH_BRIDGE(shape, kBM);
USE_SUBGRAPH_BRIDGE(split, kBM);
USE_SUBGRAPH_BRIDGE(matmul, kBM);
......@@ -49,6 +49,8 @@ int ReduceFullConverter(void* ctx, OpLite* op, KernelBase* kernel) {
op_code = REDUCE_SUM;
} else if (op_type == "reduce_mean") {
op_code = REDUCE_MEAN;
} else if (op_type == "reduce_max") {
op_code = REDUCE_MAX;
}
add_reduce_full_layer(graph->GetCompilerHandle(),
......@@ -75,3 +77,6 @@ REGISTER_SUBGRAPH_BRIDGE(reduce_sum,
REGISTER_SUBGRAPH_BRIDGE(reduce_mean,
kBM,
paddle::lite::subgraph::bm::ReduceFullConverter);
REGISTER_SUBGRAPH_BRIDGE(reduce_max,
kBM,
paddle::lite::subgraph::bm::ReduceFullConverter);
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include <bmcompiler_defs.h>
#include <bmcompiler_if.h>
#include <bmcompiler_if_lite.h>
#include "lite/kernels/bm/bridges/graph.h"
#include "lite/kernels/bm/bridges/utility.h"
#include "lite/kernels/npu/bridges/registry.h"
namespace paddle {
namespace lite {
namespace subgraph {
namespace bm {
// Subgraph bridge converter for the `shape` op.
//
// Converts the dims of the "Input" tensor to 32-bit ints, hands them together
// with the input and output variable names to add_shape_ref_layer, and
// registers the "Out" variable name as a node of the bridge graph.
//
// ctx:    opaque pointer to the bridge Graph; must not be null.
// op:     the op being converted; must not be null.
// kernel: not used.
// Returns SUCCESS.
int ShapeConverter(void* ctx, OpLite* op, KernelBase* kernel) {
  CHECK(ctx != nullptr);
  CHECK(op != nullptr);

  auto bridge_graph = static_cast<Graph*>(ctx);
  auto op_scope = op->scope();
  auto info = op->op_info();

  // Input tensor and its dims.
  auto x_var_name = info->Input("Input").front();
  auto x_tensor = op_scope->FindVar(x_var_name)->GetMutable<lite::Tensor>();
  auto x_dims = x_tensor->dims();

  // Output variable name.
  auto output_var_name = info->Output("Out").front();

  // The layer call below takes the shape as an int array.
  std::vector<int32_t> input_shape;
  input_shape.reserve(x_dims.size());
  for (size_t axis = 0; axis < x_dims.size(); ++axis) {
    input_shape.push_back(static_cast<int32_t>(x_dims[axis]));
  }

  const char* input_name = x_var_name.c_str();
  const char* output_name = output_var_name.c_str();
  const int* input_shape_ptr = input_shape.data();
  add_shape_ref_layer(bridge_graph->GetCompilerHandle(),
                      input_name,
                      input_shape_ptr,
                      x_dims.size(),
                      output_name);

  bridge_graph->AddNode(output_var_name);
  return SUCCESS;
}
} // namespace bm
} // namespace subgraph
} // namespace lite
} // namespace paddle
// Registers ShapeConverter as the subgraph bridge for op type `shape` on the
// kBM target.
REGISTER_SUBGRAPH_BRIDGE(shape,
kBM,
paddle::lite::subgraph::bm::ShapeConverter);
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include <bmcompiler_if.h>
#include <bmcompiler_op_code.h>
#include "lite/kernels/bm/bridges/graph.h"
#include "lite/kernels/bm/bridges/utility.h"
#include "lite/kernels/npu/bridges/registry.h"
namespace paddle {
namespace lite {
namespace subgraph {
namespace bm {
// Subgraph bridge converter for the `split` op.
//
// Reads the "X" input shape and the axis / num / sections attributes, collects
// the shape, rank and name of each "Out" tensor, passes everything to
// add_tf_split_layer, and registers each output variable name as a node of
// the bridge graph.
//
// Attribute handling:
//   - num == 0      -> the number of outputs is taken from sections.size().
//   - sections empty -> the split axis is divided evenly into `num` pieces.
//
// ctx:    opaque pointer to the bridge Graph; must not be null.
// op:     the op being converted; must not be null.
// kernel: not used.
// Returns SUCCESS. Invalid attribute combinations (no outputs requested,
// fewer output names or sections than `num`, axis out of range) trip a CHECK.
int SplitConverter(void* ctx, OpLite* op, KernelBase* kernel) {
  CHECK(ctx != nullptr);
  CHECK(op != nullptr);
  auto graph = static_cast<Graph*>(ctx);
  auto scope = op->scope();
  auto op_info = op->op_info();

  // input
  auto x_var_name = op_info->Input("X").front();
  auto x = scope->FindVar(x_var_name)->GetMutable<lite::Tensor>();
  auto x_dims = x->dims();
  std::vector<int32_t> i_x_shape_data(x_dims.size());
  for (size_t i = 0; i < x_dims.size(); i++) {
    i_x_shape_data[i] = static_cast<int32_t>(x_dims[i]);
  }

  // output names and attributes
  auto output_names = op_info->Output("Out");
  auto axis = op_info->GetAttr<int>("axis");
  auto num = op_info->GetAttr<int>("num");
  auto sections = op_info->GetAttr<std::vector<int>>("sections");
  if (0 == num) {
    num = static_cast<int>(sections.size());
  }
  // With neither attribute set there is nothing to split into; going on would
  // take the address of an element of an empty vector below.
  CHECK(num > 0) << "split: either num or sections must be set";
  const size_t out_count = static_cast<size_t>(num);
  CHECK(output_names.size() >= out_count)
      << "split: fewer output variables than requested pieces";

  if (sections.empty()) {
    // A negative axis counts from the last dimension; normalize it only for
    // the local lookup. The attribute value itself is forwarded unchanged.
    const int rank = static_cast<int>(x_dims.size());
    const int dim_index = axis < 0 ? axis + rank : axis;
    CHECK(dim_index >= 0 && dim_index < rank) << "split: axis out of range";
    sections.assign(out_count, static_cast<int>(x_dims[dim_index] / num));
  }
  CHECK(sections.size() >= out_count)
      << "split: fewer sections than requested pieces";

  // Per-output shape, rank and name. The vectors own the storage, so nothing
  // needs to be released by hand on any exit path.
  std::vector<std::vector<int>> out_shapes(out_count);
  std::vector<int*> shape(out_count);
  std::vector<int> dim(out_count);
  std::vector<const char*> name(out_count);
  for (size_t i = 0; i < out_count; i++) {
    auto out = scope->FindVar(output_names[i])->GetMutable<lite::Tensor>();
    auto out_dims = out->dims();
    out_shapes[i].resize(out_dims.size());
    for (size_t j = 0; j < out_dims.size(); j++) {
      out_shapes[i][j] = static_cast<int>(out_dims[j]);
    }
    shape[i] = out_shapes[i].data();
    dim[i] = static_cast<int>(out_dims.size());
    // output_names outlives the layer call, so the c_str() pointers stay valid.
    name[i] = output_names[i].c_str();
  }

  add_tf_split_layer(graph->GetCompilerHandle(),
                     const_cast<const int*>(i_x_shape_data.data()),
                     x_dims.size(),
                     static_cast<const char*>(x_var_name.c_str()),
                     num,
                     shape.data(),
                     dim.data(),
                     name.data(),
                     x_dims.size(),
                     axis,
                     const_cast<const int*>(sections.data()),
                     num);

  for (size_t i = 0; i < out_count; i++) {
    graph->AddNode(output_names[i]);
  }
  return SUCCESS;
}
} // namespace bm
} // namespace subgraph
} // namespace lite
} // namespace paddle
// Registers SplitConverter as the subgraph bridge for op type `split` on the
// kBM target.
REGISTER_SUBGRAPH_BRIDGE(split,
kBM,
paddle::lite::subgraph::bm::SplitConverter);
......@@ -15,6 +15,7 @@
#include <bmcompiler_defs.h>
#include <bmcompiler_if.h>
#include "lite/kernels/bm/bridges/graph.h"
#include "lite/kernels/bm/bridges/utility.h"
#include "lite/kernels/npu/bridges/registry.h"
namespace paddle {
......@@ -39,11 +40,20 @@ int TransposeConverter(void* ctx, OpLite* op, KernelBase* kernel) {
const int64_t* output_shape_data =
const_cast<const int64_t*>(&output_dims.data()[0]);
std::vector<int32_t> i_x_shape_data(x_dims.size());
std::vector<int32_t> i_output_shape_data(output_dims.size());
std::vector<int32_t> i_output_shape_data(x_dims.size());
for (size_t i = 0; i < x_dims.size(); i++) {
i_x_shape_data[i] = static_cast<int>(x_shape_data[i]);
}
for (size_t i = 0; i < output_dims.size(); i++) {
auto out_name = output_var_name;
if (x_dims.size() > output_dims.size()) {
for (size_t i = 0; i < (x_dims.size() - output_dims.size()); i++) {
i_output_shape_data[i] = 1;
}
out_name = lite::subgraph::bm::UniqueName(op_type);
}
for (size_t i = (x_dims.size() - output_dims.size()); i < output_dims.size();
i++) {
i_output_shape_data[i] = static_cast<int>(output_shape_data[i]);
}
auto axis = op_info->GetAttr<std::vector<int>>("axis");
......@@ -53,9 +63,22 @@ int TransposeConverter(void* ctx, OpLite* op, KernelBase* kernel) {
const_cast<const int*>(&i_x_shape_data[0]),
x_dims.size(),
DTYPE_FP32,
static_cast<const char*>(output_var_name.c_str()),
static_cast<const char*>(out_name.c_str()),
NULL,
const_cast<const int*>(&axis[0]));
if (x_dims.size() > output_dims.size()) {
std::vector<int32_t> i_real_output_shape_data(output_dims.size());
for (size_t i = 0; i < output_dims.size(); i++) {
i_real_output_shape_data[i] = static_cast<int>(output_shape_data[i]);
}
add_reshape_layer_v2(graph->GetCompilerHandle(),
static_cast<const char*>(out_name.c_str()),
const_cast<const int*>(&i_output_shape_data[0]),
i_output_shape_data.size(),
static_cast<const char*>(output_var_name.c_str()),
const_cast<const int*>(&i_real_output_shape_data[0]),
output_dims.size());
}
graph->AddNode(output_var_name);
return SUCCESS;
}
......
......@@ -88,18 +88,27 @@ int SubgraphEngine::BuildDeviceProgram() {
// output
origin_odims_.resize(output_names_.size());
origin_otensors_.resize(output_names_.size());
device_outputs_.resize(output_names_.size());
for (size_t i = 0; i < output_names_.size(); i++) {
origin_otensors_[i] = scope_->FindMutableTensor(net_info_->output_names[i]);
CHECK(origin_otensors_[i]);
origin_odims_[i] = origin_otensors_[i]->dims();
origin_otensors_[i]->mutable_data<float>();
device_outputs_.resize(net_info_->output_num);
int out_index = 0;
for (int i = 0; i < output_names_.size(); i++) {
outname_map_.insert(std::pair<std::string, int>(output_names_[i], i));
}
for (int i = 0; i < net_info_->output_num; i++) {
Tensor* t_cur = scope_->FindMutableTensor(net_info_->output_names[i]);
CHECK(t_cur != nullptr);
bm_device_mem_t* p_mem =
static_cast<bm_device_mem_t*>(malloc(sizeof(bm_device_mem_t)));
CHECK(p_mem != nullptr);
CHECK_EQ(bm_malloc_device_byte(
bm_hd_, p_mem, origin_otensors_[i]->memory_size()),
BM_SUCCESS);
if (outname_map_.find(net_info_->output_names[i]) != outname_map_.end()) {
origin_otensors_[out_index] = t_cur;
origin_odims_[out_index] = origin_otensors_[out_index]->dims();
origin_otensors_[out_index]->mutable_data<float>();
out_index += 1;
}
CHECK_EQ(
bm_malloc_device_byte(bm_hd_, p_mem, net_info_->max_output_bytes[i]),
BM_SUCCESS);
bmrt_tensor_with_device(&device_outputs_[i],
*p_mem,
net_info_->output_dtypes[i],
......@@ -123,10 +132,14 @@ int SubgraphEngine::LaunchDeviceProgram() {
true,
false);
bm_thread_sync(bm_hd_);
int out_index = 0;
for (size_t i = 0; i < device_outputs_.size(); i++) {
bm_memcpy_d2s(bm_hd_,
const_cast<void*>(origin_otensors_[i]->raw_data()),
device_outputs_[i].device_mem);
if (outname_map_.find(net_info_->output_names[i]) != outname_map_.end()) {
bm_memcpy_d2s(bm_hd_,
const_cast<void*>(origin_otensors_[out_index]->raw_data()),
device_outputs_[i].device_mem);
out_index++;
}
}
return 0;
}
......
......@@ -51,6 +51,7 @@ class SubgraphEngine : public subgraph::Engine {
void *bmrt_hd_;
std::vector<bm_tensor_t> device_inputs_;
std::vector<bm_tensor_t> device_outputs_;
std::map<std::string, int> outname_map_;
const char **net_names_;
const bm_net_info_t *net_info_;
bm_handle_t bm_hd_;
......
......@@ -157,3 +157,5 @@ bool ConvTransposeOpLite::AttachImpl(const cpp::OpDesc& op_desc,
REGISTER_LITE_OP(conv2d_transpose,
paddle::lite::operators::ConvTransposeOpLite);
REGISTER_LITE_OP(depthwise_conv2d_transpose,
paddle::lite::operators::ConvTransposeOpLite);
......@@ -37,7 +37,7 @@ bool ReshapeOp::InferShapeImpl() const {
for (size_t i = 0; i < shape_tensor_vct.size(); i++) {
final_shape[i] = shape_tensor_vct[i]->data<int>()[0];
}
} else if (shape_tensor != nullptr) {
} else if (shape_tensor != nullptr && shape_tensor->data<int>() != nullptr) {
auto *shape_tensor_data = shape_tensor->data<int>();
final_shape = std::vector<int>(shape_tensor_data,
shape_tensor_data + shape_tensor->numel());
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册