Unverified commit e0fe902c, authored by Yuan Shuai, committed by GitHub

[cherry-pick][LITE][OPENCL] OpenCL Image2D model passed (#2891)

* mobilenetv1/v2, mnasnet, yolonano, sr models passed with opencl image2d kernel. test=develop

* Fix gflags shape definitions. test=develop
Parent bbcdb92e
@@ -23,6 +23,10 @@
 #include "lite/core/op_registry.h"
 DEFINE_string(optimized_model, "", "optimized_model");
+DEFINE_int32(N, 1, "input_batch");
+DEFINE_int32(C, 3, "input_channel");
+DEFINE_int32(H, 224, "input_height");
+DEFINE_int32(W, 224, "input_width");
 namespace paddle {
 namespace lite {
@@ -37,7 +41,8 @@ void TestModel(const std::vector<Place>& valid_places,
 predictor.Build(model_dir, "", "", valid_places);
 auto* input_tensor = predictor.GetInput(0);
-input_tensor->Resize(DDim(std::vector<DDim::value_type>({1, 3, 224, 224})));
+input_tensor->Resize(DDim(
+    std::vector<DDim::value_type>({FLAGS_N, FLAGS_C, FLAGS_H, FLAGS_W})));
 auto* data = input_tensor->mutable_data<float>();
 auto item_size = input_tensor->dims().production();
 for (int i = 0; i < item_size; i++) {
@@ -58,6 +63,8 @@ void TestModel(const std::vector<Place>& valid_places,
 predictor.SaveModel(FLAGS_optimized_model);
 }
+LOG(INFO) << "input shape(NCHW):" << FLAGS_N << " " << FLAGS_C << " "
+          << FLAGS_H << " " << FLAGS_W;
 LOG(INFO) << "================== Speed Report ===================";
 LOG(INFO) << "Model: " << model_dir << ", threads num " << FLAGS_threads
           << ", warmup: " << FLAGS_warmup << ", repeats: " << FLAGS_repeats
......
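The hunks above (and the mirrored ones in the second model test below) replace the hard-coded {1, 3, 224, 224} input shape with four command-line flags, so the same test binary can drive models with different input resolutions, such as the sr and yolonano models named in the commit message. As a rough illustration of the gflags pattern being used, here is a minimal standalone sketch; the main() wrapper and the sample flag values are illustrative only, not part of the test:

// Minimal sketch of the DEFINE_int32 / FLAGS_* pattern added above.
#include <cstdint>
#include <iostream>
#include <vector>
#include <gflags/gflags.h>

DEFINE_int32(N, 1, "input_batch");
DEFINE_int32(C, 3, "input_channel");
DEFINE_int32(H, 224, "input_height");
DEFINE_int32(W, 224, "input_width");

int main(int argc, char** argv) {
  gflags::ParseCommandLineFlags(&argc, &argv, /*remove_flags=*/true);
  // e.g. `./model_test --N=1 --C=3 --H=192 --W=192` feeds a different NCHW
  // shape to the same binary instead of the former hard-coded 1x3x224x224.
  std::vector<int64_t> shape{FLAGS_N, FLAGS_C, FLAGS_H, FLAGS_W};
  std::cout << "input shape(NCHW): ";
  for (int64_t d : shape) std::cout << d << " ";
  std::cout << std::endl;
  return 0;
}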
@@ -23,6 +23,10 @@
 #include "lite/core/op_registry.h"
 DEFINE_string(optimized_model, "", "optimized_model");
+DEFINE_int32(N, 1, "input_batch");
+DEFINE_int32(C, 3, "input_channel");
+DEFINE_int32(H, 224, "input_height");
+DEFINE_int32(W, 224, "input_width");
 namespace paddle {
 namespace lite {
@@ -38,7 +42,8 @@ void TestModel(const std::vector<Place>& valid_places,
 predictor.Build(model_dir, "", "", valid_places);
 auto* input_tensor = predictor.GetInput(0);
-input_tensor->Resize(DDim(std::vector<DDim::value_type>({1, 3, 224, 224})));
+input_tensor->Resize(DDim(
+    std::vector<DDim::value_type>({FLAGS_N, FLAGS_C, FLAGS_H, FLAGS_W})));
 auto* data = input_tensor->mutable_data<float>();
 auto item_size = input_tensor->dims().production();
 for (int i = 0; i < item_size; i++) {
@@ -59,6 +64,8 @@ void TestModel(const std::vector<Place>& valid_places,
 predictor.SaveModel(FLAGS_optimized_model);
 }
+LOG(INFO) << "input shape(NCHW):" << FLAGS_N << " " << FLAGS_C << " "
+          << FLAGS_H << " " << FLAGS_W;
 LOG(INFO) << "================== Speed Report ===================";
 LOG(INFO) << "Model: " << model_dir << ", threads num " << FLAGS_threads
           << ", warmup: " << FLAGS_warmup << ", repeats: " << FLAGS_repeats
@@ -123,8 +130,11 @@ TEST(MobileNetV2, test_arm) {
 #ifdef LITE_WITH_OPENCL
 TEST(MobileNetV2, test_opencl) {
 std::vector<Place> valid_places({
-    Place{TARGET(kOpenCL), PRECISION(kFloat)},
-    Place{TARGET(kARM), PRECISION(kFloat)},
+    Place{TARGET(kOpenCL), PRECISION(kFloat), DATALAYOUT(kImageDefault)},
+    Place{TARGET(kOpenCL), PRECISION(kFloat), DATALAYOUT(kNCHW)},
+    Place{TARGET(kOpenCL), PRECISION(kAny), DATALAYOUT(kImageDefault)},
+    Place{TARGET(kOpenCL), PRECISION(kAny), DATALAYOUT(kNCHW)},
+    TARGET(kARM),  // enable kARM CPU kernel when no opencl kernel
 });
 TestModel(valid_places);
......
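The test_opencl change above rewrites valid_places so OpenCL image2d kernels are tried first, OpenCL NCHW buffer kernels next, and the trailing kARM entry keeps CPU kernels available for any op with no OpenCL implementation. The order matters because kernel selection scans the list front to back and takes the first place a kernel can serve. The following is not Paddle-Lite's actual picker, only a self-contained sketch of that first-match-wins idea; every name in it is invented for illustration:

// Toy "first matching place wins" selection, mirroring the ordering above.
#include <iostream>
#include <string>
#include <vector>

struct PlaceDesc {  // simplified stand-in for lite::Place
  std::string target, precision, layout;
};

// Returns the index of the first preferred place the kernel supports, or -1.
int PickPlace(const std::vector<PlaceDesc>& preferred,
              const std::vector<PlaceDesc>& supported) {
  for (size_t i = 0; i < preferred.size(); ++i) {
    for (const auto& s : supported) {
      if (preferred[i].target == s.target &&
          preferred[i].precision == s.precision &&
          preferred[i].layout == s.layout) {
        return static_cast<int>(i);
      }
    }
  }
  return -1;
}

int main() {
  std::vector<PlaceDesc> preferred = {
      {"kOpenCL", "kFloat", "kImageDefault"},
      {"kOpenCL", "kFloat", "kNCHW"},
      {"kARM", "kFloat", "kNCHW"},  // CPU fallback, like TARGET(kARM) above
  };
  std::vector<PlaceDesc> conv_kernel = {{"kOpenCL", "kFloat", "kImageDefault"}};
  std::vector<PlaceDesc> cpu_only_op = {{"kARM", "kFloat", "kNCHW"}};
  std::cout << PickPlace(preferred, conv_kernel) << "\n";  // 0: image2d kernel
  std::cout << PickPlace(preferred, cpu_only_op) << "\n";  // 2: ARM fallback
  return 0;
}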
@@ -362,6 +362,9 @@ void ConvImageCompute::PrepareForRun() {
 filter_image_dims[0], filter_image_dims[1], filter_image_v.data());
 impl_ = &ConvImageCompute::Conv2d1x1;
+#if 1 // TODO(ysh329): enable general dwconv
+} else if (filter_dims[1] == 1 && x_dims[1] == output_dims[1]) {
+#else // TODO(ysh329): remove dwconv3x3s1 and dwconv3x3 temporarily, need fix
 } else if (filter_dims[1] == 1 && x_dims[1] == output_dims[1] &&
 kernel_h == 3 && kernel_w == 3 && groups > 1) {
 // depth_conv2d_3x3s1, depth_conv2d_3x3
@@ -374,7 +377,7 @@ void ConvImageCompute::PrepareForRun() {
 }
 kernel_func_paths_.push_back("image/depthwise_conv2d_kernel.cl");
-CLImageConverterDWBlock converter;
+CLImageConverterNWBlock converter;
 const DDim& filter_image_dims = converter.InitImageDimInfoWith(filter_dims);
 std::vector<float> filter_image_v(filter_image_dims[0] *
 filter_image_dims[1] * 4); // 4 : RGBA
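Both depthwise branches stage the filter as a filter_image_dims[0] x filter_image_dims[1] x 4 float buffer and upload it as an RGBA image2d; the converter classes only decide how tensor elements map onto those pixels. As an assumption-level illustration of what backing such an image with host data looks like at the OpenCL C++ wrapper level (not Paddle-Lite's mutable_data<float, cl::Image2D> implementation, which may also use half floats):

// Sketch: allocate an RGBA float image2d initialized from a host buffer.
#include <CL/cl.hpp>  // header name varies across OpenCL C++ binding versions
#include <vector>

cl::Image2D MakeFilterImage(const cl::Context& ctx,
                            size_t width, size_t height,
                            std::vector<float>& host_rgba) {  // width*height*4
  cl::ImageFormat fmt(CL_RGBA, CL_FLOAT);  // 4 floats per pixel: RGBA
  return cl::Image2D(ctx, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, fmt,
                     width, height, /*row_pitch=*/0, host_rgba.data());
}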
@@ -382,12 +385,13 @@ void ConvImageCompute::PrepareForRun() {
 filter_gpu_image_.mutable_data<float, cl::Image2D>(
 filter_image_dims[0], filter_image_dims[1], filter_image_v.data());
 } else if (filter_dims[1] == 1 && x_dims[1] == output_dims[1] &&
-kernel_h != 3 && groups > 1) {
+kernel_h != 3) {
+#endif
 // depth_conv2d
 kernel_func_names_.push_back("depth_conv2d");
 kernel_func_paths_.push_back("image/depthwise_conv2d_basic_kernel.cl");
-CLImageConverterDWBlock converter;
+CLImageConverterNWBlock converter;
 const DDim& filter_image_dims = converter.InitImageDimInfoWith(filter_dims);
 std::vector<float> filter_image_v(filter_image_dims[0] *
 filter_image_dims[1] * 4); // 4 : RGBA
......
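Taken together, the PrepareForRun hunks leave the 1x1 path alone, temporarily bypass the dedicated 3x3 depthwise kernels behind #if 1, route every depthwise conv (per-channel filter with the channel count preserved) through the basic depth_conv2d kernel, and switch the filter converter from CLImageConverterDWBlock to CLImageConverterNWBlock. A simplified restatement of the resulting branch order follows; the 1x1 condition and the general-conv fallback are assumed here, and the real function installs kernel names, .cl paths, and converters rather than returning a string:

// Simplified sketch of the kernel-dispatch logic, not the real PrepareForRun.
#include <cstdint>
#include <string>
#include <vector>

std::string PickConvKernel(const std::vector<int64_t>& x_dims,       // NCHW input
                           const std::vector<int64_t>& filter_dims,  // OIHW filter
                           const std::vector<int64_t>& output_dims,
                           int groups) {
  const int64_t kernel_h = filter_dims[2];
  const int64_t kernel_w = filter_dims[3];
  (void)groups;  // the bypassed 3x3 path additionally required groups > 1
  if (kernel_h == 1 && kernel_w == 1) {
    return "conv2d_1x1";  // handled by ConvImageCompute::Conv2d1x1 above
  } else if (filter_dims[1] == 1 && x_dims[1] == output_dims[1]) {
    // With the `#if 1` block active, all depthwise convs take this branch
    // (depthwise_conv2d_basic_kernel.cl) until the 3x3 variants are fixed.
    return "depth_conv2d";
  }
  return "conv2d";  // generic fallback, assumed for completeness
}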