diff --git a/lite/api/resnet50_test_fpga.cc b/lite/api/resnet50_test_fpga.cc
index ab647f96998f1c0e73476369611218d0a7930c57..75e6f0cbbc43c3cd7eb9bfa89bc004554ea6f85b 100644
--- a/lite/api/resnet50_test_fpga.cc
+++ b/lite/api/resnet50_test_fpga.cc
@@ -31,11 +31,7 @@ TEST(ResNet50, test) {
   std::vector<Place> valid_places(
       {Place{TARGET(kFPGA), PRECISION(kFP16), DATALAYOUT(kNHWC)}});
 
-  predictor.Build(FLAGS_model_dir,
-                  "",
-                  "",
-                  Place{TARGET(kFPGA), PRECISION(kFP16), DATALAYOUT(kNHWC)},
-                  valid_places);
+  predictor.Build(FLAGS_model_dir, "", "", valid_places);
 
   auto* input_tensor = predictor.GetInput(0);
   input_tensor->Resize(DDim(std::vector<DDim::value_type>({1, 3, 224, 224})));
diff --git a/lite/gen_code/paddle_infer.h b/lite/gen_code/paddle_infer.h
index e01ffc25e29ca94166e8fe12b0643ae9e914001d..2449e1e5d3fb721a39760e78a0417bf9491d8cef 100644
--- a/lite/gen_code/paddle_infer.h
+++ b/lite/gen_code/paddle_infer.h
@@ -46,7 +46,7 @@ class Tensor {
  */
 class PaddlePredictor {
  public:
-  void Init();
+  void Init() {}
 
   std::unique_ptr<Tensor> GetTensor(const std::string &id) const;
   std::unique_ptr<Tensor> GetMutableTensor(const std::string &id);
diff --git a/lite/kernels/fpga/conv_compute.cc b/lite/kernels/fpga/conv_compute.cc
old mode 100755
new mode 100644
index b8d80fe14cabd607835d78616a68f4457afcf03f..50ad0b8a3c67aed3edf52d0596b2c0e38f83dca9
--- a/lite/kernels/fpga/conv_compute.cc
+++ b/lite/kernels/fpga/conv_compute.cc
@@ -13,9 +13,9 @@
 // limitations under the License.
 
 #include "lite/kernels/fpga/conv_compute.h"
+#include <vector>
 #include "lite/core/op_registry.h"
 #include "lite/core/type_system.h"
-
 namespace paddle {
 namespace lite {
 namespace kernels {
@@ -26,9 +26,11 @@ using float16 = zynqmp::float16;
 void ConvCompute::PrepareForRun() {
   auto& param = this->Param<param_t>();
   param.output->mutable_data<float16>();
+  int pad_h = (*param.paddings)[0];
+  int pad_w = (*param.paddings)[2];
   // ====================================================
-  if (param.x->ZynqTensor()->shape().channel() != 1 &&
-      param.groups == param.x->ZynqTensor()->shape().channel()) {
+  if (param.x->ZynqTensor()->shape().channel() != 1 &&
+      param.groups == param.x->ZynqTensor()->shape().channel()) {
     zynqmp::DepthwiseConvParam& conv_param = dw_conv_pe_.param();
 
     conv_param.input = param.x->ZynqTensor();
@@ -37,8 +39,8 @@ void ConvCompute::PrepareForRun() {
     conv_param.filter->setDataType(zynqmp::FP32);
     conv_param.groups = param.groups;
     conv_param.strides = param.strides;
-    conv_param.paddings = param.paddings;
-    conv_param.dilations = param.dilations;
+    conv_param.paddings = std::vector<int>({pad_h, pad_w});
+    conv_param.dilations = *param.dilations;
     fill_scale_bias_const(&conv_param);
     conv_param.bias()->copyFrom(param.bias->ZynqTensor());
     conv_param.relu.enabled = param.fuse_relu;
@@ -53,14 +55,14 @@ void ConvCompute::PrepareForRun() {
     conv_param.filter->setDataType(zynqmp::FP32);
     conv_param.groups = param.groups;
     conv_param.strides = param.strides;
-    conv_param.paddings = param.paddings;
-    conv_param.dilations = param.dilations;
+    conv_param.paddings = std::vector<int>({pad_h, pad_w});
+    conv_param.dilations = *param.dilations;
     fill_scale_bias_const(&conv_param);
     if (param.bias != nullptr) {
       conv_param.bias()->copyFrom(param.bias->ZynqTensor());
       std::cout << "copy bias \n";
     }
-
+
     conv_param.relu.enabled = param.fuse_relu;
 
     // conv_param.filter->saveToFile("filter", true);
@@ -74,8 +76,8 @@ void ConvCompute::PrepareForRun() {
 void ConvCompute::Run() {
   auto& param = this->Param<param_t>();
   // std::cout << "in:" << param.x->ZynqTensor()->data<float16>() << std::endl;
-  if (param.x->ZynqTensor()->shape().channel() != 1 &&
-      param.groups == param.x->ZynqTensor()->shape().channel()) {
+  if (param.x->ZynqTensor()->shape().channel() != 1 &&
+      param.groups == param.x->ZynqTensor()->shape().channel()) {
     dw_conv_pe_.dispatch();
     // param.output->ZynqTensor()->saveToFile("dw", true);
   } else {
diff --git a/lite/kernels/fpga/conv_compute_test.cc b/lite/kernels/fpga/conv_compute_test.cc
old mode 100755
new mode 100644
index f166974cc9f2fd856defd753e1e9131858d41252..7db855a0fe10d775cec07ad30e67f00c8230940a
--- a/lite/kernels/fpga/conv_compute_test.cc
+++ b/lite/kernels/fpga/conv_compute_test.cc
@@ -143,11 +143,11 @@ void conv_compute_ref(const operators::ConvParam& param) {
   int kernel_h = param.filter->dims()[3];
   int stride_w = param.strides[0];
   int stride_h = param.strides[1];
-  int dila_w = param.dilations[0];
-  int dila_h = param.dilations[1];
+  int dila_w = (*param.dilations)[0];
+  int dila_h = (*param.dilations)[1];
 
-  int pad_w = param.paddings[0];
-  int pad_h = param.paddings[1];
+  int pad_w = (*param.paddings)[2];
+  int pad_h = (*param.paddings)[0];
   bool flag_bias = (param.bias != nullptr);
   bool flag_relu = param.fuse_relu;
 
@@ -277,9 +277,10 @@ TEST(conv_fpga, compute) {
                 param.bias = &bias;
               }
              param.fuse_relu = flag_relu;
-              param.paddings = std::vector<int>({padding, padding});
+              *param.paddings = std::vector<int>(
+                  {padding, padding, padding, padding});
               param.strides = std::vector<int>({stride, stride});
-              param.dilations =
+              *param.dilations =
                   std::vector<int>({dilation, dilation});
               param.groups = group;
               conv.SetParam(param);
diff --git a/lite/kernels/fpga/pooling_compute.cc b/lite/kernels/fpga/pooling_compute.cc
index 911981bc8f09e502773c562ac31a0420f8502ed9..1dd320f007a7c13bd9752af1984702ea96844209 100755
--- a/lite/kernels/fpga/pooling_compute.cc
+++ b/lite/kernels/fpga/pooling_compute.cc
@@ -38,7 +38,9 @@ void PoolCompute::PrepareForRun() {
   pool_param.globalPooling = param.global_pooling;
   pool_param.kernelSize = param.ksize;
   pool_param.strides = param.strides;
-  pool_param.paddings = param.paddings;
+  int pad_h = (*param.paddings)[0];
+  int pad_w = (*param.paddings)[2];
+  pool_param.paddings = std::vector<int>({pad_h, pad_w});
   pe_.init();
   pe_.apply();
 }
diff --git a/lite/kernels/fpga/pooling_compute_test.cc b/lite/kernels/fpga/pooling_compute_test.cc
index 2309bf8fe4aa1c083e1662556ac49bf4357ab07a..9248289fe9353705e7a2d84831b9f3de5d8ee7d7 100755
--- a/lite/kernels/fpga/pooling_compute_test.cc
+++ b/lite/kernels/fpga/pooling_compute_test.cc
@@ -46,7 +46,7 @@ std::vector<int64_t> compute_output_shape(operators::PoolParam* param_) {
   if (param_->global_pooling) {
     ksize.resize(static_cast<size_t>(x_dims.size()) - 2);
     for (size_t i = 0; i < ksize.size(); ++i) {
-      param_->paddings[i] = 0;
+      (*param_->paddings)[i] = 0;
       ksize[i] = static_cast<int>(x_dims[i + 2]);
     }
   }
@@ -59,7 +59,7 @@ std::vector<int64_t> compute_output_shape(operators::PoolParam* param_) {
   for (size_t i = 0; i < param_->ksize.size(); ++i) {
     output_shape.push_back(PoolOutputSize(x_dims[i + 2],
                                           param_->ksize[i],
-                                          param_->paddings[i],
+                                          (*param_->paddings)[i],
                                           param_->strides[i],
                                           param_->ceil_mode));
   }
@@ -76,7 +76,7 @@ void pool_compute_ref(const operators::PoolParam& param) {
 
   std::vector<int> ksize = param.ksize;
   std::vector<int> strides = param.strides;
-  std::vector<int> paddings = param.paddings;
+  std::vector<int> paddings = *param.paddings;
   std::string pooling_type = param.pooling_type;
   bool global_pooling = param.global_pooling;
 
@@ -103,7 +103,7 @@ void pool_compute_ref(const operators::PoolParam& param) {
   int stride_h = strides[0];
   int stride_w = strides[1];
   int pad_h = paddings[0];
-  int pad_w = paddings[1];
+  int pad_w = paddings[2];
 
   if (global_pooling == true) {
     for (int n = 0; n < in_n; ++n) {
@@ -230,7 +230,7 @@ TEST(pool_fpga, compute) {
             }
             param.global_pooling = global_pooling;
             param.strides = {stride, stride};
-            param.paddings = {pad, pad};
+            *param.paddings = {pad, pad, pad, pad};
             param.exclusive = exclusive;
             param.ceil_mode = ceil_mode;
             param.adaptive = false;
diff --git a/lite/tools/build_fpga.sh b/lite/tools/build_fpga.sh
index c6047c4bcf4e18d6305e462fe66d597db442975e..b97540684f3f9a38f051905f424b16f87d65ebda 100755
--- a/lite/tools/build_fpga.sh
+++ b/lite/tools/build_fpga.sh
@@ -2,12 +2,16 @@
 build_dir=build_fpga
 mkdir -p ${build_dir}
 
-cd ${build_dir}
 
-GEN_CODE_PATH_PREFIX=lite/gen_code
-mkdir -p ./${GEN_CODE_PATH_PREFIX}
-touch ./${GEN_CODE_PATH_PREFIX}/__generated_code__.cc
+root_dir=$(pwd)
+build_dir=${build_dir}
+# in build directory
+# 1. Prepare gen_code file
+GEN_CODE_PATH_PREFIX=${build_dir}/lite/gen_code
+mkdir -p ${GEN_CODE_PATH_PREFIX}
+touch ${GEN_CODE_PATH_PREFIX}/__generated_code__.cc
 
+cd ${build_dir}
 cmake .. \
     -DWITH_GPU=OFF \
     -DWITH_MKL=OFF \
@@ -23,6 +27,5 @@ cmake .. \
     -DLITE_BUILD_EXTRA=ON \
     -DLITE_WITH_PROFILE=ON
 
-make -j32
-
+make -j4
 cd -
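
Aside on the recurring `(*param.paddings)[0]` / `(*param.paddings)[2]` pattern above: the kernel changes assume the refactored operator params keep `paddings` and `dilations` behind `std::shared_ptr<std::vector<int>>`, with four padding entries that appear to be ordered {top, bottom, left, right}, while the zynqmp PEs still consume a two-element {pad_h, pad_w} vector. A minimal standalone sketch of that assumed convention (the local names here are illustrative, not from the patch):

#include <cassert>
#include <memory>
#include <vector>

int main() {
  // Assumed post-refactor layout: {pad_top, pad_bottom, pad_left, pad_right}.
  auto paddings = std::make_shared<std::vector<int>>(
      std::initializer_list<int>{1, 1, 2, 2});

  int pad_h = (*paddings)[0];  // vertical padding taken from index 0
  int pad_w = (*paddings)[2];  // horizontal padding taken from index 2

  // The FPGA PEs in the patch still take a two-element {pad_h, pad_w}.
  std::vector<int> pe_paddings({pad_h, pad_w});
  assert(pe_paddings[0] == 1 && pe_paddings[1] == 2);
  return 0;
}

This also explains why the tests now write four values (`*param.paddings = {pad, pad, pad, pad};`) but the reference implementations read only indices 0 and 2.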