From d0177f954c9a7657752be8738d5b42eb36e5ad87 Mon Sep 17 00:00:00 2001 From: chonwhite Date: Fri, 27 Mar 2020 17:15:17 +0800 Subject: [PATCH] fixed sequence_pool bug --- CMakeLists.txt | 5 +++++ lite/backends/fpga/KD/llapi/filter.cpp | 4 ++-- lite/backends/fpga/KD/llapi/zynqmp_api.cpp | 2 +- lite/backends/fpga/KD/pes/input_pe.hpp | 10 +--------- lite/backends/fpga/KD/pes/output_pe.hpp | 6 ++++-- lite/backends/fpga/KD/pes/pooling_pe.hpp | 2 +- lite/kernels/arm/sequence_pool_compute.cc | 1 + lite/kernels/fpga/CMakeLists.txt | 1 - lite/kernels/fpga/fetch_compute.cc | 2 +- lite/kernels/host/one_hot_compute.cc | 2 +- lite/operators/one_hot_op.cc | 2 +- 11 files changed, 18 insertions(+), 19 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 786b1322b3..288be06b78 100755 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -24,6 +24,11 @@ endif(WITH_PADDLE_MOBILE) # set(CMAKE_BUILD_TYPE DEBUG) + +SET(CMAKE_BUILD_TYPE "Release") +SET(CMAKE_CXX_FLAGS_RELEASE "$ENV{CXXFLAGS} -O3 -Wall") + + set(PADDLE_SOURCE_DIR ${CMAKE_CURRENT_SOURCE_DIR}) set(PADDLE_BINARY_DIR ${CMAKE_CURRENT_BINARY_DIR}) set(CMAKE_CXX_STANDARD 11) diff --git a/lite/backends/fpga/KD/llapi/filter.cpp b/lite/backends/fpga/KD/llapi/filter.cpp index b6932bc27f..54c10f50af 100755 --- a/lite/backends/fpga/KD/llapi/filter.cpp +++ b/lite/backends/fpga/KD/llapi/filter.cpp @@ -234,8 +234,8 @@ int8_t* format_filter(float* data_in, float* filter_start = data_in + n * chw; float f_max = find_max(filter_start, chw); int8_t* quantized_start = quantized_data + n * chw; - quantize(filter_start, quantized_start, chw, max); - filter_max.push_back(max); + quantize(filter_start, quantized_start, chw, f_max); + filter_max.push_back(f_max); } int8_t* hwc_data = diff --git a/lite/backends/fpga/KD/llapi/zynqmp_api.cpp b/lite/backends/fpga/KD/llapi/zynqmp_api.cpp index 68d0b6c68b..b8b7dfb887 100644 --- a/lite/backends/fpga/KD/llapi/zynqmp_api.cpp +++ b/lite/backends/fpga/KD/llapi/zynqmp_api.cpp @@ -187,7 +187,7 @@ int get_device_info(const struct DeviceInfo &args) { int perform_bypass(const struct BypassArgs &args) { int ret = -1; int size = args.image.channels * args.image.width * args.image.height; - int max_size = 1 << 21; + int max_size = 1 << 22; float times = 1.0 * size / max_size; int count = static_cast(times); diff --git a/lite/backends/fpga/KD/pes/input_pe.hpp b/lite/backends/fpga/KD/pes/input_pe.hpp index abae094d97..d8f9a15c6a 100755 --- a/lite/backends/fpga/KD/pes/input_pe.hpp +++ b/lite/backends/fpga/KD/pes/input_pe.hpp @@ -29,28 +29,20 @@ class InputPE : public PE { } bool dispatch() { - std::cout << "input_dispatch()\n"; + // std::cout << "input_dispatch()\n"; Tensor* input = param_.input; Tensor* output = param_.output; Tensor* src = input; - // std::cout << "input:" << input << std::endl; input->flush(); - // std::cout << "input_flush()\n"; Tensor half_tensor; if (input->dataType() == DataType::FP32) { - // std::cout << "2()\n"; half_tensor.mutableData(DataType::FP16, input->shape()); - // std::cout << "3()\n"; half_tensor.copyFrom(input); - // std::cout << "4()\n"; src = &half_tensor; } - // std::cout << "5()\n"; output->mutableData(); - // std::cout << "6()\n"; src->alignImage(output, true); - // std::cout << "7()\n"; return true; } diff --git a/lite/backends/fpga/KD/pes/output_pe.hpp b/lite/backends/fpga/KD/pes/output_pe.hpp index 53da0c5be7..2d02d30fba 100755 --- a/lite/backends/fpga/KD/pes/output_pe.hpp +++ b/lite/backends/fpga/KD/pes/output_pe.hpp @@ -54,8 +54,10 @@ class OutputPE : public PE { output->data(), output->shape().numel() * sizeof(float)); - // auto max = fpga_get_memory_size_max(); - // std::cout << "===== Max: ===== :: " << max << std::endl; + fpga_reset(); + + auto max = fpga_get_memory_size_max(); + std::cout << "PL ===== Max: ===== :: " << max << std::endl; return true; } diff --git a/lite/backends/fpga/KD/pes/pooling_pe.hpp b/lite/backends/fpga/KD/pes/pooling_pe.hpp index 84ed4f946e..bec99a5394 100755 --- a/lite/backends/fpga/KD/pes/pooling_pe.hpp +++ b/lite/backends/fpga/KD/pes/pooling_pe.hpp @@ -67,7 +67,7 @@ class PoolingPE : public PE { use_cpu_ = output->shape().width() == 1 && output->shape().height() == 1 && (k_width > 255 || k_height > 255); - use_cpu_ = param_.type == AVERAGE; + // use_cpu_ = param_.type == AVERAGE; } void compute() { diff --git a/lite/kernels/arm/sequence_pool_compute.cc b/lite/kernels/arm/sequence_pool_compute.cc index 8fcbb8cffe..93072fe499 100644 --- a/lite/kernels/arm/sequence_pool_compute.cc +++ b/lite/kernels/arm/sequence_pool_compute.cc @@ -59,6 +59,7 @@ void SequencePoolCompute::Run() { for (int i = 0; i <= batch_size; i++) { offset_new[i] = i; } + (output->mutable_lod())->clear(); (output->mutable_lod())->push_back(offset_new); } diff --git a/lite/kernels/fpga/CMakeLists.txt b/lite/kernels/fpga/CMakeLists.txt index e71e5255ca..90cd2bfd36 100755 --- a/lite/kernels/fpga/CMakeLists.txt +++ b/lite/kernels/fpga/CMakeLists.txt @@ -14,7 +14,6 @@ add_kernel(conv_compute_fpga FPGA basic SRCS conv_compute.cc DEPS ${fpga_deps}) # add_kernel(density_prior_box_compute_fpga FPGA basic SRCS density_prior_box_compute.cc DEPS ${fpga_deps}) add_kernel(dropout_compute_fpga FPGA basic SRCS dropout_compute.cc DEPS ${fpga_deps}) add_kernel(elementwise_compute_fpga FPGA basic SRCS elementwise_compute.cc DEPS ${fpga_deps}) -# add_kernel(feed_compute_fpga FPGA basic SRCS fc_compute.cc DEPS ${fpga_deps}) add_kernel(fc_compute_fpga FPGA basic SRCS fc_compute.cc DEPS ${fpga_deps}) add_kernel(gru_compute_fpga FPGA extra SRCS gru_compute.cc DEPS ${fpga_deps}) diff --git a/lite/kernels/fpga/fetch_compute.cc b/lite/kernels/fpga/fetch_compute.cc index 2d296f4d4a..71ec37a64d 100755 --- a/lite/kernels/fpga/fetch_compute.cc +++ b/lite/kernels/fpga/fetch_compute.cc @@ -82,6 +82,6 @@ REGISTER_LITE_KERNEL(fetch, kNHWC, paddle::lite::kernels::fpga::FetchCompute, host_host) - .BindInput("X", {LiteType::GetTensorTy(TARGET(kHost))}) + .BindInput("X", {LiteType::GetTensorTy(TARGET(kARM))}) .BindOutput("Out", {LiteType::GetTensorTy(TARGET(kHost))}) .Finalize(); diff --git a/lite/kernels/host/one_hot_compute.cc b/lite/kernels/host/one_hot_compute.cc index e0af6f5173..e1bf4c103b 100755 --- a/lite/kernels/host/one_hot_compute.cc +++ b/lite/kernels/host/one_hot_compute.cc @@ -16,7 +16,7 @@ #include #include -#include "lite/backends/fpga/KD/debugger.hpp" +// #include "lite/backends/fpga/KD/debugger.hpp" #include "lite/kernels/host/one_hot_compute.h" #include "lite/utils/paddle_enforce.h" diff --git a/lite/operators/one_hot_op.cc b/lite/operators/one_hot_op.cc index 023cdc23ae..ebab9e2067 100644 --- a/lite/operators/one_hot_op.cc +++ b/lite/operators/one_hot_op.cc @@ -15,7 +15,7 @@ #include "lite/operators/one_hot_op.h" #include "lite/core/op_registry.h" -#include "lite/backends/fpga/KD/debugger.hpp" +// #include "lite/backends/fpga/KD/debugger.hpp" namespace paddle { namespace lite { -- GitLab