Commit d0177f95 authored by chonwhite

fixed sequence_pool bug

Parent 4ab75386
@@ -24,6 +24,11 @@ endif(WITH_PADDLE_MOBILE)
 # set(CMAKE_BUILD_TYPE DEBUG)
+SET(CMAKE_BUILD_TYPE "Release")
+SET(CMAKE_CXX_FLAGS_RELEASE "$ENV{CXXFLAGS} -O3 -Wall")
 set(PADDLE_SOURCE_DIR ${CMAKE_CURRENT_SOURCE_DIR})
 set(PADDLE_BINARY_DIR ${CMAKE_CURRENT_BINARY_DIR})
 set(CMAKE_CXX_STANDARD 11)
......
@@ -234,8 +234,8 @@ int8_t* format_filter(float* data_in,
     float* filter_start = data_in + n * chw;
     float f_max = find_max(filter_start, chw);
     int8_t* quantized_start = quantized_data + n * chw;
-    quantize(filter_start, quantized_start, chw, max);
-    filter_max.push_back(max);
+    quantize(filter_start, quantized_start, chw, f_max);
+    filter_max.push_back(f_max);
   }
   int8_t* hwc_data =
......
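Note on the format_filter change above: the loop computes a per-filter maximum `f_max`, but the old code passed a stale `max` from an enclosing scope to `quantize()` and recorded it in `filter_max`, so filters could be scaled against the wrong range. A minimal sketch of the intended per-filter flow, assuming symmetric int8 quantization; the helper bodies below are illustrative assumptions, not the repository's implementations:

```cpp
#include <algorithm>
#include <cmath>
#include <cstdint>

// Assumed helper: largest absolute value across one filter's chw elements.
float find_max(const float* data, int num) {
  float max = 0.0f;
  for (int i = 0; i < num; i++) max = std::max(max, std::fabs(data[i]));
  return max;
}

// Assumed helper: symmetric int8 quantization against the filter's own max.
void quantize(const float* in, int8_t* out, int num, float max) {
  float scale = max > 0.0f ? 127.0f / max : 0.0f;  // map [-max, max] to [-127, 127]
  for (int i = 0; i < num; i++) {
    out[i] = static_cast<int8_t>(std::round(in[i] * scale));
  }
}
```

With `f_max` threaded through both calls, each filter is quantized against its own range and the matching scale lands in `filter_max` for later dequantization.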
@@ -187,7 +187,7 @@ int get_device_info(const struct DeviceInfo &args) {
 int perform_bypass(const struct BypassArgs &args) {
   int ret = -1;
   int size = args.image.channels * args.image.width * args.image.height;
-  int max_size = 1 << 21;
+  int max_size = 1 << 22;
   float times = 1.0 * size / max_size;
   int count = static_cast<int>(times);
......
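The bypass change doubles the per-pass ceiling from 1 << 21 to 1 << 22 elements, halving the number of passes needed for large images. The surrounding arithmetic splits an oversized transfer into `count` full chunks plus a tail; a self-contained sketch of that logic, where `bypass_chunk` is a hypothetical stand-in for the driver's per-chunk call:

```cpp
#include <cstdio>

// Hypothetical stand-in for the real per-chunk bypass call.
void bypass_chunk(int offset, int len) {
  std::printf("bypass elements [%d, %d)\n", offset, offset + len);
}

void perform_bypass_sketch(int size) {
  int max_size = 1 << 22;  // per-pass ceiling, raised by this commit
  float times = 1.0f * size / max_size;
  int count = static_cast<int>(times);       // number of full chunks
  int remainder = size - count * max_size;   // tail smaller than max_size
  for (int i = 0; i < count; i++) bypass_chunk(i * max_size, max_size);
  if (remainder > 0) bypass_chunk(count * max_size, remainder);
}
```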
@@ -29,28 +29,20 @@ class InputPE : public PE {
   }
   bool dispatch() {
-    std::cout << "input_dispatch()\n";
+    // std::cout << "input_dispatch()\n";
     Tensor* input = param_.input;
     Tensor* output = param_.output;
     Tensor* src = input;
-    // std::cout << "input:" << input << std::endl;
     input->flush();
-    // std::cout << "input_flush()\n";
     Tensor half_tensor;
     if (input->dataType() == DataType::FP32) {
-      // std::cout << "2()\n";
       half_tensor.mutableData<void*>(DataType::FP16, input->shape());
-      // std::cout << "3()\n";
       half_tensor.copyFrom(input);
-      // std::cout << "4()\n";
       src = &half_tensor;
     }
-    // std::cout << "5()\n";
     output->mutableData<void>();
-    // std::cout << "6()\n";
     src->alignImage(output, true);
-    // std::cout << "7()\n";
     return true;
   }
......
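The InputPE hunk is mostly debug-print cleanup, but the surviving logic is worth annotating: FP32 inputs are staged through a stack-local FP16 tensor before layout alignment, since the FPGA pipeline consumes FP16. The excerpt below repeats the hunk's code with interpretive comments added; the comments are this note's reading of the flow, not the author's:

```cpp
bool dispatch() {
  Tensor* input = param_.input;
  Tensor* output = param_.output;
  Tensor* src = input;
  input->flush();                  // push host-side writes out to memory

  Tensor half_tensor;              // staging buffer, scoped to this call
  if (input->dataType() == DataType::FP32) {
    // Convert FP32 to the FP16 layout the downstream FPGA kernels expect.
    half_tensor.mutableData<void*>(DataType::FP16, input->shape());
    half_tensor.copyFrom(input);
    src = &half_tensor;
  }
  output->mutableData<void>();     // make sure the output buffer is allocated
  src->alignImage(output, true);   // realign image data into the output
  return true;
}
```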
@@ -54,8 +54,10 @@ class OutputPE : public PE {
            output->data<void>(),
            output->shape().numel() * sizeof(float));
-    // auto max = fpga_get_memory_size_max();
-    // std::cout << "===== Max: ===== :: " << max << std::endl;
+    fpga_reset();
+
+    auto max = fpga_get_memory_size_max();
+    std::cout << "PL ===== Max: ===== :: " << max << std::endl;
     return true;
   }
......
@@ -67,7 +67,7 @@ class PoolingPE : public PE {
     use_cpu_ = output->shape().width() == 1 && output->shape().height() == 1 &&
                (k_width > 255 || k_height > 255);
-    use_cpu_ = param_.type == AVERAGE;
+    // use_cpu_ = param_.type == AVERAGE;
   }
   void compute() {
......
@@ -59,6 +59,7 @@ void SequencePoolCompute::Run() {
     for (int i = 0; i <= batch_size; i++) {
       offset_new[i] = i;
     }
+    (output->mutable_lod())->clear();
     (output->mutable_lod())->push_back(offset_new);
   }
......
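This one-line clear() is the sequence_pool fix named in the commit message: Run() can execute more than once on the same output tensor, and push_back alone kept appending a fresh LoD level after the stale ones instead of replacing them, so consumers reading level 0 saw outdated offsets. A minimal standalone sketch, assuming the usual LoD representation of std::vector<std::vector<uint64_t>>:

```cpp
#include <cstdint>
#include <vector>

using LoD = std::vector<std::vector<uint64_t>>;  // assumed LoD layout

// After pooling, every sequence collapses to length one, so the output LoD
// is just {0, 1, ..., batch_size} on a single level.
void reset_output_lod(LoD* lod, int batch_size) {
  std::vector<uint64_t> offset_new(batch_size + 1);
  for (int i = 0; i <= batch_size; i++) offset_new[i] = i;
  lod->clear();                 // the fix: drop levels left by earlier runs
  lod->push_back(offset_new);   // exactly one level remains
}
```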
@@ -14,7 +14,6 @@ add_kernel(conv_compute_fpga FPGA basic SRCS conv_compute.cc DEPS ${fpga_deps})
 # add_kernel(density_prior_box_compute_fpga FPGA basic SRCS density_prior_box_compute.cc DEPS ${fpga_deps})
 add_kernel(dropout_compute_fpga FPGA basic SRCS dropout_compute.cc DEPS ${fpga_deps})
 add_kernel(elementwise_compute_fpga FPGA basic SRCS elementwise_compute.cc DEPS ${fpga_deps})
-# add_kernel(feed_compute_fpga FPGA basic SRCS fc_compute.cc DEPS ${fpga_deps})
 add_kernel(fc_compute_fpga FPGA basic SRCS fc_compute.cc DEPS ${fpga_deps})
 add_kernel(gru_compute_fpga FPGA extra SRCS gru_compute.cc DEPS ${fpga_deps})
......
@@ -82,6 +82,6 @@ REGISTER_LITE_KERNEL(fetch,
                      kNHWC,
                      paddle::lite::kernels::fpga::FetchCompute,
                      host_host)
-    .BindInput("X", {LiteType::GetTensorTy(TARGET(kHost))})
+    .BindInput("X", {LiteType::GetTensorTy(TARGET(kARM))})
     .BindOutput("Out", {LiteType::GetTensorTy(TARGET(kHost))})
     .Finalize();
@@ -16,7 +16,7 @@
 #include <utility>
 #include <vector>
-#include "lite/backends/fpga/KD/debugger.hpp"
+// #include "lite/backends/fpga/KD/debugger.hpp"
 #include "lite/kernels/host/one_hot_compute.h"
 #include "lite/utils/paddle_enforce.h"
......
@@ -15,7 +15,7 @@
 #include "lite/operators/one_hot_op.h"
 #include "lite/core/op_registry.h"
-#include "lite/backends/fpga/KD/debugger.hpp"
+// #include "lite/backends/fpga/KD/debugger.hpp"
 namespace paddle {
 namespace lite {
......