diff --git a/lite/backends/fpga/KD/debugger.hpp b/lite/backends/fpga/KD/debugger.hpp old mode 100755 new mode 100644 index 33efaf20169dfad4035d40d3ca02ac7dc7047db3..2b9b23070616baf18f347c6b2af2d87a300d428f --- a/lite/backends/fpga/KD/debugger.hpp +++ b/lite/backends/fpga/KD/debugger.hpp @@ -17,7 +17,6 @@ #include #include -// #include "lite/backends/fpga/lite_tensor.h" #include "lite/core/tensor.h" namespace paddle { @@ -33,9 +32,7 @@ class Debugger { } void registerOutput(std::string op_type, zynqmp::Tensor* tensor) { - // tensor->printScale(); - if (op_type != "conv") { - // tensor->saveToFile(op_type, true); + if (op_type != "conv") { // NOLINT } } @@ -60,7 +57,6 @@ inline void chw_to_hwc(Tensor* t, float* dst) { if (t->dims().size() > 3) { width = t->dims()[3]; } - // int width = t->dims()[3]; const float* chw_data = t->data(); float* hwc_data = dst; @@ -92,11 +88,9 @@ inline void read_from_file(lite::Tensor* t, const std::string& path) { file_stream >> value; data[i] = value; } - // flush(); } inline void save_float(float* data, const std::string& name, int len) { - // return; static int counter = 0; std::string old_string = std::to_string(counter); std::string new_string = @@ -105,12 +99,8 @@ inline void save_float(float* data, const std::string& name, int len) { std::string file = "arm_" + new_string + name; counter++; - std::cout - << "-------------------------- saving file: --------------------------" - << file << std::endl; std::ofstream ofs; ofs.open(file); - // float* data = dst; for (int i = 0; i < len; i++) { float value = data[i]; ofs << value << std::endl; @@ -135,7 +125,6 @@ inline void save_tensor(lite::Tensor* t, inline void save_tensor(const lite::Tensor* t, const std::string& name, bool convert = true) { - // return; float* data = const_cast(t->data()); float* dst = new float[t->numel()]; if (convert) { diff --git a/lite/backends/fpga/KD/llapi/filter.cpp b/lite/backends/fpga/KD/llapi/filter.cpp index dcb7dbe8775ae66b909bfea04af8756c7f683d15..30250969b6fbe6e9e5ce7e9f96f963e8bee89224 100644 --- a/lite/backends/fpga/KD/llapi/filter.cpp +++ b/lite/backends/fpga/KD/llapi/filter.cpp @@ -28,26 +28,26 @@ static int FILTER_SIZE = 2048; static int COLUMN = 4; void saveToFile(std::string name, void* data_in, int size) { - // std::ofstream ofs; - // ofs.open(name); - - // int8_t* data = (int8_t*)data_in; - // for (int i = 0; i < size; i++) { - // float value = data[i]; - // ofs << value << std::endl; - // } - // ofs.close(); + std::ofstream ofs; + ofs.open(name); + + int8_t* data = static_cast data_in; + for (int i = 0; i < size; i++) { + float value = data[i]; + ofs << value << std::endl; + } + ofs.close(); } void saveFloatToFile(std::string name, float* data_in, int size) { - // std::ofstream ofs; - // ofs.open(name); - - // for (int i = 0; i < size; i++) { - // float value = data_in[i]; - // ofs << value << std::endl; - // } - // ofs.close(); + std::ofstream ofs; + ofs.open(name); + + for (int i = 0; i < size; i++) { + float value = data_in[i]; + ofs << value << std::endl; + } + ofs.close(); } void set_filter_capacity(uint32_t cap) { FILTER_SIZE = cap; } @@ -58,7 +58,6 @@ void set_colunm(uint32_t column) { COLUMN = column; } int get_filter_num_alignment() { return COLUMN * 4; } int calc_division_capacity(int chw) { - // int n = FILTER_SIZE / ((chw + 15) / 16) * 32; int filter_num_alignment = get_filter_num_alignment(); int n = FILTER_SIZE / ((chw + 15) / 16) * filter_num_alignment; return n < FILTER_SIZE ? n : FILTER_SIZE; @@ -222,14 +221,11 @@ int8_t* format_filter(float* data_in, align_to_x(num_per_div_before_alignment, filter_num_alignment); int div_num = (num + num_per_div_before_alignment - 1) / num_per_div_before_alignment; - // int num_after_alignment = num_per_div_after_alignment * div_num; int residual = num % num_per_div_before_alignment; int num_after_alignment = num_per_div_after_alignment * ((residual == 0) ? div_num : (div_num - 1)) + align_to_x(residual, filter_num_alignment); - // saveFloatToFile("quantize_before", data_in, data_size); - int8_t* quantized_data = reinterpret_cast(fpga_malloc(data_size * sizeof(int8_t))); @@ -237,21 +233,15 @@ int8_t* format_filter(float* data_in, float* filter_start = data_in + n * chw; float f_max = find_max(filter_start, chw); int8_t* quantized_start = quantized_data + n * chw; - // quantize(filter_start, quantized_start, chw, f_max); quantize(filter_start, quantized_start, chw, max); - // filter_max.push_back(f_max); filter_max.push_back(max); } - // saveToFile("chw.txt", quantized_data, data_size); - int8_t* hwc_data = reinterpret_cast(fpga_malloc(data_size * sizeof(int8_t))); convert_to_hwc(quantized_data, hwc_data, num, channel, height, width); fpga_free(quantized_data); - // saveToFile("hwc.txt", hwc_data, data_size); - int8_t* temp_data = hwc_data; // NOLINT int chw_aligned = align_to_x(chw, FILTER_ELEMENT_ALIGNMENT); if (should_align_chw(chw)) { @@ -259,7 +249,6 @@ int8_t* format_filter(float* data_in, fpga_malloc(num * chw_aligned * sizeof(int8_t))); align_chw(hwc_data, hwc_aligned_data, num, chw); - // saveToFile("align_el.txt", hwc_aligned_data, data_size * 2); temp_data = hwc_aligned_data; fpga_free(hwc_data); } @@ -267,9 +256,6 @@ int8_t* format_filter(float* data_in, int filter_num_alignment = get_filter_num_alignment(); int num_per_div_after_alignment = align_to_x(num_per_div_before_alignment, filter_num_alignment); - // int div_num = - // (num + num_per_div_before_alignment - 1) / - // num_per_div_before_alignment; int num_element = div_num * num_per_div_after_alignment * chw_aligned; int8_t* num_aligned_data = reinterpret_cast(fpga_malloc(num_element * sizeof(int8_t))); @@ -279,19 +265,16 @@ int8_t* format_filter(float* data_in, num, chw_aligned); - // saveToFile("align_num.txt", num_aligned_data, data_size * 8); fpga_free(temp_data); temp_data = num_aligned_data; } int8_t* aligned_data = reinterpret_cast(fpga_malloc(num_after_alignment * chw_aligned)); reorder(temp_data, aligned_data, num_after_alignment, chw); - // saveToFile("reorder.txt", aligned_data, data_size * 8); fpga_free(temp_data); int8_t* interleaved_data = reinterpret_cast(fpga_malloc(num_after_alignment * chw_aligned)); interleave(aligned_data, interleaved_data, num_after_alignment, chw); - // saveToFile("interleave.txt", interleaved_data, data_size * 8); fpga_free(aligned_data); fpga_flush(interleaved_data, align_to_x(chw, FILTER_ELEMENT_ALIGNMENT) * num_after_alignment * diff --git a/lite/backends/fpga/KD/llapi/filter.h b/lite/backends/fpga/KD/llapi/filter.h index 90093fe05b30150d6a8f7cc21e9bf7b4eb736ff9..6e056ce0da0d8e731abf7dc418800a8e3d94969a 100644 --- a/lite/backends/fpga/KD/llapi/filter.h +++ b/lite/backends/fpga/KD/llapi/filter.h @@ -45,8 +45,6 @@ int8_t* format_filter(float* data_in, void convert_to_hwn(int16_t** data_in, int num, int height, int width); size_t align_element_n(int16_t** data_in, int num, int height, int width); -// void quantize_to_fp16(float** data_in, int num, int height, int width, -// float* scale_ptr); size_t format_dwconv_filter( float** data_in, int num, int height, int width, float* scale_ptr); diff --git a/lite/backends/fpga/KD/llapi/zynqmp_api.cpp b/lite/backends/fpga/KD/llapi/zynqmp_api.cpp index 2f29e5c1b539f47f5650928e14e8180c26414860..06488469d97c077a34b3cfdb8a049c8cd61dfc93 100755 --- a/lite/backends/fpga/KD/llapi/zynqmp_api.cpp +++ b/lite/backends/fpga/KD/llapi/zynqmp_api.cpp @@ -62,7 +62,6 @@ void reset_device() { // memory management; void *fpga_malloc(size_t size) { #ifdef ENABLE_DEBUG -// std::cout << "fpga_malloc:" << size << std::endl; #endif #ifdef PADDLE_OS_LINUX void *ptr = reinterpret_cast( @@ -250,10 +249,6 @@ int config_activation(const struct ActiveParamterArgs &args) { return do_ioctl(IOCTL_CONFIG_ACTIVATION_PARAMETER, &args); } -// int config_power(const struct PowerArgs& args) { -// return do_ioctl(IOCTL_CONFIG_POWER, &args); -// } - int config_inplace(const struct InplaceArgs &args) { return do_ioctl(IOCTL_CONFIG_INPLACE, &args); } diff --git a/lite/backends/fpga/KD/llapi/zynqmp_api.h b/lite/backends/fpga/KD/llapi/zynqmp_api.h index e00507e3247a70caf0dd57f5ed5b20d9ebbffd77..9489c24730e52fb778ed341e0ce452b7ef86edf9 100755 --- a/lite/backends/fpga/KD/llapi/zynqmp_api.h +++ b/lite/backends/fpga/KD/llapi/zynqmp_api.h @@ -14,8 +14,8 @@ limitations under the License. */ #pragma once -#ifndef PADDLE_MOBILE_SRC_FPGA_KD_ZYNQMP_API_H -#define PADDLE_MOBILE_SRC_FPGA_KD_ZYNQMP_API_H +#ifndef PADDLE_LITE_SRC_FPGA_KD_ZYNQMP_API_H +#define PADDLE_LITE_SRC_FPGA_KD_ZYNQMP_API_H #include #include @@ -371,4 +371,4 @@ float fp16_2_fp32(int16_t fp16_num); } // namespace zynqmp } // namespace paddle -#endif // PADDLE_MOBILE_SRC_FPGA_KD_ZYNQMP_API_H +#endif // PADDLE_LITE_SRC_FPGA_KD_ZYNQMP_API_H diff --git a/lite/backends/fpga/KD/pes/conv_pe.hpp b/lite/backends/fpga/KD/pes/conv_pe.hpp old mode 100755 new mode 100644 index ca894bdc242faf58760743a98b16a40e10a7fc82..fb15eaf77822eed076ec2001bace6871e93587ff --- a/lite/backends/fpga/KD/pes/conv_pe.hpp +++ b/lite/backends/fpga/KD/pes/conv_pe.hpp @@ -59,11 +59,9 @@ class ConvPE : public PE { } if (param_.filter->shape().width() == 1 && - param_.filter->shape().height() == 1) { - // use_cpu_ = true; + param_.filter->shape().height() == 1) { // NOLINT } - if (!use_cpu_) { - // param_.filter->releaseData(); + if (!use_cpu_) { // NOLINT } } @@ -94,7 +92,6 @@ class ConvPE : public PE { int kernel_width = param_.filter->shape().width(); int kernel_step_h = param_.strides[0]; int kernel_step_w = param_.strides[1]; - // int out_channel = param_.strides[1]; int pooled_height_ = output->shape().height(); int pooled_width_ = out_width; int filter_chw = image_channels * kernel_height * kernel_width; @@ -205,7 +202,6 @@ class ConvPE : public PE { } out_index = h * out_width * out_channel + w * out_channel + i; out[out_index] = sum; - // out_index++; } } } diff --git a/lite/backends/fpga/KD/pes/conv_process.hpp b/lite/backends/fpga/KD/pes/conv_process.hpp index 3db9662b62cce6ed33d059f60835dca25be5f60e..ecee45569c8df3d3e3926b2ca78cb49da8415aa4 100755 --- a/lite/backends/fpga/KD/pes/conv_process.hpp +++ b/lite/backends/fpga/KD/pes/conv_process.hpp @@ -172,8 +172,6 @@ inline void format_scale_bias(Tensor* scale, } } - // int element_num_per_div = get_filter_num_per_div(filter, group); - // int scale_bias_len = align_to_x(channel / group, 8) * group; bias_scale::format_bias_scale_array( &temp_data, scale_bias_len / group, scale_bias_len); memcpy(bs_data, temp_data, 2 * scale_bias_len * sizeof(float)); @@ -268,8 +266,6 @@ inline void split_filter_num(const ConvParam& c_param) { int filter_num_alignment = filter::get_filter_num_alignment(); int aligned_num = align_to_x(num / param.groups, filter_num_alignment) * param.groups; - // int aligned_num = align_to_x(num / param.groups ,FILTER_NUM_ALIGNMENT) * - // param.groups; split_num = filter::calc_split_num(aligned_num, div_capacity); Shape& out_shape = out->shape(); @@ -368,7 +364,6 @@ inline void split_filter_num(const ConvParam& c_param) { args.image.height = input->shape().height(); args.image.pad_width = param.paddings[1]; args.image.pad_height = param.paddings[0]; - // dilations[0] = dilations[1] ; args.dilation = param.dilations[0]; args.output.address = out_address; @@ -424,7 +419,6 @@ inline void split_channel(const ConvParam& c_param) { } scale.flush(); bias.flush(); - // Shape sb_shape(N, {2 * channel}); format_scale_bias(&scale, &bias, &conv_param->filter, @@ -452,7 +446,6 @@ inline void split_channel(const ConvParam& c_param) { args.image.height = conv_param->input.shape().height(); args.image.pad_width = param.paddings[1]; args.image.pad_height = param.paddings[0]; - // dilations[0] = dilations[1] args.dilation = param.dilations[0]; args.output.address = conv_param->output.mutableData(); args.output.scale_address = conv_param->output.scale(); @@ -483,7 +476,6 @@ inline bool compute_conv(const ConvParam& c_conv_params) { } size_t size = params.size(); if (ret == 0 && size > 1) { - // Tensor* output = conv_params.output; Tensor& img = params[0]->output; for (int i = 0; i < 1; i++) { for (int i = 0; i < img.shape().numel(); i++) { diff --git a/lite/backends/fpga/KD/pes/depthwise_conv_pe.hpp b/lite/backends/fpga/KD/pes/depthwise_conv_pe.hpp index 8b88d24918bbbecae997817e72466798c1211a18..0efca2ec2e60e8973d92f41463b0444722f2a73b 100755 --- a/lite/backends/fpga/KD/pes/depthwise_conv_pe.hpp +++ b/lite/backends/fpga/KD/pes/depthwise_conv_pe.hpp @@ -62,7 +62,6 @@ class DepthwiseConvPE : public PE { float16* scale_data = param_.scale()->data(); float16* filter_data = param.quantizedFilter()->mutableData( FP16, param.filter->shape()); - // memcpy(filter_data, scale_data, channel * sizeof(float16)); memcpy(filter_data, scale_data, param.filter->shape().numel() * sizeof(float16)); diff --git a/lite/backends/fpga/KD/pes/elementwise_mul_pe.hpp b/lite/backends/fpga/KD/pes/elementwise_mul_pe.hpp index 15a3f5c98aed0d858bc40240286b42f4576a5069..0505e78b61e3b0130c876880894cec29c78406f2 100644 --- a/lite/backends/fpga/KD/pes/elementwise_mul_pe.hpp +++ b/lite/backends/fpga/KD/pes/elementwise_mul_pe.hpp @@ -33,7 +33,6 @@ class ElementwiseMulPE : public PE { Tensor* output = param_.output; int wc_aligned = align_to_x(param_.inputs[0]->shape().numel(), 32); - // int wc_aligned = / 32 * 32; Shape s(N, {wc_aligned}); float16* bias_data = bias_tensor.mutableData(FP16, s); diff --git a/lite/backends/fpga/KD/pes/gru_pe.hpp b/lite/backends/fpga/KD/pes/gru_pe.hpp old mode 100755 new mode 100644 index 2f1812707356c53e2ed846d68711b0687648a744..dcacab4eeef32b245d4126b72597b398a6627ba6 --- a/lite/backends/fpga/KD/pes/gru_pe.hpp +++ b/lite/backends/fpga/KD/pes/gru_pe.hpp @@ -38,25 +38,17 @@ struct GRUTensors { class GRUPE : public PE { public: - bool init() { - // Tensor* output = param_.output; - // output->setAligned(true); - // output->setDataLocation(Device); - return true; - } + bool init() { return true; } void apply() { auto hidden = param_.hidden; - // auto hidden_dims = hidden->dims(); int frame_size = hidden->shape().channel(); zynqmp::Shape hidden_shape{zynqmp::NCHW, {1, frame_size, 1, 1}}; float16* prev_hidden_data = prev_hidden_.mutableData(zynqmp::FP16, hidden_shape); - // set previous hidden data to 0; memset(prev_hidden_data, 0, hidden_shape.numel() * sizeof(float16)); - // copy 2/3 weight from param.weight; zynqmp::Shape weight_shape{zynqmp::NC, {frame_size, frame_size * 2}}; float* weight_data = weight_.mutableData(zynqmp::FP32, weight_shape); memset(weight_data, 0, weight_shape.numel() * sizeof(float)); @@ -77,52 +69,15 @@ class GRUPE : public PE { pre_out_pe_.init(); pre_out_pe_.apply(); - // // ============= C - // ElementwiseAddParam& bias_add_param = bias_ew_pe_.param(); - // bias_add_param.inputs = {&pre_output_, &pre_input_}; - // bias_add_param.output = &pre_input_; - // bias_ew_pe_.init(); - // bias_ew_pe_.apply(); - // // ==================== - - // Shape state_weight_shape(NC,{frame_size, frame_size}); - // float* state_weight_data = state_weight_.mutableData(FP32, - // state_weight_shape); - // memcpy(state_weight_data, weight_data + 2 * frame_size * frame_size, - // state_weight_shape.numel() * sizeof(float)); - // FullyConnectedParam& reset_out_param = reset_out_pe_.param(); - // reset_out_param.input = &prev_hidden; - // reset_out_param.output = &gate_ping; - // reset_out_param.filter = &state_weight_; - - // // ============== unit reset; - // update_gate_.mutableData(FP16, pre_input_shape); - // InputParam& relu_param = update_relu_pe_.param(); - // relu_param.input = &tempTensor; - // relu_param.output = &update_gate_; - // update_relu_pe_.init(); - // update_relu_pe_.apply(); - reset_gate_.mutableData(FP16, hidden_shape); prev_hidden_.mutableData(FP16, hidden_shape); reset_hidden_.mutableData(FP16, hidden_shape); - // InputParam& reset_param = reset_relu_pe_.param(); - // reset_param.input = &tempTensor; - // reset_param.output = &reset_gate_; - // reset_relu_pe_.init(); - // reset_relu_pe_.apply(); - - // float16* prev_data = prev_.mutableData(FP16, pre_input_shape); - // memset(prev_data, 0, (pre_input_shape.numel() + 32) * sizeof(float16)); - // // TODO - // reset_hidden_prev_.mutableData(FP16, pre_input_shape); ElementwiseMulParam& mul_param = mul_pe_.param(); mul_param.inputs = {&reset_gate_, &prev_hidden_}; mul_param.output = &reset_hidden_; mul_pe_.init(); mul_pe_.apply(); - // ============== } bool dispatch() { return true; } @@ -136,23 +91,15 @@ class GRUPE : public PE { int stride_hidden_prev = frame_size; int stride_hidden = frame_size; - // Tensor* gate = value.gate; - // value.gate->saveToFile("value_input.txt"); - float* update_gate_data = gate_ping_.data(); float* reset_gate_data = update_gate_data + frame_size; for (int b = 0; b < batch_size; b++) { - // memcpy(tempTensor.data(), reset_gate_data, gate->shape().numel() - // * sizeof(float)); - // tempTensor.flush(); - Tensor tmp; Shape s(NC, {1, frame_size}); float* tmp_data = tmp.mutableData(FP32, s); for (int i = 0; i < frame_size; i++) { - // f(x) = x / (1 + abs(x))? update_gate_data[i] = lite::arm::math::active_f32( update_gate_data[i]); @@ -164,17 +111,13 @@ class GRUPE : public PE { tmp.flush(); reset_gate_.copyFrom(&tmp); - // reset_gate_.copyFrom(&tempTensor); Tensor* hidden_prev = value.pre_output; if (hidden_prev) { - // memcpy(prev_data, ) // TODO(chonwhite): change to pre_out; prev_hidden_.copyFrom(value.pre_output); prev_hidden_.saveToFile("prev_.txt"); } - // // 4.0 reset_date * hidden_prev; - // // reset_hidden_prev[i] = reset_gate[i] * prev; mul_pe_.dispatch(); reset_hidden_.saveToFile("reset_hidden_.txt"); update_gate_data += stride_update; @@ -188,73 +131,13 @@ class GRUPE : public PE { bool origin_mode, GRUTensors& value, // NOLINT int frame_size, - int batch_size) { - // int stride_update = 3 * frame_size; - // int stride_cell_state = 3 * frame_size; - // int stride_hidden_prev = frame_size; - // int stride_hidden = frame_size; - - // Tensor* hidden = value.output_value; - // float* hidden_prev = nullptr; - // if (hidden) { - // hidden_prev = hidden->data(); - // } - - // float* cell_state = value.gate->data() + 2 * frame_size; - - // float* updata_gate = value.gate->data(); - // // float* reset_gate_data = update_gate_data + frame_size; - - // float prev = 0.0f; - // for (int b = 0; b < batch_size; ++b) { - // if (origin_mode) { - // // for (int i = 0; i < frame_size; i++) { - // // float prev = 0; - // // if (hidden_prev) { - // // prev = hidden_prev[i]; - // // } - // // cell_state[i] = - // lite::arm::math::active_f32(cell_state[i]); - // // hidden[i] = - // // cell_state[i] * (1.f - updata_gate[i]) + updata_gate[i] * - // prev; - // // } - // } else { - // for (int i = 0; i < frame_size; ++i) { - // cell_state[i] = - // lite::arm::math::active_f32(cell_state[i]); - // if (hidden_prev) { - // prev = hidden_prev[i]; - // } - // float hidden_value = - // prev * (1.f - updata_gate[i]) + updata_gate[i] * cell_state[i]; - // hidden_prev[i] = hidden_value; - // std::cout << "hidden_value::" << hidden_value << std::endl; - // } - // } - // updata_gate += stride_update; - // cell_state += stride_cell_state; - // hidden_prev += frame_size; - // } - } + int batch_size) {} void copy_input(GRUTensors& value) { // NOLINT float max = find_max(*(value.gate)); gate_ping_.mutableData(FP32, value.gate->shape()); gate_ping_.copyFrom(value.gate); // update input pointer? - - // gate_.readFromFile("input/in.txt"); - // // pre_input_.saveToFile("pppp_in.txt"); - // gate_.scale()[0] = max / 127; - // gate_.scale()[1] = 127 / max; - // gate_.printScale("pre_input_"); - - // gate_.saveToFile("pre_input_.txt"); - - // pre_out_pe_.dispatch(); - - // pre_output_.saveToFile("pp_out.txt"); } void GRUCOmpute(GRUTensors& value, // NOLINT @@ -272,25 +155,10 @@ class GRUPE : public PE { } gru_unit_reset_act(active_gate, value, frame_size, batch_size); - - // if (value.pre_output) { - // // state weight; - // reset_out_pe_.dispatch(); - // } - // gru_unit_out_act(active_node, origin_mode, value, frame_size, - // batch_size); } GRUParam& param() { return param_; } - // Tensor* preOutput() { - // return &pre_output_; - // } - - // Tensor* gate() { - // return &gate_; - // } - Tensor* updateGate() { return &update_gate_; } Tensor* resetGate() { return &reset_gate_; } @@ -302,7 +170,6 @@ class GRUPE : public PE { zynqmp::Tensor bias_; zynqmp::Tensor weight_; zynqmp::Tensor state_weight_; - // ================================= zynqmp::Tensor update_gate_; zynqmp::Tensor reset_gate_; zynqmp::Tensor cell_state_; @@ -310,7 +177,6 @@ class GRUPE : public PE { zynqmp::Tensor reset_hidden_; Tensor tempTensor; - // ================================= ReluPE update_relu_pe_; ReluPE reset_relu_pe_; diff --git a/lite/backends/fpga/KD/pes/pooling_pe.hpp b/lite/backends/fpga/KD/pes/pooling_pe.hpp index 386a470975261871137429f03d7c76b43aedb94b..a8725b51a690e0e134785fcfdb3dd70edeffd441 100755 --- a/lite/backends/fpga/KD/pes/pooling_pe.hpp +++ b/lite/backends/fpga/KD/pes/pooling_pe.hpp @@ -67,9 +67,6 @@ class PoolingPE : public PE { use_cpu_ = output->shape().width() == 1 && output->shape().height() == 1 && (k_width > 7 || k_height > 7); - // use_cpu_ = output->shape().width() == 1 && output->shape().height() == 1 - // && - // (k_width > 255 || k_height > 255); use_cpu_ = param_.type == AVERAGE; } @@ -79,7 +76,6 @@ class PoolingPE : public PE { input->syncToCPU(); Tensor float_input; - // Tensor float_output; float* image_addr = float_input.mutableData(FP32, input->shape()); float_input.copyFrom(input); float16* data_out = output->data(); @@ -192,9 +188,7 @@ class PoolingPE : public PE { bool dispatch() { if (use_cpu_) { - // cpu_compute(); compute(); - // exit(-1); return true; } param_.input->syncToDevice(); diff --git a/lite/backends/fpga/KD/pes/scale_pe.hpp b/lite/backends/fpga/KD/pes/scale_pe.hpp old mode 100755 new mode 100644 index 91f698ba514b949a4d22416791ed3993c1df737f..cc89ac943f90cb20062a3d6ef9a46b705193ad04 --- a/lite/backends/fpga/KD/pes/scale_pe.hpp +++ b/lite/backends/fpga/KD/pes/scale_pe.hpp @@ -67,15 +67,12 @@ class ScalePE : public PE { Tensor* scale = dw_param.scale(); float16* scale_data = scale->mutableData(FP16, shape); - // memcpy(scale_data, param_.scale->data(), input->shape().channel() - // * sizeof(float)); Tensor* bias = dw_param.bias(); float16* bias_data = bias->mutableData(FP16, shape); std::fill_n(bias_data, input->shape().channel(), 0); if (param_.scale->dataType() == FP32) { - // std::cout << "scale dataType FP32:" << std::endl; if (param_.bias != nullptr) { float* bias_data_float = param_.bias->data(); for (int i = 0; i < repeat; i++) { @@ -127,11 +124,6 @@ class ScalePE : public PE { } } - // if (param_.bias != nullptr) { - // memcpy(bias_data, param_.bias->data(), input->shape().channel() - // * sizeof(float)); - // } - dw_param.input = param_.input; dw_param.output = param_.output; dw_param.filter = &filter; @@ -182,9 +174,6 @@ class ScalePE : public PE { } bool dispatch() { - // cpu_compute(); - // return true; - if (param_.scale->dataType() == FP16) { DepthwiseConvParam& dw_param = dw_pe_.param(); memcpy(dw_param.quantizedFilter()->mutableData(), @@ -194,7 +183,6 @@ class ScalePE : public PE { dw_param.quantizedFilter()->scale()[1] = param_.scale->scale()[1]; dw_param.quantizedFilter()->flush(); - // apply(); } param_.input->syncToDevice(); return dw_pe_.dispatch(); diff --git a/lite/backends/fpga/KD/tensor.hpp b/lite/backends/fpga/KD/tensor.hpp old mode 100755 new mode 100644 index 047498eed009dded5ce398ddabc2079b62d937df..f1b07d02622fad32e99205667424a4cb3c9fb46d --- a/lite/backends/fpga/KD/tensor.hpp +++ b/lite/backends/fpga/KD/tensor.hpp @@ -348,19 +348,9 @@ class Tensor { if (placeHolder_ == nullptr) { return; } - std::cout << scale()[0] << " , " << scale()[1] << std::endl; } - void printScale(std::string type) { - std::cout << type << " : " - << std::to_string(shape_->num()) + "_" + - std::to_string(shape_->channel()) + "_" + - std::to_string(shape_->height()) + "_" + - std::to_string(shape_->width()) - << std::endl; - std::cout << type << " \n"; - printScale(); - } + void printScale(std::string type) { printScale(); } std::string dimsFileName() { return std::to_string(shape_->num()) + "_" + @@ -388,12 +378,10 @@ class Tensor { static int counter = 0; std::string npath = std::to_string(counter) + "_" + path; counter++; - std::cout << "======== saving file:" << npath << " ============\n"; save_file_with_name(npath); } void save_file_with_name(std::string path) { - // return; invalidate(); std::ofstream ofs; ofs.open(path); diff --git a/lite/backends/fpga/lite_tensor.h b/lite/backends/fpga/lite_tensor.h index ccf3628ecf16c91b722380ad6bfd11b8e89b1879..311fc8a98400e5a6916ba1b9c8de1e6e0bcec4c0 100644 --- a/lite/backends/fpga/lite_tensor.h +++ b/lite/backends/fpga/lite_tensor.h @@ -165,9 +165,6 @@ class TensorLite { TargetType target() const { return target_; } - // template - // TensorLite Slice(int64_t begin, int64_t end) const; - zynqmp::Tensor *ZynqTensor() const { return zynq_tensor_; } friend std::ostream &operator<<(std::ostream &os, const TensorLite &tensor) { @@ -257,8 +254,6 @@ TensorLite TensorLite::Slice(int64_t begin, int64_t end) const { int64_t base = numel() / dims_[0]; TensorLite dst; - // dst.buffer_ = buffer_; - // dst.zynq_tensor_ = zynq_tensor_; dst.target_ = target_; auto dst_dims = dims_; dst_dims[0] = end - begin; @@ -271,7 +266,6 @@ TensorLite TensorLite::Slice(int64_t begin, int64_t end) const { dst_dims.production() * sizeof(T)); dst.ZynqTensor()->saveToFile("_slice", true); - // dst.offset_ = offset_ + static_cast(begin * base) * sizeof(T); return dst; } }