Commit 2bc665fe authored by MyPandaShaoxiang

fix: delete useless code

test=develop
Parent 2e2c9d4b
@@ -17,7 +17,6 @@
 #include <string>
 #include <unordered_map>
-// #include "lite/backends/fpga/lite_tensor.h"
 #include "lite/core/tensor.h"
 namespace paddle {
@@ -33,9 +32,7 @@ class Debugger {
   }
   void registerOutput(std::string op_type, zynqmp::Tensor* tensor) {
-    // tensor->printScale();
-    if (op_type != "conv") {
-      // tensor->saveToFile(op_type, true);
+    if (op_type != "conv") {  // NOLINT
     }
   }
@@ -60,7 +57,6 @@ inline void chw_to_hwc(Tensor* t, float* dst) {
   if (t->dims().size() > 3) {
     width = t->dims()[3];
   }
-  // int width = t->dims()[3];
   const float* chw_data = t->data<float>();
   float* hwc_data = dst;
@@ -92,11 +88,9 @@ inline void read_from_file(lite::Tensor* t, const std::string& path) {
     file_stream >> value;
     data[i] = value;
   }
-  // flush();
 }
 inline void save_float(float* data, const std::string& name, int len) {
-  // return;
   static int counter = 0;
   std::string old_string = std::to_string(counter);
   std::string new_string =
@@ -105,12 +99,8 @@ inline void save_float(float* data, const std::string& name, int len) {
   std::string file = "arm_" + new_string + name;
   counter++;
-  std::cout
-      << "-------------------------- saving file: --------------------------"
-      << file << std::endl;
   std::ofstream ofs;
   ofs.open(file);
-  // float* data = dst;
   for (int i = 0; i < len; i++) {
     float value = data[i];
     ofs << value << std::endl;
@@ -135,7 +125,6 @@ inline void save_tensor(lite::Tensor* t,
 inline void save_tensor(const lite::Tensor* t,
                         const std::string& name,
                         bool convert = true) {
-  // return;
   float* data = const_cast<float*>(t->data<float>());
   float* dst = new float[t->numel()];
   if (convert) {
...
@@ -28,26 +28,26 @@ static int FILTER_SIZE = 2048;
 static int COLUMN = 4;
 void saveToFile(std::string name, void* data_in, int size) {
-  // std::ofstream ofs;
-  // ofs.open(name);
-  // int8_t* data = (int8_t*)data_in;
-  // for (int i = 0; i < size; i++) {
-  //   float value = data[i];
-  //   ofs << value << std::endl;
-  // }
-  // ofs.close();
+  std::ofstream ofs;
+  ofs.open(name);
+  int8_t* data = static_cast<int8_t*>(data_in);
+  for (int i = 0; i < size; i++) {
+    float value = data[i];
+    ofs << value << std::endl;
+  }
+  ofs.close();
 }
 void saveFloatToFile(std::string name, float* data_in, int size) {
-  // std::ofstream ofs;
-  // ofs.open(name);
-  // for (int i = 0; i < size; i++) {
-  //   float value = data_in[i];
-  //   ofs << value << std::endl;
-  // }
-  // ofs.close();
+  std::ofstream ofs;
+  ofs.open(name);
+  for (int i = 0; i < size; i++) {
+    float value = data_in[i];
+    ofs << value << std::endl;
+  }
+  ofs.close();
 }
 void set_filter_capacity(uint32_t cap) { FILTER_SIZE = cap; }
@@ -58,7 +58,6 @@ void set_colunm(uint32_t column) { COLUMN = column; }
 int get_filter_num_alignment() { return COLUMN * 4; }
 int calc_division_capacity(int chw) {
-  // int n = FILTER_SIZE / ((chw + 15) / 16) * 32;
   int filter_num_alignment = get_filter_num_alignment();
   int n = FILTER_SIZE / ((chw + 15) / 16) * filter_num_alignment;
   return n < FILTER_SIZE ? n : FILTER_SIZE;
@@ -222,14 +221,11 @@ int8_t* format_filter(float* data_in,
       align_to_x(num_per_div_before_alignment, filter_num_alignment);
   int div_num =
       (num + num_per_div_before_alignment - 1) / num_per_div_before_alignment;
-  // int num_after_alignment = num_per_div_after_alignment * div_num;
   int residual = num % num_per_div_before_alignment;
   int num_after_alignment = num_per_div_after_alignment *
                                 ((residual == 0) ? div_num : (div_num - 1)) +
                             align_to_x(residual, filter_num_alignment);
-  // saveFloatToFile("quantize_before", data_in, data_size);
   int8_t* quantized_data =
       reinterpret_cast<int8_t*>(fpga_malloc(data_size * sizeof(int8_t)));
@@ -237,21 +233,15 @@ int8_t* format_filter(float* data_in,
     float* filter_start = data_in + n * chw;
     float f_max = find_max(filter_start, chw);
     int8_t* quantized_start = quantized_data + n * chw;
-    // quantize(filter_start, quantized_start, chw, f_max);
     quantize(filter_start, quantized_start, chw, max);
-    // filter_max.push_back(f_max);
     filter_max.push_back(max);
   }
-  // saveToFile("chw.txt", quantized_data, data_size);
   int8_t* hwc_data =
       reinterpret_cast<int8_t*>(fpga_malloc(data_size * sizeof(int8_t)));
   convert_to_hwc(quantized_data, hwc_data, num, channel, height, width);
   fpga_free(quantized_data);
-  // saveToFile("hwc.txt", hwc_data, data_size);
   int8_t* temp_data = hwc_data;  // NOLINT
   int chw_aligned = align_to_x(chw, FILTER_ELEMENT_ALIGNMENT);
   if (should_align_chw(chw)) {
@@ -259,7 +249,6 @@ int8_t* format_filter(float* data_in,
         fpga_malloc(num * chw_aligned * sizeof(int8_t)));
     align_chw(hwc_data, hwc_aligned_data, num, chw);
-    // saveToFile("align_el.txt", hwc_aligned_data, data_size * 2);
     temp_data = hwc_aligned_data;
     fpga_free(hwc_data);
   }
@@ -267,9 +256,6 @@ int8_t* format_filter(float* data_in,
     int filter_num_alignment = get_filter_num_alignment();
     int num_per_div_after_alignment =
         align_to_x(num_per_div_before_alignment, filter_num_alignment);
-    // int div_num =
-    //     (num + num_per_div_before_alignment - 1) /
-    //     num_per_div_before_alignment;
     int num_element = div_num * num_per_div_after_alignment * chw_aligned;
     int8_t* num_aligned_data =
         reinterpret_cast<int8_t*>(fpga_malloc(num_element * sizeof(int8_t)));
@@ -279,19 +265,16 @@ int8_t* format_filter(float* data_in,
                   num,
                   chw_aligned);
-    // saveToFile("align_num.txt", num_aligned_data, data_size * 8);
     fpga_free(temp_data);
     temp_data = num_aligned_data;
   }
   int8_t* aligned_data =
       reinterpret_cast<int8_t*>(fpga_malloc(num_after_alignment * chw_aligned));
   reorder(temp_data, aligned_data, num_after_alignment, chw);
-  // saveToFile("reorder.txt", aligned_data, data_size * 8);
   fpga_free(temp_data);
   int8_t* interleaved_data =
       reinterpret_cast<int8_t*>(fpga_malloc(num_after_alignment * chw_aligned));
   interleave(aligned_data, interleaved_data, num_after_alignment, chw);
-  // saveToFile("interleave.txt", interleaved_data, data_size * 8);
   fpga_free(aligned_data);
   fpga_flush(interleaved_data,
              align_to_x(chw, FILTER_ELEMENT_ALIGNMENT) * num_after_alignment *
...
@@ -45,8 +45,6 @@ int8_t* format_filter(float* data_in,
 void convert_to_hwn(int16_t** data_in, int num, int height, int width);
 size_t align_element_n(int16_t** data_in, int num, int height, int width);
-// void quantize_to_fp16(float** data_in, int num, int height, int width,
-//                       float* scale_ptr);
 size_t format_dwconv_filter(
     float** data_in, int num, int height, int width, float* scale_ptr);
...
@@ -62,7 +62,6 @@ void reset_device() {
 // memory management;
 void *fpga_malloc(size_t size) {
 #ifdef ENABLE_DEBUG
-  // std::cout << "fpga_malloc:" << size << std::endl;
 #endif
 #ifdef PADDLE_OS_LINUX
   void *ptr = reinterpret_cast<void *>(
@@ -250,10 +249,6 @@ int config_activation(const struct ActiveParamterArgs &args) {
   return do_ioctl(IOCTL_CONFIG_ACTIVATION_PARAMETER, &args);
 }
-// int config_power(const struct PowerArgs& args) {
-//   return do_ioctl(IOCTL_CONFIG_POWER, &args);
-// }
 int config_inplace(const struct InplaceArgs &args) {
   return do_ioctl(IOCTL_CONFIG_INPLACE, &args);
 }
...
@@ -14,8 +14,8 @@ limitations under the License. */
 #pragma once
-#ifndef PADDLE_MOBILE_SRC_FPGA_KD_ZYNQMP_API_H
-#define PADDLE_MOBILE_SRC_FPGA_KD_ZYNQMP_API_H
+#ifndef PADDLE_LITE_SRC_FPGA_KD_ZYNQMP_API_H
+#define PADDLE_LITE_SRC_FPGA_KD_ZYNQMP_API_H
 #include <stdint.h>
 #include <cstddef>
@@ -371,4 +371,4 @@ float fp16_2_fp32(int16_t fp16_num);
 }  // namespace zynqmp
 }  // namespace paddle
-#endif  // PADDLE_MOBILE_SRC_FPGA_KD_ZYNQMP_API_H
+#endif  // PADDLE_LITE_SRC_FPGA_KD_ZYNQMP_API_H
@@ -59,11 +59,9 @@ class ConvPE : public PE {
     }
     if (param_.filter->shape().width() == 1 &&
-        param_.filter->shape().height() == 1) {
-      // use_cpu_ = true;
+        param_.filter->shape().height() == 1) {  // NOLINT
     }
-    if (!use_cpu_) {
-      // param_.filter->releaseData();
+    if (!use_cpu_) {  // NOLINT
     }
   }
@@ -94,7 +92,6 @@ class ConvPE : public PE {
     int kernel_width = param_.filter->shape().width();
     int kernel_step_h = param_.strides[0];
     int kernel_step_w = param_.strides[1];
-    // int out_channel = param_.strides[1];
     int pooled_height_ = output->shape().height();
     int pooled_width_ = out_width;
     int filter_chw = image_channels * kernel_height * kernel_width;
@@ -205,7 +202,6 @@ class ConvPE : public PE {
           }
           out_index = h * out_width * out_channel + w * out_channel + i;
           out[out_index] = sum;
-          // out_index++;
         }
       }
     }
...
@@ -172,8 +172,6 @@ inline void format_scale_bias(Tensor* scale,
     }
   }
-  // int element_num_per_div = get_filter_num_per_div(filter, group);
-  // int scale_bias_len = align_to_x(channel / group, 8) * group;
   bias_scale::format_bias_scale_array(
       &temp_data, scale_bias_len / group, scale_bias_len);
   memcpy(bs_data, temp_data, 2 * scale_bias_len * sizeof(float));
@@ -268,8 +266,6 @@ inline void split_filter_num(const ConvParam& c_param) {
   int filter_num_alignment = filter::get_filter_num_alignment();
   int aligned_num =
       align_to_x(num / param.groups, filter_num_alignment) * param.groups;
-  // int aligned_num = align_to_x(num / param.groups ,FILTER_NUM_ALIGNMENT) *
-  // param.groups;
   split_num = filter::calc_split_num(aligned_num, div_capacity);
   Shape& out_shape = out->shape();
@@ -368,7 +364,6 @@ inline void split_filter_num(const ConvParam& c_param) {
     args.image.height = input->shape().height();
     args.image.pad_width = param.paddings[1];
     args.image.pad_height = param.paddings[0];
-    // dilations[0] = dilations[1] ;
     args.dilation = param.dilations[0];
     args.output.address = out_address;
@@ -424,7 +419,6 @@ inline void split_channel(const ConvParam& c_param) {
     }
     scale.flush();
     bias.flush();
-    // Shape sb_shape(N, {2 * channel});
     format_scale_bias(&scale,
                       &bias,
                       &conv_param->filter,
@@ -452,7 +446,6 @@ inline void split_channel(const ConvParam& c_param) {
     args.image.height = conv_param->input.shape().height();
     args.image.pad_width = param.paddings[1];
     args.image.pad_height = param.paddings[0];
-    // dilations[0] = dilations[1]
     args.dilation = param.dilations[0];
     args.output.address = conv_param->output.mutableData<void>();
     args.output.scale_address = conv_param->output.scale();
@@ -483,7 +476,6 @@ inline bool compute_conv(const ConvParam& c_conv_params) {
   }
   size_t size = params.size();
   if (ret == 0 && size > 1) {
-    // Tensor* output = conv_params.output;
     Tensor& img = params[0]->output;
     for (int i = 0; i < 1; i++) {
       for (int i = 0; i < img.shape().numel(); i++) {
...
@@ -62,7 +62,6 @@ class DepthwiseConvPE : public PE {
     float16* scale_data = param_.scale()->data<float16>();
     float16* filter_data = param.quantizedFilter()->mutableData<float16>(
         FP16, param.filter->shape());
-    // memcpy(filter_data, scale_data, channel * sizeof(float16));
     memcpy(filter_data,
            scale_data,
            param.filter->shape().numel() * sizeof(float16));
...
@@ -33,7 +33,6 @@ class ElementwiseMulPE : public PE {
     Tensor* output = param_.output;
     int wc_aligned = align_to_x(param_.inputs[0]->shape().numel(), 32);
-    // int wc_aligned = / 32 * 32;
     Shape s(N, {wc_aligned});
     float16* bias_data = bias_tensor.mutableData<float16>(FP16, s);
...
@@ -38,25 +38,17 @@ struct GRUTensors {
 class GRUPE : public PE {
  public:
-  bool init() {
-    // Tensor* output = param_.output;
-    // output->setAligned(true);
-    // output->setDataLocation(Device);
-    return true;
-  }
+  bool init() { return true; }
   void apply() {
     auto hidden = param_.hidden;
-    // auto hidden_dims = hidden->dims();
     int frame_size = hidden->shape().channel();
     zynqmp::Shape hidden_shape{zynqmp::NCHW, {1, frame_size, 1, 1}};
     float16* prev_hidden_data =
         prev_hidden_.mutableData<float16>(zynqmp::FP16, hidden_shape);
-    // set previous hidden data to 0;
     memset(prev_hidden_data, 0, hidden_shape.numel() * sizeof(float16));
-    // copy 2/3 weight from param.weight;
     zynqmp::Shape weight_shape{zynqmp::NC, {frame_size, frame_size * 2}};
     float* weight_data = weight_.mutableData<float>(zynqmp::FP32, weight_shape);
     memset(weight_data, 0, weight_shape.numel() * sizeof(float));
@@ -77,52 +69,15 @@ class GRUPE : public PE {
     pre_out_pe_.init();
     pre_out_pe_.apply();
-    // // ============= C
-    // ElementwiseAddParam& bias_add_param = bias_ew_pe_.param();
-    // bias_add_param.inputs = {&pre_output_, &pre_input_};
-    // bias_add_param.output = &pre_input_;
-    // bias_ew_pe_.init();
-    // bias_ew_pe_.apply();
-    // // ====================
-    // Shape state_weight_shape(NC,{frame_size, frame_size});
-    // float* state_weight_data = state_weight_.mutableData<float>(FP32,
-    // state_weight_shape);
-    // memcpy(state_weight_data, weight_data + 2 * frame_size * frame_size,
-    // state_weight_shape.numel() * sizeof(float));
-    // FullyConnectedParam& reset_out_param = reset_out_pe_.param();
-    // reset_out_param.input = &prev_hidden;
-    // reset_out_param.output = &gate_ping;
-    // reset_out_param.filter = &state_weight_;
-    // // ============== unit reset;
-    // update_gate_.mutableData<void>(FP16, pre_input_shape);
-    // InputParam& relu_param = update_relu_pe_.param();
-    // relu_param.input = &tempTensor;
-    // relu_param.output = &update_gate_;
-    // update_relu_pe_.init();
-    // update_relu_pe_.apply();
     reset_gate_.mutableData<void>(FP16, hidden_shape);
     prev_hidden_.mutableData<void>(FP16, hidden_shape);
     reset_hidden_.mutableData<void>(FP16, hidden_shape);
-    // InputParam& reset_param = reset_relu_pe_.param();
-    // reset_param.input = &tempTensor;
-    // reset_param.output = &reset_gate_;
-    // reset_relu_pe_.init();
-    // reset_relu_pe_.apply();
-    // float16* prev_data = prev_.mutableData<float16>(FP16, pre_input_shape);
-    // memset(prev_data, 0, (pre_input_shape.numel() + 32) * sizeof(float16));
-    // // TODO
-    // reset_hidden_prev_.mutableData<float16>(FP16, pre_input_shape);
     ElementwiseMulParam& mul_param = mul_pe_.param();
     mul_param.inputs = {&reset_gate_, &prev_hidden_};
     mul_param.output = &reset_hidden_;
     mul_pe_.init();
     mul_pe_.apply();
-    // ==============
   }
   bool dispatch() { return true; }
@@ -136,23 +91,15 @@ class GRUPE : public PE {
     int stride_hidden_prev = frame_size;
     int stride_hidden = frame_size;
-    // Tensor* gate = value.gate;
-    // value.gate->saveToFile("value_input.txt");
     float* update_gate_data = gate_ping_.data<float>();
     float* reset_gate_data = update_gate_data + frame_size;
     for (int b = 0; b < batch_size; b++) {
-      // memcpy(tempTensor.data<void>(), reset_gate_data, gate->shape().numel()
-      // * sizeof(float));
-      // tempTensor.flush();
       Tensor tmp;
       Shape s(NC, {1, frame_size});
       float* tmp_data = tmp.mutableData<float>(FP32, s);
       for (int i = 0; i < frame_size; i++) {
-        // f(x) = x / (1 + abs(x))?
         update_gate_data[i] =
             lite::arm::math::active_f32<lite_api::ActivationType::kSigmoid>(
                 update_gate_data[i]);
@@ -164,17 +111,13 @@ class GRUPE : public PE {
       tmp.flush();
       reset_gate_.copyFrom(&tmp);
-      // reset_gate_.copyFrom(&tempTensor);
       Tensor* hidden_prev = value.pre_output;
       if (hidden_prev) {
-        // memcpy(prev_data, )
         // TODO(chonwhite): change to pre_out;
         prev_hidden_.copyFrom(value.pre_output);
        prev_hidden_.saveToFile("prev_.txt");
       }
-      // // 4.0 reset_date * hidden_prev;
-      // // reset_hidden_prev[i] = reset_gate[i] * prev;
       mul_pe_.dispatch();
       reset_hidden_.saveToFile("reset_hidden_.txt");
       update_gate_data += stride_update;
@@ -188,73 +131,13 @@ class GRUPE : public PE {
                         bool origin_mode,
                         GRUTensors& value,  // NOLINT
                         int frame_size,
-                        int batch_size) {
-    // int stride_update = 3 * frame_size;
-    // int stride_cell_state = 3 * frame_size;
-    // int stride_hidden_prev = frame_size;
-    // int stride_hidden = frame_size;
-    // Tensor* hidden = value.output_value;
-    // float* hidden_prev = nullptr;
-    // if (hidden) {
-    //   hidden_prev = hidden->data<float>();
-    // }
-    // float* cell_state = value.gate->data<float>() + 2 * frame_size;
-    // float* updata_gate = value.gate->data<float>();
-    // // float* reset_gate_data = update_gate_data + frame_size;
-    // float prev = 0.0f;
-    // for (int b = 0; b < batch_size; ++b) {
-    //   if (origin_mode) {
-    //     // for (int i = 0; i < frame_size; i++) {
-    //     //   float prev = 0;
-    //     //   if (hidden_prev) {
-    //     //     prev = hidden_prev[i];
-    //     //   }
-    //     //   cell_state[i] =
-    //     lite::arm::math::active_f32<kSigmoid>(cell_state[i]);
-    //     //   hidden[i] =
-    //     //       cell_state[i] * (1.f - updata_gate[i]) + updata_gate[i] *
-    //     prev;
-    //     // }
-    //   } else {
-    //     for (int i = 0; i < frame_size; ++i) {
-    //       cell_state[i] =
-    //       lite::arm::math::active_f32<lite_api::ActivationType::kRelu>(cell_state[i]);
-    //       if (hidden_prev) {
-    //         prev = hidden_prev[i];
-    //       }
-    //       float hidden_value =
-    //           prev * (1.f - updata_gate[i]) + updata_gate[i] * cell_state[i];
-    //       hidden_prev[i] = hidden_value;
-    //       std::cout << "hidden_value::" << hidden_value << std::endl;
-    //     }
-    //   }
-    //   updata_gate += stride_update;
-    //   cell_state += stride_cell_state;
-    //   hidden_prev += frame_size;
-    // }
-  }
+                        int batch_size) {}
   void copy_input(GRUTensors& value) {  // NOLINT
     float max = find_max(*(value.gate));
     gate_ping_.mutableData<void>(FP32, value.gate->shape());
     gate_ping_.copyFrom(value.gate);
     // update input pointer?
-    // gate_.readFromFile("input/in.txt");
-    // // pre_input_.saveToFile("pppp_in.txt");
-    // gate_.scale()[0] = max / 127;
-    // gate_.scale()[1] = 127 / max;
-    // gate_.printScale("pre_input_");
-    // gate_.saveToFile("pre_input_.txt");
-    // pre_out_pe_.dispatch();
-    // pre_output_.saveToFile("pp_out.txt");
   }
   void GRUCOmpute(GRUTensors& value,  // NOLINT
@@ -272,25 +155,10 @@ class GRUPE : public PE {
     }
     gru_unit_reset_act(active_gate, value, frame_size, batch_size);
-    // if (value.pre_output) {
-    //   // state weight;
-    //   reset_out_pe_.dispatch();
-    // }
-    // gru_unit_out_act(active_node, origin_mode, value, frame_size,
-    // batch_size);
   }
   GRUParam& param() { return param_; }
-  // Tensor* preOutput() {
-  //   return &pre_output_;
-  // }
-  // Tensor* gate() {
-  //   return &gate_;
-  // }
   Tensor* updateGate() { return &update_gate_; }
   Tensor* resetGate() { return &reset_gate_; }
@@ -302,7 +170,6 @@ class GRUPE : public PE {
   zynqmp::Tensor bias_;
   zynqmp::Tensor weight_;
   zynqmp::Tensor state_weight_;
-  // =================================
   zynqmp::Tensor update_gate_;
   zynqmp::Tensor reset_gate_;
   zynqmp::Tensor cell_state_;
@@ -310,7 +177,6 @@
   zynqmp::Tensor reset_hidden_;
   Tensor tempTensor;
-  // =================================
   ReluPE update_relu_pe_;
   ReluPE reset_relu_pe_;
...
@@ -67,9 +67,6 @@ class PoolingPE : public PE {
     use_cpu_ = output->shape().width() == 1 && output->shape().height() == 1 &&
                (k_width > 7 || k_height > 7);
-    // use_cpu_ = output->shape().width() == 1 && output->shape().height() == 1
-    // &&
-    //            (k_width > 255 || k_height > 255);
     use_cpu_ = param_.type == AVERAGE;
   }
@@ -79,7 +76,6 @@ class PoolingPE : public PE {
     input->syncToCPU();
     Tensor float_input;
-    // Tensor float_output;
     float* image_addr = float_input.mutableData<float>(FP32, input->shape());
     float_input.copyFrom(input);
     float16* data_out = output->data<float16>();
@@ -192,9 +188,7 @@ class PoolingPE : public PE {
   bool dispatch() {
     if (use_cpu_) {
-      // cpu_compute();
       compute();
-      // exit(-1);
       return true;
     }
     param_.input->syncToDevice();
...
@@ -67,15 +67,12 @@ class ScalePE : public PE {
     Tensor* scale = dw_param.scale();
     float16* scale_data = scale->mutableData<float16>(FP16, shape);
-    // memcpy(scale_data, param_.scale->data<float>(), input->shape().channel()
-    // * sizeof(float));
     Tensor* bias = dw_param.bias();
     float16* bias_data = bias->mutableData<float16>(FP16, shape);
     std::fill_n(bias_data, input->shape().channel(), 0);
     if (param_.scale->dataType() == FP32) {
-      // std::cout << "scale dataType FP32:" << std::endl;
       if (param_.bias != nullptr) {
         float* bias_data_float = param_.bias->data<float>();
         for (int i = 0; i < repeat; i++) {
@@ -127,11 +124,6 @@ class ScalePE : public PE {
       }
     }
-    // if (param_.bias != nullptr) {
-    //   memcpy(bias_data, param_.bias->data<float>(), input->shape().channel()
-    //   * sizeof(float));
-    // }
     dw_param.input = param_.input;
     dw_param.output = param_.output;
     dw_param.filter = &filter;
@@ -182,9 +174,6 @@ class ScalePE : public PE {
   }
   bool dispatch() {
-    // cpu_compute();
-    // return true;
     if (param_.scale->dataType() == FP16) {
       DepthwiseConvParam& dw_param = dw_pe_.param();
       memcpy(dw_param.quantizedFilter()->mutableData<float16>(),
@@ -194,7 +183,6 @@ class ScalePE : public PE {
       dw_param.quantizedFilter()->scale()[1] = param_.scale->scale()[1];
       dw_param.quantizedFilter()->flush();
-      // apply();
     }
     param_.input->syncToDevice();
     return dw_pe_.dispatch();
...
@@ -348,19 +348,9 @@ class Tensor {
     if (placeHolder_ == nullptr) {
       return;
     }
-    std::cout << scale()[0] << " , " << scale()[1] << std::endl;
   }
-  void printScale(std::string type) {
-    std::cout << type << " : "
-              << std::to_string(shape_->num()) + "_" +
-                     std::to_string(shape_->channel()) + "_" +
-                     std::to_string(shape_->height()) + "_" +
-                     std::to_string(shape_->width())
-              << std::endl;
-    std::cout << type << " \n";
-    printScale();
-  }
+  void printScale(std::string type) { printScale(); }
   std::string dimsFileName() {
     return std::to_string(shape_->num()) + "_" +
@@ -388,12 +378,10 @@ class Tensor {
     static int counter = 0;
     std::string npath = std::to_string(counter) + "_" + path;
     counter++;
-    std::cout << "======== saving file:" << npath << " ============\n";
     save_file_with_name(npath);
   }
   void save_file_with_name(std::string path) {
-    // return;
     invalidate();
     std::ofstream ofs;
     ofs.open(path);
...
@@ -165,9 +165,6 @@ class TensorLite {
   TargetType target() const { return target_; }
-  // template <typename T>
-  // TensorLite Slice(int64_t begin, int64_t end) const;
   zynqmp::Tensor *ZynqTensor() const { return zynq_tensor_; }
   friend std::ostream &operator<<(std::ostream &os, const TensorLite &tensor) {
@@ -257,8 +254,6 @@ TensorLite TensorLite::Slice(int64_t begin, int64_t end) const {
   int64_t base = numel() / dims_[0];
   TensorLite dst;
-  // dst.buffer_ = buffer_;
-  // dst.zynq_tensor_ = zynq_tensor_;
   dst.target_ = target_;
   auto dst_dims = dims_;
   dst_dims[0] = end - begin;
@@ -271,7 +266,6 @@ TensorLite TensorLite::Slice(int64_t begin, int64_t end) const {
                            dst_dims.production() * sizeof(T));
   dst.ZynqTensor()->saveToFile("_slice", true);
-  // dst.offset_ = offset_ + static_cast<size_t>(begin * base) * sizeof(T);
   return dst;
 }
 }
...