提交 8e699af1 编写于 作者: C chonwhite

ReNext Pass

上级 337eb1bf
...@@ -48,26 +48,33 @@ class Debugger { ...@@ -48,26 +48,33 @@ class Debugger {
void tock(std::string key) {} void tock(std::string key) {}
void setEnable(bool en) { enabled_ = en; }
private: private:
bool enabled_ = false;
std::unordered_map<std::string, bool> op_config; std::unordered_map<std::string, bool> op_config;
std::unordered_map<std::string, float> tick_tock_map; std::unordered_map<std::string, float> tick_tock_map;
Debugger() { Debugger() {
op_config["concat"] = true; // op_config["concat"] = true;
op_config["pooling"] = true; // op_config["pooling"] = true;
op_config["conv"] = true; // op_config["conv"] = true;
op_config["dwconv"] = true; // op_config["dropout"] = true;
op_config["ew_add"] = true; // op_config["dwconv"] = true;
op_config["crop"] = true; // op_config["ew_add"] = true;
op_config["feed"] = true; // op_config["ew_mul"] = true;
op_config["mul"] = true; // op_config["crop"] = true;
op_config["fetch"] = true; // op_config["feed"] = true;
op_config["boxes"] = true;
op_config["scores"] = true;
op_config["nms"] = true;
op_config["pb_boxes"] = true;
op_config["pb_variances"] = true;
// op_config["fc"] = true; // op_config["fc"] = true;
op_config["softmax"] = true; // op_config["mul"] = true;
// op_config["fetch"] = true;
// op_config["boxes"] = true;
// op_config["scores"] = true;
// op_config["nms"] = true;
// op_config["pb_boxes"] = true;
// op_config["pb_variances"] = true;
// op_config["softmax"] = true;
} }
}; };
......
...@@ -240,6 +240,8 @@ int8_t* format_filter(float* data_in, ...@@ -240,6 +240,8 @@ int8_t* format_filter(float* data_in,
for (int n = 0; n < num; n++) { for (int n = 0; n < num; n++) {
float* filter_start = data_in + n * chw; float* filter_start = data_in + n * chw;
int8_t* quantized_start = quantized_data + n * chw; int8_t* quantized_start = quantized_data + n * chw;
// float f_max = find_max(filter_start, chw);
float f_max = max;
quantize(filter_start, quantized_start, chw, f_max); quantize(filter_start, quantized_start, chw, f_max);
filter_max.push_back(f_max); filter_max.push_back(f_max);
} }
......
...@@ -83,26 +83,34 @@ struct ConvParam : PEParam { ...@@ -83,26 +83,34 @@ struct ConvParam : PEParam {
std::vector<int> kernelSize; std::vector<int> kernelSize;
std::vector<int> dilations; std::vector<int> dilations;
Tensor* scale() { return scale_; } Tensor* scale() { return &scale_; }
Tensor* bias() { return bias_; } Tensor* bias() { return &bias_; }
std::vector<BasicConvParam*>& splitParams() { return splitParams_; } std::vector<BasicConvParam*>& splitParams() { return splitParams_; }
/// Releases every BasicConvParam stored in splitParams_ and empties the vector.
///
/// The vector holds raw pointers and each one is deleted here, so the elements
/// are treated as owned by this ConvParam.
/// NOTE(review): no copy constructor / copy assignment is visible in this hunk;
/// if a ConvParam were copied, both objects would delete the same pointers.
/// Confirm that copies are never made, or disable them.
~ConvParam() {
  // Range-for avoids comparing a signed `int` index against the unsigned
  // result of size().
  for (BasicConvParam* basic_param : splitParams_) {
    delete basic_param;
  }
  splitParams_.clear();
}
protected: protected:
std::vector<BasicConvParam*> splitParams_; std::vector<BasicConvParam*> splitParams_;
Tensor* scale_ = new Tensor(); Tensor scale_;
Tensor* bias_ = new Tensor(); Tensor bias_;
}; };
struct DepthwiseConvParam : ConvParam { struct DepthwiseConvParam : ConvParam {
public: public:
Tensor* quantizedFilter() { return quantizedFilter_; } Tensor* quantizedFilter() { return &quantizedFilter_; }
DWconvArgs args; DWconvArgs args;
protected: protected:
Tensor* quantizedFilter_ = new Tensor(); Tensor quantizedFilter_;
}; };
enum PoolingType : int { enum PoolingType : int {
...@@ -142,7 +150,7 @@ struct ElementwiseAddParam : PEParam { ...@@ -142,7 +150,7 @@ struct ElementwiseAddParam : PEParam {
struct ElementwiseMulParam : PEParam { struct ElementwiseMulParam : PEParam {
public: public:
Tensor* input_x; Tensor* input_x = nullptr;
Tensor* input_y = nullptr; Tensor* input_y = nullptr;
Tensor* output = nullptr; Tensor* output = nullptr;
}; };
...@@ -154,13 +162,13 @@ struct FullyConnectedParam : PEParam { ...@@ -154,13 +162,13 @@ struct FullyConnectedParam : PEParam {
Tensor* bias = nullptr; Tensor* bias = nullptr;
Tensor* output = nullptr; Tensor* output = nullptr;
Tensor* quantizedFilter() { return quantizedFilter_; } Tensor* quantizedFilter() { return &quantizedFilter_; }
Tensor* biasScale() { return biasScale_; } Tensor* biasScale() { return &biasScale_; }
protected: protected:
Tensor* quantizedFilter_ = new Tensor(); Tensor quantizedFilter_;
Tensor* biasScale_ = new Tensor(); Tensor biasScale_;
}; };
struct SoftmaxParam : PEParam { struct SoftmaxParam : PEParam {
...@@ -193,10 +201,10 @@ struct NormParam : PEParam { ...@@ -193,10 +201,10 @@ struct NormParam : PEParam {
}; };
struct PriorBoxParam : PEParam { struct PriorBoxParam : PEParam {
Tensor* input; Tensor* input = nullptr;
Tensor* image; Tensor* image = nullptr;
Tensor* outputBoxes; Tensor* outputBoxes = nullptr;
Tensor* outputVariances; Tensor* outputVariances = nullptr;
std::vector<float> minSizes; std::vector<float> minSizes;
std::vector<float> maxSizes; std::vector<float> maxSizes;
...@@ -212,10 +220,10 @@ struct PriorBoxParam : PEParam { ...@@ -212,10 +220,10 @@ struct PriorBoxParam : PEParam {
}; };
struct YoloBoxParam : PEParam { struct YoloBoxParam : PEParam {
Tensor* input; Tensor* input = nullptr;
Tensor* imgSize; Tensor* imgSize = nullptr;
Tensor* outputBoxes; Tensor* outputBoxes = nullptr;
Tensor* outputScores; Tensor* outputScores = nullptr;
int downsampleRatio; int downsampleRatio;
std::vector<int> anchors; std::vector<int> anchors;
int classNum; int classNum;
...@@ -229,15 +237,15 @@ struct ScaleParam : PEParam { ...@@ -229,15 +237,15 @@ struct ScaleParam : PEParam {
Tensor* scale = nullptr; Tensor* scale = nullptr;
Tensor* bias = nullptr; Tensor* bias = nullptr;
Tensor* alignedScale() { return alignedScale_; } Tensor* alignedScale() { return &alignedScale_; }
Tensor* alignedBias() { return alignedBias_; } Tensor* alignedBias() { return &alignedBias_; }
ScaleArgs args = {0}; ScaleArgs args = {0};
protected: protected:
Tensor* alignedScale_ = new Tensor(); Tensor alignedScale_;
Tensor* alignedBias_ = new Tensor(); Tensor alignedBias_;
}; };
struct ResizeParam : PEParam { struct ResizeParam : PEParam {
......
...@@ -195,16 +195,6 @@ class ConvPE : public PE { ...@@ -195,16 +195,6 @@ class ConvPE : public PE {
addPE_.init(); addPE_.init();
addPE_.apply(); addPE_.apply();
addPE_.dispatch(); addPE_.dispatch();
// param_.output->printScale();
// params[0]->input.saveToFile("conv_1.txt");
// params[1]->input.saveToFile("conv_2.txt");
// params[0]->output.saveToFile("ew_o1.txt");
// params[1]->output.saveToFile("ew_o2.txt");
// std::cout << "\n ================== EW ================== \n";
// }
} }
return ret == 0; return ret == 0;
...@@ -212,6 +202,8 @@ class ConvPE : public PE { ...@@ -212,6 +202,8 @@ class ConvPE : public PE {
ConvParam& param() { return param_; } ConvParam& param() { return param_; }
~ConvPE() {}
private: private:
bool use_cpu_ = false; bool use_cpu_ = false;
bool split_channel = false; bool split_channel = false;
......
...@@ -23,43 +23,27 @@ class ReluPE : public PE { ...@@ -23,43 +23,27 @@ class ReluPE : public PE {
public: public:
bool init() { bool init() {
Tensor* output = param_.output; Tensor* output = param_.output;
output->setAligned(true); output->setAligned(param_.input->aligned());
output->setDataLocation(Device); output->setDataLocation(CPU);
return true; return true;
} }
void apply() { void apply() {}
Tensor* src = param_.input;
args_.input_data_type = DATA_TYPE_FP16;
args_.output_data_type = DATA_TYPE_FP16;
args_.input_layout_type = LAYOUT_HWC;
args_.output_layout_type = LAYOUT_HWC;
args_.image = {.address = src->data<void>(),
.scale_address = src->scale(),
.channels = (uint32_t)src->shape().channel(),
.width = (uint32_t)src->shape().width(),
.height = (uint32_t)src->shape().height(),
.pad_width = 0u,
.pad_height = 0u};
args_.output = {
.address = param_.output->data<void>(),
.scale_address = param_.output->scale(),
};
inplace_.relu_enable = false;
inplace_.power_enable = false;
inplace_.normalize_enable = false;
}
bool dispatch() { bool dispatch() {
inplace_.relu_enable = true; param_.input->invalidate();
config_inplace(inplace_); int16_t* input_data = param_.input->data<int16_t>();
param_.input->syncToDevice(); float16* out_data = param_.output->data<float16>();
param_.output->copyFrom(param_.input); for (int i = 0; i < param_.input->shape().alignedElementCount(); i++) {
param_.output->invalidate(); int16_t v = param_.input->data<float16>()[i];
inplace_.relu_enable = false; if (v > 0) {
config_inplace(inplace_); out_data[i] = input_data[i];
} else {
out_data[i] = zero;
}
}
param_.output->copyScaleFrom(param_.input);
param_.output->flush();
return true; return true;
} }
...@@ -67,8 +51,7 @@ class ReluPE : public PE { ...@@ -67,8 +51,7 @@ class ReluPE : public PE {
private: private:
InputParam param_; InputParam param_;
BypassArgs args_; float16 zero = float_to_half(0.0f);
InplaceArgs inplace_;
}; };
} // namespace zynqmp } // namespace zynqmp
......
...@@ -36,6 +36,7 @@ class ScalePE : public PE { ...@@ -36,6 +36,7 @@ class ScalePE : public PE {
} }
inline int lcm(int a, int b) { return a * b / gcd(a, b); } inline int lcm(int a, int b) { return a * b / gcd(a, b); }
bool init() { bool init() {
Tensor* output = param_.output; Tensor* output = param_.output;
output->setAligned(true); output->setAligned(true);
......
...@@ -283,7 +283,6 @@ class Tensor { ...@@ -283,7 +283,6 @@ class Tensor {
.address = data<void>(), .scale_address = scale(), .address = data<void>(), .scale_address = scale(),
}; };
args.output = output; args.output = output;
src->syncToDevice();
size_t aligned_remainder = src->shape().numel() % 16; size_t aligned_remainder = src->shape().numel() % 16;
if (aligned_remainder > 0) { if (aligned_remainder > 0) {
size_t dtype_size = size_t dtype_size =
...@@ -293,7 +292,6 @@ class Tensor { ...@@ -293,7 +292,6 @@ class Tensor {
fpga_flush(dst, aligned_remainder * dtype_size); fpga_flush(dst, aligned_remainder * dtype_size);
} }
src->syncToDevice(); src->syncToDevice();
this->invalidate();
perform_bypass(args); perform_bypass(args);
this->invalidate(); this->invalidate();
} }
...@@ -303,8 +301,7 @@ class Tensor { ...@@ -303,8 +301,7 @@ class Tensor {
return; return;
} }
size_t memorySize = size_t memorySize = placeHolder_->memorySize();
shape_->memorySize(CellSize(dataType_)) * mem_scale_factor_;
fpga_flush(placeHolder_->data(), memorySize); fpga_flush(placeHolder_->data(), memorySize);
} }
...@@ -384,7 +381,6 @@ class Tensor { ...@@ -384,7 +381,6 @@ class Tensor {
} }
void save_file_with_name(std::string path) { void save_file_with_name(std::string path) {
invalidate();
std::ofstream ofs; std::ofstream ofs;
ofs.open(path); ofs.open(path);
ofs << scale()[0] << " / " << scale()[1] << std::endl; ofs << scale()[0] << " / " << scale()[1] << std::endl;
......
...@@ -5,7 +5,7 @@ endif() ...@@ -5,7 +5,7 @@ endif()
set(fpga_deps fpga_target_wrapper kernel_fpga) set(fpga_deps fpga_target_wrapper kernel_fpga)
# add_kernel(activation_compute_fpga FPGA basic SRCS activation_compute.cc DEPS ${fpga_deps}) add_kernel(activation_compute_fpga FPGA basic SRCS activation_compute.cc DEPS ${fpga_deps})
# add_kernel(box_coder_compute_fpga FPGA basic SRCS box_coder_compute.cc DEPS ${fpga_deps}) # add_kernel(box_coder_compute_fpga FPGA basic SRCS box_coder_compute.cc DEPS ${fpga_deps})
# add_kernel(concat_compute_fpga FPGA basic SRCS concat_compute.cc DEPS ${fpga_deps}) # add_kernel(concat_compute_fpga FPGA basic SRCS concat_compute.cc DEPS ${fpga_deps})
...@@ -25,7 +25,7 @@ add_kernel(norm_compute_fpga FPGA basic SRCS norm_compute.cc DEPS ${fpga_deps}) ...@@ -25,7 +25,7 @@ add_kernel(norm_compute_fpga FPGA basic SRCS norm_compute.cc DEPS ${fpga_deps})
# add_kernel(im2sequence_compute_fpga FPGA basic SRCS im2sequence_compute.cc DEPS ${fpga_deps}) # add_kernel(im2sequence_compute_fpga FPGA basic SRCS im2sequence_compute.cc DEPS ${fpga_deps})
add_kernel(pooling_compute_fpga FPGA basic SRCS pooling_compute.cc DEPS ${fpga_deps}) add_kernel(pooling_compute_fpga FPGA basic SRCS pooling_compute.cc DEPS ${fpga_deps})
add_kernel(prior_box_compute_fpga FPGA basic SRCS prior_box_compute.cc DEPS ${fpga_deps}) add_kernel(prior_box_compute_fpga FPGA basic SRCS prior_box_compute.cc DEPS ${fpga_deps})
# add_kernel(reshape_compute_fpga FPGA basic SRCS reshape_compute.cc DEPS ${fpga_deps} reshape_op) add_kernel(reshape_compute_fpga FPGA basic SRCS reshape_compute.cc DEPS ${fpga_deps} reshape_op)
# add_kernel(sequence_pool_compute_fpga FPGA basic SRCS sequence_pool_compute.cc DEPS ${fpga_deps}) # add_kernel(sequence_pool_compute_fpga FPGA basic SRCS sequence_pool_compute.cc DEPS ${fpga_deps})
add_kernel(scale_compute_fpga FPGA basic SRCS scale_compute.cc DEPS ${fpga_deps}) add_kernel(scale_compute_fpga FPGA basic SRCS scale_compute.cc DEPS ${fpga_deps})
# add_kernel(softmax_compute_fpga FPGA basic SRCS softmax_compute.cc DEPS ${fpga_deps}) # add_kernel(softmax_compute_fpga FPGA basic SRCS softmax_compute.cc DEPS ${fpga_deps})
......
...@@ -25,10 +25,10 @@ using float16 = zynqmp::float16; ...@@ -25,10 +25,10 @@ using float16 = zynqmp::float16;
void ReluCompute::PrepareForRun() { void ReluCompute::PrepareForRun() {
auto& param = this->Param<param_t>(); auto& param = this->Param<param_t>();
auto output_data = param.Out->mutable_data<float16>(); auto output_data = param.Out->mutable_data<float16>();
zynqmp::InputParam& input_param = pe_.param(); zynqmp::InputParam& relu_param = pe_.param();
input_param.input = param.X->ZynqTensor(); relu_param.input = param.X->ZynqTensor();
input_param.output = param.Out->ZynqTensor(); relu_param.output = param.Out->ZynqTensor();
pe_.init(); pe_.init();
pe_.apply(); pe_.apply();
} }
......
...@@ -40,6 +40,7 @@ void ElementwiseAddCompute::PrepareForRun() { ...@@ -40,6 +40,7 @@ void ElementwiseAddCompute::PrepareForRun() {
pe_.apply(); pe_.apply();
} }
void ElementwiseAddCompute::Run() { void ElementwiseAddCompute::Run() {
usleep(50 * 100 * 1000);
pe_.dispatch(); pe_.dispatch();
#ifdef FPGA_PRINT_TENSOR #ifdef FPGA_PRINT_TENSOR
zynqmp::ElementwiseAddParam& ew_param = pe_.param(); zynqmp::ElementwiseAddParam& ew_param = pe_.param();
...@@ -62,6 +63,7 @@ void ElementwiseAddActivationCompute::PrepareForRun() { ...@@ -62,6 +63,7 @@ void ElementwiseAddActivationCompute::PrepareForRun() {
pe_.apply(); pe_.apply();
} }
void ElementwiseAddActivationCompute::Run() { void ElementwiseAddActivationCompute::Run() {
usleep(500 * 100 * 1000);
pe_.dispatch(); pe_.dispatch();
#ifdef FPGA_PRINT_TENSOR #ifdef FPGA_PRINT_TENSOR
zynqmp::ElementwiseAddParam& ew_param = pe_.param(); zynqmp::ElementwiseAddParam& ew_param = pe_.param();
...@@ -80,21 +82,21 @@ void ElementwiseMulCompute::PrepareForRun() { ...@@ -80,21 +82,21 @@ void ElementwiseMulCompute::PrepareForRun() {
scale_param.activeParam.type = zynqmp::TYPE_NONE; scale_param.activeParam.type = zynqmp::TYPE_NONE;
int channel = scale_param.input->shape().channel(); int channel = scale_param.input->shape().channel();
zynqmp::Tensor* scale = new zynqmp::Tensor(); scale_param.scale = &scale_;
zynqmp::Tensor* bias = new zynqmp::Tensor(); scale_param.bias = &bias_;
scale_param.scale = scale;
scale_param.bias = bias;
zynqmp::Shape shape(zynqmp::N, {channel}); zynqmp::Shape shape(zynqmp::N, {channel});
float* scale_data = scale->mutableData<float>(zynqmp::FP32, shape); zynqmp::float16* scale_data =
float* bias_data = bias->mutableData<float>(zynqmp::FP32, shape); scale_.mutableData<zynqmp::float16>(zynqmp::FP16, shape);
zynqmp::float16* bias_data =
bias_.mutableData<zynqmp::float16>(zynqmp::FP16, shape);
float scale_value = param.Y->data<float>()[0]; float scale_value = param.Y->data<float>()[0];
for (int i = 0; i < channel; ++i) { for (int i = 0; i < channel; i++) {
if (param.Y->dims().production() != 1) { if (param.Y->dims().production() != 1) {
scale_value = param.Y->ZynqTensor()->data<float>()[i]; scale_value = param.Y->ZynqTensor()->data<float>()[i];
} }
scale_data[i] = scale_value; scale_data[i] = zynqmp::float_to_half(scale_value);
bias_data[i] = 0; bias_data[i] = zero_;
} }
pe_.init(); pe_.init();
...@@ -102,6 +104,10 @@ void ElementwiseMulCompute::PrepareForRun() { ...@@ -102,6 +104,10 @@ void ElementwiseMulCompute::PrepareForRun() {
} }
void ElementwiseMulCompute::Run() { void ElementwiseMulCompute::Run() {
auto& param = Param<operators::ElementwiseParam>();
param.Y->ZynqTensor()->flush();
scale_.copyFrom(param.Y->ZynqTensor());
scale_.invalidate();
pe_.dispatch(); pe_.dispatch();
#ifdef FPGA_PRINT_TENSOR #ifdef FPGA_PRINT_TENSOR
zynqmp::ScaleParam& scale_param = pe_.param(); zynqmp::ScaleParam& scale_param = pe_.param();
......
...@@ -61,6 +61,9 @@ class ElementwiseMulCompute ...@@ -61,6 +61,9 @@ class ElementwiseMulCompute
private: private:
zynqmp::ScalePE pe_; zynqmp::ScalePE pe_;
zynqmp::Tensor scale_;
zynqmp::Tensor bias_;
zynqmp::float16 zero_ = zynqmp::float_to_half(0.0f);
}; };
} // namespace fpga } // namespace fpga
......
...@@ -55,6 +55,7 @@ void FetchCompute::Run() { ...@@ -55,6 +55,7 @@ void FetchCompute::Run() {
#ifdef FPGA_PRINT_TENSOR #ifdef FPGA_PRINT_TENSOR
zynqmp::OutputParam& fetch_param = pe_.param(); zynqmp::OutputParam& fetch_param = pe_.param();
Debugger::get_instance().registerOutput("fetch", fetch_param.output); Debugger::get_instance().registerOutput("fetch", fetch_param.output);
Debugger::get_instance().setEnable(true);
#endif #endif
} }
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册