提交 8e699af1 编写于 作者: C chonwhite

ReNext Pass

上级 337eb1bf
......@@ -48,26 +48,33 @@ class Debugger {
void tock(std::string key) {}
void setEnable(bool en) { enabled_ = en; }
private:
bool enabled_ = false;
std::unordered_map<std::string, bool> op_config;
std::unordered_map<std::string, float> tick_tock_map;
// Constructor: pre-populates the per-op debug configuration map.
// An op whose entry is true is eligible for tensor dumping / tracing by the
// Debugger hooks; ops absent from the map (or false) are skipped.
// NOTE(review): this rendering shows both an active set of entries
// (concat .. pb_variances = true) and a commented-out copy of the same keys;
// the apparent intent of the change is to enable only "softmax" — confirm
// which set should be live and delete the other.
Debugger() {
op_config["concat"] = true;
op_config["pooling"] = true;
op_config["conv"] = true;
op_config["dwconv"] = true;
op_config["ew_add"] = true;
op_config["crop"] = true;
op_config["feed"] = true;
op_config["mul"] = true;
op_config["fetch"] = true;
op_config["boxes"] = true;
op_config["scores"] = true;
op_config["nms"] = true;
op_config["pb_boxes"] = true;
op_config["pb_variances"] = true;
// op_config["concat"] = true;
// op_config["pooling"] = true;
// op_config["conv"] = true;
// op_config["dropout"] = true;
// op_config["dwconv"] = true;
// op_config["ew_add"] = true;
// op_config["ew_mul"] = true;
// op_config["crop"] = true;
// op_config["feed"] = true;
// op_config["fc"] = true;
op_config["softmax"] = true;
// op_config["mul"] = true;
// op_config["fetch"] = true;
// op_config["boxes"] = true;
// op_config["scores"] = true;
// op_config["nms"] = true;
// op_config["pb_boxes"] = true;
// op_config["pb_variances"] = true;
// op_config["softmax"] = true;
}
};
......
......@@ -240,6 +240,8 @@ int8_t* format_filter(float* data_in,
for (int n = 0; n < num; n++) {
float* filter_start = data_in + n * chw;
int8_t* quantized_start = quantized_data + n * chw;
// float f_max = find_max(filter_start, chw);
float f_max = max;
quantize(filter_start, quantized_start, chw, f_max);
filter_max.push_back(f_max);
}
......
......@@ -83,26 +83,34 @@ struct ConvParam : PEParam {
std::vector<int> kernelSize;
std::vector<int> dilations;
Tensor* scale() { return scale_; }
Tensor* scale() { return &scale_; }
Tensor* bias() { return bias_; }
Tensor* bias() { return &bias_; }
std::vector<BasicConvParam*>& splitParams() { return splitParams_; }
// Destructor: ConvParam owns the BasicConvParam objects accumulated in
// splitParams_, so release each one before clearing the container.
~ConvParam() {
  for (auto* split_param : splitParams_) {
    delete split_param;
  }
  splitParams_.clear();
}
protected:
std::vector<BasicConvParam*> splitParams_;
Tensor* scale_ = new Tensor();
Tensor* bias_ = new Tensor();
Tensor scale_;
Tensor bias_;
};
struct DepthwiseConvParam : ConvParam {
public:
Tensor* quantizedFilter() { return quantizedFilter_; }
Tensor* quantizedFilter() { return &quantizedFilter_; }
DWconvArgs args;
protected:
Tensor* quantizedFilter_ = new Tensor();
Tensor quantizedFilter_;
};
enum PoolingType : int {
......@@ -142,7 +150,7 @@ struct ElementwiseAddParam : PEParam {
struct ElementwiseMulParam : PEParam {
public:
Tensor* input_x;
Tensor* input_x = nullptr;
Tensor* input_y = nullptr;
Tensor* output = nullptr;
};
......@@ -154,13 +162,13 @@ struct FullyConnectedParam : PEParam {
Tensor* bias = nullptr;
Tensor* output = nullptr;
Tensor* quantizedFilter() { return quantizedFilter_; }
Tensor* quantizedFilter() { return &quantizedFilter_; }
Tensor* biasScale() { return biasScale_; }
Tensor* biasScale() { return &biasScale_; }
protected:
Tensor* quantizedFilter_ = new Tensor();
Tensor* biasScale_ = new Tensor();
Tensor quantizedFilter_;
Tensor biasScale_;
};
struct SoftmaxParam : PEParam {
......@@ -193,10 +201,10 @@ struct NormParam : PEParam {
};
struct PriorBoxParam : PEParam {
Tensor* input;
Tensor* image;
Tensor* outputBoxes;
Tensor* outputVariances;
Tensor* input = nullptr;
Tensor* image = nullptr;
Tensor* outputBoxes = nullptr;
Tensor* outputVariances = nullptr;
std::vector<float> minSizes;
std::vector<float> maxSizes;
......@@ -212,10 +220,10 @@ struct PriorBoxParam : PEParam {
};
struct YoloBoxParam : PEParam {
Tensor* input;
Tensor* imgSize;
Tensor* outputBoxes;
Tensor* outputScores;
Tensor* input = nullptr;
Tensor* imgSize = nullptr;
Tensor* outputBoxes = nullptr;
Tensor* outputScores = nullptr;
int downsampleRatio;
std::vector<int> anchors;
int classNum;
......@@ -229,15 +237,15 @@ struct ScaleParam : PEParam {
Tensor* scale = nullptr;
Tensor* bias = nullptr;
Tensor* alignedScale() { return alignedScale_; }
Tensor* alignedScale() { return &alignedScale_; }
Tensor* alignedBias() { return alignedBias_; }
Tensor* alignedBias() { return &alignedBias_; }
ScaleArgs args = {0};
protected:
Tensor* alignedScale_ = new Tensor();
Tensor* alignedBias_ = new Tensor();
Tensor alignedScale_;
Tensor alignedBias_;
};
struct ResizeParam : PEParam {
......
......@@ -195,16 +195,6 @@ class ConvPE : public PE {
addPE_.init();
addPE_.apply();
addPE_.dispatch();
// param_.output->printScale();
// params[0]->input.saveToFile("conv_1.txt");
// params[1]->input.saveToFile("conv_2.txt");
// params[0]->output.saveToFile("ew_o1.txt");
// params[1]->output.saveToFile("ew_o2.txt");
// std::cout << "\n ================== EW ================== \n";
// }
}
return ret == 0;
......@@ -212,6 +202,8 @@ class ConvPE : public PE {
ConvParam& param() { return param_; }
~ConvPE() {}
private:
bool use_cpu_ = false;
bool split_channel = false;
......
......@@ -23,43 +23,27 @@ class ReluPE : public PE {
public:
// Prepares the output tensor's metadata for the ReLU PE; always succeeds.
bool init() {
Tensor* output = param_.output;
// NOTE(review): each property below is set twice and the later call wins, so
// the effective configuration is aligned(param_.input->aligned()) + CPU.
// The first pair (true / Device) looks like stripped-diff merge residue —
// confirm the intended values and remove the redundant pair.
output->setAligned(true);
output->setDataLocation(Device);
output->setAligned(param_.input->aligned());
output->setDataLocation(CPU);
return true;
}
void apply() {
Tensor* src = param_.input;
args_.input_data_type = DATA_TYPE_FP16;
args_.output_data_type = DATA_TYPE_FP16;
args_.input_layout_type = LAYOUT_HWC;
args_.output_layout_type = LAYOUT_HWC;
args_.image = {.address = src->data<void>(),
.scale_address = src->scale(),
.channels = (uint32_t)src->shape().channel(),
.width = (uint32_t)src->shape().width(),
.height = (uint32_t)src->shape().height(),
.pad_width = 0u,
.pad_height = 0u};
args_.output = {
.address = param_.output->data<void>(),
.scale_address = param_.output->scale(),
};
inplace_.relu_enable = false;
inplace_.power_enable = false;
inplace_.normalize_enable = false;
}
void apply() {}
// Applies ReLU from param_.input to param_.output; always returns true.
// NOTE(review): this body interleaves two implementations — an FPGA in-place
// path (inplace_ relu toggle + copyFrom) and a CPU element-wise loop. This is
// almost certainly stripped-diff residue; confirm which path is intended and
// delete the other.
bool dispatch() {
inplace_.relu_enable = true;
config_inplace(inplace_);
param_.input->syncToDevice();
param_.output->copyFrom(param_.input);
param_.output->invalidate();
inplace_.relu_enable = false;
config_inplace(inplace_);
// CPU path: bring input into host-visible memory before reading it.
param_.input->invalidate();
int16_t* input_data = param_.input->data<int16_t>();
float16* out_data = param_.output->data<float16>();
for (int i = 0; i < param_.input->shape().alignedElementCount(); i++) {
// NOTE(review): reads the fp16 value but stores it in int16_t — appears to
// test the half-float sign via its bit pattern (negative iff sign bit set).
// Confirm float16 converts/reinterprets to int16_t as intended here.
int16_t v = param_.input->data<float16>()[i];
if (v > 0) {
// Positive: copy the raw 16-bit payload through unchanged.
out_data[i] = input_data[i];
} else {
out_data[i] = zero;
}
}
// ReLU preserves the input's quantization scale.
param_.output->copyScaleFrom(param_.input);
param_.output->flush();
return true;
}
......@@ -67,8 +51,7 @@ class ReluPE : public PE {
private:
InputParam param_;
BypassArgs args_;
InplaceArgs inplace_;
float16 zero = float_to_half(0.0f);
};
} // namespace zynqmp
......
......@@ -36,6 +36,7 @@ class ScalePE : public PE {
}
inline int lcm(int a, int b) { return a * b / gcd(a, b); }
bool init() {
Tensor* output = param_.output;
output->setAligned(true);
......
......@@ -283,7 +283,6 @@ class Tensor {
.address = data<void>(), .scale_address = scale(),
};
args.output = output;
src->syncToDevice();
size_t aligned_remainder = src->shape().numel() % 16;
if (aligned_remainder > 0) {
size_t dtype_size =
......@@ -293,7 +292,6 @@ class Tensor {
fpga_flush(dst, aligned_remainder * dtype_size);
}
src->syncToDevice();
this->invalidate();
perform_bypass(args);
this->invalidate();
}
......@@ -303,8 +301,7 @@ class Tensor {
return;
}
size_t memorySize =
shape_->memorySize(CellSize(dataType_)) * mem_scale_factor_;
size_t memorySize = placeHolder_->memorySize();
fpga_flush(placeHolder_->data(), memorySize);
}
......@@ -384,7 +381,6 @@ class Tensor {
}
void save_file_with_name(std::string path) {
invalidate();
std::ofstream ofs;
ofs.open(path);
ofs << scale()[0] << " / " << scale()[1] << std::endl;
......
......@@ -5,7 +5,7 @@ endif()
set(fpga_deps fpga_target_wrapper kernel_fpga)
# add_kernel(activation_compute_fpga FPGA basic SRCS activation_compute.cc DEPS ${fpga_deps})
add_kernel(activation_compute_fpga FPGA basic SRCS activation_compute.cc DEPS ${fpga_deps})
# add_kernel(box_coder_compute_fpga FPGA basic SRCS box_coder_compute.cc DEPS ${fpga_deps})
# add_kernel(concat_compute_fpga FPGA basic SRCS concat_compute.cc DEPS ${fpga_deps})
......@@ -25,7 +25,7 @@ add_kernel(norm_compute_fpga FPGA basic SRCS norm_compute.cc DEPS ${fpga_deps})
# add_kernel(im2sequence_compute_fpga FPGA basic SRCS im2sequence_compute.cc DEPS ${fpga_deps})
add_kernel(pooling_compute_fpga FPGA basic SRCS pooling_compute.cc DEPS ${fpga_deps})
add_kernel(prior_box_compute_fpga FPGA basic SRCS prior_box_compute.cc DEPS ${fpga_deps})
# add_kernel(reshape_compute_fpga FPGA basic SRCS reshape_compute.cc DEPS ${fpga_deps} reshape_op)
add_kernel(reshape_compute_fpga FPGA basic SRCS reshape_compute.cc DEPS ${fpga_deps} reshape_op)
# add_kernel(sequence_pool_compute_fpga FPGA basic SRCS sequence_pool_compute.cc DEPS ${fpga_deps})
add_kernel(scale_compute_fpga FPGA basic SRCS scale_compute.cc DEPS ${fpga_deps})
# add_kernel(softmax_compute_fpga FPGA basic SRCS softmax_compute.cc DEPS ${fpga_deps})
......
......@@ -25,10 +25,10 @@ using float16 = zynqmp::float16;
// One-time setup: binds the op's input/output Zynq tensors to the ReLU PE's
// parameter struct, then initializes and applies the PE.
void ReluCompute::PrepareForRun() {
auto& param = this->Param<param_t>();
// Allocates/types the output buffer as fp16 before the PE is configured.
auto output_data = param.Out->mutable_data<float16>();
// NOTE(review): input_param and relu_param both alias the same pe_.param(),
// so the second pair of assignments is redundant — this looks like a
// rename (input_param -> relu_param) with the diff markers stripped; keep
// one name and delete the duplicate lines.
zynqmp::InputParam& input_param = pe_.param();
zynqmp::InputParam& relu_param = pe_.param();
input_param.input = param.X->ZynqTensor();
input_param.output = param.Out->ZynqTensor();
relu_param.input = param.X->ZynqTensor();
relu_param.output = param.Out->ZynqTensor();
pe_.init();
pe_.apply();
}
......
......@@ -40,6 +40,7 @@ void ElementwiseAddCompute::PrepareForRun() {
pe_.apply();
}
void ElementwiseAddCompute::Run() {
usleep(50 * 100 * 1000);
pe_.dispatch();
#ifdef FPGA_PRINT_TENSOR
zynqmp::ElementwiseAddParam& ew_param = pe_.param();
......@@ -62,6 +63,7 @@ void ElementwiseAddActivationCompute::PrepareForRun() {
pe_.apply();
}
void ElementwiseAddActivationCompute::Run() {
usleep(500 * 100 * 1000);
pe_.dispatch();
#ifdef FPGA_PRINT_TENSOR
zynqmp::ElementwiseAddParam& ew_param = pe_.param();
......@@ -80,21 +82,21 @@ void ElementwiseMulCompute::PrepareForRun() {
scale_param.activeParam.type = zynqmp::TYPE_NONE;
int channel = scale_param.input->shape().channel();
zynqmp::Tensor* scale = new zynqmp::Tensor();
zynqmp::Tensor* bias = new zynqmp::Tensor();
scale_param.scale = scale;
scale_param.bias = bias;
scale_param.scale = &scale_;
scale_param.bias = &bias_;
zynqmp::Shape shape(zynqmp::N, {channel});
float* scale_data = scale->mutableData<float>(zynqmp::FP32, shape);
float* bias_data = bias->mutableData<float>(zynqmp::FP32, shape);
zynqmp::float16* scale_data =
scale_.mutableData<zynqmp::float16>(zynqmp::FP16, shape);
zynqmp::float16* bias_data =
bias_.mutableData<zynqmp::float16>(zynqmp::FP16, shape);
float scale_value = param.Y->data<float>()[0];
for (int i = 0; i < channel; ++i) {
for (int i = 0; i < channel; i++) {
if (param.Y->dims().production() != 1) {
scale_value = param.Y->ZynqTensor()->data<float>()[i];
}
scale_data[i] = scale_value;
bias_data[i] = 0;
scale_data[i] = zynqmp::float_to_half(scale_value);
bias_data[i] = zero_;
}
pe_.init();
......@@ -102,6 +104,10 @@ void ElementwiseMulCompute::PrepareForRun() {
}
void ElementwiseMulCompute::Run() {
auto& param = Param<operators::ElementwiseParam>();
param.Y->ZynqTensor()->flush();
scale_.copyFrom(param.Y->ZynqTensor());
scale_.invalidate();
pe_.dispatch();
#ifdef FPGA_PRINT_TENSOR
zynqmp::ScaleParam& scale_param = pe_.param();
......
......@@ -61,6 +61,9 @@ class ElementwiseMulCompute
private:
zynqmp::ScalePE pe_;
zynqmp::Tensor scale_;
zynqmp::Tensor bias_;
zynqmp::float16 zero_ = zynqmp::float_to_half(0.0f);
};
} // namespace fpga
......
......@@ -55,6 +55,7 @@ void FetchCompute::Run() {
#ifdef FPGA_PRINT_TENSOR
zynqmp::OutputParam& fetch_param = pe_.param();
Debugger::get_instance().registerOutput("fetch", fetch_param.output);
Debugger::get_instance().setEnable(true);
#endif
}
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册