Commit 46f36ceb authored by C chonwhite

MobileNet v1 & v2 work

Parent d3d793c7
......@@ -19,12 +19,13 @@
#include <string>
#include <unordered_map>
#include "lite/core/program.h"
#include "lite/core/tensor.h"
namespace paddle {
namespace lite {
// #define FPGA_PRINT_TENSOR
#define FPGA_PRINT_TENSOR
class Debugger {
public:
......@@ -35,7 +36,7 @@ class Debugger {
void registerOutput(std::string op_type, zynqmp::Tensor* tensor) {
if (op_config[op_type]) {
tensor->saveToFile(op_type, true);
// tensor->saveToFile(op_type, true);
}
}
......
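For context on the Debugger change above: kernels elsewhere in this commit hand their outputs to this singleton (the reshape kernel later in this diff calls `Debugger::get_instance().registerOutput("reshape", output->ZynqTensor())`), and `registerOutput` only acts on op types enabled in `op_config` when `FPGA_PRINT_TENSOR` is defined. A minimal kernel-side sketch, assuming a `zynqmp::Tensor* output` owned by the caller:

```cpp
#include "lite/backends/fpga/KD/debugger.hpp"

// Sketch only: how a kernel would hand its output tensor to the Debugger
// singleton. "conv" must match a key in the Debugger's op_config map;
// registerOutput checks op_config[op_type] before dumping anything.
void dump_conv_output(zynqmp::Tensor* output) {
#ifdef FPGA_PRINT_TENSOR
  paddle::lite::Debugger::get_instance().registerOutput("conv", output);
#endif
}
```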
......@@ -72,18 +72,110 @@ class ConvPE : public PE {
}
if (param_.filter->shape().width() == 1 &&
param_.filter->shape().num() % 16 != 0) {
use_cpu_ = true;
// use_cpu_ = true;
}
if (!use_cpu_) {
// param_.filter->releaseData();
}
}
void cpu_conv_half_hwc() {
Tensor* input = param_.input;
Tensor* output = param_.output;
Shape& input_shape = input->shape();
Shape& out_shape = output->shape();
int image_height = input_shape.height();
int image_width = input_shape.width();
int image_channels = input_shape.channel();
int image_pad_h = param_.paddings[0];
int image_pad_w = param_.paddings[0];
int kernel_height = param_.filter->shape().height();
int kernel_width = param_.filter->shape().width();
int kernel_step_h = param_.strides[0];
int kernel_step_w = param_.strides[1];
int dilation_rate = 1;
int out_channel = out_shape.channel();
int pooled_height_ = out_shape.height();
int pooled_width_ = out_shape.width();
int filter_chw = image_channels * kernel_height * kernel_width;
int kernel_rw = kernel_width + (dilation_rate - 1) * (kernel_width - 1);
int kernel_rh = kernel_height + (dilation_rate - 1) * (kernel_height - 1);
float* weight = param_.filter->data<float>();
Tensor float_input;
Tensor float_output;
float* image_addr = float_input.mutableData<float>(FP32, input->shape());
float_input.copyFrom(input);
// exit(-1);
float* out = float_output.mutableData<float>(FP32, output->shape());
for (int ph = 0; ph < pooled_height_; ph++) {
for (int pw = 0; pw < pooled_width_; pw++) {
int hstart = ph * kernel_step_h - image_pad_h;
int wstart = pw * kernel_step_w - image_pad_w;
int hend = std::min(hstart + kernel_rh, (int)image_height);
int wend = std::min(wstart + kernel_rw, (int)image_width);
int hstart_plus =
dilation_rate * ceil(float(image_pad_h - ph * kernel_step_h) /
float(dilation_rate)) -
image_pad_h + ph * kernel_step_h;
int wstart_plus =
dilation_rate * ceil(float(image_pad_w - pw * kernel_step_w) /
float(dilation_rate)) -
image_pad_w + pw * kernel_step_w;
int hstart_ = hstart < 0 ? hstart_plus : hstart;
int wstart_ = wstart < 0 ? wstart_plus : wstart;
for (int oc = 0; oc < out_channel; oc++) {
float sum = 0.0f;
const int pool_index = (ph * pooled_width_ + pw) * out_channel + oc;
for (int c = 0; c < image_channels; c++) {
for (int h = hstart_; h < hend; h += dilation_rate) {
int hi = 0;
if (hstart < 0) {
hi = (kernel_rh - (hend - h)) / dilation_rate;
} else {
hi = (h - hstart_) / dilation_rate;
}
for (int w = wstart_; w < wend; w += dilation_rate) {
int wi = 0;
if (wstart < 0) {
wi = (kernel_rw - (wend - w)) / dilation_rate;
} else {
wi = (w - wstart_) / dilation_rate;
}
const int index = (h * image_width + w) * image_channels + c;
int weight_index = oc * filter_chw +
kernel_width * kernel_height * c +
kernel_width * hi + wi;
float value = image_addr[index] * weight[weight_index];
sum += value;
}
}
}
float s = param_.scale()->data<float>()[oc];
float b = param_.bias()->data<float>()[oc];
out[pool_index] = sum * s + b;
}
}
}
float_output.saveToFile("fo", true);
exit(-1);
}
void cpu_compute() {
Tensor* input = param_.input;
Tensor* output = param_.output;
input->syncToCPU();
// input->saveToFile("input", true);
// input->syncToCPU();
Tensor float_input;
Tensor float_output;
......@@ -117,24 +209,39 @@ class ConvPE : public PE {
for (int j = 0; j < in_channel; j++) {
sum += mi[j];
}
sum *= param_.scale()->data<float>()[i];
sum += param_.bias()->data<float>()[i];
out[i * wh + k] = sum;
max = std::max(max, std::abs(sum));
float fv = sum;
float s = param_.scale()->data<float>()[i];
float b = param_.bias()->data<float>()[i];
fv *= s;
fv += b;
// std::cout << "\n" << fv << " = " << sum << " x " << s << " + " << b
// << std::endl;
out[i * wh + k] = fv;
max = std::max(max, std::abs(fv));
}
}
delete[] mi;
param_.bias()->saveToFile("bias", true);
exit(-1);
float_output.flush();
float_output.saveToFile("float_output", true);
output->copyFrom(&float_output);
output->invalidate();
output->scale()[0] = max / 127.0;
output->scale()[1] = 127.0 / max;
// output->saveToFile("cpu", true);
}
bool dispatch() {
fpga_reset();
// fpga_reset();
if (use_cpu_) {
cpu_compute();
// cpu_compute();
cpu_conv_half_hwc();
return true;
}
......
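A note on the two CPU fallbacks above: `cpu_conv_half_hwc` walks the input in NHWC order (input index `(h * image_width + w) * image_channels + c`, filter index `oc * filter_chw + c * Kh * Kw + hi * Kw + wi`) and finishes each output channel with the per-channel scale and bias, while `cpu_compute` additionally tracks the running max |value| and writes `output->scale()` as `{max / 127, 127 / max}`. Below is a simplified sketch of the same NHWC computation, assuming no dilation, a single symmetric padding value, and plain float buffers instead of `zynqmp::Tensor`:

```cpp
#include <cstddef>
#include <vector>

// Simplified sketch of the NHWC CPU convolution fallback used above.
// Assumptions (not taken from the diff): dilation = 1, one symmetric padding
// value, raw float vectors instead of zynqmp::Tensor, zero padding.
void naive_conv_nhwc(const std::vector<float>& in, int H, int W, int C,
                     const std::vector<float>& filter,  // [OC, C, Kh, Kw]
                     const std::vector<float>& scale,   // per output channel
                     const std::vector<float>& bias,    // per output channel
                     int OC, int Kh, int Kw, int stride, int pad,
                     std::vector<float>* out /* [Ho, Wo, OC] */) {
  int Ho = (H + 2 * pad - Kh) / stride + 1;
  int Wo = (W + 2 * pad - Kw) / stride + 1;
  out->assign(static_cast<std::size_t>(Ho) * Wo * OC, 0.f);
  for (int ph = 0; ph < Ho; ph++) {
    for (int pw = 0; pw < Wo; pw++) {
      for (int oc = 0; oc < OC; oc++) {
        float sum = 0.f;
        for (int c = 0; c < C; c++) {
          for (int kh = 0; kh < Kh; kh++) {
            for (int kw = 0; kw < Kw; kw++) {
              int h = ph * stride - pad + kh;
              int w = pw * stride - pad + kw;
              if (h < 0 || h >= H || w < 0 || w >= W) continue;  // zero pad
              // NHWC input index and OC-C-Kh-Kw filter index, as in the diff.
              float x = in[(h * W + w) * C + c];
              float f = filter[((oc * C + c) * Kh + kh) * Kw + kw];
              sum += x * f;
            }
          }
        }
        // Per-output-channel scale and bias, matching cpu_conv_half_hwc.
        (*out)[(ph * Wo + pw) * OC + oc] = sum * scale[oc] + bias[oc];
      }
    }
  }
}
```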
......@@ -59,6 +59,7 @@ static void softmax(Tensor *X, Tensor *Y) {
int batch_size = X->shape().num();
int num_classes = dims[X->shape().dimSize() - 1];
int channels = X->shape().numel() / batch_size / num_classes;
float *x = X->data<float>();
float *y = Y->mutableData<float>();
......@@ -140,12 +141,23 @@ bool SoftmaxPE::init() {
bool SoftmaxPE::dispatch() {
Tensor *input = param_.input;
Tensor *output = param_.output;
input->syncToCPU();
Tensor float_input;
Tensor float_output;
float_input.mutableData<float>(DataType::FP32, input->shape());
float_input.copyFrom(input);
// input->saveToFile("in", true);
// input->syncToDevice();
// float_input.copyFrom(input);
input->syncToCPU();
float16 *in_data = input->data<float16>();
float *f_data = float_input.data<float>();
for (int i = 0; i < input->shape().channel(); i++) {
f_data[i] = half_to_float(in_data[i]);
}
// float_input.invalidate();
// float_input.saveToFile("fin", true);
float *out_data =
float_output.mutableData<float>(DataType::FP32, input->shape());
......
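In the `SoftmaxPE::dispatch` change above, the FP16 input is now converted element by element with `half_to_float` into `float_input` before the float softmax runs on the CPU. For reference, a numerically stable float softmax over one row looks like the following; this is an illustrative sketch, not a copy of the `softmax()` helper earlier in this file:

```cpp
#include <algorithm>
#include <cmath>
#include <cstddef>
#include <vector>

// Minimal sketch of a numerically stable softmax over one row of logits,
// illustrating the float computation the SoftmaxPE falls back to on the CPU.
std::vector<float> softmax_row(const std::vector<float>& x) {
  float max_v = *std::max_element(x.begin(), x.end());
  std::vector<float> y(x.size());
  float sum = 0.f;
  for (std::size_t i = 0; i < x.size(); i++) {
    y[i] = std::exp(x[i] - max_v);  // subtract the max for numerical stability
    sum += y[i];
  }
  for (float& v : y) v /= sum;
  return y;
}
```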
......@@ -20,51 +20,61 @@ limitations under the License. */
namespace paddle {
namespace zynqmp {
float sigmoid(float x) {
return 1.0 / (1.0 + std::exp(-x));
}
inline void GetYoloBox(float* box, const float* x, const int* anchors, int w,
int h, int an_idx, int grid_size,
int input_size, int index,
int img_height, int img_width) {
box[0] = (w + sigmoid(x[index])) * img_width * 1.0f/ grid_size;
float sigmoid(float x) { return 1.0 / (1.0 + std::exp(-x)); }
inline void GetYoloBox(float* box,
const float* x,
const int* anchors,
int w,
int h,
int an_idx,
int grid_size,
int input_size,
int index,
int img_height,
int img_width) {
box[0] = (w + sigmoid(x[index])) * img_width * 1.0f / grid_size;
box[1] = (h + sigmoid(x[index + 1])) * img_height * 1.0f / grid_size;
box[2] = std::exp(x[index + 2 ]) * anchors[2 * an_idx] * img_width * 1.0f/
box[2] = std::exp(x[index + 2]) * anchors[2 * an_idx] * img_width * 1.0f /
input_size;
box[3] = std::exp(x[index + 3]) * anchors[2 * an_idx + 1] *
img_height * 1.0f / input_size;
box[3] = std::exp(x[index + 3]) * anchors[2 * an_idx + 1] * img_height *
1.0f / input_size;
}
inline int GetEntryIndex(int batch, int an_idx, int hw_idx,
int an_num, int an_stride, int stride,
int entry) {
inline int GetEntryIndex(int batch,
int an_idx,
int hw_idx,
int an_num,
int an_stride,
int stride,
int entry) {
return (batch * an_num + an_idx) * an_stride + entry * stride + hw_idx;
}
inline void CalcDetectionBox(float* boxes, float* box, const int box_idx,
const int img_height,
const int img_width) {
inline void CalcDetectionBox(float* boxes,
float* box,
const int box_idx,
const int img_height,
const int img_width) {
boxes[box_idx] = box[0] - box[2] / 2;
boxes[box_idx + 1] = box[1] - box[3] / 2;
boxes[box_idx + 2] = box[0] + box[2] / 2;
boxes[box_idx + 3] = box[1] + box[3] / 2;
boxes[box_idx] = boxes[box_idx] > 0 ? boxes[box_idx] : 0;
boxes[box_idx + 1] =
boxes[box_idx + 1] > 0 ? boxes[box_idx + 1] : 0;
boxes[box_idx + 2] = boxes[box_idx + 2] < img_width - 1
? boxes[box_idx + 2]
: (img_width - 1);
boxes[box_idx + 3] = boxes[box_idx + 3] < img_height - 1
? boxes[box_idx + 3]
: (img_height - 1);
boxes[box_idx + 1] = boxes[box_idx + 1] > 0 ? boxes[box_idx + 1] : 0;
boxes[box_idx + 2] =
boxes[box_idx + 2] < img_width - 1 ? boxes[box_idx + 2] : (img_width - 1);
boxes[box_idx + 3] = boxes[box_idx + 3] < img_height - 1 ? boxes[box_idx + 3]
: (img_height - 1);
}
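Written out, the reformatted `GetYoloBox` above is the standard YOLOv3 decode, and `CalcDetectionBox` then converts the resulting center/size box to clamped corner coordinates:

$$
\begin{aligned}
b_x &= \bigl(w + \sigma(t_x)\bigr)\,\frac{\text{img\_width}}{\text{grid\_size}}, &
b_y &= \bigl(h + \sigma(t_y)\bigr)\,\frac{\text{img\_height}}{\text{grid\_size}},\\
b_w &= e^{t_w}\,a_w\,\frac{\text{img\_width}}{\text{input\_size}}, &
b_h &= e^{t_h}\,a_h\,\frac{\text{img\_height}}{\text{input\_size}},
\end{aligned}
$$

where \((w, h)\) is the grid cell, \((a_w, a_h)\) is the anchor pair `anchors[2 * an_idx]`, `anchors[2 * an_idx + 1]`, \(\sigma\) is the `sigmoid` defined at the top of this file, and \(t_x, t_y, t_w, t_h\) are the four consecutive raw outputs starting at `index`. The corners are \(b_x \pm b_w/2\) and \(b_y \pm b_h/2\), clamped to \([0, \text{img\_width}-1]\) and \([0, \text{img\_height}-1]\).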
inline void CalcLabelScore(float* scores, const float* input,
const int label_idx, const int score_idx,
const int class_num, const float conf) {
inline void CalcLabelScore(float* scores,
const float* input,
const int label_idx,
const int score_idx,
const int class_num,
const float conf) {
for (int i = 0; i < class_num; i++) {
scores[score_idx + i] = conf * sigmoid(input[label_idx + i]);
// std::cout << scores[score_idx + i] << " ";
......@@ -72,7 +82,6 @@ inline void CalcLabelScore(float* scores, const float* input,
// std::cout << std::endl;
}
class YoloBoxPE : public PE {
public:
bool init() {
......@@ -93,7 +102,6 @@ class YoloBoxPE : public PE {
float conf_thresh = param_.confThresh;
int downsample_ratio = param_.downsampleRatio;
const int num = input->shape().num();
const int height = input->shape().height();
const int width = input->shape().width();
......@@ -134,39 +142,42 @@ class YoloBoxPE : public PE {
imgsize->saveToFile("img_size", true);
const int32_t* imgsize_data = imgsize->data<int32_t>();
Tensor boxes_float;
Tensor scores_float;
boxes_float.setDataLocation(CPU);
float* boxes_float_data = boxes_float.mutableData<float>(FP32, boxes->shape());
float* boxes_float_data =
boxes_float.mutableData<float>(FP32, boxes->shape());
memset(boxes_float_data, 0, boxes->shape().numel() * sizeof(float));
scores_float.setDataLocation(CPU);
float* scores_float_data = scores_float.mutableData<float>(FP32, scores->shape());
float* scores_float_data =
scores_float.mutableData<float>(FP32, scores->shape());
memset(scores_float_data, 0, scores->shape().numel() * sizeof(float));
// float* boxes_data = boxes->mutableData<float>();
// memset(boxes_data, 0, boxes->shape().numel() * sizeof(float));
// float* scores_data = scores->mutableData<float>();
// memset(scores_data, 0, scores->shape().numel() * sizeof(float));
float box[4];
// for (int n = 0; n < num; n++) {
// int img_height = imgsize_data[2 * i];
// int img_width = imgsize_data[2 * i + 1];
// int img_height = imgsize_data[2 * i];
// int img_width = imgsize_data[2 * i + 1];
int img_height = imgsize_data[0];
int img_width = imgsize_data[1];
std::cout << "YoloBoxPE imgsize:" << img_height << "," << img_width << std::endl;
std::cout << "YoloBoxPE imgsize:" << img_height << "," << img_width
<< std::endl;
int channel = input_float.shape().channel();
int count = 0;
for (int h = 0; h < height; h++) {
for (int w = 0; w < width ; w++) {
for (int w = 0; w < width; w++) {
for (int n = 0; n < an_num; n++) {
int obj_idx = channel * width * h + channel * w + n * (5 + class_num) + 4;
int obj_idx =
channel * width * h + channel * w + n * (5 + class_num) + 4;
// std::cout << obj_idx << " ";
float conf = sigmoid(input_data[obj_idx]);
if (conf < conf_thresh) {
......@@ -174,16 +185,34 @@ class YoloBoxPE : public PE {
continue;
}
int box_idx = channel * width * h + channel * w + n * (5 + class_num) + 0;
GetYoloBox(box, input_data, anchors_data, w, h, n, height, input_size,
box_idx, img_height, img_width);
box_idx = h * an_num * 4 * width + an_num * 4 * w + n * 4;
CalcDetectionBox(boxes_float_data, box, box_idx, img_height,img_width);
int label_idx = channel * width * h + channel * w + n * (5 + class_num) + 5;
int score_idx = h * an_num * class_num * width + an_num * class_num * w + n * class_num;
CalcLabelScore(scores_float_data, input_data, label_idx, score_idx, class_num, conf);
int box_idx =
channel * width * h + channel * w + n * (5 + class_num) + 0;
GetYoloBox(box,
input_data,
anchors_data,
w,
h,
n,
height,
input_size,
box_idx,
img_height,
img_width);
box_idx = h * an_num * 4 * width + an_num * 4 * w + n * 4;
CalcDetectionBox(
boxes_float_data, box, box_idx, img_height, img_width);
int label_idx =
channel * width * h + channel * w + n * (5 + class_num) + 5;
int score_idx = h * an_num * class_num * width +
an_num * class_num * w + n * class_num;
CalcLabelScore(scores_float_data,
input_data,
label_idx,
score_idx,
class_num,
conf);
}
}
}
......@@ -195,11 +224,10 @@ class YoloBoxPE : public PE {
void apply(){};
YoloBoxParam& param() { return param_; }
YoloBoxParam& param() { return param_; }
private:
YoloBoxParam param_;
};
} // namespace zynqmp
} // namespace paddle
......@@ -70,6 +70,7 @@ class PlaceHolder {
explicit PlaceHolder(size_t size) {
size_ = size;
data_ = fpga_malloc(size_);
// memset(data_, 0, size);
}
void* data() { return data_; }
......@@ -80,7 +81,7 @@ class PlaceHolder {
~PlaceHolder() { fpga_free(data_); }
float scale_[2];
float scale_[2] = {0};
private:
void* data_ = nullptr;
......@@ -409,12 +410,14 @@ class Tensor {
if (i < 10) {
std::cout << value << ",";
}
// if (i > 1000) {
// break;
// }
ofs << value << std::endl;
}
usleep(30000);
std::cout << std::endl;
// usleep(30000);
ofs.close();
}
......
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <fstream>
#include <iostream>
#include <string>
#include <unordered_map>
#include "lite/core/program.h"
#include "lite/core/tensor.h"
namespace paddle {
namespace lite {
class Monitor {
public:
static Monitor& get_instance() {
static Monitor s_instance;
return s_instance;
}
void inferStart() {}
void preRun(Instruction& inst) {
VLOG(4) << "Running op:" << const_cast<OpLite*>(inst.op())->Type();
}
void postRun(Instruction& inst) {}
void inferEnd() {}
private:
};
} // namespace lite
} // namespace paddle
......@@ -134,7 +134,6 @@ void PrecisionCastPass::Apply(const std::unique_ptr<SSAGraph>& graph) {
// Start from inputs of the graph, those should have place set.
std::list<Node*> nodes;
for (auto& node : graph->StmtTopologicalOrder()) {
// if (node->IsStmt()) {
// auto& s = node->AsStmt();
// std::cout << "type_precision type:" << s.op_type() << std::endl;
......
......@@ -25,6 +25,10 @@
#include "lite/core/profile/precision_profiler.h"
#endif
#ifdef LITE_WITH_FPGA
#include "lite/backends/fpga/monitor.hpp"
#endif
namespace paddle {
namespace lite {
......@@ -151,23 +155,41 @@ void RuntimeProgram::Run() {
inst_precision_profiler.GetSummaryHeader();
#endif
#ifdef LITE_WITH_FPGA
Monitor& monitor = Monitor::get_instance();
monitor.inferStart();
#endif
for (auto& inst : instructions_) {
#ifdef LITE_WITH_FPGA
monitor.preRun(inst);
#endif
#ifndef LITE_WITH_FPGA
if (inst.is_feed_fetch_op()) continue;
#endif
#ifdef LITE_WITH_CUDA
if (inst.need_sync()) {
inst.Sync();
}
#endif
inst.Run();
#ifdef LITE_WITH_FPGA
monitor.postRun(inst);
#endif
#ifdef LITE_WITH_PRECISION_PROFILE
#ifndef LITE_WITH_FPGA
precision_profiler_summary +=
inst_precision_profiler.GetInstPrecision(&inst);
#endif
#endif // LITE_WITH_PRECISION_PROFILE
}
#ifdef LITE_WITH_FPGA
monitor.inferEnd();
#endif
#ifdef LITE_WITH_PROFILE
LOG(INFO) << "\n" << profiler_.Summary(profile::Type::kDispatch, false, 1);
#endif
......
......@@ -25,12 +25,46 @@ namespace kernels {
namespace fpga {
using float16 = zynqmp::float16;
using lite_api::ActivationType;
void ConvCompute::PrepareForRun() {
auto& param = this->Param<param_t>();
param.output->mutable_data<float16>();
int pad_h = (*param.paddings)[0];
int pad_w = (*param.paddings)[2];
zynqmp::ActiveType active_type = zynqmp::TYPE_NONE;
float leaky_relu_factor = 0;
switch (param.activation_param.active_type) {
case ActivationType::kIndentity:
active_type = zynqmp::TYPE_NONE;
break;
case ActivationType::kRelu:
active_type = zynqmp::TYPE_RELU;
break;
case ActivationType::kRelu6:
active_type = zynqmp::TYPE_RELU6;
break;
case ActivationType::kPRelu:
case ActivationType::kLeakyRelu:
active_type = zynqmp::TYPE_LEAKY_RELU;
leaky_relu_factor = param.activation_param.Leaky_relu_alpha;
break;
case ActivationType::kSigmoid:
active_type = zynqmp::TYPE_SIGMOID;
break;
case ActivationType::kTanh:
case ActivationType::kSwish:
case ActivationType::kExp:
case ActivationType::kAbs:
case ActivationType::kHardSwish:
case ActivationType::kReciprocal:
default:
throw("not supported activation");
break;
}
// ====================================================
if (param.x->ZynqTensor()->shape().channel() != 1 &&
param.groups == param.x->ZynqTensor()->shape().channel()) {
......@@ -45,17 +79,12 @@ void ConvCompute::PrepareForRun() {
conv_param.paddings = std::vector<int>({pad_h, pad_w});
conv_param.dilations = *param.dilations;
fill_scale_bias_const(&conv_param);
conv_param.bias()->copyFrom(param.bias->ZynqTensor());
if (param.fuse_relu) {
conv_param.activeParam.type = zynqmp::TYPE_RELU;
if (param.bias != nullptr) {
conv_param.bias()->copyFrom(param.bias->ZynqTensor());
}
if (param.activation_param.Leaky_relu_alpha > 0.001) {
conv_param.activeParam.type = zynqmp::TYPE_LEAKY_RELU;
conv_param.activeParam.leaky_relu_factor =
param.activation_param.Leaky_relu_alpha;
}
conv_param.activeParam.type = active_type;
conv_param.activeParam.leaky_relu_factor = leaky_relu_factor;
dw_conv_pe_.init();
dw_conv_pe_.apply();
......@@ -74,21 +103,12 @@ void ConvCompute::PrepareForRun() {
conv_param.bias()->copyFrom(param.bias->ZynqTensor());
}
if (param.fuse_relu) {
conv_param.activeParam.type = zynqmp::TYPE_RELU;
}
if (param.activation_param.Leaky_relu_alpha > 0.001) {
conv_param.activeParam.type = zynqmp::TYPE_LEAKY_RELU;
conv_param.activeParam.leaky_relu_factor =
param.activation_param.Leaky_relu_alpha;
}
conv_param.activeParam.type = active_type;
conv_param.activeParam.leaky_relu_factor = leaky_relu_factor;
conv_pe_.init();
conv_pe_.apply();
}
// std::cout << "Leaky_relu_alpha:" << param.activation_param.Leaky_relu_alpha
// << std::endl;
}
void ConvCompute::Run() {
......
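The new switch above replaces the old `fuse_relu` / `Leaky_relu_alpha` checks with an explicit mapping from `lite_api::ActivationType` to `zynqmp::ActiveType`, carrying the slope separately into `conv_param.activeParam.leaky_relu_factor`. For the leaky-ReLU case, the activation that slope configures is simply:

```cpp
// Sketch of the leaky-ReLU activation configured by leaky_relu_factor above;
// alpha corresponds to param.activation_param.Leaky_relu_alpha in the diff.
inline float leaky_relu(float x, float alpha) {
  return x > 0.f ? x : alpha * x;
}
```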
......@@ -227,7 +227,7 @@ void MultiClassNMS(const operators::MulticlassNmsParam& param,
SliceOneClass<T>(scores, c, &score_slice);
SliceOneClass<T>(bboxes, c, &bbox_slice);
}
NMSFast(bboxes,// TODO
NMSFast(bboxes, // TODO
score_slice,
score_threshold,
nms_threshold,
......
......@@ -12,8 +12,10 @@
// See the License for the specific language governing permissions and
// limitations under the License.
#include "lite/kernels/fpga/reshape_compute.h"
#include <vector>
#include "lite/backends/fpga/KD/debugger.hpp"
#include "lite/kernels/fpga/reshape_compute.h"
#include "lite/operators/reshape_op.h"
namespace paddle {
......@@ -48,21 +50,31 @@ void FlattenCompute::Run() {
#endif
}
void ReshapeCompute::Run() {
void ReshapeCompute::PrepareForRun() {
auto& param = Param<operators::ReshapeParam>();
auto x = param.x;
auto output = param.output;
auto output_dims = output->dims();
x->ZynqTensor()->unalignImage();
// x->ZynqTensor()->saveToFile("ri", true);
output->Resize(output_dims);
output->mutable_data<float16>();
}
void ReshapeCompute::Run() {
auto& param = Param<operators::ReshapeParam>();
auto x = param.x;
auto output = param.output;
// auto output_dims = output->dims();
// x->ZynqTensor()->invalidate();// TODO
x->ZynqTensor()->unalignImage();
x->ZynqTensor()->flush();
// output->Resize(output_dims);
// output->mutable_data<float16>();
if (param.inplace) {
output->ShareDataWith(*x);
// output->ShareDataWith(*x);
} else {
// output->CopyDataFrom(*x);
}
......@@ -70,7 +82,7 @@ void ReshapeCompute::Run() {
output->ZynqTensor()->copyFrom(x->ZynqTensor());
// output->ZynqTensor()->saveToFile("ro", true);
output->ZynqTensor()->flush();
output->ZynqTensor()->setAligned(x->ZynqTensor()->aligned());
// output->ZynqTensor()->setAligned(x->ZynqTensor()->aligned());
#ifdef FPGA_PRINT_TENSOR
Debugger::get_instance().registerOutput("reshape", output->ZynqTensor());
......
......@@ -25,6 +25,7 @@ namespace fpga {
class ReshapeCompute
: public KernelLite<TARGET(kFPGA), PRECISION(kFP16), DATALAYOUT(kNHWC)> {
public:
void PrepareForRun() override;
void Run() override;
virtual ~ReshapeCompute() = default;
......@@ -41,6 +42,7 @@ class FlattenCompute
class ReshapeComputeFpgaToHost
: public KernelLite<TARGET(kFPGA), PRECISION(kFP16), DATALAYOUT(kNHWC)> {
public:
void PrepareForRun() override;
void Run() override;
virtual ~ReshapeComputeFpgaToHost() = default;
......
......@@ -14,6 +14,7 @@
#include "lite/kernels/fpga/softmax_compute.h"
#include "lite/backends/arm/math/funcs.h"
#include "lite/backends/fpga/KD/debugger.hpp"
namespace paddle {
namespace lite {
......@@ -36,11 +37,10 @@ void SoftmaxCompute::PrepareForRun() {
void SoftmaxCompute::Run() {
zynqmp::SoftmaxParam& softmax_param = pe_.param();
// softmax_param.input->saveToFile("softmax_in", true);
pe_.dispatch();
softmax_param.output->flush();
// softmax_param.output->saveToFile("softmax", true);
// softmax_param.output->flush();
// // softmax_param.output->saveToFile("softmax", true);
#ifdef FPGA_PRINT_TENSOR
Debugger::get_instance().registerOutput("softmax", softmax_param.output);
#endif
......
......@@ -28,7 +28,6 @@ void YoloBoxCompute::PrepareForRun() {
lite::Tensor* ImgSize = param.ImgSize;
lite::Tensor* Boxes = param.Boxes;
lite::Tensor* Scores = param.Scores;
Boxes->mutable_data<float>();
Scores->mutable_data<float>();
......@@ -45,16 +44,14 @@ void YoloBoxCompute::PrepareForRun() {
pe_.init();
pe_.apply();
}
void YoloBoxCompute::Run() {
pe_.dispatch();
zynqmp::YoloBoxParam& yolobox_param = pe_.param();
yolobox_param.imgSize->saveToFile("img_size", true);
// exit(-1);
// exit(-1);
yolobox_param.outputBoxes->saveToFile("yolo_boxes", true);
yolobox_param.outputScores->saveToFile("yolo_scores", true);
}
......
......@@ -27,13 +27,13 @@ namespace fpga {
using float16 = zynqmp::float16;
class YoloBoxCompute
class YoloBoxCompute
: public KernelLite<TARGET(kFPGA), PRECISION(kFP16), DATALAYOUT(kNHWC)> {
public:
void PrepareForRun() override;
void Run() override;
virtual ~YoloBoxCompute() {
virtual ~YoloBoxCompute(){
};
......