Commit af1fd538 authored by zhangyang0701

fix bugs

Parent 0719bf08
@@ -290,14 +290,11 @@ int ComputeBasicConv(const struct ConvArgs &args) {
   reg_writeq(args.driver.deconv_param, 0xd18);
   reg_writeq(args.driver.fpga_bias_scale_len / 4, 0xd20);
   reg_writeq(args.driver.cmd, REG_CONV_CMD);
-  DLOG << "before reg poll";
   if (0 != fpga_regpoll(REG_INTERRUPT, INTERRUPT_CONV, PE_IRQ_TIMEOUT)) {
     g_fpgainfo.pe_data->pes[PE_IDX_CONV]->status = ERROR;
     ret = -EIO;
     DLOG << "Conv Wait Irq Timeout!";
   }
-  DLOG << "after reg poll";
   output_scale = reg_readq(REG_SCALE_PARAMETER);
   output_scale = (output_scale << 32) | (output_scale >> 32);
   fpga_copy(args.output.scale_address, &output_scale, sizeof(float) * 2);
...
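This hunk only removes the two debug traces around the interrupt poll; the timeout handling itself is unchanged. For context, a minimal sketch of the poll-with-timeout pattern that fpga_regpoll presumably implements (poll_register and read_reg are illustrative stand-ins, not this repository's API):

#include <chrono>
#include <cstdint>
#include <functional>

// Busy-wait until read_reg() returns `expected`, or give up after
// `timeout_us` microseconds. Returns 0 on success and nonzero on timeout,
// matching how the call site above maps a nonzero result onto -EIO.
int poll_register(const std::function<uint64_t()> &read_reg,
                  uint64_t expected, int64_t timeout_us) {
  auto start = std::chrono::steady_clock::now();
  while (read_reg() != expected) {
    auto elapsed = std::chrono::duration_cast<std::chrono::microseconds>(
        std::chrono::steady_clock::now() - start);
    if (elapsed.count() > timeout_us) return -1;
  }
  return 0;
}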
@@ -459,7 +459,7 @@ void Executor<Device, T>::InjectVariable(const Tensor &t,
 template <typename Device, typename T>
 void Executor<Device, T>::FeedData(const Tensor &t) {
-  InjectVariable(t, "feed");
+  InjectVariable(t, "feed0");
 }
 template <typename Device, typename T>
...
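FeedData now injects into "feed0" rather than "feed", matching an indexed naming scheme in which the i-th feed/fetch operator owns the variable "feed<i>" / "fetch<i>". A one-function sketch of that convention (IndexedVarName is a hypothetical helper, not part of the repo):

#include <string>

// Hypothetical helper spelling out the indexed naming convention: the
// i-th feed/fetch operator owns "<base><i>", so the first feed op reads
// its input from "feed0", the name FeedData injects into above.
std::string IndexedVarName(const std::string &base, int index) {
  return base + std::to_string(index);
}
// IndexedVarName("feed", 0) == "feed0"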
@@ -80,7 +80,6 @@ class OperatorBase {
   }
 #ifdef PADDLE_MOBILE_FPGA
   void InsertTensors();
-  void ChangeNameMap(string key, std::vector<string> value);
 #endif
  protected:
   std::shared_ptr<Scope> scope_;
@@ -96,7 +95,6 @@ class OperatorBase {
 template <typename Dtype, typename ParamType, typename KernelType>
 class OperatorWithKernel : public OperatorBase<Dtype> {
  public:
-#ifndef PADDLE_MOBILE_FPGA1
   OperatorWithKernel(const std::string &type, const VariableNameMap &inputs,
                      const VariableNameMap &outputs, const AttributeMap &attrs,
                      std::shared_ptr<Scope> scope)
@@ -106,25 +104,6 @@ class OperatorWithKernel : public OperatorBase<Dtype> {
     kernel_.InitCLHelper(scope->GetCLScpoe());
 #endif
   }
-#else
-  OperatorWithKernel(const std::string &type, const VariableNameMap inputs,
-                     const VariableNameMap &outputs, const AttributeMap &attrs,
-                     std::shared_ptr<Scope> scope)
-      : OperatorBase<Dtype>(type, inputs, outputs, attrs, scope) {
-    static int feed_num = 0;
-    static int fetch_num = 0;
-    if (type == "feed") {
-      auto new_name = string("feed") + std::to_string(feed_num++);
-      auto var = scope->Var(new_name);
-      (const_cast<VariableNameMap &>(inputs)).at("X") = {string(new_name)};
-    } else if (type == "fetch") {
-      auto new_name = string("fetch") + std::to_string(fetch_num++);
-      auto var = scope->Var(new_name);
-      (const_cast<VariableNameMap &>(outputs)).at("Out") = {string(new_name)};
-    }
-    param_ = ParamType(inputs, outputs, attrs, *scope);
-  }
-#endif
   virtual void RunImpl() { this->kernel_.Compute(this->param_); }
   virtual void InferShape() const = 0;
...
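The deleted #else branch was an FPGA-only constructor that renamed each feed/fetch operator's variable using static counters and wrote the new names back through const_cast. A stripped-down sketch of that removed pattern, showing why the resulting names depend on construction order (FeedOp is a toy type, not the repository's operator class):

#include <iostream>
#include <string>

// Toy reproduction of the removed pattern: a static counter inside the
// constructor hands out "feed0", "feed1", ... in construction order, so
// the variable a given op ends up with depends on when it was built.
struct FeedOp {
  std::string var_name;
  FeedOp() {
    static int feed_num = 0;
    var_name = "feed" + std::to_string(feed_num++);
  }
};

int main() {
  FeedOp a, b;
  std::cout << a.var_name << " " << b.var_name << std::endl;  // feed0 feed1
  return 0;
}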
@@ -126,8 +126,6 @@ std::vector<Variable *> Scope::VarContain(const std::string substring) {
   return v;
 }
-void Scope::InsertVar(const std::string str, Variable *var) {}
-
 void Scope::print_vars() {
   DLOG << "====================start to print variables=================";
   for (auto pair : vars_) {
...
@@ -86,7 +86,6 @@ class Scope {
 #ifdef PADDLE_MOBILE_FPGA
   Variable *Var(const std::string &name, const int id);
   std::vector<Variable *> VarContain(const std::string substring);
-  void InsertVar(const std::string str, Variable *var);
   void print_vars();
 #endif
...
@@ -22,7 +22,6 @@ bool FetchKernel<FPGA, float>::Init(FetchParam<FPGA> *param) {
   auto input = const_cast<Tensor *>(param->InputX());
   auto output = param->Out();
   if (input->type() == typeid(float)) {
-    output->ShareDataWith(*input);
     return true;
   }
   output->init(typeid(float));
@@ -52,9 +51,13 @@ template <>
 void FetchKernel<FPGA, float>::Compute(const FetchParam<FPGA> &param) {
   auto input = param.InputX();
   if (input->type() == typeid(float)) {
+    auto output = param.Out();
+    output->ShareDataWith(*input);
     return;
   }
   fpga::PerformBypass(param.fpga_bypass_args);
+  fpga::fpga_invalidate(param.fpga_bypass_args.output.address,
+                        param.fpga_bypass_args.image.channels * sizeof(float));
   // TODO: DEalign: get rid of extra 0
 }
...
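Two fixes land in this kernel: the float fast path now shares the output buffer with the input inside Compute() rather than once at Init(), and the bypass path invalidates the CPU cache over the freshly written result before it is read back. A minimal sketch of that read-after-DMA pattern, under the assumption that fpga::fpga_invalidate drops stale cache lines for the given range (fake_fpga_invalidate and ReadBypassOutput are illustrative stand-ins):

#include <cstddef>
#include <vector>

// Stand-in for fpga::fpga_invalidate: on real hardware this would drop
// CPU cache lines covering [addr, addr + size) so the next read sees the
// data the device wrote via DMA. A no-op here so the sketch compiles.
void fake_fpga_invalidate(void *addr, size_t size) {
  (void)addr;
  (void)size;
}

// Read `channels` floats the device just produced: invalidate first,
// then copy out. Reading without the invalidate risks stale cached data.
std::vector<float> ReadBypassOutput(void *output_address, int channels) {
  fake_fpga_invalidate(output_address, channels * sizeof(float));
  const float *src = static_cast<const float *>(output_address);
  return std::vector<float>(src, src + channels);
}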
@@ -61,15 +61,16 @@ void dump(std::string filename, Tensor input_tensor) {
   }
   out.close();
 }
-void dump_stride(std::string filename, Tensor input_tensor, const int dumpnum) {
+void dump_stride_half(std::string filename, Tensor input_tensor,
+                      const int dumpnum) {
   int c = (input_tensor.dims())[1];
   int h = (input_tensor.dims())[2];
   int w = (input_tensor.dims())[3];
   auto data_ptr = input_tensor.get_data();
-  int16_t *data_tmp = (int16_t *)malloc(c * h * w * sizeof(int16_t));
-  int16_t *data_ptr_16 = (int16_t *)data_ptr;
+  auto *data_tmp =
+      reinterpret_cast<half *>(malloc(c * h * w * sizeof(int16_t)));
+  auto *data_ptr_16 = reinterpret_cast<half *>(data_ptr);
   convert_to_chw(&data_ptr_16, c, h, w, data_tmp);
-  // const int16_t *dataptr = input_tensor.data<int16_t>();
   std::ofstream out(filename.c_str());
   float result = 0;
   int stride = input_tensor.numel() / dumpnum;
@@ -81,6 +82,20 @@ void dump_stride(std::string filename, Tensor input_tensor, const int dumpnum) {
   out.close();
   free(data_tmp);
 }
+void dump_stride_float(std::string filename, Tensor input_tensor,
+                       const int dumpnum) {
+  auto data_ptr = reinterpret_cast<float *>(input_tensor.get_data());
+  std::ofstream out(filename.c_str());
+  float result = 0;
+  int stride = input_tensor.numel() / dumpnum;
+  stride = stride > 0 ? stride : 1;
+  for (int i = 0; i < input_tensor.numel(); i += stride) {
+    result = data_ptr[i];
+    out << result << std::endl;
+  }
+  out.close();
+}
 static const char *g_resnet50 = "../models/resnet50";
 const std::string g_image_src_float = "../images/image_src_float";
 int main() {
@@ -99,22 +114,19 @@ int main() {
     std::string saveName = "resnet50_result_" + std::to_string(i);
     paddle_mobile::fpga::fpga_invalidate((*tensor_ptr).get_data(),
                                          tensor_ptr->numel() * sizeof(half));
-    // dump_stride(saveName, (*tensor_ptr), 20);
+    dump_stride_half(saveName, (*tensor_ptr), 20);
     // dump(saveName, (*tensor_ptr));
   }
-  std::shared_ptr<Tensor> output_tensor = paddle_mobile.FetchResult(73);
-  //(*output_tensor).dump<float>("resnet50_result_73");
-  output_tensor = paddle_mobile.FetchResult(74);
-  //(*output_tensor).dump<float>("resnet50_result_74");
-  // std::shared_ptr<Tensor> output_tensor = paddle_mobile.FetchResult(74);
-  // output_tensor = paddle_mobile.FetchResult(74);
+  auto tensor_ptr = paddle_mobile.FetchResult(73);
+  dump_stride_float("resnet50_result_73", (*tensor_ptr), 20);
+  tensor_ptr = paddle_mobile.FetchResult(74);
+  dump_stride_float("resnet50_result_74", (*tensor_ptr), 9999);
   float max = 0;
-  auto data_ptr = output_tensor->data<float>();
+  auto data_ptr = tensor_ptr->data<float>();
   int maximumIdx = 0;
-  for (int i = 0; i < (*output_tensor).numel(); i++) {
+  for (int i = 0; i < (*tensor_ptr).numel(); i++) {
     if (data_ptr[i] > max) {
       maximumIdx = i;
       max = data_ptr[i];
...
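The test now dumps intermediate layers with dump_stride_half (fp16 activations, converted to CHW first) and the two float results with dump_stride_float. Assuming the two helpers differ only in element type and that CHW conversion, the strided-dump core could be shared; a hypothetical sketch:

#include <fstream>
#include <string>

// Hypothetical unification of dump_stride_half/dump_stride_float: write
// every stride-th of `numel` elements to `filename`, where stride is
// chosen so that roughly `dumpnum` values are emitted.
template <typename T>
void dump_stride_t(const std::string &filename, const T *data, int numel,
                   int dumpnum) {
  std::ofstream out(filename.c_str());
  int stride = numel / dumpnum;
  stride = stride > 0 ? stride : 1;
  for (int i = 0; i < numel; i += stride) {
    out << static_cast<float>(data[i]) << std::endl;
  }
  out.close();
}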