diff --git a/src/fpga/V1/pe.cpp b/src/fpga/V1/pe.cpp
index 5a81e2422979f08b2113bd9b46022fe4d77154cb..37feeb9dfa1a0e9a8c4dc9f789c0ab673e0f4d65 100644
--- a/src/fpga/V1/pe.cpp
+++ b/src/fpga/V1/pe.cpp
@@ -290,14 +290,11 @@ int ComputeBasicConv(const struct ConvArgs &args) {
   reg_writeq(args.driver.deconv_param, 0xd18);
   reg_writeq(args.driver.fpga_bias_scale_len / 4, 0xd20);
   reg_writeq(args.driver.cmd, REG_CONV_CMD);
-  DLOG << "before reg poll";
   if (0 != fpga_regpoll(REG_INTERRUPT, INTERRUPT_CONV, PE_IRQ_TIMEOUT)) {
     g_fpgainfo.pe_data->pes[PE_IDX_CONV]->status = ERROR;
     ret = -EIO;
     DLOG << "Conv Wait Irq Timeout!";
   }
-  DLOG << "after reg poll";
-
   output_scale = reg_readq(REG_SCALE_PARAMETER);
   output_scale = (output_scale << 32) | (output_scale >> 32);
   fpga_copy(args.output.scale_address, &output_scale, sizeof(float) * 2);
diff --git a/src/framework/executor.cpp b/src/framework/executor.cpp
index 09f02f22fe5165f868aed2ec04366f9f10001d1b..b9e7acfdaf3f1b70a8484d7426505da9c27b34a4 100644
--- a/src/framework/executor.cpp
+++ b/src/framework/executor.cpp
@@ -459,7 +459,7 @@ void Executor::InjectVariable(const Tensor &t,

 template
 void Executor::FeedData(const Tensor &t) {
-  InjectVariable(t, "feed");
+  InjectVariable(t, "feed0");
 }

 template
diff --git a/src/framework/operator.h b/src/framework/operator.h
index 2a68ef802eaabd0c5f2a7f95227ebaa39a9442e7..1c7605944a77e4f8d6d4ea033e3d460030653217 100644
--- a/src/framework/operator.h
+++ b/src/framework/operator.h
@@ -80,7 +80,6 @@ class OperatorBase {
   }
 #ifdef PADDLE_MOBILE_FPGA
   void InsertTensors();
-  void ChangeNameMap(string key, std::vector value);
 #endif
  protected:
   std::shared_ptr scope_;
@@ -96,7 +95,6 @@ class OperatorBase {
 template
 class OperatorWithKernel : public OperatorBase {
  public:
-#ifndef PADDLE_MOBILE_FPGA1
   OperatorWithKernel(const std::string &type, const VariableNameMap &inputs,
                      const VariableNameMap &outputs, const AttributeMap &attrs,
                      std::shared_ptr scope)
@@ -106,25 +104,6 @@ class OperatorWithKernel : public OperatorBase {
     kernel_.InitCLHelper(scope->GetCLScpoe());
 #endif
   }
-#else
-  OperatorWithKernel(const std::string &type, const VariableNameMap inputs,
-                     const VariableNameMap &outputs, const AttributeMap &attrs,
-                     std::shared_ptr scope)
-      : OperatorBase(type, inputs, outputs, attrs, scope) {
-    static int feed_num = 0;
-    static int fetch_num = 0;
-    if (type == "feed") {
-      auto new_name = string("feed") + std::to_string(feed_num++);
-      auto var = scope->Var(new_name);
-      (const_cast(inputs)).at("X") = {string(new_name)};
-    } else if (type == "fetch") {
-      auto new_name = string("fetch") + std::to_string(fetch_num++);
-      auto var = scope->Var(new_name);
-      (const_cast(outputs)).at("Out") = {string(new_name)};
-    }
-    param_ = ParamType(inputs, outputs, attrs, *scope);
-  }
-#endif
   virtual void RunImpl() { this->kernel_.Compute(this->param_); }

   virtual void InferShape() const = 0;
diff --git a/src/framework/scope.cpp b/src/framework/scope.cpp
index db263081446f9804e5352588063a23f72a8bf163..5ddb71aaf700b96b0630c1d0a4a8779f3ac1ddcb 100644
--- a/src/framework/scope.cpp
+++ b/src/framework/scope.cpp
@@ -126,8 +126,6 @@ std::vector Scope::VarContain(const std::string substring) {
   return v;
 }

-void Scope::InsertVar(const std::string str, Variable *var) {}
-
 void Scope::print_vars() {
   DLOG << "====================start to print variables=================";
   for (auto pair : vars_) {
diff --git a/src/framework/scope.h b/src/framework/scope.h
index d9e3a179e0aae9f93947df60cea410d3eb5cb128..c85a09979607316149de711440b3228a655e49b7 100644
--- a/src/framework/scope.h
+++ b/src/framework/scope.h
@@ -86,7 +86,6 @@ class Scope {
 #ifdef PADDLE_MOBILE_FPGA
   Variable *Var(const std::string &name, const int id);
   std::vector VarContain(const std::string substring);
-  void InsertVar(const std::string str, Variable *var);
   void print_vars();
 #endif

diff --git a/src/operators/kernel/fpga/V1/fetch_kernel.cpp b/src/operators/kernel/fpga/V1/fetch_kernel.cpp
index 1cf6f141af88f2b206de6d0f6efe6c7999ac8674..883c4e4dcb81e54d0de63ab9d90f2061b3734596 100644
--- a/src/operators/kernel/fpga/V1/fetch_kernel.cpp
+++ b/src/operators/kernel/fpga/V1/fetch_kernel.cpp
@@ -22,7 +22,6 @@ bool FetchKernel::Init(FetchParam *param) {
   auto input = const_cast(param->InputX());
   auto output = param->Out();
   if (input->type() == typeid(float)) {
-    output->ShareDataWith(*input);
     return true;
   }
   output->init(typeid(float));
@@ -52,9 +51,13 @@ template <>
 void FetchKernel::Compute(const FetchParam &param) {
   auto input = param.InputX();
   if (input->type() == typeid(float)) {
+    auto output = param.Out();
+    output->ShareDataWith(*input);
     return;
   }
   fpga::PerformBypass(param.fpga_bypass_args);
+  fpga::fpga_invalidate(param.fpga_bypass_args.output.address,
+                        param.fpga_bypass_args.image.channels * sizeof(float));
   // TODO: DEalign: get rid of extra 0
 }

diff --git a/test/fpga/test_resnet50.cpp b/test/fpga/test_resnet50.cpp
index 218550ca6b0478bbee985c16c6d3b111171f1745..723e4ea3e3ff35e0d555703391adcafacccb42f1 100644
--- a/test/fpga/test_resnet50.cpp
+++ b/test/fpga/test_resnet50.cpp
@@ -61,15 +61,16 @@ void dump(std::string filename, Tensor input_tensor) {
   }
   out.close();
 }
-void dump_stride(std::string filename, Tensor input_tensor, const int dumpnum) {
+void dump_stride_half(std::string filename, Tensor input_tensor,
+                      const int dumpnum) {
   int c = (input_tensor.dims())[1];
   int h = (input_tensor.dims())[2];
   int w = (input_tensor.dims())[3];
   auto data_ptr = input_tensor.get_data();
-  int16_t *data_tmp = (int16_t *)malloc(c * h * w * sizeof(int16_t));
-  int16_t *data_ptr_16 = (int16_t *)data_ptr;
+  auto *data_tmp =
+      reinterpret_cast(malloc(c * h * w * sizeof(int16_t)));
+  auto *data_ptr_16 = reinterpret_cast(data_ptr);
   convert_to_chw(&data_ptr_16, c, h, w, data_tmp);
-  // const int16_t *dataptr = input_tensor.data();
   std::ofstream out(filename.c_str());
   float result = 0;
   int stride = input_tensor.numel() / dumpnum;
@@ -81,6 +82,20 @@ void dump_stride(std::string filename, Tensor input_tensor, const int dumpnum) {
   out.close();
   free(data_tmp);
 }
+
+void dump_stride_float(std::string filename, Tensor input_tensor,
+                       const int dumpnum) {
+  auto data_ptr = reinterpret_cast(input_tensor.get_data());
+  std::ofstream out(filename.c_str());
+  float result = 0;
+  int stride = input_tensor.numel() / dumpnum;
+  stride = stride > 0 ? stride : 1;
+  for (int i = 0; i < input_tensor.numel(); i += stride) {
+    result = data_ptr[i];
+    out << result << std::endl;
+  }
+  out.close();
+}

 static const char *g_resnet50 = "../models/resnet50";
 const std::string g_image_src_float = "../images/image_src_float";
@@ -99,22 +114,19 @@ int main() {
     std::string saveName = "resnet50_result_" + std::to_string(i);
     paddle_mobile::fpga::fpga_invalidate((*tensor_ptr).get_data(),
                                          tensor_ptr->numel() * sizeof(half));
-    // dump_stride(saveName, (*tensor_ptr), 20);
+    dump_stride_half(saveName, (*tensor_ptr), 20);
     // dump(saveName, (*tensor_ptr));
   }
-  std::shared_ptr output_tensor = paddle_mobile.FetchResult(73);
-  //(*output_tensor).dump("resnet50_result_73");
-  output_tensor = paddle_mobile.FetchResult(74);
-  //(*output_tensor).dump("resnet50_result_74");
-  // std::shared_ptr output_tensor = paddle_mobile.FetchResult(74);
-
-  // output_tensor = paddle_mobile.FetchResult(74);
+  auto tensor_ptr = paddle_mobile.FetchResult(73);
+  dump_stride_float("resnet50_result_73", (*tensor_ptr), 20);
+  tensor_ptr = paddle_mobile.FetchResult(74);
+  dump_stride_float("resnet50_result_74", (*tensor_ptr), 9999);
   float max = 0;
-  auto data_ptr = output_tensor->data();
+  auto data_ptr = tensor_ptr->data();
   int maximumIdx = 0;
-  for (int i = 0; i < (*output_tensor).numel(); i++) {
+  for (int i = 0; i < (*tensor_ptr).numel(); i++) {
     if (data_ptr[i] > max) {
       maximumIdx = i;
       max = data_ptr[i];