提交 fbe2b87a 编写于 作者: Z zhangyang0701

fix bugs

上级 b144e472
......@@ -290,14 +290,11 @@ int ComputeBasicConv(const struct ConvArgs &args) {
reg_writeq(args.driver.deconv_param, 0xd18);
reg_writeq(args.driver.fpga_bias_scale_len / 4, 0xd20);
reg_writeq(args.driver.cmd, REG_CONV_CMD);
DLOG << "before reg poll";
if (0 != fpga_regpoll(REG_INTERRUPT, INTERRUPT_CONV, PE_IRQ_TIMEOUT)) {
g_fpgainfo.pe_data->pes[PE_IDX_CONV]->status = ERROR;
ret = -EIO;
DLOG << "Conv Wait Irq Timeout!";
}
DLOG << "after reg poll";
output_scale = reg_readq(REG_SCALE_PARAMETER);
output_scale = (output_scale << 32) | (output_scale >> 32);
fpga_copy(args.output.scale_address, &output_scale, sizeof(float) * 2);
......
......@@ -459,7 +459,7 @@ void Executor<Device, T>::InjectVariable(const Tensor &t,
// Feeds tensor `t` into the executor by calling InjectVariable with a fixed
// variable name.
// NOTE(review): this view shows two consecutive calls, one with "feed" and one
// with "feed0". The enclosing hunk header reports a 7-line -> 7-line change,
// so one of the two calls may be the line the commit replaced rather than both
// being live -- confirm against the real file before relying on this.
template <typename Device, typename T>
void Executor<Device, T>::FeedData(const Tensor &t) {
InjectVariable(t, "feed");
InjectVariable(t, "feed0");
}
template <typename Device, typename T>
......
......@@ -80,7 +80,6 @@ class OperatorBase {
}
#ifdef PADDLE_MOBILE_FPGA
void InsertTensors();
void ChangeNameMap(string key, std::vector<string> value);
#endif
protected:
std::shared_ptr<Scope> scope_;
......@@ -96,7 +95,6 @@ class OperatorBase {
template <typename Dtype, typename ParamType, typename KernelType>
class OperatorWithKernel : public OperatorBase<Dtype> {
public:
#ifndef PADDLE_MOBILE_FPGA1
OperatorWithKernel(const std::string &type, const VariableNameMap &inputs,
const VariableNameMap &outputs, const AttributeMap &attrs,
std::shared_ptr<Scope> scope)
......@@ -106,25 +104,6 @@ class OperatorWithKernel : public OperatorBase<Dtype> {
kernel_.InitCLHelper(scope->GetCLScpoe());
#endif
}
#else
// Constructor variant that gives every "feed" / "fetch" operator its own
// numbered variable name before building param_.
//
//   type     operator type; only "feed" and "fetch" get the renaming below.
//   inputs   input name map (taken by value); for "feed" its "X" entry is
//            overwritten with {"feed<N>"}.
//   outputs  output name map (taken by const reference); for "fetch" its "Out"
//            entry is overwritten with {"fetch<N>"}.
//   attrs    forwarded unchanged to the base class and to ParamType.
//   scope    scope on which Var(new_name) is called and which is passed
//            (dereferenced) to ParamType.
//
// NOTE(review): the base class is initialised with the maps *before* the
// renaming happens, so whether it observes the new names depends on whether
// OperatorBase copies them -- confirm.
OperatorWithKernel(const std::string &type, const VariableNameMap inputs,
const VariableNameMap &outputs, const AttributeMap &attrs,
std::shared_ptr<Scope> scope)
: OperatorBase<Dtype>(type, inputs, outputs, attrs, scope) {
// Function-local statics: the counters persist across constructor calls (and,
// this being a class template, separately for each instantiation).
static int feed_num = 0;
static int fetch_num = 0;
if (type == "feed") {
auto new_name = string("feed") + std::to_string(feed_num++);
// `var` is not used afterwards; the call is kept for whatever effect
// Scope::Var has (presumably creating the variable -- TODO confirm).
auto var = scope->Var(new_name);
// NOTE(review): `inputs` is declared const, so writing to it through
// const_cast modifies a const object -- verify this is well-defined here.
(const_cast<VariableNameMap &>(inputs)).at("X") = {string(new_name)};
} else if (type == "fetch") {
auto new_name = string("fetch") + std::to_string(fetch_num++);
// `var` is unused here as well; see the note in the "feed" branch.
auto var = scope->Var(new_name);
// NOTE(review): `outputs` is a const reference, so this write lands in the
// caller's map -- confirm the caller expects that.
(const_cast<VariableNameMap &>(outputs)).at("Out") = {string(new_name)};
}
// param_ is built from the (possibly renamed) maps.
param_ = ParamType(inputs, outputs, attrs, *scope);
}
#endif
virtual void RunImpl() { this->kernel_.Compute(this->param_); }
virtual void InferShape() const = 0;
......
......@@ -126,8 +126,6 @@ std::vector<Variable *> Scope::VarContain(const std::string substring) {
return v;
}
// No-op: the body is empty, so `str` and `var` are ignored and nothing is
// inserted. NOTE(review): looks like a placeholder -- confirm no caller
// depends on the variable actually being registered.
void Scope::InsertVar(const std::string str, Variable *var) {}
void Scope::print_vars() {
DLOG << "====================start to print variables=================";
for (auto pair : vars_) {
......
......@@ -86,7 +86,6 @@ class Scope {
#ifdef PADDLE_MOBILE_FPGA
Variable *Var(const std::string &name, const int id);
std::vector<Variable *> VarContain(const std::string substring);
void InsertVar(const std::string str, Variable *var);
void print_vars();
#endif
......
......@@ -22,7 +22,6 @@ bool FetchKernel<FPGA, float>::Init(FetchParam<FPGA> *param) {
auto input = const_cast<Tensor *>(param->InputX());
auto output = param->Out();
if (input->type() == typeid(float)) {
output->ShareDataWith(*input);
return true;
}
output->init(typeid(float));
......@@ -52,9 +51,13 @@ template <>
// Produces the fetch output for the FPGA float kernel.
//
// If the input tensor already reports type float, the output tensor simply
// shares the input's data. Otherwise the pre-configured bypass arguments are
// handed to fpga::PerformBypass, after which fpga_invalidate is called on the
// bypass output address for `image.channels * sizeof(float)` bytes.
void FetchKernel<FPGA, float>::Compute(const FetchParam<FPGA> &param) {
  auto src = param.InputX();
  const bool already_float = (src->type() == typeid(float));

  if (already_float) {
    // Nothing to convert: alias the input buffer.
    param.Out()->ShareDataWith(*src);
  } else {
    const auto &bypass = param.fpga_bypass_args;
    fpga::PerformBypass(bypass);
    fpga::fpga_invalidate(bypass.output.address,
                          bypass.image.channels * sizeof(float));
    // TODO: de-align the output: get rid of extra 0
  }
}
......
......@@ -61,15 +61,16 @@ void dump(std::string filename, Tensor input_tensor) {
}
out.close();
}
void dump_stride(std::string filename, Tensor input_tensor, const int dumpnum) {
void dump_stride_half(std::string filename, Tensor input_tensor,
const int dumpnum) {
int c = (input_tensor.dims())[1];
int h = (input_tensor.dims())[2];
int w = (input_tensor.dims())[3];
auto data_ptr = input_tensor.get_data();
int16_t *data_tmp = (int16_t *)malloc(c * h * w * sizeof(int16_t));
int16_t *data_ptr_16 = (int16_t *)data_ptr;
auto *data_tmp =
reinterpret_cast<half *>(malloc(c * h * w * sizeof(int16_t)));
auto *data_ptr_16 = reinterpret_cast<half *>(data_ptr);
convert_to_chw(&data_ptr_16, c, h, w, data_tmp);
// const int16_t *dataptr = input_tensor.data<int16_t>();
std::ofstream out(filename.c_str());
float result = 0;
int stride = input_tensor.numel() / dumpnum;
......@@ -81,6 +82,20 @@ void dump_stride(std::string filename, Tensor input_tensor, const int dumpnum) {
out.close();
free(data_tmp);
}
// Writes a strided sample of a tensor's values to a text file, one per line.
//
//   filename      path of the file to write (opened with std::ofstream).
//   input_tensor  tensor whose get_data() buffer is read as float -- assumes
//                 the tensor really holds float data; TODO confirm at callers.
//   dumpnum       rough number of samples wanted. The step between samples is
//                 numel() / dumpnum, clamped to at least 1. A non-positive
//                 dumpnum dumps every element (previously dumpnum == 0 divided
//                 by zero).
void dump_stride_float(std::string filename, Tensor input_tensor,
                       const int dumpnum) {
  auto data_ptr = reinterpret_cast<float *>(input_tensor.get_data());
  std::ofstream out(filename.c_str());

  const auto total = input_tensor.numel();

  // Guard the division: only divide when dumpnum is positive, otherwise fall
  // back to the same stride of 1 that the clamp below yields for tiny tensors.
  int stride = 1;
  if (dumpnum > 0) {
    stride = total / dumpnum;
    stride = stride > 0 ? stride : 1;
  }

  for (int i = 0; i < total; i += stride) {
    const float result = data_ptr[i];
    // '\n' instead of std::endl: same file contents without a flush per line;
    // close() below flushes once.
    out << result << '\n';
  }
  out.close();
}
static const char *g_resnet50 = "../models/resnet50";
const std::string g_image_src_float = "../images/image_src_float";
int main() {
......@@ -99,22 +114,19 @@ int main() {
std::string saveName = "resnet50_result_" + std::to_string(i);
paddle_mobile::fpga::fpga_invalidate((*tensor_ptr).get_data(),
tensor_ptr->numel() * sizeof(half));
// dump_stride(saveName, (*tensor_ptr), 20);
dump_stride_half(saveName, (*tensor_ptr), 20);
// dump(saveName, (*tensor_ptr));
}
std::shared_ptr<Tensor> output_tensor = paddle_mobile.FetchResult(73);
//(*output_tensor).dump<float>("resnet50_result_73");
output_tensor = paddle_mobile.FetchResult(74);
//(*output_tensor).dump<float>("resnet50_result_74");
// std::shared_ptr<Tensor> output_tensor = paddle_mobile.FetchResult(74);
// output_tensor = paddle_mobile.FetchResult(74);
auto tensor_ptr = paddle_mobile.FetchResult(73);
dump_stride_float("resnet50_result_73", (*tensor_ptr), 20);
tensor_ptr = paddle_mobile.FetchResult(74);
dump_stride_float("resnet50_result_74", (*tensor_ptr), 9999);
float max = 0;
auto data_ptr = output_tensor->data<float>();
auto data_ptr = tensor_ptr->data<float>();
int maximumIdx = 0;
for (int i = 0; i < (*output_tensor).numel(); i++) {
for (int i = 0; i < (*tensor_ptr).numel(); i++) {
if (data_ptr[i] > max) {
maximumIdx = i;
max = data_ptr[i];
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册