diff --git a/src/io/api_paddle_mobile.cc b/src/io/api_paddle_mobile.cc index ac5a0681f1bbf8417ccb154d9bc82a353a4acb83..6f4b548155ca91ab01a6426cca6ba92ce4f9340e 100644 --- a/src/io/api_paddle_mobile.cc +++ b/src/io/api_paddle_mobile.cc @@ -190,6 +190,9 @@ void PaddleMobilePredictor::FetchPaddleTensors(PaddleTensor *output, } else if (tensor_ptr.get()->type() == type_id().hash_code()) { data_addr = tensor_ptr.get()->data(); data_sizeof = sizeof(float); + } else if (tensor_ptr.get()->type() == type_id().hash_code()) { + data_addr = tensor_ptr.get()->data(); + data_sizeof = sizeof(int8_t); } else { PADDLE_MOBILE_ENFORCE(0, "output typeid is not supported"); } diff --git a/src/operators/kernel/fpga/V2/fetch_kernel.cpp b/src/operators/kernel/fpga/V2/fetch_kernel.cpp index 790c8dbd53bfa728d26b28422a776517ebd167e2..c6b8f9e85247865fd344bc86a365cdd26d3f5ec0 100644 --- a/src/operators/kernel/fpga/V2/fetch_kernel.cpp +++ b/src/operators/kernel/fpga/V2/fetch_kernel.cpp @@ -73,7 +73,7 @@ void FetchKernel::Compute(const FetchParam ¶m) { int unalignedCW = outC * outW; int alignedCW = fpga::align_to_x(unalignedCW, IMAGE_ALIGNMENT); if (input->type() == type_id()) { - if (unalignedCW == alignedCW) { + if ((output->dims().size() != 4) || (unalignedCW == alignedCW)) { output->ShareDataWith(*input); } else { auto input_address = input->data(); @@ -90,7 +90,7 @@ void FetchKernel::Compute(const FetchParam ¶m) { fpga::fpga_invalidate(input_address, (input->fpga_data_num) * sizeof(int8_t)); if (input->fpga_data_num < num_th) { for (int idx = 0; idx < product(input->dims()); ++idx) { - outdata_ptr[idx] = input_address[idx] * Si; + outdata_ptr[idx] = input_address[idx] / 127.0 * Si; } fpga::fpga_flush(outdata_ptr, product(input->dims()) * sizeof(float)); return; @@ -101,14 +101,14 @@ void FetchKernel::Compute(const FetchParam ¶m) { auto aligned_ptr = aligned_out->data(); fpga::fpga_invalidate(aligned_ptr, (input->fpga_data_num) * sizeof(float)); for (int idx = 0; idx < input->fpga_data_num; ++idx) { - aligned_ptr[idx] = input_address[idx] * Si; + aligned_ptr[idx] = input_address[idx] / 127.0 * Si; } dealign(aligned_ptr, outdata_ptr, outC, outH, outW); fpga::fpga_flush(outdata_ptr, outC * outH * outW * sizeof(float)); return; } for (int idx = 0; idx < input->fpga_data_num; ++idx) { - outdata_ptr[idx] = input_address[idx] * Si; + outdata_ptr[idx] = input_address[idx] / 127.0 * Si; } fpga::fpga_flush(outdata_ptr, outC * outH * outW * sizeof(float)); } diff --git a/src/operators/kernel/fpga/V2/softmax_kernel.cpp b/src/operators/kernel/fpga/V2/softmax_kernel.cpp index 07bf3bb807eb3d23300f77403847fb0e0e4ff3aa..4f75e0f30b2e9f57d94941e972d012016b55251e 100755 --- a/src/operators/kernel/fpga/V2/softmax_kernel.cpp +++ b/src/operators/kernel/fpga/V2/softmax_kernel.cpp @@ -27,8 +27,6 @@ bool SoftmaxKernel::Init(SoftmaxParam *param) { auto out = param->Out(); out->Resize(framework::make_ddim(dims)); - out->mutable_data(framework::make_ddim(dims)); - fpga::format_ofm(out); PADDLE_MOBILE_ENFORCE(input->dims().size() == 4, "Softmax should have 4-order input"); @@ -44,6 +42,7 @@ bool SoftmaxKernel::Init(SoftmaxParam *param) { auto input_ptr = input->data(); float Si = input->scale[0]; int16_t slope = fpga::fp32_2_fp16(Si / 127); + out->mutable_data(framework::make_ddim(dims)); fpga::format_ofm(out); fpga::BypassArgs args = {fpga::DATA_TYPE_FP16}; args.input_layout_type = fpga::LAYOUT_HWC; @@ -65,17 +64,11 @@ bool SoftmaxKernel::Init(SoftmaxParam *param) { float_input_x->Resize(input->dims()); float_input_x->init(type_id().hash_code()); fpga::format_ofm(float_input_x.get()); - auto float_out = param->float_out; - float_out = std::make_shared(); - float_out->Resize(input->dims()); - float_out->init(type_id().hash_code()); - fpga::format_ofm(float_out.get()); + out->mutable_data(framework::make_ddim(dims)); + fpga::format_ofm(out); } else { - auto float_out = param->float_out; - float_out = std::make_shared(); - float_out->Resize(input->dims()); - float_out->init(type_id().hash_code()); - fpga::format_ofm(float_out.get()); + out->mutable_data(framework::make_ddim(dims)); + fpga::format_ofm(out); } return true; @@ -97,41 +90,24 @@ void SoftmaxKernel::Compute(const SoftmaxParam ¶m) { Tensor *out = param.Out(); out->Resize( {in_x->dims()[0], out->dims()[1], out->dims()[2], out->dims()[3]}); - auto out_data = out->data(); + auto float_input_x = param.float_input_x_; auto float_input_x_data = float_input_x->data(); int dataNum = n * h * fpga::align_to_x(w * c, IMAGE_ALIGNMENT); for (int i = 0; i < dataNum; i++) { float_input_x_data[i] = in_data[i] * Si / 127; } - auto float_out = param.float_out; - auto float_out_data = float_out->data(); - math::SoftmaxFuntor()(float_input_x.get(), float_out.get()); - for (int i = 0; i < dataNum; i++) { - float tmp_out = float_out_data[i] * 127; - out_data[i] = tmp_out < 0 ? (signed char)(tmp_out - 0.5) - : (signed char)(tmp_out + 0.5); - } - fpga::fpga_flush(out_data, dataNum * sizeof(int8_t)); + math::SoftmaxFuntor()(float_input_x.get(), out); + auto out_data = out->data(); + fpga::fpga_flush(out_data, dataNum * sizeof(float)); } else { Tensor *out = param.Out(); out->Resize( {in_x->dims()[0], out->dims()[1], out->dims()[2], out->dims()[3]}); - auto out_data = out->data(); - auto float_out = param.float_out; - float_out = std::make_shared(); - float_out->Resize(in_x->dims()); - float_out->init(type_id().hash_code()); - fpga::format_ofm(float_out.get()); - math::SoftmaxFuntor()(in_x, float_out.get()); - auto float_out_data = float_out->data(); + math::SoftmaxFuntor()(in_x, out); int dataNum = n * h * fpga::align_to_x(w * c, IMAGE_ALIGNMENT); - for (int i = 0; i < dataNum; i++) { - float tmp_out = float_out_data[i] * 127; - out_data[i] = tmp_out < 0 ? (signed char)(tmp_out - 0.5) - : (signed char)(tmp_out + 0.5); - } - fpga::fpga_flush(out_data, dataNum * sizeof(int8_t)); + auto out_data = out->data(); + fpga::fpga_flush(out_data, dataNum * sizeof(float)); } }