From d8fb1ee02742390def2e0a082e5fde0cca52b996 Mon Sep 17 00:00:00 2001
From: hjchen2
Date: Wed, 13 Mar 2019 17:28:10 +0800
Subject: [PATCH] update

---
 src/common/log.h                                   |   3 +-
 src/framework/executor.cpp                         |  14 --
 src/framework/executor.h                           |   1 -
 src/io/api_paddle_mobile.cc                        |   2 +-
 src/io/jni/paddle_mobile_jni.cpp                   |   2 -
 src/io/paddle_mobile.h                             |   1 -
 src/operators/fusion_deconv_add_bn_op.h            |   2 +-
 src/operators/fusion_deconv_add_bn_relu_op.h       |   2 +-
 src/operators/fusion_deconv_bn_relu_op.h           |   2 +-
 .../kernel/central-arm-func/conv_arm_func.cpp      |   2 +
 src/operators/kernel/fpga/V1/conv_kernel.cpp       |   4 +-
 .../kernel/fpga/V1/conv_transpose_kernel.cpp       |   4 +-
 .../kernel/fpga/V1/deconv_add_bn_kernel.cpp        |   4 +-
 .../kernel/fpga/V1/deconv_add_bn_relu_kernel.cpp   |   4 +-
 .../kernel/fpga/V1/deconv_bn_relu_kernel.cpp       |   4 +-
 src/operators/kernel/fpga/V1/fetch_kernel.cpp      |  22 +-
 .../kernel/fpga/V1/fusion_fc_relu_kernel.cpp       |   2 +-
 src/operators/kernel/fpga/V1/pad2d_kernel.cpp      |  10 +-
 src/operators/kernel/fpga/V1/pool_kernel.cpp       |   2 +-
 src/operators/kernel/fpga/V1/sigmoid_kernel.cpp    |   2 +-
 src/operators/math/channel_wise.h                  |   4 +-
 src/operators/math/gemm/cblas.cc                   |   4 +
 src/operators/pad2d_op.cpp                         |   3 +
 test/fpga/test_rfcn_api.cpp                        | 231 ++++++++----------
 test/net/test_benchmark.cpp                        |  12 +-
 25 files changed, 158 insertions(+), 185 deletions(-)

diff --git a/src/common/log.h b/src/common/log.h
index d574818f86..282ee27809 100644
--- a/src/common/log.h
+++ b/src/common/log.h
@@ -31,7 +31,8 @@ namespace paddle_mobile {

 #ifdef ANDROID

-extern const char *ANDROID_LOG_TAG;
+static const char *ANDROID_LOG_TAG =
+    "paddle_mobile LOG built on " __DATE__ " " __TIME__;

 #define ANDROIDLOGI(...)                                               \
   __android_log_print(ANDROID_LOG_INFO, ANDROID_LOG_TAG, __VA_ARGS__); \
diff --git a/src/framework/executor.cpp b/src/framework/executor.cpp
index e4ffdaf05d..b5fab192aa 100644
--- a/src/framework/executor.cpp
+++ b/src/framework/executor.cpp
@@ -531,20 +531,6 @@ void Executor<Device, T>::FeedData(const std::vector<void *> &v) {
   }
 }

-template <typename Device, typename T>
-void Executor<Device, T>::FeedTensorData(const vector<Tensor> &v) {
-  auto input_size = v.size();
-  int index = 0;
-  auto vars = program_.scope->VarContain("feed", &index);
-  PADDLE_MOBILE_ENFORCE(input_size == vars.size(),
-                        "input data number not correct");
-  for (int i = 0; i < input_size; i++) {
-    auto var = program_.scope->Var("feed", i + index);
-    auto feed_tensor = var->template GetMutable<LoDTensor>();
-    feed_tensor->ShareDataWith(v[i]);
-  }
-}
-
 template <typename Device, typename T>
 void Executor<Device, T>::GetResults(std::vector<void *> *v) {
   auto output_size = v->size();
diff --git a/src/framework/executor.h b/src/framework/executor.h
index ea7bde7f74..853914c54c 100644
--- a/src/framework/executor.h
+++ b/src/framework/executor.h
@@ -53,7 +53,6 @@ class Executor {
   void InjectVariable(const Tensor &t, std::string var_name);
   void FeedData(const Tensor &t);
   void FeedData(const std::vector<void *> &v);
-  void FeedTensorData(const std::vector<framework::Tensor> &v);
   void GetResults(std::vector<void *> *v);
   void GetTensorResults(std::vector<framework::Tensor *> *v);
diff --git a/src/io/api_paddle_mobile.cc b/src/io/api_paddle_mobile.cc
index 1f4769b282..5839a279cd 100644
--- a/src/io/api_paddle_mobile.cc
+++ b/src/io/api_paddle_mobile.cc
@@ -146,7 +146,7 @@ void PaddleMobilePredictor<Device, T>::FeedPaddleTensors(
     tensors[i].init(typeid(float));
     ConvertPaddleTensors(inputs[i], &tensors[i]);
   }
-  paddle_mobile_->FeedTensorData(tensors);
+  // paddle_mobile_->FeedTensorData(tensors);
 }
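Aside: with FeedTensorData() removed, callers on the feed path hand raw buffers to FeedData(const std::vector<void *> &) instead. A minimal sketch of that calling convention, mirroring the reworked rfcn test at the end of this patch (buffer sizes come from that test; the ownership comment is an assumption, not a documented contract):

    // Sketch: feed two raw buffers (image metadata + image pixels).
    float img_info[3] = {768, 1536, 768.0f / 960.0f};
    auto *img = reinterpret_cast<float *>(
        paddle_mobile::fpga::fpga_malloc(768 * 1536 * 3 * sizeof(float)));
    paddle_mobile.FeedData(std::vector<void *>({img_info, img}));
    // ... Predict_To(...) / FetchResult(...) ...
    paddle_mobile::fpga::fpga_free(img);  // caller presumably keeps ownership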
diff --git a/src/io/jni/paddle_mobile_jni.cpp b/src/io/jni/paddle_mobile_jni.cpp
index 12c0a6cbca..63511a2226 100644
--- a/src/io/jni/paddle_mobile_jni.cpp
+++ b/src/io/jni/paddle_mobile_jni.cpp
@@ -39,8 +39,6 @@ using framework::Tensor;
 using paddle_mobile::CPU;
 using std::string;

-const char *ANDROID_LOG_TAG =
-    "paddle_mobile LOG built on " __DATE__ " " __TIME__;
 paddle_mobile::PaddleMobile<CPU> paddle_mobile;
 static std::mutex shared_mutex;
diff --git a/src/io/paddle_mobile.h b/src/io/paddle_mobile.h
index e3fd9f40f4..7983541a22 100644
--- a/src/io/paddle_mobile.h
+++ b/src/io/paddle_mobile.h
@@ -91,7 +91,6 @@ class PaddleMobile {
   void InjectVariable(const framework::Tensor &t, std::string var_name);
   void FeedData(const framework::Tensor &t);
   void FeedData(const std::vector<void *> &v);
-  void FeedTensorData(const std::vector<framework::Tensor> &v);
   void GetResults(std::vector<void *> *v);
   void GetTensorResults(std::vector<framework::Tensor *> *v);
diff --git a/src/operators/fusion_deconv_add_bn_op.h b/src/operators/fusion_deconv_add_bn_op.h
index f7f9b9e209..6185450441 100644
--- a/src/operators/fusion_deconv_add_bn_op.h
+++ b/src/operators/fusion_deconv_add_bn_op.h
@@ -57,7 +57,7 @@ class FusionDeconvAddBNOp : public framework::OperatorWithKernel<
   FusionDeconvAddBNOp(const string &type, const VariableNameMap &inputs,
                       const VariableNameMap &outputs,
                       const framework::AttributeMap &attrs,
-                      std::shared_ptr<framework::Scope> scope)
+                      framework::Scope *scope)
       : framework::OperatorWithKernel<
             DeviceType, FusionDeconvAddBNParam<DeviceType>,
             operators::DeconvAddBNKernel<DeviceType, T>>(type, inputs, outputs,
diff --git a/src/operators/fusion_deconv_add_bn_relu_op.h b/src/operators/fusion_deconv_add_bn_relu_op.h
index 97070ef01e..1c6cfd7318 100644
--- a/src/operators/fusion_deconv_add_bn_relu_op.h
+++ b/src/operators/fusion_deconv_add_bn_relu_op.h
@@ -59,7 +59,7 @@ class FusionDeconvAddBNReluOp
   FusionDeconvAddBNReluOp(const string &type, const VariableNameMap &inputs,
                           const VariableNameMap &outputs,
                           const framework::AttributeMap &attrs,
-                          std::shared_ptr<framework::Scope> scope)
+                          framework::Scope *scope)
       : framework::OperatorWithKernel<
             DeviceType, FusionDeconvAddBNReluParam<DeviceType>,
             operators::DeconvAddBNReluKernel<DeviceType, T>>(
diff --git a/src/operators/fusion_deconv_bn_relu_op.h b/src/operators/fusion_deconv_bn_relu_op.h
index ad0920ebd6..92bb97445d 100644
--- a/src/operators/fusion_deconv_bn_relu_op.h
+++ b/src/operators/fusion_deconv_bn_relu_op.h
@@ -56,7 +56,7 @@ class FusionDeconvBNReluOp
   FusionDeconvBNReluOp(const string &type, const VariableNameMap &inputs,
                        const VariableNameMap &outputs,
                        const framework::AttributeMap &attrs,
-                       std::shared_ptr<framework::Scope> scope)
+                       framework::Scope *scope)
       : framework::OperatorWithKernel<
             DeviceType, FusionDeconvBNReluParam<DeviceType>,
             operators::DeconvBNReluKernel<DeviceType, T>>(type, inputs, outputs,
diff --git a/src/operators/kernel/central-arm-func/conv_arm_func.cpp b/src/operators/kernel/central-arm-func/conv_arm_func.cpp
index c34bd1f5d9..2c31667206 100644
--- a/src/operators/kernel/central-arm-func/conv_arm_func.cpp
+++ b/src/operators/kernel/central-arm-func/conv_arm_func.cpp
@@ -47,6 +47,7 @@ bool IsExpand(const std::vector<int64_t> &filter_dim,
   return !(filter_1 && strides_1 && padding_0 && dilation_1);
 }

+#ifdef PADDLE_MOBILE_CPU
 template <typename Itype, typename Otype>
 void GemmConv(const ConvParam<CPU> &param) {
   const Tensor *input = param.Input();
@@ -241,6 +242,7 @@ template void GemmConv<float, float>(const ConvParam<CPU> &param);
 template void DepthwiseConv3x3<float, float>(const ConvParam<CPU> &param);
 template void DepthwiseConv5x5<float, float>(const ConvParam<CPU> &param);
 #endif
+#endif

 }  // namespace operators
 }  // namespace paddle_mobile
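The new PADDLE_MOBILE_CPU guard wraps the entire ARM GEMM convolution section; note the added #endif lands after the pre-existing one that closes the explicit-instantiation block, so non-CPU builds skip this code completely. Roughly:

    #ifdef PADDLE_MOBILE_CPU  // added: CPU-only convolution kernels
    template <typename Itype, typename Otype>
    void GemmConv(const ConvParam<CPU> &param) {
      // im2col/vol2col + GEMM path
    }
    // ... DepthwiseConv3x3 / DepthwiseConv5x5 and their instantiations ...
    #endif  // added: closes the new guard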
diff --git a/src/operators/kernel/fpga/V1/conv_kernel.cpp b/src/operators/kernel/fpga/V1/conv_kernel.cpp
index 73722820bd..57b5eb754e 100644
--- a/src/operators/kernel/fpga/V1/conv_kernel.cpp
+++ b/src/operators/kernel/fpga/V1/conv_kernel.cpp
@@ -24,8 +24,8 @@ bool ConvKernel<FPGA, float>::Init(ConvParam<FPGA> *param) {
   paddle_mobile::fpga::ActivationType activation_enable =
       paddle_mobile::fpga::NONE;
   int16_t leaky_relu_negative_slope = 0;
-  auto input = const_cast<Tensor *>(param->Input());
-  auto filter = const_cast<Tensor *>(param->Filter());
+  auto input = const_cast<LoDTensor *>(param->Input());
+  auto filter = const_cast<LoDTensor *>(param->Filter());
   auto out = param->Output();
   int channel = out->dims()[1];
   auto bs_ptr =
diff --git a/src/operators/kernel/fpga/V1/conv_transpose_kernel.cpp b/src/operators/kernel/fpga/V1/conv_transpose_kernel.cpp
index 788504df5d..1597885e43 100644
--- a/src/operators/kernel/fpga/V1/conv_transpose_kernel.cpp
+++ b/src/operators/kernel/fpga/V1/conv_transpose_kernel.cpp
@@ -27,10 +27,10 @@ bool ConvTransposeKernel<FPGA, float>::Init(ConvTransposeParam<FPGA> *param) {
   paddle_mobile::fpga::ActivationType activation_enable =
       paddle_mobile::fpga::NONE;
   int16_t leaky_relu_negative_slope = 0;
-  auto input = const_cast<Tensor *>(param->Input());
+  auto input = const_cast<LoDTensor *>(param->Input());
   //  const Tensor *bias = param->Bias();
   //  auto bias_ptr = bias->data<float>();
-  auto filter = const_cast<Tensor *>(param->Filter());
+  auto filter = const_cast<LoDTensor *>(param->Filter());
   auto out = param->Output();

   //  PADDLE_MOBILE_ENFORCE(out->dims()[1] == bias->dims()[0],
diff --git a/src/operators/kernel/fpga/V1/deconv_add_bn_kernel.cpp b/src/operators/kernel/fpga/V1/deconv_add_bn_kernel.cpp
index 4239ac1e5d..a8205df3c9 100644
--- a/src/operators/kernel/fpga/V1/deconv_add_bn_kernel.cpp
+++ b/src/operators/kernel/fpga/V1/deconv_add_bn_kernel.cpp
@@ -27,10 +27,10 @@ bool DeconvAddBNKernel<FPGA, float>::Init(FusionDeconvAddBNParam<FPGA> *param) {
   paddle_mobile::fpga::ActivationType activation_enable =
       paddle_mobile::fpga::NONE;
   int16_t leaky_relu_negative_slope = 0;
-  auto input = const_cast<Tensor *>(param->Input());
+  auto input = const_cast<LoDTensor *>(param->Input());
   const Tensor *bias = param->InputBias();
   auto bias_ptr = bias->data<float>();
-  auto filter = const_cast<Tensor *>(param->Filter());
+  auto filter = const_cast<LoDTensor *>(param->Filter());
   auto out = param->Output();

   PADDLE_MOBILE_ENFORCE(out->dims()[1] == bias->dims()[0],
diff --git a/src/operators/kernel/fpga/V1/deconv_add_bn_relu_kernel.cpp b/src/operators/kernel/fpga/V1/deconv_add_bn_relu_kernel.cpp
index 28b8c83198..b27f5cf870 100755
--- a/src/operators/kernel/fpga/V1/deconv_add_bn_relu_kernel.cpp
+++ b/src/operators/kernel/fpga/V1/deconv_add_bn_relu_kernel.cpp
@@ -28,10 +28,10 @@ bool DeconvAddBNReluKernel<FPGA, float>::Init(
   paddle_mobile::fpga::ActivationType activation_enable =
       paddle_mobile::fpga::LEAKYRELU;
   int16_t leaky_relu_negative_slope = 0;
-  auto input = const_cast<Tensor *>(param->Input());
+  auto input = const_cast<LoDTensor *>(param->Input());
   const Tensor *bias = param->InputBias();
   auto bias_ptr = bias->data<float>();
-  auto filter = const_cast<Tensor *>(param->Filter());
+  auto filter = const_cast<LoDTensor *>(param->Filter());
   auto out = param->Output();

   PADDLE_MOBILE_ENFORCE(out->dims()[1] == bias->dims()[0],
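A pattern worth noting in these FPGA Init() hooks: the param structs expose inputs as const, but the FPGA path needs mutable tensors so it can re-format them for the device (see fpga::format_fp16_ofm in the sigmoid and pad2d kernels below), hence the const_cast on every input and filter. Condensed sketch of the shared preamble:

    // Sketch only: the shared Init() preamble in the FPGA V1 kernels.
    auto input = const_cast<LoDTensor *>(param->Input());
    auto filter = const_cast<LoDTensor *>(param->Filter());  // layout rewritten
    auto out = param->Output();
    // ... build the op's fpga argument struct from these pointers ...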
diff --git a/src/operators/kernel/fpga/V1/deconv_bn_relu_kernel.cpp b/src/operators/kernel/fpga/V1/deconv_bn_relu_kernel.cpp
index f166587109..75597f0ecd 100644
--- a/src/operators/kernel/fpga/V1/deconv_bn_relu_kernel.cpp
+++ b/src/operators/kernel/fpga/V1/deconv_bn_relu_kernel.cpp
@@ -29,10 +29,10 @@ bool DeconvBNReluKernel<FPGA, float>::Init(
   paddle_mobile::fpga::ActivationType activation_enable =
       paddle_mobile::fpga::LEAKYRELU;
   int16_t leaky_relu_negative_slope = 0;
-  auto input = const_cast<Tensor *>(param->Input());
+  auto input = const_cast<LoDTensor *>(param->Input());
   const Tensor *bias = param->InputBias();
   auto bias_ptr = bias->data<float>();
-  auto filter = const_cast<Tensor *>(param->Filter());
+  auto filter = const_cast<LoDTensor *>(param->Filter());
   auto out = param->Output();
   auto bn_mean_ptr = param->InputMean()->data<float>();
   auto bn_var_ptr = param->InputVariance()->data<float>();
diff --git a/src/operators/kernel/fpga/V1/fetch_kernel.cpp b/src/operators/kernel/fpga/V1/fetch_kernel.cpp
index 2aea5a770c..b128c8e343 100644
--- a/src/operators/kernel/fpga/V1/fetch_kernel.cpp
+++ b/src/operators/kernel/fpga/V1/fetch_kernel.cpp
@@ -57,13 +57,9 @@ void dealign(float *src, float *dst, int input_c, int input_h, int input_w) {
 }
 template <>
 void FetchKernel<FPGA, float>::Compute(const FetchParam<FPGA> &param) {
-  auto input = const_cast<Tensor *>(param.InputX());
-  if (input->type() == typeid(float)) {
-    int col = param.Col();
-    auto output = &(param.Out()->at(col));
-    output->ShareDataWith(*input);
-    return;
-  }
+  auto input = const_cast<LoDTensor *>(param.InputX());
+  int col = param.Col();
+  LoDTensor *out = &param.Out()->at(col);

   fpga::BypassArgs args = param.fpga_bypass_args;
   auto input_address = (input->data<half>());
@@ -71,7 +67,7 @@ void FetchKernel<FPGA, float>::Compute(const FetchParam<FPGA> &param) {
   float *outdata_ptr =
       reinterpret_cast<float *>(param.fpga_bypass_args.output.address);
   const int num_th = 32;
-  if ((param.Out()->fpga_data_num) < num_th) {
+  if ((out->fpga_data_num) < num_th) {
     fpga::fpga_invalidate(input_address, (input->fpga_data_num) * sizeof(half));
     for (int idx = 0; idx < product(input->dims()); ++idx) {
@@ -81,14 +77,14 @@ void FetchKernel<FPGA, float>::Compute(const FetchParam<FPGA> &param) {
   }
   fpga::PerformBypass(args);

-  auto outC = param.Out()->dims()[1];
-  auto outH = param.Out()->dims()[2];
-  auto outW = param.Out()->dims()[3];
+  auto outC = out->dims()[1];
+  auto outH = out->dims()[2];
+  auto outW = out->dims()[3];

   fpga::fpga_invalidate(param.fpga_bypass_args.output.address,
-                        param.Out()->fpga_data_num * sizeof(float));
+                        out->fpga_data_num * sizeof(float));

-  if (param.Out()->fpga_data_num != product(input->dims())) {
+  if (out->fpga_data_num != product(input->dims())) {
     float *data_tmp =
         reinterpret_cast<float *>(malloc(outC * outH * outW * sizeof(float)));
     dealign(outdata_ptr, data_tmp, outC, outH, outW);
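In the reworked FetchKernel::Compute, all output bookkeeping now goes through the cached out pointer, and two special cases remain: tensors with fewer than num_th (32) elements skip the bypass engine and are widened half to float on the CPU, and outputs whose fpga_data_num does not match product(input->dims()) get repacked by dealign(). The small-tensor branch presumably reduces to something like this (the conversion line itself is not visible in the hunk, so this is an assumption):

    // Assumed shape of the < num_th fallback:
    fpga::fpga_invalidate(input_address, input->fpga_data_num * sizeof(half));
    for (int idx = 0; idx < product(input->dims()); ++idx) {
      outdata_ptr[idx] = fpga::fp16_2_fp32(input_address[idx]);
    }
    return;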
diff --git a/src/operators/kernel/fpga/V1/fusion_fc_relu_kernel.cpp b/src/operators/kernel/fpga/V1/fusion_fc_relu_kernel.cpp
index 6fbeb63fe6..fef370515e 100644
--- a/src/operators/kernel/fpga/V1/fusion_fc_relu_kernel.cpp
+++ b/src/operators/kernel/fpga/V1/fusion_fc_relu_kernel.cpp
@@ -25,7 +25,7 @@ bool FusionFcReluKernel<FPGA, float>::Init(FusionFcReluParam<FPGA> *param) {
       paddle_mobile::fpga::LEAKYRELU;
   int16_t leaky_relu_negative_slope = 0;
   auto input_x = const_cast<LoDTensor *>(param->InputX());
-  auto filter = const_cast<Tensor *>(param->InputY());
+  auto filter = const_cast<LoDTensor *>(param->InputY());
   const Tensor *input_z = param->InputZ();
   auto input_z_ptr = input_z->data<float>();
   auto out = param->Out();
diff --git a/src/operators/kernel/fpga/V1/pad2d_kernel.cpp b/src/operators/kernel/fpga/V1/pad2d_kernel.cpp
index f47a585ee4..5d81f71c36 100644
--- a/src/operators/kernel/fpga/V1/pad2d_kernel.cpp
+++ b/src/operators/kernel/fpga/V1/pad2d_kernel.cpp
@@ -16,8 +16,8 @@ limitations under the License. */
 namespace paddle_mobile {
 namespace operators {
 template <>
-bool Pad2dKernel<FPGA, float>::Init(Pad2dParam<FPGA> *param) {
-  Tensor *output = param->Out();
+bool Pad2DKernel<FPGA, float>::Init(Pad2DParam<FPGA> *param) {
+  Tensor *output = param->output_;
   fpga::format_fp16_ofm(output);
   return true;
 }
@@ -39,9 +39,9 @@ void pad2dFunc(const framework::Tensor *input, framework::Tensor *output) {
   }
 }
 template <>
-void Pad2dKernel<FPGA, float>::Compute(const Pad2dParam<FPGA> &param) {
-  auto in_x = param.InputX();
-  auto out = param.Out();
+void Pad2DKernel<FPGA, float>::Compute(const Pad2DParam<FPGA> &param) {
+  auto in_x = param.input_;
+  auto out = param.output_;
   fpga::fpga_invalidate((void *)in_x->data<half>(),  // NOLINT
                         in_x->numel() * sizeof(half));
   pad2dFunc(in_x, out);
diff --git a/src/operators/kernel/fpga/V1/pool_kernel.cpp b/src/operators/kernel/fpga/V1/pool_kernel.cpp
index 4c0e09e63f..994fa15162 100644
--- a/src/operators/kernel/fpga/V1/pool_kernel.cpp
+++ b/src/operators/kernel/fpga/V1/pool_kernel.cpp
@@ -68,7 +68,7 @@ bool PoolKernel<FPGA, float>::Init(PoolParam<FPGA> *param) {

 template <>
 void PoolKernel<FPGA, float>::Compute(const PoolParam<FPGA> &param) {
-  auto *input = const_cast<Tensor *>(param.Input());
+  auto *input = const_cast<LoDTensor *>(param.Input());
   if (input->type() == typeid(float)) {
     auto *output = param.Output();
diff --git a/src/operators/kernel/fpga/V1/sigmoid_kernel.cpp b/src/operators/kernel/fpga/V1/sigmoid_kernel.cpp
index bf36873a1f..bb9eb3d6e8 100644
--- a/src/operators/kernel/fpga/V1/sigmoid_kernel.cpp
+++ b/src/operators/kernel/fpga/V1/sigmoid_kernel.cpp
@@ -24,7 +24,7 @@ bool SigmoidKernel<FPGA, float>::Init(SigmoidParam<FPGA> *param) {
   paddle_mobile::fpga::ActivationType activation_enable =
       paddle_mobile::fpga::SIGMOID;
   int16_t leaky_relu_negative_slope = 0;
-  auto input = const_cast<Tensor *>(param->InputX());
+  auto input = const_cast<LoDTensor *>(param->InputX());
   auto input_ptr = input->data<half>();
   auto out = param->Out();
   fpga::format_fp16_ofm(out);
diff --git a/src/operators/math/channel_wise.h b/src/operators/math/channel_wise.h
index 796ea6d2b9..e4c0cbe05b 100644
--- a/src/operators/math/channel_wise.h
+++ b/src/operators/math/channel_wise.h
@@ -33,7 +33,7 @@ void AddChannelWise(const framework::Tensor *input,
   // maybe check shape
   int batch_size = input->dims()[0];
   int channels = input->dims()[1];
-  size_t spatial_size = input->dims()[2] * input->dims()[3];
+  int spatial_size = input->dims()[2] * input->dims()[3];

   for (int batch = 0; batch < batch_size; ++batch) {
     for (int channel = 0; channel < channels; ++channel) {
@@ -88,7 +88,7 @@ void ScaleAddChannelWise(const framework::Tensor *input,
   // maybe check shape
   int batch_size = input->dims()[0];
   int channels = input->dims()[1];
-  size_t spatial_size = input->dims()[2] * input->dims()[3];
+  int spatial_size = input->dims()[2] * input->dims()[3];

   for (int batch = 0; batch < batch_size; ++batch) {
     for (int channel = 0; channel < channels; ++channel) {
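The size_t to int change in both helpers keeps the spatial-size arithmetic signed, which avoids the usual unsigned pitfalls once the value mixes with int loop counters. Illustrative only, not from the patch:

    size_t n = 0;
    for (int i = 0; i < n - 1; ++i) {}  // n - 1 wraps to SIZE_MAX and i is
                                        // promoted to size_t: runaway loop
    int m = 0;
    for (int i = 0; i < m - 1; ++i) {}  // m - 1 == -1: body never runs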
diff --git a/src/operators/math/gemm/cblas.cc b/src/operators/math/gemm/cblas.cc
index 6dc04d1b4e..adc375b629 100644
--- a/src/operators/math/gemm/cblas.cc
+++ b/src/operators/math/gemm/cblas.cc
@@ -12,6 +12,8 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License. */

+#if defined(__ARM_NEON__) || defined(__ARM_NEON)
+
 #pragma once

 #include "operators/math/gemm/cblas.h"
@@ -47,3 +49,5 @@ void cblas_sgemv(const bool trans, const int M, const int N, const float alpha,
 }  // namespace math
 }  // namespace operators
 }  // namespace paddle_mobile
+
+#endif
diff --git a/src/operators/pad2d_op.cpp b/src/operators/pad2d_op.cpp
index 3d0fdf44d5..8a771c36a5 100644
--- a/src/operators/pad2d_op.cpp
+++ b/src/operators/pad2d_op.cpp
@@ -37,5 +37,8 @@ namespace ops = paddle_mobile::operators;
 #ifdef PADDLE_MOBILE_CPU
 REGISTER_OPERATOR_CPU(pad2d, ops::Pad2DOp);
 #endif
+#ifdef PADDLE_MOBILE_FPGA
+REGISTER_OPERATOR_FPGA(pad2d, ops::Pad2DOp);
+#endif

 #endif  // PAD2D_OP
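Both changes follow the project's conditional-compilation convention: cblas.cc now compiles to an empty translation unit on non-ARM targets instead of tripping over NEON-only code, and pad2d gains an FPGA registration alongside the existing CPU one. The shape of the pattern:

    #if defined(__ARM_NEON__) || defined(__ARM_NEON)
    // ... NEON-only cblas_sgemm / cblas_sgemv implementations ...
    #endif

    #ifdef PADDLE_MOBILE_FPGA
    REGISTER_OPERATOR_FPGA(pad2d, ops::Pad2DOp);  // per-backend registration
    #endif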
diff --git a/test/fpga/test_rfcn_api.cpp b/test/fpga/test_rfcn_api.cpp
index 724ef7d14d..f787d8f9ac 100644
--- a/test/fpga/test_rfcn_api.cpp
+++ b/test/fpga/test_rfcn_api.cpp
@@ -12,16 +12,18 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License. */

-#ifndef PADDLE_MOBILE_FPGA
-#define PADDLE_MOBILE_FPGA
-#endif
-#include <fstream>
 #include <iostream>
-#include "io/paddle_inference_api.h"
+#include "../test_helper.h"
+#include "../test_include.h"
+
+#ifdef PADDLE_MOBILE_FPGA_V1
+#include "fpga/V1/api.h"
+#endif
+#ifdef PADDLE_MOBILE_FPGA_V2
+#include "fpga/V2/api.h"
+#endif

-static const char *g_image = "../models/rfcn/data.bin";
-static const char *g_model = "../models/rfcn/model";
-static const char *g_param = "../models/rfcn/params";
+#include <string>

 void readStream(std::string filename, char *buf) {
   std::ifstream in;
   in.open(filename, std::ios::in | std::ios::binary);
   if (!in.is_open()) {
     std::cout << "open File Failed." << std::endl;
     return;
   }
   in.seekg(0, std::ios::end);  // go to the end
   auto length = in.tellg();    // report location (this is the length)
   in.seekg(0, std::ios::beg);  // go back to the beginning
   in.read(buf, length);
+  DLOG << length;
   in.close();
 }

-PaddleMobileConfig GetConfig() {
-  PaddleMobileConfig config;
-  config.precision = PaddleMobileConfig::FP32;
-  config.device = PaddleMobileConfig::kFPGA;
-  config.prog_file = g_model;
-  config.param_file = g_param;
-  config.thread_num = 1;
-  config.batch_size = 1;
-  config.optimize = true;
-  config.lod_mode = true;
-  config.quantification = false;
-  return config;
-}
-
-PaddleMobileConfig GetConfig1() {
-  PaddleMobileConfig config;
-  config.precision = PaddleMobileConfig::FP32;
-  config.device = PaddleMobileConfig::kFPGA;
-  config.model_dir = "../models/resnet50";
-  config.thread_num = 1;
-  config.batch_size = 1;
-  config.optimize = true;
-  config.quantification = false;
-  return config;
-}
+void convert_to_chw(int16_t **data_in, int channel, int height, int width,
+                    int num, int16_t *data_tmp) {
+  int64_t amount_per_side = width * height;
+  for (int n = 0; n < num; n++) {
+    for (int h = 0; h < height; h++) {
+      for (int w = 0; w < width; w++) {
+        for (int c = 0; c < channel; c++) {
+          *(data_tmp + n * amount_per_side * channel + c * amount_per_side +
+            width * h + w) = *((*data_in)++);
+        }
+      }
+    }
+  }
+}

-int main() {
-  open_device();
-
-  PaddleMobileConfig config = GetConfig();
-  auto predictor =
-      CreatePaddlePredictor<PaddleMobileConfig,
-                            PaddleEngineKind::kPaddleMobile>(config);
-
-  std::cout << "Finishing loading model" << std::endl;
-
-  float img_info[3] = {432, 1280, 1.0f};
-  int img_length = 432 * 1280 * 3;
-  auto img = reinterpret_cast<float *>(fpga_malloc(img_length * sizeof(float)));
-  readStream(g_image, reinterpret_cast<char *>(img));
-
-  std::cout << "Finishing initializing data" << std::endl;
-  struct PaddleTensor t_img_info, t_img;
-  t_img.dtypeid = typeid(float);
-  t_img_info.layout = LAYOUT_HWC;
-  t_img_info.shape = std::vector<int>({1, 3});
-  t_img_info.name = "Image information";
-  t_img_info.data.Reset(img_info, 3 * sizeof(float));
-
-  t_img.dtypeid = typeid(float);
-  t_img.layout = LAYOUT_HWC;
-  t_img.shape = std::vector<int>({1, 432, 1280, 3});
-  t_img.name = "Image information";
-  t_img.data.Reset(img, img_length * sizeof(float));
-  predictor->FeedPaddleTensors({t_img_info, t_img});
-
-  std::cout << "Finishing feeding data " << std::endl;
-
-  predictor->Predict_From_To(0, -1);
-  std::cout << "Finishing predicting " << std::endl;
-
-  std::vector<PaddleTensor> v;        // No need to initialize v
-  predictor->FetchPaddleTensors(&v);  // Old data in v will be cleared
-  std::cout << "Output number is " << v.size() << std::endl;
-  std::cout << "out[0] length " << v[0].data.length() << std::endl;
-  std::cout << "out[1] length " << v[1].data.length() << std::endl;
-  std::cout << "out[2] length " << v[2].data.length() << std::endl;
-
-  auto post_nms = v[0].data.length() / sizeof(float) / 8;
-  for (int num = 0; num < post_nms; num++) {
-    for (int i = 0; i < 8; i++) {
-      auto p = reinterpret_cast<float *>(v[0].data.data());
-      std::cout << p[num * 8 + i] << std::endl;
-    }
-  }
+void dump_stride_half(std::string filename, Tensor input_tensor,
+                      const int dumpnum, bool use_chw) {
+  // bool use_chw = true;
+  if (input_tensor.dims().size() != 4) return;
+  int c = (input_tensor.dims())[1];
+  int h = (input_tensor.dims())[2];
+  int w = (input_tensor.dims())[3];
+  int n = (input_tensor.dims())[0];
+  auto data_ptr = input_tensor.get_data();
+  auto *data_ptr_16 = reinterpret_cast<int16_t *>(data_ptr);
+  auto data_tmp = data_ptr_16;
+  if (use_chw) {
+    data_tmp =
+        reinterpret_cast<int16_t *>(malloc(n * c * h * w * sizeof(int16_t)));
+    convert_to_chw(&data_ptr_16, c, h, w, n, data_tmp);
+  }
-  for (int num = 0; num < post_nms; num++) {
-    for (int i = 0; i < 8; i++) {
-      auto p = reinterpret_cast<float *>(v[1].data.data());
-      std::cout << p[num * 8 + i] << std::endl;
-    }
-  }
+  std::ofstream out(filename.c_str());
+  float result = 0;
+  int stride = input_tensor.numel() / dumpnum;
+  stride = stride > 0 ? stride : 1;
+  for (int i = 0; i < input_tensor.numel(); i += stride) {
+    result = paddle_mobile::fpga::fp16_2_fp32(data_tmp[i]);
+    out << result << std::endl;
+  }
-  for (int num = 0; num < post_nms; num++) {
-    for (int i = 0; i < 4; i++) {
-      auto p = reinterpret_cast<float *>(v[2].data.data());
-      std::cout << p[num * 4 + i] << std::endl;
-    }
-  }
+  out.close();
+  if (data_tmp != data_ptr_16) {
+    free(data_tmp);
+  }
+}
-  std::cout << "Finish getting vector values" << std::endl;
-
-  ////////////////////////////////////////////////////
-
-  PaddleTensor tensor;
-  predictor->GetPaddleTensor("fetch2", &tensor);
-  for (int i = 0; i < post_nms; i++) {
-    auto p = reinterpret_cast<float *>(tensor.data.data());
-    std::cout << p[+i] << std::endl;
-  }
+void dump_stride_float(std::string filename, Tensor input_tensor,
+                       const int dumpnum) {
+  auto data_ptr = reinterpret_cast<float *>(input_tensor.get_data());
+  std::ofstream out(filename.c_str());
+  float result = 0;
+  int stride = input_tensor.numel() / dumpnum;
+  stride = stride > 0 ? stride : 1;
+  for (int i = 0; i < input_tensor.numel(); i += stride) {
+    result = data_ptr[i];
+    out << result << std::endl;
+  }
+  out.close();
+}
-
-  //////////////////////////////////////////////////////
-
-  PaddleMobileConfig config1 = GetConfig1();
-  auto predictor1 =
-      CreatePaddlePredictor<PaddleMobileConfig,
-                            PaddleEngineKind::kPaddleMobile>(config1);
-
-  std::cout << "Finishing loading model" << std::endl;
-
-  int img_length1 = 224 * 224 * 3;
-  auto img1 =
-      reinterpret_cast<float *>(fpga_malloc(img_length1 * sizeof(float)));
-
-  std::cout << "Finishing initializing data" << std::endl;
+void dump_stride(std::string filename, Tensor input_tensor, const int dumpnum,
+                 bool use_chw) {
+  static int i = 0;
+  if (input_tensor.numel() == 0) {
+    return;
+  }
+  if (input_tensor.type() == typeid(float)) {
+    DLOG << "op: " << i++ << ", float data " << input_tensor.numel();
+    dump_stride_float(filename, input_tensor, dumpnum);
+  } else {
+    DLOG << "op: " << i++ << ", half data " << input_tensor.numel();
+    dump_stride_half(filename, input_tensor, dumpnum, use_chw);
+  }
+  DLOG << "dump input address: " << input_tensor.get_data();
+}
-  struct PaddleTensor t_img1;
-
-  t_img1.dtypeid = typeid(float);
-  t_img1.layout = LAYOUT_HWC;
-  t_img1.shape = std::vector<int>({1, 224, 224, 3});
-  t_img1.name = "Image information";
-  t_img1.data.Reset(img1, img_length1 * sizeof(float));
-  predictor1->FeedPaddleTensors({t_img1});
-  predictor1->Predict_From_To(0, -1);
-  std::cout << "Finishing predicting " << std::endl;
+static const char *g_rfcn_combine = "../models/rfcn";
+static const char *g_image_src_float = "../models/rfcn/data.bin";
+int main() {
-  std::vector<PaddleTensor> v1;        // No need to initialize v
-  predictor1->FetchPaddleTensors(&v1);  // Old data in v will be cleared
-  std::cout << "Output number is " << v1.size() << std::endl;
-  std::cout << "out[0] length " << v1[0].data.length() << std::endl;
+  paddle_mobile::fpga::open_device();
+  paddle_mobile::PaddleMobile<paddle_mobile::FPGA> paddle_mobile;
+
+  if (paddle_mobile.Load(std::string(g_rfcn_combine) + "/model",
+                         std::string(g_rfcn_combine) + "/params", true, false,
+                         1, true)) {
+    float img_info[3] = {768, 1536, 768.0f / 960.0f};
+    auto img = reinterpret_cast<float *>(
+        fpga::fpga_malloc(768 * 1536 * 3 * sizeof(float)));
+    readStream(g_image_src_float, reinterpret_cast<char *>(img));
+
+    std::vector<void *> v(3, nullptr);
+    paddle_mobile.FeedData(std::vector<void *>({img_info, img}));
+    paddle_mobile.Predict_To(-1);
+
+    for (int i = 65; i < 69; i++) {
+      auto tensor_ptr = paddle_mobile.FetchResult(i);
+      std::string saveName = "rfcn_" + std::to_string(i);
+      paddle_mobile::fpga::fpga_invalidate((*tensor_ptr).get_data(),
+                                           tensor_ptr->numel() * sizeof(float));
+      dump_stride(saveName, (*tensor_ptr), tensor_ptr->numel(), true);
+    }
+    // paddle_mobile.GetResults(&v);
+    DLOG << "Computation done";
+    fpga::fpga_free(img);
+  }
   return 0;
 }
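convert_to_chw above walks the source in N, H, W, C order (the FPGA's interleaved layout) and scatters into N, C, H, W planes, advancing the caller's pointer as it consumes input. A tiny worked example for a 1x2x1x2 (NxCxHxW) tensor:

    // HWC source, channels interleaved per pixel:
    // (w0,c0)=10 (w0,c1)=30 (w1,c0)=20 (w1,c1)=40
    int16_t in[] = {10, 30, 20, 40};
    int16_t out[4] = {0};
    int16_t *p = in;
    convert_to_chw(&p, /*channel=*/2, /*height=*/1, /*width=*/2, /*num=*/1, out);
    // out == {10, 20, 30, 40}: channel 0's pixels first, then channel 1's.

dump_stride then samples every numel()/dumpnum-th element through either the float or the half path, so dumps stay comparable across layers of different sizes.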
diff --git a/test/net/test_benchmark.cpp b/test/net/test_benchmark.cpp
index 31a0850c4d..38e6f8e870 100644
--- a/test/net/test_benchmark.cpp
+++ b/test/net/test_benchmark.cpp
@@ -36,7 +36,10 @@ int main(int argc, char* argv[]) {
   paddle_mobile::PaddleMobile<paddle_mobile::CPU> paddle_mobile;
   paddle_mobile.SetThreadNum(thread_num);
   auto time1 = time();
-  if (paddle_mobile.Load(fluid_model, optimize)) {
+// if (paddle_mobile.Load(fluid_model, optimize, false, 1, true)) {
+  if (paddle_mobile.Load(std::string(fluid_model) + "/model",
+                         std::string(fluid_model) + "/params", optimize,
+                         false, 1, true)) {
     auto time2 = time();
     std::cout << "load cost :" << time_diff(time1, time2) << "ms\n";
     paddle_mobile::framework::Tensor input;
@@ -51,14 +54,15 @@ int main(int argc, char* argv[]) {
       paddle_mobile::framework::DDim in_shape =
           paddle_mobile::framework::make_ddim(dims);
   SetupTensor<float>(&input, in_shape, 0.f, 255.f);
-  // warmup
-  for (int i = 0; i < 10; ++i) {
+// // warmup
+  for (int i = 0; i < 2; ++i) {
     paddle_mobile.Predict(input);
   }
   auto time3 = time();
   for (int i = 0; i < 10; ++i) {
     paddle_mobile.Predict(input);
   }
+  auto time4 = time();
   std::cout << "predict cost :" << time_diff(time3, time4) / 10 << "ms\n";
   std::ostringstream os("output tensor size: ");
   os << output->numel() << std::endl;
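Net effect on the benchmark: two warm-up runs instead of ten, ten timed runs, and an explicit end timestamp: the existing time_diff(time3, time4) call needed a time4, which this hunk now defines. The timing skeleton reads:

    for (int i = 0; i < 2; ++i) paddle_mobile.Predict(input);   // warm-up
    auto time3 = time();
    for (int i = 0; i < 10; ++i) paddle_mobile.Predict(input);  // timed
    auto time4 = time();
    std::cout << "predict cost :" << time_diff(time3, time4) / 10 << "ms\n";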
@@ -68,7 +72,7 @@ int main(int argc, char* argv[]) {
   for (int i = 0; i < output->numel(); ++i) {
     os << ", " << output->data<float>()[i];
   }
   std::string output_str = os.str();
-  std::cout << output_str << std::endl;
+// std::cout << output_str << std::endl;
   }
   return 0;
 }
--
GitLab