diff --git a/src/common/log.h b/src/common/log.h index d574818f865ab6b2af748a5b3162b589f396a564..282ee2780993447051143866f65907ba7ce17be3 100644 --- a/src/common/log.h +++ b/src/common/log.h @@ -31,7 +31,8 @@ namespace paddle_mobile { #ifdef ANDROID -extern const char *ANDROID_LOG_TAG; +static const char *ANDROID_LOG_TAG = + "paddle_mobile LOG built on " __DATE__ " " __TIME__; #define ANDROIDLOGI(...) \ __android_log_print(ANDROID_LOG_INFO, ANDROID_LOG_TAG, __VA_ARGS__); \ diff --git a/src/framework/executor.cpp b/src/framework/executor.cpp index e4ffdaf05d5dad129138b2a7745619c86e8ca805..b5fab192aaed8ecb7796fc81b2ac67d810654c4c 100644 --- a/src/framework/executor.cpp +++ b/src/framework/executor.cpp @@ -531,20 +531,6 @@ void Executor::FeedData(const std::vector &v) { } } -template -void Executor::FeedTensorData(const vector &v) { - auto input_size = v.size(); - int index = 0; - auto vars = program_.scope->VarContain("feed", &index); - PADDLE_MOBILE_ENFORCE(input_size == vars.size(), - "input data number not correct"); - for (int i = 0; i < input_size; i++) { - auto var = program_.scope->Var("feed", i + index); - auto feed_tensor = var->template GetMutable(); - feed_tensor->ShareDataWith(v[i]); - } -} - template void Executor::GetResults(std::vector *v) { auto output_size = v->size(); diff --git a/src/framework/executor.h b/src/framework/executor.h index ea7bde7f748352c9b1221e69f3359938b7371a39..853914c54cb962c570ae2a9751500d3275091499 100644 --- a/src/framework/executor.h +++ b/src/framework/executor.h @@ -53,7 +53,6 @@ class Executor { void InjectVariable(const Tensor &t, std::string var_name); void FeedData(const Tensor &t); void FeedData(const std::vector &v); - void FeedTensorData(const std::vector &v); void GetResults(std::vector *v); void GetTensorResults(std::vector *v); diff --git a/src/io/api_paddle_mobile.cc b/src/io/api_paddle_mobile.cc index 1f4769b282385207a5b53d6d678364393d7da6cc..5839a279cdfc03472628cf7650b30064281a226e 100644 --- a/src/io/api_paddle_mobile.cc +++ b/src/io/api_paddle_mobile.cc @@ -146,7 +146,7 @@ void PaddleMobilePredictor::FeedPaddleTensors( tensors[i].init(typeid(float)); ConvertPaddleTensors(inputs[i], &tensors[i]); } - paddle_mobile_->FeedTensorData(tensors); + // paddle_mobile_->FeedTensorData(tensors); } template diff --git a/src/io/jni/paddle_mobile_jni.cpp b/src/io/jni/paddle_mobile_jni.cpp index 12c0a6cbca1721578efe175d8c108e30de18be7d..63511a2226e9563e758f87fea4fed67438eda8f6 100644 --- a/src/io/jni/paddle_mobile_jni.cpp +++ b/src/io/jni/paddle_mobile_jni.cpp @@ -39,8 +39,6 @@ using framework::Tensor; using paddle_mobile::CPU; using std::string; -const char *ANDROID_LOG_TAG = - "paddle_mobile LOG built on " __DATE__ " " __TIME__; paddle_mobile::PaddleMobile paddle_mobile; static std::mutex shared_mutex; diff --git a/src/io/paddle_mobile.h b/src/io/paddle_mobile.h index e3fd9f40f4194ed2841ba11366c6c5142e6279ab..7983541a221fb63f573dfa8186599934cd97387b 100644 --- a/src/io/paddle_mobile.h +++ b/src/io/paddle_mobile.h @@ -91,7 +91,6 @@ class PaddleMobile { void InjectVariable(const framework::Tensor &t, std::string var_name); void FeedData(const framework::Tensor &t); void FeedData(const std::vector &v); - void FeedTensorData(const std::vector &v); void GetResults(std::vector *v); void GetTensorResults(std::vector *v); diff --git a/src/operators/fusion_deconv_add_bn_op.h b/src/operators/fusion_deconv_add_bn_op.h index f7f9b9e2094a7228c944b70b88ae3105ae9f37e8..618545044136e42e750fd4c71ce96bd861954b71 100644 --- a/src/operators/fusion_deconv_add_bn_op.h +++ b/src/operators/fusion_deconv_add_bn_op.h @@ -57,7 +57,7 @@ class FusionDeconvAddBNOp : public framework::OperatorWithKernel< FusionDeconvAddBNOp(const string &type, const VariableNameMap &inputs, const VariableNameMap &outputs, const framework::AttributeMap &attrs, - std::shared_ptr scope) + framework::Scope *scope) : framework::OperatorWithKernel< DeviceType, FusionDeconvAddBNParam, operators::DeconvAddBNKernel>(type, inputs, outputs, diff --git a/src/operators/fusion_deconv_add_bn_relu_op.h b/src/operators/fusion_deconv_add_bn_relu_op.h index 97070ef01e544839be8eab6ddba21c43dfa9a26e..1c6cfd7318e48cad16e1d274b5724c832c70d8c8 100644 --- a/src/operators/fusion_deconv_add_bn_relu_op.h +++ b/src/operators/fusion_deconv_add_bn_relu_op.h @@ -59,7 +59,7 @@ class FusionDeconvAddBNReluOp FusionDeconvAddBNReluOp(const string &type, const VariableNameMap &inputs, const VariableNameMap &outputs, const framework::AttributeMap &attrs, - std::shared_ptr scope) + framework::Scope *scope) : framework::OperatorWithKernel< DeviceType, FusionDeconvAddBNReluParam, operators::DeconvAddBNReluKernel>( diff --git a/src/operators/fusion_deconv_bn_relu_op.h b/src/operators/fusion_deconv_bn_relu_op.h index ad0920ebd69b1a13ebc0e85f2c5f6008379715da..92bb97445d1442056843efb1fd66fa3fb1e54511 100644 --- a/src/operators/fusion_deconv_bn_relu_op.h +++ b/src/operators/fusion_deconv_bn_relu_op.h @@ -56,7 +56,7 @@ class FusionDeconvBNReluOp FusionDeconvBNReluOp(const string &type, const VariableNameMap &inputs, const VariableNameMap &outputs, const framework::AttributeMap &attrs, - std::shared_ptr scope) + framework::Scope *scope) : framework::OperatorWithKernel< DeviceType, FusionDeconvBNReluParam, operators::DeconvBNReluKernel>(type, inputs, outputs, diff --git a/src/operators/kernel/central-arm-func/conv_arm_func.cpp b/src/operators/kernel/central-arm-func/conv_arm_func.cpp index c34bd1f5d909317fe727a192f5dc27479f71bc90..2c3166720652a77d3b628d2e5fd5d227a1a7fc33 100644 --- a/src/operators/kernel/central-arm-func/conv_arm_func.cpp +++ b/src/operators/kernel/central-arm-func/conv_arm_func.cpp @@ -47,6 +47,7 @@ bool IsExpand(const std::vector &filter_dim, return !(filter_1 && strides_1 && padding_0 && dilation_1); } +#ifdef PADDLE_MOBILE_CPU template void GemmConv(const ConvParam ¶m) { const Tensor *input = param.Input(); @@ -241,6 +242,7 @@ template void GemmConv(const ConvParam ¶m); template void DepthwiseConv3x3(const ConvParam ¶m); template void DepthwiseConv5x5(const ConvParam ¶m); #endif +#endif } // namespace operators } // namespace paddle_mobile diff --git a/src/operators/kernel/fpga/V1/conv_kernel.cpp b/src/operators/kernel/fpga/V1/conv_kernel.cpp index 73722820bd90b54abd64dd01b157c74c6a1069e8..57b5eb754e327160399bee728d0689101fac1134 100644 --- a/src/operators/kernel/fpga/V1/conv_kernel.cpp +++ b/src/operators/kernel/fpga/V1/conv_kernel.cpp @@ -24,8 +24,8 @@ bool ConvKernel::Init(ConvParam *param) { paddle_mobile::fpga::ActivationType activation_enable = paddle_mobile::fpga::NONE; int16_t leaky_relu_negative_slope = 0; - auto input = const_cast(param->Input()); - auto filter = const_cast(param->Filter()); + auto input = const_cast(param->Input()); + auto filter = const_cast(param->Filter()); auto out = param->Output(); int channel = out->dims()[1]; auto bs_ptr = diff --git a/src/operators/kernel/fpga/V1/conv_transpose_kernel.cpp b/src/operators/kernel/fpga/V1/conv_transpose_kernel.cpp index 788504df5d2ea1005cfaa76f12b58e61c0218391..1597885e43e01895b6acd425031341af70d5eaf7 100644 --- a/src/operators/kernel/fpga/V1/conv_transpose_kernel.cpp +++ b/src/operators/kernel/fpga/V1/conv_transpose_kernel.cpp @@ -27,10 +27,10 @@ bool ConvTransposeKernel::Init(ConvTransposeParam *param) { paddle_mobile::fpga::ActivationType activation_enable = paddle_mobile::fpga::NONE; int16_t leaky_relu_negative_slope = 0; - auto input = const_cast(param->Input()); + auto input = const_cast(param->Input()); // const Tensor *bias = param->Bias(); // auto bias_ptr = bias->data(); - auto filter = const_cast(param->Filter()); + auto filter = const_cast(param->Filter()); auto out = param->Output(); // PADDLE_MOBILE_ENFORCE(out->dims()[1] == bias->dims()[0], diff --git a/src/operators/kernel/fpga/V1/deconv_add_bn_kernel.cpp b/src/operators/kernel/fpga/V1/deconv_add_bn_kernel.cpp index 4239ac1e5da421cb0e2421a8919d8d15e40348af..a8205df3c9c1052055ba15ca58fd215f1d49ba0e 100644 --- a/src/operators/kernel/fpga/V1/deconv_add_bn_kernel.cpp +++ b/src/operators/kernel/fpga/V1/deconv_add_bn_kernel.cpp @@ -27,10 +27,10 @@ bool DeconvAddBNKernel::Init(FusionDeconvAddBNParam *param) { paddle_mobile::fpga::ActivationType activation_enable = paddle_mobile::fpga::NONE; int16_t leaky_relu_negative_slope = 0; - auto input = const_cast(param->Input()); + auto input = const_cast(param->Input()); const Tensor *bias = param->InputBias(); auto bias_ptr = bias->data(); - auto filter = const_cast(param->Filter()); + auto filter = const_cast(param->Filter()); auto out = param->Output(); PADDLE_MOBILE_ENFORCE(out->dims()[1] == bias->dims()[0], diff --git a/src/operators/kernel/fpga/V1/deconv_add_bn_relu_kernel.cpp b/src/operators/kernel/fpga/V1/deconv_add_bn_relu_kernel.cpp index 28b8c83198a5517ed0dc9732e0033030a876a7da..b27f5cf870d2e3220bec31ee63bb27361cb2c8cf 100755 --- a/src/operators/kernel/fpga/V1/deconv_add_bn_relu_kernel.cpp +++ b/src/operators/kernel/fpga/V1/deconv_add_bn_relu_kernel.cpp @@ -28,10 +28,10 @@ bool DeconvAddBNReluKernel::Init( paddle_mobile::fpga::ActivationType activation_enable = paddle_mobile::fpga::LEAKYRELU; int16_t leaky_relu_negative_slope = 0; - auto input = const_cast(param->Input()); + auto input = const_cast(param->Input()); const Tensor *bias = param->InputBias(); auto bias_ptr = bias->data(); - auto filter = const_cast(param->Filter()); + auto filter = const_cast(param->Filter()); auto out = param->Output(); PADDLE_MOBILE_ENFORCE(out->dims()[1] == bias->dims()[0], diff --git a/src/operators/kernel/fpga/V1/deconv_bn_relu_kernel.cpp b/src/operators/kernel/fpga/V1/deconv_bn_relu_kernel.cpp index f166587109e5f63e30203a940aa3baa8ae87f844..75597f0ecd570b6b21894a2f9a0ff0ad91a54ea4 100644 --- a/src/operators/kernel/fpga/V1/deconv_bn_relu_kernel.cpp +++ b/src/operators/kernel/fpga/V1/deconv_bn_relu_kernel.cpp @@ -29,10 +29,10 @@ bool DeconvBNReluKernel::Init( paddle_mobile::fpga::ActivationType activation_enable = paddle_mobile::fpga::LEAKYRELU; int16_t leaky_relu_negative_slope = 0; - auto input = const_cast(param->Input()); + auto input = const_cast(param->Input()); const Tensor *bias = param->InputBias(); auto bias_ptr = bias->data(); - auto filter = const_cast(param->Filter()); + auto filter = const_cast(param->Filter()); auto out = param->Output(); auto bn_mean_ptr = param->InputMean()->data(); auto bn_var_ptr = param->InputVariance()->data(); diff --git a/src/operators/kernel/fpga/V1/fetch_kernel.cpp b/src/operators/kernel/fpga/V1/fetch_kernel.cpp index 2aea5a770c674a7d70dc2abf0d691598444f9a25..b128c8e3430b8a359a5ad9dbcba397ad0f2b6568 100644 --- a/src/operators/kernel/fpga/V1/fetch_kernel.cpp +++ b/src/operators/kernel/fpga/V1/fetch_kernel.cpp @@ -57,13 +57,9 @@ void dealign(float *src, float *dst, int input_c, int input_h, int input_w) { } template <> void FetchKernel::Compute(const FetchParam ¶m) { - auto input = const_cast(param.InputX()); - if (input->type() == typeid(float)) { - int col = param.Col(); - auto output = &(param.Out()->at(col)); - output->ShareDataWith(*input); - return; - } + auto input = const_cast(param.InputX()); + int col = param.Col(); + LoDTensor *out = ¶m.Out()->at(col); fpga::BypassArgs args = param.fpga_bypass_args; auto input_address = (input->data()); @@ -71,7 +67,7 @@ void FetchKernel::Compute(const FetchParam ¶m) { float *outdata_ptr = reinterpret_cast(param.fpga_bypass_args.output.address); const int num_th = 32; - if ((param.Out()->fpga_data_num) < num_th) { + if ((out->fpga_data_num) < num_th) { fpga::fpga_invalidate(input_address, (input->fpga_data_num) * sizeof(half)); for (int idx = 0; idx < product(input->dims()); ++idx) { @@ -81,14 +77,14 @@ void FetchKernel::Compute(const FetchParam ¶m) { } fpga::PerformBypass(args); - auto outC = param.Out()->dims()[1]; - auto outH = param.Out()->dims()[2]; - auto outW = param.Out()->dims()[3]; + auto outC = out->dims()[1]; + auto outH = out->dims()[2]; + auto outW = out->dims()[3]; fpga::fpga_invalidate(param.fpga_bypass_args.output.address, - param.Out()->fpga_data_num * sizeof(float)); + out->fpga_data_num * sizeof(float)); - if (param.Out()->fpga_data_num != product(input->dims())) { + if (out->fpga_data_num != product(input->dims())) { float *data_tmp = reinterpret_cast(malloc(outC * outH * outW * sizeof(float))); dealign(outdata_ptr, data_tmp, outC, outH, outW); diff --git a/src/operators/kernel/fpga/V1/fusion_fc_relu_kernel.cpp b/src/operators/kernel/fpga/V1/fusion_fc_relu_kernel.cpp index 6fbeb63fe606aac014f76088210c74a4118e6c78..fef370515e9e9ffa1d90c184e62919235533b8a5 100644 --- a/src/operators/kernel/fpga/V1/fusion_fc_relu_kernel.cpp +++ b/src/operators/kernel/fpga/V1/fusion_fc_relu_kernel.cpp @@ -25,7 +25,7 @@ bool FusionFcReluKernel::Init(FusionFcReluParam *param) { paddle_mobile::fpga::LEAKYRELU; int16_t leaky_relu_negative_slope = 0; auto input_x = const_cast(param->InputX()); - auto filter = const_cast(param->InputY()); + auto filter = const_cast(param->InputY()); const Tensor *input_z = param->InputZ(); auto input_z_ptr = input_z->data(); auto out = param->Out(); diff --git a/src/operators/kernel/fpga/V1/pad2d_kernel.cpp b/src/operators/kernel/fpga/V1/pad2d_kernel.cpp index f47a585ee412316ce65084c5fa10a622ffb93a4f..5d81f71c3608d19f5be5c46699b8379ebb279982 100644 --- a/src/operators/kernel/fpga/V1/pad2d_kernel.cpp +++ b/src/operators/kernel/fpga/V1/pad2d_kernel.cpp @@ -16,8 +16,8 @@ limitations under the License. */ namespace paddle_mobile { namespace operators { template <> -bool Pad2dKernel::Init(Pad2dParam *param) { - Tensor *output = param->Out(); +bool Pad2DKernel::Init(Pad2DParam *param) { + Tensor *output = param->output_; fpga::format_fp16_ofm(output); return true; } @@ -39,9 +39,9 @@ void pad2dFunc(const framework::Tensor *input, framework::Tensor *output) { } } template <> -void Pad2dKernel::Compute(const Pad2dParam ¶m) { - auto in_x = param.InputX(); - auto out = param.Out(); +void Pad2DKernel::Compute(const Pad2DParam ¶m) { + auto in_x = param.input_; + auto out = param.output_; fpga::fpga_invalidate((void *)in_x->data(), // NOLINT in_x->numel() * sizeof(half)); pad2dFunc(in_x, out); diff --git a/src/operators/kernel/fpga/V1/pool_kernel.cpp b/src/operators/kernel/fpga/V1/pool_kernel.cpp index 4c0e09e63f2785b535f81b5262afe93099a74aa5..994fa151621956aa791d36cc0f4cd829dc88f3d1 100644 --- a/src/operators/kernel/fpga/V1/pool_kernel.cpp +++ b/src/operators/kernel/fpga/V1/pool_kernel.cpp @@ -68,7 +68,7 @@ bool PoolKernel::Init(PoolParam *param) { template <> void PoolKernel::Compute(const PoolParam ¶m) { - auto *input = const_cast(param.Input()); + auto *input = const_cast(param.Input()); if (input->type() == typeid(float)) { auto *output = param.Output(); diff --git a/src/operators/kernel/fpga/V1/sigmoid_kernel.cpp b/src/operators/kernel/fpga/V1/sigmoid_kernel.cpp index bf36873a1fb442a4d5ff6f57056515009d275cd6..bb9eb3d6e8acf3d59ce3c4541f8c553fe7cb1cc2 100644 --- a/src/operators/kernel/fpga/V1/sigmoid_kernel.cpp +++ b/src/operators/kernel/fpga/V1/sigmoid_kernel.cpp @@ -24,7 +24,7 @@ bool SigmoidKernel::Init(SigmoidParam *param) { paddle_mobile::fpga::ActivationType activation_enable = paddle_mobile::fpga::SIGMOID; int16_t leaky_relu_negative_slope = 0; - auto input = const_cast(param->InputX()); + auto input = const_cast(param->InputX()); auto input_ptr = input->data(); auto out = param->Out(); fpga::format_fp16_ofm(out); diff --git a/src/operators/math/channel_wise.h b/src/operators/math/channel_wise.h index 796ea6d2b97d31d3091b225601065ee4670316e8..e4c0cbe05bfabde42df7f33a71882aa8ec08c477 100644 --- a/src/operators/math/channel_wise.h +++ b/src/operators/math/channel_wise.h @@ -33,7 +33,7 @@ void AddChannelWise(const framework::Tensor *input, // maybe check shape int batch_size = input->dims()[0]; int channels = input->dims()[1]; - size_t spatial_size = input->dims()[2] * input->dims()[3]; + int spatial_size = input->dims()[2] * input->dims()[3]; for (int batch = 0; batch < batch_size; ++batch) { for (int channel = 0; channel < channels; ++channel) { @@ -88,7 +88,7 @@ void ScaleAddChannelWise(const framework::Tensor *input, // maybe check shape int batch_size = input->dims()[0]; int channels = input->dims()[1]; - size_t spatial_size = input->dims()[2] * input->dims()[3]; + int spatial_size = input->dims()[2] * input->dims()[3]; for (int batch = 0; batch < batch_size; ++batch) { for (int channel = 0; channel < channels; ++channel) { diff --git a/src/operators/math/gemm/cblas.cc b/src/operators/math/gemm/cblas.cc index 6dc04d1b4e1ec3b8247713c2060bbd767c76d2e2..adc375b62913f0ad1105080f8c26b547e96671f3 100644 --- a/src/operators/math/gemm/cblas.cc +++ b/src/operators/math/gemm/cblas.cc @@ -12,6 +12,8 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. */ +#if defined(__ARM_NEON__) || defined(__ARM_NEON) + #pragma once #include "operators/math/gemm/cblas.h" @@ -47,3 +49,5 @@ void cblas_sgemv(const bool trans, const int M, const int N, const float alpha, } // namespace math } // namespace operators } // namespace paddle_mobile + +#endif diff --git a/src/operators/pad2d_op.cpp b/src/operators/pad2d_op.cpp index 3d0fdf44d5b3a97298e1bc2809794e23e3a9e908..8a771c36a50f5a1b458df38d73ed93be61859cd4 100644 --- a/src/operators/pad2d_op.cpp +++ b/src/operators/pad2d_op.cpp @@ -37,5 +37,8 @@ namespace ops = paddle_mobile::operators; #ifdef PADDLE_MOBILE_CPU REGISTER_OPERATOR_CPU(pad2d, ops::Pad2DOp); #endif +#ifdef PADDLE_MOBILE_FPGA +REGISTER_OPERATOR_FPGA(pad2d, ops::Pad2DOp); +#endif #endif // PAD2D_OP diff --git a/test/fpga/test_rfcn_api.cpp b/test/fpga/test_rfcn_api.cpp index 724ef7d14d1189bb68cb5db1a583850dd1e72816..f787d8f9acfe85ead101aeb16a4fbebe1aefee65 100644 --- a/test/fpga/test_rfcn_api.cpp +++ b/test/fpga/test_rfcn_api.cpp @@ -12,16 +12,18 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. */ -#ifndef PADDLE_MOBILE_FPGA -#define PADDLE_MOBILE_FPGA -#endif -#include #include -#include "io/paddle_inference_api.h" +#include "../test_helper.h" +#include "../test_include.h" + +#ifdef PADDLE_MOBILE_FPGA_V1 +#include "fpga/V1/api.h" +#endif +#ifdef PADDLE_MOBILE_FPGA_V2 +#include "fpga/V2/api.h" +#endif -static const char *g_image = "../models/rfcn/data.bin"; -static const char *g_model = "../models/rfcn/model"; -static const char *g_param = "../models/rfcn/params"; +#include void readStream(std::string filename, char *buf) { std::ifstream in; @@ -35,137 +37,116 @@ void readStream(std::string filename, char *buf) { auto length = in.tellg(); // report location (this is the length) in.seekg(0, std::ios::beg); // go back to the beginning in.read(buf, length); + DLOG << length; in.close(); } -PaddleMobileConfig GetConfig() { - PaddleMobileConfig config; - config.precision = PaddleMobileConfig::FP32; - config.device = PaddleMobileConfig::kFPGA; - config.prog_file = g_model; - config.param_file = g_param; - config.thread_num = 1; - config.batch_size = 1; - config.optimize = true; - config.lod_mode = true; - config.quantification = false; - return config; -} - -PaddleMobileConfig GetConfig1() { - PaddleMobileConfig config; - config.precision = PaddleMobileConfig::FP32; - config.device = PaddleMobileConfig::kFPGA; - config.model_dir = "../models/resnet50"; - config.thread_num = 1; - config.batch_size = 1; - config.optimize = true; - config.quantification = false; - return config; +void convert_to_chw(int16_t **data_in, int channel, int height, int width, + int num, int16_t *data_tmp) { + int64_t amount_per_side = width * height; + for (int n = 0; n < num; n++) { + for (int h = 0; h < height; h++) { + for (int w = 0; w < width; w++) { + for (int c = 0; c < channel; c++) { + *(data_tmp + n * amount_per_side * channel + c * amount_per_side + + width * h + w) = *((*data_in)++); + } + } + } + } } -int main() { - open_device(); - - PaddleMobileConfig config = GetConfig(); - auto predictor = - CreatePaddlePredictor(config); - - std::cout << "Finishing loading model" << std::endl; - - float img_info[3] = {432, 1280, 1.0f}; - int img_length = 432 * 1280 * 3; - auto img = reinterpret_cast(fpga_malloc(img_length * sizeof(float))); - readStream(g_image, reinterpret_cast(img)); - - std::cout << "Finishing initializing data" << std::endl; - struct PaddleTensor t_img_info, t_img; - t_img.dtypeid = typeid(float); - t_img_info.layout = LAYOUT_HWC; - t_img_info.shape = std::vector({1, 3}); - t_img_info.name = "Image information"; - t_img_info.data.Reset(img_info, 3 * sizeof(float)); - - t_img.dtypeid = typeid(float); - t_img.layout = LAYOUT_HWC; - t_img.shape = std::vector({1, 432, 1280, 3}); - t_img.name = "Image information"; - t_img.data.Reset(img, img_length * sizeof(float)); - predictor->FeedPaddleTensors({t_img_info, t_img}); - - std::cout << "Finishing feeding data " << std::endl; - - predictor->Predict_From_To(0, -1); - std::cout << "Finishing predicting " << std::endl; - - std::vector v; // No need to initialize v - predictor->FetchPaddleTensors(&v); // Old data in v will be cleared - std::cout << "Output number is " << v.size() << std::endl; - std::cout << "out[0] length " << v[0].data.length() << std::endl; - std::cout << "out[1] length " << v[1].data.length() << std::endl; - std::cout << "out[2] length " << v[2].data.length() << std::endl; - - auto post_nms = v[0].data.length() / sizeof(float) / 8; - for (int num = 0; num < post_nms; num++) { - for (int i = 0; i < 8; i++) { - auto p = reinterpret_cast(v[0].data.data()); - std::cout << p[num * 8 + i] << std::endl; - } +void dump_stride_half(std::string filename, Tensor input_tensor, + const int dumpnum, bool use_chw) { + // bool use_chw = true; + if (input_tensor.dims().size() != 4) return; + int c = (input_tensor.dims())[1]; + int h = (input_tensor.dims())[2]; + int w = (input_tensor.dims())[3]; + int n = (input_tensor.dims())[0]; + auto data_ptr = input_tensor.get_data(); + auto *data_ptr_16 = reinterpret_cast(data_ptr); + auto data_tmp = data_ptr_16; + if (use_chw) { + data_tmp = + reinterpret_cast(malloc(n * c * h * w * sizeof(int16_t))); + convert_to_chw(&data_ptr_16, c, h, w, n, data_tmp); } - for (int num = 0; num < post_nms; num++) { - for (int i = 0; i < 8; i++) { - auto p = reinterpret_cast(v[1].data.data()); - std::cout << p[num * 8 + i] << std::endl; - } + std::ofstream out(filename.c_str()); + float result = 0; + int stride = input_tensor.numel() / dumpnum; + stride = stride > 0 ? stride : 1; + for (int i = 0; i < input_tensor.numel(); i += stride) { + result = paddle_mobile::fpga::fp16_2_fp32(data_tmp[i]); + out << result << std::endl; } - for (int num = 0; num < post_nms; num++) { - for (int i = 0; i < 4; i++) { - auto p = reinterpret_cast(v[2].data.data()); - std::cout << p[num * 4 + i] << std::endl; - } + out.close(); + if (data_tmp != data_ptr_16) { + free(data_tmp); } - std::cout << "Finish getting vector values" << std::endl; - - //////////////////////////////////////////////////// +} - PaddleTensor tensor; - predictor->GetPaddleTensor("fetch2", &tensor); - for (int i = 0; i < post_nms; i++) { - auto p = reinterpret_cast(tensor.data.data()); - std::cout << p[+i] << std::endl; +void dump_stride_float(std::string filename, Tensor input_tensor, + const int dumpnum) { + auto data_ptr = reinterpret_cast(input_tensor.get_data()); + std::ofstream out(filename.c_str()); + float result = 0; + int stride = input_tensor.numel() / dumpnum; + stride = stride > 0 ? stride : 1; + for (int i = 0; i < input_tensor.numel(); i += stride) { + result = data_ptr[i]; + out << result << std::endl; } + out.close(); +} - ////////////////////////////////////////////////////// - - PaddleMobileConfig config1 = GetConfig1(); - auto predictor1 = - CreatePaddlePredictor(config1); - - std::cout << "Finishing loading model" << std::endl; - - int img_length1 = 224 * 224 * 3; - auto img1 = - reinterpret_cast(fpga_malloc(img_length1 * sizeof(float))); - - std::cout << "Finishing initializing data" << std::endl; +void dump_stride(std::string filename, Tensor input_tensor, const int dumpnum, + bool use_chw) { + static int i = 0; + if (input_tensor.numel() == 0) { + return; + } + if (input_tensor.type() == typeid(float)) { + DLOG << "op: " << i++ << ", float data " << input_tensor.numel(); - struct PaddleTensor t_img1; + dump_stride_float(filename, input_tensor, dumpnum); + } else { + DLOG << "op: " << i++ << ", half data " << input_tensor.numel(); - t_img1.dtypeid = typeid(float); - t_img1.layout = LAYOUT_HWC; - t_img1.shape = std::vector({1, 224, 224, 3}); - t_img1.name = "Image information"; - t_img1.data.Reset(img1, img_length1 * sizeof(float)); - predictor1->FeedPaddleTensors({t_img1}); - predictor1->Predict_From_To(0, -1); - std::cout << "Finishing predicting " << std::endl; + dump_stride_half(filename, input_tensor, dumpnum, use_chw); + } + DLOG << "dump input address: " << input_tensor.get_data(); +} - std::vector v1; // No need to initialize v - predictor1->FetchPaddleTensors(&v1); // Old data in v will be cleared - std::cout << "Output number is " << v1.size() << std::endl; - std::cout << "out[0] length " << v1[0].data.length() << std::endl; +static const char *g_rfcn_combine = "../models/rfcn"; +static const char *g_image_src_float = "../models/rfcn/data.bin"; +int main() { + paddle_mobile::fpga::open_device(); + paddle_mobile::PaddleMobile paddle_mobile; + + if (paddle_mobile.Load(std::string(g_rfcn_combine) + "/model", + std::string(g_rfcn_combine) + "/params", true, false, + 1, true)) { + float img_info[3] = {768, 1536, 768.0f / 960.0f}; + auto img = reinterpret_cast( + fpga::fpga_malloc(768 * 1536 * 3 * sizeof(float))); + readStream(g_image_src_float, reinterpret_cast(img)); + + std::vector v(3, nullptr); + paddle_mobile.FeedData(std::vector({img_info, img})); + paddle_mobile.Predict_To(-1); + + for (int i = 65; i < 69; i++) { + auto tensor_ptr = paddle_mobile.FetchResult(i); + std::string saveName = "rfcn_" + std::to_string(i); + paddle_mobile::fpga::fpga_invalidate((*tensor_ptr).get_data(), + tensor_ptr->numel() * sizeof(float)); + dump_stride(saveName, (*tensor_ptr), tensor_ptr->numel(), true); + } + // paddle_mobile.GetResults(&v); + DLOG << "Computation done"; + fpga::fpga_free(img); + } return 0; } diff --git a/test/net/test_benchmark.cpp b/test/net/test_benchmark.cpp index 31a0850c4d531d13f7960d9857b3721ee69c6d27..38e6f8e8701d28949331d03f2d8598c5ac46086c 100644 --- a/test/net/test_benchmark.cpp +++ b/test/net/test_benchmark.cpp @@ -36,7 +36,10 @@ int main(int argc, char* argv[]) { paddle_mobile::PaddleMobile paddle_mobile; paddle_mobile.SetThreadNum(thread_num); auto time1 = time(); - if (paddle_mobile.Load(fluid_model, optimize)) { +// if (paddle_mobile.Load(fluid_model, optimize, false, 1, true)) { + if (paddle_mobile.Load(std::string(fluid_model) + "/model", + std::string(fluid_model) + "/params", optimize, + false, 1, true)) { auto time2 = time(); std::cout << "load cost :" << time_diff(time1, time2) << "ms\n"; paddle_mobile::framework::Tensor input; @@ -51,14 +54,15 @@ int main(int argc, char* argv[]) { paddle_mobile::framework::DDim in_shape = paddle_mobile::framework::make_ddim(dims); SetupTensor(&input, in_shape, 0.f, 255.f); - // warmup - for (int i = 0; i < 10; ++i) { +// // warmup + for (int i = 0; i < 2; ++i) { paddle_mobile.Predict(input); } auto time3 = time(); for (int i = 0; i < 10; ++i) { paddle_mobile.Predict(input); } + auto time4 = time(); std::cout << "predict cost :" << time_diff(time3, time4) / 10 << "ms\n"; std::ostringstream os("output tensor size: "); @@ -68,7 +72,7 @@ int main(int argc, char* argv[]) { os << ", " << output->data()[i]; } std::string output_str = os.str(); - std::cout << output_str << std::endl; +// std::cout << output_str << std::endl; } return 0; }