diff --git a/src/fpga/V1/api.cpp b/src/fpga/V1/api.cpp
index 5cef0ec1a64e7e696d6b5c797e39918d6f1ee915..02db327cb3c261b31a80375b8b2062405a072c3e 100644
--- a/src/fpga/V1/api.cpp
+++ b/src/fpga/V1/api.cpp
@@ -70,10 +70,11 @@ void format_fp16_ofm(framework::Tensor *ofm_tensor) {
     DLOG << "Wrong ofm dimension";
   }
   auto p = fpga_malloc(memory_size);
-  memset(p, 0, memory_size);
+  // memset(p, 0, memory_size);
   ofm_tensor->reset_data_ptr(p);
   ofm_tensor->set_type(typeid(half));
   ofm_tensor->fpga_data_num = memory_size / sizeof(half);
+  fpga::fpga_flush(p, memory_size);
 }

 void format_fp16_ofm(framework::Tensor *ofm_tensor, framework::DDim dims) {
@@ -89,10 +90,11 @@ void format_fp16_ofm(framework::Tensor *ofm_tensor, framework::DDim dims) {
     DLOG << "Wrong ofm dimension";
   }
   auto p = fpga_malloc(memory_size);
-  memset(p, 0, memory_size);
+  // memset(p, 0, memory_size);
   ofm_tensor->reset_data_ptr(p);
   ofm_tensor->set_type(typeid(half));
   ofm_tensor->fpga_data_num = memory_size / sizeof(half);
+  fpga::fpga_flush(p, memory_size);
 }

 void format_fp32_ofm(framework::Tensor *ofm_tensor) {
@@ -108,10 +110,11 @@ void format_fp32_ofm(framework::Tensor *ofm_tensor) {
     DLOG << "Wrong ofm dimension";
   }
   auto p = fpga_malloc(memory_size);
-  memset(p, 0, memory_size);
+  // memset(p, 0, memory_size);
   ofm_tensor->reset_data_ptr(p);
   ofm_tensor->set_type(typeid(float));
   ofm_tensor->fpga_data_num = memory_size / sizeof(float);
+  fpga::fpga_flush(p, memory_size);
 }

 float filter_find_max(framework::Tensor *filter_tensor) {
@@ -463,9 +466,24 @@ void expand_EW_arg(EWAddArgs *arg) {
   uint64_t image_amount_per_row =
       align_to_x((uint64_t)args.image0.width * (uint64_t)args.image0.channels,
                  IMAGE_ALIGNMENT);
-  uint64_t image_image_pixel = ((uint64_t)args.image0.channels << 32) |
-                               ((uint64_t)args.image0.width << 16) |
-                               (uint64_t)args.image0.height;
+  //////////////////////////////////////////////////////////
+  // temporary modification for the EW and DMA problem
+  uint64_t image_image_pixel = 0;
+  if ((args.image0.width * args.image0.channels) >= 24576) {
+    if ((args.image0.width * args.image0.channels) % 32 != 0) {
+      DLOG << "EW parameter is not supported";
+    } else {
+      image_amount_per_row = image_amount_per_row / 2;
+      image_image_pixel = ((uint64_t)args.image0.channels << 32) |
+                          ((uint64_t)(args.image0.width / 2) << 16) |
+                          (uint64_t)(args.image0.height * 2);
+    }
+  } else {
+    image_image_pixel = ((uint64_t)args.image0.channels << 32) |
+                        ((uint64_t)args.image0.width << 16) |
+                        (uint64_t)args.image0.height;
+  }
+  //////////////////////////////////////////////////////////

   (*arg).driver.image0_address_phy = image0_address_phy;
   (*arg).driver.image1_address_phy = image1_address_phy;
@@ -560,6 +578,18 @@ void fill_split_arg(struct SplitConvArgs *arg, framework::Tensor *input,
         reinterpret_cast<char *>(arg->conv_arg[i].filter_address), deleter));
     memcpy(arg->conv_arg[i].filter_address, filter_head, filter_size);
     fpga_flush(arg->conv_arg[i].filter_address, filter_size);
+    // for test
+    // {
+    //   static int cnt = 0;
+    //   if (cnt == 4) {
+    //     int8_t result = 0;
+    //     std::string str = "fc_filter";
+    //     fpga::savefile(str, arg->conv_arg[i].filter_address, filter_size,
+    //                    result);
+    //   }
+    //   cnt++;
+    // }

     size_t bs_size = 2 *
                      align_to_x(arg->conv_arg[i].filter_num, BS_NUM_ALIGNMENT) *
@@ -570,6 +600,18 @@ void fill_split_arg(struct SplitConvArgs *arg, framework::Tensor *input,
         reinterpret_cast<char *>(arg->conv_arg[i].sb_address), deleter));
     memcpy(arg->conv_arg[i].sb_address, bs_head, bs_size);
     fpga_flush(arg->conv_arg[i].sb_address, bs_size);
+    // for test
+    /*{
+      static int cnt = 0;
+      if (cnt == 4) {
+        float result = 0;
+        std::string str = "fc_bs";
+        fpga::savefile(str, arg->conv_arg[i].sb_address, bs_size / 4, result);
+      }
+      cnt++;
+    }*/

     if (n > 1) {
       arg->conv_arg[i].output.scale_address =
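The workaround in expand_EW_arg above keeps the total element count constant while halving the DMA row length: one row of width × channels values is re-described to the hardware as two rows of half the width and twice the height. A standalone sketch of the packing arithmetic, with hypothetical helper names (the real code writes the packed word into args.driver):

#include <cassert>
#include <cstdint>

// Pack channels/width/height into the 64-bit register layout used above:
// channels in bits 32..63, width in bits 16..31, height in bits 0..15.
uint64_t pack_image_pixel(uint64_t channels, uint64_t width, uint64_t height) {
  return (channels << 32) | (width << 16) | height;
}

int main() {
  uint64_t c = 64, w = 512, h = 32;  // w * c == 32768 >= 24576, and % 32 == 0
  uint64_t packed = pack_image_pixel(c, w / 2, h * 2);
  assert(((packed >> 32) & 0xFFFFFFFF) == c);
  assert(((packed >> 16) & 0xFFFF) == w / 2);
  assert((packed & 0xFFFF) == h * 2);
  assert((w / 2) * (h * 2) == w * h);  // element count is preserved
  return 0;
}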
diff --git a/src/fpga/V1/filter.cpp b/src/fpga/V1/filter.cpp
index 50341b75e129479e7f8d8ab4d9c200df574996cb..425d1d1b5c3d0304de06dae9bb0e9fcf32f4d957 100644
--- a/src/fpga/V1/filter.cpp
+++ b/src/fpga/V1/filter.cpp
@@ -268,6 +268,7 @@ void format_fc_filter(float **data_in, int num, int channel, int height,
   quantize(data_in, data_size, max);
   char **quantize_data = (char **)data_in;  // NOLINT
   convert_fc_filter(quantize_data, num, chw);
+  convert_to_hwc(quantize_data, num, channel, height, width);
   align_element(quantize_data, num, chw);
   if (num_after_alignment != num) {
     align_num(quantize_data, num_per_div_before_alignment, num, chw);
@@ -316,7 +317,7 @@ void align_element_n(int16_t **data_in, int num, int height, int width) {
     }
     *data_in = data_tmp;
-    free(tmp);
+    fpga_free(tmp);
   }
 }
 void quantize_to_fp16(float **data_in, int num, int height, int width,
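format_fc_filter above quantizes the float weights to int8 before the new HWC conversion and the alignment steps. A minimal standalone sketch of a symmetric max-to-127 quantizer of the kind it relies on (the in-tree quantize() works in place on the raw tensor buffer, so the name and signature here are illustrative only):

#include <cmath>
#include <cstdint>
#include <vector>

// Map each weight so the largest-magnitude value becomes +/-127.
std::vector<int8_t> quantize_int8(const std::vector<float> &w, float max_abs) {
  const float scale = 127.0f / max_abs;
  std::vector<int8_t> q(w.size());
  for (size_t i = 0; i < w.size(); i++) {
    q[i] = static_cast<int8_t>(std::round(w[i] * scale));
  }
  return q;  // dequantize with w[i] ~= q[i] * max_abs / 127
}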
diff --git a/src/framework/executor.cpp b/src/framework/executor.cpp
index 93078d9d4d7f253e37969d770bc5022e03430ab6..fe95e6c7f3e913e0c0801b0371ffe5a179fb77ff 100644
--- a/src/framework/executor.cpp
+++ b/src/framework/executor.cpp
@@ -90,11 +90,6 @@ Executor<Device, T>::Executor(const Program<Device> &program,
     InitMemory();
   }

-#ifdef PADDLE_MOBILE_FPGA
-  program_.scope->EraseVars({"feed", "fetch"});
-  program_.scope->print_vars();
-#endif
-
   int count = 0;
   for (auto &op_handler : ops_of_block0_) {
     DLOG << "Initialize op[" << count++ << "]: " << op_handler->Type();
@@ -514,6 +509,32 @@ PMStatus Executor<Device, T>::Predict() {
   return PMSuccess;
 }

+template <typename Device, typename T>
+void Executor<Device, T>::FeedTensorData(const vector<framework::Tensor> &v) {
+  auto input_size = v.size();
+  auto *feed_var = program_.scope->Var("feed");
+
+  PADDLE_MOBILE_ENFORCE(input_size == feed_indices_.size(),
+                        "input data number not correct");
+  for (int i = 0; i < input_size; i++) {
+    framework::LoDTensor &target =
+        feed_var->template GetMutable<framework::LoDTensorArray>()->at(i);
+    target.ShareDataWith(v[input_size - i - 1]);
+  }
+}
+
+template <typename Device, typename T>
+void Executor<Device, T>::GetTensorResults(
+    std::vector<framework::Tensor *> *v) {
+  auto *fetch_var = program_.scope->Var("fetch");
+  auto output_size = fetch_indices_.size();
+  for (int i = 0; i < output_size; i++) {
+    framework::LoDTensor &target =
+        fetch_var->template GetMutable<framework::LoDTensorArray>()->at(i);
+    v->push_back(&target);
+  }
+}
+
 #ifdef PADDLE_MOBILE_FPGA
 template <typename Device, typename T>
 void Executor<Device, T>::InjectVariable(const Tensor &t,
@@ -559,19 +580,6 @@ void Executor<Device, T>::GetResults(std::vector<void *> *v) {
   }
 }

-template <typename Device, typename T>
-void Executor<Device, T>::GetTensorResults(
-    std::vector<framework::Tensor *> *v) {
-  int index = 0;
-  auto vars = program_.scope->VarContain("fetch", &index);
-  auto output_size = vars.size();
-  for (int i = 0; i < output_size; i++) {
-    auto var = program_.scope->Var("fetch", i + index);
-    auto fetch_tensor = var->template GetMutable<framework::LoDTensor>();
-    v->push_back(fetch_tensor);
-  }
-}
-
 template <typename Device, typename T>
 framework::Tensor *Executor<Device, T>::GetTensorByName(
     const std::string &name) {
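Note the indexing in FeedTensorData above: user input i lands in feed slot input_size - i - 1, and the buffers are shared via ShareDataWith rather than copied. A runnable illustration of just the index reversal:

#include <cassert>
#include <vector>

int main() {
  std::vector<int> inputs = {10, 20, 30};  // user-facing order
  std::vector<int> feed_slots(inputs.size());
  for (size_t i = 0; i < inputs.size(); i++) {
    feed_slots[inputs.size() - i - 1] = inputs[i];  // same indexing as above
  }
  assert(feed_slots[0] == 30 && feed_slots[1] == 20 && feed_slots[2] == 10);
  return 0;
}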
diff --git a/src/framework/executor.h b/src/framework/executor.h
index 074bc4179ade271683a5454edf024661732d270d..fa589880c14fe6477d4443108ca2c436c1987c48 100644
--- a/src/framework/executor.h
+++ b/src/framework/executor.h
@@ -51,15 +51,15 @@ class Executor {
   std::shared_ptr<LoDTensor> GetOutput(const std::string &var_name);

+  void FeedTensorData(const std::vector<framework::Tensor> &v);
+  void GetTensorResults(std::vector<framework::Tensor *> *v);
+
 #ifdef PADDLE_MOBILE_FPGA
   void InjectVariable(const Tensor &t, std::string var_name);
   void FeedData(const Tensor &t);
   void FeedData(const std::vector<void *> &v);
-  void GetResults(std::vector<void *> *v);
-  void GetTensorResults(std::vector<framework::Tensor *> *v);
   framework::Tensor *GetTensorByName(const std::string &name);
-
   std::shared_ptr<LoDTensor> FetchResult(int id = -1);
   void Predict_From_To(int start = 0, int end = -1);
   void Predict_From(int start);
diff --git a/src/framework/operator.cpp b/src/framework/operator.cpp
index 74398bbc5b368236d56e5180452b5b05d7d156ad..d140603744ac00a46d54d668c2201d11c1e1d088 100644
--- a/src/framework/operator.cpp
+++ b/src/framework/operator.cpp
@@ -50,9 +50,6 @@ OperatorBase<Dtype>::OperatorBase(const std::string &type,
       attrs_(attrs),
       scope_(scope) {
   CheckAllInputOutputSet();
-#ifdef PADDLE_MOBILE_FPGA
-  InsertTensors();
-#endif
 }

 template <typename Dtype>
@@ -72,6 +69,9 @@ void OperatorBase<Dtype>::Run() {
         var->template IsType<framework::LoDTensor>()) {
       const Tensor *tensor = var->template Get<framework::LoDTensor>();
       if (tensor) DLOG << type_ << " input- " << key << "=" << *tensor;
+#ifdef PADDLE_MOBILE_FPGA
+      DLOG << var_vec_in[i];
+#endif
     }
   }
 }
@@ -83,6 +83,9 @@
         var->template IsType<framework::LoDTensor>()) {
       const Tensor *tensor = var->template Get<framework::LoDTensor>();
       if (tensor) DLOG << type_ << " output- " << key << "=" << *tensor;
+#ifdef PADDLE_MOBILE_FPGA
+      DLOG << var_vec_out[i];
+#endif
     }
   }
 }
diff --git a/src/io/api_paddle_mobile.cc b/src/io/api_paddle_mobile.cc
index 5839a279cdfc03472628cf7650b30064281a226e..1f4769b282385207a5b53d6d678364393d7da6cc 100644
--- a/src/io/api_paddle_mobile.cc
+++ b/src/io/api_paddle_mobile.cc
@@ -146,7 +146,7 @@ void PaddleMobilePredictor<Device, T>::FeedPaddleTensors(
     tensors[i].init(typeid(float));
     ConvertPaddleTensors(inputs[i], &tensors[i]);
   }
-  // paddle_mobile_->FeedTensorData(tensors);
+  paddle_mobile_->FeedTensorData(tensors);
 }

 template <typename Device, typename T>
diff --git a/src/io/paddle_mobile.cpp b/src/io/paddle_mobile.cpp
index 412c5687b778ced55498f47197fbab51fac0cea5..bf9749393b154f5a1484a95852c2bad300037344 100644
--- a/src/io/paddle_mobile.cpp
+++ b/src/io/paddle_mobile.cpp
@@ -236,6 +236,11 @@ template <typename Device, typename T>
 void PaddleMobile<Device, T>::FeedData(const std::vector<void *> &v) {
   executor_->FeedData(v);
 }
+template <typename Device, typename T>
+void PaddleMobile<Device, T>::FeedTensorData(
+    const std::vector<framework::Tensor> &v) {
+  executor_->FeedTensorData(v);
+}

 template <typename Device, typename T>
 void PaddleMobile<Device, T>::GetResults(std::vector<void *> *v) {
diff --git a/src/io/paddle_mobile.h b/src/io/paddle_mobile.h
index 6439c4cea94fbc8474caa7115afbc58b964e21ad..b05485fcae954e2aa2540ba81110fe36e6421019 100644
--- a/src/io/paddle_mobile.h
+++ b/src/io/paddle_mobile.h
@@ -91,6 +91,7 @@ class PaddleMobile {
   void InjectVariable(const framework::Tensor &t, std::string var_name);
   void FeedData(const framework::Tensor &t);
   void FeedData(const std::vector<void *> &v);
+  void FeedTensorData(const std::vector<framework::Tensor> &v);
   void GetResults(std::vector<void *> *v);
   void GetTensorResults(std::vector<framework::Tensor *> *v);
diff --git a/src/operators/kernel/fpga/V1/feed_kernel.cpp b/src/operators/kernel/fpga/V1/feed_kernel.cpp
index a661cd642c51a1baff2ac6ec97933831bd034c40..f57c517bb00b8d676beaabf24c662efcbe752aeb 100644
--- a/src/operators/kernel/fpga/V1/feed_kernel.cpp
+++ b/src/operators/kernel/fpga/V1/feed_kernel.cpp
@@ -21,6 +21,7 @@ template <>
 bool FeedKernel<FPGA, float>::Init(FeedParam<FPGA> *param) {
   auto output = param->Out();
   int col = param->Col();
+  DLOG << "col = " << col;
   auto input = const_cast<LoDTensor *>(&param->InputX()->at(col));
   input->init(typeid(float));
   input->Resize(output->dims());
diff --git a/src/operators/kernel/fpga/V1/fetch_kernel.cpp b/src/operators/kernel/fpga/V1/fetch_kernel.cpp
index b128c8e3430b8a359a5ad9dbcba397ad0f2b6568..6fbd81ae7f527b6983e27d482498cb43f1ef93a4 100644
--- a/src/operators/kernel/fpga/V1/fetch_kernel.cpp
+++ b/src/operators/kernel/fpga/V1/fetch_kernel.cpp
@@ -19,6 +19,7 @@ template <>
 bool FetchKernel<FPGA, float>::Init(FetchParam<FPGA> *param) {
   auto input = const_cast<LoDTensor *>(param->InputX());
   int col = param->Col();
+  DLOG << "col = " << col;
   auto output = &(param->Out()->at(col));
   if (input->type() == typeid(float)) {
     return true;
@@ -59,7 +60,11 @@ template <>
 void FetchKernel<FPGA, float>::Compute(const FetchParam<FPGA> &param) {
   auto input = const_cast<LoDTensor *>(param.InputX());
   int col = param.Col();
-  LoDTensor *out = &param.Out()->at(col);
+  auto output = &param.Out()->at(col);
+  if (input->type() == typeid(float)) {
+    output->ShareDataWith(*input);
+    return;
+  }

   fpga::BypassArgs args = param.fpga_bypass_args;
   auto input_address = (input->data<half>());
@@ -67,7 +72,7 @@ void FetchKernel<FPGA, float>::Compute(const FetchParam<FPGA> &param) {
   float *outdata_ptr =
       reinterpret_cast<float *>(param.fpga_bypass_args.output.address);
   const int num_th = 32;
-  if ((out->fpga_data_num) < num_th) {
+  if (output->fpga_data_num < num_th) {
     fpga::fpga_invalidate(input_address, (input->fpga_data_num) * sizeof(half));
     for (int idx = 0; idx < product(input->dims()); ++idx) {
@@ -77,14 +82,14 @@
   }

   fpga::PerformBypass(args);
-  auto outC = out->dims()[1];
-  auto outH = out->dims()[2];
-  auto outW = out->dims()[3];
+  auto outC = output->dims()[1];
+  auto outH = output->dims()[2];
+  auto outW = output->dims()[3];

   fpga::fpga_invalidate(param.fpga_bypass_args.output.address,
-                        out->fpga_data_num * sizeof(float));
+                        output->fpga_data_num * sizeof(float));

-  if (out->fpga_data_num != product(input->dims())) {
+  if (output->fpga_data_num != product(input->dims())) {
     float *data_tmp =
         reinterpret_cast<float *>(malloc(outC * outH * outW * sizeof(float)));
     dealign(outdata_ptr, data_tmp, outC, outH, outW);
@@ -92,7 +97,6 @@
     free(data_tmp);
   }
 }
-
 template class FetchKernel<FPGA, float>;

 }  // namespace operators
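In FetchKernel::Compute above, float inputs now short-circuit through ShareDataWith, while fp16 results still pass through the bypass engine, followed by dealign() whenever the FPGA buffer holds more elements than the logical output. A standalone sketch of what such a de-alignment amounts to, assuming each row was padded up to the hardware alignment (names here are hypothetical; the in-tree dealign() takes (src, dst, outC, outH, outW)):

#include <vector>

// Copy only the first row_len valid elements out of every aligned_len-wide
// padded row, producing a dense buffer.
std::vector<float> strip_row_padding(const std::vector<float> &aligned,
                                     int rows, int row_len, int aligned_len) {
  std::vector<float> dense(static_cast<size_t>(rows) * row_len);
  for (int r = 0; r < rows; r++) {
    for (int i = 0; i < row_len; i++) {
      dense[r * row_len + i] = aligned[r * aligned_len + i];
    }
  }
  return dense;
}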
diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt
index 2b9527bb64d41eda203d41dd24d875b7d7aa7843..a63af19380214f62bf73e8b7e4e292186862b8ec 100644
--- a/test/CMakeLists.txt
+++ b/test/CMakeLists.txt
@@ -68,23 +68,38 @@ endif ()

 list(FIND NET "FPGA_NET_V1" CON)
 if (CON GREATER -1)
-    ADD_EXECUTABLE(test-resnet50 fpga/test_resnet50.cpp test_helper.h test_include.h executor_for_test.h)
-    target_link_libraries(test-resnet50 paddle-mobile)
+    #ADD_EXECUTABLE(test-resnet50 fpga/test_resnet50.cpp test_helper.h test_include.h executor_for_test.h)
+    #target_link_libraries(test-resnet50 paddle-mobile)

-    ADD_EXECUTABLE(test-densebox fpga/test_densebox_combine.cpp test_helper.h test_include.h executor_for_test.h)
-    target_link_libraries(test-densebox paddle-mobile)
+    #ADD_EXECUTABLE(test-densebox fpga/test_densebox_combine.cpp test_helper.h test_include.h executor_for_test.h)
+    #target_link_libraries(test-densebox paddle-mobile)

-    ADD_EXECUTABLE(test-rfcn fpga/test_rfcn.cpp test_helper.h test_include.h executor_for_test.h)
-    target_link_libraries(test-rfcn paddle-mobile)
+    #ADD_EXECUTABLE(test-rfcn fpga/test_rfcn.cpp test_helper.h test_include.h executor_for_test.h)
+    #target_link_libraries(test-rfcn paddle-mobile)

-    ADD_EXECUTABLE(test-marker fpga/test_marker.cpp test_helper.h test_include.h executor_for_test.h)
-    target_link_libraries(test-marker paddle-mobile)
+    #ADD_EXECUTABLE(test-marker fpga/test_marker.cpp test_helper.h test_include.h executor_for_test.h)
+    #target_link_libraries(test-marker paddle-mobile)

     ADD_EXECUTABLE(test-rfcn-api fpga/test_rfcn_api.cpp)
     target_link_libraries(test-rfcn-api paddle-mobile)
+
+    ADD_EXECUTABLE(test-mobilenet-api fpga/test_mobilenet_api.cpp)
+    target_link_libraries(test-mobilenet-api paddle-mobile)
+
+    ADD_EXECUTABLE(test-yolo-api fpga/test_yolo_api.cpp)
+    target_link_libraries(test-yolo-api paddle-mobile)
+
+    ADD_EXECUTABLE(test-marker-api fpga/test_marker_api.cpp)
+    target_link_libraries(test-marker-api paddle-mobile)

-    ADD_EXECUTABLE(test-marker2 fpga/test_marker2.cpp test_helper.h test_include.h executor_for_test.h )
-    target_link_libraries(test-marker2 paddle-mobile)
+    #ADD_EXECUTABLE(test-marker2 fpga/test_marker2.cpp test_helper.h test_include.h executor_for_test.h )
+    #target_link_libraries(test-marker2 paddle-mobile)
+
+    #ADD_EXECUTABLE(test-mobilenet fpga/test_mobilenet_beijing.cpp test_helper.h test_include.h executor_for_test.h)
+    #target_link_libraries(test-mobilenet paddle-mobile)
+
+    #ADD_EXECUTABLE(test-yolo fpga/test_yolo_combine.cpp test_helper.h test_include.h executor_for_test.h)
+    #target_link_libraries(test-yolo paddle-mobile)

     set(FOUND_MATCH ON)
 endif ()
"Finishing feeding data " << std::endl; + + predictor1->Predict_From_To(0, -1); + std::cout << "Finishing predicting " << std::endl; + + std::vector v1; // No need to initialize v + predictor1->FetchPaddleTensors(&v1); // Old data in v will be cleared + std::cout << "Output number is " << v1.size() << std::endl; + for (int fetchNum = 0; fetchNum < v1.size(); fetchNum++) { + std::string dumpName = "marker2_api_fetch_" + std::to_string(fetchNum); + dump_stride(dumpName, v1[fetchNum]); + } + } + ///////////////////////////////////// + + PaddleMobileConfig config = GetConfig(); + auto predictor = + CreatePaddlePredictor(config); + + std::cout << "Finishing loading model" << std::endl; + + float img_info[3] = {432, 1280, 1.0f}; + int img_length = 432 * 1280 * 3; + auto img = reinterpret_cast(fpga_malloc(img_length * sizeof(float))); + readStream(g_image, reinterpret_cast(img)); + + std::cout << "Finishing initializing data" << std::endl; + struct PaddleTensor t_img_info, t_img; + t_img_info.dtypeid = typeid(float); + t_img_info.layout = LAYOUT_HWC; + t_img_info.shape = std::vector({1, 3}); + t_img_info.name = "Image information"; + t_img_info.data.Reset(img_info, 3 * sizeof(float)); + + t_img.dtypeid = typeid(float); + // quantize(&img, img_length); + // t_img.dtypeid = typeid(int8_t); + t_img.layout = LAYOUT_HWC; + t_img.shape = std::vector({1, 432, 1280, 3}); + t_img.name = "Image information"; + t_img.data.Reset(img, img_length * sizeof(float)); + // t_img.data.Reset(img, img_length * sizeof(int8_t)); + // for(int i = 0; i < 100; ++i){ + predictor->FeedPaddleTensors({t_img_info, t_img}); + + std::cout << "Finishing feeding data " << std::endl; + + predictor->Predict_From_To(0, -1); + std::cout << "Finishing predicting " << std::endl; + + std::vector v; // No need to initialize v + predictor->FetchPaddleTensors(&v); // Old data in v will be cleared + std::cout << "Output number is " << v.size() << std::endl; + for (int fetchNum = 0; fetchNum < v.size(); fetchNum++) { + std::string dumpName = "marker_api_fetch_" + std::to_string(fetchNum); + dump_stride(dumpName, v[fetchNum]); + } + return 0; +} diff --git a/test/fpga/test_mobilenet_api.cpp b/test/fpga/test_mobilenet_api.cpp new file mode 100644 index 0000000000000000000000000000000000000000..4b372773937722942b70c584dda1eeb22339841f --- /dev/null +++ b/test/fpga/test_mobilenet_api.cpp @@ -0,0 +1,158 @@ +/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
diff --git a/test/fpga/test_mobilenet_api.cpp b/test/fpga/test_mobilenet_api.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..4b372773937722942b70c584dda1eeb22339841f
--- /dev/null
+++ b/test/fpga/test_mobilenet_api.cpp
@@ -0,0 +1,158 @@
+/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
+#ifndef PADDLE_MOBILE_FPGA
+#define PADDLE_MOBILE_FPGA
+#endif
+#include <fstream>
+#include <iostream>
+#include "../../src/io/paddle_inference_api.h"
+
+using namespace paddle_mobile;        // NOLINT
+using namespace paddle_mobile::fpga;  // NOLINT
+
+static const char *g_image = "../images/mobilenet_txtdata/1.txt";
+static const char *g_model = "../models/keycurve_l2_regular4_model/__model__";
+static const char *g_param =
+    "../models/keycurve_l2_regular4_model/model.params";
+
+void readStream(std::string filename, float *buf) {
+  std::ifstream in;
+  in.open(filename, std::ios::in);
+  if (!in.is_open()) {
+    std::cout << "open File Failed." << std::endl;
+    return;
+  }
+  int i = 0;
+  while (!in.eof()) {
+    in >> buf[i];
+    i++;
+  }
+  in.close();
+}
+
+signed char float_to_int8(float fdata) {
+  if (fdata < 0.0) {
+    fdata -= 0.5;
+  } else {
+    fdata += 0.5;
+  }
+  return (signed char)fdata;
+}
+
+void quantize(float **data_in, int data_size) {
+  float *tmp = *data_in;
+  signed char *tmp_data = (signed char *)fpga_malloc(data_size * sizeof(char));
+  for (int i = 0; i < data_size; i++) {
+    tmp_data[i] = float_to_int8((*data_in)[i] + 128);
+  }
+  *data_in = (float *)tmp_data;  // NOLINT
+  fpga_free(tmp);
+}
+
+void convert_to_chw(float **data_in, int channel, int height, int width,
+                    float *data_tmp) {
+  int64_t amount_per_side = width * height;
+  for (int h = 0; h < height; h++) {
+    for (int w = 0; w < width; w++) {
+      for (int c = 0; c < channel; c++) {
+        *(data_tmp + c * amount_per_side + width * h + w) = *((*data_in)++);
+      }
+    }
+  }
+}
+
+void dump_stride_float(std::string filename, PaddleTensor input_tensor) {
+  auto data_ptr = reinterpret_cast<float *>(input_tensor.data.data());
+  int c = (input_tensor.shape)[1];
+  int h = (input_tensor.shape)[2];
+  int w = (input_tensor.shape)[3];
+  int n = (input_tensor.shape)[0];
+  float *data_tmp =
+      reinterpret_cast<float *>(malloc(c * h * w * sizeof(float)));
+  convert_to_chw(&data_ptr, c, h, w, data_tmp);
+  std::ofstream out(filename.c_str());
+  float result = 0;
+  int datasize = abs(c * h * w * n);
+  if (datasize == 0) {
+    std::cout << "wrong dump data size" << std::endl;
+    return;
+  }
+  for (int i = 0; i < datasize; i++) {
+    result = data_tmp[i];
+    out << result << std::endl;
+  }
+  out.close();
+}
+
+void dump_stride(std::string filename, PaddleTensor input_tensor) {
+  if (input_tensor.dtypeid == typeid(float)) {
+    dump_stride_float(filename, input_tensor);
+  } else {
+    std::cout << "only support dumping float data" << std::endl;
+  }
+}
+
+PaddleMobileConfig GetConfig() {
+  PaddleMobileConfig config;
+  config.precision = PaddleMobileConfig::FP32;
+  config.device = PaddleMobileConfig::kFPGA;
+  config.prog_file = g_model;
+  config.param_file = g_param;
+  config.thread_num = 1;
+  config.batch_size = 1;
+  config.optimize = true;
+  config.lod_mode = true;
+  config.quantification = false;
+  return config;
+}
+
+int main() {
+  open_device();
+  PaddleMobileConfig config = GetConfig();
+  auto predictor =
+      CreatePaddlePredictor<PaddleMobileConfig,
+                            PaddlePredictor::EngineKind::kPaddleMobile>(
+          config);
+
+  std::cout << "Finishing loading model" << std::endl;
+  int img_length = 256 * 416 * 3;
+  auto img =
+      reinterpret_cast<float *>(fpga_malloc(img_length * sizeof(float)));
+  readStream(g_image, img);
+
+  std::cout << "Finishing initializing data" << std::endl;
+  struct PaddleTensor t_img;
+  t_img.dtype = FLOAT32;
+  t_img.dtypeid = typeid(float);
+  // quantize(&img, img_length);
+  // t_img.dtype = INT8;
+  // t_img.dtypeid = typeid(int8_t);
+  t_img.layout = LAYOUT_HWC;
+  t_img.shape = std::vector<int>({1, 256, 416, 3});
+  t_img.name = "Image information";
+  t_img.data.Reset(img, img_length * sizeof(float));
+  // t_img.data.Reset(img, img_length * sizeof(int8_t));
+  predictor->FeedPaddleTensors({t_img});
+
+  std::cout << "Finishing feeding data " << std::endl;
+
+  predictor->Predict_From_To(0, -1);
+  std::cout << "Finishing predicting " << std::endl;
+
+  std::vector<PaddleTensor> v;        // No need to initialize v
+  predictor->FetchPaddleTensors(&v);  // Old data in v will be cleared
+  std::cout << "Output number is " << v.size() << std::endl;
+  for (int fetchNum = 0; fetchNum < v.size(); fetchNum++) {
+    std::string dumpName = "mobilenet_api_fetch_" + std::to_string(fetchNum);
+    dump_stride(dumpName, v[fetchNum]);
+  }
+  return 0;
+}
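One caveat with the text-reading helper shared by this test and test_yolo_api.cpp: while (!in.eof()) only detects end-of-file after an extraction has already failed, so the loop runs one extra iteration and never bounds-checks the buffer. A safer variant (hypothetical name, same idea with an explicit capacity):

#include <fstream>
#include <string>

int read_floats(const std::string &filename, float *buf, int capacity) {
  std::ifstream in(filename);
  int i = 0;
  while (i < capacity && in >> buf[i]) {  // stop on EOF or parse failure
    i++;
  }
  return i;  // number of values actually read
}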
diff --git a/test/fpga/test_rfcn_api.cpp b/test/fpga/test_rfcn_api.cpp
index f787d8f9acfe85ead101aeb16a4fbebe1aefee65..a52be938dee6df098a1f20a5df5cd852e0bfe37f 100644
--- a/test/fpga/test_rfcn_api.cpp
+++ b/test/fpga/test_rfcn_api.cpp
@@ -12,18 +12,19 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License. */

+#ifndef PADDLE_MOBILE_FPGA
+#define PADDLE_MOBILE_FPGA
+#endif
+#include <fstream>
 #include <iostream>
-#include "../test_helper.h"
-#include "../test_include.h"
+#include "../../src/io/paddle_inference_api.h"

-#ifdef PADDLE_MOBILE_FPGA_V1
-#include "fpga/V1/api.h"
-#endif
-#ifdef PADDLE_MOBILE_FPGA_V2
-#include "fpga/V2/api.h"
-#endif
+using namespace paddle_mobile;
+using namespace paddle_mobile::fpga;

-#include <string>
+static const char *g_image = "../models/rfcn/data.bin";
+static const char *g_model = "../models/rfcn/model";
+static const char *g_param = "../models/rfcn/params";

 void readStream(std::string filename, char *buf) {
   std::ifstream in;
@@ -37,116 +38,128 @@ void readStream(std::string filename, char *buf) {
   auto length = in.tellg();    // report location (this is the length)
   in.seekg(0, std::ios::beg);  // go back to the beginning
   in.read(buf, length);
-  DLOG << length;
   in.close();
 }

-void convert_to_chw(int16_t **data_in, int channel, int height, int width,
-                    int num, int16_t *data_tmp) {
-  int64_t amount_per_side = width * height;
-  for (int n = 0; n < num; n++) {
-    for (int h = 0; h < height; h++) {
-      for (int w = 0; w < width; w++) {
-        for (int c = 0; c < channel; c++) {
-          *(data_tmp + n * amount_per_side * channel + c * amount_per_side +
-            width * h + w) = *((*data_in)++);
-        }
-      }
-    }
-  }
-}
-
-void dump_stride_half(std::string filename, Tensor input_tensor,
-                      const int dumpnum, bool use_chw) {
-  // bool use_chw = true;
-  if (input_tensor.dims().size() != 4) return;
-  int c = (input_tensor.dims())[1];
-  int h = (input_tensor.dims())[2];
-  int w = (input_tensor.dims())[3];
-  int n = (input_tensor.dims())[0];
-  auto data_ptr = input_tensor.get_data();
-  auto *data_ptr_16 = reinterpret_cast<int16_t *>(data_ptr);
-  auto data_tmp = data_ptr_16;
-  if (use_chw) {
-    data_tmp =
-        reinterpret_cast<int16_t *>(malloc(n * c * h * w * sizeof(int16_t)));
-    convert_to_chw(&data_ptr_16, c, h, w, n, data_tmp);
-  }
-  std::ofstream out(filename.c_str());
-  float result = 0;
-  int stride = input_tensor.numel() / dumpnum;
-  stride = stride > 0 ? stride : 1;
-  for (int i = 0; i < input_tensor.numel(); i += stride) {
-    result = paddle_mobile::fpga::fp16_2_fp32(data_tmp[i]);
-    out << result << std::endl;
-  }
-  out.close();
-  if (data_tmp != data_ptr_16) {
-    free(data_tmp);
-  }
+PaddleMobileConfig GetConfig() {
+  PaddleMobileConfig config;
+  config.precision = PaddleMobileConfig::FP32;
+  config.device = PaddleMobileConfig::kFPGA;
+  config.prog_file = g_model;
+  config.param_file = g_param;
+  config.thread_num = 1;
+  config.batch_size = 1;
+  config.optimize = true;
+  config.lod_mode = true;
+  config.quantification = false;
+  return config;
 }

-void dump_stride_float(std::string filename, Tensor input_tensor,
-                       const int dumpnum) {
-  auto data_ptr = reinterpret_cast<float *>(input_tensor.get_data());
-  std::ofstream out(filename.c_str());
-  float result = 0;
-  int stride = input_tensor.numel() / dumpnum;
-  stride = stride > 0 ? stride : 1;
-  for (int i = 0; i < input_tensor.numel(); i += stride) {
-    result = data_ptr[i];
-    out << result << std::endl;
-  }
-  out.close();
+PaddleMobileConfig GetConfig1() {
+  PaddleMobileConfig config;
+  config.precision = PaddleMobileConfig::FP32;
+  config.device = PaddleMobileConfig::kFPGA;
+  config.model_dir = "../models/resnet50";
+  config.thread_num = 1;
+  config.batch_size = 1;
+  config.optimize = true;
+  config.quantification = false;
+  return config;
 }

<< "Finishing predicting " << std::endl; + + std::vector v; // No need to initialize v + predictor->FetchPaddleTensors(&v); // Old data in v will be cleared + std::cout << "Output number is " << v.size() << std::endl; + std::cout << "out[0] length " << v[0].data.length() << std::endl; + std::cout << "out[1] length " << v[1].data.length() << std::endl; + std::cout << "out[2] length " << v[2].data.length() << std::endl; + + auto post_nms = v[0].data.length() / sizeof(float) / 8; + for (int num = 0; num < post_nms; num++) { + for (int i = 0; i < 8; i++) { + auto p = reinterpret_cast(v[0].data.data()); + std::cout << p[num * 8 + i] << std::endl; + } } - if (input_tensor.type() == typeid(float)) { - DLOG << "op: " << i++ << ", float data " << input_tensor.numel(); - - dump_stride_float(filename, input_tensor, dumpnum); - } else { - DLOG << "op: " << i++ << ", half data " << input_tensor.numel(); - - dump_stride_half(filename, input_tensor, dumpnum, use_chw); + for (int num = 0; num < post_nms; num++) { + for (int i = 0; i < 8; i++) { + auto p = reinterpret_cast(v[1].data.data()); + std::cout << p[num * 8 + i] << std::endl; + } } - DLOG << "dump input address: " << input_tensor.get_data(); -} - -static const char *g_rfcn_combine = "../models/rfcn"; -static const char *g_image_src_float = "../models/rfcn/data.bin"; -int main() { - paddle_mobile::fpga::open_device(); - paddle_mobile::PaddleMobile paddle_mobile; - - if (paddle_mobile.Load(std::string(g_rfcn_combine) + "/model", - std::string(g_rfcn_combine) + "/params", true, false, - 1, true)) { - float img_info[3] = {768, 1536, 768.0f / 960.0f}; - auto img = reinterpret_cast( - fpga::fpga_malloc(768 * 1536 * 3 * sizeof(float))); - readStream(g_image_src_float, reinterpret_cast(img)); - - std::vector v(3, nullptr); - paddle_mobile.FeedData(std::vector({img_info, img})); - paddle_mobile.Predict_To(-1); - - for (int i = 65; i < 69; i++) { - auto tensor_ptr = paddle_mobile.FetchResult(i); - std::string saveName = "rfcn_" + std::to_string(i); - paddle_mobile::fpga::fpga_invalidate((*tensor_ptr).get_data(), - tensor_ptr->numel() * sizeof(float)); - dump_stride(saveName, (*tensor_ptr), tensor_ptr->numel(), true); + for (int num = 0; num < post_nms; num++) { + for (int i = 0; i < 4; i++) { + auto p = reinterpret_cast(v[2].data.data()); + std::cout << p[num * 4 + i] << std::endl; } - // paddle_mobile.GetResults(&v); - DLOG << "Computation done"; - fpga::fpga_free(img); } + std::cout << "Finish getting vector values" << std::endl; return 0; } diff --git a/test/fpga/test_yolo_api.cpp b/test/fpga/test_yolo_api.cpp new file mode 100644 index 0000000000000000000000000000000000000000..4ef890506eb1c40638242b9767267756a64da787 --- /dev/null +++ b/test/fpga/test_yolo_api.cpp @@ -0,0 +1,158 @@ +/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
diff --git a/test/fpga/test_yolo_api.cpp b/test/fpga/test_yolo_api.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..4ef890506eb1c40638242b9767267756a64da787
--- /dev/null
+++ b/test/fpga/test_yolo_api.cpp
@@ -0,0 +1,158 @@
+/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
+#ifndef PADDLE_MOBILE_FPGA
+#define PADDLE_MOBILE_FPGA
+#endif
+#include <fstream>
+#include <iostream>
+#include "../../src/io/paddle_inference_api.h"
+
+using namespace paddle_mobile;        // NOLINT
+using namespace paddle_mobile::fpga;  // NOLINT
+
+static const char *g_image = "../images/yolo_test_txtimg/1.txt";
+static const char *g_model = "../models/yolo_bn_l2_model/__model__";
+static const char *g_param = "../models/yolo_bn_l2_model/model.params";
+
+void readStream(std::string filename, float *buf) {
+  std::ifstream in;
+  in.open(filename, std::ios::in);
+  if (!in.is_open()) {
+    std::cout << "open File Failed." << std::endl;
+    return;
+  }
+  int i = 0;
+  while (!in.eof()) {
+    in >> buf[i];
+    i++;
+  }
+  in.close();
+}
+
+signed char float_to_int8(float fdata) {
+  if (fdata < 0.0) {
+    fdata -= 0.5;
+  } else {
+    fdata += 0.5;
+  }
+  return (signed char)fdata;
+}
+
+void quantize(float **data_in, int data_size) {
+  float *tmp = *data_in;
+  signed char *tmp_data = (signed char *)fpga_malloc(data_size * sizeof(char));
+  for (int i = 0; i < data_size; i++) {
+    tmp_data[i] = float_to_int8((*data_in)[i] + 128);
+  }
+  *data_in = (float *)tmp_data;  // NOLINT
+  fpga_free(tmp);
+}
+
+void convert_to_chw(float **data_in, int channel, int height, int width,
+                    float *data_tmp) {
+  int64_t amount_per_side = width * height;
+  for (int h = 0; h < height; h++) {
+    for (int w = 0; w < width; w++) {
+      for (int c = 0; c < channel; c++) {
+        *(data_tmp + c * amount_per_side + width * h + w) = *((*data_in)++);
+      }
+    }
+  }
+}
+
+void dump_stride_float(std::string filename, PaddleTensor input_tensor) {
+  auto data_ptr = reinterpret_cast<float *>(input_tensor.data.data());
+  int c = (input_tensor.shape)[1];
+  int h = (input_tensor.shape)[2];
+  int w = (input_tensor.shape)[3];
+  int n = (input_tensor.shape)[0];
+  float *data_tmp =
+      reinterpret_cast<float *>(malloc(c * h * w * sizeof(float)));
+  convert_to_chw(&data_ptr, c, h, w, data_tmp);
+  std::ofstream out(filename.c_str());
+  float result = 0;
+  int datasize = abs(c * h * w * n);
+  if (datasize == 0) {
+    std::cout << "wrong dump data size" << std::endl;
+    return;
+  }
+  for (int i = 0; i < datasize; i++) {
+    result = data_tmp[i];
+    out << result << std::endl;
+  }
+  out.close();
+}
+
+void dump_stride(std::string filename, PaddleTensor input_tensor) {
+  if (input_tensor.dtypeid == typeid(float)) {
+    dump_stride_float(filename, input_tensor);
+  } else {
+    std::cout << "only support dumping float data" << std::endl;
+  }
+}
+
+PaddleMobileConfig GetConfig() {
+  PaddleMobileConfig config;
+  config.precision = PaddleMobileConfig::FP32;
+  config.device = PaddleMobileConfig::kFPGA;
+  config.prog_file = g_model;
+  config.param_file = g_param;
+  config.thread_num = 1;
+  config.batch_size = 1;
+  config.optimize = true;
+  config.lod_mode = true;
+  config.quantification = false;
+  return config;
+}
+
+int main() {
+  open_device();
+  PaddleMobileConfig config = GetConfig();
+  auto predictor =
+      CreatePaddlePredictor<PaddleMobileConfig,
+                            PaddlePredictor::EngineKind::kPaddleMobile>(
+          config);
+
+  std::cout << "Finishing loading model" << std::endl;
+  int img_length = 256 * 416 * 3;
+  auto img =
+      reinterpret_cast<float *>(fpga_malloc(img_length * sizeof(float)));
+  readStream(g_image, img);
+
+  std::cout << "Finishing initializing data" << std::endl;
+  struct PaddleTensor t_img;
+  // t_img.dtype = FLOAT32;
+  // t_img.dtypeid = typeid(float);
+  quantize(&img, img_length);
+  t_img.dtype = INT8;
+  t_img.dtypeid = typeid(int8_t);
+  t_img.layout = LAYOUT_HWC;
+  t_img.shape = std::vector<int>({1, 256, 416, 3});
+  t_img.name = "Image information";
+  // t_img.data.Reset(img, img_length * sizeof(float));
+  t_img.data.Reset(img, img_length * sizeof(int8_t));
+  predictor->FeedPaddleTensors({t_img});
+
+  std::cout << "Finishing feeding data " << std::endl;
+
+  predictor->Predict_From_To(0, -1);
+  std::cout << "Finishing predicting " << std::endl;
+
+  std::vector<PaddleTensor> v;        // No need to initialize v
+  predictor->FetchPaddleTensors(&v);  // Old data in v will be cleared
+  std::cout << "Output number is " << v.size() << std::endl;
+  for (int fetchNum = 0; fetchNum < v.size(); fetchNum++) {
+    std::string dumpName = "yolo_api_fetch_" + std::to_string(fetchNum);
+    dump_stride(dumpName, v[fetchNum]);
+  }
+  return 0;
+}
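A final note on the quantization path exercised by test_yolo_api.cpp: quantize() shifts every input by +128 and float_to_int8 rounds half away from zero before truncating to a signed char, so the resulting byte is only meaningful if the FPGA side reads it back as unsigned (values above 127 do not fit in a signed char). A runnable check of the in-range rounding behavior, reusing the helper verbatim:

#include <cassert>

signed char float_to_int8(float fdata) {
  if (fdata < 0.0) {
    fdata -= 0.5;
  } else {
    fdata += 0.5;
  }
  return (signed char)fdata;
}

int main() {
  assert(float_to_int8(2.4f) == 2);    // 2.4 + 0.5 = 2.9, truncates to 2
  assert(float_to_int8(2.6f) == 3);    // 3.1 truncates to 3
  assert(float_to_int8(-2.6f) == -3);  // -3.1 truncates to -3
  // quantize() feeds float_to_int8(x + 128): inputs near the bottom of the
  // range stay inside signed char, but inputs near +127 exceed it and rely
  // on the consumer reinterpreting the byte as unsigned.
  return 0;
}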