diff --git a/src/fpga/V1/api.cpp b/src/fpga/V1/api.cpp
index b8f131634e9eb4c56218db8f0643f10834089393..0f9f96dc65fcfd892a5ca99a7c36a71ebca83817 100644
--- a/src/fpga/V1/api.cpp
+++ b/src/fpga/V1/api.cpp
@@ -32,7 +32,6 @@ void format_image(framework::Tensor *image_tensor) {
   float *p_data = external_ptr == nullptr ? data_ptr : external_ptr;
 
   image::format_image(&p_data, channel, height, width);
-
   if (p_data != data_ptr && external_ptr == nullptr) {
     image_tensor->reset_data_ptr(p_data);
   }
@@ -61,6 +60,7 @@ void format_fp16_ofm(framework::Tensor *ofm_tensor) {
   memset(p, 0, memory_size);
   ofm_tensor->reset_data_ptr(p);
   ofm_tensor->set_type(typeid(half));
+  ofm_tensor->fpga_data_num = memory_size / sizeof(half);
 }
 
 void format_fp16_ofm(framework::Tensor *ofm_tensor, framework::DDim dims) {
@@ -79,7 +79,9 @@ void format_fp16_ofm(framework::Tensor *ofm_tensor, framework::DDim dims) {
   memset(p, 0, memory_size);
   ofm_tensor->reset_data_ptr(p);
   ofm_tensor->set_type(typeid(half));
+  ofm_tensor->fpga_data_num = memory_size / sizeof(half);
 }
+
 void format_fp32_ofm(framework::Tensor *ofm_tensor) {
   auto dims = ofm_tensor->dims();
   size_t memory_size = 0;
@@ -96,6 +98,7 @@ void format_fp32_ofm(framework::Tensor *ofm_tensor) {
   memset(p, 0, memory_size);
   ofm_tensor->reset_data_ptr(p);
   ofm_tensor->set_type(typeid(float));
+  ofm_tensor->fpga_data_num = memory_size / sizeof(float);
 }
 
 float filter_find_max(framework::Tensor *filter_tensor) {
diff --git a/src/framework/executor.cpp b/src/framework/executor.cpp
index b9e7acfdaf3f1b70a8484d7426505da9c27b34a4..637521ea69e6301b6242d492eacc8dcf38091bfb 100644
--- a/src/framework/executor.cpp
+++ b/src/framework/executor.cpp
@@ -475,6 +475,19 @@ void Executor<Device, T>::FeedData(const std::vector<void *> &v) {
   }
 }
 
+template <typename Device, typename T>
+void Executor<Device, T>::FeedData(const std::vector<framework::Tensor> &v) {
+  auto input_size = v.size();
+  auto vars = program_.scope->VarContain("feed");
+  PADDLE_MOBILE_ENFORCE(input_size == vars.size(),
+                        "input data number not correct");
+  for (int i = 0; i < input_size; i++) {
+    auto var = program_.scope->Var("feed", i);
+    auto feed_tensor = var->template GetMutable<LoDTensor>();
+    feed_tensor->ShareDataWith(v[i]);
+  }
+}
+
 template <typename Device, typename T>
 void Executor<Device, T>::GetResults(std::vector<void *> *v) {
   auto output_size = v->size();
@@ -489,6 +502,20 @@ void Executor<Device, T>::GetResults(std::vector<void *> *v) {
   }
 }
 
+template <typename Device, typename T>
+void Executor<Device, T>::GetResults(std::vector<framework::Tensor *> *v) {
+  auto output_size = v->size();
+  PADDLE_MOBILE_ENFORCE(output_size > 0, "Empty output");
+  auto vars = program_.scope->VarContain("fetch");
+  PADDLE_MOBILE_ENFORCE(output_size == vars.size(),
+                        "output data number not correct");
+  for (int i = 0; i < output_size; i++) {
+    auto var = program_.scope->Var("fetch", i);
+    auto fetch_tensor = var->template GetMutable<LoDTensor>();
+    (*v)[i] = fetch_tensor;
+  }
+}
+
 template <typename Device, typename T>
 std::shared_ptr<LoDTensor> Executor<Device, T>::FetchResult(int id) {
   auto &ops = ops_of_block_[0];
diff --git a/src/framework/executor.h b/src/framework/executor.h
index ee285acac3e8bdf500452b6494bb37d79a2089e4..ba1a8b1afef3a9f592c4f84301576f187bc3c001 100644
--- a/src/framework/executor.h
+++ b/src/framework/executor.h
@@ -53,7 +53,11 @@ class Executor {
   void InjectVariable(const Tensor &t, std::string var_name);
   void FeedData(const Tensor &t);
   void FeedData(const std::vector<void *> &v);
+  void FeedData(const std::vector<framework::Tensor> &v);
+
   void GetResults(std::vector<void *> *v);
+  void GetResults(std::vector<framework::Tensor *> *v);
+
   std::shared_ptr<LoDTensor> FetchResult(int id = -1);
   void Predict_From_To(int start = 0, int end = -1);
   void Predict_From(int start);
diff --git a/src/framework/tensor.h b/src/framework/tensor.h
index c684169ce21474b4c68de9db523035866859818a..8b633ec5cca6719dc3b1ebf5637ca8796e90046f 100644
--- a/src/framework/tensor.h
+++ b/src/framework/tensor.h
@@ -31,6 +31,11 @@ limitations under the License. */
 namespace paddle_mobile {
 namespace framework {
 
+enum LayoutType {
+  LAYOUT_CHW = 1,
+  LAYOUT_HWC = 0,
+};
+
 class LoDTensor;
 
 class Tensor : public TensorBase {
@@ -223,6 +228,8 @@ class Tensor : public TensorBase {
   float scale[2];  // scale[0]= MAX/127.0, scale[1]= 127.0/MAX
   void *external_data = nullptr;  // only used for Feed
+  LayoutType layout = LAYOUT_HWC;
+  int64_t fpga_data_num;
 #endif
 };
diff --git a/src/io/api_paddle_mobile.cc b/src/io/api_paddle_mobile.cc
index ebeabead13a69ae1690335b4a73a9a511e086192..e2c2e6ffbfbe140d95d24684eb57227cc9503e78 100644
--- a/src/io/api_paddle_mobile.cc
+++ b/src/io/api_paddle_mobile.cc
@@ -113,72 +113,53 @@ bool PaddleMobilePredictor<Device, T>::Run(
 }
 
 #ifdef PADDLE_MOBILE_FPGA
-template <typename Device, typename T>
-bool PaddleMobilePredictor<Device, T>::Run(
-    const std::vector<PaddleTensor> &inputs,
-    std::vector<PaddleTensor> *output_data, std::vector<int> *index_data,
-    int batch_size) {
-  if (inputs.empty()) {
-    LOG(kLOG_ERROR) << "At least one output should be set with tensors' names.";
-    return false;
-  }
-  auto input = inputs[0];
-
-  if (input.shape.size() != 4) {
-    LOG(kLOG_ERROR) << "input shape not equal to 4!";
-    return false;
-  }
-  std::vector<int64_t> dims;
-  for (auto d : input.shape) {
-    dims.push_back(static_cast<int64_t>(d));
-  }
-
-  // use tensor
-  framework::DDim ddim =
-      framework::make_ddim({dims[0], dims[1], dims[2], dims[3]});
+void ConvertPaddleTensors(const PaddleTensor &src, framework::Tensor *des) {
+  des->Resize(framework::make_ddim(src.shape));
+  des->external_data = src.data.data();
+  des->set_type(src.dtypeid);
+  des->layout =
+      src.layout == LAYOUT_HWC ? framework::LAYOUT_HWC : framework::LAYOUT_CHW;
+}
 
-  framework::Tensor input_tensor;
-  input_tensor.Resize(ddim);
-  int input_length = framework::product(ddim);
-  auto input_ptr = input_tensor.mutable_data<T>();
+void ConvertTensors(const framework::Tensor &src, PaddleTensor *des) {
+  des->shape = framework::vectorize2int(src.dims());
+  des->dtypeid = src.type();
+  des->layout = src.layout == framework::LAYOUT_HWC ? LAYOUT_HWC : LAYOUT_CHW;
 
-  memcpy(input_ptr, static_cast<T *>(input.data.data()),
-         input_length * sizeof(T));
-  paddle_mobile_->Predict(input_tensor);
-  auto num_result = index_data->size();
-  if (output_data->size() != num_result) {
-    LOG(kLOG_ERROR) << "index and output number don't match";
-    return false;
+  auto num = src.numel();
+  if (src.type() == typeid(float)) {
+    des->data.Reset(const_cast<float *>(src.data<float>()),
+                    num * sizeof(float));
+  } else {
+    des->data.Reset(const_cast<int16_t *>(src.data<int16_t>()),
+                    num * sizeof(int16_t));
   }
+}
 
-  for (int i = 0; i < num_result; i++) {
-    auto output_tensor = paddle_mobile_->FetchResult((*index_data)[i]);
-
-    if (output_data->empty()) {
-      LOG(kLOG_ERROR)
-          << "At least one output should be set with tensors' names.";
-      return false;
-    }
-
-    auto &output = (*output_data)[i];
-    int output_length = output_tensor->numel();
-    std::vector<int64_t> tensor_shape =
-        framework::vectorize(output_tensor->dims());
-
-    for (auto d : tensor_shape) {
-      output.shape.push_back(static_cast<int>(d));
-    }
-
-    if (output.data.length() < output_length * sizeof(T)) {
-      output.data.Resize(output_length * sizeof(T));
-    }
-
-    memcpy(output.data.data(), output_tensor->template data<T>(),
-           output_length * sizeof(T));
+template <typename Device, typename T>
+void PaddleMobilePredictor<Device, T>::FeedPaddleTensors(
+    const std::vector<PaddleTensor> &inputs) {
+  auto num = inputs.size();
+  std::vector<framework::Tensor> tensors(num, framework::Tensor());
+  for (int i = 0; i < num; i++) {
+    tensors[i].init(typeid(float));
+    ConvertPaddleTensors(inputs[i], &tensors[i]);
   }
+  paddle_mobile_->FeedData(tensors);
+}
 
-  return true;
+template <typename Device, typename T>
+void PaddleMobilePredictor<Device, T>::FetchPaddleTensors(
+    std::vector<PaddleTensor> *outputs) {
+  auto num = outputs->size();
+  PADDLE_MOBILE_ENFORCE(num > 0, "0 output pointers is not permitted");
+  std::vector<framework::Tensor *> tensors(num, nullptr);
+  paddle_mobile_->GetResults(&tensors);
+  for (int i = 0; i < num; i++) {
+    ConvertTensors(*tensors[i], &(*outputs)[i]);
+  }
 }
+
 template <typename Device, typename T>
 void PaddleMobilePredictor<Device, T>::FeedData(
     const std::vector<void *> &inputs) {
diff --git a/src/io/api_paddle_mobile.h b/src/io/api_paddle_mobile.h
index 0cadd71c226b20331c8399d2cfd8873c093a6b84..4ea83123355ac4dfff0479045eef7f1c0a4734d2 100644
--- a/src/io/api_paddle_mobile.h
+++ b/src/io/api_paddle_mobile.h
@@ -32,13 +32,13 @@ class PaddleMobilePredictor : public PaddlePredictor {
            std::vector<PaddleTensor>* output_data,
            int batch_size = -1) override;
 #ifdef PADDLE_MOBILE_FPGA
-  bool Run(const std::vector<PaddleTensor>& inputs,
-           std::vector<PaddleTensor>* output_data, std::vector<int>* index_data,
-           int batch_size = -1) override;
   void FeedData(const std::vector<void *>& inputs) override;
   void GetResults(std::vector<void *>* outputs) override;
-  void Predict_From_To(int start = 0, int end = -1) override;
+  void Predict_From_To(int start, int end) override;
+  void FeedPaddleTensors(const std::vector<PaddleTensor>& inputs) override;
+  void FetchPaddleTensors(std::vector<PaddleTensor>* outputs) override;
 #endif
+
   ~PaddleMobilePredictor() override;
 
  private:
diff --git a/src/io/paddle_inference_api.h b/src/io/paddle_inference_api.h
index 42509915d13cf7e632ed20c73f1320ec8bac09d1..f7e66740f0b5f732e7517db527ad60dd660d6807 100644
--- a/src/io/paddle_inference_api.h
+++ b/src/io/paddle_inference_api.h
@@ -24,6 +24,7 @@ limitations under the License. */
 #include <cassert>
 #include <memory>
 #include <string>
+#include <typeindex>
 #include <vector>
 
 // #define PADDLE_MOBILE_FPGA
@@ -33,12 +34,21 @@ namespace paddle_mobile {
 #ifdef PADDLE_MOBILE_FPGA
 namespace fpga {
 int open_device();
-}
+void* fpga_malloc(size_t size);
+void fpga_free(void* ptr);
+}  // namespace fpga
 #endif
 
 enum PaddleDType {
   FLOAT32,
+  FLOAT16,
   INT64,
+  INT8,
+};
+
+enum LayoutType {
+  LAYOUT_CHW = 1,
+  LAYOUT_HWC = 0,
 };
 
 class PaddleBuf {
@@ -78,6 +88,8 @@ struct PaddleTensor {
   // TODO(Superjomn) for LoD support, add a vector<vector<size_t>> field if needed.
   PaddleBuf data;  // blob of data.
   PaddleDType dtype;
+  std::type_index dtypeid = typeid(float);
+  LayoutType layout;
 };
 
 enum class PaddleEngineKind {
@@ -116,12 +128,11 @@ class PaddlePredictor {
     std::string param_file;
   };
 #ifdef PADDLE_MOBILE_FPGA
-  virtual bool Run(const std::vector<PaddleTensor>& inputs,
-                   std::vector<PaddleTensor>* output_data,
-                   std::vector<int>* index_data, int batch_size = -1) = 0;
   virtual void FeedData(const std::vector<void *>& inputs) = 0;
   virtual void GetResults(std::vector<void *>* outputs) = 0;
-  virtual void Predict_From_To(int start = 0, int end = -1) = 0;
+  virtual void Predict_From_To(int start, int end) = 0;
+  virtual void FeedPaddleTensors(const std::vector<PaddleTensor>& inputs) = 0;
+  virtual void FetchPaddleTensors(std::vector<PaddleTensor>* outputs) = 0;
 #endif
 
  protected:
diff --git a/src/io/paddle_mobile.cpp b/src/io/paddle_mobile.cpp
index 0dfa9d0500847c80e78a156b9c82a33d1dfd4a00..687185e82a44806783535e084cf34e90ca09882d 100644
--- a/src/io/paddle_mobile.cpp
+++ b/src/io/paddle_mobile.cpp
@@ -231,11 +231,23 @@ template <typename Device, typename T>
 void PaddleMobile<Device, T>::FeedData(const std::vector<void *> &v) {
   executor_->FeedData(v);
 };
+
+template <typename Device, typename T>
+void PaddleMobile<Device, T>::FeedData(
+    const std::vector<framework::Tensor> &v) {
+  executor_->FeedData(v);
+};
+
 template <typename Device, typename T>
 void PaddleMobile<Device, T>::GetResults(std::vector<void *> *v) {
   executor_->GetResults(v);
 }
 
+template <typename Device, typename T>
+void PaddleMobile<Device, T>::GetResults(std::vector<framework::Tensor *> *v) {
+  executor_->GetResults(v);
+}
+
 template <typename Device, typename T>
 std::shared_ptr<framework::Tensor> PaddleMobile<Device, T>::FetchResult(
     int id) {
diff --git a/src/io/paddle_mobile.h b/src/io/paddle_mobile.h
index d608abcac79d2a5ae79ad375a8cb93d4594d1e8a..1aa0efd6beaadaa461643610023ae10a3543604f 100644
--- a/src/io/paddle_mobile.h
+++ b/src/io/paddle_mobile.h
@@ -91,7 +91,11 @@ class PaddleMobile {
   void InjectVariable(const framework::Tensor &t, std::string var_name);
   void FeedData(const framework::Tensor &t);
   void FeedData(const std::vector<void *> &v);
+  void FeedData(const std::vector<framework::Tensor> &v);
+
   void GetResults(std::vector<void *> *v);
+  void GetResults(std::vector<framework::Tensor *> *v);
+
   std::shared_ptr<framework::Tensor> FetchResult(int id = -1);
   void Predict_From_To(int start = 0, int end = -1);
   void Predict_From(int start);
diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt
index 3af55f075805361fd0cff40ab2e53752ea63f781..138362f20892cb1b5db9bf0a2c83baec79f5f0f4 100644
--- a/test/CMakeLists.txt
+++ b/test/CMakeLists.txt
@@ -80,6 +80,9 @@ if (CON GREATER -1)
     ADD_EXECUTABLE(test-marker fpga/test_marker.cpp test_helper.h test_include.h executor_for_test.h)
     target_link_libraries(test-marker paddle-mobile)
 
+    ADD_EXECUTABLE(test-rfcn-api fpga/test_rfcn_api.cpp)
+    target_link_libraries(test-rfcn-api paddle-mobile)
+
     set(FOUND_MATCH ON)
 endif ()
diff --git a/test/fpga/test_rfcn_api.cpp b/test/fpga/test_rfcn_api.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..5cd910080d8f45b40806a10ef2f50b2a6f1219bc
--- /dev/null
+++ b/test/fpga/test_rfcn_api.cpp
@@ -0,0 +1,135 @@
+/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
+#include <fstream>
+#include <iostream>
+#include "../../src/io/paddle_inference_api.h"
+
+using namespace paddle_mobile;
+using namespace paddle_mobile::fpga;
+
+static const char *g_image = "../models/rfcn/data.bin";
+static const char *g_model = "../models/rfcn/model";
+static const char *g_param = "../models/rfcn/params";
+
+void readStream(std::string filename, char *buf) {
+  std::ifstream in;
+  in.open(filename, std::ios::in | std::ios::binary);
+  if (!in.is_open()) {
+    std::cout << "open File Failed." << std::endl;
+    return;
+  }
+
+  in.seekg(0, std::ios::end);  // go to the end
+  auto length = in.tellg();    // report location (this is the length)
+  in.seekg(0, std::ios::beg);  // go back to the beginning
+  in.read(buf, length);
+  in.close();
+}
+
+PaddleMobileConfig GetConfig() {
+  PaddleMobileConfig config;
+  config.precision = PaddleMobileConfig::FP32;
+  config.device = PaddleMobileConfig::kFPGA;
+  config.prog_file = g_model;
+  config.param_file = g_param;
+  config.thread_num = 1;
+  config.batch_size = 1;
+  config.optimize = true;
+  config.lod_mode = true;
+  config.quantification = false;
+  return config;
+}
+
+int main() {
+  open_device();
+  PaddleMobileConfig config = GetConfig();
+  auto predictor =
+      CreatePaddlePredictor<PaddleMobileConfig,
+                            PaddleEngineKind::kPaddleMobile>(config);
+
+  std::cout << "after loading model" << std::endl;
+
+  float img_info[3] = {768, 1536, 768.0f / 960.0f};
+  int img_length = 768 * 1536 * 3;
+  auto img = reinterpret_cast<float *>(fpga_malloc(img_length * sizeof(float)));
+  readStream(g_image, reinterpret_cast<char *>(img));
+
+  std::cout << "after initializing data" << std::endl;
+/*
+  predictor->FeedData({img_info, img});
+  predictor->Predict_From_To(0, -1);
+  std::cout << " Finishing predicting " << std::endl;
+
+  std::vector<void *> v(3, nullptr);
+  predictor->GetResults(&v);
+  int post_nms = 300;
+  for (int num = 0; num < post_nms; num++) {
+    for (int i = 0; i < 8; i++) {
+      std::cout << ((float *)(v[0]))[num * 8 + i] << std::endl;
+    }
+  }
+  for (int num = 0; num < post_nms; num++) {
+    for (int i = 0; i < 8; i++) {
+      std::cout << ((float *)(v[1]))[num * 8 + i] << std::endl;
+    }
+  }
+  for (int num = 0; num < post_nms; num++) {
+    for (int i = 0; i < 4; i++) {
+      std::cout << ((float *)(v[2]))[num * 4 + i] << std::endl;
+    }
+  }
+*/
+
+  struct PaddleTensor t_img_info, t_img;
+  t_img_info.dtype = FLOAT32;
+  t_img_info.layout = LAYOUT_HWC;
+  t_img_info.shape = std::vector<int>({1, 3});
+  t_img_info.name = "Image information";
+  t_img_info.data.Reset(img_info, 3 * sizeof(float));
+
+  t_img.dtype = FLOAT32;
+  t_img.layout = LAYOUT_HWC;
+  t_img.shape = std::vector<int>({1, 768, 1536, 3});
+  t_img.name = "Image information";
+  t_img.data.Reset(img, img_length * sizeof(float));
+  predictor->FeedPaddleTensors({t_img_info, t_img});
+
+  std::cout << "Finishing feeding data " << std::endl;
+
+  predictor->Predict_From_To(0, -1);
+  std::cout << "Finishing predicting " << std::endl;
+
+  std::vector<PaddleTensor> v(3, PaddleTensor());
+  predictor->FetchPaddleTensors(&v);
+  auto post_nms = v[0].data.length() / sizeof(float) / 8;
+  for (int num = 0; num < post_nms; num++) {
+    for (int i = 0; i < 8; i++) {
+      auto p = reinterpret_cast<float *>(v[0].data.data());
+      std::cout << p[num * 8 + i] << std::endl;
+    }
+  }
+  for (int num = 0; num < post_nms; num++) {
+    for (int i = 0; i < 8; i++) {
+      auto p = reinterpret_cast<float *>(v[1].data.data());
+      std::cout << p[num * 8 + i] << std::endl;
+    }
+  }
+  for (int num = 0; num < post_nms; num++) {
+    for (int i = 0; i < 4; i++) {
+      auto p = reinterpret_cast<float *>(v[2].data.data());
+      std::cout << p[num * 4 + i] << std::endl;
+    }
+  }
+  return 0;
+}