diff --git a/src/fpga/api.cpp b/src/fpga/api.cpp index 36a54eb3a84790e241abf8beb138360505f2f89e..11b26fe6893dc24664b563f5a2212fae77926d5e 100644 --- a/src/fpga/api.cpp +++ b/src/fpga/api.cpp @@ -17,17 +17,21 @@ limitations under the License. */ #include #include #include +#include #include "bias_scale.h" #include "filter.h" #include "image.h" #define FPGA_TEST_MODE -//#define PADDLE_MOBILE_OS_LINUX +#define PADDLE_MOBILE_OS_LINUX namespace paddle_mobile { namespace fpga { static int fd = -1; static const char *device_path = "/dev/fpgadrv0"; +#ifdef PADDLE_MOBILE_OS_LINUX +static std::map memory_map; +#endif static inline int do_ioctl(int req, const void *arg) { #ifdef PADDLE_MOBILE_OS_LINUX @@ -48,10 +52,13 @@ int open_device() { // memory management; void *fpga_malloc(size_t size) { - DLOG << size << " bytes allocated"; + static uint64_t counter = 0; + counter += size; + DLOG << size << " bytes allocated. Total " << counter << " bytes"; #ifdef PADDLE_MOBILE_OS_LINUX - return reinterpret_cast( - mmap64(NULL, size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0)); + auto ptr = mmap64(nullptr, size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0); + memory_map.insert(std::make_pair(ptr, size)); + return ptr; #else return malloc(size); #endif @@ -59,7 +66,16 @@ void *fpga_malloc(size_t size) { void fpga_free(void *ptr) { #ifdef PADDLE_MOBILE_OS_LINUX - munmap(ptr, 0); + static uint64_t counter = 0; + size_t size = 0; + auto iter = memory_map.find(ptr); // std::map::iterator + if (iter != memory_map.end()) { + size = iter->second; + munmap(ptr, size); + memory_map.erase(iter); + } + counter += size; + DLOG << size << " bytes freed. Total " << counter << " bytes"; #else free(ptr); #endif diff --git a/src/fpga/api.h b/src/fpga/api.h index 83eca9e2688d7076d33818d992cafb5a4c05df01..f5fa05b6750996ee391a30d2651a69d90e357547 100644 --- a/src/fpga/api.h +++ b/src/fpga/api.h @@ -20,8 +20,6 @@ limitations under the License. */ #include #include "framework/tensor.h" -// memory management; - namespace paddle_mobile { namespace fpga { @@ -45,9 +43,6 @@ struct MemoryCopyArgs { size_t size; }; -/** -Conv and Pooling kernel -*/ struct KernelArgs { uint32_t width; uint32_t height; @@ -109,7 +104,6 @@ struct PoolingArgs { struct ImageOutputArgs output; }; -// elementwise add arguments struct EWAddArgs { bool relu_enabled; diff --git a/src/framework/tensor.h b/src/framework/tensor.h index c5572dcbfdbd665994be7ebe005b6c9c98b5bca9..ba8e3d3402f16966f08c370bff8cd6b0d1f2637b 100644 --- a/src/framework/tensor.h +++ b/src/framework/tensor.h @@ -289,12 +289,8 @@ class Tensor { virtual std::type_index type() const { return type_; } virtual void set_type(std::type_index type) { type_ = type; } -#ifndef PADDLE_MOBILE_FPGA - /*! the pointer of memory block. */ + std::unique_ptr> ptr_; -#else - std::shared_ptr ptr_; -#endif /*! the size of memory block. */ size_t size_; diff --git a/src/io/executor.cpp b/src/io/executor.cpp index 8bc3c15c6d761234f5f45a645fde81ff97822f03..c12f1ce02c8ab32d04d00d76cad5dc7d6ce45bc2 100644 --- a/src/io/executor.cpp +++ b/src/io/executor.cpp @@ -662,13 +662,15 @@ void Executor::FeedData(const framework::Tensor &t) { }; template -std::shared_ptr Executor::FetchResult() { +std::shared_ptr Executor::FetchResult(int id) { std::shared_ptr to_predict_block = to_predict_program_->Block(0); auto &ops = ops_of_block_[*to_predict_block.get()]; - auto last_op = ops.rbegin(); - auto output_map = (*last_op)->Outputs(); - std::vector out_keys = (*last_op)->GetOutKeys(); + + PADDLE_MOBILE_ENFORCE(id < ops.size(), "Index out of range"); + auto last_op = id < 0 ? ops[ops.size() - 1] : ops[id]; + auto output_map = last_op->Outputs(); + std::vector out_keys = last_op->GetOutKeys(); PADDLE_MOBILE_ENFORCE(!out_keys.empty(), "the last op contains no output"); auto *output_tensor = framework::GetVarValue( out_keys[0], output_map, *(program_.scope)); diff --git a/src/io/executor.h b/src/io/executor.h index bec9f45444a7502c1b6a119f80f55220765efe50..67d3f02ac37c4203950a2679d30d7aa9072c70ba 100644 --- a/src/io/executor.h +++ b/src/io/executor.h @@ -99,7 +99,7 @@ class Executor { public: void InjectVariable(const framework::Tensor &t, string var_name); void FeedData(const framework::Tensor &t); - std::shared_ptr FetchResult(); + std::shared_ptr FetchResult(int id = -1); void Predict_From_To(int start = 0, int end = -1); void Predict_From(int start); void Predict_To(int end); diff --git a/src/io/paddle_mobile.cpp b/src/io/paddle_mobile.cpp index 9710a0ec452db3381e051db95d7da81b48f5f154..0b84f1ff45e519dbbc244863db481f2364907a89 100644 --- a/src/io/paddle_mobile.cpp +++ b/src/io/paddle_mobile.cpp @@ -138,8 +138,8 @@ void PaddleMobile::FeedData(const framework::Tensor &t) { }; template -std::shared_ptr PaddleMobile::FetchResult() { - return executor_->FetchResult(); +std::shared_ptr PaddleMobile::FetchResult(int id) { + return executor_->FetchResult(id); }; template diff --git a/src/io/paddle_mobile.h b/src/io/paddle_mobile.h index b11f7d7c3b8fc051c1d0da17e769225aab0dc968..73c5553d91c1b4781718265aba8b7fa8dd5e2777 100644 --- a/src/io/paddle_mobile.h +++ b/src/io/paddle_mobile.h @@ -97,7 +97,7 @@ class PaddleMobile { public: void InjectVariable(const framework::Tensor &t, string var_name); void FeedData(const framework::Tensor &t); - std::shared_ptr FetchResult(); + std::shared_ptr FetchResult(int id = -1); void Predict_From_To(int start = 0, int end = -1); void Predict_From(int start); void Predict_To(int end); diff --git a/test/net/test_resnet.cpp b/test/net/test_resnet.cpp index dfc40f86c4849c768334030cfbc96f0fea9a0178..d2a4abbbfd2c023f1e8220e74f815eda44acb6db 100644 --- a/test/net/test_resnet.cpp +++ b/test/net/test_resnet.cpp @@ -54,7 +54,13 @@ int main() { paddle_mobile.FeedData(input_tensor); paddle_mobile.Predict_To(10); paddle_mobile.Predict_From(10); - paddle_mobile.FetchResult(); + auto tensor_ptr = paddle_mobile.FetchResult(9); + std::cout << "Tensor element number for op[9]: " << tensor_ptr->numel() + << std::endl; + auto result_ptr = paddle_mobile.FetchResult(); + std::cout << "Result tensor element number: " << result_ptr->numel() + << std::endl; + auto time4 = time(); std::cout << "predict cost :" << time_diff(time3, time4) << "ms" << std::endl;