提交 489e06d1 编写于 作者: xiebaiyuan's avatar xiebaiyuan

Merge remote-tracking branch 'upstream/develop' into develop

...@@ -17,17 +17,21 @@ limitations under the License. */ ...@@ -17,17 +17,21 @@ limitations under the License. */
#include <sys/ioctl.h> #include <sys/ioctl.h>
#include <sys/mman.h> #include <sys/mman.h>
#include <algorithm> #include <algorithm>
#include <map>
#include "bias_scale.h" #include "bias_scale.h"
#include "filter.h" #include "filter.h"
#include "image.h" #include "image.h"
#define FPGA_TEST_MODE #define FPGA_TEST_MODE
//#define PADDLE_MOBILE_OS_LINUX #define PADDLE_MOBILE_OS_LINUX
namespace paddle_mobile { namespace paddle_mobile {
namespace fpga { namespace fpga {
static int fd = -1; static int fd = -1;
static const char *device_path = "/dev/fpgadrv0"; static const char *device_path = "/dev/fpgadrv0";
#ifdef PADDLE_MOBILE_OS_LINUX
static std::map<void *, size_t> memory_map;
#endif
static inline int do_ioctl(int req, const void *arg) { static inline int do_ioctl(int req, const void *arg) {
#ifdef PADDLE_MOBILE_OS_LINUX #ifdef PADDLE_MOBILE_OS_LINUX
...@@ -48,10 +52,13 @@ int open_device() { ...@@ -48,10 +52,13 @@ int open_device() {
// memory management; // memory management;
void *fpga_malloc(size_t size) { void *fpga_malloc(size_t size) {
DLOG << size << " bytes allocated"; static uint64_t counter = 0;
counter += size;
DLOG << size << " bytes allocated. Total " << counter << " bytes";
#ifdef PADDLE_MOBILE_OS_LINUX #ifdef PADDLE_MOBILE_OS_LINUX
return reinterpret_cast<void *>( auto ptr = mmap64(nullptr, size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
mmap64(NULL, size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0)); memory_map.insert(std::make_pair(ptr, size));
return ptr;
#else #else
return malloc(size); return malloc(size);
#endif #endif
...@@ -59,7 +66,16 @@ void *fpga_malloc(size_t size) { ...@@ -59,7 +66,16 @@ void *fpga_malloc(size_t size) {
void fpga_free(void *ptr) { void fpga_free(void *ptr) {
#ifdef PADDLE_MOBILE_OS_LINUX #ifdef PADDLE_MOBILE_OS_LINUX
munmap(ptr, 0); static uint64_t counter = 0;
size_t size = 0;
auto iter = memory_map.find(ptr); // std::map<void *, size_t>::iterator
if (iter != memory_map.end()) {
size = iter->second;
munmap(ptr, size);
memory_map.erase(iter);
}
counter += size;
DLOG << size << " bytes freed. Total " << counter << " bytes";
#else #else
free(ptr); free(ptr);
#endif #endif
......
...@@ -20,8 +20,6 @@ limitations under the License. */ ...@@ -20,8 +20,6 @@ limitations under the License. */
#include <limits> #include <limits>
#include "framework/tensor.h" #include "framework/tensor.h"
// memory management;
namespace paddle_mobile { namespace paddle_mobile {
namespace fpga { namespace fpga {
...@@ -45,9 +43,6 @@ struct MemoryCopyArgs { ...@@ -45,9 +43,6 @@ struct MemoryCopyArgs {
size_t size; size_t size;
}; };
/**
Conv and Pooling kernel
*/
struct KernelArgs { struct KernelArgs {
uint32_t width; uint32_t width;
uint32_t height; uint32_t height;
...@@ -109,7 +104,6 @@ struct PoolingArgs { ...@@ -109,7 +104,6 @@ struct PoolingArgs {
struct ImageOutputArgs output; struct ImageOutputArgs output;
}; };
// elementwise add arguments
struct EWAddArgs { struct EWAddArgs {
bool relu_enabled; bool relu_enabled;
......
...@@ -289,12 +289,8 @@ class Tensor { ...@@ -289,12 +289,8 @@ class Tensor {
virtual std::type_index type() const { return type_; } virtual std::type_index type() const { return type_; }
virtual void set_type(std::type_index type) { type_ = type; } virtual void set_type(std::type_index type) { type_ = type; }
#ifndef PADDLE_MOBILE_FPGA
/*! the pointer of memory block. */
std::unique_ptr<uint8_t, memory::PODDeleter<uint8_t>> ptr_; std::unique_ptr<uint8_t, memory::PODDeleter<uint8_t>> ptr_;
#else
std::shared_ptr<uint8_t> ptr_;
#endif
/*! the size of memory block. */ /*! the size of memory block. */
size_t size_; size_t size_;
......
...@@ -662,13 +662,15 @@ void Executor<Dtype, P>::FeedData(const framework::Tensor &t) { ...@@ -662,13 +662,15 @@ void Executor<Dtype, P>::FeedData(const framework::Tensor &t) {
}; };
template <typename Dtype, Precision P> template <typename Dtype, Precision P>
std::shared_ptr<framework::Tensor> Executor<Dtype, P>::FetchResult() { std::shared_ptr<framework::Tensor> Executor<Dtype, P>::FetchResult(int id) {
std::shared_ptr<framework::BlockDesc> to_predict_block = std::shared_ptr<framework::BlockDesc> to_predict_block =
to_predict_program_->Block(0); to_predict_program_->Block(0);
auto &ops = ops_of_block_[*to_predict_block.get()]; auto &ops = ops_of_block_[*to_predict_block.get()];
auto last_op = ops.rbegin();
auto output_map = (*last_op)->Outputs(); PADDLE_MOBILE_ENFORCE(id < ops.size(), "Index out of range");
std::vector<std::string> out_keys = (*last_op)->GetOutKeys(); auto last_op = id < 0 ? ops[ops.size() - 1] : ops[id];
auto output_map = last_op->Outputs();
std::vector<std::string> out_keys = last_op->GetOutKeys();
PADDLE_MOBILE_ENFORCE(!out_keys.empty(), "the last op contains no output"); PADDLE_MOBILE_ENFORCE(!out_keys.empty(), "the last op contains no output");
auto *output_tensor = framework::GetVarValue<framework::LoDTensor>( auto *output_tensor = framework::GetVarValue<framework::LoDTensor>(
out_keys[0], output_map, *(program_.scope)); out_keys[0], output_map, *(program_.scope));
......
...@@ -99,7 +99,7 @@ class Executor { ...@@ -99,7 +99,7 @@ class Executor {
public: public:
void InjectVariable(const framework::Tensor &t, string var_name); void InjectVariable(const framework::Tensor &t, string var_name);
void FeedData(const framework::Tensor &t); void FeedData(const framework::Tensor &t);
std::shared_ptr<framework::Tensor> FetchResult(); std::shared_ptr<framework::Tensor> FetchResult(int id = -1);
void Predict_From_To(int start = 0, int end = -1); void Predict_From_To(int start = 0, int end = -1);
void Predict_From(int start); void Predict_From(int start);
void Predict_To(int end); void Predict_To(int end);
......
...@@ -138,8 +138,8 @@ void PaddleMobile<Dtype, P>::FeedData(const framework::Tensor &t) { ...@@ -138,8 +138,8 @@ void PaddleMobile<Dtype, P>::FeedData(const framework::Tensor &t) {
}; };
template <typename Dtype, Precision P> template <typename Dtype, Precision P>
std::shared_ptr<framework::Tensor> PaddleMobile<Dtype, P>::FetchResult() { std::shared_ptr<framework::Tensor> PaddleMobile<Dtype, P>::FetchResult(int id) {
return executor_->FetchResult(); return executor_->FetchResult(id);
}; };
template <typename Dtype, Precision P> template <typename Dtype, Precision P>
......
...@@ -97,7 +97,7 @@ class PaddleMobile { ...@@ -97,7 +97,7 @@ class PaddleMobile {
public: public:
void InjectVariable(const framework::Tensor &t, string var_name); void InjectVariable(const framework::Tensor &t, string var_name);
void FeedData(const framework::Tensor &t); void FeedData(const framework::Tensor &t);
std::shared_ptr<framework::Tensor> FetchResult(); std::shared_ptr<framework::Tensor> FetchResult(int id = -1);
void Predict_From_To(int start = 0, int end = -1); void Predict_From_To(int start = 0, int end = -1);
void Predict_From(int start); void Predict_From(int start);
void Predict_To(int end); void Predict_To(int end);
......
...@@ -54,7 +54,13 @@ int main() { ...@@ -54,7 +54,13 @@ int main() {
paddle_mobile.FeedData(input_tensor); paddle_mobile.FeedData(input_tensor);
paddle_mobile.Predict_To(10); paddle_mobile.Predict_To(10);
paddle_mobile.Predict_From(10); paddle_mobile.Predict_From(10);
paddle_mobile.FetchResult(); auto tensor_ptr = paddle_mobile.FetchResult(9);
std::cout << "Tensor element number for op[9]: " << tensor_ptr->numel()
<< std::endl;
auto result_ptr = paddle_mobile.FetchResult();
std::cout << "Result tensor element number: " << result_ptr->numel()
<< std::endl;
auto time4 = time(); auto time4 = time();
std::cout << "predict cost :" << time_diff(time3, time4) << "ms" std::cout << "predict cost :" << time_diff(time3, time4) << "ms"
<< std::endl; << std::endl;
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册