Commit add0c1eb authored by zhangyang0701

support multi PaddleTensor for FPGA track

Parent 70baa872
...@@ -32,7 +32,6 @@ void format_image(framework::Tensor *image_tensor) { ...@@ -32,7 +32,6 @@ void format_image(framework::Tensor *image_tensor) {
float *p_data = external_ptr == nullptr ? data_ptr : external_ptr; float *p_data = external_ptr == nullptr ? data_ptr : external_ptr;
image::format_image(&p_data, channel, height, width); image::format_image(&p_data, channel, height, width);
if (p_data != data_ptr && external_ptr == nullptr) { if (p_data != data_ptr && external_ptr == nullptr) {
image_tensor->reset_data_ptr(p_data); image_tensor->reset_data_ptr(p_data);
} }
...@@ -61,6 +60,7 @@ void format_fp16_ofm(framework::Tensor *ofm_tensor) { ...@@ -61,6 +60,7 @@ void format_fp16_ofm(framework::Tensor *ofm_tensor) {
memset(p, 0, memory_size); memset(p, 0, memory_size);
ofm_tensor->reset_data_ptr(p); ofm_tensor->reset_data_ptr(p);
ofm_tensor->set_type(typeid(half)); ofm_tensor->set_type(typeid(half));
ofm_tensor->fpga_data_num = memory_size / sizeof(half);
} }
void format_fp16_ofm(framework::Tensor *ofm_tensor, framework::DDim dims) {
@@ -79,7 +79,9 @@ void format_fp16_ofm(framework::Tensor *ofm_tensor, framework::DDim dims) {
memset(p, 0, memory_size);
ofm_tensor->reset_data_ptr(p);
ofm_tensor->set_type(typeid(half));
ofm_tensor->fpga_data_num = memory_size / sizeof(half);
}
void format_fp32_ofm(framework::Tensor *ofm_tensor) {
auto dims = ofm_tensor->dims();
size_t memory_size = 0;
@@ -96,6 +98,7 @@ void format_fp32_ofm(framework::Tensor *ofm_tensor) {
memset(p, 0, memory_size);
ofm_tensor->reset_data_ptr(p);
ofm_tensor->set_type(typeid(float));
ofm_tensor->fpga_data_num = memory_size / sizeof(float);
}
float filter_find_max(framework::Tensor *filter_tensor) {
......
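The fpga_data_num field set in the hunks above simply records how many elements fit in the buffer that was just allocated and zeroed, i.e. the aligned memory_size divided by the element size. A minimal standalone sketch of that arithmetic (the shape and the assumption that the channel count already meets the FPGA alignment requirement are for illustration only, not taken from the commit):

#include <cstddef>
#include <cstdint>
#include <iostream>

int main() {
  // Assume a 1x32x16x16 fp16 output feature map whose channel count already
  // satisfies the FPGA alignment rule, so no padding is added in this sketch.
  using half_t = int16_t;  // stand-in for paddle-mobile's 16-bit half type
  size_t memory_size = 1 * 32 * 16 * 16 * sizeof(half_t);
  int64_t fpga_data_num = memory_size / sizeof(half_t);  // element count, as in the diff
  std::cout << "fpga_data_num = " << fpga_data_num << std::endl;  // prints 8192
  return 0;
}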
@@ -475,6 +475,19 @@ void Executor<Device, T>::FeedData(const std::vector<void *> &v) {
}
}
template <typename Device, typename T>
void Executor<Device, T>::FeedData(const std::vector<framework::Tensor> &v) {
auto input_size = v.size();
auto vars = program_.scope->VarContain("feed");
PADDLE_MOBILE_ENFORCE(input_size == vars.size(),
"input data number not correct");
for (int i = 0; i < input_size; i++) {
auto var = program_.scope->Var("feed", i);
auto feed_tensor = var->template GetMutable<LoDTensor>();
feed_tensor->ShareDataWith(v[i]);
}
}
template <typename Device, typename T>
void Executor<Device, T>::GetResults(std::vector<void *> *v) {
auto output_size = v->size();
@@ -489,6 +502,20 @@ void Executor<Device, T>::GetResults(std::vector<void *> *v) {
}
}
template <typename Device, typename T>
void Executor<Device, T>::GetResults(std::vector<framework::Tensor *> *v) {
auto output_size = v->size();
PADDLE_MOBILE_ENFORCE(output_size > 0, "Empty output");
auto vars = program_.scope->VarContain("fetch");
PADDLE_MOBILE_ENFORCE(output_size == vars.size(),
"output data number not correct");
for (int i = 0; i < output_size; i++) {
auto var = program_.scope->Var("fetch", i);
auto fetch_tensor = var->template GetMutable<LoDTensor>();
(*v)[i] = fetch_tensor;
}
}
template <typename Device, typename T>
std::shared_ptr<Tensor> Executor<Device, T>::FetchResult(int id) {
auto &ops = ops_of_block_[0];
......
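The two Executor overloads added above share each incoming framework::Tensor with the matching "feed" variable and hand back raw pointers to the "fetch" variables. A minimal usage sketch at the PaddleMobile level, which forwards to these overloads; the FPGA device tag, header path, and the input/output counts are assumptions for illustration:

#include <vector>
#include "io/paddle_mobile.h"  // path assumed

void run_with_tensors(paddle_mobile::PaddleMobile<paddle_mobile::FPGA, float> &engine) {
  // One framework::Tensor per "feed" variable in the program (count assumed).
  std::vector<paddle_mobile::framework::Tensor> inputs(2);
  // ... Resize() each tensor and fill it via mutable_data<float>() ...
  engine.FeedData(inputs);        // new overload: shares data with the feed vars
  engine.Predict_From_To(0, -1);  // run the whole block of ops
  // One output pointer per "fetch" variable; filled in place by GetResults().
  std::vector<paddle_mobile::framework::Tensor *> outputs(3, nullptr);
  engine.GetResults(&outputs);
}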
@@ -53,7 +53,11 @@ class Executor {
void InjectVariable(const Tensor &t, std::string var_name);
void FeedData(const Tensor &t);
void FeedData(const std::vector<void *> &v);
void FeedData(const std::vector<framework::Tensor> &v);
void GetResults(std::vector<void *> *v);
void GetResults(std::vector<framework::Tensor *> *v);
std::shared_ptr<Tensor> FetchResult(int id = -1);
void Predict_From_To(int start = 0, int end = -1);
void Predict_From(int start);
......
@@ -31,6 +31,11 @@ limitations under the License. */
namespace paddle_mobile {
namespace framework {
enum LayoutType {
LAYOUT_CHW = 1,
LAYOUT_HWC = 0,
};
class LoDTensor;
class Tensor : public TensorBase {
@@ -223,6 +228,8 @@ class Tensor : public TensorBase {
float scale[2]; // scale[0]= MAX/127.0, scale[1]= 127.0/MAX
void *external_data = nullptr; // only used for Feed
LayoutType layout = LAYOUT_HWC;
int64_t fpga_data_num;
#endif
};
......
@@ -113,72 +113,53 @@ bool PaddleMobilePredictor<Device, T>::Run(
}
#ifdef PADDLE_MOBILE_FPGA
-template <typename Device, typename T>
-bool PaddleMobilePredictor<Device, T>::Run(
-const std::vector<PaddleTensor> &inputs,
-std::vector<PaddleTensor> *output_data, std::vector<int> *index_data,
-int batch_size) {
-if (inputs.empty()) {
-LOG(kLOG_ERROR) << "At least one output should be set with tensors' names.";
-return false;
-}
-auto input = inputs[0];
-if (input.shape.size() != 4) {
-LOG(kLOG_ERROR) << "input shape not equal to 4!";
-return false;
-}
-std::vector<int64_t> dims;
-for (auto d : input.shape) {
-dims.push_back(static_cast<int64_t>(d));
-}
-// use tensor
-framework::DDim ddim =
-framework::make_ddim({dims[0], dims[1], dims[2], dims[3]});
-framework::Tensor input_tensor;
-input_tensor.Resize(ddim);
-int input_length = framework::product(ddim);
-auto input_ptr = input_tensor.mutable_data<T>();
-memcpy(input_ptr, static_cast<T *>(input.data.data()),
-input_length * sizeof(T));
-paddle_mobile_->Predict(input_tensor);
-auto num_result = index_data->size();
-if (output_data->size() != num_result) {
-LOG(kLOG_ERROR) << "index and output number don't match";
-return false;
-}
-for (int i = 0; i < num_result; i++) {
-auto output_tensor = paddle_mobile_->FetchResult((*index_data)[i]);
-if (output_data->empty()) {
-LOG(kLOG_ERROR)
-<< "At least one output should be set with tensors' names.";
-return false;
-}
-auto &output = (*output_data)[i];
-int output_length = output_tensor->numel();
-std::vector<int64_t> tensor_shape =
-framework::vectorize(output_tensor->dims());
-for (auto d : tensor_shape) {
-output.shape.push_back(static_cast<int>(d));
-}
-if (output.data.length() < output_length * sizeof(T)) {
-output.data.Resize(output_length * sizeof(T));
-}
-memcpy(output.data.data(), output_tensor->template data<T>(),
-output_length * sizeof(T));
-}
-return true;
-}
+void ConvertPaddleTensors(const PaddleTensor &src, framework::Tensor *des) {
+des->Resize(framework::make_ddim(src.shape));
+des->external_data = src.data.data();
+des->set_type(src.dtypeid);
+des->layout =
+src.layout == LAYOUT_HWC ? framework::LAYOUT_HWC : framework::LAYOUT_CHW;
+}
+void ConvertTensors(const framework::Tensor &src, PaddleTensor *des) {
+des->shape = framework::vectorize2int(src.dims());
+des->dtypeid = src.type();
+des->layout = src.layout == framework::LAYOUT_HWC ? LAYOUT_HWC : LAYOUT_CHW;
+auto num = src.numel();
+if (src.type() == typeid(float)) {
+des->data.Reset(const_cast<float *>(src.data<float>()),
+num * sizeof(float));
+} else {
+des->data.Reset(const_cast<int16_t *>(src.data<int16_t>()),
+num * sizeof(int16_t));
+}
+}
+template <typename Device, typename T>
+void PaddleMobilePredictor<Device, T>::FeedPaddleTensors(
+const std::vector<PaddleTensor> &inputs) {
+auto num = inputs.size();
+std::vector<framework::Tensor> tensors(num, framework::Tensor());
+for (int i = 0; i < num; i++) {
+tensors[i].init(typeid(float));
+ConvertPaddleTensors(inputs[i], &tensors[i]);
+}
+paddle_mobile_->FeedData(tensors);
+}
+template <typename Device, typename T>
+void PaddleMobilePredictor<Device, T>::FetchPaddleTensors(
+std::vector<PaddleTensor> *outputs) {
+auto num = outputs->size();
+PADDLE_MOBILE_ENFORCE(num > 0, "0 output pointers is not permitted");
+std::vector<framework::Tensor *> tensors(num, nullptr);
+paddle_mobile_->GetResults(&tensors);
+for (int i = 0; i < num; i++) {
+ConvertTensors(*tensors[i], &(*outputs)[i]);
+}
+}
template <typename Device, typename T>
void PaddleMobilePredictor<Device, T>::FeedData(
const std::vector<void *> &inputs) {
......
@@ -32,13 +32,13 @@ class PaddleMobilePredictor : public PaddlePredictor {
std::vector<PaddleTensor>* output_data,
int batch_size = -1) override;
#ifdef PADDLE_MOBILE_FPGA
-bool Run(const std::vector<PaddleTensor>& inputs,
-std::vector<PaddleTensor>* output_data, std::vector<int>* index_data,
-int batch_size = -1) override;
void FeedData(const std::vector<void*>& inputs) override;
void GetResults(std::vector<void*>* outputs) override;
-void Predict_From_To(int start = 0, int end = -1) override;
+void Predict_From_To(int start, int end) override;
+void FeedPaddleTensors(const std::vector<PaddleTensor>& inputs) override;
+void FetchPaddleTensors(std::vector<PaddleTensor>* outputs) override;
#endif
~PaddleMobilePredictor() override;
private:
......
@@ -24,6 +24,7 @@ limitations under the License. */
#include <cassert>
#include <memory>
#include <string>
#include <typeindex>
#include <vector>
// #define PADDLE_MOBILE_FPGA
@@ -33,12 +34,21 @@ namespace paddle_mobile {
#ifdef PADDLE_MOBILE_FPGA
namespace fpga {
int open_device();
-}
+void* fpga_malloc(size_t size);
void fpga_free(void* ptr);
} // namespace fpga
#endif
enum PaddleDType {
FLOAT32,
FLOAT16,
INT64,
INT8,
};
enum LayoutType {
LAYOUT_CHW = 1,
LAYOUT_HWC = 0,
};
class PaddleBuf {
@@ -78,6 +88,8 @@ struct PaddleTensor {
// TODO(Superjomn) for LoD support, add a vector<vector<int>> field if needed.
PaddleBuf data; // blob of data.
PaddleDType dtype;
std::type_index dtypeid = typeid(float);
LayoutType layout;
};
enum class PaddleEngineKind {
@@ -116,12 +128,11 @@ class PaddlePredictor {
std::string param_file;
};
#ifdef PADDLE_MOBILE_FPGA
-virtual bool Run(const std::vector<PaddleTensor>& inputs,
-std::vector<PaddleTensor>* output_data,
-std::vector<int>* index_data, int batch_size = -1) = 0;
virtual void FeedData(const std::vector<void*>& inputs) = 0;
virtual void GetResults(std::vector<void*>* outputs) = 0;
-virtual void Predict_From_To(int start = 0, int end = -1) = 0;
+virtual void Predict_From_To(int start, int end) = 0;
+virtual void FeedPaddleTensors(const std::vector<PaddleTensor>& inputs) = 0;
+virtual void FetchPaddleTensors(std::vector<PaddleTensor>* outputs) = 0;
#endif
protected:
......
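For reference, a condensed sketch of how the extended PaddleTensor (dtypeid, layout) is filled in and pushed through the new FeedPaddleTensors / FetchPaddleTensors entry points; the include path, tensor name, shape, and output count below are illustrative assumptions, and the full RFCN test added at the end of this commit exercises the same flow end to end:

#include <vector>
#include "io/paddle_inference_api.h"  // path assumed

void feed_and_fetch(paddle_mobile::PaddlePredictor *predictor, float *img, int img_len) {
  paddle_mobile::PaddleTensor t;
  t.name = "image";                      // illustrative name
  t.dtype = paddle_mobile::FLOAT32;
  t.dtypeid = typeid(float);             // new field: concrete element type
  t.layout = paddle_mobile::LAYOUT_HWC;  // new field: memory layout of the buffer
  t.shape = {1, 224, 224, 3};            // illustrative NHWC shape
  t.data.Reset(img, img_len * sizeof(float));  // wrap caller-owned memory, no copy
  predictor->FeedPaddleTensors({t});
  predictor->Predict_From_To(0, -1);
  // FetchPaddleTensors fills a pre-sized vector, one entry per fetch variable.
  std::vector<paddle_mobile::PaddleTensor> outs(1);
  predictor->FetchPaddleTensors(&outs);
}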
@@ -231,11 +231,23 @@ template <typename Device, typename T>
void PaddleMobile<Device, T>::FeedData(const std::vector<void *> &v) {
executor_->FeedData(v);
};
template <typename Device, typename T>
void PaddleMobile<Device, T>::FeedData(
const std::vector<framework::Tensor> &v) {
executor_->FeedData(v);
};
template <typename Device, typename T>
void PaddleMobile<Device, T>::GetResults(std::vector<void *> *v) {
executor_->GetResults(v);
}
template <typename Device, typename T>
void PaddleMobile<Device, T>::GetResults(std::vector<framework::Tensor *> *v) {
executor_->GetResults(v);
}
template <typename Device, typename T>
std::shared_ptr<framework::Tensor> PaddleMobile<Device, T>::FetchResult(
int id) {
......
@@ -91,7 +91,11 @@ class PaddleMobile {
void InjectVariable(const framework::Tensor &t, std::string var_name);
void FeedData(const framework::Tensor &t);
void FeedData(const std::vector<void *> &v);
void FeedData(const std::vector<framework::Tensor> &v);
void GetResults(std::vector<void *> *v);
void GetResults(std::vector<framework::Tensor *> *v);
std::shared_ptr<framework::Tensor> FetchResult(int id = -1);
void Predict_From_To(int start = 0, int end = -1);
void Predict_From(int start);
......
@@ -80,6 +80,9 @@ if (CON GREATER -1)
ADD_EXECUTABLE(test-marker fpga/test_marker.cpp test_helper.h test_include.h executor_for_test.h)
target_link_libraries(test-marker paddle-mobile)
ADD_EXECUTABLE(test-rfcn-api fpga/test_rfcn_api.cpp)
target_link_libraries(test-rfcn-api paddle-mobile)
set(FOUND_MATCH ON)
endif ()
......
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include <iostream>
#include <fstream>
#include "../../src/io/paddle_inference_api.h"
using namespace paddle_mobile;
using namespace paddle_mobile::fpga;
static const char *g_image = "../models/rfcn/data.bin";
static const char *g_model = "../models/rfcn/model";
static const char *g_param = "../models/rfcn/params";
void readStream(std::string filename, char *buf) {
std::ifstream in;
in.open(filename, std::ios::in | std::ios::binary);
if (!in.is_open()) {
std::cout << "open File Failed." << std::endl;
return;
}
in.seekg(0, std::ios::end); // go to the end
auto length = in.tellg(); // report location (this is the length)
in.seekg(0, std::ios::beg); // go back to the beginning
in.read(buf, length);
in.close();
}
PaddleMobileConfig GetConfig() {
PaddleMobileConfig config;
config.precision = PaddleMobileConfig::FP32;
config.device = PaddleMobileConfig::kFPGA;
config.prog_file = g_model;
config.param_file = g_param;
config.thread_num = 1;
config.batch_size = 1;
config.optimize = true;
config.lod_mode = true;
config.quantification = false;
return config;
}
int main() {
open_device();
PaddleMobileConfig config = GetConfig();
auto predictor =
CreatePaddlePredictor<PaddleMobileConfig,
PaddleEngineKind::kPaddleMobile>(config);
std::cout << "after loading model" << std::endl;
float img_info[3] = {768, 1536, 768.0f / 960.0f};
int img_length = 768 * 1536 * 3;
auto img = reinterpret_cast<float *>(fpga_malloc(img_length * sizeof(float)));
readStream(g_image, reinterpret_cast<char *>(img));
std::cout << "after initializing data" << std::endl;
/*
predictor->FeedData({img_info, img});
predictor->Predict_From_To(0, -1);
std::cout << " Finishing predicting " << std::endl;
std::vector<void *> v(3, nullptr);
predictor->GetResults(&v);
int post_nms = 300;
for (int num = 0; num < post_nms; num ++){
for (int i = 0; i < 8; i ++){
std:: cout << ((float*)(v[0]))[num * 8 + i] << std::endl;
}
}
for (int num = 0; num < post_nms; num ++){
for (int i = 0; i < 8; i ++){
std:: cout << ((float*)(v[1]))[num * 8 + i] << std::endl;
}
}
for (int num = 0; num < post_nms; num ++){
for (int i = 0; i < 4; i ++){
std:: cout << ((float*)(v[2]))[num * 4 + i] << std::endl;
}
}
*/
struct PaddleTensor t_img_info, t_img;
t_img_info.dtype = FLOAT32;
t_img_info.layout = LAYOUT_HWC;
t_img_info.shape = std::vector<int>({1,3});
t_img_info.name = "Image information";
t_img_info.data.Reset(img_info, 3 * sizeof(float));
t_img.dtype = FLOAT32;
t_img.layout = LAYOUT_HWC;
t_img.shape = std::vector<int>({1,768, 1536, 3});
t_img.name = "Image information";
t_img.data.Reset(img, img_length * sizeof(float));
predictor->FeedPaddleTensors({t_img_info, t_img});
std::cout << "Finishing feeding data " << std::endl;
predictor->Predict_From_To(0, -1);
std::cout << "Finishing predicting " << std::endl;
std::vector<PaddleTensor> v(3, PaddleTensor());
predictor->FetchPaddleTensors(&v);
auto post_nms = v[0].data.length() / sizeof(float) / 8;
for (int num = 0; num < post_nms; num++) {
for (int i = 0; i < 8; i++) {
auto p = reinterpret_cast<float *>(v[0].data.data());
std::cout << p[num * 8 + i] << std::endl;
}
}
for (int num = 0; num < post_nms; num++) {
for (int i = 0; i < 8; i++) {
auto p = reinterpret_cast<float *>(v[1].data.data());
std::cout << p[num * 8 + i] << std::endl;
}
}
for (int num = 0; num < post_nms; num++) {
for (int i = 0; i < 4; i++) {
auto p = reinterpret_cast<float *>(v[2].data.data());
std::cout << p[num * 4 + i] << std::endl;
}
}
return 0;
}