Commit add0c1eb authored by zhangyang0701

Support multiple PaddleTensors for the FPGA track

Parent 70baa872
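This change drops the FPGA-specific Run() overload, which took a single 4-D input plus a list of fetch indices, and replaces it with FeedPaddleTensors() / FetchPaddleTensors(), so callers can pass any number of inputs and read back every output as PaddleTensors. Inside the executor they are matched by position onto the program's "feed" and "fetch" variables, and the counts must agree. Below is a minimal usage sketch of the new call sequence, modeled on the test_rfcn_api.cpp added in this commit; the function name RunOnce and the include path are illustrative only, and the API is only compiled under PADDLE_MOBILE_FPGA.

```cpp
// Illustrative sketch, not part of the commit; requires a PADDLE_MOBILE_FPGA build.
#include <vector>

#include "io/paddle_inference_api.h"  // include path assumed

using namespace paddle_mobile;

// Feed one FP32 HWC input and fetch one output through the new multi-tensor API.
void RunOnce(PaddlePredictor *predictor, float *input, int n, int h, int w,
             int c) {
  PaddleTensor in;
  in.dtype = FLOAT32;
  in.dtypeid = typeid(float);  // std::type_index field added by this commit
  in.layout = LAYOUT_HWC;      // the FPGA track works on HWC data
  in.shape = std::vector<int>({n, h, w, c});
  in.data.Reset(input, n * h * w * c * sizeof(float));  // wraps the buffer, no copy

  predictor->FeedPaddleTensors({in});  // one tensor per "feed" variable, matched by index
  predictor->Predict_From_To(0, -1);   // run the whole program

  std::vector<PaddleTensor> out(1, PaddleTensor());  // size must equal the number of "fetch" vars
  predictor->FetchPaddleTensors(&out);  // fills shape, dtypeid, layout and wraps the output buffer
  auto *result = reinterpret_cast<float *>(out[0].data.data());
  (void)result;  // consume the results here
}
```

Note that ConvertPaddleTensors stores the caller's buffer as external_data rather than copying it, so the input memory has to stay valid while the prediction runs, and the fetched PaddleBufs point into the executor's own output tensors.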
......@@ -32,7 +32,6 @@ void format_image(framework::Tensor *image_tensor) {
float *p_data = external_ptr == nullptr ? data_ptr : external_ptr;
image::format_image(&p_data, channel, height, width);
if (p_data != data_ptr && external_ptr == nullptr) {
image_tensor->reset_data_ptr(p_data);
}
......@@ -61,6 +60,7 @@ void format_fp16_ofm(framework::Tensor *ofm_tensor) {
memset(p, 0, memory_size);
ofm_tensor->reset_data_ptr(p);
ofm_tensor->set_type(typeid(half));
ofm_tensor->fpga_data_num = memory_size / sizeof(half);
}
void format_fp16_ofm(framework::Tensor *ofm_tensor, framework::DDim dims) {
......@@ -79,7 +79,9 @@ void format_fp16_ofm(framework::Tensor *ofm_tensor, framework::DDim dims) {
memset(p, 0, memory_size);
ofm_tensor->reset_data_ptr(p);
ofm_tensor->set_type(typeid(half));
ofm_tensor->fpga_data_num = memory_size / sizeof(half);
}
void format_fp32_ofm(framework::Tensor *ofm_tensor) {
auto dims = ofm_tensor->dims();
size_t memory_size = 0;
......@@ -96,6 +98,7 @@ void format_fp32_ofm(framework::Tensor *ofm_tensor) {
memset(p, 0, memory_size);
ofm_tensor->reset_data_ptr(p);
ofm_tensor->set_type(typeid(float));
ofm_tensor->fpga_data_num = memory_size / sizeof(float);
}
float filter_find_max(framework::Tensor *filter_tensor) {
......
......@@ -475,6 +475,19 @@ void Executor<Device, T>::FeedData(const std::vector<void *> &v) {
}
}
template <typename Device, typename T>
void Executor<Device, T>::FeedData(const vector<framework::Tensor> &v) {
auto input_size = v.size();
auto vars = program_.scope->VarContain("feed");
PADDLE_MOBILE_ENFORCE(input_size == vars.size(),
"input data number not correct");
for (int i = 0; i < input_size; i++) {
auto var = program_.scope->Var("feed", i);
auto feed_tensor = var->template GetMutable<LoDTensor>();
feed_tensor->ShareDataWith(v[i]);
}
}
template <typename Device, typename T>
void Executor<Device, T>::GetResults(std::vector<void *> *v) {
auto output_size = v->size();
......@@ -489,6 +502,20 @@ void Executor<Device, T>::GetResults(std::vector<void *> *v) {
}
}
template <typename Device, typename T>
void Executor<Device, T>::GetResults(std::vector<framework::Tensor *> *v) {
auto output_size = v->size();
PADDLE_MOBILE_ENFORCE(output_size > 0, "Empty output");
auto vars = program_.scope->VarContain("fetch");
PADDLE_MOBILE_ENFORCE(output_size == vars.size(),
"output data number not correct");
for (int i = 0; i < output_size; i++) {
auto var = program_.scope->Var("fetch", i);
auto fetch_tensor = var->template GetMutable<LoDTensor>();
(*v)[i] = fetch_tensor;
}
}
template <typename Device, typename T>
std::shared_ptr<Tensor> Executor<Device, T>::FetchResult(int id) {
auto &ops = ops_of_block_[0];
......
......@@ -53,7 +53,11 @@ class Executor {
void InjectVariable(const Tensor &t, std::string var_name);
void FeedData(const Tensor &t);
void FeedData(const std::vector<void *> &v);
void FeedData(const std::vector<framework::Tensor> &v);
void GetResults(std::vector<void *> *v);
void GetResults(std::vector<framework::Tensor *> *v);
std::shared_ptr<Tensor> FetchResult(int id = -1);
void Predict_From_To(int start = 0, int end = -1);
void Predict_From(int start);
......
......@@ -31,6 +31,11 @@ limitations under the License. */
namespace paddle_mobile {
namespace framework {
enum LayoutType {
LAYOUT_CHW = 1,
LAYOUT_HWC = 0,
};
class LoDTensor;
class Tensor : public TensorBase {
......@@ -223,6 +228,8 @@ class Tensor : public TensorBase {
float scale[2]; // scale[0]= MAX/127.0, scale[1]= 127.0/MAX
void *external_data = nullptr; // only used for Feed
LayoutType layout = LAYOUT_HWC;
int64_t fpga_data_num;
#endif
};
......
......@@ -113,72 +113,53 @@ bool PaddleMobilePredictor<Device, T>::Run(
}
#ifdef PADDLE_MOBILE_FPGA
Removed:

template <typename Device, typename T>
bool PaddleMobilePredictor<Device, T>::Run(
    const std::vector<PaddleTensor> &inputs,
    std::vector<PaddleTensor> *output_data, std::vector<int> *index_data,
    int batch_size) {
  if (inputs.empty()) {
    LOG(kLOG_ERROR) << "At least one output should be set with tensors' names.";
    return false;
  }
  auto input = inputs[0];
  if (input.shape.size() != 4) {
    LOG(kLOG_ERROR) << "input shape not equal to 4!";
    return false;
  }
  std::vector<int64_t> dims;
  for (auto d : input.shape) {
    dims.push_back(static_cast<int64_t>(d));
  }
  // use tensor
  framework::DDim ddim =
      framework::make_ddim({dims[0], dims[1], dims[2], dims[3]});
  framework::Tensor input_tensor;
  input_tensor.Resize(ddim);
  int input_length = framework::product(ddim);
  auto input_ptr = input_tensor.mutable_data<T>();
  memcpy(input_ptr, static_cast<T *>(input.data.data()),
         input_length * sizeof(T));
  paddle_mobile_->Predict(input_tensor);
  auto num_result = index_data->size();
  if (output_data->size() != num_result) {
    LOG(kLOG_ERROR) << "index and output number don't match";
    return false;
  }
  for (int i = 0; i < num_result; i++) {
    auto output_tensor = paddle_mobile_->FetchResult((*index_data)[i]);
    if (output_data->empty()) {
      LOG(kLOG_ERROR)
          << "At least one output should be set with tensors' names.";
      return false;
    }
    auto &output = (*output_data)[i];
    int output_length = output_tensor->numel();
    std::vector<int64_t> tensor_shape =
        framework::vectorize(output_tensor->dims());
    for (auto d : tensor_shape) {
      output.shape.push_back(static_cast<int>(d));
    }
    if (output.data.length() < output_length * sizeof(T)) {
      output.data.Resize(output_length * sizeof(T));
    }
    memcpy(output.data.data(), output_tensor->template data<T>(),
           output_length * sizeof(T));
  }
  return true;
}

Added:

void ConvertPaddleTensors(const PaddleTensor &src, framework::Tensor *des) {
  des->Resize(framework::make_ddim(src.shape));
  des->external_data = src.data.data();
  des->set_type(src.dtypeid);
  des->layout =
      src.layout == LAYOUT_HWC ? framework::LAYOUT_HWC : framework::LAYOUT_CHW;
}

void ConvertTensors(const framework::Tensor &src, PaddleTensor *des) {
  des->shape = framework::vectorize2int(src.dims());
  des->dtypeid = src.type();
  des->layout = src.layout == framework::LAYOUT_HWC ? LAYOUT_HWC : LAYOUT_CHW;
  auto num = src.numel();
  if (src.type() == typeid(float)) {
    des->data.Reset(const_cast<float *>(src.data<float>()),
                    num * sizeof(float));
  } else {
    des->data.Reset(const_cast<int16_t *>(src.data<int16_t>()),
                    num * sizeof(int16_t));
  }
}

template <typename Device, typename T>
void PaddleMobilePredictor<Device, T>::FeedPaddleTensors(
    const std::vector<PaddleTensor> &inputs) {
  auto num = inputs.size();
  std::vector<framework::Tensor> tensors(num, framework::Tensor());
  for (int i = 0; i < num; i++) {
    tensors[i].init(typeid(float));
    ConvertPaddleTensors(inputs[i], &tensors[i]);
  }
  paddle_mobile_->FeedData(tensors);
}

template <typename Device, typename T>
void PaddleMobilePredictor<Device, T>::FetchPaddleTensors(
    std::vector<PaddleTensor> *outputs) {
  auto num = outputs->size();
  PADDLE_MOBILE_ENFORCE(num > 0, "0 output pointers is not permitted");
  std::vector<framework::Tensor *> tensors(num, nullptr);
  paddle_mobile_->GetResults(&tensors);
  for (int i = 0; i < num; i++) {
    ConvertTensors(*tensors[i], &(*outputs)[i]);
  }
}
template <typename Device, typename T>
void PaddleMobilePredictor<Device, T>::FeedData(
const std::vector<void *> &inputs) {
......
......@@ -32,13 +32,13 @@ class PaddleMobilePredictor : public PaddlePredictor {
std::vector<PaddleTensor>* output_data,
int batch_size = -1) override;
#ifdef PADDLE_MOBILE_FPGA
bool Run(const std::vector<PaddleTensor>& inputs,
std::vector<PaddleTensor>* output_data, std::vector<int>* index_data,
int batch_size = -1) override;
void FeedData(const std::vector<void*>& inputs) override;
void GetResults(std::vector<void*>* outputs) override;
void Predict_From_To(int start = 0, int end = -1) override;
void Predict_From_To(int start, int end) override;
void FeedPaddleTensors(const std::vector<PaddleTensor>& inputs) override;
void FetchPaddleTensors(std::vector<PaddleTensor>* outputs) override;
#endif
~PaddleMobilePredictor() override;
private:
......
......@@ -24,6 +24,7 @@ limitations under the License. */
#include <cassert>
#include <memory>
#include <string>
#include <typeindex>
#include <vector>
// #define PADDLE_MOBILE_FPGA
......@@ -33,12 +34,21 @@ namespace paddle_mobile {
#ifdef PADDLE_MOBILE_FPGA
namespace fpga {
int open_device();
}
void* fpga_malloc(size_t size);
void fpga_free(void* ptr);
} // namespace fpga
#endif
enum PaddleDType {
FLOAT32,
FLOAT16,
INT64,
INT8,
};
enum LayoutType {
LAYOUT_CHW = 1,
LAYOUT_HWC = 0,
};
class PaddleBuf {
......@@ -78,6 +88,8 @@ struct PaddleTensor {
// TODO(Superjomn) for LoD support, add a vector<vector<int>> field if needed.
PaddleBuf data; // blob of data.
PaddleDType dtype;
std::type_index dtypeid = typeid(float);
LayoutType layout;
};
enum class PaddleEngineKind {
......@@ -116,12 +128,11 @@ class PaddlePredictor {
std::string param_file;
};
#ifdef PADDLE_MOBILE_FPGA
virtual bool Run(const std::vector<PaddleTensor>& inputs,
std::vector<PaddleTensor>* output_data,
std::vector<int>* index_data, int batch_size = -1) = 0;
virtual void FeedData(const std::vector<void*>& inputs) = 0;
virtual void GetResults(std::vector<void*>* outputs) = 0;
virtual void Predict_From_To(int start = 0, int end = -1) = 0;
virtual void Predict_From_To(int start, int end) = 0;
virtual void FeedPaddleTensors(const std::vector<PaddleTensor>& inputs) = 0;
virtual void FetchPaddleTensors(std::vector<PaddleTensor>* outputs) = 0;
#endif
protected:
......
......@@ -231,11 +231,23 @@ template <typename Device, typename T>
void PaddleMobile<Device, T>::FeedData(const std::vector<void *> &v) {
executor_->FeedData(v);
};
template <typename Device, typename T>
void PaddleMobile<Device, T>::FeedData(
const std::vector<framework::Tensor> &v) {
executor_->FeedData(v);
};
template <typename Device, typename T>
void PaddleMobile<Device, T>::GetResults(std::vector<void *> *v) {
executor_->GetResults(v);
}
template <typename Device, typename T>
void PaddleMobile<Device, T>::GetResults(std::vector<framework::Tensor *> *v) {
executor_->GetResults(v);
}
template <typename Device, typename T>
std::shared_ptr<framework::Tensor> PaddleMobile<Device, T>::FetchResult(
int id) {
......
......@@ -91,7 +91,11 @@ class PaddleMobile {
void InjectVariable(const framework::Tensor &t, std::string var_name);
void FeedData(const framework::Tensor &t);
void FeedData(const std::vector<void *> &v);
void FeedData(const std::vector<framework::Tensor> &v);
void GetResults(std::vector<void *> *v);
void GetResults(std::vector<framework::Tensor *> *v);
std::shared_ptr<framework::Tensor> FetchResult(int id = -1);
void Predict_From_To(int start = 0, int end = -1);
void Predict_From(int start);
......
......@@ -80,6 +80,9 @@ if (CON GREATER -1)
ADD_EXECUTABLE(test-marker fpga/test_marker.cpp test_helper.h test_include.h executor_for_test.h)
target_link_libraries(test-marker paddle-mobile)
ADD_EXECUTABLE(test-rfcn-api fpga/test_rfcn_api.cpp)
target_link_libraries(test-rfcn-api paddle-mobile)
set(FOUND_MATCH ON)
endif ()
......
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include <iostream>
#include <fstream>
#include "../../src/io/paddle_inference_api.h"
using namespace paddle_mobile;
using namespace paddle_mobile::fpga;
static const char *g_image = "../models/rfcn/data.bin";
static const char *g_model = "../models/rfcn/model";
static const char *g_param = "../models/rfcn/params";
void readStream(std::string filename, char *buf) {
std::ifstream in;
in.open(filename, std::ios::in | std::ios::binary);
if (!in.is_open()) {
std::cout << "open File Failed." << std::endl;
return;
}
in.seekg(0, std::ios::end); // go to the end
auto length = in.tellg(); // report location (this is the length)
in.seekg(0, std::ios::beg); // go back to the beginning
in.read(buf, length);
in.close();
}
PaddleMobileConfig GetConfig() {
PaddleMobileConfig config;
config.precision = PaddleMobileConfig::FP32;
config.device = PaddleMobileConfig::kFPGA;
config.prog_file = g_model;
config.param_file = g_param;
config.thread_num = 1;
config.batch_size = 1;
config.optimize = true;
config.lod_mode = true;
config.quantification = false;
return config;
}
int main() {
open_device();
PaddleMobileConfig config = GetConfig();
auto predictor =
CreatePaddlePredictor<PaddleMobileConfig,
PaddleEngineKind::kPaddleMobile>(config);
std::cout << "after loading model" << std::endl;
float img_info[3] = {768, 1536, 768.0f / 960.0f};
int img_length = 768 * 1536 * 3;
auto img = reinterpret_cast<float *>(fpga_malloc(img_length * sizeof(float)));
readStream(g_image, reinterpret_cast<char *>(img));
std::cout << "after initializing data" << std::endl;
/*
predictor->FeedData({img_info, img});
predictor->Predict_From_To(0, -1);
std::cout << " Finishing predicting " << std::endl;
std::vector<void *> v(3, nullptr);
predictor->GetResults(&v);
int post_nms = 300;
for (int num = 0; num < post_nms; num ++){
for (int i = 0; i < 8; i ++){
std:: cout << ((float*)(v[0]))[num * 8 + i] << std::endl;
}
}
for (int num = 0; num < post_nms; num ++){
for (int i = 0; i < 8; i ++){
std:: cout << ((float*)(v[1]))[num * 8 + i] << std::endl;
}
}
for (int num = 0; num < post_nms; num ++){
for (int i = 0; i < 4; i ++){
std:: cout << ((float*)(v[2]))[num * 4 + i] << std::endl;
}
}
*/
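// Describe the two inputs as PaddleTensors: the image-info blob and the raw FP32 HWC image.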
struct PaddleTensor t_img_info, t_img;
t_img_info.dtype = FLOAT32;
t_img_info.layout = LAYOUT_HWC;
t_img_info.shape = std::vector<int>({1, 3});
t_img_info.name = "Image information";
t_img_info.data.Reset(img_info, 3 * sizeof(float));
t_img.dtype = FLOAT32;
t_img.layout = LAYOUT_HWC;
t_img.shape = std::vector<int>({1, 768, 1536, 3});
t_img.name = "Image information";
t_img.data.Reset(img, img_length * sizeof(float));
predictor->FeedPaddleTensors({t_img_info, t_img});
std::cout << "Finishing feeding data " << std::endl;
predictor->Predict_From_To(0, -1);
std::cout << "Finishing predicting " << std::endl;
std::vector<PaddleTensor> v(3, PaddleTensor());
predictor->FetchPaddleTensors(&v);
auto post_nms = v[0].data.length() / sizeof(float) / 8;
for (int num = 0; num < post_nms; num++) {
  for (int i = 0; i < 8; i++) {
    auto p = reinterpret_cast<float *>(v[0].data.data());
    std::cout << p[num * 8 + i] << std::endl;
  }
}
for (int num = 0; num < post_nms; num++) {
  for (int i = 0; i < 8; i++) {
    auto p = reinterpret_cast<float *>(v[1].data.data());
    std::cout << p[num * 8 + i] << std::endl;
  }
}
for (int num = 0; num < post_nms; num++) {
  for (int i = 0; i < 4; i++) {
    auto p = reinterpret_cast<float *>(v[2].data.data());
    std::cout << p[num * 4 + i] << std::endl;
  }
}
return 0;
}