提交 294e346f 编写于 作者: H hjchen2

Make fpga/gpu compile, and disable memory optimization if compiled with gpu

上级 7701ac0d
...@@ -24,6 +24,7 @@ typedef enum { ...@@ -24,6 +24,7 @@ typedef enum {
_void = 0, _void = 0,
_float, _float,
_int, _int,
_uint16_t,
_double, _double,
_int64_t, _int64_t,
_size_t, _size_t,
...@@ -64,6 +65,9 @@ typedef enum { ...@@ -64,6 +65,9 @@ typedef enum {
_dim7, _dim7,
_dim8, _dim8,
_dim9, _dim9,
#ifdef PADDLE_MOBILE_CL
_cl_image,
#endif
} kTypeId_t; } kTypeId_t;
template <typename T> template <typename T>
...@@ -83,15 +87,18 @@ struct type_id { ...@@ -83,15 +87,18 @@ struct type_id {
} }
}; };
template <typename T> #define OVERIDE_TYPEID_OPERATOR(oprand) \
inline bool operator==(const kTypeId_t &t0, const type_id<T> &t1) { template <typename T> \
return t0 == t1.hash_code(); inline bool operator oprand(const kTypeId_t &t0, const type_id<T> &t1) { \
} return t0 oprand t1.hash_code(); \
} \
template <typename T> \
inline bool operator oprand(const type_id<T> &t0, const kTypeId_t &t1) { \
return t1 oprand t0.hash_code(); \
}
template <typename T> OVERIDE_TYPEID_OPERATOR(==)
inline bool operator==(const type_id<T> &t0, const kTypeId_t &t1) { OVERIDE_TYPEID_OPERATOR(!=)
return t1 == t0.hash_code();
}
namespace framework { namespace framework {
class BlockDesc; class BlockDesc;
...@@ -99,6 +106,9 @@ class Tensor; ...@@ -99,6 +106,9 @@ class Tensor;
class LoDTensor; class LoDTensor;
class SelectedRows; class SelectedRows;
class Scope; class Scope;
#ifdef PADDLE_MOBILE_CL
class CLImage;
#endif
template <int> template <int>
struct Dim; struct Dim;
...@@ -114,6 +124,7 @@ struct Dim; ...@@ -114,6 +124,7 @@ struct Dim;
REGISTER_TYPE_ID(void, _void) REGISTER_TYPE_ID(void, _void)
REGISTER_TYPE_ID(float, _float) REGISTER_TYPE_ID(float, _float)
REGISTER_TYPE_ID(int, _int) REGISTER_TYPE_ID(int, _int)
REGISTER_TYPE_ID(uint16_t, _uint16_t)
REGISTER_TYPE_ID(double, _double) REGISTER_TYPE_ID(double, _double)
REGISTER_TYPE_ID(int64_t, _int64_t) REGISTER_TYPE_ID(int64_t, _int64_t)
REGISTER_TYPE_ID(size_t, _size_t) REGISTER_TYPE_ID(size_t, _size_t)
...@@ -159,6 +170,9 @@ REGISTER_TYPE_ID(framework::Dim<7>, _dim7) ...@@ -159,6 +170,9 @@ REGISTER_TYPE_ID(framework::Dim<7>, _dim7)
REGISTER_TYPE_ID(framework::Dim<8>, _dim8) REGISTER_TYPE_ID(framework::Dim<8>, _dim8)
REGISTER_TYPE_ID(framework::Dim<9>, _dim9) REGISTER_TYPE_ID(framework::Dim<9>, _dim9)
#ifdef PADDLE_MOBILE_CL
REGISTER_TYPE_ID(framework::CLImage, _cl_image)
#endif
} // namespace paddle_mobile } // namespace paddle_mobile
namespace std { namespace std {
......
...@@ -28,8 +28,8 @@ namespace fpga { ...@@ -28,8 +28,8 @@ namespace fpga {
void format_image(framework::Tensor *image_tensor) { void format_image(framework::Tensor *image_tensor) {
auto dims = image_tensor->dims(); auto dims = image_tensor->dims();
auto channel = dims[1], height = dims[2], width = dims[3]; auto channel = dims[1], height = dims[2], width = dims[3];
std::type_index input_type = image_tensor->type(); kTypeId_t input_type = image_tensor->type();
if (input_type == typeid(float)) { if (input_type == type_id<float>()) {
auto data_ptr = image_tensor->data<float>(); auto data_ptr = image_tensor->data<float>();
auto external_ptr = reinterpret_cast<float *>(image_tensor->external_data); auto external_ptr = reinterpret_cast<float *>(image_tensor->external_data);
float *p_data = external_ptr == nullptr ? data_ptr : external_ptr; float *p_data = external_ptr == nullptr ? data_ptr : external_ptr;
...@@ -51,7 +51,7 @@ void format_image(framework::Tensor *image_tensor) { ...@@ -51,7 +51,7 @@ void format_image(framework::Tensor *image_tensor) {
} }
void format_ofm(framework::Tensor *ofm_tensor) { void format_ofm(framework::Tensor *ofm_tensor) {
if (ofm_tensor->type() == typeid(float)) { if (ofm_tensor->type() == type_id<float>()) {
format_fp32_ofm(ofm_tensor); format_fp32_ofm(ofm_tensor);
} else { } else {
format_fp16_ofm(ofm_tensor); format_fp16_ofm(ofm_tensor);
...@@ -72,7 +72,7 @@ void format_fp16_ofm(framework::Tensor *ofm_tensor) { ...@@ -72,7 +72,7 @@ void format_fp16_ofm(framework::Tensor *ofm_tensor) {
auto p = fpga_malloc(memory_size); auto p = fpga_malloc(memory_size);
// memset(p, 0, memory_size); // memset(p, 0, memory_size);
ofm_tensor->reset_data_ptr(p); ofm_tensor->reset_data_ptr(p);
ofm_tensor->set_type(typeid(half)); ofm_tensor->set_type(type_id<half>().hash_code());
ofm_tensor->fpga_data_num = memory_size / sizeof(half); ofm_tensor->fpga_data_num = memory_size / sizeof(half);
fpga::fpga_flush(p, memory_size); fpga::fpga_flush(p, memory_size);
} }
...@@ -92,7 +92,7 @@ void format_fp16_ofm(framework::Tensor *ofm_tensor, framework::DDim dims) { ...@@ -92,7 +92,7 @@ void format_fp16_ofm(framework::Tensor *ofm_tensor, framework::DDim dims) {
auto p = fpga_malloc(memory_size); auto p = fpga_malloc(memory_size);
// memset(p, 0, memory_size); // memset(p, 0, memory_size);
ofm_tensor->reset_data_ptr(p); ofm_tensor->reset_data_ptr(p);
ofm_tensor->set_type(typeid(half)); ofm_tensor->set_type(type_id<half>().hash_code());
ofm_tensor->fpga_data_num = memory_size / sizeof(half); ofm_tensor->fpga_data_num = memory_size / sizeof(half);
fpga::fpga_flush(p, memory_size); fpga::fpga_flush(p, memory_size);
} }
...@@ -112,7 +112,7 @@ void format_fp32_ofm(framework::Tensor *ofm_tensor) { ...@@ -112,7 +112,7 @@ void format_fp32_ofm(framework::Tensor *ofm_tensor) {
auto p = fpga_malloc(memory_size); auto p = fpga_malloc(memory_size);
// memset(p, 0, memory_size); // memset(p, 0, memory_size);
ofm_tensor->reset_data_ptr(p); ofm_tensor->reset_data_ptr(p);
ofm_tensor->set_type(typeid(float)); ofm_tensor->set_type(type_id<float>().hash_code());
ofm_tensor->fpga_data_num = memory_size / sizeof(float); ofm_tensor->fpga_data_num = memory_size / sizeof(float);
fpga::fpga_flush(p, memory_size); fpga::fpga_flush(p, memory_size);
} }
...@@ -171,7 +171,7 @@ void format_filter(framework::Tensor *filter_tensor, float max_value, ...@@ -171,7 +171,7 @@ void format_filter(framework::Tensor *filter_tensor, float max_value,
filter::format_filter(&new_data, num, channel, height, width, group_num, filter::format_filter(&new_data, num, channel, height, width, group_num,
max_value); max_value);
filter_tensor->reset_data_ptr(new_data); filter_tensor->reset_data_ptr(new_data);
filter_tensor->set_type(typeid(int8_t)); filter_tensor->set_type(type_id<int8_t>().hash_code());
} }
void format_dwconv_filter(framework::Tensor *filter_tensor, float *scale_ptr) { void format_dwconv_filter(framework::Tensor *filter_tensor, float *scale_ptr) {
auto dims = filter_tensor->dims(); auto dims = filter_tensor->dims();
...@@ -182,7 +182,7 @@ void format_dwconv_filter(framework::Tensor *filter_tensor, float *scale_ptr) { ...@@ -182,7 +182,7 @@ void format_dwconv_filter(framework::Tensor *filter_tensor, float *scale_ptr) {
fpga_copy(new_data, data_ptr, memory_size); fpga_copy(new_data, data_ptr, memory_size);
filter::format_dwconv_filter(&new_data, num, height, width, scale_ptr); filter::format_dwconv_filter(&new_data, num, height, width, scale_ptr);
filter_tensor->reset_data_ptr(new_data); filter_tensor->reset_data_ptr(new_data);
filter_tensor->set_type(typeid(int16_t)); filter_tensor->set_type(type_id<int16_t>().hash_code());
} }
void format_DWDconv_filter(framework::Tensor *filter_tensor, float *scale_ptr, void format_DWDconv_filter(framework::Tensor *filter_tensor, float *scale_ptr,
...@@ -207,7 +207,7 @@ void format_DWDconv_filter(framework::Tensor *filter_tensor, float *scale_ptr, ...@@ -207,7 +207,7 @@ void format_DWDconv_filter(framework::Tensor *filter_tensor, float *scale_ptr,
// framework::make_ddim({num, 1, height, width}); // framework::make_ddim({num, 1, height, width});
// filter_tensor->Resize(dims_new); // filter_tensor->Resize(dims_new);
filter_tensor->reset_data_ptr(new_data); filter_tensor->reset_data_ptr(new_data);
filter_tensor->set_type(typeid(int16_t)); filter_tensor->set_type(type_id<int16_t>().hash_code());
} }
void format_fc_filter(framework::Tensor *filter_tensor, float max_value) { void format_fc_filter(framework::Tensor *filter_tensor, float max_value) {
...@@ -222,7 +222,7 @@ void format_fc_filter(framework::Tensor *filter_tensor, float max_value) { ...@@ -222,7 +222,7 @@ void format_fc_filter(framework::Tensor *filter_tensor, float max_value) {
filter::format_fc_filter(&new_data, num, channel, height, width, 1, filter::format_fc_filter(&new_data, num, channel, height, width, 1,
max_value); max_value);
filter_tensor->reset_data_ptr(new_data); filter_tensor->reset_data_ptr(new_data);
filter_tensor->set_type(typeid(int8_t)); filter_tensor->set_type(type_id<int8_t>().hash_code());
} }
void format_deconv_filter(framework::Tensor *filter_tensor, float max_value, void format_deconv_filter(framework::Tensor *filter_tensor, float max_value,
int group_num, int stride) { int group_num, int stride) {
...@@ -249,7 +249,7 @@ void format_deconv_filter(framework::Tensor *filter_tensor, float max_value, ...@@ -249,7 +249,7 @@ void format_deconv_filter(framework::Tensor *filter_tensor, float max_value,
framework::make_ddim({num, channel, height, width}); framework::make_ddim({num, channel, height, width});
filter_tensor->Resize(dims_new); filter_tensor->Resize(dims_new);
filter_tensor->reset_data_ptr(new_data); filter_tensor->reset_data_ptr(new_data);
filter_tensor->set_type(typeid(int8_t)); filter_tensor->set_type(type_id<int8_t>().hash_code());
} }
void format_bias_scale_array(float **bias_scale_array, void format_bias_scale_array(float **bias_scale_array,
...@@ -273,7 +273,7 @@ void format_concat_output(framework::Tensor *out, int height, int width, ...@@ -273,7 +273,7 @@ void format_concat_output(framework::Tensor *out, int height, int width,
auto ddim = framework::make_ddim({1, sum_channel, height, width}); auto ddim = framework::make_ddim({1, sum_channel, height, width});
out->Resize(ddim); out->Resize(ddim);
out->reset_data_ptr(data_ptr); out->reset_data_ptr(data_ptr);
out->set_type(typeid(half)); out->set_type(type_id<half>().hash_code());
} }
void format_conv_data(framework::Tensor *filter_tensor, void format_conv_data(framework::Tensor *filter_tensor,
framework::Tensor *ofm_tensor, float **bs_ptr, framework::Tensor *ofm_tensor, float **bs_ptr,
......
...@@ -53,12 +53,12 @@ class CLTensor : TensorBase { ...@@ -53,12 +53,12 @@ class CLTensor : TensorBase {
int64_t size = numel() * sizeof(T); int64_t size = numel() * sizeof(T);
holder_.reset(new PlaceholderImpl( holder_.reset(new PlaceholderImpl(
size, reinterpret_cast<void *>(const_cast<T *>(data)), type_id<T>(), size, reinterpret_cast<void *>(const_cast<T *>(data)),
context_, command_queue_)); type_id<T>().hash_code(), context_, command_queue_));
return reinterpret_cast<cl_mem>(holder_->ptr()); return reinterpret_cast<cl_mem>(holder_->ptr());
} }
inline cl_mem mutable_data(std::string type) { inline cl_mem mutable_data(kTypeId_t type) {
if (holder_ != nullptr) { if (holder_ != nullptr) {
holder_->set_type(type); holder_->set_type(type);
} }
...@@ -77,7 +77,7 @@ class CLTensor : TensorBase { ...@@ -77,7 +77,7 @@ class CLTensor : TensorBase {
*/ */
template <typename T> template <typename T>
inline cl_mem mutable_data() { inline cl_mem mutable_data() {
return reinterpret_cast<cl_mem>(mutable_data(type_id<T>())); return reinterpret_cast<cl_mem>(mutable_data(type_id<T>().hash_code()));
} }
/** /**
...@@ -132,7 +132,7 @@ class CLTensor : TensorBase { ...@@ -132,7 +132,7 @@ class CLTensor : TensorBase {
void *host_ptr_ = nullptr; void *host_ptr_ = nullptr;
struct PlaceholderImpl : public Placeholder { struct PlaceholderImpl : public Placeholder {
PlaceholderImpl(size_t size, void *input, std::string type, PlaceholderImpl(size_t size, void *input, kTypeId_t type,
cl_context context, cl_command_queue command_queue) cl_context context, cl_command_queue command_queue)
: ptr_(clCreateBuffer(context, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, : ptr_(clCreateBuffer(context, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR,
size, reinterpret_cast<void *>(input), NULL)), size, reinterpret_cast<void *>(input), NULL)),
...@@ -142,7 +142,7 @@ class CLTensor : TensorBase { ...@@ -142,7 +142,7 @@ class CLTensor : TensorBase {
context_(context), context_(context),
command_queue_(command_queue) {} command_queue_(command_queue) {}
PlaceholderImpl(size_t size, std::string type, cl_context context, PlaceholderImpl(size_t size, kTypeId_t type, cl_context context,
cl_command_queue command_queue) cl_command_queue command_queue)
: ptr_(clCreateBuffer(context, CL_MEM_READ_WRITE, size, NULL, NULL)), : ptr_(clCreateBuffer(context, CL_MEM_READ_WRITE, size, NULL, NULL)),
size_(size), size_(size),
...@@ -155,9 +155,9 @@ class CLTensor : TensorBase { ...@@ -155,9 +155,9 @@ class CLTensor : TensorBase {
virtual void *ptr() const { return static_cast<void *>(ptr_.get()); } virtual void *ptr() const { return static_cast<void *>(ptr_.get()); }
virtual std::string type() const { return type_; } virtual kTypeId_t type() const { return type_; }
virtual void set_type(std::string type) { type_ = type; } virtual void set_type(kTypeId_t type) { type_ = type; }
virtual void resize(size_t size) { virtual void resize(size_t size) {
if (size > capatity_) { if (size > capatity_) {
...@@ -175,7 +175,7 @@ class CLTensor : TensorBase { ...@@ -175,7 +175,7 @@ class CLTensor : TensorBase {
size_t capatity_; size_t capatity_;
/* the current type of memory */ /* the current type of memory */
std::string type_; kTypeId_t type_;
cl_context context_; cl_context context_;
cl_command_queue command_queue_; cl_command_queue command_queue_;
......
...@@ -62,8 +62,8 @@ Executor<Device, T>::Executor(const Program<Device> &program, ...@@ -62,8 +62,8 @@ Executor<Device, T>::Executor(const Program<Device> &program,
use_optimize_ ? program_.optimizeProgram : program_.originProgram; use_optimize_ ? program_.optimizeProgram : program_.originProgram;
PADDLE_MOBILE_ENFORCE(program_desc_ != nullptr, PADDLE_MOBILE_ENFORCE(program_desc_ != nullptr,
"program_desc_ should not be nullptr"); "program_desc_ should not be nullptr");
#ifndef PADDLE_MOBILE_FPGA #if !defined(PADDLE_MOBILE_FPGA) && !defined(PADDLE_MOBILE_CL)
// pass::MemoryOptPass()(program_desc_.get(), program_.scope.get()); pass::MemoryOptPass()(program_desc_.get(), program_.scope.get());
#endif #endif
// resize feed and fetch list // resize feed and fetch list
// should init feed and fetch variables before infer shape // should init feed and fetch variables before infer shape
...@@ -302,7 +302,7 @@ bool Executor<Device, T>::varInputMemory( ...@@ -302,7 +302,7 @@ bool Executor<Device, T>::varInputMemory(
const std::shared_ptr<VarDesc> &var_desc, Variable *var) const { const std::shared_ptr<VarDesc> &var_desc, Variable *var) const {
#ifdef PADDLE_MOBILE_FPGA #ifdef PADDLE_MOBILE_FPGA
framework::LoDTensor *tensor = var->template GetMutable<LoDTensor>(); framework::LoDTensor *tensor = var->template GetMutable<LoDTensor>();
tensor->init(type_id<float>()); tensor->init(type_id<float>().hash_code());
return true; return true;
#endif #endif
......
...@@ -14,6 +14,8 @@ limitations under the License. */ ...@@ -14,6 +14,8 @@ limitations under the License. */
#pragma once #pragma once
#include <memory>
#include <vector>
#include "framework/framework.pb-c.h" #include "framework/framework.pb-c.h"
#include "framework/program/op_desc.h" #include "framework/program/op_desc.h"
#include "framework/program/var_desc.h" #include "framework/program/var_desc.h"
...@@ -26,8 +28,8 @@ class BlockDesc { ...@@ -26,8 +28,8 @@ class BlockDesc {
friend class Node; friend class Node;
friend class ProgramOptimize; friend class ProgramOptimize;
BlockDesc() {} BlockDesc() {}
BlockDesc(PaddleMobile__Framework__Proto__BlockDesc *desc); explicit BlockDesc(PaddleMobile__Framework__Proto__BlockDesc *desc);
BlockDesc(const BlockDesc &block_desc) explicit BlockDesc(const BlockDesc &block_desc)
: index_(block_desc.index_), parent_index_(block_desc.parent_index_) { : index_(block_desc.index_), parent_index_(block_desc.parent_index_) {
for (auto &op_desc : block_desc.ops_) { for (auto &op_desc : block_desc.ops_) {
std::shared_ptr<OpDesc> copy_op_desc = std::make_shared<OpDesc>(*op_desc); std::shared_ptr<OpDesc> copy_op_desc = std::make_shared<OpDesc>(*op_desc);
......
...@@ -16,6 +16,7 @@ limitations under the License. */ ...@@ -16,6 +16,7 @@ limitations under the License. */
#include <cinttypes> #include <cinttypes>
#include <map> #include <map>
#include <memory>
#include <string> #include <string>
#include <utility> #include <utility>
#include <vector> #include <vector>
......
...@@ -143,7 +143,7 @@ void PaddleMobilePredictor<Device, T>::FeedPaddleTensors( ...@@ -143,7 +143,7 @@ void PaddleMobilePredictor<Device, T>::FeedPaddleTensors(
auto num = inputs.size(); auto num = inputs.size();
std::vector<framework::Tensor> tensors(num, framework::Tensor()); std::vector<framework::Tensor> tensors(num, framework::Tensor());
for (int i = 0; i < num; i++) { for (int i = 0; i < num; i++) {
tensors[i].init(type_id<float>()); tensors[i].init(type_id<float>().hash_code());
ConvertPaddleTensors(inputs[i], &tensors[i]); ConvertPaddleTensors(inputs[i], &tensors[i]);
} }
paddle_mobile_->FeedTensorData(tensors); paddle_mobile_->FeedTensorData(tensors);
......
...@@ -25,6 +25,7 @@ limitations under the License. */ ...@@ -25,6 +25,7 @@ limitations under the License. */
#include <memory> #include <memory>
#include <string> #include <string>
#include <vector> #include <vector>
#include "common/type_define.h"
namespace paddle_mobile { namespace paddle_mobile {
...@@ -87,6 +88,7 @@ struct PaddleTensor { ...@@ -87,6 +88,7 @@ struct PaddleTensor {
// TODO(Superjomn) for LoD support, add a vector<vector<int>> field if needed. // TODO(Superjomn) for LoD support, add a vector<vector<int>> field if needed.
PaddleBuf data; // blob of data. PaddleBuf data; // blob of data.
PaddleDType dtype; PaddleDType dtype;
kTypeId_t dtypeid;
LayoutType layout; LayoutType layout;
}; };
......
...@@ -25,7 +25,7 @@ template <> ...@@ -25,7 +25,7 @@ template <>
bool ElementwiseAddKernel<FPGA, float>::Init(ElementwiseAddParam<FPGA> *param) { bool ElementwiseAddKernel<FPGA, float>::Init(ElementwiseAddParam<FPGA> *param) {
auto *input_y = const_cast<LoDTensor *>(param->InputY()); auto *input_y = const_cast<LoDTensor *>(param->InputY());
auto *out = param->Out(); auto *out = param->Out();
if (input_y->type() != typeid(float)) { if (input_y->type() != type_id<float>()) {
paddle_mobile::fpga::ActivationType activation_enable = paddle_mobile::fpga::ActivationType activation_enable =
paddle_mobile::fpga::NONE; paddle_mobile::fpga::NONE;
int16_t leaky_relu_negative_slope = 0; int16_t leaky_relu_negative_slope = 0;
...@@ -62,11 +62,10 @@ bool ElementwiseAddKernel<FPGA, float>::Init(ElementwiseAddParam<FPGA> *param) { ...@@ -62,11 +62,10 @@ bool ElementwiseAddKernel<FPGA, float>::Init(ElementwiseAddParam<FPGA> *param) {
param->SetFpgaArgs(ewaddArgs); param->SetFpgaArgs(ewaddArgs);
} else { } else {
param->float_input_x.Resize(param->InputX()->dims()); param->float_input_x.Resize(param->InputX()->dims());
param->float_input_x.init(typeid(float)); param->float_input_x.init(type_id<float>().hash_code());
fpga::format_fp32_ofm(&(param->float_input_x)); fpga::format_fp32_ofm(&(param->float_input_x));
param->float_out.Resize(param->InputX()->dims()); param->float_out.Resize(param->InputX()->dims());
// param->float_out.init(typeid(float));
param->float_out.mutable_data<float>(param->InputX()->dims()); param->float_out.mutable_data<float>(param->InputX()->dims());
fpga::format_fp32_ofm(&(param->float_out)); fpga::format_fp32_ofm(&(param->float_out));
...@@ -118,7 +117,7 @@ template <> ...@@ -118,7 +117,7 @@ template <>
void ElementwiseAddKernel<FPGA, float>::Compute( void ElementwiseAddKernel<FPGA, float>::Compute(
const ElementwiseAddParam<FPGA> &param) { const ElementwiseAddParam<FPGA> &param) {
auto input_y = const_cast<LoDTensor *>(param.InputY()); auto input_y = const_cast<LoDTensor *>(param.InputY());
if (input_y->type() != typeid(float)) { if (input_y->type() != type_id<float>()) {
fpga::ComputeFpgaEWAdd(param.FpgaArgs()); fpga::ComputeFpgaEWAdd(param.FpgaArgs());
} else { } else {
auto input_x = const_cast<LoDTensor *>(param.InputX()); auto input_x = const_cast<LoDTensor *>(param.InputX());
......
...@@ -27,11 +27,11 @@ struct MulFunctor { ...@@ -27,11 +27,11 @@ struct MulFunctor {
template <> template <>
bool ElementwiseMulKernel<FPGA, float>::Init(ElementwiseMulParam<FPGA> *param) { bool ElementwiseMulKernel<FPGA, float>::Init(ElementwiseMulParam<FPGA> *param) {
param->float_input_x.Resize(param->InputX()->dims()); param->float_input_x.Resize(param->InputX()->dims());
param->float_input_x.init(typeid(float)); param->float_input_x.init(type_id<float>().hash_code());
fpga::format_fp32_ofm(&(param->float_input_x)); fpga::format_fp32_ofm(&(param->float_input_x));
param->float_out.Resize(param->InputX()->dims()); param->float_out.Resize(param->InputX()->dims());
param->float_out.init(typeid(float)); param->float_out.init(type_id<float>().hash_code());
fpga::format_fp32_ofm(&(param->float_out)); fpga::format_fp32_ofm(&(param->float_out));
auto *out = param->Out(); auto *out = param->Out();
......
...@@ -23,7 +23,7 @@ bool FeedKernel<FPGA, float>::Init(FeedParam<FPGA> *param) { ...@@ -23,7 +23,7 @@ bool FeedKernel<FPGA, float>::Init(FeedParam<FPGA> *param) {
int col = param->Col(); int col = param->Col();
DLOG << "col = " << col; DLOG << "col = " << col;
auto input = const_cast<LoDTensor *>(&param->InputX()->at(col)); auto input = const_cast<LoDTensor *>(&param->InputX()->at(col));
input->init(typeid(float)); input->init(type_id<float>().hash_code());
input->Resize(output->dims()); input->Resize(output->dims());
if (output->dims().size() != 4) { if (output->dims().size() != 4) {
...@@ -39,12 +39,12 @@ void FeedKernel<FPGA, float>::Compute(const FeedParam<FPGA> &param) { ...@@ -39,12 +39,12 @@ void FeedKernel<FPGA, float>::Compute(const FeedParam<FPGA> &param) {
auto output = param.Out(); auto output = param.Out();
int col = param.Col(); int col = param.Col();
auto input = const_cast<LoDTensor *>(&param.InputX()->at(col)); auto input = const_cast<LoDTensor *>(&param.InputX()->at(col));
std::type_index input_type = input->type(); kTypeId_t input_type = input->type();
if (input_type == typeid(float)) { if (input_type == type_id<float>()) {
input->init(typeid(float)); input->init(type_id<float>().hash_code());
} else { // input_type == typeid(int8_t) } else {
input->init(typeid(int8_t)); input->init(type_id<int8_t>().hash_code());
} }
input->Resize(output->dims()); input->Resize(output->dims());
...@@ -62,7 +62,7 @@ void FeedKernel<FPGA, float>::Compute(const FeedParam<FPGA> &param) { ...@@ -62,7 +62,7 @@ void FeedKernel<FPGA, float>::Compute(const FeedParam<FPGA> &param) {
fpga::format_image(input); fpga::format_image(input);
auto output_ptr = output->data<half>(); auto output_ptr = output->data<half>();
fpga::BypassArgs args = {fpga::DATA_TYPE_FP32}; fpga::BypassArgs args = {fpga::DATA_TYPE_FP32};
if (input_type == typeid(float)) { if (input_type == type_id<float>()) {
auto input_ptr = input->data<float>(); auto input_ptr = input->data<float>();
auto external_ptr = reinterpret_cast<float *>(input->external_data); auto external_ptr = reinterpret_cast<float *>(input->external_data);
float *p_data = external_ptr == nullptr ? input_ptr : external_ptr; float *p_data = external_ptr == nullptr ? input_ptr : external_ptr;
...@@ -81,7 +81,7 @@ void FeedKernel<FPGA, float>::Compute(const FeedParam<FPGA> &param) { ...@@ -81,7 +81,7 @@ void FeedKernel<FPGA, float>::Compute(const FeedParam<FPGA> &param) {
args.output.scale_address = output->scale; args.output.scale_address = output->scale;
fpga::PerformBypass(args); fpga::PerformBypass(args);
input->external_data = nullptr; input->external_data = nullptr;
} else { // input_type == typeid(int8_t) } else {
auto input_ptr = input->data<int8_t>(); auto input_ptr = input->data<int8_t>();
auto external_ptr = reinterpret_cast<int8_t *>(input->external_data); auto external_ptr = reinterpret_cast<int8_t *>(input->external_data);
int8_t *p_data = external_ptr == nullptr ? input_ptr : external_ptr; int8_t *p_data = external_ptr == nullptr ? input_ptr : external_ptr;
......
...@@ -21,10 +21,10 @@ bool FetchKernel<FPGA, float>::Init(FetchParam<FPGA> *param) { ...@@ -21,10 +21,10 @@ bool FetchKernel<FPGA, float>::Init(FetchParam<FPGA> *param) {
int col = param->Col(); int col = param->Col();
DLOG << "col = " << col; DLOG << "col = " << col;
auto output = &(param->Out()->at(col)); auto output = &(param->Out()->at(col));
if (input->type() == typeid(float)) { if (input->type() == type_id<float>()) {
return true; return true;
} }
output->init(typeid(float)); output->init(type_id<float>().hash_code());
output->Resize(input->dims()); output->Resize(input->dims());
fpga::format_fp32_ofm(output); fpga::format_fp32_ofm(output);
int outC = 1; int outC = 1;
...@@ -78,7 +78,7 @@ void FetchKernel<FPGA, float>::Compute(const FetchParam<FPGA> &param) { ...@@ -78,7 +78,7 @@ void FetchKernel<FPGA, float>::Compute(const FetchParam<FPGA> &param) {
auto input = const_cast<LoDTensor *>(param.InputX()); auto input = const_cast<LoDTensor *>(param.InputX());
int col = param.Col(); int col = param.Col();
auto output = &param.Out()->at(col); auto output = &param.Out()->at(col);
if (input->type() == typeid(float)) { if (input->type() == type_id<float>()) {
output->ShareDataWith(*input); output->ShareDataWith(*input);
return; return;
} }
......
...@@ -28,7 +28,7 @@ bool PoolKernel<FPGA, float>::Init(PoolParam<FPGA> *param) { ...@@ -28,7 +28,7 @@ bool PoolKernel<FPGA, float>::Init(PoolParam<FPGA> *param) {
vector<int> paddings = param->Paddings(); vector<int> paddings = param->Paddings();
std::string pooling_type = param->PoolingType(); std::string pooling_type = param->PoolingType();
if (input->type() == typeid(float)) { if (input->type() == type_id<float>()) {
int channels = input->dims()[1]; int channels = input->dims()[1];
int height = input->dims()[2]; int height = input->dims()[2];
int width = input->dims()[3]; int width = input->dims()[3];
...@@ -70,7 +70,7 @@ template <> ...@@ -70,7 +70,7 @@ template <>
void PoolKernel<FPGA, float>::Compute(const PoolParam<FPGA> &param) { void PoolKernel<FPGA, float>::Compute(const PoolParam<FPGA> &param) {
auto *input = const_cast<LoDTensor *>(param.Input()); auto *input = const_cast<LoDTensor *>(param.Input());
if (input->type() == typeid(float)) { if (input->type() == type_id<float>()) {
auto *output = param.Output(); auto *output = param.Output();
auto in = input->data<float>(); auto in = input->data<float>();
auto N = input->dims()[0]; auto N = input->dims()[0];
......
...@@ -37,11 +37,11 @@ bool ProposalKernel<FPGA, float>::Init(ProposalParam<FPGA> *param) { ...@@ -37,11 +37,11 @@ bool ProposalKernel<FPGA, float>::Init(ProposalParam<FPGA> *param) {
param->float_bbox = std::make_shared<Tensor>(); param->float_bbox = std::make_shared<Tensor>();
param->float_bbox->Resize(param->bbox_deltas_->dims()); param->float_bbox->Resize(param->bbox_deltas_->dims());
param->float_bbox->init(typeid(float)); param->float_bbox->init(type_id<float>().hash_code());
fpga::format_fp32_ofm(param->float_bbox.get()); fpga::format_fp32_ofm(param->float_bbox.get());
param->float_score = std::make_shared<Tensor>(); param->float_score = std::make_shared<Tensor>();
param->float_score->Resize(param->scores_->dims()); param->float_score->Resize(param->scores_->dims());
param->float_score->init(typeid(float)); param->float_score->init(type_id<float>().hash_code());
fpga::format_fp32_ofm(param->float_score.get()); fpga::format_fp32_ofm(param->float_score.get());
auto input = param->bbox_deltas_; auto input = param->bbox_deltas_;
...@@ -437,7 +437,6 @@ void ProposalKernel<FPGA, float>::Compute(const ProposalParam<FPGA> &param) { ...@@ -437,7 +437,6 @@ void ProposalKernel<FPGA, float>::Compute(const ProposalParam<FPGA> &param) {
bbox_height = (uint32_t)(input_bbox->dims()[2]); bbox_height = (uint32_t)(input_bbox->dims()[2]);
bbox_width = (uint32_t)(input_bbox->dims()[3]); bbox_width = (uint32_t)(input_bbox->dims()[3]);
// score_tmp->init(typeid(half));
std::shared_ptr<Tensor> score_tmp = std::make_shared<Tensor>(); std::shared_ptr<Tensor> score_tmp = std::make_shared<Tensor>();
score_tmp->Resize(param.scores_->dims()); score_tmp->Resize(param.scores_->dims());
score_tmp->mutable_data<half>(); score_tmp->mutable_data<half>();
......
...@@ -25,7 +25,7 @@ bool SliceKernel<FPGA, float>::Init(SliceParam<FPGA>* param) { ...@@ -25,7 +25,7 @@ bool SliceKernel<FPGA, float>::Init(SliceParam<FPGA>* param) {
fpga::format_fp16_ofm(output); fpga::format_fp16_ofm(output);
DLOG << "input: " << param->input_; DLOG << "input: " << param->input_;
DLOG << "output: " << param->output_; DLOG << "output: " << param->output_;
if (param->input_->type() != typeid(half)) { if (param->input_->type() != type_id<half>()) {
DLOG << "wrong type"; DLOG << "wrong type";
} }
return true; return true;
......
...@@ -26,7 +26,7 @@ bool SoftmaxKernel<FPGA, float>::Init(SoftmaxParam<FPGA> *param) { ...@@ -26,7 +26,7 @@ bool SoftmaxKernel<FPGA, float>::Init(SoftmaxParam<FPGA> *param) {
auto dims = framework::vectorize(input->dims()); auto dims = framework::vectorize(input->dims());
half *input_ptr; half *input_ptr;
auto out = param->Out(); auto out = param->Out();
if (input->type() == typeid(float)) { if (input->type() == type_id<float>()) {
out->Resize(framework::make_ddim(dims)); out->Resize(framework::make_ddim(dims));
out->mutable_data<float>(framework::make_ddim(dims)); out->mutable_data<float>(framework::make_ddim(dims));
} else { } else {
...@@ -50,7 +50,7 @@ bool SoftmaxKernel<FPGA, float>::Init(SoftmaxParam<FPGA> *param) { ...@@ -50,7 +50,7 @@ bool SoftmaxKernel<FPGA, float>::Init(SoftmaxParam<FPGA> *param) {
if (channel != 2) { // Use CPU if (channel != 2) { // Use CPU
out->Resize(framework::make_ddim(dims)); out->Resize(framework::make_ddim(dims));
out->mutable_data<float>(framework::make_ddim(dims)); out->mutable_data<float>(framework::make_ddim(dims));
float_input->init(typeid(float)); float_input->init(type_id<float>().hash_code());
float_input->mutable_data<float>(framework::make_ddim(dims)); float_input->mutable_data<float>(framework::make_ddim(dims));
// fpga::format_fp32_ofm(float_input); // fpga::format_fp32_ofm(float_input);
// fpga::format_fp32_ofm(out); // fpga::format_fp32_ofm(out);
...@@ -91,7 +91,7 @@ bool SoftmaxKernel<FPGA, float>::Init(SoftmaxParam<FPGA> *param) { ...@@ -91,7 +91,7 @@ bool SoftmaxKernel<FPGA, float>::Init(SoftmaxParam<FPGA> *param) {
template <> template <>
void SoftmaxKernel<FPGA, float>::Compute(const SoftmaxParam<FPGA> &param) { void SoftmaxKernel<FPGA, float>::Compute(const SoftmaxParam<FPGA> &param) {
auto *in_x = (param.InputX()); auto *in_x = (param.InputX());
if (in_x->type() == typeid(half)) { if (in_x->type() == type_id<half>()) {
fpga::PerformBypass(param.FpgaArgs()); fpga::PerformBypass(param.FpgaArgs());
if (param.FpgaArgs().output.activation.activation_type != fpga::SOFTMAX) { if (param.FpgaArgs().output.activation.activation_type != fpga::SOFTMAX) {
Tensor *out = param.Out(); Tensor *out = param.Out();
......
...@@ -104,7 +104,7 @@ void dump_stride_float(std::string filename, ...@@ -104,7 +104,7 @@ void dump_stride_float(std::string filename,
void dump_stride(std::string filename, void dump_stride(std::string filename,
paddle_mobile::PaddleTensor input_tensor) { paddle_mobile::PaddleTensor input_tensor) {
if (input_tensor.dtypeid == typeid(float)) { if (input_tensor.dtypeid == type_id<float>().hash_code()) {
dump_stride_float(filename, input_tensor); dump_stride_float(filename, input_tensor);
} else { } else {
std::cout << "only support dumping float data" << std::endl; std::cout << "only support dumping float data" << std::endl;
...@@ -156,13 +156,13 @@ int main() { ...@@ -156,13 +156,13 @@ int main() {
std::cout << "Finishing initializing data" << std::endl; std::cout << "Finishing initializing data" << std::endl;
struct PaddleTensor t_img_info, t_img; struct PaddleTensor t_img_info, t_img;
t_img_info.dtypeid = typeid(float); t_img_info.dtypeid = type_id<float>().hash_code();
t_img_info.layout = LAYOUT_HWC; t_img_info.layout = LAYOUT_HWC;
t_img_info.shape = std::vector<int>({1, 3}); t_img_info.shape = std::vector<int>({1, 3});
t_img_info.name = "Image information"; t_img_info.name = "Image information";
t_img_info.data.Reset(img_info, 3 * sizeof(float)); t_img_info.data.Reset(img_info, 3 * sizeof(float));
t_img.dtypeid = typeid(float); t_img.dtypeid = type_id<float>().hash_code();
// quantize(&img, img_length); // quantize(&img, img_length);
// t_img.dtypeid = typeid(int8_t); // t_img.dtypeid = typeid(int8_t);
t_img.layout = LAYOUT_HWC; t_img.layout = LAYOUT_HWC;
...@@ -209,7 +209,7 @@ int main() { ...@@ -209,7 +209,7 @@ int main() {
std::cout << "Finishing initializing data" << std::endl; std::cout << "Finishing initializing data" << std::endl;
struct PaddleTensor t_img1; struct PaddleTensor t_img1;
t_img1.dtypeid = typeid(float); t_img1.dtypeid = type_id<float>().hash_code();
t_img1.layout = LAYOUT_HWC; t_img1.layout = LAYOUT_HWC;
t_img1.shape = std::vector<int>({1, 14, 14, 144}); t_img1.shape = std::vector<int>({1, 14, 14, 144});
t_img1.name = "Image information"; t_img1.name = "Image information";
......
...@@ -96,7 +96,7 @@ void dump_stride_float(std::string filename, PaddleTensor input_tensor) { ...@@ -96,7 +96,7 @@ void dump_stride_float(std::string filename, PaddleTensor input_tensor) {
} }
void dump_stride(std::string filename, PaddleTensor input_tensor) { void dump_stride(std::string filename, PaddleTensor input_tensor) {
if (input_tensor.dtypeid == typeid(float)) { if (input_tensor.dtypeid == type_id<float>().hash_code()) {
dump_stride_float(filename, input_tensor); dump_stride_float(filename, input_tensor);
} else { } else {
std::cout << "only support dumping float data" << std::endl; std::cout << "only support dumping float data" << std::endl;
...@@ -131,7 +131,7 @@ int main() { ...@@ -131,7 +131,7 @@ int main() {
std::cout << "Finishing initializing data" << std::endl; std::cout << "Finishing initializing data" << std::endl;
struct PaddleTensor t_img; struct PaddleTensor t_img;
t_img.dtype = FLOAT32; t_img.dtype = FLOAT32;
t_img.dtypeid = typeid(float); t_img.dtypeid = type_id<float>().hash_code();
// quantize(&img, img_length); // quantize(&img, img_length);
// t_img.dtype = INT8; // t_img.dtype = INT8;
// t_img.dtypeid = typeid(int8_t); // t_img.dtypeid = typeid(int8_t);
......
...@@ -20,8 +20,8 @@ limitations under the License. */ ...@@ -20,8 +20,8 @@ limitations under the License. */
#include <iostream> #include <iostream>
#include "../../src/io/paddle_inference_api.h" #include "../../src/io/paddle_inference_api.h"
using namespace paddle_mobile; using namespace paddle_mobile; // NOLINT
using namespace paddle_mobile::fpga; using namespace paddle_mobile::fpga; // NOLINT
static const char *g_image = "../models/rfcn/data.bin"; static const char *g_image = "../models/rfcn/data.bin";
static const char *g_model = "../models/rfcn/model"; static const char *g_model = "../models/rfcn/model";
...@@ -86,7 +86,7 @@ int main() { ...@@ -86,7 +86,7 @@ int main() {
struct PaddleTensor t_img1; struct PaddleTensor t_img1;
t_img1.dtypeid = typeid(float); t_img1.dtypeid = type_id<float>().hash_code();
t_img1.layout = LAYOUT_HWC; t_img1.layout = LAYOUT_HWC;
t_img1.shape = std::vector<int>({1, 224, 224, 3}); t_img1.shape = std::vector<int>({1, 224, 224, 3});
t_img1.name = "Image information"; t_img1.name = "Image information";
...@@ -117,13 +117,13 @@ int main() { ...@@ -117,13 +117,13 @@ int main() {
std::cout << "Finishing initializing data" << std::endl; std::cout << "Finishing initializing data" << std::endl;
struct PaddleTensor t_img_info, t_img; struct PaddleTensor t_img_info, t_img;
t_img.dtypeid = typeid(float); t_img.dtypeid = type_id<float>().hash_code();
t_img_info.layout = LAYOUT_HWC; t_img_info.layout = LAYOUT_HWC;
t_img_info.shape = std::vector<int>({1, 3}); t_img_info.shape = std::vector<int>({1, 3});
t_img_info.name = "Image information"; t_img_info.name = "Image information";
t_img_info.data.Reset(img_info, 3 * sizeof(float)); t_img_info.data.Reset(img_info, 3 * sizeof(float));
t_img.dtypeid = typeid(float); t_img.dtypeid = type_id<float>().hash_code();
t_img.layout = LAYOUT_HWC; t_img.layout = LAYOUT_HWC;
t_img.shape = std::vector<int>({1, 432, 1280, 3}); t_img.shape = std::vector<int>({1, 432, 1280, 3});
t_img.name = "Image information"; t_img.name = "Image information";
......
...@@ -95,7 +95,7 @@ void dump_stride_float(std::string filename, PaddleTensor input_tensor) { ...@@ -95,7 +95,7 @@ void dump_stride_float(std::string filename, PaddleTensor input_tensor) {
} }
void dump_stride(std::string filename, PaddleTensor input_tensor) { void dump_stride(std::string filename, PaddleTensor input_tensor) {
if (input_tensor.dtypeid == typeid(float)) { if (input_tensor.dtypeid == type_id<float>().hash_code()) {
dump_stride_float(filename, input_tensor); dump_stride_float(filename, input_tensor);
} else { } else {
std::cout << "only support dumping float data" << std::endl; std::cout << "only support dumping float data" << std::endl;
...@@ -131,10 +131,10 @@ int main() { ...@@ -131,10 +131,10 @@ int main() {
std::cout << "Finishing initializing data" << std::endl; std::cout << "Finishing initializing data" << std::endl;
struct PaddleTensor t_img; struct PaddleTensor t_img;
// t_img.dtype = FLOAT32; // t_img.dtype = FLOAT32;
// t_img.dtypeid = typeid(float); // t_img.dtypeid = type_id<float>().hash_code();
quantize(&img, img_length); quantize(&img, img_length);
t_img.dtype = INT8; t_img.dtype = INT8;
t_img.dtypeid = typeid(int8_t); t_img.dtypeid = type_id<int8_t>().hash_code();
t_img.layout = LAYOUT_HWC; t_img.layout = LAYOUT_HWC;
t_img.shape = std::vector<int>({1, 256, 416, 3}); t_img.shape = std::vector<int>({1, 256, 416, 3});
t_img.name = "Image information"; t_img.name = "Image information";
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册