Commit 0720653b authored by TianXiaogang, committed by Yan Chunwei

fix fpga compile problem and kernels (#1989)

Parent 019f5b8e
@@ -34,6 +34,7 @@
 .DS_Store
 build/
+build_fpga/
 .idea/
@@ -23,9 +23,7 @@ void fpga_resize(float* input,
                  uint8_t* output,
                  int output_width,
                  int output_height) {
-  paddle::zynqmp::InplaceArgs inplace_args = {
-      .relu_enable = 0, .power_enable = 0,
-  };
+  paddle::zynqmp::InplaceArgs inplace_args = {0, 0, 0};
   paddle::zynqmp::config_inplace(inplace_args);
   paddle::zynqmp::ImageInputArgs input_args = {nullptr};
@@ -39,10 +39,14 @@ static size_t memory_size_max = 0;
 static size_t memory_size = 0;
 static inline int do_ioctl(uint64_t req, const void *arg) {
+  int ret = -1;
 #ifdef PADDLE_LITE_OS_LINUX
-  return ioctl(fd, req, arg);
+  ret = ioctl(fd, req, arg);
+  if (ret != 0) {
+    throw -1;
+  }
 #else
-  return -1;
+  return ret;
 #endif
 }
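With this change, do_ioctl on Linux no longer reports failure through its return value; a nonzero result from ioctl now surfaces as a thrown int. Call sites therefore need a try/catch rather than a status check. A minimal caller sketch follows, where IOCTL_FPGA_RESET and reset_device are hypothetical names used only for illustration:

// Hypothetical caller, not part of this commit: shows how code built on the
// new do_ioctl would observe failures, since errors now propagate as a
// thrown int instead of a return code.
int reset_device() {
  try {
    do_ioctl(IOCTL_FPGA_RESET, nullptr);  // throws -1 if the ioctl fails
  } catch (int err) {
    return err;  // map the thrown value back to a status code
  }
  return 0;
}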
@@ -46,6 +46,15 @@ struct VersionArgs {
 struct DeviceInfo {
   uint32_t filter_cap;
+  uint32_t version;
+  uint16_t device_type;
+  uint32_t reserved0;
+  uint32_t reserved1;
+  uint32_t reserved2;
+  uint32_t reserved3;
+  uint32_t reserved4;
+  uint32_t reserved5;
+  uint32_t reserved6;
 };
 struct MemoryCopyArgs {
@@ -191,6 +200,7 @@ struct NormalizeParameterArgs {
 };
 struct InplaceArgs {
+  bool leaky_relu_enable;
   bool relu_enable;
   bool power_enable;
   bool normalize_enable;
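For reference, the {0, 0, 0} aggregate initializer used in fpga_resize above lines up with the updated member order of InplaceArgs, so the new leaky_relu_enable flag is covered as well. A minimal sketch of the semantics, assuming the struct has no members beyond normalize_enable:

// Sketch only: the member order follows the diff above; whether further
// members exist after normalize_enable is an assumption.
struct InplaceArgs {
  bool leaky_relu_enable;
  bool relu_enable;
  bool power_enable;
  bool normalize_enable;
};

// The first three members are initialized to 0 (false) in declaration order;
// any remaining members are value-initialized, so every flag ends up false.
InplaceArgs inplace_args = {0, 0, 0};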
@@ -57,7 +57,7 @@ class DDimLite {
   DDimLite Slice(int start, int end) const;
-  DDimLite Flattern2D(int col) const {
+  DDimLite Flatten2D(int col) const {
     return DDimLite(std::vector<value_type>(
         {Slice(0, col).production(), Slice(col, size()).production()}));
   }
@@ -118,6 +118,13 @@ class TensorLite {
   const LoD &lod() const { return lod_; }
   LoD *mutable_lod() { return &lod_; }
+  void set_lod(const LoD &lod) { lod_ = lod; }
+
+  PrecisionType precision() const { return precision_; }
+  void set_precision(PrecisionType precision) { precision_ = precision; }
+
+  bool persistable() const { return persistable_; }
+  void set_persistable(bool persistable) { persistable_ = persistable; }
 
   // T is the data type and R is the return type
   // For OpenCL, the return type can be cl::Buffer
   // and the data type can be float/int8_t.
@@ -147,6 +154,9 @@ class TensorLite {
   void CopyDataFrom(const TensorLite &other);
 
+  template <typename T>
+  TensorLite Slice(int64_t begin, int64_t end) const;
+
   TargetType target() const { return target_; }
   zynqmp::Tensor *ZynqTensor() const { return zynq_tensor_; }
@@ -168,6 +178,11 @@ class TensorLite {
   LoD lod_;
   size_t memory_size_{};
+  size_t offset_{0};
+
+  PrecisionType precision_{PrecisionType::kUnk};
+  bool persistable_{false};
+
   zynqmp::Tensor *zynq_tensor_ = new zynqmp::Tensor();
 
   template <typename T>
@@ -219,6 +234,18 @@ bool TensorCompareWith(const TensorT &a, const TensorT &b) {
   if (memcmp(a.raw_data(), b.raw_data(), a.data_size()) != 0) return false;
   return true;
 }
 
+template <typename T>
+TensorLite TensorLite::Slice(int64_t begin, int64_t end) const {
+  int64_t base = numel() / dims_[0];
+  TensorLite dst;
+  dst.buffer_ = buffer_;
+  dst.target_ = target_;
+  auto dst_dims = dims_;
+  dst_dims[0] = end - begin;
+  dst.Resize(dst_dims);
+  dst.offset_ = offset_ + static_cast<size_t>(begin * base) * sizeof(T);
+  return dst;
+}
+
 }  // namespace lite
 }  // namespace paddle
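The new TensorLite::Slice<T> creates a view along the first dimension: it shares the underlying buffer and only adjusts the dims and offset_, so no data is copied. A minimal usage sketch, assuming the tensor header is included; the shapes are made up for illustration:

// Sketch only: illustrates the intended semantics of the Slice<T> template
// defined above, not code from this commit.
using paddle::lite::DDimLite;
using paddle::lite::TensorLite;

TensorLite batch;
batch.Resize(DDimLite(std::vector<int64_t>({8, 3, 224, 224})));
batch.mutable_data<float>();

// Rows [2, 5) of the batch: the view shares batch's buffer, dims() becomes
// {3, 3, 224, 224}, and offset_ advances by 2 * 3 * 224 * 224 * sizeof(float).
TensorLite sub = batch.Slice<float>(2, 5);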
@@ -28,10 +28,9 @@ void ConvCompute::PrepareForRun() {
   // ====================================================
   zynqmp::ConvParam& conv_param = pe_.param();
   param.output->mutable_data<float16>();
-  filter_.setDataType(zynqmp::FP32);
+  // filter_.setDataType(zynqmp::FP32);
   conv_param.input = param.x->ZynqTensor();
   conv_param.output = param.output->ZynqTensor();
   conv_param.filter = param.filter->ZynqTensor();
@@ -40,11 +39,17 @@ void ConvCompute::PrepareForRun() {
   conv_param.paddings = param.paddings;
   conv_param.dilations = param.dilations;
   fill_scale_bias_const(&conv_param);
+  conv_param.bias()->copyFrom(param.bias->ZynqTensor());
+  conv_param.relu.enabled = param.fuse_relu;
 
   pe_.init();
   pe_.apply();
 }
 
-void ConvCompute::Run() { pe_.dispatch(); }
+void ConvCompute::Run() {
+  auto& param = this->Param<param_t>();
+  zynqmp::ConvParam& conv_param = pe_.param();
+  pe_.dispatch();
+}
 
 }  // namespace fpga
 }  // namespace kernels
@@ -37,9 +37,6 @@ class ConvCompute
  private:
   zynqmp::ConvPE pe_;
-  zynqmp::Tensor input_;
-  zynqmp::Tensor output_;
-  zynqmp::Tensor filter_;
 };
 
 }  // namespace fpga
@@ -36,9 +36,6 @@ class ElementwiseAddCompute
  private:
   zynqmp::ElementwiseAddPE pe_;
-  zynqmp::Tensor input_x_;
-  zynqmp::Tensor input_y_;
-  zynqmp::Tensor output_;
 };
 
 class ElementwiseAddActivationCompute
@@ -51,9 +48,6 @@ class ElementwiseAddActivationCompute
  private:
   zynqmp::ElementwiseAddPE pe_;
-  zynqmp::Tensor input_x_;
-  zynqmp::Tensor input_y_;
-  zynqmp::Tensor output_;
 };
 
 }  // namespace fpga
@@ -35,9 +35,6 @@ void PoolCompute::PrepareForRun() {
   pool_param.output = param.output->ZynqTensor();
   pool_param.relu.enabled = false;
-  auto& in_dims = param.x->dims();
-  auto& out_dims = param.output->dims();
   pool_param.type = param.pooling_type == "max" ? zynqmp::PoolingType::MAX
                                                 : zynqmp::PoolingType::AVERAGE;
   pool_param.globalPooling = param.global_pooling;
@@ -36,8 +36,6 @@ class PoolCompute
  private:
   zynqmp::PoolingPE pe_;
-  zynqmp::Tensor input_;
-  zynqmp::Tensor output_;
 };
 
 }  // namespace fpga
@@ -22,7 +22,7 @@ namespace fpga {
 using float16 = zynqmp::float16;
 
-void SoftmaxCompute::Run() {
+void SoftmaxCompute::PrepareForRun() {
   zynqmp::SoftmaxParam& softmax_param = pe_.param();
   auto& param = Param<operators::SoftmaxParam>();
@@ -33,6 +33,8 @@ void SoftmaxCompute::Run() {
   pe_.apply();
 }
 
+void SoftmaxCompute::Run() { pe_.dispatch(); }
+
 }  // namespace fpga
 }  // namespace kernels
 }  // namespace lite
@@ -29,6 +29,7 @@ using float16 = zynqmp::float16;
 class SoftmaxCompute
     : public KernelLite<TARGET(kFPGA), PRECISION(kFP16), DATALAYOUT(kNHWC)> {
  public:
+  void PrepareForRun() override;
   void Run() override;
 
   virtual ~SoftmaxCompute() = default;
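Splitting the softmax kernel into PrepareForRun() and Run() matches the pattern used by the other FPGA kernels in this commit: parameter binding, pe_.init(), and pe_.apply() happen once, and each inference only calls pe_.dispatch(). A rough sketch of the resulting lifecycle, where run_inference is a hypothetical driver loop written only to illustrate the calling order:

// Hypothetical driver loop, for illustration only: the framework invokes
// PrepareForRun() once and then Run() for every inference.
void run_inference(paddle::lite::kernels::fpga::SoftmaxCompute& kernel,
                   int iterations) {
  kernel.PrepareForRun();  // one-time: bind params, pe_.init(), pe_.apply()
  for (int i = 0; i < iterations; ++i) {
    kernel.Run();          // per-inference: pe_.dispatch() only
  }
}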