diff --git a/.gitignore b/.gitignore
index 68380e97ab92a0632675a709836d19be669de89d..53ea5038737b7295cb1465274d95db484fe7d5f7 100644
--- a/.gitignore
+++ b/.gitignore
@@ -34,6 +34,7 @@
 .DS_Store

 build/
+build_fpga/

 .idea/
diff --git a/lite/backends/fpga/KD/fpga_cv.cpp b/lite/backends/fpga/KD/fpga_cv.cpp
index e3a1eed1ff8ca9c08e41b8479f48346e1a8f6fed..15a20e368b09f193e3f43b574ff3682ce96782ad 100644
--- a/lite/backends/fpga/KD/fpga_cv.cpp
+++ b/lite/backends/fpga/KD/fpga_cv.cpp
@@ -23,9 +23,7 @@ void fpga_resize(float* input,
                  uint8_t* output,
                  int output_width,
                  int output_height) {
-  paddle::zynqmp::InplaceArgs inplace_args = {
-      .relu_enable = 0, .power_enable = 0,
-  };
+  paddle::zynqmp::InplaceArgs inplace_args = {0, 0, 0};
   paddle::zynqmp::config_inplace(inplace_args);

   paddle::zynqmp::ImageInputArgs input_args = {nullptr};
diff --git a/lite/backends/fpga/KD/llapi/zynqmp_api.cpp b/lite/backends/fpga/KD/llapi/zynqmp_api.cpp
index 6e7c1cd03027ae28b2977fcbe217b6cfb06378a0..1f1226ead3d4e9b50100f4de574104a5d6f777b2 100644
--- a/lite/backends/fpga/KD/llapi/zynqmp_api.cpp
+++ b/lite/backends/fpga/KD/llapi/zynqmp_api.cpp
@@ -39,10 +39,15 @@ static size_t memory_size_max = 0;
 static size_t memory_size = 0;

 static inline int do_ioctl(uint64_t req, const void *arg) {
+  int ret = -1;
 #ifdef PADDLE_LITE_OS_LINUX
-  return ioctl(fd, req, arg);
+  ret = ioctl(fd, req, arg);
+  if (ret != 0) {
+    throw -1;
+  }
+  return ret;
 #else
-  return -1;
+  return ret;
 #endif
 }
diff --git a/lite/backends/fpga/KD/llapi/zynqmp_api.h b/lite/backends/fpga/KD/llapi/zynqmp_api.h
index 3dd7f1e981ac37fa687fdafa883409e6ad8439c9..7d22de95a2272862c6fe781295bdaab7177a92fe 100644
--- a/lite/backends/fpga/KD/llapi/zynqmp_api.h
+++ b/lite/backends/fpga/KD/llapi/zynqmp_api.h
@@ -46,6 +46,15 @@ struct VersionArgs {

 struct DeviceInfo {
   uint32_t filter_cap;
+  uint32_t version;
+  uint16_t device_type;
+  uint32_t reserved0;
+  uint32_t reserved1;
+  uint32_t reserved2;
+  uint32_t reserved3;
+  uint32_t reserved4;
+  uint32_t reserved5;
+  uint32_t reserved6;
 };

 struct MemoryCopyArgs {
@@ -191,6 +200,7 @@ struct NormalizeParameterArgs {
 };

 struct InplaceArgs {
+  bool leaky_relu_enable;
   bool relu_enable;
   bool power_enable;
   bool normalize_enable;
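Note on the InplaceArgs change above: leaky_relu_enable is added as the first member of the struct, so the old initializer in fpga_cv.cpp no longer covers the right fields; the patch switches to the positional {0, 0, 0} (designated initializers are also a GNU extension in pre-C++20 C++, which is likely why the form was dropped). Positional initialization stays brittle against further reordering, though. A minimal name-based sketch, assuming only the InplaceArgs fields shown in this patch:

    // Zero-initialize every flag, then set the relevant ones by name so a
    // future reordering or extension of InplaceArgs cannot silently shift
    // which member receives each value.
    paddle::zynqmp::InplaceArgs inplace_args = {};
    inplace_args.leaky_relu_enable = false;
    inplace_args.relu_enable = false;
    inplace_args.power_enable = false;
    paddle::zynqmp::config_inplace(inplace_args);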
diff --git a/lite/backends/fpga/lite_tensor.h b/lite/backends/fpga/lite_tensor.h
index 77f6a7ad822a0071539f54d4cb29d69983a99f7e..2f9df3abb08dd15641323f4a3c59d6175f2e481b 100644
--- a/lite/backends/fpga/lite_tensor.h
+++ b/lite/backends/fpga/lite_tensor.h
@@ -57,7 +57,7 @@ class DDimLite {

   DDimLite Slice(int start, int end) const;

-  DDimLite Flattern2D(int col) const {
+  DDimLite Flatten2D(int col) const {
     return DDimLite(std::vector<value_type>(
         {Slice(0, col).production(), Slice(col, size()).production()}));
   }
@@ -118,6 +118,13 @@ class TensorLite {
   const LoD &lod() const { return lod_; }
   LoD *mutable_lod() { return &lod_; }
+  void set_lod(const LoD &lod) { lod_ = lod; }
+
+  PrecisionType precision() const { return precision_; }
+  void set_precision(PrecisionType precision) { precision_ = precision; }
+
+  bool persistable() const { return persistable_; }
+  void set_persistable(bool persistable) { persistable_ = persistable; }

   // T is the data type and R is the return type
   // For OpenCL, the return type can be cl::Buffer
   // and the data type can be float/int8_t.
@@ -147,6 +154,9 @@ class TensorLite {

   void CopyDataFrom(const TensorLite &other);

+  template <typename T>
+  TensorLite Slice(int64_t begin, int64_t end) const;
+
   TargetType target() const { return target_; }

   zynqmp::Tensor *ZynqTensor() const { return zynq_tensor_; }
@@ -168,6 +178,11 @@ class TensorLite {
   LoD lod_;
   size_t memory_size_{};
+  size_t offset_{0};
+
+  PrecisionType precision_{PrecisionType::kUnk};
+  bool persistable_{false};
+
   zynqmp::Tensor *zynq_tensor_ = new zynqmp::Tensor();

   template <typename T>
@@ -219,6 +234,18 @@ bool TensorCompareWith(const TensorT &a, const TensorT &b) {
   if (memcmp(a.raw_data(), b.raw_data(), a.data_size()) != 0) return false;
   return true;
 }
-
+template <typename T>
+TensorLite TensorLite::Slice(int64_t begin, int64_t end) const {
+  int64_t base = numel() / dims_[0];
+
+  TensorLite dst;
+  dst.buffer_ = buffer_;
+  dst.target_ = target_;
+  auto dst_dims = dims_;
+  dst_dims[0] = end - begin;
+  dst.Resize(dst_dims);
+  dst.offset_ = offset_ + static_cast<size_t>(begin * base) * sizeof(T);
+  return dst;
+}
 }  // namespace lite
 }  // namespace paddle
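The Slice<T>() added above is a zero-copy view: dst shares buffer_ with the source tensor and only rewrites dims_[0] and the byte offset_ (hence the sizeof(T) factor). A hypothetical usage sketch; the shapes and the float element type are illustrative, not from this patch:

    // Rows [2, 5) of an 8-row tensor; no payload is copied.
    paddle::lite::TensorLite batch;
    batch.Resize(paddle::lite::DDimLite(std::vector<int64_t>({8, 3, 32, 32})));
    batch.mutable_data<float>();
    paddle::lite::TensorLite sub = batch.Slice<float>(2, 5);
    // sub.dims() is {3, 3, 32, 32}; sub reads the shared buffer starting
    // 2 * (3 * 32 * 32) * sizeof(float) bytes past the original offset.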
diff --git a/lite/kernels/fpga/conv_compute.cc b/lite/kernels/fpga/conv_compute.cc
index fe662c58ee862cae337aaf93eabb499dc80358fc..3e06e103bba61937e48bb4d14eeedd493ab15bba 100644
--- a/lite/kernels/fpga/conv_compute.cc
+++ b/lite/kernels/fpga/conv_compute.cc
@@ -28,10 +28,9 @@ void ConvCompute::PrepareForRun() {
   // ====================================================
   zynqmp::ConvParam& conv_param = pe_.param();

-  param.output->mutable_data<float16>();
-  filter_.setDataType(zynqmp::FP32);
+  // filter_.setDataType(zynqmp::FP32);
   conv_param.input = param.x->ZynqTensor();
   conv_param.output = param.output->ZynqTensor();
   conv_param.filter = param.filter->ZynqTensor();
@@ -40,11 +39,17 @@ void ConvCompute::PrepareForRun() {
   conv_param.paddings = param.paddings;
   conv_param.dilations = param.dilations;
   fill_scale_bias_const(&conv_param);
+  conv_param.bias()->copyFrom(param.bias->ZynqTensor());
+  conv_param.relu.enabled = param.fuse_relu;
   pe_.init();
   pe_.apply();
 }

-void ConvCompute::Run() { pe_.dispatch(); }
+void ConvCompute::Run() {
+  auto& param = this->Param<param_t>();
+  zynqmp::ConvParam& conv_param = pe_.param();
+  pe_.dispatch();
+}
diff --git a/lite/kernels/fpga/conv_compute.h b/lite/kernels/fpga/conv_compute.h
index 42909c0fa049772d0b837a3ec690397d58e19cb4..a023fb46fc8af0ad12d07929137f3eb058e92ef4 100644
--- a/lite/kernels/fpga/conv_compute.h
+++ b/lite/kernels/fpga/conv_compute.h
@@ -37,9 +37,6 @@ class ConvCompute

  private:
   zynqmp::ConvPE pe_;
-  zynqmp::Tensor input_;
-  zynqmp::Tensor output_;
-  zynqmp::Tensor filter_;
 };

 }  // namespace fpga
diff --git a/lite/kernels/fpga/elementwise_compute.h b/lite/kernels/fpga/elementwise_compute.h
index ef60b82f04adae3cb77b09ef19f747d9e19c4bee..7051dd7eeda02537be713ff042a0cf33ac1b618d 100644
--- a/lite/kernels/fpga/elementwise_compute.h
+++ b/lite/kernels/fpga/elementwise_compute.h
@@ -36,9 +36,6 @@ class ElementwiseAddCompute

  private:
   zynqmp::ElementwiseAddPE pe_;
-  zynqmp::Tensor input_x_;
-  zynqmp::Tensor input_y_;
-  zynqmp::Tensor output_;
 };

 class ElementwiseAddActivationCompute
@@ -51,9 +48,6 @@ class ElementwiseAddActivationCompute

  private:
   zynqmp::ElementwiseAddPE pe_;
-  zynqmp::Tensor input_x_;
-  zynqmp::Tensor input_y_;
-  zynqmp::Tensor output_;
 };

 }  // namespace fpga
diff --git a/lite/kernels/fpga/pooling_compute.cc b/lite/kernels/fpga/pooling_compute.cc
index 3a727798d88e1dbd18844c429108ce3c48274034..e4979f8e5762400f453e323f98a6b18ba17a0998 100644
--- a/lite/kernels/fpga/pooling_compute.cc
+++ b/lite/kernels/fpga/pooling_compute.cc
@@ -35,9 +35,6 @@ void PoolCompute::PrepareForRun() {
   pool_param.output = param.output->ZynqTensor();
   pool_param.relu.enabled = false;

-  auto& in_dims = param.x->dims();
-  auto& out_dims = param.output->dims();
-
   pool_param.type = param.pooling_type == "max" ? zynqmp::PoolingType::MAX
                                                 : zynqmp::PoolingType::AVERAGE;
   pool_param.globalPooling = param.global_pooling;
diff --git a/lite/kernels/fpga/pooling_compute.h b/lite/kernels/fpga/pooling_compute.h
index 18eee5f21dbcc3f8db9ecaa771a2146990ca4351..0f5bf106dec81b95cc27f43bf3259748552eb0d4 100644
--- a/lite/kernels/fpga/pooling_compute.h
+++ b/lite/kernels/fpga/pooling_compute.h
@@ -36,8 +36,6 @@ class PoolCompute

  private:
   zynqmp::PoolingPE pe_;
-  zynqmp::Tensor input_;
-  zynqmp::Tensor output_;
 };

 }  // namespace fpga
diff --git a/lite/kernels/fpga/softmax_compute.cc b/lite/kernels/fpga/softmax_compute.cc
index 260f03c114da00b6336245b185e6c3e58ce468d4..63abc76e68ebf15a458ed380d7eabeaf89d5dd2f 100644
--- a/lite/kernels/fpga/softmax_compute.cc
+++ b/lite/kernels/fpga/softmax_compute.cc
@@ -22,7 +22,7 @@ namespace fpga {

 using float16 = zynqmp::float16;

-void SoftmaxCompute::Run() {
+void SoftmaxCompute::PrepareForRun() {
   zynqmp::SoftmaxParam& softmax_param = pe_.param();
   auto& param = Param<param_t>();
@@ -33,6 +33,8 @@ void SoftmaxCompute::Run() {
   pe_.apply();
 }

+void SoftmaxCompute::Run() { pe_.dispatch(); }
+
 }  // namespace fpga
 }  // namespace kernels
 }  // namespace lite
diff --git a/lite/kernels/fpga/softmax_compute.h b/lite/kernels/fpga/softmax_compute.h
index 5eb4af6223ed9166b72d47cf7e6c052c2a547e53..035c9a60ec369b77778332f789d8b5b2a7db2462 100644
--- a/lite/kernels/fpga/softmax_compute.h
+++ b/lite/kernels/fpga/softmax_compute.h
@@ -29,6 +29,7 @@ using float16 = zynqmp::float16;
 class SoftmaxCompute
     : public KernelLite<TARGET(kFPGA), PRECISION(kFP16), DATALAYOUT(kNHWC)> {
  public:
+  void PrepareForRun() override;
   void Run() override;

   virtual ~SoftmaxCompute() = default;
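The softmax split above mirrors the conv kernel: parameter binding plus pe_.init()/pe_.apply() become one-time work in PrepareForRun(), and Run() shrinks to a bare pe_.dispatch(). A sketch of the lifecycle this assumes (KernelLite's runtime, not user code, normally drives these calls):

    // Assumed contract: PrepareForRun() is invoked once before the first
    // Run(), so PE setup is paid per model load rather than per inference.
    kernel->PrepareForRun();
    for (size_t i = 0; i < repeats; ++i) {
      kernel->Run();  // dispatch only; no reconfiguration per call
    }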