Commit 0720653b authored by TianXiaogang, committed by Yan Chunwei

fix fpga compile problem and kernels (#1989)

Parent 019f5b8e
......@@ -34,6 +34,7 @@
.DS_Store
build/
build_fpga/
.idea/
......
......@@ -23,9 +23,7 @@ void fpga_resize(float* input,
uint8_t* output,
int output_width,
int output_height) {
paddle::zynqmp::InplaceArgs inplace_args = {
.relu_enable = 0, .power_enable = 0,
};
paddle::zynqmp::InplaceArgs inplace_args = {0, 0, 0};
paddle::zynqmp::config_inplace(inplace_args);
paddle::zynqmp::ImageInputArgs input_args = {nullptr};
......
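Editor's note: the call site above drops the designated-initializer syntax (`.relu_enable = 0, ...`), which is only standard C++ from C++20 on and is likely the "fpga compile problem" named in the commit title, in favor of positional aggregate initialization. That ties the initializer to the member order of zynqmp::InplaceArgs. A minimal sketch, assuming the first four fields shown further down in this diff (the real struct may have more members):

#include <cstdint>

// Assumed layout, mirroring the zynqmp::InplaceArgs fields visible in this diff.
struct InplaceArgs {
  bool leaky_relu_enable;
  bool relu_enable;
  bool power_enable;
  bool normalize_enable;
};

int main() {
  // Positional aggregate init fills members in declaration order, so {0, 0, 0}
  // clears leaky_relu, relu and power; the trailing normalize_enable is
  // value-initialized to false.
  InplaceArgs inplace_args = {0, 0, 0};
  return inplace_args.normalize_enable ? 1 : 0;
}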
......@@ -39,10 +39,14 @@ static size_t memory_size_max = 0;
static size_t memory_size = 0;
static inline int do_ioctl(uint64_t req, const void *arg) {
int ret = -1;
#ifdef PADDLE_LITE_OS_LINUX
return ioctl(fd, req, arg);
ret = ioctl(fd, req, arg);
if (ret != 0) {
throw -1;
}
#else
return -1;
return ret;
#endif
}
......
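Editor's note: with this change a failed ioctl no longer propagates its status code; do_ioctl throws -1 instead, so callers that only test the return value will not observe failures. A minimal caller sketch under that assumption; only do_ioctl itself comes from this diff, the request code and argument are placeholders:

// 'req' and 'args' stand in for whatever request code and argument struct a
// real call site passes. On non-Linux builds do_ioctl still returns -1.
bool safe_ioctl(uint64_t req, const void *args) {
  try {
    return do_ioctl(req, args) == 0;
  } catch (int) {
    // do_ioctl throws -1 when the underlying ioctl reports a non-zero result.
    return false;
  }
}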
......@@ -46,6 +46,15 @@ struct VersionArgs {
struct DeviceInfo {
uint32_t filter_cap;
uint32_t version;
uint16_t device_type;
uint32_t reserved0;
uint32_t reserved1;
uint32_t reserved2;
uint32_t reserved3;
uint32_t reserved4;
uint32_t reserved5;
uint32_t reserved6;
};
struct MemoryCopyArgs {
......@@ -191,6 +200,7 @@ struct NormalizeParameterArgs {
};
struct InplaceArgs {
bool leaky_relu_enable;
bool relu_enable;
bool power_enable;
bool normalize_enable;
......
......@@ -57,7 +57,7 @@ class DDimLite {
DDimLite Slice(int start, int end) const;
DDimLite Flattern2D(int col) const {
DDimLite Flatten2D(int col) const {
return DDimLite(std::vector<value_type>(
{Slice(0, col).production(), Slice(col, size()).production()}));
}
......@@ -118,6 +118,13 @@ class TensorLite {
const LoD &lod() const { return lod_; }
LoD *mutable_lod() { return &lod_; }
void set_lod(const LoD &lod) { lod_ = lod; }
PrecisionType precision() const { return precision_; }
void set_precision(PrecisionType precision) { precision_ = precision; }
bool persistable() const { return persistable_; }
void set_persistable(bool persistable) { persistable_ = persistable; }
// T is the data type and R is the return type
// For OpenCL, the return type can be cl::Buffer
// and the data type can be float/int8_t.
......@@ -147,6 +154,9 @@ class TensorLite {
void CopyDataFrom(const TensorLite &other);
template <typename T>
TensorLite Slice(int64_t begin, int64_t end) const;
TargetType target() const { return target_; }
zynqmp::Tensor *ZynqTensor() const { return zynq_tensor_; }
......@@ -168,6 +178,11 @@ class TensorLite {
LoD lod_;
size_t memory_size_{};
size_t offset_{0};
PrecisionType precision_{PrecisionType::kUnk};
bool persistable_{false};
zynqmp::Tensor *zynq_tensor_ = new zynqmp::Tensor();
template <typename T>
......@@ -219,6 +234,18 @@ bool TensorCompareWith(const TensorT &a, const TensorT &b) {
if (memcmp(a.raw_data(), b.raw_data(), a.data_size()) != 0) return false;
return true;
}
template <typename T>
TensorLite TensorLite::Slice(int64_t begin, int64_t end) const {
int64_t base = numel() / dims_[0];
TensorLite dst;
dst.buffer_ = buffer_;
dst.target_ = target_;
auto dst_dims = dims_;
dst_dims[0] = end - begin;
dst.Resize(dst_dims);
dst.offset_ = offset_ + static_cast<size_t>(begin * base) * sizeof(T);
return dst;
}
} // namespace lite
} // namespace paddle
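Editor's note: the new TensorLite::Slice<T>(begin, end) shares the underlying buffer, shrinks dims_[0] to end - begin, and advances offset_ by begin * (numel() / dims_[0]) * sizeof(T); no data is copied. A small usage sketch, assuming the usual Resize/mutable_data accessors on TensorLite and a made-up {4, 3} float tensor:

paddle::lite::TensorLite t;
t.Resize({4, 3});
auto *d = t.mutable_data<float>();
for (int i = 0; i < 12; ++i) d[i] = static_cast<float>(i);

// Rows 1..2 (end is exclusive): a {2, 3} view whose offset_ is
// 1 * (12 / 4) * sizeof(float) = 12 bytes into the shared buffer.
paddle::lite::TensorLite rows = t.Slice<float>(1, 3);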
......@@ -28,10 +28,9 @@ void ConvCompute::PrepareForRun() {
// ====================================================
zynqmp::ConvParam& conv_param = pe_.param();
param.output->mutable_data<float16>();
filter_.setDataType(zynqmp::FP32);
// filter_.setDataType(zynqmp::FP32);
conv_param.input = param.x->ZynqTensor();
conv_param.output = param.output->ZynqTensor();
conv_param.filter = param.filter->ZynqTensor();
......@@ -40,11 +39,17 @@ void ConvCompute::PrepareForRun() {
conv_param.paddings = param.paddings;
conv_param.dilations = param.dilations;
fill_scale_bias_const(&conv_param);
conv_param.bias()->copyFrom(param.bias->ZynqTensor());
conv_param.relu.enabled = param.fuse_relu;
pe_.init();
pe_.apply();
}
void ConvCompute::Run() { pe_.dispatch(); }
void ConvCompute::Run() {
auto& param = this->Param<param_t>();
zynqmp::ConvParam& conv_param = pe_.param();
pe_.dispatch();
}
} // namespace fpga
} // namespace kernels
......
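Editor's note: the conv kernel now wires the fused parameters straight into pe_.param() and drops its private zynqmp::Tensor copies (removed in the header hunk below); the one-time work sits in PrepareForRun and Run reduces to a dispatch. A condensed sketch of that split, using only calls visible in this diff, with class and include boilerplate omitted:

void ConvCompute::PrepareForRun() {
  auto& param = this->Param<param_t>();
  zynqmp::ConvParam& conv_param = pe_.param();

  param.output->mutable_data<float16>();
  conv_param.input = param.x->ZynqTensor();
  conv_param.output = param.output->ZynqTensor();
  conv_param.filter = param.filter->ZynqTensor();
  // strides / paddings / dilations are wired the same way (elided here)
  fill_scale_bias_const(&conv_param);
  conv_param.relu.enabled = param.fuse_relu;

  pe_.init();   // one-time allocation/conversion
  pe_.apply();  // bake the configuration into the processing element
}

void ConvCompute::Run() { pe_.dispatch(); }  // per-inference launch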
......@@ -37,9 +37,6 @@ class ConvCompute
private:
zynqmp::ConvPE pe_;
zynqmp::Tensor input_;
zynqmp::Tensor output_;
zynqmp::Tensor filter_;
};
} // namespace fpga
......
......@@ -36,9 +36,6 @@ class ElementwiseAddCompute
private:
zynqmp::ElementwiseAddPE pe_;
zynqmp::Tensor input_x_;
zynqmp::Tensor input_y_;
zynqmp::Tensor output_;
};
class ElementwiseAddActivationCompute
......@@ -51,9 +48,6 @@ class ElementwiseAddActivationCompute
private:
zynqmp::ElementwiseAddPE pe_;
zynqmp::Tensor input_x_;
zynqmp::Tensor input_y_;
zynqmp::Tensor output_;
};
} // namespace fpga
......
......@@ -35,9 +35,6 @@ void PoolCompute::PrepareForRun() {
pool_param.output = param.output->ZynqTensor();
pool_param.relu.enabled = false;
auto& in_dims = param.x->dims();
auto& out_dims = param.output->dims();
pool_param.type = param.pooling_type == "max" ? zynqmp::PoolingType::MAX
: zynqmp::PoolingType::AVERAGE;
pool_param.globalPooling = param.global_pooling;
......
......@@ -36,8 +36,6 @@ class PoolCompute
private:
zynqmp::PoolingPE pe_;
zynqmp::Tensor input_;
zynqmp::Tensor output_;
};
} // namespace fpga
......
......@@ -22,7 +22,7 @@ namespace fpga {
using float16 = zynqmp::float16;
void SoftmaxCompute::Run() {
void SoftmaxCompute::PrepareForRun() {
zynqmp::SoftmaxParam& softmax_param = pe_.param();
auto& param = Param<operators::SoftmaxParam>();
......@@ -33,6 +33,8 @@ void SoftmaxCompute::Run() {
pe_.apply();
}
void SoftmaxCompute::Run() { pe_.dispatch(); }
} // namespace fpga
} // namespace kernels
} // namespace lite
......
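Editor's note: SoftmaxCompute gets the same split. The parameter wiring and pe_.apply() that used to run inside Run move into the new PrepareForRun, so they execute once per kernel instance instead of on every inference, and Run shrinks to a single dispatch. A condensed sketch using only names visible in this diff; the input/output wiring hidden by the collapsed hunk is marked:

void SoftmaxCompute::PrepareForRun() {
  zynqmp::SoftmaxParam& softmax_param = pe_.param();
  auto& param = Param<operators::SoftmaxParam>();
  // ... input/output wiring elided by the collapsed hunk ...
  pe_.apply();
}

void SoftmaxCompute::Run() { pe_.dispatch(); }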
......@@ -29,6 +29,7 @@ using float16 = zynqmp::float16;
class SoftmaxCompute
: public KernelLite<TARGET(kFPGA), PRECISION(kFP16), DATALAYOUT(kNHWC)> {
public:
void PrepareForRun() override;
void Run() override;
virtual ~SoftmaxCompute() = default;
......