diff --git a/src/fpga/api.cpp b/src/fpga/api.cpp index d1014ff87a86efeeefec731ebac05a8a30abe3b1..6acddc893f8f6340149170cb8803010a3c54454e 100644 --- a/src/fpga/api.cpp +++ b/src/fpga/api.cpp @@ -145,8 +145,8 @@ int ComputeFpgaEWAdd(const struct EWAddArgs &args) { } int PerformBypass(const struct BypassArgs &args) { #ifdef FPGA_TEST_MODE - DLOG << " layout_type:" << args.layout_type - << " convert_type:" << args.convert_type; + DLOG << " input_type:" << args.input_data_type + << " input_layout_type:" << args.input_layout_type; DLOG << " image_address:" << args.image.address << " image_scale_address:" << args.image.scale_address << " image_channels:" << args.image.channels diff --git a/src/fpga/api.h b/src/fpga/api.h index 096f847170501784f0ee74b5a98ca91349587cfc..4dfd80f318f07e75644c21e50d6b5e691908245a 100644 --- a/src/fpga/api.h +++ b/src/fpga/api.h @@ -25,23 +25,14 @@ limitations under the License. */ namespace paddle_mobile { namespace fpga { -int open_device(); -int close_device(); - -void* fpga_malloc(size_t size); -void fpga_free(void* ptr); -void fpga_copy(void* dst, const void* src, size_t num); - -enum DataConvertType { - DATA_NO_CONVERT = 0, - DATA_FP32_TO_FP16 = 1, - DATA_FP16_TO_FP32 = 2, +enum DataType { + DATA_TYPE_FP32 = 1, + DATA_TYPE_FP16 = 0, }; -enum LayoutConvertType { - LAYOUT_NO_CONVERT = 0, - LAYOUT_CHW_TO_HWC = 1, - LAYOUT_HWC_TO_CHW = 2, +enum LayoutType { + LAYOUT_CHW = 1, + LAYOUT_HWC = 0, }; struct VersionArgs { @@ -83,7 +74,6 @@ struct ConvArgs { bool relu_enabled; void* sb_address; // scale and bias are interlaced; void* filter_address; - float* filter_scale_address; uint32_t filter_num; uint32_t group_num; @@ -122,16 +112,18 @@ struct PoolingArgs { struct EWAddArgs { bool relu_enabled; - float const0; // output0 = const0 x input0 + const1 x input1; - float const1; + uint32_t const0; // output0 = const0 x input0 + const1 x input1; + uint32_t const1; struct ImageInputArgs image0; struct ImageInputArgs image1; struct ImageOutputArgs 
output; }; struct BypassArgs { - enum DataConvertType convert_type; - enum LayoutConvertType layout_type; + enum DataType input_data_type; + enum DataType output_data_type; + enum LayoutType input_layout_type; + enum LayoutType output_layout_type; struct ImageInputArgs image; struct ImageOutputArgs output; }; @@ -141,6 +133,11 @@ struct FpgaRegWriteArgs { uint64_t value; }; +struct FpgaRegReadArgs { + uint64_t address; + uint64_t value; +}; + #define IOCTL_FPGA_MAGIC 'FPGA' #define IOCTL_VERSION _IOW(IOCTL_FPGA_MAGIC, 01, struct VersionArgs) @@ -184,6 +181,13 @@ enum FPGA_ERR_TYPE { //============================== API ============================= +int open_device(); +int close_device(); + +void* fpga_malloc(size_t size); +void fpga_free(void* ptr); +void fpga_copy(void* dst, const void* src, size_t num); + int PerformBypass(const struct BypassArgs& args); int ComputeFpgaConv(const struct WrapperConvArgs& args); int ComputeFpgaPool(const struct PoolingArgs& args); diff --git a/src/operators/feed_op.h b/src/operators/feed_op.h index e1f8fdf63ff508d9afc59e2230406c46f2c9e4d0..7cfdaa56aedcfdafb0e0de5e7fe9d1897a5794d4 100644 --- a/src/operators/feed_op.h +++ b/src/operators/feed_op.h @@ -56,8 +56,11 @@ class FeedOp : public framework::OperatorBase { auto output_ptr = output->mutable_data(); fpga::BypassArgs args; - args.convert_type = fpga::DATA_FP32_TO_FP16; - args.layout_type = fpga::LAYOUT_NO_CONVERT; + + args.input_data_type = fpga::DATA_TYPE_FP32; + args.output_data_type = fpga::DATA_TYPE_FP16; + args.input_layout_type = fpga::LAYOUT_CHW; + args.output_layout_type = fpga::LAYOUT_HWC; args.image.address = (void *)input_ptr; args.image.channels = input->dims()[1]; args.image.height = input->dims()[2]; diff --git a/src/operators/kernel/fpga/softmax_kernel.cpp b/src/operators/kernel/fpga/softmax_kernel.cpp index 20c86a5c73bc9c35b8f8fd430013bb97d269fb4a..077f7d3c8c870ea8be5f102bf23ec837b32117ac 100644 --- a/src/operators/kernel/fpga/softmax_kernel.cpp +++ 
b/src/operators/kernel/fpga/softmax_kernel.cpp @@ -25,27 +25,34 @@ namespace operators { template <> bool SoftmaxKernel::Init(SoftmaxParam *param) { const Tensor *input = param->InputX(); - - auto input_ptr = input->data(); - auto output = param->Out(); - auto output_ptr = output->mutable_data(); - fpga::BypassArgs args; - args.convert_type = fpga::DATA_FP16_TO_FP32; - args.layout_type = fpga::LAYOUT_NO_CONVERT; - args.image.address = (void *)(input_ptr); - args.image.height = (uint32_t)input->dims()[0]; - args.image.width = (uint32_t)input->dims()[1]; - args.image.channels = 1; - args.output.address = output_ptr; - param->SetFpgaArgs(args); - + if (input->type() == typeid(half)) { + auto input_ptr = input->data(); + auto output_ptr = param->Out(); + fpga::BypassArgs args; + args.input_layout_type = fpga::LAYOUT_HWC; + args.output_layout_type = fpga::LAYOUT_CHW; + args.input_data_type = fpga::DATA_TYPE_FP16; + args.output_data_type = fpga::DATA_TYPE_FP32; + args.image.address = (void *)(input_ptr); + args.image.height = (uint32_t)input->dims()[0]; + args.image.width = (uint32_t)input->dims()[1]; + args.image.channels = 1; + args.output.address = output_ptr; + param->SetFpgaArgs(args); + } return true; } template <> void SoftmaxKernel::Compute( const SoftmaxParam &param) const { - // SoftmaxCompute(param); + DLOG << "======================================= FPGA SoftMAX " + "==============================================="; + const Tensor *in_x = param.InputX(); + Tensor *out = param.Out(); + auto x_dims = in_x->dims(); + out->Resize(x_dims); + math::SoftmaxFuntor()(in_x, out); } template class SoftmaxKernel;