diff --git a/src/fpga/api/fpga_api.cpp b/src/fpga/api/fpga_api.cpp index 8167d4cf7d1fd343e15754bafc7c77ee6f52a822..f91c21beb2d6b5fbce86b56d49b7d8c6a3ec9219 100644 --- a/src/fpga/api/fpga_api.cpp +++ b/src/fpga/api/fpga_api.cpp @@ -36,7 +36,7 @@ static int fd = -1; static const char *device_path = "/dev/fpgadrv0"; -static inline int do_ioctl(int req, void *arg) { - return ioctl(req, (long unsigned int)arg); +static inline int do_ioctl(int req, const void *arg) { + return ioctl(fd, req, (uint64_t)arg); } int open_device() { @@ -58,9 +58,15 @@ void fpga_copy(void *dest, const void *src, size_t num) { memcpy(dest, src, num); } -int ComputeFpgaConv(struct ConvArgs args) {} -int ComputeFpgaPool(struct PoolingArgs args) {} -int ComputeFpgaEWAdd(struct EWAddArgs args) {} +int ComputeFpgaConv(const struct ConvArgs &args) { + return do_ioctl(IOCTL_CONFIG_CONV, &args); +} +int ComputeFpgaPool(const struct PoolingArgs &args) { + return do_ioctl(IOCTL_CONFIG_POOLING, &args); +} +int ComputeFpgaEWAdd(const struct EWAddArgs &args) { + return do_ioctl(IOCTL_CONFIG_EW, &args); +} } // namespace fpga } // namespace paddle_mobile diff --git a/src/fpga/api/fpga_api.h b/src/fpga/api/fpga_api.h index 4225e887c3dadeddf48b173db72412a5209d989d..08635cdb5c01b50f59eb35554bba9a7b70f6ebfb 100644 --- a/src/fpga/api/fpga_api.h +++ b/src/fpga/api/fpga_api.h @@ -31,6 +31,18 @@ void* fpga_malloc(size_t size); void fpga_free(void* ptr); void fpga_copy(void* dst, const void* src, size_t num); +enum DataConvertType { + DATA_NO_CONVERT = 0, + DATA_FP32_TO_FP16 = 1, + DATA_FP16_TO_FP32 = 2, +}; + +enum LayoutConvertType { + LAYOUT_NO_CONVERT = 0, + LAYOUT_CHW_TO_HWC = 1, + LAYOUT_HWC_TO_CHW = 2, +}; + struct VersionArgs { void* buffer; }; @@ -79,7 +91,7 @@ struct ConvArgs { uint32_t filter_num; uint32_t group_num; - struct BNArgs bn; + void* sb_address; // scale and bias are interlaced; struct KernelArgs kernel; struct ImageInputArgs image; // input image; struct ImageOutputArgs output; @@ -102,6 +114,12 @@ struct EWAddArgs { struct ImageOutputArgs output; }; +struct BypassArgs { + enum DataConvertType
convert_type; + struct ImageInputArgs image; + struct ImageOutputArgs output; +}; + struct FpgaRegWriteArgs { uint64_t address; // uint64_t value; @@ -115,8 +133,6 @@ struct FpgaRegReadArgs { #define IOCTL_FPGA_MAGIC 'FPGA' #define IOCTL_VERSION _IOW(IOCTL_FPGA_MAGIC, 01, struct VersionArgs) -#define IOCTL_FPGA_REG_READ _IOW(IOCTL_FPGA_MAGIC, 02, struct FpgaRegReadArgs) -#define IOCTL_FPGA_REG_WRITE _IOW(IOCTL_FPGA_MAGIC, 03, struct FpgaRegWriteArgs) #define IOCTL_SEPARATOR_0 10 @@ -127,6 +143,8 @@ struct FpgaRegReadArgs { #define IOCTL_CONFIG_CONV _IOW(IOCTL_FPGA_MAGIC, 21, struct ConvArgs) #define IOCTL_CONFIG_POOLING _IOW(IOCTL_FPGA_MAGIC, 22, struct PoolingArgs) #define IOCTL_CONFIG_EW _IOW(IOCTL_FPGA_MAGIC, 23, struct EWAddArgs) +#define IOCTL_FPGA_REG_READ _IOW(IOCTL_FPGA_MAGIC, 28, struct FpgaRegReadArgs) +#define IOCTL_FPGA_REG_WRITE _IOW(IOCTL_FPGA_MAGIC, 29, struct FpgaRegWriteArgs) enum FPGA_ERR_TYPE { ERR_IOCTL_CMD = -1, @@ -154,9 +172,9 @@ enum FPGA_ERR_TYPE { //============================== API ============================= -int ComputeFpgaConv(struct ConvArgs args); -int ComputeFpgaPool(struct PoolingArgs args); -int ComputeFpgaEWAdd(struct EWAddArgs args); +int ComputeFpgaConv(const struct ConvArgs& args); +int ComputeFpgaPool(const struct PoolingArgs& args); +int ComputeFpgaEWAdd(const struct EWAddArgs& args); } // namespace fpga } // namespace paddle_mobile diff --git a/src/fpga/fpga_quantilization.h b/src/fpga/fpga_quantilization.h new file mode 100644 index 0000000000000000000000000000000000000000..d2d2d61835de84c94760c10a25a973d4eaff1fbe --- /dev/null +++ b/src/fpga/fpga_quantilization.h @@ -0,0 +1,67 @@ +/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ +#pragma once + +#include <memory> +#include "common/types.h" +#include "framework/lod_tensor.h" +#include "framework/operator.h" +#include "framework/scope.h" +#include "framework/tensor.h" + +namespace paddle_mobile { + +inline bool is_conv(const std::string &type) { + if (type.compare(G_OP_TYPE_CONV) == 0) { + return true; + } + if (type.compare(G_OP_TYPE_FUSION_CONV_ADD) == 0) { + return true; + } + if (type.compare(G_OP_TYPE_FUSION_CONV_ADD_RELU) == 0) { + return true; + } + if (type.compare(G_OP_TYPE_FUSION_CONV_BN_RELU) == 0) { + return true; + } + if (type.compare(G_OP_TYPE_FUSION_CONV_ADD_BN) == 0) { + return true; + } + return false; +} + +template <typename Dtype> +void quantilize_op(std::shared_ptr<framework::OperatorBase<Dtype>> op, + std::shared_ptr<framework::Scope> scope) { + if (!is_conv(op.get()->Type())) { + return; + } + framework::Tensor* filter = nullptr; + auto var_vec = op.get()->Inputs().at("Filter"); + if (!var_vec.empty()) { + auto var = scope.get()->FindVar(var_vec[0]); + filter = var->template GetMutable<framework::LoDTensor>(); + } + float scale = 0; + + // 32bit filter -> 8bit filter; + if (filter != nullptr && filter->type() == typeid(float)) { + framework::Tensor* originalFilter = filter; + std::unique_ptr<framework::Tensor> quantFilter(new framework::Tensor()); + float* floatData = originalFilter->data<float>(); + int8_t* intData = quantFilter->mutable_data<int8_t>(originalFilter->dims()); + } +} + +} // namespace paddle_mobile diff --git a/src/framework/tensor.h b/src/framework/tensor.h index 388788216fe45b66441a0390e2ef09c2d51c16dc..3dba76d790d44f154f359454250d15b81ff717a3 100644 --- a/src/framework/tensor.h +++ b/src/framework/tensor.h @@ -260,7 +260,7 @@ class Tensor { inline float *scale_pointer() { return
&scale; } }; - struct &fpga_args() const { + struct FPGAArgs &fpga_args() { return fpgaArgs_; } #endif diff --git a/src/io/executor.cpp b/src/io/executor.cpp index d6434b64aa752fd62bc637a882298228d59880b8..c09fe2c58532437336307ce007532d43689d8fd2 100644 --- a/src/io/executor.cpp +++ b/src/io/executor.cpp @@ -32,6 +32,10 @@ limitations under the License. */ #include "common/threadpool.h" #endif +#ifdef PADDLE_MOBILE_FPGA +#include "fpga/fpga_quantilization.h" +#endif + namespace paddle_mobile { using framework::Variable; @@ -96,6 +100,11 @@ Executor<Dtype, P>::Executor(const framework::Program<Dtype> p, int batch_size, for (const auto &op : ops) { op->Init(); } +#ifdef PADDLE_MOBILE_FPGA + for (const auto &op : ops) { + quantilize_op(op, program_.scope); + } +#endif } template <typename Dtype, Precision P> diff --git a/test/fpga/test_tensor_quant.cpp b/test/fpga/test_tensor_quant.cpp new file mode 100644 index 0000000000000000000000000000000000000000..1e30b9be551c608c5200460ebb80526270da5aed --- /dev/null +++ b/test/fpga/test_tensor_quant.cpp @@ -0,0 +1,34 @@ +/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License.
*/ + +#include <iostream> +#include "../test_helper.h" +#include "../test_include.h" + +int main() { + paddle_mobile::PaddleMobile<paddle_mobile::CPU> paddle_mobile; + bool optimize = false; auto time1 = time(); + if (paddle_mobile.Load(g_googlenet, optimize)) { + auto time2 = time(); + DLOG << "load cost: " << time_diff(time1, time2) << "ms"; + std::vector<float> input; + std::vector<int64_t> dims{1, 3, 224, 224}; + GetInput(g_test_image_1x3x224x224, &input, dims); + auto time3 = time(); + auto vec_result = paddle_mobile.Predict(input, dims); + auto time4 = time(); + DLOG << "predict cost :" << time_diff(time3, time4) << "ms"; + } + return 0; +}