From 686b2c9388502f5a4d34fb5ee6e431054b5c70e8 Mon Sep 17 00:00:00 2001 From: hanbuhe Date: Fri, 3 Aug 2018 19:32:38 +0800 Subject: [PATCH] FPGA conv added filter scale --- src/fpga/api/fpga_api.cpp | 13 +++++--- src/fpga/api/fpga_api.h | 12 +++---- src/fpga/fpga_quantilization.h | 57 +++++++++++++--------------------- src/framework/tensor.h | 5 ++- src/io/executor.cpp | 9 ------ 5 files changed, 39 insertions(+), 57 deletions(-) diff --git a/src/fpga/api/fpga_api.cpp b/src/fpga/api/fpga_api.cpp index f91c21beb2..779c846d1f 100644 --- a/src/fpga/api/fpga_api.cpp +++ b/src/fpga/api/fpga_api.cpp @@ -35,7 +35,7 @@ namespace fpga { static int fd = -1; static const char *device_path = "/dev/fpgadrv0"; -static inline int do_ioctl(int req, void *arg) { +static inline int do_ioctl(int req, const void *arg) { return ioctl(req, (unsigned int64_t)arg); } @@ -58,12 +58,17 @@ void fpga_copy(void *dest, const void *src, size_t num) { memcpy(dest, src, num); } -int ComputeFpgaConv(const struct ConvArgs &args) { return do_ioctl(21, &args); } +int ComputeFpgaConv(const struct ConvArgs &args) { + return do_ioctl(IOCTL_CONFIG_CONV, &args); +} int ComputeFpgaPool(const struct PoolingArgs &args) { - return do_ioctl(22, &args); + return do_ioctl(IOCTL_CONFIG_POOLING, &args); } int ComputeFpgaEWAdd(const struct EWAddArgs &args) { - return do_ioctl(23, &args); + return do_ioctl(IOCTL_CONFIG_EW, &args); +} +int PerformBypass(const struct BypassArgs &args) { + return do_ioctl(IOCTL_CONFIG_BYPASS, &args); } } // namespace fpga diff --git a/src/fpga/api/fpga_api.h b/src/fpga/api/fpga_api.h index 08635cdb5c..0823e19a7f 100644 --- a/src/fpga/api/fpga_api.h +++ b/src/fpga/api/fpga_api.h @@ -86,12 +86,12 @@ struct ImageOutputArgs { struct ConvArgs { bool relu_enabled; - void* bias_address; + void* sb_address; // scale and bias are interlaced; void* filter_address; + float* filter_scale_address; uint32_t filter_num; uint32_t group_num; - void* sb_address; // scale and bias are interlaced; struct KernelArgs kernel; struct ImageInputArgs image; // input image; struct ImageOutputArgs output; @@ -116,6 +116,7 @@ struct EWAddArgs { struct BypassArgs { enum DataConvertType convert_type; + enum LayoutConvertType layout_type; struct ImageInputArgs image; struct ImageOutputArgs output; }; @@ -125,11 +126,6 @@ struct FpgaRegWriteArgs { uint64_t value; }; -struct FpgaRegReadArgs { - uint64_t address; - uint64_t value; -}; - #define IOCTL_FPGA_MAGIC 'FPGA' #define IOCTL_VERSION _IOW(IOCTL_FPGA_MAGIC, 01, struct VersionArgs) @@ -143,6 +139,7 @@ struct FpgaRegReadArgs { #define IOCTL_CONFIG_CONV _IOW(IOCTL_FPGA_MAGIC, 21, struct ConvArgs) #define IOCTL_CONFIG_POOLING _IOW(IOCTL_FPGA_MAGIC, 22, struct PoolingArgs) #define IOCTL_CONFIG_EW _IOW(IOCTL_FPGA_MAGIC, 23, struct EWAddArgs) +#define IOCTL_CONFIG_BYPASS _IOW(IOCTL_FPGA_MAGIC, 24, struct BypassArgs) #define IOCTL_FPGA_REG_READ _IOW(IOCTL_FPGA_MAGIC, 28, struct FpgaRegReadArgs) #define IOCTL_FPGA_REG_WRITE _IOW(IOCTL_FPGA_MAGIC, 29, struct FpgaRegWriteArgs) @@ -172,6 +169,7 @@ enum FPGA_ERR_TYPE { //============================== API ============================= +int PerformBypass(const struct BypassArgs& args); int ComputeFpgaConv(const struct ConvArgs& args); int ComputeFpgaPool(const struct PoolingArgs& args); int ComputeFpgaEWAdd(const struct EWAddArgs& args); diff --git a/src/fpga/fpga_quantilization.h b/src/fpga/fpga_quantilization.h index d2d2d61835..7a1df04732 100644 --- a/src/fpga/fpga_quantilization.h +++ b/src/fpga/fpga_quantilization.h @@ -13,55 +13,40 @@ See the License for the specific language governing permissions and limitations under the License. */ #pragma once -#include #include "common/types.h" #include "framework/lod_tensor.h" -#include "framework/operator.h" -#include "framework/scope.h" #include "framework/tensor.h" namespace paddle_mobile { -bool is_conv(std::string type) { - if (type.compare(G_OP_TYPE_CONV) == 0) { - return true; - } - if (type.compare(G_OP_TYPE_FUSION_CONV_ADD) == 0) { - return true; - } - if (type.compare(G_OP_TYPE_FUSION_CONV_ADD_RELU) == 0) { - return true; - } - if (type.compare(G_OP_TYPE_FUSION_CONV_BN_RELU) == 0) { - return true; - } - if (type.compare(G_OP_TYPE_FUSION_CONV_ADD_BN) == 0) { - return true; - } - return false; -} - template -void quantilize_op(std::shared_ptr> op, - std::shared_ptr scope) { - if (!is_conv(op.get()->Type())) { - return; - } - framework::Tensor* filter = nullptr; - auto var_vec = op.get()->Inputs().at("Filter"); - if (!var_vec.empty()) { - auto var = scope.get()->FindVar(var_vec[0]); - filter = var->template GetMutable(); - } +framework::Tensor* quantilize_filter(framework::Tensor* filter) { float scale = 0; - // 32bit filter -> 8bit filter; + float min = 0f; + float max = 0f; if (filter->type() == typeid(float)) { + float* floatData = originalFilter->data(); + for (int i = 0; i < filter->numel(); ++i) { + min = std::min(min, floatData[i]); + max = std::max(max, floatData[i]); + } + + float fix_range = (float)((1 << (8 - 1)) - 1); + float float_range = max; + scale = (float_range / fix_range); + framework::Tensor* originalFilter = filter; framework::Tensor* quantFilter = new framework::Tensor(); - float* floatData = originalFilter->data(); int8_t* intData = quantFilter->mutable_data(); - } + for (int i = 0; i < filter->numel(); ++i) { + intData[i] = (int8_t)floatData[i] * scale; + } + quantFilter.scale = scale; + // NCHW -> NHWC; + return quantFilter; + } + return filter; } } // namespace paddle_mobile diff --git a/src/framework/tensor.h b/src/framework/tensor.h index 797fcf5bff..8bd6b56e23 100644 --- a/src/framework/tensor.h +++ b/src/framework/tensor.h @@ -257,7 +257,10 @@ class Tensor { struct FPGAArgs { float scale; - inline float *scale_pointer() { return &scale; } + inline const float *scale_pointer() { + return &scale; + } + }; struct FPGAArgs fpga_args() const { diff --git a/src/io/executor.cpp b/src/io/executor.cpp index c09fe2c585..d6434b64aa 100644 --- a/src/io/executor.cpp +++ b/src/io/executor.cpp @@ -32,10 +32,6 @@ limitations under the License. */ #include "common/threadpool.h" #endif -#ifdef PADDLE_MOBILE_FPGA -#include "fpga/fpga_quantilization.h" -#endif - namespace paddle_mobile { using framework::Variable; @@ -100,11 +96,6 @@ Executor::Executor(const framework::Program p, int batch_size, for (const auto &op : ops) { op->Init(); } -#ifdef PADDLE_MOBILE_FPGA - for (const auto &op : ops) { - quantilize_op(op, program_.scope); - } -#endif } template -- GitLab