diff --git a/src/fpga/api/fpga_api.cpp b/src/fpga/api/fpga_api.cpp
index f91c21beb2d6b5fbce86b56d49b7d8c6a3ec9219..779c846d1f3c465e5113f805b2b3856a1a7894c5 100644
--- a/src/fpga/api/fpga_api.cpp
+++ b/src/fpga/api/fpga_api.cpp
@@ -35,7 +35,7 @@ namespace fpga {
 static int fd = -1;
 static const char *device_path = "/dev/fpgadrv0";
 
-static inline int do_ioctl(int req, void *arg) {
+static inline int do_ioctl(int req, const void *arg) {
   return ioctl(req, (unsigned int64_t)arg);
 }
 
@@ -58,12 +58,17 @@ void fpga_copy(void *dest, const void *src, size_t num) {
   memcpy(dest, src, num);
 }
 
-int ComputeFpgaConv(const struct ConvArgs &args) { return do_ioctl(21, &args); }
+int ComputeFpgaConv(const struct ConvArgs &args) {
+  return do_ioctl(IOCTL_CONFIG_CONV, &args);
+}
 int ComputeFpgaPool(const struct PoolingArgs &args) {
-  return do_ioctl(22, &args);
+  return do_ioctl(IOCTL_CONFIG_POOLING, &args);
 }
 int ComputeFpgaEWAdd(const struct EWAddArgs &args) {
-  return do_ioctl(23, &args);
+  return do_ioctl(IOCTL_CONFIG_EW, &args);
+}
+int PerformBypass(const struct BypassArgs &args) {
+  return do_ioctl(IOCTL_CONFIG_BYPASS, &args);
 }
 
 }  // namespace fpga
diff --git a/src/fpga/api/fpga_api.h b/src/fpga/api/fpga_api.h
index 08635cdb5c01b50f59eb35554bba9a7b70f6ebfb..0823e19a7f9dfaba709b6ad2723e3228c27e2e0f 100644
--- a/src/fpga/api/fpga_api.h
+++ b/src/fpga/api/fpga_api.h
@@ -86,12 +86,12 @@ struct ImageOutputArgs {
 
 struct ConvArgs {
   bool relu_enabled;
-  void* bias_address;
+  void* sb_address;  // scale and bias are interlaced;
   void* filter_address;
+  float* filter_scale_address;
   uint32_t filter_num;
   uint32_t group_num;
-  void* sb_address;  // scale and bias are interlaced;
   struct KernelArgs kernel;
   struct ImageInputArgs image;  // input image;
   struct ImageOutputArgs output;
@@ -116,6 +116,7 @@ struct EWAddArgs {
 
 struct BypassArgs {
   enum DataConvertType convert_type;
+  enum LayoutConvertType layout_type;
   struct ImageInputArgs image;
   struct ImageOutputArgs output;
 };
@@ -125,11 +126,6 @@ struct FpgaRegWriteArgs {
   uint64_t value;
 };
 
-struct FpgaRegReadArgs {
-  uint64_t address;
-  uint64_t value;
-};
-
 #define IOCTL_FPGA_MAGIC 'FPGA'
 
 #define IOCTL_VERSION _IOW(IOCTL_FPGA_MAGIC, 01, struct VersionArgs)
@@ -143,6 +139,7 @@ struct FpgaRegReadArgs {
 #define IOCTL_CONFIG_CONV _IOW(IOCTL_FPGA_MAGIC, 21, struct ConvArgs)
 #define IOCTL_CONFIG_POOLING _IOW(IOCTL_FPGA_MAGIC, 22, struct PoolingArgs)
 #define IOCTL_CONFIG_EW _IOW(IOCTL_FPGA_MAGIC, 23, struct EWAddArgs)
+#define IOCTL_CONFIG_BYPASS _IOW(IOCTL_FPGA_MAGIC, 24, struct BypassArgs)
 #define IOCTL_FPGA_REG_READ _IOW(IOCTL_FPGA_MAGIC, 28, struct FpgaRegReadArgs)
 #define IOCTL_FPGA_REG_WRITE _IOW(IOCTL_FPGA_MAGIC, 29, struct FpgaRegWriteArgs)
 
@@ -172,6 +169,7 @@ enum FPGA_ERR_TYPE {
 
 //============================== API =============================
 
+int PerformBypass(const struct BypassArgs& args);
 int ComputeFpgaConv(const struct ConvArgs& args);
 int ComputeFpgaPool(const struct PoolingArgs& args);
 int ComputeFpgaEWAdd(const struct EWAddArgs& args);
diff --git a/src/fpga/fpga_quantilization.h b/src/fpga/fpga_quantilization.h
index d2d2d61835de84c94760c10a25a973d4eaff1fbe..7a1df04732580c7225423cedeb277beca3edc154 100644
--- a/src/fpga/fpga_quantilization.h
+++ b/src/fpga/fpga_quantilization.h
@@ -13,55 +13,40 @@ See the License for the specific language governing permissions and
 limitations under the License. */
 #pragma once
 
-#include <string>
+#include <algorithm>
 #include "common/types.h"
 #include "framework/lod_tensor.h"
-#include "framework/operator.h"
-#include "framework/scope.h"
 #include "framework/tensor.h"
 
 namespace paddle_mobile {
 
-bool is_conv(std::string type) {
-  if (type.compare(G_OP_TYPE_CONV) == 0) {
-    return true;
-  }
-  if (type.compare(G_OP_TYPE_FUSION_CONV_ADD) == 0) {
-    return true;
-  }
-  if (type.compare(G_OP_TYPE_FUSION_CONV_ADD_RELU) == 0) {
-    return true;
-  }
-  if (type.compare(G_OP_TYPE_FUSION_CONV_BN_RELU) == 0) {
-    return true;
-  }
-  if (type.compare(G_OP_TYPE_FUSION_CONV_ADD_BN) == 0) {
-    return true;
-  }
-  return false;
-}
-
 template <typename Dtype>
-void quantilize_op(std::shared_ptr<framework::OperatorBase<Dtype>> op,
-                   std::shared_ptr<framework::Scope> scope) {
-  if (!is_conv(op.get()->Type())) {
-    return;
-  }
-  framework::Tensor* filter = nullptr;
-  auto var_vec = op.get()->Inputs().at("Filter");
-  if (!var_vec.empty()) {
-    auto var = scope.get()->FindVar(var_vec[0]);
-    filter = var->template GetMutable<framework::LoDTensor>();
-  }
+framework::Tensor* quantilize_filter(framework::Tensor* filter) {
   float scale = 0;
-  // 32bit filter -> 8bit filter;
+  float min = 0.0f;
+  float max = 0.0f;
   if (filter->type() == typeid(float)) {
+    float* floatData = filter->data<float>();
+    for (int i = 0; i < filter->numel(); ++i) {
+      min = std::min(min, floatData[i]);
+      max = std::max(max, floatData[i]);
+    }
+
+    float fix_range = (float)((1 << (8 - 1)) - 1);
+    float float_range = max;
+    scale = (float_range / fix_range);
+
     framework::Tensor* originalFilter = filter;
     framework::Tensor* quantFilter = new framework::Tensor();
-    float* floatData = originalFilter->data<float>();
+    quantFilter->Resize(filter->dims());
     int8_t* intData = quantFilter->mutable_data<int8_t>();
-  }
+    for (int i = 0; i < filter->numel(); ++i) {
+      intData[i] = (int8_t)(floatData[i] / scale);
+    }
+    quantFilter->scale = scale;
+    // NCHW -> NHWC;
+    return quantFilter;
+  }
+  return filter;
 }
 
 }  // namespace paddle_mobile
diff --git a/src/framework/tensor.h b/src/framework/tensor.h
index 797fcf5bffbe5e738fe352d1ca84602f0e5d86a0..8bd6b56e233e3cb69ab0232ab3dac57a865480ed 100644
--- a/src/framework/tensor.h
+++ b/src/framework/tensor.h
@@ -257,7 +257,10 @@ class Tensor {
   struct FPGAArgs {
     float scale;
 
-    inline float *scale_pointer() { return &scale; }
+    inline const float *scale_pointer() {
+      return &scale;
+    }
+
   };
 
   struct FPGAArgs fpga_args() const {
diff --git a/src/io/executor.cpp b/src/io/executor.cpp
index c09fe2c58532437336307ce007532d43689d8fd2..d6434b64aa752fd62bc637a882298228d59880b8 100644
--- a/src/io/executor.cpp
+++ b/src/io/executor.cpp
@@ -32,10 +32,6 @@ limitations under the License. */
 #include "common/threadpool.h"
 #endif
 
-#ifdef PADDLE_MOBILE_FPGA
-#include "fpga/fpga_quantilization.h"
-#endif
-
 namespace paddle_mobile {
 
 using framework::Variable;
@@ -100,11 +96,6 @@ Executor<Dtype, P>::Executor(const framework::Program<Dtype> p, int batch_size,
   for (const auto &op : ops) {
     op->Init();
   }
-#ifdef PADDLE_MOBILE_FPGA
-  for (const auto &op : ops) {
-    quantilize_op(op, program_.scope);
-  }
-#endif
 }
 
 template <typename Dtype, Precision P>
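
For reference, below is a small, self-contained sketch of the symmetric int8 quantization that quantilize_filter performs. It is not part of the patch: the helper name quantize_to_int8 is hypothetical, and it uses the maximum absolute value of the weights (rather than the raw maximum used in the patch) plus a zero guard, which is a common convention for symmetric quantization.

// Standalone sketch (not part of the patch): symmetric int8 quantization.
// quantize_to_int8 is a hypothetical helper; all names are illustrative.
#include <algorithm>
#include <cmath>
#include <cstdint>
#include <cstdio>
#include <vector>

// Maps floats to int8 so the largest magnitude maps to +/-127.
// Returns the scale; dequantize with float ~= int8 * scale.
static float quantize_to_int8(const std::vector<float> &in,
                              std::vector<int8_t> *out) {
  float max_abs = 0.0f;
  for (float v : in) {
    max_abs = std::max(max_abs, std::fabs(v));
  }
  const float fix_range = 127.0f;  // (1 << (8 - 1)) - 1, as in the patch
  const float scale = (max_abs == 0.0f) ? 1.0f : max_abs / fix_range;
  out->resize(in.size());
  for (size_t i = 0; i < in.size(); ++i) {
    (*out)[i] = static_cast<int8_t>(std::round(in[i] / scale));
  }
  return scale;
}

int main() {
  const std::vector<float> weights = {0.5f, -1.25f, 0.03f, 2.0f};
  std::vector<int8_t> q;
  const float scale = quantize_to_int8(weights, &q);
  for (size_t i = 0; i < q.size(); ++i) {
    std::printf("%8.4f -> %4d -> %8.4f\n", weights[i],
                static_cast<int>(q[i]), q[i] * scale);
  }
  return 0;
}

Dequantization recovers an approximation of each weight as int8 * scale, which is why the per-filter scale has to travel with the quantized tensor (compare filter_scale_address added to ConvArgs and the scale field in FPGAArgs above).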