提交 686b2c93 编写于 作者: H hanbuhe

FPGA conv: added filter quantization scale

上级 1b97e2fa
...@@ -35,7 +35,7 @@ namespace fpga { ...@@ -35,7 +35,7 @@ namespace fpga {
// File descriptor of the FPGA device node; -1 until the device is opened.
static int fd = -1;
static const char *device_path = "/dev/fpgadrv0";

// Forward an ioctl request (with its argument struct) to the FPGA driver.
// Fix 1: ioctl(2) takes the file descriptor as its FIRST argument; the
//        previous code dropped `fd` and passed `req` in its place.
// Fix 2: `unsigned int64_t` is not a valid C++ type; use uint64_t for the
//        pointer-to-integer conversion the driver ABI expects.
static inline int do_ioctl(int req, const void *arg) {
  return ioctl(fd, req, reinterpret_cast<uint64_t>(arg));
}
// Copy `num` bytes from `src` to `dest`.
// Plain memcpy semantics: the regions must not overlap.
void fpga_copy(void *dest, const void *src, size_t num) {
  memcpy(dest, src, num);
}
int ComputeFpgaConv(const struct ConvArgs &args) { return do_ioctl(21, &args); } int ComputeFpgaConv(const struct ConvArgs &args) {
return do_ioctl(IOCTL_CONFIG_CONV, &args);
}
int ComputeFpgaPool(const struct PoolingArgs &args) { int ComputeFpgaPool(const struct PoolingArgs &args) {
return do_ioctl(22, &args); return do_ioctl(IOCTL_CONFIG_POOLING, &args);
} }
int ComputeFpgaEWAdd(const struct EWAddArgs &args) { int ComputeFpgaEWAdd(const struct EWAddArgs &args) {
return do_ioctl(23, &args); return do_ioctl(IOCTL_CONFIG_EW, &args);
}
int PerformBypass(const struct BypassArgs &args) {
return do_ioctl(IOCTL_CONFIG_BYPASS, &args);
} }
} // namespace fpga } // namespace fpga
......
...@@ -86,12 +86,12 @@ struct ImageOutputArgs { ...@@ -86,12 +86,12 @@ struct ImageOutputArgs {
struct ConvArgs { struct ConvArgs {
bool relu_enabled; bool relu_enabled;
void* bias_address; void* sb_address; // scale and bias are interlaced;
void* filter_address; void* filter_address;
float* filter_scale_address;
uint32_t filter_num; uint32_t filter_num;
uint32_t group_num; uint32_t group_num;
void* sb_address; // scale and bias are interlaced;
struct KernelArgs kernel; struct KernelArgs kernel;
struct ImageInputArgs image; // input image; struct ImageInputArgs image; // input image;
struct ImageOutputArgs output; struct ImageOutputArgs output;
...@@ -116,6 +116,7 @@ struct EWAddArgs { ...@@ -116,6 +116,7 @@ struct EWAddArgs {
struct BypassArgs { struct BypassArgs {
enum DataConvertType convert_type; enum DataConvertType convert_type;
enum LayoutConvertType layout_type;
struct ImageInputArgs image; struct ImageInputArgs image;
struct ImageOutputArgs output; struct ImageOutputArgs output;
}; };
...@@ -125,11 +126,6 @@ struct FpgaRegWriteArgs { ...@@ -125,11 +126,6 @@ struct FpgaRegWriteArgs {
uint64_t value; uint64_t value;
}; };
struct FpgaRegReadArgs {
uint64_t address;
uint64_t value;
};
// ioctl magic for the FPGA driver.
// NOTE(review): 'FPGA' is a multi-character literal with an
// implementation-defined value; the Linux convention is a single
// character. Kept as-is for ABI compatibility with the existing driver.
#define IOCTL_FPGA_MAGIC 'FPGA'

#define IOCTL_VERSION _IOW(IOCTL_FPGA_MAGIC, 01, struct VersionArgs)

#define IOCTL_CONFIG_CONV _IOW(IOCTL_FPGA_MAGIC, 21, struct ConvArgs)
#define IOCTL_CONFIG_POOLING _IOW(IOCTL_FPGA_MAGIC, 22, struct PoolingArgs)
#define IOCTL_CONFIG_EW _IOW(IOCTL_FPGA_MAGIC, 23, struct EWAddArgs)
#define IOCTL_CONFIG_BYPASS _IOW(IOCTL_FPGA_MAGIC, 24, struct BypassArgs)
// NOTE(review): this change removes struct FpgaRegReadArgs but keeps the
// macro below referencing it — restore the struct or drop the macro.
#define IOCTL_FPGA_REG_READ _IOW(IOCTL_FPGA_MAGIC, 28, struct FpgaRegReadArgs)
#define IOCTL_FPGA_REG_WRITE _IOW(IOCTL_FPGA_MAGIC, 29, struct FpgaRegWriteArgs)
...@@ -172,6 +169,7 @@ enum FPGA_ERR_TYPE { ...@@ -172,6 +169,7 @@ enum FPGA_ERR_TYPE {
//============================== API =============================

// Run a format/layout conversion pass on the FPGA (no compute).
int PerformBypass(const struct BypassArgs& args);
// Submit a convolution / pooling / element-wise-add job to the FPGA.
// Each returns the ioctl result: 0 on success, -1 on failure.
int ComputeFpgaConv(const struct ConvArgs& args);
int ComputeFpgaPool(const struct PoolingArgs& args);
int ComputeFpgaEWAdd(const struct EWAddArgs& args);
......
...@@ -13,55 +13,40 @@ See the License for the specific language governing permissions and ...@@ -13,55 +13,40 @@ See the License for the specific language governing permissions and
limitations under the License. */ limitations under the License. */
#pragma once #pragma once
#include <string>
#include "common/types.h" #include "common/types.h"
#include "framework/lod_tensor.h" #include "framework/lod_tensor.h"
#include "framework/operator.h"
#include "framework/scope.h"
#include "framework/tensor.h" #include "framework/tensor.h"
namespace paddle_mobile { namespace paddle_mobile {
bool is_conv(std::string type) {
if (type.compare(G_OP_TYPE_CONV) == 0) {
return true;
}
if (type.compare(G_OP_TYPE_FUSION_CONV_ADD) == 0) {
return true;
}
if (type.compare(G_OP_TYPE_FUSION_CONV_ADD_RELU) == 0) {
return true;
}
if (type.compare(G_OP_TYPE_FUSION_CONV_BN_RELU) == 0) {
return true;
}
if (type.compare(G_OP_TYPE_FUSION_CONV_ADD_BN) == 0) {
return true;
}
return false;
}
template <typename Dtype> template <typename Dtype>
void quantilize_op(std::shared_ptr<framework::OperatorBase<Dtype>> op, framework::Tensor* quantilize_filter(framework::Tensor* filter) {
std::shared_ptr<framework::Scope> scope) {
if (!is_conv(op.get()->Type())) {
return;
}
framework::Tensor* filter = nullptr;
auto var_vec = op.get()->Inputs().at("Filter");
if (!var_vec.empty()) {
auto var = scope.get()->FindVar(var_vec[0]);
filter = var->template GetMutable<framework::LoDTensor>();
}
float scale = 0; float scale = 0;
// 32bit filter -> 8bit filter; // 32bit filter -> 8bit filter;
float min = 0f;
float max = 0f;
if (filter->type() == typeid(float)) { if (filter->type() == typeid(float)) {
float* floatData = originalFilter->data<float>();
for (int i = 0; i < filter->numel(); ++i) {
min = std::min(min, floatData[i]);
max = std::max(max, floatData[i]);
}
float fix_range = (float)((1 << (8 - 1)) - 1);
float float_range = max;
scale = (float_range / fix_range);
framework::Tensor* originalFilter = filter; framework::Tensor* originalFilter = filter;
framework::Tensor* quantFilter = new framework::Tensor(); framework::Tensor* quantFilter = new framework::Tensor();
float* floatData = originalFilter->data<float>();
int8_t* intData = quantFilter->mutable_data<int8_t>(); int8_t* intData = quantFilter->mutable_data<int8_t>();
for (int i = 0; i < filter->numel(); ++i) {
intData[i] = (int8_t)floatData[i] * scale;
}
quantFilter.scale = scale;
// NCHW -> NHWC;
return quantFilter;
} }
return filter;
} }
} // namespace paddle_mobile } // namespace paddle_mobile
...@@ -257,7 +257,10 @@ class Tensor { ...@@ -257,7 +257,10 @@ class Tensor {
// Per-tensor FPGA quantization parameters.
struct FPGAArgs {
  float scale;

  // Const-correct read accessor: callers only read through this pointer,
  // so the member function is const and returns a pointer-to-const.
  inline const float* scale_pointer() const { return &scale; }
};
struct FPGAArgs fpga_args() const { struct FPGAArgs fpga_args() const {
......
...@@ -32,10 +32,6 @@ limitations under the License. */ ...@@ -32,10 +32,6 @@ limitations under the License. */
#include "common/threadpool.h" #include "common/threadpool.h"
#endif #endif
#ifdef PADDLE_MOBILE_FPGA
#include "fpga/fpga_quantilization.h"
#endif
namespace paddle_mobile { namespace paddle_mobile {
using framework::Variable; using framework::Variable;
...@@ -100,11 +96,6 @@ Executor<Dtype, P>::Executor(const framework::Program<Dtype> p, int batch_size, ...@@ -100,11 +96,6 @@ Executor<Dtype, P>::Executor(const framework::Program<Dtype> p, int batch_size,
for (const auto &op : ops) { for (const auto &op : ops) {
op->Init(); op->Init();
} }
#ifdef PADDLE_MOBILE_FPGA
for (const auto &op : ops) {
quantilize_op(op, program_.scope);
}
#endif
} }
template <typename Dtype, Precision P> template <typename Dtype, Precision P>
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册