未验证 提交 17f85a8a 编写于 作者: Z zhangyang0701 提交者: GitHub

Merge pull request #698 from chonwhite/develop

fix:#697
......@@ -35,7 +35,7 @@ namespace fpga {
// Descriptor used for every driver request. It is initialised to -1 here;
// the code that opens device_path is not visible in this excerpt.
static int fd = -1;
static const char *device_path = "/dev/fpgadrv0";

// Issues one ioctl request on the FPGA device descriptor.
//
// req: request code (one of the IOCTL_* values).
// arg: pointer to the argument block for that request; passed through
//      unchanged.
// Returns whatever ioctl() returns. While fd is still -1 the call fails
// with -1 and errno set by the system (EBADF).
//
// The previous body called ioctl(req, (unsigned int64_t)arg), which dropped
// the descriptor argument entirely (req was used as the descriptor and the
// pointer as the request code) and used a cast that is not a valid type.
static inline int do_ioctl(int req, const void *arg) {
  return ioctl(fd, req, arg);
}
......@@ -58,12 +58,17 @@ void fpga_copy(void *dest, const void *src, size_t num) {
memcpy(dest, src, num);
}
// Sends a convolution argument block to the driver.
//
// Uses the encoded IOCTL_CONFIG_CONV request rather than the bare command
// number 21; a second, stale definition that passed the bare number has been
// folded into this one so the function is defined exactly once.
// Returns the result of do_ioctl unchanged.
int ComputeFpgaConv(const struct ConvArgs &args) {
  return do_ioctl(IOCTL_CONFIG_CONV, &args);
}
// Sends a pooling argument block to the driver.
//
// The body previously returned do_ioctl(22, &args) first, which made the
// IOCTL_CONFIG_POOLING call below it unreachable; only the encoded request
// is kept.
// Returns the result of do_ioctl unchanged.
int ComputeFpgaPool(const struct PoolingArgs &args) {
  return do_ioctl(IOCTL_CONFIG_POOLING, &args);
}
// Sends an element-wise add argument block to the driver.
//
// The body previously returned do_ioctl(23, &args) first, which made the
// IOCTL_CONFIG_EW call below it unreachable; only the encoded request is
// kept.
// Returns the result of do_ioctl unchanged.
int ComputeFpgaEWAdd(const struct EWAddArgs &args) {
  return do_ioctl(IOCTL_CONFIG_EW, &args);
}
// Hands a bypass argument block to the driver using the
// IOCTL_CONFIG_BYPASS request and reports do_ioctl's result to the caller.
int PerformBypass(const struct BypassArgs &args) {
  const void *payload = &args;
  const int status = do_ioctl(IOCTL_CONFIG_BYPASS, payload);
  return status;
}
} // namespace fpga
......
......@@ -86,12 +86,12 @@ struct ImageOutputArgs {
struct ConvArgs {
bool relu_enabled;
void* bias_address;
void* sb_address; // scale and bias are interlaced;
void* filter_address;
float* filter_scale_address;
uint32_t filter_num;
uint32_t group_num;
void* sb_address; // scale and bias are interlaced;
struct KernelArgs kernel;
struct ImageInputArgs image; // input image;
struct ImageOutputArgs output;
......@@ -116,6 +116,7 @@ struct EWAddArgs {
// Argument block for the bypass request: IOCTL_CONFIG_BYPASS is encoded with
// this struct's size and PerformBypass passes a pointer to it to the driver.
// NOTE(review): field order and types are presumably shared with the kernel
// driver — confirm before reordering or resizing anything here.
struct BypassArgs {
// Data conversion selector (values come from DataConvertType, declared
// outside this excerpt).
enum DataConvertType convert_type;
// Layout conversion selector (values come from LayoutConvertType, declared
// outside this excerpt).
enum LayoutConvertType layout_type;
// Input image description.
struct ImageInputArgs image;
// Output description.
struct ImageOutputArgs output;
};
......@@ -125,11 +126,6 @@ struct FpgaRegWriteArgs {
uint64_t value;
};
// Argument block named by the IOCTL_FPGA_REG_READ request code, which is
// encoded with this struct's size, so the struct must stay declared for as
// long as that macro is used.
struct FpgaRegReadArgs {
// Presumably the register address to read — TODO confirm against the driver.
uint64_t address;
// Presumably receives the value read back — TODO confirm against the driver.
uint64_t value;
};
// Magic value shared by every FPGA ioctl request code below.
// NOTE(review): 'FPGA' is a multi-character literal, whose int value is
// implementation-defined. The Linux _IOC encoding reserves only 8 bits for
// the type field, so the extra bytes may spill into the other fields of the
// request code — confirm against the driver before changing, since the value
// is part of the driver ABI.
#define IOCTL_FPGA_MAGIC 'FPGA'
// Version query request; command number 01 is an octal literal equal to 1.
#define IOCTL_VERSION _IOW(IOCTL_FPGA_MAGIC, 01, struct VersionArgs)
......@@ -143,6 +139,7 @@ struct FpgaRegReadArgs {
// Request codes for the compute and register operations. Each one combines
// IOCTL_FPGA_MAGIC, a command number and the argument struct type through
// _IOW.
#define IOCTL_CONFIG_CONV _IOW(IOCTL_FPGA_MAGIC, 21, struct ConvArgs)
#define IOCTL_CONFIG_POOLING _IOW(IOCTL_FPGA_MAGIC, 22, struct PoolingArgs)
#define IOCTL_CONFIG_EW _IOW(IOCTL_FPGA_MAGIC, 23, struct EWAddArgs)
#define IOCTL_CONFIG_BYPASS _IOW(IOCTL_FPGA_MAGIC, 24, struct BypassArgs)
// NOTE(review): the register READ request is also built with _IOW (the
// write direction); a read would normally use _IOR or _IOWR — confirm what
// the driver expects. It also requires struct FpgaRegReadArgs to be declared.
#define IOCTL_FPGA_REG_READ _IOW(IOCTL_FPGA_MAGIC, 28, struct FpgaRegReadArgs)
#define IOCTL_FPGA_REG_WRITE _IOW(IOCTL_FPGA_MAGIC, 29, struct FpgaRegWriteArgs)
......@@ -172,6 +169,7 @@ enum FPGA_ERR_TYPE {
//============================== API =============================
// Driver entry points. Each takes its argument block by const reference and
// returns an int status; the definitions forward the block to the driver
// through do_ioctl.
int PerformBypass(const struct BypassArgs& args);
int ComputeFpgaConv(const struct ConvArgs& args);
int ComputeFpgaPool(const struct PoolingArgs& args);
int ComputeFpgaEWAdd(const struct EWAddArgs& args);
......
......@@ -13,55 +13,40 @@ See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include <string>
#include "common/types.h"
#include "framework/lod_tensor.h"
#include "framework/operator.h"
#include "framework/scope.h"
#include "framework/tensor.h"
namespace paddle_mobile {
// Reports whether an operator type name is one of the convolution variants:
// plain conv, or the fused conv+add, conv+add+relu, conv+bn+relu and
// conv+add+bn operators.
//
// type: operator type name to classify.
// Returns true when type equals one of the G_OP_TYPE_* names listed below.
//
// Declared inline because this definition lives in a header; without it,
// every translation unit that includes the header emits its own external
// definition and the link fails with a multiple-definition error. The
// argument is taken by const reference to avoid copying the string.
inline bool is_conv(const std::string &type) {
  return type.compare(G_OP_TYPE_CONV) == 0 ||
         type.compare(G_OP_TYPE_FUSION_CONV_ADD) == 0 ||
         type.compare(G_OP_TYPE_FUSION_CONV_ADD_RELU) == 0 ||
         type.compare(G_OP_TYPE_FUSION_CONV_BN_RELU) == 0 ||
         type.compare(G_OP_TYPE_FUSION_CONV_ADD_BN) == 0;
}
// NOTE(review): this span interleaves two definitions — the templated
// quantilize_op(op, scope) and quantilize_filter(filter). The header of
// quantilize_filter appears inside the body of quantilize_op, and C++ does
// not allow a function definition nested in another function, so the span
// cannot compile as written. The notes below flag what is visible; the code
// itself is left untouched because the two versions cannot be separated
// safely from here.
template <typename Dtype>
void quantilize_op(std::shared_ptr<framework::OperatorBase<Dtype>> op,
std::shared_ptr<framework::Scope> scope) {
// Only operators accepted by is_conv are processed.
if (!is_conv(op.get()->Type())) {
return;
}
framework::Tensor* filter = nullptr;
// Looks up the operator's "Filter" input; .at() is called on the input map,
// which presumably throws if the key is absent — confirm the map type.
auto var_vec = op.get()->Inputs().at("Filter");
if (!var_vec.empty()) {
auto var = scope.get()->FindVar(var_vec[0]);
filter = var->template GetMutable<framework::LoDTensor>();
}
// NOTE(review): as a non-template function defined in a header,
// quantilize_filter would also need `inline` to be included from more than
// one translation unit.
framework::Tensor* quantilize_filter(framework::Tensor* filter) {
float scale = 0;
// 32bit filter -> 8bit filter;
// NOTE(review): `0f` is not a valid C++ literal; `0.0f` is presumably meant.
float min = 0f;
float max = 0f;
if (filter->type() == typeid(float)) {
// NOTE(review): originalFilter is used here before its declaration further
// down in this block.
float* floatData = originalFilter->data<float>();
// Scans every element for the running minimum and maximum; `min` is not
// read again anywhere in this span.
for (int i = 0; i < filter->numel(); ++i) {
min = std::min(min, floatData[i]);
max = std::max(max, floatData[i]);
}
// fix_range is (1 << 7) - 1 = 127, the largest positive int8 value.
float fix_range = (float)((1 << (8 - 1)) - 1);
float float_range = max;
scale = (float_range / fix_range);
framework::Tensor* originalFilter = filter;
// NOTE(review): raw `new`; who owns the returned tensor is not visible here.
framework::Tensor* quantFilter = new framework::Tensor();
// NOTE(review): floatData is declared a second time in the same block.
float* floatData = originalFilter->data<float>();
// NOTE(review): no resize of quantFilter is visible before mutable_data is
// called — confirm the tensor has dimensions at this point.
int8_t* intData = quantFilter->mutable_data<int8_t>();
}
// NOTE(review): intData and floatData were declared inside the block closed
// above, so they are out of scope in this loop. The cast also binds tighter
// than the multiplication: the float is truncated to int8_t first and only
// then multiplied by scale.
for (int i = 0; i < filter->numel(); ++i) {
intData[i] = (int8_t)floatData[i] * scale;
}
// NOTE(review): quantFilter is declared as a pointer above, so `.scale`
// member access does not apply to it.
quantFilter.scale = scale;
// NCHW -> NHWC;
return quantFilter;
}
return filter;
}
} // namespace paddle_mobile
......@@ -257,12 +257,10 @@ class Tensor {
// FPGA-specific tensor attributes. Holds one float, `scale`, and exposes its
// address through const and non-const accessors.
struct FPGAArgs {
  float scale;

  // Address of `scale` for callers that need to write through it.
  float *scale_pointer() {
    return &scale;
  }

  // Read-only address of `scale`.
  const float *scale_pointer() const {
    return &scale;
  }
};
// Returns a copy of this tensor's FPGA attributes.
//
// The accessor used to be defined twice with return types differing only in
// const-ness, which is not a valid overload; a single definition is kept.
// NOTE(review): the result is a temporary copy, so a pointer obtained from
// fpga_args().scale_pointer() does not refer to the tensor's own storage and
// dangles once the full expression ends — confirm how callers use it.
const struct FPGAArgs fpga_args() const { return fpgaArgs_; }
#endif
private:
......
......@@ -32,10 +32,6 @@ limitations under the License. */
#include "common/threadpool.h"
#endif
#ifdef PADDLE_MOBILE_FPGA
#include "fpga/fpga_quantilization.h"
#endif
namespace paddle_mobile {
using framework::Variable;
......@@ -100,11 +96,6 @@ Executor<Dtype, P>::Executor(const framework::Program<Dtype> p, int batch_size,
for (const auto &op : ops) {
op->Init();
}
#ifdef PADDLE_MOBILE_FPGA
for (const auto &op : ops) {
quantilize_op(op, program_.scope);
}
#endif
}
template <typename Dtype, Precision P>
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册