Unverified commit 6859850c authored by Chon, committed by GitHub

Merge pull request #947 from chonwhite/develop

fix: #946
@@ -145,8 +145,8 @@ int ComputeFpgaEWAdd(const struct EWAddArgs &args) {
 }
 int PerformBypass(const struct BypassArgs &args) {
 #ifdef FPGA_TEST_MODE
-  DLOG << " layout_type:" << args.layout_type
-       << " convert_type:" << args.convert_type;
+  DLOG << " input_type:" << args.input_data_type
+       << " input_layout_type:" << args.input_layout_type;
   DLOG << " image_address:" << args.image.address
        << " image_scale_address:" << args.image.scale_address
        << " image_channels:" << args.image.channels
@@ -25,23 +25,14 @@ limitations under the License. */
 namespace paddle_mobile {
 namespace fpga {
-int open_device();
-int close_device();
-void* fpga_malloc(size_t size);
-void fpga_free(void* ptr);
-void fpga_copy(void* dst, const void* src, size_t num);
-enum DataConvertType {
-  DATA_NO_CONVERT = 0,
-  DATA_FP32_TO_FP16 = 1,
-  DATA_FP16_TO_FP32 = 2,
+enum DataType {
+  DATA_TYPE_FP32 = 1,
+  DATA_TYPE_FP16 = 0,
 };
-enum LayoutConvertType {
-  LAYOUT_NO_CONVERT = 0,
-  LAYOUT_CHW_TO_HWC = 1,
-  LAYOUT_HWC_TO_CHW = 2,
+enum LayoutType {
+  LAYOUT_CHW = 1,
+  LAYOUT_HWC = 0,
 };
 struct VersionArgs {
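With this change a conversion's direction is no longer stated by a dedicated enum; it falls out of describing both endpoints of a bypass. As a minimal sketch, the removed DataConvertType semantics can still be derived from a pair of the new DataType values (the helpers below are hypothetical, not part of this patch):

// Hypothetical helpers, not part of this patch: the removed DataConvertType
// is fully determined by the (input, output) pair of the new DataType.
bool NeedsFp32ToFp16(fpga::DataType in, fpga::DataType out) {
  return in == fpga::DATA_TYPE_FP32 && out == fpga::DATA_TYPE_FP16;
}
bool NeedsFp16ToFp32(fpga::DataType in, fpga::DataType out) {
  return in == fpga::DATA_TYPE_FP16 && out == fpga::DATA_TYPE_FP32;
}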
@@ -83,7 +74,6 @@ struct ConvArgs {
   bool relu_enabled;
   void* sb_address;  // scale and bias are interlaced;
   void* filter_address;
-  float* filter_scale_address;
   uint32_t filter_num;
   uint32_t group_num;
@@ -122,16 +112,18 @@ struct PoolingArgs {
 struct EWAddArgs {
   bool relu_enabled;
-  float const0;  // output0 = const0 x input0 + const1 x input1;
-  float const1;
+  uint32_t const0;  // output0 = const0 x input0 + const1 x input1;
+  uint32_t const1;
   struct ImageInputArgs image0;
   struct ImageInputArgs image1;
   struct ImageOutputArgs output;
 };
 struct BypassArgs {
-  enum DataConvertType convert_type;
-  enum LayoutConvertType layout_type;
+  enum DataType input_data_type;
+  enum DataType output_data_type;
+  enum LayoutType input_layout_type;
+  enum LayoutType output_layout_type;
   struct ImageInputArgs image;
   struct ImageOutputArgs output;
 };
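BypassArgs now carries a full description of both sides of the transfer, so one struct covers any combination of precision and layout conversion. A minimal sketch of an FP16-to-FP32, HWC-to-CHW bypass, mirroring the softmax kernel change below (the pointers and the 1x1x8 shape are illustrative assumptions):

// Sketch only: convert an fp16 HWC buffer to fp32 CHW via bypass.
// input_ptr / output_ptr and the shape are illustrative assumptions.
void Fp16ToFp32Bypass(void* input_ptr, void* output_ptr) {
  fpga::BypassArgs args;
  args.input_data_type = fpga::DATA_TYPE_FP16;   // what the buffer holds
  args.output_data_type = fpga::DATA_TYPE_FP32;  // what the caller wants back
  args.input_layout_type = fpga::LAYOUT_HWC;     // FPGA-side layout
  args.output_layout_type = fpga::LAYOUT_CHW;    // CPU-side layout
  args.image.address = input_ptr;
  args.image.height = 1;
  args.image.width = 1;
  args.image.channels = 8;                       // assumed channel count
  args.output.address = output_ptr;
  fpga::PerformBypass(args);
}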
@@ -141,6 +133,11 @@ struct FpgaRegWriteArgs {
   uint64_t value;
 };
+struct FpgaRegReadArgs {
+  uint64_t address;
+  uint64_t value;
+};
 #define IOCTL_FPGA_MAGIC 'FPGA'
 #define IOCTL_VERSION _IOW(IOCTL_FPGA_MAGIC, 01, struct VersionArgs)
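The new FpgaRegReadArgs mirrors FpgaRegWriteArgs so register reads can travel through the same ioctl plumbing. A hedged sketch of the user-space call path (the device node path is an assumption, not taken from this patch):

#include <fcntl.h>
#include <sys/ioctl.h>
#include <unistd.h>

int QueryVersion() {
  // Assumption: the driver exposes a char device; this path is illustrative.
  int fd = open("/dev/fpgadrv0", O_RDWR);
  if (fd < 0) return -1;
  struct VersionArgs version = {};
  int ret = ioctl(fd, IOCTL_VERSION, &version);  // dispatches on the _IOW code
  close(fd);
  return ret;
}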
@@ -184,6 +181,13 @@ enum FPGA_ERR_TYPE {
 //============================== API =============================
+int open_device();
+int close_device();
+void* fpga_malloc(size_t size);
+void fpga_free(void* ptr);
+void fpga_copy(void* dst, const void* src, size_t num);
 int PerformBypass(const struct BypassArgs& args);
 int ComputeFpgaConv(const struct WrapperConvArgs& args);
 int ComputeFpgaPool(const struct PoolingArgs& args);
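Grouping the memory helpers with the compute entry points makes the public surface read as one API. A minimal usage sketch of the allocation lifecycle (the image shape is an illustrative assumption):

// Sketch: typical lifecycle of an FPGA-visible buffer (shape is assumed).
void CopyImageToFpga(const void* host_src) {
  size_t bytes = 224 * 224 * 3 * sizeof(int16_t);  // assumed fp16 image size
  void* device_buf = fpga::fpga_malloc(bytes);
  fpga::fpga_copy(device_buf, host_src, bytes);
  // ... point a BypassArgs / PoolingArgs image at device_buf and dispatch ...
  fpga::fpga_free(device_buf);
}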
@@ -56,8 +56,11 @@ class FeedOp : public framework::OperatorBase<DeviceType> {
     auto output_ptr = output->mutable_data<half>();
     fpga::BypassArgs args;
-    args.convert_type = fpga::DATA_FP32_TO_FP16;
-    args.layout_type = fpga::LAYOUT_NO_CONVERT;
+    args.input_data_type = fpga::DATA_TYPE_FP32;
+    args.output_data_type = fpga::DATA_TYPE_FP16;
+    args.input_layout_type = fpga::LAYOUT_CHW;
+    args.output_layout_type = fpga::LAYOUT_HWC;
     args.image.address = (void *)input_ptr;
     args.image.channels = input->dims()[1];
     args.image.height = input->dims()[2];
@@ -25,27 +25,34 @@ namespace operators {
 template <>
 bool SoftmaxKernel<FPGA, float>::Init(SoftmaxParam<FPGA> *param) {
   const Tensor *input = param->InputX();
-  auto input_ptr = input->data<float>();
-  auto output = param->Out();
-  auto output_ptr = output->mutable_data<float>();
-  fpga::BypassArgs args;
-  args.convert_type = fpga::DATA_FP16_TO_FP32;
-  args.layout_type = fpga::LAYOUT_NO_CONVERT;
-  args.image.address = (void *)(input_ptr);
-  args.image.height = (uint32_t)input->dims()[0];
-  args.image.width = (uint32_t)input->dims()[1];
-  args.image.channels = 1;
-  args.output.address = output_ptr;
-  param->SetFpgaArgs(args);
+  if (input->type() == typeid(half)) {
+    auto input_ptr = input->data<half>();
+    auto output_ptr = param->Out();
+    fpga::BypassArgs args;
+    args.input_layout_type = fpga::LAYOUT_HWC;
+    args.output_layout_type = fpga::LAYOUT_CHW;
+    args.input_data_type = fpga::DATA_TYPE_FP16;
+    args.output_data_type = fpga::DATA_TYPE_FP32;
+    args.image.address = (void *)(input_ptr);
+    args.image.height = (uint32_t)input->dims()[0];
+    args.image.width = (uint32_t)input->dims()[1];
+    args.image.channels = 1;
+    args.output.address = output_ptr;
+    param->SetFpgaArgs(args);
+  }
   return true;
 }
 template <>
 void SoftmaxKernel<FPGA, float>::Compute(
     const SoftmaxParam<FPGA> &param) const {
+  // SoftmaxCompute<float>(param);
+  DLOG << "======================================= FPGA SoftMAX "
+          "===============================================";
+  const Tensor *in_x = param.InputX();
+  Tensor *out = param.Out();
+  auto x_dims = in_x->dims();
+  out->Resize(x_dims);
+  math::SoftmaxFuntor<CPU, float>()(in_x, out);
 }
 template class SoftmaxKernel<FPGA, float>;