Commit 995ef7c5 authored by zhangyang

Add test code for data formatting for the FPGA track

Parent 6859850c
......@@ -14,11 +14,9 @@ limitations under the License. */
#include "api.h"
#include <fcntl.h>
#include <stdio.h>
#include <stdlib.h>
#include <sys/ioctl.h>
#include <algorithm>
#include <cstring>
#include <memory>
#include "bias_scale.h"
#include "filter.h"
#include "image.h"
......@@ -48,6 +46,7 @@ int open_device() {
// memory management;
void *fpga_malloc(size_t size) {
DLOG << size << " bytes allocated";
#ifdef PADDLE_MOBILE_OS_LINUX
return reinterpret_cast<void *>(
mmap64(NULL, size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0));
......@@ -181,10 +180,19 @@ void format_image(framework::Tensor *image_tensor) {
void format_ofm(framework::Tensor *ofm_tensor) {
auto dims = ofm_tensor->dims();
auto channel = dims[1], height = dims[2], width = dims[3];
size_t memory_size =
height * align_to_x(channel * width, IMAGE_ALIGNMENT) * sizeof(half);
ofm_tensor->reset_data_ptr(fpga_malloc(memory_size));
size_t memory_size = 0;
if (dims.size() == 4) {
auto channel = dims[1], height = dims[2], width = dims[3];
memory_size =
height * align_to_x(channel * width, IMAGE_ALIGNMENT) * sizeof(half);
} else if (dims.size() == 2) {
memory_size = align_to_x(dims[1], IMAGE_ALIGNMENT) * sizeof(half);
} else {
DLOG << "Wrong ofm dimension";
}
auto p = fpga_malloc(memory_size);
memset(p, 0, memory_size);
ofm_tensor->reset_data_ptr(p);
}
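For readers tracing the new format_ofm branches, here is a minimal, self-contained sketch of the size arithmetic. That align_to_x rounds its first argument up to the next multiple of its second, and that IMAGE_ALIGNMENT is 16, are assumptions based on how the helpers are used elsewhere in the fpga code, not definitions taken from this commit; the example tensor shapes are hypothetical.

#include <cstddef>
#include <cstdio>

// Assumed behaviour of align_to_x: round n up to the next multiple of x.
static size_t align_to_x(size_t n, size_t x) { return (n + x - 1) / x * x; }

int main() {
  const size_t kImageAlignment = 16;  // assumed value of IMAGE_ALIGNMENT
  const size_t kHalfBytes = 2;        // sizeof(half) on the FPGA path
  // 4-D case, e.g. a 1x3x32x30 output feature map:
  size_t channel = 3, height = 32, width = 30;
  size_t size_4d =
      height * align_to_x(channel * width, kImageAlignment) * kHalfBytes;
  // 2-D case, e.g. a 1x1001 fully connected output:
  size_t size_2d = align_to_x(1001, kImageAlignment) * kHalfBytes;
  printf("4-D: %zu bytes, 2-D: %zu bytes\n", size_4d, size_2d);  // 6144, 2016
  return 0;
}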
float filter_find_max(framework::Tensor *filter_tensor) {
......@@ -200,7 +208,7 @@ int get_plit_num(framework::Tensor *filter_tensor) {
return filter::calc_split_num(num, div_capacity);
}
int get_element_num_per_div(framework::Tensor *filter_tensor, int group_num) {
int get_filter_num_per_div(framework::Tensor *filter_tensor, int group_num) {
auto dims = filter_tensor->dims();
auto chw = dims[1] * dims[2] * dims[3];
auto num = dims[0];
......@@ -279,7 +287,7 @@ void fill_conv_arg(struct WrapperConvArgs *arg, framework::Tensor *input,
arg->concat_arg.image_out = out_ptr;
const int channel = (int)out->dims()[1];
int element_num_per_div = fpga::get_element_num_per_div(filter, group_num);
int filter_num_per_div = fpga::get_filter_num_per_div(filter, group_num);
int element_num = fpga::get_aligned_filter_element_num(
filter->dims()[1] * filter->dims()[2] * filter->dims()[3]);
......@@ -297,12 +305,14 @@ void fill_conv_arg(struct WrapperConvArgs *arg, framework::Tensor *input,
arg->conv_args[i].image.scale_address = input->scale;
arg->conv_args[i].image.pad_height = (uint32_t)padding_h;
arg->conv_args[i].image.pad_width = (uint32_t)padding_w;
arg->conv_args[i].filter_address = &((int8_t *)filter_ptr)[i * element_num];
arg->conv_args[i].sb_address = &((int8_t *)bs_ptr)[i * element_num];
arg->conv_args[i].filter_scale_address = filter->scale;
arg->conv_args[i].filter_address =
&((int8_t *)filter_ptr)[i * element_num * filter_num_per_div];
arg->conv_args[i].sb_address = &bs_ptr[i * filter_num_per_div * 2];
arg->conv_args[i].filter_num =
(uint32_t)(i == n - 1 ? fpga::get_aligned_filter_num(
channel - (n - 1) * element_num_per_div)
: element_num_per_div);
channel - (n - 1) * filter_num_per_div)
: filter_num_per_div);
if (n > 1) {
arg->conv_args[i].output.scale_address =
......
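A hedged reading of the pointer arithmetic introduced above: each split i owns filter_num_per_div filters of element_num (aligned) bytes each, and bs_ptr interlaces one {scale, bias} float pair per filter. The standalone sketch below mirrors that indexing; the concrete sizes in main are hypothetical, chosen only to make the offsets visible.

#include <cstdint>
#include <cstdio>

// Start of split i's filters in the packed int8 filter buffer.
const int8_t *split_filter_ptr(const int8_t *filter_ptr, int element_num,
                               int filter_num_per_div, int i) {
  return &filter_ptr[i * element_num * filter_num_per_div];
}

// Start of split i's interlaced scale/bias pairs (2 floats per filter).
const float *split_sb_ptr(const float *bs_ptr, int filter_num_per_div, int i) {
  return &bs_ptr[i * filter_num_per_div * 2];
}

int main() {
  // Hypothetical layout: 2304-byte aligned filters, 512 filters per split.
  const int element_num = 2304, filter_num_per_div = 512;
  printf("split 1 starts at byte %d, sb float %d\n",
         1 * element_num * filter_num_per_div, 1 * filter_num_per_div * 2);
  return 0;
}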
......@@ -74,6 +74,7 @@ struct ConvArgs {
bool relu_enabled;
void* sb_address; // scale and bias are interlaced;
void* filter_address;
float* filter_scale_address;
uint32_t filter_num;
uint32_t group_num;
......@@ -200,7 +201,7 @@ void format_image(framework::Tensor* image_tensor);
void format_ofm(framework::Tensor* ofm_tensor); // only allocate memory
float filter_find_max(framework::Tensor* filter_tensor);
int get_element_num_per_div(framework::Tensor* filter_tensor, int group_num);
int get_filter_num_per_div(framework::Tensor* filter_tensor, int group_num);
int get_plit_num(framework::Tensor* filter_tensor);
int get_aligned_filter_element_num(int chw);
int get_aligned_filter_num(int num);
......
......@@ -101,7 +101,6 @@ void align_element(char **data_in, int num, int chw) {
int j = 0;
int align_chw = align_to_x(chw, FILTER_ELEMENT_ALIGNMENT);
if (align_chw != chw) {
printf("align %d \n", align_chw);
char *tmp = *data_in;
char *data_tmp = (char *)fpga_malloc(num * align_chw * sizeof(char));
......
......@@ -23,7 +23,7 @@ template <>
bool ConvAddBNKernel<FPGA, float>::Init(FusionConvAddBNParam<FPGA> *param) {
bool relu_enabled = false;
auto input = const_cast<Tensor *>(param->Input());
auto input_ptr = input->data<float>();
auto bias = param->Bias();
auto bias_ptr = bias->data<float>();
auto filter = const_cast<Tensor *>(param->Filter());
......@@ -62,7 +62,7 @@ bool ConvAddBNKernel<FPGA, float>::Init(FusionConvAddBNParam<FPGA> *param) {
fpga::format_filter(filter, max_value, param->Groups());
int element_num_per_div =
fpga::get_element_num_per_div(filter, param->Groups());
fpga::get_filter_num_per_div(filter, param->Groups());
fpga::format_bias_scale_array(&bs_ptr, element_num_per_div, channel);
fpga::format_ofm(out);
......@@ -80,7 +80,6 @@ void ConvAddBNKernel<FPGA, float>::Compute(
const FusionConvAddBNParam<FPGA> &param) const {
fpga::ComputeFpgaConv(param.FpgaArgs());
}
template class ConvAddBNKernel<FPGA, float>;
} // namespace operators
} // namespace paddle_mobile
......
......@@ -24,7 +24,6 @@ bool ConvAddBNReluKernel<FPGA, float>::Init(
FusionConvAddBNReluParam<FPGA> *param) {
bool relu_enabled = true;
auto input = const_cast<Tensor *>(param->Input());
auto input_ptr = input->data<float>();
const Tensor *bias = param->Bias();
auto bias_ptr = bias->data<float>();
auto filter = const_cast<Tensor *>(param->Filter());
......@@ -58,14 +57,12 @@ bool ConvAddBNReluKernel<FPGA, float>::Init(
float max_value = fpga::filter_find_max(filter);
fpga::format_filter(filter, max_value, param->Groups());
auto filter_ptr = filter->data<float>();
int element_num_per_div =
fpga::get_element_num_per_div(filter, param->Groups());
fpga::get_filter_num_per_div(filter, param->Groups());
fpga::format_bias_scale_array(&bs_ptr, element_num_per_div, channel);
fpga::format_ofm(out);
auto out_ptr = out->mutable_data<float>();
fpga::WrapperConvArgs conv_arg;
fpga::fill_conv_arg(&conv_arg, input, out, filter, relu_enabled,
......@@ -80,7 +77,6 @@ void ConvAddBNReluKernel<FPGA, float>::Compute(
const FusionConvAddBNReluParam<FPGA> &param) const {
fpga::ComputeFpgaConv(param.FpgaArgs());
}
template class ConvAddBNReluKernel<FPGA, float>;
} // namespace operators
} // namespace paddle_mobile
......
......@@ -23,7 +23,6 @@ template <>
bool ConvAddReluKernel<FPGA, float>::Init(FusionConvAddReluParam<FPGA> *param) {
bool relu_enabled = true;
auto input = const_cast<Tensor *>(param->Input());
auto input_ptr = input->data<float>();
const Tensor *bias = param->Bias();
auto bias_ptr = bias->data<float>();
auto filter = const_cast<Tensor *>(param->Filter());
......@@ -40,14 +39,12 @@ bool ConvAddReluKernel<FPGA, float>::Init(FusionConvAddReluParam<FPGA> *param) {
float max_value = fpga::filter_find_max(filter);
fpga::format_filter(filter, max_value, param->Groups());
auto filter_ptr = filter->data<float>();
int element_num_per_div =
fpga::get_element_num_per_div(filter, param->Groups());
fpga::get_filter_num_per_div(filter, param->Groups());
fpga::format_bias_scale_array(&bs_ptr, element_num_per_div, channel);
fpga::format_ofm(out);
auto out_ptr = out->mutable_data<float>();
fpga::WrapperConvArgs conv_arg;
fpga::fill_conv_arg(&conv_arg, input, out, filter, relu_enabled,
......@@ -62,7 +59,6 @@ void ConvAddReluKernel<FPGA, float>::Compute(
const FusionConvAddReluParam<FPGA> &param) const {
fpga::ComputeFpgaConv(param.FpgaArgs());
}
template class ConvAddReluKernel<FPGA, float>;
} // namespace operators
} // namespace paddle_mobile
......
......@@ -24,7 +24,6 @@ template <>
bool ConvBNKernel<FPGA, float>::Init(FusionConvBNParam<FPGA> *param) {
bool relu_enabled = false;
auto input = const_cast<Tensor *>(param->Input());
auto input_ptr = input->data<float>();
auto filter = const_cast<Tensor *>(param->Filter());
auto out = param->Output();
auto bn_mean_ptr = param->InputMean()->data<float>();
......@@ -55,14 +54,12 @@ bool ConvBNKernel<FPGA, float>::Init(FusionConvBNParam<FPGA> *param) {
float max_value = fpga::filter_find_max(filter);
fpga::format_filter(filter, max_value, param->Groups());
auto filter_ptr = filter->data<float>();
int element_num_per_div =
fpga::get_element_num_per_div(filter, param->Groups());
fpga::get_filter_num_per_div(filter, param->Groups());
fpga::format_bias_scale_array(&bs_ptr, element_num_per_div, channel);
fpga::format_ofm(out);
auto out_ptr = out->mutable_data<float>();
fpga::WrapperConvArgs conv_arg;
fpga::fill_conv_arg(&conv_arg, input, out, filter, relu_enabled,
......@@ -77,7 +74,6 @@ void ConvBNKernel<FPGA, float>::Compute(
const FusionConvBNParam<FPGA> &param) const {
fpga::ComputeFpgaConv(param.FpgaArgs());
}
template class ConvBNKernel<FPGA, float>;
} // namespace operators
} // namespace paddle_mobile
......
......@@ -23,7 +23,6 @@ template <>
bool ConvBNReluKernel<FPGA, float>::Init(FusionConvBNReluParam<FPGA> *param) {
bool relu_enabled = true;
auto input = const_cast<Tensor *>(param->Input());
auto input_ptr = input->data<float>();
auto filter = const_cast<Tensor *>(param->Filter());
auto out = param->Output();
auto bn_mean_ptr = param->InputMean()->data<float>();
......@@ -52,27 +51,12 @@ bool ConvBNReluKernel<FPGA, float>::Init(FusionConvBNReluParam<FPGA> *param) {
float max_value = fpga::filter_find_max(filter);
fpga::format_filter(filter, max_value, param->Groups());
auto filter_ptr = filter->data<float>();
int element_num_per_div =
fpga::get_element_num_per_div(filter, param->Groups());
fpga::get_filter_num_per_div(filter, param->Groups());
fpga::format_bias_scale_array(&bs_ptr, element_num_per_div, channel);
fpga::format_ofm(out);
auto out_ptr = out->mutable_data<float>();
fpga::WrapperConvArgs convArgs;
convArgs.group_num = (uint32_t)param->Groups();
convArgs.split_num = (uint32_t)fpga::get_plit_num(filter);
convArgs.filter_num = (uint32_t)filter->dims()[0];
convArgs.output.address = out_ptr;
convArgs.output.scale_address = out->scale;
convArgs.conv_args = (fpga::ConvArgs *)fpga::fpga_malloc(
convArgs.split_num * sizeof(fpga::ConvArgs));
param->SetFpgaArgs(convArgs);
int element_num = fpga::get_aligned_filter_element_num(
filter->dims()[1] * filter->dims()[2] * filter->dims()[3]);
fpga::WrapperConvArgs conv_arg;
fpga::fill_conv_arg(&conv_arg, input, out, filter, relu_enabled,
......@@ -87,7 +71,6 @@ void ConvBNReluKernel<FPGA, float>::Compute(
const FusionConvBNReluParam<FPGA> &param) const {
fpga::ComputeFpgaConv(param.FpgaArgs());
}
template class ConvBNReluKernel<FPGA, float>;
} // namespace operators
} // namespace paddle_mobile
......
......@@ -27,13 +27,7 @@ bool DropoutKernel<FPGA, float>::Init(DropoutParam<FPGA> *param) {
template <>
void DropoutKernel<FPGA, float>::Compute(
const DropoutParam<FPGA> &param) const {
// auto *input_x = param.InputX();
// auto *out = param.Out();
// auto input_x_ptr = input_x->data<float>();
// auto out_ptr = out->mutable_data<float>();
// out_ptr = const_cast<float *>(input_x_ptr);
}
const DropoutParam<FPGA> &param) const {}
} // namespace operators
} // namespace paddle_mobile
......
......@@ -21,7 +21,6 @@ template <>
bool FusionFcReluKernel<FPGA, float>::Init(FusionFcReluParam<FPGA> *param) {
bool relu_enabled = true;
auto input_x = const_cast<LoDTensor *>(param->InputX());
auto input_x_ptr = input_x->data<float>();
auto filter = const_cast<Tensor *>(param->InputY());
auto input_z = param->InputZ();
auto input_z_ptr = input_z->data<float>();
......@@ -47,12 +46,10 @@ bool FusionFcReluKernel<FPGA, float>::Init(FusionFcReluParam<FPGA> *param) {
filter->Resize(framework::make_ddim({num, filter_channel, height, width}));
float max_value = fpga::filter_find_max(filter);
fpga::format_filter(filter, max_value, 1);
auto filter_ptr = filter->data<float>();
int element_num_per_div = fpga::get_element_num_per_div(filter, 1);
int element_num_per_div = fpga::get_filter_num_per_div(filter, 1);
fpga::format_bias_scale_array(&bs_ptr, element_num_per_div, channel);
auto out_ptr = out->mutable_data<float>();
fpga::format_ofm(out);
fpga::WrapperConvArgs conv_arg;
fpga::fill_conv_arg(&conv_arg, input_x, out, filter, relu_enabled, 1, 1, 1, 0,
......
......@@ -22,7 +22,6 @@ template <>
bool FusionFcKernel<FPGA, float>::Init(FusionFcParam<FPGA> *param) {
bool relu_enabled = false;
auto input_x = const_cast<LoDTensor *>(param->InputX());
auto input_x_ptr = input_x->data<float>();
auto filter = const_cast<Tensor *>(param->InputY());
const Tensor *input_z = param->InputZ();
auto input_z_ptr = input_z->data<float>();
......@@ -48,12 +47,10 @@ bool FusionFcKernel<FPGA, float>::Init(FusionFcParam<FPGA> *param) {
filter->Resize(framework::make_ddim({num, filter_channel, height, width}));
float max_value = fpga::filter_find_max(filter);
fpga::format_filter(filter, max_value, 1);
auto filter_ptr = filter->data<float>();
int element_num_per_div = fpga::get_element_num_per_div(filter, 1);
int element_num_per_div = fpga::get_filter_num_per_div(filter, 1);
fpga::format_bias_scale_array(&bs_ptr, element_num_per_div, channel);
auto out_ptr = out->mutable_data<float>();
fpga::format_ofm(out);
fpga::WrapperConvArgs conv_arg;
fpga::fill_conv_arg(&conv_arg, input_x, out, filter, relu_enabled, 1, 1, 1, 0,
......
......@@ -50,9 +50,7 @@ bool PoolKernel<FPGA, float>::Init(PoolParam<FPGA> *param) {
template <>
void PoolKernel<FPGA, float>::Compute(const PoolParam<FPGA> &param) const {
#ifdef PADDLE_MOBILE_FPGA
fpga::ComputeFpgaPool(param.FpgaArgs());
#endif
}
} // namespace operators
} // namespace paddle_mobile
......
......@@ -55,7 +55,6 @@ void SoftmaxKernel<FPGA, float>::Compute(
math::SoftmaxFuntor<CPU, float>()(in_x, out);
}
template class SoftmaxKernel<FPGA, float>;
} // namespace operators
} // namespace paddle_mobile
......
......@@ -22,7 +22,7 @@ namespace fpga = paddle_mobile::fpga;
using std::cout;
using std::endl;
int main() {
void test_format_image() {
std::vector<int> dims{1, 1, 3, 3};
std::vector<float> elements{1, 2, 3, 4, 5, 6, 7, 8, 9};
frame::DDim ddim = frame::make_ddim(dims);
......@@ -44,6 +44,50 @@ int main() {
cout << endl;
auto dd = image.dims();
cout << dims[0] << dims[1] << dims[2] << dims[3] << endl;
}
void test_fill_conv_arg() {
Tensor input, out, filter;
DLOG << "Setup input";
SetupTensor<int16_t>(&input, {1, 250, 32, 30}, static_cast<int16_t>(0),
static_cast<int16_t>(1));
DLOG << "Setup filter";
SetupTensor<float>(&filter, {1001, 250, 3, 3}, static_cast<float>(0),
static_cast<float>(1));
DLOG << "Setup output";
SetupTensor<int16_t>(&out, {1, 1001, 32, 30}, static_cast<int16_t>(0),
static_cast<int16_t>(1));
auto bs_ptr = (float *)fpga::fpga_malloc(2 * 1001 * sizeof(float));
DLOG << "find max";
float max_value = fpga::filter_find_max(&filter);
DLOG << "format filter";
fpga::format_filter(&filter, max_value, 1);
DLOG << "format bs_ptr";
int element_num_per_div = fpga::get_filter_num_per_div(&filter, 1);
fpga::format_bias_scale_array(&bs_ptr, element_num_per_div, 1001);
DLOG << "format ofm";
fpga::format_ofm(&out);
DLOG << "Build arg";
fpga::WrapperConvArgs arg;
fpga::fill_conv_arg(&arg, &input, &out, &filter, true, 1, 1, 1, 1, 1, bs_ptr);
DLOG << "splitNum: " << arg.split_num << " group_num:" << arg.group_num
<< " filter_num:" << arg.filter_num;
for (int i = 0; i < arg.split_num; i++) {
DLOG << arg.conv_args[i].filter_num << " " << arg.conv_args[i].sb_address
<< " " << arg.conv_args[i].filter_address << " "
<< arg.conv_args[i].filter_scale_address;
}
}
int main() {
test_format_image();
test_fill_conv_arg();
return 0;
}
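As a sanity check on the splitting exercised by test_fill_conv_arg, the sketch below reproduces the last-split filter count formula used in fill_conv_arg. The filter_num_per_div of 512 and split count of 2 are hypothetical numbers for illustration, not values computed by the test; the real count is also passed through get_aligned_filter_num before being programmed into the hardware.

#include <cstdio>

// Filters assigned to the final split: the remainder after the earlier
// splits each take filter_num_per_div filters.
int last_split_filter_num(int channel, int filter_num_per_div, int split_num) {
  return channel - (split_num - 1) * filter_num_per_div;
}

int main() {
  printf("%d\n", last_split_filter_num(1001, 512, 2));  // 489, before alignment
  return 0;
}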