Commit 06eead1c authored by zhaojiaying01

resolve the conflict

......@@ -346,9 +346,9 @@ void expand_conv_arg(ConvArgs *arg) {
auto filter_pad_width_mul_channel =
args.image.pad_width * args.image.channels;
auto image_amount_per_row_multi_win_first =
image_amount_per_row * (4 * args.kernel.stride_h - args.image.pad_height);
image_amount_per_row * (2 * args.kernel.stride_h - args.image.pad_height);
auto image_amount_per_row_multi_win =
image_amount_per_row * (4 * args.kernel.stride_h);
image_amount_per_row * (2 * args.kernel.stride_h);
auto image_block_num = block_num;
auto image_block_len =
......@@ -375,7 +375,8 @@ void expand_conv_arg(ConvArgs *arg) {
(512 / (align_to_x(args.filter_num, 4) / 4 * 2) > 2)
? (512 / (align_to_x(args.filter_num, 4) / 4 * 2) - 2)
: 0;
auto cmd = 0UL | (args.relu_enabled ? USE_RELU : 0) | USE_BIAS;
// auto cmd = 0UL | (args.relu_enabled ? USE_RELU : 0) | USE_BIAS;
auto cmd = 0UL | USE_BIAS;
auto deconv_param = ((args.deconv_tx_param.deconv_en) << 24) |
((args.deconv_tx_param.sub_conv_num) << 16) |
......@@ -413,7 +414,8 @@ void expand_conv_arg(ConvArgs *arg) {
void expand_EW_arg(EWAddArgs *arg) {
EWAddArgs args = *arg;
uint64_t cmd = args.relu_enabled ? USE_RELU : 0;
// uint64_t cmd = args.relu_enabled ? USE_RELU : 0;
uint64_t cmd = 0;
uint64_t datalen = (uint64_t)args.image0.width *
(uint64_t)args.image0.height *
(uint64_t)args.image0.channels;
......@@ -441,8 +443,10 @@ void expand_EW_arg(EWAddArgs *arg) {
void fill_split_arg(struct SplitConvArgs *arg, framework::Tensor *input,
framework::Tensor *out, framework::Tensor *filter,
bool relu_enabled, int group_num, int stride_h,
int stride_w, int padding_h, int padding_w, float *bs_ptr) {
ActivationType activation_enable,
int16_t leaky_relu_negative_slope, int group_num,
int stride_h, int stride_w, int padding_h, int padding_w,
float *bs_ptr) {
auto input_ptr = input->data<float>();
auto filter_ptr = filter->data<float>();
auto out_ptr = out->data<float>();
......@@ -488,7 +492,10 @@ void fill_split_arg(struct SplitConvArgs *arg, framework::Tensor *input,
filter->dims()[3]));
for (int i = 0; i < n; i++) {
arg->conv_arg[i].relu_enabled = relu_enabled;
// arg->conv_arg[i].relu_enabled = relu_enabled;
arg->conv_arg[i].output.activation.activation_type = activation_enable;
arg->conv_arg[i].output.activation.leaky_relu_negative_slope =
leaky_relu_negative_slope;
arg->conv_arg[i].group_num = (uint32_t)group_num;
arg->conv_arg[i].kernel.stride_h = (uint32_t)stride_h;
arg->conv_arg[i].kernel.stride_w = (uint32_t)stride_w;
......@@ -560,8 +567,9 @@ void fill_split_arg(struct SplitConvArgs *arg, framework::Tensor *input,
void fill_deconv_arg(struct DeconvArgs *arg, framework::Tensor *input,
framework::Tensor *out, framework::Tensor *filter,
bool relu_enabled, int group_num, int stride_h,
int stride_w, int padding_h, int padding_w,
ActivationType activation_enable,
int16_t leaky_relu_negative_slope, int group_num,
int stride_h, int stride_w, int padding_h, int padding_w,
float *bs_ptr) {
auto input_ptr = input->data<float>();
auto filter_ptr = filter->data<float>();
......@@ -687,7 +695,13 @@ void fill_deconv_arg(struct DeconvArgs *arg, framework::Tensor *input,
}
for (int j = 0; j < split_num; ++j) {
arg->split_conv_args[i]->conv_arg[j].relu_enabled = relu_enabled;
// arg->split_conv_args[i]->conv_arg[j].relu_enabled = relu_enabled;
arg->split_conv_args[i]->conv_arg[j].output.activation.activation_type =
activation_enable;
arg->split_conv_args[i]
->conv_arg[j]
.output.activation.leaky_relu_negative_slope =
leaky_relu_negative_slope;
arg->split_conv_args[i]->conv_arg[j].group_num = (uint32_t)group_num;
arg->split_conv_args[i]->conv_arg[j].kernel.width =
......@@ -800,13 +814,17 @@ void fill_deconv_arg(struct DeconvArgs *arg, framework::Tensor *input,
void fill_dwconv_arg(struct DWconvArgs *arg, framework::Tensor *input,
framework::Tensor *out, framework::Tensor *filter,
bool relu_enabled, int stride_h, int stride_w,
int padding_h, int padding_w, float *bias_ptr) {
ActivationType activation_enable,
int16_t leaky_relu_negative_slope, int stride_h,
int stride_w, int padding_h, int padding_w,
float *bias_ptr) {
auto filter_ptr = filter->data<float>();
auto input_ptr = input->data<float>();
auto output_ptr = out->mutable_data<float>();
arg->sub_conv_num = 1;
arg->relu_enabled = relu_enabled;
// arg->relu_enabled = relu_enabled;
arg->output.activation.activation_type = activation_enable;
arg->output.activation.leaky_relu_negative_slope = leaky_relu_negative_slope;
arg->bias_address = bias_ptr;
arg->filter_address = filter_ptr;
arg->kernel.height = (uint32_t)filter->dims()[2];
......@@ -826,8 +844,10 @@ void fill_dwconv_arg(struct DWconvArgs *arg, framework::Tensor *input,
void fill_DWDeconv_arg(struct DWDeconvArgs *arg, framework::Tensor *input,
framework::Tensor *out, framework::Tensor *filter,
bool relu_enabled, int stride_h, int stride_w,
int padding_h, int padding_w, float *bias_ptr) {
ActivationType activation_enable,
int16_t leaky_relu_negative_slope, int stride_h,
int stride_w, int padding_h, int padding_w,
float *bias_ptr) {
auto filter_ptr = filter->data<float>();
auto input_ptr = input->data<float>();
auto output_ptr = out->mutable_data<float>();
......@@ -884,7 +904,10 @@ void fill_DWDeconv_arg(struct DWDeconvArgs *arg, framework::Tensor *input,
arg->dw_conv_args.push_back(std::make_shared<DWconvArgs>());
arg->dw_conv_args[i]->sub_conv_num = sub_conv_num;
arg->dw_conv_args[i]->relu_enabled = relu_enabled;
// arg->dw_conv_args[i]->relu_enabled = relu_enabled;
arg->dw_conv_args[i]->output.activation.activation_type = activation_enable;
arg->dw_conv_args[i]->output.activation.leaky_relu_negative_slope =
leaky_relu_negative_slope;
arg->dw_conv_args[i]->bias_address = bias_ptr;
arg->dw_conv_args[i]->filter_address =
......
......@@ -47,20 +47,28 @@ void format_concat_output(framework::Tensor* out, int height, int width,
void fill_split_arg(struct SplitConvArgs* arg, framework::Tensor* input,
framework::Tensor* out, framework::Tensor* filter,
bool relu_enabled, int group_num, int stride_h,
int stride_w, int padding_h, int padding_w, float* bs_ptr);
ActivationType activation_enable,
int16_t leaky_relu_negative_slope, int group_num,
int stride_h, int stride_w, int padding_h, int padding_w,
float* bs_ptr);
void fill_deconv_arg(struct DeconvArgs* arg, framework::Tensor* input,
framework::Tensor* out, framework::Tensor* filter,
bool relu_enabled, int group_num, int stride_h,
int stride_w, int padding_h, int padding_w, float* bs_ptr);
ActivationType activation_enable,
int16_t leaky_relu_negative_slope, int group_num,
int stride_h, int stride_w, int padding_h, int padding_w,
float* bs_ptr);
void fill_dwconv_arg(struct DWconvArgs* arg, framework::Tensor* input,
framework::Tensor* out, framework::Tensor* filter,
bool relu_enabled, int stride_h, int stride_w,
int padding_h, int padding_w, float* bias_ptr);
ActivationType activation_enable,
int16_t leaky_relu_negative_slope, int stride_h,
int stride_w, int padding_h, int padding_w,
float* bias_ptr);
void fill_DWDeconv_arg(struct DWDeconvArgs* arg, framework::Tensor* input,
framework::Tensor* out, framework::Tensor* filter,
bool relu_enabled, int stride_h, int stride_w,
int padding_h, int padding_w, float* bs_ptr);
ActivationType activation_enable,
int16_t leaky_relu_negative_slope, int stride_h,
int stride_w, int padding_h, int padding_w,
float* bs_ptr);
void format_deconv_filter(framework::Tensor* filter_tensor, float max_value,
int group_num, int stride);
......
......@@ -19,7 +19,6 @@ limitations under the License. */
#include "fpga/V1/filter.h"
// #include "filter.h"
#include "fpga/V1/api.h"
// #include "fpga_api.h"
namespace paddle_mobile {
namespace fpga {
......
......@@ -63,6 +63,7 @@ using namespace std; // NOLINT
#define REG_TIMER_COUNTER 0x070
#define REG_SCALE_PARAMETER 0x080
#define REG_ACTIVATION_MODE_AND_LEAKY_RELU_FACTOR 0x090
#define REG_FLASH_CMD 0x200
#define REG_FLASH_DATA 0x208
......@@ -189,8 +190,8 @@ int ComputeFpgaConv(const struct SplitConvArgs &args) {
int ComputeBasicConv(const struct ConvArgs &args) {
#ifdef FPGA_PRINT_MODE
DLOG << "======Compute Basic Conv======";
DLOG << " relu_enabled:" << args.relu_enabled
<< " sb_address:" << args.sb_address
// DLOG << " relu_enabled:" << args.relu_enabled
DLOG << " sb_address:" << args.sb_address
<< " filter_address:" << args.filter_address
<< " filter_num:" << args.filter_num
<< " group_num:" << args.group_num;
......@@ -212,6 +213,25 @@ int ComputeBasicConv(const struct ConvArgs &args) {
#ifdef PADDLE_MOBILE_ZU5
int ret = 0;
uint64_t output_scale = 0;
uint64_t reg_ActivationArgs = 0;
// active function: {none, leakyrelu, sigmoid, tanh}
ActivationArgs active_args;
// active_args.activation_type = LEAKYRELU;
active_args.activation_type = args.output.activation.activation_type;
active_args.leaky_relu_negative_slope =
args.output.activation.leaky_relu_negative_slope;
reg_ActivationArgs = (uint64_t(active_args.activation_type) << 32) |
active_args.leaky_relu_negative_slope;
DLOG << " activation_type:" << active_args.activation_type
<< " leaky_relu_negative_slope:"
<< active_args.leaky_relu_negative_slope;
DLOG << " reg_ActivationArgs:" << reg_ActivationArgs;
pthread_mutex_lock(&g_fpgainfo.pe_data->mutex);
if (ERROR == g_fpgainfo.pe_data->pes[PE_IDX_CONV]->status) {
ret = -EIO;
......@@ -219,6 +239,10 @@ int ComputeBasicConv(const struct ConvArgs &args) {
pthread_mutex_unlock(&g_fpgainfo.pe_data->mutex);
return ret;
}
reg_writeq(reg_ActivationArgs,
REG_ACTIVATION_MODE_AND_LEAKY_RELU_FACTOR);  // active function
reg_writeq(output_scale, REG_SCALE_PARAMETER);
reg_writeq(
((uint64_t)args.image.height) | (((uint64_t)args.image.width) << 32),
......@@ -278,6 +302,9 @@ int ComputeBasicConv(const struct ConvArgs &args) {
output_scale = (output_scale << 32) | (output_scale >> 32);
fpga_copy(args.output.scale_address, &output_scale, sizeof(float) * 2);
active_args.activation_type = NONE;
reg_writeq(reg_ActivationArgs, REG_ACTIVATION_MODE_AND_LEAKY_RELU_FACTOR);
pthread_mutex_unlock(&g_fpgainfo.pe_data->mutex);
return ret;
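For reference, the reg_ActivationArgs value written to REG_ACTIVATION_MODE_AND_LEAKY_RELU_FACTOR above packs the activation type into the upper 32 bits of a 64-bit word and the 16-bit leaky-relu negative-slope factor into the low bits. A minimal standalone sketch of that packing (the helper name is illustrative and not part of the driver API; the mask simply keeps the raw 16-bit pattern of the slope field):
#include <cstdint>
// Illustrative sketch of the register packing used in this commit:
// upper 32 bits = activation type, low 16 bits = leaky-relu slope factor.
static inline uint64_t PackActivationReg(uint32_t activation_type,
                                         int16_t leaky_relu_negative_slope) {
  return (static_cast<uint64_t>(activation_type) << 32) |
         (static_cast<uint64_t>(leaky_relu_negative_slope) & 0xFFFFu);
}
// e.g. NONE (0) with slope 0 packs to 0; LEAKYRELU (1) packs to (1ull << 32) | slope.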
......@@ -314,6 +341,23 @@ int ComputeFpgaPool(const struct PoolingArgs &args) {
uint64_t image_physical_address = 0;
uint64_t output_physical_address = 0;
uint64_t reg_ActivationArgs = 0;
// active function: {none, leakyrelu, sigmoid, tanh}
ActivationArgs active_args;
// active_args.activation_type = LEAKYRELU;
active_args.activation_type = args.output.activation.activation_type;
active_args.leaky_relu_negative_slope =
args.output.activation.leaky_relu_negative_slope;
reg_ActivationArgs = (uint64_t(active_args.activation_type) << 32) |
active_args.leaky_relu_negative_slope;
DLOG << " activation_type:" << active_args.activation_type
<< " leaky_relu_negative_slope:"
<< active_args.leaky_relu_negative_slope;
DLOG << " reg_ActivationArgs:" << reg_ActivationArgs;
image_physical_address = vaddr_to_paddr_driver(args.image.address);
output_physical_address = vaddr_to_paddr_driver(args.output.address);
uint32_t output_height = (uint32_t)(
......@@ -364,6 +408,9 @@ int ComputeFpgaPool(const struct PoolingArgs &args) {
return ret;
}
reg_writeq(reg_ActivationArgs,
REG_ACTIVATION_MODE_AND_LEAKY_RELU_FACTOR);  // active function
reg_writeq(output_scale, REG_SCALE_PARAMETER);
reg_writeq(image_physical_address, REG_POOLING_IMAGE_BASE_ADDR);
reg_writeq(output_physical_address, REG_POOLING_RESULT_BASE_ADDR);
......@@ -408,6 +455,10 @@ int ComputeFpgaPool(const struct PoolingArgs &args) {
output_scale = reg_readq(REG_SCALE_PARAMETER);
output_scale = (output_scale << 32) | (output_scale >> 32);
fpga_copy(args.output.scale_address, &output_scale, sizeof(float) * 2);
active_args.activation_type = NONE;
reg_writeq(reg_ActivationArgs, REG_ACTIVATION_MODE_AND_LEAKY_RELU_FACTOR);
pthread_mutex_unlock(&g_fpgainfo.pe_data->mutex);
return ret;
......@@ -418,8 +469,8 @@ int ComputeFpgaPool(const struct PoolingArgs &args) {
int ComputeFpgaEWAdd(const struct EWAddArgs &args) {
#ifdef FPGA_PRINT_MODE
DLOG << "=============ComputeFpgaEWAdd===========";
DLOG << " relu_enabled:" << args.relu_enabled
<< " const0:" << fp16_2_fp32(int16_t(args.const0))
// DLOG << " relu_enabled:" << args.relu_enabled
DLOG << " const0:" << fp16_2_fp32(int16_t(args.const0))
<< " const1:" << fp16_2_fp32(int16_t(args.const1));
DLOG << " image0_address:" << args.image0.address
<< " image0_scale_address:" << args.image0.scale_address
......@@ -441,6 +492,19 @@ int ComputeFpgaEWAdd(const struct EWAddArgs &args) {
#ifdef PADDLE_MOBILE_ZU5
int ret = 0;
uint64_t output_scale = 0;
uint64_t reg_ActivationArgs = 0;
ActivationArgs active_args;
active_args.activation_type = args.output.activation.activation_type;
active_args.leaky_relu_negative_slope =
args.output.activation.leaky_relu_negative_slope;
reg_ActivationArgs = (uint64_t(active_args.activation_type) << 32) |
active_args.leaky_relu_negative_slope;
DLOG << " activation_type:" << active_args.activation_type
<< " leaky_relu_negative_slope:"
<< active_args.leaky_relu_negative_slope;
DLOG << " reg_ActivationArgs:" << reg_ActivationArgs;
pthread_mutex_lock(&g_fpgainfo.pe_data->mutex);
if (ERROR == g_fpgainfo.pe_data->pes[PE_IDX_EW]->status) {
ret = -EIO;
......@@ -449,6 +513,9 @@ int ComputeFpgaEWAdd(const struct EWAddArgs &args) {
return ret;
}
reg_writeq(reg_ActivationArgs,
REG_ACTIVATION_MODE_AND_LEAKY_RELU_FACTOR);  // active function
reg_writeq(output_scale, REG_SCALE_PARAMETER);
reg_writeq(args.driver.image0_address_phy, REG_EW_IMAGE0_BASE_ADDR);
reg_writeq(args.driver.image1_address_phy, REG_EW_IMAGE1_BASE_ADDR);
......@@ -468,6 +535,9 @@ int ComputeFpgaEWAdd(const struct EWAddArgs &args) {
output_scale = reg_readq(REG_SCALE_PARAMETER);
output_scale = (output_scale << 32) | (output_scale >> 32);
fpga_copy(args.output.scale_address, &output_scale, sizeof(float) * 2);
active_args.activation_type = NONE;
reg_writeq(reg_ActivationArgs, REG_ACTIVATION_MODE_AND_LEAKY_RELU_FACTOR);
pthread_mutex_unlock(&g_fpgainfo.pe_data->mutex);
return ret;
#endif
......@@ -501,6 +571,17 @@ int PerformBypass(const struct BypassArgs &args) {
uint8_t data_cell_in = 0;
uint8_t data_cell_out = 0;
int ret = 0;
uint64_t reg_ActivationArgs = 0;
ActivationArgs active_args;
active_args.activation_type = args.output.activation.activation_type;
active_args.leaky_relu_negative_slope =
args.output.activation.leaky_relu_negative_slope;
reg_ActivationArgs = (uint64_t(active_args.activation_type) << 32) |
active_args.leaky_relu_negative_slope;
datalen = (uint64_t)args.image.width * (uint64_t)args.image.height *
(uint64_t)args.image.channels;
datalen = align_to_x(datalen, 16);
......@@ -559,7 +640,6 @@ int PerformBypass(const struct BypassArgs &args) {
(data_cell_out != SIZE_FP16 && data_cell_out != SIZE_FP32)) {
return -EFAULT;
}
pthread_mutex_lock(&g_fpgainfo.pe_data->mutex);
if (ERROR == g_fpgainfo.pe_data->pes[PE_IDX_BYPASS]->status) {
ret = -EIO;
......@@ -567,7 +647,8 @@ int PerformBypass(const struct BypassArgs &args) {
pthread_mutex_unlock(&g_fpgainfo.pe_data->mutex);
return ret;
}
reg_writeq(reg_ActivationArgs,
REG_ACTIVATION_MODE_AND_LEAKY_RELU_FACTOR);  // active function
reg_writeq(output_scale, REG_SCALE_PARAMETER);
reg_writeq(input_address_phy, REG_CONVERT_SRC_ADDR);
reg_writeq(output_address_phy, REG_CONVERT_DST_ADDR);
......@@ -585,6 +666,7 @@ int PerformBypass(const struct BypassArgs &args) {
output_scale = reg_readq(REG_SCALE_PARAMETER);
output_scale = (output_scale << 32) | (output_scale >> 32);
fpga_copy(args.output.scale_address, &output_scale, sizeof(float) * 2);
reg_writeq(reg_ActivationArgs, REG_ACTIVATION_MODE_AND_LEAKY_RELU_FACTOR);
pthread_mutex_unlock(&g_fpgainfo.pe_data->mutex);
return ret;
#endif
......@@ -808,7 +890,7 @@ int ComputeFPGASplit(const struct SplitArgs &args) {
int ComputeDWConv(const struct DWconvArgs &args) {
#ifdef FPGA_PRINT_MODE
DLOG << "=============ComputeDWConv===========";
DLOG << " mode:" << args.relu_enabled;
// DLOG << " mode:" << args.relu_enabled;
DLOG << " image_address:" << args.image.address
<< " image_scale_address:" << args.image.scale_address
<< " image_channels:" << args.image.channels
......@@ -831,7 +913,8 @@ int ComputeDWConv(const struct DWconvArgs &args) {
uint64_t output_scale = 0;
uint64_t timer_cnt = 0;
int ret = 0;
uint64_t cmd = args.relu_enabled;
// uint64_t cmd = args.relu_enabled;
uint64_t cmd = 0;
uint64_t image_physical_address = 0;
uint64_t output_physical_address = 0;
uint64_t filter_physical_address = 0;
......
......@@ -154,7 +154,6 @@ int memory_request(struct fpga_memory *memory, size_t size, uint64_t *addr) {
unsigned int nr = (unsigned int)_nr;
int ret = 0;
uint64_t a_size = FPGA_PAGE_SIZE * nr;
DLOG << a_size;
pthread_mutex_lock(&memory->mutex);
......@@ -391,9 +390,6 @@ int fpga_invalidate_driver(void *address, size_t size) {
void fpga_copy_driver(void *dest, const void *src, size_t num) {
uint64_t i;
DLOG << "dest:" << dest << " src:" << src << " size:" << num;
for (i = 0; i < num; i++) {
*((int8_t *)dest + i) = *((int8_t *)src + i); // NOLINT
}
......
......@@ -29,7 +29,7 @@ namespace driver {
#define DIV_ROUND_UP(n, d) (((n) + (d)-1) / (d))
#define FPGA_REG_PHY_ADDR 0xa0000000
#define FPGA_REG_PHY_ADDR 0x80000000
#define FPGA_REG_SIZE 0x1000
#define FPGA_MEM_PHY_ADDR 0x40000000
#define FPGA_MEM_SIZE 0x80000000
......
......@@ -45,6 +45,7 @@ enum ActivationType {
LEAKYRELU = 1,
SIGMOID = 2,
TANH = 3,
SOFTMAX = 4,
};
struct ActivationArgs {
......@@ -132,7 +133,7 @@ struct DeconvTxParm {
#endif
struct ConvArgs {
bool relu_enabled;
// bool relu_enabled;
void* sb_address; // scale and bias
void* filter_address;
float* filter_scale_address;
......@@ -198,7 +199,7 @@ struct PoolingArgs {
};
struct EWAddArgs {
bool relu_enabled;
// bool relu_enabled;
uint32_t const0; // output0 = const0 x input0 + const1 x input1;
uint32_t const1;
struct ImageInputArgs image0;
......@@ -230,7 +231,7 @@ struct DeconvArgs {
};
struct DWconvArgs {
uint32_t sub_conv_num;
bool relu_enabled;
// bool relu_enabled;
void* bias_address;
void* filter_address;
struct KernelArgs kernel;
......
......@@ -31,6 +31,10 @@ DEFINE_ACTIVATION_INFERSHAPE(Relu6);
#ifdef SIGMOID_OP
DEFINE_ACTIVATION_INFERSHAPE(Sigmoid);
namespace ops = paddle_mobile::operators;
#ifdef PADDLE_MOBILE_FPGA
REGISTER_OPERATOR_FPGA(sigmoid, ops::SigmoidOp);
#endif
#endif // SIGMOID_OP
#ifdef TANH_OP
......
......@@ -32,6 +32,7 @@ void ConvBNReluBasic(const FusionConvBNReluParam<CPU> &param) {
Tensor new_scale = *param.NewScale();
Tensor *output = param.Output();
output->mutable_data<float>();
int groups = param.Groups();
std::vector<int> strides = param.Strides();
......
......@@ -32,6 +32,7 @@ void DWConvBNReluBasic(const FusionDWConvBNReluParam<CPU> &param) {
Tensor new_scale = *param.NewScale();
Tensor *output = param.Output();
output->mutable_data<float>();
int groups = param.Groups();
std::vector<int> strides = param.Strides();
......
......@@ -22,7 +22,10 @@ namespace operators {
template <>
bool ConvAddBNKernel<FPGA, float>::Init(FusionConvAddBNParam<FPGA> *param) {
bool relu_enabled = false;
// bool relu_enabled = false;
paddle_mobile::fpga::ActivationType activation_enable =
paddle_mobile::fpga::NONE;
int16_t leaky_relu_negative_slope = 0;
auto input = const_cast<Tensor *>(param->Input());
auto bias = param->Bias();
......@@ -61,10 +64,10 @@ bool ConvAddBNKernel<FPGA, float>::Init(FusionConvAddBNParam<FPGA> *param) {
fpga::format_conv_data(filter, out, &bs_ptr, param->Groups());
fpga::SplitConvArgs conv_arg = {0};
fpga::fill_split_arg(&conv_arg, input, out, filter, relu_enabled,
param->Groups(), param->Strides()[0],
param->Strides()[1], param->Paddings()[0],
param->Paddings()[1], bs_ptr);
fpga::fill_split_arg(&conv_arg, input, out, filter, activation_enable,
leaky_relu_negative_slope, param->Groups(),
param->Strides()[0], param->Strides()[1],
param->Paddings()[0], param->Paddings()[1], bs_ptr);
param->SetFpgaArgs(conv_arg);
return true;
......
......@@ -23,7 +23,10 @@ namespace operators {
template <>
bool ConvAddBNReluKernel<FPGA, float>::Init(
FusionConvAddBNReluParam<FPGA> *param) {
bool relu_enabled = true;
// bool relu_enabled = true;
paddle_mobile::fpga::ActivationType activation_enable =
paddle_mobile::fpga::LEAKYRELU;
int16_t leaky_relu_negative_slope = 0;
auto input = const_cast<Tensor *>(param->Input());
auto bias = param->Bias();
auto bias_ptr = bias->data<float>();
......@@ -64,16 +67,16 @@ bool ConvAddBNReluKernel<FPGA, float>::Init(
if (groups == channel) {
fpga::format_dwconv_data(filter, out, new_scale_ptr, &new_bias_ptr);
fpga::DWconvArgs dwconv_arg = {0};
fpga::fill_dwconv_arg(&dwconv_arg, input, out, filter, relu_enabled,
strides[0], strides[1], paddings[0], paddings[1],
new_bias_ptr);
fpga::fill_dwconv_arg(&dwconv_arg, input, out, filter, activation_enable,
leaky_relu_negative_slope, strides[0], strides[1],
paddings[0], paddings[1], new_bias_ptr);
param->SetFpgaArgs(dwconv_arg);
} else {
fpga::format_conv_data(filter, out, &bs_ptr, param->Groups());
fpga::SplitConvArgs conv_arg = {0};
fpga::fill_split_arg(&conv_arg, input, out, filter, relu_enabled,
param->Groups(), strides[0], strides[1], paddings[0],
paddings[1], bs_ptr);
fpga::fill_split_arg(&conv_arg, input, out, filter, activation_enable,
leaky_relu_negative_slope, param->Groups(), strides[0],
strides[1], paddings[0], paddings[1], bs_ptr);
param->SetFpgaArgs(conv_arg);
}
return true;
......
......@@ -21,7 +21,10 @@ namespace operators {
template <>
bool ConvAddKernel<FPGA, float>::Init(FusionConvAddParam<FPGA> *param) {
bool relu_enabled = false;
// bool relu_enabled = false;
paddle_mobile::fpga::ActivationType activation_enable =
paddle_mobile::fpga::NONE;
int16_t leaky_relu_negative_slope = 0;
auto input = const_cast<Tensor *>(param->Input());
const Tensor *bias = param->Bias();
auto bias_ptr = bias->data<float>();
......@@ -40,10 +43,10 @@ bool ConvAddKernel<FPGA, float>::Init(FusionConvAddParam<FPGA> *param) {
fpga::format_conv_data(filter, out, &bs_ptr, param->Groups());
fpga::SplitConvArgs conv_arg = {0};
fpga::fill_split_arg(&conv_arg, input, out, filter, relu_enabled,
param->Groups(), param->Strides()[0],
param->Strides()[1], param->Paddings()[0],
param->Paddings()[1], bs_ptr);
fpga::fill_split_arg(&conv_arg, input, out, filter, activation_enable,
leaky_relu_negative_slope, param->Groups(),
param->Strides()[0], param->Strides()[1],
param->Paddings()[0], param->Paddings()[1], bs_ptr);
param->SetFpgaArgs(conv_arg);
return true;
}
......
......@@ -21,7 +21,10 @@ namespace operators {
template <>
bool ConvAddReluKernel<FPGA, float>::Init(FusionConvAddReluParam<FPGA> *param) {
bool relu_enabled = true;
// bool relu_enabled = true;
paddle_mobile::fpga::ActivationType activation_enable =
paddle_mobile::fpga::LEAKYRELU;
int16_t leaky_relu_negative_slope = 0;
auto input = const_cast<Tensor *>(param->Input());
const Tensor *bias = param->Bias();
auto bias_ptr = bias->data<float>();
......@@ -40,10 +43,10 @@ bool ConvAddReluKernel<FPGA, float>::Init(FusionConvAddReluParam<FPGA> *param) {
fpga::format_conv_data(filter, out, &bs_ptr, param->Groups());
fpga::SplitConvArgs conv_arg = {0};
fpga::fill_split_arg(&conv_arg, input, out, filter, relu_enabled,
param->Groups(), param->Strides()[0],
param->Strides()[1], param->Paddings()[0],
param->Paddings()[1], bs_ptr);
fpga::fill_split_arg(&conv_arg, input, out, filter, activation_enable,
leaky_relu_negative_slope, param->Groups(),
param->Strides()[0], param->Strides()[1],
param->Paddings()[0], param->Paddings()[1], bs_ptr);
param->SetFpgaArgs(conv_arg);
return true;
}
......
......@@ -22,7 +22,10 @@ namespace operators {
template <>
bool ConvBNKernel<FPGA, float>::Init(FusionConvBNParam<FPGA> *param) {
bool relu_enabled = false;
// bool relu_enabled = false;
paddle_mobile::fpga::ActivationType activation_enable =
paddle_mobile::fpga::NONE;
int16_t leaky_relu_negative_slope = 0;
auto input = const_cast<Tensor *>(param->Input());
auto filter = const_cast<Tensor *>(param->Filter());
auto out = param->Output();
......@@ -53,10 +56,10 @@ bool ConvBNKernel<FPGA, float>::Init(FusionConvBNParam<FPGA> *param) {
fpga::format_conv_data(filter, out, &bs_ptr, param->Groups());
fpga::SplitConvArgs conv_arg = {0};
fpga::fill_split_arg(&conv_arg, input, out, filter, relu_enabled,
param->Groups(), param->Strides()[0],
param->Strides()[1], param->Paddings()[0],
param->Paddings()[1], bs_ptr);
fpga::fill_split_arg(&conv_arg, input, out, filter, activation_enable,
leaky_relu_negative_slope, param->Groups(),
param->Strides()[0], param->Strides()[1],
param->Paddings()[0], param->Paddings()[1], bs_ptr);
param->SetFpgaArgs(conv_arg);
return true;
}
......
......@@ -22,7 +22,10 @@ namespace operators {
template <>
bool ConvBNReluKernel<FPGA, float>::Init(FusionConvBNReluParam<FPGA> *param) {
bool relu_enabled = true;
// bool relu_enabled = true;
paddle_mobile::fpga::ActivationType activation_enable =
paddle_mobile::fpga::LEAKYRELU;
int16_t leaky_relu_negative_slope = 0;
auto input = const_cast<Tensor *>(param->Input());
auto filter = const_cast<Tensor *>(param->Filter());
auto out = param->Output();
......@@ -53,10 +56,10 @@ bool ConvBNReluKernel<FPGA, float>::Init(FusionConvBNReluParam<FPGA> *param) {
fpga::format_conv_data(filter, out, &bs_ptr, param->Groups());
fpga::SplitConvArgs conv_arg = {0};
fpga::fill_split_arg(&conv_arg, input, out, filter, relu_enabled,
param->Groups(), param->Strides()[0],
param->Strides()[1], param->Paddings()[0],
param->Paddings()[1], bs_ptr);
fpga::fill_split_arg(&conv_arg, input, out, filter, activation_enable,
leaky_relu_negative_slope, param->Groups(),
param->Strides()[0], param->Strides()[1],
param->Paddings()[0], param->Paddings()[1], bs_ptr);
param->SetFpgaArgs(conv_arg);
return true;
}
......
......@@ -23,7 +23,10 @@ namespace operators {
template <>
bool DeconvAddKernel<FPGA, float>::Init(FusionDeconvAddParam<FPGA> *param) {
bool relu_enabled = false;
// bool relu_enabled = false;
paddle_mobile::fpga::ActivationType activation_enable =
paddle_mobile::fpga::NONE;
int16_t leaky_relu_negative_slope = 0;
auto input = const_cast<Tensor *>(param->Input());
const Tensor *bias = param->Bias();
auto bias_ptr = bias->data<float>();
......@@ -53,17 +56,18 @@ bool DeconvAddKernel<FPGA, float>::Init(FusionDeconvAddParam<FPGA> *param) {
fpga::format_DWDeconv_data(filter, out, &bs_ptr, param->Groups(),
sub_conv_n);
fpga::DWDeconvArgs DWDeconv_arg = {0};
fpga::fill_DWDeconv_arg(&DWDeconv_arg, input, out, filter, relu_enabled,
fpga::fill_DWDeconv_arg(&DWDeconv_arg, input, out, filter,
activation_enable, leaky_relu_negative_slope,
param->Strides()[0], param->Strides()[1],
param->Paddings()[0], param->Paddings()[1], bs_ptr);
param->SetFpgaArgs(DWDeconv_arg);
} else {
fpga::format_deconv_data(filter, out, &bs_ptr, param->Groups(), sub_conv_n);
fpga::DeconvArgs deconv_arg = {0};
fpga::fill_deconv_arg(&deconv_arg, input, out, filter, relu_enabled,
param->Groups(), param->Strides()[0],
param->Strides()[1], param->Paddings()[0],
param->Paddings()[1], bs_ptr);
fpga::fill_deconv_arg(&deconv_arg, input, out, filter, activation_enable,
leaky_relu_negative_slope, param->Groups(),
param->Strides()[0], param->Strides()[1],
param->Paddings()[0], param->Paddings()[1], bs_ptr);
param->SetFpgaArgs(deconv_arg);
}
......
......@@ -24,7 +24,10 @@ namespace operators {
template <>
bool DeconvAddReluKernel<FPGA, float>::Init(
FusionDeconvAddReluParam<FPGA> *param) {
bool relu_enabled = true;
// bool relu_enabled = true;
paddle_mobile::fpga::ActivationType activation_enable =
paddle_mobile::fpga::LEAKYRELU;
int16_t leaky_relu_negative_slope = 0;
auto input = const_cast<Tensor *>(param->Input());
const Tensor *bias = param->Bias();
auto bias_ptr = bias->data<float>();
......@@ -54,17 +57,18 @@ bool DeconvAddReluKernel<FPGA, float>::Init(
fpga::format_DWDeconv_data(filter, out, &bs_ptr, param->Groups(),
sub_conv_n);
fpga::DWDeconvArgs DWDeconv_arg = {0};
fpga::fill_DWDeconv_arg(&DWDeconv_arg, input, out, filter, relu_enabled,
fpga::fill_DWDeconv_arg(&DWDeconv_arg, input, out, filter,
activation_enable, leaky_relu_negative_slope,
param->Strides()[0], param->Strides()[1],
param->Paddings()[0], param->Paddings()[1], bs_ptr);
param->SetFpgaArgs(DWDeconv_arg);
} else {
fpga::format_deconv_data(filter, out, &bs_ptr, param->Groups(), sub_conv_n);
fpga::DeconvArgs deconv_arg = {0};
fpga::fill_deconv_arg(&deconv_arg, input, out, filter, relu_enabled,
param->Groups(), param->Strides()[0],
param->Strides()[1], param->Paddings()[0],
param->Paddings()[1], bs_ptr);
fpga::fill_deconv_arg(&deconv_arg, input, out, filter, activation_enable,
leaky_relu_negative_slope, param->Groups(),
param->Strides()[0], param->Strides()[1],
param->Paddings()[0], param->Paddings()[1], bs_ptr);
param->SetFpgaArgs(deconv_arg);
}
return true;
......
......@@ -20,7 +20,10 @@ namespace operators {
template <>
bool ElementwiseAddKernel<FPGA, float>::Init(ElementwiseAddParam<FPGA> *param) {
bool relu_enabled = false;
// bool relu_enabled = false;
paddle_mobile::fpga::ActivationType activation_enable =
paddle_mobile::fpga::NONE;
int16_t leaky_relu_negative_slope = 0;
auto *input_x = const_cast<LoDTensor *>(param->InputX());
auto *input_y = const_cast<LoDTensor *>(param->InputY());
auto *out = param->Out();
......@@ -30,7 +33,10 @@ bool ElementwiseAddKernel<FPGA, float>::Init(ElementwiseAddParam<FPGA> *param) {
auto out_ptr = out->mutable_data<float>();
fpga::EWAddArgs ewaddArgs = {0};
ewaddArgs.relu_enabled = relu_enabled;
// ewaddArgs.relu_enabled = relu_enabled;
ewaddArgs.output.activation.activation_type = activation_enable;
ewaddArgs.output.activation.leaky_relu_negative_slope =
leaky_relu_negative_slope;
ewaddArgs.const0 = 0x3c00; // =1
ewaddArgs.const1 = 0x3c00; // =1
ewaddArgs.image0.address = input_x_ptr;
......
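The const0/const1 fields set above are fp16-encoded scalars in output0 = const0 x input0 + const1 x input1, and 0x3c00 is the IEEE 754 half-precision encoding of 1.0, so the element-wise add sums the two inputs unscaled. A small self-contained decode of that constant (illustrative only):
#include <cstdint>
// Illustrative: 0x3c00 has sign 0, biased exponent 15 (i.e. 2^0), mantissa 0,
// which decodes to 1.0 in half precision.
constexpr uint16_t kHalfOne = 0x3c00;
static_assert((kHalfOne >> 15) == 0, "sign bit is 0");
static_assert(((kHalfOne >> 10) & 0x1F) == 15, "biased exponent is 15");
static_assert((kHalfOne & 0x3FF) == 0, "mantissa is 0");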
......@@ -21,7 +21,10 @@ namespace operators {
template <>
bool ElementwiseAddReluKernel<FPGA, float>::Init(
ElementwiseAddReluParam<FPGA> *param) {
bool relu_enabled = true;
// bool relu_enabled = true;
paddle_mobile::fpga::ActivationType activation_enable =
paddle_mobile::fpga::LEAKYRELU;
int16_t leaky_relu_negative_slope = 0;
auto *input_x = const_cast<LoDTensor *>(param->InputX());
auto *input_y = const_cast<LoDTensor *>(param->InputY());
auto *out = param->Out();
......@@ -31,7 +34,10 @@ bool ElementwiseAddReluKernel<FPGA, float>::Init(
auto out_ptr = out->mutable_data<float>();
fpga::EWAddArgs ewaddArgs = {0};
ewaddArgs.relu_enabled = relu_enabled;
// ewaddArgs.relu_enabled = relu_enabled;
ewaddArgs.output.activation.activation_type = activation_enable;
ewaddArgs.output.activation.leaky_relu_negative_slope =
leaky_relu_negative_slope;
ewaddArgs.const0 = 0x3c00; // =1
ewaddArgs.const1 = 0x3c00; // =1
ewaddArgs.image0.address = input_x_ptr;
......
......@@ -19,12 +19,34 @@ namespace operators {
template <>
bool FetchKernel<FPGA, float>::Init(FetchParam<FPGA> *param) {
Tensor *output = param->Out();
// fpga::format_fp16_ofm(output);
return true;
}
template <>
void FetchKernel<FPGA, float>::Compute(const FetchParam<FPGA> &param) {
param.Out()->ShareDataWith(*(param.InputX()));
/*auto input =
reinterpret_cast<Tensor *>(const_cast<Tensor *>(param.InputX()));
fpga::format_image(input);
auto input_ptr = input->data<float>();
Tensor *output = param.Out();
auto output_ptr = output->data<float>();
fpga::BypassArgs args = {fpga::DATA_TYPE_FP16};
args.input_data_type = fpga::DATA_TYPE_FP16;
args.output_data_type = fpga::DATA_TYPE_FP32;
args.input_layout_type = fpga::LAYOUT_CHW;
args.output_layout_type = fpga::LAYOUT_HWC;
args.image.address = reinterpret_cast<void *>(input_ptr);
args.image.channels = (uint32_t)input->dims()[1];
args.image.height = (input->dims().size() == 4) ? (uint32_t)input->dims()[2] : 1;
args.image.width = (input->dims().size() == 4) ? (uint32_t)input->dims()[3] : 1;
args.image.pad_height = 0;
args.image.pad_width = 0;
args.output.address = output_ptr;
args.output.scale_address = output->scale;
fpga::PerformBypass(args);*/
}
template class FetchKernel<FPGA, float>;
......
......@@ -20,7 +20,10 @@ namespace operators {
template <>
bool FusionFcKernel<FPGA, float>::Init(FusionFcParam<FPGA> *param) {
bool relu_enabled = false;
// bool relu_enabled = false;
paddle_mobile::fpga::ActivationType activation_enable =
paddle_mobile::fpga::NONE;
int16_t leaky_relu_negative_slope = 0;
auto input_x = const_cast<LoDTensor *>(param->InputX());
auto filter = const_cast<Tensor *>(param->InputY());
const Tensor *input_z = param->InputZ();
......@@ -55,8 +58,8 @@ bool FusionFcKernel<FPGA, float>::Init(FusionFcParam<FPGA> *param) {
fpga::format_fp16_ofm(out);
fpga::SplitConvArgs conv_arg = {0};
fpga::fill_split_arg(&conv_arg, input_x, out, filter, relu_enabled, 1, 1, 1,
0, 0, bs_ptr);
fpga::fill_split_arg(&conv_arg, input_x, out, filter, activation_enable,
leaky_relu_negative_slope, 1, 1, 1, 0, 0, bs_ptr);
param->SetFpgaArgs(conv_arg);
return true;
}
......
......@@ -22,6 +22,12 @@ namespace operators {
template <>
bool ReshapeKernel<FPGA, float>::Init(ReshapeParam<FPGA> *param) {
param->Out()->ShareDataWith(*param->InputX());
const int in_n = param->InputX()->dims()[0];
const int in_c = param->InputX()->dims()[1];
const int in_h = param->InputX()->dims()[2];
const int in_w = param->InputX()->dims()[3];
auto out = param->Out();
out->Resize(framework::make_ddim({in_n, in_c * in_h * in_w}));
return true;
}
......
......@@ -15,73 +15,41 @@ limitations under the License. */
#ifdef SIGMOID_OP
#include "operators/kernel/activation_kernel.h"
namespace paddle_mobile {
namespace operators {
using framework::DDim;
using framework::Tensor;
template <>
bool SigmoidKernel<FPGA, float>::Init(SigmoidParam<FPGA> *param) {
paddle_mobile::fpga::ActivationType activation_enable =
paddle_mobile::fpga::SIGMOID;
int16_t leaky_relu_negative_slope = 0;
auto input = const_cast<Tensor *>(param->InputX());
auto input_ptr = input->data<float>();
auto out = param->Out();
fpga::format_fp32_ofm(out);
fpga::format_fp16_ofm(out);
auto float_input = new Tensor;
if (input->dims().size() == 2) {
float_input->mutable_data<float>({1, input->dims()[1]});
} else if (input->dims().size() == 4) {
float_input->mutable_data<float>(
{1, input->dims()[2], input->dims()[3], input->dims()[1]});
} else {
DLOG << "wrong dimension of softmax input";
}
fpga::format_fp32_ofm(float_input);
fpga::BypassArgs args = {fpga::DATA_TYPE_FP16};
args.input_layout_type = fpga::LAYOUT_HWC;
args.output_layout_type = fpga::LAYOUT_CHW;
args.input_data_type = fpga::DATA_TYPE_FP16;
args.output_data_type = fpga::DATA_TYPE_FP32;
args.output_data_type = fpga::DATA_TYPE_FP16;
args.image.address = input_ptr;
args.image.height =
(input->dims().size() == 4) ? (uint32_t)input->dims()[2] : 1;
args.image.width =
(input->dims().size() == 4) ? (uint32_t)input->dims()[3] : 1;
args.image.channels = (uint32_t)input->dims()[1];
args.output.address = float_input->data<float>();
args.output.scale_address = float_input->scale;
param->SetFloatInput(float_input);
args.output.address = out->data<float>();
args.output.scale_address = out->scale;
args.output.activation.activation_type = activation_enable;
args.output.activation.leaky_relu_negative_slope = leaky_relu_negative_slope;
param->SetFpgaArgs(args);
return true;
}
template <typename T>
T Sigmoid(const T a) {
T tmp = -1.0f * a;
return (1.0 / (1.0 + exp(tmp)));
}
template <typename T>
void sigmoidFuntor(Tensor *input, Tensor *output) {
auto *input_ptr = input->data<T>();
auto *output_ptr = output->mutable_data<T>();
for (int i = 0; i < input->numel(); i++) {
*(output_ptr + i) = Sigmoid<T>(*(input_ptr + i));
}
}
template <>
void SigmoidKernel<FPGA, float>::Compute(const SigmoidParam<FPGA> &param) {
Tensor *in_x = param.FloatInput();
Tensor *out = param.Out();
fpga::PerformBypass(param.FpgaArgs());
fpga::fpga_invalidate((void *)in_x->data<float>(), // NOLINT
in_x->numel() * sizeof(float));
// TODO: In general case, 0 should be squeezed before softmax input // NOLINT
sigmoidFuntor<float>(in_x, out);
fpga::fpga_flush(out->data<float>(), out->memory_size());
}
} // namespace operators
} // namespace paddle_mobile
......
......@@ -26,7 +26,6 @@ bool SoftmaxKernel<FPGA, float>::Init(SoftmaxParam<FPGA> *param) {
auto input_ptr = input->data<float>();
auto out = param->Out();
fpga::format_fp32_ofm(out);
auto float_input = new Tensor;
if (input->dims().size() == 2) {
float_input->mutable_data<float>({1, input->dims()[1]});
......@@ -36,7 +35,6 @@ bool SoftmaxKernel<FPGA, float>::Init(SoftmaxParam<FPGA> *param) {
} else {
DLOG << "wrong dimension of softmax input";
}
fpga::format_fp32_ofm(float_input);
fpga::BypassArgs args = {fpga::DATA_TYPE_FP16};
args.input_layout_type = fpga::LAYOUT_HWC;
......@@ -53,6 +51,7 @@ bool SoftmaxKernel<FPGA, float>::Init(SoftmaxParam<FPGA> *param) {
args.output.scale_address = float_input->scale;
param->SetFloatInput(float_input);
param->SetFpgaArgs(args);
return true;
}
......
......@@ -564,7 +564,7 @@ void DepthwiseConvAddBNRelu3x3s1p1(const framework::Tensor *input,
#if __ARM_NEON
const float *input_data = input->data<float>();
const float *filter_data = filter->data<float>();
float *output_data = output->data<float>();
float *output_data = output->mutable_data<float>();
const float *newscale_data = new_scale->data<float>();
const float *newbias_data = new_bias->data<float>();
......@@ -1309,7 +1309,7 @@ void DepthwiseConv3x3s2p1v2(const framework::Tensor *input,
#if __ARM_NEON
const float *input_data = input->data<float>();
const float *filter_data = filter->data<float>();
float *output_data = output->data<float>();
float *output_data = output->mutable_data<float>();
const float *bias_data;
if (if_bias) {
bias_data = bias->data<float>();
......@@ -1729,7 +1729,7 @@ void DepthwiseConvAddBNRelu3x3s2p1v2(const framework::Tensor *input,
const float *input_data = input->data<float>();
const float *filter_data = filter->data<float>();
float *output_data = output->data<float>();
float *output_data = output->mutable_data<float>();
const float *newscale_data = new_scale->data<float>();
const float *newbias_data = new_bias->data<float>();
......@@ -1978,6 +1978,7 @@ void DepthwiseConv3x3s2p0(const framework::Tensor *input,
const int output_width = static_cast<int>(output->dims()[3]);
const int inhxw = input_height * input_width;
const int outhxw = output_height * output_width;
output->mutable_data<float>();
float32x4_t zero = vdupq_n_f32(0.0);
for (int b = 0; b < batch_size; b++) {
......
......@@ -1081,14 +1081,9 @@ class SigmoidParam : public OpParam {
#ifdef PADDLE_MOBILE_FPGA
private:
std::shared_ptr<RType> float_input_x_;
fpga::BypassArgs fpga_bypass_args;
public:
RType *FloatInput() const {
return float_input_x_ == nullptr ? input_x_ : float_input_x_.get();
}
void SetFloatInput(Tensor *input) { float_input_x_.reset(input); }
const fpga::BypassArgs &FpgaArgs() const { return fpga_bypass_args; }
void SetFpgaArgs(const fpga::BypassArgs &args) { fpga_bypass_args = args; }
#endif
......@@ -1214,6 +1209,20 @@ class FetchParam : public OpParam {
private:
RType *input_x_;
Tensor *out_;
#ifdef PADDLE_MOBILE_FPGA
private:
std::shared_ptr<RType> float_input_x_;
fpga::BypassArgs fpga_bypass_args;
public:
RType *FloatInput() const {
return float_input_x_ == nullptr ? input_x_ : float_input_x_.get();
}
void SetFloatInput(Tensor *input) { float_input_x_.reset(input); }
const fpga::BypassArgs &FpgaArgs() const { return fpga_bypass_args; }
void SetFpgaArgs(const fpga::BypassArgs &args) { fpga_bypass_args = args; }
#endif
};
#ifdef FILL_CONSTANT_OP
......
......@@ -337,8 +337,8 @@ if (NOT FOUND_MATCH)
target_link_libraries(test-genet paddle-mobile)
# gen test
ADD_EXECUTABLE(test-sigmoid operators/test_sigmoid_op.cpp test_include.h)
target_link_libraries(test-sigmoid paddle-mobile)
ADD_EXECUTABLE(test-sigmoid-op operators/test_sigmoid_op.cpp test_include.h)
target_link_libraries(test-sigmoid-op paddle-mobile)
# gen test
ADD_EXECUTABLE(test-depthwise-conv-op operators/test_depthwise_conv_op.cpp test_helper.h test_include.h executor_for_test.h)
......@@ -408,14 +408,14 @@ if (NOT FOUND_MATCH)
ADD_EXECUTABLE(test-ocr net/test_ocr.cpp test_helper.h test_include.h)
target_link_libraries(test-ocr paddle-mobile)
ADD_EXECUTABLE(test-sequence-expand operators/test_sequence_expand_op.cpp test_helper.h test_include.h)
target_link_libraries(test-sequence-expand paddle-mobile)
ADD_EXECUTABLE(test-sequence-expand-op operators/test_sequence_expand_op.cpp test_helper.h test_include.h)
target_link_libraries(test-sequence-expand-op paddle-mobile)
ADD_EXECUTABLE(test-sequence-pool operators/test_sequence_pool_op.cpp test_helper.h test_include.h)
target_link_libraries(test-sequence-pool paddle-mobile)
ADD_EXECUTABLE(test-sequence-pool-op operators/test_sequence_pool_op.cpp test_helper.h test_include.h)
target_link_libraries(test-sequence-pool-op paddle-mobile)
ADD_EXECUTABLE(test-sequence-softmax operators/test_sequence_softmax_op.cpp test_helper.h test_include.h)
target_link_libraries(test-sequence-softmax paddle-mobile)
ADD_EXECUTABLE(test-sequence-softmax-op operators/test_sequence_softmax_op.cpp test_helper.h test_include.h)
target_link_libraries(test-sequence-softmax-op paddle-mobile)
# gen test
ADD_EXECUTABLE(test-vgg16ssd net/test_vgg16ssd.cpp test_helper.h test_include.h)
......@@ -445,4 +445,9 @@ if (NOT FOUND_MATCH)
ADD_EXECUTABLE(test-is-empty-op operators/test_is_empty_op.cpp test_helper.h test_include.h)
target_link_libraries(test-is-empty-op paddle-mobile)
ADD_EXECUTABLE(test-conv-bn-relu-op operators/test_conv_bn_relu_op.cpp test_helper.h test_include.h)
target_link_libraries(test-conv-bn-relu-op paddle-mobile)
ADD_EXECUTABLE(test-dwconv-bn-relu-op operators/test_dwconv_bn_relu_op.cpp test_helper.h test_include.h)
target_link_libraries(test-dwconv-bn-relu-op paddle-mobile)
endif ()
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "../test_helper.h"
#include "../test_include.h"
#include "operators/fusion_conv_bn_relu_op.h"
namespace paddle_mobile {
// Reference convolution from Caffe for checking results.
// accumulate through explicit loops over input, output, and filters.
template <typename Itype, typename Otype, int Kernel, int Pad, int Stride>
int TestConvBnReluOp(int in_channels, int in_height, int in_width,
int out_channels, int groups, std::string opname) {
int kernel_h = Kernel;
int kernel_w = Kernel;
int pad_h = Pad;
int pad_w = Pad;
int stride_h = Stride;
int stride_w = Stride;
int dilation_h = 1;
int dilation_w = 1;
int batch_size = 1;
int input_c = in_channels;
int input_h = in_height;
int input_w = in_width;
int output_c = out_channels;
framework::DDim input_shape =
framework::make_ddim({batch_size, input_c, input_h, input_w});
framework::DDim filter_shape =
framework::make_ddim({output_c, input_c / groups, kernel_h, kernel_w});
framework::DDim shape = framework::make_ddim({output_c});
VariableNameMap inputs;
VariableNameMap outputs;
auto scope = std::make_shared<framework::Scope>();
inputs["Input"] = std::vector<std::string>({"input"});
inputs["Filter"] = std::vector<std::string>({"filter"});
outputs["Out"] = std::vector<std::string>({"output"});
inputs["Mean"] = std::vector<std::string>({"input_mean"});
inputs["Variance"] = std::vector<std::string>({"input_variance"});
inputs["Scale"] = std::vector<std::string>({"input_scale"});
inputs["Bias"] = std::vector<std::string>({"input_bias"});
auto input_var = scope.get()->Var("input");
auto input = input_var->template GetMutable<framework::LoDTensor>();
SetupTensor<Itype>(input, input_shape, -20.0, 20.0);
auto filter_var = scope.get()->Var("filter");
auto filter = filter_var->template GetMutable<framework::LoDTensor>();
SetupTensor<Itype>(filter, filter_shape, -20, 20);
auto input_mean_var = scope.get()->Var("input_mean");
auto input_mean = input_mean_var->template GetMutable<framework::LoDTensor>();
SetupTensor<float>(input_mean, shape, -10.0, 10.0);
auto vari_var = scope.get()->Var("input_variance");
auto vari = vari_var->template GetMutable<framework::LoDTensor>();
SetupTensor<float>(vari, shape, -10.0, 10.0);
auto scale_var = scope.get()->Var("input_scale");
auto scale = scale_var->template GetMutable<framework::LoDTensor>();
SetupTensor<float>(scale, shape, -10.0, 10.0);
auto input_bias_var = scope.get()->Var("input_bias");
auto input_bias = input_bias_var->template GetMutable<framework::LoDTensor>();
SetupTensor<float>(input_bias, shape, -10.0, 10.0);
auto output_var = scope.get()->Var("output");
framework::AttributeMap attrs;
attrs["strides"].Set<vector<int>>(std::vector<int>({stride_h, stride_w}));
attrs["paddings"].Set<vector<int>>(std::vector<int>({pad_h, pad_w}));
attrs["dilations"].Set<vector<int>>(
std::vector<int>({dilation_h, dilation_w}));
attrs["groups"].Set<int>(groups);
attrs["epsilon"].Set<float>(1e-6);
attrs["momentum"].Set<float>(0.f);
auto *op = new operators::FusionConvBNReluOp<CPU, float>(
"fusion_conv_bn_relu", inputs, outputs, attrs, scope);
op->InferShape();
op->Init();
for (int i = 0; i < 10; ++i) {
op->Run();
}
auto time1 = time();
for (int i = 0; i < 10; ++i) {
op->Run();
}
auto time2 = time();
std::ofstream out_file("./out_conv.txt", std::ios::app);
out_file << opname << " cost :" << time_diff(time1, time2) / 10.0 << "ms"
<< std::endl;
out_file.close();
delete op;
return 0;
}
} // namespace paddle_mobile
int main(int argc, char *argv[]) {
// kernel = 3, pad = 1, stride = 2
paddle_mobile::TestConvBnReluOp<float, float, 3, 1, 2>(3, 48, 48, 16, 1,
"conv_bn_relu");
// kernel = 1, pad = 0, stride = 1
paddle_mobile::TestConvBnReluOp<float, float, 1, 0, 1>(16, 24, 24, 8, 1,
"depthwise_seperable");
// kernel = 1, pad = 0, stride = 1
paddle_mobile::TestConvBnReluOp<float, float, 1, 0, 1>(8, 24, 24, 24, 1,
"MBConv_3x3_conv1");
// kernel = 1, pad = 0, stride = 1
paddle_mobile::TestConvBnReluOp<float, float, 1, 0, 1>(24, 24, 24, 8, 1,
"MBConv_3x3_pw1");
// kernel = 1, pad = 0, stride = 1
paddle_mobile::TestConvBnReluOp<float, float, 1, 0, 1>(8, 24, 24, 24, 1,
"MBConv_3x3_conv2");
// kernel = 1, pad = 0, stride = 1
paddle_mobile::TestConvBnReluOp<float, float, 1, 0, 1>(24, 24, 24, 8, 1,
"MBConv_3x3_pw2");
// kernel = 1, pad = 0, stride = 1
paddle_mobile::TestConvBnReluOp<float, float, 1, 0, 1>(8, 24, 24, 24, 1,
"MBConv_3x3_conv3");
// kernel = 1, pad = 0, stride = 1
paddle_mobile::TestConvBnReluOp<float, float, 1, 0, 1>(24, 12, 12, 16, 1,
"MBConv_3x3_pw3");
// kernel = 1, pad = 0, stride = 1
paddle_mobile::TestConvBnReluOp<float, float, 1, 0, 1>(
16, 12, 12, 48, 1, "MBConv_5x5_stage1_conv1");
// kernel = 1, pad = 0, stride = 1
paddle_mobile::TestConvBnReluOp<float, float, 1, 0, 1>(
48, 12, 12, 16, 1, "MBConv_5x5_stage1_pw1");
// kernel = 1, pad = 0, stride = 1
paddle_mobile::TestConvBnReluOp<float, float, 1, 0, 1>(
16, 12, 12, 48, 1, "MBConv_5x5_stage1_conv2");
// kernel = 1, pad = 0, stride = 1
paddle_mobile::TestConvBnReluOp<float, float, 1, 0, 1>(
48, 12, 12, 16, 1, "MBConv_5x5_stage1_pw2");
// kernel = 1, pad = 0, stride = 1
paddle_mobile::TestConvBnReluOp<float, float, 1, 0, 1>(
16, 12, 12, 48, 1, "MBConv_5x5_stage1_conv3");
// kernel = 1, pad = 0, stride = 1
paddle_mobile::TestConvBnReluOp<float, float, 1, 0, 1>(
48, 6, 6, 32, 1, "MBConv_5x5_stage1_pw3");
// kernel = 1, pad = 0, stride = 1
paddle_mobile::TestConvBnReluOp<float, float, 1, 0, 1>(
32, 6, 6, 192, 1, "MBConv_5x5_stage2_conv1");
// kernel = 1, pad = 0, stride = 1
paddle_mobile::TestConvBnReluOp<float, float, 1, 0, 1>(
192, 6, 6, 32, 1, "MBConv_5x5_stage2_pw1");
// kernel = 1, pad = 0, stride = 1
paddle_mobile::TestConvBnReluOp<float, float, 1, 0, 1>(
32, 6, 6, 192, 1, "MBConv_5x5_stage2_conv2");
// kernel = 1, pad = 0, stride = 1
paddle_mobile::TestConvBnReluOp<float, float, 1, 0, 1>(
192, 6, 6, 32, 1, "MBConv_5x5_stage2_pw2");
// kernel = 1, pad = 0, stride = 1
paddle_mobile::TestConvBnReluOp<float, float, 1, 0, 1>(
32, 6, 6, 192, 1, "MBConv_5x5_stage2_conv3");
// kernel = 1, pad = 0, stride = 1
paddle_mobile::TestConvBnReluOp<float, float, 1, 0, 1>(
192, 6, 6, 64, 1, "MBConv_5x5_stage2_pw3");
return 0;
}
......@@ -12,6 +12,7 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include <iostream>
#include "../test_helper.h"
#include "../test_include.h"
#include "operators/conv_op.h"
......@@ -209,10 +210,10 @@ int TestConvOp(int in_channels, int in_height, int in_width, int out_channels,
// PADDLE_MOBILE_ENFORCE(std::abs(gap / (output_data[i] + 1e-5)) < 1e-3,
// "output[%d] = %d, output_cmp[%d] = %d", i,
// output_data[i], i, output_cmp_data[i]);
if (std::abs(gap / (output_data[i] + 1e-5)) > 1e-3) {
LOG(kLOG_INFO) << "output_data[" << i << "] = " << output_data[i]
<< ", output_cmp_data[" << i
<< "] = " << output_cmp_data[i];
if (gap > 1e-2 && std::abs(gap / (output_data[i] + 1e-5)) > 1e-3) {
std::cerr << "output_data[" << i << "] = " << output_data[i]
<< ", output_cmp_data[" << i << "] = " << output_cmp_data[i]
<< std::endl;
exit(1);
}
}
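The updated check above reports a mismatch only when the error exceeds both an absolute threshold (1e-2) and a relative threshold (1e-3), which avoids spurious failures on near-zero outputs. A standalone sketch of the same predicate (assuming gap is the absolute difference between the tested output and the reference value, which is not shown in this hunk):
#include <cmath>
// Illustrative predicate mirroring the tolerance rule in the test:
// flag only errors that are large both absolutely and relative to the output.
inline bool OutputMismatch(float output, float reference) {
  float gap = std::abs(output - reference);
  return gap > 1e-2 && std::abs(gap / (output + 1e-5)) > 1e-3;
}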
......@@ -222,94 +223,131 @@ int TestConvOp(int in_channels, int in_height, int in_width, int out_channels,
} // namespace paddle_mobile
int main(int argc, char *argv[]) {
if (argc < 5) {
LOG(paddle_mobile::kLOG_INFO)
<< "Usage:\n"
<< " ./test-int8-conv-op in_channels in_height in_width out_channels "
"[groups]\n"
<< " params:\n"
<< " -in_channels: int, input image's channels\n"
<< " -in_height: int, input image's height\n"
<< " -in_width: int, input image's width\n"
<< " -out_channels: int, conv output channels\n";
return 1;
}
int in_channels = atoi(argv[1]);
int in_height = atoi(argv[2]);
int in_width = atoi(argv[3]);
int out_channels = atoi(argv[4]);
int groups = 1;
if (argc == 6) {
groups = atoi(argv[5]);
}
int TestAll(const int in_channels, const int in_height, const int in_width,
const int out_channels, const int groups) {
std::cerr << "in_channels=" << in_channels << ", in_height=" << in_height
<< ", in_width=" << in_width << ", out_channels=" << out_channels
<< ", groups=" << groups << std::endl;
// // kernel = 3, pad = 0, stride = 1
// std::cerr << "float, kernel=3, pad=0, stride=1" << std::endl;
// paddle_mobile::TestConvOp<float, float, 3, 0, 1>(
// in_channels, in_height, in_width, out_channels, groups);
// // kernel = 3, pad = 1, stride = 1
// std::cerr << "float, kernel=3, pad=1, stride=1" << std::endl;
// paddle_mobile::TestConvOp<float, float, 3, 1, 1>(
// in_channels, in_height, in_width, out_channels, groups);
// // kernel = 3, pad = 2, stride = 1
// std::cerr << "float, kernel=3, pad=2, stride=1" << std::endl;
// paddle_mobile::TestConvOp<float, float, 3, 2, 1>(
// in_channels, in_height, in_width, out_channels, groups);
// // kernel = 3, pad = 5, stride = 1
// std::cerr << "float, kernel=3, pad=5, stride=1" << std::endl;
// paddle_mobile::TestConvOp<float, float, 3, 5, 1>(
// in_channels, in_height, in_width, out_channels, groups);
//
// // kernel = 3, pad = 0, stride = 2
// std::cerr << "float, kernel=3, pad=0, stride=2" << std::endl;
// paddle_mobile::TestConvOp<float, float, 3, 0, 2>(
// in_channels, in_height, in_width, out_channels, groups);
// // kernel = 3, pad = 1, stride = 2
// std::cerr << "float, kernel=3, pad=1, stride=2" << std::endl;
// paddle_mobile::TestConvOp<float, float, 3, 1, 2>(
// in_channels, in_height, in_width, out_channels, groups);
// // kernel = 3, pad = 2, stride = 2
// std::cerr << "float, kernel=3, pad=2, stride=2" << std::endl;
// paddle_mobile::TestConvOp<float, float, 3, 2, 2>(
// in_channels, in_height, in_width, out_channels, groups);
// // kernel = 3, pad = 5, stride = 2
// std::cerr << "float, kernel=3, pad=5, stride=2" << std::endl;
// paddle_mobile::TestConvOp<float, float, 3, 5, 2>(
// in_channels, in_height, in_width, out_channels, groups);
#ifndef __aarch64__
// kernel = 3, pad = 0, stride = 1
LOG(paddle_mobile::kLOG_INFO) << "float, kernel=3, pad=0, stride=1";
std::cerr << "int8, kernel=3, pad=0, stride=1" << std::endl;
paddle_mobile::TestConvOp<int8_t, int32_t, 3, 0, 1>(
in_channels, in_height, in_width, out_channels, groups);
// kernel = 3, pad = 1, stride = 1
LOG(paddle_mobile::kLOG_INFO) << "float, kernel=3, pad=1, stride=1";
std::cerr << "int8, kernel=3, pad=1, stride=1" << std::endl;
paddle_mobile::TestConvOp<int8_t, int32_t, 3, 1, 1>(
in_channels, in_height, in_width, out_channels, groups);
// kernel = 3, pad = 2, stride = 1
LOG(paddle_mobile::kLOG_INFO) << "float, kernel=3, pad=2, stride=1";
std::cerr << "int8, kernel=3, pad=2, stride=1" << std::endl;
paddle_mobile::TestConvOp<int8_t, int32_t, 3, 2, 1>(
in_channels, in_height, in_width, out_channels, groups);
// kernel = 3, pad = 5, stride = 1
LOG(paddle_mobile::kLOG_INFO) << "float, kernel=3, pad=5, stride=1";
std::cerr << "int8, kernel=3, pad=5, stride=1" << std::endl;
paddle_mobile::TestConvOp<int8_t, int32_t, 3, 5, 1>(
in_channels, in_height, in_width, out_channels, groups);
// kernel = 3, pad = 0, stride = 2
LOG(paddle_mobile::kLOG_INFO) << "float, kernel=3, pad=0, stride=2";
std::cerr << "int8, kernel=3, pad=0, stride=2" << std::endl;
paddle_mobile::TestConvOp<int8_t, int32_t, 3, 0, 2>(
in_channels, in_height, in_width, out_channels, groups);
// kernel = 3, pad = 1, stride = 2
LOG(paddle_mobile::kLOG_INFO) << "float, kernel=3, pad=1, stride=2";
std::cerr << "int8, kernel=3, pad=1, stride=2" << std::endl;
paddle_mobile::TestConvOp<int8_t, int32_t, 3, 1, 2>(
in_channels, in_height, in_width, out_channels, groups);
// kernel = 3, pad = 2, stride = 2
LOG(paddle_mobile::kLOG_INFO) << "float, kernel=3, pad=2, stride=2";
std::cerr << "int8, kernel=3, pad=2, stride=2" << std::endl;
paddle_mobile::TestConvOp<int8_t, int32_t, 3, 2, 2>(
in_channels, in_height, in_width, out_channels, groups);
// kernel = 3, pad = 5, stride = 2
LOG(paddle_mobile::kLOG_INFO) << "float, kernel=3, pad=5, stride=2";
std::cerr << "int8, kernel=3, pad=5, stride=2" << std::endl;
paddle_mobile::TestConvOp<int8_t, int32_t, 3, 5, 2>(
in_channels, in_height, in_width, out_channels, groups);
#endif // __aarch64__
// kernel = 5, pad = 0, stride = 1
LOG(paddle_mobile::kLOG_INFO) << "float, kernel=5, pad=0, stride=1";
std::cerr << "float, kernel=5, pad=0, stride=1" << std::endl;
paddle_mobile::TestConvOp<float, float, 5, 0, 1>(
in_channels, in_height, in_width, out_channels, groups);
// kernel = 5, pad = 1, stride = 1
LOG(paddle_mobile::kLOG_INFO) << "float, kernel=5, pad=1, stride=1";
std::cerr << "float, kernel=5, pad=1, stride=1" << std::endl;
paddle_mobile::TestConvOp<float, float, 5, 1, 1>(
in_channels, in_height, in_width, out_channels, groups);
// kernel = 5, pad = 2, stride = 1
LOG(paddle_mobile::kLOG_INFO) << "float, kernel=5, pad=2, stride=1";
std::cerr << "float, kernel=5, pad=2, stride=1" << std::endl;
paddle_mobile::TestConvOp<float, float, 5, 2, 1>(
in_channels, in_height, in_width, out_channels, groups);
// kernel = 5, pad = 5, stride = 1
LOG(paddle_mobile::kLOG_INFO) << "float, kernel=5, pad=5, stride=1";
std::cerr << "float, kernel=5, pad=5, stride=1" << std::endl;
paddle_mobile::TestConvOp<float, float, 5, 5, 1>(
in_channels, in_height, in_width, out_channels, groups);
#ifndef __aarch64__
// kernel = 5, pad = 0, stride = 1
LOG(paddle_mobile::kLOG_INFO) << "int8, kernel=5, pad=0, stride=1";
std::cerr << "int8, kernel=5, pad=0, stride=1" << std::endl;
paddle_mobile::TestConvOp<int8_t, int32_t, 5, 0, 1>(
in_channels, in_height, in_width, out_channels, groups);
// kernel = 5, pad = 1, stride = 1
LOG(paddle_mobile::kLOG_INFO) << "int8, kernel=5, pad=1, stride=1";
std::cerr << "int8, kernel=5, pad=1, stride=1" << std::endl;
paddle_mobile::TestConvOp<int8_t, int32_t, 5, 1, 1>(
in_channels, in_height, in_width, out_channels, groups);
// kernel = 5, pad = 2, stride = 1
LOG(paddle_mobile::kLOG_INFO) << "int8, kernel=5, pad=2, stride=1";
std::cerr << "int8, kernel=5, pad=2, stride=1" << std::endl;
paddle_mobile::TestConvOp<int8_t, int32_t, 5, 2, 1>(
in_channels, in_height, in_width, out_channels, groups);
// kernel = 5, pad = 5, stride = 1
LOG(paddle_mobile::kLOG_INFO) << "int8, kernel=5, pad=5, stride=1";
std::cerr << "int8, kernel=5, pad=5, stride=1" << std::endl;
paddle_mobile::TestConvOp<int8_t, int32_t, 5, 5, 1>(
in_channels, in_height, in_width, out_channels, groups);
#endif // __aarch64__
return 0;
}
int main() {
TestAll(1, 5, 5, 1, 1);
TestAll(1, 5, 5, 10, 1);
TestAll(10, 5, 5, 10, 10);
TestAll(5, 33, 33, 5, 1);
TestAll(5, 33, 33, 13, 1);
TestAll(13, 33, 33, 13, 13);
TestAll(5, 33, 13, 5, 1);
TestAll(5, 33, 13, 13, 1);
TestAll(13, 33, 13, 13, 13);
return 0;
}
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "../test_helper.h"
#include "../test_include.h"
#include "operators/fusion_dwconv_bn_relu_op.h"
namespace paddle_mobile {
template <typename Itype, typename Otype, int Kernel, int Pad, int Stride>
int TestDWConvAddBnReluOp(int in_channels, int in_height, int in_width,
int out_channels, int groups, std::string opname) {
int kernel_h = Kernel;
int kernel_w = Kernel;
int pad_h = Pad;
int pad_w = Pad;
int stride_h = Stride;
int stride_w = Stride;
int dilation_h = 1;
int dilation_w = 1;
int batch_size = 1;
int input_c = in_channels;
int input_h = in_height;
int input_w = in_width;
int output_c = out_channels;
framework::DDim input_shape =
framework::make_ddim({batch_size, input_c, input_h, input_w});
framework::DDim filter_shape =
framework::make_ddim({output_c, input_c / groups, kernel_h, kernel_w});
framework::DDim shape = framework::make_ddim({output_c});
VariableNameMap inputs;
VariableNameMap outputs;
auto scope = std::make_shared<framework::Scope>();
inputs["Input"] = std::vector<std::string>({"input"});
inputs["Filter"] = std::vector<std::string>({"filter"});
inputs["Mean"] = std::vector<std::string>({"mean"});
inputs["Variance"] = std::vector<std::string>({"variance"});
inputs["Scale"] = std::vector<std::string>({"scale"});
inputs["Bias"] = std::vector<std::string>({"bias"});
outputs["Out"] = std::vector<std::string>({"output"});
auto input_var = scope.get()->Var("input");
auto input = input_var->template GetMutable<framework::LoDTensor>();
SetupTensor<Itype>(input, input_shape, -20.0, 20.0);
auto filter_var = scope.get()->Var("filter");
auto filter = filter_var->template GetMutable<framework::LoDTensor>();
SetupTensor<Itype>(filter, filter_shape, -20, 20);
auto mean_var = scope.get()->Var("mean");
auto mean = mean_var->template GetMutable<framework::LoDTensor>();
SetupTensor<float>(mean, shape, -10.0, 10.0);
auto vari_var = scope.get()->Var("variance");
auto vari = vari_var->template GetMutable<framework::LoDTensor>();
SetupTensor<float>(vari, shape, -10.0, 10.0);
auto scale_var = scope.get()->Var("scale");
auto scale = scale_var->template GetMutable<framework::LoDTensor>();
SetupTensor<float>(scale, shape, -10.0, 10.0);
auto bias_var = scope.get()->Var("bias");
auto bias = bias_var->template GetMutable<framework::LoDTensor>();
SetupTensor<float>(bias, shape, -10.0, 10.0);
auto output_var = scope.get()->Var("output");
framework::AttributeMap attrs;
attrs["strides"].Set<vector<int>>(std::vector<int>({stride_h, stride_w}));
attrs["paddings"].Set<vector<int>>(std::vector<int>({pad_h, pad_w}));
attrs["dilations"].Set<vector<int>>(
std::vector<int>({dilation_h, dilation_w}));
attrs["groups"].Set<int>(groups);
attrs["epsilon"].Set<float>(1e-6);
attrs["momentum"].Set<float>(0.f);
auto *op = new operators::FusionDWConvBNReluOp<CPU, float>(
"fusion_dwconv_bn_relu", inputs, outputs, attrs, scope);
op->InferShape();
op->Init();
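// Warm-up runs (results discarded) so one-time setup does not skew the timing below.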
for (int i = 0; i < 10; ++i) {
op->Run();
}
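// Timed section: the average cost over 10 runs is appended to out_dwconv.txt below.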
auto time1 = time();
for (int i = 0; i < 10; ++i) {
op->Run();
}
auto time2 = time();
std::ofstream out_file("./out_dwconv.txt", std::ios::app);
out_file << opname << " cost :" << time_diff(time1, time2) / 10.0 << "ms"
<< std::endl;
out_file.close();
delete op;
return 0;
}
} // namespace paddle_mobile
int main(int argc, char *argv[]) {
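// Arguments: (in_channels, in_height, in_width, out_channels, groups, opname);
// groups equal to the channel count makes each convolution depthwise.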
// kernel = 3, pad = 1, stride = 1
paddle_mobile::TestDWConvAddBnReluOp<float, float, 3, 1, 1>(
16, 24, 24, 16, 16, "depthwise_seperable");
// kernel = 3, pad = 1, stride = 1
paddle_mobile::TestDWConvAddBnReluOp<float, float, 3, 1, 1>(
24, 24, 24, 24, 24, "MBConv_3x3_dw1");
// kernel = 3, pad = 1, stride = 1
paddle_mobile::TestDWConvAddBnReluOp<float, float, 3, 1, 1>(
24, 24, 24, 24, 24, "MBConv_3x3_dw2");
// kernel = 3, pad = 1, stride = 2
paddle_mobile::TestDWConvAddBnReluOp<float, float, 3, 1, 2>(
24, 24, 24, 24, 24, "MBConv_3x3_dw3");
// kernel = 5, pad = 2, stride = 1
paddle_mobile::TestDWConvAddBnReluOp<float, float, 5, 2, 1>(
48, 12, 12, 48, 48, "MBConv_5x5_stage1_dw1");
// kernel = 5, pad = 2, stride = 1
paddle_mobile::TestDWConvAddBnReluOp<float, float, 5, 2, 1>(
48, 12, 12, 48, 48, "MBConv_5x5_stage1_dw2");
// kernel = 5, pad = 2, stride = 2
paddle_mobile::TestDWConvAddBnReluOp<float, float, 5, 2, 2>(
48, 12, 12, 48, 48, "MBConv_5x5_stage1_dw3");
// kernel = 5, pad = 2, stride = 1
paddle_mobile::TestDWConvAddBnReluOp<float, float, 5, 2, 1>(
192, 6, 6, 192, 192, "MBConv_5x5_stage2_dw1");
// kernel = 5, pad = 2, stride = 1
paddle_mobile::TestDWConvAddBnReluOp<float, float, 5, 2, 1>(
192, 6, 6, 192, 192, "MBConv_5x5_stage2_dw2");
// kernel = 5, pad = 2, stride = 1
paddle_mobile::TestDWConvAddBnReluOp<float, float, 5, 2, 1>(
192, 6, 6, 192, 192, "MBConv_5x5_stage2_dw3");
return 0;
}
......@@ -12,18 +12,89 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "../test_helper.h"
#include "../test_include.h"
#include "operators/gru_op.h"
int main() {
paddle_mobile::framework::Loader<paddle_mobile::CPU> loader;
auto program = loader.Load(g_nlp);
PADDLE_MOBILE_ENFORCE(program.originProgram != nullptr,
"program file read fail");
namespace paddle_mobile {
Executor4Test<paddle_mobile::CPU,
paddle_mobile::operators::GruOp<paddle_mobile::CPU, float>>
executor(program, "gru");
template <typename Itype, typename Otype>
int TestGruOp(int in_channels, int out_channels, std::string opname) {
int input_c = in_channels;
int output_c = out_channels;
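// Single-level LoD {0, input_c}: the whole input is treated as one sequence;
// lod.size() counts LoD levels, so batch_size ends up as 1 here.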
paddle_mobile::framework::LoD lod{{0, input_c}};
int batch_size = lod.size();
framework::DDim input_shape = framework::make_ddim({input_c, output_c * 3});
framework::DDim weight_shape = framework::make_ddim({output_c, output_c * 3});
framework::DDim h0_shape = framework::make_ddim({batch_size, output_c});
framework::DDim bias_shape = framework::make_ddim({batch_size, output_c * 3});
VariableNameMap inputs;
VariableNameMap outputs;
auto scope = std::make_shared<framework::Scope>();
inputs["Input"] = std::vector<std::string>({"input"});
inputs["Weight"] = std::vector<std::string>({"weight"});
inputs["H0"] = std::vector<std::string>({"h0"});
inputs["Bias"] = std::vector<std::string>({"bias"});
outputs["BatchGate"] = std::vector<std::string>({"output_batch_gate"});
outputs["BatchResetHiddenPrev"] =
std::vector<std::string>({"output_batch_reset_hidden_prev"});
outputs["BatchHidden"] = std::vector<std::string>({"output_batch_hidden"});
outputs["Hidden"] = std::vector<std::string>({"output_hidden"});
auto input_var = scope.get()->Var("input");
auto input = input_var->template GetMutable<framework::LoDTensor>();
SetupTensor<Itype>(input, input_shape, -127, 127);
input->set_lod(lod);
auto weight_var = scope.get()->Var("weight");
auto weight = weight_var->template GetMutable<framework::LoDTensor>();
SetupTensor<Itype>(weight, weight_shape, -127, 127);
auto h0_var = scope.get()->Var("h0");
auto h0 = h0_var->template GetMutable<framework::LoDTensor>();
SetupTensor<Itype>(h0, h0_shape, -127, 127);
auto bias_var = scope.get()->Var("bias");
auto bias = bias_var->template GetMutable<framework::LoDTensor>();
SetupTensor<Itype>(bias, bias_shape, -127, 127);
auto batch_gate_var = scope.get()->Var("output_batch_gate");
auto batch_reset_hidden_prev_var =
scope.get()->Var("output_batch_reset_hidden_prev");
auto batch_hidden_var = scope.get()->Var("output_batch_hidden");
auto hidden_var = scope.get()->Var("output_hidden");
framework::AttributeMap attrs;
attrs["activation"].SetString(std::string("relu"));
attrs["gate_activation"].SetString(std::string("sigmoid"));
attrs["is_reverse"].Set<bool>(false);
auto *op =
new operators::GruOp<CPU, float>("gru", inputs, outputs, attrs, scope);
op->InferShape();
op->Init();
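// Warm up for 10 runs, then time 10 more and report the average cost in out_gru.txt below.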
for (int i = 0; i < 10; ++i) {
op->Run();
}
auto time1 = time();
for (int i = 0; i < 10; ++i) {
op->Run();
}
auto time2 = time();
std::ofstream out_file("./out_gru.txt", std::ios::app);
out_file << opname << " cost :" << time_diff(time1, time2) / 10.0 << "ms"
<< std::endl;
out_file.close();
delete op;
return 0;
}
} // namespace paddle_mobile
int main(int argc, char *argv[]) {
paddle_mobile::TestGruOp<float, float>(384, 120, "gru_forward");
return 0;
}
......@@ -76,6 +76,5 @@ int main() {
paddle_mobile::TestLogOp({1, 1, 2, 3});
paddle_mobile::TestLogOp({1, 3, 11, 22});
paddle_mobile::TestLogOp({1, 32, 112, 112});
std::cout << "test log op pass." << std::endl;
return 0;
}
......@@ -92,18 +92,10 @@ static float find_abs_max(const Tensor *input) {
return max_abs;
}
int TestQuqntizeOp(int argc, char *argv[]) {
if (argc < 5) {
std::cout << "Usage: ./test-quantize-op batch_size channel height width"
<< std::endl;
return 1;
}
int batch_size = atoi(argv[1]);
int channel = atoi(argv[2]);
int height = atoi(argv[3]);
int width = atoi(argv[4]);
std::cout << "batch_size: " << batch_size << ", channel: " << channel
<< ", height: " << height << ", width: " << width << std::endl;
int TestQuqntizeOp(const int batch_size, const int channel, const int height,
const int width) {
DLOG << "batch_size: " << batch_size << ", channel: " << channel
<< ", height: " << height << ", width: " << width;
framework::DDim dim =
framework::make_ddim({batch_size, channel, height, width});
......@@ -140,9 +132,7 @@ int TestQuqntizeOp(int argc, char *argv[]) {
framework::Tensor output_cmp;
output_cmp.Resize(output->dims());
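// The comparison scale maps output_scale_cmp to the int8 limit 127 (scale = 127 / output_scale_cmp).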
float scale = 127 / output_scale_cmp;
// quantize<round::RoundToEven>(input, scale, &output_cmp);
// quantize<round::RoundAwayZero>(input, scale, &output_cmp);
quantize<round::RoundTowardsZero>(input, scale, &output_cmp);
quantize<round::RoundAwayZero>(input, scale, &output_cmp);
int8_t *output_cmp_data = output_cmp.data<int8_t>();
for (int i = 0; i < output->numel(); ++i) {
PADDLE_MOBILE_ENFORCE(output_data[i] == output_cmp_data[i],
......@@ -157,5 +147,7 @@ int TestQuqntizeOp(int argc, char *argv[]) {
} // namespace paddle_mobile
int main(int argc, char *argv[]) {
return paddle_mobile::TestQuqntizeOp(argc, argv);
TestQuqntizeOp(1, 10, 10, 5);
TestQuqntizeOp(1, 111, 111, 5);
TestQuqntizeOp(5, 111, 111, 5);
}
......@@ -59,7 +59,7 @@ int TestSequencePoolOp(const framework::LoDTensor &input_x,
int main(int argc, char *argv[]) {
framework::LoDTensor input_x, output;
// case 1
std::cerr << "running max case 1" << std::endl;
DLOG << "running max case 1";
{
std::vector<float> data{1, 2, 3, 4};
input_x.Resize(framework::make_ddim({4, 1}));
......@@ -71,14 +71,14 @@ int main(int argc, char *argv[]) {
std::vector<float> expect_data{2, 4};
for (int i = 0; i < 2; ++i) {
if (output.data<float>()[i] != expect_data[i]) {
std::cerr << "output[" << i << "]: " << output.data<float>()[i]
<< " != expect[" << i << "]: " << expect_data[i] << std::endl;
DLOG << "output[" << i << "]: " << output.data<float>()[i]
<< " != expect[" << i << "]: " << expect_data[i];
return 1;
}
}
}
// case 2
std::cerr << "running max case 2" << std::endl;
DLOG << "running max case 2";
{
std::vector<float> data{1, 2, 3, 4, 5, 6, 7, 8, 9, 10};
input_x.Resize(framework::make_ddim({data.size(), 1}));
......@@ -90,13 +90,13 @@ int main(int argc, char *argv[]) {
std::vector<float> expect_data{3, 10};
for (int i = 0; i < 2; ++i) {
if (output.data<float>()[i] != expect_data[i]) {
std::cerr << "output[" << i << "]: " << output.data<float>()[i]
<< " != expect[" << i << "]: " << expect_data[i] << std::endl;
DLOG << "output[" << i << "]: " << output.data<float>()[i]
<< " != expect[" << i << "]: " << expect_data[i];
return 1;
}
}
}
std::cerr << "running max case 3" << std::endl;
DLOG << "running max case 3";
// case 3
{
std::vector<float> data{1, 2, 3, 4, 5, 6, 7, 8};
......@@ -109,14 +109,14 @@ int main(int argc, char *argv[]) {
std::vector<float> expect_data{3, 4, 7, 8};
for (int i = 0; i < 4; ++i) {
if (output.data<float>()[i] != expect_data[i]) {
std::cerr << "output[" << i << "]: " << output.data<float>()[i]
<< " != expect[" << i << "]: " << expect_data[i] << std::endl;
DLOG << "output[" << i << "]: " << output.data<float>()[i]
<< " != expect[" << i << "]: " << expect_data[i];
return 1;
}
}
}
// case 4
std::cerr << "running max case 4" << std::endl;
DLOG << "running max case 4";
{
std::vector<float> data{1, 2, 3, 4, 5, 6, 7, 8, 9, 10,
11, 12, 13, 14, 15, 16, 17, 18, 19, 20};
......@@ -129,14 +129,14 @@ int main(int argc, char *argv[]) {
std::vector<float> expect_data{6, 7, 8, 9, 10, 16, 17, 18, 19, 20};
for (int i = 0; i < 10; ++i) {
if (output.data<float>()[i] != expect_data[i]) {
std::cerr << "output[" << i << "]: " << output.data<float>()[i]
<< " != expect[" << i << "]: " << expect_data[i] << std::endl;
DLOG << "output[" << i << "]: " << output.data<float>()[i]
<< " != expect[" << i << "]: " << expect_data[i];
return 1;
}
}
}
// case 1
std::cerr << "running sum case 1" << std::endl;
DLOG << "running sum case 1";
{
std::vector<float> data{1, 2, 3, 4};
input_x.Resize(framework::make_ddim({4, 1}));
......@@ -148,14 +148,14 @@ int main(int argc, char *argv[]) {
std::vector<float> expect_data{3, 7};
for (int i = 0; i < 2; ++i) {
if (output.data<float>()[i] != expect_data[i]) {
std::cerr << "output[" << i << "]: " << output.data<float>()[i]
<< " != expect[" << i << "]: " << expect_data[i] << std::endl;
DLOG << "output[" << i << "]: " << output.data<float>()[i]
<< " != expect[" << i << "]: " << expect_data[i];
return 1;
}
}
}
// case 2
std::cerr << "running sum case 2" << std::endl;
DLOG << "running sum case 2";
{
std::vector<float> data{1, 2, 3, 4, 5, 6, 7, 8, 9, 10};
input_x.Resize(framework::make_ddim({data.size(), 1}));
......@@ -167,14 +167,14 @@ int main(int argc, char *argv[]) {
std::vector<float> expect_data{6, 49};
for (int i = 0; i < 2; ++i) {
if (output.data<float>()[i] != expect_data[i]) {
std::cerr << "output[" << i << "]: " << output.data<float>()[i]
<< " != expect[" << i << "]: " << expect_data[i] << std::endl;
DLOG << "output[" << i << "]: " << output.data<float>()[i]
<< " != expect[" << i << "]: " << expect_data[i];
return 1;
}
}
}
// case 3
std::cerr << "running sum case 3" << std::endl;
DLOG << "running sum case 3";
{
std::vector<float> data{1, 2, 3, 4, 5, 6, 7, 8};
input_x.Resize(framework::make_ddim({4, 2}));
......@@ -186,14 +186,14 @@ int main(int argc, char *argv[]) {
std::vector<float> expect_data{4, 6, 12, 14};
for (int i = 0; i < 4; ++i) {
if (output.data<float>()[i] != expect_data[i]) {
std::cerr << "output[" << i << "]: " << output.data<float>()[i]
<< " != expect[" << i << "]: " << expect_data[i] << std::endl;
DLOG << "output[" << i << "]: " << output.data<float>()[i]
<< " != expect[" << i << "]: " << expect_data[i];
return 1;
}
}
}
// case 4
std::cerr << "running sum case 4" << std::endl;
DLOG << "running sum case 4";
{
std::vector<float> data{1, 2, 3, 4, 5, 6, 7, 8, 9, 10,
11, 12, 13, 14, 15, 16, 17, 18, 19, 20};
......@@ -206,14 +206,14 @@ int main(int argc, char *argv[]) {
std::vector<float> expect_data{7, 9, 11, 13, 15, 27, 29, 31, 33, 35};
for (int i = 0; i < 10; ++i) {
if (output.data<float>()[i] != expect_data[i]) {
std::cerr << "output[" << i << "]: " << output.data<float>()[i]
<< " != expect[" << i << "]: " << expect_data[i] << std::endl;
DLOG << "output[" << i << "]: " << output.data<float>()[i]
<< " != expect[" << i << "]: " << expect_data[i];
return 1;
}
}
}
// case 1
std::cerr << "running first case 1" << std::endl;
DLOG << "running first case 1";
{
std::vector<float> data{1, 2, 3, 4};
input_x.Resize(framework::make_ddim({4, 1}));
......@@ -225,14 +225,14 @@ int main(int argc, char *argv[]) {
std::vector<float> expect_data{1, 3};
for (int i = 0; i < 2; ++i) {
if (output.data<float>()[i] != expect_data[i]) {
std::cerr << "output[" << i << "]: " << output.data<float>()[i]
<< " != expect[" << i << "]: " << expect_data[i] << std::endl;
DLOG << "output[" << i << "]: " << output.data<float>()[i]
<< " != expect[" << i << "]: " << expect_data[i];
return 1;
}
}
}
// case 2
std::cerr << "running first case 2" << std::endl;
DLOG << "running first case 2";
{
std::vector<float> data{1, 2, 3, 4, 5, 6, 7, 8, 9, 10};
input_x.Resize(framework::make_ddim({data.size(), 1}));
......@@ -244,14 +244,14 @@ int main(int argc, char *argv[]) {
std::vector<float> expect_data{1, 4};
for (int i = 0; i < 2; ++i) {
if (output.data<float>()[i] != expect_data[i]) {
std::cerr << "output[" << i << "]: " << output.data<float>()[i]
<< " != expect[" << i << "]: " << expect_data[i] << std::endl;
DLOG << "output[" << i << "]: " << output.data<float>()[i]
<< " != expect[" << i << "]: " << expect_data[i];
return 1;
}
}
}
// case 3
std::cerr << "running first case 3" << std::endl;
DLOG << "running first case 3";
{
std::vector<float> data{1, 2, 3, 4, 5, 6, 7, 8};
input_x.Resize(framework::make_ddim({4, 2}));
......@@ -263,14 +263,14 @@ int main(int argc, char *argv[]) {
std::vector<float> expect_data{1, 2, 5, 6};
for (int i = 0; i < 4; ++i) {
if (output.data<float>()[i] != expect_data[i]) {
std::cerr << "output[" << i << "]: " << output.data<float>()[i]
<< " != expect[" << i << "]: " << expect_data[i] << std::endl;
DLOG << "output[" << i << "]: " << output.data<float>()[i]
<< " != expect[" << i << "]: " << expect_data[i];
return 1;
}
}
}
// case 4
std::cerr << "running first case 4" << std::endl;
DLOG << "running first case 4";
{
std::vector<float> data{1, 2, 3, 4, 5, 6, 7, 8, 9, 10,
11, 12, 13, 14, 15, 16, 17, 18, 19, 20};
......@@ -283,8 +283,8 @@ int main(int argc, char *argv[]) {
std::vector<float> expect_data{1, 2, 3, 4, 5, 11, 12, 13, 14, 15};
for (int i = 0; i < 10; ++i) {
if (output.data<float>()[i] != expect_data[i]) {
std::cerr << "output[" << i << "]: " << output.data<float>()[i]
<< " != expect[" << i << "]: " << expect_data[i] << std::endl;
DLOG << "output[" << i << "]: " << output.data<float>()[i]
<< " != expect[" << i << "]: " << expect_data[i];
return 1;
}
}
......
......@@ -76,6 +76,5 @@ int main() {
paddle_mobile::TestSigmoidOp({1, 1, 2, 3});
paddle_mobile::TestSigmoidOp({1, 3, 11, 22});
paddle_mobile::TestSigmoidOp({1, 32, 112, 112});
std::cout << "test sigmoid op pass." << std::endl;
return 0;
}
......@@ -58,7 +58,7 @@ int TestTanhOp(const std::vector<int> input_shape) {
const float *output_data = output->data<float>();
for (int i = 0; i < output->numel(); ++i) {
float gap = output_data[i] - output_cmp_data[i];
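// Guard added so tiny gaps are skipped before the relative-error (1e-3) comparison.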
if (std::abs(gap / (output_data[i] + 1e-5)) > 1e-3) {
if (gap > 1e-5 && std::abs(gap / (output_data[i] + 1e-5)) > 1e-3) {
LOG(kLOG_INFO) << "output_data[" << i << "] = " << output_data[i]
<< ", output_cmp_data[" << i
<< "] = " << output_cmp_data[i];
......
......@@ -15,6 +15,7 @@
# limitations under the License.
set -e
source ./ci_run_test.sh
function print_usage() {
echo "\n${RED}Usage${NONE}:
......@@ -231,6 +232,11 @@ function build_linux_fpga() {
docker build -t paddle-mobile:dev - < Dockerfile
fi
docker run --rm -v `pwd`:/workspace paddle-mobile:dev bash /workspace/tools/docker_build_fpga.sh
cd -
}
function run_android_test() {
ExecuteAndroidTests $1
}
function main() {
......@@ -239,9 +245,11 @@ function main() {
case $CMD in
android_armv7)
build_android_armv7
run_android_test armeabi-v7a
;;
android_armv8)
build_android_armv8
run_android_test arm64-v8a
;;
ios)
build_ios
......
#!/usr/bin/env bash
operators=
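# Each AddTest entry names an operator whose test-<op>-op binary is expected under ../test/build.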
function AddTest() {
operators="${operators} $1"
}
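# Push libpaddle-mobile.so and every registered test binary to /data/local/tmp on each
# attached device, then run the tests there with LD_LIBRARY_PATH pointing at that directory.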
function ExecuteAndroidTests() {
platform=$1
devices=`adb devices | grep -v devices | grep device | awk -F ' ' '{print $1}'`
for device in ${devices}; do
adb -s ${device} shell rm -rf /data/local/tmp/*
adb -s ${device} push ../build/${platform}/build/libpaddle-mobile.so /data/local/tmp/
for op in ${operators}; do
adb -s ${device} push ../test/build/test-${op}-op /data/local/tmp/
adb -s ${device} shell "cd /data/local/tmp/; LD_LIBRARY_PATH=. ./test-${op}-op"
echo "${BLUE}run test ${op} pass${NONE}"
done
done
}
AddTest batchnorm
AddTest cast
AddTest conv
AddTest dequantize
#AddTest elementwiseadd
AddTest log
AddTest logical-and
AddTest logical-not
AddTest logical-or
AddTest logical-xor
AddTest pool
AddTest quantize
AddTest relu
AddTest relu6
AddTest sequence-expand
AddTest sequence-pool
AddTest sequence-softmax
AddTest sigmoid
AddTest softmax
AddTest tanh
AddTest topk
#!/usr/bin/env bash
apt-get update
apt-get install -y gcc g++ cmake
......