Commit 21c7a2ba authored by qnqinan, committed by jameswu2014

update v1 and v3 kernel files in FPGA track, fixed #1696 (#1697)

* update concat and split kernel and related files in FPGA v2(v3) track

* update

* update

* update kernel and related files in FPGA v2 track

* update

* update

* update kernel and related files for static quantization in FPGA v2 track

* update

* update feed and fetch kernel in FPGA v2 track

* update io file

* update feed fetch and softmax kernel in FPGA v2 track

* update proposal kernel and other kernels in FPGA v2 track

* update fetch and softmax kernel in fpga v2 track

* update v1 and v3 kernel file in FPGA track
Parent 13a9b9cb
......@@ -35,19 +35,23 @@ bool SoftmaxKernel<FPGA, float>::Init(SoftmaxParam<FPGA> *param) {
auto float_input = new LoDTensor;
PADDLE_MOBILE_ENFORCE(input->dims().size() == 4,
"Softmax should have 4-order input");
auto channel = dims[3];
if (channel == 1) { // This input is generated by FC op, dims = [N C 1 1]
PADDLE_MOBILE_ENFORCE(dims[2] == 1, "Softmax input must come from FC op");
dims[3] = dims[1];
dims[1] = 1;
int input_n = 1, input_c = 1, input_h = 1, input_w = 1;
if (dims.size() == 4) {
input_h = dims[1];
input_w = dims[2];
input_c = dims[3];
if (input_c == 1) { // This input is generated by FC op, dims = [N C 1 1]
PADDLE_MOBILE_ENFORCE(input_w == 1, "Softmax input must come from FC op");
input_c = dims[1];
input_h = 1;
}
} else if (dims.size() == 2) {
input_c = dims[1];
}
input->Resize(framework::make_ddim(dims));
float_input->Resize(framework::make_ddim(dims));
if (channel == 2 && input->type() == type_id<half>()) { // Use FPGA
if (input_c == 2 && input->type() == type_id<half>()) { // Use FPGA
fpga::format_fp16_ofm(out);
fpga::BypassArgs args = {fpga::DATA_TYPE_FP16};
args.input_layout_type = fpga::LAYOUT_HWC;
......@@ -55,9 +59,9 @@ bool SoftmaxKernel<FPGA, float>::Init(SoftmaxParam<FPGA> *param) {
args.input_data_type = fpga::DATA_TYPE_FP16;
args.output_data_type = fpga::DATA_TYPE_FP16;
args.image.address = input_ptr;
args.image.height = (uint32_t)input->dims()[1];
args.image.width = (uint32_t)input->dims()[2];
args.image.channels = (uint32_t)input->dims()[3];
args.image.height = input_h;
args.image.width = input_w;
args.image.channels = input_c;
args.output.address = out->data<half>();
args.output.scale_address = out->scale;
args.output.activation.activation_type = fpga::SOFTMAX;
......@@ -67,8 +71,8 @@ bool SoftmaxKernel<FPGA, float>::Init(SoftmaxParam<FPGA> *param) {
out->mutable_data<float>(framework::make_ddim(dims));
float_input->init(type_id<float>().hash_code());
float_input->mutable_data<float>(framework::make_ddim(dims));
// fpga::format_fp32_ofm(float_input);
// fpga::format_fp32_ofm(out);
fpga::format_fp32_ofm(float_input);
fpga::format_fp32_ofm(out);
fpga::BypassArgs args = {fpga::DATA_TYPE_FP16};
args.input_layout_type = fpga::LAYOUT_HWC;
......@@ -76,9 +80,9 @@ bool SoftmaxKernel<FPGA, float>::Init(SoftmaxParam<FPGA> *param) {
args.input_data_type = fpga::DATA_TYPE_FP16;
args.output_data_type = fpga::DATA_TYPE_FP32;
args.image.address = input_ptr;
args.image.height = (uint32_t)dims[1] * dims[0];
args.image.width = (uint32_t)dims[2];
args.image.channels = (uint32_t)dims[3];
args.image.height = input_h;
args.image.width = input_w;
args.image.channels = input_c;
args.output.address = float_input->data<float>();
args.output.scale_address = float_input->scale;
param->SetFloatInput(float_input);
......@@ -91,6 +95,23 @@ bool SoftmaxKernel<FPGA, float>::Init(SoftmaxParam<FPGA> *param) {
template <>
void SoftmaxKernel<FPGA, float>::Compute(const SoftmaxParam<FPGA> &param) {
auto *in_x = (param.InputX());
auto dims = in_x->dims();
auto n = 1;
auto h = 1;
auto w = 1;
auto c = 1;
if (dims.size() == 4) {
h = dims[1];
w = dims[2];
c = dims[3];
if (c == 1) { // This input is generated by FC op, dims = [N C 1 1]
PADDLE_MOBILE_ENFORCE(w == 1, "Softmax input must come from FC op");
c = dims[1];
h = 1;
}
} else if (dims.size() == 2) {
c = dims[1];
}
if (in_x->type() == type_id<half>()) {
fpga::PerformBypass(param.FpgaArgs());
if (param.FpgaArgs().output.activation.activation_type != fpga::SOFTMAX) {
......@@ -105,8 +126,7 @@ void SoftmaxKernel<FPGA, float>::Compute(const SoftmaxParam<FPGA> &param) {
} else {
if (param.FpgaArgs().output.activation.activation_type != fpga::SOFTMAX) {
Tensor *out = param.Out();
out->Resize(
{in_x->dims()[0], out->dims()[1], out->dims()[2], out->dims()[3]});
out->Resize({n, h, w, c});
math::SoftmaxFuntor<CPU, float>()(in_x, out);
}
}
......
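Note on the hunk above: the rewritten Init and Compute paths now derive the (h, w, c) image geometry from either a 4-D NHWC shape or a 2-D [N, C] shape before filling the FPGA bypass args. A minimal standalone sketch of that mapping (illustrative names, not the paddle-mobile API; the batch dimension is left at 1, as in the kernel):

```cpp
#include <cassert>
#include <cstdio>
#include <vector>

struct Shape4 { int n = 1, h = 1, w = 1, c = 1; };

// Mirrors the dims handling added above: a 4-D NHWC input, with the special
// case of an FC-produced [N, C, 1, 1] tensor, or a plain 2-D [N, C] input.
Shape4 MapSoftmaxDims(const std::vector<int>& dims) {
  Shape4 s;
  if (dims.size() == 4) {
    s.h = dims[1];
    s.w = dims[2];
    s.c = dims[3];
    if (s.c == 1) {  // FC output stored as [N, C, 1, 1]
      assert(s.w == 1 && "Softmax input must come from FC op");
      s.c = dims[1];  // the real channel count lives in dims[1]
      s.h = 1;
    }
  } else if (dims.size() == 2) {
    s.c = dims[1];  // only the channel dimension matters here
  }
  return s;
}

int main() {
  Shape4 fc = MapSoftmaxDims({1, 1000, 1, 1});  // FC output
  Shape4 flat = MapSoftmaxDims({1, 10});        // 2-D input
  std::printf("fc:  h=%d w=%d c=%d\n", fc.h, fc.w, fc.c);
  std::printf("2d:  h=%d w=%d c=%d\n", flat.h, flat.w, flat.c);
  return 0;
}
```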
......@@ -19,13 +19,10 @@ limitations under the License. */
namespace paddle_mobile {
namespace operators {
template <>
bool ConvAddBNReluKernel<FPGA, float>::Init(
FusionConvAddBNReluParam<FPGA> *param) {
bool relu_enabled = true;
// paddle_mobile::fpga::ActivationType activation_enable =
// paddle_mobile::fpga::LEAKYRELU;
auto input = const_cast<LoDTensor *>(param->Input());
auto bias = param->Bias();
auto bias_ptr = bias->data<float>();
......@@ -42,6 +39,7 @@ bool ConvAddBNReluKernel<FPGA, float>::Init(
auto bn_scale_ptr = param->InputScale()->data<float>();
auto bn_bias_ptr = param->InputBias()->data<float>();
const float epsilon = param->Epsilon();
PADDLE_MOBILE_ENFORCE(out->dims()[1] == bias->dims()[0] &&
bias->dims()[0] == param->InputBias()->dims()[0],
"Output channel should be equal to bias number");
......@@ -75,6 +73,7 @@ bool ConvAddBNReluKernel<FPGA, float>::Init(
new_bias_ptr);
param->SetFpgaArgs(dwconv_arg);
fpga::fpga_free(bs_ptr);
delete new_scale;
} else {
fpga::format_conv_data(filter, out, &bs_ptr, param->Groups());
fpga::SplitConvArgs conv_arg = {0};
......@@ -82,9 +81,10 @@ bool ConvAddBNReluKernel<FPGA, float>::Init(
param->Groups(), strides[0], strides[1], paddings[0],
paddings[1], bs_ptr);
param->SetFpgaArgs(conv_arg);
delete new_scale;
delete new_bias;
}
delete new_scale;
delete new_bias;
return true;
}
......
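A side note on the cleanup in this hunk: new_scale and new_bias (the combined batch-norm scale and bias, judging from the surrounding code) are raw-pointer temporaries freed with manual delete on a per-branch basis. A hypothetical alternative, not what this commit does, is to hold them in std::unique_ptr so every exit path releases them exactly once:

```cpp
#include <memory>

struct Tensor {};  // stand-in for framework::Tensor

// Sketch only: owning the temporaries with unique_ptr removes the need to
// repeat `delete new_scale; delete new_bias;` in each branch of Init.
void InitSketch(bool depthwise) {
  auto new_scale = std::make_unique<Tensor>();
  auto new_bias = std::make_unique<Tensor>();
  if (depthwise) {
    (void)new_scale;  // placeholder for filling the dwconv args
  } else {
    (void)new_bias;   // placeholder for filling the split-conv args
  }
  // Both tensors are released here automatically, on every path.
}
```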
......@@ -114,6 +114,7 @@ void Reshape2Kernel<FPGA, float>::Compute(const Reshape2Param<FPGA> &param) {
output->ShareDataWith(*input);
framework::LoD lod = input->lod();
output->set_lod(lod);
output->scale[0] = input->scale[0];
return;
}
......
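The one-line addition above copies the quantization scale along with the shared data buffer. A toy illustration (simplified types, not the framework's Tensor/LoDTensor) of why a data-sharing reshape must carry the scale forward; judging from the dequantization elsewhere in this commit, the real value is roughly data[i] * scale / 127:

```cpp
#include <cstdint>
#include <cstdio>
#include <vector>

struct QuantTensor {
  std::vector<int8_t> data;
  float scale = 1.0f;  // real value ≈ data[i] * scale / 127
};

QuantTensor Reshape2(const QuantTensor& in) {
  QuantTensor out;
  out.data = in.data;    // payload carried over; shape bookkeeping elided
  out.scale = in.scale;  // the line the hunk above adds: keep the scale
  return out;
}

int main() {
  QuantTensor t{{127, 64, -32}, 6.0f};
  QuantTensor r = Reshape2(t);
  std::printf("scale carried through reshape: %.1f\n", r.scale);
  return 0;
}
```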
......@@ -21,11 +21,12 @@ namespace operators {
template <>
bool SigmoidKernel<FPGA, float>::Init(SigmoidParam<FPGA> *param) {
paddle_mobile::fpga::ActivationType activation_enable =
paddle_mobile::fpga::SIGMOID;
int16_t leaky_relu_negative_slope = 0;
auto input = const_cast<LoDTensor *>(param->InputX());
auto input_ptr = input->data<int8_t>();
paddle_mobile::fpga::ActivationType activation_enable =
paddle_mobile::fpga::SIGMOID;
int16_t leaky_relu_negative_slope =
fpga::fp32_2_fp16(input->scale[0] / 127.0);
auto out = param->Out();
fpga::format_ofm(out);
......@@ -47,6 +48,7 @@ bool SigmoidKernel<FPGA, float>::Init(SigmoidParam<FPGA> *param) {
template <>
void SigmoidKernel<FPGA, float>::Compute(const SigmoidParam<FPGA> &param) {
fpga::PerformBypass(param.FpgaArgs());
param.Out()->scale[0] = 127.0;
}
} // namespace operators
......
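For context on the two changes in this file: the leaky-relu negative-slope field is reused to pass the input scale (Si / 127) to the bypass, and the output scale is pinned to 127. A rough host-side reference of what that encodes (my reading of the hunk, not FPGA code):

```cpp
#include <cmath>
#include <cstdint>
#include <cstdio>

// The int8 input is dequantized as x * Si / 127 (the value packed into the
// slope field) before the sigmoid; the output scale of 127 maps the result
// back onto the full int8 range.
float SigmoidReference(int8_t x, float Si) {
  float dequant = static_cast<float>(x) * Si / 127.0f;
  return 1.0f / (1.0f + std::exp(-dequant));
}

int main() {
  float Si = 6.0f;  // hypothetical input scale
  std::printf("sigmoid(+127) = %f\n", SigmoidReference(127, Si));
  std::printf("sigmoid(-127) = %f\n", SigmoidReference(-127, Si));
  return 0;
}
```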
......@@ -28,17 +28,22 @@ bool SoftmaxKernel<FPGA, float>::Init(SoftmaxParam<FPGA> *param) {
auto out = param->Out();
out->Resize(framework::make_ddim(dims));
PADDLE_MOBILE_ENFORCE(input->dims().size() == 4,
"Softmax should have 4-order input");
auto channel = dims[3];
if (channel == 1) { // This input is generated by FC op, dims = [N C 1 1]
PADDLE_MOBILE_ENFORCE(dims[2] == 1, "Softmax input must come from FC op");
dims[3] = dims[1];
dims[1] = 1;
int input_c = 1, input_h = 1, input_w = 1;
if (dims.size() == 4) {
input_h = dims[1];
input_w = dims[2];
input_c = dims[3];
if (input_c == 1) { // This input is generated by FC op, dims = [N C 1 1]
PADDLE_MOBILE_ENFORCE(input_w == 1, "Softmax input must come from FC op");
input_c = dims[1];
input_h = 1;
}
} else if (dims.size() == 2) {
input_c = dims[1];
}
input->Resize(framework::make_ddim(dims));
if ((channel == 2) && (input->type() == type_id<int8_t>())) {
if ((input_c == 2) && (input->type() == type_id<int8_t>())) {
auto input_ptr = input->data<int8_t>();
float Si = input->scale[0];
int16_t slope = fpga::fp32_2_fp16(Si / 127);
......@@ -50,22 +55,14 @@ bool SoftmaxKernel<FPGA, float>::Init(SoftmaxParam<FPGA> *param) {
args.input_data_type = fpga::DATA_TYPE_FP16;
args.output_data_type = fpga::DATA_TYPE_FP16;
args.image.address = input_ptr;
args.image.height = (uint32_t)input->dims()[1];
args.image.width = (uint32_t)input->dims()[2];
args.image.channels = (uint32_t)input->dims()[3];
args.image.height = input_h;
args.image.width = input_w;
args.image.channels = input_c;
args.output.address = out->data<int8_t>();
args.output.scale_address = out->scale;
args.output.activation.activation_type = fpga::SOFTMAX;
args.output.activation.leaky_relu_negative_slope = slope;
param->SetFpgaArgs(args);
} else if (input->type() == type_id<int8_t>()) {
auto float_input_x = param->float_input_x_;
float_input_x = std::make_shared<Tensor>();
float_input_x->Resize(input->dims());
float_input_x->init(type_id<float>().hash_code());
fpga::format_ofm(float_input_x.get());
out->mutable_data<float>(framework::make_ddim(dims));
fpga::format_ofm(out);
} else {
out->mutable_data<float>(framework::make_ddim(dims));
fpga::format_ofm(out);
......@@ -78,36 +75,45 @@ template <>
void SoftmaxKernel<FPGA, float>::Compute(const SoftmaxParam<FPGA> &param) {
auto *in_x = (param.InputX());
auto dims = in_x->dims();
auto n = dims[0];
auto h = dims[1];
auto w = dims[2];
auto c = dims[3];
auto n = 1;
auto h = 1;
auto w = 1;
auto c = 1;
if (dims.size() == 4) {
h = dims[1];
w = dims[2];
c = dims[3];
if (c == 1) { // This input is generated by FC op, dims = [N C 1 1]
PADDLE_MOBILE_ENFORCE(w == 1, "Softmax input must come from FC op");
c = dims[1];
h = 1;
}
} else if (dims.size() == 2) {
c = dims[1];
}
if ((c == 2) && (in_x->type() == type_id<int8_t>())) {
fpga::PerformBypass(param.FpgaArgs());
} else if (in_x->type() == type_id<int8_t>()) {
auto in_data = in_x->data<int8_t>();
float Si = in_x->scale[0];
Tensor *out = param.Out();
out->Resize(
{in_x->dims()[0], out->dims()[1], out->dims()[2], out->dims()[3]});
out->Resize({n, h, w, c});
auto float_input_x = param.float_input_x_;
float_input_x = std::make_shared<Tensor>();
float_input_x->Resize(in_x->dims());
float_input_x->init(type_id<float>().hash_code());
fpga::format_fp32_ofm(float_input_x.get());
auto float_input_x_data = float_input_x->data<float>();
int dataNum = n * h * fpga::align_to_x(w * c, IMAGE_ALIGNMENT);
for (int i = 0; i < dataNum; i++) {
float_input_x_data[i] = in_data[i] * Si / 127;
}
math::SoftmaxFuntor<CPU, float>()(float_input_x.get(), out);
auto out_data = out->data<float>();
fpga::fpga_flush(out_data, dataNum * sizeof(float));
} else {
Tensor *out = param.Out();
out->Resize(
{in_x->dims()[0], out->dims()[1], out->dims()[2], out->dims()[3]});
out->Resize({n, h, w, c});
math::SoftmaxFuntor<CPU, float>()(in_x, out);
int dataNum = n * h * fpga::align_to_x(w * c, IMAGE_ALIGNMENT);
auto out_data = out->data<float>();
fpga::fpga_flush(out_data, dataNum * sizeof(float));
}
}
......
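The CPU fallback added in Compute above dequantizes the int8 input with x * Si / 127 and then runs the float softmax. A standalone sketch of that path (IMAGE_ALIGNMENT padding, the framework tensors, and fpga_flush are elided):

```cpp
#include <algorithm>
#include <cmath>
#include <cstdint>
#include <cstdio>
#include <vector>

// Dequantize int8 logits with scale Si, then apply a numerically stable
// softmax, mirroring the fallback loop in the hunk above.
std::vector<float> Int8Softmax(const std::vector<int8_t>& in, float Si) {
  std::vector<float> x(in.size());
  for (size_t i = 0; i < in.size(); ++i) {
    x[i] = in[i] * Si / 127.0f;  // dequantize, matching the kernel's loop
  }
  float max = *std::max_element(x.begin(), x.end());
  float sum = 0.0f;
  for (float& v : x) {
    v = std::exp(v - max);  // subtract the max for numerical stability
    sum += v;
  }
  for (float& v : x) v /= sum;
  return x;
}

int main() {
  std::vector<int8_t> logits = {127, 0, -127};
  auto p = Int8Softmax(logits, 6.0f);
  std::printf("%f %f %f\n", p[0], p[1], p[2]);
  return 0;
}
```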