Commit 56f4465b authored by qnqinan, committed by jameswu2014

update v1 and v3 kernel file in FPGA track, fixed #1696 (#1697)

* update concat and split kernel and related files in FPGA v2(v3) track

* update

* update

* update kernel and related files in FPGA v2 track

* update

* update

* update kernel and related files for static quantization in FPGA v2 track

* update

* update feed and fetch kernel in FPGA v2 track

* update io file

* update feed fetch and softmax kernel in FPGA v2 track

* update proposal kernel and other kernels in FPGA v2 track

* update fetch and softmax kernel in fpga v2 track

* update v1 and v3 kernel file in FPGA track
Parent 8fca2857
@@ -35,19 +35,23 @@ bool SoftmaxKernel<FPGA, float>::Init(SoftmaxParam<FPGA> *param) {
   auto float_input = new LoDTensor;
-  PADDLE_MOBILE_ENFORCE(input->dims().size() == 4,
-                        "Softmax should have 4-order input");
-  auto channel = dims[3];
-  if (channel == 1) {  // This input is generated by FC op, dims = [N C 1 1]
-    PADDLE_MOBILE_ENFORCE(dims[2] == 1, "Softmax input must come from FC op");
-    dims[3] = dims[1];
-    dims[1] = 1;
+  int input_n = 1, input_c = 1, input_h = 1, input_w = 1;
+  if (dims.size() == 4) {
+    input_h = dims[1];
+    input_w = dims[2];
+    input_c = dims[3];
+    if (input_c == 1) {  // This input is generated by FC op, dims = [N C 1 1]
+      PADDLE_MOBILE_ENFORCE(input_w == 1, "Softmax input must come from FC op");
+      input_c = dims[1];
+      input_h = 1;
+    }
+  } else if (dims.size() == 2) {
+    input_c = dims[1];
   }
   input->Resize(framework::make_ddim(dims));
   float_input->Resize(framework::make_ddim(dims));
-  if (channel == 2 && input->type() == type_id<half>()) {  // Use FPGA
+  if (input_c == 2 && input->type() == type_id<half>()) {  // Use FPGA
     fpga::format_fp16_ofm(out);
     fpga::BypassArgs args = {fpga::DATA_TYPE_FP16};
     args.input_layout_type = fpga::LAYOUT_HWC;
@@ -55,9 +59,9 @@ bool SoftmaxKernel<FPGA, float>::Init(SoftmaxParam<FPGA> *param) {
     args.input_data_type = fpga::DATA_TYPE_FP16;
     args.output_data_type = fpga::DATA_TYPE_FP16;
     args.image.address = input_ptr;
-    args.image.height = (uint32_t)input->dims()[1];
-    args.image.width = (uint32_t)input->dims()[2];
-    args.image.channels = (uint32_t)input->dims()[3];
+    args.image.height = input_h;
+    args.image.width = input_w;
+    args.image.channels = input_c;
     args.output.address = out->data<half>();
     args.output.scale_address = out->scale;
     args.output.activation.activation_type = fpga::SOFTMAX;
@@ -67,8 +71,8 @@ bool SoftmaxKernel<FPGA, float>::Init(SoftmaxParam<FPGA> *param) {
     out->mutable_data<float>(framework::make_ddim(dims));
     float_input->init(type_id<float>().hash_code());
     float_input->mutable_data<float>(framework::make_ddim(dims));
-    // fpga::format_fp32_ofm(float_input);
-    // fpga::format_fp32_ofm(out);
+    fpga::format_fp32_ofm(float_input);
+    fpga::format_fp32_ofm(out);
     fpga::BypassArgs args = {fpga::DATA_TYPE_FP16};
     args.input_layout_type = fpga::LAYOUT_HWC;
@@ -76,9 +80,9 @@ bool SoftmaxKernel<FPGA, float>::Init(SoftmaxParam<FPGA> *param) {
     args.input_data_type = fpga::DATA_TYPE_FP16;
     args.output_data_type = fpga::DATA_TYPE_FP32;
     args.image.address = input_ptr;
-    args.image.height = (uint32_t)dims[1] * dims[0];
-    args.image.width = (uint32_t)dims[2];
-    args.image.channels = (uint32_t)dims[3];
+    args.image.height = input_h;
+    args.image.width = input_w;
+    args.image.channels = input_c;
     args.output.address = float_input->data<float>();
     args.output.scale_address = float_input->scale;
     param->SetFloatInput(float_input);
@@ -91,6 +95,23 @@ bool SoftmaxKernel<FPGA, float>::Init(SoftmaxParam<FPGA> *param) {
 template <>
 void SoftmaxKernel<FPGA, float>::Compute(const SoftmaxParam<FPGA> &param) {
   auto *in_x = (param.InputX());
+  auto dims = in_x->dims();
+  auto n = 1;
+  auto h = 1;
+  auto w = 1;
+  auto c = 1;
+  if (dims.size() == 4) {
+    h = dims[1];
+    w = dims[2];
+    c = dims[3];
+    if (c == 1) {  // This input is generated by FC op, dims = [N C 1 1]
+      PADDLE_MOBILE_ENFORCE(w == 1, "Softmax input must come from FC op");
+      c = dims[1];
+      h = 1;
+    }
+  } else if (dims.size() == 2) {
+    c = dims[1];
+  }
   if (in_x->type() == type_id<half>()) {
     fpga::PerformBypass(param.FpgaArgs());
     if (param.FpgaArgs().output.activation.activation_type != fpga::SOFTMAX) {
@@ -105,8 +126,7 @@ void SoftmaxKernel<FPGA, float>::Compute(const SoftmaxParam<FPGA> &param) {
   } else {
     if (param.FpgaArgs().output.activation.activation_type != fpga::SOFTMAX) {
       Tensor *out = param.Out();
-      out->Resize(
-          {in_x->dims()[0], out->dims()[1], out->dims()[2], out->dims()[3]});
+      out->Resize({n, h, w, c});
       math::SoftmaxFuntor<CPU, float>()(in_x, out);
     }
   }
......
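For readers following the shape handling introduced above: the kernel now accepts both 4-D [N, H, W, C] and 2-D [N, C] inputs, and folds FC outputs of shape [N, C, 1, 1] into the channel dimension before filling the bypass arguments. Below is a minimal standalone sketch of that mapping; the HWC struct and the std::vector-based dims are illustrative stand-ins for the framework's DDim, not part of the patch (batch is left at 1, as in the patch).

```cpp
#include <cstdint>
#include <vector>

// Illustrative only: mirrors the dims -> (h, w, c) mapping added in Init/Compute.
struct HWC {
  uint32_t h = 1, w = 1, c = 1;
};

inline HWC MapSoftmaxDims(const std::vector<int64_t> &dims) {
  HWC s;
  if (dims.size() == 4) {  // [N, H, W, C] layout used by the FPGA track
    s.h = static_cast<uint32_t>(dims[1]);
    s.w = static_cast<uint32_t>(dims[2]);
    s.c = static_cast<uint32_t>(dims[3]);
    if (s.c == 1) {  // FC output, dims = [N, C, 1, 1]: treat dims[1] as channels
      s.c = static_cast<uint32_t>(dims[1]);
      s.h = 1;
    }
  } else if (dims.size() == 2) {  // [N, C] input
    s.c = static_cast<uint32_t>(dims[1]);
  }
  return s;
}
```

With this mapping both the fp16 bypass arguments and the CPU fallback see a consistent (h, w, c) triple regardless of the input rank.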
@@ -19,13 +19,10 @@ limitations under the License. */
 namespace paddle_mobile {
 namespace operators {
 template <>
 bool ConvAddBNReluKernel<FPGA, float>::Init(
     FusionConvAddBNReluParam<FPGA> *param) {
   bool relu_enabled = true;
-  // paddle_mobile::fpga::ActivationType activation_enable =
-  //     paddle_mobile::fpga::LEAKYRELU;
   auto input = const_cast<LoDTensor *>(param->Input());
   auto bias = param->Bias();
   auto bias_ptr = bias->data<float>();
@@ -42,6 +39,7 @@ bool ConvAddBNReluKernel<FPGA, float>::Init(
   auto bn_scale_ptr = param->InputScale()->data<float>();
   auto bn_bias_ptr = param->InputBias()->data<float>();
   const float epsilon = param->Epsilon();
   PADDLE_MOBILE_ENFORCE(out->dims()[1] == bias->dims()[0] &&
                             bias->dims()[0] == param->InputBias()->dims()[0],
                         "Output channel should be equal to bias number");
@@ -75,6 +73,7 @@ bool ConvAddBNReluKernel<FPGA, float>::Init(
                               new_bias_ptr);
     param->SetFpgaArgs(dwconv_arg);
     fpga::fpga_free(bs_ptr);
+    delete new_scale;
   } else {
     fpga::format_conv_data(filter, out, &bs_ptr, param->Groups());
     fpga::SplitConvArgs conv_arg = {0};
@@ -82,9 +81,10 @@ bool ConvAddBNReluKernel<FPGA, float>::Init(
                            param->Groups(), strides[0], strides[1], paddings[0],
                            paddings[1], bs_ptr);
     param->SetFpgaArgs(conv_arg);
+  }
   delete new_scale;
   delete new_bias;
-  }
   return true;
 }
......
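The last two hunks above rearrange the manual cleanup of the temporary new_scale and new_bias tensors around the depthwise/regular convolution branches. Below is a hedged sketch of the same ownership concern expressed with std::unique_ptr, which makes the cleanup independent of the branch structure; the Tensor type, function name, and branch condition are placeholders for illustration, not the real paddle-mobile API.

```cpp
#include <memory>

// Placeholder tensor type, illustration only; not the real paddle-mobile Tensor.
struct Tensor {
  explicit Tensor(int n) : buf_(new float[n]()) {}
  ~Tensor() { delete[] buf_; }
  float *data() { return buf_; }

 private:
  float *buf_;
};

// Sketch: owning the temporary scale/bias tensors with std::unique_ptr releases
// them on every path out of Init(), so neither convolution branch needs its own
// delete statements.
bool InitScaleBias(bool depthwise_path, int channel) {
  auto new_scale = std::make_unique<Tensor>(channel);
  auto new_bias = std::make_unique<Tensor>(channel);
  if (depthwise_path) {
    // ... fill depthwise-conv args with new_scale->data() / new_bias->data() ...
  } else {
    // ... format conv data and fill split-conv args ...
  }
  return true;  // new_scale and new_bias are freed automatically here
}
```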
@@ -114,6 +114,7 @@ void Reshape2Kernel<FPGA, float>::Compute(const Reshape2Param<FPGA> &param) {
     output->ShareDataWith(*input);
     framework::LoD lod = input->lod();
     output->set_lod(lod);
+    output->scale[0] = input->scale[0];
     return;
   }
......
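The single added line pairs the buffer aliasing done by ShareDataWith with a copy of the quantization scale, so downstream kernels that dequantize the shared data keep using the producer's scale. A toy sketch of that invariant follows; QuantTensor is an illustrative stand-in for the framework's LoDTensor, whose scale field the patch touches.

```cpp
#include <memory>

// Toy model of the invariant maintained by the added line: when the output
// merely aliases the input's buffer, the per-tensor quantization scale must be
// copied explicitly, or consumers dequantize with a stale factor.
struct QuantTensor {
  std::shared_ptr<signed char> data;  // aliased quantized payload
  float scale[2] = {1.0f, 0.0f};      // scale[0]: dequantization factor

  void ShareDataWith(const QuantTensor &other) { data = other.data; }
};

inline void Reshape2InPlace(const QuantTensor &input, QuantTensor *output) {
  output->ShareDataWith(input);       // reuse the payload, no copy
  output->scale[0] = input.scale[0];  // keep the scale consistent (the fix above)
}
```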
@@ -21,11 +21,12 @@ namespace operators {
 template <>
 bool SigmoidKernel<FPGA, float>::Init(SigmoidParam<FPGA> *param) {
-  paddle_mobile::fpga::ActivationType activation_enable =
-      paddle_mobile::fpga::SIGMOID;
-  int16_t leaky_relu_negative_slope = 0;
   auto input = const_cast<LoDTensor *>(param->InputX());
   auto input_ptr = input->data<int8_t>();
+  paddle_mobile::fpga::ActivationType activation_enable =
+      paddle_mobile::fpga::SIGMOID;
+  int16_t leaky_relu_negative_slope =
+      fpga::fp32_2_fp16(input->scale[0] / 127.0);
   auto out = param->Out();
   fpga::format_ofm(out);
@@ -47,6 +48,7 @@ bool SigmoidKernel<FPGA, float>::Init(SigmoidParam<FPGA> *param) {
 template <>
 void SigmoidKernel<FPGA, float>::Compute(const SigmoidParam<FPGA> &param) {
   fpga::PerformBypass(param.FpgaArgs());
+  param.Out()->scale[0] = 127.0;
 }
 }  // namespace operators
......
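Two int8-related details change in this kernel: the bypass argument's leaky_relu_negative_slope now carries the input's dequantization factor scale[0] / 127 (converted to fp16 by fpga::fp32_2_fp16), and the output scale is pinned to 127.0 after the bypass runs. The division by 127 matches the convention visible in the softmax CPU fallback below (real_value = int8_value * scale / 127); here is a small hedged sketch of that arithmetic, with function names chosen only for illustration.

```cpp
#include <cstdint>

// Hedged sketch of the int8 convention used by these kernels:
//   real_value = int8_value * scale / 127
// so the per-element factor handed to the FPGA bypass is scale / 127
// (the real kernel converts it to fp16 with fpga::fp32_2_fp16).
inline float DequantizeInt8(int8_t q, float scale) {
  return static_cast<float>(q) * scale / 127.0f;
}

inline float SigmoidBypassSlopeFp32(float input_scale) {
  return input_scale / 127.0f;  // fed through fp32_2_fp16 in the patch
}
```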
@@ -28,17 +28,22 @@ bool SoftmaxKernel<FPGA, float>::Init(SoftmaxParam<FPGA> *param) {
   auto out = param->Out();
   out->Resize(framework::make_ddim(dims));
-  PADDLE_MOBILE_ENFORCE(input->dims().size() == 4,
-                        "Softmax should have 4-order input");
-  auto channel = dims[3];
-  if (channel == 1) {  // This input is generated by FC op, dims = [N C 1 1]
-    PADDLE_MOBILE_ENFORCE(dims[2] == 1, "Softmax input must come from FC op");
-    dims[3] = dims[1];
-    dims[1] = 1;
+  int input_c = 1, input_h = 1, input_w = 1;
+  if (dims.size() == 4) {
+    input_h = dims[1];
+    input_w = dims[2];
+    input_c = dims[3];
+    if (input_c == 1) {  // This input is generated by FC op, dims = [N C 1 1]
+      PADDLE_MOBILE_ENFORCE(input_w == 1, "Softmax input must come from FC op");
+      input_c = dims[1];
+      input_h = 1;
+    }
+  } else if (dims.size() == 2) {
+    input_c = dims[1];
   }
   input->Resize(framework::make_ddim(dims));
-  if ((channel == 2) && (input->type() == type_id<int8_t>())) {
+  if ((input_c == 2) && (input->type() == type_id<int8_t>())) {
     auto input_ptr = input->data<int8_t>();
     float Si = input->scale[0];
     int16_t slope = fpga::fp32_2_fp16(Si / 127);
@@ -50,22 +55,14 @@ bool SoftmaxKernel<FPGA, float>::Init(SoftmaxParam<FPGA> *param) {
     args.input_data_type = fpga::DATA_TYPE_FP16;
     args.output_data_type = fpga::DATA_TYPE_FP16;
     args.image.address = input_ptr;
-    args.image.height = (uint32_t)input->dims()[1];
-    args.image.width = (uint32_t)input->dims()[2];
-    args.image.channels = (uint32_t)input->dims()[3];
+    args.image.height = input_h;
+    args.image.width = input_w;
+    args.image.channels = input_c;
     args.output.address = out->data<int8_t>();
     args.output.scale_address = out->scale;
     args.output.activation.activation_type = fpga::SOFTMAX;
     args.output.activation.leaky_relu_negative_slope = slope;
     param->SetFpgaArgs(args);
-  } else if (input->type() == type_id<int8_t>()) {
-    auto float_input_x = param->float_input_x_;
-    float_input_x = std::make_shared<Tensor>();
-    float_input_x->Resize(input->dims());
-    float_input_x->init(type_id<float>().hash_code());
-    fpga::format_ofm(float_input_x.get());
-    out->mutable_data<float>(framework::make_ddim(dims));
-    fpga::format_ofm(out);
   } else {
     out->mutable_data<float>(framework::make_ddim(dims));
     fpga::format_ofm(out);
@@ -78,36 +75,45 @@ template <>
 void SoftmaxKernel<FPGA, float>::Compute(const SoftmaxParam<FPGA> &param) {
   auto *in_x = (param.InputX());
   auto dims = in_x->dims();
-  auto n = dims[0];
-  auto h = dims[1];
-  auto w = dims[2];
-  auto c = dims[3];
+  auto n = 1;
+  auto h = 1;
+  auto w = 1;
+  auto c = 1;
+  if (dims.size() == 4) {
+    h = dims[1];
+    w = dims[2];
+    c = dims[3];
+    if (c == 1) {  // This input is generated by FC op, dims = [N C 1 1]
+      PADDLE_MOBILE_ENFORCE(w == 1, "Softmax input must come from FC op");
+      c = dims[1];
+      h = 1;
+    }
+  } else if (dims.size() == 2) {
+    c = dims[1];
+  }
   if ((c == 2) && (in_x->type() == type_id<int8_t>())) {
     fpga::PerformBypass(param.FpgaArgs());
   } else if (in_x->type() == type_id<int8_t>()) {
     auto in_data = in_x->data<int8_t>();
     float Si = in_x->scale[0];
     Tensor *out = param.Out();
-    out->Resize(
-        {in_x->dims()[0], out->dims()[1], out->dims()[2], out->dims()[3]});
+    out->Resize({n, h, w, c});
     auto float_input_x = param.float_input_x_;
+    float_input_x = std::make_shared<Tensor>();
+    float_input_x->Resize(in_x->dims());
+    float_input_x->init(type_id<float>().hash_code());
+    fpga::format_fp32_ofm(float_input_x.get());
     auto float_input_x_data = float_input_x->data<float>();
     int dataNum = n * h * fpga::align_to_x(w * c, IMAGE_ALIGNMENT);
     for (int i = 0; i < dataNum; i++) {
       float_input_x_data[i] = in_data[i] * Si / 127;
     }
     math::SoftmaxFuntor<CPU, float>()(float_input_x.get(), out);
+    auto out_data = out->data<float>();
+    fpga::fpga_flush(out_data, dataNum * sizeof(float));
   } else {
     Tensor *out = param.Out();
-    out->Resize(
-        {in_x->dims()[0], out->dims()[1], out->dims()[2], out->dims()[3]});
+    out->Resize({n, h, w, c});
     math::SoftmaxFuntor<CPU, float>()(in_x, out);
+    int dataNum = n * h * fpga::align_to_x(w * c, IMAGE_ALIGNMENT);
+    auto out_data = out->data<float>();
+    fpga::fpga_flush(out_data, dataNum * sizeof(float));
   }
 }
......
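The Compute hunk above completes the CPU fallback for int8 inputs that the two-channel FPGA path does not cover: allocate a float staging tensor, dequantize each element with Si / 127, run the CPU softmax functor, and flush the result buffer for the FPGA-visible output. Below is a compact, self-contained sketch of that fallback over a flat buffer; the IMAGE_ALIGNMENT padding and fpga_flush details of the real kernel are deliberately omitted, and the function is illustrative rather than the kernel's actual code.

```cpp
#include <algorithm>
#include <cmath>
#include <cstdint>
#include <limits>
#include <vector>

// Illustrative CPU fallback mirroring the int8 branch above: dequantize with
// scale / 127, then apply a numerically stable softmax over each row of
// `channels` values. The real kernel also pads w * c to IMAGE_ALIGNMENT and
// flushes the output buffer with fpga::fpga_flush.
inline std::vector<float> SoftmaxInt8Fallback(const std::vector<int8_t> &in,
                                              float scale, int channels) {
  std::vector<float> out(in.size(), 0.0f);
  for (std::size_t row = 0; row + channels <= in.size(); row += channels) {
    float max_v = -std::numeric_limits<float>::infinity();
    for (int c = 0; c < channels; ++c) {
      out[row + c] = in[row + c] * scale / 127.0f;  // dequantize
      max_v = std::max(max_v, out[row + c]);
    }
    float sum = 0.0f;
    for (int c = 0; c < channels; ++c) {
      out[row + c] = std::exp(out[row + c] - max_v);
      sum += out[row + c];
    }
    for (int c = 0; c < channels; ++c) {
      out[row + c] /= sum;
    }
  }
  return out;
}
```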