Commit b574d803 authored by qnqinan, committed by GitHub

Merge pull request #1487 from qnqinan/develop

add int8-to-fp16 convert function in the FPGA track; fixes #1486
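For context, a minimal host-side sketch of the element-wise conversion the new bypass command asks the hardware for. The `int8_to_fp16` helper below is illustrative only (not part of this patch); it relies on every int8 value being exactly representable in binary16, so no rounding is needed, and it uses the GCC/Clang `__builtin_clz` intrinsic.

```cpp
#include <cstdint>

// Widen one int8 value to an IEEE 754 binary16 bit pattern (exact for [-128, 127]).
inline uint16_t int8_to_fp16(int8_t v) {
  if (v == 0) return 0;                                        // +0.0
  uint16_t sign = v < 0 ? 0x8000 : 0x0000;
  int mag = v < 0 ? -static_cast<int>(v) : v;                  // 1 .. 128
  int msb = 31 - __builtin_clz(static_cast<unsigned>(mag));    // position of the leading bit
  uint16_t exp = static_cast<uint16_t>(15 + msb);              // biased 5-bit exponent
  uint16_t mant = static_cast<uint16_t>((mag << (10 - msb)) & 0x3FF);  // drop the implicit 1
  return sign | static_cast<uint16_t>(exp << 10) | mant;
}
```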
......@@ -28,14 +28,26 @@ namespace fpga {
void format_image(framework::Tensor *image_tensor) {
auto dims = image_tensor->dims();
auto channel = dims[1], height = dims[2], width = dims[3];
std::type_index input_type = image_tensor->type();
if (input_type == typeid(float)) {
auto data_ptr = image_tensor->data<float>();
auto external_ptr = reinterpret_cast<float *>(image_tensor->external_data);
float *p_data = external_ptr == nullptr ? data_ptr : external_ptr;
image::format_image(&p_data, channel, height, width);
image::format_image<float>(&p_data, channel, height, width);
if (p_data != data_ptr && external_ptr == nullptr) {
image_tensor->reset_data_ptr(p_data);
}
} else {
auto data_ptr = image_tensor->data<int8_t>();
auto external_ptr = reinterpret_cast<int8_t *>(image_tensor->external_data);
int8_t *p_data = external_ptr == nullptr ? data_ptr : external_ptr;
image::format_image<int8_t>(&p_data, channel, height, width);
if (p_data != data_ptr && external_ptr == nullptr) {
image_tensor->reset_data_ptr(p_data);
}
}
}
void format_ofm(framework::Tensor *ofm_tensor) {
......
......@@ -13,9 +13,6 @@ See the License for the specific language governing permissions and
limitations under the License. */
#include "fpga/V1/image.h"
#include <memory.h>
#include <algorithm>
#include "fpga/common/fpga_common.h"
namespace paddle_mobile {
namespace fpga {
......@@ -58,37 +55,6 @@ void convert_to_chw(float **data_in, int channel, int height, int width,
*data_in = data_tmp;
}
void align_element_conv(float **data_in, int height, int cw) {
int h = 0;
int align_cw = align_to_x(cw, IMAGE_ALIGNMENT);
float *data_tmp =
(float *)fpga_malloc(height * align_cw * sizeof(float)); // NOLINT
memset(data_tmp, 0, height * align_cw * sizeof(float));
for (h = 0; h < height; h++) {
memcpy((void *)(data_tmp + h * align_cw), // NOLINT
(void *)(*data_in + h * cw), // NOLINT
cw * sizeof(float));
}
*data_in = data_tmp;
}
void format_image(float **data_in, int channel, int height, int width) {
// convert_to_hwc(data_in, channel, height, width);
int cw = channel * width;
int align_cw = align_to_x(cw, IMAGE_ALIGNMENT);
if (align_cw != cw) {
float *hwc_temp = *data_in;
align_element_conv(data_in, height, channel * width);
fpga_free(hwc_temp);
}
fpga_flush(*data_in, align_to_x(channel * width, IMAGE_ALIGNMENT) * height *
sizeof(float));
}
void concat_images(int16_t **images_in, float **scales_in, void *image_out,
float *scale_out, int image_num, uint32_t *channel_num,
int height, int width) {
......
......@@ -14,8 +14,10 @@ limitations under the License. */
#pragma once
#include <memory.h>
#include <algorithm>
#include <cstdint>
#include "fpga/common/fpga_common.h"
namespace paddle_mobile {
namespace fpga {
namespace image {
......@@ -24,10 +26,42 @@ void convert_to_hwc(float** data_in, int channel, int height, int width,
int num = 1);
void convert_to_chw(float** data_in, int channel, int height, int width,
int num = 1);
// template <typename Dtype>
// void align_element_conv(Dtype** data_in, int height, int cw);
// template <typename T>
// void format_image(T** data_in, int channel, int height, int width);
void align_element_conv(float** data_in, int height, int cw);
void format_image(float** data_in, int channel, int height, int width);
template <typename Dtype>
void align_element_conv(Dtype** data_in, int height, int cw);
template <typename Dtype>
void align_element_conv(Dtype** data_in, int height, int cw) {
int h = 0;
int align_cw = align_to_x(cw, IMAGE_ALIGNMENT);
Dtype* data_tmp =
(Dtype*)fpga_malloc(height * align_cw * sizeof(Dtype));  // NOLINT
memset(data_tmp, 0, height * align_cw * sizeof(Dtype));
for (h = 0; h < height; h++) {
memcpy((void*)(data_tmp + h * align_cw),  // NOLINT
(void*)(*data_in + h * cw),  // NOLINT
cw * sizeof(Dtype));
}
*data_in = data_tmp;
}
template <typename T>
void format_image(T** data_in, int channel, int height, int width) {
int cw = channel * width;
int align_cw = align_to_x(cw, IMAGE_ALIGNMENT);
if (align_cw != cw) {
T* hwc_temp = *data_in;
align_element_conv(data_in, height, channel * width);
fpga_free(hwc_temp);
}
fpga_flush(*data_in,
align_to_x(channel * width, IMAGE_ALIGNMENT) * height * sizeof(T));
}
// Concat featuremaps along channel direction
void concat_images(int16_t** images_in, float** scales_in, void* image_out,
float* scale_out, int image_num, uint32_t* channel_num,
......
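As a usage sketch, a caller can now align an int8 HWC image in place through the templated `format_image`. The buffer must come from `fpga_malloc` (declared in fpga_common.h) so the helper can free it and substitute an aligned copy when `channel * width` is not a multiple of `IMAGE_ALIGNMENT`; the 3x224x224 shape below is just an example.

```cpp
#include <cstdint>
#include "fpga/V1/image.h"

void align_int8_input() {
  int channel = 3, height = 224, width = 224;  // example shape
  auto* img = reinterpret_cast<int8_t*>(
      paddle_mobile::fpga::fpga_malloc(channel * height * width * sizeof(int8_t)));
  // ... fill img with HWC int8 pixel data ...
  paddle_mobile::fpga::image::format_image<int8_t>(&img, channel, height, width);
  // img may now point to a new buffer whose rows are zero-padded to IMAGE_ALIGNMENT.
}
```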
......@@ -38,10 +38,12 @@ using namespace std; // NOLINT
#define CMD_FP16_TO_FP32 1
#define CMD_FP32_TO_FP16 2
#define CMD_FP32_TO_FP32 3
#define CMD_INT8_TO_FP16 4
// bypass macro
#define SIZE_FP16 2
#define SIZE_FP32 4
#define SIZE_INT8 1
#define PE_IRQ_TIMEOUT 1000000
......@@ -607,6 +609,16 @@ int PerformBypass(const struct BypassArgs &args) {
}
} break;
case DATA_TYPE_INT8: {
if (args.output_data_type != DATA_TYPE_FP16) {
DLOG << "error:Output Datetype error,not DATA_TYPE_FP16: "
<< args.output_data_type;
}
data_cell_in = SIZE_INT8;
data_cell_out = SIZE_FP16;
cmd = CMD_INT8_TO_FP16;
} break;
case DATA_TYPE_FP32: {
switch (args.output_data_type) {
case DATA_TYPE_FP16:
......@@ -630,10 +642,13 @@ int PerformBypass(const struct BypassArgs &args) {
break;
}
if (cmd != CMD_FP16_TO_FP16 && cmd != CMD_FP16_TO_FP32 &&
cmd != CMD_FP32_TO_FP16 && cmd != CMD_FP32_TO_FP32) {
cmd != CMD_FP32_TO_FP16 && cmd != CMD_FP32_TO_FP32 &&
cmd != CMD_INT8_TO_FP16) {
// std::cout<< " err back Error1!" <<std::endl;
return -EFAULT;
}
if ((data_cell_in != SIZE_FP16 && data_cell_in != SIZE_FP32) ||
if ((data_cell_in != SIZE_FP16 && data_cell_in != SIZE_FP32 &&
data_cell_in != SIZE_INT8) ||
(data_cell_out != SIZE_FP16 && data_cell_out != SIZE_FP32)) {
return -EFAULT;
}
......
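The conversions PerformBypass now accepts can be summarized as a (input cell size, output cell size) pair per command; a sketch, assuming the CMD_* and SIZE_* macros above are visible (CMD_FP16_TO_FP16 is the pre-existing pass-through command defined outside this hunk):

```cpp
#include <cstddef>
#include <utility>

// Element sizes selected for each bypass command; the new int8 path must pair
// an int8 input with an fp16 output. Any other command is rejected with -EFAULT.
inline std::pair<size_t, size_t> bypass_cell_sizes(int cmd) {
  switch (cmd) {
    case CMD_FP16_TO_FP32: return {SIZE_FP16, SIZE_FP32};
    case CMD_FP32_TO_FP16: return {SIZE_FP32, SIZE_FP16};
    case CMD_FP32_TO_FP32: return {SIZE_FP32, SIZE_FP32};
    case CMD_INT8_TO_FP16: return {SIZE_INT8, SIZE_FP16};  // new int8 path
    default:               return {SIZE_FP16, SIZE_FP16};  // CMD_FP16_TO_FP16
  }
}
```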
......@@ -31,6 +31,7 @@ limitations under the License. */
namespace paddle_mobile {
namespace fpga {
enum DataType {
DATA_TYPE_INT8 = 2,
DATA_TYPE_FP32 = 1,
DATA_TYPE_FP16 = 0,
};
......
......@@ -20,13 +20,10 @@ namespace operators {
template <>
bool FeedKernel<FPGA, float>::Init(FeedParam<FPGA> *param) {
auto output = param->Out();
auto input = const_cast<LoDTensor *>(param->InputX());
input->init(typeid(float));
input->Resize(output->dims());
if (output->dims().size() != 4) {
return true;
}
fpga::format_fp16_ofm(output);
return true;
}
......@@ -35,6 +32,14 @@ template <>
void FeedKernel<FPGA, float>::Compute(const FeedParam<FPGA> &param) {
auto output = param.Out();
auto input = const_cast<LoDTensor *>(param.InputX());
std::type_index input_type = input->type();
if (input_type == typeid(float)) {
input->init(typeid(float));
} else { // input_type == typeid(int8_t)
input->init(typeid(int8_t));
}
input->Resize(output->dims());
if (output->dims().size() != 4) {
size_t size = output->numel() * sizeof(float);
......@@ -48,14 +53,13 @@ void FeedKernel<FPGA, float>::Compute(const FeedParam<FPGA> &param) {
}
fpga::format_image(input);
auto output_ptr = output->data<half>();
fpga::BypassArgs args = {fpga::DATA_TYPE_FP32};
if (input_type == typeid(float)) {
auto input_ptr = input->data<float>();
auto external_ptr = reinterpret_cast<float *>(input->external_data);
float *p_data = external_ptr == nullptr ? input_ptr : external_ptr;
auto output_ptr = output->data<half>();
fpga::BypassArgs args = {fpga::DATA_TYPE_FP32};
args.input_data_type = fpga::DATA_TYPE_FP32;
args.output_data_type = fpga::DATA_TYPE_FP16;
args.input_layout_type = fpga::LAYOUT_CHW;
......@@ -69,8 +73,27 @@ void FeedKernel<FPGA, float>::Compute(const FeedParam<FPGA> &param) {
args.output.address = output_ptr;
args.output.scale_address = output->scale;
fpga::PerformBypass(args);
input->external_data = nullptr;
} else { // input_type == typeid(int8_t)
auto input_ptr = input->data<int8_t>();
auto external_ptr = reinterpret_cast<int8_t *>(input->external_data);
int8_t *p_data = external_ptr == nullptr ? input_ptr : external_ptr;
args.input_data_type = fpga::DATA_TYPE_INT8;
args.output_data_type = fpga::DATA_TYPE_FP16;
args.input_layout_type = fpga::LAYOUT_CHW;
args.output_layout_type = fpga::LAYOUT_HWC;
args.image.address = p_data;
args.image.channels = (uint32_t)input->dims()[1];
args.image.height = (uint32_t)input->dims()[2];
args.image.width = (uint32_t)input->dims()[3];
args.image.pad_height = 0;
args.image.pad_width = 0;
args.output.address = output_ptr;
args.output.scale_address = output->scale;
fpga::PerformBypass(args);
input->external_data = nullptr;
}
}
template class FeedKernel<FPGA, float>;
......
......@@ -2554,13 +2554,13 @@ class FusionDeconvBNReluParam : public ConvTransposeParam<Dtype> {
public:
FusionDeconvBNReluParam(const VariableNameMap &inputs,
const VariableNameMap &outputs,
const AttributeMap &attrs, const Scope &scope)
const AttributeMap &attrs, Scope *scope)
: ConvTransposeParam<Dtype>(inputs, outputs, attrs, scope) {
output_ = OpParam::OutFrom<GType>(outputs, scope);
input_bias_ = OpParam::InputBiasFrom<GType>(inputs, scope);
input_mean_ = OpParam::InputMeanFrom<GType>(inputs, scope);
input_scale_ = OpParam::InputScaleFrom<GType>(inputs, scope);
input_variance_ = OpParam::InputVarianceFrom<GType>(inputs, scope);
output_ = OpParam::OutFrom<GType>(outputs, *scope);
input_bias_ = OpParam::InputBiasFrom<GType>(inputs, *scope);
input_mean_ = OpParam::InputMeanFrom<GType>(inputs, *scope);
input_scale_ = OpParam::InputScaleFrom<GType>(inputs, *scope);
input_variance_ = OpParam::InputVarianceFrom<GType>(inputs, *scope);
epsilon_ = OpParam::GetAttr<float>("epsilon", attrs);
momentum_ = OpParam::GetAttr<float>("momentum", attrs);
}
......
......@@ -138,6 +138,8 @@ if (CON GREATER -1)
set(CONV_TRANSPOSE_OP ON)
set(FUSION_DECONVADDBNRELU_OP ON)
set(FUSION_DECONVADDBN_OP ON)
set(FUSION_DECONVBNRELU_OP ON)
set(CONV_OP ON)
set(ELEMENTWISEMUL_OP ON)
set(FUSION_FCRELU_OP ON)
set(RELU_OP ON)
......@@ -616,6 +618,9 @@ endif()
if (FUSION_DECONVADDBNRELU_OP)
add_definitions(-DFUSION_DECONVADDBNRELU_OP)
endif()
if (FUSION_DECONVBNRELU_OP)
add_definitions(-DFUSION_DECONVBNRELU_OP)
endif()
if (FUSION_DECONVADDBN_OP)
add_definitions(-DFUSION_DECONVADDBN_OP)
endif()
......