Commit b574d803 authored by qnqinan, committed by GitHub

Merge pull request #1487 from qnqinan/develop

Add an int8-to-fp16 convert function in the FPGA track (fixes #1486)
@@ -28,13 +28,25 @@ namespace fpga {

 void format_image(framework::Tensor *image_tensor) {
   auto dims = image_tensor->dims();
   auto channel = dims[1], height = dims[2], width = dims[3];
-  auto data_ptr = image_tensor->data<float>();
-  auto external_ptr = reinterpret_cast<float *>(image_tensor->external_data);
-  float *p_data = external_ptr == nullptr ? data_ptr : external_ptr;
-
-  image::format_image(&p_data, channel, height, width);
-  if (p_data != data_ptr && external_ptr == nullptr) {
-    image_tensor->reset_data_ptr(p_data);
+  std::type_index input_type = image_tensor->type();
+  if (input_type == typeid(float)) {
+    auto data_ptr = image_tensor->data<float>();
+    auto external_ptr = reinterpret_cast<float *>(image_tensor->external_data);
+    float *p_data = external_ptr == nullptr ? data_ptr : external_ptr;
+    image::format_image<float>(&p_data, channel, height, width);
+    if (p_data != data_ptr && external_ptr == nullptr) {
+      image_tensor->reset_data_ptr(p_data);
+    }
+  } else {
+    auto data_ptr = image_tensor->data<int8_t>();
+    auto external_ptr =
+        reinterpret_cast<int8_t *>(image_tensor->external_data);
+    int8_t *p_data = external_ptr == nullptr ? data_ptr : external_ptr;
+    image::format_image<int8_t>(&p_data, channel, height, width);
+    if (p_data != data_ptr && external_ptr == nullptr) {
+      image_tensor->reset_data_ptr(p_data);
+    }
   }
 }
......
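Review note: format_image now keys its dispatch on std::type_index instead of adding a second overload, so a single entry point serves both element types. A minimal self-contained sketch of the same idiom (all names below are illustrative, not from the repo):

#include <cstdint>
#include <cstdio>
#include <typeindex>

// Templated worker, analogous to image::format_image<T>.
template <typename T>
void format_rows(T *data, int count) {
  (void)data;  // a real implementation would realign the rows here
  std::printf("formatting %d element(s) of %zu byte(s) each\n", count,
              sizeof(T));
}

// Runtime dispatch on a type tag, analogous to fpga::format_image above.
void format(void *data, std::type_index type, int count) {
  if (type == typeid(float)) {
    format_rows(static_cast<float *>(data), count);
  } else {  // this patch assumes the only other feed type is int8_t
    format_rows(static_cast<int8_t *>(data), count);
  }
}

int main() {
  float f[4] = {0};
  int8_t q[4] = {0};
  format(f, typeid(float), 4);   // 4-byte elements
  format(q, typeid(int8_t), 4);  // 1-byte elements
}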
@@ -13,9 +13,6 @@ See the License for the specific language governing permissions and
 limitations under the License. */

 #include "fpga/V1/image.h"
-#include <memory.h>
-#include <algorithm>
-#include "fpga/common/fpga_common.h"

 namespace paddle_mobile {
 namespace fpga {
@@ -58,37 +55,6 @@ void convert_to_chw(float **data_in, int channel, int height, int width,
   *data_in = data_tmp;
 }

-void align_element_conv(float **data_in, int height, int cw) {
-  int h = 0;
-  int align_cw = align_to_x(cw, IMAGE_ALIGNMENT);
-  float *data_tmp =
-      (float *)fpga_malloc(height * align_cw * sizeof(float));  // NOLINT
-  memset(data_tmp, 0, height * align_cw * sizeof(float));
-
-  for (h = 0; h < height; h++) {
-    memcpy((void *)(data_tmp + h * align_cw),  // NOLINT
-           (void *)(*data_in + h * cw),        // NOLINT
-           cw * sizeof(float));
-  }
-
-  *data_in = data_tmp;
-}
-
-void format_image(float **data_in, int channel, int height, int width) {
-  // convert_to_hwc(data_in, channel, height, width);
-  int cw = channel * width;
-  int align_cw = align_to_x(cw, IMAGE_ALIGNMENT);
-  if (align_cw != cw) {
-    float *hwc_temp = *data_in;
-    align_element_conv(data_in, height, channel * width);
-    fpga_free(hwc_temp);
-  }
-  fpga_flush(*data_in, align_to_x(channel * width, IMAGE_ALIGNMENT) * height *
-                           sizeof(float));
-}
-
 void concat_images(int16_t **images_in, float **scales_in, void *image_out,
                    float *scale_out, int image_num, uint32_t *channel_num,
                    int height, int width) {
......
@@ -14,8 +14,10 @@ limitations under the License. */

 #pragma once

+#include <memory.h>
+#include <algorithm>
 #include <cstdint>
+#include "fpga/common/fpga_common.h"

 namespace paddle_mobile {
 namespace fpga {
 namespace image {
@@ -24,10 +26,42 @@ void convert_to_hwc(float** data_in, int channel, int height, int width,
                     int num = 1);
 void convert_to_chw(float** data_in, int channel, int height, int width,
                     int num = 1);
-void align_element_conv(float** data_in, int height, int cw);
-void format_image(float** data_in, int channel, int height, int width);
+// template <typename Dtype>
+// void align_element_conv(Dtype** data_in, int height, int cw);
+// template <typename T>
+// void format_image(T** data_in, int channel, int height, int width);
+template <typename Dtype>
+void align_element_conv(Dtype** data_in, int height, int cw);
+
+template <typename Dtype>
+void align_element_conv(Dtype** data_in, int height, int cw) {
+  int h = 0;
+  int align_cw = align_to_x(cw, IMAGE_ALIGNMENT);
+  Dtype* data_tmp =
+      (Dtype*)fpga_malloc(height * align_cw * sizeof(Dtype));  // NOLINT
+  memset(data_tmp, 0, height * align_cw * sizeof(Dtype));
+
+  for (h = 0; h < height; h++) {
+    memcpy((void*)(data_tmp + h * align_cw),  // NOLINT
+           (void*)(*data_in + h * cw),        // NOLINT
+           cw * sizeof(Dtype));
+  }
+
+  *data_in = data_tmp;
+}
+
+template <typename T>
+void format_image(T** data_in, int channel, int height, int width) {
+  int cw = channel * width;
+  int align_cw = align_to_x(cw, IMAGE_ALIGNMENT);
+  if (align_cw != cw) {
+    T* hwc_temp = *data_in;
+    align_element_conv(data_in, height, channel * width);
+    fpga_free(hwc_temp);
+  }
+  fpga_flush(*data_in,
+             align_to_x(channel * width, IMAGE_ALIGNMENT) * height * sizeof(T));
+}
+
 // Concat featuremaps along channel direction
 void concat_images(int16_t** images_in, float** scales_in, void* image_out,
                    float* scale_out, int image_num, uint32_t* channel_num,
......
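Review note: the templated align_element_conv zero-pads each row of cw = channel * width elements up to a multiple of IMAGE_ALIGNMENT, and format_image<T> then frees the old buffer and flushes the aligned one. A standalone sketch of the padding step, with malloc/memset standing in for fpga_malloc and an assumed alignment of 16 elements:

#include <cstdint>
#include <cstdio>
#include <cstdlib>
#include <cstring>

constexpr int kImageAlignment = 16;  // stand-in for IMAGE_ALIGNMENT (assumed)

// Round n up to the next multiple of x (same contract as align_to_x).
int align_to_x(int n, int x) { return (n + x - 1) / x * x; }

// Zero-padded row alignment, mirroring align_element_conv<Dtype>.
template <typename Dtype>
void align_rows(Dtype **data_in, int height, int cw) {
  int align_cw = align_to_x(cw, kImageAlignment);
  Dtype *tmp = static_cast<Dtype *>(
      std::malloc(height * align_cw * sizeof(Dtype)));  // fpga_malloc stand-in
  std::memset(tmp, 0, height * align_cw * sizeof(Dtype));
  for (int h = 0; h < height; h++) {
    std::memcpy(tmp + h * align_cw, *data_in + h * cw, cw * sizeof(Dtype));
  }
  *data_in = tmp;  // the caller frees the old buffer, as format_image<T> does
}

int main() {
  // 3 channels * 2 width = 6 elements per row, padded out to 16 per row.
  int cw = 6, height = 2;
  int8_t *img = static_cast<int8_t *>(std::malloc(height * cw));
  for (int i = 0; i < height * cw; i++) img[i] = static_cast<int8_t>(i + 1);
  int8_t *old = img;
  align_rows(&img, height, cw);
  std::free(old);
  std::printf("row stride: %d element(s)\n", align_to_x(cw, kImageAlignment));
  std::free(img);
}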
@@ -38,10 +38,12 @@ using namespace std;  // NOLINT
 #define CMD_FP16_TO_FP32 1
 #define CMD_FP32_TO_FP16 2
 #define CMD_FP32_TO_FP32 3
+#define CMD_INT8_TO_FP16 4
 // bypass macro
 #define SIZE_FP16 2
 #define SIZE_FP32 4
+#define SIZE_INT8 1

 #define PE_IRQ_TIMEOUT 1000000
@@ -607,6 +609,16 @@ int PerformBypass(const struct BypassArgs &args) {
       }
     } break;
+    case DATA_TYPE_INT8: {
+      if (args.output_data_type != DATA_TYPE_FP16) {
+        DLOG << "error: output data type error, not DATA_TYPE_FP16: "
+             << args.output_data_type;
+      }
+      data_cell_in = SIZE_INT8;
+      data_cell_out = SIZE_FP16;
+      cmd = CMD_INT8_TO_FP16;
+    } break;
     case DATA_TYPE_FP32: {
       switch (args.output_data_type) {
         case DATA_TYPE_FP16:
@@ -630,10 +642,13 @@ int PerformBypass(const struct BypassArgs &args) {
       break;
   }

   if (cmd != CMD_FP16_TO_FP16 && cmd != CMD_FP16_TO_FP32 &&
-      cmd != CMD_FP32_TO_FP16 && cmd != CMD_FP32_TO_FP32) {
+      cmd != CMD_FP32_TO_FP16 && cmd != CMD_FP32_TO_FP32 &&
+      cmd != CMD_INT8_TO_FP16) {
+    // std::cout<< " err back Error1!" <<std::endl;
     return -EFAULT;
   }
-  if ((data_cell_in != SIZE_FP16 && data_cell_in != SIZE_FP32) ||
+  if ((data_cell_in != SIZE_FP16 && data_cell_in != SIZE_FP32 &&
+       data_cell_in != SIZE_INT8) ||
       (data_cell_out != SIZE_FP16 && data_cell_out != SIZE_FP32)) {
     return -EFAULT;
   }
......
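Review note: with the new macros, the DATA_TYPE_INT8 case, and the relaxed sanity checks, PerformBypass accepts exactly one extra route: int8 in, fp16 out. A hedged reference of the routes visible in this diff; the numeric value of CMD_FP16_TO_FP16 is not shown here and is assumed to be 0:

#include <cstdio>
#include <stdexcept>

enum DataType { DATA_TYPE_FP16 = 0, DATA_TYPE_FP32 = 1, DATA_TYPE_INT8 = 2 };

struct Route {
  int cmd;       // CMD_* value
  int cell_in;   // SIZE_* of one input element, in bytes
  int cell_out;  // SIZE_* of one output element, in bytes
};

constexpr int kCmdFp16ToFp16 = 0;  // assumed; value not shown in this diff

// Legal bypass routes after this patch; anything else returns -EFAULT.
Route select_route(DataType in, DataType out) {
  if (in == DATA_TYPE_FP16 && out == DATA_TYPE_FP16)
    return {kCmdFp16ToFp16, 2, 2};
  if (in == DATA_TYPE_FP16 && out == DATA_TYPE_FP32)
    return {1, 2, 4};  // CMD_FP16_TO_FP32
  if (in == DATA_TYPE_FP32 && out == DATA_TYPE_FP16)
    return {2, 4, 2};  // CMD_FP32_TO_FP16
  if (in == DATA_TYPE_FP32 && out == DATA_TYPE_FP32)
    return {3, 4, 4};  // CMD_FP32_TO_FP32
  if (in == DATA_TYPE_INT8 && out == DATA_TYPE_FP16)
    return {4, 1, 2};  // CMD_INT8_TO_FP16, added by this patch
  throw std::invalid_argument("unsupported bypass conversion");
}

int main() {
  Route r = select_route(DATA_TYPE_INT8, DATA_TYPE_FP16);
  std::printf("cmd=%d, %d byte(s) in -> %d byte(s) out\n", r.cmd, r.cell_in,
              r.cell_out);
}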
@@ -31,6 +31,7 @@ limitations under the License. */
 namespace paddle_mobile {
 namespace fpga {

 enum DataType {
+  DATA_TYPE_INT8 = 2,
   DATA_TYPE_FP32 = 1,
   DATA_TYPE_FP16 = 0,
 };
......
@@ -20,13 +20,10 @@ namespace operators {
 template <>
 bool FeedKernel<FPGA, float>::Init(FeedParam<FPGA> *param) {
   auto output = param->Out();
-  auto input = const_cast<LoDTensor *>(param->InputX());
-  input->init(typeid(float));
-  input->Resize(output->dims());
   if (output->dims().size() != 4) {
     return true;
   }
   fpga::format_fp16_ofm(output);
   return true;
 }
@@ -35,6 +32,14 @@ template <>
 void FeedKernel<FPGA, float>::Compute(const FeedParam<FPGA> &param) {
   auto output = param.Out();
   auto input = const_cast<LoDTensor *>(param.InputX());
+  std::type_index input_type = input->type();
+
+  if (input_type == typeid(float)) {
+    input->init(typeid(float));
+  } else {  // input_type == typeid(int8_t)
+    input->init(typeid(int8_t));
+  }
+  input->Resize(output->dims());

   if (output->dims().size() != 4) {
     size_t size = output->numel() * sizeof(float);
@@ -48,29 +53,47 @@ void FeedKernel<FPGA, float>::Compute(const FeedParam<FPGA> &param) {
   }
   fpga::format_image(input);
-  auto input_ptr = input->data<float>();
-  auto external_ptr = reinterpret_cast<float *>(input->external_data);
-  float *p_data = external_ptr == nullptr ? input_ptr : external_ptr;
   auto output_ptr = output->data<half>();
   fpga::BypassArgs args = {fpga::DATA_TYPE_FP32};

-  args.input_data_type = fpga::DATA_TYPE_FP32;
-  args.output_data_type = fpga::DATA_TYPE_FP16;
-  args.input_layout_type = fpga::LAYOUT_CHW;
-  args.output_layout_type = fpga::LAYOUT_HWC;
-  args.image.address = p_data;
-  args.image.channels = (uint32_t)input->dims()[1];
-  args.image.height = (uint32_t)input->dims()[2];
-  args.image.width = (uint32_t)input->dims()[3];
-  args.image.pad_height = 0;
-  args.image.pad_width = 0;
-  args.output.address = output_ptr;
-  args.output.scale_address = output->scale;
-  fpga::PerformBypass(args);
-
-  input->external_data = nullptr;
+  if (input_type == typeid(float)) {
+    auto input_ptr = input->data<float>();
+    auto external_ptr = reinterpret_cast<float *>(input->external_data);
+    float *p_data = external_ptr == nullptr ? input_ptr : external_ptr;
+
+    args.input_data_type = fpga::DATA_TYPE_FP32;
+    args.output_data_type = fpga::DATA_TYPE_FP16;
+    args.input_layout_type = fpga::LAYOUT_CHW;
+    args.output_layout_type = fpga::LAYOUT_HWC;
+    args.image.address = p_data;
+    args.image.channels = (uint32_t)input->dims()[1];
+    args.image.height = (uint32_t)input->dims()[2];
+    args.image.width = (uint32_t)input->dims()[3];
+    args.image.pad_height = 0;
+    args.image.pad_width = 0;
+    args.output.address = output_ptr;
+    args.output.scale_address = output->scale;
+    fpga::PerformBypass(args);
+    input->external_data = nullptr;
+  } else {  // input_type == typeid(int8_t)
+    auto input_ptr = input->data<int8_t>();
+    auto external_ptr = reinterpret_cast<int8_t *>(input->external_data);
+    int8_t *p_data = external_ptr == nullptr ? input_ptr : external_ptr;
+
+    args.input_data_type = fpga::DATA_TYPE_INT8;
+    args.output_data_type = fpga::DATA_TYPE_FP16;
+    args.input_layout_type = fpga::LAYOUT_CHW;
+    args.output_layout_type = fpga::LAYOUT_HWC;
+    args.image.address = p_data;
+    args.image.channels = (uint32_t)input->dims()[1];
+    args.image.height = (uint32_t)input->dims()[2];
+    args.image.width = (uint32_t)input->dims()[3];
+    args.image.pad_height = 0;
+    args.image.pad_width = 0;
+    args.output.address = output_ptr;
+    args.output.scale_address = output->scale;
+    fpga::PerformBypass(args);
+    input->external_data = nullptr;
+  }
 }

 template class FeedKernel<FPGA, float>;
......
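Review note: the diff does not show whether the hardware also applies the image scale inside CMD_INT8_TO_FP16, so treat the following as a pure-widening software reference only (an assumption, not the repo's kernel). Every int8 value fits exactly in fp16's 11-bit significand, so the mapping needs no rounding path:

#include <cstdint>
#include <cstdio>

// Exact int8 -> IEEE fp16 bit pattern. All of -128..127 are representable,
// so the conversion is lossless.
uint16_t int8_to_fp16(int8_t v) {
  if (v == 0) return 0;
  uint16_t sign = v < 0 ? 0x8000 : 0;
  int m = v < 0 ? -static_cast<int>(v) : v;  // magnitude, 1..128
  int p = 0;
  while ((1 << (p + 1)) <= m) p++;  // position of the leading one bit
  uint16_t exp = static_cast<uint16_t>((p + 15) << 10);   // biased exponent
  uint16_t frac = static_cast<uint16_t>((m - (1 << p)) << (10 - p));
  return sign | exp | frac;
}

int main() {
  std::printf("%4d -> 0x%04x\n", 1, int8_to_fp16(1));      // 0x3c00 (1.0)
  std::printf("%4d -> 0x%04x\n", -2, int8_to_fp16(-2));    // 0xc000 (-2.0)
  std::printf("%4d -> 0x%04x\n", 127, int8_to_fp16(127));  // 0x57f0 (127.0)
}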
@@ -2554,13 +2554,13 @@ class FusionDeconvBNReluParam : public ConvTransposeParam<Dtype> {
  public:
   FusionDeconvBNReluParam(const VariableNameMap &inputs,
                           const VariableNameMap &outputs,
-                          const AttributeMap &attrs, const Scope &scope)
+                          const AttributeMap &attrs, Scope *scope)
       : ConvTransposeParam<Dtype>(inputs, outputs, attrs, scope) {
-    output_ = OpParam::OutFrom<GType>(outputs, scope);
-    input_bias_ = OpParam::InputBiasFrom<GType>(inputs, scope);
-    input_mean_ = OpParam::InputMeanFrom<GType>(inputs, scope);
-    input_scale_ = OpParam::InputScaleFrom<GType>(inputs, scope);
-    input_variance_ = OpParam::InputVarianceFrom<GType>(inputs, scope);
+    output_ = OpParam::OutFrom<GType>(outputs, *scope);
+    input_bias_ = OpParam::InputBiasFrom<GType>(inputs, *scope);
+    input_mean_ = OpParam::InputMeanFrom<GType>(inputs, *scope);
+    input_scale_ = OpParam::InputScaleFrom<GType>(inputs, *scope);
+    input_variance_ = OpParam::InputVarianceFrom<GType>(inputs, *scope);
     epsilon_ = OpParam::GetAttr<float>("epsilon", attrs);
     momentum_ = OpParam::GetAttr<float>("momentum", attrs);
   }
......
@@ -138,6 +138,8 @@ if (CON GREATER -1)
     set(CONV_TRANSPOSE_OP ON)
     set(FUSION_DECONVADDBNRELU_OP ON)
     set(FUSION_DECONVADDBN_OP ON)
+    set(FUSION_DECONVBNRELU_OP ON)
+    set(CONV_OP ON)
     set(ELEMENTWISEMUL_OP ON)
     set(FUSION_FCRELU_OP ON)
     set(RELU_OP ON)
@@ -616,6 +618,9 @@ endif()
 if (FUSION_DECONVADDBNRELU_OP)
     add_definitions(-DFUSION_DECONVADDBNRELU_OP)
 endif()
+if (FUSION_DECONVBNRELU_OP)
+    add_definitions(-DFUSION_DECONVBNRELU_OP)
+endif()
 if (FUSION_DECONVADDBN_OP)
     add_definitions(-DFUSION_DECONVADDBN_OP)
 endif()
......