Commit 85d6c449 authored by qnqinan

update

Parent b19dee42
@@ -30,9 +30,9 @@ void format_image(framework::Tensor *image_tensor) {
   auto data_ptr = image_tensor->data<float>();
   auto external_ptr = reinterpret_cast<float *>(image_tensor->external_data);
   float *p_data = external_ptr == nullptr ? data_ptr : external_ptr;
+  float *old_p = p_data;
   image::format_image(&p_data, channel, height, width);
-  if (p_data != data_ptr) {
+  if (old_p != p_data) {
     image_tensor->reset_data_ptr(p_data);
   }
 }
@@ -48,9 +48,9 @@ void format_fp16_ofm(framework::Tensor *ofm_tensor) {
   auto dims = ofm_tensor->dims();
   size_t memory_size = 0;
   if (dims.size() == 4) {
-    auto channel = dims[1], height = dims[2], width = dims[3], num = dims[0];
-    memory_size = num * height * align_to_x(channel * width, IMAGE_ALIGNMENT) *
-                  sizeof(half);
+    auto channel = dims[1], height = dims[2], width = dims[3];
+    memory_size =
+        height * align_to_x(channel * width, IMAGE_ALIGNMENT) * sizeof(half);
   } else if (dims.size() == 2) {
     memory_size = align_to_x(dims[1], IMAGE_ALIGNMENT) * sizeof(half);
   } else {
@@ -162,7 +162,7 @@ void format_dwconv_filter(framework::Tensor *filter_tensor, float *scale_ptr) {
   fpga_copy(new_data, data_ptr, memory_size);
   filter::format_dwconv_filter(&new_data, num, height, width, scale_ptr);
   filter_tensor->reset_data_ptr(new_data);
-  filter_tensor->set_type(typeid(int8_t));
+  filter_tensor->set_type(typeid(int16_t));
 }
 void format_DWDconv_filter(framework::Tensor *filter_tensor, float *scale_ptr,
@@ -396,8 +396,8 @@ void expand_conv_arg(ConvArgs *arg) {
   // auto cmd = 0UL | (args.relu_enabled ? USE_RELU : 0) | USE_BIAS;
   auto cmd = 0UL | USE_BIAS;
-  auto deconv_param = ((args.deconv_tx_param.deconv_en) << 24) |
-                      ((args.deconv_tx_param.sub_conv_num) << 16) |
+  auto deconv_param = ((args.deconv_tx_param.deconv_en) << 16) |
+                      ((args.deconv_tx_param.sub_conv_num) << 8) |
                       ((args.deconv_tx_param.omit_size) << 0);
   (*arg).driver.image_address_phy = vaddr_to_paddr(args.image.address);
   (*arg).driver.sb_address_phy = vaddr_to_paddr(args.sb_address);
@@ -623,7 +623,7 @@ void fill_deconv_arg(struct DeconvArgs *arg, framework::Tensor *input,
   fpga::format_fp16_ofm(out, dims_out_new);
   auto out_ptr = out->data<half>();
   arg->output.address =
-      out_ptr +
+      (half *)out_ptr +  // NOLINT
       omit_size * sizeof(half) *
           (align_to_x(real_out_width * arg->filter_num, IMAGE_ALIGNMENT));
   arg->output.scale_address = out->scale;
@@ -713,6 +713,7 @@ void fill_deconv_arg(struct DeconvArgs *arg, framework::Tensor *input,
       }
       for (int j = 0; j < split_num; ++j) {
+        // arg->split_conv_args[i]->conv_arg[j].relu_enabled = relu_enabled;
         arg->split_conv_args[i]->conv_arg[j].output.activation.activation_type =
             activation_enable;
         arg->split_conv_args[i]
@@ -758,9 +759,9 @@ void fill_deconv_arg(struct DeconvArgs *arg, framework::Tensor *input,
           align_to_x(arg->split_conv_args[i]->conv_arg[j].filter_num,
                      FILTER_NUM_ALIGNMENT) *
           sizeof(int8_t);
-      auto filter_head =
-          &filter_ptr[j * element_num * filter_num_per_div +  // NOLINT
+      auto filter_head = &((
+          int8_t *)filter_ptr)[j * element_num * filter_num_per_div +  // NOLINT
                       i * filter_sub_conv_offset];
       arg->split_conv_args[i]->conv_arg[j].filter_address =
           fpga_malloc(filter_size);
       arg->split_conv_args[i]->vector_conv_space.push_back(
@@ -774,6 +775,19 @@ void fill_deconv_arg(struct DeconvArgs *arg, framework::Tensor *input,
       fpga_flush(arg->split_conv_args[i]->conv_arg[j].filter_address,
                  filter_size);
+      /*{
+        static int cnt = 0;
+        std::string str = "deconv_filter";
+        if(cnt <= 1){
+          cnt++;
+          str += std::to_string(cnt);
+          int8_t result = 0;
+          fpga::savefile<int8_t>(str,
+      arg->split_conv_args[i]->conv_arg[j].filter_address, filter_size, result);
+        }
+      }*/
       size_t bs_align_num = align_to_x(
           arg->split_conv_args[i]->conv_arg[j].filter_num, BS_NUM_ALIGNMENT);
       size_t bs_size = 2 * bs_align_num * sizeof(float);
@@ -789,6 +803,20 @@ void fill_deconv_arg(struct DeconvArgs *arg, framework::Tensor *input,
       memcpy(arg->split_conv_args[i]->conv_arg[j].sb_address, bs_head, bs_size);
       fpga_flush(arg->split_conv_args[i]->conv_arg[j].sb_address, bs_size);
+      /* {
+        static int cnt = 0;
+        std::string str = "deconv_sb";
+        if(cnt <= 1){
+          cnt++;
+          str += std::to_string(cnt);
+          float result = 0;
+          fpga::savefile<float>(str,
+      arg->split_conv_args[i]->conv_arg[j].sb_address, 2 * bs_align_num,
+      result);
+        }
+      }*/
       if (split_num == 1) {
         arg->split_conv_args[i]->conv_arg[j].output.address =
             arg->split_conv_args[i]->output.address;
@@ -835,13 +863,10 @@ void fill_dwconv_arg(struct DWconvArgs *arg, framework::Tensor *input,
                      int16_t leaky_relu_negative_slope, int stride_h,
                      int stride_w, int padding_h, int padding_w,
                      float *bias_ptr) {
-  auto deleter = [](void *p) { fpga_free(p); };
-  arg->vector_dwconv_space.push_back(
-      std::shared_ptr<char>(reinterpret_cast<char *>(bias_ptr), deleter));
-  auto filter_ptr = filter->data<uint8_t>();
+  auto filter_ptr = filter->data<int16_t>();
   auto input_ptr = input->data<half>();
-  auto output_ptr = out->mutable_data<half>();
+  auto output_ptr = out->data<half>();
   arg->sub_conv_num = 1;
   // arg->relu_enabled = relu_enabled;
   arg->output.activation.activation_type = activation_enable;
@@ -960,10 +985,10 @@ void fill_DWDeconv_arg(struct DWDeconvArgs *arg, framework::Tensor *input,
                sizeof(int16_t));
     arg->dw_conv_args[i]->output.scale_address =
         static_cast<float *>(fpga_malloc(2 * sizeof(float)));
-    arg->vector_dw_conv_space.push_back(std::shared_ptr<char>(  // NOLINT
+    arg->vector_dw_conv_space.push_back(std::shared_ptr<char>(
         reinterpret_cast<char *>(arg->dw_conv_args[i]->output.address),
         deleter));
-    arg->vector_dw_conv_space.push_back(std::shared_ptr<char>(  // NOLINT
+    arg->vector_dw_conv_space.push_back(std::shared_ptr<char>(
         reinterpret_cast<char *>(arg->dw_conv_args[i]->output.scale_address),
         deleter));
   }
...
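The deconv_param word written by expand_conv_arg above packs three fields into one register value: deconv_en at bit 16, sub_conv_num at bit 8, and omit_size at bit 0. A minimal standalone sketch of that packing; the struct name and field widths here are assumptions for illustration, not the real paddle-mobile definitions.

#include <cstdint>
#include <iostream>

// Hypothetical stand-in for args.deconv_tx_param; the real struct lives in
// the paddle-mobile FPGA driver headers.
struct DeconvTxParam {
  uint32_t deconv_en;     // assumed 1-bit flag, placed at bit 16
  uint32_t sub_conv_num;  // assumed to fit in bits 8..15
  uint32_t omit_size;     // assumed to fit in bits 0..7
};

// Packs the fields with the same shifts used in expand_conv_arg above.
uint32_t pack_deconv_param(const DeconvTxParam &p) {
  return (p.deconv_en << 16) | (p.sub_conv_num << 8) | (p.omit_size << 0);
}

int main() {
  DeconvTxParam p{1, 2, 3};
  std::cout << std::hex << pack_deconv_param(p) << std::endl;  // prints 10203
  return 0;
}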
@@ -43,7 +43,7 @@ bool ConvTransposeKernel<FPGA, float>::Init(ConvTransposeParam<FPGA> *param) {
   for (int i = 0; i < channel * sub_conv_n; i++) {
     bs_ptr[i + sub_conv_n * channel] = 1;
-    // bs_ptr[i] = bias_ptr[i % (channel)];
+    bs_ptr[i] = 0;  // bias_ptr[i % (channel)];
   }
   PADDLE_MOBILE_ENFORCE(param->Strides()[1] == param->Strides()[0],
...
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "operators/kernel/pad2d_kernel.h"
namespace paddle_mobile {
namespace operators {
template <>
bool Pad2dKernel<FPGA, float>::Init(Pad2dParam<FPGA> *param) {
Tensor *output = param->Out();
fpga::format_fp16_ofm(output);
return true;
}
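// Copies each input row into the output buffer, whose rows are laid out with
// an aligned stride of align_to_x(output_c * output_w, IMAGE_ALIGNMENT).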
void pad2dFunc(const framework::Tensor *input, framework::Tensor *output) {
auto input_data = (input->data<half>());
auto output_data = (output->data<half>());
auto input_c = input->dims()[1];
auto input_h = input->dims()[2];
auto input_w = input->dims()[3];
auto output_c = output->dims()[1];
auto output_w = output->dims()[3];
auto copysize = input_c * input_w;
for (int h = 0; h < input_h; ++h) {
auto input_offset = h * input_c * input_w;
auto output_offset = h * paddle_mobile::fpga::align_to_x(
output_c * output_w, IMAGE_ALIGNMENT);
memcpy((output_data + output_offset), (input_data + input_offset),
copysize * sizeof(half));
}
}
template <>
void Pad2dKernel<FPGA, float>::Compute(const Pad2dParam<FPGA> &param) {
auto in_x = param.InputX();
auto out = param.Out();
fpga::fpga_invalidate((void *)in_x->data<half>(), // NOLINT
in_x->numel() * sizeof(half));
pad2dFunc(in_x, out);
(out->scale)[0] = (in_x->scale)[0];
(out->scale)[1] = (in_x->scale)[1];
DLOG << (out->scale)[0];
DLOG << (out->scale)[1];
size_t outputSize =
out->dims()[2] *
paddle_mobile::fpga::align_to_x((out->dims()[1]) * (out->dims()[3]),
IMAGE_ALIGNMENT) *
sizeof(half);
fpga::fpga_flush(out->data<half>(), outputSize);
}
} // namespace operators
} // namespace paddle_mobile
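Both pad2dFunc and the flush size above rely on align_to_x rounding an element count up to the next multiple of IMAGE_ALIGNMENT. A minimal sketch of that rounding, assuming the usual round-up-to-multiple behaviour; the real helper is provided by the fpga API, and 16 is only a placeholder alignment value.

#include <cassert>
#include <cstddef>

// Assumed behaviour of fpga::align_to_x: round n up to a multiple of x.
inline std::size_t align_to_x(std::size_t n, std::size_t x) {
  return (n + x - 1) / x * x;
}

int main() {
  const std::size_t kImageAlignment = 16;  // placeholder for IMAGE_ALIGNMENT
  // A row of 3 channels * 100 columns occupies 300 halves, padded out to 304.
  assert(align_to_x(3 * 100, kImageAlignment) == 304);
  return 0;
}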