提交 7a8b998f 编写于 作者: qnqinan's avatar qnqinan

fix some bugs in fpga V2 track and update fpga V2 pe code

上级 344c1df7
......@@ -204,7 +204,8 @@ void fill_split_arg(struct SplitConvArgs *arg, framework::Tensor *input,
arg->conv_arg[i].image.address = input_ptr;
arg->conv_arg[i].image.scale_address = input->scale;
arg->conv_arg[i].image.channels = (uint32_t)input->dims()[1];
arg->conv_arg[i].image.channels =
(uint32_t)get_aligned_channel_num((int)(input->dims()[1])); // NOLINT
arg->conv_arg[i].image.height = (uint32_t)input->dims()[2];
arg->conv_arg[i].image.width = (uint32_t)input->dims()[3];
arg->conv_arg[i].image.pad_height = (uint32_t)padding_h;
......@@ -216,7 +217,7 @@ void fill_split_arg(struct SplitConvArgs *arg, framework::Tensor *input,
int num_after_alignment = filter::calc_aligned_num(
arg->filter_num, (int)input->dims()[1]); // NOLINT
arg->conv_arg[i].free_space =
fpga_malloc(num_after_alignment * 2 * sizeof(half));
fpga_malloc(num_after_alignment * 2 * sizeof(float)); // half
}
}
......
......@@ -16,7 +16,6 @@ limitations under the License. */
#include <memory.h>
#include <algorithm>
#include "fpga/common/fpga_common.h"
namespace paddle_mobile {
namespace fpga {
namespace filter {
......@@ -88,12 +87,25 @@ void align_filter(float **data_in, int num, int channel, int height,
*data_in = new_data;
fpga_free(temp);
}
void convert_to_fp16(float **data_in, int data_size) {
float *tmp = *data_in;
// half_float::half *tmp_data = (half_float::half *)fpga_malloc(data_size *
// sizeof(half_float::half));
int16_t *tmp_data =
(int16_t *)fpga_malloc(data_size * sizeof(int16_t)); // NOLINT
for (int i = 0; i < data_size; i++) {
// tmp_data[i] = (half_float::half)((*data_in)[i]);
tmp_data[i] = fp32_2_fp16((*data_in)[i]);
}
*data_in = (float *)tmp_data; // NOLINT
fpga_free(tmp);
}
void format_filter(float **data_in, int num, int channel, int height, int width,
int group_num, float max) {
convert_to_hwc(data_in, num, channel, height, width);
align_filter(data_in, num, channel, height, width);
int pixel_num = calc_aligned_total_pixel_num(num, channel, height, width);
convert_to_fp16(data_in, pixel_num);
fpga_flush(*data_in, pixel_num * sizeof(float));
}
......@@ -115,6 +127,7 @@ void format_fc_filter(float **data_in, int num, int channel, int height,
convert_fc_filter(data_in, num, chw);
align_filter(data_in, num, channel, height, width);
int pixel_num = calc_aligned_total_pixel_num(num, channel, height, width);
convert_to_fp16(data_in, pixel_num);
fpga_flush(*data_in, pixel_num * sizeof(float));
}
......
此差异已折叠。
......@@ -113,6 +113,12 @@ int fpga_invalidate(void *address, size_t size) {
return 0;
#endif
}
uint64_t vaddr_to_paddr(void *address) {
#ifdef PADDLE_MOBILE_ZU5
return driver::vaddr_to_paddr(address);
#else
return 0;
#endif
}
} // namespace fpga
} // namespace paddle_mobile
......@@ -13,7 +13,7 @@ See the License for the specific language governing permissions and
limitations under the License. */
#include "operators/kernel/feed_kernel.h"
#include "fpga/V2/filter.h"
namespace paddle_mobile {
namespace operators {
......@@ -24,7 +24,6 @@ bool FeedKernel<FPGA, float>::Init(FeedParam<FPGA> *param) {
fpga::format_fp16_ofm(output, aligned_channel);
return true;
}
template <>
void FeedKernel<FPGA, float>::Compute(const FeedParam<FPGA> &param) {
auto input =
......@@ -33,6 +32,9 @@ void FeedKernel<FPGA, float>::Compute(const FeedParam<FPGA> &param) {
auto input_ptr = input->data<float>();
Tensor *output = param.Out();
auto output_ptr = output->data<float>();
auto channel = input->dims()[1];
uint32_t aligned_channels =
fpga::filter::calc_aligned_channel((int)channel); // NOLINT
fpga::BypassArgs args = {fpga::DATA_TYPE_FP32};
......@@ -41,7 +43,7 @@ void FeedKernel<FPGA, float>::Compute(const FeedParam<FPGA> &param) {
args.input_layout_type = fpga::LAYOUT_CHW;
args.output_layout_type = fpga::LAYOUT_HWC;
args.image.address = reinterpret_cast<void *>(input_ptr);
args.image.channels = (uint32_t)input->dims()[1];
args.image.channels = aligned_channels;
args.image.height = (uint32_t)input->dims()[2];
args.image.width = (uint32_t)input->dims()[3];
args.image.pad_height = 0;
......
......@@ -25,7 +25,7 @@ bool SoftmaxKernel<FPGA, float>::Init(SoftmaxParam<FPGA> *param) {
auto input_ptr = input->data<float>();
auto float_input = new Tensor;
float_input->mutable_data<float>({1, input->dims()[1]});
fpga::format_fp32_ofm(float_input, 8);
fpga::format_fp32_ofm(float_input, 1024);
fpga::BypassArgs args = {fpga::DATA_TYPE_FP16};
args.input_layout_type = fpga::LAYOUT_HWC;
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册