Commit 51d144f3 authored by C chonwhite

format code

Parent 4dddc907
......@@ -205,8 +205,8 @@ if(LITE_WITH_LIGHT_WEIGHT_FRAMEWORK AND WITH_TESTING)
ARGS --cl_path=${CMAKE_SOURCE_DIR}/lite/backends/opencl
--model_dir=${LITE_MODEL_DIR}/inception_v4 SERIAL)
add_dependencies(test_inceptionv4 extern_lite_download_inception_v4_simple_tar_gz)
# lite_cc_test(test_ocr_attention SRCS ocr_attention_test.cc
# DEPS ${lite_model_test_DEPS})
lite_cc_test(test_ocr_attention_fpga SRCS ocr_attention_test_fpga.cc
DEPS ${lite_model_test_DEPS})
# lite_cc_test(model_run_test_image SRCS model_run_test_image.cc
# DEPS ${lite_model_test_DEPS}
......
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include <gflags/gflags.h>
#include <gtest/gtest.h>
#include <fstream>
#include <iostream>
#include <string>
#include <vector>
#include "lite/api/cxx_api.h"
#include "lite/api/paddle_use_kernels.h"
#include "lite/api/paddle_use_ops.h"
#include "lite/api/paddle_use_passes.h"
#include "lite/api/test_helper.h"
#include "lite/core/op_registry.h"
DEFINE_string(input_file, "", "input_file");
namespace paddle {
namespace lite {
// float* temp_data = new float(33 * 10 * 23);
// std::vector<std::string> GetDirectoryFiles(const std::string& dir) {
// std::vector<std::string> files;
// std::shared_ptr<DIR> directory_ptr(opendir(dir.c_str()),
// [](DIR* dir) { dir&& closedir(dir); });
// struct dirent* dirent_ptr;
// if (!directory_ptr) {
// std::cout << "Error opening : " << std::strerror(errno) << dir <<
// std::endl;
// return files;
// }
// while ((dirent_ptr = readdir(directory_ptr.get())) != nullptr) {
// files.push_back(std::string(dirent_ptr->d_name));
// }
// return files;
// }
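// Reads `num` whitespace-separated float values from the text file at `path` into `data`.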
void read_from_file(const std::string& path, float* data, int num) {
std::ifstream file_stream;
file_stream.open(path);
if (!file_stream) {
std::cout << "failed to open file: " << path << std::endl;
exit(-1);
}
for (int i = 0; i < num; ++i) {
float value = 0;
file_stream >> value;
data[i] = value;
}
}
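// Converts a CHW-layout buffer into HWC layout (channel becomes the fastest-varying dimension).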
void chw_to_hwc(float* src, float* dst, int channel, int height, int width) {
int amount_per_row = width * channel;
int index = 0;
for (int c = 0; c < channel; c++) {
for (int h = 0; h < height; h++) {
int offset_height = h * amount_per_row;
for (int w = 0; w < width; w++) {
int dst_index = offset_height + w * channel + c;
dst[dst_index] = src[index];
index = index + 1;
}
}
}
}
void TestModel(const std::vector<Place>& valid_places,
const Place& preferred_place,
bool use_npu = false) {
DeviceInfo::Init();
DeviceInfo::Global().SetRunMode(lite_api::LITE_POWER_HIGH, FLAGS_threads);
lite::Predictor predictor;
// predictor.Build(FLAGS_model_dir, "", "", preferred_place, valid_places);
predictor.Build("", "attention/model", "attention/params", valid_places);
auto* input_tensor = predictor.GetInput(0);
// input_tensor->Resize(DDim(std::vector<DDim::value_type>({1, 1, 48, 512})));
input_tensor->Resize(DDim(std::vector<DDim::value_type>({1, 1, 100, 200})));
auto* data = input_tensor->mutable_data<float>();
auto item_size = input_tensor->dims().production();
for (int i = 0; i < item_size; i++) {
data[i] = 1;
}
// std::ifstream file_stream(FLAGS_input_file);
// // file_stream.open(path);
// if (!file_stream.good()) {
// std::cout << "file: " << FLAGS_input_file << " dones not exist!\n";
// exit(-1);
// return;
// }
// read_from_file("n7cu17.data", data, 100 * 200);
read_from_file(FLAGS_input_file, data, 100 * 200);
// read_from_file("t.data", data, 48 * 512);
// for (int i = 0;i < 48 * 512;i++ ) {
// std::cout << ":" << data[i] << std::endl;
// }
//=============================================
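// Input 1: initial beam-search ids (a single id 0) with a two-level LoD of {{0, 1}, {0, 1}}.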
auto* init_ids = predictor.GetInput(1);
init_ids->Resize(DDim(std::vector<DDim::value_type>({1, 1})));
auto* data_ids = init_ids->mutable_data<float>();
auto ids_size = init_ids->dims().production();
for (int i = 0; i < ids_size; i++) {
data_ids[i] = 0;
}
auto lod_ids = init_ids->mutable_lod();
std::vector<std::vector<uint64_t>> lod_i{{0, 1}, {0, 1}};
*lod_ids = lod_i;
//=============================================
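// Input 2: initial beam-search scores, zero-filled, with the same LoD structure as the ids.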
auto* init_scores = predictor.GetInput(2);
init_scores->Resize(DDim(std::vector<DDim::value_type>({1, 1})));
auto* data_scores = init_scores->mutable_data<float>();
auto scores_size = init_scores->dims().production();
for (int i = 0; i < scores_size; i++) {
data_scores[i] = 0;
}
auto lod_scores = init_scores->mutable_lod();
std::vector<std::vector<uint64_t>> lod_s{{0, 1}, {0, 1}};
*lod_scores = lod_s;
//=============================================
auto* position_encoding = predictor.GetInput(3);
position_encoding->Resize(
DDim(std::vector<DDim::value_type>({1, 33, 10, 23})));
auto* position_encoding_data = position_encoding->mutable_data<float>();
float* temp_data = position_encoding_data;
std::cout << "====================== 1\n";
for (int i = 0; i < position_encoding->dims().production(); ++i) {
temp_data[i] = 0;
}
std::cout << "====================== 2\n";
int index = 0;
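// Fill the 1 x 33 x 10 x 23 position encoding: the first 10 channels one-hot encode the row index,
// the remaining 23 channels one-hot encode the column index.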
for (int i = 0; i < 10; i++) {
for (int row = 0; row < 10; row++) {
for (int col = 0; col < 23; col++) {
if (i == row) {
temp_data[index] = 1.0f;
} else {
temp_data[index] = 0.0f;
}
index++;
}
}
}
std::cout << "====================== 3\n";
for (int i = 0; i < 23; i++) {
for (int row = 0; row < 10; row++) {
for (int col = 0; col < 23; col++) {
if (i == col) {
temp_data[index] = 1.0f;
} else {
temp_data[index] = 0.0f;
}
index++;
}
}
}
std::cout << "====================== 4\n";
// chw_to_hwc(temp_data, position_encoding_data, 33, 10, 23);
// delete[] temp_data;
// read_from_file("position_encoding.data", position_encoding_data, 33 * 10 *
// 23);
// position_encoding->ZynqTensor()->readFromFile("position_encoding.data");
// exit(-1);
// for (int i = 0; i < FLAGS_warmup; ++i) {
// predictor.Run();
// }
auto start = GetCurrentUS();
for (int i = 0; i < FLAGS_repeats; ++i) {
predictor.Run();
}
std::cout << "================== Speed Report ===================\n";
std::cout << "Model: " << FLAGS_model_dir << ", threads num " << FLAGS_threads
<< ", warmup: " << FLAGS_warmup << ", repeats: " << FLAGS_repeats
<< ", spend " << (GetCurrentUS() - start) / FLAGS_repeats / 1000.0
<< " ms in average." << std::endl;
// std::vector<std::vector<float>> results;
// // i = 1
// results.emplace_back(std::vector<float>(
// {0.00019130898, 9.467885e-05, 0.00015971427, 0.0003650665,
// 0.00026431272, 0.00060884043, 0.0002107942, 0.0015819625,
// 0.0010323516, 0.00010079765, 0.00011006987, 0.0017364529,
// 0.0048292773, 0.0013995157, 0.0018453331, 0.0002428986,
// 0.00020211363, 0.00013668182, 0.0005855956, 0.00025901722}));
auto* out = predictor.GetOutput(0);
// ASSERT_EQ(out->dims().size(), 2);
// ASSERT_EQ(out->dims()[0], 1);
// ASSERT_EQ(out->dims()[1], 1000);
//
// int step = 50;
for (int i = 0; i < 10; i++) {
// std::cout << ":" << out->data<float>()[i] << std::endl;
}
// for (int i = 0; i < results.size(); ++i) {
// for (int j = 0; j < results[i].size(); ++j) {
// EXPECT_NEAR(out->data<float>()[j * step + (out->dims()[1] * i)],
// results[i][j],
// 1e-6);
// }
// }
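// Dump the raw output values to plate_data/<file name> (the first nine characters of
// FLAGS_input_file are assumed to be a directory prefix and are stripped).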
std::string file = "plate_data/" + FLAGS_input_file.substr(9);
std::cout << "file:::" << file << std::endl;
std::ofstream ofs;
ofs.open(file);
for (int i = 0; i < out->dims().production(); i++) {
float value = out->data<float>()[i];
ofs << value << std::endl;
}
ofs.close();
}
TEST(OcrAttention, test_arm) {
std::vector<Place> valid_places({
Place{TARGET(kFPGA), PRECISION(kFP16), DATALAYOUT(kNHWC)},
Place{TARGET(kHost), PRECISION(kFloat)},
Place{TARGET(kARM), PRECISION(kFloat)},
});
// Place{TARGET(kFPGA), PRECISION(kFP16), DATALAYOUT(kNHWC)},
TestModel(valid_places, Place{TARGET(kARM), PRECISION(kFloat)});
}
} // namespace lite
} // namespace paddle
......@@ -20,14 +20,8 @@ namespace zynqmp {
DLEngine::DLEngine() {
open_device();
int ret = get_device_info(info_);
// filter::set_filter_capacity(2048);
filter::set_filter_capacity(info_.filter_cap);
filter::set_colunm(info_.colunm);
std::cout << " version:" << info_.version;
std::cout << " device_type:" << info_.device_type;
std::cout << " filter_cap:" << info_.filter_cap;
std::cout << " colunm:" << info_.colunm << std::endl;
}
} // namespace zynqmp
......
......@@ -30,8 +30,7 @@ class DLEngine {
DeviceInfo& deviceInfo();
// bool isZU3() { return info_.device_type / 100 == 3; }
bool isZU3() { return true; }
bool isZU3() { return info_.device_type / 100 == 3; }
float* out_data = nullptr;
......
......@@ -61,8 +61,6 @@ void reset_device() {
// memory management;
void *fpga_malloc(size_t size) {
// std::cout << "fpga malloc: 0x" << std::hex << size << std::dec << " (" <<
// size << ") - ";
#ifdef ENABLE_DEBUG
// std::cout << "fpga_malloc:" << size << std::endl;
#endif
......@@ -73,7 +71,6 @@ void *fpga_malloc(size_t size) {
std::cout << "not enough memory !";
exit(-1);
}
// std::cout << std::hex << ptr << std::dec << std::endl;
memory_map.insert(std::make_pair(ptr, size));
memory_size += size;
if (memory_size > memory_size_max) {
......@@ -91,8 +88,6 @@ size_t fpga_get_memory_size_max() { return memory_size_max; }
size_t fpga_diagnose_memory(int detailed) {
size_t total = 0;
// size_t size = 0;
// int i = 0;
auto iter = memory_map.begin(); // std::map<void *, size_t>::iterator
while (iter != memory_map.end()) {
total += iter->second;
......@@ -108,11 +103,8 @@ void fpga_free(void *ptr) {
size = iter->second;
memory_map.erase(iter);
}
memory_size -= size;
#ifdef PADDLE_OS_LINUX
munmap(ptr, size);
#else
free(ptr);
......@@ -129,9 +121,6 @@ int fpga_flush(void *address, size_t size) {
}
int fpga_invalidate(void *address, size_t size) {
// std::cout <<
// "=================================================================================="
// << std::endl;
struct MemoryCacheArgs args;
args.address = address;
args.size = size;
......@@ -162,84 +151,21 @@ int fpga_reset() {
}
int ioctl_conv(const struct ConvArgs &args) {
#ifdef ENABLE_DEBUG
// std::cout << "======Compute Basic Conv======";
// std::cout << " relu_enabled:" << args.relu_enabled
// << " sb_address:" << args.sb_address
// << " filter_address:" << args.filter_address
// << " filter_num:" << args.filter_num
// << " group_num:" << args.group_num;
// std::cout << " image_address:" << args.image.address
// << " image_scale_address:" << args.image.scale_address
// << " image_channels:" << args.image.channels
// << " image_height:" << args.image.height
// << " image_width:" << args.image.width
// << " pad_height:" << args.image.pad_height
// << " pad_width:" << args.image.pad_width;
// std::cout << " kernel_height:" << args.kernel.height
// << " kernel_width:" << args.kernel.width
// << " stride_h:" << args.kernel.stride_h
// << " stride_w:" << args.kernel.stride_w;
// std::cout << " out_address:" << args.output.address
// << " out_scale_address:" << args.output.scale_address;
//
// float* in_scale = (float*)args.image.scale_address;
// std::cout << "inv_scale:" << in_scale[0] << "," << in_scale[1] <<
// std::endl;
#endif
return do_ioctl(IOCTL_CONFIG_CONV, &args);
// return 0;
}
int compute_fpga_conv_basic(const struct ConvArgs &args) {
#ifdef ENABLE_DEBUG
// std::cout << "======Compute Basic Conv======";
// std::cout << " relu_enabled:" << args.relu_enabled
// << " sb_address:" << args.sb_address
// << " filter_address:" << args.filter_address
// << " filter_num:" << args.filter_num
// << " group_num:" << args.group_num;
// std::cout << " image_address:" << args.image.address
// << " image_scale_address:" << args.image.scale_address
// << " image_channels:" << args.image.channels
// << " image_height:" << args.image.height
// << " image_width:" << args.image.width
// << " pad_height:" << args.image.pad_height
// << " pad_width:" << args.image.pad_width;
// std::cout << " kernel_height:" << args.kernel.height
// << " kernel_width:" << args.kernel.width
// << " stride_h:" << args.kernel.stride_h
// << " stride_w:" << args.kernel.stride_w;
// std::cout << " out_address:" << args.output.address
// << " out_scale_address:" << args.output.scale_address;
// float *in_scale = (float *)args.image.scale_address;
// std::cout << " scale:" << in_scale[0] << "," << in_scale[1] <<
// std::endl;
// float *filter_scale = (float *)args.filter_scale_address;
// std::cout << " filter scale:" << filter_scale[0] << "," <<
// filter_scale[1] << std::endl;
#endif
return do_ioctl(IOCTL_CONFIG_CONV, &args);
}
int compute_fpga_conv(const struct SplitConvArgs &args) {
// return do_ioctl(IOCTL_CONFIG_CONV, &args);
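// Issue one basic-convolution ioctl per split; more than one split is not expected here
// and aborts below.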
int split_num = args.split_num;
int ret = -1;
for (int i = 0; i < split_num; i++) {
// ComputeBasicConv(args.conv_args[i]);
ret = compute_fpga_conv_basic(args.conv_arg[i]);
}
if (split_num > 1) {
std::cout << "Split num > 1 !!!!!!!!!!!!!!!!!!" << std::endl;
exit(-1);
}
return ret;
......@@ -254,10 +180,7 @@ int compute_fpga_ewadd(const struct EWAddArgs &args) {
}
int get_device_info(const struct DeviceInfo &args) {
// DeviceInfo info;
// struct DeviceInfo* a = &info;
int ret = do_ioctl(IOCTL_DEVICE_INFO, &args);
// std::cout << "a." << a->filter_cap << std::endl;
return ret;
}
......@@ -299,7 +222,6 @@ int perform_bypass(const struct BypassArgs &args) {
}
int remainder = size - max_size * count;
// std::cout << "remainder:" << remainder << std::endl;
if (remainder > 0) {
bypassArgs.image.channels = remainder;
bypassArgs.image.address =
......@@ -309,7 +231,6 @@ int perform_bypass(const struct BypassArgs &args) {
ret = do_ioctl(IOCTL_CONFIG_BYPASS, &bypassArgs);
scale = std::max(scale, scales[0]);
}
args.output.scale_address[0] = scale;
args.output.scale_address[1] = 1.0f / scale;
return ret;
......@@ -318,52 +239,10 @@ int perform_bypass(const struct BypassArgs &args) {
int compute_fpga_concat(const struct ConcatArgs &args) { return -1; }
int compute_fpga_scale(const struct ScaleArgs &args) {
#ifdef ENABLE_DEBUG
std::cout << "======Compute Scale======";
std::cout << "scale_address:" << args.scale_address << std::endl;
std::cout << "bias_address:" << args.bias_address << std::endl;
std::cout << "wc_alignment:" << args.wc_alignment << std::endl;
std::cout << "channel_alignment:" << args.channel_alignment << std::endl;
std::cout << " image_address:" << args.image.address
<< " image_scale_address:" << args.image.scale_address
<< " image_channels:" << args.image.channels
<< " image_height:" << args.image.height
<< " image_width:" << args.image.width
<< " pad_height:" << args.image.pad_height
<< " pad_width:" << args.image.pad_width;
std::cout << " out_address:" << args.output.address
<< " out_scale_address:" << args.output.scale_address;
#endif
return do_ioctl(IOCTL_CONFIG_SCALE, &args);
}
int compute_fpga_dwconv(const struct DWconvArgs &args) {
#ifdef ENABLE_DEBUG
std::cout << "======Compute Basic Conv======";
std::cout << " relu_enabled:" << args.relu_enabled
<< " filter_address:" << args.filter_address;
std::cout << " image_address:" << args.image.address
<< " image_scale_address:" << args.image.scale_address
<< " image_channels:" << args.image.channels
<< " image_height:" << args.image.height
<< " image_width:" << args.image.width
<< " pad_height:" << args.image.pad_height
<< " pad_width:" << args.image.pad_width;
std::cout << " kernel_height:" << args.kernel.height
<< " kernel_width:" << args.kernel.width
<< " stride_h:" << args.kernel.stride_h
<< " stride_w:" << args.kernel.stride_w;
std::cout << " out_address:" << args.output.address
<< " out_scale_address:" << args.output.scale_address;
// float *in_scale = (float *)args.image.scale_address;
// std::cout << "inv_scale:" << in_scale[0] << "," << in_scale[1] <<
// std::endl;
#endif
return do_ioctl(IOCTL_CONFIG_DWCONV, &args);
}
......
......@@ -130,9 +130,6 @@ class ConvPE : public PE {
wi = w - wstart;
}
const int index = (h * image_width + w) * image_channels + c;
// int weight_index = (hi *
// kernel_width + wi) * image_channels
// + c;//TODO
int weight_index = oc * filter_chw +
kernel_width * kernel_height * c +
kernel_width * hi + wi;
......@@ -141,8 +138,6 @@ class ConvPE : public PE {
}
}
}
// std::cout << " ============================= pool_index:" <<
// pool_index << " sum:" << sum << std::endl;
if (param_.relu.enabled && sum < 0) {
sum = -sum;
......@@ -171,13 +166,6 @@ class ConvPE : public PE {
float_input.copyFrom(input);
float_input.syncToCPU();
// float_input.saveToFile("input", true);
// param_.filter->saveToFile("filter", true);
// param_.bias()->saveToFile("bias", true);
// exit(-1);
// float16* data_out = output->data<float16>();
float* out = float_output.mutableData<float>(FP32, output->shape());
float* bias_data = param_.bias()->data<float>();
......@@ -205,14 +193,8 @@ class ConvPE : public PE {
int image_index = h * out_width * in_channel + w * in_channel + j;
float value = image_addr[image_index] * filter_ptr[j];
sum += value;
// mi[j] = value;
}
// for (int j = 0; j < in_channel; j++) {
// sum += mi[j];
// }
sum += bias_data[i];
if (param_.relu.enabled && sum < 0) {
......@@ -232,10 +214,6 @@ class ConvPE : public PE {
output->copyFrom(&float_output);
output->scale()[0] = max / 127;
output->scale()[1] = 127 / max;
// float_output.saveToFile("out", true);
// exit(-1);
}
bool dispatch() {
......@@ -264,7 +242,6 @@ class ConvPE : public PE {
std::vector<BasicConvParam*>& params = param_.splitParams();
int ret = 0;
for (auto conv_param : params) {
// conv_param->input.printScale();
ret |= compute_fpga_conv_basic(conv_param->args);
}
......@@ -282,34 +259,16 @@ class ConvPE : public PE {
size_t size = params.size();
if (split_axis == 0 && ret == 0 && size > 1) {
// std::cout << "concat size:" << size << std::endl;
concatPE_.dispatch();
}
if (split_axis == 1 && ret == 0 && size > 1) {
// for (int n = 0; n < size - 1; n++) {
ElementwiseAddParam& add_param = addPE_.param();
add_param.inputs = {&params[0]->output, &params[1]->output};
add_param.output = param_.output;
addPE_.init();
addPE_.apply();
addPE_.dispatch();
// param_.output->printScale();
// params[0]->input.saveToFile("conv_1.txt");
// params[1]->input.saveToFile("conv_2.txt");
// params[0]->output.saveToFile("ew_o1.txt");
// params[1]->output.saveToFile("ew_o2.txt");
// std::cout << "\n ================== EW ================== \n";
// }
}
if (param_.input->shape().channel() == 64 &&
param_.output->shape().channel() == 128) {
// exit(-1);
}
return ret == 0;
}
......
......@@ -212,7 +212,6 @@ inline void format_filter(Tensor* filter,
for (size_t i = 0; i < max_values.size(); i++) {
scales.push_back(max_values[i] / max_value);
// scales.push_back(1.0f);
}
// filter->saveToFile("filter.txt");
......@@ -345,10 +344,8 @@ inline void split_filter_num(const ConvParam& c_param) {
Shape s_shape(N, {filter_num});
float* scale_data = scale.mutableData<float>(FP32, s_shape);
float* bias_data = bias.mutableData<float>(FP32, s_shape);
// std::cout << "v size: " << v.size() << std::endl;
for (int n = 0; n < filter_num; n++) {
scale_data[n] = param.scale()->data<float>()[n + chnnnel_start] * v[n];
// scale_data[n] = param.scale()->data<float>()[n + chnnnel_start];
}
for (int n = 0; n < filter_num; n++) {
bias_data[n] = param.bias()->data<float>()[n + chnnnel_start];
......@@ -366,8 +363,6 @@ inline void split_filter_num(const ConvParam& c_param) {
// param.scale()->saveToFile("scale.txt");
// param.bias()->saveToFile("bias.txt");
// exit(-1);
args.group_num = param.groups;
args.relu_enabled = param.relu.enabled;
args.sb_address = conv_param->scaleBias.data<float>();
......@@ -492,7 +487,6 @@ inline int fill_split_arg(const ConvParam& c_param) {
split_filter_num(c_param);
return 0;
}
// split_filter_num(c_param);
}
inline bool compute_conv(const ConvParam& c_conv_params) {
......
......@@ -114,8 +114,6 @@ class PoolingPE : public PE {
for (int c = 0; c < image_channels; ++c) {
const int pool_index = (ph * pooled_width_ + pw) * image_channels + c;
float sum = 0;
// const int index =
// (hstart * image_width + wstart) * image_channels + c;
for (int h = hstart; h < hend; ++h) {
for (int w = wstart; w < wend; ++w) {
const int index = (h * image_width + w) * image_channels + c;
......@@ -144,9 +142,7 @@ class PoolingPE : public PE {
Tensor float_input;
float_input.mutableData<float>(FP32, input->shape());
float_input.copyFrom(input);
// float_input.saveToFile("pool_float.txt");
float16* data_out = output->data<float16>();
int kernel_hw = param_.kernelSize[0] * param_.kernelSize[1];
float scale_max = 0;
......@@ -163,7 +159,6 @@ class PoolingPE : public PE {
output->scale()[0] = scale_max / 127.0f;
output->scale()[1] = 127.0f / scale_max;
output->flush();
// exit(-1);
}
void cpu_compute() {
......@@ -193,7 +188,6 @@ class PoolingPE : public PE {
output->scale()[0] = scale_max / 127.0f;
output->scale()[1] = 127.0f / scale_max;
output->flush();
// exit(-1);
}
bool dispatch() {
......
......@@ -43,81 +43,6 @@ class ScalePE : public PE {
return true;
}
// void apply() {
// Tensor* input = param_.input;
// Tensor* output = param_.output;
// Shape& input_shape = input->shape();
// int channel = input_shape.channel();
// int repeat = 1;
// int alignment = 16;
// int length = channel;
// if (channel % alignment != 0 || channel < alignment) {
// int c_lcm = lcm(channel, alignment);
// repeat = c_lcm / (channel);
// }
// Shape shape(N, {channel * repeat});
// param_.alignedBias()->mutableData<float16>(FP16, shape);
// param_.alignedScale()->mutableData<float16>(FP16, shape);
// float16* bias_data = param_.alignedBias()->data<float16>();
// float16* scale_data = param_.alignedScale()->data<float16>();
// if (param_.bias != nullptr) {
// float* bias_data_float = param_.bias->data<float>();
// for (int i = 0; i < repeat; i++) {
// for (int j = 0; j < length; j++) {
// float16 value = float_to_half(bias_data_float[j]);
// bias_data[i * length + j] = value;
// // bias_data[i * length + j] = float_to_half(1.0f);
// }
// }
// } else {
// float16 zero = float_to_half(0.0f);
// for (int i = 0; i < repeat; i++) {
// for (int j = 0; j < length; j++) {
// bias_data[i * length + j] = zero;
// }
// }
// }
// float* scale_data_float = param_.scale->data<float>();
// for (int i = 0; i < repeat; i++) {
// for (int j = 0; j < length; j++) {
// float16 value = float_to_half(scale_data_float[j]);
// scale_data[i * length + j] = value;
// }
// }
// param_.alignedScale()->flush();
// param_.alignedBias()->flush();
// int wc = input_shape.width() * input_shape.channel();
// int wc_aligned = align_image(wc);
// ScaleArgs& args = param_.args;
// args.scale_address = param_.alignedScale()->data<void>();
// args.bias_address = param_.alignedBias()->data<void>();
// args.wc_alignment = wc_aligned;
// args.channel_alignment = channel * repeat;
// args.image.address = input->data<void>();
// args.image.scale_address = input->scale();
// args.image.channels = channel;
// args.image.height = input_shape.height();
// args.image.width = input_shape.width();
// args.image.pad_width = 0;
// args.image.pad_height = 0;
// args.output.address = output->data<void>();
// args.output.scale_address = output->scale();
// }
// bool dispatch() {
// param_.input->syncToDevice();
// std::cout << "scale dispatch" << std::endl;
// return compute_fpga_scale(param_.args) == 0;
// }
void apply() {
Tensor* input = param_.input;
Tensor* output = param_.output;
......@@ -241,8 +166,6 @@ class ScalePE : public PE {
for (int c = 0; c < input->shape().channel(); c++) {
int index = i * input->shape().channel() + c;
float value = half_to_float(in_data[index]) * scale_data[c];
std::cout << "value:" << value << " = " << half_to_float(in_data[index])
<< " x " << scale_data[c] << std::endl;
data_out[index] = float_to_half(value);
if (value < 0) {
......@@ -273,12 +196,6 @@ class ScalePE : public PE {
dw_param.quantizedFilter()->flush();
// apply();
}
// param_.scale->saveToFile("scale.txt");
// cpu_compute();
// return true;
// param_.input->syncToDevice();
// return compute_fpga_scale(param_.args) == 0;
param_.input->syncToDevice();
return dw_pe_.dispatch();
}
......
......@@ -221,10 +221,6 @@ void BoxCoderCompute::Run() {
}
}
}
// prior_box->ZynqTensor()->saveToFile("prior_box", true);
// prior_box_var->ZynqTensor()->saveToFile("prior_box_var", true);
// output_box->ZynqTensor()->saveToFile("box_coder", true);
}
} // namespace arm
......
......@@ -61,25 +61,9 @@ class FillConstantCompute : public KernelLite<TARGET(kARM), PRECISION(kFloat)> {
auto& param = *param_.get_mutable<param_t>();
auto& context = ctx_->As<ARMContext>();
if (param.dtype == static_cast<int32_t>(lite::core::FluidType::FP32)) {
auto data = param.Out->template mutable_data<float>();
for (int i = 0; i < param.Out->numel(); i++) {
data[i] = param.value;
}
} else if (param.dtype ==
static_cast<int32_t>(lite::core::FluidType::INT32)) {
auto data = param.Out->template mutable_data<int32_t>();
for (int i = 0; i < param.Out->numel(); i++) {
data[i] = param.value;
}
} else if (param.dtype ==
static_cast<int32_t>(lite::core::FluidType::INT8)) {
auto data = param.Out->template mutable_data<int8_t>();
for (int i = 0; i < param.Out->numel(); i++) {
data[i] = param.value;
}
} else {
LOG(FATAL) << "not supported dtype " << param.dtype;
auto data = param.Out->template mutable_data<T>();
for (int i = 0; i < param.Out->numel(); i++) {
data[i] = param.value;
}
}
......
......@@ -85,9 +85,6 @@ void PriorBoxCompute::Run() {
is_clip,
order,
min_max_aspect_ratios_order);
param.boxes->ZynqTensor()->saveToFile("pb_boxes", true);
param.variances->ZynqTensor()->saveToFile("pb_variance", true);
}
} // namespace arm
......@@ -106,17 +103,3 @@ REGISTER_LITE_KERNEL(prior_box,
.BindOutput("Boxes", {LiteType::GetTensorTy(TARGET(kARM))})
.BindOutput("Variances", {LiteType::GetTensorTy(TARGET(kARM))})
.Finalize();
// REGISTER_LITE_KERNEL(prior_box,
// kFPGA,
// kFP16,
// kNHWC,
// paddle::lite::kernels::arm::PriorBoxCompute,
// def)
// .BindInput("Input",{LiteType::GetTensorTy(
// TARGET(kFPGA), PRECISION(kFP16), DATALAYOUT(kNHWC))})
// .BindInput("Image", {LiteType::GetTensorTy(
// TARGET(kFPGA), PRECISION(kFP16), DATALAYOUT(kNHWC))})
// .BindOutput("Boxes", {LiteType::GetTensorTy(TARGET(kARM))})
// .BindOutput("Variances", {LiteType::GetTensorTy(TARGET(kARM))})
// .Finalize();
......@@ -9,14 +9,14 @@ set(fpga_deps fpga_target_wrapper kernel_fpga)
# add_kernel(box_coder_compute_fpga FPGA basic SRCS box_coder_compute.cc DEPS ${fpga_deps})
# add_kernel(concat_compute_fpga FPGA basic SRCS concat_compute.cc DEPS ${fpga_deps})
add_kernel(conv_compute_fpga FPGA basic SRCS conv_compute.cc DEPS ${fpga_deps})
add_kernel(density_prior_box_compute_fpga FPGA basic SRCS density_prior_box_compute.cc DEPS ${fpga_deps})
# add_kernel(density_prior_box_compute_fpga FPGA basic SRCS density_prior_box_compute.cc DEPS ${fpga_deps})
add_kernel(dropout_compute_fpga FPGA basic SRCS dropout_compute.cc DEPS ${fpga_deps})
add_kernel(elementwise_compute_fpga FPGA basic SRCS elementwise_compute.cc DEPS ${fpga_deps})
# add_kernel(feed_compute_fpga FPGA basic SRCS fc_compute.cc DEPS ${fpga_deps})
add_kernel(fc_compute_fpga FPGA basic SRCS fc_compute.cc DEPS ${fpga_deps})
add_kernel(gru_compute_fpga FPGA extra SRCS gru_compute.cc DEPS ${fpga_deps})
# add_kernel(mul_compute_fpga FPGA basic SRCS mul_compute.cc DEPS ${fpga_deps})
# add_kernel(gru_compute_fpga FPGA extra SRCS gru_compute.cc DEPS ${fpga_deps})
add_kernel(mul_compute_fpga FPGA basic SRCS mul_compute.cc DEPS ${fpga_deps})
add_kernel(multiclass_nms_compute_fpga FPGA basic SRCS multiclass_nms_compute.cc DEPS ${fpga_deps})
add_kernel(norm_compute_fpga FPGA basic SRCS norm_compute.cc DEPS ${fpga_deps})
# add_kernel(im2sequence_compute_fpga FPGA basic SRCS im2sequence_compute.cc DEPS ${fpga_deps})
......
......@@ -47,7 +47,7 @@ void ConcatCompute::Run() {
pe_.dispatch();
#ifdef FPGA_PRINT_TENSOR
zynqmp::ConcatParam& concat_param = pe_.param();
Debugger.get_instance()::registerOutput("concat", concat_param.output);
Debugger::get_instance().registerOutput("concat", concat_param.output);
#endif
}
......
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "lite/kernels/fpga/density_prior_box_compute.h"
#include <string>
#include <vector>
#include "lite/backends/arm/math/funcs.h"
namespace paddle {
namespace lite {
namespace kernels {
namespace fpga {
// inline void ExpandAspectRatios(const std::vector<float>& input_aspect_ratior,
// bool flip,
// std::vector<float>* output_aspect_ratior) {
// constexpr float epsilon = 1e-6;
// output_aspect_ratior->clear();
// output_aspect_ratior->push_back(1.0f);
// for (size_t i = 0; i < input_aspect_ratior.size(); ++i) {
// float ar = input_aspect_ratior[i];
// bool already_exist = false;
// for (size_t j = 0; j < output_aspect_ratior->size(); ++j) {
// if (fabs(ar - output_aspect_ratior->at(j)) < epsilon) {
// already_exist = true;
// break;
// }
// }
// if (!already_exist) {
// output_aspect_ratior->push_back(ar);
// if (flip) {
// output_aspect_ratior->push_back(1.0f / ar);
// }
// }
// }
// }
void DensityPriorBoxCompute::Run() {
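// NOTE: the FPGA density_prior_box kernel currently performs no computation; the ARM
// reference implementation is kept below, commented out, for reference.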
// auto& param = Param<operators::DensityPriorBoxParam>();
// bool is_flip = param.flip;
// bool is_clip = param.clip;
// std::vector<float> min_size = param.min_sizes;
// std::vector<float> fixed_size = param.fixed_sizes;
// std::vector<float> fixed_ratio = param.fixed_ratios;
// auto density_size = param.density_sizes;
// std::vector<float> max_size = param.max_sizes;
// std::vector<float> aspect_ratio = param.aspect_ratios;
// std::vector<float> variance = param.variances_;
// int img_w = param.img_w;
// int img_h = param.img_h;
// float step_w = param.step_w;
// float step_h = param.step_h;
// float offset = param.offset;
// std::vector<float> aspect_ratios_vec;
// ExpandAspectRatios(aspect_ratio, is_flip, &aspect_ratios_vec);
// size_t prior_num = aspect_ratios_vec.size() * min_size.size();
// prior_num += max_size.size();
// if (fixed_size.size() > 0) {
// prior_num = fixed_size.size() * fixed_ratio.size();
// }
// if (density_size.size() > 0) {
// for (int i = 0; i < density_size.size(); ++i) {
// if (fixed_ratio.size() > 0) {
// prior_num += (fixed_ratio.size() * ((pow(density_size[i], 2)) - 1));
// } else {
// prior_num +=
// ((fixed_ratio.size() + 1) * ((pow(density_size[i], 2)) - 1));
// }
// }
// }
// std::vector<std::string> order = param.order;
// lite::arm::math::density_prior_box(param.input,
// param.image,
// &param.boxes,
// &param.variances,
// min_size,
// fixed_size,
// fixed_ratio,
// density_size,
// max_size,
// aspect_ratio,
// variance,
// img_w,
// img_h,
// step_w,
// step_h,
// offset,
// prior_num,
// is_flip,
// is_clip,
// order);
}
} // namespace fpga
} // namespace kernels
} // namespace lite
} // namespace paddle
REGISTER_LITE_KERNEL(density_prior_box,
kFPGA,
kFP16,
kNHWC,
paddle::lite::kernels::fpga::DensityPriorBoxCompute,
def)
.BindInput("Input",
{LiteType::GetTensorTy(TARGET(kFPGA),
PRECISION(kFP16),
DATALAYOUT(kNHWC))})
.BindInput("Image",
{LiteType::GetTensorTy(TARGET(kFPGA),
PRECISION(kFP16),
DATALAYOUT(kNHWC))})
.BindOutput("Boxes", {LiteType::GetTensorTy(TARGET(kARM))})
.BindOutput("Variances",
{LiteType::GetTensorTy(TARGET(kFPGA),
PRECISION(kFP16),
DATALAYOUT(kNHWC))})
.Finalize();
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include "lite/core/kernel.h"
#include "lite/core/op_registry.h"
namespace paddle {
namespace lite {
namespace kernels {
namespace fpga {
class DensityPriorBoxCompute
: public KernelLite<TARGET(kFPGA), PRECISION(kFP16), DATALAYOUT(kNHWC)> {
public:
using param_t = operators::DensityPriorBoxParam;
void Run() override;
virtual ~DensityPriorBoxCompute() = default;
};
} // namespace fpga
} // namespace kernels
} // namespace lite
} // namespace paddle
......@@ -45,7 +45,6 @@ class IoCopyHostToFpgaCompute
auto& param = Param<operators::IoCopyParam>();
CHECK(param.x->target() == TARGET(kHost) ||
param.x->target() == TARGET(kFPGA));
// param.y->CopyDataFrom(*param.x);
param.y->mutable_data<float16>();
if (param.x->ZynqTensor()->aligned() &&
param.x->ZynqTensor()->shape().shouldAlign()) {
......@@ -53,10 +52,8 @@ class IoCopyHostToFpgaCompute
tempTensor.mutableData<float16>(zynqmp::FP16,
param.x->ZynqTensor()->shape());
tempTensor.copyFrom(param.x->ZynqTensor());
// tempTensor.saveToFile("tempTensor", true);
tempTensor.setAligned(true);
tempTensor.unalignImage();
// tempTensor.saveToFile("unaligned", true);
param.y->ZynqTensor()->copyFrom(&tempTensor);
} else {
param.y->ZynqTensor()->copyFrom(param.x->ZynqTensor());
......@@ -97,11 +94,9 @@ class IoCopyFpgaToHostCompute
: public KernelLite<TARGET(kFPGA), PRECISION(kAny), DATALAYOUT(kAny)> {
public:
void Run() override {
// std::cout << "IoCopyFpgaToHostCompute \n";
auto& param = Param<operators::IoCopyParam>();
CHECK(param.x->target() == TARGET(kHost) ||
param.x->target() == TARGET(kFPGA));
// std::cout << "before CopyDataFrom \n";
param.y->mutable_data<float>();
param.y->ZynqTensor()->setDataType(zynqmp::FP32);
......@@ -113,10 +108,8 @@ class IoCopyFpgaToHostCompute
tempTensor.mutableData<float16>(zynqmp::FP16,
param.x->ZynqTensor()->shape());
tempTensor.copyFrom(param.x->ZynqTensor());
// tempTensor.saveToFile("tempTensor", true);
tempTensor.setAligned(true);
tempTensor.unalignImage();
// tempTensor.saveToFile("unaligned", true);
param.y->ZynqTensor()->copyFrom(&tempTensor);
} else {
param.y->ZynqTensor()->copyFrom(param.x->ZynqTensor());
......
......@@ -29,11 +29,6 @@ using float16 = zynqmp::float16;
template <typename T>
void convert_to_hwc(
T* chw_data, T* hwc_data, int num, int channel, int height, int width) {
std::cout << " -------------- chw -> HWC ---------------\n";
std::cout << "channel: " << channel << std::endl;
std::cout << "height: " << height << std::endl;
std::cout << "width: " << width << std::endl;
int chw = channel * height * width;
int wc = width * channel;
int index = 0;
......@@ -52,10 +47,6 @@ void convert_to_hwc(
template <typename T>
void hwc_to_chw(
T* chw_data, T* hwc_data, int num, int channel, int height, int width) {
std::cout << " ============= HWC -> CHW =============\n";
std::cout << "channel: " << channel << std::endl;
std::cout << "height: " << height << std::endl;
std::cout << "width: " << width << std::endl;
int chw = channel * height * width;
int wc = width * channel;
int wh = width * height;
......@@ -73,10 +64,7 @@ void hwc_to_chw(
}
void TransHwcToChw(Tensor* dest, const Tensor* src) {
std::cout << "precision:" << static_cast<int>(src->precision()) << std::endl;
std::cout << "dataType:" << src->ZynqTensor()->dataType() << std::endl;
if (src->ZynqTensor()->dataType() == zynqmp::FP32) {
std::cout << "float\n";
float* chw = dest->mutable_data<float>();
float* hwc = const_cast<float*>(src->data<float>());
int num = dest->dims()[0];
......@@ -94,7 +82,6 @@ void TransHwcToChw(Tensor* dest, const Tensor* src) {
}
if (src->ZynqTensor()->dataType() == zynqmp::FP16) {
std::cout << "float16\n";
float16* chw = dest->mutable_data<float16>();
float16* hwc = const_cast<float16*>(src->data<float16>());
int num = dest->dims()[0];
......@@ -126,9 +113,6 @@ class TransHwcToChwCompute
param.y->ZynqTensor()->flush();
param.y->ZynqTensor()->copyScaleFrom(param.x->ZynqTensor());
// param.x->ZynqTensor()->saveToFile("src_hwc", true);
// param.y->ZynqTensor()->saveToFile("src_dst", true);
auto out_lod = param.y->mutable_lod();
*out_lod = param.x->lod();
}
......
......@@ -84,7 +84,7 @@ void MulCompute::Run() {
#ifdef FPGA_PRINT_TENSOR
zynqmp::FullyConnectedParam& fc_param = pe_.param();
Debugger.get_instance().registerOutput("mul", fc_param.output);
Debugger::get_instance().registerOutput("mul", fc_param.output);
#endif
}
......
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "lite/kernels/fpga/sequence_pool_compute.h"
#include <string>
#include <vector>
#include "lite/backends/arm/math/funcs.h"
#include "lite/core/op_registry.h"
#include "lite/core/tensor.h"
#include "lite/core/type_system.h"
namespace paddle {
namespace lite {
namespace kernels {
namespace fpga {
void SequencePoolCompute::PrepareForRun() {}
void SequencePoolCompute::Run() {
auto& param = Param<operators::SequencePoolParam>();
auto& output = param.Out;
const auto* din = param.X->data<float>();
float* dout = output->mutable_data<float>();
const auto pool_type = param.pool_type;
const auto lod = param.X->lod()[0];
int64_t width = param.X->numel() / param.X->dims()[0];
// if (pool_type == "SUM") {
// lite::arm::math::seq_pool_sum(din, dout, lod, width);
// } else if (pool_type == "AVERAGE") {
// lite::arm::math::seq_pool_average(din, dout, lod, width);
// } else if (pool_type == "SQRT") {
// lite::arm::math::seq_pool_sqrt(din, dout, lod, width);
// } else if (pool_type == "MAX") {
// lite::arm::math::seq_pool_max(din, dout, lod, width);
// } else if (pool_type == "MIN") {
// lite::arm::math::seq_pool_min(din, dout, lod, width);
// } else if (pool_type == "FIRST") {
// lite::arm::math::seq_pool_first(din, dout, lod, width);
// } else if (pool_type == "LAST") {
// lite::arm::math::seq_pool_last(din, dout, lod, width);
// } else {
// LOG(ERROR) << " UNKNOWN sequence pool type";
// }
int batch_size = lod.size() - 1;
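// Each input sequence is pooled to a single step, so the output LoD is simply {0, 1, ..., batch_size}.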
std::vector<uint64_t> offset_new(static_cast<uint64_t>(batch_size + 1));
for (int i = 0; i <= batch_size; i++) {
offset_new[i] = i;
}
(output->mutable_lod())->push_back(offset_new);
}
} // namespace fpga
} // namespace kernels
} // namespace lite
} // namespace paddle
REGISTER_LITE_KERNEL(sequence_pool,
kFPGA,
kFP16,
kNHWC,
paddle::lite::kernels::fpga::SequencePoolCompute,
def)
.BindInput("X",
{LiteType::GetTensorTy(TARGET(kFPGA),
PRECISION(kFP16),
DATALAYOUT(kNHWC))})
.BindOutput("Out",
{LiteType::GetTensorTy(TARGET(kFPGA),
PRECISION(kFP16),
DATALAYOUT(kNHWC))})
.BindOutput("MaxIndex", {LiteType::GetTensorTy(TARGET(kARM))})
.Finalize();
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <stdint.h>
#include "lite/backends/arm/math/type_trans.h"
#include "lite/core/kernel.h"
#include "lite/core/op_registry.h"
namespace paddle {
namespace lite {
namespace kernels {
namespace fpga {
class SequencePoolCompute
: public KernelLite<TARGET(kFPGA), PRECISION(kFP16), DATALAYOUT(kNHWC)> {
public:
void PrepareForRun() override;
void Run() override;
virtual ~SequencePoolCompute() = default;
private:
};
} // namespace fpga
} // namespace kernels
} // namespace lite
} // namespace paddle