提交 51d144f3 编写于 作者: C chonwhite

format code

上级 4dddc907
...@@ -205,8 +205,8 @@ if(LITE_WITH_LIGHT_WEIGHT_FRAMEWORK AND WITH_TESTING) ...@@ -205,8 +205,8 @@ if(LITE_WITH_LIGHT_WEIGHT_FRAMEWORK AND WITH_TESTING)
ARGS --cl_path=${CMAKE_SOURCE_DIR}/lite/backends/opencl ARGS --cl_path=${CMAKE_SOURCE_DIR}/lite/backends/opencl
--model_dir=${LITE_MODEL_DIR}/inception_v4 SERIAL) --model_dir=${LITE_MODEL_DIR}/inception_v4 SERIAL)
add_dependencies(test_inceptionv4 extern_lite_download_inception_v4_simple_tar_gz) add_dependencies(test_inceptionv4 extern_lite_download_inception_v4_simple_tar_gz)
# lite_cc_test(test_ocr_attention SRCS ocr_attention_test.cc lite_cc_test(test_ocr_attention_fpga SRCS ocr_attention_test_fpga.cc
# DEPS ${lite_model_test_DEPS}) DEPS ${lite_model_test_DEPS})
# lite_cc_test(model_run_test_image SRCS model_run_test_image.cc # lite_cc_test(model_run_test_image SRCS model_run_test_image.cc
# DEPS ${lite_model_test_DEPS} # DEPS ${lite_model_test_DEPS}
......
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include <gflags/gflags.h>
#include <gtest/gtest.h>
#include <cstdlib>
#include <fstream>
#include <iostream>
#include <string>
#include <vector>
#include "lite/api/cxx_api.h"
#include "lite/api/paddle_use_kernels.h"
#include "lite/api/paddle_use_ops.h"
#include "lite/api/paddle_use_passes.h"
#include "lite/api/test_helper.h"
#include "lite/core/op_registry.h"
DEFINE_string(input_file, "", "input_file");
namespace paddle {
namespace lite {
// float* temp_data = new float(33 * 10 * 23);
// std::vector<std::string> GetDirectoryFiles(const std::string& dir) {
// std::vector<std::string> files;
// std::shared_ptr<DIR> directory_ptr(opendir(dir.c_str()),
// [](DIR* dir) { dir&& closedir(dir); });
// struct dirent* dirent_ptr;
// if (!directory_ptr) {
// std::cout << "Error opening : " << std::strerror(errno) << dir <<
// std::endl;
// return files;
// }
// while ((dirent_ptr = readdir(directory_ptr.get())) != nullptr) {
// files.push_back(std::string(dirent_ptr->d_name));
// }
// return files;
// }
// Reads `num` whitespace-separated float values from the text file at
// `path` into the caller-provided buffer `data` (must hold >= num floats).
// Terminates the process with an error message if the file cannot be
// opened. A value that fails to parse is stored as 0 (stream extraction
// leaves `value` untouched on failure and the loop keeps going).
void read_from_file(const std::string& path, float* data, int num) {
  std::ifstream file_stream(path);
  if (!file_stream) {
    // Original code exited silently; report which file was missing.
    std::cerr << "read_from_file: failed to open '" << path << "'"
              << std::endl;
    exit(-1);
  }
  for (int i = 0; i < num; ++i) {
    float value = 0;
    file_stream >> value;
    data[i] = value;
  }
}
// Converts a planar CHW image into interleaved HWC layout.
// `src` holds channel-major data (`channel` planes of height*width floats);
// `dst` receives the same values reordered so each pixel's channels are
// contiguous. Buffers must not alias and must each hold
// channel*height*width floats.
void chw_to_hwc(float* src, float* dst, int channel, int height, int width) {
  const int row_stride = width * channel;  // floats per output row
  int src_pos = 0;                         // linear read cursor over src
  for (int c = 0; c < channel; c++) {
    for (int h = 0; h < height; h++) {
      const int row_base = h * row_stride;
      for (int w = 0; w < width; w++) {
        dst[row_base + w * channel + c] = src[src_pos++];
      }
    }
  }
}
// Builds the OCR attention model from "attention/model"+"attention/params",
// feeds it an image read from FLAGS_input_file plus the auxiliary decoder
// inputs (init ids, init scores, position encoding), runs inference twice,
// and dumps the raw output tensor to "plate_data/<input file name>".
//
// `preferred_place` and `use_npu` are kept for interface compatibility with
// other model tests; kernel selection here is driven only by `valid_places`.
void TestModel(const std::vector<Place>& valid_places,
               const Place& preferred_place,
               bool use_npu = false) {
  DeviceInfo::Init();
  DeviceInfo::Global().SetRunMode(lite_api::LITE_POWER_HIGH, FLAGS_threads);
  lite::Predictor predictor;
  predictor.Build("", "attention/model", "attention/params", valid_places);

  // Input 0: the image, 1x1x100x200, read as whitespace-separated floats.
  auto* input_tensor = predictor.GetInput(0);
  input_tensor->Resize(DDim(std::vector<DDim::value_type>({1, 1, 100, 200})));
  auto* data = input_tensor->mutable_data<float>();
  read_from_file(FLAGS_input_file, data, 100 * 200);

  // Input 1: initial decoder ids — a single zero with a {{0,1},{0,1}} LoD.
  auto* init_ids = predictor.GetInput(1);
  init_ids->Resize(DDim(std::vector<DDim::value_type>({1, 1})));
  auto* data_ids = init_ids->mutable_data<float>();
  auto ids_size = init_ids->dims().production();
  for (int i = 0; i < ids_size; i++) {
    data_ids[i] = 0;
  }
  auto lod_ids = init_ids->mutable_lod();
  std::vector<std::vector<uint64_t>> lod_i{{0, 1}, {0, 1}};
  *lod_ids = lod_i;

  // Input 2: initial beam-search scores, same shape and LoD as init_ids.
  auto* init_scores = predictor.GetInput(2);
  init_scores->Resize(DDim(std::vector<DDim::value_type>({1, 1})));
  auto* data_scores = init_scores->mutable_data<float>();
  // BUGFIX: size this loop from init_scores itself. The original code used
  // input_tensor->dims().production() (100*200 = 20000) and overran the
  // 1-element scores buffer.
  auto scores_size = init_scores->dims().production();
  for (int i = 0; i < scores_size; i++) {
    data_scores[i] = 0;
  }
  auto lod_scores = init_scores->mutable_lod();
  std::vector<std::vector<uint64_t>> lod_s{{0, 1}, {0, 1}};
  *lod_scores = lod_s;

  // Input 3: position encoding, 1x33x10x23. The first 10 planes are row
  // one-hot maps (plane i is 1 on row i), the remaining 23 planes are
  // column one-hot maps (plane i is 1 on column i). The two loops together
  // write all 33*10*23 elements exactly once.
  auto* position_encoding = predictor.GetInput(3);
  position_encoding->Resize(
      DDim(std::vector<DDim::value_type>({1, 33, 10, 23})));
  auto* position_encoding_data = position_encoding->mutable_data<float>();
  int index = 0;
  for (int i = 0; i < 10; i++) {
    for (int row = 0; row < 10; row++) {
      for (int col = 0; col < 23; col++) {
        position_encoding_data[index++] = (i == row) ? 1.0f : 0.0f;
      }
    }
  }
  for (int i = 0; i < 23; i++) {
    for (int row = 0; row < 10; row++) {
      for (int col = 0; col < 23; col++) {
        position_encoding_data[index++] = (i == col) ? 1.0f : 0.0f;
      }
    }
  }

  auto start = GetCurrentUS();
  for (int i = 0; i < 2; ++i) {
    predictor.Run();
  }
  std::cout << "================== Speed Report ===================";
  std::cout << "Model: " << FLAGS_model_dir << ", threads num " << FLAGS_threads
            << ", warmup: " << FLAGS_warmup << ", repeats: " << FLAGS_repeats
            << ", spend " << (GetCurrentUS() - start) / FLAGS_repeats / 1000.0
            << " ms in average.";

  // Dump the raw output so results can be compared offline.
  // NOTE(review): substr(9) assumes FLAGS_input_file starts with a fixed
  // 9-character directory prefix — confirm against how the test is invoked.
  auto* out = predictor.GetOutput(0);
  std::string file = "plate_data/" + FLAGS_input_file.substr(9);
  std::cout << "file:::" << file << std::endl;
  std::ofstream ofs(file);
  for (int i = 0; i < out->dims().production(); i++) {
    float value = out->data<float>()[i];
    ofs << value << std::endl;
  }
  ofs.close();
}
// Runs the OCR attention model preferring FPGA FP16/NHWC kernels, with
// Host and ARM float kernels available as fallbacks.
TEST(OcrAttention, test_arm) {
  std::vector<Place> places;
  places.push_back(Place{TARGET(kFPGA), PRECISION(kFP16), DATALAYOUT(kNHWC)});
  places.push_back(Place{TARGET(kHost), PRECISION(kFloat)});
  places.push_back(Place{TARGET(kARM), PRECISION(kFloat)});
  TestModel(places, Place{TARGET(kARM), PRECISION(kFloat)});
}
} // namespace lite
} // namespace paddle
...@@ -20,14 +20,8 @@ namespace zynqmp { ...@@ -20,14 +20,8 @@ namespace zynqmp {
DLEngine::DLEngine() { DLEngine::DLEngine() {
open_device(); open_device();
int ret = get_device_info(info_); int ret = get_device_info(info_);
// filter::set_filter_capacity(2048);
filter::set_filter_capacity(info_.filter_cap); filter::set_filter_capacity(info_.filter_cap);
filter::set_colunm(info_.colunm); filter::set_colunm(info_.colunm);
std::cout << " version:" << info_.version;
std::cout << " device_type:" << info_.device_type;
std::cout << " filter_cap:" << info_.filter_cap;
std::cout << " colunm:" << info_.colunm << std::endl;
} }
} // namespace zynqmp } // namespace zynqmp
......
...@@ -30,8 +30,7 @@ class DLEngine { ...@@ -30,8 +30,7 @@ class DLEngine {
DeviceInfo& deviceInfo(); DeviceInfo& deviceInfo();
// bool isZU3() { return info_.device_type / 100 == 3; } bool isZU3() { return info_.device_type / 100 == 3; }
bool isZU3() { return true; }
float* out_data = nullptr; float* out_data = nullptr;
......
...@@ -61,8 +61,6 @@ void reset_device() { ...@@ -61,8 +61,6 @@ void reset_device() {
// memory management; // memory management;
void *fpga_malloc(size_t size) { void *fpga_malloc(size_t size) {
// std::cout << "fpga malloc: 0x" << std::hex << size << std::dec << " (" <<
// size << ") - ";
#ifdef ENABLE_DEBUG #ifdef ENABLE_DEBUG
// std::cout << "fpga_malloc:" << size << std::endl; // std::cout << "fpga_malloc:" << size << std::endl;
#endif #endif
...@@ -73,7 +71,6 @@ void *fpga_malloc(size_t size) { ...@@ -73,7 +71,6 @@ void *fpga_malloc(size_t size) {
std::cout << "not enough memory !"; std::cout << "not enough memory !";
exit(-1); exit(-1);
} }
// std::cout << std::hex << ptr << std::dec << std::endl;
memory_map.insert(std::make_pair(ptr, size)); memory_map.insert(std::make_pair(ptr, size));
memory_size += size; memory_size += size;
if (memory_size > memory_size_max) { if (memory_size > memory_size_max) {
...@@ -91,8 +88,6 @@ size_t fpga_get_memory_size_max() { return memory_size_max; } ...@@ -91,8 +88,6 @@ size_t fpga_get_memory_size_max() { return memory_size_max; }
size_t fpga_diagnose_memory(int detailed) { size_t fpga_diagnose_memory(int detailed) {
size_t total = 0; size_t total = 0;
// size_t size = 0;
// int i = 0;
auto iter = memory_map.begin(); // std::map<void *, size_t>::iterator auto iter = memory_map.begin(); // std::map<void *, size_t>::iterator
while (iter != memory_map.end()) { while (iter != memory_map.end()) {
total += iter->second; total += iter->second;
...@@ -108,11 +103,8 @@ void fpga_free(void *ptr) { ...@@ -108,11 +103,8 @@ void fpga_free(void *ptr) {
size = iter->second; size = iter->second;
memory_map.erase(iter); memory_map.erase(iter);
} }
memory_size -= size; memory_size -= size;
#ifdef PADDLE_OS_LINUX #ifdef PADDLE_OS_LINUX
munmap(ptr, size); munmap(ptr, size);
#else #else
free(ptr); free(ptr);
...@@ -129,9 +121,6 @@ int fpga_flush(void *address, size_t size) { ...@@ -129,9 +121,6 @@ int fpga_flush(void *address, size_t size) {
} }
int fpga_invalidate(void *address, size_t size) { int fpga_invalidate(void *address, size_t size) {
// std::cout <<
// "=================================================================================="
// << std::endl;
struct MemoryCacheArgs args; struct MemoryCacheArgs args;
args.address = address; args.address = address;
args.size = size; args.size = size;
...@@ -162,84 +151,21 @@ int fpga_reset() { ...@@ -162,84 +151,21 @@ int fpga_reset() {
} }
int ioctl_conv(const struct ConvArgs &args) { int ioctl_conv(const struct ConvArgs &args) {
#ifdef ENABLE_DEBUG
// std::cout << "======Compute Basic Conv======";
// std::cout << " relu_enabled:" << args.relu_enabled
// << " sb_address:" << args.sb_address
// << " filter_address:" << args.filter_address
// << " filter_num:" << args.filter_num
// << " group_num:" << args.group_num;
// std::cout << " image_address:" << args.image.address
// << " image_scale_address:" << args.image.scale_address
// << " image_channels:" << args.image.channels
// << " image_height:" << args.image.height
// << " image_width:" << args.image.width
// << " pad_height:" << args.image.pad_height
// << " pad_width:" << args.image.pad_width;
// std::cout << " kernel_height:" << args.kernel.height
// << " kernel_width:" << args.kernel.width
// << " stride_h:" << args.kernel.stride_h
// << " stride_w:" << args.kernel.stride_w;
// std::cout << " out_address:" << args.output.address
// << " out_scale_address:" << args.output.scale_address;
//
// float* in_scale = (float*)args.image.scale_address;
// std::cout << "inv_scale:" << in_scale[0] << "," << in_scale[1] <<
// std::endl;
#endif
return do_ioctl(IOCTL_CONFIG_CONV, &args); return do_ioctl(IOCTL_CONFIG_CONV, &args);
// return 0;
} }
int compute_fpga_conv_basic(const struct ConvArgs &args) { int compute_fpga_conv_basic(const struct ConvArgs &args) {
#ifdef ENABLE_DEBUG
// std::cout << "======Compute Basic Conv======";
// std::cout << " relu_enabled:" << args.relu_enabled
// << " sb_address:" << args.sb_address
// << " filter_address:" << args.filter_address
// << " filter_num:" << args.filter_num
// << " group_num:" << args.group_num;
// std::cout << " image_address:" << args.image.address
// << " image_scale_address:" << args.image.scale_address
// << " image_channels:" << args.image.channels
// << " image_height:" << args.image.height
// << " image_width:" << args.image.width
// << " pad_height:" << args.image.pad_height
// << " pad_width:" << args.image.pad_width;
// std::cout << " kernel_height:" << args.kernel.height
// << " kernel_width:" << args.kernel.width
// << " stride_h:" << args.kernel.stride_h
// << " stride_w:" << args.kernel.stride_w;
// std::cout << " out_address:" << args.output.address
// << " out_scale_address:" << args.output.scale_address;
// float *in_scale = (float *)args.image.scale_address;
// std::cout << " scale:" << in_scale[0] << "," << in_scale[1] <<
// std::endl;
// float *filter_scale = (float *)args.filter_scale_address;
// std::cout << " filter scale:" << filter_scale[0] << "," <<
// filter_scale[1] << std::endl;
#endif
return do_ioctl(IOCTL_CONFIG_CONV, &args); return do_ioctl(IOCTL_CONFIG_CONV, &args);
} }
int compute_fpga_conv(const struct SplitConvArgs &args) { int compute_fpga_conv(const struct SplitConvArgs &args) {
// return do_ioctl(IOCTL_CONFIG_CONV, &args);
int split_num = args.split_num; int split_num = args.split_num;
int ret = -1; int ret = -1;
for (int i = 0; i < split_num; i++) { for (int i = 0; i < split_num; i++) {
// ComputeBasicConv(args.conv_args[i]);
ret = compute_fpga_conv_basic(args.conv_arg[i]); ret = compute_fpga_conv_basic(args.conv_arg[i]);
} }
if (split_num > 1) { if (split_num > 1) {
std::cout << "Split num > 1 !!!!!!!!!!!!!!!!!!" << std::endl;
exit(-1); exit(-1);
} }
return ret; return ret;
...@@ -254,10 +180,7 @@ int compute_fpga_ewadd(const struct EWAddArgs &args) { ...@@ -254,10 +180,7 @@ int compute_fpga_ewadd(const struct EWAddArgs &args) {
} }
int get_device_info(const struct DeviceInfo &args) { int get_device_info(const struct DeviceInfo &args) {
// DeviceInfo info;
// struct DeviceInfo* a = &info;
int ret = do_ioctl(IOCTL_DEVICE_INFO, &args); int ret = do_ioctl(IOCTL_DEVICE_INFO, &args);
// std::cout << "a." << a->filter_cap << std::endl;
return ret; return ret;
} }
...@@ -299,7 +222,6 @@ int perform_bypass(const struct BypassArgs &args) { ...@@ -299,7 +222,6 @@ int perform_bypass(const struct BypassArgs &args) {
} }
int remainder = size - max_size * count; int remainder = size - max_size * count;
// std::cout << "remainder:" << remainder << std::endl;
if (remainder > 0) { if (remainder > 0) {
bypassArgs.image.channels = remainder; bypassArgs.image.channels = remainder;
bypassArgs.image.address = bypassArgs.image.address =
...@@ -309,7 +231,6 @@ int perform_bypass(const struct BypassArgs &args) { ...@@ -309,7 +231,6 @@ int perform_bypass(const struct BypassArgs &args) {
ret = do_ioctl(IOCTL_CONFIG_BYPASS, &bypassArgs); ret = do_ioctl(IOCTL_CONFIG_BYPASS, &bypassArgs);
scale = std::max(scale, scales[0]); scale = std::max(scale, scales[0]);
} }
args.output.scale_address[0] = scale; args.output.scale_address[0] = scale;
args.output.scale_address[1] = 1.0f / scale; args.output.scale_address[1] = 1.0f / scale;
return ret; return ret;
...@@ -318,52 +239,10 @@ int perform_bypass(const struct BypassArgs &args) { ...@@ -318,52 +239,10 @@ int perform_bypass(const struct BypassArgs &args) {
int compute_fpga_concat(const struct ConcatArgs &args) { return -1; } int compute_fpga_concat(const struct ConcatArgs &args) { return -1; }
int compute_fpga_scale(const struct ScaleArgs &args) { int compute_fpga_scale(const struct ScaleArgs &args) {
#ifdef ENABLE_DEBUG
std::cout << "======Compute Scale======";
std::cout << "scale_address:" << args.scale_address << std::endl;
std::cout << "bias_address:" << args.bias_address << std::endl;
std::cout << "wc_alignment:" << args.wc_alignment << std::endl;
std::cout << "channel_alignment:" << args.channel_alignment << std::endl;
std::cout << " image_address:" << args.image.address
<< " image_scale_address:" << args.image.scale_address
<< " image_channels:" << args.image.channels
<< " image_height:" << args.image.height
<< " image_width:" << args.image.width
<< " pad_height:" << args.image.pad_height
<< " pad_width:" << args.image.pad_width;
std::cout << " out_address:" << args.output.address
<< " out_scale_address:" << args.output.scale_address;
#endif
return do_ioctl(IOCTL_CONFIG_SCALE, &args); return do_ioctl(IOCTL_CONFIG_SCALE, &args);
} }
int compute_fpga_dwconv(const struct DWconvArgs &args) { int compute_fpga_dwconv(const struct DWconvArgs &args) {
#ifdef ENABLE_DEBUG
std::cout << "======Compute Basic Conv======";
std::cout << " relu_enabled:" << args.relu_enabled
<< " filter_address:" << args.filter_address;
std::cout << " image_address:" << args.image.address
<< " image_scale_address:" << args.image.scale_address
<< " image_channels:" << args.image.channels
<< " image_height:" << args.image.height
<< " image_width:" << args.image.width
<< " pad_height:" << args.image.pad_height
<< " pad_width:" << args.image.pad_width;
std::cout << " kernel_height:" << args.kernel.height
<< " kernel_width:" << args.kernel.width
<< " stride_h:" << args.kernel.stride_h
<< " stride_w:" << args.kernel.stride_w;
std::cout << " out_address:" << args.output.address
<< " out_scale_address:" << args.output.scale_address;
// float *in_scale = (float *)args.image.scale_address;
// std::cout << "inv_scale:" << in_scale[0] << "," << in_scale[1] <<
// std::endl;
#endif
return do_ioctl(IOCTL_CONFIG_DWCONV, &args); return do_ioctl(IOCTL_CONFIG_DWCONV, &args);
} }
......
...@@ -130,9 +130,6 @@ class ConvPE : public PE { ...@@ -130,9 +130,6 @@ class ConvPE : public PE {
wi = w - wstart; wi = w - wstart;
} }
const int index = (h * image_width + w) * image_channels + c; const int index = (h * image_width + w) * image_channels + c;
// int weight_index = (hi *
// kernel_width + wi) * image_channels
// + c;//TODO
int weight_index = oc * filter_chw + int weight_index = oc * filter_chw +
kernel_width * kernel_height * c + kernel_width * kernel_height * c +
kernel_width * hi + wi; kernel_width * hi + wi;
...@@ -141,8 +138,6 @@ class ConvPE : public PE { ...@@ -141,8 +138,6 @@ class ConvPE : public PE {
} }
} }
} }
// std::cout << " ============================= pool_index:" <<
// pool_index << " sum:" << sum << std::endl;
if (param_.relu.enabled && sum < 0) { if (param_.relu.enabled && sum < 0) {
sum = -sum; sum = -sum;
...@@ -171,13 +166,6 @@ class ConvPE : public PE { ...@@ -171,13 +166,6 @@ class ConvPE : public PE {
float_input.copyFrom(input); float_input.copyFrom(input);
float_input.syncToCPU(); float_input.syncToCPU();
// float_input.saveToFile("input", true);
// param_.filter->saveToFile("filter", true);
// param_.bias()->saveToFile("bias", true);
// exit(-1);
// float16* data_out = output->data<float16>();
float* out = float_output.mutableData<float>(FP32, output->shape()); float* out = float_output.mutableData<float>(FP32, output->shape());
float* bias_data = param_.bias()->data<float>(); float* bias_data = param_.bias()->data<float>();
...@@ -205,14 +193,8 @@ class ConvPE : public PE { ...@@ -205,14 +193,8 @@ class ConvPE : public PE {
int image_index = h * out_width * in_channel + w * in_channel + j; int image_index = h * out_width * in_channel + w * in_channel + j;
float value = image_addr[image_index] * filter_ptr[j]; float value = image_addr[image_index] * filter_ptr[j];
sum += value; sum += value;
// mi[j] = value;
} }
// for (int j = 0; j < in_channel; j++) {
// sum += mi[j];
// }
sum += bias_data[i]; sum += bias_data[i];
if (param_.relu.enabled && sum < 0) { if (param_.relu.enabled && sum < 0) {
...@@ -232,10 +214,6 @@ class ConvPE : public PE { ...@@ -232,10 +214,6 @@ class ConvPE : public PE {
output->copyFrom(&float_output); output->copyFrom(&float_output);
output->scale()[0] = max / 127; output->scale()[0] = max / 127;
output->scale()[1] = 127 / max; output->scale()[1] = 127 / max;
// float_output.saveToFile("out", true);
// exit(-1);
} }
bool dispatch() { bool dispatch() {
...@@ -264,7 +242,6 @@ class ConvPE : public PE { ...@@ -264,7 +242,6 @@ class ConvPE : public PE {
std::vector<BasicConvParam*>& params = param_.splitParams(); std::vector<BasicConvParam*>& params = param_.splitParams();
int ret = 0; int ret = 0;
for (auto conv_param : params) { for (auto conv_param : params) {
// conv_param->input.printScale();
ret |= compute_fpga_conv_basic(conv_param->args); ret |= compute_fpga_conv_basic(conv_param->args);
} }
...@@ -282,34 +259,16 @@ class ConvPE : public PE { ...@@ -282,34 +259,16 @@ class ConvPE : public PE {
size_t size = params.size(); size_t size = params.size();
if (split_axis == 0 && ret == 0 && size > 1) { if (split_axis == 0 && ret == 0 && size > 1) {
// std::cout << "concat size:" << size << std::endl;
concatPE_.dispatch(); concatPE_.dispatch();
} }
if (split_axis == 1 && ret == 0 && size > 1) { if (split_axis == 1 && ret == 0 && size > 1) {
// for (int n = 0; n < size - 1; n++) {
ElementwiseAddParam& add_param = addPE_.param(); ElementwiseAddParam& add_param = addPE_.param();
add_param.inputs = {&params[0]->output, &params[1]->output}; add_param.inputs = {&params[0]->output, &params[1]->output};
add_param.output = param_.output; add_param.output = param_.output;
addPE_.init(); addPE_.init();
addPE_.apply(); addPE_.apply();
addPE_.dispatch(); addPE_.dispatch();
// param_.output->printScale();
// params[0]->input.saveToFile("conv_1.txt");
// params[1]->input.saveToFile("conv_2.txt");
// params[0]->output.saveToFile("ew_o1.txt");
// params[1]->output.saveToFile("ew_o2.txt");
// std::cout << "\n ================== EW ================== \n";
// }
} }
if (param_.input->shape().channel() == 64 &&
param_.output->shape().channel() == 128) {
// exit(-1);
}
return ret == 0; return ret == 0;
} }
......
...@@ -212,7 +212,6 @@ inline void format_filter(Tensor* filter, ...@@ -212,7 +212,6 @@ inline void format_filter(Tensor* filter,
for (size_t i = 0; i < max_values.size(); i++) { for (size_t i = 0; i < max_values.size(); i++) {
scales.push_back(max_values[i] / max_value); scales.push_back(max_values[i] / max_value);
// scales.push_back(1.0f);
} }
// filter->saveToFile("filter.txt"); // filter->saveToFile("filter.txt");
...@@ -345,10 +344,8 @@ inline void split_filter_num(const ConvParam& c_param) { ...@@ -345,10 +344,8 @@ inline void split_filter_num(const ConvParam& c_param) {
Shape s_shape(N, {filter_num}); Shape s_shape(N, {filter_num});
float* scale_data = scale.mutableData<float>(FP32, s_shape); float* scale_data = scale.mutableData<float>(FP32, s_shape);
float* bias_data = bias.mutableData<float>(FP32, s_shape); float* bias_data = bias.mutableData<float>(FP32, s_shape);
// std::cout << "v size: " << v.size() << std::endl;
for (int n = 0; n < filter_num; n++) { for (int n = 0; n < filter_num; n++) {
scale_data[n] = param.scale()->data<float>()[n + chnnnel_start] * v[n]; scale_data[n] = param.scale()->data<float>()[n + chnnnel_start] * v[n];
// scale_data[n] = param.scale()->data<float>()[n + chnnnel_start];
} }
for (int n = 0; n < filter_num; n++) { for (int n = 0; n < filter_num; n++) {
bias_data[n] = param.bias()->data<float>()[n + chnnnel_start]; bias_data[n] = param.bias()->data<float>()[n + chnnnel_start];
...@@ -366,8 +363,6 @@ inline void split_filter_num(const ConvParam& c_param) { ...@@ -366,8 +363,6 @@ inline void split_filter_num(const ConvParam& c_param) {
// param.scale()->saveToFile("scale.txt"); // param.scale()->saveToFile("scale.txt");
// param.bias()->saveToFile("bias.txt"); // param.bias()->saveToFile("bias.txt");
// exit(-1);
args.group_num = param.groups; args.group_num = param.groups;
args.relu_enabled = param.relu.enabled; args.relu_enabled = param.relu.enabled;
args.sb_address = conv_param->scaleBias.data<float>(); args.sb_address = conv_param->scaleBias.data<float>();
...@@ -492,7 +487,6 @@ inline int fill_split_arg(const ConvParam& c_param) { ...@@ -492,7 +487,6 @@ inline int fill_split_arg(const ConvParam& c_param) {
split_filter_num(c_param); split_filter_num(c_param);
return 0; return 0;
} }
// split_filter_num(c_param);
} }
inline bool compute_conv(const ConvParam& c_conv_params) { inline bool compute_conv(const ConvParam& c_conv_params) {
......
...@@ -114,8 +114,6 @@ class PoolingPE : public PE { ...@@ -114,8 +114,6 @@ class PoolingPE : public PE {
for (int c = 0; c < image_channels; ++c) { for (int c = 0; c < image_channels; ++c) {
const int pool_index = (ph * pooled_width_ + pw) * image_channels + c; const int pool_index = (ph * pooled_width_ + pw) * image_channels + c;
float sum = 0; float sum = 0;
// const int index =
// (hstart * image_width + wstart) * image_channels + c;
for (int h = hstart; h < hend; ++h) { for (int h = hstart; h < hend; ++h) {
for (int w = wstart; w < wend; ++w) { for (int w = wstart; w < wend; ++w) {
const int index = (h * image_width + w) * image_channels + c; const int index = (h * image_width + w) * image_channels + c;
...@@ -144,9 +142,7 @@ class PoolingPE : public PE { ...@@ -144,9 +142,7 @@ class PoolingPE : public PE {
Tensor float_input; Tensor float_input;
float_input.mutableData<float>(FP32, input->shape()); float_input.mutableData<float>(FP32, input->shape());
float_input.copyFrom(input); float_input.copyFrom(input);
// float_input.saveToFile("pool_float.txt");
float16* data_out = output->data<float16>(); float16* data_out = output->data<float16>();
int kernel_hw = param_.kernelSize[0] * param_.kernelSize[1]; int kernel_hw = param_.kernelSize[0] * param_.kernelSize[1];
float scale_max = 0; float scale_max = 0;
...@@ -163,7 +159,6 @@ class PoolingPE : public PE { ...@@ -163,7 +159,6 @@ class PoolingPE : public PE {
output->scale()[0] = scale_max / 127.0f; output->scale()[0] = scale_max / 127.0f;
output->scale()[1] = 127.0f / scale_max; output->scale()[1] = 127.0f / scale_max;
output->flush(); output->flush();
// exit(-1);
} }
void cpu_compute() { void cpu_compute() {
...@@ -193,7 +188,6 @@ class PoolingPE : public PE { ...@@ -193,7 +188,6 @@ class PoolingPE : public PE {
output->scale()[0] = scale_max / 127.0f; output->scale()[0] = scale_max / 127.0f;
output->scale()[1] = 127.0f / scale_max; output->scale()[1] = 127.0f / scale_max;
output->flush(); output->flush();
// exit(-1);
} }
bool dispatch() { bool dispatch() {
......
...@@ -43,81 +43,6 @@ class ScalePE : public PE { ...@@ -43,81 +43,6 @@ class ScalePE : public PE {
return true; return true;
} }
// void apply() {
// Tensor* input = param_.input;
// Tensor* output = param_.output;
// Shape& input_shape = input->shape();
// int channel = input_shape.channel();
// int repeat = 1;
// int alignment = 16;
// int length = channel;
// if (channel % alignment != 0 || channel < alignment) {
// int c_lcm = lcm(channel, alignment);
// repeat = c_lcm / (channel);
// }
// Shape shape(N, {channel * repeat});
// param_.alignedBias()->mutableData<float16>(FP16, shape);
// param_.alignedScale()->mutableData<float16>(FP16, shape);
// float16* bias_data = param_.alignedBias()->data<float16>();
// float16* scale_data = param_.alignedScale()->data<float16>();
// if (param_.bias != nullptr) {
// float* bias_data_float = param_.bias->data<float>();
// for (int i = 0; i < repeat; i++) {
// for (int j = 0; j < length; j++) {
// float16 value = float_to_half(bias_data_float[j]);
// bias_data[i * length + j] = value;
// // bias_data[i * length + j] = float_to_half(1.0f);
// }
// }
// } else {
// float16 zero = float_to_half(0.0f);
// for (int i = 0; i < repeat; i++) {
// for (int j = 0; j < length; j++) {
// bias_data[i * length + j] = zero;
// }
// }
// }
// float* scale_data_float = param_.scale->data<float>();
// for (int i = 0; i < repeat; i++) {
// for (int j = 0; j < length; j++) {
// float16 value = float_to_half(scale_data_float[j]);
// scale_data[i * length + j] = value;
// }
// }
// param_.alignedScale()->flush();
// param_.alignedBias()->flush();
// int wc = input_shape.width() * input_shape.channel();
// int wc_aligned = align_image(wc);
// ScaleArgs& args = param_.args;
// args.scale_address = param_.alignedScale()->data<void>();
// args.bias_address = param_.alignedBias()->data<void>();
// args.wc_alignment = wc_aligned;
// args.channel_alignment = channel * repeat;
// args.image.address = input->data<void>();
// args.image.scale_address = input->scale();
// args.image.channels = channel;
// args.image.height = input_shape.height();
// args.image.width = input_shape.width();
// args.image.pad_width = 0;
// args.image.pad_height = 0;
// args.output.address = output->data<void>();
// args.output.scale_address = output->scale();
// }
// bool dispatch() {
// param_.input->syncToDevice();
// std::cout << "scale dispatch" << std::endl;
// return compute_fpga_scale(param_.args) == 0;
// }
void apply() { void apply() {
Tensor* input = param_.input; Tensor* input = param_.input;
Tensor* output = param_.output; Tensor* output = param_.output;
...@@ -241,8 +166,6 @@ class ScalePE : public PE { ...@@ -241,8 +166,6 @@ class ScalePE : public PE {
for (int c = 0; c < input->shape().channel(); c++) { for (int c = 0; c < input->shape().channel(); c++) {
int index = i * input->shape().channel() + c; int index = i * input->shape().channel() + c;
float value = half_to_float(in_data[index]) * scale_data[c]; float value = half_to_float(in_data[index]) * scale_data[c];
std::cout << "value:" << value << " = " << half_to_float(in_data[index])
<< " x " << scale_data[c] << std::endl;
data_out[index] = float_to_half(value); data_out[index] = float_to_half(value);
if (value < 0) { if (value < 0) {
...@@ -273,12 +196,6 @@ class ScalePE : public PE { ...@@ -273,12 +196,6 @@ class ScalePE : public PE {
dw_param.quantizedFilter()->flush(); dw_param.quantizedFilter()->flush();
// apply(); // apply();
} }
// param_.scale->saveToFile("scale.txt");
// cpu_compute();
// return true;
// param_.input->syncToDevice();
// return compute_fpga_scale(param_.args) == 0;
param_.input->syncToDevice(); param_.input->syncToDevice();
return dw_pe_.dispatch(); return dw_pe_.dispatch();
} }
......
...@@ -221,10 +221,6 @@ void BoxCoderCompute::Run() { ...@@ -221,10 +221,6 @@ void BoxCoderCompute::Run() {
} }
} }
} }
// prior_box->ZynqTensor()->saveToFile("prior_box", true);
// prior_box_var->ZynqTensor()->saveToFile("prior_box_var", true);
// output_box->ZynqTensor()->saveToFile("box_coder", true);
} }
} // namespace arm } // namespace arm
......
...@@ -61,26 +61,10 @@ class FillConstantCompute : public KernelLite<TARGET(kARM), PRECISION(kFloat)> { ...@@ -61,26 +61,10 @@ class FillConstantCompute : public KernelLite<TARGET(kARM), PRECISION(kFloat)> {
auto& param = *param_.get_mutable<param_t>(); auto& param = *param_.get_mutable<param_t>();
auto& context = ctx_->As<ARMContext>(); auto& context = ctx_->As<ARMContext>();
if (param.dtype == static_cast<int32_t>(lite::core::FluidType::FP32)) { auto data = param.Out->template mutable_data<T>();
auto data = param.Out->template mutable_data<float>();
for (int i = 0; i < param.Out->numel(); i++) {
data[i] = param.value;
}
} else if (param.dtype ==
static_cast<int32_t>(lite::core::FluidType::INT32)) {
auto data = param.Out->template mutable_data<int32_t>();
for (int i = 0; i < param.Out->numel(); i++) {
data[i] = param.value;
}
} else if (param.dtype ==
static_cast<int32_t>(lite::core::FluidType::INT8)) {
auto data = param.Out->template mutable_data<int8_t>();
for (int i = 0; i < param.Out->numel(); i++) { for (int i = 0; i < param.Out->numel(); i++) {
data[i] = param.value; data[i] = param.value;
} }
} else {
LOG(FATAL) << "not supported dtype " << param.dtype;
}
} }
virtual ~FillConstantCompute() = default; virtual ~FillConstantCompute() = default;
......
...@@ -85,9 +85,6 @@ void PriorBoxCompute::Run() { ...@@ -85,9 +85,6 @@ void PriorBoxCompute::Run() {
is_clip, is_clip,
order, order,
min_max_aspect_ratios_order); min_max_aspect_ratios_order);
param.boxes->ZynqTensor()->saveToFile("pb_boxes", true);
param.variances->ZynqTensor()->saveToFile("pb_variance", true);
} }
} // namespace arm } // namespace arm
...@@ -106,17 +103,3 @@ REGISTER_LITE_KERNEL(prior_box, ...@@ -106,17 +103,3 @@ REGISTER_LITE_KERNEL(prior_box,
.BindOutput("Boxes", {LiteType::GetTensorTy(TARGET(kARM))}) .BindOutput("Boxes", {LiteType::GetTensorTy(TARGET(kARM))})
.BindOutput("Variances", {LiteType::GetTensorTy(TARGET(kARM))}) .BindOutput("Variances", {LiteType::GetTensorTy(TARGET(kARM))})
.Finalize(); .Finalize();
// REGISTER_LITE_KERNEL(prior_box,
// kFPGA,
// kFP16,
// kNHWC,
// paddle::lite::kernels::arm::PriorBoxCompute,
// def)
// .BindInput("Input",{LiteType::GetTensorTy(
// TARGET(kFPGA), PRECISION(kFP16), DATALAYOUT(kNHWC))})
// .BindInput("Image", {LiteType::GetTensorTy(
// TARGET(kFPGA), PRECISION(kFP16), DATALAYOUT(kNHWC))})
// .BindOutput("Boxes", {LiteType::GetTensorTy(TARGET(kARM))})
// .BindOutput("Variances", {LiteType::GetTensorTy(TARGET(kARM))})
// .Finalize();
...@@ -9,14 +9,14 @@ set(fpga_deps fpga_target_wrapper kernel_fpga) ...@@ -9,14 +9,14 @@ set(fpga_deps fpga_target_wrapper kernel_fpga)
# add_kernel(box_coder_compute_fpga FPGA basic SRCS box_coder_compute.cc DEPS ${fpga_deps}) # add_kernel(box_coder_compute_fpga FPGA basic SRCS box_coder_compute.cc DEPS ${fpga_deps})
# add_kernel(concat_compute_fpga FPGA basic SRCS concat_compute.cc DEPS ${fpga_deps}) # add_kernel(concat_compute_fpga FPGA basic SRCS concat_compute.cc DEPS ${fpga_deps})
add_kernel(conv_compute_fpga FPGA basic SRCS conv_compute.cc DEPS ${fpga_deps}) add_kernel(conv_compute_fpga FPGA basic SRCS conv_compute.cc DEPS ${fpga_deps})
add_kernel(density_prior_box_compute_fpga FPGA basic SRCS density_prior_box_compute.cc DEPS ${fpga_deps}) # add_kernel(density_prior_box_compute_fpga FPGA basic SRCS density_prior_box_compute.cc DEPS ${fpga_deps})
add_kernel(dropout_compute_fpga FPGA basic SRCS dropout_compute.cc DEPS ${fpga_deps}) add_kernel(dropout_compute_fpga FPGA basic SRCS dropout_compute.cc DEPS ${fpga_deps})
add_kernel(elementwise_compute_fpga FPGA basic SRCS elementwise_compute.cc DEPS ${fpga_deps}) add_kernel(elementwise_compute_fpga FPGA basic SRCS elementwise_compute.cc DEPS ${fpga_deps})
# add_kernel(feed_compute_fpga FPGA basic SRCS fc_compute.cc DEPS ${fpga_deps}) # add_kernel(feed_compute_fpga FPGA basic SRCS fc_compute.cc DEPS ${fpga_deps})
add_kernel(fc_compute_fpga FPGA basic SRCS fc_compute.cc DEPS ${fpga_deps}) add_kernel(fc_compute_fpga FPGA basic SRCS fc_compute.cc DEPS ${fpga_deps})
add_kernel(gru_compute_fpga FPGA extra SRCS gru_compute.cc DEPS ${fpga_deps}) # add_kernel(gru_compute_fpga FPGA extra SRCS gru_compute.cc DEPS ${fpga_deps})
# add_kernel(mul_compute_fpga FPGA basic SRCS mul_compute.cc DEPS ${fpga_deps}) add_kernel(mul_compute_fpga FPGA basic SRCS mul_compute.cc DEPS ${fpga_deps})
add_kernel(multiclass_nms_compute_fpga FPGA basic SRCS multiclass_nms_compute.cc DEPS ${fpga_deps}) add_kernel(multiclass_nms_compute_fpga FPGA basic SRCS multiclass_nms_compute.cc DEPS ${fpga_deps})
add_kernel(norm_compute_fpga FPGA basic SRCS norm_compute.cc DEPS ${fpga_deps}) add_kernel(norm_compute_fpga FPGA basic SRCS norm_compute.cc DEPS ${fpga_deps})
# add_kernel(im2sequence_compute_fpga FPGA basic SRCS im2sequence_compute.cc DEPS ${fpga_deps}) # add_kernel(im2sequence_compute_fpga FPGA basic SRCS im2sequence_compute.cc DEPS ${fpga_deps})
......
...@@ -47,7 +47,7 @@ void ConcatCompute::Run() { ...@@ -47,7 +47,7 @@ void ConcatCompute::Run() {
pe_.dispatch(); pe_.dispatch();
#ifdef FPGA_PRINT_TENSOR #ifdef FPGA_PRINT_TENSOR
zynqmp::ConcatParam& concat_param = pe_.param(); zynqmp::ConcatParam& concat_param = pe_.param();
Debugger.get_instance()::registerOutput("concat", concat_param.output); Debugger::get_instance().registerOutput("concat", concat_param.output);
#endif #endif
} }
......
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "lite/kernels/fpga/density_prior_box_compute.h"
#include <string>
#include <vector>
#include "lite/backends/arm/math/funcs.h"
namespace paddle {
namespace lite {
namespace kernels {
namespace fpga {
// inline void ExpandAspectRatios(const std::vector<float>& input_aspect_ratior,
// bool flip,
// std::vector<float>* output_aspect_ratior) {
// constexpr float epsilon = 1e-6;
// output_aspect_ratior->clear();
// output_aspect_ratior->push_back(1.0f);
// for (size_t i = 0; i < input_aspect_ratior.size(); ++i) {
// float ar = input_aspect_ratior[i];
// bool already_exist = false;
// for (size_t j = 0; j < output_aspect_ratior->size(); ++j) {
// if (fabs(ar - output_aspect_ratior->at(j)) < epsilon) {
// already_exist = true;
// break;
// }
// }
// if (!already_exist) {
// output_aspect_ratior->push_back(ar);
// if (flip) {
// output_aspect_ratior->push_back(1.0f / ar);
// }
// }
// }
// }
// Stub implementation: density_prior_box is currently DISABLED on FPGA.
// Running this kernel is a no-op — the "Boxes" and "Variances" outputs are
// NOT filled in. The former CPU fallback (lite::arm::math::density_prior_box)
// is kept below, commented out, for reference.
// TODO(review): either restore the fallback or drop the kernel registration,
// so graphs using density_prior_box don't silently produce empty outputs.
void DensityPriorBoxCompute::Run() {
  // auto& param = Param<operators::DensityPriorBoxParam>();
  // bool is_flip = param.flip;
  // bool is_clip = param.clip;
  // std::vector<float> min_size = param.min_sizes;
  // std::vector<float> fixed_size = param.fixed_sizes;
  // std::vector<float> fixed_ratio = param.fixed_ratios;
  // auto density_size = param.density_sizes;
  // std::vector<float> max_size = param.max_sizes;
  // std::vector<float> aspect_ratio = param.aspect_ratios;
  // std::vector<float> variance = param.variances_;
  // int img_w = param.img_w;
  // int img_h = param.img_h;
  // float step_w = param.step_w;
  // float step_h = param.step_h;
  // float offset = param.offset;
  // std::vector<float> aspect_ratios_vec;
  // ExpandAspectRatios(aspect_ratio, is_flip, &aspect_ratios_vec);
  // size_t prior_num = aspect_ratios_vec.size() * min_size.size();
  // prior_num += max_size.size();
  // if (fixed_size.size() > 0) {
  //   prior_num = fixed_size.size() * fixed_ratio.size();
  // }
  // if (density_size.size() > 0) {
  //   for (int i = 0; i < density_size.size(); ++i) {
  //     if (fixed_ratio.size() > 0) {
  //       prior_num += (fixed_ratio.size() * ((pow(density_size[i], 2)) - 1));
  //     } else {
  //       prior_num +=
  //           ((fixed_ratio.size() + 1) * ((pow(density_size[i], 2)) - 1));
  //     }
  //   }
  // }
  // std::vector<std::string> order = param.order;
  // lite::arm::math::density_prior_box(param.input,
  //                                    param.image,
  //                                    &param.boxes,
  //                                    &param.variances,
  //                                    min_size,
  //                                    fixed_size,
  //                                    fixed_ratio,
  //                                    density_size,
  //                                    max_size,
  //                                    aspect_ratio,
  //                                    variance,
  //                                    img_w,
  //                                    img_h,
  //                                    step_w,
  //                                    step_h,
  //                                    offset,
  //                                    prior_num,
  //                                    is_flip,
  //                                    is_clip,
  //                                    order);
}
} // namespace fpga
} // namespace kernels
} // namespace lite
} // namespace paddle
// Registers the (currently stubbed — see Run()) density_prior_box kernel for
// the FPGA target with FP16 precision and NHWC layout.
// NOTE(review): "Boxes" is bound to an ARM/host tensor while "Variances"
// stays FPGA/FP16/NHWC — confirm this asymmetry is intentional; the arm
// prior_box kernel binds BOTH outputs to kARM.
REGISTER_LITE_KERNEL(density_prior_box,
                     kFPGA,
                     kFP16,
                     kNHWC,
                     paddle::lite::kernels::fpga::DensityPriorBoxCompute,
                     def)
    .BindInput("Input",
               {LiteType::GetTensorTy(TARGET(kFPGA),
                                      PRECISION(kFP16),
                                      DATALAYOUT(kNHWC))})
    .BindInput("Image",
               {LiteType::GetTensorTy(TARGET(kFPGA),
                                      PRECISION(kFP16),
                                      DATALAYOUT(kNHWC))})
    .BindOutput("Boxes", {LiteType::GetTensorTy(TARGET(kARM))})
    .BindOutput("Variances",
                {LiteType::GetTensorTy(TARGET(kFPGA),
                                       PRECISION(kFP16),
                                       DATALAYOUT(kNHWC))})
    .Finalize();
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include "lite/core/kernel.h"
#include "lite/core/op_registry.h"
namespace paddle {
namespace lite {
namespace kernels {
namespace fpga {
// FPGA kernel wrapper for the density_prior_box operator (FP16, NHWC).
// The Run() body lives in density_prior_box_compute.cc and is currently a
// stub (all computation commented out), so this kernel performs no work.
class DensityPriorBoxCompute
    : public KernelLite<TARGET(kFPGA), PRECISION(kFP16), DATALAYOUT(kNHWC)> {
 public:
  using param_t = operators::DensityPriorBoxParam;
  // Intended to fill the "Boxes"/"Variances" outputs; see .cc for the stub.
  void Run() override;
  virtual ~DensityPriorBoxCompute() = default;
};
} // namespace fpga
} // namespace kernels
} // namespace lite
} // namespace paddle
...@@ -45,7 +45,6 @@ class IoCopyHostToFpgaCompute ...@@ -45,7 +45,6 @@ class IoCopyHostToFpgaCompute
auto& param = Param<operators::IoCopyParam>(); auto& param = Param<operators::IoCopyParam>();
CHECK(param.x->target() == TARGET(kHost) || CHECK(param.x->target() == TARGET(kHost) ||
param.x->target() == TARGET(kFPGA)); param.x->target() == TARGET(kFPGA));
// param.y->CopyDataFrom(*param.x);
param.y->mutable_data<float16>(); param.y->mutable_data<float16>();
if (param.x->ZynqTensor()->aligned() && if (param.x->ZynqTensor()->aligned() &&
param.x->ZynqTensor()->shape().shouldAlign()) { param.x->ZynqTensor()->shape().shouldAlign()) {
...@@ -53,10 +52,8 @@ class IoCopyHostToFpgaCompute ...@@ -53,10 +52,8 @@ class IoCopyHostToFpgaCompute
tempTensor.mutableData<float16>(zynqmp::FP16, tempTensor.mutableData<float16>(zynqmp::FP16,
param.x->ZynqTensor()->shape()); param.x->ZynqTensor()->shape());
tempTensor.copyFrom(param.x->ZynqTensor()); tempTensor.copyFrom(param.x->ZynqTensor());
// tempTensor.saveToFile("tempTensor", true);
tempTensor.setAligned(true); tempTensor.setAligned(true);
tempTensor.unalignImage(); tempTensor.unalignImage();
// tempTensor.saveToFile("unaligned", true);
param.y->ZynqTensor()->copyFrom(&tempTensor); param.y->ZynqTensor()->copyFrom(&tempTensor);
} else { } else {
param.y->ZynqTensor()->copyFrom(param.x->ZynqTensor()); param.y->ZynqTensor()->copyFrom(param.x->ZynqTensor());
...@@ -97,11 +94,9 @@ class IoCopyFpgaToHostCompute ...@@ -97,11 +94,9 @@ class IoCopyFpgaToHostCompute
: public KernelLite<TARGET(kFPGA), PRECISION(kAny), DATALAYOUT(kAny)> { : public KernelLite<TARGET(kFPGA), PRECISION(kAny), DATALAYOUT(kAny)> {
public: public:
void Run() override { void Run() override {
// std::cout << "IoCopyFpgaToHostCompute \n";
auto& param = Param<operators::IoCopyParam>(); auto& param = Param<operators::IoCopyParam>();
CHECK(param.x->target() == TARGET(kHost) || CHECK(param.x->target() == TARGET(kHost) ||
param.x->target() == TARGET(kFPGA)); param.x->target() == TARGET(kFPGA));
// std::cout << "before CopyDataFrom \n";
param.y->mutable_data<float>(); param.y->mutable_data<float>();
param.y->ZynqTensor()->setDataType(zynqmp::FP32); param.y->ZynqTensor()->setDataType(zynqmp::FP32);
...@@ -113,10 +108,8 @@ class IoCopyFpgaToHostCompute ...@@ -113,10 +108,8 @@ class IoCopyFpgaToHostCompute
tempTensor.mutableData<float16>(zynqmp::FP16, tempTensor.mutableData<float16>(zynqmp::FP16,
param.x->ZynqTensor()->shape()); param.x->ZynqTensor()->shape());
tempTensor.copyFrom(param.x->ZynqTensor()); tempTensor.copyFrom(param.x->ZynqTensor());
// tempTensor.saveToFile("tempTensor", true);
tempTensor.setAligned(true); tempTensor.setAligned(true);
tempTensor.unalignImage(); tempTensor.unalignImage();
// tempTensor.saveToFile("unaligned", true);
param.y->ZynqTensor()->copyFrom(&tempTensor); param.y->ZynqTensor()->copyFrom(&tempTensor);
} else { } else {
param.y->ZynqTensor()->copyFrom(param.x->ZynqTensor()); param.y->ZynqTensor()->copyFrom(param.x->ZynqTensor());
......
...@@ -29,11 +29,6 @@ using float16 = zynqmp::float16; ...@@ -29,11 +29,6 @@ using float16 = zynqmp::float16;
template <typename T> template <typename T>
void convert_to_hwc( void convert_to_hwc(
T* chw_data, T* hwc_data, int num, int channel, int height, int width) { T* chw_data, T* hwc_data, int num, int channel, int height, int width) {
std::cout << " -------------- chw -> HWC ---------------\n";
std::cout << "channel: " << channel << std::endl;
std::cout << "height: " << height << std::endl;
std::cout << "width: " << width << std::endl;
int chw = channel * height * width; int chw = channel * height * width;
int wc = width * channel; int wc = width * channel;
int index = 0; int index = 0;
...@@ -52,10 +47,6 @@ void convert_to_hwc( ...@@ -52,10 +47,6 @@ void convert_to_hwc(
template <typename T> template <typename T>
void hwc_to_chw( void hwc_to_chw(
T* chw_data, T* hwc_data, int num, int channel, int height, int width) { T* chw_data, T* hwc_data, int num, int channel, int height, int width) {
std::cout << " ============= HWC -> CHW =============\n";
std::cout << "channel: " << channel << std::endl;
std::cout << "height: " << height << std::endl;
std::cout << "width: " << width << std::endl;
int chw = channel * height * width; int chw = channel * height * width;
int wc = width * channel; int wc = width * channel;
int wh = width * height; int wh = width * height;
...@@ -73,10 +64,7 @@ void hwc_to_chw( ...@@ -73,10 +64,7 @@ void hwc_to_chw(
} }
void TransHwcToChw(Tensor* dest, const Tensor* src) { void TransHwcToChw(Tensor* dest, const Tensor* src) {
std::cout << "precision:" << static_cast<int>(src->precision()) << std::endl;
std::cout << "dataType:" << src->ZynqTensor()->dataType() << std::endl;
if (src->ZynqTensor()->dataType() == zynqmp::FP32) { if (src->ZynqTensor()->dataType() == zynqmp::FP32) {
std::cout << "float\n";
float* chw = dest->mutable_data<float>(); float* chw = dest->mutable_data<float>();
float* hwc = const_cast<float*>(src->data<float>()); float* hwc = const_cast<float*>(src->data<float>());
int num = dest->dims()[0]; int num = dest->dims()[0];
...@@ -94,7 +82,6 @@ void TransHwcToChw(Tensor* dest, const Tensor* src) { ...@@ -94,7 +82,6 @@ void TransHwcToChw(Tensor* dest, const Tensor* src) {
} }
if (src->ZynqTensor()->dataType() == zynqmp::FP16) { if (src->ZynqTensor()->dataType() == zynqmp::FP16) {
std::cout << "float16\n";
float16* chw = dest->mutable_data<float16>(); float16* chw = dest->mutable_data<float16>();
float16* hwc = const_cast<float16*>(src->data<float16>()); float16* hwc = const_cast<float16*>(src->data<float16>());
int num = dest->dims()[0]; int num = dest->dims()[0];
...@@ -126,9 +113,6 @@ class TransHwcToChwCompute ...@@ -126,9 +113,6 @@ class TransHwcToChwCompute
param.y->ZynqTensor()->flush(); param.y->ZynqTensor()->flush();
param.y->ZynqTensor()->copyScaleFrom(param.x->ZynqTensor()); param.y->ZynqTensor()->copyScaleFrom(param.x->ZynqTensor());
// param.x->ZynqTensor()->saveToFile("src_hwc", true);
// param.y->ZynqTensor()->saveToFile("src_dst", true);
auto out_lod = param.y->mutable_lod(); auto out_lod = param.y->mutable_lod();
*out_lod = param.x->lod(); *out_lod = param.x->lod();
} }
......
...@@ -84,7 +84,7 @@ void MulCompute::Run() { ...@@ -84,7 +84,7 @@ void MulCompute::Run() {
#ifdef FPGA_PRINT_TENSOR #ifdef FPGA_PRINT_TENSOR
zynqmp::FullyConnectedParam& fc_param = pe_.param(); zynqmp::FullyConnectedParam& fc_param = pe_.param();
Debugger.get_instance().registerOutput("mul", fc_param.output); Debugger::get_instance().registerOutput("mul", fc_param.output);
#endif #endif
} }
......
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "lite/kernels/fpga/sequence_pool_compute.h"
#include <string>
#include <vector>
#include "lite/backends/arm/math/funcs.h"
#include "lite/core/op_registry.h"
#include "lite/core/tensor.h"
#include "lite/core/type_system.h"
namespace paddle {
namespace lite {
namespace kernels {
namespace fpga {
// No per-run preparation is needed for this (stubbed) FPGA kernel.
void SequencePoolCompute::PrepareForRun() {}
// Stub sequence_pool for FPGA: the actual pooling math (SUM/AVERAGE/SQRT/
// MAX/MIN/FIRST/LAST) is NOT implemented here — the removed ARM fallback
// is in git history. This Run() only allocates the output buffer and sets
// up the output LoD so downstream ops see one length-1 sequence per input
// sequence. Output DATA is left uninitialized.
// TODO(review): implement the pooling or dispatch back to the ARM kernels.
void SequencePoolCompute::Run() {
  auto& param = Param<operators::SequencePoolParam>();
  auto& output = param.Out;
  // Allocate the output buffer (kept for its side effect even though the
  // pooling itself is not performed yet).
  output->mutable_data<float>();

  // Level-0 LoD of the input: lod.size() - 1 sequences.
  const auto lod = param.X->lod()[0];
  int batch_size = lod.size() - 1;

  // Each output "sequence" has length 1, so the new offsets are 0..batch_size.
  std::vector<uint64_t> offset_new(static_cast<uint64_t>(batch_size + 1));
  for (int i = 0; i <= batch_size; i++) {
    offset_new[i] = static_cast<uint64_t>(i);
  }
  (output->mutable_lod())->push_back(offset_new);
}
} // namespace fpga
} // namespace kernels
} // namespace lite
} // namespace paddle
// Registers the sequence_pool kernel for the FPGA target (FP16, NHWC).
// NOTE(review): Run() currently only rewrites the output LoD without
// computing any pooling — verify this is acceptable before enabling models
// that rely on sequence_pool results. "MaxIndex" is bound to a host (kARM)
// tensor but is never written by this kernel.
REGISTER_LITE_KERNEL(sequence_pool,
                     kFPGA,
                     kFP16,
                     kNHWC,
                     paddle::lite::kernels::fpga::SequencePoolCompute,
                     def)
    .BindInput("X",
               {LiteType::GetTensorTy(TARGET(kFPGA),
                                      PRECISION(kFP16),
                                      DATALAYOUT(kNHWC))})
    .BindOutput("Out",
                {LiteType::GetTensorTy(TARGET(kFPGA),
                                       PRECISION(kFP16),
                                       DATALAYOUT(kNHWC))})
    .BindOutput("MaxIndex", {LiteType::GetTensorTy(TARGET(kARM))})
    .Finalize();
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <stdint.h>
#include "lite/backends/arm/math/type_trans.h"
#include "lite/core/kernel.h"
#include "lite/core/op_registry.h"
namespace paddle {
namespace lite {
namespace kernels {
namespace fpga {
// FPGA kernel wrapper for the sequence_pool operator (FP16, NHWC).
// NOTE: the pooling computation is not implemented yet; Run() only sets up
// the output LoD (see sequence_pool_compute.cc).
class SequencePoolCompute
    : public KernelLite<TARGET(kFPGA), PRECISION(kFP16), DATALAYOUT(kNHWC)> {
 public:
  // No-op: nothing to prepare for this stub kernel.
  void PrepareForRun() override;
  // Allocates the output and rewrites its LoD; does no pooling math.
  void Run() override;
  virtual ~SequencePoolCompute() = default;
};
} // namespace fpga
} // namespace kernels
} // namespace lite
} // namespace paddle
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册