diff --git a/lite/api/ocr_attention_test_fpga.cc b/lite/api/ocr_attention_test_fpga.cc old mode 100644 new mode 100755 index a4154cf27ba8a718fd9b90fa012b0834ae33b348..326de883d1625f7196426094cc4ccec970f8a399 --- a/lite/api/ocr_attention_test_fpga.cc +++ b/lite/api/ocr_attention_test_fpga.cc @@ -27,25 +27,6 @@ DEFINE_string(input_file, "", "input_file"); namespace paddle { namespace lite { -// float* temp_data = new float(33 * 10 * 23); - -// std::vector GetDirectoryFiles(const std::string& dir) { -// std::vector files; -// std::shared_ptr directory_ptr(opendir(dir.c_str()), -// [](DIR* dir) { dir&& closedir(dir); }); -// struct dirent* dirent_ptr; -// if (!directory_ptr) { -// std::cout << "Error opening : " << std::strerror(errno) << dir << -// std::endl; -// return files; -// } - -// while ((dirent_ptr = readdir(directory_ptr.get())) != nullptr) { -// files.push_back(std::string(dirent_ptr->d_name)); -// } -// return files; -// } - void read_from_file(const std::string& path, float* data, int num) { std::ifstream file_stream; file_stream.open(path); @@ -87,7 +68,6 @@ void TestModel(const std::vector& valid_places, predictor.Build("", "attention/model", "attention/params", valid_places); auto* input_tensor = predictor.GetInput(0); - // input_tensor->Resize(DDim(std::vector({1, 1, 48, 512}))); input_tensor->Resize(DDim(std::vector({1, 1, 100, 200}))); auto* data = input_tensor->mutable_data(); auto item_size = input_tensor->dims().production(); @@ -95,22 +75,7 @@ void TestModel(const std::vector& valid_places, data[i] = 1; } - // std::ifstream file_stream(FLAGS_input_file); - // // file_stream.open(path); - // if (!file_stream.good()) { - // std::cout << "file: " << FLAGS_input_file << " dones not exist!\n"; - // exit(-1); - // return; - // } - - // read_from_file("n7cu17.data", data, 100 * 200); read_from_file(FLAGS_input_file, data, 100 * 200); - // read_from_file("t.data", data, 48 * 512); - - // for (int i = 0;i < 48 * 512;i++ ) { - // std::cout << ":" << data[i] << std::endl; - // } - //============================================= auto* init_ids = predictor.GetInput(1); init_ids->Resize(DDim(std::vector({1, 1}))); @@ -143,12 +108,9 @@ void TestModel(const std::vector& valid_places, float* temp_data = position_encoding_data; - std::cout << "====================== 1\n"; - for (int i = 0; i < position_encoding->dims().production(); ++i) { temp_data[i] = 0; } - std::cout << "====================== 2\n"; int index = 0; for (int i = 0; i < 10; i++) { for (int row = 0; row < 10; row++) { @@ -162,7 +124,6 @@ void TestModel(const std::vector& valid_places, } } } - std::cout << "====================== 3\n"; for (int i = 0; i < 23; i++) { for (int row = 0; row < 10; row++) { for (int col = 0; col < 23; col++) { @@ -175,20 +136,11 @@ void TestModel(const std::vector& valid_places, } } } - std::cout << "====================== 4\n"; // chw_to_hwc(temp_data, position_encoding_data, 33, 10, 23); // delete[] temp_data; // read_from_file("position_encoding.data", position_encoding_data, 33 * 10 * // 23); - // position_encoding->ZynqTensor()->readFromFile("position_encoding.data"); - - // exit(-1); - - // for (int i = 0; i < FLAGS_warmup; ++i) { - // predictor.Run(); - // } - auto start = GetCurrentUS(); for (int i = 0; i < 2; ++i) { predictor.Run(); @@ -200,32 +152,8 @@ void TestModel(const std::vector& valid_places, << ", spend " << (GetCurrentUS() - start) / FLAGS_repeats / 1000.0 << " ms in average."; - // std::vector> results; - // // i = 1 - // results.emplace_back(std::vector( - // {0.00019130898, 9.467885e-05, 0.00015971427, 0.0003650665, - // 0.00026431272, 0.00060884043, 0.0002107942, 0.0015819625, - // 0.0010323516, 0.00010079765, 0.00011006987, 0.0017364529, - // 0.0048292773, 0.0013995157, 0.0018453331, 0.0002428986, - // 0.00020211363, 0.00013668182, 0.0005855956, 0.00025901722})); auto* out = predictor.GetOutput(0); - // ASSERT_EQ(out->dims().size(), 2); - // ASSERT_EQ(out->dims()[0], 1); - // ASSERT_EQ(out->dims()[1], 1000); - // - // int step = 50; - for (int i = 0; i < 10; i++) { - // std::cout << ":" << out->data()[i] << std::endl; - } - // for (int i = 0; i < results.size(); ++i) { - // for (int j = 0; j < results[i].size(); ++j) { - // EXPECT_NEAR(out->data()[j * step + (out->dims()[1] * i)], - // results[i][j], - // 1e-6); - // } - // } - std::string file = "plate_data/" + FLAGS_input_file.substr(9); std::cout << "file:::" << file << std::endl; @@ -244,9 +172,6 @@ TEST(OcrAttention, test_arm) { Place{TARGET(kHost), PRECISION(kFloat)}, Place{TARGET(kARM), PRECISION(kFloat)}, }); - - // Place{TARGET(kFPGA), PRECISION(kFP16), DATALAYOUT(kNHWC)}, - TestModel(valid_places, Place{TARGET(kARM), PRECISION(kFloat)}); } diff --git a/lite/api/resnet50_test_fpga.cc b/lite/api/resnet50_test_fpga.cc old mode 100644 new mode 100755 index 75e6f0cbbc43c3cd7eb9bfa89bc004554ea6f85b..6178915819ebcad2782778d0938c67d012d93d24 --- a/lite/api/resnet50_test_fpga.cc +++ b/lite/api/resnet50_test_fpga.cc @@ -31,7 +31,11 @@ TEST(ResNet50, test) { std::vector valid_places( {Place{TARGET(kFPGA), PRECISION(kFP16), DATALAYOUT(kNHWC)}}); - predictor.Build(FLAGS_model_dir, "", "", valid_places); + predictor.Build(FLAGS_model_dir, + "", + "", + Place{TARGET(kFPGA), PRECISION(kFP16), DATALAYOUT(kNHWC)}, + valid_places); auto* input_tensor = predictor.GetInput(0); input_tensor->Resize(DDim(std::vector({1, 3, 224, 224}))); diff --git a/lite/backends/fpga/KD/pes/conv_process.hpp b/lite/backends/fpga/KD/pes/conv_process.hpp index dd5672cb0a88b7839e612161dcefbeb0bc4c4e77..3db9662b62cce6ed33d059f60835dca25be5f60e 100755 --- a/lite/backends/fpga/KD/pes/conv_process.hpp +++ b/lite/backends/fpga/KD/pes/conv_process.hpp @@ -48,7 +48,6 @@ inline int get_split_num(Tensor* filter) { filter->shape().width(); auto num = filter->shape().num(); int div_capacity = filter::calc_division_capacity(chw); - // int aligned_num = align_to_x(num ,FILTER_NUM_ALIGNMENT); int filter_num_alignment = filter::get_filter_num_alignment(); int aligned_num = align_to_x(num, filter_num_alignment); return filter::calc_split_num(aligned_num, div_capacity); @@ -213,16 +212,6 @@ inline void format_filter(Tensor* filter, for (size_t i = 0; i < max_values.size(); i++) { scales.push_back(max_values[i] / max_value); } - - // filter->saveToFile("filter.txt"); - // std::ofstream ofs; - // ofs.open("quant.txt"); - // for (int i = 0; i < mem_size; i++) { - // float value = quantized_data[i]; - // ofs << value << std::endl; - // } - // ofs.close(); - // exit(-1); } inline void format_dw_filter(Tensor* filter, @@ -356,12 +345,9 @@ inline void split_filter_num(const ConvParam& c_param) { &conv_param->filter, &conv_param->scaleBias, param.groups); - // conv_param->scaleBias.saveToFile("sb.txt"); + conv_param->scaleBias.flush(); float* bs_data = conv_param->scaleBias.data(); - // conv_param->scaleBias.saveToFile("sb.txt"); - // param.scale()->saveToFile("scale.txt"); - // param.bias()->saveToFile("bias.txt"); args.group_num = param.groups; args.relu_enabled = param.relu.enabled; @@ -445,7 +431,6 @@ inline void split_channel(const ConvParam& c_param) { &conv_param->scaleBias, param.groups); conv_param->scaleBias.flush(); - // conv_param->scaleBias.saveToFile("sb.txt"); ConvArgs& args = conv_param->args; args.group_num = param.groups; diff --git a/lite/backends/fpga/KD/pes/fully_connected_pe.hpp b/lite/backends/fpga/KD/pes/fully_connected_pe.hpp old mode 100644 new mode 100755 index dd59f02e4fce716da4997cb68802bde7f6f53736..db3e05276171607da4cea421dd554846a00314a6 --- a/lite/backends/fpga/KD/pes/fully_connected_pe.hpp +++ b/lite/backends/fpga/KD/pes/fully_connected_pe.hpp @@ -39,12 +39,6 @@ class FullyConnectedPE : public PE { convParam_.input = param_.input; num_ = param_.input->shape().num(); - // if (num_ == 1) { - - // } else { - // tempOut_.mutableData(FP16, param_.out->shape()); - // convParam_.output = &tempOut_; - // } convParam_.output = param_.output; convParam_.groups = 1; @@ -72,11 +66,8 @@ class FullyConnectedPE : public PE { new_filter_data[i * chw + j] = scale; } } - // conv_filter->copyFrom(param_.filter); - conv_filter->flush(); convParam_.filter = conv_filter; - // convParam_.filter = param_.filter; Shape sb_shape(N, {channel}); float* scale_data = convParam_.scale()->mutableData(FP32, sb_shape); diff --git a/lite/backends/fpga/KD/tensor.hpp b/lite/backends/fpga/KD/tensor.hpp old mode 100644 new mode 100755 index d06cf7be455991fd9c79a8d909af0344cc19c16f..047498eed009dded5ce398ddabc2079b62d937df --- a/lite/backends/fpga/KD/tensor.hpp +++ b/lite/backends/fpga/KD/tensor.hpp @@ -25,13 +25,10 @@ limitations under the License. */ #include #include -// #include "lite/core/tensor.h" - #include "lite/backends/fpga/KD/dl_engine.hpp" #include "lite/backends/fpga/KD/float16.hpp" #include "lite/backends/fpga/KD/llapi/zynqmp_api.h" #include "lite/backends/fpga/KD/shape.hpp" -// #include "lite/backends/fpga/KD/types.hpp" namespace paddle { namespace zynqmp { @@ -398,15 +395,9 @@ class Tensor { void save_file_with_name(std::string path) { // return; invalidate(); - // usleep(20000); - // return; std::ofstream ofs; - ofs.open(path); - ofs << "dataType: " << dataType_ << std::endl; - ofs << "scale: " << scale()[0] << " , " << scale()[1] << std::endl; - for (int i = 0; i < shape_->numel(); i++) { float value = 0; if (dataType_ == FP32) { diff --git a/lite/core/program.cc b/lite/core/program.cc old mode 100644 new mode 100755 index cafd93e0aa6852294dd17f72471490acb318933d..edf6a2a1034952fca44308d77ea9bc0195293f8b --- a/lite/core/program.cc +++ b/lite/core/program.cc @@ -121,7 +121,9 @@ void RuntimeProgram::Run() { inst.Run(); #ifdef LITE_WITH_PROFILE #ifdef LITE_WITH_PRECISION_PROFILE -// LITE_PRECISION_PROFILE(inst) +#ifndef LITE_WITH_FPGA + LITE_PRECISION_PROFILE(inst) +#endif #endif // LITE_WITH_PRECISION_PROFILE #endif // LITE_WITH_PROFILE } diff --git a/lite/kernels/fpga/transpose_compute.cc b/lite/kernels/fpga/transpose_compute.cc old mode 100644 new mode 100755 index 3d42878514c46553a0ec60936847f204a9c32da2..d7581f5b089eb24b608add0214b2993c3f4f97ee --- a/lite/kernels/fpga/transpose_compute.cc +++ b/lite/kernels/fpga/transpose_compute.cc @@ -47,8 +47,6 @@ void Transpose2Compute::Run() { // int num = input_x_dims[1]; // int channel = input_x_dims[2]; - // // DLOG << "num::" << num << " channel::" << channel; - // int index = 0; // for (int n = 0; n < num; n++) { // for (int c = 0; c < channel; c++) {