From c6dcffc61a12a505da7043f2c1de2a56deef105a Mon Sep 17 00:00:00 2001
From: dzhwinter
Date: Wed, 24 Oct 2018 05:13:34 +0800
Subject: [PATCH] lb. add debug output

---
 paddle/fluid/framework/executor.cc            | 102 ++++++-
 .../inference/api/demo_ci/CMakeLists.txt      |  21 +-
 .../inference/api/demo_ci/inference_icnet.cc  | 249 +++++++++---------
 .../inference/api/demo_ci/inference_icnet.h   |  21 ++
 .../api/demo_ci/real_data_icnet_tester.cc     | 123 +++++++++
 paddle/fluid/inference/api/demo_ci/test.cc    |  99 +++++++
 .../api/demo_ci/thread_icnet_test.cc          | 105 ++++++++
 paddle/fluid/operators/batch_norm_op.cu.cc    |  21 ++
 paddle/fluid/operators/load_combine_op.cc     |  12 +-
 9 files changed, 626 insertions(+), 127 deletions(-)
 create mode 100644 paddle/fluid/inference/api/demo_ci/inference_icnet.h
 create mode 100644 paddle/fluid/inference/api/demo_ci/real_data_icnet_tester.cc
 create mode 100644 paddle/fluid/inference/api/demo_ci/test.cc
 create mode 100644 paddle/fluid/inference/api/demo_ci/thread_icnet_test.cc

diff --git a/paddle/fluid/framework/executor.cc b/paddle/fluid/framework/executor.cc
index 1101707f80..c318c5fc1a 100644
--- a/paddle/fluid/framework/executor.cc
+++ b/paddle/fluid/framework/executor.cc
@@ -333,9 +333,49 @@ std::vector<std::shared_ptr<ExecutorPrepareContext>> Executor::Prepare(
   return result;
 }
 
+// void CheckResult(const std::string op_type, ExecutorPrepareContext* ctx, Scope* local_scope) {
+//   VLOG(3) << "before checking result";
+//   auto& dev_ctx = *platform::DeviceContextPool::Instance().Get(place_);
+//   std::vector<framework::LoDTensor> outputs;
+//   auto& block = ctx->prog_.Block(0);
+//   bool found = false;
+//   framework::OpDesc* myop = nullptr;
+//   for(auto& op : block.AllOps()) {
+//     if(op->Type() == "load_combine" || op->Type() == "fetch" || op->Type() == "feed") return;
+//     if (op->Type() == op_type) {
+//       found = true;
+//       myop = op;
+//       break;
+//     }
+//   }
+//   if(!found) {
+//     VLOG(3) << "not found op!";
+//     return;
+//   }
+//   auto* op = myop;
+//   VLOG(3) << "start op output" << op->Type();
+//   for(auto var_name: op->OutputArgumentNames()) {
+//     auto* var = local_scope->Var(var_name);
+//     auto* var_desc = block.FindVar(var_name);
+//     if (var_desc->Persistable()) continue;
+//     auto* tensor = var->GetMutable<framework::LoDTensor>();
+//     framework::Tensor check;
+//     VLOG(3) << "before tensor copy";
+//     framework::TensorCopy(*tensor, platform::CPUPlace(), dev_ctx, &check);
+//     VLOG(3) << "after tensor copy";
+//     float sum = .0;
+//     for(size_t i=0; i < check.numel(); ++i) {
+//       sum += check.data<float>()[i];
+//     }
+//     VLOG(3) << "op " << op->Type() << " output var " << var_name << " sum " << sum;
+//   }
+//   VLOG(3) << "after checking result";
+// }
+
 void Executor::RunPreparedContext(ExecutorPrepareContext* ctx, Scope* scope,
                                   bool create_local_scope, bool create_vars,
                                   bool keep_kids) {
+  VLOG(3) << "RunPreparedContext inside";
   Scope* local_scope = scope;
   if (create_vars) {
     if (create_local_scope) {
@@ -346,13 +386,73 @@ void Executor::RunPreparedContext(ExecutorPrepareContext* ctx, Scope* scope,
 
   for (auto& op : ctx->ops_) {
     op->Run(*local_scope, place_);
-
+    // CheckResult(op->Type(), ctx, local_scope);
     if (FLAGS_benchmark) {
       VLOG(2) << "Memory used after operator " + op->Type() + " running: "
               << memory::memory_usage(place_);
     }
   }
   platform::DeviceContextPool::Instance().Get(place_)->Wait();
+
+  VLOG(3) << "start checking";
+  auto& dev_ctx = *platform::DeviceContextPool::Instance().Get(place_);
+  std::vector<framework::LoDTensor> outputs;
+  auto& block = ctx->prog_.Block(0);
+
+  for(auto& op : block.AllOps()) {
+    if(op->Type() == "load_combine" || op->Type() == "fetch" || op->Type() == "feed") continue;
+    // for(auto& real_op : ctx->ops_) {
+    //   if(real_op->Type() == op->Type()) {
+    //     VLOG(3) << real_op->Type() << " " << real_op->DebugStringEx(local_scope);
+    //   }
+    // }
+
+    //VLOG(3) << "start op output" << op->Type();
+    for(auto var_name: op->InputArgumentNames()) {
+      auto* var = local_scope->Var(var_name);
+      auto* var_desc = block.FindVar(var_name);
+      if (var_desc->Persistable()) continue;
+      auto* tensor = var->GetMutable<framework::LoDTensor>();
+      framework::Tensor check;
+      VLOG(3) << "before tensor copy";
+
+      framework::TensorCopy(*tensor, platform::CPUPlace(), dev_ctx, &check);
+
+      VLOG(3) << "after tensor copy";
+      float sum = .0;
+      for(size_t i=0; i < check.numel(); ++i) {
+        sum += check.data<float>()[i];
+      }
+      VLOG(3) << "op " << op->Type() << " input var " << var_name << " sum " << sum;
+    }
+
+    VLOG(3) << "op " << op->Type() << " input finished";
+    for(auto var_name: op->OutputArgumentNames()) {
+      auto* var = local_scope->Var(var_name);
+      auto* var_desc = block.FindVar(var_name);
+      if (var_desc->Persistable()) continue;
+      auto* tensor = var->GetMutable<framework::LoDTensor>();
+      framework::Tensor check;
+      VLOG(3) << "before tensor copy";
+      if(op->Type() == "batch_norm" && platform::is_gpu_place(place_)) {
+        VLOG(3) << "op " << op->Type() << " output var " << var_name << " " << tensor->numel();
+        tensor->mutable_data<float>(place_);
+        framework::TensorCopy(*tensor, platform::CPUPlace(), dev_ctx, &check);
+      } else {
+        framework::TensorCopy(*tensor, platform::CPUPlace(), dev_ctx, &check);
+      }
+
+      VLOG(3) << "after tensor copy";
+      float sum = .0;
+      for(size_t i=0; i < check.numel(); ++i) {
+        sum += check.data<float>()[i];
+      }
+      VLOG(3) << "op " << op->Type() << " output var " << var_name << " sum " << sum;
+    }
+  }
+
+  VLOG(3) << "after checking result";
+
   if (local_scope != scope) {
     scope->DeleteScope(local_scope);
   } else {
diff --git a/paddle/fluid/inference/api/demo_ci/CMakeLists.txt b/paddle/fluid/inference/api/demo_ci/CMakeLists.txt
index 4c30e1b321..93b554c83d 100644
--- a/paddle/fluid/inference/api/demo_ci/CMakeLists.txt
+++ b/paddle/fluid/inference/api/demo_ci/CMakeLists.txt
@@ -46,7 +46,7 @@ if(WITH_GPU)
   endif(NOT WIN32)
 endif()
 
-include_directories("D:/Paddle/")
+include_directories("E:/Paddle/")
 include_directories("${PADDLE_LIB}")
 include_directories("${PADDLE_LIB}/third_party/install/protobuf/include")
 include_directories("${PADDLE_LIB}/third_party/install/glog/include")
@@ -72,7 +72,12 @@ link_directories("${PADDLE_LIB}/third_party/install/gflags/lib")
 link_directories("${PADDLE_LIB}/paddle/fluid/inference")
 
 # add_executable(${DEMO_NAME} ${DEMO_NAME}.cc)
-add_library(${DEMO_NAME} ${DEMO_NAME}.cc)
+# add_library(${DEMO_NAME} ${DEMO_NAME}.cc)
+add_library(${DEMO_NAME} SHARED ${DEMO_NAME}.cc)
+add_executable(real_data_icnet_tester real_data_icnet_tester.cc)
+add_executable(test test.cc)
+add_executable(thread_icnet_test thread_icnet_test.cc)
+
 if(WITH_MKL)
   include_directories("${PADDLE_LIB}/third_party/install/mklml/include")
   set(MATH_LIB ${PADDLE_LIB}/third_party/install/mklml/lib/libmklml_intel${CMAKE_SHARED_LIBRARY_SUFFIX}
@@ -89,7 +94,11 @@ endif()
 
 # Note: libpaddle_inference_api.so/a must put before libpaddle_fluid.so/a
 if(WITH_STATIC_LIB)
   set(DEPS
-      ${PADDLE_LIB}/paddle/fluid/inference/libpaddle_fluid${CMAKE_STATIC_LIBRARY_SUFFIX})
+#     ${PADDLE_LIB}/paddle/fluid/inference/libpaddle_fluid${CMAKE_STATIC_LIBRARY_SUFFIX}
+      D:/Paddle/bazel-dll/fluid_install_dir/paddle/fluid/inference/libpaddle_fluid${CMAKE_STATIC_LIBRARY_SUFFIX}
+      # E:/Paddle/build/paddle/fluid/inference/api/Release/libpaddle_inference_api${CMAKE_STATIC_LIBRARY_SUFFIX}
+      D:/Paddle/bazel-dll/paddle/fluid/inference/api/Release/libpaddle_inference_api${CMAKE_STATIC_LIBRARY_SUFFIX}
+      )
 else()
   set(DEPS
       ${PADDLE_LIB}/paddle/fluid/inference/libpaddle_fluid${CMAKE_SHARED_LIBRARY_SUFFIX})
@@ -121,3 +130,9 @@ if(WITH_GPU)
 endif()
 
 target_link_libraries(${DEMO_NAME} ${DEPS})
+target_link_libraries(test ${DEMO_NAME})
+target_link_libraries(thread_icnet_test ${DEPS})
+target_link_libraries(real_data_icnet_tester ${DEPS})
+
+target_compile_definitions(${DEMO_NAME} PRIVATE "API_DEFINITION")
+
diff --git a/paddle/fluid/inference/api/demo_ci/inference_icnet.cc b/paddle/fluid/inference/api/demo_ci/inference_icnet.cc
index 869002b94e..8b16351604 100644
--- a/paddle/fluid/inference/api/demo_ci/inference_icnet.cc
+++ b/paddle/fluid/inference/api/demo_ci/inference_icnet.cc
@@ -19,139 +19,144 @@
 #include <chrono>
 #include <fstream>
 #include <iostream>
+#include <cassert>
 #include "paddle/fluid/inference/api/paddle_inference_api.h"
+#include "inference_icnet.h"
 
-namespace paddle {
-
-std::string DIRNAME = "./infer_model";
-std::string DATA = "./test-image.txt";
-const int C = 3;  // image channel
-const int H = 449;  // image height
-const int W = 581;  // image width
-
-// data format:
-// "<floats separated by spaces>\t<shape ints separated by spaces>"
-struct Record {
-  std::vector<float> data;
-  std::vector<int32_t> shape;
-};
+using namespace paddle;
+
+class Predictor {
+private:
+  std::unique_ptr<PaddlePredictor> predictor;
+  struct Record {
+    std::vector<float> data;
+    std::vector<int> shape;
+  };
+
+  const int C = 3;    // image channel
+  const int H = 449;  // image height
+  const int W = 581;  // image width
+
+  using Time = decltype(std::chrono::high_resolution_clock::now());
+
+  Time time() { return std::chrono::high_resolution_clock::now(); }
+
+  double time_diff(Time t1, Time t2) {
+    typedef std::chrono::microseconds ms;
+    auto diff = t2 - t1;
+    ms counter = std::chrono::duration_cast<ms>(diff);
+    return counter.count() / 1000.0;
+  }
+
+  static void split(const std::string& str, char sep,
+                    std::vector<std::string>* pieces) {
+    pieces->clear();
+    if (str.empty()) {
+      return;
+    }
+    size_t pos = 0;
+    size_t next = str.find(sep, pos);
+    while (next != std::string::npos) {
+      pieces->push_back(str.substr(pos, next - pos));
+      pos = next + 1;
+      next = str.find(sep, pos);
+    }
+    if (!str.substr(pos).empty()) {
+      pieces->push_back(str.substr(pos));
+    }
+  }
+
+  Record ProcessALine(const std::string& line) {
+    std::vector<std::string> columns;
+    split(line, '\t', &columns);
+
+    Record record;
+    std::vector<std::string> data_strs;
+    split(columns[0], ' ', &data_strs);
+    for (auto& d : data_strs) {
+      record.data.push_back(std::stof(d));
+    }
+
+    std::vector<std::string> shape_strs;
+    split(columns[1], ' ', &shape_strs);
+    for (auto& s : shape_strs) {
+      record.shape.push_back(std::stoi(s));
+    }
+    return record;
+  }
+
+public:
+  Predictor(const char* prog_file,
+            const char* param_file, const float fraction_of_gpu_memory,
+            const bool use_gpu, const int device) {
+    NativeConfig config;
+    config.prog_file = prog_file;
+    config.param_file = param_file;
+    config.fraction_of_gpu_memory = fraction_of_gpu_memory;
+    config.use_gpu = use_gpu;
+    config.device = device;
+
+    predictor = CreatePaddlePredictor<NativeConfig, PaddleEngineKind::kNative>(config);
+  }
+
+  void predict(float* input, const int channel, const int height, const int width,
+               int64_t** output, int* output_length, int batch_size) {
+    std::vector<float> data;
+    int input_length = channel * height * width * batch_size;
+    for (int i = 0; i < input_length; i++) {
+      data.push_back(*((float*)input + i));
+    }
+
+    // initialize the input data
+    PaddleTensor tensor;
+    tensor.shape = std::vector<int>({ batch_size, channel, height, width });
+    tensor.data.Resize(sizeof(float) * batch_size * channel * height * width);
std::copy(data.begin(), data.end(), static_cast(tensor.data.data())); + + tensor.dtype = PaddleDType::FLOAT32; + std::vector paddle_tensor_feeds(1, tensor); + + // initialize the output data + PaddleTensor tensor_out; + std::vector outputs(1, tensor_out); + predictor->Run(paddle_tensor_feeds, &outputs, batch_size); + *output_length = (int)outputs[0].data.length(); + std::memcpy(static_cast(*output), outputs[0].data.data(), outputs[0].data.length()); + int64_t sum_out = 0; + for(int i=0; i < outputs[0].data.length()/sizeof(int64_t); ++i) { + int64_t item = static_cast(outputs[0].data.data())[i]; + sum_out += item; + if (item != 0) { + std::cout << item << std::endl; + } + } + + std::cout << "sum_out" << sum_out << std::endl; + } }; -NativeConfig GetConfig() { - NativeConfig config; - config.prog_file=DIRNAME + "/__model__"; - config.param_file=DIRNAME + "/__params__"; - config.fraction_of_gpu_memory = 0.0; - config.use_gpu = true; - config.device = 0; - return config; -} - -using Time = decltype(std::chrono::high_resolution_clock::now()); - -Time time() { return std::chrono::high_resolution_clock::now(); }; - -double time_diff(Time t1, Time t2) { - typedef std::chrono::microseconds ms; - auto diff = t2 - t1; - ms counter = std::chrono::duration_cast(diff); - return counter.count() / 1000.0; +API_REFERENCE void * init_predictor(const char* prog_file, + const char* param_file, const float fraction_of_gpu_memory, + const bool use_gpu, const int device) { + return new Predictor(prog_file, param_file, fraction_of_gpu_memory, use_gpu, device); } -static void split(const std::string& str, char sep, - std::vector* pieces) { - pieces->clear(); - if (str.empty()) { - return; - } - size_t pos = 0; - size_t next = str.find(sep, pos); - while (next != std::string::npos) { - pieces->push_back(str.substr(pos, next - pos)); - pos = next + 1; - next = str.find(sep, pos); - } - if (!str.substr(pos).empty()) { - pieces->push_back(str.substr(pos)); - } +API_REFERENCE void predict(void* handle, float* input, const int channel, const int height, const int width, + int64_t** output, int* output_length, int batch_size) { + assert(handle != nullptr); + ((Predictor*)handle)->predict(input, channel, height, width, output, output_length, batch_size); } -Record ProcessALine(const std::string& line) { - std::vector columns; - split(line, '\t', &columns); - - Record record; - std::vector data_strs; - split(columns[0], ' ', &data_strs); - for (auto& d : data_strs) { - record.data.push_back(std::stof(d)); - } - - std::vector shape_strs; - split(columns[1], ' ', &shape_strs); - for (auto& s : shape_strs) { - record.shape.push_back(std::stoi(s)); - } - return record; +API_REFERENCE void destory_predictor(void *handle) { + if (handle) { + delete handle; + handle = nullptr; + } } - -void test_naive(int batch_size){ - NativeConfig config = GetConfig(); - auto predictor = CreatePaddlePredictor(config); - int height = H; - int width = W; - int channel = C; - int num_sum = height * width * channel * batch_size; - - // 1. use fake data - std::vector data; - for(int i = 0; i < num_sum; i++) { - data.push_back(0.0); - } - - PaddleTensor tensor; - tensor.shape = std::vector({batch_size, channel, height, width}); - tensor.data.Resize(sizeof(float) * batch_size * channel * height * width); - std::copy(data.begin(), data.end(), static_cast(tensor.data.data())); - tensor.dtype = PaddleDType::FLOAT32; - - // 2. 
read data from file - // std::string line; - // std::ifstream file(DATA); - // std::getline(file, line); - // auto record = ProcessALine(line); - // file.close(); - // PaddleTensor tensor; - // tensor.shape = record.shape; - // tensor.data = - // PaddleBuf(record.data.data(), record.data.size() * sizeof(float)); - - std::vector paddle_tensor_feeds(1, tensor); - PaddleTensor tensor_out; - - std::vector outputs(1, tensor_out); - - predictor->Run(paddle_tensor_feeds, &outputs, batch_size); - auto time1 = time(); - - for(size_t i = 0; i < 2; i++) { - std::cout << "Pass " << i << "predict"; - predictor->Run(paddle_tensor_feeds, &outputs, batch_size); - } - - auto time2 = time(); - std::ofstream ofresult("naive_test_result.txt", std::ios::app); - - std::cout <<"batch: " << batch_size << " predict cost: " << time_diff(time1, time2) / 100.0 << "ms" << std::endl; - std::cout << outputs.size() << std::endl; - -} -} // namespace paddle - -int main(int argc, char** argv) { - paddle::test_naive(1 << 0); - return 0; -} \ No newline at end of file diff --git a/paddle/fluid/inference/api/demo_ci/inference_icnet.h b/paddle/fluid/inference/api/demo_ci/inference_icnet.h new file mode 100644 index 0000000000..b2657e7988 --- /dev/null +++ b/paddle/fluid/inference/api/demo_ci/inference_icnet.h @@ -0,0 +1,21 @@ + +#ifdef _WIN32 +#ifdef inference_icnet_EXPORTS +#define API_REFERENCE extern "C" __declspec(dllexport) +#else +#define API_REFERENCE extern "C" __declspec(dllimport) +#endif +#else +#define API_REFERENCE +#endif + +//API_REFERENCE void * init_predictor(); +//API_REFERENCE void destory_predictor(void *handle); +//API_REFERENCE void predict(void *handle, int n); + +API_REFERENCE void * init_predictor(const char* prog_file, + const char* param_file, const float fraction_of_gpu_memory, + const bool use_gpu, const int device); +API_REFERENCE void predict(void* handle, float* input, const int channel, const int height, + const int width, int64_t** output, int* output_length, int batch_size); +API_REFERENCE void destory_predictor(void *handle); diff --git a/paddle/fluid/inference/api/demo_ci/real_data_icnet_tester.cc b/paddle/fluid/inference/api/demo_ci/real_data_icnet_tester.cc new file mode 100644 index 0000000000..677a6b976d --- /dev/null +++ b/paddle/fluid/inference/api/demo_ci/real_data_icnet_tester.cc @@ -0,0 +1,123 @@ +// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+#define GOOGLE_GLOG_DLL_DECL +#include +#include +#include +#include +#include +#include "paddle/fluid/inference/api/paddle_inference_api.h" + +namespace paddle { + +// DEFINE_string(dirname, "./lb", +// "Directory of the inference model."); + +NativeConfig GetConfig() { + NativeConfig config; + // config.model_dir = FLAGS_dirname; + config.prog_file= "lb/__model__"; + config.param_file= "lb/__params__"; + config.fraction_of_gpu_memory = 0.8; + config.use_gpu = true; + config.device = 0; + return config; +} + +using Time = decltype(std::chrono::high_resolution_clock::now()); +Time time() { return std::chrono::high_resolution_clock::now(); }; +double time_diff(Time t1, Time t2) { + typedef std::chrono::microseconds ms; + auto diff = t2 - t1; + ms counter = std::chrono::duration_cast(diff); + return counter.count() / 1000.0; +} + +void test_naive(int batch_size){ + NativeConfig config = GetConfig(); + auto predictor = CreatePaddlePredictor(config); + int height = 449; + int width = 581; + + // =============read file list ============= + std::ifstream infile("new_file.list"); + std::string temp_s; + std::vector all_files; + while (!infile.eof()) { + infile >> temp_s; + all_files.push_back(temp_s); + } + + // size_t file_num = all_files.size(); + infile.close(); + // =============read file list ============= + for (size_t f_k = 0; f_k < 1; f_k ++) { + std::ifstream in_img(all_files[f_k]); + std::cout << all_files[f_k] << std::endl; + float temp_v; + + float sum_n = 0.0; + std::vector data; + while (!in_img.eof()) { + in_img >> temp_v; + data.push_back(float(temp_v)); + // std::cout << temp_v << " "; + sum_n += temp_v; + } + + in_img.close(); + std::cout << "sum: " << sum_n << std::endl; + + PaddleTensor tensor; + tensor.shape = std::vector({batch_size, 3, height, width}); + tensor.data.Resize(sizeof(float) * batch_size * 3 * height * width); + std::copy(data.begin(), data.end(), static_cast(tensor.data.data())); + tensor.dtype = PaddleDType::FLOAT32; + std::vector paddle_tensor_feeds(1, tensor); + PaddleTensor tensor_out; + + std::vector outputs(1, tensor_out); + predictor->Run(paddle_tensor_feeds, &outputs, batch_size); + std::cout << "start predict123:" << std::endl; + auto time1 = time(); + + + for(size_t i = 0; i < 1; i++) { + predictor->Run(paddle_tensor_feeds, &outputs, batch_size); + } + + auto time2 = time(); + std::ofstream ofresult("naive_test_result.txt", std::ios::app); + + std::cout <<"batch: " << batch_size << " predict cost: " << time_diff(time1, time2) / 1000.0 << "ms" << std::endl; + std::cout << outputs.size() << std::endl; + int64_t * data_o = static_cast(outputs[0].data.data()); + int64_t sum_out = 0; + for (size_t j = 0; j < outputs[0].data.length() / sizeof(int64_t); ++j) { + ofresult << std::to_string(data_o[j]) << " "; + sum_out += data_o[j]; + } + std::cout << "sum_out " << sum_out << std::endl; + ofresult << std::endl; + ofresult.close(); + } +} + +} // namespace paddle + +int main(int argc, char** argv) { +// google::ParseCommandLineFlags(&argc, &argv, true); + paddle::test_naive(1<<0); + return 0; +} diff --git a/paddle/fluid/inference/api/demo_ci/test.cc b/paddle/fluid/inference/api/demo_ci/test.cc new file mode 100644 index 0000000000..41f05a9b50 --- /dev/null +++ b/paddle/fluid/inference/api/demo_ci/test.cc @@ -0,0 +1,99 @@ + +#include +#include +#include "inference_icnet.h" +#include +#include +#include +#include + +#include +using namespace std; + + +template +Type stringToNum(const string& str) +{ + istringstream iss(str); + Type num; + iss >> num; + return 
num; +} + +void test_imgs() { + void *h = init_predictor("./lb/__model__", "./lb/__params__", 0.3f, true, 0); + + std::ifstream infile("new_file.list"); + std::ofstream ofs("./1.png.output.txt"); + + std::string temp_s; + std::vector all_files; + while (!infile.eof()) { + infile >> temp_s; + all_files.push_back(temp_s); + } + // size_t file_num = all_files.size(); + infile.close(); + // =============read file list ============= + for (size_t f_k = 0; f_k < 1; f_k++) { + // std::string path = "D:\\Paddle\\paddle\\fluid\\inference\\api\\demo_ci\\build\\Release\\"; + // std::ifstream in_img(path + all_files[f_k]); + std::string mypath = "D:\\Paddle\\paddle\\fluid\\inference\\api\\demo_ci\\build\\Release\\1.png.txt"; + std::cout << "file" << mypath << std::endl; + std::ifstream in_img(mypath); + //std::cout << path + all_files[f_k] << std::endl; + double temp_v; + const int size = 3 * 449 * 581 * 1; + float * data = new float[size]; + std::string value; + + if (!in_img.is_open()) { + cout << "open failed" << endl; + } + double sum_input = .0; + for (auto i = 0; i < size; i++) { + getline(in_img, value, '\n'); + double v = stringToNum(value); + data[i] = static_cast(v); + sum_input += v; + } + std::cout << "sum_input" << sum_input << std::endl; + + in_img.close(); + const int SIZE = 449 * 581 * 1; + int64_t * p = new int64_t[SIZE](); + int out_size = 0; + //memset(p, 0, size); + predict(h, data, 3, 449, 581, &p, &out_size, 1); + std::cout << "out_size = " << out_size << std::endl; + + double out_sum = .0; + for (auto i = 0; i < out_size / sizeof(int64_t); i++) { + out_sum += p[i]; + ofs << p[i] << " "; + } + ofs.close(); + + std::cout << "inferece out sum" << out_sum << std::endl; + delete p; + } + + destory_predictor(h); +} + +int main(int argc, char** argv) { + //if (true) { + // std::thread t1(func, init_predictor("./infer_model/__model__", "./infer_model/__params__", 0.1f, true, 0)); + // std::thread t2(func, init_predictor("./infer_model/__model__", "./infer_model/__params__", 0.1f, true, 0)); + // //std::thread t3(func, init_predictor("./infer_model/__model__", "./infer_model/__params__", 0.1f, true, 0)); + // //std::thread t4(func, init_predictor("./infer_model/__model__", "./infer_model/__params__", 0.1f, true, 0)); + // t1.join(); + // t2.join(); + // //t3.join(); + // //t4.join(); + // //Sleep(1); + //} + test_imgs(); + + return 0; +} diff --git a/paddle/fluid/inference/api/demo_ci/thread_icnet_test.cc b/paddle/fluid/inference/api/demo_ci/thread_icnet_test.cc new file mode 100644 index 0000000000..d669b04dc9 --- /dev/null +++ b/paddle/fluid/inference/api/demo_ci/thread_icnet_test.cc @@ -0,0 +1,105 @@ +// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#define GOOGLE_GLOG_DLL_DECL + +#include +#include +//#include +#include +#include +#include +#include "paddle/fluid/inference/api/paddle_inference_api.h" +#include // NOLINT + +#define ASSERT_TRUE(x) x +#define ASSERT_EQ(x, y) assert(x == y) + +namespace paddle { + +// DEFINE_string(dirname, "./LB_icnet_model", +// "Directory of the inference model."); + +NativeConfig GetConfig() { + NativeConfig config; + config.prog_file= "./dzh_lb/__model__"; + config.param_file= "./dzh_lb/__params__"; + config.fraction_of_gpu_memory = 0.08; + config.use_gpu = true; + config.device = 0; + return config; +} + +using Time = decltype(std::chrono::high_resolution_clock::now()); +Time time() { return std::chrono::high_resolution_clock::now(); }; +double time_diff(Time t1, Time t2) { + typedef std::chrono::microseconds ms; + auto diff = t2 - t1; + ms counter = std::chrono::duration_cast(diff); + return counter.count() / 1000.0; +} + +void test_naive(int batch_size, std::string model_path){ + PaddlePredictor* pres[2]; + + NativeConfig config = GetConfig(); + // config.model_dir = model_path; + auto predictor0 = CreatePaddlePredictor(config); + auto predictor1 = CreatePaddlePredictor(config); + pres[0] = predictor0.get(); + pres[1] = predictor1.get(); + + int height = 449; + int width = 581; + + std::vector data; + for (int i = 0; i < 3 * height * width; i++) { + data.push_back(0); + } + + PaddleTensor tensor; + tensor.shape = std::vector({batch_size, 3, height, width}); + tensor.data.Resize(sizeof(float) * batch_size * 3 * height * width); + std::copy(data.begin(), data.end(), static_cast(tensor.data.data())); + tensor.dtype = PaddleDType::FLOAT32; + std::vector paddle_tensor_feeds(1, tensor); + + constexpr int num_jobs = 5; // each job run 1 batch + std::vector threads; + for (int tid = 0; tid < num_jobs; ++tid) { + threads.emplace_back([&, tid]() { + auto predictor = pres[tid]; + std::vector local_outputs; + for(size_t i = 0; i < 1000; i++) { + ASSERT_TRUE(predictor->Run(paddle_tensor_feeds, &local_outputs)); + std::cout << "run: " << tid << std::endl; + } + ASSERT_EQ(local_outputs.size(), 1UL); + }); + } + for (int i = 0; i < num_jobs; ++i) { + threads[i].join(); + } +} + +//TEST(alexnet, naive) { +// test_naive(1 << 0, "./trt_models/vgg19"); +//} + +} // namespace paddle + +int main(int argc, char** argv) { + paddle::test_naive(1 << 0, ""); +} + diff --git a/paddle/fluid/operators/batch_norm_op.cu.cc b/paddle/fluid/operators/batch_norm_op.cu.cc index ca6cd86693..08a10757ed 100644 --- a/paddle/fluid/operators/batch_norm_op.cu.cc +++ b/paddle/fluid/operators/batch_norm_op.cu.cc @@ -141,6 +141,27 @@ class BatchNormKernel bias->template data>(), est_mean->template data>(), est_var->template data>(), epsilon)); + + VLOG(3) << "before tensor copy"; + Tensor mean_, var_, x_, y_; + framework::TensorCopy(*est_mean, platform::CPUPlace(), dev_ctx, &mean_); + framework::TensorCopy(*est_var, platform::CPUPlace(), dev_ctx, &var_); + framework::TensorCopy(*x, platform::CPUPlace(), dev_ctx, &x_); + framework::TensorCopy(*y, platform::CPUPlace(), dev_ctx, &y_); + VLOG(3) << "after tensor copy"; + auto check_tensor = [&](const Tensor& check) { + float sum = .0; + for(size_t i=0; i < check.numel(); ++i) { + sum += check.data()[i]; + } + return sum; + }; + VLOG(3) << "BatchNormKernel"; + VLOG(3) << "mean" << check_tensor(mean_); + VLOG(3) << "var" << check_tensor(var_); + VLOG(3) << "x" << check_tensor(x_); + VLOG(3) << "y" << check_tensor(y_); + } else { // Run training mode. 
     // obtain running mean and running inv var, and see if we need to
diff --git a/paddle/fluid/operators/load_combine_op.cc b/paddle/fluid/operators/load_combine_op.cc
index e2f98164be..ccc497affb 100644
--- a/paddle/fluid/operators/load_combine_op.cc
+++ b/paddle/fluid/operators/load_combine_op.cc
@@ -12,6 +12,7 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License. */
 #include <fstream>
+#include <iostream>
 #include "paddle/fluid/framework/data_type_transform.h"
 #include "paddle/fluid/framework/op_registry.h"
 #include "paddle/fluid/platform/device_context.h"
@@ -34,6 +35,7 @@ class LoadCombineOp : public framework::OperatorBase {
     auto load_as_fp16 = Attr<bool>("load_as_fp16");
 
     std::ifstream fin(filename, std::ios_base::in | std::ios_base::binary);
+    // std::ifstream fin(filename, std::ios_base::in);
     PADDLE_ENFORCE(!fin.bad(),
                    "Cannot open file %s for load_combine op", filename);
 
@@ -46,7 +48,7 @@ class LoadCombineOp : public framework::OperatorBase {
     auto &dev_ctx = *pool.Get(place);
 
     for (size_t i = 0; i < out_var_names.size(); i++) {
-      VLOG(3) << "load " << out_var_names[i];
+      VLOG(3) << "load variable " << out_var_names[i];
       auto *out_var = scope.FindVar(out_var_names[i]);
 
       PADDLE_ENFORCE(out_var != nullptr, "Output variable %s cannot be found",
@@ -61,6 +63,13 @@ class LoadCombineOp : public framework::OperatorBase {
       // Get data from fin to tensor
       DeserializeFromStream(fin, tensor, dev_ctx);
       VLOG(3) << "after deserialization";
+      framework::Tensor check;
+      framework::TensorCopy(*tensor, platform::CPUPlace(), dev_ctx, &check);
+      float sum = .0;
+      for(size_t i=0; i < check.numel(); ++i) {
+        sum += check.data<float>()[i];
+      }
+      VLOG(3) << "sum result " << sum;
       auto in_dtype = framework::ToDataType(tensor->type());
       auto out_dtype =
           load_as_fp16 ? framework::proto::VarType::FP16 : in_dtype;
@@ -80,6 +89,7 @@ class LoadCombineOp : public framework::OperatorBase {
         tensor = out_var->GetMutable<framework::LoDTensor>();
         tensor->set_lod(fp16_tensor.lod());
         tensor->ShareDataWith(fp16_tensor);
+
       }
       VLOG(3) << "load " << out_var_names[i] << " finished";
     }
-- 
GitLab
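
Illustrative usage sketch (not part of the patch): the exported inference_icnet API introduced above (init_predictor / predict / destory_predictor) can be driven from a small client such as the following. The model paths, image size (3 x 449 x 581), and the output buffer size are placeholder assumptions taken from the demo sources; adjust them to the real model.

#include <cstdint>
#include <iostream>
#include <vector>
#include "inference_icnet.h"

int main() {
  // Assumed model location; point this at the actual __model__/__params__ files.
  void* handle = init_predictor("./lb/__model__", "./lb/__params__",
                                /*fraction_of_gpu_memory=*/0.3f,
                                /*use_gpu=*/true, /*device=*/0);

  const int channel = 3, height = 449, width = 581, batch = 1;
  std::vector<float> input(channel * height * width * batch, 0.0f);  // dummy image
  std::vector<int64_t> output(height * width * batch, 0);            // label map buffer
  int output_bytes = 0;

  // predict() copies the raw int64 label map into the caller-provided buffer
  // and reports its size in bytes through output_bytes.
  int64_t* out_ptr = output.data();
  predict(handle, input.data(), channel, height, width, &out_ptr, &output_bytes, batch);
  std::cout << "output bytes: " << output_bytes << std::endl;

  destory_predictor(handle);
  return 0;
}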