From 372caf40005aed16b497f2f539bc3fa9c47d5cd3 Mon Sep 17 00:00:00 2001
From: dzhwinter
Date: Fri, 14 Sep 2018 17:54:09 +0800
Subject: [PATCH] windows staff

---
 cmake/configure.cmake                            |   1 -
 paddle/fluid/inference/api/api_impl.cc           |  23 +-
 .../inference/api/demo_ci/CMakeLists.txt         |   1 +
 .../inference/api/demo_ci/inference_icnet.cc     | 249 +++++++++++-------
 .../inference/api/paddle_inference_api.h         |  23 +-
 paddle/fluid/platform/enforce.h                  |  16 ++
 paddle/fluid/platform/macros.h                   |   7 +-
 7 files changed, 196 insertions(+), 124 deletions(-)

diff --git a/cmake/configure.cmake b/cmake/configure.cmake
index 42ad79aac23..e9852f00b18 100644
--- a/cmake/configure.cmake
+++ b/cmake/configure.cmake
@@ -82,7 +82,6 @@ if(WIN32)
   if (NOT MSVC)
     message(FATAL "Windows build only support msvc. Which was binded by the nvcc compiler of NVIDIA.")
   endif(NOT MSVC)
-  add_definitions(/DPADDLE_COMPILE)
 endif(WIN32)
 
 if(NOT WITH_GOLANG)
diff --git a/paddle/fluid/inference/api/api_impl.cc b/paddle/fluid/inference/api/api_impl.cc
index 0ce78b39656..f0ea482994d 100644
--- a/paddle/fluid/inference/api/api_impl.cc
+++ b/paddle/fluid/inference/api/api_impl.cc
@@ -67,6 +67,7 @@ bool NativePaddlePredictor::Init(
   } else {
     place_ = paddle::platform::CPUPlace();
   }
+  VLOG(3) << "before scope";
   if (parent_scope) {
     scope_ = parent_scope;
     sub_scope_ = &(parent_scope->NewScope());
@@ -75,26 +76,30 @@ bool NativePaddlePredictor::Init(
     paddle::framework::InitDevices(false);
     scope_.reset(new paddle::framework::Scope());
   }
-
+  VLOG(3) << "after scope"
   executor_.reset(new paddle::framework::Executor(place_));
-
+  VLOG(3) << "executor";
   // Initialize the inference program
   if (!config_.model_dir.empty()) {
     // Parameters are saved in separate files sited in
     // the specified `dirname`.
+    VLOG(3) << config_.model_dir;
     inference_program_ = paddle::inference::Load(executor_.get(), scope_.get(),
                                                  config_.model_dir);
+    VLOG(3) << "load model Finish";
   } else if (!config_.prog_file.empty() && !config_.param_file.empty()) {
     // All parameters are saved in a single file.
     // The file names should be consistent with that used
     // in Python API `fluid.io.save_inference_model`.
+    VLOG(3) << "load program";
     inference_program_ = paddle::inference::Load(
         executor_.get(), scope_.get(), config_.prog_file, config_.param_file);
+    VLOG(3) << "load program finish";
   } else {
     LOG(ERROR) << "fail to load inference model.";
     return false;
   }
-
+  VLOG(3) << "prepare";
   ctx_ = executor_->Prepare(*inference_program_, 0);
   executor_->CreateVariables(*inference_program_,
                              sub_scope_ ? sub_scope_ : scope_.get(), 0);
@@ -289,10 +294,13 @@ std::unique_ptr<PaddlePredictor> CreatePaddlePredictor<
   VLOG(3) << "create NativePaddlePredictor";
   if (config.use_gpu) {
    // 1. GPU memeroy
-    PADDLE_ENFORCE_GT(
-        config.fraction_of_gpu_memory, 0.f,
-        "fraction_of_gpu_memory in the config should be set to range (0., 1.]");
+    VLOG(3) << "before check";
+    // PADDLE_ENFORCE_GT(
+    //     config.fraction_of_gpu_memory, 0.f,
+    //     "fraction_of_gpu_memory in the config should be set to range (0., 1.]");
+    VLOG(3) << "failed on first";
     PADDLE_ENFORCE_GE(config.device, 0, "Invalid device id %d", config.device);
+    VLOG(3) << "after flags";
     std::vector<std::string> flags;
     if (config.fraction_of_gpu_memory >= 0.0f ||
         config.fraction_of_gpu_memory <= 0.95f) {
@@ -302,9 +310,10 @@ std::unique_ptr<PaddlePredictor> CreatePaddlePredictor<
       flags.push_back(flag);
       VLOG(3) << "set flag: " << flag;
       framework::InitGflags(flags);
+      VLOG(3) << "flags setting";
     }
   }
-
+  VLOG(3) << "Init flags Done";
   std::unique_ptr<PaddlePredictor> predictor(new NativePaddlePredictor(config));
   if (!dynamic_cast<NativePaddlePredictor*>(predictor.get())->Init(nullptr)) {
     return nullptr;
diff --git a/paddle/fluid/inference/api/demo_ci/CMakeLists.txt b/paddle/fluid/inference/api/demo_ci/CMakeLists.txt
index f1615506553..573f38111b9 100644
--- a/paddle/fluid/inference/api/demo_ci/CMakeLists.txt
+++ b/paddle/fluid/inference/api/demo_ci/CMakeLists.txt
@@ -17,6 +17,7 @@ endmacro()
 if (WIN32)
   if (WITH_STATIC_LIB)
     safe_set_static_flag()
+    add_definitions(-DSTATIC_LIB)
     set(CMAKE_CXX_FLAGS ${CMAKE_CXX_FLAGS} "/w")
     set(CMAKE_CXX_FLAGS_RELEASE ${CMAKE_CXX_FLAGS_RELEASE} "/w")
   endif()
diff --git a/paddle/fluid/inference/api/demo_ci/inference_icnet.cc b/paddle/fluid/inference/api/demo_ci/inference_icnet.cc
index 5e06c3161e2..4a048684bcb 100644
--- a/paddle/fluid/inference/api/demo_ci/inference_icnet.cc
+++ b/paddle/fluid/inference/api/demo_ci/inference_icnet.cc
@@ -19,6 +19,7 @@ limitations under the License. */
 #include <algorithm>
 #include <cassert>
+#include <fstream>
 #include <iostream>
 #include <memory>
 #include <string>
@@ -27,9 +28,17 @@ limitations under the License. */
 #include <thread>  //NOLINT
 #include "paddle/fluid/inference/paddle_inference_api.h"
 
-std::string DIRNAME = "";  /* "Directory of the inference model." */  // NOLINT
+std::string MODELDIR = ""; /* "Directory of the inference model." */  // NOLINT
+std::string REFER = "";    /*"path to reference result for comparison."*/  //NOTLINT
+/*path of data; each line is a record, format:
+<space splitted floats as data>\t<space splitted ints as shape>
+
+Please check the demo data of data.txt for details.
+ */
+std::string DATA = "";
 bool USE_GPU = false;      /*"Whether use gpu."*/
 
+
 auto message_err = []() {
   std::cout << "Copyright (c) 2018 PaddlePaddle Authors." << std::endl;
   std::cout << "Demo Case for windows inference. "
@@ -40,19 +49,52 @@ auto message_err = []() {
   std::cout << std::endl;
 };
 
-void ParseArgs() {
-  message_err();
-  std::cout << "DIRNAME:[D:/Paddle/xxx/path_to_model_dir]" << std::endl;
-  std::cin >> DIRNAME;
-  std::cout << "USE_GPU:[yes|no]";
-  std::string value;
-  std::cin >> value;
-  std::transform(value.begin(), value.end(), value.begin(), ::toupper);
-  USE_GPU = (value == "YES") ? true : false;
-}
 
 namespace paddle {
 namespace demo {
+
+void split(const std::string& str, char sep,
+           std::vector<std::string>* pieces) {
+  pieces->clear();
+  if (str.empty()) {
+    return;
+  }
+  size_t pos = 0;
+  size_t next = str.find(sep, pos);
+  while (next != std::string::npos) {
+    pieces->push_back(str.substr(pos, next - pos));
+    pos = next + 1;
+    next = str.find(sep, pos);
+  }
+  if (!str.substr(pos).empty()) {
+    pieces->push_back(str.substr(pos));
+  }
+}
+
+/*
+ * Get a summary of a PaddleTensor content.
+ */
+std::string SummaryTensor(const PaddleTensor& tensor) {
+  std::stringstream ss;
+  int num_elems = tensor.data.length() / PaddleDtypeSize(tensor.dtype);
+
+  ss << "data[:10]\t";
+  switch (tensor.dtype) {
+    case PaddleDType::INT64: {
+      for (int i = 0; i < std::min(num_elems, 10); i++) {
+        ss << static_cast<int64_t*>(tensor.data.data())[i] << " ";
+      }
+      break;
+    }
+    case PaddleDType::FLOAT32:
+      for (int i = 0; i < std::min(num_elems, 10); i++) {
+        ss << static_cast<float*>(tensor.data.data())[i] << " ";
+      }
+      break;
+  }
+  return ss.str();
+}
+
 std::string ToString(const NativeConfig& config) {
   std::stringstream ss;
   ss << "Use GPU : " << (config.use_gpu ? "True" : "False") << "\n"
@@ -65,119 +107,122 @@ std::string ToString(const NativeConfig& config) {
   return ss.str();
 }
 
-void Main(bool use_gpu) {
-  //# 1. Create PaddlePredictor with a config.
-  NativeConfig config;
-  config.model_dir = DIRNAME;
-  config.use_gpu = USE_GPU;
-  config.fraction_of_gpu_memory = 0.15;
-  config.device = 0;
-  std::cout << ToString(config) << std::endl;
-  auto predictor =
-      CreatePaddlePredictor<NativeConfig, PaddleEngineKind::kNative>(config);
+struct Record {
+  std::vector<float> data;
+  std::vector<int> shape;
+};
+
+
+Record ProcessALine(const std::string& line) {
+  std::cout << "process a line" << std::endl;;
+  std::vector<std::string> columns;
+  split(line, '\t', &columns);
+  assert(columns.size() == 2UL,
+         "data format error, should be <data>\t<shape>");
+
+  Record record;
+  std::vector<std::string> data_strs;
+  split(columns[0], ' ', &data_strs);
+  for (auto& d : data_strs) {
+    record.data.push_back(std::stof(d));
+  }
+
+  std::vector<std::string> shape_strs;
+  split(columns[1], ' ', &shape_strs);
+  for (auto& s : shape_strs) {
+    record.shape.push_back(std::stoi(s));
+  }
+  std::cout << "data size " << record.data.size() << std::endl;
+  std::cout << "data shape size " << record.shape.size() << std::endl;
+  return record;
+}
 
-  for (int batch_id = 0; batch_id < 3; batch_id++) {
-    //# 2. Prepare input.
-    int64_t data[4] = {1, 2, 3, 4};
-
-    PaddleTensor tensor;
-    tensor.shape = std::vector<int>({4, 1});
-    tensor.data = PaddleBuf(data, sizeof(data));
-    tensor.dtype = PaddleDType::INT64;
-
-    // For simplicity, we set all the slots with the same data.
-    std::vector<PaddleTensor> slots(4, tensor);
-
-    //# 3. Run
-    std::vector<PaddleTensor> outputs;
-    assert(predictor->Run(slots, &outputs) == true &&
-           "Predict run expect true");
-
-    //# 4. Get output.
-    assert(outputs.size() == 1UL);
-    // Check the output buffer size and result of each tid.
-    assert(outputs.front().data.length() == 33168UL);
-    float result[5] = {0.00129761, 0.00151112, 0.000423564, 0.00108815,
-                       0.000932706};
-    const size_t num_elements = outputs.front().data.length() / sizeof(float);
-    // The outputs' buffers are in CPU memory.
-    for (size_t i = 0; i < std::min(static_cast<size_t>(5), num_elements);
-         i++) {
-      assert(static_cast<float*>(outputs.front().data.data())[i] == result[i]);
-      std::cout << "expect the output "
-                << static_cast<float*>(outputs.front().data.data())[i]
-                << std::endl;
+void CheckOutput(const std::string& referfile, const PaddleTensor& output) {
+  std::string line;
+  std::ifstream file(referfile);
+  std::getline(file, line);
+  auto refer = ProcessALine(line);
+  file.close();
+
+  size_t numel = output.data.length() / PaddleDtypeSize(output.dtype);
+  std::cout << "predictor output numel " << numel << std::endl;
+  std::cout << "reference output numel " << refer.data.size() << std::endl;
+  assert(numel == refer.data.size());
+  switch (output.dtype) {
+    case PaddleDType::INT64: {
+      for (size_t i = 0; i < numel; ++i) {
+        assert(static_cast<int64_t*>(output.data.data())[i] ==
+               refer.data[i]);
+      }
+      break;
     }
+    case PaddleDType::FLOAT32:
+      for (size_t i = 0; i < numel; ++i) {
+        assert(
+            fabs(static_cast<float*>(output.data.data())[i] - refer.data[i]) <=
+            1e-5);
+      }
+      break;
   }
 }
 
-void MainThreads(int num_threads, bool USE_GPU) {
-  // Multi-threads only support on CPU
-  // 0. Create PaddlePredictor with a config.
+/*
+ * Use the native fluid engine to inference the demo.
+ */
+void Main(bool use_gpu) {
   NativeConfig config;
-  config.model_dir = DIRNAME;
+  config.param_file = MODELDIR + "/__params__";
+  config.prog_file = MODELDIR + "/__model__";
   config.use_gpu = USE_GPU;
-  config.fraction_of_gpu_memory = 0.15;
   config.device = 0;
+  if (USE_GPU) {
+    config.fraction_of_gpu_memory = 0.1f;  // set by yourself
+  }
   std::cout << ToString(config) << std::endl;
-  auto main_predictor =
+  std::cout << "init predictor" << std::endl;
+  auto predictor =
       CreatePaddlePredictor<NativeConfig, PaddleEngineKind::kNative>(config);
-  std::vector<std::thread> threads;
-  for (int tid = 0; tid < num_threads; ++tid) {
-    threads.emplace_back([&, tid]() {
-      // 1. clone a predictor which shares the same parameters
-      auto predictor = main_predictor->Clone();
-      constexpr int num_batches = 3;
-      for (int batch_id = 0; batch_id < num_batches; ++batch_id) {
-        // 2. Dummy Input Data
-        int64_t data[4] = {1, 2, 3, 4};
-        PaddleTensor tensor;
-        tensor.shape = std::vector<int>({4, 1});
-        tensor.data = PaddleBuf(data, sizeof(data));
-        tensor.dtype = PaddleDType::INT64;
-
-        std::vector<PaddleTensor> inputs(4, tensor);
-        std::vector<PaddleTensor> outputs;
-        // 3. Run
-        assert(predictor->Run(inputs, &outputs) == true);
-
-        // 4. Get output.
-        assert(outputs.size() == 1UL);
-        // Check the output buffer size and result of each tid.
-        assert(outputs.front().data.length() == 33168UL);
-        float result[5] = {0.00129761, 0.00151112, 0.000423564, 0.00108815,
-                           0.000932706};
-        const size_t num_elements =
-            outputs.front().data.length() / sizeof(float);
-        // The outputs' buffers are in CPU memory.
-        for (size_t i = 0; i < std::min(static_cast<size_t>(5), num_elements);
-             i++) {
-          assert(static_cast<float*>(outputs.front().data.data())[i] ==
-                 result[i]);
-        }
-      }
-    });
-  }
-  for (int i = 0; i < num_threads; ++i) {
-    threads[i].join();
-  }
+  std::cout << "begin to process data" << std::endl;
+  // Just a single batch of data.
+  std::string line;
+  std::ifstream file(DATA);
+  std::getline(file, line);
+  auto record = ProcessALine(line);
+  file.close();
+
+  // Inference.
+  PaddleTensor input;
+  input.shape = record.shape;
+  input.data =
+      PaddleBuf(record.data.data(), record.data.size() * sizeof(float));
+  input.dtype = PaddleDType::FLOAT32;
+
+  std::cout << "run executor" << std::endl;
+  std::vector<PaddleTensor> output;
+  predictor->Run({input}, &output);
+
+  std::cout << "output.size " << output.size() << std::endl;
+  auto& tensor = output.front();
+  std::cout << "output: " << SummaryTensor(tensor) << std::endl;
+
+  // compare with reference result
+  CheckOutput(REFER, tensor);
 }
+
 }  // namespace demo
 }  // namespace paddle
 
 int main(int argc, char** argv) {
   // ParseArgs();
-  DIRNAME = "./icnet";
+  MODELDIR = "./mobilenet/model";
+  DATA = "./mobilenet/data.txt";
+  REFER = "./mobilenet/result.txt";
   USE_GPU = true;
   paddle::demo::Main(false /* USE_GPU*/);
-  paddle::demo::MainThreads(1, false /* USE_GPU*/);
-  paddle::demo::MainThreads(4, false /* USE_GPU*/);
   if (USE_GPU) {
     paddle::demo::Main(true /*USE_GPU*/);
-    paddle::demo::MainThreads(1, true /*USE_GPU*/);
-    paddle::demo::MainThreads(4, true /*USE_GPU*/);
   }
   system("pause");
   return 0;
diff --git a/paddle/fluid/inference/api/paddle_inference_api.h b/paddle/fluid/inference/api/paddle_inference_api.h
index 4b084009ff3..1baa64c249f 100644
--- a/paddle/fluid/inference/api/paddle_inference_api.h
+++ b/paddle/fluid/inference/api/paddle_inference_api.h
@@ -25,7 +25,6 @@ limitations under the License. */
 #include <memory>
 #include <string>
 #include <vector>
-#include "paddle/fluid/platform/macros.h"
 
 namespace paddle {
 
@@ -34,7 +33,7 @@ enum PaddleDType {
   INT64,
 };
 
-class PADDLE_DLL PaddleBuf {
+class PaddleBuf {
  public:
   PaddleBuf() = default;
   PaddleBuf(PaddleBuf&& other);
@@ -46,7 +45,7 @@ class PADDLE_DLL PaddleBuf {
   PaddleBuf(void* data, size_t length)
       : data_(data), length_(length), memory_owned_{false} {}
   // Own memory.
-  explicit PaddleBuf(size_t length)
+  PaddleBuf(size_t length)
       : data_(new char[length]), length_(length), memory_owned_(true) {}
   // Resize to `length` bytes.
   void Resize(size_t length);
@@ -65,7 +64,7 @@ class PADDLE_DLL PaddleBuf {
   bool memory_owned_{true};
 };
 
-struct PADDLE_DLL PaddleTensor {
+struct PaddleTensor {
   PaddleTensor() = default;
   std::string name;  // variable name.
   std::vector<int> shape;
@@ -88,7 +87,7 @@ enum class PaddleEngineKind {
 * A simple Inference API for Paddle. Currently this API can be used by
 * non-sequence scenerios.
 */
-class PADDLE_DLL PaddlePredictor {
+class PaddlePredictor {
  public:
   struct Config;
   PaddlePredictor() = default;
@@ -97,6 +96,7 @@ class PADDLE_DLL PaddlePredictor {
   // Predict an record.
   // The caller should be responsible for allocating and releasing the memory of
+  // `inputs`. `inputs` should be available until Run returns. Caller should be
   // responsible for the output tensor's buffer, either allocated or passed from
   // outside.
   virtual bool Run(const std::vector<PaddleTensor>& inputs,
@@ -111,12 +111,12 @@ class PADDLE_DLL PaddlePredictor {
   virtual ~PaddlePredictor() = default;
 
   // The common configs for all the predictors.
-  struct PADDLE_DLL Config {
+  struct Config {
     std::string model_dir;  // path to the model directory.
   };
 };
 
-struct PADDLE_DLL NativeConfig : public PaddlePredictor::Config {
+struct NativeConfig : public PaddlePredictor::Config {
   // GPU related fields.
   bool use_gpu{false};
   int device{0};
@@ -129,7 +129,7 @@ struct PADDLE_DLL NativeConfig : public PaddlePredictor::Config {
 };
 
 // Configurations for Anakin engine.
-struct PADDLE_DLL AnakinConfig : public PaddlePredictor::Config {
+struct AnakinConfig : public PaddlePredictor::Config {
   enum TargetType { NVGPU = 0, X86 };
   int device;
   std::string model_file;
@@ -137,7 +137,7 @@ struct PADDLE_DLL AnakinConfig : public PaddlePredictor::Config {
   TargetType target_type;
 };
 
-struct PADDLE_DLL TensorRTConfig : public NativeConfig {
+struct TensorRTConfig : public NativeConfig {
   // Determine whether a subgraph will be executed by TRT.
   int min_subgraph_size{1};
   // While TensorRT allows an engine optimized for a given max batch size
@@ -159,9 +159,8 @@ struct PADDLE_DLL TensorRTConfig : public NativeConfig {
 //
 // Similarly, each engine kind should map to a unique predictor implementation.
 template <typename ConfigT, PaddleEngineKind engine = PaddleEngineKind::kNative>
-PADDLE_DLL std::unique_ptr<PaddlePredictor> CreatePaddlePredictor(
-    const ConfigT& config);
+std::unique_ptr<PaddlePredictor> CreatePaddlePredictor(const ConfigT& config);
 
-PADDLE_DLL int PaddleDtypeSize(PaddleDType dtype);
+int PaddleDtypeSize(PaddleDType dtype);
 
 }  // namespace paddle
diff --git a/paddle/fluid/platform/enforce.h b/paddle/fluid/platform/enforce.h
index 78bca5cb33b..cc24e84d595 100644
--- a/paddle/fluid/platform/enforce.h
+++ b/paddle/fluid/platform/enforce.h
@@ -308,6 +308,8 @@ inline void throw_on_error(T e) {
   __PADDLE_BINARY_COMPARE(__VAL0, __VAL1, <, >=, __VA_ARGS__)
 #define PADDLE_ENFORCE_LE(__VAL0, __VAL1, ...) \
   __PADDLE_BINARY_COMPARE(__VAL0, __VAL1, <=, >, __VA_ARGS__)
+
+#if !defined(_WIN32)
 #define PADDLE_ENFORCE_NOT_NULL(__VAL, ...) \
   do {                                      \
     if (UNLIKELY(nullptr == (__VAL))) {     \
@@ -327,6 +329,20 @@ inline void throw_on_error(T e) {
                     paddle::string::Sprintf("" __VA_ARGS__));  \
     }                                                          \
   } while (0)
+#else
+#define __PADDLE_BINARY_COMPARE(__VAL0, __VAL1, __CMP, __INV_CMP, ...) \
+  do {                                                                 \
+    if (!((__VAL0)__CMP(__VAL1))) {                                    \
+      PADDLE_THROW("Windows disable the enforce. Enforce failed.");    \
+    }                                                                  \
+  } while(0)
+#define PADDLE_ENFORCE_NOT_NULL(__VAL1, ...)                       \
+  do {                                                             \
+    if (nullptr == (__VAL1)) {                                     \
+      PADDLE_THROW("Windows disable the enforce. Enforce failed"); \
+    }                                                              \
+  } while(0)
+#endif  // !_WIN32
 
 }  // namespace platform
 }  // namespace paddle
diff --git a/paddle/fluid/platform/macros.h b/paddle/fluid/platform/macros.h
index bbb1c60f099..18ac838a0f1 100644
--- a/paddle/fluid/platform/macros.h
+++ b/paddle/fluid/platform/macros.h
@@ -30,11 +30,14 @@ limitations under the License. */
 #endif  // __FLT_MAX__
 
 #ifdef _WIN32
-#ifdef PADDLE_COMPILE
+#if defined(PADDLE_COMPILE)
+// by default, msvc has predefined macro _LIB for static library
+// only shared library need to export and import symbols
+// static library export all symbols by default.
 #define PADDLE_DLL __declspec(dllexport)
 #else
 #define PADDLE_DLL __declspec(dllimport)
 #endif
 #else
-#define PADDLE_DLL
+#define PADDLE_DLL
 #endif
--
GitLab
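A minimal, self-contained sketch of the data.txt record layout the demo above consumes: one record per line, space-separated float values, a tab, then space-separated integer shape dims, as described in the comment next to the DATA flag in inference_icnet.cc. The Record struct mirrors the demo's; ParseRecord and the sample values are illustrative only and are not part of the patch.

    #include <iostream>
    #include <sstream>
    #include <string>
    #include <vector>

    struct Record {
      std::vector<float> data;  // flattened tensor values
      std::vector<int> shape;   // tensor dimensions, e.g. {1, 3, 224, 224}
    };

    // Parse "<floats>\t<shape>" into a Record, mirroring the demo's
    // split()/ProcessALine() pair but using stream extraction.
    static Record ParseRecord(const std::string& line) {
      Record record;
      const size_t tab = line.find('\t');
      std::istringstream data_ss(line.substr(0, tab));
      std::istringstream shape_ss(
          tab == std::string::npos ? std::string() : line.substr(tab + 1));
      float value;
      while (data_ss >> value) record.data.push_back(value);
      int dim;
      while (shape_ss >> dim) record.shape.push_back(dim);
      return record;
    }

    int main() {
      Record r = ParseRecord("0.1 0.2 0.3\t1 3");
      std::cout << "data " << r.data.size() << ", shape " << r.shape.size()
                << std::endl;
      return 0;
    }

In the demo, record.data and record.shape are then copied into PaddleTensor::data and PaddleTensor::shape exactly as Main() does before calling predictor->Run().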
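The macros.h hunk relies on the usual MSVC convention its comment describes: when the shared library itself is compiled (PADDLE_COMPILE defined) symbols are exported, consumers of the header import them, and static or non-Windows builds leave PADDLE_DLL empty. A minimal illustration of how such a macro is typically applied (hypothetical Widget class, not Paddle code):

    #ifdef _WIN32
    #if defined(PADDLE_COMPILE)
    #define PADDLE_DLL __declspec(dllexport)  // building the DLL: export symbols
    #else
    #define PADDLE_DLL __declspec(dllimport)  // consuming the DLL: import symbols
    #endif
    #else
    #define PADDLE_DLL  // no-op for non-Windows toolchains and static libraries
    #endif

    // Every public symbol of the shared library carries the annotation.
    class PADDLE_DLL Widget {
     public:
      void Run();
    };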