Commit 372caf40 authored by D dzhwinter

windows staff

Parent c3e1fb5a
......@@ -82,7 +82,6 @@ if(WIN32)
if (NOT MSVC)
message(FATAL_ERROR "Windows builds only support MSVC, which is required as the host compiler by NVIDIA's nvcc.")
endif(NOT MSVC)
add_definitions(/DPADDLE_COMPILE)
endif(WIN32)
if(NOT WITH_GOLANG)
......
......@@ -67,6 +67,7 @@ bool NativePaddlePredictor::Init(
} else {
place_ = paddle::platform::CPUPlace();
}
VLOG(3) << "before scope";
if (parent_scope) {
scope_ = parent_scope;
sub_scope_ = &(parent_scope->NewScope());
......@@ -75,26 +76,30 @@ bool NativePaddlePredictor::Init(
paddle::framework::InitDevices(false);
scope_.reset(new paddle::framework::Scope());
}
VLOG(3) << "after scope"
executor_.reset(new paddle::framework::Executor(place_));
VLOG(3) << "executor";
// Initialize the inference program
if (!config_.model_dir.empty()) {
// Parameters are saved in separate files sited in
// the specified `dirname`.
VLOG(3) << config_.model_dir;
inference_program_ = paddle::inference::Load(executor_.get(), scope_.get(),
config_.model_dir);
VLOG(3) << "load model Finish";
} else if (!config_.prog_file.empty() && !config_.param_file.empty()) {
// All parameters are saved in a single file.
// The file names should be consistent with that used
// in Python API `fluid.io.save_inference_model`.
VLOG(3) << "load program";
inference_program_ = paddle::inference::Load(
executor_.get(), scope_.get(), config_.prog_file, config_.param_file);
VLOG(3) << "load program finish";
} else {
LOG(ERROR) << "fail to load inference model.";
return false;
}
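// Note: the `model_dir` branch above loads parameters stored as separate files
// under that directory, while the `prog_file`/`param_file` branch loads a
// single combined parameter file (the demo in this commit points these at
// "__model__" and "__params__").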
VLOG(3) << "prepare";
ctx_ = executor_->Prepare(*inference_program_, 0);
executor_->CreateVariables(*inference_program_,
sub_scope_ ? sub_scope_ : scope_.get(), 0);
......@@ -289,10 +294,13 @@ std::unique_ptr<PaddlePredictor> CreatePaddlePredictor<
VLOG(3) << "create NativePaddlePredictor";
if (config.use_gpu) {
// 1. GPU memory
PADDLE_ENFORCE_GT(
config.fraction_of_gpu_memory, 0.f,
"fraction_of_gpu_memory in the config should be set to range (0., 1.]");
VLOG(3) << "before check";
// PADDLE_ENFORCE_GT(
// config.fraction_of_gpu_memory, 0.f,
// "fraction_of_gpu_memory in the config should be set to range (0., 1.]");
VLOG(3) << "failed on first";
PADDLE_ENFORCE_GE(config.device, 0, "Invalid device id %d", config.device);
VLOG(3) << "after flags";
std::vector<std::string> flags;
if (config.fraction_of_gpu_memory >= 0.0f ||
config.fraction_of_gpu_memory <= 0.95f) {
......@@ -302,9 +310,10 @@ std::unique_ptr<PaddlePredictor> CreatePaddlePredictor<
flags.push_back(flag);
VLOG(3) << "set flag: " << flag;
framework::InitGflags(flags);
VLOG(3) << "flags setting";
}
}
VLOG(3) << "Init flags Done";
std::unique_ptr<PaddlePredictor> predictor(new NativePaddlePredictor(config));
if (!dynamic_cast<NativePaddlePredictor *>(predictor.get())->Init(nullptr)) {
return nullptr;
......
......@@ -17,6 +17,7 @@ endmacro()
if (WIN32)
if (WITH_STATIC_LIB)
safe_set_static_flag()
add_definitions(-DSTATIC_LIB)
set(CMAKE_CXX_FLAGS ${CMAKE_CXX_FLAGS} "/w")
set(CMAKE_CXX_FLAGS_RELEASE ${CMAKE_CXX_FLAGS_RELEASE} "/w")
endif()
......
......@@ -19,6 +19,7 @@ limitations under the License. */
#include <cctype>
#include <algorithm>
#include <fstream>
#include <iostream>
#include <iterator>
#include <memory>
......@@ -27,9 +28,17 @@ limitations under the License. */
#include <thread> //NOLINT
#include "paddle/fluid/inference/paddle_inference_api.h"
std::string DIRNAME = ""; /* "Directory of the inference model." */ // NOLINT
std::string MODELDIR = ""; /* "Directory of the inference model." */ // NOLINT
std::string REFER = ""; /*"path to reference result for comparison."*/ // NOLINT
/* Path of the data file; each line is one record in the format:
<space-separated floats as data>\t<space-separated ints as shape>
Please check the demo data in data.txt for details.
*/
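/* A hypothetical example record (the tab separates the data from the shape):
   0.1 0.2 0.3 0.4\t1 4
*/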
std::string DATA = "";
bool USE_GPU = false; /* "Whether to use GPU." */
auto message_err = []() {
std::cout << "Copyright (c) 2018 PaddlePaddle Authors." << std::endl;
std::cout << "Demo Case for windows inference. "
......@@ -40,19 +49,52 @@ auto message_err = []() {
std::cout << std::endl;
};
void ParseArgs() {
message_err();
std::cout << "DIRNAME:[D:/Paddle/xxx/path_to_model_dir]" << std::endl;
std::cin >> DIRNAME;
std::cout << "USE_GPU:[yes|no]";
std::string value;
std::cin >> value;
std::transform(value.begin(), value.end(), value.begin(), ::toupper);
USE_GPU = (value == "YES") ? true : false;
}
namespace paddle {
namespace demo {
void split(const std::string& str, char sep,
std::vector<std::string>* pieces) {
pieces->clear();
if (str.empty()) {
return;
}
size_t pos = 0;
size_t next = str.find(sep, pos);
while (next != std::string::npos) {
pieces->push_back(str.substr(pos, next - pos));
pos = next + 1;
next = str.find(sep, pos);
}
if (!str.substr(pos).empty()) {
pieces->push_back(str.substr(pos));
}
}
/*
* Get a summary of a PaddleTensor content.
*/
std::string SummaryTensor(const PaddleTensor& tensor) {
std::stringstream ss;
int num_elems = tensor.data.length() / PaddleDtypeSize(tensor.dtype);
ss << "data[:10]\t";
switch (tensor.dtype) {
case PaddleDType::INT64: {
for (int i = 0; i < std::min(num_elems, 10); i++) {
ss << static_cast<int64_t*>(tensor.data.data())[i] << " ";
}
break;
}
case PaddleDType::FLOAT32:
for (int i = 0; i < std::min(num_elems, 10); i++) {
ss << static_cast<float*>(tensor.data.data())[i] << " ";
}
break;
}
return ss.str();
}
std::string ToString(const NativeConfig& config) {
std::stringstream ss;
ss << "Use GPU : " << (config.use_gpu ? "True" : "False") << "\n"
......@@ -65,119 +107,122 @@ std::string ToString(const NativeConfig& config) {
return ss.str();
}
void Main(bool use_gpu) {
//# 1. Create PaddlePredictor with a config.
NativeConfig config;
config.model_dir = DIRNAME;
config.use_gpu = USE_GPU;
config.fraction_of_gpu_memory = 0.15;
config.device = 0;
std::cout << ToString(config) << std::endl;
auto predictor =
CreatePaddlePredictor<NativeConfig, PaddleEngineKind::kNative>(config);
struct Record {
std::vector<float> data;
std::vector<int32_t> shape;
};
Record ProcessALine(const std::string& line) {
std::cout << "process a line" << std::endl;;
std::vector<std::string> columns;
split(line, '\t', &columns);
assert(columns.size() == 2UL &&
       "data format error, should be <data>\t<shape>");
Record record;
std::vector<std::string> data_strs;
split(columns[0], ' ', &data_strs);
for (auto& d : data_strs) {
record.data.push_back(std::stof(d));
}
std::vector<std::string> shape_strs;
split(columns[1], ' ', &shape_strs);
for (auto& s : shape_strs) {
record.shape.push_back(std::stoi(s));
}
std::cout << "data size " << record.data.size() << std::endl;
std::cout << "data shape size " << record.shape.size() << std::endl;
return record;
}
for (int batch_id = 0; batch_id < 3; batch_id++) {
//# 2. Prepare input.
int64_t data[4] = {1, 2, 3, 4};
PaddleTensor tensor;
tensor.shape = std::vector<int>({4, 1});
tensor.data = PaddleBuf(data, sizeof(data));
tensor.dtype = PaddleDType::INT64;
// For simplicity, we set all the slots with the same data.
std::vector<PaddleTensor> slots(4, tensor);
//# 3. Run
std::vector<PaddleTensor> outputs;
assert(predictor->Run(slots, &outputs) == true &&
"Predict run expect true");
//# 4. Get output.
assert(outputs.size() == 1UL);
// Check the output buffer size and result of each tid.
assert(outputs.front().data.length() == 33168UL);
float result[5] = {0.00129761, 0.00151112, 0.000423564, 0.00108815,
0.000932706};
const size_t num_elements = outputs.front().data.length() / sizeof(float);
// The outputs' buffers are in CPU memory.
for (size_t i = 0; i < std::min(static_cast<size_t>(5), num_elements);
i++) {
assert(static_cast<float*>(outputs.front().data.data())[i] == result[i]);
std::cout << "expect the output "
<< static_cast<float*>(outputs.front().data.data())[i]
<< std::endl;
void CheckOutput(const std::string& referfile, const PaddleTensor& output) {
std::string line;
std::ifstream file(referfile);
std::getline(file, line);
auto refer = ProcessALine(line);
file.close();
size_t numel = output.data.length() / PaddleDtypeSize(output.dtype);
std::cout << "predictor output numel " << numel << std::endl;
std::cout << "reference output numel " << refer.data.size() << std::endl;
assert(numel == refer.data.size());
switch (output.dtype) {
case PaddleDType::INT64: {
for (size_t i = 0; i < numel; ++i) {
assert(static_cast<int64_t*>(output.data.data())[i] ==
refer.data[i]);
}
break;
}
case PaddleDType::FLOAT32:
for (size_t i = 0; i < numel; ++i) {
assert(
fabs(static_cast<float*>(output.data.data())[i] - refer.data[i]) <=
1e-5);
}
break;
}
}
void MainThreads(int num_threads, bool USE_GPU) {
// Multi-threading is only supported on CPU.
// 0. Create PaddlePredictor with a config.
/*
* Use the native fluid engine to inference the demo.
*/
void Main(bool use_gpu) {
NativeConfig config;
config.model_dir = DIRNAME;
config.param_file = MODELDIR + "/__params__";
config.prog_file = MODELDIR + "/__model__";
config.use_gpu = USE_GPU;
config.fraction_of_gpu_memory = 0.15;
config.device = 0;
if (USE_GPU) {
config.fraction_of_gpu_memory = 0.1f; // set by yourself
}
std::cout << ToString(config) << std::endl;
auto main_predictor =
std::cout << "init predictor" << std::endl;
auto predictor =
CreatePaddlePredictor<NativeConfig, PaddleEngineKind::kNative>(config);
std::vector<std::thread> threads;
for (int tid = 0; tid < num_threads; ++tid) {
threads.emplace_back([&, tid]() {
// 1. clone a predictor which shares the same parameters
auto predictor = main_predictor->Clone();
constexpr int num_batches = 3;
for (int batch_id = 0; batch_id < num_batches; ++batch_id) {
// 2. Dummy Input Data
int64_t data[4] = {1, 2, 3, 4};
PaddleTensor tensor;
tensor.shape = std::vector<int>({4, 1});
tensor.data = PaddleBuf(data, sizeof(data));
tensor.dtype = PaddleDType::INT64;
std::vector<PaddleTensor> inputs(4, tensor);
std::vector<PaddleTensor> outputs;
// 3. Run
assert(predictor->Run(inputs, &outputs) == true);
// 4. Get output.
assert(outputs.size() == 1UL);
// Check the output buffer size and result of each tid.
assert(outputs.front().data.length() == 33168UL);
float result[5] = {0.00129761, 0.00151112, 0.000423564, 0.00108815,
0.000932706};
const size_t num_elements =
outputs.front().data.length() / sizeof(float);
// The outputs' buffers are in CPU memory.
for (size_t i = 0; i < std::min(static_cast<size_t>(5), num_elements);
i++) {
assert(static_cast<float*>(outputs.front().data.data())[i] ==
result[i]);
}
}
});
}
for (int i = 0; i < num_threads; ++i) {
threads[i].join();
}
std::cout << "begin to process data" << std::endl;
// Just a single batch of data.
std::string line;
std::ifstream file(DATA);
std::getline(file, line);
auto record = ProcessALine(line);
file.close();
// Inference.
PaddleTensor input;
input.shape = record.shape;
input.data =
PaddleBuf(record.data.data(), record.data.size() * sizeof(float));
input.dtype = PaddleDType::FLOAT32;
std::cout << "run executor" << std::endl;
std::vector<PaddleTensor> output;
predictor->Run({input}, &output);
std::cout << "output.size " << output.size() << std::endl;
auto& tensor = output.front();
std::cout << "output: " << SummaryTensor(tensor) << std::endl;
// compare with reference result
CheckOutput(REFER, tensor);
}
} // namespace demo
} // namespace paddle
int main(int argc, char** argv) {
// ParseArgs();
DIRNAME = "./icnet";
MODELDIR = "./mobilenet/model";
DATA = "./mobilenet/data.txt";
REFER = "./mobilenet/result.txt";
USE_GPU = true;
paddle::demo::Main(false /* USE_GPU*/);
paddle::demo::MainThreads(1, false /* USE_GPU*/);
paddle::demo::MainThreads(4, false /* USE_GPU*/);
if (USE_GPU) {
paddle::demo::Main(true /*USE_GPU*/);
paddle::demo::MainThreads(1, true /*USE_GPU*/);
paddle::demo::MainThreads(4, true /*USE_GPU*/);
}
system("pause");
return 0;
......
......@@ -25,7 +25,6 @@ limitations under the License. */
#include <memory>
#include <string>
#include <vector>
#include "paddle/fluid/platform/macros.h"
namespace paddle {
......@@ -34,7 +33,7 @@ enum PaddleDType {
INT64,
};
class PADDLE_DLL PaddleBuf {
class PaddleBuf {
public:
PaddleBuf() = default;
PaddleBuf(PaddleBuf&& other);
......@@ -46,7 +45,7 @@ class PADDLE_DLL PaddleBuf {
PaddleBuf(void* data, size_t length)
: data_(data), length_(length), memory_owned_{false} {}
// Own memory.
explicit PaddleBuf(size_t length)
PaddleBuf(size_t length)
: data_(new char[length]), length_(length), memory_owned_(true) {}
// Resize to `length` bytes.
void Resize(size_t length);
......@@ -65,7 +64,7 @@ class PADDLE_DLL PaddleBuf {
bool memory_owned_{true};
};
struct PADDLE_DLL PaddleTensor {
struct PaddleTensor {
PaddleTensor() = default;
std::string name; // variable name.
std::vector<int> shape;
......@@ -88,7 +87,7 @@ enum class PaddleEngineKind {
* A simple Inference API for Paddle. Currently this API can be used in
* non-sequence scenarios.
*/
class PADDLE_DLL PaddlePredictor {
class PaddlePredictor {
public:
struct Config;
PaddlePredictor() = default;
......@@ -97,6 +96,7 @@ class PADDLE_DLL PaddlePredictor {
// Predict a record.
// The caller is responsible for allocating and releasing the memory of
// `inputs`; `inputs` must remain valid until Run returns. The caller is also
// responsible for the output tensors' buffers, whether allocated inside the
// predictor or passed in from outside.
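// Illustrative call (a sketch; mirrors the demo in this commit):
//   std::vector<PaddleTensor> outputs;
//   if (!predictor->Run(inputs, &outputs)) { /* handle failure */ }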
virtual bool Run(const std::vector<PaddleTensor>& inputs,
......@@ -111,12 +111,12 @@ class PADDLE_DLL PaddlePredictor {
virtual ~PaddlePredictor() = default;
// The common configs for all the predictors.
struct PADDLE_DLL Config {
struct Config {
std::string model_dir; // path to the model directory.
};
};
struct PADDLE_DLL NativeConfig : public PaddlePredictor::Config {
struct NativeConfig : public PaddlePredictor::Config {
// GPU related fields.
bool use_gpu{false};
int device{0};
......@@ -129,7 +129,7 @@ struct PADDLE_DLL NativeConfig : public PaddlePredictor::Config {
};
// Configurations for Anakin engine.
struct PADDLE_DLL AnakinConfig : public PaddlePredictor::Config {
struct AnakinConfig : public PaddlePredictor::Config {
enum TargetType { NVGPU = 0, X86 };
int device;
std::string model_file;
......@@ -137,7 +137,7 @@ struct PADDLE_DLL AnakinConfig : public PaddlePredictor::Config {
TargetType target_type;
};
struct PADDLE_DLL TensorRTConfig : public NativeConfig {
struct TensorRTConfig : public NativeConfig {
// Determine whether a subgraph will be executed by TRT.
int min_subgraph_size{1};
// While TensorRT allows an engine optimized for a given max batch size
......@@ -159,9 +159,8 @@ struct PADDLE_DLL TensorRTConfig : public NativeConfig {
//
// Similarly, each engine kind should map to a unique predictor implementation.
template <typename ConfigT, PaddleEngineKind engine = PaddleEngineKind::kNative>
PADDLE_DLL std::unique_ptr<PaddlePredictor> CreatePaddlePredictor(
const ConfigT& config);
std::unique_ptr<PaddlePredictor> CreatePaddlePredictor(const ConfigT& config);
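// Illustrative usage (as in the demo):
//   auto predictor =
//       CreatePaddlePredictor<NativeConfig, PaddleEngineKind::kNative>(config);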
PADDLE_DLL int PaddleDtypeSize(PaddleDType dtype);
int PaddleDtypeSize(PaddleDType dtype);
} // namespace paddle
......@@ -308,6 +308,8 @@ inline void throw_on_error(T e) {
__PADDLE_BINARY_COMPARE(__VAL0, __VAL1, <, >=, __VA_ARGS__)
#define PADDLE_ENFORCE_LE(__VAL0, __VAL1, ...) \
__PADDLE_BINARY_COMPARE(__VAL0, __VAL1, <=, >, __VA_ARGS__)
#if !defined(_WIN32)
#define PADDLE_ENFORCE_NOT_NULL(__VAL, ...) \
do { \
if (UNLIKELY(nullptr == (__VAL))) { \
......@@ -327,6 +329,20 @@ inline void throw_on_error(T e) {
paddle::string::Sprintf("" __VA_ARGS__)); \
} \
} while (0)
#else
#define __PADDLE_BINARY_COMPARE(__VAL0, __VAL1, __CMP, __INV_CMP, ...) \
do { \
if (!((__VAL0)__CMP(__VAL1))) { \
PADDLE_THROW("Enforce is disabled on Windows; the check failed."); \
} \
} while (0)
#define PADDLE_ENFORCE_NOT_NULL(__VAL1, ...) \
do { \
if (nullptr == (__VAL1)) { \
PADDLE_THROW("Enforce is disabled on Windows; the check failed."); \
} \
} while (0)
#endif // !_WIN32
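// Illustrative usage (a sketch; the call mirrors api_impl.cc): on Windows the
// macros above reduce each check to a plain comparison followed by
// PADDLE_THROW with a fixed message, so the formatted message arguments are
// discarded:
//   PADDLE_ENFORCE_GE(config.device, 0, "Invalid device id %d", config.device);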
} // namespace platform
} // namespace paddle
......@@ -30,11 +30,14 @@ limitations under the License. */
#endif // __FLT_MAX__
#ifdef _WIN32
#ifdef PADDLE_COMPILE
#if defined(PADDLE_COMPILE)
// By default, MSVC predefines the macro _LIB for static library builds.
// Only a shared library needs to export and import symbols;
// a static library exports all symbols by default.
#define PADDLE_DLL __declspec(dllexport)
#else
#define PADDLE_DLL __declspec(dllimport)
#endif
#else
#define PADDLE_COMPILE
#define PADDLE_DLL
#endif
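// Illustrative usage (a sketch): a type exported from the DLL would be
// declared as
//   class PADDLE_DLL PaddlePredictor { ... };
// With /DPADDLE_COMPILE defined while building the library, PADDLE_DLL expands
// to __declspec(dllexport); without it, to __declspec(dllimport); on
// non-Windows builds it expands to nothing.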