提交 c6dcffc6 编写于 作者: D dzhwinter

lb. add debug output

上级 607080e8
...@@ -333,9 +333,49 @@ std::vector<std::shared_ptr<ExecutorPrepareContext>> Executor::Prepare( ...@@ -333,9 +333,49 @@ std::vector<std::shared_ptr<ExecutorPrepareContext>> Executor::Prepare(
return result; return result;
} }
// void CheckResult(const std::string op_type, ExecutorPrepareContext* ctx, Scope* local_scope) {
// VLOG(3) << "before checking result";
// auto& dev_ctx = *platform::DeviceContextPool::Instance().Get(place_);
// std::vector<std::string> outputs;
// auto& block = ctx->prog_.Block(0);
// bool found = false;
// framework::OpDesc* myop = nullptr;
// for(auto& op : block.AllOps()) {
// if(op->Type() == "load_combine" || op->Type() == "fetch" || op->Type() == "feed") return;
// if (op->Type() == op_type) {
// found = true;
// myop = op;
// break;
// }
// }
// }
// if(!found) {
// VLOG(3) << "not found op!";
// return;
// }
// auto* op = myop;
// VLOG(3) << "start op output" << op->Type();
// for(auto var_name: op->OutputArgumentNames()) {
// auto* var = local_scope->Var(var_name);
// auto* var_desc = block.FindVar(var_name);
// if (var_desc->Persistable()) continue;
// auto* tensor = var->GetMutable<framework::LoDTensor>();
// framework::Tensor check;
// VLOG(3) << "before tensor copy";
// framework::TensorCopy(*tensor, platform::CPUPlace(), dev_ctx, &check);
// VLOG(3) << "after tensor copy";
// float sum = .0;
// for(size_t i=0; i < check.numel(); ++i) {
// sum += check.data<float>()[i];
// }
// VLOG(3) << "op " << op->Type() << " output var " << var_name << " sum " << sum;
// VLOG(3) << "after checking result";
// }
void Executor::RunPreparedContext(ExecutorPrepareContext* ctx, Scope* scope, void Executor::RunPreparedContext(ExecutorPrepareContext* ctx, Scope* scope,
bool create_local_scope, bool create_vars, bool create_local_scope, bool create_vars,
bool keep_kids) { bool keep_kids) {
VLOG(3) << "RunPreparedContext inside";
Scope* local_scope = scope; Scope* local_scope = scope;
if (create_vars) { if (create_vars) {
if (create_local_scope) { if (create_local_scope) {
...@@ -346,13 +386,73 @@ void Executor::RunPreparedContext(ExecutorPrepareContext* ctx, Scope* scope, ...@@ -346,13 +386,73 @@ void Executor::RunPreparedContext(ExecutorPrepareContext* ctx, Scope* scope,
for (auto& op : ctx->ops_) { for (auto& op : ctx->ops_) {
op->Run(*local_scope, place_); op->Run(*local_scope, place_);
// CheckResult(op->Type(), ctx, local_scope);
if (FLAGS_benchmark) { if (FLAGS_benchmark) {
VLOG(2) << "Memory used after operator " + op->Type() + " running: " VLOG(2) << "Memory used after operator " + op->Type() + " running: "
<< memory::memory_usage(place_); << memory::memory_usage(place_);
} }
} }
platform::DeviceContextPool::Instance().Get(place_)->Wait(); platform::DeviceContextPool::Instance().Get(place_)->Wait();
VLOG(3) << "start checking";
auto& dev_ctx = *platform::DeviceContextPool::Instance().Get(place_);
std::vector<std::string> outputs;
auto& block = ctx->prog_.Block(0);
for(auto& op : block.AllOps()) {
if(op->Type() == "load_combine" || op->Type() == "fetch" || op->Type() == "feed") continue;
// for(auto& real_op : ctx->ops_) {
// if(real_op->Type() == op->Type()) {
// VLOG(3) << real_op->Type() << " " <<place_ << " " << real_op->DebugStringEx(local_scope);
// }
// }
//VLOG(3) << "start op output" << op->Type();
for(auto var_name: op->InputArgumentNames()) {
auto* var = local_scope->Var(var_name);
auto* var_desc = block.FindVar(var_name);
if (var_desc->Persistable()) continue;
auto* tensor = var->GetMutable<framework::LoDTensor>();
framework::Tensor check;
VLOG(3) << "before tensor copy";
framework::TensorCopy(*tensor, platform::CPUPlace(), dev_ctx, &check);
VLOG(3) << "after tensor copy";
float sum = .0;
for(size_t i=0; i < check.numel(); ++i) {
sum += check.data<float>()[i];
}
VLOG(3) << "op " << op->Type() << " input var " << var_name << " sum " << sum;
}
VLOG(3) << "op " << op->Type() << "input finished";
for(auto var_name: op->OutputArgumentNames()) {
auto* var = local_scope->Var(var_name);
auto* var_desc = block.FindVar(var_name);
if (var_desc->Persistable()) continue;
auto* tensor = var->GetMutable<framework::LoDTensor>();
framework::Tensor check;
VLOG(3) << "before tensor copy";
if(op->Type() == "batch_norm" && platform::is_gpu_place(place_)) {
VLOG(3) << "op " << op->Type() << " output var " << var_name << " " << tensor->numel();
tensor->mutable_data<float>(place_);
framework::TensorCopy(*tensor, platform::CPUPlace(), dev_ctx, &check);
} else {
framework::TensorCopy(*tensor, platform::CPUPlace(), dev_ctx, &check);
}
VLOG(3) << "after tensor copy";
float sum = .0;
for(size_t i=0; i < check.numel(); ++i) {
sum += check.data<float>()[i];
}
VLOG(3) << "op " << op->Type() << " output var " << var_name << " sum " << sum;
}
}
VLOG(3) << "after checking result";
if (local_scope != scope) { if (local_scope != scope) {
scope->DeleteScope(local_scope); scope->DeleteScope(local_scope);
} else { } else {
......
...@@ -46,7 +46,7 @@ if(WITH_GPU) ...@@ -46,7 +46,7 @@ if(WITH_GPU)
endif(NOT WIN32) endif(NOT WIN32)
endif() endif()
include_directories("D:/Paddle/") include_directories("E:/Paddle/")
include_directories("${PADDLE_LIB}") include_directories("${PADDLE_LIB}")
include_directories("${PADDLE_LIB}/third_party/install/protobuf/include") include_directories("${PADDLE_LIB}/third_party/install/protobuf/include")
include_directories("${PADDLE_LIB}/third_party/install/glog/include") include_directories("${PADDLE_LIB}/third_party/install/glog/include")
...@@ -72,7 +72,12 @@ link_directories("${PADDLE_LIB}/third_party/install/gflags/lib") ...@@ -72,7 +72,12 @@ link_directories("${PADDLE_LIB}/third_party/install/gflags/lib")
link_directories("${PADDLE_LIB}/paddle/fluid/inference") link_directories("${PADDLE_LIB}/paddle/fluid/inference")
# add_executable(${DEMO_NAME} ${DEMO_NAME}.cc) # add_executable(${DEMO_NAME} ${DEMO_NAME}.cc)
add_library(${DEMO_NAME} ${DEMO_NAME}.cc) # add_library(${DEMO_NAME} ${DEMO_NAME}.cc)
add_library(${DEMO_NAME} SHARED ${DEMO_NAME}.cc)
add_executable(real_data_icnet_tester real_data_icnet_tester.cc)
add_executable(test test.cc)
add_executable(thread_icnet_test thread_icnet_test.cc)
if(WITH_MKL) if(WITH_MKL)
include_directories("${PADDLE_LIB}/third_party/install/mklml/include") include_directories("${PADDLE_LIB}/third_party/install/mklml/include")
set(MATH_LIB ${PADDLE_LIB}/third_party/install/mklml/lib/libmklml_intel${CMAKE_SHARED_LIBRARY_SUFFIX} set(MATH_LIB ${PADDLE_LIB}/third_party/install/mklml/lib/libmklml_intel${CMAKE_SHARED_LIBRARY_SUFFIX}
...@@ -89,7 +94,11 @@ endif() ...@@ -89,7 +94,11 @@ endif()
# Note: libpaddle_inference_api.so/a must put before libpaddle_fluid.so/a # Note: libpaddle_inference_api.so/a must put before libpaddle_fluid.so/a
if(WITH_STATIC_LIB) if(WITH_STATIC_LIB)
set(DEPS set(DEPS
${PADDLE_LIB}/paddle/fluid/inference/libpaddle_fluid${CMAKE_STATIC_LIBRARY_SUFFIX}) # ${PADDLE_LIB}/paddle/fluid/inference/libpaddle_fluid${CMAKE_STATIC_LIBRARY_SUFFIX}
D:/Paddle/bazel-dll/fluid_install_dir/paddle/fluid/inference/libpaddle_fluid${CMAKE_STATIC_LIBRARY_SUFFIX}
# E:/Paddle/build/paddle/fluid/inference/api/Release/libpaddle_inference_api${CMAKE_STATIC_LIBRARY_SUFFIX}
D:/Paddle/bazel-dll/paddle/fluid/inference/api/Release/libpaddle_inference_api${CMAKE_STATIC_LIBRARY_SUFFIX}
)
else() else()
set(DEPS set(DEPS
${PADDLE_LIB}/paddle/fluid/inference/libpaddle_fluid${CMAKE_SHARED_LIBRARY_SUFFIX}) ${PADDLE_LIB}/paddle/fluid/inference/libpaddle_fluid${CMAKE_SHARED_LIBRARY_SUFFIX})
...@@ -121,3 +130,9 @@ if(WITH_GPU) ...@@ -121,3 +130,9 @@ if(WITH_GPU)
endif() endif()
target_link_libraries(${DEMO_NAME} ${DEPS}) target_link_libraries(${DEMO_NAME} ${DEPS})
target_link_libraries(test ${DEMO_NAME} )
target_link_libraries(thread_icnet_test ${DEPS})
target_link_libraries(real_data_icnet_tester ${DEPS})
target_compile_definitions(${DEMO_NAME} PRIVATE "API_DEFINITION")
...@@ -19,49 +19,42 @@ ...@@ -19,49 +19,42 @@
#include <algorithm> #include <algorithm>
#include <vector> #include <vector>
#include <string> #include <string>
#include <memory>
#include "paddle/fluid/inference/api/paddle_inference_api.h" #include "paddle/fluid/inference/api/paddle_inference_api.h"
#include "inference_icnet.h"
namespace paddle {
std::string DIRNAME = "./infer_model";
std::string DATA = "./test-image.txt";
const int C = 3; // image channel
const int H = 449; // image height
const int W = 581; // image width
// 数据格式 // 数据格式
// "<space splitted floats as data>\t<space splitted ints as shape" // "<space splitted floats as data>\t<space splitted ints as shape"
// 1. 存储为float32格式。 // 1. 存储为float32格式。
// 2. 必须减去均值。 CHW三个通道为 mean = 112.15, 109.41, 185.42 // 2. 必须减去均值。 CHW三个通道为 mean = 112.15, 109.41, 185.42
using namespace paddle;
struct Record class Predictor {
{ private:
std::unique_ptr<PaddlePredictor> predictor;
struct Record
{
std::vector<float> data; std::vector<float> data;
std::vector<int32_t> shape; std::vector<int32_t> shape;
}; };
NativeConfig GetConfig() { const int C = 3; // image channel
NativeConfig config; const int H = 449; // image height
config.prog_file=DIRNAME + "/__model__"; const int W = 581; // image width
config.param_file=DIRNAME + "/__params__";
config.fraction_of_gpu_memory = 0.0;
config.use_gpu = true;
config.device = 0;
return config;
}
using Time = decltype(std::chrono::high_resolution_clock::now()); using Time = decltype(std::chrono::high_resolution_clock::now());
Time time() { return std::chrono::high_resolution_clock::now(); }; Time time() { return std::chrono::high_resolution_clock::now(); };
double time_diff(Time t1, Time t2) { double time_diff(Time t1, Time t2) {
typedef std::chrono::microseconds ms; typedef std::chrono::microseconds ms;
auto diff = t2 - t1; auto diff = t2 - t1;
ms counter = std::chrono::duration_cast<ms>(diff); ms counter = std::chrono::duration_cast<ms>(diff);
return counter.count() / 1000.0; return counter.count() / 1000.0;
} }
static void split(const std::string& str, char sep, static void split(const std::string& str, char sep,
std::vector<std::string>* pieces) { std::vector<std::string>* pieces) {
pieces->clear(); pieces->clear();
if (str.empty()) { if (str.empty()) {
...@@ -77,9 +70,9 @@ static void split(const std::string& str, char sep, ...@@ -77,9 +70,9 @@ static void split(const std::string& str, char sep,
if (!str.substr(pos).empty()) { if (!str.substr(pos).empty()) {
pieces->push_back(str.substr(pos)); pieces->push_back(str.substr(pos));
} }
} }
Record ProcessALine(const std::string& line) { Record ProcessALine(const std::string& line) {
std::vector<std::string> columns; std::vector<std::string> columns;
split(line, '\t', &columns); split(line, '\t', &columns);
...@@ -96,62 +89,74 @@ Record ProcessALine(const std::string& line) { ...@@ -96,62 +89,74 @@ Record ProcessALine(const std::string& line) {
record.shape.push_back(std::stoi(s)); record.shape.push_back(std::stoi(s));
} }
return record; return record;
} }
public:
Predictor (const char* prog_file,
const char* param_file, const float fraction_of_gpu_memory,
const bool use_gpu, const int device) {
void test_naive(int batch_size){ NativeConfig config;
NativeConfig config = GetConfig(); config.prog_file = prog_file;
auto predictor = CreatePaddlePredictor<NativeConfig>(config); config.param_file = param_file;
int height = H; config.fraction_of_gpu_memory = fraction_of_gpu_memory;
int width = W; config.use_gpu = use_gpu;
int channel = C; config.device = device;
int num_sum = height * width * channel * batch_size;
predictor = CreatePaddlePredictor<NativeConfig>(config);
}
// 1. use fake data void predict(float* input, const int channel, const int height, const int width,
int64_t** output, int* output_length, int batch_size) {
std::vector<float> data; std::vector<float> data;
for(int i = 0; i < num_sum; i++) { int intput_length = channel * height * width * batch_size;
data.push_back(0.0); for (int i = 0; i < intput_length; i++) {
data.push_back(*((float*)input + i));
} }
// initialize the input data
PaddleTensor tensor; PaddleTensor tensor;
tensor.shape = std::vector<int>({batch_size, channel, height, width}); tensor.shape = std::vector<int>({ batch_size, channel, height, width });
tensor.data.Resize(sizeof(float) * batch_size * channel * height * width); tensor.data.Resize(sizeof(float) * batch_size * channel * height * width);
std::copy(data.begin(), data.end(), static_cast<float*>(tensor.data.data())); std::copy(data.begin(), data.end(), static_cast<float*>(tensor.data.data()));
tensor.dtype = PaddleDType::FLOAT32;
// 2. read data from file
// std::string line;
// std::ifstream file(DATA);
// std::getline(file, line);
// auto record = ProcessALine(line);
// file.close();
// PaddleTensor tensor;
// tensor.shape = record.shape;
// tensor.data =
// PaddleBuf(record.data.data(), record.data.size() * sizeof(float));
tensor.dtype = PaddleDType::FLOAT32;
std::vector<PaddleTensor> paddle_tensor_feeds(1, tensor); std::vector<PaddleTensor> paddle_tensor_feeds(1, tensor);
PaddleTensor tensor_out;
// initialize the output data
PaddleTensor tensor_out;
std::vector<PaddleTensor> outputs(1, tensor_out); std::vector<PaddleTensor> outputs(1, tensor_out);
predictor->Run(paddle_tensor_feeds, &outputs, batch_size);
auto time1 = time();
for(size_t i = 0; i < 2; i++) {
std::cout << "Pass " << i << "predict";
predictor->Run(paddle_tensor_feeds, &outputs, batch_size); predictor->Run(paddle_tensor_feeds, &outputs, batch_size);
*output_length = (int)outputs[0].data.length();
std::memcpy(static_cast<void *>(*output), outputs[0].data.data(), outputs[0].data.length());
int64_t sum_out = 0;
for(int i=0; i < outputs[0].data.length()/sizeof(int64_t); ++i) {
int64_t item = static_cast<int64_t*>(outputs[0].data.data())[i];
sum_out += item;
if (item != 0) {
std::cout << item << std::endl;
}
} }
auto time2 = time(); std::cout << "sum_out" << sum_out << std::endl;
std::ofstream ofresult("naive_test_result.txt", std::ios::app); }
};
std::cout <<"batch: " << batch_size << " predict cost: " << time_diff(time1, time2) / 100.0 << "ms" << std::endl; API_REFERENCE void * init_predictor(const char* prog_file,
std::cout << outputs.size() << std::endl; const char* param_file, const float fraction_of_gpu_memory,
const bool use_gpu, const int device) {
return new Predictor(prog_file, param_file, fraction_of_gpu_memory, use_gpu, device);
}
API_REFERENCE void predict(void* handle, float* input, const int channel, const int height, const int width,
int64_t** output, int* output_length, int batch_size) {
assert(handle != nullptr);
((Predictor*)handle)->predict(input, channel, height, width, output, output_length, batch_size);
} }
} // namespace paddle
int main(int argc, char** argv) { API_REFERENCE void destory_predictor(void *handle) {
paddle::test_naive(1 << 0); if (handle) {
return 0; delete handle;
handle = nullptr;
}
} }
#ifndef INFERENCE_ICNET_H_
#define INFERENCE_ICNET_H_

// Flat, handle-based entry points of the ICNet inference demo library.
//
// The header is self-contained: it is guarded against multiple inclusion and
// pulls in <cstdint> itself because predict() uses int64_t.

#include <cstdint>

// On Windows the functions are exported from (or imported into) the DLL with
// C linkage. `inference_icnet_EXPORTS` is expected to be defined only while
// building the library itself. On other platforms the macro expands to
// nothing, so the functions keep ordinary C++ linkage.
#ifdef _WIN32
#ifdef inference_icnet_EXPORTS
#define API_REFERENCE extern "C" __declspec(dllexport)
#else
#define API_REFERENCE extern "C" __declspec(dllimport)
#endif
#else
#define API_REFERENCE
#endif

// Creates a predictor and returns it as an opaque handle.
//   prog_file / param_file   paths of the model program and parameter files
//   fraction_of_gpu_memory   forwarded to the predictor configuration
//   use_gpu / device         forwarded to the predictor configuration
// The handle must be released with destory_predictor().
API_REFERENCE void * init_predictor(const char* prog_file,
                                    const char* param_file,
                                    const float fraction_of_gpu_memory,
                                    const bool use_gpu, const int device);

// Runs one inference on `input`, which must hold
// batch_size * channel * height * width floats.
// The raw bytes of the first output tensor are copied into the buffer that
// `*output` points to, and `*output_length` receives that length in BYTES.
// The caller owns the buffer and must make it large enough for the output.
API_REFERENCE void predict(void* handle, float* input, const int channel,
                           const int height, const int width,
                           int64_t** output, int* output_length,
                           int batch_size);

// Releases a handle obtained from init_predictor().
API_REFERENCE void destory_predictor(void *handle);

#endif  // INFERENCE_ICNET_H_
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#define GOOGLE_GLOG_DLL_DECL
#include <gflags/gflags.h>
#include <glog/logging.h>
#include <chrono>
#include <fstream>
#include <iostream>
#include "paddle/fluid/inference/api/paddle_inference_api.h"
namespace paddle {
// DEFINE_string(dirname, "./lb",
// "Directory of the inference model.");
// Returns the predictor configuration used by this tester: the model files
// under "lb/", GPU device 0, and a GPU memory fraction of 0.8.
NativeConfig GetConfig() {
  NativeConfig native_cfg;

  // Device settings.
  native_cfg.use_gpu = true;
  native_cfg.device = 0;
  native_cfg.fraction_of_gpu_memory = 0.8;

  // Model location (a combined program file plus a combined parameter file).
  // native_cfg.model_dir = FLAGS_dirname;
  native_cfg.prog_file = "lb/__model__";
  native_cfg.param_file = "lb/__params__";

  return native_cfg;
}
// Time-point type produced by the high-resolution clock.
using Time = decltype(std::chrono::high_resolution_clock::now());

// Returns the current high-resolution clock reading.
Time time() {
  return std::chrono::high_resolution_clock::now();
}

// Returns the time elapsed from t1 to t2 in milliseconds.
// The interval is first truncated to whole microseconds, so the result has
// microsecond granularity; it is negative when t2 precedes t1.
double time_diff(Time t1, Time t2) {
  using micros = std::chrono::microseconds;
  const micros elapsed = std::chrono::duration_cast<micros>(t2 - t1);
  return elapsed.count() / 1000.0;
}
void test_naive(int batch_size){
NativeConfig config = GetConfig();
auto predictor = CreatePaddlePredictor<NativeConfig>(config);
int height = 449;
int width = 581;
// =============read file list =============
std::ifstream infile("new_file.list");
std::string temp_s;
std::vector<std::string> all_files;
while (!infile.eof()) {
infile >> temp_s;
all_files.push_back(temp_s);
}
// size_t file_num = all_files.size();
infile.close();
// =============read file list =============
for (size_t f_k = 0; f_k < 1; f_k ++) {
std::ifstream in_img(all_files[f_k]);
std::cout << all_files[f_k] << std::endl;
float temp_v;
float sum_n = 0.0;
std::vector<float> data;
while (!in_img.eof()) {
in_img >> temp_v;
data.push_back(float(temp_v));
// std::cout << temp_v << " ";
sum_n += temp_v;
}
in_img.close();
std::cout << "sum: " << sum_n << std::endl;
PaddleTensor tensor;
tensor.shape = std::vector<int>({batch_size, 3, height, width});
tensor.data.Resize(sizeof(float) * batch_size * 3 * height * width);
std::copy(data.begin(), data.end(), static_cast<float*>(tensor.data.data()));
tensor.dtype = PaddleDType::FLOAT32;
std::vector<PaddleTensor> paddle_tensor_feeds(1, tensor);
PaddleTensor tensor_out;
std::vector<PaddleTensor> outputs(1, tensor_out);
predictor->Run(paddle_tensor_feeds, &outputs, batch_size);
std::cout << "start predict123:" << std::endl;
auto time1 = time();
for(size_t i = 0; i < 1; i++) {
predictor->Run(paddle_tensor_feeds, &outputs, batch_size);
}
auto time2 = time();
std::ofstream ofresult("naive_test_result.txt", std::ios::app);
std::cout <<"batch: " << batch_size << " predict cost: " << time_diff(time1, time2) / 1000.0 << "ms" << std::endl;
std::cout << outputs.size() << std::endl;
int64_t * data_o = static_cast<int64_t*>(outputs[0].data.data());
int64_t sum_out = 0;
for (size_t j = 0; j < outputs[0].data.length() / sizeof(int64_t); ++j) {
ofresult << std::to_string(data_o[j]) << " ";
sum_out += data_o[j];
}
std::cout << "sum_out " << sum_out << std::endl;
ofresult << std::endl;
ofresult.close();
}
}
} // namespace paddle
// Entry point: runs the real-data test once with a batch size of 1.
// Command-line arguments are currently not parsed.
int main(int argc, char** argv) {
  // google::ParseCommandLineFlags(&argc, &argv, true);
  const int batch_size = 1 << 0;
  paddle::test_naive(batch_size);
  return 0;
}
#include<windows.h>
#include <fstream>
#include "inference_icnet.h"
#include <thread>
#include <vector>
#include <string>
#include <iostream>
#include <sstream>
using namespace std;
// Parses the leading value of `str` as a `Type` using stream extraction.
//
// Returns the parsed value, or a value-initialized `Type` (0 for arithmetic
// types) when nothing can be extracted, e.g. for an empty or non-numeric
// string. The result is value-initialized up front because extraction leaves
// its target untouched when the stream is already exhausted, which previously
// made the function return an indeterminate value for empty input.
template <class Type>
Type stringToNum(const std::string& str)
{
  std::istringstream iss(str);
  Type num{};
  iss >> num;
  return num;
}
// Smoke test for the exported predictor API: loads one image stored as text
// (one value per line), runs a single prediction through the library and
// writes the int64 output values to "./1.png.output.txt".
//
// Input and output buffers are std::vector-owned so they are released on
// every path (the previous version leaked the input buffer and released the
// output buffer with a mismatched `delete`).
void test_imgs() {
  void *h = init_predictor("./lb/__model__", "./lb/__params__", 0.3f, true, 0);

  std::ifstream infile("new_file.list");
  std::ofstream ofs("./1.png.output.txt");
  std::string temp_s;
  std::vector<std::string> all_files;
  // Use the extraction as the loop condition: an eof() test beforehand loops
  // forever when the list file is missing.
  while (infile >> temp_s) {
    all_files.push_back(temp_s);
  }
  infile.close();

  // =============read file list =============
  // NOTE: the list read above is currently not used; the input path below is
  // hard-coded.
  for (size_t f_k = 0; f_k < 1; f_k++) {
    // std::string path = "D:\\Paddle\\paddle\\fluid\\inference\\api\\demo_ci\\build\\Release\\";
    // std::ifstream in_img(path + all_files[f_k]);
    std::string mypath = "D:\\Paddle\\paddle\\fluid\\inference\\api\\demo_ci\\build\\Release\\1.png.txt";
    std::cout << "file" << mypath << std::endl;
    std::ifstream in_img(mypath);
    //std::cout << path + all_files[f_k] << std::endl;
    if (!in_img.is_open()) {
      std::cout << "open failed" << std::endl;
      continue;  // nothing meaningful to feed to the predictor
    }

    // Input tensor: 3 x 449 x 581 floats, batch size 1. Entries that the file
    // does not provide stay at zero.
    const int size = 3 * 449 * 581 * 1;
    std::vector<float> data(size, 0.0f);
    std::string value;
    double sum_input = .0;
    int values_read = 0;
    while (values_read < size && std::getline(in_img, value, '\n')) {
      double v = stringToNum<double>(value);
      data[values_read] = static_cast<float>(v);
      sum_input += v;
      ++values_read;
    }
    if (values_read < size) {
      std::cout << "warning: read only " << values_read << " of " << size
                << " input values" << std::endl;
    }
    std::cout << "sum_input" << sum_input << std::endl;
    in_img.close();

    // Output buffer: one int64 per pixel. predict() copies its result into
    // the memory that `p` points to and reports the length in bytes.
    const int SIZE = 449 * 581 * 1;
    std::vector<int64_t> out_buf(SIZE, 0);
    int64_t * p = out_buf.data();
    int out_size = 0;
    predict(h, data.data(), 3, 449, 581, &p, &out_size, 1);
    std::cout << "out_size = " << out_size << std::endl;

    // Never read more elements than the buffer holds, whatever length the
    // library reported.
    size_t out_count = 0;
    if (out_size > 0) {
      out_count = static_cast<size_t>(out_size) / sizeof(int64_t);
    }
    if (out_count > out_buf.size()) {
      out_count = out_buf.size();
    }

    double out_sum = .0;
    for (size_t i = 0; i < out_count; i++) {
      out_sum += out_buf[i];
      ofs << out_buf[i] << " ";
    }
    std::cout << "inferece out sum" << out_sum << std::endl;
  }
  ofs.close();

  destory_predictor(h);
}
// Entry point: runs the single-image smoke test. The command-line arguments
// are not used.
int main(int /*argc*/, char** /*argv*/) {
  // Disabled multi-threaded experiment, kept for reference:
  //if (true) {
  //  std::thread t1(func, init_predictor("./infer_model/__model__", "./infer_model/__params__", 0.1f, true, 0));
  //  std::thread t2(func, init_predictor("./infer_model/__model__", "./infer_model/__params__", 0.1f, true, 0));
  //  //std::thread t3(func, init_predictor("./infer_model/__model__", "./infer_model/__params__", 0.1f, true, 0));
  //  //std::thread t4(func, init_predictor("./infer_model/__model__", "./infer_model/__params__", 0.1f, true, 0));
  //  t1.join();
  //  t2.join();
  //  //t3.join();
  //  //t4.join();
  //  //Sleep(1);
  //}

  test_imgs();
  return 0;
}
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#define GOOGLE_GLOG_DLL_DECL
#include <gflags/gflags.h>
#include <glog/logging.h>
//#include <gtest/gtest.h>
#include <chrono>
#include <fstream>
#include <iostream>
#include "paddle/fluid/inference/api/paddle_inference_api.h"
#include <thread> // NOLINT
#define ASSERT_TRUE(x) x
#define ASSERT_EQ(x, y) assert(x == y)
namespace paddle {
// DEFINE_string(dirname, "./LB_icnet_model",
// "Directory of the inference model.");
// Returns the predictor configuration used by the threaded test: the model
// files under "./dzh_lb/", GPU device 0, and a GPU memory fraction of 0.08.
NativeConfig GetConfig() {
  NativeConfig native_cfg;

  // Device settings.
  native_cfg.use_gpu = true;
  native_cfg.device = 0;
  native_cfg.fraction_of_gpu_memory = 0.08;

  // Model location.
  native_cfg.prog_file = "./dzh_lb/__model__";
  native_cfg.param_file = "./dzh_lb/__params__";

  return native_cfg;
}
// Time-point type produced by the high-resolution clock.
using Time = decltype(std::chrono::high_resolution_clock::now());

// Returns the current high-resolution clock reading.
Time time() {
  return std::chrono::high_resolution_clock::now();
}

// Returns the time elapsed from t1 to t2 in milliseconds.
// The interval is first truncated to whole microseconds, so the result has
// microsecond granularity; it is negative when t2 precedes t1.
double time_diff(Time t1, Time t2) {
  using micros = std::chrono::microseconds;
  const micros elapsed = std::chrono::duration_cast<micros>(t2 - t1);
  return elapsed.count() / 1000.0;
}
void test_naive(int batch_size, std::string model_path){
PaddlePredictor* pres[2];
NativeConfig config = GetConfig();
// config.model_dir = model_path;
auto predictor0 = CreatePaddlePredictor<NativeConfig>(config);
auto predictor1 = CreatePaddlePredictor<NativeConfig>(config);
pres[0] = predictor0.get();
pres[1] = predictor1.get();
int height = 449;
int width = 581;
std::vector<float> data;
for (int i = 0; i < 3 * height * width; i++) {
data.push_back(0);
}
PaddleTensor tensor;
tensor.shape = std::vector<int>({batch_size, 3, height, width});
tensor.data.Resize(sizeof(float) * batch_size * 3 * height * width);
std::copy(data.begin(), data.end(), static_cast<float*>(tensor.data.data()));
tensor.dtype = PaddleDType::FLOAT32;
std::vector<PaddleTensor> paddle_tensor_feeds(1, tensor);
constexpr int num_jobs = 5; // each job run 1 batch
std::vector<std::thread> threads;
for (int tid = 0; tid < num_jobs; ++tid) {
threads.emplace_back([&, tid]() {
auto predictor = pres[tid];
std::vector<PaddleTensor> local_outputs;
for(size_t i = 0; i < 1000; i++) {
ASSERT_TRUE(predictor->Run(paddle_tensor_feeds, &local_outputs));
std::cout << "run: " << tid << std::endl;
}
ASSERT_EQ(local_outputs.size(), 1UL);
});
}
for (int i = 0; i < num_jobs; ++i) {
threads[i].join();
}
}
//TEST(alexnet, naive) {
// test_naive(1 << 0, "./trt_models/vgg19");
//}
} // namespace paddle
// Entry point: runs the threaded test with a batch size of 1 and an empty
// model path. The command-line arguments are not used.
int main(int argc, char** argv) {
  const int batch_size = 1 << 0;
  paddle::test_naive(batch_size, "");
  return 0;
}
...@@ -141,6 +141,27 @@ class BatchNormKernel<platform::CUDADeviceContext, T> ...@@ -141,6 +141,27 @@ class BatchNormKernel<platform::CUDADeviceContext, T>
bias->template data<BatchNormParamType<T>>(), bias->template data<BatchNormParamType<T>>(),
est_mean->template data<BatchNormParamType<T>>(), est_mean->template data<BatchNormParamType<T>>(),
est_var->template data<BatchNormParamType<T>>(), epsilon)); est_var->template data<BatchNormParamType<T>>(), epsilon));
VLOG(3) << "before tensor copy";
Tensor mean_, var_, x_, y_;
framework::TensorCopy(*est_mean, platform::CPUPlace(), dev_ctx, &mean_);
framework::TensorCopy(*est_var, platform::CPUPlace(), dev_ctx, &var_);
framework::TensorCopy(*x, platform::CPUPlace(), dev_ctx, &x_);
framework::TensorCopy(*y, platform::CPUPlace(), dev_ctx, &y_);
VLOG(3) << "after tensor copy";
auto check_tensor = [&](const Tensor& check) {
float sum = .0;
for(size_t i=0; i < check.numel(); ++i) {
sum += check.data<float>()[i];
}
return sum;
};
VLOG(3) << "BatchNormKernel";
VLOG(3) << "mean" << check_tensor(mean_);
VLOG(3) << "var" << check_tensor(var_);
VLOG(3) << "x" << check_tensor(x_);
VLOG(3) << "y" << check_tensor(y_);
} else { } else {
// Run training mode. // Run training mode.
// obtain running mean and running inv var, and see if we need to // obtain running mean and running inv var, and see if we need to
......
...@@ -12,6 +12,7 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. ...@@ -12,6 +12,7 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and See the License for the specific language governing permissions and
limitations under the License. */ limitations under the License. */
#include <fstream> #include <fstream>
#include <vector>
#include "paddle/fluid/framework/data_type_transform.h" #include "paddle/fluid/framework/data_type_transform.h"
#include "paddle/fluid/framework/op_registry.h" #include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/platform/device_context.h" #include "paddle/fluid/platform/device_context.h"
...@@ -34,6 +35,7 @@ class LoadCombineOp : public framework::OperatorBase { ...@@ -34,6 +35,7 @@ class LoadCombineOp : public framework::OperatorBase {
auto load_as_fp16 = Attr<bool>("load_as_fp16"); auto load_as_fp16 = Attr<bool>("load_as_fp16");
std::ifstream fin(filename, std::ios_base::in | std::ios_base::binary); std::ifstream fin(filename, std::ios_base::in | std::ios_base::binary);
//std::ifstream fin(filename, std::ios_base::in);
PADDLE_ENFORCE(!fin.bad(), PADDLE_ENFORCE(!fin.bad(),
"Cannot open file %s for load_combine op", filename); "Cannot open file %s for load_combine op", filename);
...@@ -46,7 +48,7 @@ class LoadCombineOp : public framework::OperatorBase { ...@@ -46,7 +48,7 @@ class LoadCombineOp : public framework::OperatorBase {
auto &dev_ctx = *pool.Get(place); auto &dev_ctx = *pool.Get(place);
for (size_t i = 0; i < out_var_names.size(); i++) { for (size_t i = 0; i < out_var_names.size(); i++) {
VLOG(3) << "load " << out_var_names[i]; VLOG(3) << "load variable " << out_var_names[i];
auto *out_var = scope.FindVar(out_var_names[i]); auto *out_var = scope.FindVar(out_var_names[i]);
PADDLE_ENFORCE(out_var != nullptr, "Output variable %s cannot be found", PADDLE_ENFORCE(out_var != nullptr, "Output variable %s cannot be found",
...@@ -61,6 +63,13 @@ class LoadCombineOp : public framework::OperatorBase { ...@@ -61,6 +63,13 @@ class LoadCombineOp : public framework::OperatorBase {
// Get data from fin to tensor // Get data from fin to tensor
DeserializeFromStream(fin, tensor, dev_ctx); DeserializeFromStream(fin, tensor, dev_ctx);
VLOG(3) << "after deserialization"; VLOG(3) << "after deserialization";
framework::Tensor check;
framework::TensorCopy(*tensor, platform::CPUPlace(), dev_ctx, &check);
float sum = .0;
for(size_t i=0; i < check.numel(); ++i) {
sum += check.data<float>()[i];
}
VLOG(3) << "sum result" << sum;
auto in_dtype = framework::ToDataType(tensor->type()); auto in_dtype = framework::ToDataType(tensor->type());
auto out_dtype = auto out_dtype =
load_as_fp16 ? framework::proto::VarType::FP16 : in_dtype; load_as_fp16 ? framework::proto::VarType::FP16 : in_dtype;
...@@ -80,6 +89,7 @@ class LoadCombineOp : public framework::OperatorBase { ...@@ -80,6 +89,7 @@ class LoadCombineOp : public framework::OperatorBase {
tensor = out_var->GetMutable<framework::LoDTensor>(); tensor = out_var->GetMutable<framework::LoDTensor>();
tensor->set_lod(fp16_tensor.lod()); tensor->set_lod(fp16_tensor.lod());
tensor->ShareDataWith(fp16_tensor); tensor->ShareDataWith(fp16_tensor);
} }
VLOG(3) << "load " << out_var_names[i] << " finished"; VLOG(3) << "load " << out_var_names[i] << " finished";
} }
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册