Unverified · commit 95a89900 · authored by littletomatodonkey · committed by GitHub

fix py infer (#376)

Fix Python and C++ inference: migrate both to the unified `paddle.inference` / `paddle_infer` API (`Config`, `create_predictor`, and zero-copy input/output handles), remove the now-unneeded `use_zero_copy_run` option, and rename the C++ `Config` class to `ClsConfig`.
Parent f921b9bd
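The whole diff boils down to one API migration. As a rough sketch (not part of the commit), the migrated Python flow looks like the following; the model/params paths are placeholders and the input is a dummy array:

```python
# Minimal sketch of the new paddle.inference flow this commit migrates to.
# "./inference/model" and "./inference/params" are placeholder paths.
import numpy as np
from paddle.inference import Config, create_predictor

config = Config("./inference/model", "./inference/params")
config.enable_memory_optim()
config.switch_use_feed_fetch_ops(False)  # zero-copy tensors, now the only path
predictor = create_predictor(config)     # replaces create_paddle_predictor()

# Zero-copy I/O goes through handles; get_input_tensor/get_output_tensor are gone.
input_names = predictor.get_input_names()
input_handle = predictor.get_input_handle(input_names[0])
img = np.random.rand(1, 3, 224, 224).astype("float32")  # dummy NCHW input
input_handle.copy_from_cpu(img)

predictor.run()  # replaces predictor.zero_copy_run()

output_names = predictor.get_output_names()
output_handle = predictor.get_output_handle(output_names[0])
probs = output_handle.copy_to_cpu().flatten()
print(probs.argmax())
```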
@@ -17,7 +17,6 @@
 #include "opencv2/core.hpp"
 #include "opencv2/imgcodecs.hpp"
 #include "opencv2/imgproc.hpp"
-#include "paddle_api.h"
 #include "paddle_inference_api.h"
 #include <chrono>
 #include <iomanip>
@@ -31,6 +30,8 @@
 #include <include/preprocess_op.h>

+using namespace paddle_infer;
+
 namespace PaddleClas {

 class Classifier {
@@ -38,14 +39,13 @@ public:
   explicit Classifier(const std::string &model_dir, const bool &use_gpu,
                       const int &gpu_id, const int &gpu_mem,
                       const int &cpu_math_library_num_threads,
-                      const bool &use_mkldnn, const bool &use_zero_copy_run,
-                      const int &resize_short_size, const int &crop_size) {
+                      const bool &use_mkldnn, const int &resize_short_size,
+                      const int &crop_size) {
     this->use_gpu_ = use_gpu;
     this->gpu_id_ = gpu_id;
     this->gpu_mem_ = gpu_mem;
     this->cpu_math_library_num_threads_ = cpu_math_library_num_threads;
     this->use_mkldnn_ = use_mkldnn;
-    this->use_zero_copy_run_ = use_zero_copy_run;
     this->resize_short_size_ = resize_short_size;
     this->crop_size_ = crop_size;
@@ -60,14 +60,13 @@ public:
   void Run(cv::Mat &img);

 private:
-  std::shared_ptr<PaddlePredictor> predictor_;
+  std::shared_ptr<Predictor> predictor_;
   bool use_gpu_ = false;
   int gpu_id_ = 0;
   int gpu_mem_ = 4000;
   int cpu_math_library_num_threads_ = 4;
   bool use_mkldnn_ = false;
-  bool use_zero_copy_run_ = false;
   std::vector<float> mean_ = {0.485f, 0.456f, 0.406f};
   std::vector<float> scale_ = {1 / 0.229f, 1 / 0.224f, 1 / 0.225f};
......
@@ -25,9 +25,9 @@
 namespace PaddleClas {

-class Config {
+class ClsConfig {
 public:
-  explicit Config(const std::string &config_file) {
+  explicit ClsConfig(const std::string &config_file) {
     config_map_ = LoadConfig(config_file);

     this->use_gpu = bool(stoi(config_map_["use_gpu"]));
@@ -41,8 +41,6 @@ public:
     this->use_mkldnn = bool(stoi(config_map_["use_mkldnn"]));

-    this->use_zero_copy_run = bool(stoi(config_map_["use_zero_copy_run"]));
-
     this->cls_model_dir.assign(config_map_["cls_model_dir"]);

     this->resize_short_size = stoi(config_map_["resize_short_size"]);
@@ -60,8 +58,6 @@ public:
   bool use_mkldnn = false;

-  bool use_zero_copy_run = false;
-
   std::string cls_model_dir;

   int resize_short_size = 256;
......
@@ -28,7 +28,6 @@
 #include <numeric>

 using namespace std;
-using namespace paddle;

 namespace PaddleClas {
......
@@ -17,7 +17,7 @@
 namespace PaddleClas {

 void Classifier::LoadModel(const std::string &model_dir) {
-  AnalysisConfig config;
+  paddle_infer::Config config;
   config.SetModel(model_dir + "/model", model_dir + "/params");

   if (this->use_gpu_) {
@@ -32,9 +32,7 @@ void Classifier::LoadModel(const std::string &model_dir) {
     config.SetCpuMathLibraryNumThreads(this->cpu_math_library_num_threads_);
   }

-  // false for zero copy tensor
-  // true for commom tensor
-  config.SwitchUseFeedFetchOps(!this->use_zero_copy_run_);
+  config.SwitchUseFeedFetchOps(false);
   // true for multiple input
   config.SwitchSpecifyInputNames(true);
@@ -43,7 +41,7 @@ void Classifier::LoadModel(const std::string &model_dir) {
   config.EnableMemoryOptim();
   config.DisableGlogInfo();

-  this->predictor_ = CreatePaddlePredictor(config);
+  this->predictor_ = CreatePredictor(config);
 }

 void Classifier::Run(cv::Mat &img) {
@@ -60,32 +58,21 @@ void Classifier::Run(cv::Mat &img) {
   std::vector<float> input(1 * 3 * resize_img.rows * resize_img.cols, 0.0f);
   this->permute_op_.Run(&resize_img, input.data());

-  // Inference.
-  if (this->use_zero_copy_run_) {
-    auto input_names = this->predictor_->GetInputNames();
-    auto input_t = this->predictor_->GetInputTensor(input_names[0]);
-    input_t->Reshape({1, 3, resize_img.rows, resize_img.cols});
-    input_t->copy_from_cpu(input.data());
-    this->predictor_->ZeroCopyRun();
-  } else {
-    paddle::PaddleTensor input_t;
-    input_t.shape = {1, 3, resize_img.rows, resize_img.cols};
-    input_t.data =
-        paddle::PaddleBuf(input.data(), input.size() * sizeof(float));
-    input_t.dtype = PaddleDType::FLOAT32;
-    std::vector<paddle::PaddleTensor> outputs;
-    this->predictor_->Run({input_t}, &outputs, 1);
-  }
+  auto input_names = this->predictor_->GetInputNames();
+  auto input_t = this->predictor_->GetInputHandle(input_names[0]);
+  input_t->Reshape({1, 3, resize_img.rows, resize_img.cols});
+  input_t->CopyFromCpu(input.data());
+  this->predictor_->Run();

   std::vector<float> out_data;
   auto output_names = this->predictor_->GetOutputNames();
-  auto output_t = this->predictor_->GetOutputTensor(output_names[0]);
+  auto output_t = this->predictor_->GetOutputHandle(output_names[0]);
   std::vector<int> output_shape = output_t->shape();
   int out_num = std::accumulate(output_shape.begin(), output_shape.end(), 1,
                                 std::multiplies<int>());
   out_data.resize(out_num);
-  output_t->copy_to_cpu(out_data.data());
+  output_t->CopyToCpu(out_data.data());

   int maxPosition =
       max_element(out_data.begin(), out_data.end()) - out_data.begin();
......
@@ -12,12 +12,12 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.

-#include <include/config.h>
+#include <include/cls_config.h>

 namespace PaddleClas {

-std::vector<std::string> Config::split(const std::string &str,
-                                       const std::string &delim) {
+std::vector<std::string> ClsConfig::split(const std::string &str,
+                                          const std::string &delim) {
   std::vector<std::string> res;
   if ("" == str)
     return res;
@@ -38,7 +38,7 @@ std::vector<std::string> Config::split(const std::string &str,
 }

 std::map<std::string, std::string>
-Config::LoadConfig(const std::string &config_path) {
+ClsConfig::LoadConfig(const std::string &config_path) {
   auto config = Utility::ReadDict(config_path);

   std::map<std::string, std::string> dict;
@@ -53,7 +53,7 @@ Config::LoadConfig(const std::string &config_path) {
   return dict;
 }

-void Config::PrintConfigInfo() {
+void ClsConfig::PrintConfigInfo() {
   std::cout << "=======Paddle Class inference config======" << std::endl;
   for (auto iter = config_map_.begin(); iter != config_map_.end(); iter++) {
     std::cout << iter->first << " : " << iter->second << std::endl;
......
@@ -27,7 +27,7 @@
 #include <numeric>

 #include <include/cls.h>
-#include <include/config.h>
+#include <include/cls_config.h>

 using namespace std;
 using namespace cv;
@@ -40,7 +40,7 @@ int main(int argc, char **argv) {
     exit(1);
   }

-  Config config(argv[1]);
+  ClsConfig config(argv[1]);

   config.PrintConfigInfo();
@@ -61,8 +61,8 @@ int main(int argc, char **argv) {

   Classifier classifier(config.cls_model_dir, config.use_gpu, config.gpu_id,
                         config.gpu_mem, config.cpu_math_library_num_threads,
-                        config.use_mkldnn, config.use_zero_copy_run,
-                        config.resize_short_size, config.crop_size);
+                        config.use_mkldnn, config.resize_short_size,
+                        config.crop_size);

   double elapsed_time = 0.0;
   int warmup_iter = img_files_list.size() > 5 ? 5 : 0;
@@ -81,10 +81,13 @@ int main(int argc, char **argv) {
                      std::chrono::microseconds::period::den;
     if (idx >= warmup_iter) {
       elapsed_time += curr_time;
+      std::cout << "Current image path: " << img_path << std::endl;
+      std::cout << "Current time cost: " << curr_time << " s, "
+                << "average time cost in all: "
+                << elapsed_time / (idx + 1 - warmup_iter) << " s." << std::endl;
+    } else {
+      std::cout << "Current time cost: " << curr_time << " s." << std::endl;
     }
-    std::cout << "Current time cost: " << curr_time << " s, "
-              << "average time cost in all: "
-              << elapsed_time / (idx + 1 - warmup_iter) << " s." << std::endl;
   }

   return 0;
......
@@ -4,7 +4,6 @@ gpu_id 0
 gpu_mem 4000
 cpu_math_library_num_threads 10
 use_mkldnn 1
-use_zero_copy_run 1

 # cls config
 cls_model_dir ./inference/
......
@@ -17,8 +17,8 @@ import utils
 import numpy as np
 import time

-from paddle.fluid.core import AnalysisConfig
-from paddle.fluid.core import create_paddle_predictor
+from paddle.inference import Config
+from paddle.inference import create_predictor


 def parse_args():
@@ -41,8 +41,8 @@ def parse_args():
     return parser.parse_args()


-def create_predictor(args):
-    config = AnalysisConfig(args.model_file, args.params_file)
+def create_paddle_predictor(args):
+    config = Config(args.model_file, args.params_file)

     if args.use_gpu:
         config.enable_use_gpu(args.gpu_mem, 0)
@@ -53,14 +53,14 @@ def create_predictor(args):
     config.switch_ir_optim(args.ir_optim)  # default true
     if args.use_tensorrt:
         config.enable_tensorrt_engine(
-            precision_mode=AnalysisConfig.Precision.Half
-            if args.use_fp16 else AnalysisConfig.Precision.Float32,
+            precision_mode=Config.PrecisionType.Half
+            if args.use_fp16 else Config.PrecisionType.Float32,
             max_batch_size=args.batch_size)

     config.enable_memory_optim()
     # use zero copy
     config.switch_use_feed_fetch_ops(False)
-    predictor = create_paddle_predictor(config)
+    predictor = create_predictor(config)

     return predictor
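Note that the precision enum moves along with the config class: `AnalysisConfig.Precision.*` becomes `Config.PrecisionType.*`. A hedged sketch of a GPU + TensorRT setup under the new names (paths, memory pool size, and batch size are illustrative, not from the commit):

```python
# Sketch only: GPU + TensorRT configuration with the renamed enum.
from paddle.inference import Config, create_predictor

config = Config("./inference/model", "./inference/params")  # placeholder paths
config.enable_use_gpu(8000, 0)  # 8000 MB GPU memory pool, device id 0 (illustrative)
config.enable_tensorrt_engine(
    precision_mode=Config.PrecisionType.Half,  # was AnalysisConfig.Precision.Half
    max_batch_size=1)
config.enable_memory_optim()
config.switch_use_feed_fetch_ops(False)
predictor = create_predictor(config)
```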
@@ -103,13 +103,13 @@ def main():
     assert args.use_tensorrt is True

     operators = create_operators()
-    predictor = create_predictor(args)
+    predictor = create_paddle_predictor(args)

     input_names = predictor.get_input_names()
-    input_tensor = predictor.get_input_tensor(input_names[0])
+    input_tensor = predictor.get_input_handle(input_names[0])

     output_names = predictor.get_output_names()
-    output_tensor = predictor.get_output_tensor(output_names[0])
+    output_tensor = predictor.get_output_handle(output_names[0])

     test_num = 500
     test_time = 0.0
@@ -120,7 +120,7 @@ def main():
                 args.batch_size, axis=0).copy()

         input_tensor.copy_from_cpu(inputs)
-        predictor.zero_copy_run()
+        predictor.run()

         output = output_tensor.copy_to_cpu()
         output = output.flatten()
@@ -136,7 +136,7 @@ def main():
         start_time = time.time()

         input_tensor.copy_from_cpu(inputs)
-        predictor.zero_copy_run()
+        predictor.run()

         output = output_tensor.copy_to_cpu()
         output = output.flatten()
......