提交 b1b3c9d9 编写于 作者: 风吴痕's avatar 风吴痕

update

上级 37d936fc
# CMakeLists.txt
# Builds the `digit` executable from digit.cpp and links it against LibTorch and OpenCV.
# CMAKE_CXX_STANDARD 17 is only understood by CMake >= 3.8.
cmake_minimum_required(VERSION 3.8 FATAL_ERROR)
project(LibTorchDemo)
# compile options
set(CMAKE_CXX_FLAGS_RELEASE "-O3")
# Recent LibTorch 2.x releases require C++17; fail at configure time
# instead of silently falling back to an older standard.
set(CMAKE_CXX_STANDARD 17)
set(CMAKE_CXX_STANDARD_REQUIRED ON)
# package
find_package(OpenCV REQUIRED)
find_package(Torch REQUIRED PATHS "/usr/local/lib/libtorch") # libtorch is expected to be unpacked under /usr/local/lib
add_executable(digit digit.cpp)
# libtorch
target_link_libraries(digit ${TORCH_LIBRARIES})
target_link_libraries(digit ${OpenCV_LIBS})
\ No newline at end of file
# Converts the trained Digit checkpoint into a TorchScript module by tracing.
import torch
from digit import Digit

model = Digit()
model.load_state_dict(torch.load("model/digit.pth", map_location="cpu"))
# Trace in inference mode so no training-only behaviour is recorded in the graph.
model.eval()
# Example input used for tracing: one single-channel 8x8 image.
sample = torch.randn(1, 1, 8, 8)
trace_model = torch.jit.trace(model, sample)
trace_model.save("model/digit.jit")
\ No newline at end of file
#include "iostream"
#include "opencv2/opencv.hpp"
#include "torch/script.h"
#include "fstream"
/// Verifies that `path` can be opened for reading.
/// Returns normally when it can; otherwise prints a message to stdout and
/// terminates the process with exit code -1.
void checkPath(const char* path) {
    std::ifstream probe(path);
    const bool readable = static_cast<bool>(probe);
    probe.close();
    if (!readable) {
        std::cout << "file " << path << " doesn't exist!" << std::endl;
        exit(-1);
    }
}
int main(int argc, char const *argv[])
{
if (argc != 3) {
std::cout << "usage : digit <model path> <image path>" << std::endl;
return -1;
}
checkPath(argv[1]);
checkPath(argv[2]);
cv::Mat img = cv::imread(argv[2]), gimg, fimg, rimg;
cv::cvtColor(img, gimg, CV_BGR2GRAY);
gimg.convertTo(fimg, CV_32F, - 1. / 255., 1.);
cv::resize(fimg, rimg, {8, 8});
// convert Mat to tensor
at::Tensor img_tensor = torch::from_blob(
rimg.data,
{1, 1, 8, 8},
torch::kFloat32
);
// load model
torch::jit::Module model = torch::jit::load(argv[1]);
// torch.no_grad()
torch::NoGradGuard no_grad; // 请一定加入torch::NoGradGuard no_grad; 这句话,否则内存会炸。
// forward
torch::Tensor out = model({img_tensor}).toTensor();
int pre_lab = torch::argmax(out, 1).item().toInt();
std::cout << "predict number is " << pre_lab << std::endl;
return 0;
}
\ No newline at end of file
from sklearn.datasets import load_digits
import torch
from torch import nn
import torch.utils.data as Data
import numpy as np
from sklearn.metrics import accuracy_score
import matplotlib.pyplot as plt
import os
class Digit(nn.Module):
    """Small convolutional classifier mapping a (N, 1, 8, 8) batch to (N, 10) scores."""

    def __init__(self):
        super().__init__()
        # Four 3x3 convolutions; the two stride-2 layers shrink an 8x8 input
        # to 2x2, so the 8 output channels flatten to 32 features.
        self.conv = nn.Sequential(
            nn.Conv2d(1, 16, kernel_size=3, stride=1, padding=1),
            nn.Tanh(),
            nn.Conv2d(16, 32, kernel_size=3, stride=2, padding=1),
            nn.Tanh(),
            nn.Conv2d(32, 16, kernel_size=3, stride=2, padding=1),
            nn.Tanh(),
            nn.Conv2d(16, 8, kernel_size=3, stride=1, padding=1),
        )
        self.output = nn.Linear(32, 10)

    def forward(self, x):
        """Return the 10 class scores for every image in the batch `x`."""
        features = self.conv(x)
        return self.output(features.flatten(1))
# Fraction of the samples used for training; the rest is held out for evaluation.
RATIO = 0.8
BATCH_SIZE = 128
EPOCH = 10

if __name__ == "__main__":
    # Load the scikit-learn digits and scale the pixel values by 1/16.
    X, y = load_digits(return_X_y=True)
    X = X / 16.
    sample_num = len(y)

    # Reshape straight to (N, 1, 8, 8) instead of round-tripping through nested Python lists.
    X = torch.from_numpy(X.reshape(-1, 1, 8, 8)).float()
    y = torch.from_numpy(y).long()

    # Random train / test split.
    indice = np.arange(sample_num)
    np.random.shuffle(indice)
    offline = int(sample_num * RATIO)
    train = Data.TensorDataset(X[indice[:offline]], y[indice[:offline]])
    test = Data.TensorDataset(X[indice[offline:]], y[indice[offline:]])
    train_loader = Data.DataLoader(train, BATCH_SIZE, True)
    test_loader = Data.DataLoader(test, BATCH_SIZE, False)

    model = Digit()
    optimizer = torch.optim.RMSprop(model.parameters(), lr=1e-3)
    criterion = nn.CrossEntropyLoss(reduction="mean")

    test_losses = []
    test_accs = []
    for epoch in range(EPOCH):
        model.train()
        for bx, by in train_loader:
            out = model(bx)
            loss = criterion(out, by)
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

        # Evaluate on the held-out set. Loss and accuracy are weighted by batch
        # size so the smaller last batch does not bias the epoch averages.
        model.eval()
        correct = 0
        total = 0
        loss_sum = 0.0
        with torch.no_grad():
            for bx, by in test_loader:
                out = model(bx)
                batch = by.size(0)
                loss_sum += criterion(out, by).item() * batch
                correct += (out.argmax(1) == by).sum().item()
                total += batch
        test_losses.append(loss_sum / total)
        test_accs.append(correct / total)

    # Save before plotting so the checkpoint exists even if the plot window
    # cannot be shown or is never closed.
    os.makedirs("model", exist_ok=True)
    torch.save(model.state_dict(), "model/digit.pth")

    plt.figure(dpi=120)
    plt.plot(test_losses, 'o-', label="loss")
    plt.plot(test_accs, 'o-', label="accuracy")
    plt.legend()
    plt.grid()
    plt.show()
\ No newline at end of file
#include "iostream"
#include "opencv2/opencv.hpp"
#include "torch/script.h"
// Smoke test: prints a greeting so a successful build and run can be confirmed.
int main(int /*argc*/, char const * /*argv*/[])
{
    std::cout << "hello world!";
    std::cout << std::endl;
    return 0;
}
\ No newline at end of file
# 使用
```py
# 测试
1 digit_test.cpp 改成 digit.cpp
2mkdir build && cd build
3camke .. && make -j4
4./digit
# libtorch加载模型
1. python digit.py # 训练一个原生pytorch模型
2. python convert2jit.py # pytorch模型 转成 jit模型
3. python test_jit.py # 测试 jit模型
4. mkdir build && cd build
5. cmake .. && make -j4
6. ./digit ../model/digit.jit ../image/sample.png # 模型和图片位于 build 的上一级目录
```
import time
import torch
import cv2 as cv
from digit import Digit
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd
def run_model(model, image):
    """Time one forward pass plus argmax.

    Args:
        model: callable that maps `image` to a tensor of class scores.
        image: input passed to `model` unchanged.

    Returns:
        Elapsed time in seconds, rounded to 5 decimal places.
    """
    # perf_counter is monotonic and high-resolution, unlike time.time().
    start = time.perf_counter()
    # Inference only: skip autograd bookkeeping so it does not skew the timing.
    with torch.no_grad():
        out = model(image)
        torch.argmax(out, dim=1)
    return round(time.perf_counter() - start, 5)
# Load the sample image and preprocess it: grayscale, invert, scale to [0, 1], resize to 8x8.
image = cv.imread("image/sample.png")
if image is None:
    # cv.imread returns None instead of raising when the file is missing or unreadable.
    raise FileNotFoundError("image/sample.png could not be read")
image = cv.cvtColor(image, cv.COLOR_BGR2GRAY)
image = 1 - image / 255.
image = cv.resize(image, (8, 8))
# Add batch and channel dimensions -> (1, 1, 8, 8).
image = torch.FloatTensor(image).unsqueeze(0).unsqueeze(0).contiguous()

# Eager model and its traced TorchScript counterpart, both in inference mode.
origin_model = Digit()
origin_model.load_state_dict(torch.load("model/digit.pth", map_location="cpu"))
origin_model.eval()
jit_model = torch.jit.load("model/digit.jit")
jit_model.eval()

# init jit: warm both models up so first-call overhead is excluded from the timings
for _ in range(3):
    run_model(origin_model, image)
    run_model(jit_model, image)

test_times = 10
# begin testing
origin_times = [run_model(origin_model, image) for _ in range(test_times)]
jit_times = [run_model(jit_model, image) for _ in range(test_times)]
results = pd.DataFrame({
    "type": ["orgin"] * test_times + ["jit"] * test_times,
    "cost_time": origin_times + jit_times,
})

# Box plot comparing the latency distribution of the two models.
plt.figure(dpi=120)
sns.boxplot(
    x=results["type"],
    y=results["cost_time"]
)
plt.show()
\ No newline at end of file
- https://pytorch.org/tutorials/advanced/cpp_export.html
- [LibTorch的安装、配置与使用](https://blog.csdn.net/weixin_45632168/article/details/114679263)
- [libtorch c++调用 (五)Linux下的调用](https://blog.csdn.net/juluwangriyue/article/details/108463026)
- https://pytorch.org/cppdocs/
- [libtorch教程](https://www.zhihu.com/column/c_1373368181138972672)
- [VS2019 配置 LibTorch 和 OpenCV](https://zhuanlan.zhihu.com/p/375084412)
[toc]
# 环境
```python
Ubuntu 20.04 (9.4.0-1ubuntu1~20.04.1)
python3.7
pytorch-1.11.0
cuda-11.0
libtorch-1.12-cuda113
gcc version 9.4.0
cmake version 3.22.5
GNU Make 4.2.1
# ---------------------------------------
centos7
python3.7
cuda-10.2
cudnn-10.2-linux-x64-v8.1.0.77
torch-1.11.0+cu102-cp37-cp37m-linux_x86_64
torchvision-0.12.0+cu102-cp37-cp37m-linux_x86_64
libtorch-shared-with-deps-1.12.1+cu102
onnxruntime-linux-x64-1.12.1
cmake version 3.14.5
GNU Make 3.82
gcc version 8.3.1 20190311
```
# 下载 libtorch
- https://pytorch.org/
```python
# cuda113-linux (需要安装 cuda-11.3 以及对应版本的 cudnn-8.0)
https://download.pytorch.org/libtorch/cu113/libtorch-shared-with-deps-1.12.1%2Bcu113.zip
# cpu-linux
https://download.pytorch.org/libtorch/cpu/libtorch-shared-with-deps-1.12.1%2Bcpu.zip
```
# 将Pytorch模型转化为Torch Script
```python
import torch
import torchvision
# An instance of your model.
model = torchvision.models.resnet18()
# An example input you would normally provide to your model's forward() method.
example = torch.rand(1, 3, 224, 224)
# Use torch.jit.trace to generate a torch.jit.ScriptModule via tracing.
traced_script_module = torch.jit.trace(model, example)
traced_script_module.save("traced_resnet_model.pt")
```
# 在C++中加载Model
```c
#include<iostream>
#include<torch/script.h>
#include <torch/torch.h> // cuda相关函数头文件
#include<memory>
int main(int argc, const char* argv[]) {
if (argc != 2) {
std::cerr << "usage: example-app <path-to-exported-script-module>\n";
return -1;
}
torch::DeviceType device_type = at::kCPU; // 定义设备类型
if (torch::cuda::is_available())
device_type = at::kCUDA;
torch::jit::script::Module model;
try {
// Deserialize the ScriptModule from a file using torch::jit::load().
model = torch::jit::load(argv[1]);
}
catch (const c10::Error& e) {
std::cerr << "error loading the model\n"; return -1;
}
std::cout << "ok\n";
// Create a vector of inputs.
// std::vector<torch::jit::IValue> inputs;
// inputs.push_back(torch::ones({1, 3, 224, 224}));
model.to(device_type);
std::vector<torch::jit::IValue> inputs;
inputs.push_back(torch::ones({ 1, 3, 224, 224 }).to(device_type));
// Execute the model and turn its output into a tensor.
at::Tensor output = model.forward(inputs).toTensor();
std::cout << output.slice(/*dim=*/1, /*start=*/0, /*end=*/5) << '\n';
}
```
## 结合opencv
- https://blog.csdn.net/mmmkl1/article/details/118522533
- https://github.com/qubvel/segmentation_models.pytorch
```c++
#include "DemoPytorch.h"
#include <iostream>
#include <memory>
#include <algorithm>
#include <stdio.h>
#include <opencv2/core.hpp>
#include <opencv2/opencv.hpp>
#include <torch/torch.h>
#include <torch/script.h>
int main() {
// load model
torch::jit::script::Module module;
try {
module = torch::jit::load("./torch_script_eval.pt");
module.to(torch::kCPU); // set model to cpu mode
/*module.to(torch::kCUDA);*/ // set model to cuda mode
module.eval();
std::cout << "MODEL LOADED";
}
catch (const c10::Error& e) {
std::cerr << "error loading the model\n";
}
// load img
cv::Mat img_original = cv::imread("./00011584_002.png",0);
cv::Mat img = cv::Mat(img_original);
// normalize
cv::resize(img, img, cv::Size(512, 512));
img.convertTo(img, CV_32FC1);
// img to tensor
torch::Tensor mean = torch::tensor({ 0.485,0.456,0.406 });
torch::Tensor std = torch::tensor({ 0.229, 0.224, 0.225 });
auto input_tensor = torch::from_blob(img.data, { 512,512,1 });
input_tensor = input_tensor / 255.0f;
input_tensor = input_tensor - mean;
input_tensor = input_tensor / std;
input_tensor = input_tensor.permute({ 2,0,1 });
input_tensor = input_tensor.to(torch::kCPU);
/*input_tensor = input_tensor.to(torch::kCUDA);*/
input_tensor = input_tensor.unsqueeze(0);
std::vector<torch::jit::IValue> input;
input.push_back(input_tensor);
// pred begin
auto pred = module.forward(input).toTensor();
// pred tensor to mat
pred = pred.squeeze().detach();
pred = pred * 255;
pred = pred.to(torch::kU8);
pred = pred.to(torch::kCPU);
cv::Mat output_mat(cv::Size{ 512,512 }, CV_8UC1, pred.data_ptr());
// show result
cv::imshow("original img", img_original);
cv::imshow("mask", output_mat);
cv::waitKey(0);
cv::destroyWindow("original img");
cv::destroyWindow("mask");
return 0;
}
```
# test
```cpp
//%%file main.cpp
#include <iostream>
#include <torch/torch.h>
using namespace std;
int main()
{
// torch::Tensor tensor = torch::eye(3);
torch::Tensor tensor = torch::eye(3).to(at::kCUDA); // 数据加载至GPU
std::cout << tensor << std::endl;
cout << "Hello World!" << endl;
return 0;
}
// 测试gpu是否可以使用
#include<iostream>
#include<torch/script.h>
#include <torch/torch.h> // cuda相关函数头文件
#include<memory>
int main()
{
std::cout <<"cuda::is_available():" << torch::cuda::is_available() << std::endl;
// system("pause");
return 0;
}
```
```makefile
#%%file CMakeLists.txt
cmake_minimum_required(VERSION 3.5)
project(libtorch_demo LANGUAGES CXX)
# packages
#find_package(CUDA)
# nvcc flags
#set(CUDA_NVCC_FLAGS -gencode arch=compute_20,code=sm_20;-G;-g)
#set(CUDA_NVCC_FLAGS -gencode;arch=compute_60,code=sm_60;-G;-g)
set(CMAKE_CXX_STANDARD 11)
set(CMAKE_CXX_STANDARD_REQUIRED ON)
set(Torch_DIR /kaggle/working/libtorch/share/cmake/Torch)
find_package(Torch REQUIRED)
add_executable(libtorch_demo main.cpp)
target_link_libraries(libtorch_demo "${TORCH_LIBRARIES}")
set_property(TARGET libtorch_demo PROPERTY CXX_STANDARD 17)
```
```python
mkdir build
cd build
cmake ..
# cmake .. -DCUDNN_INCLUDE_DIR=/usr/include -DCUDNN_LIBRARY=/usr/lib/x86_64-linux-gnu
make
```
```python
cmake_minimum_required(VERSION 3.5)
project(dtp)
#find_package(OpenCV REQUIRED)
#message(STATUS "OpenCV library status:")
#message(STATUS " version: ${OpenCV_VERSION}")
#message(STATUS " libraries: ${OpenCV_LIBS}")
#message(STATUS " include path: ${OpenCV_INCLUDE_DIRS}")
set(Torch_DIR /kaggle/working/libtorch/share/cmake/Torch)
find_package(Torch REQUIRED)
#include_directories(${OpenCV_INCLUDE_DIRS})
include_directories(${TORCH_INCLUDE_DIRS})
add_executable(dtp main.cpp)
target_link_libraries(dtp
"${TORCH_LIBRARIES}"
# ${OpenCV_LIBS}
)
set_property(TARGET dtp PROPERTY CXX_STANDARD 14)
```
# 几个常见错误
```python
# 1、
CMake Error: CMake was unable to find a build program corresponding to "Unix Makefiles". CMAKE_MAKE_PROGRAM is not set. You probably need to select a different build tool.
CMake Error: CMAKE_CXX_COMPILER not set, after EnableLanguage
# 解决方法:yum install make gcc-c++ -y (make 解决第一条报错,gcc-c++ 解决 CMAKE_CXX_COMPILER not set)
# 2
-- Could NOT find CUDA (missing: CUDA_TOOLKIT_ROOT_DIR CUDA_NVCC_EXECUTABLE CUDA_INCLUDE_DIRS CUDA_CUDART_LIBRARY)
# 解决方法:需要安装 cuda (也可以将现有的/usr/local/cuda 复制过来)
# 3、
OSError: libcudnn.so.8: cannot open shared object file: No such file or directory
# 下载 cudnn8.0以上 将文件复制到 /usr/local/cuda
# 4、
cannot find -lCUDA_cublas_LIBRARY-NOTFOUND
# 找到所有 libcublas.so 复制到 /usr/local/cuda/lib64
```
- https://onnxruntime.ai/
- https://onnxruntime.ai/docs/tutorials/traditional-ml.html
- https://github.com/microsoft/onnxruntime
- https://github.com/microsoft/onnxruntime-inference-examples
- `pip install onnxruntime`
[toc]
```python
from torchvision.models import resnet18
import torch
model = resnet18()
torch.onnx.export(model, torch.randn(1, 3, 224, 224),
'model.onnx', verbose=True, opset_version=11,
input_names=['input'], # the model's input names
output_names=['output']
)
```
##### 使用脚本将 ONNX 模型转换为 ORT 格式
```python
python -m onnxruntime.tools.convert_onnx_models_to_ort <onnx model file or dir> # 会生成 .ort文件
```
```python
import onnxruntime
import torch
def to_numpy(tensor):
return tensor.detach().cpu().numpy() if tensor.requires_grad else tensor.cpu().numpy()
x = torch.randn(1,3,224,224)
model_path = "model.onnx" # or 'model.ort'
# ort_session = onnxruntime.InferenceSession(model_path) # 默认cpu
ort_session = onnxruntime.InferenceSession(model_path,providers=['TensorrtExecutionProvider', 'CUDAExecutionProvider', 'CPUExecutionProvider'])
ort_inputs = {ort_session.get_inputs()[0].name: to_numpy(x)}
ort_outs = ort_session.run(None, ort_inputs)[0]
ort_outs = torch.softmax(torch.from_numpy(ort_outs), -1)
print(ort_outs.argmax(-1))
```
----
# 安装
```python
# 在任何一种环境中,一次只能安装其中一个软件包
pip install onnxruntime
pip install onnxruntime-gpu
# 安装 ONNX 以导出模型
## ONNX is built into PyTorch
pip install torch
## tensorflow
pip install tf2onnx
## sklearn
pip install skl2onnx
```
## PyTorch CV
- 使用 `torch.onnx.export` 导出模型
```python
torch.onnx.export(model, # model being run
torch.randn(1, 28, 28).to(device), # model input (or a tuple for multiple inputs)
"fashion_mnist_model.onnx", # where to save the model (can be a file or file-like object)
input_names = ['input'], # the model's input names
output_names = ['output']) # the model's output names
# Export the model
torch.onnx.export(model, # model being run
(text, offsets), # model input (or a tuple for multiple inputs)
"ag_news_model.onnx", # where to save the model (can be a file or file-like object)
export_params=True, # store the trained parameter weights inside the model file
opset_version=10, # the ONNX version to export the model to
do_constant_folding=True, # whether to execute constant folding for optimization
input_names = ['input', 'offsets'], # the model's input names
output_names = ['output'], # the model's output names
dynamic_axes={'input' : {0 : 'batch_size'}, # variable length axes
'output' : {0 : 'batch_size'}})
```
- 加载 onnx 模型`onnx.load`
```python
import onnx
onnx_model = onnx.load("fashion_mnist_model.onnx")
onnx.checker.check_model(onnx_model)
```
- 使用 `ort.InferenceSession` 创建推理会话
```python
import onnxruntime as ort
import numpy as np
x, y = test_data[0][0], test_data[0][1]
ort_sess = ort.InferenceSession('fashion_mnist_model.onnx')
outputs = ort_sess.run(None, {'input': x.numpy()})
# Print Result
predicted, actual = classes[outputs[0][0].argmax(0)], classes[y]
print(f'Predicted: "{predicted}", Actual: "{actual}"')
```
## SciKit Learn CV
```python
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
iris = load_iris()
X, y = iris.data, iris.target
X_train, X_test, y_train, y_test = train_test_split(X, y)
from sklearn.linear_model import LogisticRegression
clr = LogisticRegression()
clr.fit(X_train, y_train)
print(clr)
# LogisticRegression()
```
- 将模型转换或导出为 ONNX 格式
```python
from skl2onnx import convert_sklearn
from skl2onnx.common.data_types import FloatTensorType
initial_type = [('float_input', FloatTensorType([None, 4]))]
onx = convert_sklearn(clr, initial_types=initial_type)
with open("logreg_iris.onnx", "wb") as f:
f.write(onx.SerializeToString())
```
- 使用 ONNX Runtime 加载和运行模型:我们将使用 ONNX Runtime 来计算此机器学习模型的预测。
```python
import numpy
import onnxruntime as rt
# sess = rt.InferenceSession("logreg_iris.onnx") # 默认cpu
sess = rt.InferenceSession("logreg_iris.onnx",providers=['TensorrtExecutionProvider', 'CUDAExecutionProvider', 'CPUExecutionProvider'])
input_name = sess.get_inputs()[0].name
pred_onx = sess.run(None, {input_name: X_test.astype(numpy.float32)})[0]
print(pred_onx)
```
- 获取预测类
```python
import numpy
import onnxruntime as rt
sess = rt.InferenceSession("logreg_iris.onnx")
input_name = sess.get_inputs()[0].name
label_name = sess.get_outputs()[0].name
pred_onx = sess.run(
[label_name], {input_name: X_test.astype(numpy.float32)})[0]
print(pred_onx)
```
## C++ 版本
- 下载安装包 https://github.com/microsoft/onnxruntime/releases
```c++
//main.cpp
//https://blog.csdn.net/baidu_34595620/article/details/112176278
//https://github.com/microsoft/onnxruntime-inference-examples/blob/main/c_cxx/squeezenet/main.cpp
#include<iostream>
#include <vector>
#include <assert.h>
// #include "onnxruntime_c_api.h"
#include "onnxruntime_cxx_api.h"
// #include "cuda_provider_factory.h"
#include<memory>
#include<ctime> //头文件
clock_t t_start,t_end;
int main(int argc, const char* argv[]) {
Ort::Env env(ORT_LOGGING_LEVEL_WARNING, "test");
Ort::SessionOptions session_options;
session_options.SetIntraOpNumThreads(1);
session_options.SetGraphOptimizationLevel(GraphOptimizationLevel::ORT_ENABLE_EXTENDED);
#ifdef _WIN32
const wchar_t* model_path = L"model.onnx";
#else
const char* model_path = "model.onnx";
#endif
Ort::Session session(env, model_path, session_options);
// print model input layer (node names, types, shape etc.)
Ort::AllocatorWithDefaultOptions allocator;
// print number of model input nodes
size_t num_input_nodes = session.GetInputCount();
std::vector<const char*> input_node_names(num_input_nodes);
std::vector<int64_t> input_node_dims; // simplify... this model has only 1 input node {1, 3, 224, 224}.
// Otherwise need vector<vector<>>
printf("Number of inputs = %zu\n", num_input_nodes);
// iterate over all input nodes
for (int i = 0; i < num_input_nodes; i++) {
// print input node names
char* input_name = session.GetInputName(i, allocator);
printf("Input %d : name=%s\n", i, input_name);
input_node_names[i] = input_name;
// print input node types
Ort::TypeInfo type_info = session.GetInputTypeInfo(i);
auto tensor_info = type_info.GetTensorTypeAndShapeInfo();
ONNXTensorElementDataType type = tensor_info.GetElementType();
printf("Input %d : type=%d\n", i, type);
// print input shapes/dims
input_node_dims = tensor_info.GetShape();
printf("Input %d : num_dims=%zu\n", i, input_node_dims.size());
for (size_t j = 0; j < input_node_dims.size(); j++)
printf("Input %d : dim %zu=%jd\n", i, j, input_node_dims[j]);
}
size_t input_tensor_size = 224 * 224 * 3; // simplify ... using known dim values to calculate size
// use OrtGetTensorShapeElementCount() to get official size!
std::vector<float> input_tensor_values(input_tensor_size);
std::vector<const char*> output_node_names = {"output"};//{"softmaxout_1"};
// initialize input data with values in [0.0, 1.0]
for (unsigned int i = 0; i < input_tensor_size; i++)
input_tensor_values[i] = (float)i / (input_tensor_size + 1);
// create input tensor object from data values
auto memory_info = Ort::MemoryInfo::CreateCpu(OrtArenaAllocator, OrtMemTypeDefault);
Ort::Value input_tensor = Ort::Value::CreateTensor<float>(memory_info, input_tensor_values.data(), input_tensor_size, input_node_dims.data(), 4);
assert(input_tensor.IsTensor());
int nums=100;
t_start=clock(); //程序开始计时
for(int i=0;i<nums;++i){
// score model & input tensor, get back output tensor
auto output_tensors = session.Run(Ort::RunOptions{nullptr}, input_node_names.data(), &input_tensor, 1, output_node_names.data(), 1);
// assert(output_tensors.size() == 1 && output_tensors.front().IsTensor());
}
t_end=clock(); //程序结束用时
double endtime=(double)(t_end-t_start)/CLOCKS_PER_SEC;//计算
std::cout<<"Total time:"<<endtime/nums<<"s"<<std::endl; //s为单位
/*
// Get pointer to output tensor float values
float* floatarr = output_tensors.front().GetTensorMutableData<float>();
// assert(abs(floatarr[0] - 0.000045) < 1e-6);
// score the model, and print scores for first 5 classes
for (int i = 0; i < 5; i++)
printf("Score for class [%d] = %f\n", i, floatarr[i]);
*/
// Results should be as below...
// Score for class[0] = 0.000045
// Score for class[1] = 0.003846
// Score for class[2] = 0.000125
// Score for class[3] = 0.001180
// Score for class[4] = 0.001317
// release buffers allocated by ORT alloctor
for(const char* node_name : input_node_names)
allocator.Free(const_cast<void*>(reinterpret_cast<const void*>(node_name)));
printf("Done!\n");
}
```
```makefile
# CMakeLists.txt
project(capi_test)
set(CMAKE_BUILD_TYPE Debug)
cmake_minimum_required(VERSION 3.13)
#option(ONNXRUNTIME_ROOTDIR "onnxruntime root dir")
# tensorrt_provider_factory.h contains old APIs of the tensorrt execution provider
#include(CheckIncludeFileCXX)
#CHECK_INCLUDE_FILE_CXX(tensorrt_provider_factory.h HAVE_TENSORRT_PROVIDER_FACTORY_H)
set(CMAKE_CXX_STANDARD 14)
set(CMAKE_CXX_STANDARD_REQUIRED ON)
#include_directories(
# ${ONNXRUNTIME_ROOTDIR}/include/onnxruntime/core/session/
# ${ONNXRUNTIME_ROOTDIR}/include/onnxruntime/core/providers/tensorrt/
#)
include_directories("/opt/onnxruntime/include")
link_directories("/opt/onnxruntime/lib")
ADD_EXECUTABLE(capi_test main.cpp)
if(HAVE_TENSORRT_PROVIDER_FACTORY_H)
target_compile_definitions(capi_test PRIVATE -DHAVE_TENSORRT_PROVIDER_FACTORY_H)
endif()
target_link_libraries(capi_test onnxruntime)
```
## [使用 PyTorch 进行 ORT 训练](https://onnxruntime.ai/docs/get-started/training-pytorch.html)
```python
pip install torch-ort
python -m torch_ort.configure
from torch_ort import ORTModule
.
.
.
model = ORTModule(model)
```
- [NCNN、OpenVino、 TensorRT、MediaPipe、ONNX,各种推理部署架构,到底哪家强?](https://www.bilibili.com/read/cv13656068)
[toc]
# 1、纯python
- 0.02707[cpu], 0.00655[gpu]
```python
from torchvision.models import resnet18
import torch
import time
device = "cpu" # 0.02707 0.00655
nums = 100
model = resnet18().to(device)
inputs = torch.randn(nums, 3, 224, 224).to(device)
start = time.perf_counter()
for i in range(nums):
preds = model(inputs[[i]])
end = time.perf_counter()
print(f"mean_time:{((end - start) / nums):.5f}")
torch.onnx.export(model, torch.randn(1, 3, 224, 224),
'model.onnx', verbose=True, opset_version=11,
input_names=['input'], # the model's input names
output_names=['output']
)
example = torch.rand(1, 3, 224, 224)
# Use torch.jit.trace to generate a torch.jit.ScriptModule via tracing.
traced_script_module = torch.jit.trace(model, example)
traced_script_module.save("traced_resnet_model.pt")
# python -m onnxruntime.tools.convert_onnx_models_to_ort model.onnx # 会生成 model.ort
```
# 2、libtorch
- 0.1934[cpu],0.0077[gpu]
```c++
#include<iostream>
#include<torch/script.h>
#include <torch/torch.h> // CUDA helper functions
#include<memory>
#include<chrono> // steady_clock for wall-clock timing
#include<utility>
#include<vector>

// Benchmark: mean wall-clock latency of a TorchScript module over `nums`
// single-image forward passes.
//
// Timing uses std::chrono::steady_clock instead of clock(): clock() reports
// process CPU time summed over all threads, which overstates the latency of
// multi-threaded CPU inference.
int main(int argc, const char* argv[]) {
    if (argc != 2) {
        std::cerr << "usage: example-app <path-to-exported-script-module>\n";
        return -1;
    }

    // Pick the device: CUDA when available, CPU otherwise.
    torch::DeviceType device_type = at::kCPU;
    if (torch::cuda::is_available())
        device_type = at::kCUDA;

    torch::jit::script::Module model;
    try {
        // Deserialize the ScriptModule from a file using torch::jit::load().
        model = torch::jit::load(argv[1]);
    }
    catch (const c10::Error& e) {
        std::cerr << "error loading the model\n";
        return -1;
    }
    std::cout << "ok\n";

    const int nums = 100;
    model.to(device_type);
    model.eval();
    // Inference only: without this guard autograd state is recorded on every forward pass.
    torch::NoGradGuard no_grad;

    // Pre-build one input list per iteration so tensor creation is not timed.
    std::vector<std::vector<torch::jit::IValue>> inputs;
    inputs.reserve(nums);
    for (int i = 0; i < nums; ++i) {
        std::vector<torch::jit::IValue> one;
        one.push_back(torch::randn({ 1, 3, 224, 224 }).to(device_type));
        inputs.push_back(std::move(one));
    }

    // Execute the model and turn its output into a tensor.
    at::Tensor output;
    // CUDA kernels run asynchronously: synchronise before starting and
    // stopping the timer so the measurement covers the actual GPU work.
    if (device_type == at::kCUDA)
        torch::cuda::synchronize();
    const auto t_start = std::chrono::steady_clock::now();
    for (int i = 0; i < nums; ++i) {
        output = model.forward(inputs[i]).toTensor();
    }
    if (device_type == at::kCUDA)
        torch::cuda::synchronize();
    const auto t_end = std::chrono::steady_clock::now();

    const double endtime = std::chrono::duration<double>(t_end - t_start).count();
    // The printed value is the mean time per forward pass, in seconds.
    std::cout << "Total time:" << endtime / nums << "s" << std::endl;
    std::cout << output.slice(/*dim=*/1, /*start=*/0, /*end=*/5) << '\n';
}
```
# 3、onnxruntime
- 0.01482[cpu py],0.0679[cpu c++]
```python
import onnxruntime
import numpy as np
import time
nums = 100
model_path = "model.onnx" # or 'model.ort'
ort_session = onnxruntime.InferenceSession(
model_path,
providers=['TensorrtExecutionProvider', 'CUDAExecutionProvider', 'CPUExecutionProvider'])
inputs = np.random.randn(nums, 3, 224, 224).astype(np.float32)
start = time.perf_counter()
for i in range(nums):
ort_inputs = {ort_session.get_inputs()[0].name: inputs[[i]]}
ort_outs = ort_session.run(None, ort_inputs)[0]
end = time.perf_counter()
print(f"mean_time:{((end - start) / nums):.5f}")
# ort_outs = torch.softmax(torch.from_numpy(ort_outs), -1)
# print(ort_outs.argmax(-1))
```
此差异已折叠。
- https://www.cvmart.net/community/detail/7040
- https://www.cvmart.net/community/detail/5609
- https://github.com/pytorch/TensorRT
# 1、训练模型 python训练 (略过)
# 2、模型推理部署
......
- https://pytorch.org/cppdocs/
- https://pytorch.org/get-started/locally/ # 下载 libtorch库
- [LibTorch的安装与基本使用](https://zhuanlan.zhihu.com/p/513571175)
- https://docs.openvino.ai/2023.3/openvino_docs_install_guides_installing_openvino_apt.html # openvino c++安装
- https://github.com/openvinotoolkit/openvino
- https://github.com/openvinotoolkit/openvino_notebooks
- https://github.com/microsoft/onnxruntime
- https://github.com/microsoft/onnxruntime-inference-examples
- https://github.com/pytorch/TensorRT
- https://github.com/NVIDIA/TensorRT
- https://github.com/onnx/onnx-tensorrt
- https://github.com/wang-xinyu/tensorrtx
# 1、安装 opencv(c++)
- [Linux安装Opencv(C++)](https://blog.csdn.net/weixin_44384491/article/details/121142093)
# 2、安装libtorch
下载的libtorch的版本最好和你的pytorch的版本一致。Linux下常用的libtorch release版本的下载链接如下:
## 2.2.1-cu118
>Download here (Pre-cxx11 ABI):
>https://download.pytorch.org/libtorch/cu118/libtorch-shared-with-deps-2.2.1%2Bcu118.zip
>Download here (cxx11 ABI):
>https://download.pytorch.org/libtorch/cu118/libtorch-cxx11-abi-shared-with-deps-2.2.1%2Bcu118.zip
## 1.11.0-cu115
>https://download.pytorch.org/libtorch/cu115
## 2.2.1-cpu
>Download here (Pre-cxx11 ABI):
>https://download.pytorch.org/libtorch/cpu/libtorch-shared-with-deps-2.2.1%2Bcpu.zip
>Download here (cxx11 ABI):
>https://download.pytorch.org/libtorch/cpu/libtorch-cxx11-abi-shared-with-deps-2.2.1%2Bcpu.zip
下载完成后,随便丢到一个地方去解压,完成。比如我是习惯性放在/usr/local/lib下的。我也建议linux小白将libtorch放在 /usr/local/lib下,并保证libtorch文件夹下存在include这个文件夹。
## 配置CMakeLists.txt
```py
## 目录结构
xxxx
- CMakeLists.txt
- digit.cpp
- digit.py
```
```cmake
# CMakeLists.txt
# Builds the `digit` executable from digit.cpp and links it against LibTorch and OpenCV.
# CMAKE_CXX_STANDARD 17 is only understood by CMake >= 3.8.
cmake_minimum_required(VERSION 3.8 FATAL_ERROR)
project(LibTorchDemo)
# compile options
set(CMAKE_CXX_FLAGS_RELEASE "-O3")
# Recent LibTorch 2.x releases require C++17; fail at configure time
# instead of silently falling back to an older standard.
set(CMAKE_CXX_STANDARD 17)
set(CMAKE_CXX_STANDARD_REQUIRED ON)
# package
find_package(OpenCV REQUIRED)
find_package(Torch REQUIRED PATHS "/usr/local/lib/libtorch")
add_executable(digit digit.cpp)
# libtorch
target_link_libraries(digit ${TORCH_LIBRARIES})
target_link_libraries(digit ${OpenCV_LIBS})
```
> "/usr/local/lib/libtorch" :Torch的package路径根据你的安装目录指定
然后写个文件include一下库,文件名为digit.cpp:
```c++
#include "iostream"
#include "opencv2/opencv.hpp"
#include "torch/script.h"
int main(int argc, char const *argv[])
{
std::cout << "hello world!" << std::endl;
return 0;
}
```
编译一把:
```bash
$mkdir build
$cd build
$cmake .. && make -j8
```
> 如果出现错误,基本都是找不到头文件或者静态库,如果找不到头文件,在CMakeLists.txt中include_directories()中添加能够搜索到你在cpp中写的相对路径的根目录路径。如果静态库找不到,请检查安装包是否损坏,或者静态库目录是否在gcc的搜索路径中。
## 第一步:先用PyTorch训练一个网络
既然我们需要将PyTorch模型使用C++部署,那么首先肯定需要一个Torch的模型。我们先使用PyTorch简单训练一个手写数字识别,相信看这篇文章的靓仔都是torch老手了,我直接上代码:
> 如果你已经有一个模型文件了,请直接跳转到第二步
```python
from sklearn.datasets import load_digits
import torch
from torch import nn
import torch.utils.data as Data
import numpy as np
from sklearn.metrics import accuracy_score
import matplotlib.pyplot as plt
import os
class Digit(nn.Module):
def __init__(self):
super().__init__()
self.conv = nn.Sequential(
nn.Conv2d(1, 16, 3, 1, 1),
nn.Tanh(),
nn.Conv2d(16, 32, 3, 2, 1),
nn.Tanh(),
nn.Conv2d(32, 16, 3, 2, 1),
nn.Tanh(),
nn.Conv2d(16, 8, 3, 1, 1)
)
self.output = nn.Linear(32, 10)
def forward(self, x):
out = self.conv(x)
out = self.output(out.flatten(1))
return out
# Fraction of the samples used for training; the rest is held out for evaluation.
RATIO = 0.8
BATCH_SIZE = 128
EPOCH = 10

if __name__ == "__main__":
    # Load the scikit-learn digits and scale the pixel values by 1/16.
    X, y = load_digits(return_X_y=True)
    X = X / 16.
    sample_num = len(y)

    # Reshape straight to (N, 1, 8, 8) instead of round-tripping through nested Python lists.
    X = torch.from_numpy(X.reshape(-1, 1, 8, 8)).float()
    y = torch.from_numpy(y).long()

    # Random train / test split.
    indice = np.arange(sample_num)
    np.random.shuffle(indice)
    offline = int(sample_num * RATIO)
    train = Data.TensorDataset(X[indice[:offline]], y[indice[:offline]])
    test = Data.TensorDataset(X[indice[offline:]], y[indice[offline:]])
    train_loader = Data.DataLoader(train, BATCH_SIZE, True)
    test_loader = Data.DataLoader(test, BATCH_SIZE, False)

    model = Digit()
    optimizer = torch.optim.RMSprop(model.parameters(), lr=1e-3)
    criterion = nn.CrossEntropyLoss(reduction="mean")

    test_losses = []
    test_accs = []
    for epoch in range(EPOCH):
        model.train()
        for bx, by in train_loader:
            out = model(bx)
            loss = criterion(out, by)
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

        # Evaluate on the held-out set. Loss and accuracy are weighted by batch
        # size so the smaller last batch does not bias the epoch averages.
        model.eval()
        correct = 0
        total = 0
        loss_sum = 0.0
        with torch.no_grad():
            for bx, by in test_loader:
                out = model(bx)
                batch = by.size(0)
                loss_sum += criterion(out, by).item() * batch
                correct += (out.argmax(1) == by).sum().item()
                total += batch
        test_losses.append(loss_sum / total)
        test_accs.append(correct / total)

    # Save before plotting so the checkpoint exists even if the plot window
    # cannot be shown or is never closed.
    os.makedirs("model", exist_ok=True)
    torch.save(model.state_dict(), "model/digit.pth")

    plt.figure(dpi=120)
    plt.plot(test_losses, 'o-', label="loss")
    plt.plot(test_accs, 'o-', label="accuracy")
    plt.legend()
    plt.grid()
    plt.show()
```
## 第二步:使用tracing将模型文件转化成TorchScript
PyTorch用 `torch.save` 导出的模型文件是不能直接被libtorch读取的,因为它默认使用Python的pickle进行序列化,依赖Python运行时。PyTorch通过JIT搭建了Python和C++的桥梁:我们可以将模型转成TorchScript Module,把模型的结构和参数一起序列化,从而脱离Python运行时,在C++中直接加载。
转换方法非常简单:
```py
# Converts the trained Digit checkpoint into a TorchScript module by tracing.
import torch
from digit import Digit

model = Digit()
model.load_state_dict(torch.load("model/digit.pth", map_location="cpu"))
# Trace in inference mode so no training-only behaviour is recorded in the graph.
model.eval()
# Example input used for tracing: one single-channel 8x8 image.
sample = torch.randn(1, 1, 8, 8)
trace_model = torch.jit.trace(model, sample)
trace_model.save("model/digit.jit")
```
转换完成后,可以运行测试脚本 `test_jit.py` 对比原生模型与JIT模型的推理耗时。由于Python本身的特性和JIT即时编译的特性,模型在同一进程生命周期内的前几次运行会比较慢,所以在正式测试前,需要先空跑几次。
## 第三步:使用libtorch重写推理程序
由于TorchScript可以被C++直接调用,所以我们只需要使用libtorch重写推理代码,并将模型读入就完成了。
libtorch的语法和PyTorch基本一致,学起来很快,于此锦恢就不再赘述了。相应的,在C++中,我们用cv::Mat来取代Python中的numpy.ndarray对象,如何将cv::Mat转成libtorch可以读入的数据结构也会在demo中涉及。
下面的例子会完成一个C++命令行程序,它的第一个参数为模型,第二个参数为需要读入的手写数字图像的路径,预测结果会打印到控制台上。期待已久的C++代码如下:
```c
#include "iostream"
#include "opencv2/opencv.hpp"
#include "torch/script.h"
#include "fstream"
void checkPath(const char* path) {
std::ifstream in;
in.open(path);
bool flag = (bool)in;
in.close();
if (flag) return;
else {
std::cout << "file " << path << " doesn't exist!" << std::endl;
exit(-1);
}
}
int main(int argc, char const *argv[])
{
if (argc != 3) {
std::cout << "usage : digit <model path> <image path>" << std::endl;
return -1;
}
checkPath(argv[1]);
checkPath(argv[2]);
cv::Mat img = cv::imread(argv[2]), gimg, fimg, rimg;
cv::cvtColor(img, gimg, CV_BGR2GRAY);
gimg.convertTo(fimg, CV_32F, - 1. / 255., 1.);
cv::resize(fimg, rimg, {8, 8});
// convert Mat to tensor
at::Tensor img_tensor = torch::from_blob(
rimg.data,
{1, 1, 8, 8},
torch::kFloat32
);
// load model
torch::jit::Module model = torch::jit::load(argv[1]);
// torch.no_grad()
torch::NoGradGuard no_grad; // 请一定加入torch::NoGradGuard no_grad; 这句话,否则内存会炸。
// forward
torch::Tensor out = model({img_tensor}).toTensor();
int pre_lab = torch::argmax(out, 1).item().toInt();
std::cout << "predict number is " << pre_lab << std::endl;
return 0;
}
```
# 实践
```py
# 安装环境依赖
sudo apt-get update -y
sudo apt-get install cmake -y
sudo apt-get install build-essential libgtk2.0-dev libavcodec-dev libavformat-dev libjpeg-dev libswscale-dev libtiff5-dev -y
sudo apt-get install libgtk2.0-dev -y
sudo apt-get install pkg-config -y
# 安装opencv
!wget https://github.com/opencv/opencv/archive/4.9.0.zip
unzip 4.9.0.zip
cd opencv-4.9.0
mkdir build
cd build
cmake -D CMAKE_BUILD_TYPE=Release -D CMAKE_INSTALL_PREFIX=/usr/local ..
make -j8
make install
# 安装libtorch
!wget https://download.pytorch.org/libtorch/cu121/libtorch-cxx11-abi-shared-with-deps-2.2.1%2Bcu121.zip
unzip libtorch-cxx11-abi-shared-with-deps-2.2.1%2Bcu121.zip
cp -r libtorch /usr/local/lib
```
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册