Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
Crayon鑫
Paddle
提交
2eb3a7a9
P
Paddle
项目概览
Crayon鑫
/
Paddle
与 Fork 源项目一致
Fork自
PaddlePaddle / Paddle
通知
1
Star
1
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
1
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
1
Issue
1
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
未验证
提交
2eb3a7a9
编写于
2月 07, 2020
作者:
Z
Zhaolong Xing
提交者:
GitHub
2月 07, 2020
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
[Cherry-pick] [Fix BUG]: Core when multi thread + clone + paddle-tr #22442 (#22471)
test=release/1.7
上级
6892deb1
变更
4
隐藏空白更改
内联
并排
Showing
4 changed file
with
15 addition
and
3 deletion
+15
-3
paddle/fluid/inference/api/details/zero_copy_tensor.cc
paddle/fluid/inference/api/details/zero_copy_tensor.cc
+2
-1
paddle/fluid/inference/tensorrt/engine.cc
paddle/fluid/inference/tensorrt/engine.cc
+1
-1
paddle/fluid/inference/tensorrt/engine.h
paddle/fluid/inference/tensorrt/engine.h
+2
-1
paddle/fluid/inference/tests/api/trt_quant_int8_test.cc
paddle/fluid/inference/tests/api/trt_quant_int8_test.cc
+10
-0
未找到文件。
paddle/fluid/inference/api/details/zero_copy_tensor.cc
浏览文件 @
2eb3a7a9
...
@@ -138,7 +138,8 @@ void ZeroCopyTensor::copy_to_cpu(T *data) {
...
@@ -138,7 +138,8 @@ void ZeroCopyTensor::copy_to_cpu(T *data) {
static_cast
<
const
platform
::
CUDADeviceContext
*>
(
pool
.
Get
(
gpu_place
));
static_cast
<
const
platform
::
CUDADeviceContext
*>
(
pool
.
Get
(
gpu_place
));
memory
::
Copy
(
platform
::
CPUPlace
(),
static_cast
<
void
*>
(
data
),
gpu_place
,
memory
::
Copy
(
platform
::
CPUPlace
(),
static_cast
<
void
*>
(
data
),
gpu_place
,
t_data
,
ele_num
*
sizeof
(
T
),
dev_ctx
->
stream
());
t_data
,
ele_num
*
sizeof
(
T
),
dev_ctx
->
stream
());
cudaDeviceSynchronize
();
cudaStreamSynchronize
(
dev_ctx
->
stream
());
#else
#else
PADDLE_THROW
(
"Not compile with CUDA, should not reach here."
);
PADDLE_THROW
(
"Not compile with CUDA, should not reach here."
);
#endif
#endif
...
...
paddle/fluid/inference/tensorrt/engine.cc
浏览文件 @
2eb3a7a9
...
@@ -38,13 +38,13 @@ void TensorRTEngine::Execute(int batch_size, std::vector<void *> *buffers,
...
@@ -38,13 +38,13 @@ void TensorRTEngine::Execute(int batch_size, std::vector<void *> *buffers,
const
std
::
thread
::
id
tid
=
std
::
this_thread
::
get_id
();
const
std
::
thread
::
id
tid
=
std
::
this_thread
::
get_id
();
batch_size_
=
batch_size
;
batch_size_
=
batch_size
;
if
(
infer_context_
.
find
(
tid
)
==
infer_context_
.
end
())
{
if
(
infer_context_
.
find
(
tid
)
==
infer_context_
.
end
())
{
std
::
unique_lock
<
std
::
mutex
>
lock
(
mutex_
);
PADDLE_ENFORCE_NOT_NULL
(
PADDLE_ENFORCE_NOT_NULL
(
infer_engine_
,
infer_engine_
,
"You should build engine first and then set the context."
);
"You should build engine first and then set the context."
);
infer_context_
[
tid
].
reset
(
infer_engine_
->
createExecutionContext
());
infer_context_
[
tid
].
reset
(
infer_engine_
->
createExecutionContext
());
}
}
infer_context_
[
tid
]
->
enqueue
(
batch_size
,
buffers
->
data
(),
stream
,
nullptr
);
infer_context_
[
tid
]
->
enqueue
(
batch_size
,
buffers
->
data
(),
stream
,
nullptr
);
cudaStreamSynchronize
(
stream
);
SetRuntimeBatch
(
batch_size
);
SetRuntimeBatch
(
batch_size
);
}
}
...
...
paddle/fluid/inference/tensorrt/engine.h
浏览文件 @
2eb3a7a9
...
@@ -82,7 +82,7 @@ class TensorRTEngine {
...
@@ -82,7 +82,7 @@ class TensorRTEngine {
void
Build
(
const
DescType
&
paddle_model
);
void
Build
(
const
DescType
&
paddle_model
);
void
Execute
(
int
batch_size
,
std
::
vector
<
void
*>*
buffers
,
void
Execute
(
int
batch_size
,
std
::
vector
<
void
*>*
buffers
,
cudaStream_t
stream
);
cudaStream_t
stream
=
nullptr
);
// Initialize the inference network, so that TensorRT layers can add to this
// Initialize the inference network, so that TensorRT layers can add to this
// network.
// network.
...
@@ -216,6 +216,7 @@ class TensorRTEngine {
...
@@ -216,6 +216,7 @@ class TensorRTEngine {
infer_context_
;
infer_context_
;
infer_ptr
<
nvinfer1
::
IHostMemory
>
ihost_memory_
;
infer_ptr
<
nvinfer1
::
IHostMemory
>
ihost_memory_
;
std
::
unordered_map
<
nvinfer1
::
ITensor
*
,
float
>
quant_dynamic_range_
;
std
::
unordered_map
<
nvinfer1
::
ITensor
*
,
float
>
quant_dynamic_range_
;
std
::
mutex
mutex_
;
};
// class TensorRTEngine
};
// class TensorRTEngine
#define IS_TRT_VERSION_GE(version) \
#define IS_TRT_VERSION_GE(version) \
...
...
paddle/fluid/inference/tests/api/trt_quant_int8_test.cc
浏览文件 @
2eb3a7a9
...
@@ -15,6 +15,7 @@ limitations under the License. */
...
@@ -15,6 +15,7 @@ limitations under the License. */
#include <gflags/gflags.h>
#include <gflags/gflags.h>
#include <glog/logging.h>
#include <glog/logging.h>
#include <gtest/gtest.h>
#include <gtest/gtest.h>
#include <numeric>
#include "paddle/fluid/inference/tests/api/trt_test_helper.h"
#include "paddle/fluid/inference/tests/api/trt_test_helper.h"
...
@@ -44,6 +45,15 @@ TEST(quant_int8, resnet50) {
...
@@ -44,6 +45,15 @@ TEST(quant_int8, resnet50) {
input_t
->
copy_from_cpu
(
input
);
input_t
->
copy_from_cpu
(
input
);
ASSERT_TRUE
(
predictor
->
ZeroCopyRun
());
ASSERT_TRUE
(
predictor
->
ZeroCopyRun
());
std
::
vector
<
float
>
out_data
;
auto
output_names
=
predictor
->
GetOutputNames
();
auto
output_t
=
predictor
->
GetOutputTensor
(
output_names
[
0
]);
std
::
vector
<
int
>
output_shape
=
output_t
->
shape
();
int
out_num
=
std
::
accumulate
(
output_shape
.
begin
(),
output_shape
.
end
(),
1
,
std
::
multiplies
<
int
>
());
out_data
.
resize
(
out_num
);
output_t
->
copy_to_cpu
(
out_data
.
data
());
}
}
}
// namespace inference
}
// namespace inference
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录