机器未来 / Paddle (forked from PaddlePaddle / Paddle)

Commit 2fa3ce2b
Authored on Aug 26, 2021 by zhangchunle; committed via GitHub on Aug 26, 2021.
Revert "Add copy from tensor (#34406)"
This reverts commit
ac33c0ca
.
上级
fa6c59a4
Changes: 11 files changed, 11 additions(+) and 710 deletions(-). A short sketch of the removed API in use follows the file list.
cmake/configure.cmake                                                     +0   -4
paddle/fluid/inference/api/CMakeLists.txt                                 +2   -3
paddle/fluid/inference/api/details/zero_copy_tensor.cc                    +8   -65
paddle/fluid/inference/api/paddle_infer_contrib.cc                        +0   -190
paddle/fluid/inference/api/paddle_infer_contrib.h                         +0   -40
paddle/fluid/inference/api/paddle_tensor.h                                +1   -38
paddle/fluid/inference/tests/api/CMakeLists.txt                           +0   -5
paddle/fluid/inference/tests/api/paddle_infer_api_copy_tensor_tester.cc   +0   -329
paddle/fluid/pybind/inference_api.cc                                      +0   -8
python/paddle/inference/contrib/__init__.py                               +0   -13
python/paddle/inference/contrib/utils/__init__.py                         +0   -15
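For context, the reverted change had exposed a contrib tensor-copy utility on top of the inference API. Below is a minimal sketch of how the removed API was driven, distilled from the deleted header and test file reproduced further down; the function name and tensor handles are illustrative placeholders, not part of the original code:

#include <cuda_runtime.h>
#include "paddle/fluid/inference/api/paddle_infer_contrib.h"

// Sketch only: TensorUtils is removed by this revert; the calls below follow
// the declarations in the deleted paddle_infer_contrib.h reproduced below.
void CopyTensorSketch(paddle_infer::Tensor *dst,
                      const paddle_infer::Tensor &src) {
  using paddle_infer::contrib::TensorUtils;

  // Synchronous copy: blocks until dst holds src's shape and data.
  TensorUtils::CopyTensor(dst, src);

  // Asynchronous copy: the CUDA stream the copy was enqueued on is written
  // through the exec_stream argument, so the caller can synchronize later.
  cudaStream_t stream;
  TensorUtils::CopyTensorAsync(dst, src, static_cast<void *>(&stream));
  cudaStreamSynchronize(stream);
}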
cmake/configure.cmake

@@ -20,10 +20,6 @@ if(WITH_TESTING)
   add_definitions(-DPADDLE_WITH_TESTING)
 endif(WITH_TESTING)
 
-if(WITH_INFERENCE_API_TEST)
-  add_definitions(-DPADDLE_WITH_INFERENCE_API_TEST)
-endif(WITH_INFERENCE_API_TEST)
-
 if(NOT WITH_PROFILER)
   add_definitions(-DPADDLE_DISABLE_PROFILER)
 endif(NOT WITH_PROFILER)
paddle/fluid/inference/api/CMakeLists.txt

@@ -28,15 +28,14 @@ if(WITH_MKLDNN)
 endif()
 
 cc_library(analysis_config SRCS analysis_config.cc DEPS ${mkldnn_quantizer_cfg} lod_tensor paddle_pass_builder table_printer)
-cc_library(paddle_infer_contrib SRCS paddle_infer_contrib.cc DEPS zero_copy_tensor)
 cc_library(paddle_pass_builder SRCS paddle_pass_builder.cc)
 
 if(WITH_CRYPTO)
   cc_library(paddle_inference_api SRCS api.cc api_impl.cc helper.cc DEPS lod_tensor scope reset_tensor_array
-      analysis_config paddle_infer_contrib zero_copy_tensor trainer_desc_proto paddle_crypto custom_operator)
+      analysis_config zero_copy_tensor trainer_desc_proto paddle_crypto custom_operator)
 else()
   cc_library(paddle_inference_api SRCS api.cc api_impl.cc helper.cc DEPS lod_tensor scope reset_tensor_array
-      analysis_config paddle_infer_contrib zero_copy_tensor trainer_desc_proto custom_operator)
+      analysis_config zero_copy_tensor trainer_desc_proto custom_operator)
 endif()
 
 if(WIN32)
paddle/fluid/inference/api/details/zero_copy_tensor.cc

@@ -121,8 +121,6 @@ DataType Tensor::type() const {
   return DataType::FLOAT32;
 }
 
-PlaceType Tensor::place() const { return place_; }
-
 template <typename T>
 void Tensor::CopyFromCpu(const T *data) {
   EAGER_GET_TENSOR;

@@ -187,8 +185,7 @@ void Tensor::CopyFromCpu(const T *data) {
 }
 
 template <typename T>
-void Tensor::CopyToCpuImpl(T *data, void *exec_stream, CallbackFunc cb,
-                           void *cb_params) const {
+void Tensor::CopyToCpu(T *data) {
   EAGER_GET_TENSOR;
   auto ele_num = tensor->numel();
   auto *t_data = tensor->data<T>();

@@ -225,16 +222,7 @@ void Tensor::CopyToCpuImpl(T *data, void *exec_stream, CallbackFunc cb,
 #ifdef PADDLE_WITH_HIP
     hipStreamSynchronize(dev_ctx->stream());
 #else
-    // async, return stream
-    if (nullptr != exec_stream) {
-      *(static_cast<cudaStream_t *>(exec_stream)) = dev_ctx->stream();
-      // async with callback
-    } else if (cb) {
-      cudaLaunchHostFunc(dev_ctx->stream(), cb, cb_params);
-      // sync
-    } else {
-      cudaStreamSynchronize(dev_ctx->stream());
-    }
+    cudaStreamSynchronize(dev_ctx->stream());
 #endif
 #else
   PADDLE_THROW(paddle::platform::errors::Unavailable(

@@ -273,22 +261,6 @@ void Tensor::CopyToCpuImpl(T *data, void *exec_stream, CallbackFunc cb,
       "The analysis predictor supports CPU, GPU, NPU and XPU now."));
 }
 }
 
-template <typename T>
-void Tensor::CopyToCpu(T *data) const {
-  CopyToCpuImpl<T>(data, nullptr, nullptr, nullptr);
-}
-
-template <typename T>
-void Tensor::CopyToCpuAsync(T *data, void *exec_stream) const {
-  CopyToCpuImpl<T>(data, exec_stream, nullptr, nullptr);
-}
-
-template <typename T>
-void Tensor::CopyToCpuAsync(T *data, CallbackFunc cb, void *cb_params) const {
-  CopyToCpuImpl<T>(data, nullptr, cb, cb_params);
-}
-
 template PD_INFER_DECL void Tensor::CopyFromCpu<float>(const float *data);
 template PD_INFER_DECL void Tensor::CopyFromCpu<int64_t>(const int64_t *data);
 template PD_INFER_DECL void Tensor::CopyFromCpu<int32_t>(const int32_t *data);

@@ -296,38 +268,12 @@ template PD_INFER_DECL void Tensor::CopyFromCpu<uint8_t>(const uint8_t *data);
 template PD_INFER_DECL void Tensor::CopyFromCpu<int8_t>(const int8_t *data);
 template PD_INFER_DECL void Tensor::CopyFromCpu<float16>(const float16 *data);
 
-template PD_INFER_DECL void Tensor::CopyToCpu<float>(float *data) const;
-template PD_INFER_DECL void Tensor::CopyToCpu<int64_t>(int64_t *data) const;
-template PD_INFER_DECL void Tensor::CopyToCpu<int32_t>(int32_t *data) const;
-template PD_INFER_DECL void Tensor::CopyToCpu<uint8_t>(uint8_t *data) const;
-template PD_INFER_DECL void Tensor::CopyToCpu<int8_t>(int8_t *data) const;
-template PD_INFER_DECL void Tensor::CopyToCpu<float16>(float16 *data) const;
-
-template PD_INFER_DECL void Tensor::CopyToCpuAsync<float>(
-    float *data, void *exec_stream) const;
-template PD_INFER_DECL void Tensor::CopyToCpuAsync<int64_t>(
-    int64_t *data, void *exec_stream) const;
-template PD_INFER_DECL void Tensor::CopyToCpuAsync<int32_t>(
-    int32_t *data, void *exec_stream) const;
-template PD_INFER_DECL void Tensor::CopyToCpuAsync<uint8_t>(
-    uint8_t *data, void *exec_stream) const;
-template PD_INFER_DECL void Tensor::CopyToCpuAsync<int8_t>(
-    int8_t *data, void *exec_stream) const;
-template PD_INFER_DECL void Tensor::CopyToCpuAsync<float16>(
-    float16 *data, void *exec_stream) const;
-
-template PD_INFER_DECL void Tensor::CopyToCpuAsync<float>(
-    float *data, CallbackFunc cb, void *cb_params) const;
-template PD_INFER_DECL void Tensor::CopyToCpuAsync<int64_t>(
-    int64_t *data, CallbackFunc cb, void *cb_params) const;
-template PD_INFER_DECL void Tensor::CopyToCpuAsync<int32_t>(
-    int32_t *data, CallbackFunc cb, void *cb_params) const;
-template PD_INFER_DECL void Tensor::CopyToCpuAsync<uint8_t>(
-    uint8_t *data, CallbackFunc cb, void *cb_params) const;
-template PD_INFER_DECL void Tensor::CopyToCpuAsync<int8_t>(
-    int8_t *data, CallbackFunc cb, void *cb_params) const;
-template PD_INFER_DECL void Tensor::CopyToCpuAsync<float16>(
-    float16 *data, CallbackFunc cb, void *cb_params) const;
+template PD_INFER_DECL void Tensor::CopyToCpu<float>(float *data);
+template PD_INFER_DECL void Tensor::CopyToCpu<int64_t>(int64_t *data);
+template PD_INFER_DECL void Tensor::CopyToCpu<int32_t>(int32_t *data);
+template PD_INFER_DECL void Tensor::CopyToCpu<uint8_t>(uint8_t *data);
+template PD_INFER_DECL void Tensor::CopyToCpu<int8_t>(int8_t *data);
+template PD_INFER_DECL void Tensor::CopyToCpu<float16>(float16 *data);
 
 template PD_INFER_DECL float *Tensor::data<float>(PlaceType *place,
                                                   int *size) const;

@@ -339,15 +285,12 @@ template PD_INFER_DECL uint8_t *Tensor::data<uint8_t>(PlaceType *place,
                                                       int *size) const;
 template PD_INFER_DECL int8_t *Tensor::data<int8_t>(PlaceType *place,
                                                     int *size) const;
-template PD_INFER_DECL float16 *Tensor::data<float16>(PlaceType *place,
-                                                      int *size) const;
 
 template PD_INFER_DECL float *Tensor::mutable_data<float>(PlaceType place);
 template PD_INFER_DECL int64_t *Tensor::mutable_data<int64_t>(PlaceType place);
 template PD_INFER_DECL int32_t *Tensor::mutable_data<int32_t>(PlaceType place);
 template PD_INFER_DECL uint8_t *Tensor::mutable_data<uint8_t>(PlaceType place);
 template PD_INFER_DECL int8_t *Tensor::mutable_data<int8_t>(PlaceType place);
-template PD_INFER_DECL float16 *Tensor::mutable_data<float16>(PlaceType place);
 
 Tensor::Tensor(void *scope) : scope_{scope} {
   PADDLE_ENFORCE_NOT_NULL(scope_,
paddle/fluid/inference/api/paddle_infer_contrib.cc (deleted, 100644 → 0)

// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "paddle/fluid/inference/api/paddle_infer_contrib.h"
#include "paddle/fluid/framework/scope.h"
#include "paddle/fluid/memory/memcpy.h"
#include "paddle/fluid/platform/device_context.h"
#include "paddle/fluid/platform/enforce.h"
#include "paddle/fluid/platform/float16.h"

namespace paddle_infer {
namespace contrib {

using paddle::PaddleDType;

void *TensorUtils::CudaMallocPinnedMemory(size_t size) {
#if defined(PADDLE_WITH_CUDA)
  void *ptr = nullptr;
  PADDLE_ENFORCE_CUDA_SUCCESS(cudaMallocHost(&ptr, size));
  return ptr;
#else
  return nullptr;
#endif
}

void TensorUtils::CudaFreePinnedMemory(void *ptr) {
#if defined(PADDLE_WITH_CUDA)
  PADDLE_ENFORCE_CUDA_SUCCESS(cudaFreeHost(ptr));
#endif
}

void TensorUtils::CopyTensorImpl(Tensor *p_dst, const Tensor &src,
                                 void *exec_stream, CallbackFunc cb,
                                 void *cb_params) {
  Tensor &dst = *p_dst;
  dst.Reshape(src.shape());
  PADDLE_ENFORCE(
      src.place() == PlaceType::kCPU || src.place() == PlaceType::kGPU,
      paddle::platform::errors::InvalidArgument(
          "CopyTensor only support PlaceType kCPU/kGPU now."));
  PADDLE_ENFORCE(
      dst.place() == PlaceType::kCPU || dst.place() == PlaceType::kGPU,
      paddle::platform::errors::InvalidArgument(
          "CopyTensor only support PlaceType kCPU/kGPU now."));
  // copy to cpu, gpu => cpu or cpu => cpu
  if (dst.place() == PlaceType::kCPU) {
    switch (src.type()) {
      case PaddleDType::INT32:
        src.CopyToCpuImpl(dst.mutable_data<int32_t>(PlaceType::kCPU),
                          exec_stream, cb, cb_params);
        break;
      case PaddleDType::INT64:
        src.CopyToCpuImpl(dst.mutable_data<int64_t>(PlaceType::kCPU),
                          exec_stream, cb, cb_params);
        break;
      case PaddleDType::FLOAT32:
        src.CopyToCpuImpl(dst.mutable_data<float>(PlaceType::kCPU),
                          exec_stream, cb, cb_params);
        break;
      case PaddleDType::UINT8:
        src.CopyToCpuImpl(dst.mutable_data<uint8_t>(PlaceType::kCPU),
                          exec_stream, cb, cb_params);
        break;
      case PaddleDType::INT8:
        src.CopyToCpuImpl(dst.mutable_data<int8_t>(PlaceType::kCPU),
                          exec_stream, cb, cb_params);
        break;
      case PaddleDType::FLOAT16:
        src.CopyToCpuImpl(
            dst.mutable_data<paddle::platform::float16>(PlaceType::kCPU),
            exec_stream, cb, cb_params);
        break;
      default:
        PADDLE_THROW(paddle::platform::errors::Unimplemented(
            "Only INT32, INT64, UINT8, INT8, FLOAT16 and "
            "FLOAT32 is supported in Tensor. Others not implements"));
    }
    // gpu => gpu or cpu => gpu
  } else {
#if defined(PADDLE_WITH_CUDA)
    void *dst_data = nullptr;
    void *src_data = nullptr;
    size_t data_len = 0;
    int data_size = 0;
    PlaceType src_place;
    switch (src.type()) {
      case PaddleDType::INT32:
        dst_data =
            static_cast<void *>(dst.mutable_data<int32_t>(PlaceType::kGPU));
        src_data =
            static_cast<void *>(src.data<int32_t>(&src_place, &data_size));
        data_len = data_size * sizeof(int32_t);
        break;
      case PaddleDType::INT64:
        dst_data =
            static_cast<void *>(dst.mutable_data<int64_t>(PlaceType::kGPU));
        src_data =
            static_cast<void *>(src.data<int64_t>(&src_place, &data_size));
        data_len = data_size * sizeof(int64_t);
        break;
      case PaddleDType::FLOAT32:
        dst_data =
            static_cast<void *>(dst.mutable_data<float>(PlaceType::kGPU));
        src_data =
            static_cast<void *>(src.data<float>(&src_place, &data_size));
        data_len = data_size * sizeof(float);
        break;
      case PaddleDType::UINT8:
        dst_data =
            static_cast<void *>(dst.mutable_data<uint8_t>(PlaceType::kGPU));
        src_data =
            static_cast<void *>(src.data<uint8_t>(&src_place, &data_size));
        data_len = data_size * sizeof(uint8_t);
        break;
      case PaddleDType::INT8:
        dst_data =
            static_cast<void *>(dst.mutable_data<int8_t>(PlaceType::kGPU));
        src_data =
            static_cast<void *>(src.data<int8_t>(&src_place, &data_size));
        data_len = data_size * sizeof(int8_t);
        break;
      case PaddleDType::FLOAT16:
        dst_data = static_cast<void *>(
            dst.mutable_data<paddle::platform::float16>(PlaceType::kGPU));
        src_data = static_cast<void *>(
            src.data<paddle::platform::float16>(&src_place, &data_size));
        data_len = data_size * 2;
        break;
      default:
        PADDLE_THROW(paddle::platform::errors::Unimplemented(
            "Only INT32, INT64, UINT8, INT8, FLOAT16 and "
            "FLOAT32 is supported in Tensor. Others not implements"));
    }
    paddle::platform::DeviceContextPool &pool =
        paddle::platform::DeviceContextPool::Instance();
    paddle::platform::CUDAPlace gpu_place(dst.device_);
    auto *dev_ctx = static_cast<const paddle::platform::CUDADeviceContext *>(
        pool.Get(gpu_place));
    if (src.place() == PlaceType::kCPU) {
      paddle::memory::Copy(gpu_place, static_cast<void *>(dst_data),
                           paddle::platform::CPUPlace(), src_data, data_len,
                           dev_ctx->stream());
    } else {
      paddle::memory::Copy(gpu_place, static_cast<void *>(dst_data),
                           paddle::platform::CUDAPlace(), src_data, data_len,
                           dev_ctx->stream());
    }
    if (nullptr != exec_stream) {
      *(static_cast<cudaStream_t *>(exec_stream)) = dev_ctx->stream();
    } else if (cb) {
      cudaLaunchHostFunc(dev_ctx->stream(), cb, cb_params);
    } else {
      cudaStreamSynchronize(dev_ctx->stream());
    }
#else
    PADDLE_THROW(paddle::platform::errors::Unavailable(
        "Can not copy tensor to GPU CUDA place because paddle is not compiled "
        "with CUDA."));
#endif
  }
  return;
}

void TensorUtils::CopyTensor(Tensor *p_dst, const Tensor &src) {
  CopyTensorImpl(p_dst, src, nullptr, nullptr, nullptr);
}

void TensorUtils::CopyTensorAsync(Tensor *p_dst, const Tensor &src,
                                  void *exec_stream) {
  CopyTensorImpl(p_dst, src, exec_stream, nullptr, nullptr);
}

void TensorUtils::CopyTensorAsync(Tensor *p_dst, const Tensor &src,
                                  CallbackFunc cb, void *cb_params) {
  CopyTensorImpl(p_dst, src, nullptr, cb, cb_params);
}

}  // namespace contrib
}  // namespace paddle_infer
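The deleted implementation above pairs the asynchronous copies with page-locked host buffers, so the device-to-host transfer can proceed without an implicit synchronization. A minimal caller-side sketch, assuming a GPU-resident output tensor and a known element count (both illustrative), following the pattern of the deleted test file below:

#include <cuda_runtime.h>
#include "paddle/fluid/inference/api/paddle_infer_contrib.h"

// Sketch only, against the removed API: pinned host memory keeps the
// asynchronous copy truly asynchronous; the stream the copy ran on is
// returned through the argument so the caller decides when to wait.
void PinnedCopySketch(paddle_infer::Tensor *output_tensor, int out_num) {
  using paddle_infer::contrib::TensorUtils;

  float *out_data = static_cast<float *>(
      TensorUtils::CudaMallocPinnedMemory(sizeof(float) * out_num));

  cudaStream_t stream;
  output_tensor->CopyToCpuAsync(out_data, static_cast<void *>(&stream));
  cudaStreamSynchronize(stream);  // out_data is valid from here on

  TensorUtils::CudaFreePinnedMemory(static_cast<void *>(out_data));
}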
paddle/fluid/inference/api/paddle_infer_contrib.h (deleted, 100644 → 0)

// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#pragma once

#include "paddle/fluid/inference/api/paddle_inference_api.h"

namespace paddle_infer {
namespace contrib {

class TensorUtils {
 public:
  static void *CudaMallocPinnedMemory(size_t size);
  static void CudaFreePinnedMemory(void *mem);

  static void CopyTensor(Tensor *p_dst, const Tensor &src);
  static void CopyTensorAsync(Tensor *p_dst, const Tensor &src,
                              void *exec_stream);
  static void CopyTensorAsync(Tensor *p_dst, const Tensor &src,
                              CallbackFunc cb, void *cb_params);

 private:
  static void CopyTensorImpl(Tensor *p_dst, const Tensor &src,
                             void *exec_stream, CallbackFunc cb,
                             void *cb_params);
};

}  // namespace contrib
}  // namespace paddle_infer
paddle/fluid/inference/api/paddle_tensor.h

@@ -18,16 +18,6 @@
 namespace paddle_infer {
 
-typedef void (*CallbackFunc)(void *);
-
-#if defined(PADDLE_WITH_TESTING) && defined(PADDLE_WITH_INFERENCE_API_TEST)
-class InferApiTesterUtils;
-#endif
-
-namespace contrib {
-class TensorUtils;
-}
-
 /// \brief Paddle data type.
 enum DataType {
   FLOAT32,

@@ -82,21 +72,7 @@ class PD_INFER_DECL Tensor {
   /// It's usually used to get the output tensor data.
   /// \param[out] data The tensor will copy the data to the address.
   template <typename T>
-  void CopyToCpu(T *data) const;
-
-  /// \brief Copy the tensor data to the host memory asynchronously.
-  /// \param[out] data The tensor will copy the data to the address.
-  /// \param[out] exec_stream The tensor will excute copy in this stream(Only
-  /// GPU CUDA stream suppported now).
-  template <typename T>
-  void CopyToCpuAsync(T *data, void *exec_stream) const;
-
-  /// \brief Copy the tensor data to the host memory asynchronously.
-  /// \param[out] data The tensor will copy the data to the address.
-  /// \param[out] cb Callback function cb(cb_params) will be executed on the
-  /// host after all currently enqueued items in the stream have completed .
-  template <typename T>
-  void CopyToCpuAsync(T *data, CallbackFunc cb, void *cb_params) const;
+  void CopyToCpu(T *data);
 
   /// \brief Return the shape of the Tensor.
   std::vector<int> shape() const;

@@ -116,20 +92,12 @@ class PD_INFER_DECL Tensor {
   /// \return The data type of the tensor.
   DataType type() const;
 
-  /// \brief Return the place type of the tensor.
-  /// \return The place type of the tensor.
-  PlaceType place() const;
-
 protected:
   explicit Tensor(void *scope);
   void *FindTensor() const;
   void SetPlace(PlaceType place, int device = -1);
   void SetName(const std::string &name);
 
-  template <typename T>
-  void CopyToCpuImpl(T *data, void *stream = nullptr,
-                     CallbackFunc cb = nullptr,
-                     void *cb_params = nullptr) const;
-
   std::string name_;
   // The corresponding tensor pointer inside Paddle workspace is cached for
   // performance.

@@ -139,11 +107,6 @@ class PD_INFER_DECL Tensor {
   void *scope_{nullptr};
   PlaceType place_;
   int device_;
-
-  friend class paddle_infer::contrib::TensorUtils;
-#if defined(PADDLE_WITH_TESTING) && defined(PADDLE_WITH_INFERENCE_API_TEST)
-  friend class paddle_infer::InferApiTesterUtils;
-#endif
 };
 
 }  // namespace paddle_infer
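The removed doc comments above describe a second completion mode: instead of handing back the stream, the caller passes a host callback that fires once everything queued on the copy stream has finished. A minimal sketch of that form, following the deleted declarations; the function name, buffer, and callback body are illustrative:

// Sketch only: CallbackFunc was the removed `void (*)(void *)` typedef, so a
// capture-free lambda converts to it implicitly.
void CallbackCopySketch(paddle_infer::Tensor *output_tensor, float *out_data) {
  output_tensor->CopyToCpuAsync(
      out_data,
      [](void *cb_params) {
        // Runs on the host after the copy (and all earlier work on the same
        // stream) has completed.
        float *data = static_cast<float *>(cb_params);
        (void)data;  // consume the result here
      },
      static_cast<void *>(out_data));
  cudaDeviceSynchronize();  // or defer synchronization to a later point
}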
paddle/fluid/inference/tests/api/CMakeLists.txt

@@ -682,11 +682,6 @@ if(WITH_GPU)
     inference_analysis_test(paddle_infer_api_test SRCS paddle_infer_api_test.cc
         EXTRA_DEPS ${INFERENCE_EXTRA_DEPS}
         ARGS --infer_model=${RESNET50_MODEL_DIR})
-    inference_analysis_test(paddle_infer_api_copy_tensor_tester SRCS paddle_infer_api_copy_tensor_tester.cc
-        EXTRA_DEPS ${INFERENCE_EXTRA_DEPS}
-        ARGS --infer_model=${RESNET50_MODEL_DIR})
-    set_tests_properties(paddle_infer_api_copy_tensor_tester PROPERTIES TIMEOUT 30)
 endif()
 
 if("$ENV{CI_SKIP_CPP_TEST}" STREQUAL "ON")
paddle/fluid/inference/tests/api/paddle_infer_api_copy_tensor_tester.cc (deleted, 100644 → 0)

/* Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */

#include <cuda_runtime.h>
#include <gtest/gtest.h>
#include <cstring>
#include <numeric>
#include "gflags/gflags.h"
#include "glog/logging.h"
#include "paddle/fluid/inference/api/paddle_infer_contrib.h"
#include "paddle/fluid/inference/tests/api/trt_test_helper.h"
#include "paddle/fluid/platform/float16.h"

namespace paddle_infer {

class InferApiTesterUtils {
 public:
  static std::unique_ptr<Tensor> CreateInferTensorForTest(
      const std::string &name, PlaceType place, void *p_scope) {
    auto var = static_cast<paddle::framework::Scope *>(p_scope)->Var(name);
    var->GetMutable<paddle::framework::LoDTensor>();
    std::unique_ptr<Tensor> res(new Tensor(p_scope));
    res->input_or_output_ = true;
    res->SetName(name);
    res->SetPlace(place, 0 /*device id*/);
    return res;
  }
};

TEST(Tensor, copy_to_cpu_async_stream) {
  LOG(INFO) << GetVersion();
  UpdateDllFlag("conv_workspace_size_limit", "4000");
  std::string model_dir = FLAGS_infer_model + "/model";
  Config config;
  config.SetModel(model_dir + "/model", model_dir + "/params");
  config.EnableUseGpu(100, 0);

  auto predictor = CreatePredictor(config);
  auto pred_clone = predictor->Clone();

  std::vector<int> in_shape = {1, 3, 318, 318};
  int in_num = std::accumulate(in_shape.begin(), in_shape.end(), 1,
                               [](int &a, int &b) { return a * b; });

  std::vector<float> input(in_num, 1.0);

  const auto &input_names = predictor->GetInputNames();
  auto input_tensor = predictor->GetInputHandle(input_names[0]);

  input_tensor->Reshape(in_shape);
  input_tensor->CopyFromCpu(input.data());
  predictor->Run();

  const auto &output_names = predictor->GetOutputNames();
  auto output_tensor = predictor->GetOutputHandle(output_names[0]);
  std::vector<int> output_shape = output_tensor->shape();
  int out_num = std::accumulate(output_shape.begin(), output_shape.end(), 1,
                                std::multiplies<int>());

  float *out_data = static_cast<float *>(
      contrib::TensorUtils::CudaMallocPinnedMemory(sizeof(float) * out_num));
  memset(out_data, 0, sizeof(float) * out_num);

  std::vector<float> correct_out_data = {
      127.78,   1.07353,  -229.42, 1127.28, -177.365,
      -292.412, -271.614, 466.054, 540.436, -214.223,
  };

  for (int i = 0; i < 100; i++) {
    predictor->Run();
  }

  cudaStream_t stream;
  output_tensor->CopyToCpuAsync(out_data, static_cast<void *>(&stream));
  // sync
  cudaStreamSynchronize(stream);

  for (int i = 0; i < 10; i++) {
    EXPECT_NEAR(out_data[i] / correct_out_data[i], 1.0, 1e-3);
  }
  contrib::TensorUtils::CudaFreePinnedMemory(static_cast<void *>(out_data));
}

TEST(Tensor, copy_to_cpu_async_callback) {
  LOG(INFO) << GetVersion();
  UpdateDllFlag("conv_workspace_size_limit", "4000");
  std::string model_dir = FLAGS_infer_model + "/model";
  Config config;
  config.SetModel(model_dir + "/model", model_dir + "/params");
  config.EnableUseGpu(100, 0);

  auto predictor = CreatePredictor(config);
  auto pred_clone = predictor->Clone();

  std::vector<int> in_shape = {1, 3, 318, 318};
  int in_num = std::accumulate(in_shape.begin(), in_shape.end(), 1,
                               [](int &a, int &b) { return a * b; });

  std::vector<float> input(in_num, 1.0);

  const auto &input_names = predictor->GetInputNames();
  auto input_tensor = predictor->GetInputHandle(input_names[0]);

  input_tensor->Reshape(in_shape);
  input_tensor->CopyFromCpu(input.data());
  predictor->Run();

  const auto &output_names = predictor->GetOutputNames();
  auto output_tensor = predictor->GetOutputHandle(output_names[0]);
  std::vector<int> output_shape = output_tensor->shape();
  int out_num = std::accumulate(output_shape.begin(), output_shape.end(), 1,
                                std::multiplies<int>());

  float *out_data = static_cast<float *>(
      contrib::TensorUtils::CudaMallocPinnedMemory(sizeof(float) * out_num));
  memset(out_data, 0, sizeof(float) * out_num);

  for (int i = 0; i < 100; i++) {
    predictor->Run();
  }

  output_tensor->CopyToCpuAsync(
      out_data,
      [](void *cb_params) {
        float *data = static_cast<float *>(cb_params);
        std::vector<float> correct_out_data = {
            127.78,   1.07353,  -229.42, 1127.28, -177.365,
            -292.412, -271.614, 466.054, 540.436, -214.223,
        };
        for (int i = 0; i < 10; i++) {
          EXPECT_NEAR(data[i] / correct_out_data[i], 1.0, 1e-3);
        }
      },
      static_cast<void *>(out_data));

  cudaDeviceSynchronize();
  contrib::TensorUtils::CudaFreePinnedMemory(static_cast<void *>(out_data));
}

template <class DTYPE>
static void test_copy_tensor(PlaceType src_place, PlaceType dst_place) {
  paddle::framework::Scope scope;
  auto tensor_src = paddle_infer::InferApiTesterUtils::CreateInferTensorForTest(
      "tensor_src", src_place, static_cast<void *>(&scope));
  auto tensor_dst = paddle_infer::InferApiTesterUtils::CreateInferTensorForTest(
      "tensor_dst", dst_place, static_cast<void *>(&scope));

  std::vector<DTYPE> data_src(6, 1);
  tensor_src->Reshape({2, 3});
  tensor_src->CopyFromCpu(data_src.data());

  std::vector<DTYPE> data_dst(4, 2);
  tensor_dst->Reshape({2, 2});
  tensor_dst->CopyFromCpu(data_dst.data());

  paddle_infer::contrib::TensorUtils::CopyTensor(tensor_dst.get(), *tensor_src);

  EXPECT_EQ(tensor_dst->shape().size(), (size_t)2);
  EXPECT_EQ(tensor_dst->shape()[0], 2);
  EXPECT_EQ(tensor_dst->shape()[1], 3);

  std::vector<DTYPE> data_check(6, 3);
  tensor_dst->CopyToCpu<DTYPE>(static_cast<DTYPE *>(data_check.data()));

  for (int i = 0; i < 6; i++) {
    EXPECT_NEAR(data_check[i], 1, 1e-5);
  }
}

TEST(CopyTensor, float32) {
  test_copy_tensor<float>(PlaceType::kCPU, PlaceType::kCPU);
  test_copy_tensor<float>(PlaceType::kCPU, PlaceType::kGPU);
  test_copy_tensor<float>(PlaceType::kGPU, PlaceType::kGPU);
}

TEST(CopyTensor, int32) {
  test_copy_tensor<int32_t>(PlaceType::kCPU, PlaceType::kCPU);
  test_copy_tensor<int32_t>(PlaceType::kGPU, PlaceType::kGPU);
}

TEST(CopyTensor, int64) {
  test_copy_tensor<int64_t>(PlaceType::kCPU, PlaceType::kCPU);
  test_copy_tensor<int64_t>(PlaceType::kGPU, PlaceType::kGPU);
}

TEST(CopyTensor, int8) {
  test_copy_tensor<int8_t>(PlaceType::kCPU, PlaceType::kCPU);
  test_copy_tensor<int8_t>(PlaceType::kGPU, PlaceType::kGPU);
}

TEST(CopyTensor, uint8) {
  test_copy_tensor<uint8_t>(PlaceType::kCPU, PlaceType::kCPU);
  test_copy_tensor<uint8_t>(PlaceType::kGPU, PlaceType::kGPU);
}

TEST(CopyTensor, float16) {
  paddle::framework::Scope scope;
  auto tensor_src = paddle_infer::InferApiTesterUtils::CreateInferTensorForTest(
      "tensor_src", PlaceType::kCPU, static_cast<void *>(&scope));
  auto tensor_dst = paddle_infer::InferApiTesterUtils::CreateInferTensorForTest(
      "tensor_dst", PlaceType::kCPU, static_cast<void *>(&scope));

  using paddle::platform::float16;
  std::vector<float16> data_src(6, float16(1.0));
  tensor_src->Reshape({2, 3});
  tensor_src->CopyFromCpu(data_src.data());

  std::vector<float16> data_dst(4, float16(2.0));
  tensor_dst->Reshape({2, 2});
  tensor_dst->CopyFromCpu(data_dst.data());

  paddle_infer::contrib::TensorUtils::CopyTensor(tensor_dst.get(), *tensor_src);

  EXPECT_EQ(tensor_dst->shape().size(), (size_t)2);
  EXPECT_EQ(tensor_dst->shape()[0], 2);
  EXPECT_EQ(tensor_dst->shape()[1], 3);

  std::vector<float16> data_check(6, float16(1.0));
  tensor_dst->CopyToCpu<float16>(data_check.data());

  for (int i = 0; i < 6; i++) {
    EXPECT_TRUE(data_check[i] == float16(1.0));
  }
}

TEST(CopyTensor, float16_gpu) {
  paddle::framework::Scope scope;
  auto tensor_src = paddle_infer::InferApiTesterUtils::CreateInferTensorForTest(
      "tensor_src", PlaceType::kGPU, static_cast<void *>(&scope));
  auto tensor_dst = paddle_infer::InferApiTesterUtils::CreateInferTensorForTest(
      "tensor_dst", PlaceType::kGPU, static_cast<void *>(&scope));

  using paddle::platform::float16;
  std::vector<float16> data_src(6, float16(1.0));
  tensor_src->Reshape({2, 3});
  tensor_src->CopyFromCpu(data_src.data());

  std::vector<float16> data_dst(4, float16(2.0));
  tensor_dst->Reshape({2, 2});
  tensor_dst->CopyFromCpu(data_dst.data());

  paddle_infer::contrib::TensorUtils::CopyTensor(tensor_dst.get(), *tensor_src);

  EXPECT_EQ(tensor_dst->shape().size(), (size_t)2);
  EXPECT_EQ(tensor_dst->shape()[0], 2);
  EXPECT_EQ(tensor_dst->shape()[1], 3);

  std::vector<float16> data_check(6, float16(1.0));
  tensor_dst->CopyToCpu<float16>(data_check.data());

  for (int i = 0; i < 6; i++) {
    EXPECT_TRUE(data_check[i] == float16(1.0));
  }
}

TEST(CopyTensor, async_stream) {
  paddle::framework::Scope scope;
  auto tensor_src = paddle_infer::InferApiTesterUtils::CreateInferTensorForTest(
      "tensor_src", PlaceType::kGPU, static_cast<void *>(&scope));
  auto tensor_dst = paddle_infer::InferApiTesterUtils::CreateInferTensorForTest(
      "tensor_dst", PlaceType::kGPU, static_cast<void *>(&scope));

  std::vector<float> data_src(6, 1.0);
  tensor_src->Reshape({2, 3});
  tensor_src->CopyFromCpu(data_src.data());

  std::vector<float> data_dst(4, 2.0);
  tensor_dst->Reshape({2, 2});
  tensor_dst->CopyFromCpu(data_dst.data());

  cudaStream_t stream;
  paddle_infer::contrib::TensorUtils::CopyTensorAsync(
      tensor_dst.get(), *tensor_src, static_cast<void *>(&stream));

  EXPECT_EQ(tensor_dst->shape().size(), (size_t)2);
  EXPECT_EQ(tensor_dst->shape()[0], 2);
  EXPECT_EQ(tensor_dst->shape()[1], 3);

  cudaStreamSynchronize(stream);

  std::vector<float> data_check(6, 1.0);
  tensor_dst->CopyToCpu<float>(data_check.data());

  for (int i = 0; i < 6; i++) {
    EXPECT_NEAR(data_check[i], static_cast<float>(1.0), 1e-5);
  }
}

TEST(CopyTensor, async_callback) {
  paddle::framework::Scope scope;
  auto tensor_src = paddle_infer::InferApiTesterUtils::CreateInferTensorForTest(
      "tensor_src", PlaceType::kCPU, static_cast<void *>(&scope));
  auto tensor_dst = paddle_infer::InferApiTesterUtils::CreateInferTensorForTest(
      "tensor_dst", PlaceType::kGPU, static_cast<void *>(&scope));

  std::vector<float> data_src(6, 1.0);
  tensor_src->Reshape({2, 3});
  tensor_src->CopyFromCpu(data_src.data());

  std::vector<float> data_dst(4, 2.0);
  tensor_dst->Reshape({2, 2});
  tensor_dst->CopyFromCpu(data_dst.data());

  paddle_infer::contrib::TensorUtils::CopyTensorAsync(
      tensor_dst.get(), *tensor_src,
      [](void *cb_params) {
        Tensor *tensor = static_cast<Tensor *>(cb_params);
        EXPECT_EQ(tensor->shape().size(), (size_t)2);
        EXPECT_EQ(tensor->shape()[0], 2);
        EXPECT_EQ(tensor->shape()[1], 3);
      },
      static_cast<void *>(&(*tensor_dst)));

  cudaDeviceSynchronize();
}

}  // namespace paddle_infer
paddle/fluid/pybind/inference_api.cc

@@ -28,7 +28,6 @@
 #include <vector>
 #include "paddle/fluid/inference/api/analysis_predictor.h"
 #include "paddle/fluid/inference/api/helper.h"
-#include "paddle/fluid/inference/api/paddle_infer_contrib.h"
 #include "paddle/fluid/inference/api/paddle_inference_api.h"
 #include "paddle/fluid/inference/api/paddle_pass_builder.h"
 #include "paddle/fluid/inference/utils/io_utils.h"

@@ -287,12 +286,6 @@ py::bytes SerializePDTensorToBytes(PaddleTensor &tensor) {  // NOLINT
   paddle::inference::SerializePDTensorToStream(&ss, tensor);
   return static_cast<py::bytes>(ss.str());
 }
 
-void CopyPaddleInferTensor(paddle_infer::Tensor &dst,
-                           const paddle_infer::Tensor &src) {
-  return paddle_infer::contrib::TensorUtils::CopyTensor(&dst, src);
-}
-
 }  // namespace
 
 void BindInferenceApi(py::module *m) {

@@ -324,7 +317,6 @@ void BindInferenceApi(py::module *m) {
         new paddle_infer::Predictor(config));
     return std::move(pred);
   });
-  m->def("copy_tensor", &CopyPaddleInferTensor);
   m->def("paddle_dtype_size", &paddle::PaddleDtypeSize);
   m->def("paddle_tensor_to_bytes", &SerializePDTensorToBytes);
   m->def("get_version", &paddle_infer::GetVersion);
python/paddle/inference/contrib/__init__.py (deleted, 100644 → 0)

# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
python/paddle/inference/contrib/utils/__init__.py (deleted, 100644 → 0)

# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from ....fluid.core import copy_tensor  # noqa: F401