Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
Crayon鑫
Paddle
提交
eb3050fa
P
Paddle
项目概览
Crayon鑫
/
Paddle
与 Fork 源项目一致
Fork自
PaddlePaddle / Paddle
通知
1
Star
1
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
1
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
1
Issue
1
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
未验证
提交
eb3050fa
编写于
2月 20, 2021
作者:
Q
Qi Li
提交者:
GitHub
2月 20, 2021
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
[ROCM] update fluid inference for rocm (part1), test=develop (#31018)
上级
6df1ca54
变更
15
显示空白变更内容
内联
并排
Showing
15 changed files
with
36 additions
and
30 deletions
+36
-30
paddle/fluid/inference/api/analysis_config.cc
paddle/fluid/inference/api/analysis_config.cc
+6
-6
paddle/fluid/inference/api/analysis_predictor.cc
paddle/fluid/inference/api/analysis_predictor.cc
+3
-3
paddle/fluid/inference/api/analysis_predictor_tester.cc
paddle/fluid/inference/api/analysis_predictor_tester.cc
+2
-2
paddle/fluid/inference/api/api_impl.cc
paddle/fluid/inference/api/api_impl.cc
+1
-1
paddle/fluid/inference/api/api_impl_tester.cc
paddle/fluid/inference/api/api_impl_tester.cc
+1
-1
paddle/fluid/inference/api/demo_ci/vis_demo.cc
paddle/fluid/inference/api/demo_ci/vis_demo.cc
+1
-1
paddle/fluid/inference/api/details/zero_copy_tensor.cc
paddle/fluid/inference/api/details/zero_copy_tensor.cc
+6
-3
paddle/fluid/inference/api/paddle_pass_builder.cc
paddle/fluid/inference/api/paddle_pass_builder.cc
+3
-0
paddle/fluid/inference/lite/engine.cc
paddle/fluid/inference/lite/engine.cc
+1
-1
paddle/fluid/inference/lite/tensor_utils.cc
paddle/fluid/inference/lite/tensor_utils.cc
+1
-1
paddle/fluid/inference/lite/test_engine_lite.cc
paddle/fluid/inference/lite/test_engine_lite.cc
+5
-5
paddle/fluid/inference/lite/test_tensor_utils.cc
paddle/fluid/inference/lite/test_tensor_utils.cc
+3
-3
paddle/fluid/inference/tests/api/analyzer_ernie_tester.cc
paddle/fluid/inference/tests/api/analyzer_ernie_tester.cc
+1
-1
paddle/fluid/inference/tests/api/lite_mul_model_test.cc
paddle/fluid/inference/tests/api/lite_mul_model_test.cc
+1
-1
paddle/fluid/inference/tests/test_helper.h
paddle/fluid/inference/tests/test_helper.h
+1
-1
未找到文件。
paddle/fluid/inference/api/analysis_config.cc
浏览文件 @
eb3050fa
...
...
@@ -18,7 +18,7 @@
#include "paddle/fluid/platform/enforce.h"
#include "paddle/fluid/platform/gpu_info.h"
#if
def PADDLE_WITH_CUDA
#if
defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
DECLARE_uint64
(
initial_gpu_memory_in_mb
);
#endif
...
...
@@ -71,7 +71,7 @@ void AnalysisConfig::SetModel(const std::string &prog_file_path,
}
void
AnalysisConfig
::
EnableUseGpu
(
uint64_t
memory_pool_init_size_mb
,
int
device_id
)
{
#if
def PADDLE_WITH_CUDA
#if
defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
use_gpu_
=
true
;
memory_pool_init_size_mb_
=
memory_pool_init_size_mb
;
FLAGS_initial_gpu_memory_in_mb
=
memory_pool_init_size_mb_
;
...
...
@@ -214,7 +214,7 @@ AnalysisConfig::AnalysisConfig(const AnalysisConfig &other) {
}
void
AnalysisConfig
::
EnableCUDNN
()
{
#if
def PADDLE_WITH_CUDA
#if
defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
use_cudnn_
=
use_gpu_
;
#else
LOG
(
ERROR
)
<<
"Please compile with CUDA first to use cuDNN"
;
...
...
@@ -288,7 +288,7 @@ void AnalysisConfig::EnableTensorRtEngine(
int
workspace_size
,
int
max_batch_size
,
int
min_subgraph_size
,
AnalysisConfig
::
Precision
precision_mode
,
bool
use_static
,
bool
use_calib_mode
)
{
#if
def PADDLE_WITH_CUDA
#if
defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
if
(
!
use_gpu
())
{
LOG
(
ERROR
)
<<
"To use TensorRT engine, please call EnableGpu() first"
;
return
;
...
...
@@ -384,7 +384,7 @@ void AnalysisConfig::Update() {
}
}
if
(
use_gpu
()
&&
use_cudnn_
)
{
#if
def PADDLE_WITH_CUDA
#if
defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
if
(
!
enable_ir_optim_
)
{
LOG
(
ERROR
)
<<
"EnableCUDNN() only works when IR optimization is enabled."
;
}
else
{
...
...
@@ -526,7 +526,7 @@ void AnalysisConfig::SetCpuMathLibraryNumThreads(
}
float
AnalysisConfig
::
fraction_of_gpu_memory_for_pool
()
const
{
#if
def PADDLE_WITH_CUDA
#if
defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
// Get the GPU memory details and calculate the fraction of memory for the
// GPU memory pool.
size_t
gpu_total
,
gpu_available
;
...
...
paddle/fluid/inference/api/analysis_predictor.cc
浏览文件 @
eb3050fa
...
...
@@ -107,7 +107,7 @@ bool PaddleTensorToLoDTensor(const PaddleTensor &pt, framework::LoDTensor *t,
PADDLE_ENFORCE_EQ
(
platform
::
is_xpu_place
(
place
),
false
,
platform
::
errors
::
InvalidArgument
(
"Only one choice can be made between CPU and XPU."
));
#if
def PADDLE_WITH_CUDA
#if
defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
platform
::
DeviceContextPool
&
pool
=
platform
::
DeviceContextPool
::
Instance
();
auto
*
dev_ctx
=
static_cast
<
const
platform
::
CUDADeviceContext
*>
(
pool
.
Get
(
place
));
...
...
@@ -192,7 +192,7 @@ bool AnalysisPredictor::PrepareScope(
paddle
::
framework
::
InitDevices
();
scope_
.
reset
(
new
paddle
::
framework
::
Scope
(),
[](
framework
::
Scope
*
scope
)
{
delete
scope
;
#if
def PADDLE_WITH_CUDA
#if
defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
for
(
int
dev_id
=
0
;
dev_id
<
paddle
::
platform
::
GetCUDADeviceCount
();
++
dev_id
)
{
memory
::
Release
(
platform
::
CUDAPlace
(
dev_id
));
...
...
@@ -244,7 +244,7 @@ bool AnalysisPredictor::CreateExecutor() {
platform
::
errors
::
InvalidArgument
(
"Only one choice can be made between CPU and XPU."
));
place_
=
paddle
::
platform
::
CUDAPlace
(
config_
.
gpu_device_id
());
#if
def PADDLE_WITH_CUDA
#if
defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
if
(
config_
.
thread_local_stream_enabled
())
{
auto
*
ctx
=
static_cast
<
platform
::
CUDADeviceContext
*>
(
platform
::
DeviceContextPool
::
Instance
().
Get
(
place_
));
...
...
paddle/fluid/inference/api/analysis_predictor_tester.cc
浏览文件 @
eb3050fa
...
...
@@ -63,7 +63,7 @@ TEST(AnalysisPredictor, analysis_on) {
AnalysisConfig
config
;
config
.
SetModel
(
FLAGS_dirname
);
config
.
SwitchIrOptim
(
true
);
#if
def PADDLE_WITH_CUDA
#if
defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
config
.
EnableUseGpu
(
100
,
0
);
#else
config
.
DisableGpu
();
...
...
@@ -486,7 +486,7 @@ TEST_F(MkldnnQuantizerTest, kl_scaling_factor_unsigned) {
}
#endif
#if
def PADDLE_WITH_CUDA
#if
defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
TEST
(
AnalysisPredictor
,
bf16_gpu_pass_strategy
)
{
AnalysisConfig
config
;
config
.
SetModel
(
FLAGS_dirname
);
...
...
paddle/fluid/inference/api/api_impl.cc
浏览文件 @
eb3050fa
...
...
@@ -242,7 +242,7 @@ bool NativePaddlePredictor::SetFeed(const std::vector<PaddleTensor> &inputs,
platform
::
is_xpu_place
(
place_
),
false
,
platform
::
errors
::
InvalidArgument
(
"Only one choice can be made between CPU and XPU."
));
#if
def PADDLE_WITH_CUDA
#if
defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
platform
::
DeviceContextPool
&
pool
=
platform
::
DeviceContextPool
::
Instance
();
auto
*
dev_ctx
=
...
...
paddle/fluid/inference/api/api_impl_tester.cc
浏览文件 @
eb3050fa
...
...
@@ -297,7 +297,7 @@ TEST(inference_api_native, image_classification_xpu) {
}
#endif
#if
def PADDLE_WITH_CUDA
#if
defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
TEST
(
inference_api_native
,
word2vec_gpu
)
{
MainWord2Vec
(
paddle
::
PaddlePlace
::
kGPU
);
}
...
...
paddle/fluid/inference/api/demo_ci/vis_demo.cc
浏览文件 @
eb3050fa
...
...
@@ -20,7 +20,7 @@ limitations under the License. */
#include "gflags/gflags.h"
#include "utils.h" // NOLINT
#if
def PADDLE_WITH_CUDA
#if
defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
DECLARE_double
(
fraction_of_gpu_memory_to_use
);
#endif
DEFINE_string
(
modeldir
,
""
,
"Directory of the inference model."
);
...
...
paddle/fluid/inference/api/details/zero_copy_tensor.cc
浏览文件 @
eb3050fa
...
...
@@ -116,7 +116,7 @@ void ZeroCopyTensor::copy_from_cpu(const T *data) {
auto
*
t_data
=
tensor
->
mutable_data
<
T
>
(
platform
::
CPUPlace
());
std
::
memcpy
(
static_cast
<
void
*>
(
t_data
),
data
,
ele_size
);
}
else
if
(
place_
==
PaddlePlace
::
kGPU
)
{
#if
def PADDLE_WITH_CUDA
#if
defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
platform
::
DeviceContextPool
&
pool
=
platform
::
DeviceContextPool
::
Instance
();
platform
::
CUDAPlace
gpu_place
(
device_
);
auto
*
t_data
=
tensor
->
mutable_data
<
T
>
(
gpu_place
);
...
...
@@ -155,15 +155,18 @@ void ZeroCopyTensor::copy_to_cpu(T *data) {
if
(
platform
::
is_cpu_place
(
t_place
))
{
std
::
memcpy
(
static_cast
<
void
*>
(
data
),
t_data
,
ele_num
*
sizeof
(
T
));
}
else
if
(
place_
==
PaddlePlace
::
kGPU
)
{
#if
def PADDLE_WITH_CUDA
#if
defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
platform
::
DeviceContextPool
&
pool
=
platform
::
DeviceContextPool
::
Instance
();
auto
gpu_place
=
BOOST_GET_CONST
(
platform
::
CUDAPlace
,
t_place
);
auto
*
dev_ctx
=
static_cast
<
const
platform
::
CUDADeviceContext
*>
(
pool
.
Get
(
gpu_place
));
memory
::
Copy
(
platform
::
CPUPlace
(),
static_cast
<
void
*>
(
data
),
gpu_place
,
t_data
,
ele_num
*
sizeof
(
T
),
dev_ctx
->
stream
());
#ifdef PADDLE_WITH_HIP
hipStreamSynchronize
(
dev_ctx
->
stream
());
#else
cudaStreamSynchronize
(
dev_ctx
->
stream
());
#endif
#else
PADDLE_THROW
(
platform
::
errors
::
Unavailable
(
"Not compile with CUDA, should not reach here."
));
...
...
paddle/fluid/inference/api/paddle_pass_builder.cc
浏览文件 @
eb3050fa
...
...
@@ -16,6 +16,9 @@
#ifdef PADDLE_WITH_CUDA
#include <cudnn.h>
#endif
#ifdef PADDLE_WITH_HIP
#include <miopen/miopen.h>
#endif
#include <glog/logging.h>
#include <sstream>
...
...
paddle/fluid/inference/lite/engine.cc
浏览文件 @
eb3050fa
...
...
@@ -12,7 +12,7 @@
// See the License for the specific language governing permissions and
// limitations under the License.
#if
def PADDLE_WITH_CUDA
#if
defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
#define LITE_WITH_CUDA 1
#endif
...
...
paddle/fluid/inference/lite/tensor_utils.cc
浏览文件 @
eb3050fa
...
...
@@ -123,7 +123,7 @@ void MemoryCopyAsync(const platform::Place& dst_place, void* dst_data,
if
(
platform
::
is_cpu_place
(
dst_place
)
&&
platform
::
is_cpu_place
(
src_place
))
{
memory
::
Copy
(
cpu_place
,
dst_data
,
cpu_place
,
src_data
,
size
);
}
else
{
#if
def PADDLE_WITH_CUDA
#if
defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
if
(
platform
::
is_cpu_place
(
dst_place
)
&&
platform
::
is_gpu_place
(
src_place
))
{
PADDLE_THROW
(
platform
::
errors
::
Unimplemented
(
...
...
paddle/fluid/inference/lite/test_engine_lite.cc
浏览文件 @
eb3050fa
...
...
@@ -74,7 +74,7 @@ void make_fake_model(std::string* model, std::string* param) {
*
block_
->
add_ops
()
=
*
fetch
->
Proto
();
framework
::
Scope
scope
;
#if
def PADDLE_WITH_CUDA
#if
defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
platform
::
CUDAPlace
place
;
platform
::
CUDADeviceContext
ctx
(
place
);
#else
...
...
@@ -102,7 +102,7 @@ TEST(EngineManager, engine) {
const
std
::
string
unique_key
(
"engine_0"
);
config
.
model_from_memory
=
true
;
config
.
valid_places
=
{
#if
def PADDLE_WITH_CUDA
#if
defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
paddle
::
lite_api
::
Place
({
TARGET
(
kCUDA
),
PRECISION
(
kFloat
)}),
#endif
paddle
::
lite_api
::
Place
({
TARGET
(
kX86
),
PRECISION
(
kFloat
)}),
...
...
paddle/fluid/inference/lite/test_tensor_utils.cc
浏览文件 @
eb3050fa
...
...
@@ -115,7 +115,7 @@ void test_tensor_copy(const platform::DeviceContext& ctx) {
// Copy to LoDTensor.
framework
::
LoDTensor
lod_tensor_n
;
TensorCopyAsync
(
&
lod_tensor_n
,
lite_api_tensor
,
ctx
);
#if
def PADDLE_WITH_CUDA
#if
defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
if
(
platform
::
is_gpu_place
(
ctx
.
GetPlace
()))
{
platform
::
GpuStreamSync
(
static_cast
<
const
platform
::
CUDADeviceContext
&>
(
ctx
).
stream
());
...
...
@@ -151,7 +151,7 @@ TEST(LiteEngineOp, TensorCopyAsync) {
auto
*
ctx_cpu
=
platform
::
DeviceContextPool
::
Instance
().
Get
(
platform
::
CPUPlace
());
test_tensor_copy
(
*
ctx_cpu
);
#if
def PADDLE_WITH_CUDA
#if
defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
auto
*
ctx_gpu
=
platform
::
DeviceContextPool
::
Instance
().
Get
(
platform
::
CUDAPlace
(
0
));
test_tensor_copy
(
*
ctx_gpu
);
...
...
@@ -162,7 +162,7 @@ TEST(LiteEngineOp, TensorShare) {
auto
*
ctx_cpu
=
platform
::
DeviceContextPool
::
Instance
().
Get
(
platform
::
CPUPlace
());
test_tensor_share
(
*
ctx_cpu
);
#if
def PADDLE_WITH_CUDA
#if
defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
auto
*
ctx_gpu
=
platform
::
DeviceContextPool
::
Instance
().
Get
(
platform
::
CUDAPlace
(
0
));
test_tensor_share
(
*
ctx_gpu
);
...
...
paddle/fluid/inference/tests/api/analyzer_ernie_tester.cc
浏览文件 @
eb3050fa
...
...
@@ -163,7 +163,7 @@ TEST(Analyzer_ernie, profile_mkldnn) { profile(true, false); }
#endif
// Check the model by gpu
#if
def PADDLE_WITH_CUDA
#if
defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
TEST
(
Analyzer_ernie
,
profile_gpu
)
{
profile
(
false
,
true
);
}
#endif
...
...
paddle/fluid/inference/tests/api/lite_mul_model_test.cc
浏览文件 @
eb3050fa
...
...
@@ -118,7 +118,7 @@ TEST(AnalysisPredictor, lite_xpu) {
}
#endif
#if
def PADDLE_WITH_CUDA
#if
defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
TEST
(
AnalysisPredictor
,
thread_local_stream
)
{
const
size_t
thread_num
=
5
;
std
::
vector
<
std
::
thread
>
threads
(
thread_num
);
...
...
paddle/fluid/inference/tests/test_helper.h
浏览文件 @
eb3050fa
...
...
@@ -168,7 +168,7 @@ void TestInference(const std::string& dirname,
if
(
paddle
::
platform
::
is_cpu_place
(
place
))
{
state
=
paddle
::
platform
::
ProfilerState
::
kCPU
;
}
else
{
#if
def PADDLE_WITH_CUDA
#if
defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
state
=
paddle
::
platform
::
ProfilerState
::
kAll
;
// The default device_id of paddle::platform::CUDAPlace is 0.
// Users can get the device_id using:
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录