PaddlePaddle/Paddle — commit cb636a48 (unverified)
Authored by jianghaicheng on Dec 09, 2021; committed via GitHub on Dec 09, 2021.
add ipu device p2 (#37840)
Parent: 890638cf
Showing 19 changed files with 599 additions and 14 deletions (+599 −14).
paddle/fluid/eager/accumulation/gradient_accumulation.cc   +16 −0
paddle/fluid/framework/dlpack_tensor.cc                    +5 −0
paddle/fluid/framework/executor.cc                         +8 −0
paddle/fluid/framework/op_registry.h                       +3 −0
paddle/fluid/framework/tensor_util.cc                      +46 −5
paddle/fluid/imperative/gradient_accumulator.cc            +7 −0
paddle/fluid/memory/allocation/naive_best_fit_allocator.cc +28 −0
paddle/fluid/operators/ipu_runtime_op.cc                   +62 −0
paddle/fluid/operators/ipu_runtime_op.h                    +69 −0
paddle/fluid/operators/math/math_function.cc               +7 −0
paddle/fluid/platform/device_context.cc                    +30 −2
paddle/fluid/platform/place.cc                             +7 −0
paddle/fluid/platform/place.h                              +40 −2
paddle/fluid/pybind/pybind.cc                              +200 −1
paddle/fluid/pybind/tensor_py.h                            +15 −0
python/paddle/__init__.py                                  +2 −0
python/paddle/device/__init__.py                           +46 −3
python/paddle/fluid/__init__.py                            +7 −1
python/paddle/framework/__init__.py                        +1 −0
paddle/fluid/eager/accumulation/gradient_accumulation.cc

@@ -116,6 +116,22 @@ class TensorAddFunctor : public boost::static_visitor<> {
  }
#endif

#ifdef PADDLE_WITH_IPU
  void operator()(const paddle::platform::IPUPlace& place) {
    PADDLE_THROW(paddle::platform::errors::PermissionDenied(
        "Gradient accumulation on place (%s) "
        "is not supported in imperative mode",
        place));
  }
#else
  void operator()(const paddle::platform::IPUPlace& place) {
    PADDLE_THROW(paddle::platform::errors::PermissionDenied(
        "Gradient accumulation on place (%s) "
        "is not supported in imperative mode",
        place));
  }
#endif

  void operator()(const paddle::platform::NPUPinnedPlace& place) {
    PADDLE_THROW(paddle::platform::errors::PermissionDenied(
        "Gradient accumulation on place (%s) "
...
paddle/fluid/framework/dlpack_tensor.cc

@@ -81,6 +81,11 @@ struct DLDeviceVisitor : public boost::static_visitor<::DLDevice> {
    return device;
  }

  inline ::DLDevice operator()(const platform::IPUPlace &place) const {
    PADDLE_THROW(
        platform::errors::Unimplemented("platform::IPUPlace is not supported"));
  }

  inline ::DLDevice operator()(const platform::XPUPlace &place) const {
    PADDLE_THROW(
        platform::errors::Unimplemented("platform::XPUPlace is not supported"));
...
paddle/fluid/framework/executor.cc

@@ -463,6 +463,14 @@ void Executor::RunPartialPreparedContext(ExecutorPrepareContext* ctx,
#else
      PADDLE_THROW(
          platform::errors::Unimplemented("No XPU gc found in CPU/GPU paddle"));
#endif
    } else if (platform::is_ipu_place(place_)) {
#ifdef PADDLE_WITH_IPU
      gc.reset(new IPUGarbageCollector(
          BOOST_GET_CONST(platform::IPUPlace, place_), max_memory_size));
#else
      PADDLE_THROW(
          platform::errors::Unimplemented("No IPU gc found in CPU/IPU paddle"));
#endif
    } else if (platform::is_npu_place(place_)) {
#ifdef PADDLE_WITH_ASCEND_CL
...
paddle/fluid/framework/op_registry.h

@@ -327,6 +327,9 @@ struct OpKernelRegistrarFunctorEx<PlaceType, false, I,
#define REGISTER_OP_CPU_KERNEL(op_type, ...)                           \
  REGISTER_OP_KERNEL(op_type, CPU, ::paddle::platform::CPUPlace, __VA_ARGS__)

#define REGISTER_OP_IPU_KERNEL(op_type, ...)                           \
  REGISTER_OP_KERNEL(op_type, IPU, ::paddle::platform::IPUPlace, __VA_ARGS__)

#define REGISTER_OP_XPU_KERNEL(op_type, ...)                           \
  REGISTER_OP_KERNEL(op_type, XPU, ::paddle::platform::XPUPlace, __VA_ARGS__)
...
paddle/fluid/framework/tensor_util.cc

@@ -76,6 +76,22 @@ void TensorCopy(const Tensor& src, const platform::Place& dst_place,
    memory::Copy(BOOST_GET_CONST(platform::CPUPlace, dst_place), dst_ptr,
                 BOOST_GET_CONST(platform::CPUPlace, src_place), src_ptr, size);
  }
#ifdef PADDLE_WITH_IPU
  else if (platform::is_ipu_place(src_place) &&  // NOLINT
           platform::is_cpu_place(dst_place)) {
    memory::Copy(BOOST_GET_CONST(platform::CPUPlace, dst_place), dst_ptr,
                 BOOST_GET_CONST(platform::IPUPlace, src_place), src_ptr, size);
  } else if (platform::is_cpu_place(src_place) &&
             platform::is_ipu_place(dst_place)) {
    memory::Copy(BOOST_GET_CONST(platform::IPUPlace, dst_place), dst_ptr,
                 BOOST_GET_CONST(platform::CPUPlace, src_place), src_ptr, size);
  } else if (platform::is_ipu_place(src_place) &&
             platform::is_ipu_place(dst_place)) {
    memory::Copy(BOOST_GET_CONST(platform::IPUPlace, dst_place), dst_ptr,
                 BOOST_GET_CONST(platform::IPUPlace, src_place), src_ptr, size);
  }
#endif
#ifdef PADDLE_WITH_XPU
  else if (platform::is_xpu_place(src_place) &&  // NOLINT
           platform::is_cpu_place(dst_place)) {
...

@@ -386,17 +402,33 @@ void TensorCopySync(const Tensor& src, const platform::Place& dst_place,
    memory::Copy(BOOST_GET_CONST(platform::CPUPlace, dst_place), dst_ptr,
                 BOOST_GET_CONST(platform::CPUPlace, src_place), src_ptr, size);
  }
#ifdef PADDLE_WITH_IPU
  else if (platform::is_ipu_place(src_place) &&  // NOLINT
           platform::is_cpu_place(dst_place)) {
    memory::Copy(BOOST_GET_CONST(platform::CPUPlace, dst_place), dst_ptr,
                 BOOST_GET_CONST(platform::IPUPlace, src_place), src_ptr, size);
  } else if (platform::is_cpu_place(src_place) &&  // NOLINT
             platform::is_ipu_place(dst_place)) {
    memory::Copy(BOOST_GET_CONST(platform::IPUPlace, dst_place), dst_ptr,
                 BOOST_GET_CONST(platform::CPUPlace, src_place), src_ptr, size);
  } else {  // NOLINT
    PADDLE_THROW(platform::errors::Unimplemented(
        "Copy from %s to %s is not supported.", src_place, dst_place));
  }
#endif
#ifdef PADDLE_WITH_XPU
  else if (platform::is_xpu_place(src_place) &&  // NOLINT
           platform::is_cpu_place(dst_place)) {
    memory::Copy(BOOST_GET_CONST(platform::CPUPlace, dst_place), dst_ptr,
                 BOOST_GET_CONST(platform::XPUPlace, src_place), src_ptr, size);
  } else if (platform::is_cpu_place(src_place) &&  // NOLINT
             platform::is_xpu_place(dst_place)) {
    memory::Copy(BOOST_GET_CONST(platform::XPUPlace, dst_place), dst_ptr,
                 BOOST_GET_CONST(platform::CPUPlace, src_place), src_ptr, size);
  } else if (platform::is_xpu_place(src_place) &&  // NOLINT
             platform::is_xpu_place(dst_place)) {
    if (src_ptr == dst_ptr) {
      VLOG(3) << "Skip copy the same data async from " << src_place << " to "
              << dst_place;
...

@@ -404,7 +436,8 @@ void TensorCopySync(const Tensor& src, const platform::Place& dst_place,
    }
    memory::Copy(BOOST_GET_CONST(platform::XPUPlace, dst_place), dst_ptr,
                 BOOST_GET_CONST(platform::XPUPlace, src_place), src_ptr, size);
  } else {  // NOLINT
    PADDLE_THROW(platform::errors::Unimplemented(
        "Copy from %s to %s is not supported.", src_place, dst_place));
  }
...

@@ -571,6 +604,11 @@ class AnyVisitor : public boost::static_visitor<bool> {
        platform::errors::Unimplemented("Not supported on place (%s) ", npu));
    // return GetResultHelper(out, npu);
  }

  bool GetResult(const framework::Tensor& out,
                 const platform::IPUPlace& ipu) const {
    PADDLE_THROW(
        platform::errors::Unimplemented("Not supported on place (%s) ", ipu));
  }

  bool GetResult(const framework::Tensor& out,
                 const platform::NPUPinnedPlace& cpu) const {
...

@@ -762,6 +800,9 @@ struct BothFalseVisitor : public boost::static_visitor<> {
  void VisitorImpl(const platform::XPUPlace& xpu) const {
    PADDLE_THROW(platform::errors::Unimplemented("XPUPlace is not supported"));
  }

  void VisitorImpl(const platform::IPUPlace& ipu) const {
    PADDLE_THROW(platform::errors::Unimplemented("IPUPlace is not supported"));
  }

  void VisitorImpl(const platform::CUDAPlace& gpu) const {
#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
...
paddle/fluid/imperative/gradient_accumulator.cc

@@ -155,6 +155,13 @@ class TensorAddFunctor : public boost::static_visitor<> {
        "is not supported in imperative mode",
        place));
  }

  // there is NO support in IPUPlace
  void operator()(const platform::IPUPlace& place) {
    PADDLE_THROW(platform::errors::PermissionDenied(
        "Gradient accumulation on place (%s) "
        "is not supported in imperative mode",
        place));
  }

 private:
  int64_t numel_;
...
paddle/fluid/memory/allocation/naive_best_fit_allocator.cc

@@ -116,6 +116,34 @@ size_t Used<platform::CPUPlace>(const platform::CPUPlace &place) {
  return GetCPUBuddyAllocator()->Used();
}

// For Graphcore IPU
template <>
void *Alloc<platform::IPUPlace>(const platform::IPUPlace &place, size_t size) {
  VLOG(10) << "Allocate " << size << " bytes on " << platform::Place(place);
  VLOG(10) << "IPUPlace, Allocate on cpu.";

  void *p = GetCPUBuddyAllocator()->Alloc(size);
  if (FLAGS_init_allocated_mem) {
    memset(p, 0xEF, size);
  }
  VLOG(10) << "  pointer=" << p;
  return p;
}
template <>
void Free<platform::IPUPlace>(const platform::IPUPlace &place, void *p,
                              size_t size) {
  VLOG(10) << "Free pointer=" << p << " on " << platform::Place(place);
  GetCPUBuddyAllocator()->Free(p);
}
template <>
uint64_t Release<platform::IPUPlace>(const platform::IPUPlace &place) {
  return GetCPUBuddyAllocator()->Release();
}
template <>
size_t Used<platform::IPUPlace>(const platform::IPUPlace &place) {
  return GetCPUBuddyAllocator()->Used();
}

// For kunlun XPU
template <>
void *Alloc<platform::XPUPlace>(const platform::XPUPlace &place, size_t size) {
...
paddle/fluid/operators/ipu_runtime_op.cc (new file, mode 100644)

// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "paddle/fluid/operators/ipu_runtime_op.h"

namespace paddle {
namespace operators {

class IpuRuntimeOp : public framework::OperatorWithKernel {
 public:
  using framework::OperatorWithKernel::OperatorWithKernel;

  void InferShape(framework::InferShapeContext* ctx) const override {}

 protected:
  framework::OpKernelType GetExpectedKernelType(
      const framework::ExecutionContext& ctx) const override {
    return framework::OpKernelType(
        framework::proto::VarType::Type(ctx.Attr<int>("dtype")),
        ctx.device_context());
  }
};

class IpuRuntimeOpMaker : public framework::OpProtoAndCheckerMaker {
 public:
  void Make() override {
    AddInput("FeedList", "FeedList of Graph").AsDuplicable();
    AddOutput("FetchList", "FetchList of Graph").AsDuplicable();
    AddAttr<int>("dtype",
                 "(int, default 5 (FP32)) "
                 "Output data type")
        .SetDefault(framework::proto::VarType::FP32);
    AddComment(R"DOC(
Run graph by PopART runtime.
)DOC");
  }
};

}  // namespace operators
}  // namespace paddle

namespace ops = paddle::operators;
REGISTER_OPERATOR(ipu_runtime, ops::IpuRuntimeOp, ops::IpuRuntimeOpMaker);

REGISTER_OP_IPU_KERNEL(ipu_runtime, ops::IpuRuntimeKernel<float>,
                       ops::IpuRuntimeKernel<double>,
                       ops::IpuRuntimeKernel<int>,
                       ops::IpuRuntimeKernel<int64_t>,
                       ops::IpuRuntimeKernel<bool>,
                       ops::IpuRuntimeKernel<int8_t>,
                       ops::IpuRuntimeKernel<paddle::platform::float16>);
paddle/fluid/operators/ipu_runtime_op.h (new file, mode 100644)

// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#pragma once

#include <memory>
#include <vector>

#include "paddle/fluid/framework/op_registry.h"
#ifdef PADDLE_WITH_IPU
#include "paddle/fluid/framework/ipu/ipu_backend.h"
#include "paddle/fluid/framework/tensor.h"
#endif

namespace paddle {
namespace operators {

template <typename T>
class IpuRuntimeKernel : public framework::OpKernel<T> {
 public:
  void Compute(const framework::ExecutionContext& ctx) const override {
#ifdef PADDLE_WITH_IPU
    auto ipu_backend = framework::ipu::IpuBackend::GetInstance();
    if (!ipu_backend->DeviceIsAttached()) {
      const platform::IPUDeviceContext& ipu_ctx =
          reinterpret_cast<const platform::IPUDeviceContext&>(
              ctx.device_context());
      ipu_backend->AttachDevice(ipu_ctx.DeviceId());
    }

    auto inputs = ctx.MultiInput<framework::Tensor>("FeedList");
    auto outputs = ctx.MultiOutput<framework::Tensor>("FetchList");
    auto output_names = ctx.OutputNames("FetchList");
    VLOG(4) << "IpuRuntime Kernel, begin to run graph";
    ipu_backend->Run(inputs, outputs, ctx);

    // post-run
    // resize tensor when tensor.dims() is empty
    for (size_t i = 0; i < outputs.size(); ++i) {
      auto* out = outputs[i];
      if (out->dims().size() == 0) {
        auto tensor_dtype = out->type();
        auto sizeof_dtype = framework::SizeOfType(tensor_dtype);
        int64_t dim = out->memory_size() / sizeof_dtype;
        out->Resize({dim});
        VLOG(10) << "set ipu_runtime_op output: " << output_names[i]
                 << " dims from () to: "
                 << "(" << dim << ")";
      }
    }
#else
    PADDLE_THROW(platform::errors::PreconditionNotMet(
        "Please compile WITH_IPU option to enable ipu_runtime op"));
#endif
  }
};

}  // namespace operators
}  // namespace paddle
paddle/fluid/operators/math/math_function.cc

@@ -173,6 +173,13 @@ void set_constant_with_place<platform::NPUPinnedPlace>(
      platform::errors::Unimplemented("NPUPinnedPlace is not supported"));
}

template <>
void set_constant_with_place<platform::IPUPlace>(
    const platform::DeviceContext& context, framework::Tensor* tensor,
    float value) {
  PADDLE_THROW(platform::errors::Unimplemented("IPUPlace is not supported"));
}

template <>
void set_constant_with_place<platform::CPUPlace>(
    const platform::DeviceContext& context, framework::Tensor* tensor,
...
paddle/fluid/platform/device_context.cc

@@ -16,6 +16,9 @@ limitations under the License. */
#include "paddle/fluid/memory/allocation/cuda_device_context_allocator.h"
#include "paddle/fluid/platform/cuda_device_guard.h"
#endif
#ifdef PADDLE_WITH_IPU
#include "paddle/fluid/platform/ipu/ipu_backend.h"
#endif
#include "glog/logging.h"
#include "paddle/fluid/platform/profiler.h"
...

@@ -96,8 +99,9 @@ platform::DeviceContext* DeviceContextPool::Get(const platform::Place& place) {
  if (it == device_contexts_.end()) {
    PADDLE_THROW(platform::errors::Unimplemented(
        "Place %s is not supported. Please check that your paddle compiles "
-       "with WITH_GPU, WITH_XPU or WITH_ASCEND_CL option or check that "
-       "your train process set the correct device id if you use Executor.",
+       "with WITH_GPU, WITH_XPU, WITH_IPU or WITH_ASCEND_CL option or check "
+       "that your train process set the correct device id if you use "
+       "Executor.",
        place));
  }
  return it->second.get().get();
...

@@ -158,6 +162,14 @@ DeviceContextPool::DeviceContextPool(
      PADDLE_THROW(
          platform::errors::Unimplemented("XPUPlace is not supported. Please "
                                          "re-compile with WITH_XPU option."));
#endif
    } else if (platform::is_ipu_place(p)) {
#ifdef PADDLE_WITH_IPU
      EmplaceDeviceContext<IPUDeviceContext, IPUPlace>(&device_contexts_, p);
#else
      PADDLE_THROW(
          platform::errors::Unimplemented("IPUPlace is not supported. Please "
                                          "re-compile with WITH_IPU option."));
#endif
    } else if (platform::is_npu_place(p)) {
#ifdef PADDLE_WITH_ASCEND_CL
...

@@ -195,6 +207,22 @@ Eigen::DefaultDevice* CPUDeviceContext::eigen_device() const {
Place CPUDeviceContext::GetPlace() const { return place_; }

#ifdef PADDLE_WITH_IPU
IPUDeviceContext::IPUDeviceContext(IPUPlace place) : place_(place) {
  int id = place.GetDeviceId();
  std::shared_ptr<platform::ipu::IpuBackend> ipu_backend =
      platform::ipu::IpuBackend::GetInstance();
  device_ = ipu_backend->GetDevice(id);
}

Place IPUDeviceContext::GetPlace() const { return place_; }

void IPUDeviceContext::Wait() const {
  /*! \brief  Wait for all operations completion in the stream. */
}

IPUDeviceContext::~IPUDeviceContext() {}
#endif

#ifdef PADDLE_WITH_XPU
XPUDeviceContext::XPUDeviceContext() { context_ = xpu::create_context(); }
...
paddle/fluid/platform/place.cc

@@ -36,6 +36,7 @@ class PlacePrinter : public boost::static_visitor<> {
  void operator()(const XPUPlace &p) { os_ << "XPUPlace(" << p.device << ")"; }
  void operator()(const NPUPlace &p) { os_ << "NPUPlace(" << p.device << ")"; }
  void operator()(const NPUPinnedPlace &p) { os_ << "NPUPinnedPlace"; }
  void operator()(const IPUPlace &p) { os_ << "IPUPlace(" << p.device << ")"; }
  void operator()(const CUDAPinnedPlace &p) { os_ << "CUDAPinnedPlace"; }

 private:
...

@@ -56,6 +57,10 @@ bool is_npu_place(const Place &p) {
  return boost::apply_visitor(IsNPUPlace(), p);
}

bool is_ipu_place(const Place &p) {
  return boost::apply_visitor(IsIPUPlace(), p);
}

bool is_cpu_place(const Place &p) {
  return boost::apply_visitor(IsCPUPlace(), p);
}
...

@@ -80,6 +85,8 @@ bool is_same_place(const Place &p1, const Place &p2) {
    return BOOST_GET_CONST(XPUPlace, p1) == BOOST_GET_CONST(XPUPlace, p2);
  } else if (is_npu_place(p1)) {
    return BOOST_GET_CONST(NPUPlace, p1) == BOOST_GET_CONST(NPUPlace, p2);
  } else if (is_ipu_place(p1)) {
    return BOOST_GET_CONST(IPUPlace, p1) == BOOST_GET_CONST(IPUPlace, p2);
  } else {
    return BOOST_GET_CONST(CUDAPlace, p1) == BOOST_GET_CONST(CUDAPlace, p2);
  }
...
paddle/fluid/platform/place.h

@@ -95,12 +95,25 @@ struct NPUPinnedPlace {
  inline bool operator!=(const NPUPinnedPlace &) const { return false; }
  inline bool operator<(const NPUPinnedPlace &) const { return false; }
};

struct IPUPlace {
  IPUPlace() : IPUPlace(0) {}
  explicit IPUPlace(int d) : device(d) {}

  inline int GetDeviceId() const { return device; }
  // needed for variant equality comparison
  inline bool operator==(const IPUPlace &o) const { return device == o.device; }
  inline bool operator!=(const IPUPlace &o) const { return !(*this == o); }
  inline bool operator<(const IPUPlace &o) const { return device < o.device; }

  int device;
};

struct IsCUDAPlace : public boost::static_visitor<bool> {
  bool operator()(const CPUPlace &) const { return false; }
  bool operator()(const XPUPlace &) const { return false; }
  bool operator()(const NPUPlace &) const { return false; }
  bool operator()(const NPUPinnedPlace &) const { return false; }
  bool operator()(const IPUPlace &) const { return false; }
  bool operator()(const CUDAPlace &) const { return true; }
  bool operator()(const CUDAPinnedPlace &) const { return false; }
};
...

@@ -110,6 +123,7 @@ struct IsCPUPlace : public boost::static_visitor<bool> {
  bool operator()(const XPUPlace &) const { return false; }
  bool operator()(const NPUPlace &) const { return false; }
  bool operator()(const NPUPinnedPlace &) const { return false; }
  bool operator()(const IPUPlace &) const { return false; }
  bool operator()(const CUDAPlace &) const { return false; }
  bool operator()(const CUDAPinnedPlace &) const { return false; }
};
...

@@ -119,6 +133,7 @@ struct IsCUDAPinnedPlace : public boost::static_visitor<bool> {
  bool operator()(const XPUPlace &) const { return false; }
  bool operator()(const NPUPlace &) const { return false; }
  bool operator()(const NPUPinnedPlace &) const { return false; }
  bool operator()(const IPUPlace &) const { return false; }
  bool operator()(const CUDAPlace &) const { return false; }
  bool operator()(const CUDAPinnedPlace &cuda_pinned) const { return true; }
};
...

@@ -128,6 +143,7 @@ struct IsXPUPlace : public boost::static_visitor<bool> {
  bool operator()(const XPUPlace &) const { return true; }
  bool operator()(const NPUPlace &) const { return false; }
  bool operator()(const NPUPinnedPlace &) const { return false; }
  bool operator()(const IPUPlace &) const { return false; }
  bool operator()(const CUDAPlace &) const { return false; }
  bool operator()(const CUDAPinnedPlace &) const { return false; }
};
...

@@ -137,6 +153,7 @@ struct IsNPUPlace : public boost::static_visitor<bool> {
  bool operator()(const XPUPlace &) const { return false; }
  bool operator()(const NPUPlace &) const { return true; }
  bool operator()(const NPUPinnedPlace &) const { return false; }
  bool operator()(const IPUPlace &) const { return false; }
  bool operator()(const CUDAPlace &) const { return false; }
  bool operator()(const CUDAPinnedPlace &) const { return false; }
};
...

@@ -145,22 +162,33 @@ struct IsNPUPinnedPlace : public boost::static_visitor<bool> {
  bool operator()(const CPUPlace &) const { return false; }
  bool operator()(const XPUPlace &) const { return false; }
  bool operator()(const NPUPlace &) const { return false; }
  bool operator()(const IPUPlace &) const { return false; }
  bool operator()(const CUDAPlace &) const { return false; }
  bool operator()(const CUDAPinnedPlace &) const { return false; }
  bool operator()(const NPUPinnedPlace &) const { return true; }
};
struct IsIPUPlace : public boost::static_visitor<bool> {
  bool operator()(const CPUPlace &) const { return false; }
  bool operator()(const XPUPlace &) const { return false; }
  bool operator()(const NPUPlace &) const { return false; }
  bool operator()(const IPUPlace &) const { return true; }
  bool operator()(const CUDAPlace &) const { return false; }
  bool operator()(const CUDAPinnedPlace &) const { return false; }
  bool operator()(const NPUPinnedPlace &) const { return false; }
};

class Place : public boost::variant<CUDAPlace, XPUPlace, NPUPlace, CPUPlace,
-                                    CUDAPinnedPlace, NPUPinnedPlace> {
+                                    CUDAPinnedPlace, NPUPinnedPlace, IPUPlace> {
 private:
  using PlaceBase = boost::variant<CUDAPlace, XPUPlace, NPUPlace, CPUPlace,
-                                   CUDAPinnedPlace, NPUPinnedPlace>;
+                                   CUDAPinnedPlace, NPUPinnedPlace, IPUPlace>;

 public:
  Place() = default;
  Place(const CPUPlace &cpu_place) : PlaceBase(cpu_place) {}     // NOLINT
  Place(const XPUPlace &xpu_place) : PlaceBase(xpu_place) {}     // NOLINT
  Place(const NPUPlace &npu_place) : PlaceBase(npu_place) {}     // NOLINT
  Place(const IPUPlace &ipu_place) : PlaceBase(ipu_place) {}     // NOLINT
  Place(const CUDAPlace &cuda_place) : PlaceBase(cuda_place) {}  // NOLINT
  Place(const CUDAPinnedPlace &cuda_pinned_place)                // NOLINT
      : PlaceBase(cuda_pinned_place) {}
...

@@ -180,6 +208,7 @@ using PlaceList = std::vector<Place>;
bool is_gpu_place(const Place &);
bool is_xpu_place(const Place &);
bool is_npu_place(const Place &);
bool is_ipu_place(const Place &);
bool is_cpu_place(const Place &);
bool is_cuda_pinned_place(const Place &);
bool is_npu_pinned_place(const Place &);
...

@@ -228,6 +257,15 @@ struct PlaceVisitorWrapper
    return typename Visitor::result_type();
#endif
  }

  typename Visitor::result_type operator()(const IPUPlace &ipu) const {
#ifdef PADDLE_WITH_IPU
    return visitor_(ipu);
#else
    PADDLE_THROW(platform::errors::Unavailable(
        "Paddle is not compiled with IPU. Cannot visit ipu device"));
    return typename Visitor::result_type();
#endif
  }

  typename Visitor::result_type operator()(const CUDAPlace &cuda) const {
#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
...
paddle/fluid/pybind/pybind.cc

@@ -132,6 +132,10 @@ limitations under the License. */
#endif
#include "paddle/fluid/platform/cuda_graph_with_memory_pool.h"
#ifdef PADDLE_WITH_IPU
#include "paddle/fluid/platform/ipu/ipu_backend.h"
#include "paddle/fluid/platform/ipu_info.h"
#endif
#ifdef PADDLE_WITH_CRYPTO
#include "paddle/fluid/pybind/crypto.h"
...

@@ -201,6 +205,14 @@ bool IsCompiledWithNPU() {
#endif
}

bool IsCompiledWithIPU() {
#ifndef PADDLE_WITH_IPU
  return false;
#else
  return true;
#endif
}

bool IsCompiledWithMKLDNN() {
#ifndef PADDLE_WITH_MKLDNN
  return false;
...

@@ -816,6 +828,8 @@ PYBIND11_MODULE(core_noavx, m) {
           py::arg("array"), py::arg("place"), py::arg("zero_copy") = false)
      .def("set", SetTensorFromPyArray<paddle::platform::NPUPlace>,
           py::arg("array"), py::arg("place"), py::arg("zero_copy") = false)
      .def("set", SetTensorFromPyArray<paddle::platform::IPUPlace>,
           py::arg("array"), py::arg("place"), py::arg("zero_copy") = false)
      .def("set", SetTensorFromPyArray<paddle::platform::CUDAPinnedPlace>,
           py::arg("array"), py::arg("place"), py::arg("zero_copy") = false,
           R"DOC(
...

@@ -823,7 +837,7 @@ PYBIND11_MODULE(core_noavx, m) {
        Args:
          lod (numpy.ndarray): The data to set.
-         place (CPUPlace|CUDAPlace|XPUPlace|CUDAPinnedPlace|NPUPlace): The place where the
+         place (CPUPlace|CUDAPlace|XPUPlace|IPUPlace|CUDAPinnedPlace|NPUPlace): The place where the
          LoDTensor is to be set.
          zero_copy (bool, optional): Whether to share memory with the input numpy array.
          This parameter only works with CPUPlace. Default: False.
...

@@ -1913,6 +1927,58 @@ All parameter, weight, gradient are variables in Paddle.
      .def("get_device_id",
           [](const platform::NPUPlace &self) { return self.GetDeviceId(); })
      .def("__str__", string::to_string<const platform::NPUPlace &>);

  // IPUPlace
  py::class_<platform::IPUPlace>(m, "IPUPlace", R"DOC(
    IPUPlace is a descriptor of a device.
    It represents a IPU device on which a tensor will be allocated and a model will run.

    Examples:
        .. code-block:: python

          import paddle

          # required: ipu

          ipu_place = paddle.IPUPlace()

        )DOC")
      .def("__init__",
           [](platform::IPUPlace &self) {
#ifdef PADDLE_WITH_IPU
             if (platform::GetIPUDeviceCount() == 0) {
               LOG(ERROR) << "Cannot use IPU because there is no IPU "
                             "detected on your "
                             "machine.";
               std::exit(-1);
             }
             // use ipu(0) to comile, while run with the number user configure
             // in sharding and pipline.
             new (&self) platform::IPUPlace(0);
#else
             LOG(ERROR) << string::Sprintf(
                 "Cannot use IPU because you didn't install IPU version "
                 "PaddlePaddle.\n"
                 "If you want to use IPU, please try to install IPU version "
                 "PaddlePaddle by: pip install paddlepaddle*\n"
                 "If you only have CPU, please change IPUPlace to be "
                 "CPUPlace().\n");
             std::exit(-1);
#endif
           })
      .def("_type", &PlaceIndex<platform::IPUPlace>)
      .def("_equals", &IsSamePlace<platform::IPUPlace, platform::Place>)
      .def("_equals", &IsSamePlace<platform::IPUPlace, platform::CUDAPlace>)
      .def("_equals", &IsSamePlace<platform::IPUPlace, platform::CPUPlace>)
      .def("_equals", &IsSamePlace<platform::IPUPlace, platform::XPUPlace>)
      .def("_equals", &IsSamePlace<platform::IPUPlace, platform::NPUPlace>)
      .def("_equals", &IsSamePlace<platform::IPUPlace, platform::IPUPlace>)
      .def("_equals",
           &IsSamePlace<platform::IPUPlace, platform::CUDAPinnedPlace>)
#ifdef PADDLE_WITH_IPU
      .def("get_device_id",
           [](const platform::IPUPlace &self) { return self.GetDeviceId(); })
#endif
      .def("__str__", string::to_string<const platform::IPUPlace &>);

  py::class_<platform::Place> platformplace(m, "Place");
  g_place_pytype = reinterpret_cast<PyTypeObject *>(platformplace.ptr());
  platformplace.def(py::init<>())
...

@@ -1922,6 +1988,7 @@ All parameter, weight, gradient are variables in Paddle.
      .def("_equals", &IsSamePlace<platform::Place, platform::CPUPlace>)
      .def("_equals", &IsSamePlace<platform::Place, platform::XPUPlace>)
      .def("_equals", &IsSamePlace<platform::Place, platform::NPUPlace>)
      .def("_equals", &IsSamePlace<platform::Place, platform::IPUPlace>)
      .def("_equals", &IsSamePlace<platform::Place, platform::CUDAPinnedPlace>)
      .def("is_gpu_place",
           [](platform::Place &self) { return platform::is_gpu_place(self); })
...

@@ -1931,6 +1998,8 @@ All parameter, weight, gradient are variables in Paddle.
           [](platform::Place &self) { return platform::is_xpu_place(self); })
      .def("is_npu_place",
           [](platform::Place &self) { return platform::is_npu_place(self); })
      .def("is_ipu_place",
           [](platform::Place &self) { return platform::is_ipu_place(self); })
      .def("is_cuda_pinned_place",
           [](platform::Place &self) {
             return platform::is_cuda_pinned_place(self);
...

@@ -1947,6 +2016,10 @@ All parameter, weight, gradient are variables in Paddle.
           [](platform::Place &self) {
             return BOOST_GET_CONST(platform::NPUPlace, self).device;
           })
      .def("ipu_device_id",
           [](platform::Place &self) {
             return BOOST_GET_CONST(platform::IPUPlace, self).device;
           })
      .def("set_place", [](platform::Place &self,
                           const platform::Place &other) { self = other; })
      .def("set_place",
...

@@ -1970,6 +2043,10 @@ All parameter, weight, gradient are variables in Paddle.
           [](platform::Place &self, const platform::NPUPlace &npu_place) {
             self = npu_place;
           })
      .def("set_place",
           [](platform::Place &self, const platform::IPUPlace &ipu_place) {
             self = ipu_place;
           })
      .def("__repr__", string::to_string<const platform::Place &>)
      .def("__str__", string::to_string<const platform::Place &>);
...

@@ -2201,6 +2278,7 @@ All parameter, weight, gradient are variables in Paddle.
  m.def("is_compiled_with_ascend", IsCompiledWithAscend);
  m.def("is_compiled_with_rocm", IsCompiledWithROCM);
  m.def("is_compiled_with_npu", IsCompiledWithNPU);
  m.def("is_compiled_with_ipu", IsCompiledWithIPU);
  m.def("is_compiled_with_xpu", IsCompiledWithXPU);
  m.def("is_compiled_with_mkldnn", IsCompiledWithMKLDNN);
  m.def("is_compiled_with_cinn", IsCompiledWithCINN);
...

@@ -2520,6 +2598,10 @@ All parameter, weight, gradient are variables in Paddle.
  });
#endif

#ifdef PADDLE_WITH_IPU
  m.def("get_ipu_device_count", platform::GetIPUDeviceCount);
#endif

  py::enum_<platform::TracerOption>(m, "TracerOption", py::arithmetic())
      .value("kDefault", platform::TracerOption::kDefault)
      .value("kOpDetail", platform::TracerOption::kOpDetail)
...

@@ -2597,6 +2679,11 @@ All parameter, weight, gradient are variables in Paddle.
                    bool val) { self.Set<bool>(name, new bool(val)); })
      .def("set", [](ir::Pass &self, const std::string &name,
                     int val) { self.Set<const int>(name, new int(val)); })
      .def("set",
           [](ir::Pass &self, const std::string &name,
              std::vector<std::string> set) {
             self.Set(name, new std::vector<std::string>(set));
           })
      .def("set",
           [](ir::Pass &self, const std::string &name,
              std::unordered_set<std::string> set) {
...

@@ -3429,6 +3516,118 @@ All parameter, weight, gradient are variables in Paddle.
           })
      .def("device_count", &ParallelExecutor::DeviceCount);

#ifdef PADDLE_WITH_IPU
  py::class_<platform::ipu::IpuBackend,
             std::shared_ptr<platform::ipu::IpuBackend>>(m, "IpuBackend")
      .def(py::init(&platform::ipu::IpuBackend::GetNewInstance))
      .def("clear", &platform::ipu::IpuBackend::Clear)
      .def("set_scope", &platform::ipu::IpuBackend::SetScope)
      .def("set_ipu_strategy", &platform::ipu::IpuBackend::SetIpuStrategy);

  py::class_<platform::ipu::IpuStrategy>(m, "IpuStrategy")
      .def(py::init())
      .def_property(
          "num_ipus",
          [](const platform::ipu::IpuStrategy &self) { return self.num_ipus; },
          [](platform::ipu::IpuStrategy &self, int num_ipus) {
            self.num_ipus = num_ipus;
          },
          R"DOC(
            Int type, set the number ipu we need. Default 1.
          )DOC")
      .def_property(
          "accumulationFactor",
          [](const platform::ipu::IpuStrategy &self) {
            return self.popart_options_.accumulationFactor;
          },
          [](platform::ipu::IpuStrategy &self, int accumulationFactor) {
            self.popart_options_.accumulationFactor = accumulationFactor;
          },
          R"DOC(
            Specify the number of micro-batches to accumulate before
            applying the varUpdate. Default 1.
          )DOC")
      .def_property(
          "batches_per_step",
          [](const platform::ipu::IpuStrategy &self) {
            return self.batches_per_step;
          },
          [](platform::ipu::IpuStrategy &self, int batches_per_step) {
            self.batches_per_step = batches_per_step;
          },
          R"DOC(
            Int type, set batches_per_step. Default 1.
          )DOC")
      .def_property(
          "is_training",
          [](const platform::ipu::IpuStrategy &self) {
            return self.is_training;
          },
          [](platform::ipu::IpuStrategy &self, bool is_training) {
            self.is_training = is_training;
          },
          R"DOC(
            Bool type, True for training, False inference. Default True.
          )DOC")
      .def_property(
          "enable_pipelining",
          [](const platform::ipu::IpuStrategy &self) {
            return self.popart_options_.enablePipelining;
          },
          [](platform::ipu::IpuStrategy &self, bool enable_pipelining) {
            self.popart_options_.enablePipelining = enable_pipelining;
          },
          R"DOC(
            Bool type, True enable pipeline, otherwise disable. Default False.
          )DOC")
      .def_property(
          "enable_manual_shard",
          [](const platform::ipu::IpuStrategy &self) {
            return self.popart_options_.virtualGraphMode ==
                   platform::ipu::VirtualGraphMode::Manual;
          },
          [](platform::ipu::IpuStrategy &self, bool enable_ipu_shard) {
            if (enable_ipu_shard) {
              self.popart_options_.virtualGraphMode =
                  platform::ipu::VirtualGraphMode::Manual;
            } else {
              self.popart_options_.virtualGraphMode =
                  platform::ipu::VirtualGraphMode::Off;
            }
          },
          R"DOC(
            Bool type, True enable model sharding, otherwise disable. Default "
            "False.
          )DOC")
      .def_property(
          "need_avg_shard",
          [](const platform::ipu::IpuStrategy &self) {
            return self.need_avg_shard;
          },
          [](platform::ipu::IpuStrategy &self, bool need_avg_shard) {
            self.need_avg_shard = need_avg_shard;
          },
          R"DOC(
            Bool type, True enable avg shard, otherwise disable. Default False.
          )DOC")
      .def_property(
          "batch_size",
          [](const platform::ipu::IpuStrategy &self) {
            return self.batch_size;
          },
          [](platform::ipu::IpuStrategy &self, int batch_size) {
            self.batch_size = batch_size;
          },
          R"DOC(
            Int type, used to make batch size fixed. Default 1.
          )DOC")
      .def_property(
          "enable_fp16",
          [](const platform::ipu::IpuStrategy &self) {
            return self.enable_fp16;
          },
          [](platform::ipu::IpuStrategy &self, bool enable_fp16) {
            self.enable_fp16 = enable_fp16;
          },
          R"DOC(
            Bool type, True enable float16 mode, otherwise disable. Default False.)DOC");
#endif

  BindFleetWrapper(&m);
  BindIO(&m);
...
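The hunks above expose IPUPlace, IpuBackend, and IpuStrategy to Python through the core module. A minimal sketch of how those bindings might be driven from Python, assuming a WITH_IPU build and that the classes are reachable as paddle.fluid.core.IpuStrategy / IpuBackend (the attribute names simply mirror the def/def_property calls above):

import paddle
import paddle.fluid.core as core

# Descriptor of the IPU device; per the binding's __init__ comment,
# compilation targets ipu(0) and the actual IPU count comes from the strategy.
place = paddle.IPUPlace()

# Configure the PopART-backed runtime through the bound properties.
strategy = core.IpuStrategy()
strategy.num_ipus = 2             # number of IPUs to request
strategy.is_training = False      # True for training, False for inference
strategy.batches_per_step = 1
strategy.enable_pipelining = False

# Hand the strategy to the backend that the ipu_runtime op drives.
backend = core.IpuBackend()
backend.set_ipu_strategy(strategy)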
paddle/fluid/pybind/tensor_py.h

@@ -313,6 +313,21 @@ void SetTensorFromPyArrayT(
    PADDLE_THROW(platform::errors::PermissionDenied(
        "Cannot use XPUPlace in CPU/GPU version, "
        "Please recompile or reinstall Paddle with XPU support."));
#endif
  } else if (paddle::platform::is_ipu_place(place)) {
#ifdef PADDLE_WITH_IPU
    if (zero_copy) {
      auto holder = std::make_shared<details::NumpyAllocation<T>>(array);
      auto type = framework::ToDataType(std::type_index(typeid(T)));
      self->ResetHolderWithType(holder, type);
    } else {
      auto dst = self->mutable_data<T>(place);
      std::memcpy(dst, array.data(), array.nbytes());
    }
#else
    PADDLE_THROW(platform::errors::PermissionDenied(
        "Cannot use IPUPlace in CPU/GPU/XPU/NPU version, "
        "Please recompile or reinstall Paddle with IPU support."));
#endif
  } else if (paddle::platform::is_npu_place(place)) {
#ifdef PADDLE_WITH_ASCEND_CL
...
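This new branch in SetTensorFromPyArrayT is what backs the `set` overload for IPUPlace registered in pybind.cc above. A hedged sketch of how it is reached from Python, assuming a WITH_IPU build:

import numpy as np
import paddle
import paddle.fluid.core as core

t = core.LoDTensor()
# With zero_copy left at its default (False), the IPU branch above copies the
# numpy buffer into the tensor with std::memcpy.
t.set(np.random.rand(2, 3).astype('float32'), paddle.IPUPlace())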
python/paddle/__init__.py

@@ -260,6 +260,7 @@ from .framework.random import set_cuda_rng_state  # noqa: F401
from .framework import ParamAttr  # noqa: F401
from .framework import create_parameter  # noqa: F401
from .framework import CPUPlace  # noqa: F401
from .framework import IPUPlace  # noqa: F401
from .framework import CUDAPlace  # noqa: F401
from .framework import NPUPlace  # noqa: F401
from .framework import CUDAPinnedPlace  # noqa: F401
...

@@ -291,6 +292,7 @@ from .fluid.framework import get_flags  # noqa: F401
from .fluid.framework import set_flags  # noqa: F401
from .device import is_compiled_with_xpu  # noqa: F401
from .device import is_compiled_with_npu  # noqa: F401
from .device import is_compiled_with_ipu  # noqa: F401
from .device import XPUPlace  # noqa: F401
from .fluid.dygraph.base import enable_dygraph as disable_static  # noqa: F401
...
python/paddle/device/__init__.py

@@ -28,7 +28,9 @@ __all__ = [  # noqa
    'set_device',
    'get_device',
    'XPUPlace',
    'IPUPlace',
    'is_compiled_with_xpu',
    'is_compiled_with_ipu',
    'is_compiled_with_cinn',
    'is_compiled_with_cuda',
    'is_compiled_with_rocm',
...

@@ -55,6 +57,36 @@ def is_compiled_with_npu():
    return core.is_compiled_with_npu()


def is_compiled_with_ipu():
    """
    Whether paddle was built with WITH_IPU=ON to support Graphcore IPU.

    Returns (bool): `True` if IPU is supported, otherwise `False`.

    Examples:
        .. code-block:: python

            import paddle
            support_ipu = paddle.is_compiled_with_ipu()
    """
    return core.is_compiled_with_ipu()


def IPUPlace():
    """
    Return a Graphcore IPU Place

    Examples:
        .. code-block:: python

            # required: ipu

            import paddle
            place = paddle.device.IPUPlace()
    """
    return core.IPUPlace()


def is_compiled_with_xpu():
    """
    Whether paddle was built with WITH_XPU=ON to support Baidu Kunlun
...

@@ -143,13 +175,19 @@ def _convert_to_place(device):
        selected_npus = os.getenv("FLAGS_selected_npus", "0").split(",")
        device_id = int(selected_npus[0])
        place = core.NPUPlace(device_id)
    elif lower_device == 'ipu':
        if not core.is_compiled_with_ipu():
            raise ValueError(
                "The device should not be 'ipu', " \
                "since PaddlePaddle is not compiled with IPU")
        place = core.IPUPlace()
    else:
        avaliable_gpu_device = re.match(r'gpu:\d+', lower_device)
        avaliable_xpu_device = re.match(r'xpu:\d+', lower_device)
        avaliable_npu_device = re.match(r'npu:\d+', lower_device)
        if not avaliable_gpu_device and not avaliable_xpu_device and not avaliable_npu_device:
            raise ValueError(
-               "The device must be a string which is like 'cpu', 'gpu', 'gpu:x', 'xpu', 'xpu:x', 'npu' or 'npu:x'"
+               "The device must be a string which is like 'cpu', 'gpu', 'gpu:x', 'xpu', 'xpu:x', 'npu', 'npu:x' or ipu"
            )
        if avaliable_gpu_device:
            if not core.is_compiled_with_cuda():
...

@@ -183,13 +221,13 @@ def _convert_to_place(device):
def set_device(device):
    """
-   Paddle supports running calculations on various types of devices, including CPU, GPU, XPU and NPU.
+   Paddle supports running calculations on various types of devices, including CPU, GPU, XPU, NPU and IPU.
    They are represented by string identifiers. This function can specify the global device
    which the OP will run.

    Parameters:
        device(str): This parameter determines the specific running device.
-           It can be ``cpu``, ``gpu``, ``xpu``, ``npu``, ``gpu:x``, ``xpu:x`` and ``npu:x``,
+           It can be ``cpu``, ``gpu``, ``xpu``, ``npu``, ``gpu:x``, ``xpu:x``, ``npu:x`` and ``ipu``,
            where ``x`` is the index of the GPUs, XPUs or NPUs.

    Examples:
...

@@ -236,5 +274,10 @@ def get_device():
    elif isinstance(place, core.NPUPlace):
        device_id = place.get_device_id()
        device = 'npu:' + str(device_id)
    elif isinstance(place, core.IPUPlace):
        num_devices = core.get_ipu_device_count()
        device = "ipus:{{0-{}}}".format(num_devices - 1)
    else:
        raise ValueError("The device specification {} is invalid".format(place))

    return device
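Taken together, these changes make 'ipu' a valid device string for this module. A short usage sketch built only on the functions added here (the IPU path requires a WITH_IPU build):

import paddle

if paddle.is_compiled_with_ipu():
    # Accepted by _convert_to_place via set_device; maps to core.IPUPlace().
    paddle.device.set_device('ipu')
    # get_device reports the whole IPU pod, e.g. "ipus:{0-3}" when
    # core.get_ipu_device_count() returns 4.
    print(paddle.device.get_device())
else:
    # Fall back on builds without IPU support.
    paddle.device.set_device('cpu')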
python/paddle/fluid/__init__.py

@@ -71,7 +71,7 @@ from . import distribute_lookup_table
from .param_attr import ParamAttr, WeightNormParamAttr
from .data_feeder import DataFeeder
from .core import LoDTensor, LoDTensorArray, Scope, _Scope
-from .core import CPUPlace, XPUPlace, CUDAPlace, CUDAPinnedPlace, NPUPlace
+from .core import CPUPlace, XPUPlace, CUDAPlace, CUDAPinnedPlace, NPUPlace, IPUPlace
from .incubate import fleet
from .transpiler import DistributeTranspiler, \
    memory_optimize, release_memory, DistributeTranspilerConfig
...

@@ -132,6 +132,7 @@ __all__ = framework.__all__ + executor.__all__ + \
    'CUDAPlace',
    'CUDAPinnedPlace',
    'NPUPlace',
    'IPUPlace',
    'Tensor',
    'ParamAttr',
    'WeightNormParamAttr',
...

@@ -197,6 +198,11 @@ def __bootstrap__():
        if os.name == 'nt':
            remove_flag_if_exists('cpu_deterministic')

        if core.is_compiled_with_ipu():
            # Currently we request all ipu available for training and testing
            #   finer control of pod of IPUs will be added later
            read_env_flags += []

    core.init_gflags(["--tryfromenv=" + ",".join(read_env_flags)])
    # Note(zhouwei25): sys may not have argv in some cases,
    # Such as: use Python/C API to call Python from C++
...
python/paddle/framework/__init__.py

@@ -23,6 +23,7 @@ from .framework import set_grad_enabled  # noqa: F401
from ..fluid.param_attr import ParamAttr  # noqa: F401
from ..fluid.layers.tensor import create_parameter  # noqa: F401
from ..fluid.core import CPUPlace  # noqa: F401
from ..fluid.core import IPUPlace  # noqa: F401
from ..fluid.core import CUDAPlace  # noqa: F401
from ..fluid.core import CUDAPinnedPlace  # noqa: F401
from ..fluid.core import NPUPlace  # noqa: F401
...