Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
机器未来
Paddle
提交
cb636a48
P
Paddle
项目概览
机器未来
/
Paddle
与 Fork 源项目一致
Fork自
PaddlePaddle / Paddle
通知
1
Star
1
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
1
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
1
Issue
1
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
未验证
提交
cb636a48
编写于
12月 09, 2021
作者:
J
jianghaicheng
提交者:
GitHub
12月 09, 2021
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
add ipu device p2 (#37840)
上级
890638cf
变更
19
显示空白变更内容
内联
并排
Showing
19 changed file
with
599 addition
and
14 deletion
+599
-14
paddle/fluid/eager/accumulation/gradient_accumulation.cc
paddle/fluid/eager/accumulation/gradient_accumulation.cc
+16
-0
paddle/fluid/framework/dlpack_tensor.cc
paddle/fluid/framework/dlpack_tensor.cc
+5
-0
paddle/fluid/framework/executor.cc
paddle/fluid/framework/executor.cc
+8
-0
paddle/fluid/framework/op_registry.h
paddle/fluid/framework/op_registry.h
+3
-0
paddle/fluid/framework/tensor_util.cc
paddle/fluid/framework/tensor_util.cc
+46
-5
paddle/fluid/imperative/gradient_accumulator.cc
paddle/fluid/imperative/gradient_accumulator.cc
+7
-0
paddle/fluid/memory/allocation/naive_best_fit_allocator.cc
paddle/fluid/memory/allocation/naive_best_fit_allocator.cc
+28
-0
paddle/fluid/operators/ipu_runtime_op.cc
paddle/fluid/operators/ipu_runtime_op.cc
+62
-0
paddle/fluid/operators/ipu_runtime_op.h
paddle/fluid/operators/ipu_runtime_op.h
+69
-0
paddle/fluid/operators/math/math_function.cc
paddle/fluid/operators/math/math_function.cc
+7
-0
paddle/fluid/platform/device_context.cc
paddle/fluid/platform/device_context.cc
+30
-2
paddle/fluid/platform/place.cc
paddle/fluid/platform/place.cc
+7
-0
paddle/fluid/platform/place.h
paddle/fluid/platform/place.h
+40
-2
paddle/fluid/pybind/pybind.cc
paddle/fluid/pybind/pybind.cc
+200
-1
paddle/fluid/pybind/tensor_py.h
paddle/fluid/pybind/tensor_py.h
+15
-0
python/paddle/__init__.py
python/paddle/__init__.py
+2
-0
python/paddle/device/__init__.py
python/paddle/device/__init__.py
+46
-3
python/paddle/fluid/__init__.py
python/paddle/fluid/__init__.py
+7
-1
python/paddle/framework/__init__.py
python/paddle/framework/__init__.py
+1
-0
未找到文件。
paddle/fluid/eager/accumulation/gradient_accumulation.cc
浏览文件 @
cb636a48
...
...
@@ -116,6 +116,22 @@ class TensorAddFunctor : public boost::static_visitor<> {
}
#endif
// Gradient accumulation is not implemented for IPUPlace: visiting an IPU
// place always raises PermissionDenied. The previous
// `#ifdef PADDLE_WITH_IPU / #else` wrapper contained the same definition in
// both branches, so a single unconditional overload is equivalent.
void operator()(const paddle::platform::IPUPlace& place) {
  PADDLE_THROW(paddle::platform::errors::PermissionDenied(
      "Gradient accumulation on place (%s) "
      "is not supported in imperative mode",
      place));
}
void
operator
()(
const
paddle
::
platform
::
NPUPinnedPlace
&
place
)
{
PADDLE_THROW
(
paddle
::
platform
::
errors
::
PermissionDenied
(
"Gradient accumulation on place (%s) "
...
...
paddle/fluid/framework/dlpack_tensor.cc
浏览文件 @
cb636a48
...
...
@@ -81,6 +81,11 @@ struct DLDeviceVisitor : public boost::static_visitor<::DLDevice> {
return
device
;
}
inline
::
DLDevice
operator
()(
const
platform
::
IPUPlace
&
place
)
const
{
PADDLE_THROW
(
platform
::
errors
::
Unimplemented
(
"platform::IPUPlace is not supported"
));
}
inline
::
DLDevice
operator
()(
const
platform
::
XPUPlace
&
place
)
const
{
PADDLE_THROW
(
platform
::
errors
::
Unimplemented
(
"platform::XPUPlace is not supported"
));
...
...
paddle/fluid/framework/executor.cc
浏览文件 @
cb636a48
...
...
@@ -463,6 +463,14 @@ void Executor::RunPartialPreparedContext(ExecutorPrepareContext* ctx,
#else
PADDLE_THROW
(
platform
::
errors
::
Unimplemented
(
"No XPU gc found in CPU/GPU paddle"
));
#endif
}
else
if
(
platform
::
is_ipu_place
(
place_
))
{
#ifdef PADDLE_WITH_IPU
gc
.
reset
(
new
IPUGarbageCollector
(
BOOST_GET_CONST
(
platform
::
IPUPlace
,
place_
),
max_memory_size
));
#else
PADDLE_THROW
(
platform
::
errors
::
Unimplemented
(
"No IPU gc found in CPU/IPU paddle"
));
#endif
}
else
if
(
platform
::
is_npu_place
(
place_
))
{
#ifdef PADDLE_WITH_ASCEND_CL
...
...
paddle/fluid/framework/op_registry.h
浏览文件 @
cb636a48
...
...
@@ -327,6 +327,9 @@ struct OpKernelRegistrarFunctorEx<PlaceType, false, I,
#define REGISTER_OP_CPU_KERNEL(op_type, ...) \
REGISTER_OP_KERNEL(op_type, CPU, ::paddle::platform::CPUPlace, __VA_ARGS__)
#define REGISTER_OP_IPU_KERNEL(op_type, ...) \
REGISTER_OP_KERNEL(op_type, IPU, ::paddle::platform::IPUPlace, __VA_ARGS__)
#define REGISTER_OP_XPU_KERNEL(op_type, ...) \
REGISTER_OP_KERNEL(op_type, XPU, ::paddle::platform::XPUPlace, __VA_ARGS__)
...
...
paddle/fluid/framework/tensor_util.cc
浏览文件 @
cb636a48
...
...
@@ -76,6 +76,22 @@ void TensorCopy(const Tensor& src, const platform::Place& dst_place,
memory
::
Copy
(
BOOST_GET_CONST
(
platform
::
CPUPlace
,
dst_place
),
dst_ptr
,
BOOST_GET_CONST
(
platform
::
CPUPlace
,
src_place
),
src_ptr
,
size
);
}
#ifdef PADDLE_WITH_IPU
else
if
(
platform
::
is_ipu_place
(
src_place
)
&&
// NOLINT
platform
::
is_cpu_place
(
dst_place
))
{
memory
::
Copy
(
BOOST_GET_CONST
(
platform
::
CPUPlace
,
dst_place
),
dst_ptr
,
BOOST_GET_CONST
(
platform
::
IPUPlace
,
src_place
),
src_ptr
,
size
);
}
else
if
(
platform
::
is_cpu_place
(
src_place
)
&&
platform
::
is_ipu_place
(
dst_place
))
{
memory
::
Copy
(
BOOST_GET_CONST
(
platform
::
IPUPlace
,
dst_place
),
dst_ptr
,
BOOST_GET_CONST
(
platform
::
CPUPlace
,
src_place
),
src_ptr
,
size
);
}
else
if
(
platform
::
is_ipu_place
(
src_place
)
&&
platform
::
is_ipu_place
(
dst_place
))
{
memory
::
Copy
(
BOOST_GET_CONST
(
platform
::
IPUPlace
,
dst_place
),
dst_ptr
,
BOOST_GET_CONST
(
platform
::
IPUPlace
,
src_place
),
src_ptr
,
size
);
}
#endif
#ifdef PADDLE_WITH_XPU
else
if
(
platform
::
is_xpu_place
(
src_place
)
&&
// NOLINT
platform
::
is_cpu_place
(
dst_place
))
{
...
...
@@ -386,16 +402,32 @@ void TensorCopySync(const Tensor& src, const platform::Place& dst_place,
memory
::
Copy
(
BOOST_GET_CONST
(
platform
::
CPUPlace
,
dst_place
),
dst_ptr
,
BOOST_GET_CONST
(
platform
::
CPUPlace
,
src_place
),
src_ptr
,
size
);
}
#ifdef PADDLE_WITH_IPU
else
if
(
platform
::
is_ipu_place
(
src_place
)
&&
// NOLINT
platform
::
is_cpu_place
(
dst_place
))
{
memory
::
Copy
(
BOOST_GET_CONST
(
platform
::
CPUPlace
,
dst_place
),
dst_ptr
,
BOOST_GET_CONST
(
platform
::
IPUPlace
,
src_place
),
src_ptr
,
size
);
}
else
if
(
platform
::
is_cpu_place
(
src_place
)
&&
// NOLINT
platform
::
is_ipu_place
(
dst_place
))
{
memory
::
Copy
(
BOOST_GET_CONST
(
platform
::
IPUPlace
,
dst_place
),
dst_ptr
,
BOOST_GET_CONST
(
platform
::
CPUPlace
,
src_place
),
src_ptr
,
size
);
}
else
{
// NOLINT
PADDLE_THROW
(
platform
::
errors
::
Unimplemented
(
"Copy from %s to %s is not supported."
,
src_place
,
dst_place
));
}
#endif
#ifdef PADDLE_WITH_XPU
else
if
(
platform
::
is_xpu_place
(
src_place
)
&&
// NOLINT
platform
::
is_cpu_place
(
dst_place
))
{
memory
::
Copy
(
BOOST_GET_CONST
(
platform
::
CPUPlace
,
dst_place
),
dst_ptr
,
BOOST_GET_CONST
(
platform
::
XPUPlace
,
src_place
),
src_ptr
,
size
);
}
else
if
(
platform
::
is_cpu_place
(
src_place
)
&&
// NOLINT
}
else
if
(
platform
::
is_cpu_place
(
src_place
)
&&
// NOLINT
platform
::
is_xpu_place
(
dst_place
))
{
memory
::
Copy
(
BOOST_GET_CONST
(
platform
::
XPUPlace
,
dst_place
),
dst_ptr
,
BOOST_GET_CONST
(
platform
::
CPUPlace
,
src_place
),
src_ptr
,
size
);
}
else
if
(
platform
::
is_xpu_place
(
src_place
)
&&
// NOLINT
}
else
if
(
platform
::
is_xpu_place
(
src_place
)
&&
// NOLINT
platform
::
is_xpu_place
(
dst_place
))
{
if
(
src_ptr
==
dst_ptr
)
{
VLOG
(
3
)
<<
"Skip copy the same data async from "
<<
src_place
<<
" to "
...
...
@@ -404,7 +436,8 @@ void TensorCopySync(const Tensor& src, const platform::Place& dst_place,
}
memory
::
Copy
(
BOOST_GET_CONST
(
platform
::
XPUPlace
,
dst_place
),
dst_ptr
,
BOOST_GET_CONST
(
platform
::
XPUPlace
,
src_place
),
src_ptr
,
size
);
}
else
{
// NOLINT
}
else
{
// NOLINT
PADDLE_THROW
(
platform
::
errors
::
Unimplemented
(
"Copy from %s to %s is not supported."
,
src_place
,
dst_place
));
}
...
...
@@ -571,6 +604,11 @@ class AnyVisitor : public boost::static_visitor<bool> {
platform
::
errors
::
Unimplemented
(
"Not supported on place (%s) "
,
npu
));
// return GetResultHelper(out, npu);
}
bool
GetResult
(
const
framework
::
Tensor
&
out
,
const
platform
::
IPUPlace
&
ipu
)
const
{
PADDLE_THROW
(
platform
::
errors
::
Unimplemented
(
"Not supported on place (%s) "
,
ipu
));
}
bool
GetResult
(
const
framework
::
Tensor
&
out
,
const
platform
::
NPUPinnedPlace
&
cpu
)
const
{
...
...
@@ -762,6 +800,9 @@ struct BothFalseVisitor : public boost::static_visitor<> {
void
VisitorImpl
(
const
platform
::
XPUPlace
&
xpu
)
const
{
PADDLE_THROW
(
platform
::
errors
::
Unimplemented
(
"XPUPlace is not supported"
));
}
void
VisitorImpl
(
const
platform
::
IPUPlace
&
ipu
)
const
{
PADDLE_THROW
(
platform
::
errors
::
Unimplemented
(
"IPUPlace is not supported"
));
}
void
VisitorImpl
(
const
platform
::
CUDAPlace
&
gpu
)
const
{
#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
...
...
paddle/fluid/imperative/gradient_accumulator.cc
浏览文件 @
cb636a48
...
...
@@ -155,6 +155,13 @@ class TensorAddFunctor : public boost::static_visitor<> {
"is not supported in imperative mode"
,
place
));
}
// there is NO support in IPUPlace
void
operator
()(
const
platform
::
IPUPlace
&
place
)
{
PADDLE_THROW
(
platform
::
errors
::
PermissionDenied
(
"Gradient accumulation on place (%s) "
"is not supported in imperative mode"
,
place
));
}
private:
int64_t
numel_
;
...
...
paddle/fluid/memory/allocation/naive_best_fit_allocator.cc
浏览文件 @
cb636a48
...
...
@@ -116,6 +116,34 @@ size_t Used<platform::CPUPlace>(const platform::CPUPlace &place) {
return
GetCPUBuddyAllocator
()
->
Used
();
}
// For Graphcore IPU
// Allocates `size` bytes for an IPUPlace. The memory is taken from the CPU
// buddy allocator; when FLAGS_init_allocated_mem is set the buffer is filled
// with the byte pattern 0xEF before it is returned.
template <>
void *Alloc<platform::IPUPlace>(const platform::IPUPlace &place, size_t size) {
  VLOG(10) << "Allocate " << size << " bytes on " << platform::Place(place);
  VLOG(10) << "IPUPlace, Allocate on cpu.";

  void *const buffer = GetCPUBuddyAllocator()->Alloc(size);
  if (FLAGS_init_allocated_mem) {
    memset(buffer, 0xEF, size);
  }

  VLOG(10) << " pointer=" << buffer;
  return buffer;
}
// Releases a buffer previously handed out for an IPUPlace by returning it to
// the CPU buddy allocator. `size` is accepted for interface parity and is not
// used here.
template <>
void Free<platform::IPUPlace>(const platform::IPUPlace &place, void *p,
                              size_t size) {
  VLOG(10) << "Free pointer=" << p << " on " << platform::Place(place);
  GetCPUBuddyAllocator()->Free(p);
}
// Forwards to the CPU buddy allocator's Release() and returns its result;
// `place` is not consulted.
template <>
uint64_t Release<platform::IPUPlace>(const platform::IPUPlace &place) {
  const uint64_t released = GetCPUBuddyAllocator()->Release();
  return released;
}
// Forwards to the CPU buddy allocator's Used() and returns its result;
// `place` is not consulted.
template <>
size_t Used<platform::IPUPlace>(const platform::IPUPlace &place) {
  const size_t in_use = GetCPUBuddyAllocator()->Used();
  return in_use;
}
// For kunlun XPU
template
<
>
void
*
Alloc
<
platform
::
XPUPlace
>
(
const
platform
::
XPUPlace
&
place
,
size_t
size
)
{
...
...
paddle/fluid/operators/ipu_runtime_op.cc
0 → 100644
浏览文件 @
cb636a48
// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/fluid/operators/ipu_runtime_op.h"
namespace
paddle
{
namespace
operators
{
// Operator definition for `ipu_runtime`.
//
// Shape inference is a no-op here, and the kernel is selected from the
// integer "dtype" attribute together with the device context of the
// execution context.
class IpuRuntimeOp : public framework::OperatorWithKernel {
 public:
  using framework::OperatorWithKernel::OperatorWithKernel;

  // No shape inference is performed for this operator.
  void InferShape(framework::InferShapeContext* ctx) const override {}

 protected:
  // Builds the kernel type from the "dtype" attribute (interpreted as a
  // proto::VarType::Type value) and the current device context.
  framework::OpKernelType GetExpectedKernelType(
      const framework::ExecutionContext& ctx) const override {
    const auto data_type =
        framework::proto::VarType::Type(ctx.Attr<int>("dtype"));
    return framework::OpKernelType(data_type, ctx.device_context());
  }
};
// Declares the proto of the `ipu_runtime` operator: a duplicable "FeedList"
// input, a duplicable "FetchList" output and an integer "dtype" attribute
// whose default is proto::VarType::FP32.
class IpuRuntimeOpMaker : public framework::OpProtoAndCheckerMaker {
 public:
  void Make() override {
    // Inputs / outputs of the graph that is run.
    AddInput("FeedList", "FeedList of Graph").AsDuplicable();
    AddOutput("FetchList", "FetchList of Graph").AsDuplicable();

    // Output data type, used for kernel selection.
    AddAttr<int>("dtype", "(int, default 5 (FP32)) Output data type")
        .SetDefault(framework::proto::VarType::FP32);

    AddComment(R"DOC(
Run graph by PopART runtime.
)DOC");
  }
};
}
// namespace operators
}
// namespace paddle
namespace
ops
=
paddle
::
operators
;
REGISTER_OPERATOR
(
ipu_runtime
,
ops
::
IpuRuntimeOp
,
ops
::
IpuRuntimeOpMaker
);
REGISTER_OP_IPU_KERNEL
(
ipu_runtime
,
ops
::
IpuRuntimeKernel
<
float
>
,
ops
::
IpuRuntimeKernel
<
double
>
,
ops
::
IpuRuntimeKernel
<
int
>
,
ops
::
IpuRuntimeKernel
<
int64_t
>
,
ops
::
IpuRuntimeKernel
<
bool
>
,
ops
::
IpuRuntimeKernel
<
int8_t
>
,
ops
::
IpuRuntimeKernel
<
paddle
::
platform
::
float16
>
);
paddle/fluid/operators/ipu_runtime_op.h
0 → 100644
浏览文件 @
cb636a48
// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <memory>
#include <vector>
#include "paddle/fluid/framework/op_registry.h"
#ifdef PADDLE_WITH_IPU
#include "paddle/fluid/framework/ipu/ipu_backend.h"
#include "paddle/fluid/framework/tensor.h"
#endif
namespace
paddle
{
namespace
operators
{
// Kernel of the `ipu_runtime` operator.
//
// When built with PADDLE_WITH_IPU, Compute():
//   1. obtains the IpuBackend instance and, if no device is attached yet,
//      attaches the device whose id is reported by the IPU device context;
//   2. runs the graph through the backend with the "FeedList" inputs and
//      the "FetchList" outputs;
//   3. gives every output whose dims() is empty a 1-D shape.
// Without PADDLE_WITH_IPU, Compute() always throws PreconditionNotMet.
template <typename T>
class IpuRuntimeKernel : public framework::OpKernel<T> {
 public:
  // ctx: execution context supplying the device context, the "FeedList"
  //      inputs and the "FetchList" outputs.
  void Compute(const framework::ExecutionContext& ctx) const override {
#ifdef PADDLE_WITH_IPU
    auto ipu_backend = framework::ipu::IpuBackend::GetInstance();
    if (!ipu_backend->DeviceIsAttached()) {
      // Downcast within the device-context hierarchy. static_cast is the
      // well-defined cast for this; reinterpret_cast only works by accident
      // of object layout.
      // NOTE(review): assumes IPUDeviceContext publicly derives from
      // platform::DeviceContext -- confirm against device_context.h.
      const auto& ipu_ctx =
          static_cast<const platform::IPUDeviceContext&>(ctx.device_context());
      ipu_backend->AttachDevice(ipu_ctx.DeviceId());
    }

    auto inputs = ctx.MultiInput<framework::Tensor>("FeedList");
    auto outputs = ctx.MultiOutput<framework::Tensor>("FetchList");
    auto output_names = ctx.OutputNames("FetchList");
    VLOG(4) << "IpuRuntime Kernel, begin to run graph";
    ipu_backend->Run(inputs, outputs, ctx);

    // post-run
    // resize tensor when tensor.dims() is empty
    for (size_t i = 0; i < outputs.size(); ++i) {
      auto* out = outputs[i];
      if (out->dims().size() == 0) {
        // Derive the element count from the allocated byte size and the
        // size of one element of the tensor's data type.
        auto tensor_dtype = out->type();
        auto sizeof_dtype = framework::SizeOfType(tensor_dtype);
        int64_t dim = out->memory_size() / sizeof_dtype;
        out->Resize({dim});
        VLOG(10) << "set ipu_runtime_op output: " << output_names[i]
                 << " dims from () to: "
                 << "(" << dim << ")";
      }
    }
#else
    PADDLE_THROW(platform::errors::PreconditionNotMet(
        "Please compile WITH_IPU option to enable ipu_runtime op"));
#endif
  }
};
}
// namespace operators
}
// namespace paddle
paddle/fluid/operators/math/math_function.cc
浏览文件 @
cb636a48
...
...
@@ -173,6 +173,13 @@ void set_constant_with_place<platform::NPUPinnedPlace>(
platform
::
errors
::
Unimplemented
(
"NPUPinnedPlace is not supported"
));
}
template
<
>
void
set_constant_with_place
<
platform
::
IPUPlace
>
(
const
platform
::
DeviceContext
&
context
,
framework
::
Tensor
*
tensor
,
float
value
)
{
PADDLE_THROW
(
platform
::
errors
::
Unimplemented
(
"IPUPlace is not supported"
));
}
template
<
>
void
set_constant_with_place
<
platform
::
CPUPlace
>
(
const
platform
::
DeviceContext
&
context
,
framework
::
Tensor
*
tensor
,
...
...
paddle/fluid/platform/device_context.cc
浏览文件 @
cb636a48
...
...
@@ -16,6 +16,9 @@ limitations under the License. */
#include "paddle/fluid/memory/allocation/cuda_device_context_allocator.h"
#include "paddle/fluid/platform/cuda_device_guard.h"
#endif
#ifdef PADDLE_WITH_IPU
#include "paddle/fluid/platform/ipu/ipu_backend.h"
#endif
#include "glog/logging.h"
#include "paddle/fluid/platform/profiler.h"
...
...
@@ -96,8 +99,9 @@ platform::DeviceContext* DeviceContextPool::Get(const platform::Place& place) {
if
(
it
==
device_contexts_
.
end
())
{
PADDLE_THROW
(
platform
::
errors
::
Unimplemented
(
"Place %s is not supported. Please check that your paddle compiles "
"with WITH_GPU, WITH_XPU or WITH_ASCEND_CL option or check that "
"your train process set the correct device id if you use Executor."
,
"with WITH_GPU, WITH_XPU, WITH_IPU or WITH_ASCEND_CL option or check "
"that your train process set the correct device id if you use "
"Executor."
,
place
));
}
return
it
->
second
.
get
().
get
();
...
...
@@ -158,6 +162,14 @@ DeviceContextPool::DeviceContextPool(
PADDLE_THROW
(
platform
::
errors
::
Unimplemented
(
"XPUPlace is not supported. Please "
"re-compile with WITH_XPU option."
));
#endif
}
else
if
(
platform
::
is_ipu_place
(
p
))
{
#ifdef PADDLE_WITH_IPU
EmplaceDeviceContext
<
IPUDeviceContext
,
IPUPlace
>
(
&
device_contexts_
,
p
);
#else
PADDLE_THROW
(
platform
::
errors
::
Unimplemented
(
"IPUPlace is not supported. Please "
"re-compile with WITH_IPU option."
));
#endif
}
else
if
(
platform
::
is_npu_place
(
p
))
{
#ifdef PADDLE_WITH_ASCEND_CL
...
...
@@ -195,6 +207,22 @@ Eigen::DefaultDevice* CPUDeviceContext::eigen_device() const {
Place
CPUDeviceContext
::
GetPlace
()
const
{
return
place_
;
}
#ifdef PADDLE_WITH_IPU
// Stores the place and looks up the matching device handle: the device id of
// `place` is passed to IpuBackend::GetDevice() and the result kept in
// `device_`.
IPUDeviceContext::IPUDeviceContext(IPUPlace place) : place_(place) {
  const int device_id = place.GetDeviceId();
  const std::shared_ptr<platform::ipu::IpuBackend> backend =
      platform::ipu::IpuBackend::GetInstance();
  device_ = backend->GetDevice(device_id);
}
Place
IPUDeviceContext
::
GetPlace
()
const
{
return
place_
;
}
void
IPUDeviceContext
::
Wait
()
const
{
/*! \brief Wait for all operations completion in the stream. */
}
IPUDeviceContext
::~
IPUDeviceContext
()
{}
#endif
#ifdef PADDLE_WITH_XPU
XPUDeviceContext
::
XPUDeviceContext
()
{
context_
=
xpu
::
create_context
();
...
...
paddle/fluid/platform/place.cc
浏览文件 @
cb636a48
...
...
@@ -36,6 +36,7 @@ class PlacePrinter : public boost::static_visitor<> {
void
operator
()(
const
XPUPlace
&
p
)
{
os_
<<
"XPUPlace("
<<
p
.
device
<<
")"
;
}
void
operator
()(
const
NPUPlace
&
p
)
{
os_
<<
"NPUPlace("
<<
p
.
device
<<
")"
;
}
void
operator
()(
const
NPUPinnedPlace
&
p
)
{
os_
<<
"NPUPinnedPlace"
;
}
void
operator
()(
const
IPUPlace
&
p
)
{
os_
<<
"IPUPlace("
<<
p
.
device
<<
")"
;
}
void
operator
()(
const
CUDAPinnedPlace
&
p
)
{
os_
<<
"CUDAPinnedPlace"
;
}
private:
...
...
@@ -56,6 +57,10 @@ bool is_npu_place(const Place &p) {
return
boost
::
apply_visitor
(
IsNPUPlace
(),
p
);
}
bool
is_ipu_place
(
const
Place
&
p
)
{
return
boost
::
apply_visitor
(
IsIPUPlace
(),
p
);
}
bool
is_cpu_place
(
const
Place
&
p
)
{
return
boost
::
apply_visitor
(
IsCPUPlace
(),
p
);
}
...
...
@@ -80,6 +85,8 @@ bool is_same_place(const Place &p1, const Place &p2) {
return
BOOST_GET_CONST
(
XPUPlace
,
p1
)
==
BOOST_GET_CONST
(
XPUPlace
,
p2
);
}
else
if
(
is_npu_place
(
p1
))
{
return
BOOST_GET_CONST
(
NPUPlace
,
p1
)
==
BOOST_GET_CONST
(
NPUPlace
,
p2
);
}
else
if
(
is_ipu_place
(
p1
))
{
return
BOOST_GET_CONST
(
IPUPlace
,
p1
)
==
BOOST_GET_CONST
(
IPUPlace
,
p2
);
}
else
{
return
BOOST_GET_CONST
(
CUDAPlace
,
p1
)
==
BOOST_GET_CONST
(
CUDAPlace
,
p2
);
}
...
...
paddle/fluid/platform/place.h
浏览文件 @
cb636a48
...
...
@@ -95,12 +95,25 @@ struct NPUPinnedPlace {
inline
bool
operator
!=
(
const
NPUPinnedPlace
&
)
const
{
return
false
;
}
inline
bool
operator
<
(
const
NPUPinnedPlace
&
)
const
{
return
false
;
}
};
// Place tag for an IPU device, identified by an integer device id.
// Default construction selects device 0.
struct IPUPlace {
  int device;

  IPUPlace() : device(0) {}
  explicit IPUPlace(int d) : device(d) {}

  // Returns the device id stored in this place.
  inline int GetDeviceId() const { return device; }

  // Comparisons are defined purely on the device id; they are needed so the
  // place can be compared when held inside a variant.
  inline bool operator==(const IPUPlace &o) const {
    return device == o.device;
  }
  inline bool operator!=(const IPUPlace &o) const {
    return device != o.device;
  }
  inline bool operator<(const IPUPlace &o) const { return device < o.device; }
};
struct
IsCUDAPlace
:
public
boost
::
static_visitor
<
bool
>
{
bool
operator
()(
const
CPUPlace
&
)
const
{
return
false
;
}
bool
operator
()(
const
XPUPlace
&
)
const
{
return
false
;
}
bool
operator
()(
const
NPUPlace
&
)
const
{
return
false
;
}
bool
operator
()(
const
NPUPinnedPlace
&
)
const
{
return
false
;
}
bool
operator
()(
const
IPUPlace
&
)
const
{
return
false
;
}
bool
operator
()(
const
CUDAPlace
&
)
const
{
return
true
;
}
bool
operator
()(
const
CUDAPinnedPlace
&
)
const
{
return
false
;
}
};
...
...
@@ -110,6 +123,7 @@ struct IsCPUPlace : public boost::static_visitor<bool> {
bool
operator
()(
const
XPUPlace
&
)
const
{
return
false
;
}
bool
operator
()(
const
NPUPlace
&
)
const
{
return
false
;
}
bool
operator
()(
const
NPUPinnedPlace
&
)
const
{
return
false
;
}
bool
operator
()(
const
IPUPlace
&
)
const
{
return
false
;
}
bool
operator
()(
const
CUDAPlace
&
)
const
{
return
false
;
}
bool
operator
()(
const
CUDAPinnedPlace
&
)
const
{
return
false
;
}
};
...
...
@@ -119,6 +133,7 @@ struct IsCUDAPinnedPlace : public boost::static_visitor<bool> {
bool
operator
()(
const
XPUPlace
&
)
const
{
return
false
;
}
bool
operator
()(
const
NPUPlace
&
)
const
{
return
false
;
}
bool
operator
()(
const
NPUPinnedPlace
&
)
const
{
return
false
;
}
bool
operator
()(
const
IPUPlace
&
)
const
{
return
false
;
}
bool
operator
()(
const
CUDAPlace
&
)
const
{
return
false
;
}
bool
operator
()(
const
CUDAPinnedPlace
&
cuda_pinned
)
const
{
return
true
;
}
};
...
...
@@ -128,6 +143,7 @@ struct IsXPUPlace : public boost::static_visitor<bool> {
bool
operator
()(
const
XPUPlace
&
)
const
{
return
true
;
}
bool
operator
()(
const
NPUPlace
&
)
const
{
return
false
;
}
bool
operator
()(
const
NPUPinnedPlace
&
)
const
{
return
false
;
}
bool
operator
()(
const
IPUPlace
&
)
const
{
return
false
;
}
bool
operator
()(
const
CUDAPlace
&
)
const
{
return
false
;
}
bool
operator
()(
const
CUDAPinnedPlace
&
)
const
{
return
false
;
}
};
...
...
@@ -137,6 +153,7 @@ struct IsNPUPlace : public boost::static_visitor<bool> {
bool
operator
()(
const
XPUPlace
&
)
const
{
return
false
;
}
bool
operator
()(
const
NPUPlace
&
)
const
{
return
true
;
}
bool
operator
()(
const
NPUPinnedPlace
&
)
const
{
return
false
;
}
bool
operator
()(
const
IPUPlace
&
)
const
{
return
false
;
}
bool
operator
()(
const
CUDAPlace
&
)
const
{
return
false
;
}
bool
operator
()(
const
CUDAPinnedPlace
&
)
const
{
return
false
;
}
};
...
...
@@ -145,22 +162,33 @@ struct IsNPUPinnedPlace : public boost::static_visitor<bool> {
bool
operator
()(
const
CPUPlace
&
)
const
{
return
false
;
}
bool
operator
()(
const
XPUPlace
&
)
const
{
return
false
;
}
bool
operator
()(
const
NPUPlace
&
)
const
{
return
false
;
}
bool
operator
()(
const
IPUPlace
&
)
const
{
return
false
;
}
bool
operator
()(
const
CUDAPlace
&
)
const
{
return
false
;
}
bool
operator
()(
const
CUDAPinnedPlace
&
)
const
{
return
false
;
}
bool
operator
()(
const
NPUPinnedPlace
&
)
const
{
return
true
;
}
};
// Visitor over the Place variant that answers "is this an IPUPlace?".
struct IsIPUPlace : public boost::static_visitor<bool> {
  // The single alternative that matches.
  bool operator()(const IPUPlace &) const { return true; }

  // Every other place alternative is rejected.
  bool operator()(const CPUPlace &) const { return false; }
  bool operator()(const CUDAPlace &) const { return false; }
  bool operator()(const CUDAPinnedPlace &) const { return false; }
  bool operator()(const XPUPlace &) const { return false; }
  bool operator()(const NPUPlace &) const { return false; }
  bool operator()(const NPUPinnedPlace &) const { return false; }
};
class
Place
:
public
boost
::
variant
<
CUDAPlace
,
XPUPlace
,
NPUPlace
,
CPUPlace
,
CUDAPinnedPlace
,
NPUPinnedPlace
>
{
CUDAPinnedPlace
,
NPUPinnedPlace
,
IPUPlace
>
{
private:
using
PlaceBase
=
boost
::
variant
<
CUDAPlace
,
XPUPlace
,
NPUPlace
,
CPUPlace
,
CUDAPinnedPlace
,
NPUPinnedPlace
>
;
CUDAPinnedPlace
,
NPUPinnedPlace
,
IPUPlace
>
;
public:
Place
()
=
default
;
Place
(
const
CPUPlace
&
cpu_place
)
:
PlaceBase
(
cpu_place
)
{}
// NOLINT
Place
(
const
XPUPlace
&
xpu_place
)
:
PlaceBase
(
xpu_place
)
{}
// NOLINT
Place
(
const
NPUPlace
&
npu_place
)
:
PlaceBase
(
npu_place
)
{}
// NOLINT
Place
(
const
IPUPlace
&
ipu_place
)
:
PlaceBase
(
ipu_place
)
{}
// NOLINT
Place
(
const
CUDAPlace
&
cuda_place
)
:
PlaceBase
(
cuda_place
)
{}
// NOLINT
Place
(
const
CUDAPinnedPlace
&
cuda_pinned_place
)
// NOLINT
:
PlaceBase
(
cuda_pinned_place
)
{}
...
...
@@ -180,6 +208,7 @@ using PlaceList = std::vector<Place>;
bool
is_gpu_place
(
const
Place
&
);
bool
is_xpu_place
(
const
Place
&
);
bool
is_npu_place
(
const
Place
&
);
bool
is_ipu_place
(
const
Place
&
);
bool
is_cpu_place
(
const
Place
&
);
bool
is_cuda_pinned_place
(
const
Place
&
);
bool
is_npu_pinned_place
(
const
Place
&
);
...
...
@@ -228,6 +257,15 @@ struct PlaceVisitorWrapper
return
typename
Visitor
::
result_type
();
#endif
}
typename
Visitor
::
result_type
operator
()(
const
IPUPlace
&
ipu
)
const
{
#ifdef PADDLE_WITH_IPU
return
visitor_
(
ipu
);
#else
PADDLE_THROW
(
platform
::
errors
::
Unavailable
(
"Paddle is not compiled with IPU. Cannot visit ipu device"
));
return
typename
Visitor
::
result_type
();
#endif
}
typename
Visitor
::
result_type
operator
()(
const
CUDAPlace
&
cuda
)
const
{
#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
...
...
paddle/fluid/pybind/pybind.cc
浏览文件 @
cb636a48
...
...
@@ -132,6 +132,10 @@ limitations under the License. */
#endif
#include "paddle/fluid/platform/cuda_graph_with_memory_pool.h"
#ifdef PADDLE_WITH_IPU
#include "paddle/fluid/platform/ipu/ipu_backend.h"
#include "paddle/fluid/platform/ipu_info.h"
#endif
#ifdef PADDLE_WITH_CRYPTO
#include "paddle/fluid/pybind/crypto.h"
...
...
@@ -201,6 +205,14 @@ bool IsCompiledWithNPU() {
#endif
}
// Reports whether this binary was built with the PADDLE_WITH_IPU macro
// defined.
bool IsCompiledWithIPU() {
#ifdef PADDLE_WITH_IPU
  return true;
#else
  return false;
#endif
}
bool
IsCompiledWithMKLDNN
()
{
#ifndef PADDLE_WITH_MKLDNN
return
false
;
...
...
@@ -816,6 +828,8 @@ PYBIND11_MODULE(core_noavx, m) {
py
::
arg
(
"array"
),
py
::
arg
(
"place"
),
py
::
arg
(
"zero_copy"
)
=
false
)
.
def
(
"set"
,
SetTensorFromPyArray
<
paddle
::
platform
::
NPUPlace
>
,
py
::
arg
(
"array"
),
py
::
arg
(
"place"
),
py
::
arg
(
"zero_copy"
)
=
false
)
.
def
(
"set"
,
SetTensorFromPyArray
<
paddle
::
platform
::
IPUPlace
>
,
py
::
arg
(
"array"
),
py
::
arg
(
"place"
),
py
::
arg
(
"zero_copy"
)
=
false
)
.
def
(
"set"
,
SetTensorFromPyArray
<
paddle
::
platform
::
CUDAPinnedPlace
>
,
py
::
arg
(
"array"
),
py
::
arg
(
"place"
),
py
::
arg
(
"zero_copy"
)
=
false
,
R"DOC(
...
...
@@ -823,7 +837,7 @@ PYBIND11_MODULE(core_noavx, m) {
Args:
lod (numpy.ndarray): The data to set.
place (CPUPlace|CUDAPlace|XPUPlace|CUDAPinnedPlace|NPUPlace): The place where the
place (CPUPlace|CUDAPlace|XPUPlace|
IPUPlace|
CUDAPinnedPlace|NPUPlace): The place where the
LoDTensor is to be set.
zero_copy (bool, optional): Whether to share memory with the input numpy array.
This parameter only works with CPUPlace. Default: False.
...
...
@@ -1913,6 +1927,58 @@ All parameter, weight, gradient are variables in Paddle.
[](
const
platform
::
NPUPlace
&
self
)
{
return
self
.
GetDeviceId
();
})
.
def
(
"__str__"
,
string
::
to_string
<
const
platform
::
NPUPlace
&>
);
// IPUPlace
py
::
class_
<
platform
::
IPUPlace
>
(
m
,
"IPUPlace"
,
R"DOC(
IPUPlace is a descriptor of a device.
It represents a IPU device on which a tensor will be allocated and a model will run.
Examples:
.. code-block:: python
import paddle
# required: ipu
ipu_place = paddle.IPUPlace()
)DOC"
)
.
def
(
"__init__"
,
[](
platform
::
IPUPlace
&
self
)
{
#ifdef PADDLE_WITH_IPU
if
(
platform
::
GetIPUDeviceCount
()
==
0
)
{
LOG
(
ERROR
)
<<
"Cannot use IPU because there is no IPU "
"detected on your "
"machine."
;
std
::
exit
(
-
1
);
}
// use ipu(0) to compile, while running with the number the user configures
// in sharding and pipeline.
new
(
&
self
)
platform
::
IPUPlace
(
0
);
#else
LOG
(
ERROR
)
<<
string
::
Sprintf
(
"Cannot use IPU because you didn't install IPU version "
"PaddlePaddle.
\n
"
"If you want to use IPU, please try to install IPU version "
"PaddlePaddle by: pip install paddlepaddle*
\n
"
"If you only have CPU, please change IPUPlace to be "
"CPUPlace().
\n
"
);
std
::
exit
(
-
1
);
#endif
})
.
def
(
"_type"
,
&
PlaceIndex
<
platform
::
IPUPlace
>
)
.
def
(
"_equals"
,
&
IsSamePlace
<
platform
::
IPUPlace
,
platform
::
Place
>
)
.
def
(
"_equals"
,
&
IsSamePlace
<
platform
::
IPUPlace
,
platform
::
CUDAPlace
>
)
.
def
(
"_equals"
,
&
IsSamePlace
<
platform
::
IPUPlace
,
platform
::
CPUPlace
>
)
.
def
(
"_equals"
,
&
IsSamePlace
<
platform
::
IPUPlace
,
platform
::
XPUPlace
>
)
.
def
(
"_equals"
,
&
IsSamePlace
<
platform
::
IPUPlace
,
platform
::
NPUPlace
>
)
.
def
(
"_equals"
,
&
IsSamePlace
<
platform
::
IPUPlace
,
platform
::
IPUPlace
>
)
.
def
(
"_equals"
,
&
IsSamePlace
<
platform
::
IPUPlace
,
platform
::
CUDAPinnedPlace
>
)
#ifdef PADDLE_WITH_IPU
.
def
(
"get_device_id"
,
[](
const
platform
::
IPUPlace
&
self
)
{
return
self
.
GetDeviceId
();
})
#endif
.
def
(
"__str__"
,
string
::
to_string
<
const
platform
::
IPUPlace
&>
);
py
::
class_
<
platform
::
Place
>
platformplace
(
m
,
"Place"
);
g_place_pytype
=
reinterpret_cast
<
PyTypeObject
*>
(
platformplace
.
ptr
());
platformplace
.
def
(
py
::
init
<>
())
...
...
@@ -1922,6 +1988,7 @@ All parameter, weight, gradient are variables in Paddle.
.
def
(
"_equals"
,
&
IsSamePlace
<
platform
::
Place
,
platform
::
CPUPlace
>
)
.
def
(
"_equals"
,
&
IsSamePlace
<
platform
::
Place
,
platform
::
XPUPlace
>
)
.
def
(
"_equals"
,
&
IsSamePlace
<
platform
::
Place
,
platform
::
NPUPlace
>
)
.
def
(
"_equals"
,
&
IsSamePlace
<
platform
::
Place
,
platform
::
IPUPlace
>
)
.
def
(
"_equals"
,
&
IsSamePlace
<
platform
::
Place
,
platform
::
CUDAPinnedPlace
>
)
.
def
(
"is_gpu_place"
,
[](
platform
::
Place
&
self
)
{
return
platform
::
is_gpu_place
(
self
);
})
...
...
@@ -1931,6 +1998,8 @@ All parameter, weight, gradient are variables in Paddle.
[](
platform
::
Place
&
self
)
{
return
platform
::
is_xpu_place
(
self
);
})
.
def
(
"is_npu_place"
,
[](
platform
::
Place
&
self
)
{
return
platform
::
is_npu_place
(
self
);
})
.
def
(
"is_ipu_place"
,
[](
platform
::
Place
&
self
)
{
return
platform
::
is_ipu_place
(
self
);
})
.
def
(
"is_cuda_pinned_place"
,
[](
platform
::
Place
&
self
)
{
return
platform
::
is_cuda_pinned_place
(
self
);
...
...
@@ -1947,6 +2016,10 @@ All parameter, weight, gradient are variables in Paddle.
[](
platform
::
Place
&
self
)
{
return
BOOST_GET_CONST
(
platform
::
NPUPlace
,
self
).
device
;
})
.
def
(
"ipu_device_id"
,
[](
platform
::
Place
&
self
)
{
return
BOOST_GET_CONST
(
platform
::
IPUPlace
,
self
).
device
;
})
.
def
(
"set_place"
,
[](
platform
::
Place
&
self
,
const
platform
::
Place
&
other
)
{
self
=
other
;
})
.
def
(
"set_place"
,
...
...
@@ -1970,6 +2043,10 @@ All parameter, weight, gradient are variables in Paddle.
[](
platform
::
Place
&
self
,
const
platform
::
NPUPlace
&
npu_place
)
{
self
=
npu_place
;
})
.
def
(
"set_place"
,
[](
platform
::
Place
&
self
,
const
platform
::
IPUPlace
&
ipu_place
)
{
self
=
ipu_place
;
})
.
def
(
"__repr__"
,
string
::
to_string
<
const
platform
::
Place
&>
)
.
def
(
"__str__"
,
string
::
to_string
<
const
platform
::
Place
&>
);
...
...
@@ -2201,6 +2278,7 @@ All parameter, weight, gradient are variables in Paddle.
m
.
def
(
"is_compiled_with_ascend"
,
IsCompiledWithAscend
);
m
.
def
(
"is_compiled_with_rocm"
,
IsCompiledWithROCM
);
m
.
def
(
"is_compiled_with_npu"
,
IsCompiledWithNPU
);
m
.
def
(
"is_compiled_with_ipu"
,
IsCompiledWithIPU
);
m
.
def
(
"is_compiled_with_xpu"
,
IsCompiledWithXPU
);
m
.
def
(
"is_compiled_with_mkldnn"
,
IsCompiledWithMKLDNN
);
m
.
def
(
"is_compiled_with_cinn"
,
IsCompiledWithCINN
);
...
...
@@ -2520,6 +2598,10 @@ All parameter, weight, gradient are variables in Paddle.
});
#endif
#ifdef PADDLE_WITH_IPU
m
.
def
(
"get_ipu_device_count"
,
platform
::
GetIPUDeviceCount
);
#endif
py
::
enum_
<
platform
::
TracerOption
>
(
m
,
"TracerOption"
,
py
::
arithmetic
())
.
value
(
"kDefault"
,
platform
::
TracerOption
::
kDefault
)
.
value
(
"kOpDetail"
,
platform
::
TracerOption
::
kOpDetail
)
...
...
@@ -2597,6 +2679,11 @@ All parameter, weight, gradient are variables in Paddle.
bool
val
)
{
self
.
Set
<
bool
>
(
name
,
new
bool
(
val
));
})
.
def
(
"set"
,
[](
ir
::
Pass
&
self
,
const
std
::
string
&
name
,
int
val
)
{
self
.
Set
<
const
int
>
(
name
,
new
int
(
val
));
})
.
def
(
"set"
,
[](
ir
::
Pass
&
self
,
const
std
::
string
&
name
,
std
::
vector
<
std
::
string
>
set
)
{
self
.
Set
(
name
,
new
std
::
vector
<
std
::
string
>
(
set
));
})
.
def
(
"set"
,
[](
ir
::
Pass
&
self
,
const
std
::
string
&
name
,
std
::
unordered_set
<
std
::
string
>
set
)
{
...
...
@@ -3429,6 +3516,118 @@ All parameter, weight, gradient are variables in Paddle.
})
.
def
(
"device_count"
,
&
ParallelExecutor
::
DeviceCount
);
#ifdef PADDLE_WITH_IPU
py
::
class_
<
platform
::
ipu
::
IpuBackend
,
std
::
shared_ptr
<
platform
::
ipu
::
IpuBackend
>>
(
m
,
"IpuBackend"
)
.
def
(
py
::
init
(
&
platform
::
ipu
::
IpuBackend
::
GetNewInstance
))
.
def
(
"clear"
,
&
platform
::
ipu
::
IpuBackend
::
Clear
)
.
def
(
"set_scope"
,
&
platform
::
ipu
::
IpuBackend
::
SetScope
)
.
def
(
"set_ipu_strategy"
,
&
platform
::
ipu
::
IpuBackend
::
SetIpuStrategy
);
// Python binding for platform::ipu::IpuStrategy.
//
// The class is default-constructible from Python, and every option is exposed
// as a read/write property implemented by a getter/setter lambda pair. Some
// properties map directly onto IpuStrategy members; others map onto fields of
// the nested `popart_options_` member.
py::class_<platform::ipu::IpuStrategy>(m, "IpuStrategy")
    .def(py::init())
    .def_property(
        "num_ipus",
        [](const platform::ipu::IpuStrategy &self) { return self.num_ipus; },
        [](platform::ipu::IpuStrategy &self, int num_ipus) {
          self.num_ipus = num_ipus;
        },
        R"DOC(
Int type, set the number ipu we need. Default 1.
)DOC")
    .def_property(
        "accumulationFactor",
        [](const platform::ipu::IpuStrategy &self) {
          return self.popart_options_.accumulationFactor;
        },
        [](platform::ipu::IpuStrategy &self, int accumulationFactor) {
          self.popart_options_.accumulationFactor = accumulationFactor;
        },
        R"DOC(
Specify the number of micro-batches to accumulate before
applying the varUpdate. Default 1.
)DOC")
    .def_property(
        "batches_per_step",
        [](const platform::ipu::IpuStrategy &self) {
          return self.batches_per_step;
        },
        [](platform::ipu::IpuStrategy &self, int batches_per_step) {
          self.batches_per_step = batches_per_step;
        },
        R"DOC(
Int type, set batches_per_step. Default 1.
)DOC")
    .def_property(
        "is_training",
        [](const platform::ipu::IpuStrategy &self) { return self.is_training; },
        [](platform::ipu::IpuStrategy &self, bool is_training) {
          self.is_training = is_training;
        },
        R"DOC(
Bool type, True for training, False inference. Default True.
)DOC")
    .def_property(
        "enable_pipelining",
        [](const platform::ipu::IpuStrategy &self) {
          return self.popart_options_.enablePipelining;
        },
        [](platform::ipu::IpuStrategy &self, bool enable_pipelining) {
          self.popart_options_.enablePipelining = enable_pipelining;
        },
        R"DOC(
Bool type, True enable pipeline, otherwise disable. Default False.
)DOC")
    .def_property(
        "enable_manual_shard",
        // The boolean seen from Python is derived from virtualGraphMode:
        // it reads as true only when the mode is Manual.
        [](const platform::ipu::IpuStrategy &self) {
          return self.popart_options_.virtualGraphMode ==
                 platform::ipu::VirtualGraphMode::Manual;
        },
        [](platform::ipu::IpuStrategy &self, bool enable_ipu_shard) {
          // true selects Manual, false selects Off; no other mode is
          // reachable through this property.
          self.popart_options_.virtualGraphMode =
              enable_ipu_shard ? platform::ipu::VirtualGraphMode::Manual
                               : platform::ipu::VirtualGraphMode::Off;
        },
        // The raw string previously carried leftover string-concatenation
        // quotes (`Default "` / `"False.`) that leaked into the docstring.
        R"DOC(
Bool type, True enable model sharding, otherwise disable. Default False.
)DOC")
    .def_property(
        "need_avg_shard",
        [](const platform::ipu::IpuStrategy &self) {
          return self.need_avg_shard;
        },
        [](platform::ipu::IpuStrategy &self, bool need_avg_shard) {
          self.need_avg_shard = need_avg_shard;
        },
        R"DOC(
Bool type, True enable avg shard, otherwise disable. Default False.
)DOC")
    .def_property(
        "batch_size",
        [](const platform::ipu::IpuStrategy &self) { return self.batch_size; },
        [](platform::ipu::IpuStrategy &self, int batch_size) {
          self.batch_size = batch_size;
        },
        R"DOC(
Int type, used to make batch size fixed. Default 1.
)DOC")
    .def_property(
        "enable_fp16",
        [](const platform::ipu::IpuStrategy &self) { return self.enable_fp16; },
        [](platform::ipu::IpuStrategy &self, bool enable_fp16) {
          self.enable_fp16 = enable_fp16;
        },
        R"DOC(
Bool type, True enable float16 mode, otherwise disable. Default False.)DOC");
#endif
BindFleetWrapper
(
&
m
);
BindIO
(
&
m
);
...
...
paddle/fluid/pybind/tensor_py.h
浏览文件 @
cb636a48
...
...
@@ -313,6 +313,21 @@ void SetTensorFromPyArrayT(
PADDLE_THROW
(
platform
::
errors
::
PermissionDenied
(
"Cannot use XPUPlace in CPU/GPU version, "
"Please recompile or reinstall Paddle with XPU support."
));
#endif
}
else
if
(
paddle
::
platform
::
is_ipu_place
(
place
))
{
#ifdef PADDLE_WITH_IPU
if
(
zero_copy
)
{
auto
holder
=
std
::
make_shared
<
details
::
NumpyAllocation
<
T
>>
(
array
);
auto
type
=
framework
::
ToDataType
(
std
::
type_index
(
typeid
(
T
)));
self
->
ResetHolderWithType
(
holder
,
type
);
}
else
{
auto
dst
=
self
->
mutable_data
<
T
>
(
place
);
std
::
memcpy
(
dst
,
array
.
data
(),
array
.
nbytes
());
}
#else
PADDLE_THROW
(
platform
::
errors
::
PermissionDenied
(
"Cannot use IPUPlace in CPU/GPU/XPU/NPU version, "
"Please recompile or reinstall Paddle with IPU support."
));
#endif
}
else
if
(
paddle
::
platform
::
is_npu_place
(
place
))
{
#ifdef PADDLE_WITH_ASCEND_CL
...
...
python/paddle/__init__.py
浏览文件 @
cb636a48
...
...
@@ -260,6 +260,7 @@ from .framework.random import set_cuda_rng_state # noqa: F401
from
.framework
import
ParamAttr
# noqa: F401
from
.framework
import
create_parameter
# noqa: F401
from
.framework
import
CPUPlace
# noqa: F401
from
.framework
import
IPUPlace
# noqa: F401
from
.framework
import
CUDAPlace
# noqa: F401
from
.framework
import
NPUPlace
# noqa: F401
from
.framework
import
CUDAPinnedPlace
# noqa: F401
...
...
@@ -291,6 +292,7 @@ from .fluid.framework import get_flags # noqa: F401
from
.fluid.framework
import
set_flags
# noqa: F401
from
.device
import
is_compiled_with_xpu
# noqa: F401
from
.device
import
is_compiled_with_npu
# noqa: F401
from
.device
import
is_compiled_with_ipu
# noqa: F401
from
.device
import
XPUPlace
# noqa: F401
from
.fluid.dygraph.base
import
enable_dygraph
as
disable_static
# noqa: F401
...
...
python/paddle/device/__init__.py
浏览文件 @
cb636a48
...
...
@@ -28,7 +28,9 @@ __all__ = [ # noqa
'set_device'
,
'get_device'
,
'XPUPlace'
,
'IPUPlace'
,
'is_compiled_with_xpu'
,
'is_compiled_with_ipu'
,
'is_compiled_with_cinn'
,
'is_compiled_with_cuda'
,
'is_compiled_with_rocm'
,
...
...
@@ -55,6 +57,36 @@ def is_compiled_with_npu():
return
core
.
is_compiled_with_npu
()
def is_compiled_with_ipu():
    """
    Report whether this Paddle build was compiled with WITH_IPU=ON, i.e.
    with Graphcore IPU support.

    Returns (bool): `True` if IPU is supported, otherwise `False`.

    Examples:
        .. code-block:: python

            import paddle
            support_ipu = paddle.is_compiled_with_ipu()
    """
    # Delegates to the flag exported by the compiled core module.
    ipu_supported = core.is_compiled_with_ipu()
    return ipu_supported
def IPUPlace():
    """
    Create and return a Graphcore IPU Place.

    Examples:
        .. code-block:: python

            # required: ipu

            import paddle
            place = paddle.device.IPUPlace()
    """
    # The place object itself is constructed by the compiled core module.
    ipu_place = core.IPUPlace()
    return ipu_place
def
is_compiled_with_xpu
():
"""
Whether paddle was built with WITH_XPU=ON to support Baidu Kunlun
...
...
@@ -143,13 +175,19 @@ def _convert_to_place(device):
selected_npus
=
os
.
getenv
(
"FLAGS_selected_npus"
,
"0"
).
split
(
","
)
device_id
=
int
(
selected_npus
[
0
])
place
=
core
.
NPUPlace
(
device_id
)
elif
lower_device
==
'ipu'
:
if
not
core
.
is_compiled_with_ipu
():
raise
ValueError
(
"The device should not be 'ipu', "
\
"since PaddlePaddle is not compiled with IPU"
)
place
=
core
.
IPUPlace
()
else
:
avaliable_gpu_device
=
re
.
match
(
r
'gpu:\d+'
,
lower_device
)
avaliable_xpu_device
=
re
.
match
(
r
'xpu:\d+'
,
lower_device
)
avaliable_npu_device
=
re
.
match
(
r
'npu:\d+'
,
lower_device
)
if
not
avaliable_gpu_device
and
not
avaliable_xpu_device
and
not
avaliable_npu_device
:
raise
ValueError
(
"The device must be a string which is like 'cpu', 'gpu', 'gpu:x', 'xpu', 'xpu:x', 'npu'
or 'npu:x'
"
"The device must be a string which is like 'cpu', 'gpu', 'gpu:x', 'xpu', 'xpu:x', 'npu'
, 'npu:x' or ipu
"
)
if
avaliable_gpu_device
:
if
not
core
.
is_compiled_with_cuda
():
...
...
@@ -183,13 +221,13 @@ def _convert_to_place(device):
def
set_device
(
device
):
"""
Paddle supports running calculations on various types of devices, including CPU, GPU, XPU
and N
PU.
Paddle supports running calculations on various types of devices, including CPU, GPU, XPU
, NPU and I
PU.
They are represented by string identifiers. This function can specify the global device
which the OP will run.
Parameters:
device(str): This parameter determines the specific running device.
It can be ``cpu``, ``gpu``, ``xpu``, ``npu``, ``gpu:x``, ``xpu:x``
and ``npu:x
``,
It can be ``cpu``, ``gpu``, ``xpu``, ``npu``, ``gpu:x``, ``xpu:x``
, ``npu:x`` and ``ipu
``,
where ``x`` is the index of the GPUs, XPUs or NPUs.
Examples:
...
...
@@ -236,5 +274,10 @@ def get_device():
elif
isinstance
(
place
,
core
.
NPUPlace
):
device_id
=
place
.
get_device_id
()
device
=
'npu:'
+
str
(
device_id
)
elif
isinstance
(
place
,
core
.
IPUPlace
):
num_devices
=
core
.
get_ipu_device_count
()
device
=
"ipus:{{0-{}}}"
.
format
(
num_devices
-
1
)
else
:
raise
ValueError
(
"The device specification {} is invalid"
.
format
(
place
))
return
device
python/paddle/fluid/__init__.py
浏览文件 @
cb636a48
...
...
@@ -71,7 +71,7 @@ from . import distribute_lookup_table
from
.param_attr
import
ParamAttr
,
WeightNormParamAttr
from
.data_feeder
import
DataFeeder
from
.core
import
LoDTensor
,
LoDTensorArray
,
Scope
,
_Scope
from
.core
import
CPUPlace
,
XPUPlace
,
CUDAPlace
,
CUDAPinnedPlace
,
NPUPlace
from
.core
import
CPUPlace
,
XPUPlace
,
CUDAPlace
,
CUDAPinnedPlace
,
NPUPlace
,
IPUPlace
from
.incubate
import
fleet
from
.transpiler
import
DistributeTranspiler
,
\
memory_optimize
,
release_memory
,
DistributeTranspilerConfig
...
...
@@ -132,6 +132,7 @@ __all__ = framework.__all__ + executor.__all__ + \
'CUDAPlace'
,
'CUDAPinnedPlace'
,
'NPUPlace'
,
'IPUPlace'
,
'Tensor'
,
'ParamAttr'
,
'WeightNormParamAttr'
,
...
...
@@ -197,6 +198,11 @@ def __bootstrap__():
if
os
.
name
==
'nt'
:
remove_flag_if_exists
(
'cpu_deterministic'
)
if
core
.
is_compiled_with_ipu
():
# Currently we request all ipu available for training and testing
# finer control of pod of IPUs will be added later
read_env_flags
+=
[]
core
.
init_gflags
([
"--tryfromenv="
+
","
.
join
(
read_env_flags
)])
# Note(zhouwei25): sys may not have argv in some cases,
# Such as: use Python/C API to call Python from C++
...
...
python/paddle/framework/__init__.py
浏览文件 @
cb636a48
...
...
@@ -23,6 +23,7 @@ from .framework import set_grad_enabled # noqa: F401
from
..fluid.param_attr
import
ParamAttr
# noqa: F401
from
..fluid.layers.tensor
import
create_parameter
# noqa: F401
from
..fluid.core
import
CPUPlace
# noqa: F401
from
..fluid.core
import
IPUPlace
# noqa: F401
from
..fluid.core
import
CUDAPlace
# noqa: F401
from
..fluid.core
import
CUDAPinnedPlace
# noqa: F401
from
..fluid.core
import
NPUPlace
# noqa: F401
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录