Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
PaddlePaddle
Paddle
提交
315b133e
P
Paddle
项目概览
PaddlePaddle
/
Paddle
大约 1 年 前同步成功
通知
2298
Star
20931
Fork
5422
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
1423
列表
看板
标记
里程碑
合并请求
543
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
1,423
Issue
1,423
列表
看板
标记
里程碑
合并请求
543
合并请求
543
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
315b133e
编写于
1月 16, 2019
作者:
M
minqiyang
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
Add single GPU support to imperative
上级
91d87ec0
变更
18
隐藏空白更改
内联
并排
Showing
18 changed file
with
289 addition
and
108 deletion
+289
-108
paddle/fluid/imperative/CMakeLists.txt
paddle/fluid/imperative/CMakeLists.txt
+2
-2
paddle/fluid/imperative/layer.cc
paddle/fluid/imperative/layer.cc
+64
-12
paddle/fluid/imperative/layer.h
paddle/fluid/imperative/layer.h
+9
-1
paddle/fluid/imperative/tracer.cc
paddle/fluid/imperative/tracer.cc
+53
-14
paddle/fluid/imperative/tracer.h
paddle/fluid/imperative/tracer.h
+9
-4
paddle/fluid/platform/device_context.cc
paddle/fluid/platform/device_context.cc
+3
-2
paddle/fluid/pybind/imperative.cc
paddle/fluid/pybind/imperative.cc
+23
-3
python/paddle/fluid/framework.py
python/paddle/fluid/framework.py
+14
-2
python/paddle/fluid/imperative/base.py
python/paddle/fluid/imperative/base.py
+14
-5
python/paddle/fluid/imperative/nn.py
python/paddle/fluid/imperative/nn.py
+6
-6
python/paddle/fluid/layers/learning_rate_scheduler.py
python/paddle/fluid/layers/learning_rate_scheduler.py
+1
-1
python/paddle/fluid/layers/nn.py
python/paddle/fluid/layers/nn.py
+2
-1
python/paddle/fluid/layers/tensor.py
python/paddle/fluid/layers/tensor.py
+2
-1
python/paddle/fluid/optimizer.py
python/paddle/fluid/optimizer.py
+16
-15
python/paddle/fluid/tests/unittests/CMakeLists.txt
python/paddle/fluid/tests/unittests/CMakeLists.txt
+0
-1
python/paddle/fluid/tests/unittests/test_imperative.py
python/paddle/fluid/tests/unittests/test_imperative.py
+5
-5
python/paddle/fluid/tests/unittests/test_imperative_optimizer.py
...paddle/fluid/tests/unittests/test_imperative_optimizer.py
+1
-1
python/paddle/fluid/tests/unittests/test_imperative_resnet.py
...on/paddle/fluid/tests/unittests/test_imperative_resnet.py
+65
-32
未找到文件。
paddle/fluid/imperative/CMakeLists.txt
浏览文件 @
315b133e
cc_library
(
layer SRCS layer.cc DEPS proto_desc operator
)
cc_library
(
tracer SRCS tracer.cc DEPS proto_desc
)
cc_library
(
layer SRCS layer.cc DEPS proto_desc operator
device_context blas
)
cc_library
(
tracer SRCS tracer.cc DEPS proto_desc
device_context
)
cc_library
(
engine SRCS engine.cc
)
paddle/fluid/imperative/layer.cc
浏览文件 @
315b133e
...
...
@@ -13,6 +13,7 @@
// limitations under the License.
#include "paddle/fluid/imperative/layer.h"
#include <deque>
#include <limits>
#include <map>
...
...
@@ -22,6 +23,9 @@
#include "paddle/fluid/framework/lod_tensor.h"
#include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/framework/operator.h"
#include "paddle/fluid/framework/tensor_util.h"
#include "paddle/fluid/operators/math/blas.h"
#include "paddle/fluid/platform/device_context.h"
#include "paddle/fluid/string/printf.h"
namespace
paddle
{
...
...
@@ -31,22 +35,68 @@ std::map<int, py::object> py_funcs_;
using
framework
::
Variable
;
void
AddTo
(
Variable
*
src
,
Variable
*
dst
)
{
framework
::
LoDTensor
*
dst_tensor
=
dst
->
GetMutable
<
framework
::
LoDTensor
>
();
framework
::
LoDTensor
*
src_tensor
=
src
->
GetMutable
<
framework
::
LoDTensor
>
();
namespace
detail
{
template
<
typename
T
>
class
TensorAddToFunctor
:
public
boost
::
static_visitor
<>
{
public:
TensorAddToFunctor
(
int64_t
numel
,
const
T
*
x
,
T
*
y
)
:
numel_
(
numel
),
x_
(
x
),
y_
(
y
)
{}
void
operator
()(
const
platform
::
CPUPlace
&
place
)
{
platform
::
CPUDeviceContext
*
ctx
=
dynamic_cast
<
platform
::
CPUDeviceContext
*>
(
platform
::
DeviceContextPool
::
Instance
().
Get
(
place
));
auto
blas
=
operators
::
math
::
GetBlas
<
platform
::
CPUDeviceContext
,
float
>
(
*
ctx
);
blas
.
AXPY
(
numel_
,
1.
,
x_
,
y_
);
}
#ifdef PADDLE_WITH_CUDA
void
operator
()(
const
platform
::
CUDAPlace
&
place
)
{
platform
::
CUDADeviceContext
*
ctx
=
dynamic_cast
<
platform
::
CUDADeviceContext
*>
(
platform
::
DeviceContextPool
::
Instance
().
Get
(
place
));
auto
blas
=
operators
::
math
::
GetBlas
<
platform
::
CUDADeviceContext
,
float
>
(
*
ctx
);
blas
.
AXPY
(
numel_
,
1.
,
x_
,
y_
);
}
#else
void
operator
()(
const
platform
::
CUDAPlace
&
place
)
{
PADDLE_THROW
(
"Do NOT support gradient merge in place %s"
,
place
);
}
#endif
// there is NO blas in CUDAPinnedPlace
void
operator
()(
const
platform
::
CUDAPinnedPlace
&
place
)
{
PADDLE_THROW
(
"Do NOT support gradient merge in place %s"
,
place
);
}
private:
int64_t
numel_
;
const
T
*
x_
;
T
*
y_
;
};
}
// namespace detail
void
AddGradTo
(
Variable
*
src
,
Variable
*
dst
,
platform
::
Place
place
)
{
framework
::
Tensor
*
dst_tensor
=
dst
->
GetMutable
<
framework
::
LoDTensor
>
();
framework
::
Tensor
*
src_tensor
=
src
->
GetMutable
<
framework
::
LoDTensor
>
();
// FIXME(minqiyang): loss_grad op will pass a zero grad of label
// ugly fix for it
if
(
src_tensor
->
numel
()
==
0
)
{
return
;
}
PADDLE_ENFORCE
(
dst_tensor
->
numel
()
==
src_tensor
->
numel
(),
"dst_numel %lld vs. src_numel %lld"
,
dst_tensor
->
numel
(),
src_tensor
->
numel
());
float
*
dst_data
=
dst_tensor
->
mutable_data
<
float
>
(
platform
::
CPUPlace
());
const
float
*
src_data
=
src_tensor
->
data
<
float
>
();
for
(
int64_t
i
=
0
;
i
<
src_tensor
->
numel
();
++
i
)
{
dst_data
[
i
]
+=
src_data
[
i
]
;
}
detail
::
TensorAddToFunctor
<
float
>
func
(
src_tensor
->
numel
(),
src_tensor
->
data
<
float
>
(),
dst_tensor
->
mutable_data
<
float
>
(
place
))
;
boost
::
apply_visitor
(
func
,
place
);
}
class
Autograd
{
...
...
@@ -158,7 +208,7 @@ std::map<std::string, std::vector<VarBase*>> OpBase::ApplyGrad() {
PADDLE_ENFORCE_NOT_NULL
(
op_kernel
,
"only support op with kernel"
);
framework
::
Scope
scope
;
platform
::
CPUPlace
place
;
platform
::
Place
place
=
expected_place_
;
PreparedOp
p
=
PreparedOp
::
Prepare
(
ctx
,
*
op_kernel
,
place
);
p
.
op
.
RuntimeInferShape
(
scope
,
place
,
ctx
);
p
.
func
(
framework
::
ExecutionContext
(
p
.
op
,
scope
,
*
p
.
dev_ctx
,
p
.
ctx
));
...
...
@@ -172,7 +222,7 @@ std::map<std::string, std::vector<VarBase*>> OpBase::ApplyGrad() {
for
(
size_t
i
=
0
;
i
<
outputs
.
size
();
++
i
)
{
framework
::
Variable
*
grad
=
outputs
[
i
];
framework
::
Variable
*
orig_grad
=
origin_outputs
[
i
];
Add
To
(
grad
,
orig_grad
);
Add
GradTo
(
grad
,
orig_grad
,
expected_place_
);
delete
grad
;
}
}
...
...
@@ -184,8 +234,10 @@ void VarBase::RunBackward() {
VLOG
(
3
)
<<
"start backward"
;
auto
grads_t
=
grads_
->
var_
->
GetMutable
<
framework
::
LoDTensor
>
();
float
*
data
=
grads_t
->
mutable_data
<
float
>
(
platform
::
CPUPlace
());
std
::
fill
(
data
,
data
+
grads_t
->
numel
(),
1.0
);
operators
::
math
::
set_constant
(
*
(
platform
::
DeviceContextPool
::
Instance
().
Get
(
var_
->
GetMutable
<
framework
::
LoDTensor
>
()
->
place
())),
grads_t
,
1.0
);
PADDLE_ENFORCE
(
grads_
==
...
...
paddle/fluid/imperative/layer.h
浏览文件 @
315b133e
...
...
@@ -26,12 +26,15 @@
#include "paddle/fluid/framework/operator.h"
#include "paddle/fluid/framework/var_desc.h"
#include "paddle/fluid/platform/enforce.h"
#include "paddle/fluid/platform/device_context.h"
#include "paddle/fluid/imperative/type_defs.h"
namespace
paddle
{
namespace
imperative
{
class
VarBase
;
namespace
py
=
::
pybind11
;
class
PreparedOp
{
...
...
@@ -81,6 +84,8 @@ class PreparedOp {
return
PreparedOp
(
op
,
ctx
,
kernel_iter
->
second
,
dev_ctx
);
}
inline
platform
::
DeviceContext
*
GetDeviceContext
()
const
{
return
dev_ctx
;
}
const
framework
::
OperatorBase
&
op
;
const
framework
::
RuntimeContext
&
ctx
;
framework
::
OperatorWithKernel
::
OpKernelFunc
func
;
...
...
@@ -159,7 +164,8 @@ class OpBase {
:
op_desc_
(
nullptr
),
forward_id_
(
-
1
),
grad_op_desc_
(
nullptr
),
backward_id_
(
-
1
)
{}
backward_id_
(
-
1
),
expected_place_
(
platform
::
CPUPlace
())
{}
virtual
~
OpBase
()
{
if
(
grad_op_desc_
)
delete
grad_op_desc_
;
...
...
@@ -176,6 +182,8 @@ class OpBase {
framework
::
OpDesc
*
grad_op_desc_
;
int
backward_id_
;
platform
::
Place
expected_place_
;
VarBasePtrMap
input_vars_
;
VarBasePtrMap
output_vars_
;
OpBasePtrMap
pre_ops_
;
...
...
paddle/fluid/imperative/tracer.cc
浏览文件 @
315b133e
...
...
@@ -14,6 +14,10 @@
#include "paddle/fluid/imperative/tracer.h"
#include "paddle/fluid/operators/math/math_function.h"
#include "paddle/fluid/platform/device_context.h"
#include "paddle/fluid/platform/enforce.h"
namespace
paddle
{
namespace
imperative
{
...
...
@@ -31,16 +35,38 @@ void CreateGradOp(const framework::OpDesc& op_desc,
*
grad_op_desc
=
grad_op_descs
[
0
].
release
();
}
void
InitVar
(
framework
::
Variable
*
var
,
framework
::
Variable
*
grad_var
)
{
void
InitVar
(
framework
::
Variable
*
var
,
framework
::
Variable
*
grad_var
,
platform
::
DeviceContext
*
dev_ctx
)
{
PADDLE_ENFORCE_NOT_NULL
(
dev_ctx
,
"Could not get valid device from forward op"
);
auto
&
var_t
=
var
->
Get
<
framework
::
LoDTensor
>
();
float
*
data
=
grad_var
->
GetMutable
<
framework
::
LoDTensor
>
()
->
mutable_data
<
float
>
(
var_t
.
dims
(),
platform
::
CPUPlace
());
std
::
fill
(
data
,
data
+
var_t
.
numel
(),
0.0
);
grad_var
->
GetMutable
<
framework
::
LoDTensor
>
()
->
mutable_data
<
float
>
(
var_t
.
dims
(),
dev_ctx
->
GetPlace
());
operators
::
math
::
set_constant
(
*
dev_ctx
,
grad_var
->
GetMutable
<
framework
::
LoDTensor
>
(),
.0
f
);
}
platform
::
Place
GetExpectedPlace
(
platform
::
Place
place
,
VarBasePtrMap
inputs
)
{
platform
::
Place
result
=
place
;
for
(
auto
it
:
inputs
)
{
for
(
VarBase
*
var
:
it
.
second
)
{
platform
::
Place
tmp_place
=
var
->
var_
->
Get
<
framework
::
LoDTensor
>
().
place
();
if
(
!
platform
::
is_same_place
(
tmp_place
,
result
))
{
PADDLE_THROW
(
"Input variable should keep in the same place: %s, but get place: "
"%s of input %s instead"
,
result
,
tmp_place
,
it
.
first
);
}
}
}
return
result
;
}
void
Tracer
::
Trace
(
OpBase
*
op
,
const
VarBasePtrMap
&
inputs
,
const
VarBasePtrMap
&
outputs
,
framework
::
BlockDesc
*
block
,
const
platform
::
Place
expected_place
,
const
bool
stop_gradient
)
{
std
::
map
<
std
::
string
,
VarBase
*>
vars
;
...
...
@@ -108,10 +134,12 @@ void Tracer::Trace(OpBase* op, const VarBasePtrMap& inputs,
PADDLE_ENFORCE_NOT_NULL
(
op_kernel
,
"only support op with kernel"
);
framework
::
Scope
scope
;
platform
::
CPUPlace
place
;
PreparedOp
p
=
PreparedOp
::
Prepare
(
ctx
,
*
op_kernel
,
place
);
p
.
op
.
RuntimeInferShape
(
scope
,
place
,
ctx
);
p
.
func
(
framework
::
ExecutionContext
(
p
.
op
,
scope
,
*
p
.
dev_ctx
,
p
.
ctx
));
op
->
expected_place_
=
GetExpectedPlace
(
expected_place
,
inputs
);
PreparedOp
prepared_op
=
PreparedOp
::
Prepare
(
ctx
,
*
op_kernel
,
op
->
expected_place_
);
prepared_op
.
op
.
RuntimeInferShape
(
scope
,
op
->
expected_place_
,
ctx
);
prepared_op
.
func
(
framework
::
ExecutionContext
(
prepared_op
.
op
,
scope
,
*
prepared_op
.
dev_ctx
,
prepared_op
.
ctx
));
if
(
!
stop_gradient
)
{
framework
::
OpDesc
*
grad_op_desc
;
...
...
@@ -134,7 +162,8 @@ void Tracer::Trace(OpBase* op, const VarBasePtrMap& inputs,
}
else
{
VarBase
*
var
=
vars
[
var_it
->
second
];
if
(
!
var
->
grads_
->
var_
->
IsInitialized
())
{
InitVar
(
var
->
var_
,
var
->
grads_
->
var_
);
InitVar
(
var
->
var_
,
var
->
grads_
->
var_
,
prepared_op
.
GetDeviceContext
());
}
// Douts.
grad_in_vars
.
push_back
(
var
->
grads_
->
var_
);
...
...
@@ -147,10 +176,13 @@ void Tracer::Trace(OpBase* op, const VarBasePtrMap& inputs,
for
(
const
std
::
string
&
grad_outvar
:
it
.
second
)
{
block
->
FindRecursiveOrCreateVar
(
grad_outvar
);
auto
var_it
=
grad_to_var
->
find
(
grad_outvar
);
PADDLE_ENFORCE
(
var_it
!=
grad_to_var
->
end
());
PADDLE_ENFORCE
(
var_it
!=
grad_to_var
->
end
(),
"Could not found the grad op output var, should this "
"operator %s's stop gradient be True"
,
op_desc
->
Type
());
VarBase
*
var
=
vars
[
var_it
->
second
];
if
(
!
var
->
grads_
->
var_
->
IsInitialized
())
{
InitVar
(
var
->
var_
,
var
->
grads_
->
var_
);
InitVar
(
var
->
var_
,
var
->
grads_
->
var_
,
prepared_op
.
GetDeviceContext
()
);
}
grad_out_vars
.
push_back
(
var
->
grads_
->
var_
);
}
...
...
@@ -193,16 +225,23 @@ std::vector<VarBase*> Tracer::PyTrace(OpBase* op,
for
(
VarBase
*
out
:
outputs
)
{
grad_input_vars
.
push_back
(
out
->
var_
);
}
platform
::
CPUPlace
place
;
for
(
VarBase
*
out
:
outputs
)
{
grad_input_vars
.
push_back
(
out
->
grads_
->
var_
);
if
(
!
grad_input_vars
.
back
()
->
IsInitialized
())
{
InitVar
(
out
->
var_
,
grad_input_vars
.
back
());
// TODO(minqiyang): Add GPU support for PyLayer, only support CPU now
InitVar
(
out
->
var_
,
grad_input_vars
.
back
(),
platform
::
DeviceContextPool
::
Instance
().
Get
(
place
));
}
}
for
(
const
VarBase
*
inp
:
inputs
)
{
grad_output_vars
.
push_back
(
inp
->
grads_
->
var_
);
if
(
!
grad_output_vars
.
back
()
->
IsInitialized
())
{
InitVar
(
inp
->
var_
,
grad_output_vars
.
back
());
// TODO(minqiyang): Add GPU support for PyLayer, only support CPU now
InitVar
(
inp
->
var_
,
grad_output_vars
.
back
(),
platform
::
DeviceContextPool
::
Instance
().
Get
(
place
));
}
}
}
...
...
paddle/fluid/imperative/tracer.h
浏览文件 @
315b133e
...
...
@@ -22,6 +22,7 @@
#include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/imperative/engine.h"
#include "paddle/fluid/imperative/layer.h"
#include "paddle/fluid/platform/place.h"
namespace
paddle
{
namespace
imperative
{
...
...
@@ -34,21 +35,25 @@ void CreateGradOp(const framework::OpDesc& op_desc,
void
InitVar
(
framework
::
Variable
*
var
,
framework
::
Variable
*
grad_var
);
platform
::
Place
GetExpectedPlace
(
platform
::
Place
place
,
VarBasePtrMap
inputs
);
class
Tracer
{
public:
explicit
Tracer
(
framework
::
BlockDesc
*
root_block
)
:
root_block_
(
root_block
)
{}
virtual
~
Tracer
()
{}
void
Trace
(
OpBase
*
op
,
const
std
::
map
<
std
::
string
,
std
::
vector
<
VarBase
*>>&
inputs
,
const
std
::
map
<
std
::
string
,
std
::
vector
<
VarBase
*>>&
outputs
,
framework
::
BlockDesc
*
block
,
const
bool
stop_gradient
=
false
);
void
Trace
(
OpBase
*
op
,
const
VarBasePtrMap
&
inputs
,
const
VarBasePtrMap
&
outputs
,
framework
::
BlockDesc
*
block
,
const
platform
::
Place
expected_place
,
const
bool
stop_gradient
=
false
);
std
::
vector
<
VarBase
*>
PyTrace
(
OpBase
*
op
,
const
std
::
vector
<
VarBase
*>&
inputs
,
bool
stop_gradient
=
false
);
private:
platform
::
Place
GetPlace
(
const
VarBasePtrMap
&
inputs
);
framework
::
BlockDesc
*
root_block_
;
};
...
...
paddle/fluid/platform/device_context.cc
浏览文件 @
315b133e
...
...
@@ -30,8 +30,9 @@ platform::DeviceContext* DeviceContextPool::Get(const platform::Place& place) {
auto
it
=
device_contexts_
.
find
(
place
);
if
(
it
==
device_contexts_
.
end
())
{
PADDLE_THROW
(
"'Place' is not supported, Please re-compile with WITH_GPU "
"option"
);
"Place %s is not supported, Please re-compile with WITH_GPU "
"option"
,
place
);
}
return
it
->
second
.
get
().
get
();
}
...
...
paddle/fluid/pybind/imperative.cc
浏览文件 @
315b133e
...
...
@@ -15,18 +15,38 @@ limitations under the License. */
#include "paddle/fluid/pybind/imperative.h"
#include "paddle/fluid/framework/block_desc.h"
#include "paddle/fluid/imperative/tracer.h"
#include "paddle/fluid/imperative/type_defs.h"
namespace
paddle
{
namespace
pybind
{
// Bind Methods
void
BindTracer
(
pybind11
::
module
*
m
)
{
void
BindTracer
(
pybind11
::
module
*
m
)
{
pybind11
::
class_
<
imperative
::
Tracer
>
(
*
m
,
"Tracer"
,
""
)
.
def
(
"__init__"
,
[](
imperative
::
Tracer
&
self
,
framework
::
BlockDesc
*
root_block
)
{
[](
imperative
::
Tracer
&
self
,
framework
::
BlockDesc
*
root_block
)
{
new
(
&
self
)
imperative
::
Tracer
(
root_block
);
})
.
def
(
"trace"
,
&
imperative
::
Tracer
::
Trace
)
.
def
(
"trace"
,
[](
imperative
::
Tracer
&
self
,
imperative
::
OpBase
*
op
,
const
imperative
::
VarBasePtrMap
&
inputs
,
const
imperative
::
VarBasePtrMap
&
outputs
,
framework
::
BlockDesc
*
block
,
const
platform
::
CPUPlace
expected_place
,
const
bool
stop_gradient
=
false
)
{
self
.
Trace
(
op
,
inputs
,
outputs
,
block
,
expected_place
,
stop_gradient
);
})
.
def
(
"trace"
,
[](
imperative
::
Tracer
&
self
,
imperative
::
OpBase
*
op
,
const
imperative
::
VarBasePtrMap
&
inputs
,
const
imperative
::
VarBasePtrMap
&
outputs
,
framework
::
BlockDesc
*
block
,
const
platform
::
CUDAPlace
expected_place
,
const
bool
stop_gradient
=
false
)
{
self
.
Trace
(
op
,
inputs
,
outputs
,
block
,
expected_place
,
stop_gradient
);
})
.
def
(
"py_trace"
,
&
imperative
::
Tracer
::
PyTrace
,
pybind11
::
return_value_policy
::
take_ownership
);
}
...
...
python/paddle/fluid/framework.py
浏览文件 @
315b133e
...
...
@@ -66,6 +66,7 @@ ZERO_VAR_SUFFIX = core.kZeroVarSuffix()
CONTROL_DEP_VAR_PREFIX
=
core
.
kControlDepVarName
()
_imperative_tracer_
=
None
_current_expected_place_
=
None
def
_in_imperative_mode
():
...
...
@@ -76,6 +77,10 @@ def _imperative_tracer():
return
_imperative_tracer_
def
_current_expected_place
():
return
_current_expected_place_
class
NameScope
(
object
):
def
__init__
(
self
,
name
=
""
,
parent
=
None
):
self
.
_children
=
dict
()
...
...
@@ -1299,7 +1304,7 @@ class Block(object):
def
_trace_op
(
self
,
op
,
stop_gradient
=
False
):
if
_in_imperative_mode
():
_imperative_tracer
().
trace
(
op
.
iop
,
op
.
inputs
,
op
.
outputs
,
self
.
desc
,
stop_gradient
)
_current_expected_place_
,
stop_gradient
)
def
_insert_op
(
self
,
index
,
*
args
,
**
kwargs
):
"""
...
...
@@ -2312,9 +2317,16 @@ def _get_var(name, program=None):
@
contextlib
.
contextmanager
def
_imperative_guard
(
tracer
):
def
_imperative_guard
(
tracer
,
place
):
global
_imperative_tracer_
tmp_trace
=
_imperative_tracer_
_imperative_tracer_
=
tracer
global
_current_expected_place_
tmp_place
=
_current_expected_place_
_current_expected_place_
=
place
yield
_imperative_tracer_
=
tmp_trace
_current_expected_place_
=
tmp_place
python/paddle/fluid/imperative/base.py
浏览文件 @
315b133e
...
...
@@ -25,17 +25,28 @@ def enabled():
@
contextlib
.
contextmanager
def
guard
():
def
guard
(
device
=
0
):
train
=
framework
.
Program
()
startup
=
framework
.
Program
()
tracer
=
core
.
Tracer
(
train
.
current_block
().
desc
)
if
device
is
None
:
place
=
core
.
CPUPlace
()
else
:
if
core
.
is_compiled_with_cuda
():
place
=
core
.
CUDAPlace
(
device
)
else
:
place
=
core
.
CPUPlace
()
with
framework
.
program_guard
(
train
,
startup
):
with
framework
.
unique_name
.
guard
():
with
framework
.
_imperative_guard
(
tracer
):
with
framework
.
_imperative_guard
(
tracer
,
place
):
yield
def
to_variable
(
value
,
block
=
None
):
assert
enabled
(),
"to_variable could only be called in imperative mode"
if
isinstance
(
value
,
np
.
ndarray
):
if
not
block
:
block
=
framework
.
default_main_program
().
current_block
()
...
...
@@ -47,9 +58,7 @@ def to_variable(value, block=None):
dtype
=
value
.
dtype
)
var
=
py_var
.
_ivar
.
value
()
tensor
=
var
.
get_tensor
()
tensor
.
set
(
value
,
core
.
CPUP
lace
())
tensor
.
set
(
value
,
framework
.
_current_expected_p
lace
())
return
py_var
elif
isinstance
(
value
,
framework
.
Variable
):
return
value
else
:
raise
ValueError
(
"Unsupported type %s"
%
type
(
value
))
python/paddle/fluid/imperative/nn.py
浏览文件 @
315b133e
...
...
@@ -252,15 +252,15 @@ class FC(layers.Layer):
"y_num_col_dims"
:
1
})
out
=
self
.
_helper
.
create_variable_for_type_inference
(
self
.
_dtype
)
pre_bias
=
self
.
_helper
.
create_variable_for_type_inference
(
self
.
_dtype
)
self
.
_helper
.
append_op
(
type
=
"sum"
,
inputs
=
{
"X"
:
[
tmp
]},
outputs
=
{
"Out"
:
out
},
outputs
=
{
"Out"
:
pre_bias
},
attrs
=
{
"use_mkldnn"
:
False
})
pre_activation
=
self
.
_helper
.
append_bias_op
(
pre_bias
,
dim_start
=
num_flatten_dims
)
pre_bias
,
dim_start
=
self
.
_
num_flatten_dims
)
return
self
.
_helper
.
append_activation
(
pre_activation
)
...
...
@@ -355,11 +355,11 @@ class BatchNorm(layers.Layer):
variance_out
=
self
.
_variance
saved_mean
=
self
.
_helper
.
create_variable_for_type_inference
(
dtype
=
dtype
,
stop_gradient
=
True
)
dtype
=
self
.
_
dtype
,
stop_gradient
=
True
)
saved_variance
=
self
.
_helper
.
create_variable_for_type_inference
(
dtype
=
dtype
,
stop_gradient
=
True
)
dtype
=
self
.
_
dtype
,
stop_gradient
=
True
)
batch_norm_out
=
input
if
self
.
_in_place
else
self
.
_helper
.
create_variable_for_type_inference
(
dtype
)
self
.
_
dtype
)
self
.
_helper
.
append_op
(
type
=
"batch_norm"
,
...
...
python/paddle/fluid/layers/learning_rate_scheduler.py
浏览文件 @
315b133e
...
...
@@ -321,7 +321,7 @@ def append_LARS(params_grads, learning_rate, weight_decay):
The decayed learning rate
Examples:
.. code-block:: python
learning_rate *= local_gw_ratio * sqrt(sumsq(param))
/ (sqrt(sumsq(gradient))+ weight_decay * sqrt(sumsq(param)))
"""
...
...
python/paddle/fluid/layers/nn.py
浏览文件 @
315b133e
...
...
@@ -5810,7 +5810,8 @@ def autoincreased_step_counter(counter_name=None, begin=1, step=1):
type
=
'increment'
,
inputs
=
{
'X'
:
[
counter
]},
outputs
=
{
'Out'
:
[
counter
]},
attrs
=
{
'step'
:
float
(
step
)})
attrs
=
{
'step'
:
float
(
step
)},
stop_gradient
=
True
)
counter
.
stop_gradient
=
True
return
counter
...
...
python/paddle/fluid/layers/tensor.py
浏览文件 @
315b133e
...
...
@@ -382,7 +382,8 @@ def fill_constant(shape, dtype, value, force_cpu=False, out=None):
'dtype'
:
out
.
dtype
,
'value'
:
float
(
value
),
'force_cpu'
:
force_cpu
or
force_init_on_cpu
()
})
},
stop_gradient
=
True
)
out
.
stop_gradient
=
True
return
out
...
...
python/paddle/fluid/optimizer.py
浏览文件 @
315b133e
...
...
@@ -301,10 +301,10 @@ class Optimizer(object):
no_grad_set (set|None): set of Variables should be ignored.
callbacks (list|None): list of callables to run when appending backward
operator for one parameter.
Return:
list: list of (param, grad) pair, grad is the output of backward.
Examples:
See examples in `apply_gradients`.
"""
...
...
@@ -322,10 +322,10 @@ class Optimizer(object):
Args:
params_grads (list): list of (param, grad) pair to do optimization.
Returns:
list: A list of operators appended to the current program.
Examples:
.. code-block:: python
...
...
@@ -364,7 +364,7 @@ class Optimizer(object):
This method combines interface `backward()` and
`apply_gradients()` into one.
Args:
loss (Variable): loss variable to run optimizations.
startup_program (Program): startup_program for initializing parameters
...
...
@@ -381,18 +381,19 @@ class Optimizer(object):
optimize_ops
=
[]
if
imperative_base
.
enabled
():
if
parameter_list
is
not
None
:
param
s_grad
s
=
parameter_list
param
eter
s
=
parameter_list
else
:
parameters
=
program
.
global_block
().
all_parameters
()
params_grads
=
[]
for
param
in
parameters
:
# create gradient variable
grad_var
=
Variable
(
block
=
loss
.
block
,
name
=
param
.
_ivar
.
_grad_name
(),
stop_gradient
=
True
,
ivar
=
param
.
_ivar
.
_grad_ivar
())
params_grads
.
append
((
param
,
grad_var
))
params_grads
=
[]
for
param
in
parameters
:
# create gradient variable
grad_var
=
Variable
(
block
=
loss
.
block
,
name
=
param
.
_ivar
.
_grad_name
(),
stop_gradient
=
True
,
ivar
=
param
.
_ivar
.
_grad_ivar
())
params_grads
.
append
((
param
,
grad_var
))
with
program_guard
(
program
,
startup_program
):
optimize_ops
=
self
.
_create_optimization_pass
(
params_grads
)
else
:
...
...
python/paddle/fluid/tests/unittests/CMakeLists.txt
浏览文件 @
315b133e
...
...
@@ -107,7 +107,6 @@ if(WITH_DISTRIBUTE)
endif
()
py_test_modules
(
test_parallel_executor_crf MODULES test_parallel_executor_crf SERIAL
)
py_test_modules
(
test_parallel_executor_fetch_feed MODULES test_parallel_executor_fetch_feed SERIAL
)
set_tests_properties
(
test_parallel_executor_fetch_feed PROPERTIES TIMEOUT 150
)
py_test_modules
(
test_parallel_executor_transformer MODULES test_parallel_executor_transformer SERIAL
)
if
(
NOT APPLE
)
py_test_modules
(
test_image_classification_resnet MODULES test_image_classification_resnet SERIAL
)
...
...
python/paddle/fluid/tests/unittests/test_imperative.py
浏览文件 @
315b133e
...
...
@@ -82,7 +82,7 @@ class MLP(fluid.imperative.Layer):
class
TestImperative
(
unittest
.
TestCase
):
def
test_layer
(
self
):
with
fluid
.
imperative
.
guard
():
with
fluid
.
imperative
.
guard
(
device
=
None
):
cl
=
core
.
Layer
()
cl
.
forward
([])
l
=
fluid
.
imperative
.
Layer
()
...
...
@@ -90,7 +90,7 @@ class TestImperative(unittest.TestCase):
def
test_pylayer_func_id
(
self
):
with
fluid
.
imperative
.
guard
():
with
fluid
.
imperative
.
guard
(
device
=
None
):
class
PyLayer1
(
fluid
.
imperative
.
PyLayer
):
def
__init__
(
self
):
...
...
@@ -130,7 +130,7 @@ class TestImperative(unittest.TestCase):
def
test_pylayer
(
self
):
np_inp
=
np
.
ones
([
2
,
2
],
np
.
float32
)
with
fluid
.
imperative
.
guard
():
with
fluid
.
imperative
.
guard
(
device
=
None
):
my_py_layer
=
MyPyLayer
()
var_inp
=
fluid
.
imperative
.
base
.
to_variable
(
np_inp
)
outs
=
my_py_layer
(
var_inp
)
...
...
@@ -158,7 +158,7 @@ class TestImperative(unittest.TestCase):
def
test_layer_in_out
(
self
):
np_inp
=
np
.
array
([
1.0
,
2.0
,
-
1.0
],
dtype
=
np
.
float32
)
with
fluid
.
imperative
.
guard
():
with
fluid
.
imperative
.
guard
(
device
=
None
):
var_inp
=
fluid
.
imperative
.
base
.
to_variable
(
np_inp
)
l
=
MyLayer
()
x
=
l
(
var_inp
)[
0
]
...
...
@@ -185,7 +185,7 @@ class TestImperative(unittest.TestCase):
def
test_mlp
(
self
):
np_inp
=
np
.
array
([[
1.0
,
2.0
],
[
3.0
,
4.0
]],
dtype
=
np
.
float32
)
with
fluid
.
imperative
.
guard
():
with
fluid
.
imperative
.
guard
(
device
=
None
):
var_inp
=
fluid
.
imperative
.
base
.
to_variable
(
np_inp
)
mlp
=
MLP
()
out
=
mlp
(
var_inp
)
...
...
python/paddle/fluid/tests/unittests/test_imperative_optimizer.py
浏览文件 @
315b133e
...
...
@@ -101,7 +101,7 @@ class TestImperativeMnist(unittest.TestCase):
def
test_mnist_cpu_float32
(
self
):
seed
=
90
with
fluid
.
imperative
.
guard
():
with
fluid
.
imperative
.
guard
(
device
=
None
):
fluid
.
default_startup_program
().
random_seed
=
seed
fluid
.
default_main_program
().
random_seed
=
seed
...
...
python/paddle/fluid/tests/unittests/test_imperative_resnet.py
浏览文件 @
315b133e
...
...
@@ -34,7 +34,10 @@ train_parameters = {
"batch_size"
:
256
,
"epochs"
:
[
30
,
60
,
90
],
"steps"
:
[
0.1
,
0.01
,
0.001
,
0.0001
]
}
},
"batch_size"
:
256
,
"lr"
:
0.1
,
"total_images"
:
1281164
,
}
...
...
@@ -52,24 +55,33 @@ def optimizer_setting(params):
base_lr
=
params
[
"lr"
]
lr
=
[]
lr
=
[
base_lr
*
(
0.1
**
i
)
for
i
in
range
(
len
(
bd
)
+
1
)]
optimizer
=
fluid
.
optimizer
.
Momentum
(
learning_rate
=
fluid
.
layers
.
piecewise_decay
(
boundaries
=
bd
,
values
=
lr
),
momentum
=
0.9
,
regularization
=
fluid
.
regularizer
.
L2Decay
(
1e-4
))
optimizer
=
fluid
.
optimizer
.
SGD
(
learning_rate
=
params
[
"lr"
])
# optimizer = fluid.optimizer.Momentum(
# learning_rate=params["lr"],
# learning_rate=fluid.layers.piecewise_decay(
# boundaries=bd, values=lr),
# momentum=0.9,
# regularization=fluid.regularizer.L2Decay(1e-4))
return
optimizer
class
ConvBNLayer
(
fluid
.
imperative
.
Layer
):
def
__init__
(
self
,
num_filters
,
filter_size
,
stride
=
1
,
groups
=
1
,
act
=
None
):
def
__init__
(
self
,
num_channels
,
num_filters
,
filter_size
,
stride
=
1
,
groups
=
1
,
act
=
None
):
super
(
ConvBNLayer
,
self
).
__init__
()
self
.
_conv
=
Conv2D
(
3
,
num_filters
,
filter_size
,
stride
,
(
filter_size
-
1
)
//
2
,
num_channels
=
num_channels
,
num_filters
=
num_filters
,
filter_size
=
filter_size
,
stride
=
stride
,
padding
=
(
filter_size
-
1
)
//
2
,
groups
=
groups
,
act
=
None
,
bias_attr
=
None
)
...
...
@@ -84,36 +96,54 @@ class ConvBNLayer(fluid.imperative.Layer):
class
BottleneckBlock
(
fluid
.
imperative
.
Layer
):
def
__init__
(
self
,
num_
filters
,
stride
,
shortcut
=
Fals
e
):
def
__init__
(
self
,
num_
channels
,
num_filters
,
stride
,
shortcut
=
Tru
e
):
super
(
BottleneckBlock
,
self
).
__init__
()
self
.
conv0
=
ConvBNLayer
(
num_filters
=
num_filters
,
filter_size
=
1
,
act
=
'relu'
)
num_channels
=
num_channels
,
num_filters
=
num_filters
,
filter_size
=
1
,
act
=
'relu'
)
self
.
conv1
=
ConvBNLayer
(
num_filters
=
num_filters
,
filter_size
=
3
,
stride
=
stride
,
act
=
'relu'
)
num_channels
=
num_filters
,
num_filters
=
num_filters
,
filter_size
=
3
,
stride
=
stride
,
act
=
'relu'
)
self
.
conv2
=
ConvBNLayer
(
num_filters
=
num_filters
*
4
,
filter_size
=
1
,
act
=
None
)
num_channels
=
num_filters
,
num_filters
=
num_filters
*
4
,
filter_size
=
1
,
act
=
None
)
if
shortcut
:
if
not
shortcut
:
self
.
short
=
ConvBNLayer
(
num_filters
=
num_filters
*
4
,
filter_size
=
1
,
stride
=
stride
)
num_channels
=
num_channels
,
num_filters
=
num_filters
*
4
,
filter_size
=
1
,
stride
=
stride
)
self
.
shortcut
=
shortcut
self
.
_num_channels_out
=
num_filters
*
4
def
forward
(
self
,
inputs
):
self
.
conv0
(
)
self
.
conv1
(
)
self
.
conv2
(
)
y
=
self
.
conv0
(
inputs
)
conv1
=
self
.
conv1
(
y
)
conv2
=
self
.
conv2
(
conv1
)
if
self
.
shortcut
:
self
.
short
()
short
=
inputs
else
:
short
=
self
.
short
(
inputs
)
return
fluid
.
layers
.
elementwise_add
(
x
=
self
.
short
,
y
=
self
.
conv2
,
act
=
'relu'
)
return
fluid
.
layers
.
elementwise_add
(
x
=
short
,
y
=
conv2
,
act
=
'relu'
)
class
ResNet
(
fluid
.
imperative
.
Layer
):
def
__init__
(
self
,
layers
=
50
,
class_dim
=
1000
):
super
(
ResNet
,
self
).
__init__
()
self
.
layers
=
layers
supported_layers
=
[
50
,
101
,
152
]
assert
layers
in
supported_layers
,
\
...
...
@@ -128,20 +158,23 @@ class ResNet(fluid.imperative.Layer):
num_filters
=
[
64
,
128
,
256
,
512
]
self
.
conv
=
ConvBNLayer
(
num_filters
=
64
,
filter_size
=
7
,
stride
=
2
,
act
=
'relu'
)
num_
channels
=
3
,
num_
filters
=
64
,
filter_size
=
7
,
stride
=
2
,
act
=
'relu'
)
self
.
pool2d_max
=
Pool2D
(
pool_size
=
3
,
pool_stride
=
2
,
pool_padding
=
1
,
pool_type
=
'max'
)
self
.
bottleneck_block_list
=
[]
num_channels
=
64
for
block
in
range
(
len
(
depth
)):
shortcut
=
Tru
e
shortcut
=
Fals
e
for
i
in
range
(
depth
[
block
]):
bottleneck_block
=
BottleneckBlock
(
num_channels
=
num_channels
,
num_filters
=
num_filters
[
block
],
stride
=
2
if
i
==
0
and
block
!=
0
else
1
,
shortcut
=
shortcut
)
num_channels
=
bottleneck_block
.
_num_channels_out
self
.
bottleneck_block_list
.
append
(
bottleneck_block
)
shortcut
=
Fals
e
shortcut
=
Tru
e
self
.
pool2d_avg
=
Pool2D
(
pool_size
=
7
,
pool_type
=
'avg'
,
global_pooling
=
True
)
...
...
@@ -160,12 +193,12 @@ class ResNet(fluid.imperative.Layer):
for
bottleneck_block
in
self
.
bottleneck_block_list
:
y
=
bottleneck_block
(
y
)
y
=
self
.
pool2d_avg
(
y
)
y
=
self
.
out
()
y
=
self
.
out
(
y
)
return
y
class
TestImperativeResnet
(
unittest
.
TestCase
):
def
test_resnet_
c
pu_float32
(
self
):
def
test_resnet_
g
pu_float32
(
self
):
seed
=
90
with
fluid
.
imperative
.
guard
():
...
...
@@ -183,17 +216,17 @@ class TestImperativeResnet(unittest.TestCase):
break
x_data
=
np
.
array
(
[
x
[
0
].
reshape
(
1
,
28
,
28
)
for
x
in
data
]).
astype
(
'float32'
)
[
x
[
0
].
reshape
(
3
,
224
,
224
)
for
x
in
data
]).
astype
(
'float32'
)
y_data
=
np
.
array
([
x
[
1
]
for
x
in
data
]).
astype
(
'int64'
).
reshape
(
128
,
1
)
256
,
1
)
img
=
to_variable
(
x_data
)
label
=
to_variable
(
y_data
)
label
.
_stop_gradient
=
True
cos
t
=
resnet
(
img
)
ou
t
=
resnet
(
img
)
loss
=
fluid
.
layers
.
cross_entropy
(
input
=
out
,
label
=
label
)
avg_loss
=
fluid
.
layers
.
mean
(
x
=
cost
)
avg_loss
=
fluid
.
layers
.
mean
(
x
=
loss
)
dy_out
=
avg_loss
.
_numpy
()
if
batch_id
==
0
:
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录