Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
BaiXuePrincess
Paddle
提交
b4a32eaf
P
Paddle
项目概览
BaiXuePrincess
/
Paddle
与 Fork 源项目一致
Fork自
PaddlePaddle / Paddle
通知
1
Star
1
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
b4a32eaf
编写于
10月 16, 2018
作者:
Q
Qiao Longfei
浏览文件
操作
浏览文件
下载
差异文件
Merge branch 'develop' of
https://github.com/PaddlePaddle/Paddle
into optimize-sum-seq-pooling-op
test=develop
上级
936926aa
af91d41a
变更
47
隐藏空白更改
内联
并排
Showing
47 changed file
with
901 addition
and
400 deletion
+901
-400
cmake/inference_lib.cmake
cmake/inference_lib.cmake
+3
-2
paddle/fluid/API.spec
paddle/fluid/API.spec
+1
-0
paddle/fluid/CMakeLists.txt
paddle/fluid/CMakeLists.txt
+1
-2
paddle/fluid/framework/details/op_handle_base.h
paddle/fluid/framework/details/op_handle_base.h
+2
-1
paddle/fluid/framework/executor.cc
paddle/fluid/framework/executor.cc
+43
-41
paddle/fluid/framework/executor.h
paddle/fluid/framework/executor.h
+19
-25
paddle/fluid/framework/ir/conv_bn_fuse_pass.cc
paddle/fluid/framework/ir/conv_bn_fuse_pass.cc
+35
-104
paddle/fluid/framework/parallel_executor.cc
paddle/fluid/framework/parallel_executor.cc
+4
-0
paddle/fluid/framework/parallel_executor.h
paddle/fluid/framework/parallel_executor.h
+1
-1
paddle/fluid/framework/scope.cc
paddle/fluid/framework/scope.cc
+17
-12
paddle/fluid/framework/scope.h
paddle/fluid/framework/scope.h
+5
-0
paddle/fluid/operators/adadelta_op.cc
paddle/fluid/operators/adadelta_op.cc
+12
-0
paddle/fluid/operators/adadelta_op.h
paddle/fluid/operators/adadelta_op.h
+11
-0
paddle/fluid/operators/adagrad_op.h
paddle/fluid/operators/adagrad_op.h
+20
-13
paddle/fluid/operators/adam_op.h
paddle/fluid/operators/adam_op.h
+9
-16
paddle/fluid/operators/adamax_op.cc
paddle/fluid/operators/adamax_op.cc
+10
-0
paddle/fluid/operators/adamax_op.h
paddle/fluid/operators/adamax_op.h
+11
-0
paddle/fluid/operators/decayed_adagrad_op.cc
paddle/fluid/operators/decayed_adagrad_op.cc
+10
-0
paddle/fluid/operators/decayed_adagrad_op.h
paddle/fluid/operators/decayed_adagrad_op.h
+11
-0
paddle/fluid/operators/ftrl_op.cc
paddle/fluid/operators/ftrl_op.cc
+10
-0
paddle/fluid/operators/ftrl_op.h
paddle/fluid/operators/ftrl_op.h
+11
-0
paddle/fluid/operators/math/algorithm.h
paddle/fluid/operators/math/algorithm.h
+44
-0
paddle/fluid/operators/math/selected_rows_functor.cc
paddle/fluid/operators/math/selected_rows_functor.cc
+1
-0
paddle/fluid/operators/math/selected_rows_functor.h
paddle/fluid/operators/math/selected_rows_functor.h
+1
-0
paddle/fluid/operators/math/sequence_pooling.cc
paddle/fluid/operators/math/sequence_pooling.cc
+18
-3
paddle/fluid/operators/momentum_op.cc
paddle/fluid/operators/momentum_op.cc
+5
-0
paddle/fluid/operators/momentum_op.cu
paddle/fluid/operators/momentum_op.cu
+11
-0
paddle/fluid/operators/momentum_op.h
paddle/fluid/operators/momentum_op.h
+6
-0
paddle/fluid/operators/reader/blocking_queue.h
paddle/fluid/operators/reader/blocking_queue.h
+6
-3
paddle/fluid/operators/reader/lod_tensor_blocking_queue.h
paddle/fluid/operators/reader/lod_tensor_blocking_queue.h
+6
-4
paddle/fluid/operators/reader/reader_blocking_queue_test.cc
paddle/fluid/operators/reader/reader_blocking_queue_test.cc
+24
-0
paddle/fluid/operators/rmsprop_op.cc
paddle/fluid/operators/rmsprop_op.cc
+5
-0
paddle/fluid/operators/rmsprop_op.h
paddle/fluid/operators/rmsprop_op.h
+229
-41
paddle/fluid/operators/sgd_op.cc
paddle/fluid/operators/sgd_op.cc
+16
-13
paddle/fluid/operators/sgd_op.cu
paddle/fluid/operators/sgd_op.cu
+6
-0
paddle/fluid/platform/device_context.cc
paddle/fluid/platform/device_context.cc
+15
-5
paddle/fluid/platform/device_context.h
paddle/fluid/platform/device_context.h
+5
-3
paddle/fluid/platform/enforce.h
paddle/fluid/platform/enforce.h
+7
-0
paddle/fluid/platform/gpu_info.cc
paddle/fluid/platform/gpu_info.cc
+18
-0
paddle/fluid/platform/gpu_info.h
paddle/fluid/platform/gpu_info.h
+6
-0
paddle/fluid/pybind/pybind.cc
paddle/fluid/pybind/pybind.cc
+10
-5
paddle/fluid/train/demo/README.md
paddle/fluid/train/demo/README.md
+1
-1
paddle/scripts/paddle_build.sh
paddle/scripts/paddle_build.sh
+12
-12
python/paddle/fluid/__init__.py
python/paddle/fluid/__init__.py
+2
-1
python/paddle/fluid/layers/nn.py
python/paddle/fluid/layers/nn.py
+50
-0
python/paddle/fluid/optimizer.py
python/paddle/fluid/optimizer.py
+12
-0
python/paddle/fluid/tests/unittests/test_rmsprop_op.py
python/paddle/fluid/tests/unittests/test_rmsprop_op.py
+139
-92
未找到文件。
cmake/inference_lib.cmake
浏览文件 @
b4a32eaf
...
...
@@ -18,7 +18,7 @@ function(copy TARGET)
set
(
oneValueArgs
""
)
set
(
multiValueArgs SRCS DSTS DEPS
)
cmake_parse_arguments
(
copy_lib
"
${
options
}
"
"
${
oneValueArgs
}
"
"
${
multiValueArgs
}
"
${
ARGN
}
)
set
(
inference_lib_dist_dep
${
TARGET
}
${
inference
_lib_dist_dep
}
PARENT_SCOPE
)
set
(
fluid_lib_dist_dep
${
TARGET
}
${
fluid
_lib_dist_dep
}
PARENT_SCOPE
)
list
(
LENGTH copy_lib_SRCS copy_lib_SRCS_len
)
list
(
LENGTH copy_lib_DSTS copy_lib_DSTS_len
)
...
...
@@ -185,7 +185,8 @@ copy(cmake_cache
SRCS
${
CMAKE_CURRENT_BINARY_DIR
}
/CMakeCache.txt
DSTS
${
FLUID_INSTALL_DIR
}
)
add_custom_target
(
inference_lib_dist DEPENDS
${
inference_lib_dist_dep
}
)
# This command generates a complete fluid library for both train and inference
add_custom_target
(
fluid_lib_dist DEPENDS
${
fluid_lib_dist_dep
}
)
# paddle fluid version
execute_process
(
...
...
paddle/fluid/API.spec
浏览文件 @
b4a32eaf
...
...
@@ -127,6 +127,7 @@ paddle.fluid.layers.relu ArgSpec(args=['x', 'name'], varargs=None, keywords=None
paddle.fluid.layers.log ArgSpec(args=['x', 'name'], varargs=None, keywords=None, defaults=(None,))
paddle.fluid.layers.crop ArgSpec(args=['x', 'shape', 'offsets', 'name'], varargs=None, keywords=None, defaults=(None, None, None))
paddle.fluid.layers.rank_loss ArgSpec(args=['label', 'left', 'right', 'name'], varargs=None, keywords=None, defaults=(None,))
paddle.fluid.layers.margin_rank_loss ArgSpec(args=['label', 'left', 'right', 'margin', 'name'], varargs=None, keywords=None, defaults=(0.1, None))
paddle.fluid.layers.elu ArgSpec(args=['x', 'alpha', 'name'], varargs=None, keywords=None, defaults=(1.0, None))
paddle.fluid.layers.relu6 ArgSpec(args=['x', 'threshold', 'name'], varargs=None, keywords=None, defaults=(6.0, None))
paddle.fluid.layers.pow ArgSpec(args=['x', 'factor', 'name'], varargs=None, keywords=None, defaults=(1.0, None))
...
...
paddle/fluid/CMakeLists.txt
浏览文件 @
b4a32eaf
...
...
@@ -12,6 +12,5 @@ endif(NOT WIN32)
if
(
WITH_INFERENCE
)
# NOTE: please add subdirectory inference at last.
add_subdirectory
(
inference
)
add_subdirectory
(
train
)
endif
()
add_subdirectory
(
train
)
paddle/fluid/framework/details/op_handle_base.h
浏览文件 @
b4a32eaf
...
...
@@ -64,7 +64,8 @@ class OpHandleBase {
virtual
bool
IsMultiDeviceTransfer
()
{
return
false
;
}
const
platform
::
DeviceContext
*
DeviceContext
(
platform
::
Place
place
)
{
return
dev_ctxes_
[
place
];
auto
it
=
dev_ctxes_
.
find
(
place
);
return
it
!=
dev_ctxes_
.
end
()
?
it
->
second
:
nullptr
;
}
void
SetDeviceContext
(
platform
::
Place
place
,
platform
::
DeviceContext
*
ctx_
)
{
...
...
paddle/fluid/framework/executor.cc
浏览文件 @
b4a32eaf
...
...
@@ -46,6 +46,41 @@ ExecutorPrepareContext::~ExecutorPrepareContext() {
VLOG
(
5
)
<<
"destroy ExecutorPrepareContext"
;
}
template
<
typename
RefCntMap
>
static
void
DeleteUnusedTensors
(
const
Scope
&
scope
,
const
OperatorBase
*
op
,
GarbageCollector
<
Tensor
>*
gc
,
RefCntMap
*
ref_cnts
)
{
std
::
unordered_set
<
Tensor
*>
erase_tensors
;
auto
handler
=
[
&
](
const
VariableNameMap
&
name_map
)
{
for
(
auto
&
name_pair
:
name_map
)
{
for
(
auto
&
name
:
name_pair
.
second
)
{
auto
it
=
ref_cnts
->
find
(
name
);
if
(
it
==
ref_cnts
->
end
())
continue
;
if
((
it
->
second
)
--
==
1
)
{
auto
*
var
=
scope
.
FindVar
(
name
);
if
(
var
!=
nullptr
)
{
VLOG
(
10
)
<<
"Erase tensor
\'
"
<<
name
<<
"
\'
"
;
if
(
var
->
IsType
<
LoDTensor
>
())
{
erase_tensors
.
insert
(
var
->
GetMutable
<
LoDTensor
>
());
}
else
if
(
var
->
IsType
<
SelectedRows
>
())
{
erase_tensors
.
insert
(
var
->
GetMutable
<
SelectedRows
>
()
->
mutable_value
());
}
}
}
}
}
};
handler
(
op
->
Inputs
());
handler
(
op
->
Outputs
());
if
(
!
erase_tensors
.
empty
())
{
gc
->
Add
(
erase_tensors
);
}
}
Executor
::
Executor
(
const
platform
::
Place
&
place
)
:
place_
(
place
)
{}
void
Executor
::
Close
()
{
...
...
@@ -331,9 +366,13 @@ void Executor::RunPreparedContext(ExecutorPrepareContext* ctx, Scope* scope,
}
int64_t
max_memory_size
=
GetEagerDeletionThreshold
();
std
::
unique_ptr
<
GarbageCollector
<
Tensor
>>
gc
;
if
(
max_memory_size
>=
0
)
{
// WhileOp would set keep_kids to false
// WhileGradOp would need the scopes created in WhileOp
// Perhaps, we should not perform eager deletion in WhileOp
// The scopes and variables created by WhileOp would be deleted
// in WhileGradOp.
if
(
max_memory_size
>=
0
&&
!
keep_kids
)
{
ctx
->
ResetReferenceCount
();
#ifdef PADDLE_WITH_CUDA
if
(
platform
::
is_gpu_place
(
place_
))
{
...
...
@@ -352,45 +391,8 @@ void Executor::RunPreparedContext(ExecutorPrepareContext* ctx, Scope* scope,
op
->
Run
(
*
local_scope
,
place_
);
if
(
gc
!=
nullptr
)
{
std
::
vector
<
std
::
string
>
erase_vars
;
for
(
auto
&
input
:
op
->
Inputs
())
{
for
(
auto
&
input_name
:
input
.
second
)
{
auto
it
=
ctx
->
cur_ref_cnts_
.
find
(
input_name
);
if
(
it
==
ctx
->
cur_ref_cnts_
.
end
())
continue
;
if
(
it
->
second
==
1
)
{
// should delete it
erase_vars
.
emplace_back
(
input_name
);
ctx
->
cur_ref_cnts_
.
erase
(
input_name
);
}
else
{
--
(
it
->
second
);
}
}
}
for
(
auto
&
output
:
op
->
Outputs
())
{
for
(
auto
&
output_name
:
output
.
second
)
{
auto
it
=
ctx
->
cur_ref_cnts_
.
find
(
output_name
);
if
(
it
==
ctx
->
cur_ref_cnts_
.
end
())
continue
;
if
(
it
->
second
==
1
)
{
erase_vars
.
emplace_back
(
output_name
);
ctx
->
cur_ref_cnts_
.
erase
(
output_name
);
}
else
{
--
(
it
->
second
);
}
}
}
if
(
!
erase_vars
.
empty
())
{
std
::
vector
<
framework
::
LoDTensor
*>
erase_tensors
;
for
(
auto
&
name
:
erase_vars
)
{
auto
*
var
=
local_scope
->
FindVar
(
name
);
if
(
var
==
nullptr
)
continue
;
if
(
var
->
IsType
<
framework
::
LoDTensor
>
())
{
auto
*
tensor
=
var
->
GetMutable
<
framework
::
LoDTensor
>
();
erase_tensors
.
push_back
(
tensor
);
}
}
if
(
!
erase_tensors
.
empty
())
gc
->
Add
(
erase_tensors
);
}
DeleteUnusedTensors
(
*
local_scope
,
op
.
get
(),
gc
.
get
(),
&
(
ctx
->
cur_ref_cnts_
));
}
if
(
FLAGS_benchmark
)
{
...
...
paddle/fluid/framework/executor.h
浏览文件 @
b4a32eaf
...
...
@@ -32,38 +32,32 @@ template <typename T>
std
::
unordered_map
<
std
::
string
,
T
>
GetNonPersistableReferenceCount
(
const
ProgramDesc
&
prog
,
size_t
block_id
)
{
auto
&
block
=
prog
.
Block
(
block_id
);
std
::
unordered_set
<
std
::
string
>
ignored_vars
;
std
::
unordered_map
<
std
::
string
,
T
>
ref_cnts
;
for
(
auto
var_desc
:
block
.
AllVars
())
{
auto
type
=
var_desc
->
Proto
()
->
type
().
type
();
if
(
type
!=
proto
::
VarType
::
LOD_TENSOR
||
var_desc
->
Persistable
())
{
ignored_vars
.
insert
(
var_desc
->
Name
());
// ignore persistable vars
}
}
for
(
auto
op_desc
:
block
.
AllOps
())
{
for
(
auto
&
input
:
op_desc
->
Inputs
())
{
for
(
auto
&
input_name
:
input
.
second
)
{
if
(
!
ignored_vars
.
count
(
input_name
))
{
if
(
ref_cnts
.
count
(
input_name
))
++
ref_cnts
[
input_name
];
else
ref_cnts
[
input_name
]
=
1
;
auto
update_ref_cnts
=
[
&
](
OpDesc
*
op_desc
,
const
VariableNameMap
&
name_map
)
{
for
(
auto
&
name_pair
:
name_map
)
{
for
(
auto
&
name
:
name_pair
.
second
)
{
auto
*
var_desc
=
block
.
FindVar
(
name
);
if
(
var_desc
==
nullptr
||
var_desc
->
Persistable
())
continue
;
auto
type
=
var_desc
->
Proto
()
->
type
().
type
();
if
(
type
!=
proto
::
VarType
::
LOD_TENSOR
&&
type
!=
proto
::
VarType
::
SELECTED_ROWS
)
{
continue
;
}
}
}
for
(
auto
&
output
:
op_desc
->
Outputs
())
{
for
(
auto
output_name
:
output
.
second
)
{
if
(
!
ignored_vars
.
count
(
output_name
))
{
if
(
ref_cnts
.
count
(
output_name
))
++
ref_cnts
[
output_name
];
else
ref_cnts
[
output_name
]
=
1
;
auto
it
=
ref_cnts
.
find
(
name
);
if
(
it
!=
ref_cnts
.
end
())
{
++
it
->
second
;
}
else
{
ref_cnts
[
name
]
=
1
;
}
}
}
};
for
(
auto
op_desc
:
block
.
AllOps
())
{
update_ref_cnts
(
op_desc
,
op_desc
->
Inputs
());
update_ref_cnts
(
op_desc
,
op_desc
->
Outputs
());
}
return
ref_cnts
;
}
...
...
paddle/fluid/framework/ir/conv_bn_fuse_pass.cc
浏览文件 @
b4a32eaf
...
...
@@ -44,89 +44,6 @@ namespace ir {
GET_IR_NODE_FROM_SUBGRAPH(bn_saved_mean, bn_saved_mean, pattern_name); \
GET_IR_NODE_FROM_SUBGRAPH(bn_saved_variance, bn_saved_variance, pattern_name)
template
<
typename
UnaryOperation
>
LoDTensor
tensor_apply
(
const
LoDTensor
&
vec
,
UnaryOperation
f
)
{
LoDTensor
vec_y
;
vec_y
.
Resize
(
vec
.
dims
());
const
float
*
x
=
vec
.
data
<
float
>
();
float
*
y
=
vec_y
.
mutable_data
<
float
>
(
platform
::
CPUPlace
());
for
(
int64_t
i
=
0
;
i
<
vec
.
numel
();
i
++
)
{
y
[
i
]
=
f
(
x
[
i
]);
}
return
vec_y
;
}
void
tensor_apply_inplace
(
LoDTensor
*
vec
,
float
(
*
f
)(
float
))
{
float
*
data
=
vec
->
mutable_data
<
float
>
(
platform
::
CPUPlace
());
for
(
int64_t
i
=
0
;
i
<
vec
->
numel
();
i
++
)
{
data
[
i
]
=
f
(
data
[
i
]);
}
}
template
<
typename
BinaryOperation
>
LoDTensor
tensor_apply_eltwise
(
const
LoDTensor
&
vec_a
,
const
LoDTensor
&
vec_b
,
BinaryOperation
f
)
{
PADDLE_ENFORCE_EQ
(
vec_a
.
dims
(),
vec_b
.
dims
());
LoDTensor
vec_y
;
vec_y
.
Resize
(
vec_a
.
dims
());
const
float
*
a
=
vec_a
.
data
<
float
>
();
const
float
*
b
=
vec_b
.
data
<
float
>
();
float
*
y
=
vec_y
.
mutable_data
<
float
>
(
platform
::
CPUPlace
());
for
(
int64_t
i
=
0
;
i
<
vec_a
.
numel
();
i
++
)
{
y
[
i
]
=
f
(
a
[
i
],
b
[
i
]);
}
return
vec_y
;
}
template
<
typename
BinaryOperation
>
LoDTensor
tensor_apply_eltwise_broadcast
(
const
LoDTensor
&
vec_a
,
const
LoDTensor
&
vec_b
,
BinaryOperation
f
)
{
PADDLE_ENFORCE_EQ
(
vec_a
.
dims
().
size
(),
2
);
PADDLE_ENFORCE_EQ
(
vec_b
.
dims
().
size
(),
2
);
PADDLE_ENFORCE_EQ
(
vec_a
.
dims
()[
0
],
vec_b
.
dims
()[
0
]);
PADDLE_ENFORCE_EQ
(
vec_b
.
dims
()[
1
],
1
);
LoDTensor
vec_y
;
vec_y
.
Resize
(
vec_a
.
dims
());
const
float
*
a
=
vec_a
.
data
<
float
>
();
const
float
*
b
=
vec_b
.
data
<
float
>
();
float
*
y
=
vec_y
.
mutable_data
<
float
>
(
platform
::
CPUPlace
());
size_t
a_height
=
vec_a
.
dims
()[
0
];
size_t
a_width
=
vec_a
.
dims
()[
1
];
for
(
size_t
h
=
0
;
h
<
a_height
;
h
++
)
{
for
(
size_t
w
=
0
;
w
<
a_width
;
++
w
)
{
*
(
y
++
)
=
f
(
*
(
a
++
),
b
[
h
]);
}
}
return
vec_y
;
}
// reshape to two dimensions {A, B * C * ...}
void
make_tensor_2d
(
LoDTensor
*
tensor_to_reshape
)
{
auto
dims_count
=
tensor_to_reshape
->
dims
().
size
();
PADDLE_ENFORCE_GT
(
dims_count
,
0
);
int
size2
=
1
;
for
(
int
i
=
1
;
i
<
dims_count
;
i
++
)
{
size2
*=
tensor_to_reshape
->
dims
()[
i
];
}
tensor_to_reshape
->
Resize
(
make_ddim
({
tensor_to_reshape
->
dims
()[
0
],
size2
}));
}
void
recompute_conv_weights
(
LoDTensor
*
weights
,
LoDTensor
*
tmp
)
{
// remember the weights tensor shape {A, B, C, ...}
auto
weights_shape
=
weights
->
dims
();
// reduce the weights to 2d {A, B * C * ...}
make_tensor_2d
(
weights
);
// make tmp tensor 2d by adding 1 as second dim {A, 1}
make_tensor_2d
(
tmp
);
*
weights
=
tensor_apply_eltwise_broadcast
(
*
weights
,
*
tmp
,
std
::
multiplies
<
float
>
());
// reshape weights to the original dims {A, B, C, ...}
weights
->
Resize
(
weights_shape
);
}
void
recompute_bias_and_weights
(
const
Scope
*
scope
,
ir
::
Node
*
conv_weight
,
//
const
ir
::
Node
&
bn_scale
,
//
...
...
@@ -135,6 +52,13 @@ void recompute_bias_and_weights(const Scope* scope,
const
ir
::
Node
&
bn_variance
,
//
LoDTensor
*
eltwise_y_in_tensor
,
//
float
epsilon
)
{
using
EigenVectorArrayMap
=
Eigen
::
Map
<
Eigen
::
Array
<
float
,
Eigen
::
Dynamic
,
1
>>
;
using
ConstEigenVectorArrayMap
=
Eigen
::
Map
<
const
Eigen
::
Array
<
float
,
Eigen
::
Dynamic
,
1
>>
;
using
EigenMatrixArrayMap
=
Eigen
::
Map
<
Eigen
::
Array
<
float
,
Eigen
::
Dynamic
,
Eigen
::
Dynamic
,
Eigen
::
RowMajor
>>
;
// Re-compute bias of conv2d from BN
PADDLE_ENFORCE_EQ
(
eltwise_y_in_tensor
->
dims
(),
bn_bias_tensor
.
dims
());
...
...
@@ -143,31 +67,38 @@ void recompute_bias_and_weights(const Scope* scope,
scope
->
FindVar
(
bn_variance
.
Name
())
->
GetMutable
<
LoDTensor
>
();
auto
*
mean_tensor
=
scope
->
FindVar
(
bn_mean
.
Name
())
->
GetMutable
<
LoDTensor
>
();
auto
std_tensor
=
LoDTensor
();
std_tensor
.
Resize
(
bn_bias_tensor
.
dims
());
std_tensor
=
tensor_apply
(
*
variance_tensor
,
[
&
](
float
x
)
{
return
x
+
epsilon
;
});
ConstEigenVectorArrayMap
scale_array
(
scale_tensor
->
data
<
float
>
(),
scale_tensor
->
numel
(),
1
);
EigenVectorArrayMap
variance_array
(
variance_tensor
->
mutable_data
<
float
>
(
platform
::
CPUPlace
()),
variance_tensor
->
numel
(),
1
);
ConstEigenVectorArrayMap
mean_array
(
mean_tensor
->
data
<
float
>
(),
mean_tensor
->
numel
(),
1
);
ConstEigenVectorArrayMap
bn_bias_array
(
bn_bias_tensor
.
data
<
float
>
(),
bn_bias_tensor
.
numel
(),
1
);
using
EigenVectorArrayMap
=
Eigen
::
Map
<
Eigen
::
Array
<
float
,
Eigen
::
Dynamic
,
1
>>
;
// variance will not be used anymore, so make it std_array and then tmp_array
variance_array
+=
epsilon
;
variance_array
=
variance_array
.
sqrt
();
variance_array
=
scale_array
/
variance_array
;
EigenVectorArrayMap
eltwise_y_in_array
(
eltwise_y_in_tensor
->
mutable_data
<
float
>
(
platform
::
CPUPlace
()),
eltwise_y_in_tensor
->
numel
(),
1
);
EigenVectorArrayMap
std_vec
(
std_tensor
.
mutable_data
<
float
>
(
platform
::
CPUPlace
()),
std_tensor
.
numel
(),
1
);
std_vec
=
std_vec
.
sqrt
();
auto
tmp_tensor
=
tensor_apply_eltwise
(
*
scale_tensor
,
std_tensor
,
std
::
divides
<
float
>
());
auto
tensor_minus
=
tensor_apply_eltwise
(
*
eltwise_y_in_tensor
,
*
mean_tensor
,
std
::
minus
<
float
>
());
auto
tensor_mul
=
tensor_apply_eltwise
(
tensor_minus
,
tmp_tensor
,
std
::
multiplies
<
float
>
());
*
eltwise_y_in_tensor
=
tensor_apply_eltwise
(
tensor_mul
,
bn_bias_tensor
,
std
::
plus
<
float
>
());
eltwise_y_in_array
=
((
eltwise_y_in_array
-
mean_array
)
*
variance_array
)
+
bn_bias_array
;
// Re-compute weight of conv2d from BN
auto
*
current_param
=
scope
->
FindVar
(
conv_weight
->
Name
())
->
GetMutable
<
LoDTensor
>
();
recompute_conv_weights
(
current_param
,
&
tmp_tensor
);
auto
*
weights
=
scope
->
FindVar
(
conv_weight
->
Name
())
->
GetMutable
<
LoDTensor
>
();
auto
weights_shape
=
weights
->
dims
();
auto
weights_shape_2d
=
flatten_to_2d
(
weights_shape
,
1
);
EigenMatrixArrayMap
weights_array_2d
(
weights
->
mutable_data
<
float
>
(
platform
::
CPUPlace
()),
weights_shape_2d
[
0
],
weights_shape_2d
[
1
]);
weights_array_2d
.
colwise
()
*=
variance_array
;
}
std
::
unique_ptr
<
ir
::
Graph
>
ConvBNFusePass
::
ApplyImpl
(
...
...
paddle/fluid/framework/parallel_executor.cc
浏览文件 @
b4a32eaf
...
...
@@ -307,6 +307,10 @@ ParallelExecutor::~ParallelExecutor() {
}
}
}
// member_ must be destructed before gcs_ since the destructor of
// ReferenceCountOpHandle use raw pointers of gcs_ inside.
member_
.
reset
();
}
}
// namespace framework
...
...
paddle/fluid/framework/parallel_executor.h
浏览文件 @
b4a32eaf
...
...
@@ -75,7 +75,7 @@ class ParallelExecutor {
private:
void
BCastParamsToDevices
(
const
std
::
unordered_set
<
std
::
string
>
&
vars
)
const
;
ParallelExecutorPrivate
*
member_
;
std
::
unique_ptr
<
ParallelExecutorPrivate
>
member_
;
#ifdef PADDLE_WITH_CUDA
// ref_cnts_ is only initialized when ParallelExecutor constructs, and then
...
...
paddle/fluid/framework/scope.cc
浏览文件 @
b4a32eaf
...
...
@@ -49,18 +49,18 @@ int64_t GetEagerDeletionThreshold() {
Scope
::~
Scope
()
{
DropKids
();
}
Scope
&
Scope
::
NewScope
()
const
{
std
::
unique_lock
<
std
::
mutex
>
lock
(
mutex_
);
std
::
lock_guard
<
std
::
mutex
>
lock
(
mutex_
);
kids_
.
push_back
(
new
Scope
(
this
));
return
*
kids_
.
back
();
}
Variable
*
Scope
::
Var
(
const
std
::
string
&
name
)
{
std
::
unique_lock
<
std
::
mutex
>
lock
(
mutex_
);
std
::
lock_guard
<
std
::
mutex
>
lock
(
mutex_
);
return
VarInternal
(
name
);
}
Variable
*
Scope
::
Var
(
std
::
string
*
name
)
{
std
::
unique_lock
<
std
::
mutex
>
lock
(
mutex_
);
std
::
lock_guard
<
std
::
mutex
>
lock
(
mutex_
);
auto
new_name
=
string
::
Sprintf
(
"%p.%d"
,
this
,
vars_
.
size
());
if
(
name
!=
nullptr
)
{
*
name
=
new_name
;
...
...
@@ -69,29 +69,34 @@ Variable* Scope::Var(std::string* name) {
}
Variable
*
Scope
::
FindVar
(
const
std
::
string
&
name
)
const
{
std
::
unique_lock
<
std
::
mutex
>
lock
(
mutex_
);
std
::
lock_guard
<
std
::
mutex
>
lock
(
mutex_
);
return
FindVarInternal
(
name
);
}
Variable
*
Scope
::
FindLocalVar
(
const
std
::
string
&
name
)
const
{
std
::
lock_guard
<
std
::
mutex
>
lock
(
mutex_
);
return
FindVarLocally
(
name
);
}
const
Scope
*
Scope
::
FindScope
(
const
Variable
*
var
)
const
{
std
::
unique_lock
<
std
::
mutex
>
lock
(
mutex_
);
std
::
lock_guard
<
std
::
mutex
>
lock
(
mutex_
);
return
FindScopeInternal
(
var
);
}
void
Scope
::
DropKids
()
{
std
::
unique_lock
<
std
::
mutex
>
lock
(
mutex_
);
std
::
lock_guard
<
std
::
mutex
>
lock
(
mutex_
);
for
(
Scope
*
s
:
kids_
)
delete
s
;
kids_
.
clear
();
}
bool
Scope
::
HasKid
(
const
Scope
*
scope
)
const
{
std
::
unique_lock
<
std
::
mutex
>
lock
(
mutex_
);
std
::
lock_guard
<
std
::
mutex
>
lock
(
mutex_
);
auto
it
=
std
::
find
(
this
->
kids_
.
begin
(),
this
->
kids_
.
end
(),
scope
);
return
it
!=
this
->
kids_
.
end
();
}
std
::
vector
<
std
::
string
>
Scope
::
LocalVarNames
()
const
{
std
::
unique_lock
<
std
::
mutex
>
lock
(
mutex_
);
std
::
lock_guard
<
std
::
mutex
>
lock
(
mutex_
);
std
::
vector
<
std
::
string
>
known_vars
;
known_vars
.
reserve
(
this
->
vars_
.
size
());
for
(
auto
&
p
:
vars_
)
{
...
...
@@ -101,7 +106,7 @@ std::vector<std::string> Scope::LocalVarNames() const {
}
void
Scope
::
DeleteScope
(
Scope
*
scope
)
const
{
std
::
unique_lock
<
std
::
mutex
>
lock
(
mutex_
);
std
::
lock_guard
<
std
::
mutex
>
lock
(
mutex_
);
auto
it
=
std
::
find
(
this
->
kids_
.
begin
(),
this
->
kids_
.
end
(),
scope
);
PADDLE_ENFORCE
(
it
!=
this
->
kids_
.
end
(),
"Cannot find %p as kid scope"
,
scope
);
this
->
kids_
.
erase
(
it
);
...
...
@@ -114,7 +119,7 @@ void Scope::DeleteScope(Scope* scope) const {
}
void
Scope
::
EraseVars
(
const
std
::
vector
<
std
::
string
>&
var_names
)
{
std
::
unique_lock
<
std
::
mutex
>
lock
(
mutex_
);
std
::
lock_guard
<
std
::
mutex
>
lock
(
mutex_
);
std
::
set
<
std
::
string
>
var_set
(
var_names
.
begin
(),
var_names
.
end
());
for
(
auto
it
=
vars_
.
begin
();
it
!=
vars_
.
end
();)
{
if
(
var_set
.
find
(
it
->
first
)
!=
var_set
.
end
())
{
...
...
@@ -127,12 +132,12 @@ void Scope::EraseVars(const std::vector<std::string>& var_names) {
void
Scope
::
Rename
(
const
std
::
string
&
origin_name
,
const
std
::
string
&
new_name
)
const
{
std
::
unique_lock
<
std
::
mutex
>
lock
(
mutex_
);
std
::
lock_guard
<
std
::
mutex
>
lock
(
mutex_
);
RenameInternal
(
origin_name
,
new_name
);
}
std
::
string
Scope
::
Rename
(
const
std
::
string
&
origin_name
)
const
{
std
::
unique_lock
<
std
::
mutex
>
lock
(
mutex_
);
std
::
lock_guard
<
std
::
mutex
>
lock
(
mutex_
);
auto
new_name
=
string
::
Sprintf
(
"%p.%d"
,
this
,
vars_
.
size
());
RenameInternal
(
origin_name
,
new_name
);
return
new_name
;
...
...
paddle/fluid/framework/scope.h
浏览文件 @
b4a32eaf
...
...
@@ -63,6 +63,11 @@ class Scope {
/// Caller doesn't own the returned Variable.
Variable
*
FindVar
(
const
std
::
string
&
name
)
const
;
/// Find a variable in the current scope.
/// Return nullptr if cannot find.
/// Caller doesn't own the returned Variable.
Variable
*
FindLocalVar
(
const
std
::
string
&
name
)
const
;
const
Scope
*
parent
()
const
{
return
parent_
;
}
/// Find the scope or an ancestor scope that contains the given variable.
...
...
paddle/fluid/operators/adadelta_op.cc
浏览文件 @
b4a32eaf
...
...
@@ -18,6 +18,7 @@ namespace paddle {
namespace
operators
{
using
Tensor
=
framework
::
Tensor
;
class
AdadeltaOp
:
public
framework
::
OperatorWithKernel
{
public:
using
framework
::
OperatorWithKernel
::
OperatorWithKernel
;
...
...
@@ -31,6 +32,16 @@ class AdadeltaOp : public framework::OperatorWithKernel {
"Input(AvgSquaredGrad) of AdadeltaOp should not be null."
);
PADDLE_ENFORCE
(
ctx
->
HasInput
(
"AvgSquaredUpdate"
),
"Input(AvgSquaredUpdate) of AdadeltaOp should not be null."
);
PADDLE_ENFORCE
(
ctx
->
GetInputsVarType
(
"Param"
).
front
()
==
framework
::
proto
::
VarType
::
LOD_TENSOR
,
"The input var's type should be LoDTensor, but the received is %s"
,
ctx
->
Inputs
(
"Param"
).
front
(),
ctx
->
GetInputsVarType
(
"Param"
).
front
());
PADDLE_ENFORCE
(
ctx
->
GetInputsVarType
(
"Grad"
).
front
()
==
framework
::
proto
::
VarType
::
LOD_TENSOR
,
"The input var's type should be LoDTensor, but the received is %s"
,
ctx
->
Inputs
(
"Grad"
).
front
(),
ctx
->
GetInputsVarType
(
"Grad"
).
front
());
PADDLE_ENFORCE
(
ctx
->
HasOutput
(
"ParamOut"
),
"Output(ParamOut) of AdadeltaOp should not be null."
);
...
...
@@ -56,6 +67,7 @@ class AdadeltaOp : public framework::OperatorWithKernel {
ctx
->
SetOutputDim
(
"AvgSquaredGradOut"
,
param_dim
);
ctx
->
SetOutputDim
(
"AvgSquaredUpdateOut"
,
param_dim
);
}
framework
::
OpKernelType
GetExpectedKernelType
(
const
framework
::
ExecutionContext
&
ctx
)
const
override
{
auto
input_data_type
=
...
...
paddle/fluid/operators/adadelta_op.h
浏览文件 @
b4a32eaf
...
...
@@ -23,6 +23,17 @@ template <typename DeviceContext, typename T>
class
AdadeltaOpKernel
:
public
framework
::
OpKernel
<
T
>
{
public:
void
Compute
(
const
framework
::
ExecutionContext
&
ctx
)
const
override
{
const
auto
*
param_var
=
ctx
.
InputVar
(
"Param"
);
PADDLE_ENFORCE
(
param_var
->
IsType
<
framework
::
LoDTensor
>
(),
"The Var(%s)'s type should be LoDTensor, "
"but the received is %s"
,
ctx
.
Inputs
(
"Param"
).
front
(),
param_var
->
Type
().
name
());
const
auto
*
grad_var
=
ctx
.
InputVar
(
"Grad"
);
PADDLE_ENFORCE
(
grad_var
->
IsType
<
framework
::
LoDTensor
>
(),
"The Var(%s)'s type should be LoDTensor, "
"but the received is %s"
,
ctx
.
Inputs
(
"Grad"
).
front
(),
grad_var
->
Type
().
name
());
auto
param_out_tensor
=
ctx
.
Output
<
framework
::
Tensor
>
(
"ParamOut"
);
auto
avg_squared_grad_out_tensor
=
ctx
.
Output
<
framework
::
Tensor
>
(
"AvgSquaredGradOut"
);
...
...
paddle/fluid/operators/adagrad_op.h
浏览文件 @
b4a32eaf
...
...
@@ -13,6 +13,7 @@ See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include "paddle/fluid/framework/eigen.h"
#include "paddle/fluid/framework/op_registry.h"
...
...
@@ -21,25 +22,31 @@ namespace operators {
template
<
typename
DeviceContext
,
typename
T
>
struct
SparseAdagradFunctor
{
void
operator
()(
const
DeviceContext
&
context
,
const
framework
::
SelectedRows
&
grad
,
const
framework
::
Tensor
&
learning_rate
,
T
epsilon
,
framework
::
Tensor
*
moment
,
framework
::
Tensor
*
param
);
void
operator
()(
const
DeviceContext
&
context
,
const
framework
::
SelectedRows
&
grad
,
const
framework
::
Tensor
&
learning_rate
,
T
epsilon
,
framework
::
Tensor
*
moment
,
framework
::
Tensor
*
param
);
};
template
<
typename
DeviceContext
,
typename
T
>
class
AdagradOpKernel
:
public
framework
::
OpKernel
<
T
>
{
public:
void
Compute
(
const
framework
::
ExecutionContext
&
ctx
)
const
override
{
auto
*
param_out_tensor
=
ctx
.
Output
<
framework
::
Tensor
>
(
"ParamOut"
);
auto
*
moment_out_tensor
=
ctx
.
Output
<
framework
::
Tensor
>
(
"MomentOut"
);
void
Compute
(
const
framework
::
ExecutionContext
&
ctx
)
const
override
{
const
auto
*
param_var
=
ctx
.
InputVar
(
"Param"
);
PADDLE_ENFORCE
(
param_var
->
IsType
<
framework
::
LoDTensor
>
(),
"The Var(%s)'s type should be LoDTensor, "
"but the received is %s"
,
ctx
.
Inputs
(
"Param"
).
front
(),
param_var
->
Type
().
name
());
auto
*
param_out_tensor
=
ctx
.
Output
<
framework
::
Tensor
>
(
"ParamOut"
);
auto
*
moment_out_tensor
=
ctx
.
Output
<
framework
::
Tensor
>
(
"MomentOut"
);
param_out_tensor
->
mutable_data
<
T
>
(
ctx
.
GetPlace
());
moment_out_tensor
->
mutable_data
<
T
>
(
ctx
.
GetPlace
());
T
epsilon
=
static_cast
<
T
>
(
ctx
.
Attr
<
float
>
(
"epsilon"
));
auto
*
grad_var
=
ctx
.
InputVar
(
"Grad"
);
auto
*
grad_var
=
ctx
.
InputVar
(
"Grad"
);
if
(
grad_var
->
IsType
<
framework
::
LoDTensor
>
())
{
auto
param
=
framework
::
EigenVector
<
T
>::
Flatten
(
*
ctx
.
Input
<
framework
::
Tensor
>
(
"Param"
));
...
...
@@ -47,16 +54,16 @@ class AdagradOpKernel : public framework::OpKernel<T> {
*
ctx
.
Input
<
framework
::
Tensor
>
(
"Grad"
));
auto
moment
=
framework
::
EigenVector
<
T
>::
Flatten
(
*
ctx
.
Input
<
framework
::
Tensor
>
(
"Moment"
));
auto
*
learning_rate
=
ctx
.
Input
<
framework
::
Tensor
>
(
"LearningRate"
);
auto
*
learning_rate
=
ctx
.
Input
<
framework
::
Tensor
>
(
"LearningRate"
);
auto
param_out
=
framework
::
EigenVector
<
T
>::
Flatten
(
*
param_out_tensor
);
auto
moment_out
=
framework
::
EigenVector
<
T
>::
Flatten
(
*
moment_out_tensor
);
auto
*
place
=
ctx
.
template
device_context
<
DeviceContext
>().
eigen_device
();
auto
*
place
=
ctx
.
template
device_context
<
DeviceContext
>().
eigen_device
();
moment_out
.
device
(
*
place
)
=
moment
+
grad
*
grad
;
Eigen
::
DSizes
<
int
,
1
>
m_dsize
(
moment_out_tensor
->
numel
());
if
(
platform
::
is_cpu_place
(
ctx
.
GetPlace
()))
{
auto
*
lr
=
learning_rate
->
data
<
T
>
();
auto
*
lr
=
learning_rate
->
data
<
T
>
();
param_out
.
device
(
*
place
)
=
param
-
lr
[
0
]
*
grad
/
(
moment_out
.
sqrt
()
+
epsilon
);
}
else
{
...
...
@@ -66,10 +73,10 @@ class AdagradOpKernel : public framework::OpKernel<T> {
lr
.
broadcast
(
m_dsize
)
*
grad
/
(
moment_out
.
sqrt
()
+
epsilon
);
}
}
else
if
(
grad_var
->
IsType
<
framework
::
SelectedRows
>
())
{
auto
*
param_tensor
=
ctx
.
Input
<
framework
::
Tensor
>
(
"Param"
);
auto
*
param_tensor
=
ctx
.
Input
<
framework
::
Tensor
>
(
"Param"
);
PADDLE_ENFORCE_EQ
(
param_tensor
,
param_out_tensor
);
auto
*
moment_tensor
=
ctx
.
Input
<
framework
::
Tensor
>
(
"Moment"
);
auto
*
moment_tensor
=
ctx
.
Input
<
framework
::
Tensor
>
(
"Moment"
);
PADDLE_ENFORCE_EQ
(
moment_tensor
,
moment_out_tensor
);
SparseAdagradFunctor
<
DeviceContext
,
T
>
functor
;
...
...
paddle/fluid/operators/adam_op.h
浏览文件 @
b4a32eaf
...
...
@@ -18,6 +18,7 @@ limitations under the License. */
#include <vector>
#include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/operators/detail/safe_ref.h"
#include "paddle/fluid/operators/math/algorithm.h"
#include "paddle/fluid/operators/math/selected_rows_functor.h"
#include "paddle/fluid/platform/for_range.h"
...
...
@@ -199,23 +200,9 @@ struct SparseAdamFunctor {
row_numel_
(
row_numel
),
row_count_
(
row_count
)
{}
inline
HOSTDEVICE
int64_t
BinarySearchInRows
(
int64_t
row
)
const
{
int64_t
beg
=
0
,
end
=
row_count_
-
1
;
while
(
beg
<=
end
)
{
auto
mid
=
((
beg
+
end
)
>>
1
);
if
(
rows_
[
mid
]
==
row
)
return
mid
;
else
if
(
rows_
[
mid
]
<
row
)
beg
=
mid
+
1
;
else
end
=
mid
-
1
;
}
return
-
1
;
}
inline
HOSTDEVICE
void
operator
()(
size_t
i
)
const
{
int64_t
row
=
i
/
row_numel_
;
auto
row_idx
=
BinarySearchInRows
(
row
);
auto
row_idx
=
math
::
BinarySearch
<
int64_t
>
(
rows_
,
row_count_
,
i
/
row_numel_
);
T
g
=
row_idx
>=
0
?
grad_
[
row_idx
*
row_numel_
+
i
%
row_numel_
]
:
0
;
// The following code is the same as dense
...
...
@@ -244,6 +231,12 @@ template <typename DeviceContext, typename T>
class
AdamOpKernel
:
public
framework
::
OpKernel
<
T
>
{
public:
void
Compute
(
const
framework
::
ExecutionContext
&
ctx
)
const
override
{
const
auto
*
param_var
=
ctx
.
InputVar
(
"Param"
);
PADDLE_ENFORCE
(
param_var
->
IsType
<
framework
::
LoDTensor
>
(),
"The Var(%s)'s type should be LoDTensor, "
"but the received is %s"
,
ctx
.
Inputs
(
"Param"
).
front
(),
param_var
->
Type
().
name
());
using
paddle
::
framework
::
LoDTensor
;
using
paddle
::
operators
::
detail
::
Ref
;
...
...
paddle/fluid/operators/adamax_op.cc
浏览文件 @
b4a32eaf
...
...
@@ -35,6 +35,16 @@ class AdamaxOp : public framework::OperatorWithKernel {
"Input(LearningRate) of AdamaxOp should not be null."
);
PADDLE_ENFORCE
(
ctx
->
HasInput
(
"Beta1Pow"
),
"Input(Beta1Pow) of AdamaxOp should not be null."
);
PADDLE_ENFORCE
(
ctx
->
GetInputsVarType
(
"Param"
).
front
()
==
framework
::
proto
::
VarType
::
LOD_TENSOR
,
"The input var's type should be LoDTensor, but the received is %s"
,
ctx
->
Inputs
(
"Param"
).
front
(),
ctx
->
GetInputsVarType
(
"Param"
).
front
());
PADDLE_ENFORCE
(
ctx
->
GetInputsVarType
(
"Grad"
).
front
()
==
framework
::
proto
::
VarType
::
LOD_TENSOR
,
"The input var's type should be LoDTensor, but the received is %s"
,
ctx
->
Inputs
(
"Grad"
).
front
(),
ctx
->
GetInputsVarType
(
"Grad"
).
front
());
PADDLE_ENFORCE
(
ctx
->
HasOutput
(
"ParamOut"
),
"Output(ParamOut) of AdamaxOp should not be null."
);
...
...
paddle/fluid/operators/adamax_op.h
浏览文件 @
b4a32eaf
...
...
@@ -23,6 +23,17 @@ template <typename DeviceContext, typename T>
class
AdamaxOpKernel
:
public
framework
::
OpKernel
<
T
>
{
public:
void
Compute
(
const
framework
::
ExecutionContext
&
ctx
)
const
override
{
const
auto
*
param_var
=
ctx
.
InputVar
(
"Param"
);
PADDLE_ENFORCE
(
param_var
->
IsType
<
framework
::
LoDTensor
>
(),
"The Var(%s)'s type should be LoDTensor, "
"but the received is %s"
,
ctx
.
Inputs
(
"Param"
).
front
(),
param_var
->
Type
().
name
());
const
auto
*
grad_var
=
ctx
.
InputVar
(
"Grad"
);
PADDLE_ENFORCE
(
grad_var
->
IsType
<
framework
::
LoDTensor
>
(),
"The Var(%s)'s type should be LoDTensor, "
"but the received is %s"
,
ctx
.
Inputs
(
"Grad"
).
front
(),
grad_var
->
Type
().
name
());
auto
param_out_tensor
=
ctx
.
Output
<
framework
::
Tensor
>
(
"ParamOut"
);
auto
moment_out_tensor
=
ctx
.
Output
<
framework
::
Tensor
>
(
"MomentOut"
);
auto
inf_norm_out_tensor
=
ctx
.
Output
<
framework
::
Tensor
>
(
"InfNormOut"
);
...
...
paddle/fluid/operators/decayed_adagrad_op.cc
浏览文件 @
b4a32eaf
...
...
@@ -32,6 +32,16 @@ class DecayedAdagradOp : public framework::OperatorWithKernel {
PADDLE_ENFORCE
(
ctx
->
HasInput
(
"LearningRate"
),
"Input(LearningRate) of DecayedAdagradOp should not be null."
);
PADDLE_ENFORCE
(
ctx
->
GetInputsVarType
(
"Param"
).
front
()
==
framework
::
proto
::
VarType
::
LOD_TENSOR
,
"The input var's type should be LoDTensor, but the received is %s"
,
ctx
->
Inputs
(
"Param"
).
front
(),
ctx
->
GetInputsVarType
(
"Param"
).
front
());
PADDLE_ENFORCE
(
ctx
->
GetInputsVarType
(
"Grad"
).
front
()
==
framework
::
proto
::
VarType
::
LOD_TENSOR
,
"The input var's type should be LoDTensor, but the received is %s"
,
ctx
->
Inputs
(
"Grad"
).
front
(),
ctx
->
GetInputsVarType
(
"Grad"
).
front
());
PADDLE_ENFORCE
(
ctx
->
HasOutput
(
"ParamOut"
),
"Output(ParamOut) of DecayedAdagradOp should not be null."
);
...
...
paddle/fluid/operators/decayed_adagrad_op.h
浏览文件 @
b4a32eaf
...
...
@@ -23,6 +23,17 @@ template <typename DeviceContext, typename T>
class
DecayedAdagradOpKernel
:
public
framework
::
OpKernel
<
T
>
{
public:
void
Compute
(
const
framework
::
ExecutionContext
&
ctx
)
const
override
{
const
auto
*
param_var
=
ctx
.
InputVar
(
"Param"
);
PADDLE_ENFORCE
(
param_var
->
IsType
<
framework
::
LoDTensor
>
(),
"The Var(%s)'s type should be LoDTensor, "
"but the received is %s"
,
ctx
.
Inputs
(
"Param"
).
front
(),
param_var
->
Type
().
name
());
const
auto
*
grad_var
=
ctx
.
InputVar
(
"Grad"
);
PADDLE_ENFORCE
(
grad_var
->
IsType
<
framework
::
LoDTensor
>
(),
"The Var(%s)'s type should be LoDTensor, "
"but the received is %s"
,
ctx
.
Inputs
(
"Grad"
).
front
(),
grad_var
->
Type
().
name
());
auto
param_out_tensor
=
ctx
.
Output
<
framework
::
Tensor
>
(
"ParamOut"
);
auto
moment_out_tensor
=
ctx
.
Output
<
framework
::
Tensor
>
(
"MomentOut"
);
...
...
paddle/fluid/operators/ftrl_op.cc
浏览文件 @
b4a32eaf
...
...
@@ -34,6 +34,16 @@ class FTRLOp : public framework::OperatorWithKernel {
"Input(Grad) of FTRL should not be null."
);
PADDLE_ENFORCE
(
ctx
->
HasInput
(
"LearningRate"
),
"Input(LearningRate) of FTRL should not be null."
);
PADDLE_ENFORCE
(
ctx
->
GetInputsVarType
(
"Param"
).
front
()
==
framework
::
proto
::
VarType
::
LOD_TENSOR
,
"The input var's type should be LoDTensor, but the received is %s"
,
ctx
->
Inputs
(
"Param"
).
front
(),
ctx
->
GetInputsVarType
(
"Param"
).
front
());
PADDLE_ENFORCE
(
ctx
->
GetInputsVarType
(
"Grad"
).
front
()
==
framework
::
proto
::
VarType
::
LOD_TENSOR
,
"The input var's type should be LoDTensor, but the received is %s"
,
ctx
->
Inputs
(
"Grad"
).
front
(),
ctx
->
GetInputsVarType
(
"Grad"
).
front
());
PADDLE_ENFORCE
(
ctx
->
HasOutput
(
"ParamOut"
),
"Output(ParamOut) of FTRL should not be null."
);
...
...
paddle/fluid/operators/ftrl_op.h
浏览文件 @
b4a32eaf
...
...
@@ -28,6 +28,17 @@ template <typename DeviceContext, typename T>
class
FTRLOpKernel
:
public
framework
::
OpKernel
<
T
>
{
public:
void
Compute
(
const
framework
::
ExecutionContext
&
ctx
)
const
override
{
const
auto
*
param_var
=
ctx
.
InputVar
(
"Param"
);
PADDLE_ENFORCE
(
param_var
->
IsType
<
framework
::
LoDTensor
>
(),
"The Var(%s)'s type should be LoDTensor, "
"but the received is %s"
,
ctx
.
Inputs
(
"Param"
).
front
(),
param_var
->
Type
().
name
());
const
auto
*
grad_var
=
ctx
.
InputVar
(
"Grad"
);
PADDLE_ENFORCE
(
grad_var
->
IsType
<
framework
::
LoDTensor
>
(),
"The Var(%s)'s type should be LoDTensor, "
"but the received is %s"
,
ctx
.
Inputs
(
"Grad"
).
front
(),
grad_var
->
Type
().
name
());
auto
*
param_out
=
ctx
.
Output
<
Tensor
>
(
"ParamOut"
);
auto
*
sq_accum_out
=
ctx
.
Output
<
Tensor
>
(
"SquaredAccumOut"
);
auto
*
lin_accum_out
=
ctx
.
Output
<
Tensor
>
(
"LinearAccumOut"
);
...
...
paddle/fluid/operators/math/algorithm.h
0 → 100644
浏览文件 @
b4a32eaf
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <algorithm>
#include <cstdint> // for int64_t
#include <numeric>
#include "paddle/fluid/platform/hostdevice.h"
namespace
paddle
{
namespace
operators
{
namespace
math
{
// Binary search over the first `num` elements of `x` for `val`.
//
// `x` is assumed to be sorted in ascending order with respect to
// operator< / operator== of T; the halving logic relies on it.
//
// Returns the index of an element equal to `val`, or -1 when no such
// element exists (this includes num <= 0, where the loop never runs).
template <typename T>
HOSTDEVICE inline int64_t BinarySearch(const T *x, int64_t num, const T &val) {
  int64_t beg = 0, end = num - 1;
  while (beg <= end) {
    // Overflow-safe midpoint: identical to (beg + end) >> 1 for the
    // non-negative beg <= end seen here, but never forms the sum beg + end.
    auto mid = beg + ((end - beg) >> 1);
    if (x[mid] == val)
      return mid;
    else if (x[mid] < val)
      beg = mid + 1;
    else
      end = mid - 1;
  }
  return -1;
}
}
// namespace math
}
// namespace operators
}
// namespace paddle
paddle/fluid/operators/math/selected_rows_functor.cc
浏览文件 @
b4a32eaf
...
...
@@ -15,6 +15,7 @@ limitations under the License. */
#include <set>
#include <unordered_map>
#include "paddle/fluid/operators/math/blas.h"
#include "paddle/fluid/operators/math/selected_rows_functor.h"
namespace
paddle
{
...
...
paddle/fluid/operators/math/selected_rows_functor.h
浏览文件 @
b4a32eaf
...
...
@@ -13,6 +13,7 @@ See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include <map>
#include <vector>
#include "paddle/fluid/framework/eigen.h"
...
...
paddle/fluid/operators/math/sequence_pooling.cc
浏览文件 @
b4a32eaf
...
...
@@ -12,9 +12,11 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/fluid/operators/math/sequence_pooling.h"
#include <string>
#include "paddle/fluid/operators/math/blas.h"
#include "paddle/fluid/operators/math/math_function.h"
#include "paddle/fluid/operators/math/sequence_pooling.h"
namespace
paddle
{
namespace
operators
{
...
...
@@ -180,6 +182,7 @@ class SequencePoolFunctor<platform::CPUDeviceContext, T> {
}
auto
lod
=
input
.
lod
()[
0
];
auto
&
place
=
*
context
.
eigen_device
();
auto
blas
=
math
::
GetBlas
<
platform
::
CPUDeviceContext
,
T
>
(
context
);
for
(
int
i
=
0
;
i
<
static_cast
<
int
>
(
lod
.
size
())
-
1
;
++
i
)
{
Tensor
in_t
=
input
.
Slice
(
static_cast
<
int
>
(
lod
[
i
]),
static_cast
<
int
>
(
lod
[
i
+
1
]));
...
...
@@ -191,7 +194,14 @@ class SequencePoolFunctor<platform::CPUDeviceContext, T> {
if
(
pooltype
==
"AVERAGE"
)
{
out_e
.
device
(
place
)
=
in_e
.
mean
(
Eigen
::
array
<
int
,
1
>
({{
0
}}));
}
else
if
(
pooltype
==
"SUM"
)
{
out_e
.
device
(
place
)
=
in_e
.
sum
(
Eigen
::
array
<
int
,
1
>
({{
0
}}));
if
(
h
>
0
)
{
const
T
*
in_data
=
in_t
.
data
<
T
>
();
T
*
out_data
=
out_t
.
mutable_data
<
T
>
(
context
.
GetPlace
());
blas
.
VCOPY
(
w
,
in_data
,
out_data
);
for
(
int64_t
r
=
1
;
r
!=
h
;
++
r
)
{
blas
.
AXPY
(
w
,
1.
,
in_data
+
r
*
w
,
out_data
);
}
}
}
else
if
(
pooltype
==
"SQRT"
)
{
out_e
.
device
(
place
)
=
in_e
.
sum
(
Eigen
::
array
<
int
,
1
>
({{
0
}}))
/
std
::
sqrt
(
static_cast
<
T
>
(
h
));
...
...
@@ -223,6 +233,7 @@ class SequencePoolGradFunctor<platform::CPUDeviceContext, T> {
}
auto
lod
=
in_grad
->
lod
()[
0
];
auto
&
place
=
*
context
.
eigen_device
();
auto
blas
=
math
::
GetBlas
<
platform
::
CPUDeviceContext
,
T
>
(
context
);
for
(
int
i
=
0
;
i
<
static_cast
<
int
>
(
lod
.
size
())
-
1
;
++
i
)
{
auto
in_g_t
=
in_grad
->
Slice
(
static_cast
<
int
>
(
lod
[
i
]),
static_cast
<
int
>
(
lod
[
i
+
1
]));
...
...
@@ -237,7 +248,11 @@ class SequencePoolGradFunctor<platform::CPUDeviceContext, T> {
if
(
pooltype
==
"AVERAGE"
)
{
in_g_e
.
device
(
place
)
=
(
out_g_e
/
static_cast
<
T
>
(
h
)).
broadcast
(
bcast
);
}
else
if
(
pooltype
==
"SUM"
)
{
in_g_e
.
device
(
place
)
=
(
out_g_e
).
broadcast
(
bcast
);
const
T
*
out_g_data
=
out_g_t
.
data
<
T
>
();
T
*
in_g_data
=
in_g_t
.
mutable_data
<
T
>
(
context
.
GetPlace
());
for
(
int
r
=
0
;
r
!=
h
;
++
r
)
{
blas
.
VCOPY
(
w
,
out_g_data
,
in_g_data
+
r
*
w
);
}
}
else
if
(
pooltype
==
"SQRT"
)
{
in_g_e
.
device
(
place
)
=
(
out_g_e
/
std
::
sqrt
(
static_cast
<
T
>
(
h
))).
broadcast
(
bcast
);
...
...
paddle/fluid/operators/momentum_op.cc
浏览文件 @
b4a32eaf
...
...
@@ -33,6 +33,11 @@ class MomentumOp : public framework::OperatorWithKernel {
"Input(velocity) of Momentum should not be null."
);
PADDLE_ENFORCE
(
ctx
->
HasInput
(
"LearningRate"
),
"Input(LearningRate) of Momentum should not be null."
);
PADDLE_ENFORCE
(
ctx
->
GetInputsVarType
(
"Param"
).
front
()
==
framework
::
proto
::
VarType
::
LOD_TENSOR
,
"The input var's type should be LoDTensor, but the received is %s"
,
ctx
->
Inputs
(
"Param"
).
front
(),
ctx
->
GetInputsVarType
(
"Param"
).
front
());
PADDLE_ENFORCE
(
ctx
->
HasOutput
(
"ParamOut"
),
"Output(ParamOut) of Momentum should not be null."
);
...
...
paddle/fluid/operators/momentum_op.cu
浏览文件 @
b4a32eaf
...
...
@@ -46,6 +46,17 @@ template <typename T>
class
MomentumOpCUDAKernel
:
public
framework
::
OpKernel
<
T
>
{
public:
void
Compute
(
const
framework
::
ExecutionContext
&
ctx
)
const
override
{
const
auto
*
param_var
=
ctx
.
InputVar
(
"Param"
);
PADDLE_ENFORCE
(
param_var
->
IsType
<
framework
::
LoDTensor
>
(),
"The Var(%s)'s type should be LoDTensor, "
"but the received is %s"
,
ctx
.
Inputs
(
"Param"
).
front
(),
param_var
->
Type
().
name
());
const
auto
*
grad_var
=
ctx
.
InputVar
(
"Grad"
);
PADDLE_ENFORCE
(
grad_var
->
IsType
<
framework
::
LoDTensor
>
(),
"The Var(%s)'s type should be LoDTensor, "
"but the received is %s"
,
ctx
.
Inputs
(
"Grad"
).
front
(),
grad_var
->
Type
().
name
());
auto
param_out
=
ctx
.
Output
<
framework
::
Tensor
>
(
"ParamOut"
);
auto
velocity_out
=
ctx
.
Output
<
framework
::
Tensor
>
(
"VelocityOut"
);
auto
param
=
ctx
.
Input
<
framework
::
Tensor
>
(
"Param"
);
...
...
paddle/fluid/operators/momentum_op.h
浏览文件 @
b4a32eaf
...
...
@@ -23,6 +23,12 @@ template <typename T>
class
MomentumOpKernel
:
public
framework
::
OpKernel
<
T
>
{
public:
void
Compute
(
const
framework
::
ExecutionContext
&
ctx
)
const
override
{
const
auto
*
param_var
=
ctx
.
InputVar
(
"Param"
);
PADDLE_ENFORCE
(
param_var
->
IsType
<
framework
::
LoDTensor
>
(),
"The Var(%s)'s type should be LoDTensor, "
"but the received is %s"
,
ctx
.
Inputs
(
"Param"
).
front
(),
param_var
->
Type
().
name
());
auto
param_out
=
ctx
.
Output
<
framework
::
Tensor
>
(
"ParamOut"
);
auto
velocity_out
=
ctx
.
Output
<
framework
::
Tensor
>
(
"VelocityOut"
);
auto
param
=
ctx
.
Input
<
framework
::
Tensor
>
(
"Param"
);
...
...
paddle/fluid/operators/reader/blocking_queue.h
浏览文件 @
b4a32eaf
...
...
@@ -31,8 +31,8 @@ class BlockingQueue {
// is a workaround and a simplified version of framework::Channel as it
// doesn't support GPU and it implements on buffered blocking queue.
public:
explicit
BlockingQueue
(
size_t
capacity
)
:
capacity_
(
capacity
),
closed_
(
false
)
{
explicit
BlockingQueue
(
size_t
capacity
,
bool
speed_test_mode
=
false
)
:
capacity_
(
capacity
),
speed_test_mode_
(
speed_test_mode
),
closed_
(
false
)
{
PADDLE_ENFORCE_GT
(
capacity_
,
0
,
"The capacity of a reader::BlockingQueue must be greater than 0."
);
...
...
@@ -72,7 +72,9 @@ class BlockingQueue {
if
(
!
queue_
.
empty
())
{
PADDLE_ENFORCE_NOT_NULL
(
elem
);
*
elem
=
queue_
.
front
();
queue_
.
pop_front
();
if
(
LIKELY
(
!
speed_test_mode_
))
{
queue_
.
pop_front
();
}
send_cv_
.
notify_one
();
return
true
;
}
else
{
...
...
@@ -114,6 +116,7 @@ class BlockingQueue {
private:
size_t
capacity_
;
bool
speed_test_mode_
;
bool
closed_
;
std
::
deque
<
T
>
queue_
;
...
...
paddle/fluid/operators/reader/lod_tensor_blocking_queue.h
浏览文件 @
b4a32eaf
...
...
@@ -33,8 +33,9 @@ class LoDTensorBlockingQueue {
private:
LoDTensorBlockingQueue
(
size_t
capacity
,
const
std
::
vector
<
framework
::
DDim
>&
dims
)
:
queue_
(
capacity
),
dims_
(
dims
)
{}
const
std
::
vector
<
framework
::
DDim
>&
dims
,
bool
speed_test_mode
=
false
)
:
queue_
(
capacity
,
speed_test_mode
),
dims_
(
dims
)
{}
public:
bool
Push
(
const
std
::
vector
<
framework
::
LoDTensor
>&
lod_tensor_vec
)
{
...
...
@@ -69,11 +70,12 @@ class LoDTensorBlockingQueue {
class
LoDTensorBlockingQueueHolder
{
public:
void
InitOnce
(
size_t
capacity
,
const
std
::
vector
<
framework
::
DDim
>&
dims
)
{
void
InitOnce
(
size_t
capacity
,
const
std
::
vector
<
framework
::
DDim
>&
dims
,
bool
speed_test_mode
=
false
)
{
PADDLE_ENFORCE
(
queue_
==
nullptr
,
"LoDTensorBlockingQueueHolder::InitOnce() can only be called once"
);
queue_
.
reset
(
new
LoDTensorBlockingQueue
(
capacity
,
dims
));
queue_
.
reset
(
new
LoDTensorBlockingQueue
(
capacity
,
dims
,
speed_test_mode
));
}
inline
const
std
::
shared_ptr
<
LoDTensorBlockingQueue
>&
GetQueue
()
const
{
...
...
paddle/fluid/operators/reader/reader_blocking_queue_test.cc
浏览文件 @
b4a32eaf
...
...
@@ -217,3 +217,27 @@ TEST(BlockingQueue, MyClassTest) {
q
.
Receive
(
&
b
);
EXPECT_EQ
(
a
.
val_
,
b
.
val_
);
}
// Checks BlockingQueue with speed_test_mode switched off and on.
TEST(BlockingQueue, speed_test_mode) {
  size_t queue_size = 10;
  const size_t zero = 0;  // unsigned literal: avoids signed/unsigned compares

  // Normal mode: each Receive pops one element, so the values come back in
  // the order they were sent and the queue ends up empty.
  BlockingQueue<size_t> q1(queue_size, false);
  for (size_t i = 0; i < queue_size; ++i) {
    q1.Send(i);
  }
  size_t b = queue_size;  // initialised so a failed Receive is never UB
  for (size_t i = 0; i < queue_size; ++i) {
    // Abort on failure: comparing a stale `b` afterwards would be misleading.
    ASSERT_TRUE(q1.Receive(&b));
    EXPECT_EQ(b, i);
  }
  EXPECT_EQ(q1.Size(), zero);

  // Speed-test mode: Receive copies the front element but does not pop it,
  // so every read yields the first value sent and the size never shrinks.
  BlockingQueue<size_t> q2(queue_size, true);
  for (size_t i = 0; i < queue_size; ++i) {
    q2.Send(i);
  }
  for (size_t i = 0; i < queue_size; ++i) {
    b = queue_size;  // poison value: proves Receive really wrote to `b`
    ASSERT_TRUE(q2.Receive(&b));
    EXPECT_EQ(b, zero);
  }
  EXPECT_EQ(q2.Size(), queue_size);
}
paddle/fluid/operators/rmsprop_op.cc
浏览文件 @
b4a32eaf
...
...
@@ -32,6 +32,11 @@ class RmspropOp : public framework::OperatorWithKernel {
"Input(Grad) of RmspropOp should not be null."
);
PADDLE_ENFORCE
(
ctx
->
HasInput
(
"Moment"
),
"Input(Moment) of RmspropOp should not be null."
);
PADDLE_ENFORCE
(
ctx
->
GetInputsVarType
(
"Param"
).
front
()
==
framework
::
proto
::
VarType
::
LOD_TENSOR
,
"The input var's type should be LoDTensor, but the received is %s"
,
ctx
->
Inputs
(
"Param"
).
front
(),
ctx
->
GetInputsVarType
(
"Param"
).
front
());
PADDLE_ENFORCE
(
ctx
->
HasOutput
(
"ParamOut"
),
"Output(param_out) of RmspropOp should not be null."
);
...
...
paddle/fluid/operators/rmsprop_op.h
浏览文件 @
b4a32eaf
...
...
@@ -13,66 +13,254 @@ See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include <math.h>
#include "paddle/fluid/framework/eigen.h"
#include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/operators/math/algorithm.h"
#include "paddle/fluid/operators/math/selected_rows_functor.h"
#include "paddle/fluid/platform/for_range.h"
namespace
paddle
{
namespace
operators
{
using
Tensor
=
framework
::
Tensor
;
template
<
typename
T
,
int
MajorType
=
Eigen
::
RowMajor
,
typename
IndexType
=
Eigen
::
DenseIndex
>
using
EigenVector
=
framework
::
EigenVector
<
T
,
MajorType
,
IndexType
>
;
// Gradient accessor for a dense gradient buffer: element `idx` of the
// gradient is simply element `idx` of the wrapped array.
template <typename T>
struct DenseRmspropGradFunctor {
  // Stores the raw pointer only; the buffer is neither copied nor freed here.
  inline explicit DenseRmspropGradFunctor(const T *grad) : grad_(grad) {}

  // Returns the gradient value at flat offset `idx`.
  HOSTDEVICE inline T operator()(int64_t idx) const {
    const T *element = grad_ + idx;
    return *element;
  }

  const T *grad_;  // start of the gradient data
};
// Gradient accessor for a row-sparse gradient. `rows` lists the row ids that
// are present (`row_count` of them) and `grad` holds `row_numel` values for
// each listed row, in the same order. Rows that are not listed read as 0.
template <typename T>
struct SparseRmspropGradFunctor {
  inline SparseRmspropGradFunctor(const T *grad, const int64_t *rows,
                                  int64_t row_numel, int64_t row_count)
      : grad_(grad),
        rows_(rows),
        row_numel_(row_numel),
        row_count_(row_count) {}

  // Maps the flat dense offset `idx` to (row, column), looks the row up in
  // `rows_` and returns the stored value, or 0 when the row is absent.
  HOSTDEVICE inline T operator()(int64_t idx) const {
    const int64_t dense_row = idx / row_numel_;
    const int64_t column = idx % row_numel_;
    auto slot = math::BinarySearch(rows_, row_count_, dense_row);
    return slot >= 0 ? grad_[slot * row_numel_ + column] : 0;
  }

  const T *grad_;        // values of the listed rows, row after row
  const int64_t *rows_;  // ids of the listed rows (searched with BinarySearch)
  int64_t row_numel_;    // number of elements per row
  int64_t row_count_;    // number of entries in rows_
};
// Element-wise, in-place RMSProp step without gradient centering.
//
// For element idx, with g = grad_functor(idx):
//   ms  <- rho * ms + (1 - rho) * g * g
//   mom <- momentum * mom + lr[0] * g / sqrt(ms + epsilon)
//   param <- param - mom
template <typename T, typename GradFunctor>
struct UncenteredRmspropFunctor {
  UncenteredRmspropFunctor(T *param, T *ms, T *mom, const T *lr, T rho,
                           T epsilon, T momentum,
                           const GradFunctor &grad_functor)
      : param_(param),
        ms_(ms),
        mom_(mom),
        lr_(lr),
        rho_(rho),
        epsilon_(epsilon),
        momentum_(momentum),
        grad_functor_(grad_functor) {}

  // Applies the update to element `idx` of param_, ms_ and mom_.
  HOSTDEVICE inline void operator()(int64_t idx) const {
    T grad_val = grad_functor_(idx);
    T new_ms = rho_ * ms_[idx] + (1 - rho_) * grad_val * grad_val;
    // `auto` keeps whatever type sqrt() returns, so no extra rounding is
    // introduced before the final sum is stored as T.
    const auto denom = sqrt(new_ms + epsilon_);
    T new_mom = momentum_ * mom_[idx] + lr_[0] * grad_val / denom;

    param_[idx] -= new_mom;
    ms_[idx] = new_ms;
    mom_[idx] = new_mom;
  }

  T *param_;     // parameters, updated in place
  T *ms_;        // running mean of squared gradients, updated in place
  T *mom_;       // momentum buffer, updated in place
  const T *lr_;  // learning rate; only element 0 is read
  T rho_;        // decay factor of the running mean
  T epsilon_;    // added under the square root
  T momentum_;   // momentum coefficient
  GradFunctor grad_functor_;  // yields the gradient for a flat index
};
// Element-wise, in-place RMSProp step with gradient centering.
//
// For element idx, with g = grad_functor(idx):
//   ms  <- rho * ms + (1 - rho) * g * g
//   mg  <- rho * mg + (1 - rho) * g
//   mom <- momentum * mom + lr[0] * g / sqrt(ms - mg * mg + epsilon)
//   param <- param - mom
template <typename T, typename GradFunctor>
struct CenteredRmspropFunctor {
  CenteredRmspropFunctor(T *param, T *ms, T *mom, T *mean_grad, const T *lr,
                         T rho, T epsilon, T momentum,
                         const GradFunctor &grad_functor)
      : param_(param),
        ms_(ms),
        mom_(mom),
        mean_grad_(mean_grad),
        lr_(lr),
        rho_(rho),
        epsilon_(epsilon),
        momentum_(momentum),
        grad_functor_(grad_functor) {}

  // Applies the update to element `idx` of param_, ms_, mom_ and mean_grad_.
  HOSTDEVICE inline void operator()(int64_t idx) const {
    T grad_val = grad_functor_(idx);
    T new_ms = rho_ * ms_[idx] + (1 - rho_) * grad_val * grad_val;
    T new_mg = rho_ * mean_grad_[idx] + (1 - rho_) * grad_val;
    // `auto` keeps whatever type sqrt() returns, so no extra rounding is
    // introduced before the final sum is stored as T.
    const auto denom = sqrt(new_ms - new_mg * new_mg + epsilon_);
    T new_mom = momentum_ * mom_[idx] + lr_[0] * grad_val / denom;

    param_[idx] -= new_mom;
    ms_[idx] = new_ms;
    mom_[idx] = new_mom;
    mean_grad_[idx] = new_mg;
  }

  T *param_;      // parameters, updated in place
  T *ms_;         // running mean of squared gradients, updated in place
  T *mom_;        // momentum buffer, updated in place
  T *mean_grad_;  // running mean of gradients, updated in place
  const T *lr_;   // learning rate; only element 0 is read
  T rho_;         // decay factor of both running means
  T epsilon_;     // added under the square root
  T momentum_;    // momentum coefficient
  GradFunctor grad_functor_;  // yields the gradient for a flat index
};
// RMSProp optimizer kernel.
//
// Inputs : Param, MeanSquare, Moment, LearningRate, Grad and, when the
//          `centered` attribute is set, MeanGrad.
// Outputs: ParamOut, MeanSquareOut, MomentOut (and MeanGradOut when
//          centered). Every output must be the very same tensor object as
//          the matching input; the update is performed in place.
// Grad may be a LoDTensor (dense) or a SelectedRows (row-sparse); any other
// variable type raises via PADDLE_THROW.
template <typename DeviceContext, typename T>
class RmspropOpKernel : public framework::OpKernel<T> {
 public:
  void Compute(const framework::ExecutionContext &ctx) const override {
    using LoDTensor = framework::LoDTensor;
    auto *grad_var = ctx.InputVar("Grad");
    auto *param_out = ctx.Output<LoDTensor>("ParamOut");
    auto *moment_out = ctx.Output<LoDTensor>("MomentOut");
    auto *mean_square_out = ctx.Output<LoDTensor>("MeanSquareOut");

    auto epsilon = static_cast<T>(ctx.Attr<float>("epsilon"));
    auto rho = static_cast<T>(ctx.Attr<float>("decay"));
    auto momentum = static_cast<T>(ctx.Attr<float>("momentum"));
    bool centered = ctx.Attr<bool>("centered");

    auto &p_tensor = *ctx.Input<LoDTensor>("Param");
    auto &ms_tensor = *ctx.Input<LoDTensor>("MeanSquare");
    auto &lr_tensor = *ctx.Input<LoDTensor>("LearningRate");
    auto &mom_tensor = *ctx.Input<LoDTensor>("Moment");

    // The functors below write through the output pointers while reading
    // the previous state from the same memory, so in/out must alias.
    PADDLE_ENFORCE_EQ(&p_tensor, param_out,
                      "Param and ParamOut must be the same Tensor");
    PADDLE_ENFORCE_EQ(&mom_tensor, moment_out,
                      "Moment and MomentOut must be the same Tensor");
    PADDLE_ENFORCE_EQ(&ms_tensor, mean_square_out,
                      "MeanSquare and MeanSquareOut must be the same Tensor");

    auto &dev_ctx = ctx.template device_context<DeviceContext>();
    size_t limit = static_cast<size_t>(ms_tensor.numel());

    if (grad_var->IsType<LoDTensor>()) {
      auto &grad_tensor = grad_var->Get<LoDTensor>();

      if (std::is_same<DeviceContext, platform::CPUDeviceContext>::value) {
        // Dense gradient on CPU: whole-tensor Eigen expressions.
        auto &place =
            *ctx.template device_context<DeviceContext>().eigen_device();
        auto lr_value = lr_tensor.data<T>()[0];

        auto p = EigenVector<T>::Flatten(p_tensor);
        auto ms = EigenVector<T>::Flatten(ms_tensor);
        auto g = EigenVector<T>::Flatten(grad_tensor);
        auto mom = EigenVector<T>::Flatten(mom_tensor);

        auto p_out = EigenVector<T>::Flatten(*param_out);
        auto mom_out = EigenVector<T>::Flatten(*moment_out);
        auto ms_out = EigenVector<T>::Flatten(*mean_square_out);

        ms_out.device(place) = rho * ms + (1 - rho) * g * g;
        if (centered) {
          auto &mg_tensor = *ctx.Input<LoDTensor>("MeanGrad");
          auto mg = EigenVector<T>::Flatten(mg_tensor);
          auto *mean_grad_out = ctx.Output<LoDTensor>("MeanGradOut");
          // Must be the _EQ form: plain PADDLE_ENFORCE would take the
          // (always non-null) address as the condition and never fail.
          PADDLE_ENFORCE_EQ(&mg_tensor, mean_grad_out,
                            "MeanGrad and MeanGradOut must be the same Tensor");
          auto mg_out = EigenVector<T>::Flatten(*mean_grad_out);

          mg_out.device(place) = rho * mg + (1 - rho) * g;
          mom_out.device(place) =
              momentum * mom +
              lr_value * g / (ms_out - mg_out.square() + epsilon).sqrt();
        } else {
          mom_out.device(place) =
              momentum * mom + lr_value * g / (ms_out + epsilon).sqrt();
        }
        p_out.device(place) = p - mom_out;
      } else {
        // Dense gradient on any other device: element-wise functor.
        DenseRmspropGradFunctor<T> grad_func(grad_tensor.data<T>());
        platform::ForRange<DeviceContext> for_range(dev_ctx, limit);
        if (centered) {
          auto &mg_tensor = *ctx.Input<LoDTensor>("MeanGrad");
          auto *mean_grad_out = ctx.Output<LoDTensor>("MeanGradOut");
          // _EQ form required, see the CPU branch above.
          PADDLE_ENFORCE_EQ(&mg_tensor, mean_grad_out,
                            "MeanGrad and MeanGradOut must be the same Tensor");
          for_range(CenteredRmspropFunctor<T, DenseRmspropGradFunctor<T>>(
              param_out->mutable_data<T>(ctx.GetPlace()),
              mean_square_out->mutable_data<T>(ctx.GetPlace()),
              moment_out->mutable_data<T>(ctx.GetPlace()),
              mean_grad_out->mutable_data<T>(ctx.GetPlace()),
              lr_tensor.data<T>(), rho, epsilon, momentum, grad_func));
        } else {
          for_range(UncenteredRmspropFunctor<T, DenseRmspropGradFunctor<T>>(
              param_out->mutable_data<T>(ctx.GetPlace()),
              mean_square_out->mutable_data<T>(ctx.GetPlace()),
              moment_out->mutable_data<T>(ctx.GetPlace()),
              lr_tensor.data<T>(), rho, epsilon, momentum, grad_func));
        }
      }
    } else if (grad_var->IsType<framework::SelectedRows>()) {
      // Row-sparse gradient: merge duplicate rows first, then run the same
      // element-wise functors with a gradient accessor that looks rows up.
      auto &grad = grad_var->Get<framework::SelectedRows>();
      // NOTE(review): this creates a fresh variable in the scope on every
      // call -- confirm that it is released somewhere.
      auto *merged_grad = const_cast<framework::Scope &>(ctx.scope())
                              .Var()
                              ->GetMutable<framework::SelectedRows>();

      math::scatter::MergeAdd<DeviceContext, T> merge_func;
      merge_func(dev_ctx, grad, merged_grad);

      platform::ForRange<DeviceContext> for_range(dev_ctx, limit);
      const int64_t *rows;
#ifdef PADDLE_WITH_CUDA
      if (platform::is_gpu_place(ctx.GetPlace())) {
        rows = merged_grad->rows().CUDAData(ctx.GetPlace());
      } else {
#endif
        rows = merged_grad->rows().data();
#ifdef PADDLE_WITH_CUDA
      }
#endif
      auto &merged_tensor = merged_grad->value();
      int64_t row_count = merged_grad->rows().size();
      // NOTE(review): divides by zero if the merged gradient has no rows --
      // confirm callers never pass an empty SelectedRows.
      int64_t row_numel = merged_tensor.numel() / row_count;
      SparseRmspropGradFunctor<T> grad_func(merged_tensor.data<T>(), rows,
                                            row_numel, row_count);

      if (centered) {
        auto &mg_tensor = *ctx.Input<LoDTensor>("MeanGrad");
        auto *mean_grad_out = ctx.Output<LoDTensor>("MeanGradOut");
        // _EQ form required, see the dense CPU branch above.
        PADDLE_ENFORCE_EQ(&mg_tensor, mean_grad_out,
                          "MeanGrad and MeanGradOut must be the same Tensor");
        for_range(CenteredRmspropFunctor<T, SparseRmspropGradFunctor<T>>(
            param_out->mutable_data<T>(ctx.GetPlace()),
            mean_square_out->mutable_data<T>(ctx.GetPlace()),
            moment_out->mutable_data<T>(ctx.GetPlace()),
            mean_grad_out->mutable_data<T>(ctx.GetPlace()),
            lr_tensor.data<T>(), rho, epsilon, momentum, grad_func));
      } else {
        for_range(UncenteredRmspropFunctor<T, SparseRmspropGradFunctor<T>>(
            param_out->mutable_data<T>(ctx.GetPlace()),
            mean_square_out->mutable_data<T>(ctx.GetPlace()),
            moment_out->mutable_data<T>(ctx.GetPlace()),
            lr_tensor.data<T>(), rho, epsilon, momentum, grad_func));
      }
    } else {
      PADDLE_THROW("RMSProp only supports LoDTensor or SelectedRows gradient");
    }
  }
};
...
...
paddle/fluid/operators/sgd_op.cc
浏览文件 @
b4a32eaf
...
...
@@ -21,7 +21,7 @@ class SGDOp : public framework::OperatorWithKernel {
public:
using
framework
::
OperatorWithKernel
::
OperatorWithKernel
;
void
InferShape
(
framework
::
InferShapeContext
*
ctx
)
const
override
{
void
InferShape
(
framework
::
InferShapeContext
*
ctx
)
const
override
{
PADDLE_ENFORCE
(
ctx
->
HasInput
(
"Param"
),
"Input(Param) of SGDOp should not be null."
);
PADDLE_ENFORCE
(
ctx
->
HasInput
(
"Grad"
),
...
...
@@ -42,7 +42,7 @@ class SGDOp : public framework::OperatorWithKernel {
protected:
framework
::
OpKernelType
GetExpectedKernelType
(
const
framework
::
ExecutionContext
&
ctx
)
const
override
{
const
framework
::
ExecutionContext
&
ctx
)
const
override
{
auto
data_type
=
framework
::
GetDataTypeOfVar
(
ctx
.
InputVar
(
"Param"
));
return
framework
::
OpKernelType
(
data_type
,
ctx
.
device_context
());
}
...
...
@@ -50,17 +50,20 @@ class SGDOp : public framework::OperatorWithKernel {
class
SGDOpInferVarType
:
public
framework
::
VarTypeInference
{
public:
void
operator
()(
const
framework
::
OpDesc
&
op_desc
,
framework
::
BlockDesc
*
block
)
const
override
{
auto
input_var
=
op_desc
.
Input
(
"Param"
)[
0
];
for
(
auto
&
out_var
:
op_desc
.
Output
(
"ParamOut"
))
{
if
(
block
->
FindRecursiveOrCreateVar
(
input_var
).
GetType
()
==
framework
::
proto
::
VarType
::
SELECTED_ROWS
)
{
block
->
FindRecursiveOrCreateVar
(
out_var
).
SetType
(
framework
::
proto
::
VarType
::
SELECTED_ROWS
);
}
else
{
block
->
FindRecursiveOrCreateVar
(
out_var
).
SetType
(
framework
::
proto
::
VarType
::
LOD_TENSOR
);
void
operator
()(
const
framework
::
OpDesc
&
op_desc
,
framework
::
BlockDesc
*
block
)
const
override
{
auto
input_var_n
=
op_desc
.
Input
(
"Param"
)[
0
];
auto
in_var_type
=
block
->
FindRecursiveOrCreateVar
(
input_var_n
).
GetType
();
PADDLE_ENFORCE
(
in_var_type
==
framework
::
proto
::
VarType
::
SELECTED_ROWS
||
in_var_type
==
framework
::
proto
::
VarType
::
LOD_TENSOR
,
"The input Var's type should be LoDtensor or SelectedRows,"
" but the received var(%s)'s type is %s"
,
input_var_n
,
in_var_type
);
for
(
auto
&
out_var_n
:
op_desc
.
Output
(
"ParamOut"
))
{
auto
&
out_var
=
block
->
FindRecursiveOrCreateVar
(
out_var_n
);
if
(
out_var
.
GetType
()
!=
in_var_type
)
{
out_var
.
SetType
(
in_var_type
);
}
}
}
...
...
paddle/fluid/operators/sgd_op.cu
浏览文件 @
b4a32eaf
...
...
@@ -56,6 +56,12 @@ template <typename T>
class
SGDOpCUDAKernel
:
public
framework
::
OpKernel
<
T
>
{
public:
void
Compute
(
const
framework
::
ExecutionContext
&
ctx
)
const
override
{
const
auto
*
param_var
=
ctx
.
InputVar
(
"Param"
);
PADDLE_ENFORCE
(
param_var
->
IsType
<
framework
::
LoDTensor
>
(),
"The Var(%s)'s type should be LoDTensor, "
"but the received is %s"
,
ctx
.
Inputs
(
"Param"
).
front
(),
param_var
->
Type
().
name
());
auto
*
param
=
ctx
.
Input
<
framework
::
Tensor
>
(
"Param"
);
auto
*
param_out
=
ctx
.
Output
<
framework
::
Tensor
>
(
"ParamOut"
);
auto
*
learning_rate
=
ctx
.
Input
<
framework
::
Tensor
>
(
"LearningRate"
);
...
...
paddle/fluid/platform/device_context.cc
浏览文件 @
b4a32eaf
...
...
@@ -198,9 +198,9 @@ class CudnnHolder {
CUDADeviceContext
::
CUDADeviceContext
(
CUDAPlace
place
)
:
place_
(
place
),
cudnn_holder_
(
nullptr
)
{
SetDeviceId
(
place_
.
device
);
compute_capability
=
GetCUDAComputeCapability
(
place_
.
device
);
multi_process
=
GetCUDAMultiProcessors
(
place_
.
device
);
max_threads_per_mp
=
GetCUDAMaxThreadsPerMultiProcessor
(
place_
.
device
);
compute_capability
_
=
GetCUDAComputeCapability
(
place_
.
device
);
multi_process
_
=
GetCUDAMultiProcessors
(
place_
.
device
);
max_threads_per_mp
_
=
GetCUDAMaxThreadsPerMultiProcessor
(
place_
.
device
);
PADDLE_ENFORCE
(
cudaStreamCreate
(
&
stream_
));
eigen_stream_
.
reset
(
new
EigenCudaStreamDevice
());
eigen_stream_
->
Reinitialize
(
&
stream_
,
place
);
...
...
@@ -211,6 +211,16 @@ CUDADeviceContext::CUDADeviceContext(CUDAPlace place)
cudnn_holder_
.
reset
(
new
CudnnHolder
(
&
stream_
,
place
));
}
driver_version_
=
GetCUDADriverVersion
(
place_
.
device
);
runtime_version_
=
GetCUDARuntimeVersion
(
place_
.
device
);
LOG
(
INFO
)
<<
"device: "
<<
place_
.
device
<<
", CUDA Capability: "
<<
compute_capability_
<<
", Driver Version: "
<<
driver_version_
/
1000
<<
"."
<<
(
driver_version_
%
100
)
/
10
<<
", Runtime Version: "
<<
runtime_version_
/
1000
<<
"."
<<
(
runtime_version_
%
100
)
/
10
;
callback_manager_
.
reset
(
new
StreamCallbackManager
(
stream_
));
}
...
...
@@ -232,11 +242,11 @@ void CUDADeviceContext::Wait() const {
}
int
CUDADeviceContext
::
GetComputeCapability
()
const
{
return
compute_capability
;
return
compute_capability
_
;
}
int
CUDADeviceContext
::
GetMaxPhysicalThreadCount
()
const
{
return
multi_process
*
max_threads_per_mp
;
return
multi_process
_
*
max_threads_per_mp_
;
}
Eigen
::
GpuDevice
*
CUDADeviceContext
::
eigen_device
()
const
{
...
...
paddle/fluid/platform/device_context.h
浏览文件 @
b4a32eaf
...
...
@@ -135,9 +135,11 @@ class CUDADeviceContext : public DeviceContext {
cudaStream_t
stream_
;
cublasHandle_t
cublas_handle_
;
int
compute_capability
;
int
multi_process
;
int
max_threads_per_mp
;
int
compute_capability_
;
int
runtime_version_
;
int
driver_version_
;
int
multi_process_
;
int
max_threads_per_mp_
;
mutable
std
::
mutex
mtx_
;
...
...
paddle/fluid/platform/enforce.h
浏览文件 @
b4a32eaf
...
...
@@ -130,6 +130,13 @@ struct EOFException : public std::exception {
#define UNLIKELY(condition) (condition == 0)
#endif
// LIKELY(condition) evaluates to the truth value of `condition`; on
// GCC/Clang it additionally hints to the optimizer that the condition is
// expected to be true.
#if !defined(_WIN32)
#define LIKELY(condition) __builtin_expect(static_cast<bool>(condition), 1)
#else
// There are no equivalent intrinsics in MSVC, so only the truth value is kept.
// The argument is parenthesized so that expressions containing operators with
// lower precedence than `!=` (e.g. `a & b`) are compared as a whole.
#define LIKELY(condition) ((condition) != 0)
#endif
template
<
typename
...
Args
>
inline
typename
std
::
enable_if
<
sizeof
...(
Args
)
!=
0
,
void
>::
type
throw_on_error
(
bool
stat
,
const
Args
&
...
args
)
{
...
...
paddle/fluid/platform/gpu_info.cc
浏览文件 @
b4a32eaf
...
...
@@ -46,6 +46,24 @@ int GetCUDAComputeCapability(int id) {
return
device_prop
.
major
*
10
+
device_prop
.
minor
;
}
// Returns the version number reported by cudaRuntimeGetVersion().
// `id` is only validated against the number of visible devices; the runtime
// query itself takes no device argument.
int GetCUDARuntimeVersion(int id) {
  PADDLE_ENFORCE_LT(id, GetCUDADeviceCount(), "id must less than GPU count");
  int runtime_version = 0;
  // The message names this wrapper (it previously pointed at a non-existent
  // paddle::platform::cudaRuntimeGetVersion), matching GetCUDADriverVersion.
  PADDLE_ENFORCE(cudaRuntimeGetVersion(&runtime_version),
                 "cudaRuntimeGetVersion failed in "
                 "paddle::platform::GetCUDARuntimeVersion");
  return runtime_version;
}
// Returns the version number reported by cudaDriverGetVersion().
// `id` is checked to be below GetCUDADeviceCount() before the query is made.
int GetCUDADriverVersion(int id) {
  PADDLE_ENFORCE_LT(id, GetCUDADeviceCount(), "id must less than GPU count");

  int version = 0;  // filled in by the CUDA call below
  PADDLE_ENFORCE(
      cudaDriverGetVersion(&version),
      "cudaDriverGetVersion failed in paddle::platform::GetCUDADriverVersion");
  return version;
}
int
GetCUDAMultiProcessors
(
int
id
)
{
PADDLE_ENFORCE_LT
(
id
,
GetCUDADeviceCount
(),
"id must less than GPU count"
);
int
count
;
...
...
paddle/fluid/platform/gpu_info.h
浏览文件 @
b4a32eaf
...
...
@@ -29,6 +29,12 @@ int GetCUDADeviceCount();
//! Get the compute capability of the ith GPU (format: major * 10 + minor)
int
GetCUDAComputeCapability
(
int
i
);
//! Get the runtime version of the ith GPU
int
GetCUDARuntimeVersion
(
int
id
);
//! Get the driver version of the ith GPU
int
GetCUDADriverVersion
(
int
id
);
//! Get the MultiProcessors of the ith GPU.
int
GetCUDAMultiProcessors
(
int
i
);
...
...
paddle/fluid/pybind/pybind.cc
浏览文件 @
b4a32eaf
...
...
@@ -57,6 +57,10 @@ limitations under the License. */
#include "pybind11/stl.h"
DEFINE_bool
(
reader_queue_speed_test_mode
,
false
,
"If set true, the queue.pop will only get data from queue but not "
"remove the data from queue for speed testing"
);
// disable auto conversion to list in Python
PYBIND11_MAKE_OPAQUE
(
paddle
::
framework
::
LoDTensorArray
);
...
...
@@ -170,14 +174,14 @@ PYBIND11_PLUGIN(core) {
A LoDTensor X can look like the example below. It contains 2 sequences.
The first has length 2 and the second has length 3, as described by x.lod.
The first tensor dimension
6
=2+3 is calculated from LoD if it's available.
The first tensor dimension
5
=2+3 is calculated from LoD if it's available.
It means the total number of sequence element. In X, each element has 2
columns, hence [
6
, 2].
columns, hence [
5
, 2].
x.lod = [[2, 3]]
x.data = [[1, 2], [3, 4],
[5, 6], [7, 8], [9, 10]
, [11, 12]
]
x.shape = [
6
, 2]
[5, 6], [7, 8], [9, 10]]
x.shape = [
5
, 2]
LoD can have multiple levels (for example, a paragraph can have multiple
sentences and a sentence can have multiple words). In the following
...
...
@@ -380,7 +384,8 @@ All parameter, weight, gradient are variables in Paddle.
return
make_ddim
(
shape
);
});
auto
*
holder
=
var
.
GetMutable
<
LoDTensorBlockingQueueHolder
>
();
holder
->
InitOnce
(
capacity
,
dims
);
holder
->
InitOnce
(
capacity
,
dims
,
FLAGS_reader_queue_speed_test_mode
);
return
holder
->
GetQueue
();
},
py
::
return_value_policy
::
copy
);
...
...
paddle/fluid/train/demo/README.md
浏览文件 @
b4a32eaf
...
...
@@ -15,7 +15,7 @@ cmake .. -DFLUID_INSTALL_DIR=$PADDLE_LIB \
-DWITH_MKL=OFF \
-DWITH_MKLDNN=OFF
make -j8
make -j8
inference
_lib_dist
make -j8
fluid
_lib_dist
```
### step 2. generate program desc
...
...
paddle/scripts/paddle_build.sh
浏览文件 @
b4a32eaf
...
...
@@ -648,25 +648,25 @@ function gen_capi_package() {
fi
}
function
gen_fluid_
inference_
lib
()
{
function
gen_fluid_lib
()
{
mkdir
-p
${
PADDLE_ROOT
}
/build
cd
${
PADDLE_ROOT
}
/build
if
[[
${
WITH_C_API
:-
OFF
}
==
"OFF"
&&
${
WITH_INFERENCE
:-
ON
}
==
"ON"
]]
;
then
cat
<<
EOF
========================================
Generating fluid
inference library
...
Generating fluid
library for train and inference
...
========================================
EOF
cmake ..
-DWITH_DISTRIBUTE
=
OFF
make
-j
`
nproc
`
inference
_lib_dist
make
-j
`
nproc
`
fluid
_lib_dist
fi
}
function
tar_fluid_
inference_
lib
()
{
function
tar_fluid_lib
()
{
if
[[
${
WITH_C_API
:-
OFF
}
==
"OFF"
&&
${
WITH_INFERENCE
:-
ON
}
==
"ON"
]]
;
then
cat
<<
EOF
========================================
Taring fluid
inference library
...
Taring fluid
library for train and inference
...
========================================
EOF
cd
${
PADDLE_ROOT
}
/build
...
...
@@ -675,11 +675,11 @@ EOF
fi
}
function
test_fluid_
inference_
lib
()
{
function
test_fluid_lib
()
{
if
[[
${
WITH_C_API
:-
OFF
}
==
"OFF"
&&
${
WITH_INFERENCE
:-
ON
}
==
"ON"
]]
;
then
cat
<<
EOF
========================================
Testing fluid
inference library
...
Testing fluid
library for inference
...
========================================
EOF
cd
${
PADDLE_ROOT
}
/paddle/fluid/inference/api/demo_ci
...
...
@@ -731,9 +731,9 @@ function main() {
;;
fluid_inference_lib
)
cmake_gen
${
PYTHON_ABI
:-
""
}
gen_fluid_
inference_
lib
tar_fluid_
inference_
lib
test_fluid_
inference_
lib
gen_fluid_lib
tar_fluid_lib
test_fluid_lib
;;
check_style
)
check_style
...
...
@@ -744,8 +744,8 @@ function main() {
assert_api_not_changed
${
PYTHON_ABI
:-
""
}
run_test
gen_capi_package
gen_fluid_
inference_
lib
test_fluid_
inference_
lib
gen_fluid_lib
test_fluid_lib
assert_api_spec_approvals
;;
maccheck
)
...
...
python/paddle/fluid/__init__.py
浏览文件 @
b4a32eaf
...
...
@@ -113,7 +113,8 @@ def __bootstrap__():
'use_pinned_memory'
,
'check_nan_inf'
,
'benchmark'
,
'warpctc_dir'
,
'eager_delete_scope'
,
'use_mkldnn'
,
'initial_cpu_memory_in_mb'
,
'init_allocated_mem'
,
'free_idle_memory'
,
'paddle_num_threads'
,
"dist_threadpool_size"
,
'cpu_deterministic'
,
'eager_delete_tensor_gb'
'dist_threadpool_size'
,
'cpu_deterministic'
,
'eager_delete_tensor_gb'
,
'reader_queue_speed_test_mode'
]
if
core
.
is_compiled_with_dist
():
read_env_flags
.
append
(
'rpc_deadline'
)
...
...
python/paddle/fluid/layers/nn.py
浏览文件 @
b4a32eaf
...
...
@@ -107,6 +107,7 @@ __all__ = [
'log'
,
'crop'
,
'rank_loss'
,
'margin_rank_loss'
,
'elu'
,
'relu6'
,
'pow'
,
...
...
@@ -5827,6 +5828,54 @@ def rank_loss(label, left, right, name=None):
return
out
def margin_rank_loss(label, left, right, margin=0.1, name=None):
    r"""
    Margin Ranking Loss Layer for ranking problem,
    which compares left score and right score passed in.
    The ranking loss can be defined as following equation:

    .. math::

        rank\_loss &= max(0, -label * (left - right) + margin)

    Args:
       label (Variable): Indicates whether the left is ranked higher than the right or not.
       left (Variable): Ranking score for left.
       right (Variable): Ranking score for right.
       margin (float): Indicates the given margin.
       name (str|None): A name for this layer (optional). If set None, the layer
                       will be named automatically.

    Returns:
       Variable: The ranking loss.

    Raises:
       ValueError: Any of label, left, and right is not a Variable.

    Examples:
        .. code-block:: python

           label = fluid.layers.data(name="label", shape=[4, 1], dtype="float32")
           left = fluid.layers.data(name="left", shape=[4, 1], dtype="float32")
           right = fluid.layers.data(name="right", shape=[4, 1], dtype="float32")
           out = fluid.layers.margin_rank_loss(label, left, right)
    """
    # Must stay the first statement: `locals()` forwards exactly the call
    # arguments to LayerHelper, so no other local may exist yet.
    helper = LayerHelper('margin_rank_loss', **locals())

    # All three inputs must be graph Variables; the messages are unchanged.
    for arg_name, arg in (("label", label), ("left", left), ("right", right)):
        if not isinstance(arg, Variable):
            raise ValueError("The %s should be a Variable." % arg_name)

    # Both outputs are created with the dtype of the left score.
    out = helper.create_tmp_variable(left.dtype)
    act = helper.create_tmp_variable(left.dtype)
    helper.append_op(
        type='margin_rank_loss',
        inputs={"Label": label,
                "X1": left,
                "X2": right},
        outputs={'Out': out,
                 'Activated': act},
        attrs={'margin': margin})
    return out
def
pad2d
(
input
,
paddings
=
[
0
,
0
,
0
,
0
],
mode
=
'constant'
,
...
...
@@ -6290,6 +6339,7 @@ def sequence_enumerate(input, win_size, pad_value=0, name=None):
outputs
=
{
'Out'
:
out
},
attrs
=
{
'win_size'
:
win_size
,
'pad_value'
:
pad_value
})
return
out
def
sequence_mask
(
x
,
maxlen
=
None
,
dtype
=
'int64'
,
name
=
None
):
...
...
python/paddle/fluid/optimizer.py
浏览文件 @
b4a32eaf
...
...
@@ -659,6 +659,9 @@ class AdamaxOptimizer(Optimizer):
optimizer = fluid.optimizer.Adamax(learning_rate=0.2)
optimizer.minimize(cost)
Notes:
Currently, AdamaxOptimizer doesn't support sparse parameter optimization.
"""
_moment_acc_str
=
"moment"
_inf_norm_acc_str
=
"inf_norm"
...
...
@@ -778,6 +781,9 @@ class DecayedAdagradOptimizer(Optimizer):
optimizer = fluid.optimizer.DecayedAdagrad(learning_rate=0.2)
optimizer.minimize(cost)
Notes:
Currently, DecayedAdagradOptimizer doesn't support sparse parameter optimization.
"""
_moment_acc_str
=
"moment"
...
...
@@ -858,6 +864,9 @@ class AdadeltaOptimizer(Optimizer):
optimizer = fluid.optimizer.Adadelta(
learning_rate=0.0003, epsilon=1.0e-6, rho=0.95)
_, params_grads = optimizer.minimize(cost)
Notes:
Currently, AdadeltaOptimizer doesn't support sparse parameter optimization.
"""
_avg_squared_grad_acc_str
=
"_avg_squared_grad"
...
...
@@ -1126,6 +1135,9 @@ class FtrlOptimizer(Optimizer):
optimizer = fluid.optimizer.Ftrl(0.0001)
_, params_grads = optimizer.minimize(cost)
Notes:
Currently, FtrlOptimizer doesn't support sparse parameter optimization.
"""
_squared_acc_str
=
"squared"
...
...
python/paddle/fluid/tests/unittests/test_rmsprop_op.py
浏览文件 @
b4a32eaf
...
...
@@ -19,33 +19,76 @@ import unittest
import
numpy
as
np
import
paddle.fluid.core
as
core
from
paddle.fluid.op
import
Operator
import
paddle.fluid
as
fluid
def create_selected_rows_and_tensor(scope, place, height, row_num,
                                    embedding_size):
    """Fill `scope` with a random sparse gradient and its dense equivalent.

    The variable "@selected_rows@" receives `row_num` random rows (row indices
    drawn from [0, height - 1], duplicates allowed) of width `embedding_size`;
    the variable "grad" receives a dense [height, embedding_size] tensor in
    which every sparse row has been summed into its target row.

    Args:
        scope: object whose `var(name)` yields variables exposing
            `get_selected_rows()` / `get_tensor()`.
        place: device placement forwarded unchanged to the tensors' `set`.
        height (int): number of rows of the dense tensor.
        row_num (int): number of sparse rows to generate.
        embedding_size (int): number of columns of both tensors.

    Returns:
        tuple: `(tensor_val, sr_val)`, the dense float32 array and the
        float32 array of sparse row values.
    """
    sr = scope.var("@selected_rows@").get_selected_rows()
    tensor = scope.var("grad").get_tensor()

    # `randint` excludes its upper bound, so `high=height` covers the same
    # inclusive range [0, height - 1] as the deprecated
    # `random_integers(low=0, high=height - 1)`.
    rows = np.random.randint(
        low=0, high=height, size=[row_num, ]).astype('int64')
    sr_val = np.random.random(size=[row_num, embedding_size]).astype('float32')

    sr.set_height(height)
    sr.set_rows(rows)
    sr.get_tensor().set(sr_val, place)

    tensor_val = np.zeros(shape=[height, embedding_size], dtype='float32')
    # Unbuffered add: rows that appear several times accumulate, which a
    # plain `tensor_val[rows] += sr_val` would not do.
    np.add.at(tensor_val, rows, sr_val)

    tensor.set(tensor_val, place)
    return tensor_val, sr_val
class
TestBase
(
unittest
.
TestCase
):
def
setup
(
self
,
centered
,
epsilon
=
1e-6
):
def
setup
(
self
,
place
,
is_sparse
,
centered
,
size
,
row_num
=
None
,
epsilon
=
1e-6
):
np
.
random
.
seed
(
5
)
# fix seed
self
.
scope
=
fluid
.
global_scope
()
self
.
place
=
place
self
.
param_name
=
"param"
self
.
param
=
np
.
random
.
random
(
(
123
,
321
)
).
astype
(
"float32"
)
self
.
param
=
np
.
random
.
random
(
size
).
astype
(
"float32"
)
self
.
mean_square_name
=
"mean_square"
self
.
mean_square
=
np
.
random
.
random
((
123
,
321
)).
astype
(
"float32"
)
self
.
mean_square
=
np
.
random
.
uniform
(
low
=
1
,
high
=
2
,
size
=
size
).
astype
(
"float32"
)
self
.
mean_grad_name
=
"mean_grad"
self
.
mean_grad
=
np
.
random
.
random
(
(
123
,
321
)
).
astype
(
"float32"
)
self
.
mean_grad
=
np
.
random
.
random
(
size
).
astype
(
"float32"
)
self
.
lr_name
=
"lr"
self
.
learning_rate
=
np
.
array
([
0.01
]).
astype
(
"float32"
)
self
.
grad_name
=
"grad"
self
.
grad
=
np
.
random
.
random
((
123
,
321
)).
astype
(
"float32"
)
self
.
is_sparse
=
is_sparse
if
self
.
is_sparse
:
self
.
grad_sr_name
=
"@selected_rows@"
self
.
grad
,
self
.
grad_sr
=
create_selected_rows_and_tensor
(
self
.
scope
,
place
,
size
[
0
],
row_num
,
size
[
1
])
else
:
self
.
grad
=
np
.
random
.
random
(
size
).
astype
(
"float32"
)
grad_tensor
=
self
.
scope
.
var
(
self
.
grad_name
).
get_tensor
()
grad_tensor
.
set
(
self
.
grad
,
place
)
self
.
moment_name
=
"moment"
self
.
moment
=
np
.
zeros
((
123
,
321
)).
astype
(
"float32"
)
self
.
moment
=
np
.
random
.
uniform
(
low
=
0
,
high
=
1
,
size
=
size
).
astype
(
"float32"
)
self
.
epsilon
=
epsilon
self
.
decay
=
0.9
self
.
momentum
=
0.
0
self
.
momentum
=
0.
1
self
.
centered
=
centered
self
.
ms_out
=
self
.
decay
*
self
.
mean_square
+
(
1
-
self
.
decay
...
...
@@ -61,118 +104,122 @@ class TestBase(unittest.TestCase):
self
.
param_out
=
self
.
param
-
self
.
moment_out
def
check
(
self
,
actual_t
,
expect_t
,
place
,
out_name
,
atol
=
1e-5
,
equal_nan
=
False
):
self
.
assertTrue
(
np
.
allclose
(
actual_t
,
expect_t
,
atol
=
atol
,
equal_nan
=
equal_nan
),
"Output ("
+
out_name
+
") has diff at "
+
str
(
place
)
+
"
\n
Expect "
+
str
(
expect_t
)
+
"
\n
"
+
"But Got"
+
str
(
actual_t
))
class
TestRmspropOp
(
TestBase
):
def
check_with_place
(
self
,
place
,
centered
,
epsilon
):
self
.
setup
(
centered
,
epsilon
)
scope
=
core
.
Scope
()
# create and initialize Param Variable
param
=
scope
.
var
(
self
.
param_name
).
get_tensor
()
param
.
set
(
self
.
param
,
place
)
self
.
param_tensor
=
self
.
scope
.
var
(
self
.
param_name
).
get_tensor
()
self
.
param_tensor
.
set
(
self
.
param
,
place
)
mean_square
=
scope
.
var
(
self
.
mean_square_name
).
get_tensor
()
mean_square
.
set
(
self
.
mean_square
,
place
)
self
.
mean_square_tensor
=
self
.
scope
.
var
(
self
.
mean_square_name
).
get_tensor
()
self
.
mean_square_tensor
.
set
(
self
.
mean_square
,
place
)
lr
=
scope
.
var
(
self
.
lr_name
).
get_tensor
()
lr
=
s
elf
.
s
cope
.
var
(
self
.
lr_name
).
get_tensor
()
lr
.
set
(
self
.
learning_rate
,
place
)
grad
=
scope
.
var
(
self
.
grad
_name
).
get_tensor
()
grad
.
set
(
self
.
grad
,
place
)
self
.
moment_tensor
=
self
.
scope
.
var
(
self
.
moment
_name
).
get_tensor
()
self
.
moment_tensor
.
set
(
self
.
moment
,
place
)
moment
=
scope
.
var
(
self
.
moment_name
).
get_tensor
()
moment
.
set
(
self
.
moment
,
place
)
if
self
.
centered
:
self
.
mean_grad_tensor
=
self
.
scope
.
var
(
self
.
mean_grad_name
).
get_tensor
()
self
.
mean_grad_tensor
.
set
(
self
.
mean_grad
,
place
)
# create and run sgd operator
def
check
(
self
,
actual_t
,
expect_t
,
place
,
out_name
,
atol
=
1e-5
):
self
.
assertTrue
(
np
.
allclose
(
actual_t
,
expect_t
,
atol
=
atol
),
"Output ("
+
out_name
+
") has diff at "
+
str
(
place
)
+
"
\n
Expect "
+
str
(
expect_t
)
+
"
\n
"
+
"But Got"
+
str
(
actual_t
))
if
self
.
centered
:
mean_grad
=
scope
.
var
(
self
.
mean_grad_name
).
get_tensor
()
mean_grad
.
set
(
self
.
mean_grad
,
place
)
rmsprop_op
=
Operator
(
"rmsprop"
,
Param
=
self
.
param_name
,
Grad
=
self
.
grad_name
,
MeanSquare
=
self
.
mean_square_name
,
MeanGrad
=
self
.
mean_grad_name
,
Moment
=
self
.
moment_name
,
LearningRate
=
self
.
lr_name
,
ParamOut
=
self
.
param_name
,
MeanSquareOut
=
self
.
mean_square_name
,
MomentOut
=
self
.
moment_name
,
MeanGradOut
=
self
.
mean_grad_name
,
epsilon
=
self
.
epsilon
,
decay
=
self
.
decay
,
momentum
=
self
.
momentum
,
centered
=
True
)
else
:
rmsprop_op
=
Operator
(
"rmsprop"
,
Param
=
self
.
param_name
,
Grad
=
self
.
grad_name
,
MeanSquare
=
self
.
mean_square_name
,
Moment
=
self
.
moment_name
,
LearningRate
=
self
.
lr_name
,
ParamOut
=
self
.
param_name
,
MeanSquareOut
=
self
.
mean_square_name
,
MomentOut
=
self
.
moment_name
,
epsilon
=
self
.
epsilon
,
decay
=
self
.
decay
,
momentum
=
self
.
momentum
,
centered
=
False
)
rmsprop_op
.
run
(
scope
,
place
)
atol
=
1e-5
equal_nan
=
False
class
TestRmspropOp
(
TestBase
):
def
check_with_place
(
self
,
place
,
is_sparse
,
centered
,
size
,
row_num
=
None
,
epsilon
=
1e-6
):
self
.
setup
(
place
,
is_sparse
,
centered
,
size
,
row_num
,
epsilon
)
self
.
run_and_check
()
def
run_and_check
(
self
):
grad_name
=
self
.
grad_sr_name
if
self
.
is_sparse
else
self
.
grad_name
kwargs
=
{
'Param'
:
self
.
param_name
,
'Grad'
:
grad_name
,
'MeanSquare'
:
self
.
mean_square_name
,
'Moment'
:
self
.
moment_name
,
'LearningRate'
:
self
.
lr_name
,
'ParamOut'
:
self
.
param_name
,
'MeanSquareOut'
:
self
.
mean_square_name
,
'MomentOut'
:
self
.
moment_name
,
'epsilon'
:
self
.
epsilon
,
'decay'
:
self
.
decay
,
'momentum'
:
self
.
momentum
,
'centered'
:
self
.
centered
}
if
self
.
centered
:
atol
=
1e-3
equal_nan
=
True
kwargs
[
'MeanGrad'
]
=
self
.
mean_grad_name
kwargs
[
'MeanGradOut'
]
=
self
.
mean_grad_name
rmsprop_op
=
Operator
(
'rmsprop'
,
**
kwargs
)
atol
=
1e-6
rmsprop_op
.
run
(
self
.
scope
,
self
.
place
)
self
.
check
(
np
.
array
(
mean_square
),
self
.
ms_out
,
place
,
self
.
mean_square_name
)
np
.
array
(
self
.
mean_square_tensor
),
self
.
ms_out
,
self
.
place
,
self
.
mean_square_name
,
atol
=
atol
)
self
.
check
(
np
.
array
(
moment
),
np
.
array
(
self
.
moment_tensor
),
self
.
moment_out
,
place
,
self
.
place
,
self
.
moment_name
,
atol
=
atol
,
equal_nan
=
equal_nan
)
atol
=
atol
)
self
.
check
(
np
.
array
(
param
),
np
.
array
(
self
.
param_tensor
),
self
.
param_out
,
place
,
self
.
place
,
self
.
param_name
,
atol
=
atol
,
equal_nan
=
equal_nan
)
atol
=
atol
)
if
self
.
centered
:
self
.
check
(
np
.
array
(
mean_grad
),
self
.
mg_out
,
place
,
self
.
mean_grad_name
)
np
.
array
(
self
.
mean_grad_tensor
),
self
.
mg_out
,
self
.
place
,
self
.
mean_grad_name
)
def
test_rmsprop
(
self
):
places
=
[
core
.
CPUPlace
()]
if
core
.
is_compiled_with_cuda
():
places
.
append
(
core
.
CUDAPlace
(
0
))
size
=
(
128
,
320
)
for
place
in
places
:
self
.
check_with_place
(
place
,
False
,
1e-6
)
self
.
check_with_place
(
place
,
False
,
1e-10
)
self
.
check_with_place
(
place
,
True
,
1e-6
)
self
.
check_with_place
(
place
,
True
,
1e-10
)
for
centered
in
[
False
,
True
]:
with
fluid
.
scope_guard
(
core
.
Scope
()):
self
.
check_with_place
(
place
,
is_sparse
=
False
,
centered
=
centered
,
size
=
size
)
with
fluid
.
scope_guard
(
core
.
Scope
()):
self
.
check_with_place
(
place
,
is_sparse
=
True
,
centered
=
centered
,
row_num
=
512
,
size
=
size
)
with
fluid
.
scope_guard
(
core
.
Scope
()):
self
.
check_with_place
(
place
,
is_sparse
=
True
,
centered
=
centered
,
row_num
=
60
,
size
=
size
)
if
__name__
==
"__main__"
:
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录