Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
PaddlePaddle
Paddle
提交
57033869
P
Paddle
项目概览
PaddlePaddle
/
Paddle
大约 1 年 前同步成功
通知
2298
Star
20931
Fork
5422
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
1423
列表
看板
标记
里程碑
合并请求
543
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
1,423
Issue
1,423
列表
看板
标记
里程碑
合并请求
543
合并请求
543
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
体验新版 GitCode,发现更多精彩内容 >>
提交
57033869
编写于
12月 11, 2018
作者:
M
minqiyang
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
Add debug info
上级
202b2f1f
变更
8
隐藏空白更改
内联
并排
Showing
8 changed file
with
239 addition
and
157 deletion
+239
-157
paddle/fluid/framework/details/computation_op_handle.cc
paddle/fluid/framework/details/computation_op_handle.cc
+37
-8
paddle/fluid/framework/details/fast_threaded_ssa_graph_executor.cc
...uid/framework/details/fast_threaded_ssa_graph_executor.cc
+1
-0
paddle/fluid/framework/details/op_handle_base.cc
paddle/fluid/framework/details/op_handle_base.cc
+1
-1
paddle/fluid/framework/operator.cc
paddle/fluid/framework/operator.cc
+100
-60
paddle/fluid/framework/scope.cc
paddle/fluid/framework/scope.cc
+22
-15
paddle/fluid/operators/elementwise/elementwise_op.h
paddle/fluid/operators/elementwise/elementwise_op.h
+36
-33
paddle/fluid/operators/optimizers/adam_op.cc
paddle/fluid/operators/optimizers/adam_op.cc
+40
-39
python/paddle/fluid/profiler.py
python/paddle/fluid/profiler.py
+2
-1
未找到文件。
paddle/fluid/framework/details/computation_op_handle.cc
浏览文件 @
57033869
...
...
@@ -26,17 +26,46 @@ ComputationOpHandle::ComputationOpHandle(ir::Node *node, Scope *scope,
scope_
(
scope
),
place_
(
place
)
{}
// Scoped timer for ad-hoc debugging. It records the time at construction and,
// when destroyed, writes the elapsed time (in seconds) to VLOG(1).
//
// Output is filtered: only timers whose `type` is exactly "elementwise_add"
// are reported; all others are destroyed silently. The comparison is a full
// string match, so a prefixed label such as "ReallyRun elementwise_add" does
// NOT pass the filter.
struct RecordTime {
  // name: label of the region being timed; printed first in the log line.
  // type: tag checked against the filter in the destructor and printed second.
  RecordTime(const std::string &name, const std::string &type)
      : name_(name), type_(type), start_(std::chrono::system_clock::now()) {}

  ~RecordTime() {
    // The tag is spelled "elementwise_add" so that it agrees with the filter
    // used by the RecordTime helper in operator.cc ("elementsize_add" is a
    // misspelling and can never match that tag).
    if (type_ == "elementwise_add") {
      end_ = std::chrono::system_clock::now();
      // duration<double> uses the default period, i.e. seconds.
      std::chrono::duration<double> diff = end_ - start_;
      VLOG(1) << name_ << " " << type_ << " time record: " << diff.count();
    }
  }

  std::string name_;  // region label supplied by the caller
  std::string type_;  // tag used for filtering and logging
  // NOTE(review): system_clock is not guaranteed to be monotonic; consider
  // steady_clock if these measurements are kept beyond debugging.
  std::chrono::system_clock::time_point start_;  // set at construction
  std::chrono::system_clock::time_point end_;    // set only when logging
};
void
ComputationOpHandle
::
RunImpl
()
{
WaitInputVarGenerated
(
place_
);
{
RecordTime
rt
(
"ComputationOpHandle::RunImpl"
,
"Wait"
);
WaitInputVarGenerated
(
place_
);
}
Scope
*
scope
=
nullptr
;
{
RecordTime
rt
(
"ComputationOpHandle::RunImpl"
,
"PrepareScope"
);
scope
=
scope_
->
FindVar
(
kLocalExecScopeName
)
->
Get
<
Scope
*>
();
}
{
RecordTime
rt
(
"ComputationOpHandle::RunImpl"
,
"ReallyRun "
+
op_
->
Type
());
auto
run_func
=
[
this
]()
{
op_
->
Run
(
*
scope_
->
FindVar
(
kLocalExecScopeName
)
->
Get
<
Scope
*>
(),
place_
);
};
auto
run_func
=
[
this
,
scope
]()
{
op_
->
Run
(
*
scope
,
place_
);
};
if
(
is_lock_and_record_event_free_
)
{
run_func
();
}
else
{
this
->
RunAndRecordEvent
(
run_func
);
if
(
is_lock_and_record_event_free_
)
{
run_func
();
}
else
{
this
->
RunAndRecordEvent
(
run_func
);
}
}
}
...
...
paddle/fluid/framework/details/fast_threaded_ssa_graph_executor.cc
浏览文件 @
57033869
...
...
@@ -120,6 +120,7 @@ FeedFetchList FastThreadedSSAGraphExecutor::Run(
ClearFetchOp
(
graph_
.
get
(),
&
fetch_ops
);
return
fetches
;
}
void
FastThreadedSSAGraphExecutor
::
RunOpAsync
(
std
::
unordered_map
<
OpHandleBase
*
,
std
::
atomic
<
int
>>
*
op_deps
,
OpHandleBase
*
op
,
...
...
paddle/fluid/framework/details/op_handle_base.cc
浏览文件 @
57033869
...
...
@@ -41,7 +41,7 @@ OpHandleBase::~OpHandleBase() {
void
OpHandleBase
::
Run
(
bool
use_cuda
)
{
#ifdef PADDLE_WITH_CUDA
if
(
events_
.
empty
()
&&
use_cuda
)
{
if
(
events_
.
empty
()
&&
use_cuda
&&
!
dev_ctxes_
.
empty
()
)
{
for
(
auto
&
p
:
dev_ctxes_
)
{
int
dev_id
=
boost
::
get
<
platform
::
CUDAPlace
>
(
p
.
first
).
device
;
PADDLE_ENFORCE
(
cudaSetDevice
(
dev_id
));
...
...
paddle/fluid/framework/operator.cc
浏览文件 @
57033869
...
...
@@ -701,85 +701,125 @@ void OperatorWithKernel::RuntimeInferShape(const Scope& scope,
this
->
InferShape
(
&
infer_shape_ctx
);
}
// Debug-only timer. Construction captures the current system_clock time;
// stop() logs the seconds elapsed since then at VLOG(1). The destructor calls
// stop() automatically, but only for timers tagged "elementwise_add".
struct RecordTime {
  // name: label of the timed region; type: tag used for filtering/logging.
  RecordTime(const std::string& name, const std::string& type)
      : name_(name), type_(type), start_(std::chrono::system_clock::now()) {}

  // Stamps end_ with the current time and emits
  // "<name> <type> time record: <seconds>" to VLOG(1).
  inline void stop() {
    end_ = std::chrono::system_clock::now();
    // duration<double> with the default period counts seconds.
    const std::chrono::duration<double> elapsed = end_ - start_;
    VLOG(1) << name_ << " " << type_ << " time record: " << elapsed.count();
  }

  ~RecordTime() {
    // Report only the tag of interest; drop this guard to report every timer.
    if (type_ != "elementwise_add") {
      return;
    }
    stop();
  }

  std::string name_;  // region label supplied by the caller
  std::string type_;  // tag compared against the destructor filter
  std::chrono::system_clock::time_point start_;  // captured at construction
  std::chrono::system_clock::time_point end_;    // written by stop()
};
void
OperatorWithKernel
::
RunImpl
(
const
Scope
&
scope
,
const
platform
::
Place
&
place
)
const
{
RuntimeInferShapeContext
infer_shape_ctx
(
*
this
,
scope
);
this
->
InferShape
(
&
infer_shape_ctx
);
platform
::
DeviceContextPool
&
pool
=
platform
::
DeviceContextPool
::
Instance
();
auto
*
dev_ctx
=
pool
.
Get
(
place
);
// check if op[type] has kernel registered.
auto
&
all_op_kernels
=
AllOpKernels
();
auto
kernels_iter
=
all_op_kernels
.
find
(
type_
);
if
(
kernels_iter
==
all_op_kernels
.
end
())
{
PADDLE_THROW
(
"There are no kernels which are registered in the %s operator."
,
type_
);
RecordTime
rt
(
"OperatorWithKernel::All"
,
type_
);
{
RecordTime
rt
(
"OperatorWithKernel::InferShape"
,
type_
);
RuntimeInferShapeContext
infer_shape_ctx
(
*
this
,
scope
);
this
->
InferShape
(
&
infer_shape_ctx
);
}
OpKernelMap
&
kernels
=
kernels_iter
->
second
;
{
RecordTime
*
rt_1
=
new
RecordTime
(
"OperatorWithKernel::Compute1"
,
type_
);
platform
::
DeviceContextPool
&
pool
=
platform
::
DeviceContextPool
::
Instance
();
auto
*
dev_ctx
=
pool
.
Get
(
place
);
// TODO(dzhwinter) : kernel fallback mechanism will be added when all the
// transform functions are ready.
// check if op[type] has kernel registered.
auto
&
all_op_kernels
=
AllOpKernels
();
auto
kernels_iter
=
all_op_kernels
.
find
(
type_
);
if
(
kernels_iter
==
all_op_kernels
.
end
())
{
PADDLE_THROW
(
"There are no kernels which are registered in the %s operator."
,
type_
);
}
// for (auto& candidate : kKernelPriority) {
// Do selection
// }
OpKernelMap
&
kernels
=
kernels_iter
->
second
;
auto
expected_kernel_key
=
this
->
GetExpectedKernelType
(
ExecutionContext
(
*
this
,
scope
,
*
dev_ctx
));
VLOG
(
3
)
<<
"expected_kernel_key:"
<<
expected_kernel_key
;
// TODO(dzhwinter) : kernel fallback mechanism will be added when all the
// transform functions are ready.
auto
kernel_iter
=
kernels
.
find
(
expected_kernel_key
);
// for (auto& candidate : kKernelPriority) {
// Do selection
// }
auto
expected_kernel_key
=
this
->
GetExpectedKernelType
(
ExecutionContext
(
*
this
,
scope
,
*
dev_ctx
));
VLOG
(
3
)
<<
"expected_kernel_key:"
<<
expected_kernel_key
;
auto
kernel_iter
=
kernels
.
find
(
expected_kernel_key
);
#ifdef PADDLE_WITH_MKLDNN
// workaround for missing MKLDNN kernel when FLAGS_use_mkldnn env var is set
if
(
kernel_iter
==
kernels
.
end
()
&&
expected_kernel_key
.
library_type_
==
LibraryType
::
kMKLDNN
)
{
VLOG
(
3
)
<<
"missing MKLDNN kernel: fallbacking to PLAIN one"
;
expected_kernel_key
.
library_type_
=
LibraryType
::
kPlain
;
expected_kernel_key
.
data_layout_
=
DataLayout
::
kAnyLayout
;
kernel_iter
=
kernels
.
find
(
expected_kernel_key
);
}
// workaround for missing MKLDNN kernel when FLAGS_use_mkldnn env var is set
if
(
kernel_iter
==
kernels
.
end
()
&&
expected_kernel_key
.
library_type_
==
LibraryType
::
kMKLDNN
)
{
VLOG
(
3
)
<<
"missing MKLDNN kernel: fallbacking to PLAIN one"
;
expected_kernel_key
.
library_type_
=
LibraryType
::
kPlain
;
expected_kernel_key
.
data_layout_
=
DataLayout
::
kAnyLayout
;
kernel_iter
=
kernels
.
find
(
expected_kernel_key
);
}
#endif
if
(
kernel_iter
==
kernels
.
end
())
{
PADDLE_THROW
(
"op %s does not have kernel for %s"
,
type_
,
KernelTypeToString
(
expected_kernel_key
));
}
if
(
kernel_iter
==
kernels
.
end
())
{
PADDLE_THROW
(
"op %s does not have kernel for %s"
,
type_
,
KernelTypeToString
(
expected_kernel_key
));
}
// do data transformScope &transfer_scope;
std
::
vector
<
std
::
string
>
transfered_inplace_vars
;
auto
*
transfer_scope
=
TryTransferData
(
scope
,
expected_kernel_key
,
&
transfered_inplace_vars
);
// do data transformScope &transfer_scope;
std
::
vector
<
std
::
string
>
transfered_inplace_vars
;
Scope
*
transfer_scope
=
nullptr
;
// auto* transfer_scope =
// TryTransferData(scope, expected_kernel_key, &transfered_inplace_vars);
// exec scope is the scope that kernel actually executed on.
const
Scope
&
exec_scope
=
(
transfer_scope
==
nullptr
?
scope
:
*
transfer_scope
);
// exec scope is the scope that kernel actually executed on.
const
Scope
&
exec_scope
=
scope
;
// const Scope& exec_scope =
// (transfer_scope == nullptr ? scope : *transfer_scope);
if
(
!
(
expected_kernel_key
.
place_
==
dev_ctx
->
GetPlace
()))
{
dev_ctx
=
pool
.
Get
(
expected_kernel_key
.
place_
);
}
if
(
!
(
expected_kernel_key
.
place_
==
dev_ctx
->
GetPlace
()))
{
dev_ctx
=
pool
.
Get
(
expected_kernel_key
.
place_
);
}
delete
rt_1
;
kernel_iter
->
second
(
ExecutionContext
(
*
this
,
exec_scope
,
*
dev_ctx
));
RecordTime
*
rt_2
=
new
RecordTime
(
"OperatorWithKernel::Compute2"
,
type_
);
kernel_iter
->
second
(
ExecutionContext
(
*
this
,
exec_scope
,
*
dev_ctx
));
delete
rt_2
;
if
(
!
transfered_inplace_vars
.
empty
())
{
// there is inplace variable has been transfered.
TransferInplaceVarsBack
(
scope
,
transfered_inplace_vars
,
*
transfer_scope
);
}
RecordTime
*
rt_3
=
new
RecordTime
(
"OperatorWithKernel::Compute3"
,
type_
);
if
(
!
transfered_inplace_vars
.
empty
())
{
// there is inplace variable has been transfered.
TransferInplaceVarsBack
(
scope
,
transfered_inplace_vars
,
*
transfer_scope
);
}
/*For profiling/benchmark only*/
if
(
FLAGS_benchmark
)
{
dev_ctx
->
Wait
();
}
/*For profiling/benchmark only*/
if
(
FLAGS_benchmark
)
{
dev_ctx
->
Wait
();
}
if
(
FLAGS_check_nan_inf
)
{
for
(
auto
&
vname
:
OutputVars
(
true
))
{
auto
*
var
=
exec_scope
.
FindVar
(
vname
);
if
(
var
==
nullptr
)
continue
;
if
(
var
->
IsType
<
framework
::
LoDTensor
>
())
{
CheckTensorNANOrInf
(
vname
,
var
->
Get
<
framework
::
LoDTensor
>
());
}
else
if
(
var
->
IsType
<
framework
::
SelectedRows
>
())
{
CheckTensorNANOrInf
(
vname
,
var
->
Get
<
framework
::
SelectedRows
>
().
value
());
if
(
FLAGS_check_nan_inf
)
{
for
(
auto
&
vname
:
OutputVars
(
true
))
{
auto
*
var
=
exec_scope
.
FindVar
(
vname
);
if
(
var
==
nullptr
)
continue
;
if
(
var
->
IsType
<
framework
::
LoDTensor
>
())
{
CheckTensorNANOrInf
(
vname
,
var
->
Get
<
framework
::
LoDTensor
>
());
}
else
if
(
var
->
IsType
<
framework
::
SelectedRows
>
())
{
CheckTensorNANOrInf
(
vname
,
var
->
Get
<
framework
::
SelectedRows
>
().
value
());
}
}
}
delete
rt_3
;
}
}
void
OperatorWithKernel
::
TransferInplaceVarsBack
(
...
...
paddle/fluid/framework/scope.cc
浏览文件 @
57033869
...
...
@@ -43,9 +43,16 @@ DEFINE_double(
// the mutex will cause serious performance issue.
// So the mutex is disabled when `ON_INFER`.
#ifdef PADDLE_ON_INFERENCE
#define SCOPE_LOCK_GUARD
#define SCOPE_READER_LOCK
#define SCOPE_WRITER_LOCK
#else
#define SCOPE_LOCK_GUARD std::lock_guard<std::mutex> lock(mutex_);
// TODO(minqiyang): use reader lock and writer lock in all platforms
#define SCOPE_READER_LOCK
#define SCOPE_WRITER_LOCK
// #define SCOPE_READER_LOCK boost::shared_lock<boost::shared_mutex>
// lock(mutex_);
// #define SCOPE_WRITER_LOCK boost::unique_lock<boost::shared_mutex>
// lock(mutex_);
#endif
namespace
paddle
{
...
...
@@ -61,18 +68,18 @@ int64_t GetEagerDeletionThreshold() {
Scope
::~
Scope
()
{
DropKids
();
}
Scope
&
Scope
::
NewScope
()
const
{
SCOPE_
LOCK_GUARD
SCOPE_
WRITER_LOCK
kids_
.
push_back
(
new
Scope
(
this
));
return
*
kids_
.
back
();
}
Variable
*
Scope
::
Var
(
const
std
::
string
&
name
)
{
SCOPE_
LOCK_GUARD
SCOPE_
WRITER_LOCK
return
VarInternal
(
name
);
}
Variable
*
Scope
::
Var
(
std
::
string
*
name
)
{
SCOPE_
LOCK_GUARD
SCOPE_
WRITER_LOCK
auto
new_name
=
string
::
Sprintf
(
"%p.%d"
,
this
,
vars_
.
size
());
if
(
name
!=
nullptr
)
{
*
name
=
new_name
;
...
...
@@ -81,34 +88,34 @@ Variable* Scope::Var(std::string* name) {
}
Variable
*
Scope
::
FindVar
(
const
std
::
string
&
name
)
const
{
SCOPE_
LOCK_GUARD
SCOPE_
READER_LOCK
return
FindVarInternal
(
name
);
}
Variable
*
Scope
::
FindLocalVar
(
const
std
::
string
&
name
)
const
{
SCOPE_
LOCK_GUARD
SCOPE_
READER_LOCK
return
FindVarLocally
(
name
);
}
const
Scope
*
Scope
::
FindScope
(
const
Variable
*
var
)
const
{
SCOPE_
LOCK_GUARD
SCOPE_
READER_LOCK
return
FindScopeInternal
(
var
);
}
void
Scope
::
DropKids
()
{
SCOPE_
LOCK_GUARD
SCOPE_
WRITER_LOCK
for
(
Scope
*
s
:
kids_
)
delete
s
;
kids_
.
clear
();
}
bool
Scope
::
HasKid
(
const
Scope
*
scope
)
const
{
SCOPE_
LOCK_GUARD
SCOPE_
READER_LOCK
auto
it
=
std
::
find
(
this
->
kids_
.
begin
(),
this
->
kids_
.
end
(),
scope
);
return
it
!=
this
->
kids_
.
end
();
}
std
::
vector
<
std
::
string
>
Scope
::
LocalVarNames
()
const
{
SCOPE_
LOCK_GUARD
SCOPE_
READER_LOCK
std
::
vector
<
std
::
string
>
known_vars
;
known_vars
.
reserve
(
this
->
vars_
.
size
());
for
(
auto
&
p
:
vars_
)
{
...
...
@@ -118,7 +125,7 @@ std::vector<std::string> Scope::LocalVarNames() const {
}
void
Scope
::
DeleteScope
(
Scope
*
scope
)
const
{
SCOPE_
LOCK_GUARD
SCOPE_
WRITER_LOCK
auto
it
=
std
::
find
(
this
->
kids_
.
begin
(),
this
->
kids_
.
end
(),
scope
);
PADDLE_ENFORCE
(
it
!=
this
->
kids_
.
end
(),
"%p Cannot find %p as kid scope"
,
this
,
scope
);
...
...
@@ -132,7 +139,7 @@ void Scope::DeleteScope(Scope* scope) const {
}
void
Scope
::
EraseVars
(
const
std
::
vector
<
std
::
string
>&
var_names
)
{
SCOPE_
LOCK_GUARD
SCOPE_
WRITER_LOCK
std
::
set
<
std
::
string
>
var_set
(
var_names
.
begin
(),
var_names
.
end
());
for
(
auto
it
=
vars_
.
begin
();
it
!=
vars_
.
end
();)
{
if
(
var_set
.
find
(
it
->
first
)
!=
var_set
.
end
())
{
...
...
@@ -145,12 +152,12 @@ void Scope::EraseVars(const std::vector<std::string>& var_names) {
void
Scope
::
Rename
(
const
std
::
string
&
origin_name
,
const
std
::
string
&
new_name
)
const
{
SCOPE_
LOCK_GUARD
SCOPE_
WRITER_LOCK
RenameInternal
(
origin_name
,
new_name
);
}
std
::
string
Scope
::
Rename
(
const
std
::
string
&
origin_name
)
const
{
SCOPE_
LOCK_GUARD
SCOPE_
WRITER_LOCK
auto
new_name
=
string
::
Sprintf
(
"%p.%d"
,
this
,
vars_
.
size
());
RenameInternal
(
origin_name
,
new_name
);
return
new_name
;
...
...
paddle/fluid/operators/elementwise/elementwise_op.h
浏览文件 @
57033869
...
...
@@ -33,34 +33,37 @@ class ElementwiseOp : public framework::OperatorWithKernel {
using
Tensor
=
framework
::
Tensor
;
void
InferShape
(
framework
::
InferShapeContext
*
ctx
)
const
override
{
PADDLE_ENFORCE
(
ctx
->
HasInput
(
"X"
),
"Input(X) of elementwise op should not be null."
);
PADDLE_ENFORCE
(
ctx
->
HasInput
(
"Y"
),
"Input(Y) of elementwise op should not be null."
);
PADDLE_ENFORCE
(
ctx
->
HasOutput
(
"Out"
),
"Output(Out) of elementwise op should not be null."
);
PADDLE_ENFORCE
(
ctx
->
GetInputsVarType
(
"Y"
).
front
()
==
framework
::
proto
::
VarType
::
LOD_TENSOR
,
"The input var's type should be LoDTensor, but the received is %s [%s]"
,
ctx
->
GetInputsVarType
(
"Y"
).
front
(),
ctx
->
Inputs
(
"Y"
).
front
());
if
(
ctx
->
GetInputsVarType
(
"X"
).
front
()
==
framework
::
proto
::
VarType
::
LOD_TENSOR
)
{
auto
x_dim
=
ctx
->
GetInputDim
(
"X"
);
auto
y_dim
=
ctx
->
GetInputDim
(
"Y"
);
PADDLE_ENFORCE_GE
(
x_dim
.
size
(),
y_dim
.
size
(),
"Rank of first input must >= rank of second input."
);
}
else
if
(
ctx
->
GetInputsVarType
(
"X"
).
front
()
==
framework
::
proto
::
VarType
::
SELECTED_ROWS
)
{
PADDLE_ENFORCE
((
ctx
->
GetInputDim
(
"Y"
).
size
()
==
1u
)
&&
(
ctx
->
GetInputDim
(
"Y"
)[
0
]
==
1
),
"For elementwise_op, if X is Sparse, "
"Y must be scalar."
);
}
else
{
PADDLE_THROW
(
"X's type[%s] is not supported by elementwise_op."
,
ctx
->
GetInputsVarType
(
"X"
).
front
());
if
(
!
ctx
->
IsRuntime
())
{
PADDLE_ENFORCE
(
ctx
->
HasInput
(
"X"
),
"Input(X) of elementwise op should not be null."
);
PADDLE_ENFORCE
(
ctx
->
HasInput
(
"Y"
),
"Input(Y) of elementwise op should not be null."
);
PADDLE_ENFORCE
(
ctx
->
HasOutput
(
"Out"
),
"Output(Out) of elementwise op should not be null."
);
PADDLE_ENFORCE
(
ctx
->
GetInputsVarType
(
"Y"
).
front
()
==
framework
::
proto
::
VarType
::
LOD_TENSOR
,
"The input var's type should be LoDTensor, but the "
"received is %s [%s]"
,
ctx
->
GetInputsVarType
(
"Y"
).
front
(),
ctx
->
Inputs
(
"Y"
).
front
());
if
(
ctx
->
GetInputsVarType
(
"X"
).
front
()
==
framework
::
proto
::
VarType
::
LOD_TENSOR
)
{
auto
x_dim
=
ctx
->
GetInputDim
(
"X"
);
auto
y_dim
=
ctx
->
GetInputDim
(
"Y"
);
PADDLE_ENFORCE_GE
(
x_dim
.
size
(),
y_dim
.
size
(),
"Rank of first input must >= rank of second input."
);
}
else
if
(
ctx
->
GetInputsVarType
(
"X"
).
front
()
==
framework
::
proto
::
VarType
::
SELECTED_ROWS
)
{
PADDLE_ENFORCE
((
ctx
->
GetInputDim
(
"Y"
).
size
()
==
1u
)
&&
(
ctx
->
GetInputDim
(
"Y"
)[
0
]
==
1
),
"For elementwise_op, if X is Sparse, "
"Y must be scalar."
);
}
else
{
PADDLE_THROW
(
"X's type[%s] is not supported by elementwise_op."
,
ctx
->
GetInputsVarType
(
"X"
).
front
());
}
}
ctx
->
ShareDim
(
"X"
,
/*->*/
"Out"
);
...
...
@@ -125,7 +128,7 @@ The equation is:
$$%s$$
- $X$: a tensor of any dimension.
- $X$: a tensor of any dimension.
- $Y$: a tensor whose dimensions must be less than or equal to the dimensions of $X$.
There are two cases for this operator:
...
...
@@ -135,10 +138,10 @@ There are two cases for this operator:
For case 2:
1. Broadcast $Y$ to match the shape of $X$, where $axis$ is the start dimension index
for broadcasting $Y$ onto $X$.
1. Broadcast $Y$ to match the shape of $X$, where $axis$ is the start dimension index
for broadcasting $Y$ onto $X$.
2. If $axis$ is -1 (default), $axis = rank(X) - rank(Y)$.
3. The trailing dimensions of size 1 for $Y$ will be ignored for the consideration of
3. The trailing dimensions of size 1 for $Y$ will be ignored for the consideration of
subsequence, such as shape(Y) = (2, 1) => (2).
For example:
...
...
@@ -152,7 +155,7 @@ For example:
shape(X) = (2, 3, 4, 5), shape(Y) = (2), with axis=0
shape(X) = (2, 3, 4, 5), shape(Y) = (2, 1), with axis=0
The inputs $X$ and $Y$ can carry the different LoD information.
The inputs $X$ and $Y$ can carry the different LoD information.
But the output only shares the LoD information with the input $X$.
)DOC"
,
...
...
paddle/fluid/operators/optimizers/adam_op.cc
浏览文件 @
57033869
...
...
@@ -23,56 +23,57 @@ class AdamOp : public framework::OperatorWithKernel {
using
framework
::
OperatorWithKernel
::
OperatorWithKernel
;
void
InferShape
(
framework
::
InferShapeContext
*
ctx
)
const
override
{
PADDLE_ENFORCE
(
ctx
->
HasInput
(
"Param"
),
"Input(Param) of AdamOp should not be null."
);
PADDLE_ENFORCE
(
ctx
->
HasInput
(
"Grad"
),
"Input(Grad) of AdamOp should not be null."
);
PADDLE_ENFORCE
(
ctx
->
HasInput
(
"Moment1"
),
"Input(Moment1) of AdamOp should not be null."
);
PADDLE_ENFORCE
(
ctx
->
HasInput
(
"Moment2"
),
"Input(Moment2) of AdamOp should not be null."
);
PADDLE_ENFORCE
(
ctx
->
HasInput
(
"LearningRate"
),
"Input(LearningRate) of AdamOp should not be null."
);
PADDLE_ENFORCE
(
ctx
->
HasInput
(
"Beta1Pow"
),
"Input(Beta1Pow) of AdamOp should not be null."
);
PADDLE_ENFORCE
(
ctx
->
HasInput
(
"Beta2Pow"
),
"Input(Beta2Pow) of AdamOp should not be null."
);
PADDLE_ENFORCE
(
ctx
->
HasOutput
(
"ParamOut"
),
"Output(ParamOut) of AdamOp should not be null."
);
PADDLE_ENFORCE
(
ctx
->
HasOutput
(
"Moment1Out"
),
"Output(Moment1Out) of AdamOp should not be null."
);
PADDLE_ENFORCE
(
ctx
->
HasOutput
(
"Moment2Out"
),
"Output(Moment2Out) of AdamOp should not be null."
);
//
PADDLE_ENFORCE(ctx->HasInput("Param"),
//
"Input(Param) of AdamOp should not be null.");
//
PADDLE_ENFORCE(ctx->HasInput("Grad"),
//
"Input(Grad) of AdamOp should not be null.");
//
PADDLE_ENFORCE(ctx->HasInput("Moment1"),
//
"Input(Moment1) of AdamOp should not be null.");
//
PADDLE_ENFORCE(ctx->HasInput("Moment2"),
//
"Input(Moment2) of AdamOp should not be null.");
//
PADDLE_ENFORCE(ctx->HasInput("LearningRate"),
//
"Input(LearningRate) of AdamOp should not be null.");
//
PADDLE_ENFORCE(ctx->HasInput("Beta1Pow"),
//
"Input(Beta1Pow) of AdamOp should not be null.");
//
PADDLE_ENFORCE(ctx->HasInput("Beta2Pow"),
//
"Input(Beta2Pow) of AdamOp should not be null.");
//
PADDLE_ENFORCE(ctx->HasOutput("ParamOut"),
//
"Output(ParamOut) of AdamOp should not be null.");
//
PADDLE_ENFORCE(ctx->HasOutput("Moment1Out"),
//
"Output(Moment1Out) of AdamOp should not be null.");
//
PADDLE_ENFORCE(ctx->HasOutput("Moment2Out"),
//
"Output(Moment2Out) of AdamOp should not be null.");
auto
lr_dims
=
ctx
->
GetInputDim
(
"LearningRate"
);
PADDLE_ENFORCE_EQ
(
framework
::
product
(
lr_dims
),
1
,
"Learning rate should have 1 dimension"
);
//
PADDLE_ENFORCE_EQ(framework::product(lr_dims), 1,
//
"Learning rate should have 1 dimension");
auto
beta1_pow_dims
=
ctx
->
GetInputDim
(
"Beta1Pow"
);
PADDLE_ENFORCE_EQ
(
framework
::
product
(
beta1_pow_dims
),
1
,
"Beta1 power accumulator should have 1 dimension"
);
//
PADDLE_ENFORCE_EQ(framework::product(beta1_pow_dims), 1,
//
"Beta1 power accumulator should have 1 dimension");
auto
beta2_pow_dims
=
ctx
->
GetInputDim
(
"Beta2Pow"
);
PADDLE_ENFORCE_EQ
(
framework
::
product
(
beta2_pow_dims
),
1
,
"Beta2 power accumulator should have 1 dimension"
);
//
PADDLE_ENFORCE_EQ(framework::product(beta2_pow_dims), 1,
//
"Beta2 power accumulator should have 1 dimension");
auto
param_dims
=
ctx
->
GetInputDim
(
"Param"
);
if
(
ctx
->
GetInputsVarType
(
"Grad"
)[
0
]
==
framework
::
proto
::
VarType
::
LOD_TENSOR
)
{
PADDLE_ENFORCE_EQ
(
param_dims
,
ctx
->
GetInputDim
(
"Grad"
),
"Param and Grad input of AdamOp should have same dimension"
);
}
PADDLE_ENFORCE_EQ
(
param_dims
,
ctx
->
GetInputDim
(
"Moment1"
),
"Param and Moment1 input of AdamOp should have same dimension"
);
PADDLE_ENFORCE_EQ
(
param_dims
,
ctx
->
GetInputDim
(
"Moment2"
),
"Param and Moment2 input of AdamOp should have same dimension"
);
//
if (ctx->GetInputsVarType("Grad")[0] ==
//
framework::proto::VarType::LOD_TENSOR) {
//
PADDLE_ENFORCE_EQ(
//
param_dims, ctx->GetInputDim("Grad"),
//
"Param and Grad input of AdamOp should have same dimension");
//
}
//
PADDLE_ENFORCE_EQ(
//
param_dims, ctx->GetInputDim("Moment1"),
//
"Param and Moment1 input of AdamOp should have same dimension");
//
PADDLE_ENFORCE_EQ(
//
param_dims, ctx->GetInputDim("Moment2"),
//
"Param and Moment2 input of AdamOp should have same dimension");
ctx
->
SetOutputDim
(
"ParamOut"
,
param_dims
);
ctx
->
SetOutputDim
(
"Moment1Out"
,
param_dims
);
ctx
->
SetOutputDim
(
"Moment2Out"
,
param_dims
);
}
framework
::
OpKernelType
GetExpectedKernelType
(
const
framework
::
ExecutionContext
&
ctx
)
const
override
{
auto
input_data_type
=
...
...
python/paddle/fluid/profiler.py
浏览文件 @
57033869
...
...
@@ -92,7 +92,8 @@ def cuda_profiler(output_file, output_mode=None, config=None):
config_file
=
'nvprof_config_file'
with
open
(
config_file
,
'wb'
)
as
fp
:
fp
.
writelines
([
six
.
b
(
"%s
\n
"
%
item
)
for
item
in
config
])
core
.
nvprof_init
(
output_file
,
output_mode
,
config_file
)
#Comment this for nvprof
#core.nvprof_init(output_file, output_mode, config_file)
# Enables profiler collection by the active CUDA profiling tool.
core
.
nvprof_start
()
yield
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录