PaddlePaddle / Paddle · Commit bfb07aaf
Unverified commit bfb07aaf · authored Apr 02, 2020 by zhongpu · committed by GitHub on Apr 02, 2020
Revert "Exhaustive search (#22821)", test=develop (#23401)
This reverts commit
48144e40
.
上级
7fda333a
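For orientation (editor's note, not part of the commit message): PR #22821 replaced Paddle's per-kernel KernelConfig mechanism with a global, mutex-guarded ConvSearchCache; this revert restores the per-kernel path, where each operator registers AlgorithmsCache instances in kernel_configs_map_ keyed by OpKernelType and hands them to the chosen kernel through the ExecutionContext. A minimal C++ sketch of the restored lookup, with simplified stand-in types (an int key replaces OpKernelType; the real KernelConfig is a boost::variant over shared_ptr<AlgorithmsCache<...>>):

#include <cassert>
#include <memory>
#include <unordered_map>
#include <vector>

// Stand-ins for the framework types; an int key replaces OpKernelType.
struct AlgoCache {};  // stands in for AlgorithmsCache<...>
using KernelConfig = std::shared_ptr<AlgoCache>;
using OpKernelConfigsMap = std::unordered_map<int, std::vector<KernelConfig>>;

struct OperatorWithKernelSketch {
  mutable OpKernelConfigsMap kernel_configs_map_;

  // Mirrors OperatorWithKernel::GetKernelConfig in this commit: returns
  // nullptr when no configs were registered for the chosen kernel type.
  std::vector<KernelConfig>* GetKernelConfig(int key) const {
    auto it = kernel_configs_map_.find(key);
    return it == kernel_configs_map_.end() ? nullptr : &it->second;
  }
};

int main() {
  OperatorWithKernelSketch op;
  op.kernel_configs_map_[7].push_back(std::make_shared<AlgoCache>());
  assert(op.GetKernelConfig(7) != nullptr);  // registered kernel type
  assert(op.GetKernelConfig(8) == nullptr);  // nothing registered
  return 0;
}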
Showing 15 changed files with 200 additions and 197 deletions (+200 -197)
paddle/fluid/framework/operator.cc  +15 -3
paddle/fluid/framework/operator.h  +34 -2
paddle/fluid/framework/operator_kernel_configs.h  +21 -61
paddle/fluid/framework/operator_test.cc  +1 -1
paddle/fluid/imperative/execution_context.h  +2 -1
paddle/fluid/imperative/prepared_operator.cc  +21 -11
paddle/fluid/imperative/prepared_operator.h  +3 -1
paddle/fluid/imperative/tests/test_layer.cc  +1 -1
paddle/fluid/operators/beam_search_decode_op.cc  +1 -1
paddle/fluid/operators/conv_cudnn_helper.h  +27 -69
paddle/fluid/operators/conv_cudnn_op.cu  +21 -38
paddle/fluid/operators/conv_op.cc  +45 -0
paddle/fluid/operators/elementwise/test_elementwise_mul_op_dim.cc  +2 -1
paddle/fluid/operators/fused/conv_fusion_op.cu  +3 -4
paddle/fluid/operators/warpctc_op.cc  +3 -3
paddle/fluid/framework/operator.cc
@@ -905,6 +905,16 @@ void OperatorWithKernel::RuntimeInferShape(const Scope& scope,
   this->InferShape(&infer_shape_ctx);
 }
 
+std::vector<KernelConfig>* OperatorWithKernel::GetKernelConfig(
+    const OpKernelType& key) const {
+  auto config_iter = kernel_configs_map_.find(key);
+  std::vector<KernelConfig>* kernel_configs = nullptr;
+  if (config_iter != kernel_configs_map_.end()) {
+    kernel_configs = &(config_iter->second);
+  }
+  return kernel_configs;
+}
+
 void OperatorWithKernel::RunImpl(const Scope& scope,
                                  const platform::Place& place) const {
   // To reduce the elapsed time of HasAttr, we use bool variable to record the
@@ -941,6 +951,8 @@ void OperatorWithKernel::RunImpl(const Scope& scope,
     ChooseKernel(*runtime_ctx, scope, place);
   }
 
+  std::vector<KernelConfig>* kernel_configs = GetKernelConfig(*kernel_type_);
+
   // do data transformScope &transfer_scope;
   std::vector<std::string> transfered_inplace_vars;
   Scope* transfer_scope = nullptr;
@@ -976,8 +988,8 @@ void OperatorWithKernel::RunImpl(const Scope& scope,
   {
     platform::RecordEvent record_event("compute",
                                        platform::EventRole::kInnerOp);
-    (*kernel_func_)(
-        ExecutionContext(*this, exec_scope, *dev_ctx, *runtime_ctx));
+    (*kernel_func_)(ExecutionContext(*this, exec_scope, *dev_ctx, *runtime_ctx,
+                                     kernel_configs));
   }
 
   if (!transfered_inplace_vars.empty()) {
@@ -1046,7 +1058,7 @@ void OperatorWithKernel::ChooseKernel(const RuntimeContext& ctx,
   OpKernelMap& kernels = kernels_iter->second;
 
   auto expected_kernel_key = this->GetExpectedKernelType(
-      ExecutionContext(*this, scope, *dev_ctx, ctx));
+      ExecutionContext(*this, scope, *dev_ctx, ctx, nullptr));
 
   if (HasAttr("op_device")) {
     if (Attr<std::string>("op_device") == "cpu") {
       expected_kernel_key.place_ = platform::CPUPlace();
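Note the nullptr passed in ChooseKernel above: when GetExpectedKernelType runs, the kernel key, and therefore its config vector, is not yet known, so the ExecutionContext is built without configs; RunImpl then resolves the configs from the chosen key and builds the context the kernel actually sees. A sketch of that two-phase pattern under simplified, assumed types:

#include <memory>
#include <unordered_map>
#include <vector>

struct AlgoCache {};
using KernelConfig = std::shared_ptr<AlgoCache>;

// Simplified context: only the configs pointer matters for this sketch.
struct Ctx {
  std::vector<KernelConfig>* configs;  // nullptr during key selection
};

static std::unordered_map<int, std::vector<KernelConfig>> config_map;

static int ChooseKernelKey(const Ctx&) { return 7; }  // placeholder decision

void RunImplSketch() {
  // Phase 1: pick the kernel key with a config-less context, as in
  // ChooseKernel's ExecutionContext(*this, scope, *dev_ctx, ctx, nullptr).
  int key = ChooseKernelKey(Ctx{nullptr});

  // Phase 2: fetch the configs registered under that key and build the
  // context the kernel sees, as in RunImpl's GetKernelConfig(*kernel_type_).
  auto it = config_map.find(key);
  Ctx run_ctx{it == config_map.end() ? nullptr : &it->second};
  (void)run_ctx;  // handed to (*kernel_func_)(...) in the real code
}

int main() { RunImplSketch(); }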
paddle/fluid/framework/operator.h
@@ -31,6 +31,7 @@ limitations under the License. */
 #include "paddle/fluid/framework/lod_tensor.h"
 #include "paddle/fluid/framework/op_info.h"
 #include "paddle/fluid/framework/op_kernel_type.h"
+#include "paddle/fluid/framework/operator_kernel_configs.h"
 #include "paddle/fluid/framework/scope.h"
 #include "paddle/fluid/framework/selected_rows.h"
 #include "paddle/fluid/framework/tensor.h"
@@ -215,12 +216,30 @@ class OperatorBase {
                        const platform::Place& place) const = 0;
 };
 
+#ifdef PADDLE_WITH_CUDA
+using KernelConfig = boost::variant<
+    std::shared_ptr<AlgorithmsCache<cudnnConvolutionFwdAlgo_t>>,
+    std::shared_ptr<AlgorithmsCache<cudnnConvolutionBwdDataAlgo_t>>,
+    std::shared_ptr<AlgorithmsCache<cudnnConvolutionBwdFilterAlgo_t>>>;
+#else
+using KernelConfig = boost::variant<boost::blank>;
+#endif
+
+using OpKernelConfigsMap =
+    std::unordered_map<OpKernelType, std::vector<KernelConfig>,
+                       OpKernelType::Hash>;
+
 class ExecutionContext {
  public:
   ExecutionContext(const OperatorBase& op, const Scope& scope,
                    const platform::DeviceContext& device_context,
-                   const RuntimeContext& ctx)
-      : op_(op), scope_(scope), device_context_(device_context), ctx_(ctx) {}
+                   const RuntimeContext& ctx,
+                   std::vector<KernelConfig>* configs)
+      : op_(op),
+        scope_(scope),
+        device_context_(device_context),
+        ctx_(ctx),
+        kernel_configs_(configs) {}
   virtual ~ExecutionContext() {}
 
   virtual std::string InputName(const std::string& name) const {
@@ -386,6 +405,15 @@ class ExecutionContext {
     return temp_tensor;
   }
 
+  template <typename T>
+  T& GetKernelConfig(size_t idx) const {
+    PADDLE_ENFORCE(
+        kernel_configs_ && kernel_configs_->size() > static_cast<size_t>(idx),
+        "%s selected kernel doesn't have kernel config %lu <= %lu",
+        op_.Type().c_str(), kernel_configs_->size(), idx);
+    return *boost::get<std::shared_ptr<T>>((*kernel_configs_)[idx]);
+  }
+
   const RuntimeContext Context() const { return ctx_; }
 
   std::string DebugString() const { return op_.DebugString(); }
@@ -395,6 +423,7 @@ class ExecutionContext {
   const Scope& scope_;
   const platform::DeviceContext& device_context_;
   const RuntimeContext& ctx_;
+  mutable std::vector<KernelConfig>* kernel_configs_;
 };
 
 template <>
@@ -470,6 +499,8 @@ class OperatorWithKernel : public OperatorBase {
   virtual OpKernelType GetExpectedKernelType(const ExecutionContext& ctx) const;
 
+  std::vector<KernelConfig>* GetKernelConfig(const OpKernelType& key) const;
+
   // change this to public so that in dygraph mode we can call it to check if we
   // need transform data
   virtual OpKernelType GetKernelTypeForVar(
@@ -506,6 +537,7 @@ class OperatorWithKernel : public OperatorBase {
       const platform::Place& place) const;
 
  protected:
+  mutable OpKernelConfigsMap kernel_configs_map_;
   mutable std::unique_ptr<OpKernelType> kernel_type_;
   mutable std::unique_ptr<OpKernelFunc> kernel_func_;
   mutable std::unique_ptr<RuntimeContext> runtime_ctx_;
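GetKernelConfig<T> above indexes into the config vector, unwraps the boost::variant alternative holding shared_ptr<T>, and returns a reference to the cached object. An analogous, self-contained sketch using std::variant (our substitution for illustration; Paddle itself uses boost::variant and PADDLE_ENFORCE here):

#include <cassert>
#include <memory>
#include <variant>
#include <vector>

struct FwdCache {};  // stands in for AlgorithmsCache<cudnnConvolutionFwdAlgo_t>
struct BwdCache {};  // stands in for AlgorithmsCache<cudnnConvolutionBwdDataAlgo_t>

using KernelConfig =
    std::variant<std::shared_ptr<FwdCache>, std::shared_ptr<BwdCache>>;

// Mirrors ExecutionContext::GetKernelConfig<T>: index into the vector,
// extract the alternative for T, and dereference the shared_ptr.
template <typename T>
T& GetKernelConfig(std::vector<KernelConfig>& configs, size_t idx) {
  assert(configs.size() > idx);                         // PADDLE_ENFORCE in Paddle
  return *std::get<std::shared_ptr<T>>(configs[idx]);  // throws on wrong type
}

int main() {
  std::vector<KernelConfig> configs;
  configs.push_back(std::make_shared<FwdCache>());
  FwdCache& cache = GetKernelConfig<FwdCache>(configs, 0);
  (void)cache;
  return 0;
}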
paddle/fluid/framework/operator_kernel_configs.h
@@ -21,21 +21,19 @@ limitations under the License. */
 namespace paddle {
 namespace framework {
 
-// thread-safe.
+// Not thread-safe. Should be owned per-kernel.
 template <typename TAlgorithm>
 class AlgorithmsCache {
  public:
   AlgorithmsCache() : search_times_(0) { hash_.clear(); }
   // Caches the best algorithm for a given
   // combination of tensor dimensions & compute data type.
-  // cudnn_dtype set for different data type
   TAlgorithm GetAlgorithm(
       const std::vector<int64_t>& dims1, const std::vector<int64_t>& dims2,
       const std::vector<int>& strides, const std::vector<int>& paddings,
-      const std::vector<int>& dilations, int algorithmFlags,
-      int64_t cudnn_dtype, std::function<TAlgorithm()> gen_func);
+      const std::vector<int>& dilations,
+      int algorithmFlags,  // can set for different data type
+      std::function<TAlgorithm()> gen_func);
 
   TAlgorithm GetAlgorithm(int64_t area, int search_times, int algorithmFlags,
                           std::function<TAlgorithm()> gen_func);
@@ -43,14 +41,13 @@ class AlgorithmsCache {
  private:
   std::unordered_map<int64_t, TAlgorithm> hash_;
   int search_times_;
-  std::mutex cache_mutex;
 };
 
 template <typename TAlgorithm>
 TAlgorithm framework::AlgorithmsCache<TAlgorithm>::GetAlgorithm(
     const std::vector<int64_t>& dims1, const std::vector<int64_t>& dims2,
     const std::vector<int>& strides, const std::vector<int>& paddings,
-    const std::vector<int>& dilations, int algorithmFlags, int64_t cudnn_dtype,
+    const std::vector<int>& dilations, int algorithmFlags,
     std::function<TAlgorithm()> gen_func) {
   int64_t seed = 0;
   // Hash all of the inputs, use to try and look up a previously
@@ -84,73 +81,36 @@ TAlgorithm framework::AlgorithmsCache<TAlgorithm>::GetAlgorithm(
   seed ^= hashFn(static_cast<int64_t>(algorithmFlags)) + 0x9e3779b9 +
           (seed << 6) + (seed >> 2) + 5;
-  seed ^= hashFn(static_cast<int64_t>(cudnn_dtype)) + 0x9e3779b9 +
-          (seed << 6) + (seed >> 2) + 6;
-  VLOG(10) << "seed:" << seed << ", hash_.size:" << hash_.size();
-  if (seed == 0) return gen_func();
 
-  TAlgorithm ret;
-  auto it = hash_.end();
-  bool have_found = false;
-  {
-    std::lock_guard<std::mutex> lock(cache_mutex);
-    it = hash_.find(seed);
-    if (it != hash_.end()) {
-      ret = it->second;
-      have_found = true;
-    }
-  }
-  if (!have_found) {
-    ret = gen_func();
-    std::lock_guard<std::mutex> lock(cache_mutex);
-    hash_[seed] = ret;
+  if (hash_.find(seed) == hash_.end()) {
+    TAlgorithm value = gen_func();
+    hash_[seed] = value;
   }
-  return ret;
+  return hash_[seed];
 }
 
 template <typename TAlgorithm>
 TAlgorithm AlgorithmsCache<TAlgorithm>::GetAlgorithm(
     int64_t area, int search_times, int algorithmFlags,
     std::function<TAlgorithm()> gen_func) {
-  auto it = hash_.end();
-  {
-    std::lock_guard<std::mutex> lock(cache_mutex);
-    it = hash_.find(area);
-    if (it != hash_.end()) {
-      return it->second;
-    }
-  }
-
-  bool gene_flag = false;
-  {
-    std::lock_guard<std::mutex> lock(cache_mutex);
-    gene_flag = (search_times_ < search_times);
-  }
-  TAlgorithm algo{};
-  if (gene_flag) {
-    algo = gen_func();
-    std::lock_guard<std::mutex> lock(cache_mutex);
-    hash_[area] = algo;
+  if (hash_.find(area) != hash_.end()) {
+    return hash_[area];
+  }
+  if (search_times_ < search_times) {
+    auto algo = gen_func();
+    hash_[area] = algo;
     ++search_times_;
     return algo;
   }
+  TAlgorithm algo{};
   int64_t min = static_cast<uint64_t>(INT_MAX);
-  {
-    std::lock_guard<std::mutex> lock(cache_mutex);
-    for (const auto& m : hash_) {
-      if (m.first < min) {
-        min = m.first;
-        algo = m.second;
-      }
+  for (const auto& m : hash_) {
+    if (m.first < min) {
+      min = m.first;
+      algo = m.second;
     }
   }
   return algo;
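The cache key that survives this revert is built by folding every dimension, stride, padding, dilation and flag into one int64 seed with the golden-ratio combiner seen above (seed ^= h + 0x9e3779b9 + (seed << 6) + (seed >> 2) + k). A self-contained sketch of that keying scheme; the per-field salts here are assumptions for illustration:

#include <cstdint>
#include <functional>
#include <iostream>
#include <vector>

// Boost-style hash_combine, matching the pattern in AlgorithmsCache:
// each input perturbs the running seed so the same values in a
// different field produce a different key.
void Combine(int64_t* seed, int64_t value, int64_t salt) {
  std::hash<int64_t> hashFn;
  *seed ^= hashFn(value) + 0x9e3779b9 + (*seed << 6) + (*seed >> 2) + salt;
}

int64_t CacheKey(const std::vector<int64_t>& dims,
                 const std::vector<int>& strides, int flags) {
  int64_t seed = 0;
  for (int64_t d : dims) Combine(&seed, d, 1);  // per-field salts assumed;
  for (int s : strides) Combine(&seed, s, 2);   // the real code salts each
  Combine(&seed, flags, 5);                     // field group similarly
  return seed;
}

int main() {
  std::cout << CacheKey({1, 3, 224, 224}, {1, 1}, 0) << "\n";
  return 0;
}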
paddle/fluid/framework/operator_test.cc
@@ -525,7 +525,7 @@ TEST(ExecutionContextAttrAndInOut, new_api) {
   paddle::framework::RuntimeContext ctx({}, {});
   paddle::framework::ExecutionContext exe_context(*(op.get()), scope, *dev_ctx,
-                                                  ctx);
+                                                  ctx, nullptr);
 
   ASSERT_EQ(exe_context.InputSize("input"), 1u);
   ASSERT_EQ(exe_context.OutputSize("output"), 1u);
paddle/fluid/imperative/execution_context.h
@@ -33,10 +33,11 @@ class DygraphExecutionContext : public framework::ExecutionContext {
                           const framework::Scope& scope,
                           const platform::DeviceContext& device_context,
                           const framework::RuntimeContext& ctx,
+                          std::vector<framework::KernelConfig>* configs,
                           const NameVarMap<VarType>& var_base_map_in,
                           const NameVarMap<VarType>& var_base_map_out,
                           const framework::AttributeMap& attrs)
-      : ExecutionContext(op, scope, device_context, ctx),
+      : ExecutionContext(op, scope, device_context, ctx, configs),
         var_base_map_in_(var_base_map_in),
         var_base_map_out_(var_base_map_out),
         attrs_(attrs) {}
paddle/fluid/imperative/prepared_operator.cc
@@ -80,8 +80,13 @@ void PreparedOp::PrepareData(
 PreparedOp::PreparedOp(const framework::OperatorBase& op,
                        const framework::RuntimeContext& ctx,
                        const framework::OperatorWithKernel::OpKernelFunc& func,
-                       platform::DeviceContext* dev_ctx)
-    : op_(op), ctx_(ctx), func_(func), dev_ctx_(dev_ctx) {}
+                       platform::DeviceContext* dev_ctx,
+                       std::vector<framework::KernelConfig>* kernel_configs)
+    : op_(op),
+      ctx_(ctx),
+      func_(func),
+      dev_ctx_(dev_ctx),
+      kernel_configs_(kernel_configs) {}
 
 template <typename VarType>
 PreparedOp PrepareOpImpl(const NameVarMap<VarType>& ins,
@@ -106,7 +111,7 @@ PreparedOp PrepareOpImpl(const NameVarMap<VarType>& ins,
   framework::RuntimeContext ctx({}, {});
   auto expected_kernel_key =
       op.GetExpectedKernelType(DygraphExecutionContext<VarType>(
-          op, framework::Scope(), *dev_ctx, ctx, ins, outs, attrs));
+          op, framework::Scope(), *dev_ctx, ctx, nullptr, ins, outs, attrs));
   VLOG(3) << "expected_kernel_key:" << expected_kernel_key;
 
   auto kernel_iter = kernels.find(expected_kernel_key);
@@ -115,6 +120,8 @@ PreparedOp PrepareOpImpl(const NameVarMap<VarType>& ins,
     PADDLE_THROW("op %s does not have kernel for %s", op.Type(),
                  KernelTypeToString(expected_kernel_key));
   }
+  std::vector<framework::KernelConfig>* kernel_configs =
+      op.GetKernelConfig(expected_kernel_key);
 
   if (!(expected_kernel_key.place_ == place)) {
     dev_ctx = pool.Get(expected_kernel_key.place_);
@@ -122,7 +129,7 @@ PreparedOp PrepareOpImpl(const NameVarMap<VarType>& ins,
   }
 
   PrepareDataImpl<VarType>(place, ins, op, expected_kernel_key);
-  return PreparedOp(op, ctx, kernel_iter->second, dev_ctx);
+  return PreparedOp(op, ctx, kernel_iter->second, dev_ctx, kernel_configs);
 }
 
 PreparedOp PreparedOp::Prepare(const NameVarMap<VarBase>& ins,
@@ -145,8 +152,10 @@ template <typename VarType>
 static void PreparedOpRunImpl(
     const framework::OperatorBase& op, const framework::RuntimeContext& ctx,
     const framework::OperatorWithKernel::OpKernelFunc& func,
-    platform::DeviceContext* dev_ctx, const NameVarMap<VarType>& ins,
-    const NameVarMap<VarType>& outs, const framework::AttributeMap& attrs) {
+    platform::DeviceContext* dev_ctx,
+    std::vector<framework::KernelConfig>* kernel_configs,
+    const NameVarMap<VarType>& ins, const NameVarMap<VarType>& outs,
+    const framework::AttributeMap& attrs) {
   // TODO(zjl): remove scope in dygraph
   framework::Scope scope;
@@ -154,21 +163,22 @@ static void PreparedOpRunImpl(
   static_cast<const framework::OperatorWithKernel&>(op).InferShape(
       &infer_shape_ctx);
 
-  func(DygraphExecutionContext<VarType>(op, scope, *dev_ctx, ctx, ins, outs,
-                                        attrs));
+  func(DygraphExecutionContext<VarType>(op, scope, *dev_ctx, ctx,
+                                        kernel_configs, ins, outs, attrs));
 }
 
 void PreparedOp::Run(const NameVarMap<VarBase>& ins,
                      const NameVarMap<VarBase>& outs,
                      const framework::AttributeMap& attrs) {
-  PreparedOpRunImpl<VarBase>(op_, ctx_, func_, dev_ctx_, ins, outs, attrs);
+  PreparedOpRunImpl<VarBase>(op_, ctx_, func_, dev_ctx_, kernel_configs_, ins,
+                             outs, attrs);
 }
 
 void PreparedOp::Run(const NameVarMap<VariableWrapper>& ins,
                      const NameVarMap<VariableWrapper>& outs,
                      const framework::AttributeMap& attrs) {
-  PreparedOpRunImpl<VariableWrapper>(op_, ctx_, func_, dev_ctx_, ins, outs,
-                                     attrs);
+  PreparedOpRunImpl<VariableWrapper>(op_, ctx_, func_, dev_ctx_,
+                                     kernel_configs_, ins, outs, attrs);
 }
 
 }  // namespace imperative
paddle/fluid/imperative/prepared_operator.h
@@ -33,7 +33,8 @@ class PreparedOp {
   PreparedOp(const framework::OperatorBase& op,
              const framework::RuntimeContext& ctx,
              const framework::OperatorWithKernel::OpKernelFunc& func,
-             platform::DeviceContext* dev_ctx);
+             platform::DeviceContext* dev_ctx,
+             std::vector<framework::KernelConfig>* kernel_configs);
 
   static PreparedOp Prepare(const NameVarMap<VarBase>& ins,
                             const NameVarMap<VarBase>& outs,
@@ -71,6 +72,7 @@ class PreparedOp {
   const framework::RuntimeContext& ctx_;
   framework::OperatorWithKernel::OpKernelFunc func_;
   platform::DeviceContext* dev_ctx_;
+  std::vector<framework::KernelConfig>* kernel_configs_;
 };
 
 }  // namespace imperative
paddle/fluid/imperative/tests/test_layer.cc
@@ -235,7 +235,7 @@ TEST(test_layer, test_dygraph_execution_context) {
   framework::Scope scope;
 
   DygraphExecutionContext<imperative::VarBase> dy_exe_context(
-      *(op.get()), scope, *dev_ctx, ctx, ins, outs, concat_att_map);
+      *(op.get()), scope, *dev_ctx, ctx, nullptr, ins, outs, concat_att_map);
 
   ASSERT_EQ(dy_exe_context.InputSize("X"), 1u);
   ASSERT_EQ(dy_exe_context.InputName("X"), "vin");
paddle/fluid/operators/beam_search_decode_op.cc
@@ -123,7 +123,7 @@ class BeamSearchDecodeOp : public framework::OperatorBase {
     auto& dev_ctx = *pool.Get(dev_place);
 
     framework::RuntimeContext run_ctx(Inputs(), Outputs(), scope);
-    framework::ExecutionContext ctx(*this, scope, dev_ctx, run_ctx);
+    framework::ExecutionContext ctx(*this, scope, dev_ctx, run_ctx, nullptr);
 
     const LoDTensorArray* ids = ctx.Input<LoDTensorArray>("Ids");
     const LoDTensorArray* scores = ctx.Input<LoDTensorArray>("Scores");
paddle/fluid/operators/conv_cudnn_helper.h
@@ -21,7 +21,6 @@ limitations under the License. */
 #include "paddle/fluid/framework/operator_kernel_configs.h"
 #include "paddle/fluid/operators/conv_cudnn_op_cache.h"
 #include "paddle/fluid/platform/cudnn_desc.h"
-// #include "paddle/fluid/platform/device_context.h"
 
 namespace paddle {
 namespace operators {
@@ -90,43 +89,7 @@ std::ostream& operator<<(std::ostream& out, const std::vector<T>& v) {
   return out;
 }
 
-// ConvSearchCache using framework::AlgorithmsCache to search
-// cudnnConvolutionFwdAlgo_t, cudnnConvolutionBwdDataAlgo_t or
-// cudnnConvolutionBwdFilterAlgo_t
-class ConvSearchCache {
- public:
-  static ConvSearchCache& Instance() {
-    static ConvSearchCache instance;
-    return instance;
-  }
-
-  framework::AlgorithmsCache<cudnnConvolutionFwdAlgo_t>* GetForward() {
-    return &forward_cache_;
-  }
-  framework::AlgorithmsCache<cudnnConvolutionBwdDataAlgo_t>* GetBackwardData() {
-    return &backward_data_cache_;
-  }
-  framework::AlgorithmsCache<cudnnConvolutionBwdFilterAlgo_t>*
-  GetBackwardFilter() {
-    return &backward_filter_cache_;
-  }
-  framework::AlgorithmsCache<cudnnConvolutionFwdAlgo_t>* GetConvFusion() {
-    return &fusion_forward_cache_;
-  }
-
- private:
-  ConvSearchCache() {}
-  ~ConvSearchCache() {}
-  ConvSearchCache(const ConvSearchCache&) {}
-  ConvSearchCache& operator=(const ConvSearchCache&) {}
-
-  framework::AlgorithmsCache<cudnnConvolutionFwdAlgo_t> forward_cache_;
-  framework::AlgorithmsCache<cudnnConvolutionBwdDataAlgo_t>
-      backward_data_cache_;
-  framework::AlgorithmsCache<cudnnConvolutionBwdFilterAlgo_t>
-      backward_filter_cache_;
-  framework::AlgorithmsCache<cudnnConvolutionFwdAlgo_t> fusion_forward_cache_;
-};
+using framework::AlgorithmsCache;
 
 struct ConvArgs {
   cudnnHandle_t handle;
@@ -134,7 +97,6 @@ struct ConvArgs {
   platform::FilterDescriptor wdesc;
   platform::ConvolutionDescriptor cdesc;
   const framework::Tensor *x, *w, *o;
-  cudnnDataType_t cudnn_dtype;
 
   // strides
   std::vector<int> s;
@@ -145,9 +107,8 @@ struct ConvArgs {
   ConvArgs(const framework::Tensor* x, const framework::Tensor* w,
            const framework::Tensor* o, const std::vector<int> s,
-           const std::vector<int> p, const std::vector<int> d,
-           cudnnDataType_t dtype)
-      : x(x), w(w), o(o), s(s), p(p), d(d), cudnn_dtype(dtype) {}
+           const std::vector<int> p, const std::vector<int> d)
+      : x(x), w(w), o(o), s(s), p(p), d(d) {}
 };
 
 template <typename perf_t>
@@ -160,7 +121,7 @@ struct SearchAlgorithm<cudnnConvolutionFwdAlgoPerf_t> {
   template <typename T>
   static algo_t Find(const ConvArgs& args, bool exhaustive_search,
-                     bool deterministic,
+                     bool deterministic, int algo_cache_id,
                      const framework::ExecutionContext& ctx) {
     auto dtype = platform::CudnnDataType<T>::type;
     bool has_got_workspace_size = true;
@@ -222,24 +183,22 @@ struct SearchAlgorithm<cudnnConvolutionFwdAlgoPerf_t> {
 #endif
       VLOG(3) << "choose algo " << algo;
     } else {
+      AlgorithmsCache<algo_t>& algo_cache =
+          ctx.GetKernelConfig<AlgorithmsCache<algo_t>>(algo_cache_id);
       auto& dev_ctx =
          ctx.template device_context<platform::CUDADeviceContext>();
      auto workspace_handle = dev_ctx.cudnn_workspace_handle();
-      auto& temp = ctx.cuda_device_context();
-      AlgorithmsCache<algo_t>& algo_cache =
-          *(ConvSearchCache::Instance().GetForward());
       auto x_dims = framework::vectorize(args.x->dims());
       auto w_dims = framework::vectorize(args.w->dims());
-      VLOG(10) << "cudnnConvolutionFwdAlgoPerf_t:"
-               << ", x_dims:" << x_dims << ", w_dims:" << w_dims << ", args.s"
-               << args.s << ", args.p" << args.p << ", args.d" << args.d;
+      VLOG(10) << "cudnnConvolutionFwdAlgoPerf_t algo_cache_id:"
+               << algo_cache_id << ", x_dims:" << x_dims
+               << ", w_dims:" << w_dims << ", args.s" << args.s << ", args.p"
+               << args.p << ", args.d" << args.d;
 
-      algo = algo_cache.GetAlgorithm(x_dims, w_dims, args.s, args.p, args.d, 0,
-                                     static_cast<int64_t>(args.cudnn_dtype),
-                                     [&]() {
+      algo = algo_cache.GetAlgorithm(x_dims, w_dims, args.s, args.p, args.d, 0,
+                                     [&]() {
         int returned_algo_count;
         std::array<perf_t, kNUM_CUDNN_FWD_ALGS> perf_stat;
@@ -285,7 +244,7 @@ struct SearchAlgorithm<cudnnConvolutionBwdDataAlgoPerf_t> {
   template <typename T>
   static algo_t Find(const ConvArgs& args, bool exhaustive_search,
-                     bool deterministic,
+                     bool deterministic, int algo_cache_id,
                      const framework::ExecutionContext& ctx) {
     auto dtype = platform::CudnnDataType<T>::type;
     bool exhaustive = (exhaustive_search) & (dtype != CUDNN_DATA_HALF);
@@ -362,23 +321,22 @@ struct SearchAlgorithm<cudnnConvolutionBwdDataAlgoPerf_t> {
     } else if (deterministic) {
       return CUDNN_CONVOLUTION_BWD_DATA_ALGO_1;
     } else {
+      AlgorithmsCache<algo_t>& algo_cache =
+          ctx.GetKernelConfig<AlgorithmsCache<algo_t>>(algo_cache_id);
       auto& dev_ctx =
          ctx.template device_context<platform::CUDADeviceContext>();
      auto workspace_handle = dev_ctx.cudnn_workspace_handle();
-      AlgorithmsCache<algo_t>& algo_cache =
-          *(ConvSearchCache::Instance().GetBackwardData());
       auto x_dims = framework::vectorize(args.x->dims());
       auto w_dims = framework::vectorize(args.w->dims());
-      VLOG(10) << "cudnnConvolutionFwdAlgoPerf_t"
-               << ", x_dims:" << x_dims << ", w_dims:" << w_dims << ", args.s"
-               << args.s << ", args.p" << args.p << ", args.d" << args.d;
+      VLOG(10) << "cudnnConvolutionFwdAlgoPerf_t algo_cache_id:"
+               << algo_cache_id << ", x_dims:" << x_dims
+               << ", w_dims:" << w_dims << ", args.s" << args.s << ", args.p"
+               << args.p << ", args.d" << args.d;
 
-      algo = algo_cache.GetAlgorithm(x_dims, w_dims, args.s, args.p, args.d, 0,
-                                     static_cast<int64_t>(args.cudnn_dtype),
-                                     [&]() {
+      algo = algo_cache.GetAlgorithm(x_dims, w_dims, args.s, args.p, args.d, 0,
+                                     [&]() {
         int returned_algo_count;
         std::array<perf_t, kNUM_CUDNN_FWD_ALGS> perf_stat;
@@ -427,7 +385,7 @@ struct SearchAlgorithm<cudnnConvolutionBwdFilterAlgoPerf_t> {
   template <typename T>
   static algo_t Find(const ConvArgs& args, bool exhaustive_search,
-                     bool deterministic,
+                     bool deterministic, int algo_cache_id,
                      const framework::ExecutionContext& ctx) {
     auto dtype = platform::CudnnDataType<T>::type;
     bool exhaustive = (exhaustive_search) & (dtype != CUDNN_DATA_HALF);
@@ -491,22 +449,22 @@ struct SearchAlgorithm<cudnnConvolutionBwdFilterAlgoPerf_t> {
     } else if (deterministic) {
       return CUDNN_CONVOLUTION_BWD_FILTER_ALGO_1;
     } else {
+      AlgorithmsCache<algo_t>& algo_cache =
+          ctx.GetKernelConfig<AlgorithmsCache<algo_t>>(algo_cache_id);
       auto& dev_ctx =
          ctx.template device_context<platform::CUDADeviceContext>();
      auto workspace_handle = dev_ctx.cudnn_workspace_handle();
-      AlgorithmsCache<algo_t>& algo_cache =
-          *(ConvSearchCache::Instance().GetBackwardFilter());
       auto x_dims = framework::vectorize(args.x->dims());
       auto w_dims = framework::vectorize(args.w->dims());
-      VLOG(10) << "cudnnConvolutionFwdAlgoPerf_t:"
-               << ", x_dims:" << x_dims << ", w_dims:" << w_dims << ", args.s"
-               << args.s << ", args.p" << args.p << ", args.d" << args.d;
+      VLOG(10) << "cudnnConvolutionFwdAlgoPerf_t algo_cache_id:"
+               << algo_cache_id << ", x_dims:" << x_dims
+               << ", w_dims:" << w_dims << ", args.s" << args.s << ", args.p"
+               << args.p << ", args.d" << args.d;
 
-      algo = algo_cache.GetAlgorithm(x_dims, w_dims, args.s, args.p, args.d, 0,
-                                     static_cast<int64_t>(args.cudnn_dtype),
-                                     [&]() {
+      algo = algo_cache.GetAlgorithm(x_dims, w_dims, args.s, args.p, args.d, 0,
+                                     [&]() {
         int returned_algo_count;
         std::array<perf_t, kNUM_CUDNN_FWD_ALGS> perf_stat;
         auto cudnn_find_func = [&](void* cudnn_workspace_ptr) {
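In each Find<T> specialization the exhaustive probing is wrapped in a gen_func lambda, so AlgorithmsCache::GetAlgorithm only pays for the search on a cache miss. A reduced sketch of that lazily-evaluated generator pattern (simplified cache, no cuDNN calls):

#include <cstdint>
#include <functional>
#include <iostream>
#include <unordered_map>

// Minimal stand-in for AlgorithmsCache<TAlgorithm>::GetAlgorithm: the
// expensive generator runs only when the key has not been seen before.
template <typename TAlgorithm>
class MiniCache {
 public:
  TAlgorithm GetAlgorithm(int64_t key, std::function<TAlgorithm()> gen_func) {
    auto it = hash_.find(key);
    if (it == hash_.end()) it = hash_.emplace(key, gen_func()).first;
    return it->second;
  }

 private:
  std::unordered_map<int64_t, TAlgorithm> hash_;
};

int main() {
  MiniCache<int> cache;
  auto search = [] {
    std::cout << "exhaustive search runs once\n";  // the [&]() {...} body above
    return 42;                                     // the winning algo enum
  };
  cache.GetAlgorithm(7, search);  // miss: runs the search
  cache.GetAlgorithm(7, search);  // hit: returns the cached algorithm
  return 0;
}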
paddle/fluid/operators/conv_cudnn_op.cu
@@ -216,13 +216,9 @@ class CUDNNConvOpKernel : public framework::OpKernel<T> {
   const T* filter_data = transformed_filter_channel.data<T>();
 
   // ------------------- cudnn descriptors ---------------------
-  ConvArgs args{&transformed_input, &transformed_filter_channel,
-                &transformed_output, strides, padding_common, dilations,
-                dtype};
+  ConvArgs args{&transformed_input, &transformed_filter_channel,
+                &transformed_output, strides, padding_common, dilations};
   auto handle = dev_ctx.cudnn_handle();
   auto workspace_handle = dev_ctx.cudnn_workspace_handle();
@@ -273,7 +269,7 @@ class CUDNNConvOpKernel : public framework::OpKernel<T> {
   cudnnConvolutionFwdAlgo_t algo{};
   using search = SearchAlgorithm<cudnnConvolutionFwdAlgoPerf_t>;
-  algo = search::Find<T>(args, exhaustive_search, false, ctx);
+  algo = search::Find<T>(args, exhaustive_search, false, 0, ctx);
   workspace_size = search::GetWorkspaceSize(args, algo);
 #if CUDNN_VERSION_MIN(7, 0, 1)
@@ -522,15 +518,13 @@ class CUDNNConvGradOpKernel : public framework::OpKernel<T> {
                  &transformed_output_grad_channel,
-                 strides, padding_common, dilations, dtype};
+                 strides, padding_common, dilations};
   ConvArgs args2{&transformed_input, &transformed_filter_grad_channel,
                  &transformed_output_grad_channel,
-                 strides, padding_common, dilations, dtype};
+                 strides, padding_common, dilations};
 
   auto handle = dev_ctx.cudnn_handle();
   DataLayout layout = compute_format == DataLayout::kNHWC ? DataLayout::kNHWC
@@ -586,7 +580,7 @@ class CUDNNConvGradOpKernel : public framework::OpKernel<T> {
     using search1 = SearchAlgorithm<cudnnConvolutionBwdDataAlgoPerf_t>;
     data_algo =
-        search1::Find<T>(args1, exhaustive_search, deterministic, ctx);
+        search1::Find<T>(args1, exhaustive_search, deterministic, 0, ctx);
     workspace_size =
         std::max(workspace_size, search1::GetWorkspaceSize(args1, data_algo));
   }
@@ -603,7 +597,7 @@ class CUDNNConvGradOpKernel : public framework::OpKernel<T> {
     using search2 = SearchAlgorithm<cudnnConvolutionBwdFilterAlgoPerf_t>;
     filter_algo =
-        search2::Find<T>(args2, exhaustive_search, deterministic, ctx);
+        search2::Find<T>(args2, exhaustive_search, deterministic, 1, ctx);
     workspace_size =
         std::max(workspace_size, search2::GetWorkspaceSize(args2, filter_algo));
   }
@@ -904,26 +898,15 @@ class CUDNNConvDoubleGradOpKernel : public framework::OpKernel<T> {
   auto handle = dev_ctx.cudnn_handle();
-  ConvArgs args1{&transformed_ddX, W, &transformed_ddO_channel, strides,
-                 padding_common, dilations, dtype};
-  ConvArgs args2{&transformed_X, ddW, &transformed_ddO_channel, strides,
-                 padding_common, dilations, dtype};
-  ConvArgs args3{&transformed_ddX, dW, &transformed_dO_channel, strides,
-                 padding_common, dilations, dtype};
-  ConvArgs args4{&transformed_dX, ddW, &transformed_dO_channel, strides,
-                 padding_common, dilations, dtype};
+  ConvArgs args1{&transformed_ddX, W, &transformed_ddO_channel, strides,
+                 padding_common, dilations};
+  ConvArgs args2{&transformed_X, ddW, &transformed_ddO_channel, strides,
+                 padding_common, dilations};
+  ConvArgs args3{&transformed_ddX, dW, &transformed_dO_channel, strides,
+                 padding_common, dilations};
+  ConvArgs args4{&transformed_dX, ddW, &transformed_dO_channel, strides,
+                 padding_common, dilations};
 
   cudnnConvolutionFwdAlgo_t fwd_algo1 =
       static_cast<cudnnConvolutionFwdAlgo_t>(0);
@@ -951,7 +934,7 @@ class CUDNNConvDoubleGradOpKernel : public framework::OpKernel<T> {
     args1.cdesc.set(dtype, padding_common, strides, dilations, c_group);
     using search1 = SearchAlgorithm<cudnnConvolutionFwdAlgoPerf_t>;
-    fwd_algo1 = search1::Find<T>(args1, exhaustive_search, false, ctx);
+    fwd_algo1 = search1::Find<T>(args1, exhaustive_search, false, 0, ctx);
     workspace_size = search1::GetWorkspaceSize(args1, fwd_algo1);
   }
@@ -966,7 +949,7 @@ class CUDNNConvDoubleGradOpKernel : public framework::OpKernel<T> {
     args2.cdesc.set(dtype, padding_common, strides, dilations, c_group);
     using search2 = SearchAlgorithm<cudnnConvolutionFwdAlgoPerf_t>;
-    fwd_algo2 = search2::Find<T>(args2, exhaustive_search, false, ctx);
+    fwd_algo2 = search2::Find<T>(args2, exhaustive_search, false, 0, ctx);
     workspace_size =
         std::max(workspace_size, search2::GetWorkspaceSize(args2, fwd_algo2));
   }
@@ -984,7 +967,7 @@ class CUDNNConvDoubleGradOpKernel : public framework::OpKernel<T> {
     using search3 = SearchAlgorithm<cudnnConvolutionBwdFilterAlgoPerf_t>;
     filter_algo =
-        search3::Find<T>(args3, exhaustive_search, deterministic, ctx);
+        search3::Find<T>(args3, exhaustive_search, deterministic, 1, ctx);
     workspace_size =
         std::max(workspace_size, search3::GetWorkspaceSize(args3, filter_algo));
   }
@@ -1000,7 +983,7 @@ class CUDNNConvDoubleGradOpKernel : public framework::OpKernel<T> {
     using search4 = SearchAlgorithm<cudnnConvolutionBwdDataAlgoPerf_t>;
     data_algo =
-        search4::Find<T>(args4, exhaustive_search, deterministic, ctx);
+        search4::Find<T>(args4, exhaustive_search, deterministic, 2, ctx);
     workspace_size =
         std::max(workspace_size, search4::GetWorkspaceSize(args4, data_algo));
   }
paddle/fluid/operators/conv_op.cc
@@ -178,6 +178,17 @@ framework::OpKernelType ConvOp::GetExpectedKernelType(
   auto type = framework::OpKernelType(input_data_type, ctx.GetPlace(), layout,
                                       library, customized_type_value);
+#ifdef PADDLE_WITH_CUDA
+  std::vector<framework::KernelConfig>& configs = kernel_configs_map_[type];
+  // TODO(dangqingqing): Currently conv_fusion_op use cudnn but sets use_cudnn
+  // to false. It should be fixed and then here should only create if library
+  // is kCUDNN.
+  if (configs.empty()) {
+    std::shared_ptr<framework::AlgorithmsCache<cudnnConvolutionFwdAlgo_t>> p(
+        new framework::AlgorithmsCache<cudnnConvolutionFwdAlgo_t>());
+    configs.push_back(p);
+  }
+#endif
   return type;
 }
@@ -552,6 +563,21 @@ framework::OpKernelType ConvOpGrad::GetExpectedKernelType(
   auto type = framework::OpKernelType(
       OperatorWithKernel::IndicateVarDataType(ctx, "Input"), ctx.GetPlace(),
       layout_, library_, customized_type_value);
+#ifdef PADDLE_WITH_CUDA
+  if (library_ == framework::LibraryType::kCUDNN) {
+    std::vector<framework::KernelConfig>& configs = kernel_configs_map_[type];
+    if (configs.empty()) {
+      std::shared_ptr<
+          framework::AlgorithmsCache<cudnnConvolutionBwdDataAlgo_t>>
+          p(new framework::AlgorithmsCache<cudnnConvolutionBwdDataAlgo_t>());
+      configs.push_back(p);
+
+      std::shared_ptr<
+          framework::AlgorithmsCache<cudnnConvolutionBwdFilterAlgo_t>>
+          p2(new framework::AlgorithmsCache<cudnnConvolutionBwdFilterAlgo_t>());
+      configs.push_back(p2);
+    }
+  }
+#endif
   return type;
 }
@@ -728,6 +754,25 @@ framework::OpKernelType ConvOpDoubleGrad::GetExpectedKernelType(
   auto type = framework::OpKernelType(
       OperatorWithKernel::IndicateVarDataType(ctx, "Input"), ctx.GetPlace(),
       layout_, library_, customized_type_value);
+#ifdef PADDLE_WITH_CUDA
+  if (library_ == framework::LibraryType::kCUDNN) {
+    std::vector<framework::KernelConfig>& configs = kernel_configs_map_[type];
+    if (configs.empty()) {
+      std::shared_ptr<framework::AlgorithmsCache<cudnnConvolutionFwdAlgo_t>>
+          p0(new framework::AlgorithmsCache<cudnnConvolutionFwdAlgo_t>());
+      configs.push_back(p0);
+
+      std::shared_ptr<
+          framework::AlgorithmsCache<cudnnConvolutionBwdFilterAlgo_t>>
+          p1(new framework::AlgorithmsCache<cudnnConvolutionBwdFilterAlgo_t>());
+      configs.push_back(p1);
+
+      std::shared_ptr<
+          framework::AlgorithmsCache<cudnnConvolutionBwdDataAlgo_t>>
+          p2(new framework::AlgorithmsCache<cudnnConvolutionBwdDataAlgo_t>());
+      configs.push_back(p2);
+    }
+  }
+#endif
   return type;
 }
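The push order above fixes the algo_cache_id values used by the Find<T> calls in conv_cudnn_op.cu: the forward op registers one forward cache (index 0); the grad op registers bwd-data then bwd-filter (indices 0 and 1); double-grad registers fwd, bwd-filter, bwd-data (indices 0, 1, 2). Stated as hypothetical named constants (the names are ours, not Paddle's):

// Hypothetical naming of the index convention implied by this diff: each
// GetExpectedKernelType pushes caches in a fixed order, and the kernels
// index into the vector with matching algo_cache_id arguments.
namespace conv_algo_cache_id {
// ConvOp: configs = {fwd}
constexpr int kForwardFwd = 0;           // search::Find<T>(..., 0, ctx)

// ConvOpGrad: configs = {bwd_data, bwd_filter}
constexpr int kGradBwdData = 0;          // search1::Find<T>(..., 0, ctx)
constexpr int kGradBwdFilter = 1;        // search2::Find<T>(..., 1, ctx)

// ConvOpDoubleGrad: configs = {fwd, bwd_filter, bwd_data}
constexpr int kDoubleGradFwd = 0;        // search1/search2::Find<T>(..., 0, ctx)
constexpr int kDoubleGradBwdFilter = 1;  // search3::Find<T>(..., 1, ctx)
constexpr int kDoubleGradBwdData = 2;    // search4::Find<T>(..., 2, ctx)
}  // namespace conv_algo_cache_id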
paddle/fluid/operators/elementwise/test_elementwise_mul_op_dim.cc
@@ -58,7 +58,8 @@ void MainTest(const TestData& test_data) {
   RuntimeContext runtime_ctx =
       RuntimeContext(op->Inputs(), op->Outputs(), scope);
-  ExecutionContext ctx = ExecutionContext(*op, scope, *dev_ctx, runtime_ctx);
+  ExecutionContext ctx =
+      ExecutionContext(*op, scope, *dev_ctx, runtime_ctx, nullptr);
   bool result = ElementwiseMulOp::AreDimsAndFormatCorrect(
       ctx, 16, MKLDNNMemoryFormat::nChw16c);
   if (test_data.supposed_to_fail)
paddle/fluid/operators/fused/conv_fusion_op.cu
@@ -14,10 +14,10 @@ limitations under the License. */
 #include <array>
 #include "paddle/fluid/framework/op_registry.h"
-#include "paddle/fluid/operators/conv_cudnn_helper.h"
+#include "paddle/fluid/operators/conv_cudnn_op_cache.h"
 #include "paddle/fluid/operators/conv_op.h"
 #include "paddle/fluid/operators/math/padding.h"
 #include "paddle/fluid/platform/cudnn_helper.h"
 
 DECLARE_int64(cudnn_exhaustive_search_times);
@@ -233,7 +233,7 @@ class CUDNNConvFusionOpKernel : public framework::OpKernel<T> {
       return fwd_perf_stat[0].algo;
     };
     AlgorithmsCache<cudnnConvolutionFwdAlgo_t>& algo_cache =
-        *(ConvSearchCache::Instance().GetConvFusion());
+        ctx.GetKernelConfig<AlgorithmsCache<cudnnConvolutionFwdAlgo_t>>(0);
     int search_times = ctx.Attr<int>("search_times");
     search_times = std::max(
         static_cast<int>(FLAGS_cudnn_exhaustive_search_times), search_times);
@@ -245,9 +245,8 @@ class CUDNNConvFusionOpKernel : public framework::OpKernel<T> {
       algo = algo_cache.GetAlgorithm(x_dims[2] * x_dims[3], search_times, 0,
                                      search_func);
     } else {
-      auto dtype = platform::CudnnDataType<T>::type;
       algo = algo_cache.GetAlgorithm(x_dims, f_dims, strides, paddings,
-                                     dilations, 0, dtype, search_func);
+                                     dilations, 0, search_func);
     }
     VLOG(3) << "choose algo " << algo;
   }
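conv_fusion is the one caller of the area-based GetAlgorithm overload: for the first search_times misses it runs the search and caches per input area, and once the budget is spent it reuses the entry with the smallest area. A consolidated restatement of that policy as restored in operator_kernel_configs.h above, with simplified types:

#include <climits>
#include <cstdint>
#include <functional>
#include <unordered_map>

// Condensed restatement of AlgorithmsCache<TAlgorithm>::GetAlgorithm(area,
// search_times, ...) after the revert (mutex removed, per-kernel ownership).
template <typename TAlgorithm>
class AreaCache {
 public:
  TAlgorithm Get(int64_t area, int search_times,
                 std::function<TAlgorithm()> gen_func) {
    auto it = hash_.find(area);
    if (it != hash_.end()) return it->second;  // exact-area hit
    if (search_times_ < search_times) {        // still inside the search budget
      TAlgorithm algo = gen_func();
      hash_[area] = algo;
      ++search_times_;
      return algo;
    }
    // Budget exhausted: reuse the algorithm found for the smallest area.
    TAlgorithm algo{};
    int64_t min = INT_MAX;
    for (const auto& m : hash_) {
      if (m.first < min) {
        min = m.first;
        algo = m.second;
      }
    }
    return algo;
  }

 private:
  std::unordered_map<int64_t, TAlgorithm> hash_;
  int search_times_ = 0;
};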
paddle/fluid/operators/warpctc_op.cc
@@ -61,8 +61,8 @@ class WarpCTCOp : public framework::OperatorWithKernel {
     framework::LibraryType library_{framework::LibraryType::kPlain};
     framework::DataLayout layout_ = framework::DataLayout::kAnyLayout;
     return framework::OpKernelType(
-        OperatorWithKernel::IndicateVarDataType(ctx, "Logits"), ctx.GetPlace(),
-        layout_, library_);
+        OperatorWithKernel::IndicateVarDataType(ctx, "Logits"),
+        ctx.device_context(), layout_, library_);
   }
 };
@@ -174,7 +174,7 @@ class WarpCTCGradOp : public framework::OperatorWithKernel {
       const framework::ExecutionContext& ctx) const override {
     return framework::OpKernelType(
         OperatorWithKernel::IndicateVarDataType(ctx, framework::GradVarName("Loss")),
-        ctx.GetPlace());
+        ctx.device_context());
   }
 };