Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
机器未来
Paddle
提交
ae6f46a1
P
Paddle
项目概览
机器未来
/
Paddle
与 Fork 源项目一致
Fork自
PaddlePaddle / Paddle
通知
1
Star
1
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
1
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
1
Issue
1
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
ae6f46a1
编写于
12月 19, 2018
作者:
S
sneaxiy
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
rewrite variable type
test=develop
上级
16c244bc
变更
42
隐藏空白更改
内联
并排
Showing
42 changed file
with
717 addition
and
366 deletion
+717
-366
paddle/fluid/framework/CMakeLists.txt
paddle/fluid/framework/CMakeLists.txt
+12
-4
paddle/fluid/framework/data_device_transform_test.cu
paddle/fluid/framework/data_device_transform_test.cu
+1
-0
paddle/fluid/framework/details/eager_deletion_op_handle.cc
paddle/fluid/framework/details/eager_deletion_op_handle.cc
+1
-1
paddle/fluid/framework/details/variable_visitor.cc
paddle/fluid/framework/details/variable_visitor.cc
+2
-2
paddle/fluid/framework/executor.cc
paddle/fluid/framework/executor.cc
+1
-1
paddle/fluid/framework/operator.cc
paddle/fluid/framework/operator.cc
+6
-6
paddle/fluid/framework/operator.h
paddle/fluid/framework/operator.h
+12
-0
paddle/fluid/framework/scope.cc
paddle/fluid/framework/scope.cc
+1
-3
paddle/fluid/framework/var_type.h
paddle/fluid/framework/var_type.h
+15
-17
paddle/fluid/framework/var_type_traits.cc
paddle/fluid/framework/var_type_traits.cc
+27
-0
paddle/fluid/framework/var_type_traits.h
paddle/fluid/framework/var_type_traits.h
+207
-0
paddle/fluid/framework/var_type_traits_test.cc
paddle/fluid/framework/var_type_traits_test.cc
+75
-0
paddle/fluid/framework/variable.h
paddle/fluid/framework/variable.h
+33
-31
paddle/fluid/framework/variable_test.cc
paddle/fluid/framework/variable_test.cc
+12
-11
paddle/fluid/inference/api/details/reset_tensor_array.cc
paddle/fluid/inference/api/details/reset_tensor_array.cc
+1
-1
paddle/fluid/inference/api/details/reset_tensor_array.h
paddle/fluid/inference/api/details/reset_tensor_array.h
+6
-3
paddle/fluid/operators/affine_grid_op.cc
paddle/fluid/operators/affine_grid_op.cc
+2
-2
paddle/fluid/operators/clip_by_norm_op.h
paddle/fluid/operators/clip_by_norm_op.h
+1
-1
paddle/fluid/operators/controlflow/parallel_do_op.cc
paddle/fluid/operators/controlflow/parallel_do_op.cc
+2
-1
paddle/fluid/operators/controlflow/while_op.cc
paddle/fluid/operators/controlflow/while_op.cc
+3
-4
paddle/fluid/operators/conv_op.cc
paddle/fluid/operators/conv_op.cc
+2
-2
paddle/fluid/operators/cudnn_lstm_op.cu.cc
paddle/fluid/operators/cudnn_lstm_op.cu.cc
+4
-237
paddle/fluid/operators/cudnn_rnn_cache.h
paddle/fluid/operators/cudnn_rnn_cache.h
+255
-0
paddle/fluid/operators/distributed/brpc_sendrecvop_utils.cc
paddle/fluid/operators/distributed/brpc_sendrecvop_utils.cc
+1
-2
paddle/fluid/operators/distributed_ops/split_ids_op.h
paddle/fluid/operators/distributed_ops/split_ids_op.h
+1
-1
paddle/fluid/operators/elementwise/elementwise_mul_op.h
paddle/fluid/operators/elementwise/elementwise_mul_op.h
+1
-1
paddle/fluid/operators/grid_sampler_op.cc
paddle/fluid/operators/grid_sampler_op.cc
+2
-2
paddle/fluid/operators/optimizers/adadelta_op.h
paddle/fluid/operators/optimizers/adadelta_op.h
+4
-2
paddle/fluid/operators/optimizers/adagrad_op.h
paddle/fluid/operators/optimizers/adagrad_op.h
+2
-1
paddle/fluid/operators/optimizers/adam_op.h
paddle/fluid/operators/optimizers/adam_op.h
+2
-1
paddle/fluid/operators/optimizers/adamax_op.h
paddle/fluid/operators/optimizers/adamax_op.h
+4
-2
paddle/fluid/operators/optimizers/decayed_adagrad_op.h
paddle/fluid/operators/optimizers/decayed_adagrad_op.h
+4
-2
paddle/fluid/operators/optimizers/ftrl_op.h
paddle/fluid/operators/optimizers/ftrl_op.h
+4
-2
paddle/fluid/operators/optimizers/momentum_op.h
paddle/fluid/operators/optimizers/momentum_op.h
+1
-1
paddle/fluid/operators/optimizers/sgd_op.cu
paddle/fluid/operators/optimizers/sgd_op.cu
+2
-1
paddle/fluid/operators/pool_op.cc
paddle/fluid/operators/pool_op.cc
+2
-2
paddle/fluid/operators/softmax_op.cc
paddle/fluid/operators/softmax_op.cc
+2
-2
paddle/fluid/operators/sum_mkldnn_op.cc
paddle/fluid/operators/sum_mkldnn_op.cc
+1
-1
paddle/fluid/operators/sum_op.cc
paddle/fluid/operators/sum_op.cc
+1
-1
paddle/fluid/operators/sum_op.h
paddle/fluid/operators/sum_op.h
+1
-1
paddle/fluid/operators/warpctc_op.cc
paddle/fluid/operators/warpctc_op.cc
+1
-1
paddle/fluid/platform/cudnn_helper.h
paddle/fluid/platform/cudnn_helper.h
+0
-13
未找到文件。
paddle/fluid/framework/CMakeLists.txt
浏览文件 @
ae6f46a1
...
...
@@ -78,17 +78,25 @@ cc_library(garbage_collector SRCS garbage_collector.cc DEPS device_context memor
cc_library
(
reader SRCS reader.cc DEPS lod_tensor ddim
)
cc_test
(
reader_test SRCS reader_test.cc DEPS reader
)
cc_test
(
variable_test SRCS variable_test.cc
)
cc_library
(
threadpool SRCS threadpool.cc DEPS enforce
)
cc_test
(
threadpool_test SRCS threadpool_test.cc DEPS threadpool
)
cc_library
(
scope SRCS scope.cc DEPS glog threadpool
)
cc_library
(
var_type_traits SRCS var_type_traits DEPS lod_tensor selected_rows framework_proto
)
if
(
WITH_GPU
)
target_link_libraries
(
var_type_traits cudnn
)
if
(
NOT WIN32
)
target_link_libraries
(
var_type_traits nccl
)
endif
()
endif
()
cc_test
(
var_type_traits_test SRCS var_type_traits_test.cc DEPS var_type_traits
)
cc_library
(
scope SRCS scope.cc DEPS glog threadpool var_type_traits
)
cc_test
(
scope_test SRCS scope_test.cc DEPS scope
)
cc_test
(
variable_test SRCS variable_test.cc DEPS tensor var_type_traits
)
cc_library
(
data_device_transform SRCS data_device_transform.cc DEPS tensor
)
nv_test
(
data_device_transform_test SRCS data_device_transform_test.cu
DEPS operator op_registry device_context math_function
)
DEPS operator op_registry device_context math_function
scope
)
if
(
WITH_GPU
)
if
(
WIN32
)
...
...
paddle/fluid/framework/data_device_transform_test.cu
浏览文件 @
ae6f46a1
...
...
@@ -17,6 +17,7 @@ limitations under the License. */
#include "paddle/fluid/framework/lod_tensor.h"
#include "paddle/fluid/framework/op_info.h"
#include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/framework/scope.h"
#include "paddle/fluid/operators/elementwise/elementwise_op_function.h"
#include "paddle/fluid/operators/math/math_function.h"
#include "paddle/fluid/platform/device_context.h"
...
...
paddle/fluid/framework/details/eager_deletion_op_handle.cc
浏览文件 @
ae6f46a1
...
...
@@ -88,7 +88,7 @@ void EagerDeletionOpHandle::RunImpl() {
}
}
else
{
PADDLE_THROW
(
"Type %s of %s is not supported eager deletion"
,
var
->
Type
().
name
(
),
name
);
framework
::
ToTypeName
(
var
->
Type
()
),
name
);
}
}
...
...
paddle/fluid/framework/details/variable_visitor.cc
浏览文件 @
ae6f46a1
...
...
@@ -24,7 +24,7 @@ static void VisitVariable(Variable* var, Func* func) {
}
else
if
(
var
->
IsType
<
SelectedRows
>
())
{
(
*
func
)(
var
->
GetMutable
<
SelectedRows
>
());
}
else
{
PADDLE_THROW
(
"Not supported type %s"
,
var
->
Type
().
name
(
));
PADDLE_THROW
(
"Not supported type %s"
,
ToTypeName
(
var
->
Type
()
));
}
}
...
...
@@ -35,7 +35,7 @@ static void VisitVariable(const Variable& var, Func* func) {
}
else
if
(
var
.
IsType
<
SelectedRows
>
())
{
(
*
func
)(
var
.
Get
<
SelectedRows
>
());
}
else
{
PADDLE_THROW
(
"Not supported type %s"
,
var
.
Type
().
name
(
));
PADDLE_THROW
(
"Not supported type %s"
,
ToTypeName
(
var
.
Type
()
));
}
}
...
...
paddle/fluid/framework/executor.cc
浏览文件 @
ae6f46a1
...
...
@@ -119,7 +119,7 @@ static void DeleteUnusedTensors(
}
}
else
{
PADDLE_THROW
(
"Type %s of %s is not supported eager deletion"
,
var
->
Type
().
name
(
),
name
);
framework
::
ToTypeName
(
var
->
Type
()
),
name
);
}
}
}
...
...
paddle/fluid/framework/operator.cc
浏览文件 @
ae6f46a1
...
...
@@ -365,7 +365,7 @@ const Tensor* GetLoDTensorOrSelectedRowsValueFromVar(const Variable& var) {
return
&
(
var
.
Get
<
SelectedRows
>
().
value
());
}
else
{
PADDLE_THROW
(
"Variable type_id %s, expect LoDTensor/SelectedRows."
,
var
.
Type
().
name
(
));
ToTypeName
(
var
.
Type
()
));
}
}
...
...
@@ -376,7 +376,7 @@ Tensor* GetMutableLoDTensorOrSelectedRowsValueFromVar(Variable* var) {
return
var
->
GetMutable
<
SelectedRows
>
()
->
mutable_value
();
}
else
{
PADDLE_THROW
(
"Variable type_id %s, expect LoDTensor/SelectedRows."
,
var
->
Type
().
name
(
));
ToTypeName
(
var
->
Type
()
));
}
}
...
...
@@ -430,7 +430,7 @@ const std::vector<const Tensor*> ExecutionContext::MultiInput<Tensor>(
PADDLE_ENFORCE
(
var
->
IsType
<
LoDTensor
>
(),
"%s should be LoDTensor, but the received type is %s"
,
sub_name
,
var
->
Type
().
name
(
));
sub_name
,
ToTypeName
(
var
->
Type
()
));
return
&
(
var
->
Get
<
LoDTensor
>
());
});
return
res
;
...
...
@@ -454,7 +454,7 @@ std::vector<Tensor*> ExecutionContext::MultiOutput<Tensor>(
PADDLE_ENFORCE
(
var
->
IsType
<
LoDTensor
>
(),
"%s should be LoDTensor, but the received type is %s"
,
sub_name
,
var
->
Type
().
name
(
));
sub_name
,
ToTypeName
(
var
->
Type
()
));
return
var
->
GetMutable
<
LoDTensor
>
();
});
return
res
;
...
...
@@ -641,7 +641,7 @@ class RuntimeInferShapeContext : public InferShapeContext {
PADDLE_THROW
(
"Only LoDTensor/SelectedRows support 'GetDim', but Variable %s's "
"type_id is %s."
,
name
,
var
->
Type
().
name
(
));
name
,
ToTypeName
(
var
->
Type
()
));
}
}
...
...
@@ -657,7 +657,7 @@ class RuntimeInferShapeContext : public InferShapeContext {
var
->
GetMutable
<
SelectedRows
>
()
->
set_height
(
dim
[
0
]);
}
else
{
PADDLE_THROW
(
"Variable %s type_id %s, expect LoDTensor/SelectedRows."
,
name
,
var
->
Type
().
name
(
));
name
,
ToTypeName
(
var
->
Type
()
));
}
}
...
...
paddle/fluid/framework/operator.h
浏览文件 @
ae6f46a1
...
...
@@ -288,6 +288,18 @@ class ExecutionContext {
const
platform
::
DeviceContext
&
device_context_
;
};
inline
bool
CanCUDNNBeUsed
(
const
framework
::
ExecutionContext
&
ctx
)
{
bool
use_cudnn
=
ctx
.
Attr
<
bool
>
(
"use_cudnn"
);
use_cudnn
&=
paddle
::
platform
::
is_gpu_place
(
ctx
.
GetPlace
());
#ifdef PADDLE_WITH_CUDA
if
(
use_cudnn
)
{
auto
&
dev_ctx
=
ctx
.
device_context
<
platform
::
CUDADeviceContext
>
();
use_cudnn
&=
dev_ctx
.
cudnn_handle
()
!=
nullptr
;
}
#endif
return
use_cudnn
;
}
template
<>
const
Tensor
*
ExecutionContext
::
Input
<
Tensor
>
(
const
std
::
string
&
name
)
const
;
...
...
paddle/fluid/framework/scope.cc
浏览文件 @
ae6f46a1
...
...
@@ -165,11 +165,9 @@ std::string Scope::Rename(const std::string& origin_name) const {
Variable
*
Scope
::
VarInternal
(
const
std
::
string
&
name
)
{
auto
*
v
=
FindVarLocally
(
name
);
if
(
v
!=
nullptr
)
return
v
;
v
=
new
Variable
();
vars_
[
name
].
reset
(
v
);
vars_
.
emplace
(
name
,
std
::
unique_ptr
<
Variable
>
(
v
)
);
VLOG
(
3
)
<<
"Create variable "
<<
name
;
v
->
name_
=
&
(
vars_
.
find
(
name
)
->
first
);
return
v
;
}
...
...
paddle/fluid/framework/var_type.h
浏览文件 @
ae6f46a1
...
...
@@ -19,35 +19,33 @@ limitations under the License. */
#include "paddle/fluid/framework/lod_tensor_array.h"
#include "paddle/fluid/framework/reader.h"
#include "paddle/fluid/framework/selected_rows.h"
#include "paddle/fluid/framework/var_type_traits.h"
#include "paddle/fluid/framework/variable.h"
namespace
paddle
{
namespace
framework
{
template
<
typename
T
>
inline
bool
IsType
(
const
std
::
type_index
&
type
_index
)
{
return
type
_index
==
std
::
type_index
(
typeid
(
T
)
);
inline
bool
IsType
(
const
std
::
type_index
&
type
)
{
return
type
==
typeid
(
T
);
}
inline
proto
::
VarType
::
Type
ToVarType
(
std
::
type_index
type
)
{
if
(
IsType
<
LoDTensor
>
(
type
))
{
return
proto
::
VarType_Type_LOD_TENSOR
;
}
else
if
(
IsType
<
LoDRankTable
>
(
type
))
{
return
proto
::
VarType_Type_LOD_RANK_TABLE
;
}
else
if
(
IsType
<
LoDTensorArray
>
(
type
))
{
return
proto
::
VarType_Type_LOD_TENSOR_ARRAY
;
}
else
if
(
IsType
<
SelectedRows
>
(
type
))
{
return
proto
::
VarType_Type_SELECTED_ROWS
;
}
else
if
(
IsType
<
ReaderHolder
>
(
type
))
{
return
proto
::
VarType_Type_READER
;
}
else
{
PADDLE_THROW
(
"ToVarType:Unsupported type %s"
,
type
.
name
());
inline
proto
::
VarType
::
Type
ToVarType
(
int
type
)
{
switch
(
type
)
{
case
proto
::
VarType
::
LOD_TENSOR
:
case
proto
::
VarType
::
SELECTED_ROWS
:
case
proto
::
VarType
::
LOD_RANK_TABLE
:
case
proto
::
VarType
::
LOD_TENSOR_ARRAY
:
case
proto
::
VarType
::
READER
:
return
static_cast
<
proto
::
VarType
::
Type
>
(
type
);
default:
PADDLE_THROW
(
"ToVarType:Unsupported type %d"
,
type
);
}
}
template
<
typename
Visitor
>
inline
void
VisitVarType
(
const
framework
::
Variable
&
var
,
Visitor
visitor
)
{
switch
(
ToVarType
(
var
.
Type
()
))
{
switch
(
var
.
Type
(
))
{
case
proto
::
VarType_Type_LOD_TENSOR
:
visitor
(
var
.
Get
<
LoDTensor
>
());
return
;
...
...
@@ -64,7 +62,7 @@ inline void VisitVarType(const framework::Variable& var, Visitor visitor) {
visitor
(
var
.
Get
<
ReaderHolder
>
());
return
;
default:
PADDLE_THROW
(
"Not supported visit type, %
d"
,
ToVarTyp
e
(
var
.
Type
()));
PADDLE_THROW
(
"Not supported visit type, %
s"
,
ToTypeNam
e
(
var
.
Type
()));
}
}
...
...
paddle/fluid/framework/var_type_traits.cc
0 → 100644
浏览文件 @
ae6f46a1
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/fluid/framework/var_type_traits.h"
namespace
paddle
{
namespace
framework
{
const
char
*
ToTypeName
(
int
var_id
)
{
return
ToTypeIndex
(
var_id
).
name
();
}
const
std
::
type_index
&
ToTypeIndex
(
int
var_id
)
{
return
detail
::
VarIdToTypeIndexMapHolder
::
ToTypeIndex
(
var_id
);
}
}
// namespace framework
}
// namespace paddle
paddle/fluid/framework/var_type_traits.h
0 → 100644
浏览文件 @
ae6f46a1
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <map>
#include <string>
#include <tuple>
#include <typeinfo>
#include <vector>
#include "paddle/fluid/framework/framework.pb.h"
#include "paddle/fluid/framework/lod_rank_table.h"
#include "paddle/fluid/framework/lod_tensor_array.h"
#include "paddle/fluid/framework/reader.h"
#include "paddle/fluid/framework/selected_rows.h"
#include "paddle/fluid/operators/reader/lod_tensor_blocking_queue.h"
#include "paddle/fluid/platform/enforce.h"
#include "paddle/fluid/platform/place.h"
#ifdef PADDLE_WITH_CUDA
#ifndef _WIN32
#include <nccl.h>
#include "paddle/fluid/operators/nccl/nccl_gpu_common.h"
#endif
#include <cudnn.h>
#include "paddle/fluid/operators/conv_cudnn_op_cache.h"
#include "paddle/fluid/operators/cudnn_rnn_cache.h"
#endif
namespace
paddle
{
namespace
framework
{
namespace
detail
{
template
<
bool
kStop
,
int
kStart
,
int
kEnd
,
typename
T1
,
typename
T2
,
typename
...
Args
>
struct
TypePosFinderImpl
{
static
constexpr
int
kPos
=
std
::
is_same
<
T1
,
T2
>::
value
?
kStart
:
TypePosFinderImpl
<
kStart
+
2
==
kEnd
,
kStart
+
1
,
kEnd
,
T1
,
Args
...
>::
kPos
;
};
template
<
int
kStart
,
int
kEnd
,
typename
T1
,
typename
T2
>
struct
TypePosFinderImpl
<
true
,
kStart
,
kEnd
,
T1
,
T2
>
{
static
constexpr
int
kPos
=
std
::
is_same
<
T1
,
T2
>::
value
?
kStart
:
-
1
;
};
// TypePosFinder helps to find the position in which T is inside Args...
// If T is not inside Args..., kPos would be -1
template
<
typename
T
,
typename
...
Args
>
struct
TypePosFinder
{
static
constexpr
int
kPos
=
TypePosFinderImpl
<
sizeof
...(
Args
)
==
1
,
0
,
sizeof
...(
Args
),
T
,
Args
...
>::
kPos
;
};
template
<
typename
...
Args
>
struct
VarTypeRegistryImpl
{
static
constexpr
size_t
kRegisteredTypeNum
=
sizeof
...(
Args
);
using
ArgTuple
=
std
::
tuple
<
Args
...
>
;
// TypePos() returns the position in which T is inside Args...
// If T is not inside Args... or T is void, return -1
template
<
typename
T
>
static
constexpr
int
TypePos
()
{
return
std
::
is_same
<
T
,
void
>::
value
?
-
1
:
TypePosFinder
<
T
,
Args
...
>::
kPos
;
}
// IsRegistered() returns whether T is registered inside RegistryImpl
template
<
typename
T
>
static
constexpr
bool
IsRegistered
()
{
return
TypePos
<
T
>
()
>=
0
;
}
};
}
// namespace detail
#define REG_PROTO_VAR_TYPE_TRAIT(type, proto_id) \
template <> \
struct VarTypeTrait<type> { \
static_assert(VarTypeRegistry::IsRegistered<type>(), \
"Must be registered type"); \
using Type = type; \
static constexpr int kId = proto_id; \
}
/**
* The following codes are designed to register variable types.
* Only registered types can be stored in Variable.
* This registry mechanism is designed to speed up Variable.
*/
// Users should add other variable types below.
// Paddle would generate unique Ids for each registered variable types.
class
Scope
;
using
VarTypeRegistry
=
detail
::
VarTypeRegistryImpl
<
LoDTensor
,
SelectedRows
,
std
::
vector
<
Scope
*>
,
LoDRankTable
,
LoDTensorArray
,
platform
::
PlaceList
,
ReaderHolder
,
Tensor
,
std
::
string
,
Scope
*
,
std
::
map
<
size_t
,
Tensor
>
,
operators
::
reader
::
LoDTensorBlockingQueueHolder
,
int
,
float
,
#ifdef PADDLE_WITH_CUDA
#ifndef _WIN32
ncclUniqueId
,
platform
::
Communicator
,
#endif
operators
::
AlgorithmsCache
<
cudnnConvolutionFwdAlgo_t
>
,
operators
::
AlgorithmsCache
<
cudnnConvolutionBwdDataAlgo_t
>
,
operators
::
AlgorithmsCache
<
cudnnConvolutionBwdFilterAlgo_t
>
,
operators
::
CudnnRNNCache
,
#endif
void
>
;
// void indicates end of registration, add other types before void
template
<
typename
T
>
struct
VarTypeTrait
{
static_assert
(
std
::
is_same
<
T
,
void
>::
value
||
VarTypeRegistry
::
IsRegistered
<
T
>
(),
"Must be registered type"
);
using
Type
=
T
;
// Default id generation
static
constexpr
int
kId
=
VarTypeRegistry
::
TypePos
<
T
>
()
+
static_cast
<
int
>
(
proto
::
VarType
::
TUPLE
)
*
2
;
};
// Users should set some of variable type ids to be what is defined in
// framework.proto here
REG_PROTO_VAR_TYPE_TRAIT
(
LoDTensor
,
proto
::
VarType
::
LOD_TENSOR
);
REG_PROTO_VAR_TYPE_TRAIT
(
SelectedRows
,
proto
::
VarType
::
SELECTED_ROWS
);
REG_PROTO_VAR_TYPE_TRAIT
(
std
::
vector
<
Scope
*>
,
proto
::
VarType
::
STEP_SCOPES
);
REG_PROTO_VAR_TYPE_TRAIT
(
LoDRankTable
,
proto
::
VarType
::
LOD_RANK_TABLE
);
REG_PROTO_VAR_TYPE_TRAIT
(
LoDTensorArray
,
proto
::
VarType
::
LOD_TENSOR_ARRAY
);
REG_PROTO_VAR_TYPE_TRAIT
(
platform
::
PlaceList
,
proto
::
VarType
::
PLACE_LIST
);
REG_PROTO_VAR_TYPE_TRAIT
(
ReaderHolder
,
proto
::
VarType
::
READER
);
/** End of variable type registration */
// Besides register variable id, it is helpful to register a
// var_id -> std::type_index (for example, get var names according to id)
namespace
detail
{
template
<
int
kStart
,
int
kEnd
,
bool
kStop
>
struct
VarIdToTypeIndexMapInitializerImpl
{
static
void
Init
(
std
::
unordered_map
<
int
,
std
::
type_index
>
*
m
)
{
using
Type
=
typename
std
::
tuple_element
<
kStart
,
VarTypeRegistry
::
ArgTuple
>::
type
;
constexpr
int
kId
=
VarTypeTrait
<
Type
>::
kId
;
if
(
!
std
::
is_same
<
Type
,
void
>::
value
)
{
m
->
emplace
(
kId
,
std
::
type_index
(
typeid
(
Type
)));
}
VarIdToTypeIndexMapInitializerImpl
<
kStart
+
1
,
kEnd
,
kStart
+
1
==
kEnd
>::
Init
(
m
);
}
};
template
<
int
kStart
,
int
kEnd
>
struct
VarIdToTypeIndexMapInitializerImpl
<
kStart
,
kEnd
,
true
>
{
static
void
Init
(
std
::
unordered_map
<
int
,
std
::
type_index
>
*
m
)
{}
};
// VarIdToTypeIndexMapInitializer is designed to initialize var_id ->
// std::type_index map
using
VarIdToTypeIndexMapInitializer
=
VarIdToTypeIndexMapInitializerImpl
<
0
,
VarTypeRegistry
::
kRegisteredTypeNum
,
VarTypeRegistry
::
kRegisteredTypeNum
==
0
>
;
struct
VarIdToTypeIndexMapHolder
{
public:
static
const
std
::
type_index
&
ToTypeIndex
(
int
var_id
)
{
static
const
VarIdToTypeIndexMapHolder
instance
;
auto
it
=
instance
.
var_type_map_
.
find
(
var_id
);
PADDLE_ENFORCE
(
it
!=
instance
.
var_type_map_
.
end
(),
"VarId %d is not registered."
,
var_id
);
return
it
->
second
;
}
private:
VarIdToTypeIndexMapHolder
()
{
VarIdToTypeIndexMapInitializer
::
Init
(
&
var_type_map_
);
}
std
::
unordered_map
<
int
,
std
::
type_index
>
var_type_map_
;
};
}
// namespace detail
const
char
*
ToTypeName
(
int
var_id
);
const
std
::
type_index
&
ToTypeIndex
(
int
var_id
);
template
<
typename
T
>
inline
constexpr
bool
IsRegisteredVarType
()
{
return
VarTypeRegistry
::
IsRegistered
<
T
>
();
}
#undef REG_PROTO_VAR_TYPE_TRAIT
}
// namespace framework
}
// namespace paddle
paddle/fluid/framework/var_type_traits_test.cc
0 → 100644
浏览文件 @
ae6f46a1
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/fluid/framework/var_type_traits.h"
#include <gtest/gtest.h>
#include <cstdint>
namespace
paddle
{
namespace
framework
{
template
<
int
kPos
,
int
kEnd
,
bool
kStop
>
struct
TypeIndexChecker
{
static
void
Check
()
{
using
Type
=
typename
std
::
tuple_element
<
kPos
,
VarTypeRegistry
::
ArgTuple
>::
type
;
if
(
!
std
::
is_same
<
Type
,
void
>::
value
)
{
EXPECT_TRUE
(
ToTypeIndex
(
VarTypeTrait
<
Type
>::
kId
)
==
typeid
(
Type
));
EXPECT_TRUE
(
std
::
string
(
ToTypeName
(
VarTypeTrait
<
Type
>::
kId
))
==
typeid
(
Type
).
name
());
}
TypeIndexChecker
<
kPos
+
1
,
kEnd
,
kPos
+
1
==
kEnd
>::
Check
();
}
};
template
<
int
kPos
,
int
kEnd
>
struct
TypeIndexChecker
<
kPos
,
kEnd
,
true
>
{
static
void
Check
()
{}
};
TEST
(
var_type_traits
,
check_type_index
)
{
constexpr
size_t
kRegisteredNum
=
VarTypeRegistry
::
kRegisteredTypeNum
;
TypeIndexChecker
<
0
,
kRegisteredNum
,
kRegisteredNum
==
0
>::
Check
();
}
template
<
typename
T
>
bool
CheckVarId
(
int
proto_id
)
{
static_assert
(
std
::
is_same
<
typename
VarTypeTrait
<
T
>::
Type
,
T
>::
value
,
"Type must be the same"
);
return
VarTypeTrait
<
T
>::
kId
==
proto_id
;
}
TEST
(
var_type_traits
,
check_proto_type_id
)
{
ASSERT_TRUE
(
CheckVarId
<
LoDTensor
>
(
proto
::
VarType
::
LOD_TENSOR
));
ASSERT_TRUE
(
CheckVarId
<
SelectedRows
>
(
proto
::
VarType
::
SELECTED_ROWS
));
ASSERT_TRUE
(
CheckVarId
<
std
::
vector
<
Scope
*>>
(
proto
::
VarType
::
STEP_SCOPES
));
ASSERT_TRUE
(
CheckVarId
<
LoDRankTable
>
(
proto
::
VarType
::
LOD_RANK_TABLE
));
ASSERT_TRUE
(
CheckVarId
<
LoDTensorArray
>
(
proto
::
VarType
::
LOD_TENSOR_ARRAY
));
ASSERT_TRUE
(
CheckVarId
<
platform
::
PlaceList
>
(
proto
::
VarType
::
PLACE_LIST
));
ASSERT_TRUE
(
CheckVarId
<
ReaderHolder
>
(
proto
::
VarType
::
READER
));
}
TEST
(
var_type_traits
,
test_registry
)
{
using
Registry
=
detail
::
VarTypeRegistryImpl
<
int8_t
,
int32_t
,
size_t
,
double
,
void
>
;
ASSERT_TRUE
(
Registry
::
TypePos
<
int8_t
>
()
==
0
);
ASSERT_TRUE
(
Registry
::
TypePos
<
int32_t
>
()
==
1
);
ASSERT_TRUE
(
Registry
::
TypePos
<
size_t
>
()
==
2
);
ASSERT_TRUE
(
Registry
::
TypePos
<
double
>
()
==
3
);
ASSERT_TRUE
(
Registry
::
TypePos
<
void
>
()
==
-
1
);
ASSERT_TRUE
(
Registry
::
TypePos
<
float
>
()
==
-
1
);
}
}
// namespace framework
}
// namespace paddle
paddle/fluid/framework/variable.h
浏览文件 @
ae6f46a1
...
...
@@ -18,7 +18,7 @@
#include <typeindex>
#include <typeinfo>
#include "paddle/fluid/
platform/enforce
.h"
#include "paddle/fluid/
framework/var_type_traits
.h"
namespace
paddle
{
namespace
framework
{
...
...
@@ -27,10 +27,14 @@ class Variable {
public:
template
<
typename
T
>
const
T
&
Get
()
const
{
static_assert
(
IsRegisteredVarType
<
T
>
(),
"Not registered type. Please register T inside var_type_traits.h"
);
PADDLE_ENFORCE
(
holder_
!=
nullptr
,
"Variable must hold some thing"
);
PADDLE_ENFORCE
(
IsType
<
T
>
()
,
PADDLE_ENFORCE
(
holder_
->
Type
()
==
VarTypeTrait
<
T
>::
kId
,
"Variable must be type %s, the holding type is %s"
,
typeid
(
T
).
name
(),
holder_
->
Type
().
name
());
ToTypeName
(
VarTypeTrait
<
T
>::
kId
),
ToTypeName
(
holder_
->
Type
()));
return
*
static_cast
<
const
T
*>
(
holder_
->
Ptr
());
}
...
...
@@ -39,61 +43,59 @@ class Variable {
template
<
typename
T
>
T
*
GetMutable
()
{
if
(
!
holder_
)
{
holder_
.
reset
(
new
PlaceholderImpl
<
T
>
(
new
T
()
));
holder_
.
reset
(
new
PlaceholderImpl
<
T
>
());
}
else
{
PADDLE_ENFORCE
(
IsType
<
T
>
()
,
PADDLE_ENFORCE
(
holder_
->
Type
()
==
VarTypeTrait
<
T
>::
kId
,
"Variable must be type %s, the holding type is %s"
,
typeid
(
T
).
name
(),
holder_
->
Type
().
name
());
ToTypeName
(
VarTypeTrait
<
T
>::
kId
),
ToTypeName
(
holder_
->
Type
()));
}
return
static_cast
<
T
*>
(
holder_
->
Ptr
());
}
template
<
typename
T
>
bool
IsType
()
const
{
return
holder_
!=
nullptr
&&
std
::
type_index
(
typeid
(
T
))
==
std
::
type_index
(
holder_
->
Type
());
return
holder_
&&
holder_
->
Type
()
==
VarTypeTrait
<
T
>::
kId
;
}
void
Clear
()
{
holder_
.
reset
();
}
std
::
type_index
Type
()
const
{
int
Type
()
const
{
PADDLE_ENFORCE
(
holder_
!=
nullptr
,
"Must hold memory"
);
return
holder_
->
Type
();
}
private:
struct
Placeholder
{
virtual
~
Placeholder
()
{}
virtual
const
std
::
type_info
&
Type
()
const
=
0
;
virtual
void
*
Ptr
()
const
=
0
;
explicit
Placeholder
(
int
type
)
:
type_
(
type
)
{}
virtual
~
Placeholder
()
=
default
;
inline
int
Type
()
const
{
return
type_
;
}
inline
const
void
*
Ptr
()
const
{
return
ptr_
;
}
inline
void
*
Ptr
()
{
return
ptr_
;
}
protected:
void
*
ptr_
;
int
type_
;
};
// Placeholder hides type T, so it doesn't appear as a template
// parameter of Variable.
template
<
typename
T
>
struct
PlaceholderImpl
:
public
Placeholder
{
explicit
PlaceholderImpl
(
T
*
ptr
)
:
ptr_
(
ptr
),
type_
(
typeid
(
T
))
{}
virtual
const
std
::
type_info
&
Type
()
const
{
return
type_
;
}
virtual
void
*
Ptr
()
const
{
return
static_cast
<
void
*>
(
ptr_
.
get
());
}
static_assert
(
IsRegisteredVarType
<
T
>
(),
"Not registered type. Please register T inside var_type_traits.h"
);
PlaceholderImpl
()
:
Placeholder
(
VarTypeTrait
<
T
>::
kId
)
{
this
->
ptr_
=
&
obj_
;
}
std
::
unique_ptr
<
T
>
ptr_
;
const
std
::
type_info
&
type
_
;
private:
T
obj
_
;
};
std
::
unique_ptr
<
Placeholder
>
holder_
;
// pointers to a PlaceholderImpl object indeed.
// name_ is only meaningful with a Scope and accessible by it.
//
// NOTE: Please don't expose name_ by adding methods like
// Variable::Name or Scope::VarName! A variable could have a human
// readable name or an auto-generated scope-unique name. In the
// former case, the caller knows the name and doesn't need to access
// the name; in the latter case, the variable should be identified
// by its address but not the unreadable name.
friend
class
Scope
;
const
std
::
string
*
name_
;
// pointers to a PlaceholderImpl object indeed.
std
::
unique_ptr
<
Placeholder
>
holder_
;
};
}
// namespace framework
...
...
paddle/fluid/framework/variable_test.cc
浏览文件 @
ae6f46a1
...
...
@@ -16,27 +16,28 @@
#include <string>
#include "gtest/gtest.h"
#include "paddle/fluid/framework/tensor.h"
#include "paddle/fluid/framework/variable.h"
TEST
(
Variable
,
GetMutable
)
{
using
paddle
::
framework
::
Variable
;
struct
Tensor
{
int
content_
;
};
namespace
paddle
{
namespace
framework
{
TEST
(
Variable
,
GetMutable
)
{
std
::
unique_ptr
<
Variable
>
v
(
new
Variable
());
Tensor
*
t
=
v
->
GetMutable
<
Tensor
>
();
t
->
content_
=
1234
;
auto
*
t
=
v
->
GetMutable
<
std
::
string
>
();
*
t
=
"1234"
;
const
Tensor
&
tt
=
v
->
Get
<
Tensor
>
();
EXPECT_EQ
(
1234
,
tt
.
content_
);
const
auto
&
tt
=
v
->
Get
<
std
::
string
>
();
EXPECT_EQ
(
"1234"
,
tt
);
try
{
v
->
GetMutable
<
std
::
string
>
();
v
->
GetMutable
<
Tensor
>
();
}
catch
(
std
::
exception
&
e
)
{
return
;
}
EXPECT_TRUE
(
false
);
}
}
// namespace framework
}
// namespace paddle
paddle/fluid/inference/api/details/reset_tensor_array.cc
浏览文件 @
ae6f46a1
...
...
@@ -25,7 +25,7 @@ void TensorArrayBatchCleaner::CollectTensorArrays(framework::Scope *scope) {
// TODO(Superjomn) should avoid the case when a TensorArray is a
// parameter.
if
(
var_name
==
"feed"
||
var_name
==
"fetch"
)
continue
;
if
(
var
->
Type
()
==
typeid
(
framework
::
LoDTensorArray
))
{
if
(
var
->
IsType
<
framework
::
LoDTensorArray
>
(
))
{
VLOG
(
4
)
<<
"collect "
<<
var_name
;
arrays_
.
push_back
(
var
->
GetMutable
<
framework
::
LoDTensorArray
>
());
}
...
...
paddle/fluid/inference/api/details/reset_tensor_array.h
浏览文件 @
ae6f46a1
...
...
@@ -27,8 +27,11 @@ namespace details {
// training phase.
struct
TensorArrayBatchCleaner
{
TensorArrayBatchCleaner
()
{
valid_types_
.
insert
(
typeid
(
framework
::
Tensor
));
valid_types_
.
insert
(
typeid
(
framework
::
LoDTensor
));
constexpr
auto
kTensorId
=
framework
::
VarTypeTrait
<
framework
::
Tensor
>::
kId
;
constexpr
auto
kLoDTensorId
=
framework
::
VarTypeTrait
<
framework
::
LoDTensor
>::
kId
;
valid_types_
.
insert
(
kTensorId
);
valid_types_
.
insert
(
kLoDTensorId
);
}
// Collect the variables that are not Tensor or LoDTensor, and reset them to a
// bool(trick), because some of them are containers, and some operators just
...
...
@@ -46,7 +49,7 @@ struct TensorArrayBatchCleaner {
bool
no_tensor_flag_
{
true
};
std
::
vector
<
framework
::
LoDTensorArray
*>
arrays_
;
std
::
unordered_set
<
std
::
type_index
>
valid_types_
;
std
::
unordered_set
<
int
>
valid_types_
;
std
::
unordered_set
<
framework
::
Variable
*>
no_tensor_vars_
;
};
...
...
paddle/fluid/operators/affine_grid_op.cc
浏览文件 @
ae6f46a1
...
...
@@ -74,7 +74,7 @@ class AffineGridOp : public framework::OperatorWithKernel {
const
framework
::
ExecutionContext
&
ctx
)
const
override
{
framework
::
LibraryType
library
{
framework
::
LibraryType
::
kPlain
};
#ifdef PADDLE_WITH_CUDA
if
(
platform
::
CanCUDNNBeUsed
(
ctx
))
{
if
(
framework
::
CanCUDNNBeUsed
(
ctx
))
{
library
=
framework
::
LibraryType
::
kCUDNN
;
}
#endif
...
...
@@ -184,7 +184,7 @@ class AffineGridOpGrad : public framework::OperatorWithKernel {
const
framework
::
ExecutionContext
&
ctx
)
const
override
{
framework
::
LibraryType
library_
{
framework
::
LibraryType
::
kPlain
};
#ifdef PADDLE_WITH_CUDA
if
(
platform
::
CanCUDNNBeUsed
(
ctx
))
{
if
(
framework
::
CanCUDNNBeUsed
(
ctx
))
{
library_
=
framework
::
LibraryType
::
kCUDNN
;
}
#endif
...
...
paddle/fluid/operators/clip_by_norm_op.h
浏览文件 @
ae6f46a1
...
...
@@ -64,7 +64,7 @@ class ClipByNormKernel : public framework::OpKernel<T> {
output
->
mutable_data
<
T
>
(
context
.
GetPlace
());
}
else
{
PADDLE_THROW
(
"Unexpected branch, input variable type is %s"
,
in_var
->
Type
().
name
(
));
framework
::
ToTypeName
(
in_var
->
Type
()
));
}
PADDLE_ENFORCE_NOT_NULL
(
input
);
...
...
paddle/fluid/operators/controlflow/parallel_do_op.cc
浏览文件 @
ae6f46a1
...
...
@@ -92,7 +92,8 @@ inline void CopyOrShare(const framework::Variable &src,
TensorCopy
(
src_sr
.
value
(),
dst_place
,
dst_sr
->
mutable_value
());
}
}
else
{
PADDLE_THROW
(
"Expect LoDTensor/SelectedRows, get %s"
,
src
.
Type
().
name
());
PADDLE_THROW
(
"Expect LoDTensor/SelectedRows, get %s"
,
framework
::
ToTypeName
(
src
.
Type
()));
}
}
...
...
paddle/fluid/operators/controlflow/while_op.cc
浏览文件 @
ae6f46a1
...
...
@@ -175,14 +175,13 @@ class WhileGradOp : public framework::OperatorBase {
auto
&
og_inside
=
detail
::
Ref
(
cur_scope
.
Var
(
inside_og_name
),
"Cannot find inside gradient %s"
,
inside_og_name
);
if
(
framework
::
IsType
<
framework
::
LoDTensor
>
(
og_outside
.
Type
()
))
{
if
(
og_outside
.
IsType
<
framework
::
LoDTensor
>
(
))
{
auto
&
outside_tensor
=
og_outside
.
Get
<
framework
::
LoDTensor
>
();
auto
&
inside_tensor
=
detail
::
Ref
(
og_inside
.
GetMutable
<
framework
::
LoDTensor
>
());
inside_tensor
.
set_lod
(
outside_tensor
.
lod
());
inside_tensor
.
ShareDataWith
(
outside_tensor
);
}
else
if
(
framework
::
IsType
<
framework
::
LoDTensorArray
>
(
og_outside
.
Type
()))
{
}
else
if
(
og_outside
.
IsType
<
framework
::
LoDTensorArray
>
())
{
auto
&
outside_array
=
og_outside
.
Get
<
framework
::
LoDTensorArray
>
();
auto
&
inside_array
=
detail
::
Ref
(
og_inside
.
GetMutable
<
framework
::
LoDTensorArray
>
());
...
...
@@ -256,7 +255,7 @@ class WhileGradOp : public framework::OperatorBase {
var
->
IsType
<
LoDTensor
>
(),
"Currently the type of var only can be LoDTensorArray, "
"or LoDTensor, but the received var[%s] is %s."
,
inside_grad_name
,
var
->
Type
().
name
(
));
inside_grad_name
,
framework
::
ToTypeName
(
var
->
Type
()
));
if
(
var
->
IsType
<
LoDTensor
>
())
{
auto
&
inside_tensor
=
var
->
Get
<
framework
::
LoDTensor
>
();
...
...
paddle/fluid/operators/conv_op.cc
浏览文件 @
ae6f46a1
...
...
@@ -84,7 +84,7 @@ framework::OpKernelType ConvOp::GetExpectedKernelType(
framework
::
DataLayout
layout
=
framework
::
StringToDataLayout
(
data_format
);
#ifdef PADDLE_WITH_CUDA
if
(
platform
::
CanCUDNNBeUsed
(
ctx
))
{
if
(
framework
::
CanCUDNNBeUsed
(
ctx
))
{
library
=
framework
::
LibraryType
::
kCUDNN
;
}
#endif
...
...
@@ -369,7 +369,7 @@ framework::OpKernelType ConvOpGrad::GetExpectedKernelType(
framework
::
DataLayout
layout_
=
framework
::
StringToDataLayout
(
data_format
);
#ifdef PADDLE_WITH_CUDA
if
(
platform
::
CanCUDNNBeUsed
(
ctx
))
{
if
(
framework
::
CanCUDNNBeUsed
(
ctx
))
{
library_
=
framework
::
LibraryType
::
kCUDNN
;
}
#endif
...
...
paddle/fluid/operators/cudnn_lstm_op.cu.cc
浏览文件 @
ae6f46a1
...
...
@@ -13,8 +13,8 @@ See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/operators/cudnn_rnn_cache.h"
#include "paddle/fluid/operators/math/math_function.h"
#include "paddle/fluid/platform/cudnn_helper.h"
namespace
paddle
{
namespace
operators
{
...
...
@@ -22,239 +22,6 @@ namespace operators {
using
LoDTensor
=
framework
::
LoDTensor
;
using
Tensor
=
framework
::
Tensor
;
struct
CudnnRNNCache
{
CudnnRNNCache
()
{
x_desc_
=
NULL
;
y_desc_
=
NULL
;
dx_desc_
=
NULL
;
dy_desc_
=
NULL
;
}
~
CudnnRNNCache
()
{
release
();
}
cudnnRNNDescriptor_t
rnn_desc_
;
cudnnTensorDescriptor_t
*
x_desc_
;
cudnnTensorDescriptor_t
*
y_desc_
;
cudnnTensorDescriptor_t
*
dx_desc_
;
cudnnTensorDescriptor_t
*
dy_desc_
;
cudnnTensorDescriptor_t
hx_desc_
;
cudnnTensorDescriptor_t
cx_desc_
;
cudnnTensorDescriptor_t
hy_desc_
;
cudnnTensorDescriptor_t
cy_desc_
;
cudnnTensorDescriptor_t
dhx_desc_
;
cudnnTensorDescriptor_t
dcx_desc_
;
cudnnTensorDescriptor_t
dhy_desc_
;
cudnnTensorDescriptor_t
dcy_desc_
;
cudnnTensorDescriptor_t
output_x_desc_
;
cudnnTensorDescriptor_t
output_y_desc_
;
cudnnDropoutDescriptor_t
dropout_desc_
;
size_t
weights_size_
;
cudnnFilterDescriptor_t
w_desc_
;
cudnnFilterDescriptor_t
dw_desc_
;
size_t
workspace_size_
;
size_t
reserve_size_
;
Tensor
reserve_data_
;
Tensor
workspace_data_
;
Tensor
dropout_state_
;
size_t
max_length_
;
float
dropout_prob_
;
bool
is_bidirec_
;
int
batch_size_
;
int
input_size_
;
int
hidden_size_
;
int
num_layers_
;
int
seed_
;
void
init
(
cudnnHandle_t
handle
,
const
framework
::
ExecutionContext
&
ctx
,
size_t
max_len
,
int
batch_size
,
int
input_size
,
int
hidden_size
,
int
num_layers
,
float
dropout_prob
,
bool
is_bidirec
,
int
seed
,
int
weight_numel
)
{
max_length_
=
max_len
;
batch_size_
=
batch_size
;
input_size_
=
input_size
;
hidden_size_
=
hidden_size
;
num_layers_
=
num_layers
;
dropout_prob_
=
dropout_prob
;
is_bidirec_
=
is_bidirec
;
seed_
=
seed
;
x_desc_
=
new
cudnnTensorDescriptor_t
[
max_length_
];
y_desc_
=
new
cudnnTensorDescriptor_t
[
max_length_
];
dx_desc_
=
new
cudnnTensorDescriptor_t
[
max_length_
];
dy_desc_
=
new
cudnnTensorDescriptor_t
[
max_length_
];
int
dim_a
[
3
];
int
stride_a
[
3
];
for
(
size_t
i
=
0
;
i
<
max_length_
;
++
i
)
{
CUDNN_ENFORCE
(
platform
::
dynload
::
cudnnCreateTensorDescriptor
(
&
x_desc_
[
i
]));
CUDNN_ENFORCE
(
platform
::
dynload
::
cudnnCreateTensorDescriptor
(
&
y_desc_
[
i
]));
CUDNN_ENFORCE
(
platform
::
dynload
::
cudnnCreateTensorDescriptor
(
&
dx_desc_
[
i
]));
CUDNN_ENFORCE
(
platform
::
dynload
::
cudnnCreateTensorDescriptor
(
&
dy_desc_
[
i
]));
dim_a
[
0
]
=
batch_size_
;
dim_a
[
1
]
=
input_size_
;
dim_a
[
2
]
=
1
;
stride_a
[
0
]
=
dim_a
[
2
]
*
dim_a
[
1
];
stride_a
[
1
]
=
dim_a
[
2
];
stride_a
[
2
]
=
1
;
CUDNN_ENFORCE
(
platform
::
dynload
::
cudnnSetTensorNdDescriptor
(
x_desc_
[
i
],
CUDNN_DATA_FLOAT
,
3
,
dim_a
,
stride_a
));
CUDNN_ENFORCE
(
platform
::
dynload
::
cudnnSetTensorNdDescriptor
(
dx_desc_
[
i
],
CUDNN_DATA_FLOAT
,
3
,
dim_a
,
stride_a
));
dim_a
[
0
]
=
batch_size_
;
dim_a
[
1
]
=
is_bidirec_
?
hidden_size_
*
2
:
hidden_size_
;
dim_a
[
2
]
=
1
;
stride_a
[
0
]
=
dim_a
[
2
]
*
dim_a
[
1
];
stride_a
[
1
]
=
dim_a
[
2
];
stride_a
[
2
]
=
1
;
CUDNN_ENFORCE
(
platform
::
dynload
::
cudnnSetTensorNdDescriptor
(
y_desc_
[
i
],
CUDNN_DATA_FLOAT
,
3
,
dim_a
,
stride_a
));
CUDNN_ENFORCE
(
platform
::
dynload
::
cudnnSetTensorNdDescriptor
(
dy_desc_
[
i
],
CUDNN_DATA_FLOAT
,
3
,
dim_a
,
stride_a
));
}
dim_a
[
0
]
=
num_layers_
*
(
is_bidirec_
?
2
:
1
);
dim_a
[
1
]
=
batch_size_
;
dim_a
[
2
]
=
hidden_size_
;
stride_a
[
0
]
=
dim_a
[
2
]
*
dim_a
[
1
];
stride_a
[
1
]
=
dim_a
[
2
];
stride_a
[
2
]
=
1
;
CUDNN_ENFORCE
(
platform
::
dynload
::
cudnnCreateTensorDescriptor
(
&
hx_desc_
));
CUDNN_ENFORCE
(
platform
::
dynload
::
cudnnCreateTensorDescriptor
(
&
cx_desc_
));
CUDNN_ENFORCE
(
platform
::
dynload
::
cudnnCreateTensorDescriptor
(
&
hy_desc_
));
CUDNN_ENFORCE
(
platform
::
dynload
::
cudnnCreateTensorDescriptor
(
&
cy_desc_
));
CUDNN_ENFORCE
(
platform
::
dynload
::
cudnnCreateTensorDescriptor
(
&
dhx_desc_
));
CUDNN_ENFORCE
(
platform
::
dynload
::
cudnnCreateTensorDescriptor
(
&
dcx_desc_
));
CUDNN_ENFORCE
(
platform
::
dynload
::
cudnnCreateTensorDescriptor
(
&
dhy_desc_
));
CUDNN_ENFORCE
(
platform
::
dynload
::
cudnnCreateTensorDescriptor
(
&
dcy_desc_
));
CUDNN_ENFORCE
(
platform
::
dynload
::
cudnnSetTensorNdDescriptor
(
hx_desc_
,
CUDNN_DATA_FLOAT
,
3
,
dim_a
,
stride_a
));
CUDNN_ENFORCE
(
platform
::
dynload
::
cudnnSetTensorNdDescriptor
(
cx_desc_
,
CUDNN_DATA_FLOAT
,
3
,
dim_a
,
stride_a
));
CUDNN_ENFORCE
(
platform
::
dynload
::
cudnnSetTensorNdDescriptor
(
hy_desc_
,
CUDNN_DATA_FLOAT
,
3
,
dim_a
,
stride_a
));
CUDNN_ENFORCE
(
platform
::
dynload
::
cudnnSetTensorNdDescriptor
(
cy_desc_
,
CUDNN_DATA_FLOAT
,
3
,
dim_a
,
stride_a
));
CUDNN_ENFORCE
(
platform
::
dynload
::
cudnnSetTensorNdDescriptor
(
dhx_desc_
,
CUDNN_DATA_FLOAT
,
3
,
dim_a
,
stride_a
));
CUDNN_ENFORCE
(
platform
::
dynload
::
cudnnSetTensorNdDescriptor
(
dcx_desc_
,
CUDNN_DATA_FLOAT
,
3
,
dim_a
,
stride_a
));
CUDNN_ENFORCE
(
platform
::
dynload
::
cudnnSetTensorNdDescriptor
(
dhy_desc_
,
CUDNN_DATA_FLOAT
,
3
,
dim_a
,
stride_a
));
CUDNN_ENFORCE
(
platform
::
dynload
::
cudnnSetTensorNdDescriptor
(
dcy_desc_
,
CUDNN_DATA_FLOAT
,
3
,
dim_a
,
stride_a
));
CUDNN_ENFORCE
(
platform
::
dynload
::
cudnnCreateDropoutDescriptor
(
&
dropout_desc_
));
size_t
state_size
;
CUDNN_ENFORCE
(
platform
::
dynload
::
cudnnDropoutGetStatesSize
(
handle
,
&
state_size
);
dropout_state_
.
Resize
({
static_cast
<
int64_t
>
(
state_size
)}));
auto
*
dropout_state_data
=
dropout_state_
.
mutable_data
<
uint8_t
>
(
ctx
.
GetPlace
());
CUDNN_ENFORCE
(
platform
::
dynload
::
cudnnSetDropoutDescriptor
(
dropout_desc_
,
handle
,
dropout_prob_
,
dropout_state_data
,
state_size
,
seed_
));
CUDNN_ENFORCE
(
platform
::
dynload
::
cudnnCreateRNNDescriptor
(
&
rnn_desc_
));
#if CUDNN_VERSION >= 6000
CUDNN_ENFORCE
(
platform
::
dynload
::
cudnnSetRNNDescriptor_v6
(
handle
,
rnn_desc_
,
hidden_size_
,
num_layers_
,
dropout_desc_
,
CUDNN_LINEAR_INPUT
,
is_bidirec_
?
CUDNN_BIDIRECTIONAL
:
CUDNN_UNIDIRECTIONAL
,
CUDNN_LSTM
,
CUDNN_RNN_ALGO_STANDARD
,
CUDNN_DATA_FLOAT
));
#else
CUDNN_ENFORCE
(
platform
::
dynload
::
cudnnSetRNNDescriptor
(
rnn_desc_
,
hidden_size_
,
num_layers_
,
dropout_desc_
,
CUDNN_LINEAR_INPUT
,
is_bidirec_
?
CUDNN_BIDIRECTIONAL
:
CUDNN_UNIDIRECTIONAL
,
CUDNN_LSTM
,
CUDNN_DATA_FLOAT
));
#endif
CUDNN_ENFORCE
(
platform
::
dynload
::
cudnnCreateFilterDescriptor
(
&
w_desc_
));
CUDNN_ENFORCE
(
platform
::
dynload
::
cudnnCreateFilterDescriptor
(
&
dw_desc_
));
CUDNN_ENFORCE
(
platform
::
dynload
::
cudnnGetRNNParamsSize
(
handle
,
rnn_desc_
,
x_desc_
[
0
],
&
weights_size_
,
CUDNN_DATA_FLOAT
));
PADDLE_ENFORCE_EQ
(
weights_size_
,
sizeof
(
float
)
*
weight_numel
,
"cudnn lstm weight size should be SAME"
);
int
dim_w
[
3
];
dim_w
[
0
]
=
weights_size_
/
sizeof
(
float
);
dim_w
[
1
]
=
1
;
dim_w
[
2
]
=
1
;
CUDNN_ENFORCE
(
platform
::
dynload
::
cudnnSetFilterNdDescriptor
(
w_desc_
,
CUDNN_DATA_FLOAT
,
CUDNN_TENSOR_NCHW
,
3
,
dim_w
));
CUDNN_ENFORCE
(
platform
::
dynload
::
cudnnSetFilterNdDescriptor
(
dw_desc_
,
CUDNN_DATA_FLOAT
,
CUDNN_TENSOR_NCHW
,
3
,
dim_w
));
CUDNN_ENFORCE
(
platform
::
dynload
::
cudnnGetRNNWorkspaceSize
(
handle
,
rnn_desc_
,
max_length_
,
x_desc_
,
&
workspace_size_
));
CUDNN_ENFORCE
(
platform
::
dynload
::
cudnnGetRNNTrainingReserveSize
(
handle
,
rnn_desc_
,
max_length_
,
x_desc_
,
&
reserve_size_
));
reserve_data_
.
Resize
({
static_cast
<
int64_t
>
(
reserve_size_
)});
reserve_data_
.
mutable_data
<
uint8_t
>
(
ctx
.
GetPlace
());
workspace_data_
.
Resize
({
static_cast
<
int64_t
>
(
workspace_size_
)});
workspace_data_
.
mutable_data
<
uint8_t
>
(
ctx
.
GetPlace
());
}
void
release
()
{
for
(
size_t
i
=
0
;
i
<
max_length_
;
++
i
)
{
CUDNN_ENFORCE
(
platform
::
dynload
::
cudnnDestroyTensorDescriptor
(
x_desc_
[
i
]));
CUDNN_ENFORCE
(
platform
::
dynload
::
cudnnDestroyTensorDescriptor
(
y_desc_
[
i
]));
CUDNN_ENFORCE
(
platform
::
dynload
::
cudnnDestroyTensorDescriptor
(
dx_desc_
[
i
]));
CUDNN_ENFORCE
(
platform
::
dynload
::
cudnnDestroyTensorDescriptor
(
dy_desc_
[
i
]));
}
delete
[]
x_desc_
;
delete
[]
y_desc_
;
delete
[]
dx_desc_
;
delete
[]
dy_desc_
;
CUDNN_ENFORCE
(
platform
::
dynload
::
cudnnDestroyTensorDescriptor
(
hx_desc_
));
CUDNN_ENFORCE
(
platform
::
dynload
::
cudnnDestroyTensorDescriptor
(
cx_desc_
));
CUDNN_ENFORCE
(
platform
::
dynload
::
cudnnDestroyTensorDescriptor
(
hy_desc_
));
CUDNN_ENFORCE
(
platform
::
dynload
::
cudnnDestroyTensorDescriptor
(
cy_desc_
));
CUDNN_ENFORCE
(
platform
::
dynload
::
cudnnDestroyTensorDescriptor
(
dhx_desc_
));
CUDNN_ENFORCE
(
platform
::
dynload
::
cudnnDestroyTensorDescriptor
(
dcx_desc_
));
CUDNN_ENFORCE
(
platform
::
dynload
::
cudnnDestroyTensorDescriptor
(
dhy_desc_
));
CUDNN_ENFORCE
(
platform
::
dynload
::
cudnnDestroyTensorDescriptor
(
dcy_desc_
));
CUDNN_ENFORCE
(
platform
::
dynload
::
cudnnDestroyDropoutDescriptor
(
dropout_desc_
));
CUDNN_ENFORCE
(
platform
::
dynload
::
cudnnDestroyRNNDescriptor
(
rnn_desc_
));
CUDNN_ENFORCE
(
platform
::
dynload
::
cudnnDestroyFilterDescriptor
(
w_desc_
));
CUDNN_ENFORCE
(
platform
::
dynload
::
cudnnDestroyFilterDescriptor
(
dw_desc_
));
}
};
template
<
typename
T
>
class
CudnnLSTMGPUKernel
:
public
framework
::
OpKernel
<
T
>
{
public:
...
...
@@ -315,9 +82,9 @@ class CudnnLSTMGPUKernel : public framework::OpKernel<T> {
auto
input_w_numel
=
w
->
numel
();
auto
batch_size
=
x
->
dims
()[
1
];
cudnn_rnn_cache
->
init
(
handle
,
ctx
,
max_len
,
batch_size
,
input
_size
,
hidden_size
,
num_layers
,
dropout_prob
,
is_bidirec
,
seed
,
input_w_numel
);
cudnn_rnn_cache
->
init
(
handle
,
ctx
.
GetPlace
(),
max_len
,
batch
_size
,
input_size
,
hidden_size
,
num_layers
,
dropout_prob
,
is_bidirec
,
seed
,
input_w_numel
);
}
auto
run_seq_len
=
x
->
dims
()[
0
];
...
...
paddle/fluid/operators/cudnn_rnn_cache.h
0 → 100644
浏览文件 @
ae6f46a1
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include "paddle/fluid/framework/tensor.h"
#include "paddle/fluid/platform/cudnn_helper.h"
namespace
paddle
{
namespace
operators
{
struct
CudnnRNNCache
{
CudnnRNNCache
()
{
x_desc_
=
NULL
;
y_desc_
=
NULL
;
dx_desc_
=
NULL
;
dy_desc_
=
NULL
;
}
~
CudnnRNNCache
()
{
release
();
}
cudnnRNNDescriptor_t
rnn_desc_
;
cudnnTensorDescriptor_t
*
x_desc_
;
cudnnTensorDescriptor_t
*
y_desc_
;
cudnnTensorDescriptor_t
*
dx_desc_
;
cudnnTensorDescriptor_t
*
dy_desc_
;
cudnnTensorDescriptor_t
hx_desc_
;
cudnnTensorDescriptor_t
cx_desc_
;
cudnnTensorDescriptor_t
hy_desc_
;
cudnnTensorDescriptor_t
cy_desc_
;
cudnnTensorDescriptor_t
dhx_desc_
;
cudnnTensorDescriptor_t
dcx_desc_
;
cudnnTensorDescriptor_t
dhy_desc_
;
cudnnTensorDescriptor_t
dcy_desc_
;
cudnnTensorDescriptor_t
output_x_desc_
;
cudnnTensorDescriptor_t
output_y_desc_
;
cudnnDropoutDescriptor_t
dropout_desc_
;
size_t
weights_size_
;
cudnnFilterDescriptor_t
w_desc_
;
cudnnFilterDescriptor_t
dw_desc_
;
size_t
workspace_size_
;
size_t
reserve_size_
;
framework
::
Tensor
reserve_data_
;
framework
::
Tensor
workspace_data_
;
framework
::
Tensor
dropout_state_
;
size_t
max_length_
;
float
dropout_prob_
;
bool
is_bidirec_
;
int
batch_size_
;
int
input_size_
;
int
hidden_size_
;
int
num_layers_
;
int
seed_
;
void
init
(
cudnnHandle_t
handle
,
const
platform
::
Place
&
place
,
size_t
max_len
,
int
batch_size
,
int
input_size
,
int
hidden_size
,
int
num_layers
,
float
dropout_prob
,
bool
is_bidirec
,
int
seed
,
int
weight_numel
)
{
max_length_
=
max_len
;
batch_size_
=
batch_size
;
input_size_
=
input_size
;
hidden_size_
=
hidden_size
;
num_layers_
=
num_layers
;
dropout_prob_
=
dropout_prob
;
is_bidirec_
=
is_bidirec
;
seed_
=
seed
;
x_desc_
=
new
cudnnTensorDescriptor_t
[
max_length_
];
y_desc_
=
new
cudnnTensorDescriptor_t
[
max_length_
];
dx_desc_
=
new
cudnnTensorDescriptor_t
[
max_length_
];
dy_desc_
=
new
cudnnTensorDescriptor_t
[
max_length_
];
int
dim_a
[
3
];
int
stride_a
[
3
];
for
(
size_t
i
=
0
;
i
<
max_length_
;
++
i
)
{
CUDNN_ENFORCE
(
platform
::
dynload
::
cudnnCreateTensorDescriptor
(
&
x_desc_
[
i
]));
CUDNN_ENFORCE
(
platform
::
dynload
::
cudnnCreateTensorDescriptor
(
&
y_desc_
[
i
]));
CUDNN_ENFORCE
(
platform
::
dynload
::
cudnnCreateTensorDescriptor
(
&
dx_desc_
[
i
]));
CUDNN_ENFORCE
(
platform
::
dynload
::
cudnnCreateTensorDescriptor
(
&
dy_desc_
[
i
]));
dim_a
[
0
]
=
batch_size_
;
dim_a
[
1
]
=
input_size_
;
dim_a
[
2
]
=
1
;
stride_a
[
0
]
=
dim_a
[
2
]
*
dim_a
[
1
];
stride_a
[
1
]
=
dim_a
[
2
];
stride_a
[
2
]
=
1
;
CUDNN_ENFORCE
(
platform
::
dynload
::
cudnnSetTensorNdDescriptor
(
x_desc_
[
i
],
CUDNN_DATA_FLOAT
,
3
,
dim_a
,
stride_a
));
CUDNN_ENFORCE
(
platform
::
dynload
::
cudnnSetTensorNdDescriptor
(
dx_desc_
[
i
],
CUDNN_DATA_FLOAT
,
3
,
dim_a
,
stride_a
));
dim_a
[
0
]
=
batch_size_
;
dim_a
[
1
]
=
is_bidirec_
?
hidden_size_
*
2
:
hidden_size_
;
dim_a
[
2
]
=
1
;
stride_a
[
0
]
=
dim_a
[
2
]
*
dim_a
[
1
];
stride_a
[
1
]
=
dim_a
[
2
];
stride_a
[
2
]
=
1
;
CUDNN_ENFORCE
(
platform
::
dynload
::
cudnnSetTensorNdDescriptor
(
y_desc_
[
i
],
CUDNN_DATA_FLOAT
,
3
,
dim_a
,
stride_a
));
CUDNN_ENFORCE
(
platform
::
dynload
::
cudnnSetTensorNdDescriptor
(
dy_desc_
[
i
],
CUDNN_DATA_FLOAT
,
3
,
dim_a
,
stride_a
));
}
dim_a
[
0
]
=
num_layers_
*
(
is_bidirec_
?
2
:
1
);
dim_a
[
1
]
=
batch_size_
;
dim_a
[
2
]
=
hidden_size_
;
stride_a
[
0
]
=
dim_a
[
2
]
*
dim_a
[
1
];
stride_a
[
1
]
=
dim_a
[
2
];
stride_a
[
2
]
=
1
;
CUDNN_ENFORCE
(
platform
::
dynload
::
cudnnCreateTensorDescriptor
(
&
hx_desc_
));
CUDNN_ENFORCE
(
platform
::
dynload
::
cudnnCreateTensorDescriptor
(
&
cx_desc_
));
CUDNN_ENFORCE
(
platform
::
dynload
::
cudnnCreateTensorDescriptor
(
&
hy_desc_
));
CUDNN_ENFORCE
(
platform
::
dynload
::
cudnnCreateTensorDescriptor
(
&
cy_desc_
));
CUDNN_ENFORCE
(
platform
::
dynload
::
cudnnCreateTensorDescriptor
(
&
dhx_desc_
));
CUDNN_ENFORCE
(
platform
::
dynload
::
cudnnCreateTensorDescriptor
(
&
dcx_desc_
));
CUDNN_ENFORCE
(
platform
::
dynload
::
cudnnCreateTensorDescriptor
(
&
dhy_desc_
));
CUDNN_ENFORCE
(
platform
::
dynload
::
cudnnCreateTensorDescriptor
(
&
dcy_desc_
));
CUDNN_ENFORCE
(
platform
::
dynload
::
cudnnSetTensorNdDescriptor
(
hx_desc_
,
CUDNN_DATA_FLOAT
,
3
,
dim_a
,
stride_a
));
CUDNN_ENFORCE
(
platform
::
dynload
::
cudnnSetTensorNdDescriptor
(
cx_desc_
,
CUDNN_DATA_FLOAT
,
3
,
dim_a
,
stride_a
));
CUDNN_ENFORCE
(
platform
::
dynload
::
cudnnSetTensorNdDescriptor
(
hy_desc_
,
CUDNN_DATA_FLOAT
,
3
,
dim_a
,
stride_a
));
CUDNN_ENFORCE
(
platform
::
dynload
::
cudnnSetTensorNdDescriptor
(
cy_desc_
,
CUDNN_DATA_FLOAT
,
3
,
dim_a
,
stride_a
));
CUDNN_ENFORCE
(
platform
::
dynload
::
cudnnSetTensorNdDescriptor
(
dhx_desc_
,
CUDNN_DATA_FLOAT
,
3
,
dim_a
,
stride_a
));
CUDNN_ENFORCE
(
platform
::
dynload
::
cudnnSetTensorNdDescriptor
(
dcx_desc_
,
CUDNN_DATA_FLOAT
,
3
,
dim_a
,
stride_a
));
CUDNN_ENFORCE
(
platform
::
dynload
::
cudnnSetTensorNdDescriptor
(
dhy_desc_
,
CUDNN_DATA_FLOAT
,
3
,
dim_a
,
stride_a
));
CUDNN_ENFORCE
(
platform
::
dynload
::
cudnnSetTensorNdDescriptor
(
dcy_desc_
,
CUDNN_DATA_FLOAT
,
3
,
dim_a
,
stride_a
));
CUDNN_ENFORCE
(
platform
::
dynload
::
cudnnCreateDropoutDescriptor
(
&
dropout_desc_
));
size_t
state_size
;
CUDNN_ENFORCE
(
platform
::
dynload
::
cudnnDropoutGetStatesSize
(
handle
,
&
state_size
);
dropout_state_
.
Resize
({
static_cast
<
int64_t
>
(
state_size
)}));
auto
*
dropout_state_data
=
dropout_state_
.
mutable_data
<
uint8_t
>
(
place
);
CUDNN_ENFORCE
(
platform
::
dynload
::
cudnnSetDropoutDescriptor
(
dropout_desc_
,
handle
,
dropout_prob_
,
dropout_state_data
,
state_size
,
seed_
));
CUDNN_ENFORCE
(
platform
::
dynload
::
cudnnCreateRNNDescriptor
(
&
rnn_desc_
));
#if CUDNN_VERSION >= 6000
CUDNN_ENFORCE
(
platform
::
dynload
::
cudnnSetRNNDescriptor_v6
(
handle
,
rnn_desc_
,
hidden_size_
,
num_layers_
,
dropout_desc_
,
CUDNN_LINEAR_INPUT
,
is_bidirec_
?
CUDNN_BIDIRECTIONAL
:
CUDNN_UNIDIRECTIONAL
,
CUDNN_LSTM
,
CUDNN_RNN_ALGO_STANDARD
,
CUDNN_DATA_FLOAT
));
#else
CUDNN_ENFORCE
(
platform
::
dynload
::
cudnnSetRNNDescriptor
(
rnn_desc_
,
hidden_size_
,
num_layers_
,
dropout_desc_
,
CUDNN_LINEAR_INPUT
,
is_bidirec_
?
CUDNN_BIDIRECTIONAL
:
CUDNN_UNIDIRECTIONAL
,
CUDNN_LSTM
,
CUDNN_DATA_FLOAT
));
#endif
CUDNN_ENFORCE
(
platform
::
dynload
::
cudnnCreateFilterDescriptor
(
&
w_desc_
));
CUDNN_ENFORCE
(
platform
::
dynload
::
cudnnCreateFilterDescriptor
(
&
dw_desc_
));
CUDNN_ENFORCE
(
platform
::
dynload
::
cudnnGetRNNParamsSize
(
handle
,
rnn_desc_
,
x_desc_
[
0
],
&
weights_size_
,
CUDNN_DATA_FLOAT
));
PADDLE_ENFORCE_EQ
(
weights_size_
,
sizeof
(
float
)
*
weight_numel
,
"cudnn lstm weight size should be SAME"
);
int
dim_w
[
3
];
dim_w
[
0
]
=
weights_size_
/
sizeof
(
float
);
dim_w
[
1
]
=
1
;
dim_w
[
2
]
=
1
;
CUDNN_ENFORCE
(
platform
::
dynload
::
cudnnSetFilterNdDescriptor
(
w_desc_
,
CUDNN_DATA_FLOAT
,
CUDNN_TENSOR_NCHW
,
3
,
dim_w
));
CUDNN_ENFORCE
(
platform
::
dynload
::
cudnnSetFilterNdDescriptor
(
dw_desc_
,
CUDNN_DATA_FLOAT
,
CUDNN_TENSOR_NCHW
,
3
,
dim_w
));
CUDNN_ENFORCE
(
platform
::
dynload
::
cudnnGetRNNWorkspaceSize
(
handle
,
rnn_desc_
,
max_length_
,
x_desc_
,
&
workspace_size_
));
CUDNN_ENFORCE
(
platform
::
dynload
::
cudnnGetRNNTrainingReserveSize
(
handle
,
rnn_desc_
,
max_length_
,
x_desc_
,
&
reserve_size_
));
reserve_data_
.
Resize
({
static_cast
<
int64_t
>
(
reserve_size_
)});
reserve_data_
.
mutable_data
<
uint8_t
>
(
place
);
workspace_data_
.
Resize
({
static_cast
<
int64_t
>
(
workspace_size_
)});
workspace_data_
.
mutable_data
<
uint8_t
>
(
place
);
}
void
release
()
{
for
(
size_t
i
=
0
;
i
<
max_length_
;
++
i
)
{
CUDNN_ENFORCE
(
platform
::
dynload
::
cudnnDestroyTensorDescriptor
(
x_desc_
[
i
]));
CUDNN_ENFORCE
(
platform
::
dynload
::
cudnnDestroyTensorDescriptor
(
y_desc_
[
i
]));
CUDNN_ENFORCE
(
platform
::
dynload
::
cudnnDestroyTensorDescriptor
(
dx_desc_
[
i
]));
CUDNN_ENFORCE
(
platform
::
dynload
::
cudnnDestroyTensorDescriptor
(
dy_desc_
[
i
]));
}
delete
[]
x_desc_
;
delete
[]
y_desc_
;
delete
[]
dx_desc_
;
delete
[]
dy_desc_
;
CUDNN_ENFORCE
(
platform
::
dynload
::
cudnnDestroyTensorDescriptor
(
hx_desc_
));
CUDNN_ENFORCE
(
platform
::
dynload
::
cudnnDestroyTensorDescriptor
(
cx_desc_
));
CUDNN_ENFORCE
(
platform
::
dynload
::
cudnnDestroyTensorDescriptor
(
hy_desc_
));
CUDNN_ENFORCE
(
platform
::
dynload
::
cudnnDestroyTensorDescriptor
(
cy_desc_
));
CUDNN_ENFORCE
(
platform
::
dynload
::
cudnnDestroyTensorDescriptor
(
dhx_desc_
));
CUDNN_ENFORCE
(
platform
::
dynload
::
cudnnDestroyTensorDescriptor
(
dcx_desc_
));
CUDNN_ENFORCE
(
platform
::
dynload
::
cudnnDestroyTensorDescriptor
(
dhy_desc_
));
CUDNN_ENFORCE
(
platform
::
dynload
::
cudnnDestroyTensorDescriptor
(
dcy_desc_
));
CUDNN_ENFORCE
(
platform
::
dynload
::
cudnnDestroyDropoutDescriptor
(
dropout_desc_
));
CUDNN_ENFORCE
(
platform
::
dynload
::
cudnnDestroyRNNDescriptor
(
rnn_desc_
));
CUDNN_ENFORCE
(
platform
::
dynload
::
cudnnDestroyFilterDescriptor
(
w_desc_
));
CUDNN_ENFORCE
(
platform
::
dynload
::
cudnnDestroyFilterDescriptor
(
dw_desc_
));
}
};
}
// namespace operators
}
// namespace paddle
paddle/fluid/operators/distributed/brpc_sendrecvop_utils.cc
浏览文件 @
ae6f46a1
...
...
@@ -171,8 +171,7 @@ void SerializeToIOBuf(const std::string& name, framework::Variable* var,
if
(
var
->
IsType
<
framework
::
SelectedRows
>
())
{
auto
*
slr
=
var
->
GetMutable
<
framework
::
SelectedRows
>
();
size_t
rows_memory_size
=
slr
->
rows
().
size
()
*
framework
::
SizeOfType
(
typeid
(
int64_t
));
size_t
rows_memory_size
=
slr
->
rows
().
size
()
*
sizeof
(
int64_t
);
IOBufWriter
::
Append
(
iobuf
,
::
sendrecv
::
VariableMessage
::
kRowsFieldNumber
,
reinterpret_cast
<
const
char
*>
(
slr
->
rows
().
data
()),
...
...
paddle/fluid/operators/distributed_ops/split_ids_op.h
浏览文件 @
ae6f46a1
...
...
@@ -116,7 +116,7 @@ class SplitIdsOpKernel : public framework::OpKernel<T> {
}
else
{
PADDLE_THROW
(
"% should be LoDTensor or SelectedRows, but the received type is %s"
,
ctx
.
Inputs
(
"Ids"
)[
0
],
ids_var
->
Type
().
name
(
));
ctx
.
Inputs
(
"Ids"
)[
0
],
framework
::
ToTypeName
(
ids_var
->
Type
()
));
}
}
};
...
...
paddle/fluid/operators/elementwise/elementwise_mul_op.h
浏览文件 @
ae6f46a1
...
...
@@ -83,7 +83,7 @@ class ElementwiseMulKernel : public framework::OpKernel<T> {
z
=
ctx
.
Output
<
framework
::
LoDTensor
>
(
"Out"
);
}
else
{
PADDLE_THROW
(
"X's type[%s] is not supported by elementwise_op."
,
x_var
->
Type
().
name
(
));
framework
::
ToTypeName
(
x_var
->
Type
()
));
}
z
->
mutable_data
<
T
>
(
ctx
.
GetPlace
());
...
...
paddle/fluid/operators/grid_sampler_op.cc
浏览文件 @
ae6f46a1
...
...
@@ -59,7 +59,7 @@ class GridSampleOp : public framework::OperatorWithKernel {
const
framework
::
ExecutionContext
&
ctx
)
const
override
{
framework
::
LibraryType
library_
{
framework
::
LibraryType
::
kPlain
};
#ifdef PADDLE_WITH_CUDA
if
(
platform
::
CanCUDNNBeUsed
(
ctx
))
{
if
(
framework
::
CanCUDNNBeUsed
(
ctx
))
{
library_
=
framework
::
LibraryType
::
kCUDNN
;
}
#endif
...
...
@@ -155,7 +155,7 @@ class GridSampleOpGrad : public framework::OperatorWithKernel {
const
framework
::
ExecutionContext
&
ctx
)
const
override
{
framework
::
LibraryType
library_
{
framework
::
LibraryType
::
kPlain
};
#ifdef PADDLE_WITH_CUDA
if
(
platform
::
CanCUDNNBeUsed
(
ctx
))
{
if
(
framework
::
CanCUDNNBeUsed
(
ctx
))
{
library_
=
framework
::
LibraryType
::
kCUDNN
;
}
#endif
...
...
paddle/fluid/operators/optimizers/adadelta_op.h
浏览文件 @
ae6f46a1
...
...
@@ -27,12 +27,14 @@ class AdadeltaOpKernel : public framework::OpKernel<T> {
PADDLE_ENFORCE
(
param_var
->
IsType
<
framework
::
LoDTensor
>
(),
"The Var(%s)'s type should be LoDTensor, "
"but the received is %s"
,
ctx
.
Inputs
(
"Param"
).
front
(),
param_var
->
Type
().
name
());
ctx
.
Inputs
(
"Param"
).
front
(),
framework
::
ToTypeName
(
param_var
->
Type
()));
const
auto
*
grad_var
=
ctx
.
InputVar
(
"Grad"
);
PADDLE_ENFORCE
(
grad_var
->
IsType
<
framework
::
LoDTensor
>
(),
"The Var(%s)'s type should be LoDTensor, "
"but the received is %s"
,
ctx
.
Inputs
(
"Grad"
).
front
(),
grad_var
->
Type
().
name
());
ctx
.
Inputs
(
"Grad"
).
front
(),
framework
::
ToTypeName
(
grad_var
->
Type
()));
auto
param_out_tensor
=
ctx
.
Output
<
framework
::
Tensor
>
(
"ParamOut"
);
auto
avg_squared_grad_out_tensor
=
...
...
paddle/fluid/operators/optimizers/adagrad_op.h
浏览文件 @
ae6f46a1
...
...
@@ -50,7 +50,8 @@ class AdagradOpKernel : public framework::OpKernel<T> {
PADDLE_ENFORCE
(
param_var
->
IsType
<
framework
::
LoDTensor
>
(),
"The Var(%s)'s type should be LoDTensor, "
"but the received is %s"
,
ctx
.
Inputs
(
"Param"
).
front
(),
param_var
->
Type
().
name
());
ctx
.
Inputs
(
"Param"
).
front
(),
framework
::
ToTypeName
(
param_var
->
Type
()));
auto
*
param_out_tensor
=
ctx
.
Output
<
framework
::
Tensor
>
(
"ParamOut"
);
auto
*
moment_out_tensor
=
ctx
.
Output
<
framework
::
Tensor
>
(
"MomentOut"
);
...
...
paddle/fluid/operators/optimizers/adam_op.h
浏览文件 @
ae6f46a1
...
...
@@ -235,7 +235,8 @@ class AdamOpKernel : public framework::OpKernel<T> {
PADDLE_ENFORCE
(
param_var
->
IsType
<
framework
::
LoDTensor
>
(),
"The Var(%s)'s type should be LoDTensor, "
"but the received is %s"
,
ctx
.
Inputs
(
"Param"
).
front
(),
param_var
->
Type
().
name
());
ctx
.
Inputs
(
"Param"
).
front
(),
framework
::
ToTypeName
(
param_var
->
Type
()));
using
paddle
::
framework
::
LoDTensor
;
using
paddle
::
operators
::
detail
::
Ref
;
...
...
paddle/fluid/operators/optimizers/adamax_op.h
浏览文件 @
ae6f46a1
...
...
@@ -27,12 +27,14 @@ class AdamaxOpKernel : public framework::OpKernel<T> {
PADDLE_ENFORCE
(
param_var
->
IsType
<
framework
::
LoDTensor
>
(),
"The Var(%s)'s type should be LoDTensor, "
"but the received is %s"
,
ctx
.
Inputs
(
"Param"
).
front
(),
param_var
->
Type
().
name
());
ctx
.
Inputs
(
"Param"
).
front
(),
framework
::
ToTypeName
(
param_var
->
Type
()));
const
auto
*
grad_var
=
ctx
.
InputVar
(
"Grad"
);
PADDLE_ENFORCE
(
grad_var
->
IsType
<
framework
::
LoDTensor
>
(),
"The Var(%s)'s type should be LoDTensor, "
"but the received is %s"
,
ctx
.
Inputs
(
"Grad"
).
front
(),
grad_var
->
Type
().
name
());
ctx
.
Inputs
(
"Grad"
).
front
(),
framework
::
ToTypeName
(
grad_var
->
Type
()));
auto
param_out_tensor
=
ctx
.
Output
<
framework
::
Tensor
>
(
"ParamOut"
);
auto
moment_out_tensor
=
ctx
.
Output
<
framework
::
Tensor
>
(
"MomentOut"
);
...
...
paddle/fluid/operators/optimizers/decayed_adagrad_op.h
浏览文件 @
ae6f46a1
...
...
@@ -27,12 +27,14 @@ class DecayedAdagradOpKernel : public framework::OpKernel<T> {
PADDLE_ENFORCE
(
param_var
->
IsType
<
framework
::
LoDTensor
>
(),
"The Var(%s)'s type should be LoDTensor, "
"but the received is %s"
,
ctx
.
Inputs
(
"Param"
).
front
(),
param_var
->
Type
().
name
());
ctx
.
Inputs
(
"Param"
).
front
(),
framework
::
ToTypeName
(
param_var
->
Type
()));
const
auto
*
grad_var
=
ctx
.
InputVar
(
"Grad"
);
PADDLE_ENFORCE
(
grad_var
->
IsType
<
framework
::
LoDTensor
>
(),
"The Var(%s)'s type should be LoDTensor, "
"but the received is %s"
,
ctx
.
Inputs
(
"Grad"
).
front
(),
grad_var
->
Type
().
name
());
ctx
.
Inputs
(
"Grad"
).
front
(),
framework
::
ToTypeName
(
grad_var
->
Type
()));
auto
param_out_tensor
=
ctx
.
Output
<
framework
::
Tensor
>
(
"ParamOut"
);
auto
moment_out_tensor
=
ctx
.
Output
<
framework
::
Tensor
>
(
"MomentOut"
);
...
...
paddle/fluid/operators/optimizers/ftrl_op.h
浏览文件 @
ae6f46a1
...
...
@@ -32,12 +32,14 @@ class FTRLOpKernel : public framework::OpKernel<T> {
PADDLE_ENFORCE
(
param_var
->
IsType
<
framework
::
LoDTensor
>
(),
"The Var(%s)'s type should be LoDTensor, "
"but the received is %s"
,
ctx
.
Inputs
(
"Param"
).
front
(),
param_var
->
Type
().
name
());
ctx
.
Inputs
(
"Param"
).
front
(),
framework
::
ToTypeName
(
param_var
->
Type
()));
const
auto
*
grad_var
=
ctx
.
InputVar
(
"Grad"
);
PADDLE_ENFORCE
(
grad_var
->
IsType
<
framework
::
LoDTensor
>
(),
"The Var(%s)'s type should be LoDTensor, "
"but the received is %s"
,
ctx
.
Inputs
(
"Grad"
).
front
(),
grad_var
->
Type
().
name
());
ctx
.
Inputs
(
"Grad"
).
front
(),
framework
::
ToTypeName
(
grad_var
->
Type
()));
auto
*
param_out
=
ctx
.
Output
<
Tensor
>
(
"ParamOut"
);
auto
*
sq_accum_out
=
ctx
.
Output
<
Tensor
>
(
"SquaredAccumOut"
);
...
...
paddle/fluid/operators/optimizers/momentum_op.h
浏览文件 @
ae6f46a1
...
...
@@ -393,7 +393,7 @@ class MomentumOpKernel : public framework::OpKernel<T> {
PADDLE_THROW
(
string
::
Sprintf
(
"MomentumOp only supports LoDTensor or SelectedRows "
"gradient, but the received Variable Type is %s"
,
grad_var
->
Type
().
name
(
)));
framework
::
ToTypeName
(
grad_var
->
Type
()
)));
}
}
};
...
...
paddle/fluid/operators/optimizers/sgd_op.cu
浏览文件 @
ae6f46a1
...
...
@@ -60,7 +60,8 @@ class SGDOpCUDAKernel : public framework::OpKernel<T> {
PADDLE_ENFORCE
(
param_var
->
IsType
<
framework
::
LoDTensor
>
(),
"The Var(%s)'s type should be LoDTensor, "
"but the received is %s"
,
ctx
.
Inputs
(
"Param"
).
front
(),
param_var
->
Type
().
name
());
ctx
.
Inputs
(
"Param"
).
front
(),
framework
::
ToTypeName
(
param_var
->
Type
()));
auto
*
param
=
ctx
.
Input
<
framework
::
Tensor
>
(
"Param"
);
auto
*
param_out
=
ctx
.
Output
<
framework
::
Tensor
>
(
"ParamOut"
);
...
...
paddle/fluid/operators/pool_op.cc
浏览文件 @
ae6f46a1
...
...
@@ -92,7 +92,7 @@ framework::OpKernelType PoolOp::GetExpectedKernelType(
framework
::
DataLayout
layout_
=
framework
::
StringToDataLayout
(
data_format
);
#ifdef PADDLE_WITH_CUDA
if
(
platform
::
CanCUDNNBeUsed
(
ctx
))
{
if
(
framework
::
CanCUDNNBeUsed
(
ctx
))
{
library_
=
framework
::
LibraryType
::
kCUDNN
;
}
#endif
...
...
@@ -122,7 +122,7 @@ framework::OpKernelType PoolOpGrad::GetExpectedKernelType(
framework
::
DataLayout
layout_
=
framework
::
StringToDataLayout
(
data_format
);
#ifdef PADDLE_WITH_CUDA
if
(
platform
::
CanCUDNNBeUsed
(
ctx
))
{
if
(
framework
::
CanCUDNNBeUsed
(
ctx
))
{
library_
=
framework
::
LibraryType
::
kCUDNN
;
}
#endif
...
...
paddle/fluid/operators/softmax_op.cc
浏览文件 @
ae6f46a1
...
...
@@ -50,7 +50,7 @@ class SoftmaxOp : public framework::OperatorWithKernel {
framework
::
DataLayout
layout_
=
framework
::
StringToDataLayout
(
data_format
);
#ifdef PADDLE_WITH_CUDA
if
(
platform
::
CanCUDNNBeUsed
(
ctx
))
{
if
(
framework
::
CanCUDNNBeUsed
(
ctx
))
{
library_
=
framework
::
LibraryType
::
kCUDNN
;
}
#endif
...
...
@@ -157,7 +157,7 @@ class SoftmaxOpGrad : public framework::OperatorWithKernel {
framework
::
DataLayout
layout_
=
framework
::
StringToDataLayout
(
data_format
);
#ifdef PADDLE_WITH_CUDA
if
(
platform
::
CanCUDNNBeUsed
(
ctx
))
{
if
(
framework
::
CanCUDNNBeUsed
(
ctx
))
{
library_
=
framework
::
LibraryType
::
kCUDNN
;
}
#endif
...
...
paddle/fluid/operators/sum_mkldnn_op.cc
浏览文件 @
ae6f46a1
...
...
@@ -245,7 +245,7 @@ class SumMKLDNNOpKernel : public paddle::framework::OpKernel<T> {
}
}
else
{
PADDLE_THROW
(
"Unexpected branch, output variable type is %s"
,
out_var
->
Type
().
name
(
));
framework
::
ToTypeName
(
out_var
->
Type
()
));
}
}
};
...
...
paddle/fluid/operators/sum_op.cc
浏览文件 @
ae6f46a1
...
...
@@ -126,7 +126,7 @@ class SumOp : public framework::OperatorWithKernel {
PADDLE_THROW
(
"Cannot find the input data type by all input data"
);
}
PADDLE_THROW
(
"Unexpected branch. Input type is %s"
,
x_vars
[
0
]
->
Type
().
name
(
));
framework
::
ToTypeName
(
x_vars
[
0
]
->
Type
()
));
}
};
...
...
paddle/fluid/operators/sum_op.h
浏览文件 @
ae6f46a1
...
...
@@ -163,7 +163,7 @@ class SumKernel : public framework::OpKernel<T> {
}
}
else
{
PADDLE_THROW
(
"Unexpected branch, output variable type is %s"
,
out_var
->
Type
().
name
(
));
framework
::
ToTypeName
(
out_var
->
Type
()
));
}
}
};
...
...
paddle/fluid/operators/warpctc_op.cc
浏览文件 @
ae6f46a1
...
...
@@ -51,7 +51,7 @@ class WarpCTCOp : public framework::OperatorWithKernel {
const
framework
::
ExecutionContext
&
ctx
)
const
override
{
framework
::
LibraryType
library_
{
framework
::
LibraryType
::
kPlain
};
#ifdef PADDLE_WITH_CUDA
if
(
platform
::
CanCUDNNBeUsed
(
ctx
))
{
if
(
framework
::
CanCUDNNBeUsed
(
ctx
))
{
library_
=
framework
::
LibraryType
::
kCUDNN
;
}
#endif
...
...
paddle/fluid/platform/cudnn_helper.h
浏览文件 @
ae6f46a1
...
...
@@ -17,7 +17,6 @@ limitations under the License. */
#include <string>
#include <vector>
#include "paddle/fluid/framework/operator.h"
#include "paddle/fluid/platform/dynload/cudnn.h"
#include "paddle/fluid/platform/enforce.h"
#include "paddle/fluid/platform/float16.h"
...
...
@@ -451,18 +450,6 @@ class ScopedActivationDescriptor {
DISABLE_COPY_AND_ASSIGN
(
ScopedActivationDescriptor
);
};
inline
bool
CanCUDNNBeUsed
(
const
framework
::
ExecutionContext
&
ctx
)
{
bool
use_cudnn
=
ctx
.
Attr
<
bool
>
(
"use_cudnn"
);
use_cudnn
&=
paddle
::
platform
::
is_gpu_place
(
ctx
.
GetPlace
());
#ifdef PADDLE_WITH_CUDA
if
(
use_cudnn
)
{
auto
&
dev_ctx
=
ctx
.
device_context
<
platform
::
CUDADeviceContext
>
();
use_cudnn
&=
dev_ctx
.
cudnn_handle
()
!=
nullptr
;
}
#endif
return
use_cudnn
;
}
#if CUDNN_VERSION >= 7001
class
ScopedCTCLossDescriptor
{
public:
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录