s920243400 / PaddleDetection (forked from PaddlePaddle / PaddleDetection)

Commit d1a17cad
Authored Nov 29, 2018 by phlrain
Parent 487ee36a

    fix cudnn rnn; test=develop
Showing 4 changed files with 78 additions and 49 deletions (+78 -49):

    paddle/fluid/operators/cudnn_lstm_op.cc     +26  -12
    paddle/fluid/operators/cudnn_lstm_op.cu.cc   +6   -3
    paddle/fluid/operators/cudnn_lstm_op.h       +5   -2
    python/paddle/fluid/layers/nn.py            +41  -32
paddle/fluid/operators/cudnn_lstm_op.cc

-/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
+/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
 Licensed under the Apache License, Version 2.0 (the "License");
 you may not use this file except in compliance with the License.
...
@@ -122,13 +122,11 @@ class CudnnLSTMOpMaker : public framework::OpProtoAndCheckerMaker {
               "The will affect the shape of the Out, last_h, and last_c")
         .SetDefault(false);
     AddAttr<int>("input_size", "input size ot the Input Tensor").SetDefault(10);
-    AddAttr<int>("batch_size", "the instance number the batch").SetDefault(10);
     AddAttr<int>("hidden_size", "hidden size of the LSTM").SetDefault(100);
     AddAttr<int>("num_layers", "the total layer number of the LSTM")
         .SetDefault(1);
     AddAttr<bool>("is_test", "True if in test phase.").SetDefault(false);
-    AddAttr<bool>("fix_seed", "True if it fix dropout seed").SetDefault(false);
-    AddAttr<int>("seed", "seed to used if fix_seed is True").SetDefault(0);
+    AddAttr<int>("seed", "seed to used if fix_seed is True").SetDefault(-1);
     AddComment(R"DOC(
 CUDNN LSTM implementation
...
@@ -136,16 +134,32 @@ A four-gate Long Short-Term Memory network with no peephole connections.
 In the forward pass the output ht and cell output ct for a given iteration can be computed from the recurrent input ht-1,
 the cell input ct-1 and the previous layer input xt given matrices W, R and biases bW, bR from the following equations:
-it = σ(Wi X xt + Ri X ht-1 + bWi + bRi)
-ft = σ(Wf X xt + Rf X ht-1 + bWf + bRf)
-ot = σ(Wo X xt + Ro X ht-1 + bWo + bRo)
-c't = tanh(Wc X xt + Rc X ht-1 + bWc + bRc)
-ct = ft * ct-1 + it * c't
-ht = ot * tanh(ct)
-Where σ is the sigmoid operator: σ(x) = 1 / (1 + e^-x), * represents a point-wise multiplication,
+$$ i_t = sigmoid(W_{ix}x_{t} + W_{ih}h_{t-1} + bx_i + bh_i) $$
+$$ f_t = sigmoid(W_{fx}x_{t} + W_{fh}h_{t-1} + bx_f + bh_f) $$
+$$ o_t = sigmoid(W_{ox}x_{t} + W_{oh}h_{t-1} + bx_o + bh_o) $$
+$$ \\tilde{c_t} = tanh(W_{cx}x_t + W_{ch}h_{t-1} + bx_c + bh_c) $$
+$$ c_t = f_t \\odot c_{t-1} + i_t \\odot \\tilde{c_t} $$
+$$ h_t = o_t \\odot tanh(c_t) $$
+
+- W terms denote weight matrices (e.g. $W_{ix}$ is the matrix
+  of weights from the input gate to the input)
+- The b terms denote bias vectors ($bx_i$ and $bh_i$ are the input gate bias vector).
+- sigmoid is the logistic sigmoid function.
+- $i, f, o$ and $c$ are the input gate, forget gate, output gate,
+  and cell activation vectors, respectively, all of which have the same size as
+  the cell output activation vector $h$.
+- The $\odot$ is the element-wise product of the vectors.
+- `tanh` is the activation functions.
+- $\tilde{c_t}$ is also called candidate hidden state,
+  which is computed based on the current input and the previous hidden state.
+Where sigmoid is the sigmoid operator: sigmoid(x) = 1 / (1 + e^-x), * represents a point-wise multiplication,
 X represensts a matrix multiplication
-and tanh is the hyperbolic tangent function. it, ft, ot, c't represent the input, forget, output and new gates respectively.
 )DOC");
...
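As a cross-check of the equations in the DOC block above, here is a minimal NumPy sketch of one step of the four-gate, no-peephole cell. The fused gate projection and the i, f, c~, o gate ordering are assumptions of this sketch, not the actual cuDNN parameter layout.

    import numpy as np

    def sigmoid(x):
        return 1.0 / (1.0 + np.exp(-x))

    def lstm_step(x_t, h_prev, c_prev, W, R, b_w, b_r):
        # W: (4*hidden, input), R: (4*hidden, hidden), biases: (4*hidden,).
        # Gate order i, f, c~, o is an assumption for this sketch.
        z = W @ x_t + R @ h_prev + b_w + b_r
        i, f, g, o = np.split(z, 4)
        i, f, o = sigmoid(i), sigmoid(f), sigmoid(o)
        c_t = f * c_prev + i * np.tanh(g)  # c_t = f_t odot c_{t-1} + i_t odot c~_t
        h_t = o * np.tanh(c_t)             # h_t = o_t odot tanh(c_t)
        return h_t, c_t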
paddle/fluid/operators/cudnn_lstm_op.cu.cc

-/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
+/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
 Licensed under the Apache License, Version 2.0 (the "License");
 you may not use this file except in compliance with the License.
...
@@ -273,7 +273,6 @@ class CudnnLSTMGPUKernel : public framework::OpKernel<T> {
     size_t max_len = ctx.Attr<int>("max_len");
     float dropout_prob = ctx.Attr<float>("dropout_prob");
     bool is_bidirec = ctx.Attr<bool>("is_bidirec");
-    int batch_size = ctx.Attr<int>("batch_size");
     int input_size = ctx.Attr<int>("input_size");
     int hidden_size = ctx.Attr<int>("hidden_size");
     int num_layers = ctx.Attr<int>("num_layers");
...
@@ -304,9 +303,13 @@ class CudnnLSTMGPUKernel : public framework::OpKernel<T> {
       cudnn_rnn_cache = const_cast<framework::Variable *>(cache_var)
                             ->GetMutable<CudnnRNNCache>();
       std::random_device rnd;
-      int seed = ctx.Attr<bool>("fix_seed") ? ctx.Attr<int>("seed") : rnd();
+      int seed = ctx.Attr<int>("seed");
+      if (seed == -1) {
+        seed = rnd();
+      }

       auto input_w_numel = w->numel();
+      auto batch_size = x->dims()[1];
       cudnn_rnn_cache->init(handle, ctx, max_len, batch_size, input_size,
                             hidden_size, num_layers, dropout_prob, is_bidirec,
                             seed, input_w_numel);
...
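The second hunk replaces the fix_seed flag with a -1 sentinel: any non-negative seed is used as-is for reproducible dropout, while -1 draws a fresh seed from std::random_device. A hedged Python sketch of the same decision (resolve_seed is a hypothetical helper for illustration, not Paddle code):

    import random

    def resolve_seed(seed: int) -> int:
        # -1 means "draw a fresh random seed"; any other value is kept
        # verbatim so dropout masks are reproducible across runs.
        return random.randrange(2**31 - 1) if seed == -1 else seed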
paddle/fluid/operators/cudnn_lstm_op.h

-/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
+/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
 Licensed under the Apache License, Version 2.0 (the "License");
 you may not use this file except in compliance with the License.
...
@@ -29,7 +29,10 @@ using Tensor = framework::Tensor;
 template <typename DeviceContext, typename T>
 class CudnnLSTMKernel : public framework::OpKernel<T> {
  public:
-  void Compute(const framework::ExecutionContext& ctx) const override {}
+  void Compute(const framework::ExecutionContext& ctx) const override {
+    PADDLE_THROW(
+        "CPU is not support for this kernel now. Will be add in the future");
+  }
 };

 template <typename DeviceContext, typename T>
...
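With this change the CPU kernel fails loudly instead of silently computing nothing, so the op should only be reached on a CUDA build. A minimal sketch of the corresponding check on the Python side (assuming the standard fluid.core API):

    import paddle.fluid as fluid

    # The CPU kernel above now raises PADDLE_THROW, so verify CUDA support
    # before building a program that uses this op.
    assert fluid.core.is_compiled_with_cuda(), "layers.lstm needs a CUDA build"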
python/paddle/fluid/layers/nn.py

...
@@ -169,7 +169,7 @@ __all__ = [
     'log_loss',
     'add_position_encoding',
     'bilinear_tensor_product',
-    'cudnn_lstm',
+    'lstm',
 ]
...
@@ -467,39 +467,53 @@ def dynamic_lstm(input,
     return hidden, cell


-def cudnn_lstm(input,
-               init_h,
-               init_c,
-               batch_size,
-               max_len,
-               dropout_prob,
-               input_size,
-               hidden_size,
-               num_layers,
-               is_bidirec=False,
-               dtype='float32',
-               is_test=False,
-               name=None,
-               default_initializer=None,
-               fix_seed=False,
-               seed=0):
+def lstm(input,
+         init_h,
+         init_c,
+         max_len,
+         dropout_prob,
+         input_size,
+         hidden_size,
+         num_layers,
+         is_bidirec=False,
+         dtype='float32',
+         is_test=False,
+         name=None,
+         default_initializer=None,
+         seed=-1):
     """
-    CUDNN LSTM implementation
+    If Device is GPU, This op will use cudnn LSTM implementation

     A four-gate Long Short-Term Memory network with no peephole connections.
     In the forward pass the output ht and cell output ct for a given iteration can be computed from the recurrent input ht-1,
     the cell input ct-1 and the previous layer input xt given matrices W, R and biases bW, bR from the following equations:

-    it = sigmoid(Wi X xt + Ri X ht-1 + bWi + bRi)
-    ft = sigmoid(Wf X xt + Rf X ht-1 + bWf + bRf)
-    ot = sigmoid(Wo X xt + Ro X ht-1 + bWo + bRo)
-    c't = tanh(Wc X xt + Rc X ht-1 + bWc + bRc)
-    ct = ft * ct-1 + it * c't
-    ht = ot * tanh(ct)
+    $$ i_t = \\sigma(W_{ix}x_{t} + W_{ih}h_{t-1} + bx_i + bh_i) $$
+    $$ f_t = \\sigma(W_{fx}x_{t} + W_{fh}h_{t-1} + bx_f + bh_f) $$
+    $$ o_t = \\sigma(W_{ox}x_{t} + W_{oh}h_{t-1} + bx_o + bh_o) $$
+    $$ \\tilde{c_t} = tanh(W_{cx}x_t + W_{ch}h_{t-1} + bx_c + bh_c) $$
+    $$ c_t = f_t \\odot c_{t-1} + i_t \\odot \\tilde{c_t} $$
+    $$ h_t = o_t \\odot tanh(c_t) $$
+
+    - W terms denote weight matrices (e.g. $W_{ix}$ is the matrix
+      of weights from the input gate to the input)
+    - The b terms denote bias vectors ($bx_i$ and $bh_i$ are the input gate bias vector).
+    - sigmoid is the logistic sigmoid function.
+    - $i, f, o$ and $c$ are the input gate, forget gate, output gate,
+      and cell activation vectors, respectively, all of which have the same size as
+      the cell output activation vector $h$.
+    - The $\odot$ is the element-wise product of the vectors.
+    - `tanh` is the activation functions.
+    - $\tilde{c_t}$ is also called candidate hidden state,
+      which is computed based on the current input and the previous hidden state.

     Where sigmoid is the sigmoid operator: sigmoid(x) = 1 / (1 + e^-x), * represents a point-wise multiplication,
     X represensts a matrix multiplication
-    and tanh is the hyperbolic tangent function. it, ft, ot, c't represent the input, forget, output and new gates respectively.

     Args:
...
@@ -510,7 +524,6 @@ def cudnn_lstm(input,
         init_c(Variable): The initial cell state of the LSTM.
                        This is a tensor with shape ( num_layers x batch_size x hidden_size )
                        if is_bidirec = True, shape should be ( num_layers*2 x batch_size x hidden_size)
-        batch_size (int): total distance numer of the batch
         max_len (int): max length of LSTM. the first dim of input tensor CAN NOT greater than max_len
         dropout_prob(float): dropout prob, dropout ONLY work between rnn layers, NOT between time steps
                        There is NO dropout work on rnn output of the last RNN layers
...
@@ -524,9 +537,7 @@ def cudnn_lstm(input,
                        will be named automatically.
         default_initializer(Initialize|None): Where use initializer to initialize the Weight
                        If set None, defaule initializer will be used
-        fix_seed(bool): If it's True, fix seed will used for dropout in LSTM
-        seed(int): If fix_seed is True, dropout seed in LSTM will use this seed
+        seed(int): Seed for dropout in LSTM, If it's -1, dropout will use random seed

     Returns:
...
@@ -553,7 +564,7 @@ def cudnn_lstm(input,
         init_hidden1 = layers.fill_constant( [num_layers, batch_size, hidden_size], 'float32', 0.0, stop_grad=False)
         init_cell1 = layers.fill_constant( [num_layers, batch_size, hidden_size], 'float32', 0.0, stop_grad=False)

-        rnn_out, last_h, last_c = layers.cudnn_lstm( input, init_h, init_c, batch_size, \
+        rnn_out, last_h, last_c = layers.lstm( input, init_h, init_c, \
                 max_len, dropout_prob, input_size, hidden_size, \
                 num_layers)
     """
...
@@ -610,12 +621,10 @@ def cudnn_lstm(input,
             'max_len': max_len,
             'is_bidirec': is_bidirec,
             'input_size': input_size,
-            'batch_size': batch_size,
             'hidden_size': hidden_size,
             'num_layers': num_layers,
             'is_test': is_test,
             'dropout_prob': dropout_prob,
-            'fix_seed': fix_seed,
             'seed': seed,
         })

     return out, last_h, last_c
...
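For orientation, a hedged sketch of calling the renamed API, following the docstring example above. The variable names, sizes, and the fluid.layers.data shape are illustrative, and this assumes a CUDA build of Paddle 1.x, since the CPU kernel only throws:

    import paddle.fluid as fluid
    import paddle.fluid.layers as layers

    emb_dim, hidden_size, num_layers = 256, 512, 2
    max_len, batch_size, dropout_prob = 100, 32, 0.2

    # Input laid out as (seq_len, batch, input_size); the kernel now reads
    # batch_size from x->dims()[1] instead of a batch_size attribute.
    data = fluid.layers.data(name='x', shape=[max_len, batch_size, emb_dim],
                             dtype='float32', append_batch_size=False)
    init_h = layers.fill_constant([num_layers, batch_size, hidden_size], 'float32', 0.0)
    init_c = layers.fill_constant([num_layers, batch_size, hidden_size], 'float32', 0.0)

    rnn_out, last_h, last_c = layers.lstm(data, init_h, init_c, max_len,
                                          dropout_prob, emb_dim, hidden_size,
                                          num_layers, seed=123)  # seed=-1 -> random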