Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
Crayon鑫
Paddle
提交
43f7d7b7
P
Paddle
项目概览
Crayon鑫
/
Paddle
与 Fork 源项目一致
Fork自
PaddlePaddle / Paddle
通知
1
Star
1
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
1
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
1
Issue
1
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
43f7d7b7
编写于
10月 13, 2016
作者:
L
luotao1
提交者:
qingqing01
10月 13, 2016
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
add interface and unittest for nce layer (#180)
* add interface and unittest for nce layer * follow comments
上级
e26f220d
变更
4
隐藏空白更改
内联
并排
Showing
4 changed file
with
170 addition
and
160 deletion
+170
-160
doc/ui/api/trainer_config_helpers/layers.rst
doc/ui/api/trainer_config_helpers/layers.rst
+6
-0
paddle/gserver/layers/NCELayer.cpp
paddle/gserver/layers/NCELayer.cpp
+9
-4
paddle/trainer/tests/test_config.conf
paddle/trainer/tests/test_config.conf
+68
-154
python/paddle/trainer_config_helpers/layers.py
python/paddle/trainer_config_helpers/layers.py
+87
-2
未找到文件。
doc/ui/api/trainer_config_helpers/layers.rst
浏览文件 @
43f7d7b7
...
...
@@ -371,6 +371,12 @@ ctc_layer
:members: ctc_layer
:noindex:
nce_layer
-----------
.. automodule:: paddle.trainer_config_helpers.layers
:members: nce_layer
:noindex:
hsigmoid
---------
.. automodule:: paddle.trainer_config_helpers.layers
...
...
paddle/gserver/layers/NCELayer.cpp
浏览文件 @
43f7d7b7
...
...
@@ -21,14 +21,18 @@ limitations under the License. */
namespace
paddle
{
/**
* Noise-contrastive estimation
* Noise-contrastive estimation
.
* Implements the method in the following paper:
* A fast and simple algorithm for training neural probabilistic language models
* A fast and simple algorithm for training neural probabilistic language models.
*
* The config file api is nce_layer.
*/
class
NCELayer
:
public
Layer
{
int
numClasses_
;
int
numInputs_
;
// number of input layer besides labelLayer and weightLayer
/// number of input layer besides labelLayer and weightLayer
int
numInputs_
;
LayerPtr
labelLayer_
;
/// weight layer, can be None
LayerPtr
weightLayer_
;
WeightList
weights_
;
std
::
unique_ptr
<
Weight
>
biases_
;
...
...
@@ -43,7 +47,8 @@ class NCELayer : public Layer {
real
weight
;
};
std
::
vector
<
Sample
>
samples_
;
bool
prepared_
;
// whether samples_ is prepared
/// whether samples_ is prepared
bool
prepared_
;
Argument
sampleOut_
;
IVectorPtr
labelIds_
;
...
...
paddle/trainer/tests/test_config.conf
浏览文件 @
43f7d7b7
...
...
@@ -13,157 +13,71 @@
# See the License for the specific language governing permissions and
# limitations under the License.
#Todo(luotao02) This config is only used for unitest. It is out of date now, and will be updated later.
default_initial_std
(
0
.
5
)
model_type
(
"nn"
)
DataLayer
(
name
=
"input"
,
size
=
3
,
)
DataLayer
(
name
=
"weight"
,
size
=
1
,
)
Layer
(
name
=
"layer1_1"
,
type
=
"fc"
,
size
=
5
,
active_type
=
"sigmoid"
,
inputs
=
"input"
,
)
Layer
(
name
=
"layer1_2"
,
type
=
"fc"
,
size
=
12
,
active_type
=
"linear"
,
inputs
=
Input
(
"input"
,
parameter_name
=
'sharew'
),
)
Layer
(
name
=
"layer1_3"
,
type
=
"fc"
,
size
=
3
,
active_type
=
"tanh"
,
inputs
=
"input"
,
)
Layer
(
name
=
"layer1_5"
,
type
=
"fc"
,
size
=
3
,
active_type
=
"tanh"
,
inputs
=
Input
(
"input"
,
learning_rate
=
0
.
01
,
momentum
=
0
.
9
,
decay_rate
=
0
.
05
,
initial_mean
=
0
.
0
,
initial_std
=
0
.
01
,
format
=
"csc"
,
nnz
=
4
)
)
FCLayer
(
name
=
"layer1_4"
,
size
=
5
,
active_type
=
"square"
,
inputs
=
"input"
,
drop_rate
=
0
.
5
,
)
Layer
(
name
=
"pool"
,
type
=
"pool"
,
inputs
=
Input
(
"layer1_2"
,
pool
=
Pool
(
pool_type
=
"cudnn-avg-pool"
,
channels
=
1
,
size_x
=
2
,
size_y
=
3
,
img_width
=
3
,
padding
=
1
,
padding_y
=
2
,
stride
=
2
,
stride_y
=
3
))
)
Layer
(
name
=
"concat"
,
type
=
"concat"
,
inputs
= [
"layer1_3"
,
"layer1_4"
],
)
MixedLayer
(
name
=
"output"
,
size
=
3
,
active_type
=
"softmax"
,
inputs
= [
FullMatrixProjection
(
"layer1_1"
,
learning_rate
=
0
.
1
),
TransposedFullMatrixProjection
(
"layer1_2"
,
parameter_name
=
'sharew'
),
FullMatrixProjection
(
"concat"
),
IdentityProjection
(
"layer1_3"
),
],
)
Layer
(
name
=
"label"
,
type
=
"data"
,
size
=
1
,
)
Layer
(
name
=
"cost"
,
type
=
"multi-class-cross-entropy"
,
inputs
= [
"output"
,
"label"
,
"weight"
],
)
Layer
(
name
=
"cost2"
,
type
=
"nce"
,
num_classes
=
3
,
active_type
=
"sigmoid"
,
neg_sampling_dist
= [
0
.
1
,
0
.
3
,
0
.
6
],
inputs
= [
"layer1_2"
,
"label"
,
"weight"
],
)
Evaluator
(
name
=
"error"
,
type
=
"classification_error"
,
inputs
= [
"output"
,
"label"
,
"weight"
]
)
Inputs
(
"input"
,
"label"
,
"weight"
)
Outputs
(
"cost"
,
"cost2"
)
TrainData
(
ProtoData
(
files
=
"dummy_list"
,
constant_slots
= [
1
.
0
],
async_load_data
=
True
,
)
)
TestData
(
SimpleData
(
files
=
"trainer/tests/sample_filelist.txt"
,
feat_dim
=
3
,
context_len
=
0
,
buffer_capacity
=
1000000
,
async_load_data
=
False
,
),
)
Settings
(
algorithm
=
"sgd"
,
num_batches_per_send_parameter
=
1
,
num_batches_per_get_parameter
=
1
,
batch_size
=
100
,
learning_rate
=
0
.
001
,
learning_rate_decay_a
=
1
e
-
5
,
learning_rate_decay_b
=
0
.
5
,
)
from
paddle
.
trainer_config_helpers
import
*
TrainData
(
ProtoData
(
files
=
"dummy_list"
,
constant_slots
= [
1
.
0
],
async_load_data
=
True
))
TestData
(
SimpleData
(
files
=
"trainer/tests/sample_filelist.txt"
,
feat_dim
=
3
,
context_len
=
0
,
buffer_capacity
=
1000000
,
async_load_data
=
False
))
settings
(
batch_size
=
100
)
data
=
data_layer
(
name
=
'input'
,
size
=
3
)
wt
=
data_layer
(
name
=
'weight'
,
size
=
1
)
fc1
=
fc_layer
(
input
=
data
,
size
=
5
,
bias_attr
=
True
,
act
=
SigmoidActivation
())
fc2
=
fc_layer
(
input
=
data
,
size
=
12
,
bias_attr
=
True
,
param_attr
=
ParamAttr
(
name
=
'sharew'
),
act
=
LinearActivation
())
fc3
=
fc_layer
(
input
=
data
,
size
=
3
,
bias_attr
=
True
,
act
=
TanhActivation
())
fc4
=
fc_layer
(
input
=
data
,
size
=
5
,
bias_attr
=
True
,
layer_attr
=
ExtraAttr
(
drop_rate
=
0
.
5
),
act
=
SquareActivation
())
pool
=
img_pool_layer
(
input
=
fc2
,
pool_size
=
2
,
pool_size_y
=
3
,
num_channels
=
1
,
padding
=
1
,
padding_y
=
2
,
stride
=
2
,
stride_y
=
3
,
img_width
=
3
,
pool_type
=
CudnnAvgPooling
())
concat
=
concat_layer
(
input
=[
fc3
,
fc4
])
with
mixed_layer
(
size
=
3
,
act
=
SoftmaxActivation
())
as
output
:
output
+=
full_matrix_projection
(
input
=
fc1
)
output
+=
trans_full_matrix_projection
(
input
=
fc2
,
param_attr
=
ParamAttr
(
name
=
'sharew'
))
output
+=
full_matrix_projection
(
input
=
concat
)
output
+=
identity_projection
(
input
=
fc3
)
lbl
=
data_layer
(
name
=
'label'
,
size
=
1
)
cost
=
classification_cost
(
input
=
output
,
label
=
lbl
,
weight
=
wt
,
layer_attr
=
ExtraAttr
(
device
=-
1
))
nce
=
nce_layer
(
input
=
fc2
,
label
=
lbl
,
weight
=
wt
,
num_classes
=
3
,
neg_distribution
=[
0
.
1
,
0
.
3
,
0
.
6
])
outputs
(
cost
,
nce
)
python/paddle/trainer_config_helpers/layers.py
浏览文件 @
43f7d7b7
...
...
@@ -50,6 +50,7 @@ __all__ = ["full_matrix_projection", "AggregateLevel", "ExpandLevel",
'slope_intercept_layer'
,
'trans_full_matrix_projection'
,
'linear_comb_layer'
,
'convex_comb_layer'
,
'ctc_layer'
,
'crf_layer'
,
'crf_decoding_layer'
,
'nce_layer'
,
'cross_entropy_with_selfnorm'
,
'cross_entropy'
,
'multi_binary_label_cross_entropy'
,
'rank_cost'
,
'lambda_cost'
,
'huber_cost'
,
...
...
@@ -115,6 +116,7 @@ class LayerType(object):
CTC_LAYER
=
"ctc"
CRF_LAYER
=
"crf"
CRF_DECODING_LAYER
=
"crf_decoding"
NCE_LAYER
=
'nce'
RANK_COST
=
"rank-cost"
LAMBDA_COST
=
"lambda_cost"
...
...
@@ -168,7 +170,7 @@ class LayerOutput(object):
:param activation: Layer Activation.
:type activation: BaseActivation.
:param parents: Layer's parents.
:type parents: list|tuple|collection.Sequence
:type parents: list|tuple|collection
s
.Sequence
"""
def
__init__
(
self
,
name
,
layer_type
,
parents
=
None
,
activation
=
None
,
...
...
@@ -1988,10 +1990,16 @@ def concat_layer(input, act=None, name=None, layer_attr=None):
Concat all input vector into one huge vector.
Inputs can be list of LayerOutput or list of projection.
The example usage is:
.. code-block:: python
concat = concat_layer(input=[layer1, layer2])
:param name: Layer name.
:type name: basestring
:param input: input layers or projections
:type input: list|tuple|collection.Sequence
:type input: list|tuple|collection
s
.Sequence
:param act: Activation type.
:type act: BaseActivation
:param layer_attr: Extra Layer Attribute.
...
...
@@ -3488,6 +3496,83 @@ def crf_decoding_layer(input, size, label=None, param_attr=None, name=None):
parents
.
append
(
label
)
return
LayerOutput
(
name
,
LayerType
.
CRF_DECODING_LAYER
,
parents
,
size
=
size
)
@
wrap_bias_attr_default
(
has_bias
=
True
)
@
wrap_name_default
()
@
layer_support
()
def
nce_layer
(
input
,
label
,
num_classes
,
weight
=
None
,
num_neg_samples
=
10
,
neg_distribution
=
None
,
name
=
None
,
bias_attr
=
None
,
layer_attr
=
None
):
"""
Noise-contrastive estimation.
Implements the method in the following paper:
A fast and simple algorithm for training neural probabilistic language models.
The example usage is:
.. code-block:: python
cost = nce_layer(input=layer1, label=layer2, weight=layer3,
num_classes=3, neg_distribution=[0.1,0.3,0.6])
:param name: layer name
:type name: basestring
:param input: input layers. It could be a LayerOutput of list/tuple of LayerOutput.
:type input: LayerOutput|list|tuple|collections.Sequence
:param label: label layer
:type label: LayerOutput
:param weight: weight layer, can be None(default)
:type weight: LayerOutput
:param num_classes: number of classes.
:type num_classes: int
:param num_neg_samples: number of negative samples. Default is 10.
:type num_neg_samples: int
:param neg_distribution: The distribution for generating the random negative labels.
A uniform distribution will be used if not provided.
If not None, its length must be equal to num_classes.
:type neg_distribution: list|tuple|collections.Sequence|None
:param bias_attr: Bias parameter attribute. True if no bias.
:type bias_attr: ParameterAttribute|None|False
:param layer_attr: Extra Layer Attribute.
:type layer_attr: ExtraLayerAttribute
:return: layer name.
:rtype: LayerOutput
"""
if
isinstance
(
input
,
LayerOutput
):
input
=
[
input
]
assert
isinstance
(
input
,
collections
.
Sequence
)
assert
isinstance
(
label
,
LayerOutput
)
assert
label
.
layer_type
==
LayerType
.
DATA
if
neg_distribution
is
not
None
:
assert
isinstance
(
neg_distribution
,
collections
.
Sequence
)
assert
len
(
neg_distribution
)
==
num_classes
assert
sum
(
neg_distribution
)
==
1
ipts_for_layer
=
[]
parents
=
[]
for
each_input
in
input
:
assert
isinstance
(
each_input
,
LayerOutput
)
ipts_for_layer
.
append
(
each_input
.
name
)
parents
.
append
(
each_input
)
ipts_for_layer
.
append
(
label
.
name
)
parents
.
append
(
label
)
if
weight
is
not
None
:
assert
isinstance
(
weight
,
LayerOutput
)
assert
weight
.
layer_type
==
LayerType
.
DATA
ipts_for_layer
.
append
(
weight
.
name
)
parents
.
append
(
weight
)
Layer
(
name
=
name
,
type
=
LayerType
.
NCE_LAYER
,
num_classes
=
num_classes
,
neg_sampling_dist
=
neg_distribution
,
num_neg_samples
=
num_neg_samples
,
inputs
=
ipts_for_layer
,
bias
=
ParamAttr
.
to_bias
(
bias_attr
),
**
ExtraLayerAttribute
.
to_kwargs
(
layer_attr
)
)
return
LayerOutput
(
name
,
LayerType
.
NCE_LAYER
,
parents
=
parents
)
"""
following are cost Layers.
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录