Commit c4f7f3a5
Authored on Nov 04, 2017 by guosheng

Merge branch 'develop' of https://github.com/PaddlePaddle/paddle into add-GRUOp-dev

Parents: 1d85b2bd, e745bcfc

Showing 70 changed files with 1294 additions and 484 deletions (+1294 / -484)
paddle/framework/CMakeLists.txt                                     +2    -1
paddle/framework/executor.cc                                        +4    -1
paddle/framework/framework.proto                                    +1    -0
paddle/framework/lod_rank_table.cc                                  +43   -0
paddle/framework/lod_rank_table.h                                   +55   -0
paddle/framework/operator.h                                         +0    -2
paddle/framework/var_desc.h                                         +1    -0
paddle/operators/CMakeLists.txt                                     +2    -0
paddle/operators/accuracy_op.cc                                     +13   -9
paddle/operators/batch_norm_op.cc                                   +4    -1
paddle/operators/concat_op.cc                                       +17   -13
paddle/operators/cond_op.cc                                         +6    -5
paddle/operators/conv2d_op.cc                                       +18   -14
paddle/operators/conv2d_transpose_op.cc                             +11   -7
paddle/operators/conv_cudnn_op.cc                                   +1    -1
paddle/operators/conv_shift_op.cc                                   +5    -6
paddle/operators/cos_sim_op.cc                                      +7    -6
paddle/operators/crf_decoding_op.cc                                 +136  -0
paddle/operators/crf_decoding_op.h                                  +127  -0
paddle/operators/crop_op.cc                                         +22   -21
paddle/operators/cross_entropy_op.cc                                +10   -8
paddle/operators/decayed_adagrad_op.cc                              +10   -3
paddle/operators/dropout_op.cc                                      +8    -6
paddle/operators/dynamic_recurrent_op.cc                            +10   -4
paddle/operators/elementwise_add_op.cc                              +1    -1
paddle/operators/elementwise_div_op.cc                              +1    -1
paddle/operators/elementwise_mul_op.cc                              +1    -1
paddle/operators/elementwise_op.h                                   +30   -25
paddle/operators/elementwise_sub_op.cc                              +1    -1
paddle/operators/feed_op.cc                                         +7    -2
paddle/operators/fetch_op.cc                                        +7    -2
paddle/operators/fill_constant_batch_size_like_op.cc                +7    -2
paddle/operators/fill_constant_op.cc                                +6    -1
paddle/operators/fill_zeros_like_op.cc                              +5    -3
paddle/operators/gather_op.cc                                       +20   -3
paddle/operators/gaussian_random_op.cc                              +24   -10
paddle/operators/gru_unit_op.cc                                     +22   -17
paddle/operators/huber_loss_op.cc                                   +4    -2
paddle/operators/increment_op.cc                                    +8    -4
paddle/operators/l1_norm_op.cc                                      +1    -1
paddle/operators/linear_chain_crf_op.cc                             +33   -32
paddle/operators/linear_chain_crf_op.h                              +2    -2
paddle/operators/load_op.cc                                         +8    -4
paddle/operators/lod_rank_table_op.cc                               +80   -0
paddle/operators/lookup_table_op.cc                                 +16   -10
paddle/operators/lrn_op.cc                                          +41   -43
paddle/operators/lstm_op.cc                                         +31   -34
paddle/operators/lstm_unit_op.cc                                    +12   -7
paddle/operators/math/CMakeLists.txt                                +1    -1
paddle/operators/save_op.cc                                         +10   -5
paddle/operators/scale_op.cc                                        +8    -5
paddle/operators/sequence_concat_op.cc                              +35   -33
paddle/operators/sgd_op.cc                                          +8    -6
paddle/operators/sign_op.cc                                         +3    -2
paddle/operators/split_op.cc                                        +24   -16
paddle/operators/squared_l2_distance_op.cc                          +16   -13
paddle/operators/squared_l2_norm_op.cc                              +2    -2
paddle/operators/sum_op.cc                                          +7    -5
paddle/operators/top_k_op.cc                                        +12   -12
paddle/operators/uniform_random_op.cc                               +21   -9
paddle/pybind/protobuf.cc                                           +2    -1
paddle/pybind/pybind.cc                                             +13   -0
paddle/scripts/docker/build.sh                                      +1    -0
python/paddle/v2/framework/framework.py                             +4    -0
python/paddle/v2/framework/layer_helper.py                          +4    -1
python/paddle/v2/framework/layers.py                                +40   -21
python/paddle/v2/framework/tests/test_crf_decoding_op.py            +146  -0
python/paddle/v2/framework/tests/test_image_classification_train.py +23   -34
python/paddle/v2/framework/tests/test_lod_rank_table.py             +29   -0
python/paddle/v2/framework/tests/test_recognize_digits_mlp.py       +4    -2
paddle/framework/CMakeLists.txt

@@ -45,8 +45,9 @@ add_custom_command(TARGET framework_py_proto POST_BUILD
 cc_library(backward SRCS backward.cc DEPS net_op)
 cc_test(backward_test SRCS backward_test.cc DEPS backward recurrent_op device_context fill_constant_op)
+cc_library(lod_rank_table SRCS lod_rank_table.cc DEPS lod_tensor)
-cc_library(executor SRCS executor.cc DEPS op_registry device_context scope framework_proto backward glog)
+cc_library(executor SRCS executor.cc DEPS op_registry device_context scope framework_proto backward glog lod_rank_table)
 cc_library(prune SRCS prune.cc DEPS framework_proto)
 cc_test(prune_test SRCS prune_test.cc DEPS op_info prune recurrent_op device_context)
paddle/framework/executor.cc

@@ -21,6 +21,7 @@ limitations under the License. */
 #include <vector>
 #include "paddle/framework/feed_fetch_type.h"
+#include "paddle/framework/lod_rank_table.h"
 #include "paddle/framework/lod_tensor.h"
 #include "paddle/framework/op_registry.h"
 #include "paddle/framework/scope.h"

@@ -70,10 +71,12 @@ static void CreateTensor(Variable* var, VarDesc::VarType var_type) {
     var->GetMutable<FeedFetchList>();
   } else if (var_type == VarDesc::STEP_SCOPES) {
     var->GetMutable<std::vector<framework::Scope>>();
+  } else if (var_type == VarDesc::LOD_RANK_TABLE) {
+    var->GetMutable<LoDRankTable>();
   } else {
     PADDLE_THROW(
         "Variable type %d is not in "
-        "[LoDTensor, SelectedRows, FEED_MINIBATCH, FETCH_LIST]",
+        "[LoDTensor, SelectedRows, FEED_MINIBATCH, FETCH_LIST, LOD_RANK_TABLE]",
         var_type);
   }
 }
paddle/framework/framework.proto

@@ -116,6 +116,7 @@ message VarDesc {
     FEED_MINIBATCH = 3;
     FETCH_LIST = 4;
     STEP_SCOPES = 5;
+    LOD_RANK_TABLE = 6;
   }
   required string name = 1;
   required VarType type = 2;
paddle/framework/lod_rank_table.cc (new file, mode 100644)

/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */

#include "paddle/framework/lod_rank_table.h"

namespace paddle {
namespace framework {

void LoDRankTable::Reset(const LoD& lod, size_t level) {
  this->coarse_lod_.clear();
  this->items_.clear();
  PADDLE_ENFORCE(level < lod.size(),
                 "Cannot rank lod since the level %d is less than lod size %d",
                 level, lod.size());
  coarse_lod_.reserve(level);
  for (size_t i = 0; i < level; ++i) {
    coarse_lod_.push_back(lod[i]);
  }
  auto& vec = lod[level];
  for (size_t i = 0; i < vec.size() - 1; ++i) {
    TableItem item;
    item.index = i;
    item.length = vec[i + 1] - vec[i];
    items_.emplace_back(item);
  }
  std::sort(items_.begin(), items_.end(),
            [](const TableItem& a, const TableItem& b) {
              return a.length > b.length;
            });
}

}  // namespace framework
}  // namespace paddle
paddle/framework/lod_rank_table.h (new file, mode 100644)

/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */

#pragma once
#include "paddle/framework/lod_tensor.h"

namespace paddle {
namespace framework {

// LoD Rank Table stores the `level` of `lod` ordered by sequence length in
// descending order. It is useful when implementing dynamic RNN, and is shared
// by the dynamic RNN memory, dynamic RNN slice input, and dynamic RNN slice
// output operators.
//
// A table item contains two elements: the length of a sequence and the index
// of that sequence in the given level.
//
// LoDRankTable also stores the coarse_lod, which is the lod information whose
// level is less than the input level, in order to restore the output LoD
// information.
class LoDRankTable {
 public:
  struct TableItem {
    size_t index;
    size_t length;
  };

  LoDRankTable() {}

  void Reset(const LoD& lod, size_t level);

  const std::vector<TableItem>& items() const { return this->items_; }

  const LoD& coarse_lod() const { return this->coarse_lod_; }

  size_t level() const { return coarse_lod_.size(); }

 private:
  LoD coarse_lod_;
  std::vector<TableItem> items_;
};

}  // namespace framework
}  // namespace paddle
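To make the rank-table semantics concrete, here is a minimal usage sketch (my annotation, not part of the commit; the lod values are made up for illustration). A level whose offsets are {0, 3, 5, 9} describes three sequences of lengths 3, 2, and 4; Reset sorts them by descending length while remembering their original indices.

#include <iostream>
#include "paddle/framework/lod_rank_table.h"

int main() {
  // Two-level LoD; level 1 = {0, 3, 5, 9} describes sequences of
  // lengths 3, 2, and 4.
  paddle::framework::LoD lod = {{0, 2, 3}, {0, 3, 5, 9}};
  paddle::framework::LoDRankTable table;
  table.Reset(lod, /*level=*/1);
  // Items come back sorted by descending length:
  // index=2 length=4, index=0 length=3, index=1 length=2.
  for (const auto& item : table.items()) {
    std::cout << item.index << ":" << item.length << "\n";
  }
}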
paddle/framework/operator.h

@@ -408,7 +408,6 @@ class OperatorWithKernel : public OperatorBase {
   // indicate kernel DataType by input data. Defaultly all input data must be
   // same.
   virtual DataType IndicateDataType(const ExecutionContext& ctx) const {
-    VLOG(3) << "Default IndicateDataType " << this->Type();
     auto& scope = ctx.scope();
     int data_type = -1;
     for (auto& input : this->inputs_) {

@@ -425,7 +424,6 @@ class OperatorWithKernel : public OperatorBase {
         }
         if (t != nullptr) {
           int tmp = static_cast<int>(ToDataType(t->type()));
-          VLOG(3) << "Input " << ipt_name << " with data_type " << tmp;
           PADDLE_ENFORCE(tmp == data_type || data_type == -1,
                          "DataType of Paddle Op %s must be the same.", Type());
paddle/framework/var_desc.h

@@ -15,6 +15,7 @@ limitations under the License. */
 #pragma once
 #include <vector>
+#include "glog/logging.h"
 #include "paddle/framework/framework.pb.h"

 namespace paddle {
paddle/operators/CMakeLists.txt

@@ -141,6 +141,7 @@ set(DEPS_OPS
     pool_with_index_op
     nccl_op
     sequence_conv_op
+    lod_rank_table_op
     lstm_op
     gru_op)

@@ -150,6 +151,7 @@ op_library(softmax_with_cross_entropy_op DEPS cross_entropy softmax)
 op_library(sum_op DEPS net_op selected_rows_functor)
 op_library(pool_op DEPS pooling)
 op_library(pool_with_index_op DEPS pooling)
+op_library(lod_rank_table_op SRCS lod_rank_table_op.cc DEPS lod_rank_table)
 if(WITH_GPU)
   op_library(nccl_op DEPS nccl_common)
 endif()
paddle/operators/accuracy_op.cc

@@ -33,7 +33,7 @@ class AccuracyOp : public framework::OperatorWithKernel {
     auto inference_dim = ctx->GetInputDim("Out");
     auto label_dim = ctx->GetInputDim("Label");
-    // Assume indices has same shape with infernece, because
+    // Assume indices has same shape as inference, because
     // it's the output of topk.
     PADDLE_ENFORCE_EQ(label_dim.size(), 2, "label's rank must be 2.");

@@ -60,20 +60,24 @@ class AccuracyOpMaker : public framework::OpProtoAndCheckerMaker {
                   framework::OpAttrChecker* op_checker)
       : OpProtoAndCheckerMaker(proto, op_checker) {
     // TODO(typhoonzero): support both inference value and indices.
-    AddInput("Out", "topk (inferences) the network output");
-    AddInput("Indices", "topk (indices) the network output");
+    AddInput("Out", "The network output of topk (inferences)");
+    AddInput("Indices", "The network output of topk (indices)");
     AddInput("Label", "Label of the training data");
     // TODO(typhoonzero): AddInput("Weight", ...
     AddOutput("Accuracy", "The accuracy of current batch");

     AddComment(R"DOC(
-Accuracy. It will print accuracy rate for classification.
-The accuracy is:
-.. math::
-accuracy = \\frac{NumOfCorrectPredicts}{NumOfAllSamples})
+Accuracy Operator.
+
+It will print accuracy rate for classification.
+The accuracy is calculated as follows:
+
+$$accuracy = \frac{NumOfCorrectPredicts}{NumOfAllSamples}$$
+
-Both the input Out and Label can carry the LoD (Level of Details)
-information, or not. But the output only shares the LoD information
-with the input Out(Inference).
+Both the input `Out` and `Label` can carry the LoD (Level of Details)
+information, or not. But the output only shares the LoD with input `Inference`.
 )DOC");
   }
 };
paddle/operators/batch_norm_op.cc

@@ -51,6 +51,10 @@ class BatchNormOp : public framework::OperatorWithKernel {
     PADDLE_ENFORCE(ctx->HasOutput("SavedMean"), "");
     PADDLE_ENFORCE(ctx->HasOutput("SavedVariance"), "");

+    const float epsilon = ctx->Attrs().Get<float>("epsilon");
+    PADDLE_ENFORCE_GE(epsilon, 0.0, "epsilon should be larger than 0");
+    PADDLE_ENFORCE_LE(epsilon, 0.001, "epsilon should not be too large");
+
     // make sure Mean/MeanOut and Variance/VarianceOut share memory in Python
     PADDLE_ENFORCE_EQ(ctx->Inputs("Mean")[0], ctx->Outputs("MeanOut")[0],
                       "Mean and MeanOut should share the same memory");

@@ -297,7 +301,6 @@ class BatchNormGradOp : public framework::OperatorWithKernel {
   framework::DataType IndicateDataType(
       const framework::ExecutionContext& ctx) const override {
-    VLOG(3) << "IndicateDataType " << this->Type();
     const auto* var = ctx.InputVar(framework::GradVarName("Y"));
     if (var == nullptr) {
       PADDLE_THROW("can't find Y@GRAD");
paddle/operators/concat_op.cc

@@ -56,20 +56,24 @@ class ConcatOpMaker : public framework::OpProtoAndCheckerMaker {
  public:
   ConcatOpMaker(framework::OpProto* proto, framework::OpAttrChecker* op_checker)
       : OpProtoAndCheckerMaker(proto, op_checker) {
-    AddInput("X", "the input tensors of concat operator.").AsDuplicable();
-    AddOutput("Out", "the output tensor of concat operator.");
-    AddComment(R"DOC(
-            Join the input tensors along with the axis.
-            Examples:
-              Input[0] = [[1,2],[3,4]]
-              Input[1] = [[5,6]]
-              axis = 0
-              Output = [[1,2],
-                        [3,4],
-                        [5,6]]
-    )DOC");
-    AddAttr<int>("axis", "The axis which the inputs will be joined with.")
+    AddInput("X", "Input tensors of concat operator.").AsDuplicable();
+    AddOutput("Out", "Output tensor of concat operator.");
+    AddAttr<int>("axis",
+                 "The axis along which the input tensors will be concatenated.")
         .SetDefault(0);
+    AddComment(R"DOC(
+Concat Operator.
+
+Concatenate the input tensors along dimension axis.
+Examples:
+  Input[0] = [[1,2],[3,4]]
+  Input[1] = [[5,6]]
+  axis = 0
+  Output = [[1,2],
+            [3,4],
+            [5,6]]
+
+)DOC");
   }
 };
paddle/operators/cond_op.cc

@@ -216,11 +216,12 @@ class CondOpProtoAndCheckerMaker : public framework::OpProtoAndCheckerMaker {
     AddOutput("IndexTensors", "Index Tensors contains indices for true/false");

     AddComment(R"DOC(
-Sample dependent Cond Operator:
-Given Cond[i] as a 1/0 vector to indicate true/false
-The equation is:
-Out[i] = subnet_t[i], if Cond[i] == true
-Out[i] = subnet_t[i], if Cond[i] == false
+Sample Dependent Conditional Operator.
+
+Given Cond[i] as a 1/0 vector to indicate true/false:
+Out[i] = subnet_true[i], if Cond[i] == true
+Out[i] = subnet_false[i], if Cond[i] == false
+
 )DOC");
   }
 };
paddle/operators/conv2d_op.cc

@@ -56,17 +56,18 @@ Conv2DOpMaker::Conv2DOpMaker(framework::OpProto* proto,
   AddInput(
       "Input",
       "The input tensor of convolution operator. "
-      "The format of input tensor is NCHW. Where N is batch size, C is the "
-      "number of channels, H and W is the height and width of image.");
+      "The format of input tensor is NCHW, where N is batch size, C is the "
+      "number of channels, H is the height of the image, "
+      "and W is the width of the image.");
   AddInput("Filter",
-           "The filter tensor of convolution operator."
+           "The filter tensor of convolution operator. "
            "The format of the filter tensor is MCHW, where M is the number of "
            "output image channels, C is the number of input image channels, "
-           "H and W is height and width of filter. "
-           "If the groups attribute is greater than 1, C equal the number of "
+           "H is the height of the filter, and W is the width of the filter. "
+           "If the groups attribute is greater than 1, C equals the number of "
            "input image channels divided by the groups.");
   AddOutput("Output",
-            "The output tensor of convolution operator."
+            "The output tensor of convolution operator. "
             "The format of output tensor is also NCHW.");
   AddAttr<std::vector<int>>("strides", "strides of convolution operator.")
       .SetDefault({1, 1});

@@ -74,16 +75,19 @@ Conv2DOpMaker::Conv2DOpMaker(framework::OpProto* proto,
       .SetDefault({0, 0});
   AddAttr<int>(
       "groups",
-      "group size of convolution operator. "
-      "Refer to grouped convolution in Alex Krizhevsky's paper: "
-      "when group=2, the first half of the filters are only connected to the "
-      "first half of the input channels, and the second half only connected "
-      "to the second half.")
+      "Group size of convolution operator. "
+      "According to grouped convolution in Alex Krizhevsky's Deep CNN paper: "
+      "when group=2, the first half of the filters is only connected to the "
+      "first half of the input channels, while the second half of the filters "
+      "is only connected to the second half of the input channels.")
       .SetDefault(1);
   AddComment(R"DOC(
-The convolution operation calculates the output based on the input, filter
-and strides, paddings, groups parameters. The size of each dimension of the
-parameters is checked in the infer-shape.
+Convolution Operator.
+
+The convolution operation calculates the output based on the input, filter,
+strides, paddings, and groups parameters. The size of each dimension of the
+parameters is checked in the infer-shape method.
+
 )DOC");
 }
paddle/operators/conv2d_transpose_op.cc

@@ -54,15 +54,16 @@ Conv2DTransposeOpMaker::Conv2DTransposeOpMaker(
   AddInput(
       "Input",
       "(Tensor) The input tensor of convolution transpose operator. "
-      "The format of input tensor is NCHW. Where N is batch size, C is the "
-      "number of input channels, H and W is the height and width of image.");
+      "The format of input tensor is NCHW, where N is batch size, C is the "
+      "number of input channels, H is the height of the image, and "
+      "W is the width of the image.");
   AddInput("Filter",
            "(Tensor) The filter tensor of convolution transpose operator."
            "The format of the filter tensor is CMHW, where C is the number of "
            "output image channels, M is the number of input image channels, "
-           "H and W is height and width of filter. "
+           "H is the height of the filter, and W is the width of the filter. "
            "We enforce groups number == 1 and padding == 0 in "
-           "convolution transpose Scenario.");
+           "the convolution transpose scenario.");
   AddOutput("Output",
             "(Tensor) The output tensor of convolution transpose operator."
             "The format of output tensor is also NCHW.");

@@ -73,9 +74,12 @@ Conv2DTransposeOpMaker::Conv2DTransposeOpMaker(
                             "paddings of convolution transpose operator.")
       .SetDefault({0, 0});
   AddComment(R"DOC(
-The convolution transpose operation calculates the output based on the input, filter
-and strides, paddings, groups parameters. The size of each dimension of the
-parameters is checked in the infer-shape.
+Convolution Transpose Operator.
+
+The convolution transpose operation calculates the output based on the input,
+filter, strides, paddings, and groups parameters. The size of each dimension
+of the parameters is checked in the infer-shape method.
+
 )DOC");
 }
paddle/operators/conv_cudnn_op.cc

@@ -29,7 +29,7 @@ class CudnnConvOpMaker : public Conv2DOpMaker {
                  "workspace is a section of GPU memory which will be "
                  "allocated/freed each time the operator runs, larger "
                  "workspace size can increase performance but also requires "
-                 "better hardward. This size should be carefully setted.")
+                 "better hardware. This size should be chosen carefully.")
         .SetDefault(4096);
   }
 };
paddle/operators/conv_shift_op.cc

@@ -96,14 +96,13 @@ as used in the Neural Turing Machine: https://arxiv.org/abs/1410.5401
 The equation is:

-\f[
-    Out[i] = \sum_{j=-(N-1)/2}^{(N-1)/2} X_{i+j} * Y_{j}
-\f]
+$$Out[i] = \sum_{j=-(N-1)/2}^{(N-1)/2} X_{i+j} * Y_{j}$$

-where X's index is computed modulo M, and b's index is computed modulo N.
+where X's index is computed modulo M, and Y's index is computed modulo N.

-Both inputs X and Y can carry LoD (Level of Details) information.
-However, the output only shares the LoD information with input X.
+Both of the input `X` and `Y` can carry LoD (Level of Details) information.
+However, the output only shares the LoD information with input `X`.

 )DOC");
   }
 };
paddle/operators/cos_sim_op.cc

@@ -79,15 +79,16 @@ class CosSimOpMaker : public framework::OpProtoAndCheckerMaker {
     AddComment(R"DOC(
 Cosine Similarity Operator.

-The equation is: Out = X^T * Y / (sqrt(X^T * X) * sqrt(Y^T * Y)).
+$Out = X^T * Y / (\sqrt{X^T * X} * \sqrt{Y^T * Y})$

-The input `X` and `Y` must have the same shape, except that the 1st dimension
-of input `Y` could be just 1 (different from input `X`), which will be
-broadcasted to match the shape of input `X` before computing their cosine
+The input X and Y must have the same shape, except that the 1st dimension
+of input Y could be just 1 (different from input X), which will be
+broadcasted to match the shape of input X before computing their cosine
 similarity.

-Both the input `X` and `Y` can carry the LoD (Level of Details) information,
-or not. But the output only shares the LoD with input `X`.
+Both the input X and Y can carry the LoD (Level of Details) information,
+or not. But the output only shares the LoD information with input X.
+
 )DOC");
   }
 };
paddle/operators/crf_decoding_op.cc
0 → 100644
浏览文件 @
c4f7f3a5
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/operators/crf_decoding_op.h"
namespace
paddle
{
namespace
operators
{
class
CRFDecodingOpMaker
:
public
framework
::
OpProtoAndCheckerMaker
{
public:
CRFDecodingOpMaker
(
framework
::
OpProto
*
proto
,
framework
::
OpAttrChecker
*
op_checker
)
:
OpProtoAndCheckerMaker
(
proto
,
op_checker
)
{
AddInput
(
"Emission"
,
"(LoDTensor, default: LoDTensor<float>). A LoDTensor with shape "
"[N x D] where N is the size of the mini-batch and D is the total "
"tag number. This input is the unscaled emission weight matrix of "
"the linear_chain_crf operator."
);
AddInput
(
"Transition"
,
"(Tensor, default: Tensor<float>). A Tensor with shape [(D + 2) x D]. "
"This input is the transition weights learned by the linear_chain_crf "
"operator, denoted as w. The 1st row of w are transition weights for "
"the start mask. The 2nd row of w are transition weights for the end "
"mask. Transition weights between other tags begin from the 3rd row of "
"w. See more details in comments of the linear_chain_crf operator."
);
AddInput
(
"Label"
,
"(LoDTensor, LoDTensor<int>). The ground truth with shape "
"[N x 1]. This input is optional. See more details in the operator's "
"comments."
)
.
AsDispensable
();
AddOutput
(
"ViterbiPath"
,
"(LoDTensor, LoDTensor<int>). The decoding results. What to "
"return changes depending on whether the Input(Label) (the groud "
"truth) is given. See more details in the operator's comment."
);
AddComment
(
R"DOC(
The crf_decoding operator reads the emission feature weights and the transition
freature weights learned by the linear_chain_crf operator. It implements the
Viterbi algorithm which is a dynamic programming algorithm for finding the most
likely sequence of hidden states, called the Viterbi path, that results in a
sequence of observed tags.
The output of this operator changes according to whether Input(Label) is given:
1. Input(Label) is given:
This happens in training. This operator is used to co-work with the chunk_eval
operator.
When Input(Label) is given, the crf_decoding operator returns a row vector
with shape [N x 1] whose values are fixed to be 0, indicating an incorrect
prediction, or 1 indicating a tag is correctly predicted. Such an ouput is the
input to chunk_eval operator.
2. Input(Label) is not given:
This is the standard decoding process.
The crf_decoding operator returns a row vecotr with shape [N x 1] whose values
range from 0 to maximum tag number - 1. Each element indicates an index of a
predicted tag.
)DOC"
);
}
};
class
CRFDecodingOp
:
public
framework
::
OperatorWithKernel
{
public:
using
framework
::
OperatorWithKernel
::
OperatorWithKernel
;
void
InferShape
(
framework
::
InferShapeContext
*
ctx
)
const
override
{
PADDLE_ENFORCE
(
ctx
->
HasInput
(
"Emission"
),
"Input(Emission) should be not null."
);
PADDLE_ENFORCE
(
ctx
->
HasInput
(
"Transition"
),
"Input(Transition) should be not null."
);
PADDLE_ENFORCE
(
ctx
->
HasOutput
(
"ViterbiPath"
),
"Output(ViterbiPath) should be not null."
);
auto
emission_dims
=
ctx
->
GetInputDim
(
"Emission"
);
PADDLE_ENFORCE_EQ
(
emission_dims
.
size
(),
2UL
,
"The Input(Emission) should be a 2-D tensor."
);
PADDLE_ENFORCE
(
emission_dims
[
0
],
"An empty mini-batch is not allowed."
);
auto
transition_dims
=
ctx
->
GetInputDim
(
"Transition"
);
PADDLE_ENFORCE_EQ
(
transition_dims
.
size
(),
2UL
,
"The Input(Transition) should be a 2-D tensor."
);
PADDLE_ENFORCE_EQ
(
transition_dims
[
0
]
-
2
,
transition_dims
[
1
],
"An invalid dimension for the Input(Transition), which should "
"be a 2-D tensor with shape [(D + 2) x D]."
);
PADDLE_ENFORCE_EQ
(
emission_dims
[
1
],
transition_dims
[
1
],
"The 2nd dimension of the Input(Emission) and the Input(Transition) "
"should be equal to the tag number."
);
if
(
ctx
->
HasInput
(
"Label"
))
{
auto
label_dims
=
ctx
->
GetInputDim
(
"Label"
);
PADDLE_ENFORCE
(
label_dims
.
size
()
==
2UL
&&
label_dims
[
1
]
==
1UL
,
"The Input(Label) should be a 2-D tensor with the 2nd "
"dimensions fixed to 1."
);
PADDLE_ENFORCE_EQ
(
emission_dims
[
0
],
label_dims
[
0
],
"The height of Input(Emission) and the height of Input(Label) "
"should be the same."
);
}
ctx
->
ShareLoD
(
"Emission"
,
/*->*/
"ViterbiPath"
);
ctx
->
SetOutputDim
(
"ViterbiPath"
,
{
emission_dims
[
0
],
1
});
}
protected:
framework
::
DataType
IndicateDataType
(
const
framework
::
ExecutionContext
&
ctx
)
const
override
{
return
framework
::
ToDataType
(
ctx
.
Input
<
LoDTensor
>
(
"Emission"
)
->
type
());
}
};
}
// namespace operators
}
// namespace paddle
namespace
ops
=
paddle
::
operators
;
REGISTER_OP_WITHOUT_GRADIENT
(
crf_decoding
,
ops
::
CRFDecodingOp
,
ops
::
CRFDecodingOpMaker
);
REGISTER_OP_CPU_KERNEL
(
crf_decoding
,
ops
::
CRFDecodingOpKernel
<
paddle
::
platform
::
CPUPlace
,
float
>
,
ops
::
CRFDecodingOpKernel
<
paddle
::
platform
::
CPUPlace
,
double
>
);
paddle/operators/crf_decoding_op.h
0 → 100644
浏览文件 @
c4f7f3a5
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include "paddle/framework/eigen.h"
#include "paddle/framework/op_registry.h"
#include "paddle/operators/math/math_function.h"
namespace
paddle
{
namespace
operators
{
using
framework
::
LoDTensor
;
using
framework
::
LoD
;
using
framework
::
Tensor
;
template
<
typename
Place
,
typename
T
>
class
CRFDecodingOpKernel
:
public
framework
::
OpKernel
<
T
>
{
public:
void
Compute
(
const
framework
::
ExecutionContext
&
ctx
)
const
override
{
PADDLE_ENFORCE
(
platform
::
is_cpu_place
(
ctx
.
GetPlace
()),
"The crf_decoding operator can only run on CPU."
);
auto
*
emission_weights
=
ctx
.
Input
<
LoDTensor
>
(
"Emission"
);
auto
*
transition_weights
=
ctx
.
Input
<
Tensor
>
(
"Transition"
);
auto
*
label
=
ctx
.
Input
<
LoDTensor
>
(
"Label"
);
auto
*
decoded_path
=
ctx
.
Output
<
Tensor
>
(
"ViterbiPath"
);
PADDLE_ENFORCE_EQ
(
emission_weights
->
NumLevels
(),
1UL
,
"The Input(Emission) should be a sequence."
);
auto
lod
=
emission_weights
->
lod
();
PADDLE_ENFORCE
(
lod
.
size
(),
"Input(Emission) must be a sequence."
);
const
size_t
level
=
0
;
const
size_t
seq_num
=
lod
[
level
].
size
()
-
1
;
int
*
path
=
decoded_path
->
mutable_data
<
int
>
(
platform
::
CPUPlace
());
math
::
SetConstant
<
platform
::
CPUPlace
,
int
>
()(
ctx
.
device_context
(),
decoded_path
,
0
);
for
(
size_t
i
=
0
;
i
<
seq_num
;
++
i
)
{
int
start_pos
=
static_cast
<
int
>
(
lod
[
level
][
i
]);
int
end_pos
=
static_cast
<
int
>
(
lod
[
level
][
i
+
1
]);
Tensor
decoded_path_one_seq
=
decoded_path
->
Slice
(
start_pos
,
end_pos
);
Decode
(
emission_weights
->
Slice
(
start_pos
,
end_pos
),
*
transition_weights
,
&
decoded_path_one_seq
);
}
if
(
label
)
{
PADDLE_ENFORCE_EQ
(
label
->
NumLevels
(),
1UL
,
"The Input(Label) should be a sequence."
);
const
int
*
label_value
=
label
->
data
<
int
>
();
size_t
batch_size
=
emission_weights
->
dims
()[
0
];
for
(
size_t
i
=
0
;
i
<
batch_size
;
++
i
)
{
path
[
i
]
=
label_value
[
i
]
==
path
[
i
]
?
1
:
0
;
}
}
}
private:
void
Decode
(
const
Tensor
&
emission_weights
,
const
Tensor
&
transition_weights
,
Tensor
*
decoded_path
)
const
{
auto
emission_dims
=
emission_weights
.
dims
();
const
size_t
seq_len
=
emission_dims
[
0
];
const
size_t
tag_num
=
emission_dims
[
1
];
const
size_t
state_trans_base_idx
=
2
;
const
T
*
x
=
emission_weights
.
data
<
T
>
();
const
T
*
w
=
transition_weights
.
data
<
T
>
();
int
*
path
=
decoded_path
->
data
<
int
>
();
// alpha is a memo table. An element alpha(k, v) records the score of the
// best sequence of tags from position 1 to position k with v being the end
// tag.
Tensor
alpha
;
T
*
alpha_value
=
alpha
.
mutable_data
<
T
>
(
emission_dims
,
platform
::
CPUPlace
());
Tensor
track
;
int
*
track_value
=
track
.
mutable_data
<
int
>
(
emission_dims
,
platform
::
CPUPlace
());
for
(
size_t
i
=
0
;
i
<
tag_num
;
++
i
)
alpha_value
[
i
]
=
w
[
i
]
+
x
[
i
];
for
(
size_t
k
=
1
;
k
<
seq_len
;
++
k
)
{
for
(
size_t
i
=
0
;
i
<
tag_num
;
++
i
)
{
T
max_score
=
-
std
::
numeric_limits
<
T
>::
max
();
int
max_j
=
0
;
for
(
size_t
j
=
0
;
j
<
tag_num
;
++
j
)
{
T
score
=
alpha_value
[(
k
-
1
)
*
tag_num
+
j
]
+
w
[(
j
+
state_trans_base_idx
)
*
tag_num
+
i
];
if
(
score
>
max_score
)
{
max_score
=
score
;
max_j
=
j
;
}
}
alpha_value
[
k
*
tag_num
+
i
]
=
max_score
+
x
[
k
*
tag_num
+
i
];
track_value
[
k
*
tag_num
+
i
]
=
max_j
;
}
}
T
max_score
=
-
std
::
numeric_limits
<
T
>::
max
();
int
max_i
=
0
;
for
(
size_t
i
=
0
;
i
<
tag_num
;
++
i
)
{
T
score
=
alpha_value
[(
seq_len
-
1
)
*
tag_num
+
i
]
+
w
[
tag_num
+
i
];
if
(
score
>
max_score
)
{
max_score
=
score
;
max_i
=
i
;
}
}
path
[
seq_len
-
1
]
=
max_i
;
for
(
int
k
=
seq_len
-
1
;
k
>=
1
;
--
k
)
{
path
[
k
-
1
]
=
max_i
=
track_value
[
k
*
tag_num
+
max_i
];
}
}
};
}
// namespace operators
}
// namespace paddle
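As a reading aid (my annotation, not part of the commit): the Decode method above fills the standard Viterbi memo table. With x the emission scores, w the transition matrix whose rows 0 and 1 hold the start and end weights (hence state_trans_base_idx = 2), and T = seq_len, the recurrence it implements is:

$$
\alpha(1, v) = w_{start,v} + x_{1,v} \\
\alpha(k, v) = \max_{j}\left[\alpha(k-1, j) + w_{j,v}\right] + x_{k,v}, \quad k = 2, \dots, T \\
path(T) = \arg\max_{v}\left[\alpha(T, v) + w_{end,v}\right]
$$

track(k, v) stores the maximizing j for each cell, and the final loop walks these back-pointers from the best end tag to recover the whole path.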
paddle/operators/crop_op.cc

@@ -56,34 +56,35 @@ class CropOpMaker : public framework::OpProtoAndCheckerMaker {
       : OpProtoAndCheckerMaker(proto, op_checker) {
     AddInput("X",
              "The input of pad op. "
-             "The input should be a k-D tensor(k > 0 and k < 7)");
+             "The input should be a k-D tensor(k > 0 and k < 7).");
     AddInput("Y",
-             "The input used as reference for cropping"
-             " with the same dimension as X. ")
+             "The input used as reference for cropping, "
+             "which is of the same dimensions as X.")
         .AsDispensable();
     AddOutput("Out",
-              "The output of crop op "
-              "with the same dimension as X.");
+              "The output of crop op, "
+              "which is of the same dimensions as X.");
     AddAttr<std::vector<int>>("offsets",
-                              "A list<int> describing offsets to be cropped."
-                              "The size of offsets list should be as same as "
-                              "dimension size of input X.");
+                              "A list<int> describing offsets to be cropped. "
+                              "The size of offsets list should be the same as "
+                              "the dimension size of input X.");
     AddAttr<std::vector<int>>("shape",
-                              "A list<int> describing the shape of output."
-                              "The size of shape list should be as same as "
-                              "dimension size of input X.")
+                              "A list<int> describing the shape of output. "
+                              "The size of shape list should be the same as "
+                              "the dimension size of input X.")
         .SetDefault(std::vector<int>());
     AddComment(R"DOC(
 Crop Operator.
+
 Crop input into output, as specified by offsets and shape.

 There are two ways to set shape:
-1. reference input: crop input X as shape as reference input.
-                    The dimension of reference input should
-                    be as same as input X.
-2. shape list: crop input X by shape described by a list<int>.
-               The size of shape list should be as same as
-               dimension size of input X.
+1. reference input: crop input X into the same shape as reference input.
+                    The dimension of reference input should
+                    be the same as the dimension of input X.
+2. shape list: crop input X into the shape described by a list<int>.
+               The size of shape list should be the same as
+               the dimension size of input X.

 The input should be a k-D tensor(k > 0 and k < 7). As an example:

@@ -91,20 +92,20 @@ Given:

     X = [[0, 1, 2, 0, 0]
          [0, 3, 4, 0, 0]
-         [0, 0, 0, 0, 0]]
+         [0, 0, 0, 0, 0]],

 and

-    offsets = [0, 1]
+    offsets = [0, 1],

 and

-    shape = [2, 2]
+    shape = [2, 2],

-then we get
+we get:

     Out = [[1, 2],
-           [3, 4]]
+           [3, 4]].

 )DOC");
   }
paddle/operators/cross_entropy_op.cc

@@ -49,7 +49,7 @@ class CrossEntropyOp : public framework::OperatorWithKernel {
  protected:
-  // Explicitly set that data type of the output of the cross_entropy operator
+  // Explicitly set that the data type of computation kernel of cross_entropy
   // is determined by its input "X".
   framework::DataType IndicateDataType(
       const framework::ExecutionContext& ctx) const override {

@@ -96,7 +96,8 @@ class CrossEntropyGradientOp : public framework::OperatorWithKernel {
  protected:
-  // CrossEntropy's data type just determined by "X"
+  // Explicitly set that the data type of computation kernel of cross_entropy
+  // is determined by its input "X".
   framework::DataType IndicateDataType(
       const framework::ExecutionContext& ctx) const override {
     return framework::ToDataType(ctx.Input<Tensor>("X")->type());

@@ -117,9 +118,9 @@ class CrossEntropyOpMaker : public framework::OpProtoAndCheckerMaker {
     AddInput("Label",
              "(Tensor, default Tensor<int>), the ground truth which is "
              "a 2-D tensor. "
-             "When soft_label is set to false, `Label` is a Tensor<int> with shape "
+             "When soft_label is set to false, Label is a Tensor<int> with shape "
              "[N x 1]. "
-             "When soft_label is set to true, `Label` is a Tensor<float/double> "
+             "When soft_label is set to true, Label is a Tensor<float/double> "
              "with shape [N x K].");
     AddOutput("Y",
               "(Tensor, default Tensor<float>), a 2-D tensor "

@@ -137,13 +138,13 @@ computation.
 1) One-hot cross-entropy:
     soft_label = false, Label[i, 0] indicates the class index for sample i:

-                Y[i] = -log(X[i, Label[i]])
+                $Y[i] = -\log(X[i, Label[i]])$

 2) Soft-label cross-entropy:
     soft_label = true, Label[i, j] indicates the soft label of class j
     for sample i:

-                Y[i] = \sum_j{-Label[i, j] * log(X[i, j])}
+                $Y[i] = \sum_j{-Label[i, j] * log(X[i, j])}$

    Please make sure that in this case the summation of each row of Label
    equals one.

@@ -153,8 +154,9 @@ computation.
    non-zero element (equals 1), soft-label cross-entropy degenerates to a
    one-hot cross-entropy with one-hot label representation.

-Both the input `X` and `Label` can carry the LoD (Level of Details) information,
-or not. But the output only shares the LoD with input `X`.
+Both the input X and Label can carry the LoD (Level of Details) information,
+or not. But the output only shares the LoD information with input X.
+
 )DOC");
   }
 };
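A quick worked instance of the one-hot case above (my example, not from the diff): if a row of X is the distribution (0.2, 0.7, 0.1) and Label[i, 0] = 1, then $Y[i] = -\log(0.7) \approx 0.357$; the loss shrinks toward 0 as the probability assigned to the true class approaches 1.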
paddle/operators/decayed_adagrad_op.cc

@@ -75,11 +75,18 @@ class DecayedAdagradOpMaker : public framework::OpProtoAndCheckerMaker {
                    "Constant for numerical stability")
         .SetDefault(1.0e-6f);
     AddComment(R"DOC(
-Decayed Adagrad
+Decayed Adagrad Optimizer.

 The update is done as follows:

-moment_out = decay * moment + (1 - decay) * grad * grad
-param_out = param - learning_rate * grad / (sqrt(moment_out) + epsilon)
+$$
+moment\_out = decay * moment + (1 - decay) * grad * grad \\
+param\_out = param - \frac{learning\_rate * grad}{\sqrt{moment\_out} + epsilon}
+$$

 The original paper(http://www.jmlr.org/papers/volume12/duchi11a/duchi11a.pdf)
 does not have an epsilon attribute. It is added here for numerical
 stability to avoid the division by zero error.
+
 )DOC");
   }
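For intuition, a minimal scalar sketch of the update the new $$ ... $$ block describes (my illustration, not from the commit; names are hypothetical):

#include <cmath>

// Hypothetical scalar version of the decayed Adagrad step shown above.
void decayed_adagrad_step(float& param, float& moment, float grad,
                          float learning_rate, float decay, float epsilon) {
  // Accumulate a decayed sum of squared gradients.
  moment = decay * moment + (1.0f - decay) * grad * grad;
  // Scale the step by the root of the accumulated moment; epsilon
  // guards against division by zero, as the comment explains.
  param -= learning_rate * grad / (std::sqrt(moment) + epsilon);
}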
paddle/operators/dropout_op.cc

@@ -43,22 +43,24 @@ class DropoutOpMaker : public framework::OpProtoAndCheckerMaker {
   DropoutOpMaker(framework::OpProto* proto,
                  framework::OpAttrChecker* op_checker)
       : OpProtoAndCheckerMaker(proto, op_checker) {
-    AddAttr<float>("dropout_prob", "Probability of setting units to zero.")
-        .SetDefault(.5f);
-    AddAttr<bool>("is_training", "Whether in training phase.").SetDefault(true);
-    AddAttr<int>("seed", "Dropout random seed.").SetDefault(0);
     AddInput("X", "The input of dropout op.");
     AddOutput("Out", "The output of dropout op.");
     AddOutput("Mask", "The random sampled dropout mask.").AsIntermediate();

+    AddAttr<float>("dropout_prob", "Probability of setting units to zero.")
+        .SetDefault(.5f);
+    AddAttr<bool>("is_training", "True if in training phase.").SetDefault(true);
+    AddAttr<int>("seed", "Dropout random seed.").SetDefault(0);
+
     AddComment(R"DOC(
 Dropout Operator.

-'Dropout' refers to randomly dropping out units in a nerual network. It is a
+Dropout refers to randomly dropping out units in a neural network. It is a
 regularization technique for reducing overfitting by preventing neuron
 co-adaption during training. The dropout operator randomly set (according to
 the given dropout probability) the outputs of some units to zero, while others
-being set to their inputs.
+are set equal to their corresponding inputs.
+
 )DOC");
   }
 };
paddle/operators/dynamic_recurrent_op.cc

@@ -386,12 +386,13 @@ class DynamicRecurrentOpProtoAndCheckerMaker
         RNNAlgorithm::kArgNames[RNNAlgorithm::ComputeMode::kForward];
     // inputs and outputs stored in proto
     AddInput(name.inlinks,
-             "the inputs that need to be segmented for each step.")
+             "The inputs that need to be segmented for each step.")
         .AsDuplicable();
-    AddInput(name.initial_states, "variables to initialize states.")
+    AddInput(name.initial_states, "Variables to initialize the states.")
         .AsDuplicable();

-    AddOutput(name.outlinks, "the outputs that need to concated for all steps.")
+    AddOutput(name.outlinks,
+              "The outputs that need to be concatenated for all steps.")
         .AsDuplicable();
     AddOutput(name.step_scopes, "step scopes");

@@ -399,7 +400,12 @@ class DynamicRecurrentOpProtoAndCheckerMaker
     AddAttr<std::vector<std::string>>(name.ex_states, "names of ex_states");
     AddAttr<std::vector<std::string>>(name.states, "names of states");

-    AddComment("This is a RNN operator for varience-length sequences.");
+    AddComment(R"DOC(
+Dynamic Recurrent Operator.
+
+This is a RNN operator for variable-length sequences.
+
+)DOC");
   }
 };
paddle/operators/elementwise_add_op.cc

@@ -22,7 +22,7 @@ class ElementwiseAddOpMaker : public ElementwiseOpMaker {
   ElementwiseAddOpMaker(framework::OpProto* proto,
                         framework::OpAttrChecker* op_checker)
       : ElementwiseOpMaker(proto, op_checker) {
-    SetComment("add", "Out = X + Y");
+    SetComment("Add", "$Out = X + Y$");
     AddComment(comment_);
   }
 };
paddle/operators/elementwise_div_op.cc

@@ -22,7 +22,7 @@ class ElementwiseDivOpMaker : public ElementwiseOpMaker {
   ElementwiseDivOpMaker(framework::OpProto* proto,
                         framework::OpAttrChecker* op_checker)
       : ElementwiseOpMaker(proto, op_checker) {
-    SetComment("Div", "Out = X / Y");
+    SetComment("Div", "$Out = X / Y$");
     AddComment(comment_);
   }
 };
paddle/operators/elementwise_mul_op.cc

@@ -23,7 +23,7 @@ class ElementwiseMulOpMaker : public ElementwiseOpMaker {
   ElementwiseMulOpMaker(framework::OpProto* proto,
                         framework::OpAttrChecker* op_checker)
       : ElementwiseOpMaker(proto, op_checker) {
-    SetComment("Mul", "Out = X ⊙ Y");
+    SetComment("Mul", "$Out = X \\odot\\ Y$");
     AddComment(comment_);
   }
 };
paddle/operators/elementwise_op.h

@@ -46,37 +46,42 @@ class ElementwiseOpMaker : public framework::OpProtoAndCheckerMaker {
   ElementwiseOpMaker(framework::OpProto* proto,
                      framework::OpAttrChecker* op_checker)
       : OpProtoAndCheckerMaker(proto, op_checker) {
-    AddInput("X", R"DOC(
-The first input of elementwise op, it's a tensor of any dimensions.
-)DOC");
-    AddInput("Y", R"DOC(
-The sencond input of elementwise op, it's a tensor and it's dimensions
-must be small or equal to X's dimensions.
-)DOC");
+    AddInput("X", "(Tensor) The first input tensor of elementwise op");
+    AddInput("Y", "(Tensor) The second input tensor of elementwise op");
+    AddOutput("Out", "The output of elementwise op");
     AddAttr<int>("axis",
-                 R"DOC(
-When the shape(Y) does not equal the shape(X), Y will be broadcasted
-to match the shape of X and axis should be dimension index Y in X
-)DOC")
+                 "(int, default -1) The starting dimension index "
+                 "for broadcasting Y onto X")
         .SetDefault(-1)
         .EqualGreaterThan(-1);
-    AddOutput("Out", "The output of elementwise op");
     comment_ = R"DOC(
-Limited elementwise {name} operator.The equation is: Out = {equation}.
-1. The shape of Y should be same with X or
-2. Y's shape is a subset of X.
-   Y will be broadcasted to match the shape of X and axis should be dimension index Y in X.
-   example:
-     shape(X) = (2, 3, 4, 5), shape(Y) = (,)
-     shape(X) = (2, 3, 4, 5), shape(Y) = (5,)
-     shape(X) = (2, 3, 4, 5), shape(Y) = (4, 5)
-     shape(X) = (2, 3, 4, 5), shape(Y) = (3, 4), with axis=1
-     shape(X) = (2, 3, 4, 5), shape(Y) = (2), with axis=0
+Limited Elementwise {name} Operator.
+
+The equation is:
+
+{equation}
+
+X is a tensor of any dimension and the dimensions of tensor Y must be smaller than
+or equal to the dimensions of X.
+
+There are two cases for this operator:
+1. The shape of Y is same with X;
+2. The shape of Y is a subset of X.
+
+For case 2:
+Y will be broadcasted to match the shape of X and axis should be
+the starting dimension index for broadcasting Y onto X.
+
+example:
+  shape(X) = (2, 3, 4, 5), shape(Y) = (,)
+  shape(X) = (2, 3, 4, 5), shape(Y) = (5,)
+  shape(X) = (2, 3, 4, 5), shape(Y) = (4, 5)
+  shape(X) = (2, 3, 4, 5), shape(Y) = (3, 4), with axis=1
+  shape(X) = (2, 3, 4, 5), shape(Y) = (2), with axis=0

 Both the input X and Y can carry the LoD (Level of Details) information,
-or not. But the output only shares the LoD with input X.
+or not. But the output only shares the LoD information with input X.
+
 )DOC";
     AddComment(comment_);
   }
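A small sketch of the shape rule this comment describes (my illustration, not from the commit; the function name is hypothetical): shape(Y) must equal a contiguous run of shape(X) starting at axis, with axis = -1 meaning "align Y with the trailing dimensions of X".

#include <cstddef>
#include <vector>

// Illustrative check of the broadcast rule described above.
bool CanBroadcastYOntoX(const std::vector<int>& x_dims,
                        const std::vector<int>& y_dims, int axis) {
  // axis == -1: align Y with the trailing dimensions of X.
  if (axis == -1) axis = static_cast<int>(x_dims.size() - y_dims.size());
  if (axis < 0 || axis + y_dims.size() > x_dims.size()) return false;
  for (std::size_t i = 0; i < y_dims.size(); ++i) {
    if (x_dims[axis + i] != y_dims[i]) return false;
  }
  return true;
}

// e.g. CanBroadcastYOntoX({2, 3, 4, 5}, {3, 4}, 1) == true
//      CanBroadcastYOntoX({2, 3, 4, 5}, {2}, 0)    == true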
paddle/operators/elementwise_sub_op.cc

@@ -22,7 +22,7 @@ class ElementwiseSubOpMaker : public ElementwiseOpMaker {
   ElementwiseSubOpMaker(framework::OpProto* proto,
                         framework::OpAttrChecker* op_checker)
       : ElementwiseOpMaker(proto, op_checker) {
-    SetComment("Sub", "Out = X - Y");
+    SetComment("Sub", "$Out = X - Y$");
     AddComment(comment_);
   }
 };
paddle/operators/feed_op.cc

@@ -59,8 +59,13 @@ class FeedOpInfoMaker : public framework::OpProtoAndCheckerMaker {
       : OpProtoAndCheckerMaker(proto, op_checker) {
     AddInput("X", "The input of feed op");
     AddOutput("Out", "The output of feed op");
-    AddComment("feed op, it should not be configured by users directly");
-    AddAttr<int>("col", "column of feed");
+    AddAttr<int>("col", "(int) The column of feed");
+    AddComment(R"DOC(
+Feed Operator.
+
+It should not be configured by users directly.
+
+)DOC");
   }
 };
paddle/operators/fetch_op.cc

@@ -66,8 +66,13 @@ class FetchOpInfoMaker : public framework::OpProtoAndCheckerMaker {
       : OpProtoAndCheckerMaker(proto, op_checker) {
     AddInput("X", "The input of fetch op");
     AddOutput("Out", "The output of fetch op");
-    AddComment("fetch op, it should not be configured by users directly");
-    AddAttr<int>("col", "column of fetch");
+    AddAttr<int>("col", "(int) The column of fetch");
+    AddComment(R"DOC(
+Fetch Operator.
+
+It should not be configured by users directly.
+
+)DOC");
   }
 };
 }  // namespace operators
paddle/operators/fill_constant_batch_size_like_op.cc

@@ -70,11 +70,16 @@ class FillConstantBatchSizeLikeOpMaker
               "with the specified value");
     AddAttr<std::vector<int>>("shape", "(vector<int>) The shape of the output");
     AddAttr<int>("dim_idx",
-                 "(int, default 0) the index of batch size dimension")
+                 "(int, default 0) The index of batch size dimension")
         .SetDefault(0);
     AddAttr<float>("value", "(float, default 0) The value to be filled")
         .SetDefault(0.0f);
-    AddComment(R"DOC(Fill up a variable with specified constant value.)DOC");
+    AddComment(R"DOC(
+FillConstantBatchSizeLike Operator.
+
+Fill up a variable with specified constant value.
+
+)DOC");
   }
 };
 }  // namespace operators
paddle/operators/fill_constant_op.cc

@@ -54,7 +54,12 @@ class FillConstantOpMaker : public framework::OpProtoAndCheckerMaker {
     AddOutput("Out",
               "(Tensor) Tensor of specified shape will be filled "
               "with the specified value");
-    AddComment(R"DOC(Fill up a variable with specified constant value.)DOC");
+    AddComment(R"DOC(
+FillConstant Operator.
+
+Fill up a variable with specified constant value.
+
+)DOC");
   }
 };
 }  // namespace operators
paddle/operators/fill_zeros_like_op.cc

@@ -37,11 +37,13 @@ class FillZerosLikeOpMaker : public framework::OpProtoAndCheckerMaker {
                       framework::OpAttrChecker* op_checker)
       : framework::OpProtoAndCheckerMaker(proto, op_checker) {
     AddInput("X", "The input of fill-zeros-like op.");
-    AddOutput("Y", "The varibale will be filled up with zeros.");
+    AddOutput("Y", "The variable will be filled up with zeros.");
     AddComment(R"DOC(
-Fill up a vriable with zeros.
+FillZerosLike Operator.

-The output will have the same size with input.
+Fill up a variable with zeros.
+The output will have the same size as the input.
+
 )DOC");
   }
 };
paddle/operators/gather_op.cc

@@ -67,11 +67,28 @@ class GatherOpMaker : public framework::OpProtoAndCheckerMaker {
       : OpProtoAndCheckerMaker(proto, op_checker) {
     AddInput("X", "The source input of gather op");
     AddInput("Index", "The index input of gather op");
-    AddOutput("Out", "The output of add op");
+    AddOutput("Out", "The output of gather op");
     AddComment(R"DOC(
-Gather Operator by selecting from the first axis,
+Gather Operator.

-Out = X[Index]
+$Out = X[Index]$
+
+Out is obtained by gathering entries of the outer-most dimension
+of X indexed by Index and concatenate them together.
+
+Example:
+
+X = [[1, 2],
+     [3, 4],
+     [5, 6]]
+
+Index = [[1, 2]]
+
+Then:
+
+Out = [[3, 4],
+       [5, 6]]
+
 )DOC");
   }
 };
paddle/operators/gaussian_random_op.cc

@@ -68,21 +68,35 @@ class GaussianRandomOpMaker : public framework::OpProtoAndCheckerMaker {
   GaussianRandomOpMaker(framework::OpProto* proto,
                         framework::OpAttrChecker* op_checker)
       : framework::OpProtoAndCheckerMaker(proto, op_checker) {
-    AddOutput("Out", "output matrix of random op");
-    AddComment(R"DOC(
-GaussianRandom operator.
-Use to initialize tensor with gaussian random generator.
-)DOC");
+    AddOutput("Out", "Output matrix of gaussian random op");

-    AddAttr<std::vector<int>>("shape", "The dimension of random tensor.");
-    AddAttr<float>("mean", "mean of random tensor.").SetDefault(.0f);
-    AddAttr<float>("std", "std of random tensor.").SetDefault(1.0f);
+    AddAttr<std::vector<int>>("shape",
+                              "(vector<int>) "
+                              "The dimension of random tensor.");
+    AddAttr<float>("mean",
+                   "(float, default 0.0) "
+                   "mean of random tensor.")
+        .SetDefault(.0f);
+    AddAttr<float>("std",
+                   "(float, default 1.0) "
+                   "std of random tensor.")
+        .SetDefault(1.0f);
     AddAttr<int>("seed",
+                 "(int, default 0) "
                  "Random seed of generator."
-                 "0 means use system wide seed")
+                 "0 means use system wide seed.")
         .SetDefault(0);
-    AddAttr<int>("data_type", "output data type")
+    AddAttr<int>("data_type",
+                 "(int, default 5(FP32)) "
+                 "Output data type.")
         .SetDefault(framework::DataType::FP32);
+
+    AddComment(R"DOC(
+GaussianRandom Operator.
+
+Used to initialize tensors with gaussian random generator.
+
+)DOC");
   }
 };
paddle/operators/gru_unit_op.cc

@@ -80,19 +80,21 @@ class GRUUnitOpMaker : public framework::OpProtoAndCheckerMaker {
     AddInput("HiddenPrev",
              "(Tensor) Matrix with shape [batch_size, frame_size] for the "
              "states of previous time step.");
-    AddInput("Weight",
-             "(Tensor) Weight matrix with shape [frame_size, frame_size * 3]. "
-             "The elements continuous in memory can be divided into two parts. "
-             "The first part are weights of the update gate and reset gate "
-             "with shape [frame_size, frame_size * 2], and the second part are "
-             "weights of output candidate with shape [frame_size, frame_size]");
-    AddInput("Bias",
-             "(Tensor) Bias vector with shape [1, frame_size * 3] concating "
-             "bias of the update gate, reset gate and output candidate.")
+    AddInput("Weight",
+             "(Tensor) Weight matrix with shape [frame_size, frame_size * 3]. "
+             "The elements continuous in memory can be divided into two parts. "
+             "The first part are weights of the update gate and reset gate "
+             "with shape [frame_size, frame_size * 2], and the second part are "
+             "weights of output candidate with shape [frame_size, frame_size].");
+    AddInput("Bias",
+             "(Tensor) Bias vector with shape [1, frame_size * 3] concatenating "
+             "bias of the update gate, reset gate and output candidate.")
         .AsDispensable();
     AddOutput("Gate",
               "(Tensor) Matrix with shape [batch_size, frame_size * 3] for the "
-              "output of update gate, reset gate and output candidate")
+              "output of update gate, reset gate and output candidate.")
         .AsIntermediate();
     AddOutput("ResetHiddenPrev",
               "(Tensor) Matrix with shape [batch_size, frame_size] for the "

@@ -112,16 +114,19 @@ class GRUUnitOpMaker : public framework::OpProtoAndCheckerMaker {
         .SetDefault(sigmoid)
         .InEnum({identity, sigmoid, tanh, relu});
     AddComment(R"DOC(
-GRUUnitOp implements part calculations of the GRU unit as following:
+GRUUnit Operator.

-\f[
-update \ gate: u_t = actGate(xu_t + W_u * hidden_prev + bias_u) \\
-reset \ gate: r_t = actGate(xr_t + W_r * hidden_prev + bias_r) \\
-output \ candidate: {h}_t = actNode(xc_t + W_c * dot(r_t, hidden_prev) + bias_c) \\
-output: h_t = dot((1-u_t), {h}_t) + dot(u_t, hidden_prev)
-\f]
+This operator implements partial calculations of the GRU unit as follows:
+
+$$
+update \ gate: u_t = actGate(xu_t + W_u * hidden_{prev} + bias_u) \\
+reset \ gate: r_t = actGate(xr_t + W_r * hidden_{prev} + bias_r) \\
+output \ candidate: {h}_t = actNode({xc}_t + W_c * dot(r_t, hidden_{prev}) + bias_c) \\
+output: h_t = dot((1-u_t), {h}_t) + dot(u_t, hidden_{prev})
+$$

 The rest of GRU unit can be completed by using FCOp's output as the input of GRUUnitOp.
+
 )DOC");
   }
 };
paddle/operators/huber_loss_op.cc

@@ -59,10 +59,12 @@ class HuberLossOpMaker : public framework::OpProtoAndCheckerMaker {
              "The shape is same as Input(X) and will be reused in backward.")
         .AsIntermediate();
     AddOutput("Out",
-              "The output tensor with shape [batch_size, 1] which represents "
-              "the huber loss.");
+              "The output tensor with shape [batch_size, 1] "
+              "which represents the huber loss.");
     AddAttr<AttrType>("delta", "Hyper parameter in huber loss.");
     AddComment(R"DOC(
+HuberLoss Operator.
+
 Huber loss is a loss function used in robust regression. We define X as the
 input value and Y as the target value. Huber loss can evaluate the fitness of
 X to Y. Different from MSE loss, Huber loss is more robust for outliers. The
paddle/operators/increment_op.cc

@@ -39,14 +39,18 @@ class IncrementOpMaker : public framework::OpProtoAndCheckerMaker {
       : OpProtoAndCheckerMaker(proto, op_checker) {
     AddInput("X", "(Tensor) The input tensor of increment operator");
     AddOutput("Out", "(Tensor) The output tensor of increment operator.");
-    AddComment(R"DOC(Increment operator
-
-The equation is: Out = X + step
-)DOC");
     AddAttr<AttrType>("step",
+                      "(float, default 1.0) "
                       "The step size by which the "
                       "input tensor will be incremented.")
         .SetDefault(1.0);
+    AddComment(R"DOC(
+Increment Operator.
+
+The equation is:
+$$Out = X + step$$
+
+)DOC");
   }
 };
paddle/operators/l1_norm_op.cc

@@ -57,7 +57,7 @@ L1 Norm Operator.

 Computes the L1 norm of a tensor.

-Out = sum (abs(X))
+$$Out = \sum{|X|}$$

 )DOC");
 }
paddle/operators/linear_chain_crf_op.cc

@@ -22,43 +22,44 @@ class LinearChainCRFOpMaker : public framework::OpProtoAndCheckerMaker {
   LinearChainCRFOpMaker(framework::OpProto* proto,
                         framework::OpAttrChecker* op_checker)
       : OpProtoAndCheckerMaker(proto, op_checker) {
-    AddInput("Emission",
-             "(LoDTensor, default: LoDTensor<float>). "
-             "The unscaled emission weight matrix for the linear chain CRF. "
-             "This input is a LoDTensor with shape [N x D] where N is the size of "
-             "the mini-batch and D is the total tag number.");
-    AddInput("Transition",
-             "(Tensor, default: Tensor<float>). A Tensor with shape [(D + 2) x D]. "
-             "The learnable parameter for the linear_chain_crf operator. "
-             "See more details in the operator's comments.");
-    AddInput("Label",
-             "(LoDTensor, default: LoDTensor<int>). The ground truth which is a 2-D "
-             "LoDTensor with shape [N x 1], where N is the total element number in "
-             "a mini-batch.");
+    AddInput("Emission",
+             "(LoDTensor, default: LoDTensor<float>). "
+             "A 2-D LoDTensor with shape [N x D] where N is the size of the "
+             "mini-batch and D is the total tag number. The unscaled emission "
+             "weight matrix for the linear chain CRF. ");
+    AddInput("Transition",
+             "(Tensor, default: Tensor<float>). A 2-D Tensor with shape "
+             "[(D + 2) x D]. The learnable parameter for the linear_chain_crf "
+             "operator. See more details in the operator's comments.");
+    AddInput("Label",
+             "(LoDTensor, default: LoDTensor<int>). A LoDTensor with shape "
+             "[N x 1], where N is the total element number in a mini-batch. "
+             "The ground truth.");
-    AddOutput("Alpha",
-              "(Tensor, default: Tensor<float>). The forward vectors for the entire "
-              "batch. A two dimensional tensor with shape [N x D], "
-              "denoted as \f$\alpha\f$. \f$\alpha\f$ is a memo table used to "
-              "calculate the normalization factor in CRF. \f$\alpha[k, v]\f$ stores "
-              "the unnormalized probabilities of all possible unfinished sequences of "
-              "tags that end at position \f$k\f$ with tag \f$v\f$. For each \f$k\f$, "
+    AddOutput("Alpha",
+              "(Tensor, default: Tensor<float>). A 2-D Tensor with shape [N x D]. "
+              "The forward vectors for the entire batch. Denote it as \f$\alpha\f$. "
+              "\f$\alpha\f$ is a memo table used to calculate the normalization "
+              "factor in CRF. \f$\alpha[k, v]\f$ stores the unnormalized "
+              "probabilities of all possible unfinished sequences of tags that end at "
+              "position \f$k\f$ with tag \f$v\f$. For each \f$k\f$, "
               "\f$\alpha[k, v]\f$ is a vector of length \f$D\f$ with a component for "
               "each tag value \f$v\f$. This vector is called a forward vector and "
               "will also be used in backward computations.")
         .AsIntermediate();
-    AddOutput("EmissionExps",
-              "The exponentials of Input(Emission). This is an intermediate "
-              "computational result in forward computation, and will be reused "
-              "in backward computation.")
+    AddOutput("EmissionExps",
+              "(Tensor, default: Tensor<float>). A 2-D Tensor with shape [N x D]. "
+              "The exponentials of Input(Emission). This is an intermediate "
+              "computational result in forward computation, and will be reused in "
+              "backward computation.")
         .AsIntermediate();
-    AddOutput("TransitionExps",
-              "The exponentials of Input(Transition). This is an intermediate "
-              "computational result in forward computation, and will be reused "
-              "in backward computation.")
+    AddOutput("TransitionExps",
+              "(Tensor, default: Tensor<float>). A 2-D Tensor with shape "
+              "[(D + 2) x D]. The exponentials of Input(Transition). This is an "
+              "intermediate computational result in forward computation, and "
+              "will be reused in backward computation.")
         .AsIntermediate();
     AddOutput("LogLikelihood",
...
@@ -179,8 +180,8 @@ class LinearChainCRFOp : public framework::OperatorWithKernel {
   }

  protected:
-  // Explicitly set that the data type of output of the linear_chain_crf
-  // operator is determined by its input "Emission".
+  // Explicitly set that the data type of the computation kernel of
+  // linear_chain_crf is determined by its input "Emission".
   framework::DataType IndicateDataType(
       const framework::ExecutionContext& ctx) const override {
     return framework::ToDataType(ctx.Input<LoDTensor>("Emission")->type());
...
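Aside (an illustration, not the kernel's actual code): a small NumPy sketch of how the \f$\alpha\f$ memo table described above yields the CRF normalization factor. The row layout of Transition (start weights, end weights, then tag-to-tag transitions) follows the CRF decoding test later in this diff:

import numpy as np

def crf_forward_alpha(emission_exps, transition_exps):
    # emission_exps: [N x D] exponentiated emissions of one sequence
    # transition_exps: [(D + 2) x D]; row 0 = start weights,
    # row 1 = end weights, rows 2.. = tag-to-tag transition weights
    seq_len, tag_num = emission_exps.shape
    a = transition_exps[0, :]
    w = transition_exps[2:, :]
    alpha = np.zeros((seq_len, tag_num))
    alpha[0, :] = a * emission_exps[0, :]
    for k in range(1, seq_len):
        # alpha[k, v] sums over every tag j the sequence could end with at k-1
        alpha[k, :] = (alpha[k - 1, :] @ w) * emission_exps[k, :]
    # the normalization factor Z is the total weight of all tag sequences
    z = np.sum(alpha[-1, :] * transition_exps[1, :])
    return alpha, z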
paddle/operators/linear_chain_crf_op.h

@@ -134,7 +134,7 @@ class LinearChainCRFOpKernel : public framework::OpKernel<T> {
     Tensor emission_row_max;
     emission_row_max.mutable_data<T>(
-        framework::make_ddim({static_cast<int>(batch_size), 1}),
+        framework::make_ddim({static_cast<int64_t>(batch_size), 1}),
         platform::CPUPlace());
     auto place = ctx.GetEigenDevice<platform::CPUPlace>();
...
@@ -273,7 +273,7 @@ class LinearChainCRFOpKernel : public framework::OpKernel<T> {
     const int* lbl = label.data<int>();
-    PADDLE_ENFORCE_LT(*std::max_element(lbl, lbl + seq_length), tag_num,
+    PADDLE_ENFORCE_LT(
+        static_cast<size_t>(*std::max_element(lbl, lbl + seq_length)), tag_num,
                       "An invalid tag label that exceeds the largest tag number.");

     // Calculate the numerator part, which depends on the label sequence.
...
paddle/operators/load_op.cc

@@ -115,14 +115,18 @@ class LoadOpProtoMaker : public framework::OpProtoAndCheckerMaker {
   LoadOpProtoMaker(framework::OpProto* proto,
                    framework::OpAttrChecker* op_checker)
       : OpProtoAndCheckerMaker(proto, op_checker) {
-    AddOutput("Out", "The tensor need to be loaded");
-    AddComment(R"DOC(Load Operator
-Load operator will load a tensor variable from disk file.
-)DOC");
+    AddOutput("Out", "(Tensor) The tensor to be loaded");
+    AddAttr<std::string>("file_path",
+                         "(string) "
+                         "Variable will be loaded from \"file_path\".")
+        .AddCustomChecker(
+            [](const std::string& path) { return !path.empty(); });
+    AddComment(R"DOC(
+Load Operator.
+
+Load operator will load a tensor variable from a file on disk.
+)DOC");
   }
 };
 }  // namespace operators
...
paddle/operators/lod_rank_table_op.cc
0 → 100644

/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */

#include "paddle/framework/lod_rank_table.h"
#include "paddle/framework/op_registry.h"

namespace paddle {
namespace operators {

class LoDRankTableOp : public framework::OperatorBase {
 public:
  LoDRankTableOp(const std::string &type,
                 const framework::VariableNameMap &inputs,
                 const framework::VariableNameMap &outputs,
                 const framework::AttributeMap &attrs)
      : OperatorBase(type, inputs, outputs, attrs) {}
  void Run(const framework::Scope &scope,
           const platform::DeviceContext &dev_ctx) const override {
    auto x = scope.FindVar(Input("X"))->Get<framework::LoDTensor>();
    auto *out =
        scope.FindVar(Output("Out"))->GetMutable<framework::LoDRankTable>();
    out->Reset(x.lod(), static_cast<size_t>(Attr<int>("level")));
  }
};

class LoDRankTableOpProtoMaker : public framework::OpProtoAndCheckerMaker {
 public:
  LoDRankTableOpProtoMaker(framework::OpProto *proto,
                           framework::OpAttrChecker *op_checker)
      : OpProtoAndCheckerMaker(proto, op_checker) {
    AddInput("X", "(LoDTensor) input lod tensor, must contain lod information.");
    AddOutput("Out", "(LoDRankTable) The rank table of the specific level.");
    AddAttr<int>("level", "(int) the specific lod level to rank.")
        .SetDefault(0)
        .EqualGreaterThan(0);
    AddComment(R"DOC(Create LoDRankTable by LoDTensor

LoD Rank Table stores the `level` of `lod` which is ordered by sequence
length in descending order. It is useful when implementing dynamic RNN and is
shared by the dynamic RNN memory, dynamic RNN slice input and dynamic RNN
slice output operators.
)DOC");
  }
};

class LoDRankTableInferShape : public framework::InferShapeBase {
 public:
  void operator()(framework::InferShapeContext *context) const override {
    PADDLE_ENFORCE(context->HasInput("X"), "LoDRankTable must have input X");
  }
};

class LoDRankTableInferVarType : public framework::VarTypeInference {
 public:
  void operator()(const framework::OpDescBind &op_desc,
                  framework::BlockDescBind *block) const override {
    for (auto &o : op_desc.Output("Out")) {
      block->Var(o)->SetType(framework::VarDesc::LOD_RANK_TABLE);
    }
  }
};

}  // namespace operators
}  // namespace paddle

REGISTER_OPERATOR(lod_rank_table, paddle::operators::LoDRankTableOp,
                  paddle::operators::LoDRankTableOpProtoMaker,
                  paddle::operators::LoDRankTableInferShape,
                  paddle::operators::LoDRankTableInferVarType,
                  paddle::framework::EmptyGradOpMaker);
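A Python sketch (not the C++ implementation) of what LoDRankTableOp::Run computes: take one LoD level, measure each sequence's length, and sort the (index, length) items by length in descending order.

def build_lod_rank_table(lod, level=0):
    offsets = lod[level]
    items = [(i, offsets[i + 1] - offsets[i]) for i in range(len(offsets) - 1)]
    # a stable sort keeps the original order among equal-length sequences
    return sorted(items, key=lambda item: item[1], reverse=True)

# With the LoD used by test_lod_rank_table.py below, level 1 is [0, 5, 6, 7]:
# build_lod_rank_table([[0, 1, 3], [0, 5, 6, 7]], level=1)
# returns [(0, 5), (1, 1), (2, 1)], matching the expected table items.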
paddle/operators/lookup_table_op.cc

@@ -53,21 +53,27 @@ class LookupTableOpMaker : public framework::OpProtoAndCheckerMaker {
                     framework::OpAttrChecker* op_checker)
       : OpProtoAndCheckerMaker(proto, op_checker) {
-    AddInput("W",
-             "An input represents embedding tensors,"
-             " which is a learnable parameter.");
-    AddInput("Ids",
-             "An input with type int32 or int64"
-             "contains the ids to be looked up in W."
-             "Ids must be a column vector with rank = 2."
-             "The 2nd dimension size must be 1");
-    AddOutput("Out", "The lookup results, which have the same type with W.");
-    AddAttr<bool>("is_sparse", "Sparse update").SetDefault(false);
+    AddInput("W",
+             "An input represents embedding tensors, "
+             "which is a learnable parameter.");
+    AddInput("Ids",
+             "An input with type int32 or int64 "
+             "contains the ids to be looked up in W. "
+             "Ids must be a column vector with rank = 2. "
+             "The 2nd dimension size must be 1.");
+    AddOutput("Out", "The lookup results, which have the same type as W.");
+    AddAttr<bool>("is_sparse",
+                  "(boolean, default false) "
+                  "Sparse update")
+        .SetDefault(false);
     AddComment(R"DOC(
 Lookup Table Operator.

 This operator is used to perform lookups on the parameter W,
 then concatenated into a dense tensor.

-The input `Ids` can carry the LoD (Level of Details) information,
-or not. And the output only shares the LoD with input `Ids`.
+The input Ids can carry the LoD (Level of Details) information,
+or not. And the output only shares the LoD information with input Ids.
 )DOC");
   }
 };
...
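Aside (illustrative, not the operator kernel): the lookup semantics described above amount to a row gather, which a few lines of NumPy make concrete:

import numpy as np

W = np.random.rand(10, 4)        # [vocab_size x embedding_dim], learnable
ids = np.array([[3], [0], [3]])  # rank-2 column vector, 2nd dim size 1
# gather one row of W per id and stack the rows into a dense tensor
out = W[ids.flatten()]
assert out.shape == (3, 4) and out.dtype == W.dtype  # same type as W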
paddle/operators/lrn_op.cc

@@ -45,72 +45,70 @@ class LRNOpMaker : public framework::OpProtoAndCheckerMaker {
 public:
  LRNOpMaker(framework::OpProto* proto, framework::OpAttrChecker* op_checker)
      : OpProtoAndCheckerMaker(proto, op_checker) {
-    AddInput("X", R"DOC(
-(Tensor) The input of LRN operator. It must be a 4D tensor with NCHW format.
-)DOC");
+    AddInput("X",
+             "(Tensor) The input of LRN operator. "
+             "It must be a 4D tensor with NCHW format.");
     AddOutput("Out",
               "(Tensor) The output of LRN operator, which is also the 4D "
               "tensor with NCHW format.");
-    AddOutput("MidOut", R"Doc(
-(Tensor) Middle result of lrn op. It's computed in forward process
-and also used in backward process.
-)Doc");
-    AddAttr<int>("n", R"DOC(
-(int, default 5) n is "adjacent" kernel maps at the same spatial position.
-)DOC")
+    AddOutput("MidOut",
+              "(Tensor) Middle result of LRN operator. It's computed in "
+              "forward process and also used in backward process.");
+    AddAttr<int>("n",
+                 "(int, default 5) "
+                 "n is the \"adjacent\" kernel that maps "
+                 "at the same spatial position.")
         .SetDefault(5)
         .GreaterThan(0);
-    AddAttr<T>("k", R"DOC(
-(float, default 2.0) k is the bias.
-)DOC")
+    AddAttr<T>("k",
+               "(float, default 2.0) "
+               "k is the bias.")
         .SetDefault(2.0)
         .GreaterThan(0.0);
-    AddAttr<T>("alpha", R"DOC(
-(float, default 0.0001) alpha is the scale number.
-)DOC")
+    AddAttr<T>("alpha",
+               "(float, default 0.0001) "
+               "alpha is the scale number.")
         .SetDefault(0.0001)
         .GreaterThan(0.0);
-    AddAttr<T>("beta", R"DOC(
-(float, default 0.75) beta is the power number.
-)DOC")
+    AddAttr<T>("beta",
+               "(float, default 0.75) "
+               "beta is the power number.")
         .SetDefault(0.75)
         .GreaterThan(0.0);
     AddComment(R"DOC(
-Local Response Normalization.
-This Function comes from the paper
-"ImageNet Classification with Deep Convolutional Neural Networks".
+Local Response Normalization Operator.
+
+This operator comes from the paper
+"ImageNet Classification with Deep Convolutional Neural Networks".

 The original formula is:
-
-                         Input(i, x, y)
-Output(i, x, y) = ----------------------------------------------
-                   -- upper
-                  (k + alpha * >      (Input(j, x, y))^2) ^ (beta)
-                   -- j = lower
-
-upper is `min(C, c + n/2)`
-lower is `max(0, c - n/2)`
+
+$$
+Output(i, x, y) = Input(i, x, y) / \left(
+k + \alpha \sum\limits^{\min(C, c + n/2)}_{j = \max(0, c - n/2)}
+(Input(j, x, y))^2
+\right)^{\beta}
+$$

 Function implementation:

-inputs and outpus is NCHW format, while input.shape.ndims() is equal 4.
-And the meaning of each dimension(0-3) is respectively batch size,
-feature maps, rows and columns.
+Inputs and outputs are in NCHW format, while input.shape.ndims() equals 4.
+And dimensions 0 ~ 3 represent batch size, feature maps, rows,
+and columns, respectively.

-Input and Output in the above formula is for each map(i) of one image, and
-Input(i, x, y), Output(i, x, y) represents an element in an image.
+Input and Output in the formula above is for each map(i) of one image, and
+Input(i, x, y), Output(i, x, y) represents an element in an image.

-C is the number of feature maps of one image, and n is a hyper-parameters
-is configured when Function is initialized. The sum in the denominator
-is the sum of the same position in the neighboring maps.
+C is the number of feature maps of one image. n is a hyper-parameter
+configured when the operator is initialized. The sum in the denominator
+is the sum of the same positions in the neighboring maps.
 )DOC");
   }
 };
...
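Aside (an illustrative sketch, not the kernel): the normalization formula above in NumPy, treating the upper summation limit as inclusive of channel c + n/2:

import numpy as np

def lrn(x, n=5, k=2.0, alpha=1e-4, beta=0.75):
    # x is NCHW; the sum runs over the n neighboring channels of c
    N, C, H, W = x.shape
    out = np.empty_like(x)
    for c in range(C):
        lower = max(0, c - n // 2)
        upper = min(C, c + n // 2 + 1)   # exclusive slice bound
        square_sum = np.sum(x[:, lower:upper, :, :] ** 2, axis=1)
        out[:, c, :, :] = x[:, c, :, :] / (k + alpha * square_sum) ** beta
    return out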
paddle/operators/lstm_op.cc

@@ -103,7 +103,7 @@ class LSTMOpMaker : public framework::OpProtoAndCheckerMaker {
     AddInput("H0",
              "(Tensor, optional) the initial hidden state is an optional "
              "input. This is a tensor with shape (N x D), where N is the "
-             "batch size, D is the hidden size.")
+             "batch size and D is the hidden size.")
         .AsDispensable();
     AddInput("C0",
              "(Tensor, optional) the initial cell state is an optional "
...
@@ -134,85 +134,82 @@ class LSTMOpMaker : public framework::OpProtoAndCheckerMaker {
     AddOutput("BatchGate",
               "(LoDTensor) This LoDTensor contains input gate, forget gate "
               "and output gate after the nonlinear computation. This "
-              "LoDTensor has the same shape with the reorganized input, which "
+              "LoDTensor has the same shape as the reorganized input, which "
               "is also be called batch input. The LoD size is 2. The first "
               "LoD is the batch offsets and the second LoD contains the "
               "indexes, which denote the position of reorganized sequence "
               "in the raw input.")
         .AsIntermediate();
     AddOutput("BatchCellPreAct",
-              "(LoDTensor) This LoDTensor is got in the forward and used "
+              "(LoDTensor) This LoDTensor is obtained in the forward and used "
               "in the backward.")
         .AsIntermediate();
     AddAttr<bool>("usePeepholes",
-                  "(bool, default: True) "
+                  "(bool, default True) "
                   "whether to enable diagonal/peephole connections.")
         .SetDefault(true);
     AddAttr<bool>("isReverse",
-                  "(bool, default: False) "
+                  "(bool, default False) "
                   "whether to compute reversed LSTM.")
         .SetDefault(false);
     AddAttr<std::string>("gateActivation",
-                         "(string, default: sigmoid)"
+                         "(string, default sigmoid)"
                          "The activation for input gate, forget gate and output "
                          "gate, `sigmoid` by default.")
         .SetDefault("sigmoid");
     AddAttr<std::string>("cellActivation",
-                         "(string, default: tanh)"
+                         "(string, default tanh)"
                          "The activation for cell output, `tanh` by default.")
         .SetDefault("tanh");
     AddAttr<std::string>("candidateActivation",
-                         "(string, default: tanh)"
+                         "(string, default tanh)"
                          "The activation for candidate hidden state, "
                          "`tanh` by default.")
         .SetDefault("tanh");
-    AddComment(R"DOC(Long-Short Term Memory (LSTM) Operator
+    AddComment(R"DOC(
+Long-Short Term Memory (LSTM) Operator.

-The default implementation is diagonal/peephole connection [1], the formula is
-as follows
+The default implementation is diagonal/peephole connection
+(https://arxiv.org/pdf/1402.1128.pdf), the formula is as follows:

-i_t = \sigma(W_{ix}x_{t} + W_{ih}h_{t-1} + W_{ic}c_{t-1} + b_i)
-f_t = \sigma(W_{fx}x_{t} + W_{fh}h_{t-1} + W_{fc}c_{t-1} + b_f)
-\tilde{c_t} = act_g(W_{cx}x_t + W_{ch}h_{t-1} + b_c)
-o_t = \sigma(W_{ox}x_{t} + W_{oh}h_{t-1} + W_{oc}c_t + b_o)
-c_t = f_t ⊙ c_{t-1} + i_t ⊙ \tilde{c_t}
-h_t = o_t ⊙ act_h(c_t)
+$$
+i_t = \sigma(W_{ix}x_{t} + W_{ih}h_{t-1} + W_{ic}c_{t-1} + b_i) \\
+f_t = \sigma(W_{fx}x_{t} + W_{fh}h_{t-1} + W_{fc}c_{t-1} + b_f) \\
+\tilde{c_t} = act_g(W_{cx}x_t + W_{ch}h_{t-1} + b_c) \\
+o_t = \sigma(W_{ox}x_{t} + W_{oh}h_{t-1} + W_{oc}c_t + b_o) \\
+c_t = f_t \odot c_{t-1} + i_t \odot \tilde{c_t} \\
+h_t = o_t \odot act_h(c_t)
+$$

 where the W terms denote weight matrices (e.g. \f$W_{xi}\f$ is the matrix
 of weights from the input gate to the input), \f$W_{ic}, W_{fc}, W_{oc}\f$
-are diagonal weight matrices for peephole connections. In our implemention,
-We use vectors to represent these diagonal weight matrices. The b terms
+are diagonal weight matrices for peephole connections. In our implementation,
+we use vectors to represent these diagonal weight matrices. The b terms
 denote bias vectors (\f$b_i\f$ is the input gate bias vector), \f$\sigma\f$
-is the non-linear actications, such as logistic sigmoid function, and
-\f$i, f, o\f$ and \f$c\f$ are respectively the input gate, forget gate,
-output gate and cell activation vectors, all of which are the same size as
+is the non-linear activations, such as the logistic sigmoid function, and
+\f$i, f, o\f$ and \f$c\f$ are the input gate, forget gate, output gate,
+and cell activation vectors, respectively, all of which have the same size as
 the cell output activation vector \f$h\f$.

-The ⊙ is the element-wise product of the vectors, \f$act_g\f$ and \f$act_h\f$
-are the cell input and cell output activation functions, `tanh` is usually
+The \f$\odot\f$ is the element-wise product of the vectors. \f$act_g\f$ and \f$act_h\f$
+are the cell input and cell output activation functions and `tanh` is usually
 used for them. \f$\tilde{c_t}\f$ is also called candidate hidden state,
 which is computed based on the current input and the previous hidden state.

-Set `usePeepholes` False to disable peephole connection [2]. The formula
+Set usePeepholes False to disable peephole connection
+(http://www.bioinf.jku.at/publications/older/2604.pdf). The formula
 is omitted here.

-@note These \f$W_{xi}x_{t}, W_{xf}x_{t}, W_{xc}x_{t}, W_{xo}x_{t}\f$
-operations on the input x_{t} were NOT included in this operator.
+Note that these \f$W_{xi}x_{t}, W_{xf}x_{t}, W_{xc}x_{t}, W_{xo}x_{t}\f$
+operations on the input \f$x_{t}\f$ are NOT included in this operator.
 Users can choose to use fully-connect operator before LSTM operator.

-[1] Hasim Sak, Andrew Senior, and Francoise Beaufays. Long short-term memory
-recurrent neural network architectures for large scale acoustic modeling.
-INTERSPEECH, 2014.
-
-[2] S. Hochreiter and J. Schmidhuber. Long Short-Term Memory.
-Neural Computation, 9(8):1735-1780, 1997.
-
 )DOC");
   }
 };
...
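Aside (an illustrative sketch under assumptions, not this operator's kernel): one time step of the peephole formulas above in NumPy. The [i, f, c~, o] gate layout, the name lstm_step, and the weight orientation are illustrative choices; the diagonal peephole matrices are kept as vectors w_ic, w_fc, w_oc exactly as the comment describes, and x_proj stands for the four W_{x*}x_t projections that an FC operator supplies, since they are NOT part of this operator.

import numpy as np

def sigmoid(z):
    return 1.0 / (1.0 + np.exp(-z))

def lstm_step(x_proj, h_prev, c_prev, W_h, w_ic, w_fc, w_oc, b):
    # x_proj: (N, 4D) pre-computed input projections, concatenated [i f c~ o]
    gates = x_proj + h_prev @ W_h + b
    xi, xf, xc, xo = np.split(gates, 4, axis=-1)
    i_t = sigmoid(xi + w_ic * c_prev)        # peephole on c_{t-1}
    f_t = sigmoid(xf + w_fc * c_prev)        # peephole on c_{t-1}
    c_tilde = np.tanh(xc)                    # act_g
    c_t = f_t * c_prev + i_t * c_tilde       # element-wise products
    o_t = sigmoid(xo + w_oc * c_t)           # peephole on c_t
    h_t = o_t * np.tanh(c_t)                 # act_h
    return h_t, c_t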
paddle/operators/lstm_unit_op.cc

@@ -57,17 +57,22 @@ class LstmUnitOpMaker : public framework::OpProtoAndCheckerMaker {
              "The cell state tensor of last time-step in the Lstm Unit operator.");
     AddOutput("C", "The cell tensor of Lstm Unit operator.");
     AddOutput("H", "The hidden state tensor of Lstm Unit operator.");
-    AddComment(R"DOC(Lstm-Unit Operator
+    AddAttr<float>("forget_bias",
+                   "(float, default 0.0) "
+                   "The forget bias of Lstm Unit.")
+        .SetDefault(0.0);
+    AddComment(R"DOC(
+Lstm Unit Operator

 Equation:
-  i, f, o, j = split(X)
-  C = C_prev * sigm(f + forget_bias) + sigm(i) * tanh(j)
-  H = C * sigm(o)
+
+$$
+i, f, o, j = split(X) \\
+C = C_{prev} * sigm(f + forget\_bias) + sigm(i) * tanh(j) \\
+H = C * sigm(o)
+$$

 )DOC");
-    AddAttr<float>("forget_bias", "The forget bias of Lstm Unit.")
-        .SetDefault(0.0);
   }
 };
...
paddle/operators/math/CMakeLists.txt

@@ -23,7 +23,7 @@ else()
     cc_library(context_project SRCS context_project.cc DEPS device_context)
     cc_library(sequence2batch SRCS sequence2batch.cc DEPS device_context)
     cc_library(lstm_compute SRCS lstm_compute.cc DEPS device_context activation_functions)
-    cc_library(gru_compute SRCS gru_compute.cc DEPS device_context activation_functions)
+    cc_library(gru_compute SRCS gru_compute.cc DEPS device_context activation_functions math_function)
 endif()

 cc_test(math_function_test SRCS math_function_test.cc DEPS math_function tensor)
...
paddle/operators/save_op.cc

@@ -163,14 +163,19 @@ class SaveOpProtoMaker : public framework::OpProtoAndCheckerMaker {
   SaveOpProtoMaker(framework::OpProto* proto,
                    framework::OpAttrChecker* op_checker)
       : OpProtoAndCheckerMaker(proto, op_checker) {
-    AddInput("X", "The tensor need to be saved");
-    AddComment(R"DOC(Save operator
-Save operator will serialize and write a tensor variable to disk file.
+    AddInput("X", "(Tensor) Input tensor to be saved");
+    AddComment(R"DOC(
+Save operator
+
+This operator will serialize and write a tensor variable to a file on disk.
 )DOC");
-    AddAttr<bool>("overwrite", "Overwrite the output file if exist")
+    AddAttr<bool>("overwrite",
+                  "(boolean, default true)"
+                  "Overwrite the output file if it exists")
         .SetDefault(true);
     AddAttr<std::string>("file_path",
-                         "Variable will be saved to \"file_path\".")
+                         "(string)"
+                         "The \"file_path\" where the variable will be saved.")
         .AddCustomChecker(
             [](const std::string& path) { return !path.empty(); });
   }
...
paddle/operators/scale_op.cc

@@ -40,13 +40,16 @@ class ScaleOpMaker : public framework::OpProtoAndCheckerMaker {
 public:
  ScaleOpMaker(framework::OpProto* proto, framework::OpAttrChecker* op_checker)
      : OpProtoAndCheckerMaker(proto, op_checker) {
-    AddInput("X", "The input tensor of scale operator.");
-    AddOutput("Out", "The output tensor of scale operator.");
-    AddComment(R"DOC(Scale operator
+    AddInput("X", "(Tensor) Input tensor of scale operator.");
+    AddOutput("Out", "(Tensor) Output tensor of scale operator.");
+    AddComment(R"DOC(
+Scale operator

-The equation is: Out = scale*X
+$$Out = scale*X$$
 )DOC");
-    AddAttr<AttrType>("scale", "The scaling factor of the scale operator.")
+    AddAttr<AttrType>("scale",
+                      "(float, default 1.0)"
+                      "The scaling factor of the scale operator.")
         .SetDefault(1.0);
   }
 };
...
paddle/operators/sequence_concat_op.cc

@@ -47,19 +47,19 @@ class SequenceConcatOpMaker : public framework::OpProtoAndCheckerMaker {
                          framework::OpAttrChecker* op_checker)
       : OpProtoAndCheckerMaker(proto, op_checker) {
     AddInput("X",
-             "(A vector of LoDTensor), the input is a vector of LoDTensor, "
+             "(vector<LoDTensor>) Input is a vector of LoDTensor, "
              "each of which is a variable-length sequence or nested sequence.")
         .AsDuplicable();
     AddOutput("Out",
-              "(A LoDTensor), the variable-length output of "
+              "(LoDTensor), Variable-length output of "
               "sequence_concat Op.");
     AddAttr<int>("axis",
-                 "(int, default 0)"
-                 "The axis which the inputs will be joined with. "
+                 "(int, default 0) "
+                 "The axis along which the inputs will be joined. "
                  "If axis is 0, the inputs will be joined with LoD index.")
         .SetDefault(0);
     AddAttr<int>("level",
-                 "(int, default 0)"
+                 "(int, default 0) "
                  "The level at which the inputs will be joined. "
                  "If the level is 0, the inputs will be joined at the nested "
                  "sequence level. "
...
@@ -68,34 +68,36 @@ class SequenceConcatOpMaker : public framework::OpProtoAndCheckerMaker {
                  "The level should be less than the level number of inputs.")
         .SetDefault(0);
     AddComment(R"DOC(
+Sequence Concat operator
+
 The sequence_concat operator concatenates multiple LoDTensors.
 It only supports a sequence (LoD Tensor with level number is 1)
 or a nested sequence (LoD tensor with level number is 2) as its input.
 - Case1:
   If the axis is other than 0 (here, axis is 1 and level is 1),
   each input should have the same LoD information and the LoD
   information of the output keeps the same as the input.

   LoD(x0) = {{0,2,4}, {0,1,2,3,4}}; Dims(x0) = (4,3,4)
   LoD(x1) = {{0,2,4}, {0,1,2,3,4}}; Dims(x1) = (4,4,4)
   LoD(Out) = {{0,2,4}, {0,1,2,3,4}}; Dims(Out) = (4,7,4)

 - Case2:
   If the axis is 0 (here, level is 0), the inputs are concatenated along
   time steps, and the LoD information of the output needs to be re-computed.

   LoD(x0) = {{0,2,4}, {0,1,2,3,4}}; Dims(x0) = (4,3,4)
   LoD(x1) = {{0,3,5}, {0,1,2,3,5}}; Dims(x1) = (5,3,4)
   LoD(Out) = {{0,5,9}, {0,1,2,3,4,5,6,7,9}}; Dims(Out) = (9,3,4)

 - Case3:
   If the axis is 0 (here, level is 1).

   LoD(x0) = {{0,2,4}, {0,1,2,3,4}}; Dims(x0) = (4,3,4)
   LoD(x1) = {{0,3,5}, {0,1,3,4,5}}; Dims(x1) = (5,3,4)
   LoD(Out) = {{0,5,9}, {0,2,5,7,9}}; Dims(Out) = (9,3,4)

 NOTE: The levels of all the inputs should be the same.
 )DOC");
   }
 };
...
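Aside (not part of the commit): for axis = 0, the joined level of the output LoD is just the element-wise sum of the input offsets, which a few lines of Python can verify against Case2 above. Lower LoD levels are re-computed by interleaving the sub-sequences and are not covered by this sketch.

def concat_lod_axis0(lod_x0, lod_x1, level=0):
    # sequences are concatenated pairwise, so each output offset is the
    # sum of the corresponding input offsets at the joined level
    return [a + b for a, b in zip(lod_x0[level], lod_x1[level])]

# Case2: top-level LoDs {0,2,4} and {0,3,5} combine to {0,5,9}
assert concat_lod_axis0([[0, 2, 4]], [[0, 3, 5]]) == [0, 5, 9]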
paddle/operators/sgd_op.cc

@@ -45,15 +45,17 @@ class SGDOpMaker : public framework::OpProtoAndCheckerMaker {
 public:
  SGDOpMaker(framework::OpProto* proto, framework::OpAttrChecker* op_checker)
      : OpProtoAndCheckerMaker(proto, op_checker) {
-    AddInput("Param", "Input parameter");
-    AddInput("LearningRate", "Learning rate of SGD");
-    AddInput("Grad", "Input gradient");
-    AddOutput("ParamOut", "output parameter");
+    AddInput("Param", "(Tensor) Input parameter");
+    AddInput("LearningRate", "(Tensor) Learning rate of SGD");
+    AddInput("Grad", "(Tensor) Input gradient");
+    AddOutput("ParamOut", "(Tensor) Output parameter");
     AddComment(R"DOC(
-Simplest sgd algorithm.
+SGD operator

-param_out = param - learning_rate * grad;
+This operator implements one step of the stochastic gradient descent algorithm.
+
+$$param_out = param - learning_rate * grad$$
 )DOC");
   }
...
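Aside (a worked one-step example, not the kernel): the update rule above is plain element-wise arithmetic:

import numpy as np

param = np.array([1.0, 2.0, 3.0])
grad = np.array([0.1, -0.2, 0.3])
learning_rate = np.array([0.5])

param_out = param - learning_rate * grad
# param_out -> [0.95, 2.1, 2.85]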
paddle/operators/sign_op.cc

@@ -38,9 +38,10 @@ class SignOpMaker : public framework::OpProtoAndCheckerMaker {
       : OpProtoAndCheckerMaker(proto, op_checker) {
     AddInput("X", "(Tensor) Input tensor of sign operator.");
     AddOutput("Out", "(Tensor) Output tensor of sign operator.");
-    AddComment(R"DOC(Sign operator
+    AddComment(R"DOC(
+Sign operator

-The equation is: Out = X.sign()
+$$Out = X.sign()$$
 )DOC");
   }
 };
...
paddle/operators/split_op.cc

@@ -67,30 +67,38 @@ class SplitOpMaker : public framework::OpProtoAndCheckerMaker {
 public:
  SplitOpMaker(framework::OpProto* proto, framework::OpAttrChecker* op_checker)
      : OpProtoAndCheckerMaker(proto, op_checker) {
-    AddInput("X", "the input tensor of split operator.");
-    AddOutput("Out", "the output tensors of split operator.").AsDuplicable();
+    AddInput("X", "(Tensor) Input tensor of the split operator.");
+    AddOutput("Out", "(Tensor) Output tensors of the split operator.")
+        .AsDuplicable();
     AddComment(R"DOC(
-Split the input tensor into multiple sub-tensors.
-Example:
+Split operator
+
+This operator splits the input tensor into multiple sub-tensors.
+
+Example:
   Input = [[1,2],
            [3,4],
            [5,6]]
   sections = [2,1]
   axis = 0
   Output[0] = [[1,2],
                [3,4]]
   Output[1] = [[5,6]]
 )DOC");
     AddAttr<std::vector<int>>("sections",
-                              "the length for each"
-                              "output along with the specify axis.")
+                              "(vector<int>) "
+                              "the length of each output along the "
+                              "specified axis.")
         .SetDefault(std::vector<int>{});
     AddAttr<int>("num",
-                 "number of the sub-tensors, it must evenly divide "
+                 "(int, default 0)"
+                 "Number of sub-tensors. This must evenly divide "
                  "Input.dims()[axis]")
         .SetDefault(0);
-    AddAttr<int>("axis", "The axis which the input will be split on.")
+    AddAttr<int>("axis",
+                 "(int, default 0) "
+                 "The axis along which the input will be split.")
         .SetDefault(0);
   }
 };
...
paddle/operators/squared_l2_distance_op.cc

@@ -59,23 +59,26 @@ class SquaredL2DistanceOpMaker : public framework::OpProtoAndCheckerMaker {
   SquaredL2DistanceOpMaker(framework::OpProto* proto,
                            framework::OpAttrChecker* op_checker)
       : OpProtoAndCheckerMaker(proto, op_checker) {
-    AddInput("X", "Input of SquaredL2DistanceOp.");
-    AddInput("Y", "Target of SquaredL2DistanceOp.");
+    AddInput("X", "(Tensor) Input of SquaredL2DistanceOp.");
+    AddInput("Y", "(Tensor) Target of SquaredL2DistanceOp.");
     AddOutput("sub_result",
-              "Buffering subtraction result which "
+              "(Tensor) Buffering subtraction result which "
               "will be reused in backward.")
         .AsIntermediate();
-    AddOutput("Out", "Squared l2 distance between input and target.");
+    AddOutput("Out", "(Tensor) Squared l2 distance between input and target.");
     AddComment(R"DOC(
-SquaredL2DistanceOp will calculate the squared L2 distance for
-input and target. Number of distance value equals to the
-first dimension of input. First dimension of target could be equal to
-input or to 1. If the first dimension of target is 1, SquaredL2DistanceOp
-will broadcast target's first dimension to input's first dimension.
-You can decide whether calculate the gradient of input and target.
-
-Both the input X and Y can carry the LoD (Level of Details) information,
-or not. But the output only shares the LoD with input X.
+SquaredL2Distance operator
+
+This operator will calculate the squared L2 distance for the input and
+the target. The number of distance values will be equal to the first dimension
+of the input. The first dimension of the target could be equal to that of the
+input or to 1. If the first dimension of the target is 1, the operator will
+broadcast the target's first dimension to the input's first dimension. During
+backward propagation, the user can decide whether to calculate the gradient
+of the input or the target or both.
+
+Both the input X and Y can carry the LoD (Level of Details) information.
+However, the output only shares the LoD information with input X.
 )DOC");
   }
 };
...
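Aside (illustrative only): the forward computation and the broadcast rule described above, in NumPy:

import numpy as np

def squared_l2_distance(x, y):
    # y's first dimension is broadcast against x's when it is 1
    sub = x - y                          # the buffered "sub_result"
    return (sub ** 2).sum(axis=1, keepdims=True)

x = np.random.rand(4, 3)
y = np.random.rand(1, 3)                 # broadcast against all 4 rows of x
assert squared_l2_distance(x, y).shape == (4, 1)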
paddle/operators/squared_l2_norm_op.cc

@@ -52,13 +52,13 @@ class SquaredL2NormOpMaker : public framework::OpProtoAndCheckerMaker {
                        framework::OpAttrChecker* op_checker)
       : framework::OpProtoAndCheckerMaker(proto, op_checker) {
     AddInput("X", "(Tensor) The input of squared_l2_norm op.");
-    AddOutput("Out", "(Float) The output of squared_l2_norm op.");
+    AddOutput("Out", "(Scalar) The output of squared_l2_norm op.");
     AddComment(R"DOC(
 SquaredL2Norm Operator.

 Computes the squared L2 norm of a tensor.
-Out = sum (X ** 2)
+$$Out = \sum_{i} X_{i}^2$$
 )DOC");
   }
...
paddle/operators/sum_op.cc

@@ -45,13 +45,15 @@ class SumOpMaker : public framework::OpProtoAndCheckerMaker {
 public:
  SumOpMaker(framework::OpProto* proto, framework::OpAttrChecker* op_checker)
      : OpProtoAndCheckerMaker(proto, op_checker) {
-    AddInput("X", "the input tensors of sum operator.").AsDuplicable();
-    AddOutput("Out", "the output tensor of sum operator.");
+    AddInput("X", "(vector<Tensor>) The input tensors of sum operator.")
+        .AsDuplicable();
+    AddOutput("Out", "(Tensor) The output tensor of sum operator.");
     AddComment(R"DOC(
-Sum the input tensors.
+Sum operator.

-All the inputs can carry the LoD (Level of Details) information,
-or not. But the output only shares the LoD with the first input.
+This operator sums the input tensors. All the inputs can carry the
+LoD (Level of Details) information. However, the output only shares
+the LoD information with the first input.
 )DOC");
   }
 };
...
paddle/operators/top_k_op.cc

@@ -48,20 +48,20 @@ class TopkOpMaker : public framework::OpProtoAndCheckerMaker {
 public:
  TopkOpMaker(framework::OpProto* proto, framework::OpAttrChecker* op_checker)
      : OpProtoAndCheckerMaker(proto, op_checker) {
-    AddInput("X", "The input of Topk op");
-    AddOutput("Out", "The output tensor of Topk op");
-    AddOutput("Indices", "The indices of Topk elements of input");
-    AddComment(R"DOC(If the input is a vector (1d tensor),
-finds the k largest entries in the vector
-and outputs their values and indices as vectors.
-Thus values[j] is the j-th largest entry in input,
-and its index is indices[j].
-
-For matrices, computes the top k entries in each row. )DOC");
+    AddInput("X", "(Tensor) The input of Topk op");
+    AddOutput("Out", "(Tensor) The output tensor of Topk op");
+    AddOutput("Indices", "(Tensor) The indices of Topk elements of input");
+    AddComment(R"DOC(
+Top K operator
+
+If the input is a vector (1d tensor), this operator finds the k largest
+entries in the vector and outputs their values and indices as vectors.
+Thus values[j] is the j-th largest entry in input, and its index is indices[j].
+
+For matrices, this operator computes the top k entries in each row. )DOC");
     AddAttr<int>("k",
-                 "Number of top elements to look for along the last "
-                 "dimension (along each row for matrices).")
+                 "(int, default 1) Number of top elements to look for along "
+                 "the last dimension (along each row for matrices).")
         .SetDefault(1);
   }
 };
...
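Aside (a NumPy sketch of the semantics, not the kernel): top k per row, with values in descending order and their indices into the input:

import numpy as np

def top_k(x, k=1):
    # per-row indices of the k largest entries, largest first
    indices = np.argsort(-x, axis=-1)[..., :k]
    values = np.take_along_axis(x, indices, axis=-1)
    return values, indices

values, indices = top_k(np.array([[1.0, 5.0, 3.0]]), k=2)
# values -> [[5.0, 3.0]], indices -> [[1, 2]]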
paddle/operators/uniform_random_op.cc

@@ -74,18 +74,30 @@ class UniformRandomOpMaker : public framework::OpProtoAndCheckerMaker {
   UniformRandomOpMaker(framework::OpProto* proto,
                        framework::OpAttrChecker* op_checker)
       : framework::OpProtoAndCheckerMaker(proto, op_checker) {
-    AddOutput("Out", "The output tensor of uniform random op");
-    AddComment(R"DOC(Uniform random operator.
-Used to initialize tensor with uniform random generator.
+    AddOutput("Out", "(Tensor) The output tensor of uniform random op");
+    AddComment(R"DOC(
+Uniform random operator.
+
+This operator initializes a tensor with random values sampled from a
+uniform distribution.
 )DOC");
-    AddAttr<std::vector<int>>("shape", "the dimension of random tensor");
-    AddAttr<float>("min", "Minimum value of uniform random").SetDefault(-1.0f);
-    AddAttr<float>("max", "Maximum value of uniform random").SetDefault(1.0f);
+    AddAttr<std::vector<int>>("shape",
+                              "(vector<int>) The shape of the output tensor");
+    AddAttr<float>("min",
+                   "(float, default -1.0) "
+                   "Minimum value of uniform random")
+        .SetDefault(-1.0f);
+    AddAttr<float>("max",
+                   "(float, default 1.0) "
+                   "Maximum value of uniform random")
+        .SetDefault(1.0f);
     AddAttr<int>("seed",
-                 "Random seed of uniform random. "
-                 "0 means generate a seed by system")
+                 "(int, default 0) "
+                 "Random seed used for generating samples. "
+                 "0 means use a seed generated by the system.")
         .SetDefault(0);
-    AddAttr<int>("data_type", "output tensor data type")
+    AddAttr<int>("data_type", "(int, default 5(FP32)) Output tensor data type")
         .SetDefault(framework::DataType::FP32);
   }
 };
...
paddle/pybind/protobuf.cc

@@ -238,7 +238,8 @@ void BindVarDsec(py::module &m) {
       .value("SELECTED_ROWS", VarDesc::SELECTED_ROWS)
       .value("FEED_MINIBATCH", VarDesc::FEED_MINIBATCH)
       .value("FETCH_LIST", VarDesc::FETCH_LIST)
-      .value("STEP_SCOPES", VarDesc::STEP_SCOPES);
+      .value("STEP_SCOPES", VarDesc::STEP_SCOPES)
+      .value("LOD_RANK_TABLE", VarDesc::LOD_RANK_TABLE);
 }

 void BindOpDesc(py::module &m) {
...
paddle/pybind/pybind.cc

@@ -21,6 +21,7 @@ limitations under the License. */
 #include "paddle/framework/executor.h"
 #include "paddle/framework/feed_fetch_method.h"
 #include "paddle/framework/framework.pb.h"
+#include "paddle/framework/lod_rank_table.h"
 #include "paddle/framework/lod_tensor.h"
 #include "paddle/framework/prune.h"
 #include "paddle/framework/selected_rows.h"
...
@@ -224,6 +225,9 @@ All parameter, weight, gradient are variables in Paddle.
              return self.GetMutable<LoDTensor>();
            },
            py::return_value_policy::reference)
+      .def("get_lod_rank_table",
+           [](Variable &self) { return self.GetMutable<LoDRankTable>(); },
+           py::return_value_policy::reference)
       .def("get_selected_rows",
            [](Variable &self) -> SelectedRows * {
              return self.GetMutable<SelectedRows>();
...
@@ -492,6 +496,15 @@ All parameter, weight, gradient are variables in Paddle.
   BindVarDsec(m);
   BindOpDesc(m);

+  py::class_<framework::LoDRankTable>(m, "LodRankTable")
+      .def("items", [](framework::LoDRankTable &table) {
+        std::vector<std::pair<size_t, size_t>> res;
+        for (auto &item : table.items()) {
+          res.push_back({item.index, item.length});
+        }
+        return res;
+      });
+
   m.def("op_support_gpu", OpSupportGPU);
 #ifdef PADDLE_WITH_CUDA
   m.def("get_cuda_device_count", platform::GetCUDADeviceCount);
...
paddle/scripts/docker/build.sh

@@ -162,6 +162,7 @@ ${DOCKERFILE_CUDNN_DSO}
 ${DOCKERFILE_GPU_ENV}
 ADD go/cmd/pserver/pserver /usr/bin/
 ADD go/cmd/master/master /usr/bin/
+ADD paddle/pybind/print_operators_doc /usr/bin/
 # default command shows the paddle version and exit
 CMD ["paddle", "version"]
 EOF
...
python/paddle/v2/framework/framework.py

@@ -101,6 +101,10 @@ class Variable(object):
     def persistable(self):
         return self.desc.persistable()

+    @persistable.setter
+    def persistable(self, p):
+        self.desc.set_persistable(p)
+
     @property
     def name(self):
         return self.desc.name()
...
python/paddle/v2/framework/layer_helper.py

@@ -112,9 +112,12 @@ class LayerHelper(object):
                 raise ValueError("Data Type mismatch")
         return dtype

-    def create_parameter(self, attr, shape, dtype, suffix='w'):
+    def create_parameter(self, attr, shape, dtype, suffix='w',
+                         initializer=None):
         # Deepcopy the attr so that parameters can be shared in program
         attr_copy = copy.deepcopy(attr)
+        if initializer is not None:
+            attr_copy['initializer'] = initializer
         if attr_copy['name'] is None:
             attr_copy['name'] = unique_name(".".join([self.name, suffix]))
         self.init_program.global_block().create_parameter(
...
python/paddle/v2/framework/layers.py
浏览文件 @
c4f7f3a5
from
paddle.v2.framework.layer_helper
import
LayerHelper
,
unique_name
import
paddle.v2.framework.core
as
core
from
paddle.v2.framework.framework
import
OpProtoHolder
,
Variable
,
Program
,
\
Operato
r
from
paddle.v2.framework.
initializer
import
ConstantInitializer
from
paddle.v2.framework.framework
import
OpProtoHolder
,
Variable
,
Program
,
Operator
from
paddle.v2.framework.initializer
import
ConstantInitializer
,
NormalInitialize
r
from
paddle.v2.framework.
layer_helper
import
LayerHelper
,
unique_name
import
re
__all__
=
[
...
...
@@ -344,8 +343,13 @@ def conv2d(input,
input_shape
=
input
.
shape
filter_shape
=
[
num_filters
,
num_filter_channels
]
+
filter_size
std
=
(
2.0
/
(
filter_size
[
0
]
**
2
*
num_channels
))
**
0.5
filter
=
helper
.
create_parameter
(
attr
=
helper
.
param_attr
,
shape
=
filter_shape
,
dtype
=
dtype
)
attr
=
helper
.
param_attr
,
shape
=
filter_shape
,
dtype
=
dtype
,
initializer
=
NormalInitializer
(
0.0
,
std
,
0
))
pre_bias
=
helper
.
create_tmp_variable
(
dtype
)
helper
.
append_op
(
...
...
@@ -420,7 +424,7 @@ def batch_norm(input,
act
=
None
,
is_test
=
False
,
momentum
=
0.9
,
epsilon
=
1e05
,
epsilon
=
1e
-
05
,
param_attr
=
None
,
bias_attr
=
None
,
data_layout
=
'NCHW'
,
...
...
@@ -438,27 +442,29 @@ def batch_norm(input,
else
:
raise
ValueError
(
"unsupported data layout:"
+
data_layout
)
def
create_persistable_var
(
dtype
,
shape
,
initializer
=
None
):
name
=
unique_name
(
"."
.
join
([
helper
.
name
,
"xxxx"
]))
var
=
init_program
.
global_block
().
create_var
(
dtype
=
dtype
,
shape
=
shape
,
name
=
name
,
persistable
=
True
)
if
initializer
is
not
None
:
initializer
(
var
,
var
.
block
)
return
program
.
global_block
().
create_var
(
name
=
name
,
dtype
=
dtype
,
shape
=
shape
,
persistable
=
True
)
param_shape
=
[
channel_num
]
# create parameter
scale
=
helper
.
create_parameter
(
attr
=
helper
.
param_attr
,
shape
=
param_shape
,
dtype
=
dtype
)
attr
=
helper
.
param_attr
,
shape
=
param_shape
,
dtype
=
dtype
,
initializer
=
ConstantInitializer
(
1.0
))
bias
=
helper
.
create_parameter
(
attr
=
helper
.
param_attr
,
shape
=
param_shape
,
dtype
=
dtype
)
attr
=
helper
.
param_attr
,
shape
=
param_shape
,
dtype
=
dtype
,
initializer
=
ConstantInitializer
(
0.0
))
# create input
mean
=
create_persistable_var
(
dtype
,
param_shape
,
ConstantInitializer
(
0.0
))
variance
=
create_persistable_var
(
dtype
,
param_shape
,
ConstantInitializer
(
1.0
))
mean
=
helper
.
create_global_variable
(
dtype
=
input
.
data_type
,
shape
=
param_shape
,
persistable
=
True
)
helper
.
set_variable_initializer
(
var
=
mean
,
initializer
=
ConstantInitializer
(
0.0
))
variance
=
helper
.
create_global_variable
(
dtype
=
input
.
data_type
,
shape
=
param_shape
,
persistable
=
True
)
helper
.
set_variable_initializer
(
var
=
variance
,
initializer
=
ConstantInitializer
(
1.0
))
# create output
# mean and mean_out share the same memory
...
...
@@ -729,3 +735,16 @@ class StaticRNN(object):
'states'
:
memories
,
'step_block'
:
rnn_block
})
def
lod_rank_table
(
x
,
level
=
0
,
program
=
None
):
helper
=
LayerHelper
(
"lod_rank_table"
,
**
locals
())
table
=
helper
.
create_variable
(
type
=
core
.
VarDesc
.
VarType
.
LOD_RANK_TABLE
,
name
=
unique_name
(
"lod_rank_table"
))
helper
.
append_op
(
type
=
'lod_rank_table'
,
inputs
=
{
'X'
:
x
},
outputs
=
{
'Out'
:
table
},
attrs
=
{
'level'
:
level
})
return
table
python/paddle/v2/framework/tests/test_crf_decoding_op.py
0 → 100644

import unittest
import random
import numpy as np

from op_test import OpTest


class CRFDecoding(object):
    def __init__(self, emission_weights, transition_weights,
                 seq_start_positions):
        assert (emission_weights.shape[0] == seq_start_positions[-1])
        self.tag_num = emission_weights.shape[1]
        self.seq_num = len(seq_start_positions) - 1

        self.seq_start_positions = seq_start_positions
        self.x = emission_weights

        self.a = transition_weights[0, :]
        self.b = transition_weights[1, :]
        self.w = transition_weights[2:, :]

        self.track = np.zeros(
            (seq_start_positions[-1], self.tag_num), dtype="int32")
        self.decoded_path = np.zeros(
            (seq_start_positions[-1], 1), dtype="int32")

    def _decode_one_sequence(self, decoded_path, x):
        seq_len, tag_num = x.shape
        alpha = np.zeros((seq_len, tag_num), dtype="float64")
        track = np.zeros((seq_len, tag_num), dtype="int32")

        for i in range(tag_num):
            alpha[0, i] = self.a[i] + x[0, i]

        for k in range(1, seq_len):
            for i in range(tag_num):
                max_score = -np.finfo("float64").max
                max_idx = 0
                for j in range(tag_num):
                    score = alpha[k - 1, j] + self.w[j, i]
                    if score > max_score:
                        max_score = score
                        max_idx = j
                alpha[k, i] = max_score + x[k, i]
                track[k, i] = max_idx

        max_score = -np.finfo("float64").max
        max_idx = 0
        for i in range(tag_num):
            score = alpha[seq_len - 1, i] + self.b[i]
            if score > max_score:
                max_score = score
                max_idx = i

        decoded_path[-1] = max_idx
        for i in range(seq_len - 1, 0, -1):
            decoded_path[i - 1] = max_idx = track[i, max_idx]

    def decode(self):
        for i in range(self.seq_num):
            start = self.seq_start_positions[i]
            end = self.seq_start_positions[i + 1]
            self._decode_one_sequence(self.decoded_path[start:end, :],
                                      self.x[start:end, :])
        return self.decoded_path


class TestCRFDecodingOp1(OpTest):
    """
    Compare the dynamic program with random generated parameters and inputs
    with ground truth not being given.
    """

    def set_test_data(self):
        SEQ_NUM = 3
        TAG_NUM = 17
        MAX_SEQ_LEN = 10

        lod = [[0]]
        for i in range(SEQ_NUM):
            lod[-1].append(lod[-1][-1] + random.randint(1, MAX_SEQ_LEN))
        emission = np.random.uniform(
            -1, 1, [lod[-1][-1], TAG_NUM]).astype("float64")
        transition = np.random.uniform(
            -0.5, 0.5, [TAG_NUM + 2, TAG_NUM]).astype("float64")

        self.inputs = {
            "Emission": (emission, lod),
            "Transition": transition,
        }

        decoder = CRFDecoding(emission, transition, lod[0])
        decoded_path = decoder.decode()

        self.outputs = {"ViterbiPath": decoded_path}

    def setUp(self):
        self.op_type = "crf_decoding"
        self.set_test_data()

    def test_check_output(self):
        self.check_output()


class TestCRFDecodingOp2(OpTest):
    """
    Compare the dynamic program with brute force computation with
    ground truth being given.
    """

    def setUp(self):
        self.op_type = "crf_decoding"
        TAG_NUM = 5

        lod = [[0, 1, 3, 6, 10]]
        transition = np.repeat(
            np.arange(TAG_NUM, dtype="float64").reshape(1, TAG_NUM),
            TAG_NUM + 2,
            axis=0)
        emission = np.repeat(
            np.arange(TAG_NUM, dtype="float64").reshape(1, TAG_NUM),
            lod[-1][-1],
            axis=0)

        labels = np.random.randint(
            low=0, high=TAG_NUM, size=(lod[-1][-1], 1), dtype="int32")
        predicted_labels = np.ones(
            (lod[-1][-1], 1), dtype="int32") * (TAG_NUM - 1)
        expected_output = (labels == predicted_labels).astype("int32")

        self.inputs = {
            "Emission": (emission, lod),
            "Transition": transition,
            "Label": (labels, lod)
        }

        self.outputs = {"ViterbiPath": expected_output}

    def test_check_output(self):
        self.check_output()


if __name__ == "__main__":
    unittest.main()
python/paddle/v2/framework/tests/test_image_classification_train.py

+import numpy as np
 import paddle.v2 as paddle
 import paddle.v2.framework.core as core
 import paddle.v2.framework.layers as layers
 import paddle.v2.framework.nets as nets
-import paddle.v2.framework.core as core
 import paddle.v2.framework.optimizer as optimizer
-from paddle.v2.framework.framework import Program, g_program
 from paddle.v2.framework.executor import Executor
-import numpy as np
+from paddle.v2.framework.framework import g_init_program, g_program
+from paddle.v2.framework.initializer import XavierInitializer


 def resnet_cifar10(input, depth=32, program=None, init_program=None):
...
@@ -124,7 +123,7 @@ def resnet_cifar10(input, depth=32, program=None, init_program=None):
     return pool


-def vgg16_bn_drop(input, program, init_program):
+def vgg16_bn_drop(input, program=None, init_program=None):
     def conv_block(input,
                    num_filter,
                    groups,
...
@@ -155,6 +154,7 @@ def vgg16_bn_drop(input, program, init_program):
     fc1 = layers.fc(input=drop,
                     size=512,
                     act=None,
+                    param_attr={"initializer": XavierInitializer()},
                     program=program,
                     init_program=init_program)
     reshape1 = layers.reshape(
...
@@ -169,46 +169,34 @@ def vgg16_bn_drop(input, program, init_program):
     fc2 = layers.fc(input=drop2,
                     size=512,
                     act=None,
+                    param_attr={"initializer": XavierInitializer()},
                     program=program,
                     init_program=init_program)
     return fc2


-init_program = Program()
-program = Program()
-
 classdim = 10
 data_shape = [3, 32, 32]

-images = layers.data(
-    name='pixel', shape=data_shape, data_type='float32', program=program)
-label = layers.data(
-    name='label',
-    shape=[1],
-    data_type='int64',
-    program=program,
-    init_program=init_program)
+images = layers.data(name='pixel', shape=data_shape, data_type='float32')
+label = layers.data(name='label', shape=[1], data_type='int64')

 # Add neural network config
 # option 1. resnet
-net = resnet_cifar10(images, 32, program, init_program)
+# net = resnet_cifar10(images, 32)
 # option 2. vgg
-# net = vgg16_bn_drop(images, program, init_program)
+net = vgg16_bn_drop(images)

 # print(program)

-predict = layers.fc(input=net,
-                    size=classdim,
-                    act='softmax',
-                    program=program,
-                    init_program=init_program)
-cost = layers.cross_entropy(
-    input=predict, label=label, program=program, init_program=init_program)
-avg_cost = layers.mean(x=cost, program=program, init_program=init_program)
+predict = layers.fc(input=net, size=classdim, act='softmax')
+cost = layers.cross_entropy(input=predict, label=label)
+avg_cost = layers.mean(x=cost)
+accuracy = layers.accuracy(input=predict, label=label)

-sgd_optimizer = optimizer.SGDOptimizer(learning_rate=0.001)
-opts = sgd_optimizer.minimize(avg_cost, init_program)
+# optimizer = optimizer.SGDOptimizer(learning_rate=0.001)
+optimizer = optimizer.AdamOptimizer(learning_rate=0.001)
+opts = optimizer.minimize(avg_cost)

 BATCH_SIZE = 128
 PASS_NUM = 1
...
@@ -221,7 +209,7 @@ train_reader = paddle.batch(
 place = core.CPUPlace()
 exe = Executor(place)

-exe.run(init_program, feed={}, fetch_list=[])
+exe.run(g_init_program, feed={}, fetch_list=[])

 for pass_id in range(PASS_NUM):
     batch_id = 0
...
@@ -239,14 +227,15 @@ for pass_id in range(PASS_NUM):
         tensor_img.set(img_data, place)
         tensor_y.set(y_data, place)

-        outs = exe.run(program,
+        outs = exe.run(g_program,
                        feed={"pixel": tensor_img,
                              "label": tensor_y},
-                       fetch_list=[avg_cost])
+                       fetch_list=[avg_cost, accuracy])

         loss = np.array(outs[0])
+        acc = np.array(outs[1])
         print("pass_id:" + str(pass_id) + " batch_id:" + str(batch_id) +
-              " loss:" + str(loss))
+              " loss:" + str(loss) + " acc:" + str(acc))
         batch_id = batch_id + 1

         if batch_id > 1:
...
python/paddle/v2/framework/tests/test_lod_rank_table.py
0 → 100644

from paddle.v2.framework.layers import lod_rank_table, data
from paddle.v2.framework.executor import Executor
from paddle.v2.framework.framework import g_program
import paddle.v2.framework.core as core
import numpy
import unittest


class TestLoDRankTable(unittest.TestCase):
    def test_lod_rank_table(self):
        x = data(name='x', shape=[100])
        cpu = core.CPUPlace()
        rank_table = lod_rank_table(x=x, level=1)
        rank_table.persistable = True
        exe = Executor(cpu)
        scope = core.Scope()

        tensor = core.LoDTensor()
        tensor.set(numpy.random.random(size=(17, 100)), cpu)
        tensor.set_lod([[0, 1, 3], [0, 5, 6, 7],
                        [0, 3, 4, 9, 10, 13, 16, 17]])

        exe.run(g_program, scope=scope, feed={'x': tensor})
        var = scope.find_var(rank_table.name)
        table = var.get_lod_rank_table()
        self.assertEqual([(0, 5), (1, 1), (2, 1)], table.items())


if __name__ == '__main__':
    unittest.main()
python/paddle/v2/framework/tests/test_recognize_digits_mlp.py

@@ -57,6 +57,8 @@ label = layers.data(
 cost = layers.cross_entropy(
     input=predict, label=label, program=program, init_program=init_program)
 avg_cost = layers.mean(x=cost, program=program, init_program=init_program)
+accuracy = layers.accuracy(
+    input=predict, label=label, program=program, init_program=init_program)

 optimizer = optimizer.MomentumOptimizer(learning_rate=0.001, momentum=0.9)
 opts = optimizer.minimize(avg_cost, init_program)
...
@@ -87,9 +89,9 @@ for pass_id in range(PASS_NUM):
     outs = exe.run(program,
                    feed={'x': tensor_x,
                          'y': tensor_y},
-                   fetch_list=[avg_cost])
+                   fetch_list=[avg_cost, accuracy])
     out = np.array(outs[0])
+    acc = np.array(outs[1])
     if out[0] < 5.0:
         exit(0)  # if avg cost less than 5.0, we think our code is good.
 exit(1)