BaiXuePrincess / Paddle (forked from PaddlePaddle / Paddle)
Commit 61d6db56, authored on Jan 22, 2018 by yangyaming

Merge branch 'develop' of https://github.com/PaddlePaddle/Paddle into fix-7555

Parents: 41028f57, 29603cf3

Showing 29 changed files with 997 additions and 129 deletions (+997 -129)
doc/faq/local/index_cn.rst                                                                        +46  -0
paddle/framework/executor.cc                                                                      +11  -0
paddle/framework/scope.cc                                                                         +10  -2
paddle/operators/compare_op.cc                                                                    +5   -6
paddle/operators/compare_op.cu                                                                    +0   -4
paddle/operators/compare_op.h                                                                     +2   -20
paddle/operators/elementwise_op_function.h                                                        +8   -6
python/paddle/v2/dataset/__init__.py                                                              +14  -2
python/paddle/v2/dataset/common.py                                                                +15  -6
python/paddle/v2/dataset/tests/wmt16_test.py                                                      +66  -0
python/paddle/v2/dataset/wmt14.py                                                                 +19  -11
python/paddle/v2/dataset/wmt16.py                                                                 +348 -0
python/paddle/v2/fluid/__init__.py                                                                +3   -1
python/paddle/v2/fluid/clip.py                                                                    +82  -7
python/paddle/v2/fluid/framework.py                                                               +2   -2
python/paddle/v2/fluid/layers/control_flow.py                                                     +32  -13
python/paddle/v2/fluid/layers/device.py                                                           +3   -3
python/paddle/v2/fluid/layers/layer_function_generator.py                                         +16  -11
python/paddle/v2/fluid/layers/ops.py                                                              +6   -17
python/paddle/v2/fluid/param_attr.py                                                              +3   -3
python/paddle/v2/fluid/tests/CMakeLists.txt                                                       +1   -0
python/paddle/v2/fluid/tests/book/test_recognize_digits_mlp.py                                    +1   -1
python/paddle/v2/fluid/tests/book_memory_optimization/CMakeLists.txt                              +11  -0
python/paddle/v2/fluid/tests/book_memory_optimization/test_memopt_fit_a_line.py                   +58  -0
python/paddle/v2/fluid/tests/book_memory_optimization/test_memopt_image_classification_train.py   +147 -0
python/paddle/v2/fluid/tests/test_compare_op.py                                                   +0   -2
python/paddle/v2/fluid/tests/test_error_clip.py                                                   +0   -0
python/paddle/v2/fluid/tests/test_gradient_clip.py                                                +81  -0
python/paddle/v2/fluid/tests/test_registry.py                                                     +7   -12
doc/faq/local/index_cn.rst  (+46 -0)

@@ -211,3 +211,49 @@ decoder_inputs = paddle.layer.fc(
 * The number of elements in the list equals the number of output layers in the network;
 * Each element in the list is one layer's output matrix, of type numpy ndarray;
 * The height of each layer's output matrix equals the number of samples for non-sequence input, or the total number of elements in the input sequences for sequence input; its width equals the layer's configured size;
+
+6. How to obtain the output of a particular layer during training
+-----------------------------------------------
+
+Inside event_handler, :code:`event.gm.getLayerOutputs("layer_name")` returns the output of the layer named :code:`layer_name` in the model configuration for the current mini-batch forward pass. All returned values are of type :code:`numpy.ndarray`; they can be used, for example, to compute custom evaluation metrics. Example code:
+
+.. code-block:: python
+
+    def score_diff(right_score, left_score):
+        return np.average(np.abs(right_score - left_score))
+
+    def event_handler(event):
+        if isinstance(event, paddle.event.EndIteration):
+            if event.batch_id % 25 == 0:
+                diff = score_diff(
+                    event.gm.getLayerOutputs("right_score")["right_score"][
+                        "value"],
+                    event.gm.getLayerOutputs("left_score")["left_score"][
+                        "value"])
+                logger.info(("Pass %d Batch %d : Cost %.6f, "
+                             "average absolute diff scores: %.6f") %
+                            (event.pass_id, event.batch_id, event.cost, diff))
+
+Note: this method cannot retrieve the contents of the steps inside :code:`paddle.layer.recurrent_group`, but it can retrieve the output of :code:`paddle.layer.recurrent_group` itself.
+
+7. How to obtain parameter weights and gradients during training
+-----------------------------------------------
+
+In some situations, inspecting the weights (a.k.a. parameters) of the current mini-batch helps to observe concrete values during training and to quickly locate problems. They can be printed inside :code:`event_handler` (note: use :code:`paddle.event.EndForwardBackward` so the values are also available when training on GPU). Example code:
+
+.. code-block:: python
+
+    ...
+    parameters = paddle.parameters.create(cost)
+    ...
+    def event_handler(event):
+        if isinstance(event, paddle.event.EndForwardBackward):
+            if event.batch_id % 25 == 0:
+                for p in parameters.keys():
+                    logger.info("Param %s, Grad %s",
+                                parameters.get(p), parameters.get_grad(p))
+
+Note: both "obtaining a layer's output during training" and "obtaining parameter weights and gradients during training" copy data from C++ to numpy and therefore hurt training performance; do not use them in performance-sensitive training scenarios.
\ No newline at end of file
paddle/framework/executor.cc  (+11 -0)

@@ -23,6 +23,7 @@ limitations under the License. */
 #include "paddle/framework/op_registry.h"
 #include "paddle/platform/place.h"

+DECLARE_bool(do_memory_benchmark);
 DEFINE_bool(check_nan_inf, false,
             "Checking whether operator produce NAN/INF or not. It will be "
             "extremely slow so please use this flag wisely.");

@@ -117,6 +118,10 @@ void Executor::Run(const ProgramDesc& pdesc, Scope* scope, int block_id,
     auto op = paddle::framework::OpRegistry::CreateOp(*op_desc);
     VLOG(3) << op->DebugStringEx(local_scope);
     op->Run(*local_scope, place_);
+    if (FLAGS_do_memory_benchmark) {
+      VLOG(2) << "Memory used after operator " + op->Type() + " running: "
+              << memory::memory_usage(place_);
+    }
     if (FLAGS_check_nan_inf) {
       for (auto& vname : op->OutputVars(true)) {
         auto* var = local_scope->FindVar(vname);

@@ -130,6 +135,12 @@ void Executor::Run(const ProgramDesc& pdesc, Scope* scope, int block_id,
   if (create_vars && create_local_scope) {
     scope->DeleteScope(local_scope);
   }
+  if (FLAGS_do_memory_benchmark) {
+    VLOG(2) << "-------------------------------------------------------";
+    VLOG(2) << "Memory used after deleting local scope: "
+            << memory::memory_usage(place_);
+    VLOG(2) << "-------------------------------------------------------";
+  }
 }
 }  // namespace framework
paddle/framework/scope.cc  (+10 -2)

@@ -20,6 +20,10 @@ limitations under the License. */
 #include "paddle/framework/threadpool.h"
 #include "paddle/string/printf.h"

+DEFINE_bool(do_memory_benchmark, false,
+            "Doing memory benchmark. It will make deleting scope synchronized, "
+            "and add some memory usage logs");
+
 namespace paddle {
 namespace framework {

@@ -88,8 +92,12 @@ void Scope::DeleteScope(Scope* scope) {
   auto it = std::find(this->kids_.begin(), this->kids_.end(), scope);
   PADDLE_ENFORCE(it != this->kids_.end(), "Cannot find %p as kid scope", scope);
   this->kids_.erase(it);
-  // Make delete async.
-  Async([scope] { delete scope; });
+  // When making memory benchmark on Fluid, we have to delete scope sync.
+  if (FLAGS_do_memory_benchmark) {
+    delete scope;
+  } else {
+    Async([scope] { delete scope; });
+  }
 }

 void Scope::Rename(const std::string& origin_name,
paddle/operators/compare_op.cc  (+5 -6)

@@ -39,6 +39,11 @@
 N-dim tensor. X and Y could be any type. The each element of the Out tensor is
 calculated by %s
 )DOC",
                        comment.type, comment.equation));
+    AddAttr<int>("axis",
+                 "(int, default -1). The start dimension index "
+                 "for broadcasting Y onto X.")
+        .SetDefault(-1)
+        .EqualGreaterThan(-1);
   }
 };

@@ -95,11 +100,5 @@ REGISTER_LOGICAL_OP(less_than, "Out = X < Y");
 REGISTER_LOGICAL_KERNEL(less_than, CPU, paddle::operators::LessThanFunctor);
 REGISTER_LOGICAL_OP(less_equal, "Out = X <= Y");
 REGISTER_LOGICAL_KERNEL(less_equal, CPU, paddle::operators::LessEqualFunctor);
-REGISTER_LOGICAL_OP(greater_than, "Out = X > Y");
-REGISTER_LOGICAL_KERNEL(greater_than, CPU,
-                        paddle::operators::GreaterThanFunctor);
-REGISTER_LOGICAL_OP(greater_equal, "Out = X >= Y");
-REGISTER_LOGICAL_KERNEL(greater_equal, CPU,
-                        paddle::operators::GreaterEqualFunctor);
 REGISTER_LOGICAL_OP(equal, "Out = X == Y");
 REGISTER_LOGICAL_KERNEL(equal, CPU, paddle::operators::EqualFunctor);
paddle/operators/compare_op.cu  (+0 -4)

@@ -16,8 +16,4 @@ limitations under the License. */
 REGISTER_LOGICAL_KERNEL(less_than, CUDA, paddle::operators::LessThanFunctor);
 REGISTER_LOGICAL_KERNEL(less_equal, CUDA, paddle::operators::LessEqualFunctor);
-REGISTER_LOGICAL_KERNEL(greater_than, CUDA,
-                        paddle::operators::GreaterThanFunctor);
-REGISTER_LOGICAL_KERNEL(greater_equal, CUDA,
-                        paddle::operators::GreaterEqualFunctor);
 REGISTER_LOGICAL_KERNEL(equal, CUDA, paddle::operators::EqualFunctor);
paddle/operators/compare_op.h  (+2 -20)

@@ -16,6 +16,7 @@ limitations under the License. */
 #include <math.h>
 #include <type_traits>
 #include "paddle/framework/op_registry.h"
+#include "paddle/operators/elementwise_op_function.h"
 #include "paddle/platform/transform.h"

 namespace paddle {

@@ -33,18 +34,6 @@ struct LessEqualFunctor {
   HOSTDEVICE bool operator()(const T& a, const T& b) const { return a <= b; }
 };

-template <typename T>
-struct GreaterThanFunctor {
-  using ELEM_TYPE = T;
-  HOSTDEVICE bool operator()(const T& a, const T& b) const { return a > b; }
-};
-
-template <typename T>
-struct GreaterEqualFunctor {
-  using ELEM_TYPE = T;
-  HOSTDEVICE bool operator()(const T& a, const T& b) const { return a >= b; }
-};
-
 template <typename T>
 struct EqualFunctor {
   using ELEM_TYPE = T;

@@ -65,14 +54,7 @@ class CompareOpKernel
  public:
   void Compute(const framework::ExecutionContext& context) const override {
     using T = typename Functor::ELEM_TYPE;
-    auto* x = context.Input<framework::Tensor>("X");
-    auto* y = context.Input<framework::Tensor>("Y");
-    auto* out = context.Output<framework::Tensor>("Out");
-    Functor binary_func;
-    platform::Transform<DeviceContext> trans;
-    trans(context.template device_context<DeviceContext>(), x->data<T>(),
-          x->data<T>() + x->numel(), y->data<T>(),
-          out->mutable_data<bool>(context.GetPlace()), binary_func);
+    ElementwiseComputeEx<Functor, DeviceContext, T, bool>(context);
   }
 };
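The rewritten kernel delegates to ElementwiseComputeEx with a bool output type, so comparisons inherit the elementwise broadcasting rules. Below is a small numpy sketch (not Paddle's implementation) of those semantics: the output dtype is bool rather than the input dtype, and Y is broadcast onto X starting at dimension axis, with the default axis = -1 meaning trailing alignment.

    import numpy as np

    def less_than_broadcast(x, y, axis=-1):
        # Align y with x's dims [axis, axis + y.ndim); axis == -1 means
        # trailing alignment, matching the elementwise ops' convention.
        if axis == -1:
            axis = x.ndim - y.ndim
        shape = [1] * x.ndim
        shape[axis:axis + y.ndim] = y.shape
        return x < y.reshape(shape)

    x = np.random.rand(2, 3, 4).astype("float32")
    y = np.random.rand(3, 4).astype("float32")
    out = less_than_broadcast(x, y)  # equivalent to axis=1 here
    print(out.shape, out.dtype)      # (2, 3, 4) bool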
paddle/operators/elementwise_op_function.h  (+8 -6)

@@ -176,14 +176,15 @@ class MidWiseTransformIterator<T, platform::CUDADeviceContext>
 };
 #endif

-template <typename Functor, typename T, typename DeviceContext>
+template <typename Functor, typename T, typename DeviceContext,
+          typename OutType = T>
 class TransformFunctor {
  public:
   TransformFunctor(const framework::Tensor* x, const framework::Tensor* y,
                    framework::Tensor* z, const DeviceContext& ctx, Functor func)
       : x_(x->data<T>()),
         y_(y->data<T>()),
-        z_(z->mutable_data<T>(ctx.GetPlace())),
+        z_(z->mutable_data<OutType>(ctx.GetPlace())),
         nx_(x->numel()),
         ctx_(ctx),
         func_(func) {}

@@ -208,7 +209,7 @@ class TransformFunctor {
  private:
   const T* x_;
   const T* y_;
-  T* z_;
+  OutType* z_;
   int64_t nx_;
   const DeviceContext& ctx_;
   Functor func_;

@@ -364,15 +365,16 @@ void ElementwiseGradCompute(const framework::ExecutionContext& ctx) {
   }
 }

-template <typename Functor, typename DeviceContext, typename T>
+template <typename Functor, typename DeviceContext, typename T,
+          typename OutType = T>
 void ElementwiseComputeEx(const framework::ExecutionContext& ctx) {
   using Tensor = framework::Tensor;

   auto* x = ctx.Input<Tensor>("X");
   auto* y = ctx.Input<Tensor>("Y");
   auto* z = ctx.Output<Tensor>("Out");
-  z->mutable_data<T>(ctx.GetPlace());
-  TransformFunctor<Functor, T, DeviceContext> functor(
+  z->mutable_data<OutType>(ctx.GetPlace());
+  TransformFunctor<Functor, T, DeviceContext, OutType> functor(
       x, y, z, ctx.template device_context<DeviceContext>(), Functor());

   auto x_dims = x->dims();
python/paddle/v2/dataset/__init__.py  (+14 -2)

@@ -24,11 +24,23 @@ import conll05
 import uci_housing
 import sentiment
 import wmt14
+import wmt16
 import mq2007
 import flowers
 import voc2012

 __all__ = [
-    'mnist', 'imikolov', 'imdb', 'cifar', 'movielens', 'conll05', 'sentiment'
-    'uci_housing', 'wmt14', 'mq2007', 'flowers', 'voc2012'
+    'mnist',
+    'imikolov',
+    'imdb',
+    'cifar',
+    'movielens',
+    'conll05',
+    'sentiment'
+    'uci_housing',
+    'wmt14',
+    'wmt16',
+    'mq2007',
+    'flowers',
+    'voc2012',
 ]
python/paddle/v2/dataset/common.py  (+15 -6)

@@ -25,8 +25,12 @@ import glob
 import cPickle as pickle

 __all__ = [
-    'DATA_HOME', 'download', 'md5file', 'split', 'cluster_files_reader',
-    'convert'
+    'DATA_HOME',
+    'download',
+    'md5file',
+    'split',
+    'cluster_files_reader',
+    'convert',
 ]

 DATA_HOME = os.path.expanduser('~/.cache/paddle/dataset')

@@ -58,12 +62,15 @@ def md5file(fname):
     return hash_md5.hexdigest()


-def download(url, module_name, md5sum):
+def download(url, module_name, md5sum, save_name=None):
     dirname = os.path.join(DATA_HOME, module_name)
     if not os.path.exists(dirname):
         os.makedirs(dirname)

-    filename = os.path.join(dirname, url.split('/')[-1])
+    filename = os.path.join(dirname,
+                            url.split('/')[-1]
+                            if save_name is None else save_name)
     retry = 0
     retry_limit = 3
     while not (os.path.exists(filename) and md5file(filename) == md5sum):

@@ -196,9 +203,11 @@ def convert(output_path, reader, line_count, name_prefix):
     Convert data from reader to recordio format files.

     :param output_path: directory in which output files will be saved.
-    :param reader: a data reader, from which the convert program will read data instances.
+    :param reader: a data reader, from which the convert program will read
+        data instances.
     :param name_prefix: the name prefix of generated files.
-    :param max_lines_to_shuffle: the max lines numbers to shuffle before writing.
+    :param max_lines_to_shuffle: the max lines numbers to shuffle before
+        writing.
     """
     assert line_count >= 1
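The new save_name argument covers URLs whose last path segment is not a usable file name. The wmt16.py module added later in this commit is the first caller; a usage sketch based on that call site:

    import paddle.v2.dataset.common as common

    # The wmt16 URL ends in a query string, so a name derived from the last
    # URL segment would be unusable; save_name overrides it.
    DATA_URL = ("http://cloud.dlnel.org/filepub/"
                "?uuid=46a0808e-ddd8-427c-bacd-0dbc6d045fed")
    DATA_MD5 = "0c38be43600334966403524a40dcd81e"
    path = common.download(DATA_URL, "wmt16", DATA_MD5, save_name="wmt16.tar.gz")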
python/paddle/v2/dataset/tests/wmt16_test.py  (new file, 0 → 100644)

# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import paddle.v2.dataset.wmt16
import unittest


class TestWMT16(unittest.TestCase):
    def checkout_one_sample(self, sample):
        # train data has 3 fields: source language word indices,
        # target language word indices, and target next word indices.
        self.assertEqual(len(sample), 3)

        # test start mark and end mark in source word indices.
        self.assertEqual(sample[0][0], 0)
        self.assertEqual(sample[0][-1], 1)

        # test start mark in target word indices
        self.assertEqual(sample[1][0], 0)

        # test end mark in target next word indices
        self.assertEqual(sample[2][-1], 1)

    def test_train(self):
        for idx, sample in enumerate(
                paddle.v2.dataset.wmt16.train(
                    src_dict_size=100000, trg_dict_size=100000)()):
            if idx >= 10: break
            self.checkout_one_sample(sample)

    def test_test(self):
        for idx, sample in enumerate(
                paddle.v2.dataset.wmt16.test(
                    src_dict_size=1000, trg_dict_size=1000)()):
            if idx >= 10: break
            self.checkout_one_sample(sample)

    def test_val(self):
        for idx, sample in enumerate(
                paddle.v2.dataset.wmt16.validation(
                    src_dict_size=1000, trg_dict_size=1000)()):
            if idx >= 10: break
            self.checkout_one_sample(sample)

    def test_get_dict(self):
        dict_size = 1000
        word_dict = paddle.v2.dataset.wmt16.get_dict("en", dict_size, True)
        self.assertEqual(len(word_dict), dict_size)
        self.assertEqual(word_dict[0], "<s>")
        self.assertEqual(word_dict[1], "<e>")
        self.assertEqual(word_dict[2], "<unk>")


if __name__ == "__main__":
    unittest.main()
python/paddle/v2/dataset/wmt14.py  (+19 -11)

@@ -25,12 +25,20 @@ import gzip
 import paddle.v2.dataset.common
 from paddle.v2.parameters import Parameters

-__all__ = ['train', 'test', 'build_dict', 'convert']
+__all__ = [
+    'train',
+    'test',
+    'get_dict',
+    'convert',
+]

-URL_DEV_TEST = 'http://www-lium.univ-lemans.fr/~schwenk/cslm_joint_paper/data/dev+test.tgz'
+URL_DEV_TEST = ('http://www-lium.univ-lemans.fr/~schwenk/'
+                'cslm_joint_paper/data/dev+test.tgz')
 MD5_DEV_TEST = '7d7897317ddd8ba0ae5c5fa7248d3ff5'
-# this is a small set of data for test. The original data is too large and will be add later.
-URL_TRAIN = 'http://paddlepaddle.cdn.bcebos.com/demo/wmt_shrinked_data/wmt14.tgz'
+# this is a small set of data for test. The original data is too large and
+# will be add later.
+URL_TRAIN = ('http://paddlepaddle.cdn.bcebos.com/demo/'
+             'wmt_shrinked_data/wmt14.tgz')
 MD5_TRAIN = '0791583d57d5beb693b9414c5b36798c'
 # BLEU of this trained model is 26.92
 URL_MODEL = 'http://paddlepaddle.bj.bcebos.com/demo/wmt_14/wmt14_model.tar.gz'

@@ -42,8 +50,8 @@ UNK = "<unk>"
 UNK_IDX = 2


-def __read_to_dict__(tar_file, dict_size):
-    def __to_dict__(fd, size):
+def __read_to_dict(tar_file, dict_size):
+    def __to_dict(fd, size):
         out_dict = dict()
         for line_count, line in enumerate(fd):
             if line_count < size:

@@ -58,19 +66,19 @@ def __read_to_dict(tar_file, dict_size):
             if each_item.name.endswith("src.dict")
         ]
         assert len(names) == 1
-        src_dict = __to_dict__(f.extractfile(names[0]), dict_size)
+        src_dict = __to_dict(f.extractfile(names[0]), dict_size)
         names = [
             each_item.name for each_item in f
             if each_item.name.endswith("trg.dict")
         ]
         assert len(names) == 1
-        trg_dict = __to_dict__(f.extractfile(names[0]), dict_size)
+        trg_dict = __to_dict(f.extractfile(names[0]), dict_size)
     return src_dict, trg_dict


 def reader_creator(tar_file, file_name, dict_size):
     def reader():
-        src_dict, trg_dict = __read_to_dict__(tar_file, dict_size)
+        src_dict, trg_dict = __read_to_dict(tar_file, dict_size)
         with tarfile.open(tar_file, mode='r') as f:
             names = [
                 each_item.name for each_item in f

@@ -152,7 +160,7 @@ def get_dict(dict_size, reverse=True):
     # if reverse = False, return dict = {'a':'001', 'b':'002', ...}
     # else reverse = true, return dict = {'001':'a', '002':'b', ...}
     tar_file = paddle.v2.dataset.common.download(URL_TRAIN, 'wmt14', MD5_TRAIN)
-    src_dict, trg_dict = __read_to_dict__(tar_file, dict_size)
+    src_dict, trg_dict = __read_to_dict(tar_file, dict_size)
     if reverse:
         src_dict = {v: k for k, v in src_dict.items()}
         trg_dict = {v: k for k, v in trg_dict.items()}
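For reference, the public get_dict entry point still returns both dictionaries after the rename of the private helpers; a short usage sketch (that it returns a (src, trg) pair is inferred from the function body above):

    import paddle.v2.dataset.wmt14 as wmt14

    # With reverse=True (the default), the dicts map index -> word.
    src_dict, trg_dict = wmt14.get_dict(dict_size=30000)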
python/paddle/v2/dataset/wmt16.py  (new file, 0 → 100644)

# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
ACL2016 Multimodal Machine Translation. Please see this website for more
details: http://www.statmt.org/wmt16/multimodal-task.html#task1

If you use the dataset created for your task, please cite the following paper:
Multi30K: Multilingual English-German Image Descriptions.

@article{elliott-EtAl:2016:VL16,
    author    = {{Elliott}, D. and {Frank}, S. and {Sima"an}, K. and {Specia}, L.},
    title     = {Multi30K: Multilingual English-German Image Descriptions},
    booktitle = {Proceedings of the 6th Workshop on Vision and Language},
    year      = {2016},
    pages     = {70--74},
}
"""

import os
import tarfile
import gzip
from collections import defaultdict

import paddle.v2.dataset.common

__all__ = [
    "train",
    "test",
    "validation",
    "convert",
    "fetch",
    "get_dict",
]

DATA_URL = ("http://cloud.dlnel.org/filepub/"
            "?uuid=46a0808e-ddd8-427c-bacd-0dbc6d045fed")
DATA_MD5 = "0c38be43600334966403524a40dcd81e"

TOTAL_EN_WORDS = 11250
TOTAL_DE_WORDS = 19220

START_MARK = "<s>"
END_MARK = "<e>"
UNK_MARK = "<unk>"


def __build_dict(tar_file, dict_size, save_path, lang):
    word_dict = defaultdict(int)
    with tarfile.open(tar_file, mode="r") as f:
        for line in f.extractfile("wmt16/train"):
            line_split = line.strip().split("\t")
            if len(line_split) != 2: continue
            sen = line_split[0] if lang == "en" else line_split[1]
            for w in sen.split():
                word_dict[w] += 1

    with open(save_path, "w") as fout:
        fout.write("%s\n%s\n%s\n" % (START_MARK, END_MARK, UNK_MARK))
        for idx, word in enumerate(
                sorted(
                    word_dict.iteritems(), key=lambda x: x[1], reverse=True)):
            if idx + 3 == dict_size: break
            fout.write("%s\n" % (word[0]))


def __load_dict(tar_file, dict_size, lang, reverse=False):
    dict_path = os.path.join(paddle.v2.dataset.common.DATA_HOME,
                             "wmt16/%s_%d.dict" % (lang, dict_size))
    if not os.path.exists(dict_path) or (
            len(open(dict_path, "r").readlines()) != dict_size):
        __build_dict(tar_file, dict_size, dict_path, lang)

    word_dict = {}
    with open(dict_path, "r") as fdict:
        for idx, line in enumerate(fdict):
            if reverse:
                word_dict[idx] = line.strip()
            else:
                word_dict[line.strip()] = idx
    return word_dict


def __get_dict_size(src_dict_size, trg_dict_size, src_lang):
    src_dict_size = min(src_dict_size, (TOTAL_EN_WORDS if src_lang == "en" else
                                        TOTAL_DE_WORDS))
    trg_dict_size = min(trg_dict_size, (TOTAL_DE_WORDS if src_lang == "en" else
                                        TOTAL_EN_WORDS))
    return src_dict_size, trg_dict_size


def reader_creator(tar_file, file_name, src_dict_size, trg_dict_size,
                   src_lang):
    def reader():
        src_dict = __load_dict(tar_file, src_dict_size, src_lang)
        trg_dict = __load_dict(tar_file, trg_dict_size,
                               ("de" if src_lang == "en" else "en"))

        # the indices for start mark, end mark, and unk are the same in
        # source language and target language. Here uses the source language
        # dictionary to determine their indices.
        start_id = src_dict[START_MARK]
        end_id = src_dict[END_MARK]
        unk_id = src_dict[UNK_MARK]

        src_col = 0 if src_lang == "en" else 1
        trg_col = 1 - src_col

        with tarfile.open(tar_file, mode="r") as f:
            for line in f.extractfile(file_name):
                line_split = line.strip().split("\t")
                if len(line_split) != 2:
                    continue
                src_words = line_split[src_col].split()
                src_ids = [start_id] + [
                    src_dict.get(w, unk_id) for w in src_words
                ] + [end_id]

                trg_words = line_split[trg_col].split()
                trg_ids = [trg_dict.get(w, unk_id) for w in trg_words]

                trg_ids_next = trg_ids + [end_id]
                trg_ids = [start_id] + trg_ids

                yield src_ids, trg_ids, trg_ids_next

    return reader


def train(src_dict_size, trg_dict_size, src_lang="en"):
    """
    WMT16 train set reader.

    This function returns the reader for train data. Each sample the reader
    returns is made up of three fields: the source language word index
    sequence, target language word index sequence and next word index
    sequence.

    NOTE:
        The original link for the training data is:
        http://www.quest.dcs.shef.ac.uk/wmt16_files_mmt/training.tar.gz

        paddle.dataset.wmt16 provides a tokenized version of the original
        dataset by using moses's tokenization script:
        https://github.com/moses-smt/mosesdecoder/blob/master/scripts/tokenizer/tokenizer.perl

    Args:
        src_dict_size(int): Size of the source language dictionary. Three
                            special tokens will be added into the dictionary:
                            <s> for start mark, <e> for end mark, and <unk>
                            for unknown word.
        trg_dict_size(int): Size of the target language dictionary. Three
                            special tokens will be added into the dictionary:
                            <s> for start mark, <e> for end mark, and <unk>
                            for unknown word.
        src_lang(string): A string indicating which language is the source
                          language. Available options are: "en" for English
                          and "de" for German.

    Returns:
        callable: The train reader.
    """

    assert src_lang in ["en", "de"], ("Wrong language type. Only support: "
                                      "en (for English); de (for German)")
    src_dict_size, trg_dict_size = __get_dict_size(src_dict_size,
                                                   trg_dict_size, src_lang)

    return reader_creator(
        tar_file=paddle.v2.dataset.common.download(DATA_URL, "wmt16", DATA_MD5,
                                                   "wmt16.tar.gz"),
        file_name="wmt16/train",
        src_dict_size=src_dict_size,
        trg_dict_size=trg_dict_size,
        src_lang=src_lang)


def test(src_dict_size, trg_dict_size, src_lang="en"):
    """
    WMT16 test set reader.

    This function returns the reader for test data. Each sample the reader
    returns is made up of three fields: the source language word index
    sequence, target language word index sequence and next word index
    sequence.

    NOTE:
        The original link for the test data is:
        http://www.quest.dcs.shef.ac.uk/wmt16_files_mmt/mmt16_task1_test.tar.gz

        paddle.dataset.wmt16 provides a tokenized version of the original
        dataset by using moses's tokenization script:
        https://github.com/moses-smt/mosesdecoder/blob/master/scripts/tokenizer/tokenizer.perl

    Args:
        src_dict_size(int): Size of the source language dictionary. Three
                            special tokens will be added into the dictionary:
                            <s> for start mark, <e> for end mark, and <unk>
                            for unknown word.
        trg_dict_size(int): Size of the target language dictionary. Three
                            special tokens will be added into the dictionary:
                            <s> for start mark, <e> for end mark, and <unk>
                            for unknown word.
        src_lang(string): A string indicating which language is the source
                          language. Available options are: "en" for English
                          and "de" for German.

    Returns:
        callable: The test reader.
    """

    assert src_lang in ["en", "de"], ("Wrong language type. "
                                      "Only support: en (for English); "
                                      "de (for German)")
    src_dict_size, trg_dict_size = __get_dict_size(src_dict_size,
                                                   trg_dict_size, src_lang)

    return reader_creator(
        tar_file=paddle.v2.dataset.common.download(DATA_URL, "wmt16", DATA_MD5,
                                                   "wmt16.tar.gz"),
        file_name="wmt16/test",
        src_dict_size=src_dict_size,
        trg_dict_size=trg_dict_size,
        src_lang=src_lang)


def validation(src_dict_size, trg_dict_size, src_lang="en"):
    """
    WMT16 validation set reader.

    This function returns the reader for validation data. Each sample the
    reader returns is made up of three fields: the source language word index
    sequence, target language word index sequence and next word index
    sequence.

    NOTE:
        The original link for the validation data is:
        http://www.quest.dcs.shef.ac.uk/wmt16_files_mmt/validation.tar.gz

        paddle.dataset.wmt16 provides a tokenized version of the original
        dataset by using moses's tokenization script:
        https://github.com/moses-smt/mosesdecoder/blob/master/scripts/tokenizer/tokenizer.perl

    Args:
        src_dict_size(int): Size of the source language dictionary. Three
                            special tokens will be added into the dictionary:
                            <s> for start mark, <e> for end mark, and <unk>
                            for unknown word.
        trg_dict_size(int): Size of the target language dictionary. Three
                            special tokens will be added into the dictionary:
                            <s> for start mark, <e> for end mark, and <unk>
                            for unknown word.
        src_lang(string): A string indicating which language is the source
                          language. Available options are: "en" for English
                          and "de" for German.

    Returns:
        callable: The validation reader.
    """

    assert src_lang in ["en", "de"], ("Wrong language type. "
                                      "Only support: en (for English); "
                                      "de (for German)")
    src_dict_size, trg_dict_size = __get_dict_size(src_dict_size,
                                                   trg_dict_size, src_lang)

    return reader_creator(
        tar_file=paddle.v2.dataset.common.download(DATA_URL, "wmt16", DATA_MD5,
                                                   "wmt16.tar.gz"),
        file_name="wmt16/val",
        src_dict_size=src_dict_size,
        trg_dict_size=trg_dict_size,
        src_lang=src_lang)


def get_dict(lang, dict_size, reverse=False):
    """
    Return the word dictionary for the specified language.

    Args:
        lang(string): A string indicating which language is the source
                      language. Available options are: "en" for English
                      and "de" for German.
        dict_size(int): Size of the specified language dictionary.
        reverse(bool): If reverse is set to False, the returned python
                       dictionary will use word as key and use index as
                       value. If reverse is set to True, the returned python
                       dictionary will use index as key and word as value.

    Returns:
        dict: The word dictionary for the specific language.
    """

    if lang == "en":
        dict_size = min(dict_size, TOTAL_EN_WORDS)
    else:
        dict_size = min(dict_size, TOTAL_DE_WORDS)

    dict_path = os.path.join(paddle.v2.dataset.common.DATA_HOME,
                             "wmt16/%s_%d.dict" % (lang, dict_size))
    assert os.path.exists(dict_path), (
        "Word dictionary does not exist. "
        "Please invoke paddle.dataset.wmt16.train/test/validation "
        "first to build the dictionary.")
    tar_file = os.path.join(paddle.v2.dataset.common.DATA_HOME, "wmt16.tar.gz")
    return __load_dict(tar_file, dict_size, lang, reverse)


def fetch():
    """download the entire dataset.
    """
    paddle.v2.dataset.common.download(DATA_URL, "wmt16", DATA_MD5,
                                      "wmt16.tar.gz")


def convert(path, src_dict_size, trg_dict_size, src_lang):
    """Converts dataset to recordio format.
    """

    paddle.v2.dataset.common.convert(path,
                                     train(
                                         src_dict_size=src_dict_size,
                                         trg_dict_size=trg_dict_size,
                                         src_lang=src_lang), 1000,
                                     "wmt16_train")
    paddle.v2.dataset.common.convert(path,
                                     test(
                                         src_dict_size=src_dict_size,
                                         trg_dict_size=trg_dict_size,
                                         src_lang=src_lang), 1000,
                                     "wmt16_test")
    paddle.v2.dataset.common.convert(path,
                                     validation(
                                         src_dict_size=src_dict_size,
                                         trg_dict_size=trg_dict_size,
                                         src_lang=src_lang), 1000,
                                     "wmt16_validation")
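A usage sketch of the new readers, matching what wmt16_test.py above exercises (index 0 is <s>, 1 is <e>, 2 is <unk>):

    import paddle.v2.dataset.wmt16 as wmt16

    for sample in wmt16.train(src_dict_size=1000, trg_dict_size=1000)():
        src_ids, trg_ids, trg_ids_next = sample
        print(src_ids[0], src_ids[-1])  # 0 (<s>) and 1 (<e>)
        break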
python/paddle/v2/fluid/__init__.py  (+3 -1)

@@ -86,7 +86,9 @@ def __bootstrap__():
         os.environ['OMP_NUM_THREADS'] = str(num_threads)

-    read_env_flags = ['use_pinned_memory', 'check_nan_inf']
+    read_env_flags = [
+        'use_pinned_memory', 'check_nan_inf', 'do_memory_benchmark'
+    ]
     if core.is_compile_gpu():
         read_env_flags += ['fraction_of_gpu_memory_to_use', 'op_sync']
     core.init_gflags([sys.argv[0]] +
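__bootstrap__ reads the flags listed in read_env_flags from the process environment (via the gflags call truncated above), so the new flag can be enabled before the first fluid import. A sketch; the FLAGS_ environment prefix is an assumption based on gflags' usual from-env mechanism:

    import os

    # Assumed mechanism: set the flag before fluid's __bootstrap__ runs.
    os.environ['FLAGS_do_memory_benchmark'] = 'true'

    import paddle.v2.fluid as fluid  # noqa: E402

The memory-usage lines added in executor.cc above are logged at VLOG level 2, so a verbose glog level (e.g. GLOG_v=2) is also needed for them to appear.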
python/paddle/v2/fluid/clip.py  (+82 -7)

@@ -14,6 +14,7 @@
 import functools

 import layers
+import framework
 from . import core

 __all__ = [

@@ -66,7 +67,7 @@ def error_clip_callback(block, context):
 class BaseGradientClipAttr(object):
-    def process_context(self, context, p_g):
+    def process_context(self, context, param, grad):
         raise NotImplementedError()

     def create_operators(self, param, grad):

@@ -74,7 +75,7 @@ class BaseGradientClipAttr(object):
 class NullGradientClipAttr(BaseGradientClipAttr):
-    def process_context(self, context, p_g):
+    def process_context(self, context, param, grad):
         pass

     def create_operators(self, param, grad):

@@ -91,7 +92,7 @@ class GradientClipByValue(BaseGradientClipAttr):
         self.max = max
         self.min = min

-    def process_context(self, context, p_g):
+    def process_context(self, context, param, grad):
         pass

     def create_operators(self, param, grad):

@@ -99,19 +100,93 @@ class GradientClipByValue(BaseGradientClipAttr):
         return param, new_grad


+class GradientClipByNorm(BaseGradientClipAttr):
+    def __init__(self, clip_norm):
+        self.clip_norm = clip_norm
+
+    def process_context(self, context, param, grad):
+        pass
+
+    def create_operators(self, param, grad):
+        new_grad = layers.clip_by_norm(x=grad, max_norm=self.clip_norm)
+        return param, new_grad
+
+
+class GradientClipByGlobalNorm(BaseGradientClipAttr):
+    def __init__(self, clip_norm, group_name="default_group"):
+        if not isinstance(group_name, basestring):
+            raise TypeError("'group_name' must be a basestring.")
+
+        self.clip_norm = clip_norm
+        self.group_name = group_name
+
+    def process_context(self, context, param, grad):
+        if self.group_name not in context:
+            context[self.group_name] = []
+            context[self.group_name + "_clip_value"] = self.clip_norm
+            context[self.group_name + "_clip"] = layers.fill_constant(
+                shape=[1], dtype="float32", value=self.clip_norm)
+        else:
+            if not self.clip_norm == context[self.group_name + "_clip_value"]:
+                raise ValueError(
+                    "All parameters' 'clip_norm' of a same group should be the same"
+                )
+
+        local_norm_var = layers.reduce_sum(input=layers.pow(x=grad, factor=2.0))
+        context[self.group_name].append(local_norm_var)
+
+        self.context = context
+
+    def create_operators(self, param, grad):
+        group_scale_name = self.group_name + "_scale"
+        if group_scale_name not in self.context:
+            group_norm_var = layers.sums(input=self.context[self.group_name])
+            layers.sqrt(x=group_norm_var, out=group_norm_var)
+            clip_var = self.context[self.group_name + "_clip"]
+            group_scale_var = layers.elementwise_div(
+                x=clip_var,
+                y=layers.elementwise_max(
+                    x=clip_var, y=group_norm_var))
+            assert group_scale_var.shape == (1L, )
+            self.context[group_scale_name] = group_scale_var
+
+        new_grad = layers.elementwise_mul(
+            x=grad, y=self.context[group_scale_name])
+        return param, new_grad
+
+
+def gradient_clip_by_global_norm(clip_norm,
+                                 param_list=None,
+                                 group_name="default_group",
+                                 program=None):
+    if program is None:
+        program = framework.default_main_program()
+    if param_list is None:
+        param_list = program.block(0).all_parameters()
+    if all(isinstance(elem, basestring) for elem in param_list):
+        param_list = [program.block(0).var(elem) for elem in param_list]
+    if not all(isinstance(elem, framework.Parameter) for elem in param_list):
+        raise TypeError(
+            "'param_list' should be a list of Parameter or basestring(parameter's name)."
+        )
+
+    for param in param_list:
+        param.gradient_clip_attr = GradientClipByGlobalNorm(clip_norm,
+                                                            group_name)
+
+
 def append_gradient_clip_ops(param_grad):
     context = dict()
     create_op_callbacks = []
     for p, g in param_grad:
-        clip_attr = getattr(p, 'clip_attr', NullGradientClipAttr())
+        clip_attr = getattr(p, 'gradient_clip_attr', NullGradientClipAttr())
         if clip_attr is None:
             clip_attr = NullGradientClipAttr()
         if not isinstance(clip_attr, BaseGradientClipAttr):
             raise TypeError(
-                "clip attribute should be an instance of BaseGradientClippingAttr"
-            )
+                "clip attribute should be an instance of BaseGradientClipAttr")

-        clip_attr.process_context(context=context, p_g=param_grad)
+        clip_attr.process_context(context=context, param=p, grad=g)
         create_op_callbacks.append(
             functools.partial(
                 clip_attr.create_operators, param=p, grad=g))
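A usage sketch mirroring test_gradient_clip.py (added later in this commit): mark every parameter of the current program for global-norm clipping, then materialize the clip ops over the param/grad pairs produced by append_backward. Here avg_cost is assumed to be a loss variable built earlier:

    import paddle.v2.fluid as fluid

    fluid.clip.gradient_clip_by_global_norm(clip_norm=1.0)
    p_g = fluid.backward.append_backward(loss=avg_cost)
    p_g = fluid.clip.append_gradient_clip_ops(p_g)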
python/paddle/v2/fluid/framework.py  (+2 -2)

@@ -780,7 +780,7 @@ class Block(object):
                 trainable=p.trainable,
                 optimize_attr=p.optimize_attr,
                 regularizer=p.regularizer,
-                clip_attr=p.clip_attr,
+                gradient_clip_attr=p.gradient_clip_attr,
                 error_clip=p.error_clip,
                 name=v.name)
             self.vars[new_p.name] = new_p

@@ -948,7 +948,7 @@ class Parameter(Variable):
         self.regularizer = kwargs.get('regularizer', None)

-        self.clip_attr = kwargs.get('clip_attr', None)
+        self.gradient_clip_attr = kwargs.get('gradient_clip_attr', None)

 # program is a global instance.
python/paddle/v2/fluid/layers/control_flow.py  (+32 -13)

@@ -11,22 +11,41 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
 import contextlib
-from ..layer_helper import LayerHelper, unique_name
-from ..framework import Program, Variable, Operator
-from .. import core
-from ..registry import autodoc
+
+from layer_function_generator import autodoc
+from tensor import assign, fill_constant
+from .. import core
+from ..framework import Program, Variable, Operator
+from ..layer_helper import LayerHelper, unique_name

 __all__ = [
-    'split_lod_tensor', 'merge_lod_tensor', 'BlockGuard',
-    'BlockGuardWithCompletion', 'StaticRNNMemoryLink', 'WhileGuard', 'While',
-    'lod_rank_table', 'max_sequence_len', 'topk', 'lod_tensor_to_array',
-    'array_to_lod_tensor', 'increment', 'array_write', 'create_array',
-    'less_than', 'array_read', 'shrink_memory', 'array_length', 'IfElse',
-    'DynamicRNN', 'ConditionalBlock', 'StaticRNN',
-    'reorder_lod_tensor_by_rank', 'ParallelDo', 'Print'
+    'split_lod_tensor',
+    'merge_lod_tensor',
+    'BlockGuard',
+    'BlockGuardWithCompletion',
+    'StaticRNNMemoryLink',
+    'WhileGuard',
+    'While',
+    'lod_rank_table',
+    'max_sequence_len',
+    'topk',
+    'lod_tensor_to_array',
+    'array_to_lod_tensor',
+    'increment',
+    'array_write',
+    'create_array',
+    'less_than',
+    'array_read',
+    'shrink_memory',
+    'array_length',
+    'IfElse',
+    'DynamicRNN',
+    'ConditionalBlock',
+    'StaticRNN',
+    'reorder_lod_tensor_by_rank',
+    'ParallelDo',
+    'Print',
 ]

@@ -1458,7 +1477,7 @@ class DynamicRNN(object):
                 method))


-@autodoc
+@autodoc()
 def reorder_lod_tensor_by_rank(x, rank_table):
     helper = LayerHelper('reorder_lod_tensor_by_rank', **locals())
     helper.is_instance('x', Variable)
python/paddle/v2/fluid/layers/device.py  (+3 -3)

@@ -15,14 +15,14 @@
 All util layers.
 """

-from ..layer_helper import LayerHelper
+from layer_function_generator import autodoc
 from ..framework import unique_name
-from ..registry import autodoc
+from ..layer_helper import LayerHelper

 __all__ = ['get_places']


-@autodoc
+@autodoc()
 def get_places(device_count=None, device_type=None):
     helper = LayerHelper('get_places', **locals())
     out_places = helper.create_variable(name=unique_name(helper.name + ".out"))
python/paddle/v2/fluid/registry.py → python/paddle/v2/fluid/layers/layer_function_generator.py  (renamed, +16 -11)

@@ -13,17 +13,19 @@
 # limitations under the License.
 import re
 import cStringIO
-import warnings
 import functools
-import inspect
+import warnings

-import proto.framework_pb2 as framework_pb2
-from framework import OpProtoHolder, Variable, Program, Operator
-from paddle.v2.fluid.layer_helper import LayerHelper, unique_name
+from .. import proto
+
+framework_pb2 = proto.framework_pb2
+
+from ..framework import OpProtoHolder, Variable
+from ..layer_helper import LayerHelper

 __all__ = [
-    'deprecated', 'register_layer', 'autodoc'
+    'deprecated',
+    'generate_layer_fn',
+    'autodoc',
 ]

@@ -96,7 +98,7 @@ def _generate_doc_string_(op_proto):
     return buf.getvalue()


-def register_layer(op_type):
+def generate_layer_fn(op_type):
     """Register the Python layer for an Operator.

     Args:

@@ -207,7 +209,10 @@ def deprecated(func_or_class):
     return func_wrapper


-def autodoc(func):
-    func.__doc__ = _generate_doc_string_(
-        OpProtoHolder.instance().get_op_proto(func.__name__))
-    return func
+def autodoc(comment=""):
+    def __impl__(func):
+        func.__doc__ = _generate_doc_string_(OpProtoHolder.instance(
+        ).get_op_proto(func.__name__)) + comment
+        return func
+
+    return __impl__
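autodoc is now a decorator factory, so call sites change from @autodoc to @autodoc() (see control_flow.py and device.py above), and an optional comment is appended to the docstring generated from the op proto. A minimal sketch of the new pattern; my_op is a hypothetical name and must correspond to a registered op proto:

    from paddle.v2.fluid.layers.layer_function_generator import autodoc

    @autodoc("Extra note appended to the generated docstring.")
    def my_op(x):  # hypothetical; requires an op proto named 'my_op'
        pass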
python/paddle/v2/fluid/layers/ops.py  (+6 -17)

@@ -11,8 +11,7 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-from ..registry import register_layer
+from layer_function_generator import generate_layer_fn

 __activations__ = [
     'sigmoid',

@@ -46,21 +45,11 @@ __activations__ = [
 ]

 __all__ = [
-    'mean',
-    'mul',
-    'reshape',
-    'scale',
-    'transpose',
-    'sigmoid_cross_entropy_with_logits',
-    'elementwise_add',
-    'elementwise_div',
-    'elementwise_sub',
-    'elementwise_mul',
-    'elementwise_max',
-    'elementwise_min',
-    'clip',
-    'sequence_softmax',
+    'mean', 'mul', 'reshape', 'scale', 'transpose',
+    'sigmoid_cross_entropy_with_logits', 'elementwise_add', 'elementwise_div',
+    'elementwise_sub', 'elementwise_mul', 'elementwise_max', 'elementwise_min',
+    'clip', 'clip_by_norm', 'sequence_softmax'
 ] + __activations__

 for _OP in set(__all__):
-    globals()[_OP] = register_layer(_OP)
+    globals()[_OP] = generate_layer_fn(_OP)
python/paddle/v2/fluid/param_attr.py  (+3 -3)

@@ -25,13 +25,13 @@ class ParamAttr(object):
                  learning_rate=1.0,
                  regularizer=None,
                  trainable=True,
-                 clip=None):
+                 gradient_clip=None):
         self.name = name
         self.initializer = initializer
         self.learning_rate = learning_rate
         self.regularizer = regularizer
         self.trainable = trainable
-        self.clip = clip
+        self.gradient_clip = gradient_clip

     def set_default_initializer(self, initializer):
         if initializer is None:

@@ -77,7 +77,7 @@ class ParamAttr(object):
             },
             'regularizer': self.regularizer,
             'trainable': self.trainable,
-            'clip_attr': self.clip
+            'gradient_clip_attr': self.gradient_clip
         }
         if with_initializer:
             kwargs['initializer'] = self.initializer
python/paddle/v2/fluid/tests/CMakeLists.txt  (+1 -0)

@@ -6,3 +6,4 @@ endforeach()

 add_subdirectory(book)
 add_subdirectory(book_distribute)
+add_subdirectory(book_memory_optimization)
python/paddle/v2/fluid/tests/book/test_recognize_digits_mlp.py  (+1 -1)

@@ -27,7 +27,7 @@ hidden1 = fluid.layers.fc(input=image,
                           act='relu',
                           param_attr=fluid.ParamAttr(
                               regularizer=regularizer,
-                              clip=fluid.clip.ClipByValue(10)))
+                              gradient_clip=fluid.clip.ClipByValue(10)))

 hidden2 = fluid.layers.fc(input=hidden1,
                           size=64,
python/paddle/v2/fluid/tests/book_memory_optimization/CMakeLists.txt  (new file, 0 → 100644)

file(GLOB TEST_OPS RELATIVE "${CMAKE_CURRENT_SOURCE_DIR}" "test_*.py")
string(REPLACE ".py" "" TEST_OPS "${TEST_OPS}")

list(REMOVE_ITEM TEST_OPS test_memopt_image_classification_train)
py_test(test_memopt_image_classification_train_resnet SRCS test_memopt_image_classification_train.py ARGS resnet)
py_test(test_memopt_image_classification_train_vgg SRCS test_memopt_image_classification_train.py ARGS vgg)

# default test
foreach(src ${TEST_OPS})
    py_test(${src} SRCS ${src}.py)
endforeach()
python/paddle/v2/fluid/tests/book_memory_optimization/test_memopt_fit_a_line.py  (new file, 0 → 100644)

# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import numpy as np
import paddle.v2 as paddle
import paddle.v2.fluid as fluid

x = fluid.layers.data(name='x', shape=[13], dtype='float32')

y_predict = fluid.layers.fc(input=x, size=1, act=None)

y = fluid.layers.data(name='y', shape=[1], dtype='float32')

cost = fluid.layers.square_error_cost(input=y_predict, label=y)
avg_cost = fluid.layers.mean(x=cost)

sgd_optimizer = fluid.optimizer.SGD(learning_rate=0.1)
sgd_optimizer.minimize(avg_cost)

# memopt_program = fluid.default_main_program()
memopt_program = fluid.memory_optimize(fluid.default_main_program())

BATCH_SIZE = 200

train_reader = paddle.batch(
    paddle.reader.shuffle(
        paddle.dataset.uci_housing.train(), buf_size=500),
    batch_size=BATCH_SIZE)

place = fluid.CPUPlace()
feeder = fluid.DataFeeder(place=place, feed_list=[x, y])
exe = fluid.Executor(place)

exe.run(fluid.default_startup_program())

PASS_NUM = 100
for pass_id in range(PASS_NUM):
    fluid.io.save_persistables(exe, "./fit_a_line.model/")
    fluid.io.load_persistables(exe, "./fit_a_line.model/")
    for data in train_reader():
        avg_loss_value, = exe.run(memopt_program,
                                  feed=feeder.feed(data),
                                  fetch_list=[avg_cost])

        # if avg cost is less than 10.0, we think our code is good.
        if avg_loss_value[0] < 10.0:
            exit(0)
exit(1)
python/paddle/v2/fluid/tests/book_memory_optimization/test_memopt_image_classification_train.py  (new file, 0 → 100644)

# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import print_function

import sys

import paddle.v2 as paddle
import paddle.v2.fluid as fluid


def resnet_cifar10(input, depth=32):
    def conv_bn_layer(input, ch_out, filter_size, stride, padding, act='relu'):
        tmp = fluid.layers.conv2d(
            input=input,
            filter_size=filter_size,
            num_filters=ch_out,
            stride=stride,
            padding=padding,
            act=None,
            bias_attr=False)
        return fluid.layers.batch_norm(input=tmp, act=act)

    def shortcut(input, ch_in, ch_out, stride):
        if ch_in != ch_out:
            return conv_bn_layer(input, ch_out, 1, stride, 0, None)
        else:
            return input

    def basicblock(input, ch_in, ch_out, stride):
        tmp = conv_bn_layer(input, ch_out, 3, stride, 1)
        tmp = conv_bn_layer(tmp, ch_out, 3, 1, 1, act=None)
        short = shortcut(input, ch_in, ch_out, stride)
        return fluid.layers.elementwise_add(x=tmp, y=short, act='relu')

    def layer_warp(block_func, input, ch_in, ch_out, count, stride):
        tmp = block_func(input, ch_in, ch_out, stride)
        for i in range(1, count):
            tmp = block_func(tmp, ch_out, ch_out, 1)
        return tmp

    assert (depth - 2) % 6 == 0
    n = (depth - 2) / 6
    conv1 = conv_bn_layer(
        input=input, ch_out=16, filter_size=3, stride=1, padding=1)
    res1 = layer_warp(basicblock, conv1, 16, 16, n, 1)
    res2 = layer_warp(basicblock, res1, 16, 32, n, 2)
    res3 = layer_warp(basicblock, res2, 32, 64, n, 2)
    pool = fluid.layers.pool2d(
        input=res3, pool_size=8, pool_type='avg', pool_stride=1)
    return pool


def vgg16_bn_drop(input):
    def conv_block(input, num_filter, groups, dropouts):
        return fluid.nets.img_conv_group(
            input=input,
            pool_size=2,
            pool_stride=2,
            conv_num_filter=[num_filter] * groups,
            conv_filter_size=3,
            conv_act='relu',
            conv_with_batchnorm=True,
            conv_batchnorm_drop_rate=dropouts,
            pool_type='max')

    conv1 = conv_block(input, 64, 2, [0.3, 0])
    conv2 = conv_block(conv1, 128, 2, [0.4, 0])
    conv3 = conv_block(conv2, 256, 3, [0.4, 0.4, 0])
    conv4 = conv_block(conv3, 512, 3, [0.4, 0.4, 0])
    conv5 = conv_block(conv4, 512, 3, [0.4, 0.4, 0])

    drop = fluid.layers.dropout(x=conv5, dropout_prob=0.5)
    fc1 = fluid.layers.fc(input=drop, size=512, act=None)
    bn = fluid.layers.batch_norm(input=fc1, act='relu')
    drop2 = fluid.layers.dropout(x=bn, dropout_prob=0.5)
    fc2 = fluid.layers.fc(input=drop2, size=512, act=None)
    return fc2


classdim = 10
data_shape = [3, 32, 32]

images = fluid.layers.data(name='pixel', shape=data_shape, dtype='float32')
label = fluid.layers.data(name='label', shape=[1], dtype='int64')

net_type = "vgg"
if len(sys.argv) >= 2:
    net_type = sys.argv[1]

if net_type == "vgg":
    print("train vgg net")
    net = vgg16_bn_drop(images)
elif net_type == "resnet":
    print("train resnet")
    net = resnet_cifar10(images, 32)
else:
    raise ValueError("%s network is not supported" % net_type)

predict = fluid.layers.fc(input=net, size=classdim, act='softmax')
cost = fluid.layers.cross_entropy(input=predict, label=label)
avg_cost = fluid.layers.mean(x=cost)

optimizer = fluid.optimizer.Adam(learning_rate=0.001)
opts = optimizer.minimize(avg_cost)

accuracy = fluid.evaluator.Accuracy(input=predict, label=label)

# memopt_program = fluid.default_main_program()
memopt_program = fluid.memory_optimize(fluid.default_main_program())

BATCH_SIZE = 128
PASS_NUM = 1

train_reader = paddle.batch(
    paddle.reader.shuffle(
        paddle.dataset.cifar.train10(), buf_size=128 * 10),
    batch_size=BATCH_SIZE)

place = fluid.CPUPlace()
exe = fluid.Executor(place)
feeder = fluid.DataFeeder(place=place, feed_list=[images, label])
exe.run(fluid.default_startup_program())

for pass_id in range(PASS_NUM):
    accuracy.reset(exe)
    for data in train_reader():
        loss, acc = exe.run(memopt_program,
                            feed=feeder.feed(data),
                            fetch_list=[avg_cost] + accuracy.metrics)
        pass_acc = accuracy.eval(exe)
        print("loss:" + str(loss) + " acc:" + str(acc) + " pass_acc:" + str(
            pass_acc))
        # this model is slow, so if we can train one mini-batch,
        # we think it works properly.
        exit(0)
exit(1)
python/paddle/v2/fluid/tests/test_compare_op.py  (+0 -2)

@@ -38,8 +38,6 @@ def create_test_class(op_type, typename, callback):
 for _type_name in {'float32', 'float64', 'int32', 'int64'}:
     create_test_class('less_than', _type_name, lambda _a, _b: _a < _b)
     create_test_class('less_equal', _type_name, lambda _a, _b: _a <= _b)
-    create_test_class('greater_than', _type_name, lambda _a, _b: _a > _b)
-    create_test_class('greater_equal', _type_name, lambda _a, _b: _a >= _b)
     create_test_class('equal', _type_name, lambda _a, _b: _a == _b)

 if __name__ == '__main__':
python/paddle/v2/fluid/tests/test_clip.py → python/paddle/v2/fluid/tests/test_error_clip.py  (file moved, no content changes)
python/paddle/v2/fluid/tests/test_gradient_clip.py  (new file, 0 → 100644)

# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import numpy as np
import paddle.v2 as paddle
import paddle.v2.fluid as fluid

BATCH_SIZE = 128
CLIP = 1

prog = fluid.framework.Program()
with fluid.program_guard(main_program=prog):
    image = fluid.layers.data(name='x', shape=[784], dtype='float32')

    hidden1 = fluid.layers.fc(input=image, size=128, act='relu')
    hidden2 = fluid.layers.fc(input=hidden1, size=64, act='relu')
    predict = fluid.layers.fc(input=hidden2, size=10, act='softmax')

    label = fluid.layers.data(name='y', shape=[1], dtype='int64')

    cost = fluid.layers.cross_entropy(input=predict, label=label)
    avg_cost = fluid.layers.mean(x=cost)

prog_clip = prog.clone()

avg_cost_clip = prog_clip.block(0).var(avg_cost.name)

p_g = fluid.backward.append_backward(loss=avg_cost)
p_g_clip = fluid.backward.append_backward(loss=avg_cost_clip)

with fluid.program_guard(main_program=prog_clip):
    fluid.clip.gradient_clip_by_global_norm(clip_norm=CLIP)
    p_g_clip = fluid.clip.append_gradient_clip_ops(p_g_clip)

grad_list = [elem[1] for elem in p_g]
grad_clip_list = [elem[1] for elem in p_g_clip]

train_reader = paddle.batch(
    paddle.reader.shuffle(
        paddle.dataset.mnist.train(), buf_size=8192),
    batch_size=BATCH_SIZE)

place = fluid.CPUPlace()
exe = fluid.Executor(place)
feeder = fluid.DataFeeder(feed_list=[image, label], place=place)
exe.run(fluid.default_startup_program())

count = 0
for data in train_reader():
    count += 1
    if count > 5:
        break
    out = exe.run(prog, feed=feeder.feed(data), fetch_list=grad_list)
    out_clip = exe.run(prog_clip,
                       feed=feeder.feed(data),
                       fetch_list=grad_clip_list)
    global_norm = 0
    for v in out[1:]:
        global_norm += np.sum(np.power(v, 2))
    global_norm = np.sqrt(global_norm)

    global_norm_clip = 0
    for v in out_clip[1:]:
        global_norm_clip += np.sum(np.power(v, 2))
    global_norm_clip = np.sqrt(global_norm_clip)

    if not np.isclose(
            a=global_norm_clip, b=np.minimum(global_norm, CLIP), rtol=5e-3):
        exit(1)

exit(0)
python/paddle/v2/fluid/tests/test_registry.py  (+7 -12)

@@ -11,26 +11,21 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
 import unittest
-import warnings

 import paddle.v2.fluid as fluid
-import paddle.v2.fluid.framework as framework
-import paddle.v2.fluid.layers as layers
-import paddle.v2.fluid.registry as registry
 import numpy as np
+import decorators


 class TestRegistry(unittest.TestCase):
+    @decorators.prog_scope()
     def test_registry_layer(self):
-        self.layer_type = "mean"
-        program = framework.Program()
-
         x = fluid.layers.data(name='X', shape=[10, 10], dtype='float32')
-        output = layers.mean(x)
+        output = fluid.layers.mean(x=x)
+
         place = fluid.CPUPlace()
         exe = fluid.Executor(place)

         X = np.random.random((10, 10)).astype("float32")
-        mean_out = exe.run(program, feed={"X": X}, fetch_list=[output])
-        self.assertAlmostEqual(np.mean(X), mean_out)
+        mean_out = exe.run(feed={"X": X}, fetch_list=[output])
+        self.assertAlmostEqual(np.mean(X), mean_out[0])