Commit 29b68392 (unverified)
Authored by Cao Ying on Jan 29, 2018; committed by GitHub on Jan 29, 2018
Parents: 9bf1a8da, 52e17bf5

Merge pull request #7928 from guoshengCS/add-weight-normalization

Add weight normalization wrapper.
Showing 4 changed files with 330 additions and 8 deletions:

  paddle/operators/reduce_op.cc                               +6   -3
  python/paddle/v2/fluid/layer_helper.py                      +182 -4
  python/paddle/v2/fluid/param_attr.py                        +21  -1
  python/paddle/v2/fluid/tests/test_weight_normalization.py   +121 -0
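This commit adds a weight-normalization wrapper following the reparameterization in https://arxiv.org/pdf/1602.07868.pdf (cited in the diff below): a weight w is split into a magnitude parameter g and a direction parameter v, with w = g * v / ||v||. A minimal numpy sketch of the dim=None case exercised by the new test; the variable names are illustrative and not part of the patch:

import numpy

# Illustrative only: reparameterize a [10, 5] weight as w = g * v / ||v||.
v = numpy.random.randn(10, 5).astype('float32')      # direction parameter
g = numpy.linalg.norm(v, axis=None, keepdims=True)   # scalar magnitude (dim=None)
w = g * v / numpy.linalg.norm(v, axis=None, keepdims=True)

# Initializing g to ||v|| makes w equal to v, so the distribution of w matches
# initializing w directly with the given initializer (see the comment in
# layer_helper.py below).
assert numpy.allclose(w, v, atol=1e-6)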
paddle/operators/reduce_op.cc  (+6 -3)

@@ -13,7 +13,6 @@ See the License for the specific language governing permissions and
 limitations under the License. */
 
 #include "paddle/operators/reduce_op.h"
-#include "paddle/operators/net_op.h"
 
 namespace paddle {
 namespace operators {
@@ -38,10 +37,14 @@ class ReduceOp : public framework::OperatorWithKernel {
         dim, x_rank,
         "The dim should be in the range [-rank(input), rank(input)).");
     bool reduce_all = ctx->Attrs().Get<bool>("reduce_all");
+    bool keep_dim = ctx->Attrs().Get<bool>("keep_dim");
     if (reduce_all) {
-      ctx->SetOutputDim("Out", {1});
+      if (keep_dim)
+        ctx->SetOutputDim(
+            "Out", framework::make_ddim(std::vector<int64_t>(x_rank, 1)));
+      else
+        ctx->SetOutputDim("Out", {1});
     } else {
-      bool keep_dim = ctx->Attrs().Get<bool>("keep_dim");
       auto dims_vector = vectorize(x_dims);
       if (keep_dim || x_rank == 1) {
         dims_vector[dim] = 1;
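The reduce_op change makes the keep_dim attribute take effect when reduce_all is set: reducing over every dimension now preserves the input rank with all-ones dimensions instead of always collapsing to {1}. A rough numpy analogue of the two behaviours (illustrative only, not the operator's code):

import numpy

x = numpy.random.rand(3, 4, 5)

# reduce_all with keep_dim=False: the result collapses to a single element.
print(numpy.sum(x, axis=None).shape)                 # ()
# reduce_all with keep_dim=True: every reduced dimension is kept with size 1,
# matching framework::make_ddim(std::vector<int64_t>(x_rank, 1)) above.
print(numpy.sum(x, axis=None, keepdims=True).shape)  # (1, 1, 1)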
python/paddle/v2/fluid/layer_helper.py  (+182 -4)

@@ -18,7 +18,7 @@ import itertools
 from framework import Variable, Parameter, default_main_program, default_startup_program, \
     unique_name, dtype_is_floating
 from paddle.v2.fluid.initializer import Constant, Xavier
-from param_attr import ParamAttr
+from param_attr import ParamAttr, WeightNormParamAttr
 
 
 class LayerHelper(object):
@@ -104,6 +104,177 @@ class LayerHelper(object):
                     (dtype, each.dtype))
         return dtype
 
+    def _create_weight_normalize(self, attr, shape, dtype):
+        from .layers import elementwise_mul, elementwise_div, reshape
+
+        # Remove these ops when LayerHelper and layers support indicating
+        # program and block.
+        def __norm_op(x,
+                      out=None,
+                      p=2,
+                      dim=None,
+                      keep_dim=False,
+                      block=self.startup_program.global_block()):
+            if out is None:
+                out = block.create_var(
+                    name=unique_name(".".join([self.name, 'weight_norm_norm'])),
+                    dtype=dtype,
+                    persistable=False)
+            abs_out = block.create_var(
+                name=unique_name(".".join([self.name, 'weight_norm_abs'])),
+                dtype=dtype,
+                persistable=False)
+            block.append_op(
+                type='abs', inputs={'X': x}, outputs={'Out': abs_out})
+            pow_out = block.create_var(
+                name=unique_name(".".join([self.name, 'weight_norm_pow'])),
+                dtype=dtype,
+                persistable=False)
+            block.append_op(
+                type='pow',
+                inputs={'X': abs_out},
+                outputs={'Out': pow_out},
+                attrs={'factor': float(p)})
+            sum_out = block.create_var(
+                name=unique_name(".".join([self.name, 'weight_norm_sum'])),
+                dtype=dtype,
+                persistable=False)
+            block.append_op(
+                type='reduce_sum',
+                inputs={'X': pow_out},
+                outputs={'Out': sum_out},
+                attrs={
+                    'dim': dim,
+                    'keep_dim': keep_dim,
+                    'reduce_all': True if dim is None else False
+                })
+            block.append_op(
+                type='pow',
+                inputs={'X': sum_out},
+                outputs={'Out': out},
+                attrs={'factor': 1. / p})
+            return out
+
+        def __reshape_op(x,
+                         shape,
+                         out=None,
+                         block=self.startup_program.global_block()):
+            if out is None:
+                out = block.create_var(
+                    name=unique_name(".".join(
+                        [self.name, 'weight_norm_reshape'])),
+                    dtype=dtype,
+                    persistable=False)
+            block.append_op(
+                type='reshape',
+                inputs={'X': x},
+                outputs={'Out': out},
+                attrs={'shape': shape})
+            return out
+
+        def __transpose_op(x,
+                           axis,
+                           out=None,
+                           block=self.startup_program.global_block()):
+            if out is None:
+                out = block.create_var(
+                    name=unique_name(".".join(
+                        [self.name, 'weight_norm_transpose'])),
+                    dtype=dtype,
+                    persistable=False)
+            block.append_op(
+                type='transpose',
+                inputs={'X': x},
+                outputs={'Out': out},
+                attrs={'axis': axis})
+            return out
+
+        def __norm_except_dim(x,
+                              out=None,
+                              dim=None,
+                              block=self.startup_program.global_block()):
+            """Computes the norm over all dimensions except dim"""
+            if out is None:
+                out = block.create_var(
+                    name=unique_name(".".join([self.name, 'weight_norm_norm'])),
+                    dtype=dtype,
+                    persistable=False)
+            if dim is None:
+                __norm_op(x, out, dim=dim, block=block)
+            elif dim == 0:
+                out_shape = [x.shape[0]] + [1] * (len(x.shape) - 1)
+                reshape = __reshape_op(x, shape=[x.shape[0], -1], block=block)
+                norm = __norm_op(reshape, dim=1, block=block)
+                __reshape_op(norm, out=out, shape=out_shape, block=block)
+            elif dim == len(x.shape) - 1:
+                out_shape = [1] * (len(x.shape) - 1) + [x.shape[-1]]
+                reshape = __reshape_op(x, shape=[-1, x.shape[-1]], block=block)
+                norm = __norm_op(reshape, dim=0, block=block)
+                __reshape_op(norm, out=out, shape=out_shape, block=block)
+            else:
+                perm = range(len(x.shape))
+                perm[0], perm[dim] = dim, 0
+                transpose = __transpose_op(x, perm, block=block)
+                norm = __norm_op(transpose, dim=0, block=block)
+                __transpose_op(norm, perm, out=out, block=block)
+            return out
+
+        def __weight_normalize(g, v, dim):
+            """Calculations for weight normalization"""
+            norm = __norm_except_dim(
+                v, dim=dim, block=self.main_program.current_block())
+            scale = elementwise_div(
+                x=g, y=norm)  # The shapes of g and norm are the same.
+            # Currently, elementwise_mul only support broadcast when the shape
+            # of y is a subset of the shape of x. Thus, we reshape y to squeeze
+            # to achive the subset.
+            w = elementwise_mul(
+                x=v,
+                y=scale if dim is None else reshape(
+                    x=scale, shape=[v.shape[dim]]),
+                axis=-1 if dim is None else dim)
+            # To serialize the original parameter for inference, maybe a
+            # parameter rather than a variable should be returned.
+            return w
+
+        g_param_attr = copy.deepcopy(attr)
+        g_param_attr.name = attr.name + '_g'
+        g_param_shape = [1] * len(shape)
+        if attr.dim is not None:
+            g_param_shape[attr.dim] = shape[attr.dim]
+        v_param_attr = copy.deepcopy(attr)
+        v_param_attr.name = attr.name + '_v'
+        v_param_shape = shape
+
+        # Add to startup_program to initialize g and v.
+        # Try to reconstruct the initializer of w by initializing g and v.
+        # Set the initializers of g and v as below, then the distribution
+        # of w is the same as initializing w with the given initializer.
+        # For Data-Dependent Initialization, please compute the init-values
+        # of g and v in external and then feed the values to g and v by
+        # executing an extra program.
+        g_param = self.startup_program.global_block().create_parameter(
+            dtype=dtype,
+            shape=g_param_shape,
+            **g_param_attr.to_kwargs(with_initializer=False))
+        v_param = self.startup_program.global_block().create_parameter(
+            dtype=dtype,
+            shape=v_param_shape,
+            **v_param_attr.to_kwargs(with_initializer=True))
+        __norm_except_dim(
+            x=v_param,
+            out=g_param,
+            dim=attr.dim,
+            block=self.startup_program.global_block())
+
+        # Add weight normalization to main_program
+        g_param = self.main_program.global_block().create_parameter(
+            dtype=dtype, shape=g_param_shape, **g_param_attr.to_kwargs())
+        v_param = self.main_program.global_block().create_parameter(
+            dtype=dtype, shape=v_param_shape, **v_param_attr.to_kwargs())
+        w_param = __weight_normalize(g_param, v_param, dim=attr.dim)
+        return w_param
+
     def create_parameter(self,
                          attr,
                          shape,
@@ -114,16 +285,23 @@ class LayerHelper(object):
         attr = copy.deepcopy(attr)
         assert isinstance(attr, ParamAttr)
         suffix = 'b' if is_bias else 'w'
+        if attr.name is None:
+            attr.name = unique_name(".".join([self.name, suffix]))
 
-        if default_initializer is None:
+        if default_initializer is None and attr.initializer is None:
             if is_bias:
                 attr.set_default_bias_initializer()
             else:
                 attr.set_default_param_initializer()
         else:
             attr.set_default_initializer(default_initializer)
-        if attr.name is None:
-            attr.name = unique_name(".".join([self.name, suffix]))
+
+        # If weight normalization is set, insert extra parameters and ops.
+        # Refer to https://arxiv.org/pdf/1602.07868.pdf
+        if isinstance(attr, WeightNormParamAttr):
+            param = self._create_weight_normalize(attr, shape, dtype)
+            WeightNormParamAttr.params_with_weight_norm.append(param)
+            return param
 
         self.startup_program.global_block().create_parameter(
             dtype=dtype, shape=shape, **attr.to_kwargs(with_initializer=True))
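In short, _create_weight_normalize keeps two trainable parameters per weight: v with the original shape and g whose shape is all ones except along attr.dim, and recombines them as w = g * v / ||v|| where the norm is taken over every axis except dim. A small numpy sketch of the dim == 0 branch of __norm_except_dim; the names are illustrative assumptions, not the fluid API:

import numpy

def norm_except_dim0(x):
    # Mirror the dim == 0 branch above: reshape to [x.shape[0], -1], take the
    # L2 norm of each row, then reshape back to [x.shape[0], 1, ..., 1].
    flat = x.reshape(x.shape[0], -1)
    norm = numpy.sqrt((flat ** 2).sum(axis=1))
    return norm.reshape([x.shape[0]] + [1] * (x.ndim - 1))

v = numpy.random.randn(4, 3, 2)
g = norm_except_dim0(v)             # shape (4, 1, 1): the g_param_shape for dim=0
w = g * (v / norm_except_dim0(v))   # equals v right after initialization
assert numpy.allclose(w, v)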
python/paddle/v2/fluid/param_attr.py  (+21 -1)

@@ -15,7 +15,10 @@
 from initializer import Initializer, Xavier, Constant
 from regularizer import WeightDecayRegularizer
 
-__all__ = ['ParamAttr']
+__all__ = [
+    'ParamAttr',
+    'WeightNormParamAttr',
+]
 
 
 class ParamAttr(object):
@@ -82,3 +85,20 @@ class ParamAttr(object):
         if with_initializer:
             kwargs['initializer'] = self.initializer
         return kwargs
+
+
+class WeightNormParamAttr(ParamAttr):
+    """
+    Used for weight normalization. Any field in ParamAttr can also be set here.
+    Besides, an extra field dim can be set to indicate the dimension except
+    which to normalize.
+    """
+    # List to record the parameters reparameterized by weight normalization.
+    # If these parameters are treated as Variable rather than Parameter,
+    # it can be used to discriminate these parameters and help to serialize
+    # these paramters for inference.
+    params_with_weight_norm = []
+
+    def __init__(self, dim=None, **kwargs):
+        super(WeightNormParamAttr, self).__init__(**kwargs)
+        self.dim = dim
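The new dim field selects which axis keeps its own scale. For an fc weight of shape [input_dim, size], dim=1 gives one g per output unit (the per-neuron setting from the paper), while dim=None, as in the test below, uses a single scalar g. A hedged usage sketch built on the same fluid calls as the test; the layer and parameter names here are illustrative:

import paddle.v2.fluid as fluid
from paddle.v2.fluid.param_attr import WeightNormParamAttr

data = fluid.layers.data(name='x', shape=[10])
# Weight v has shape [10, 20]; g has shape [1, 20], one magnitude per output unit.
out = fluid.layers.fc(input=data,
                      size=20,
                      param_attr=WeightNormParamAttr(dim=1, name='fc_weight'),
                      bias_attr=False)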
python/paddle/v2/fluid/tests/test_weight_normalization.py  (new file, +121 -0)

# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import unittest
import numpy
import collections
import paddle.v2.fluid as fluid
import paddle.v2.fluid.core as core
from paddle.v2.fluid.initializer import ConstantInitializer
from paddle.v2.fluid.param_attr import WeightNormParamAttr


class TestWeightNormalization(unittest.TestCase):
    batch_size = 3
    hidden_size = 5
    data_desc = (['x', [10], 0], )

    @classmethod
    def setUpClass(cls):
        cls.set_program()

    @classmethod
    def set_program(cls):
        data = fluid.layers.data(
            name=cls.data_desc[0][0], shape=cls.data_desc[0][1])
        out = fluid.layers.fc(input=data,
                              size=cls.hidden_size,
                              param_attr=WeightNormParamAttr(
                                  dim=None,
                                  name='weight_norm_param',
                                  initializer=ConstantInitializer(1.0)),
                              bias_attr=False,
                              act=None)
        loss = fluid.layers.reduce_sum(out)
        fluid.backward.append_backward(loss=loss)
        cls.fetch_list = [
            'weight_norm_param_g', 'weight_norm_param_v',
            'weight_norm_param_g@GRAD'
        ]

    def run_program(self):
        outputs = []
        places = [core.CPUPlace()]
        if core.is_compiled_with_cuda():
            places.append(core.CUDAPlace(0))
        for place in places:
            self.set_inputs(place)
            exe = fluid.Executor(place)
            exe.run(fluid.default_startup_program())
            output = exe.run(fluid.default_main_program(),
                             feed=self.inputs,
                             fetch_list=self.fetch_list,
                             return_numpy=False)
            outputs.append(output)
        self.actual_outputs = outputs

    def set_data(self):
        self.data = collections.OrderedDict()
        for desc in self.data_desc:
            data_name = desc[0]
            data_shape = desc[1]
            data_lod_level = desc[2]
            data_lod = []
            for i in range(data_lod_level):
                lod_level_i = numpy.random.randint(
                    low=1,
                    high=5,
                    size=self.batch_size if i == 0 else lod_level_i[-1])
                lod_level_i = [0] + numpy.cumsum(lod_level_i).tolist()
                data_lod.append(lod_level_i)
            data_value = numpy.random.random(
                size=[data_lod[-1][-1] if data_lod else self.batch_size
                      ] + data_shape).astype('float32')
            self.data[data_name] = (data_value, data_lod)

    def set_inputs(self, place):
        self.inputs = {}
        for desc in self.data_desc:
            tensor = fluid.Tensor()
            tensor.set(self.data[desc[0]][0], place)
            if self.data[desc[0]][1]:
                tensor.set_lod(self.data[desc[0]][1])
            self.inputs[desc[0]] = tensor

    def weight_normalize(self):
        v = numpy.ones((self.data[self.data_desc[0][0]][0].shape[-1],
                        self.hidden_size))
        g = numpy.linalg.norm(v, axis=None, keepdims=True)
        w = g * v / numpy.linalg.norm(v, axis=None, keepdims=True)
        x = self.data[self.data_desc[0][0]][0]
        out = numpy.dot(x, w)
        g_grad = (numpy.dot(x.T, numpy.ones_like(out)) *
                  (v / numpy.linalg.norm(
                      v, axis=None, keepdims=True))).sum(
                          axis=None, keepdims=True)
        return g, v, g_grad

    def test_weight_normalization(self):
        self.set_data()
        self.run_program()
        expect_output = self.weight_normalize()
        for actual_output in self.actual_outputs:
            [
                self.assertTrue(
                    numpy.allclose(
                        numpy.array(actual), expect, atol=0.001))
                for expect, actual in zip(expect_output, actual_output)
            ]


if __name__ == '__main__':
    unittest.main()