Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
PaddlePaddle
Paddle
提交
2b841ec8
P
Paddle
项目概览
PaddlePaddle
/
Paddle
1 年多 前同步成功
通知
2302
Star
20931
Fork
5422
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
1423
列表
看板
标记
里程碑
合并请求
543
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
1,423
Issue
1,423
列表
看板
标记
里程碑
合并请求
543
合并请求
543
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
2b841ec8
编写于
11月 15, 2016
作者:
T
Tao Luo
提交者:
GitHub
11月 15, 2016
浏览文件
操作
浏览文件
下载
差异文件
Merge pull request #421 from emailweixu/scaling_projection
Add ScalingProjection
上级
0ba0f02c
a6ad9a16
变更
12
隐藏空白更改
内联
并排
Showing
12 changed file
with
290 addition
and
41 deletion
+290
-41
doc/ui/api/trainer_config_helpers/layers.rst
doc/ui/api/trainer_config_helpers/layers.rst
+6
-0
paddle/gserver/layers/CostLayer.cpp
paddle/gserver/layers/CostLayer.cpp
+1
-1
paddle/gserver/layers/FullMatrixProjection.cpp
paddle/gserver/layers/FullMatrixProjection.cpp
+3
-1
paddle/gserver/layers/ScalingProjection.cpp
paddle/gserver/layers/ScalingProjection.cpp
+53
-0
paddle/gserver/tests/test_LayerGrad.cpp
paddle/gserver/tests/test_LayerGrad.cpp
+11
-0
paddle/math/BaseMatrix.cu
paddle/math/BaseMatrix.cu
+85
-11
paddle/math/BaseMatrix.h
paddle/math/BaseMatrix.h
+39
-4
paddle/math/Matrix.cpp
paddle/math/Matrix.cpp
+17
-13
python/paddle/trainer/config_parser.py
python/paddle/trainer/config_parser.py
+14
-0
python/paddle/trainer_config_helpers/layers.py
python/paddle/trainer_config_helpers/layers.py
+39
-10
python/paddle/trainer_config_helpers/tests/configs/projections.py
...addle/trainer_config_helpers/tests/configs/projections.py
+1
-0
python/paddle/trainer_config_helpers/tests/configs/protostr/projections.protostr
...onfig_helpers/tests/configs/protostr/projections.protostr
+21
-1
未找到文件。
doc/ui/api/trainer_config_helpers/layers.rst
浏览文件 @
2b841ec8
...
@@ -191,6 +191,12 @@ embedding_layer
...
@@ -191,6 +191,12 @@ embedding_layer
:members: embedding_layer
:members: embedding_layer
:noindex:
:noindex:
scaling_projection
------------------
.. automodule:: paddle.trainer_config_helpers.layers
:members: scaling_projection
:noindex:
dotmul_projection
dotmul_projection
-----------------
-----------------
.. automodule:: paddle.trainer_config_helpers.layers
.. automodule:: paddle.trainer_config_helpers.layers
...
...
paddle/gserver/layers/CostLayer.cpp
浏览文件 @
2b841ec8
...
@@ -605,7 +605,7 @@ public:
...
@@ -605,7 +605,7 @@ public:
int
batchSize
=
input
->
getHeight
();
int
batchSize
=
input
->
getHeight
();
int
size
=
1
;
int
size
=
1
;
resizeOutput
(
batchSize
,
size
);
resizeOutput
(
batchSize
,
size
);
output_
.
value
->
sumRows
(
*
input
);
output_
.
value
->
sumRows
(
*
input
,
/* scaleSum= */
1
,
/* scaleDest= */
0
);
}
}
virtual
void
backward
(
const
UpdateCallback
&
callback
=
nullptr
)
{
virtual
void
backward
(
const
UpdateCallback
&
callback
=
nullptr
)
{
...
...
paddle/gserver/layers/FullMatrixProjection.cpp
浏览文件 @
2b841ec8
...
@@ -52,7 +52,9 @@ void FullMatrixProjection::backward(const UpdateCallback& callback) {
...
@@ -52,7 +52,9 @@ void FullMatrixProjection::backward(const UpdateCallback& callback) {
}
}
hl_set_sync_flag
(
syncFlag
);
hl_set_sync_flag
(
syncFlag
);
parameter_
->
incUpdate
(
callback
);
if
(
weight_
->
getWGrad
())
{
parameter_
->
incUpdate
(
callback
);
}
}
}
}
// namespace paddle
}
// namespace paddle
paddle/gserver/layers/ScalingProjection.cpp
0 → 100644
浏览文件 @
2b841ec8
/* Copyright (c) 2016 Baidu, Inc. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "Projection.h"
namespace
paddle
{
/**
 * Projection that multiplies its input by a single learnable scalar and
 * accumulates the result into the output:
 *
 *   out += w * in
 *
 * The scalar is stored as a 1x1 Weight built on top of the given parameter.
 */
class ScalingProjection : public Projection {
public:
  ScalingProjection(const ProjectionConfig& config,
                    const ParameterPtr& parameter,
                    bool useGpu)
      : Projection(config, parameter, useGpu) {
    // The parameter must hold exactly one value; it is wrapped as a 1x1 weight.
    CHECK_EQ(parameter->getSize(), 1UL);
    weight_.reset(new Weight(1, 1, parameter));
  }

  /// Forward pass: adds the input, scaled by the scalar weight, to out_->value.
  void forward() {
    CHECK(in_->value);
    const auto scale = weight_->getW()->getElement(0, 0);
    out_->value->add(*in_->value, scale);
  }

  /// Backward pass: accumulates the weight gradient (when one is kept) and
  /// propagates the scaled output gradient to the input (when one is kept).
  void backward(const UpdateCallback& callback) {
    if (weight_->getWGrad()) {
      // Two-step reduction of in .* outGrad:
      //  1) per-row sum of products into a fresh (batch x 1) buffer
      //     (scaleDest = 0, so the buffer's prior contents are not used);
      //  2) column sum of that buffer accumulated into the 1x1 weight
      //     gradient (scaleDest = 1).
      const auto batchSize = in_->value->getHeight();
      auto rowDots = Matrix::create(batchSize, 1, false, useGpu_);
      rowDots->sumOfProducts(*in_->value,
                             *out_->grad,
                             /* scaleSum= */ 1,
                             /* scaleDest= */ 0);
      weight_->getWGrad()->sumCols(*rowDots,
                                   /* scaleSum= */ 1,
                                   /* scaleDest= */ 1);
      parameter_->incUpdate(callback);
    }

    if (in_->grad) {
      const auto scale = weight_->getW()->getElement(0, 0);
      in_->grad->add(*out_->grad, scale);
    }
  }

protected:
  /// The 1x1 scalar weight (value and, optionally, gradient).
  std::unique_ptr<Weight> weight_;
};
REGISTER_PROJECTION
(
scaling
,
ScalingProjection
);
}
// namespace paddle
paddle/gserver/tests/test_LayerGrad.cpp
浏览文件 @
2b841ec8
...
@@ -135,6 +135,17 @@ TEST(Projection, identity) {
...
@@ -135,6 +135,17 @@ TEST(Projection, identity) {
}
}
}
}
// Gradient check for the "scaling" projection: 10 inputs -> 10 outputs, driven
// by a single scalar parameter. Only useGpu = false is exercised here.
TEST(Projection, scaling) {
  ProjectionConfig scalingConf;
  scalingConf.set_type("scaling");
  scalingConf.set_input_size(10);
  scalingConf.set_output_size(10);

  const bool deviceModes[] = {false};
  for (bool useGpu : deviceModes) {
    testProjectionGrad(scalingConf,
                       INPUT_DATA,
                       /* parameterSize */ 1,
                       /* batchSize */ 100,
                       useGpu);
  }
}
#ifndef PADDLE_ONLY_CPU
#ifndef PADDLE_ONLY_CPU
TEST
(
Projection
,
conv
)
{
TEST
(
Projection
,
conv
)
{
const
int
NUM_FILTERS
=
16
;
const
int
NUM_FILTERS
=
16
;
...
...
paddle/math/BaseMatrix.cu
浏览文件 @
2b841ec8
...
@@ -1451,6 +1451,8 @@ int BaseMatrixT<real>::applyRow(Agg agg, BaseMatrixT& b) {
...
@@ -1451,6 +1451,8 @@ int BaseMatrixT<real>::applyRow(Agg agg, BaseMatrixT& b) {
MatrixOffset
offset
(
0
,
0
,
0
,
0
,
0
,
0
);
MatrixOffset
offset
(
0
,
0
,
0
,
0
,
0
,
0
);
int
numRows
=
b
.
height_
;
int
numRows
=
b
.
height_
;
int
numCols
=
b
.
width_
;
int
numCols
=
b
.
width_
;
CHECK_EQ
(
height_
,
numRows
);
CHECK_EQ
(
width_
,
1UL
);
aggregate
(
agg
,
base
::
unary
::
identity
(),
base
::
binary
::
second
(),
b
,
numRows
,
aggregate
(
agg
,
base
::
unary
::
identity
(),
base
::
binary
::
second
(),
b
,
numRows
,
numCols
,
offset
,
false_type
(),
true_type
()
/*aAsColVector*/
);
numCols
,
offset
,
false_type
(),
true_type
()
/*aAsColVector*/
);
...
@@ -1463,18 +1465,69 @@ int BaseMatrixT<real>::applyRow(Agg agg, Saver sv, BaseMatrixT& b) {
...
@@ -1463,18 +1465,69 @@ int BaseMatrixT<real>::applyRow(Agg agg, Saver sv, BaseMatrixT& b) {
MatrixOffset
offset
(
0
,
0
,
0
,
0
,
0
,
0
);
MatrixOffset
offset
(
0
,
0
,
0
,
0
,
0
,
0
);
int
numRows
=
b
.
height_
;
int
numRows
=
b
.
height_
;
int
numCols
=
b
.
width_
;
int
numCols
=
b
.
width_
;
CHECK_EQ
(
height_
,
numRows
);
CHECK_EQ
(
width_
,
1UL
);
aggregate
(
agg
,
base
::
unary
::
identity
(),
sv
,
b
,
numRows
,
numCols
,
offset
,
aggregate
(
agg
,
base
::
unary
::
identity
(),
sv
,
b
,
numRows
,
numCols
,
offset
,
false_type
(),
true_type
()
/*aAsColVector*/
);
false_type
(),
true_type
()
/*aAsColVector*/
);
return
0
;
return
0
;
}
}
// Row-wise aggregation of b into this, with the saver specialised to
// add2(scaleDest, scaleAgg). Always returns 0.
//
// When scaleDest is zero, the aggregate is stored with binary::second and
// then scaled in place, instead of going through add2.
// NOTE(review): presumably this keeps stale or uninitialised destination
// values (e.g. NaN) from surviving a multiply-by-zero -- confirm.
template <>
template <class Agg>
int BaseMatrixT<real>::applyRow(Agg agg,
                                real scaleDest,
                                real scaleAgg,
                                BaseMatrixT& b) {
  if (scaleDest == 0) {
    applyRow(agg, base::binary::second(), b);
    if (scaleAgg != 1) {
      mulScalar(scaleAgg);
    }
    return 0;
  }

  applyRow(agg, base::binary::add2(scaleDest, scaleAgg), b);
  return 0;
}
// Row-wise aggregation over two matrices: op combines matching elements of
// b and c, agg reduces along each row, and sv stores the result into this.
// Always returns 0.
template <>
template <class Agg, class Op, class Saver>
int BaseMatrixT<real>::applyRow(Agg agg,
                                Op op,
                                Saver sv,
                                BaseMatrixT& b,
                                BaseMatrixT& c) {
  int rows = b.height_;
  int cols = b.width_;

  // this must be a column vector with one entry per row of b ...
  CHECK_EQ(height_, rows);
  CHECK_EQ(width_, 1UL);
  // ... and c must have the same shape as b.
  CHECK_EQ(c.height_, rows);
  CHECK_EQ(c.width_, cols);

  MatrixOffset zeroOffset(0, 0, 0, 0, 0, 0);
  aggregate(agg,
            op,
            sv,
            b,
            c,
            rows,
            cols,
            zeroOffset,
            false_type(),
            true_type() /*aAsColVector*/);
  return 0;
}
// Two-matrix row-wise aggregation with the saver specialised to
// add2(scaleDest, scaleAgg). Always returns 0.
//
// When scaleDest is zero, the aggregate is stored with binary::second and
// then scaled in place, instead of going through add2.
// NOTE(review): presumably this keeps stale or uninitialised destination
// values (e.g. NaN) from surviving a multiply-by-zero -- confirm.
template <>
template <class Agg, class Op>
int BaseMatrixT<real>::applyRow(Agg agg,
                                Op op,
                                real scaleDest,
                                real scaleAgg,
                                BaseMatrixT& b,
                                BaseMatrixT& c) {
  if (scaleDest == 0) {
    applyRow(agg, op, base::binary::second(), b, c);
    if (scaleAgg != 1) {
      mulScalar(scaleAgg);
    }
    return 0;
  }

  applyRow(agg, op, base::binary::add2(scaleDest, scaleAgg), b, c);
  return 0;
}
template
<
>
template
<
>
template
<
class
Agg
>
template
<
class
Agg
>
int
BaseMatrixT
<
real
>::
applyCol
(
Agg
agg
,
BaseMatrixT
&
b
)
{
int
BaseMatrixT
<
real
>::
applyCol
(
Agg
agg
,
BaseMatrixT
&
b
)
{
MatrixOffset
offset
(
0
,
0
,
0
,
0
,
0
,
0
);
MatrixOffset
offset
(
0
,
0
,
0
,
0
,
0
,
0
);
int
numRows
=
b
.
height_
;
int
numRows
=
b
.
height_
;
int
numCols
=
b
.
width_
;
int
numCols
=
b
.
width_
;
CHECK_EQ
(
width_
,
numCols
);
CHECK_EQ
(
height_
,
1UL
);
aggregate
(
agg
,
base
::
unary
::
identity
(),
base
::
binary
::
second
(),
b
,
numRows
,
aggregate
(
agg
,
base
::
unary
::
identity
(),
base
::
binary
::
second
(),
b
,
numRows
,
numCols
,
offset
,
true_type
()
/*aAsRowVector*/
,
false_type
());
numCols
,
offset
,
true_type
()
/*aAsRowVector*/
,
false_type
());
...
@@ -1487,6 +1540,8 @@ int BaseMatrixT<real>::applyCol(Agg agg, Saver sv, BaseMatrixT& b) {
...
@@ -1487,6 +1540,8 @@ int BaseMatrixT<real>::applyCol(Agg agg, Saver sv, BaseMatrixT& b) {
MatrixOffset
offset
(
0
,
0
,
0
,
0
,
0
,
0
);
MatrixOffset
offset
(
0
,
0
,
0
,
0
,
0
,
0
);
int
numRows
=
b
.
height_
;
int
numRows
=
b
.
height_
;
int
numCols
=
b
.
width_
;
int
numCols
=
b
.
width_
;
CHECK_EQ
(
width_
,
numCols
);
CHECK_EQ
(
height_
,
1UL
);
aggregate
(
agg
,
base
::
unary
::
identity
(),
sv
,
b
,
numRows
,
numCols
,
offset
,
aggregate
(
agg
,
base
::
unary
::
identity
(),
sv
,
b
,
numRows
,
numCols
,
offset
,
true_type
()
/*aAsRowVector*/
,
false_type
());
true_type
()
/*aAsRowVector*/
,
false_type
());
...
@@ -1494,8 +1549,23 @@ int BaseMatrixT<real>::applyCol(Agg agg, Saver sv, BaseMatrixT& b) {
...
@@ -1494,8 +1549,23 @@ int BaseMatrixT<real>::applyCol(Agg agg, Saver sv, BaseMatrixT& b) {
}
}
template
<
>
template
<
>
void
BaseMatrixT
<
real
>::
sumRows
(
BaseMatrixT
&
b
)
{
template
<
class
Agg
>
applyRow
(
aggregate
::
sum
(),
b
);
int
BaseMatrixT
<
real
>::
applyCol
(
Agg
agg
,
real
scaleDest
,
real
scaleAgg
,
BaseMatrixT
&
b
)
{
if
(
scaleDest
!=
0
)
{
applyCol
(
agg
,
base
::
binary
::
add2
(
scaleDest
,
scaleAgg
),
b
);
}
else
{
applyCol
(
agg
,
base
::
binary
::
second
(),
b
);
if
(
scaleAgg
!=
1
)
{
mulScalar
(
scaleAgg
);
}
}
return
0
;
}
// Row-wise sum of b into this. Forwards to the scaled applyRow overload
// with aggregate::sum(); per the declaration's comment:
//   this_i = scaleDest * this_i + scaleSum * \sum_j b_{ij}
// Note the argument order: applyRow takes (scaleDest, scaleAgg), the reverse
// of this function's (scaleSum, scaleDest) parameter order.
template
<
>
void
BaseMatrixT
<
real
>::
sumRows
(
BaseMatrixT
&
b
,
real
scaleSum
,
real
scaleDest
)
{
applyRow
(
aggregate
::
sum
(),
scaleDest
,
scaleSum
,
b
);
}
}
template
<
>
template
<
>
...
@@ -1524,18 +1594,22 @@ void BaseMatrixT<real>::minCols(BaseMatrixT& b) {
...
@@ -1524,18 +1594,22 @@ void BaseMatrixT<real>::minCols(BaseMatrixT& b) {
}
}
template
<
>
template
<
>
void
BaseMatrixT
<
real
>::
sumCols
(
BaseMatrixT
&
b
,
real
scale
)
{
void
BaseMatrixT
<
real
>::
sumCols
(
BaseMatrixT
&
b
,
real
scale
Sum
,
real
scaleDest
)
{
applyCol
(
aggregate
::
sum
(),
base
::
binary
::
add2
(
1.0
,
scale
)
,
b
);
applyCol
(
aggregate
::
sum
(),
scaleDest
,
scaleSum
,
b
);
}
}
template
<
>
template
<
>
void
BaseMatrixT
<
real
>::
sumOfSquares
(
BaseMatrixT
&
b
,
BaseMatrixT
&
c
)
{
void
BaseMatrixT
<
real
>::
sumOfSquaredDiffs
(
int
numRows
=
b
.
height_
;
BaseMatrixT
&
b
,
BaseMatrixT
&
c
,
real
scaleSum
,
real
scaleDest
)
{
int
numCols
=
b
.
width_
;
applyRow
(
aggregate
::
sum
(),
base
::
binary
::
squaredDiff
(),
MatrixOffset
offset
(
0
,
0
,
0
,
0
,
0
,
0
);
scaleDest
,
scaleSum
,
b
,
c
);
aggregate
(
aggregate
::
sum
(),
base
::
binary
::
squaredDiff
(),
base
::
binary
::
add
(),
}
b
,
c
,
numRows
,
numCols
,
offset
,
false_type
(),
true_type
()
/*aAsColVector*/
);
// Row-wise sum of element-wise products of b and c into this. Forwards to
// the scaled two-matrix applyRow overload with aggregate::sum() and
// base::binary::mul(); per the declaration's comment:
//   this_i = scaleDest * this_i + scaleSum * \sum_j b_{ij} * c_{ij}
// Note the argument order: applyRow takes (scaleDest, scaleAgg), the reverse
// of this function's (scaleSum, scaleDest) parameter order.
template
<
>
void
BaseMatrixT
<
real
>::
sumOfProducts
(
BaseMatrixT
&
b
,
BaseMatrixT
&
c
,
real
scaleSum
,
real
scaleDest
)
{
applyRow
(
aggregate
::
sum
(),
base
::
binary
::
mul
(),
scaleDest
,
scaleSum
,
b
,
c
);
}
}
template
class
BaseMatrixT
<
real
>;
template
class
BaseMatrixT
<
real
>;
...
...
paddle/math/BaseMatrix.h
浏览文件 @
2b841ec8
...
@@ -305,6 +305,23 @@ public:
...
@@ -305,6 +305,23 @@ public:
template
<
class
Agg
>
template
<
class
Agg
>
int
applyRow
(
Agg
agg
,
BaseMatrixT
&
b
);
int
applyRow
(
Agg
agg
,
BaseMatrixT
&
b
);
/**
* An aggregate expression applied to each row of matrices b and c.
*
* @code
* for each row i & 0 <= j < b.width_, do:
* dst = agg(op(b[i*ldb + j], c[i*ldc + j]))
* this[i] = sv(this[i], dst)
* @endcode
*/
template
<
class
Agg
,
class
Op
,
class
Saver
>
int
applyRow
(
Agg
agg
,
Op
op
,
Saver
sv
,
BaseMatrixT
&
b
,
BaseMatrixT
&
c
);
// Same as the above with the special handling of sv=add2(scaleDest, scaleAgg)
template
<
class
Agg
,
class
Op
>
int
applyRow
(
Agg
agg
,
Op
op
,
real
scaleDest
,
real
scaleAgg
,
BaseMatrixT
&
b
,
BaseMatrixT
&
c
);
/**
/**
* a aggregate expression that apply each row of matrix b.
* a aggregate expression that apply each row of matrix b.
*
*
...
@@ -317,6 +334,10 @@ public:
...
@@ -317,6 +334,10 @@ public:
template
<
class
Agg
,
class
Saver
>
template
<
class
Agg
,
class
Saver
>
int
applyRow
(
Agg
agg
,
Saver
sv
,
BaseMatrixT
&
b
);
int
applyRow
(
Agg
agg
,
Saver
sv
,
BaseMatrixT
&
b
);
// Same as the above with the special handling of sv=add2(scaleDest, scaleAgg)
template
<
class
Agg
>
int
applyRow
(
Agg
agg
,
real
scaleDest
,
real
scaleAgg
,
BaseMatrixT
&
b
);
/**
/**
* a aggregate expression that apply each column of matrix b.
* a aggregate expression that apply each column of matrix b.
*
*
...
@@ -340,6 +361,10 @@ public:
...
@@ -340,6 +361,10 @@ public:
template
<
class
Agg
,
class
Saver
>
template
<
class
Agg
,
class
Saver
>
int
applyCol
(
Agg
agg
,
Saver
sv
,
BaseMatrixT
&
b
);
int
applyCol
(
Agg
agg
,
Saver
sv
,
BaseMatrixT
&
b
);
// Same as the above with the special handling of sv=add2(scaleDest, scaleAgg)
template
<
class
Agg
>
int
applyCol
(
Agg
agg
,
real
scaleDest
,
real
scaleAgg
,
BaseMatrixT
&
b
);
bool
useGpu
()
const
{
return
useGpu_
;
}
bool
useGpu
()
const
{
return
useGpu_
;
}
const
T
*
rowBuf
(
size_t
row
)
const
{
return
data_
+
width_
*
row
;
}
const
T
*
rowBuf
(
size_t
row
)
const
{
return
data_
+
width_
*
row
;
}
...
@@ -920,7 +945,9 @@ public:
...
@@ -920,7 +945,9 @@ public:
void
addRowScale
(
size_t
cCol
,
BaseMatrixT
&
b
,
BaseMatrixT
&
c
);
void
addRowScale
(
size_t
cCol
,
BaseMatrixT
&
b
,
BaseMatrixT
&
c
);
/// calculate the sum of each row of the matrix b.
/// calculate the sum of each row of the matrix b.
void
sumRows
(
BaseMatrixT
&
b
);
/// this_i = scaleDest * this_i + scaleSum * \sum_j b_{ij}
void
sumRows
(
BaseMatrixT
&
b
,
T
scaleSum
,
T
scaleDest
);
/// calculate the maximum value of each row of the matrix b.
/// calculate the maximum value of each row of the matrix b.
void
maxRows
(
BaseMatrixT
&
b
);
void
maxRows
(
BaseMatrixT
&
b
);
/// calculate the minimum value of each row of the matrix b.
/// calculate the minimum value of each row of the matrix b.
...
@@ -932,10 +959,18 @@ public:
...
@@ -932,10 +959,18 @@ public:
void
maxCols
(
BaseMatrixT
&
b
);
void
maxCols
(
BaseMatrixT
&
b
);
/// calculate the minimum value of each column of the matrix b.
/// calculate the minimum value of each column of the matrix b.
void
minCols
(
BaseMatrixT
&
b
);
void
minCols
(
BaseMatrixT
&
b
);
void
sumCols
(
BaseMatrixT
&
b
,
T
scale
);
/// calculate the sum of each row of (b - c)^2.
/// calculate the sum of each column of the matrix b.
void
sumOfSquares
(
BaseMatrixT
&
b
,
BaseMatrixT
&
c
);
/// this_i = scaleDest * this_i + scaleSum * \sum_j b_{ji}
void
sumCols
(
BaseMatrixT
&
b
,
T
scaleSum
,
T
scaleDest
);
/// this_i = scaleDest * this_i + scaleSum * \sum_j (b_{ij} - c_{ij})^2
void
sumOfSquaredDiffs
(
BaseMatrixT
&
b
,
BaseMatrixT
&
c
,
T
scaleSum
,
T
scaleDest
);
/// this_i = scaleDest * this_i + scaleSum * \sum_j b_{ij} * c_{ij}
void
sumOfProducts
(
BaseMatrixT
&
b
,
BaseMatrixT
&
c
,
T
scaleSum
,
T
scaleDest
);
/**
/**
* @code
* @code
...
...
paddle/math/Matrix.cpp
浏览文件 @
2b841ec8
...
@@ -242,7 +242,7 @@ real GpuMatrix::getSum() {
...
@@ -242,7 +242,7 @@ real GpuMatrix::getSum() {
void
GpuMatrix
::
accumulateColSum
(
Matrix
&
src
)
{
void
GpuMatrix
::
accumulateColSum
(
Matrix
&
src
)
{
CHECK_EQ
(
getWidth
(),
src
.
getWidth
());
CHECK_EQ
(
getWidth
(),
src
.
getWidth
());
CHECK_EQ
(
getHeight
(),
(
size_t
)
1
);
CHECK_EQ
(
getHeight
(),
(
size_t
)
1
);
sumCols
(
src
,
1.0
);
sumCols
(
src
,
1.0
,
1.0
);
}
}
real
GpuMatrix
::
getAbsSum
()
{
real
GpuMatrix
::
getAbsSum
()
{
...
@@ -389,7 +389,7 @@ void GpuMatrix::collectBias(Matrix& a, real scale) {
...
@@ -389,7 +389,7 @@ void GpuMatrix::collectBias(Matrix& a, real scale) {
CHECK_EQ
(
width_
,
a
.
getWidth
());
CHECK_EQ
(
width_
,
a
.
getWidth
());
GpuSparseMatrix
*
sMatPtr
=
dynamic_cast
<
GpuSparseMatrix
*>
(
&
a
);
GpuSparseMatrix
*
sMatPtr
=
dynamic_cast
<
GpuSparseMatrix
*>
(
&
a
);
if
(
!
sMatPtr
)
{
if
(
!
sMatPtr
)
{
sumCols
(
a
,
scale
);
sumCols
(
a
,
/* scaleSum= */
scale
,
/* scaleDest= */
1
);
}
else
{
}
else
{
real
*
data
=
getData
();
real
*
data
=
getData
();
hl_sparse_matrix_s
A_d
=
sMatPtr
->
sMatrix_
.
get
();
hl_sparse_matrix_s
A_d
=
sMatPtr
->
sMatrix_
.
get
();
...
@@ -589,7 +589,7 @@ void GpuMatrix::addToRows(Matrix& table, IVector& ids) {
...
@@ -589,7 +589,7 @@ void GpuMatrix::addToRows(Matrix& table, IVector& ids) {
void
GpuMatrix
::
colMerge
(
Matrix
&
src
)
{
void
GpuMatrix
::
colMerge
(
Matrix
&
src
)
{
CHECK
(
src
.
height_
==
height_
);
CHECK
(
src
.
height_
==
height_
);
if
(
!
trans_
&&
!
src
.
trans_
)
{
if
(
!
trans_
&&
!
src
.
trans_
)
{
sumRows
(
src
);
sumRows
(
src
,
/* scaleSum= */
1
,
/* scaleDest= */
0
);
}
else
{
}
else
{
LOG
(
FATAL
)
<<
"Is not supported"
;
LOG
(
FATAL
)
<<
"Is not supported"
;
}
}
...
@@ -599,7 +599,7 @@ void GpuMatrix::rowSum(Matrix& sum) {
...
@@ -599,7 +599,7 @@ void GpuMatrix::rowSum(Matrix& sum) {
CHECK_EQ
(
sum
.
getHeight
(),
getHeight
());
CHECK_EQ
(
sum
.
getHeight
(),
getHeight
());
CHECK_EQ
(
sum
.
getWidth
(),
(
size_t
)
1
);
CHECK_EQ
(
sum
.
getWidth
(),
(
size_t
)
1
);
sum
.
sumRows
(
*
this
);
sum
.
sumRows
(
*
this
,
/* scaleSum= */
1
,
/* scaleDest= */
0
);
}
}
void
GpuMatrix
::
rowMax
(
Matrix
&
max
)
{
void
GpuMatrix
::
rowMax
(
Matrix
&
max
)
{
...
@@ -790,7 +790,8 @@ void GpuMatrix::sumOfSquares(Matrix& output, Matrix& label) {
...
@@ -790,7 +790,8 @@ void GpuMatrix::sumOfSquares(Matrix& output, Matrix& label) {
LOG
(
FATAL
)
<<
"not supported: GpuSparseMatrix as label"
;
LOG
(
FATAL
)
<<
"not supported: GpuSparseMatrix as label"
;
}
}
BaseMatrix
::
sumOfSquares
(
output
,
label
);
BaseMatrix
::
sumOfSquaredDiffs
(
output
,
label
,
/* scaleSum= */
1
,
/* scaleDest= */
1
);
}
}
void
GpuMatrix
::
sumOfSquaresBp
(
Matrix
&
outputV
,
Matrix
&
label
)
{
void
GpuMatrix
::
sumOfSquaresBp
(
Matrix
&
outputV
,
Matrix
&
label
)
{
...
@@ -1501,7 +1502,7 @@ void CpuMatrix::accumulateColSum(Matrix& src) {
...
@@ -1501,7 +1502,7 @@ void CpuMatrix::accumulateColSum(Matrix& src) {
CHECK_EQ
(
getWidth
(),
src
.
getWidth
());
CHECK_EQ
(
getWidth
(),
src
.
getWidth
());
CHECK_EQ
(
getHeight
(),
(
size_t
)
1
);
CHECK_EQ
(
getHeight
(),
(
size_t
)
1
);
sumCols
(
src
,
1.0
);
sumCols
(
src
,
/* scaleSum= */
1
,
/* scaleDest= */
1
);
}
}
real
CpuMatrix
::
getAbsSum
()
{
real
CpuMatrix
::
getAbsSum
()
{
...
@@ -2188,7 +2189,7 @@ void CpuMatrix::collectBias(Matrix& a, real scale) {
...
@@ -2188,7 +2189,7 @@ void CpuMatrix::collectBias(Matrix& a, real scale) {
CHECK_EQ
(
width_
,
a
.
getWidth
());
CHECK_EQ
(
width_
,
a
.
getWidth
());
CpuSparseMatrix
*
aptr
=
dynamic_cast
<
CpuSparseMatrix
*>
(
&
a
);
CpuSparseMatrix
*
aptr
=
dynamic_cast
<
CpuSparseMatrix
*>
(
&
a
);
if
(
!
aptr
)
{
if
(
!
aptr
)
{
sumCols
(
a
,
scale
);
sumCols
(
a
,
/* scaleSum= */
scale
,
/* scaleDest= */
1
);
}
else
{
}
else
{
size_t
nnz
=
aptr
->
getElementCnt
();
size_t
nnz
=
aptr
->
getElementCnt
();
int
*
cols
=
aptr
->
getCols
();
int
*
cols
=
aptr
->
getCols
();
...
@@ -2227,7 +2228,7 @@ void CpuMatrix::sequenceAvgForward(Matrix& a,
...
@@ -2227,7 +2228,7 @@ void CpuMatrix::sequenceAvgForward(Matrix& a,
real
*
dst
=
getData
();
real
*
dst
=
getData
();
real
*
src
=
a
.
getData
();
real
*
src
=
a
.
getData
();
const
int
*
starts
=
startsPos
.
getData
();
const
int
*
starts
=
startsPos
.
getData
();
MatrixPtr
outMtx
=
Matrix
::
create
(
1
,
1
,
false
,
false
);
MatrixPtr
outMtx
=
Matrix
::
create
(
nullptr
,
1
,
width
,
false
,
false
);
MatrixPtr
dataMtx
=
Matrix
::
create
(
nullptr
,
1
,
width
,
false
,
false
);
MatrixPtr
dataMtx
=
Matrix
::
create
(
nullptr
,
1
,
width
,
false
,
false
);
for
(
size_t
i
=
0
;
i
<
height
;
i
++
)
{
for
(
size_t
i
=
0
;
i
<
height
;
i
++
)
{
int
sequenceLength
=
starts
[
i
+
1
]
-
starts
[
i
];
int
sequenceLength
=
starts
[
i
+
1
]
-
starts
[
i
];
...
@@ -2239,13 +2240,15 @@ void CpuMatrix::sequenceAvgForward(Matrix& a,
...
@@ -2239,13 +2240,15 @@ void CpuMatrix::sequenceAvgForward(Matrix& a,
dataMtx
->
setData
(
src
+
starts
[
i
]
*
width
,
sequenceLength
,
width
);
dataMtx
->
setData
(
src
+
starts
[
i
]
*
width
,
sequenceLength
,
width
);
if
(
mode
==
0
)
{
if
(
mode
==
0
)
{
// plain average
// plain average
outMtx
->
sumCols
(
*
dataMtx
,
(
real
)
1
/
(
real
)
sequenceLength
);
outMtx
->
sumCols
(
*
dataMtx
,
(
real
)
1
/
(
real
)
sequenceLength
,
/* scaleDest= */
1
);
}
else
if
(
mode
==
1
)
{
}
else
if
(
mode
==
1
)
{
// sum instead of average
// sum instead of average
outMtx
->
sumCols
(
*
dataMtx
,
(
real
)
1
);
outMtx
->
sumCols
(
*
dataMtx
,
/* scaleSum= */
1
,
/* scaleDest= */
1
);
}
else
if
(
mode
==
2
)
{
}
else
if
(
mode
==
2
)
{
// divide by square root of sequenceLength
// divide by square root of sequenceLength
outMtx
->
sumCols
(
*
dataMtx
,
(
real
)
1
/
std
::
sqrt
(
sequenceLength
));
outMtx
->
sumCols
(
*
dataMtx
,
(
real
)
1
/
std
::
sqrt
(
sequenceLength
),
/* scaleDest= */
1
);
}
else
{
}
else
{
LOG
(
FATAL
)
<<
"should not reach here"
;
LOG
(
FATAL
)
<<
"should not reach here"
;
}
}
...
@@ -2932,7 +2935,7 @@ void CpuMatrix::rowSum(Matrix& sum) {
...
@@ -2932,7 +2935,7 @@ void CpuMatrix::rowSum(Matrix& sum) {
CHECK_EQ
(
sum
.
getHeight
(),
getHeight
());
CHECK_EQ
(
sum
.
getHeight
(),
getHeight
());
CHECK_EQ
(
sum
.
getWidth
(),
(
size_t
)
1
);
CHECK_EQ
(
sum
.
getWidth
(),
(
size_t
)
1
);
sum
.
sumRows
(
*
this
);
sum
.
sumRows
(
*
this
,
/* scaleSum= */
1
,
/* scaleDest= */
0
);
}
}
void
CpuMatrix
::
rowMaxId
(
IVector
&
maxIds
)
{
void
CpuMatrix
::
rowMaxId
(
IVector
&
maxIds
)
{
...
@@ -3485,7 +3488,8 @@ void CpuMatrix::sumOfSquares(Matrix& output, Matrix& label) {
...
@@ -3485,7 +3488,8 @@ void CpuMatrix::sumOfSquares(Matrix& output, Matrix& label) {
}
}
}
}
BaseMatrix
::
sumOfSquares
(
output
,
label
);
BaseMatrix
::
sumOfSquaredDiffs
(
output
,
label
,
/* scaleSum= */
1
,
/* scaleDest= */
1
);
}
}
/* calculate the error of outputV according to label */
/* calculate the error of outputV according to label */
...
...
python/paddle/trainer/config_parser.py
浏览文件 @
2b841ec8
...
@@ -592,6 +592,20 @@ class DotMulProjection(Projection):
...
@@ -592,6 +592,20 @@ class DotMulProjection(Projection):
def
calc_parameter_dims
(
self
,
input_size
,
output_size
):
def
calc_parameter_dims
(
self
,
input_size
,
output_size
):
return
[
1
,
output_size
]
return
[
1
,
output_size
]
# ScalingProjection
@config_class
class ScalingProjection(Projection):
    # Config-side description of the 'scaling' projection, which is
    # parameterized by a single scalar.
    type = 'scaling'

    def calc_output_size(self, input_layer_config):
        # The output is as wide as the input layer.
        return input_layer_config.size

    def calc_parameter_size(self, input_size, output_size):
        # One scalar, independent of the layer sizes.
        return 1

    def calc_parameter_dims(self, input_size, output_size):
        # The scalar is reported as a 1 x 1 parameter.
        return [1, 1]
@
config_class
@
config_class
class
TableProjection
(
Projection
):
class
TableProjection
(
Projection
):
...
...
python/paddle/trainer_config_helpers/layers.py
浏览文件 @
2b841ec8
...
@@ -65,6 +65,7 @@ __all__ = [
...
@@ -65,6 +65,7 @@ __all__ = [
'StaticInput'
,
'StaticInput'
,
'expand_layer'
,
'expand_layer'
,
'scaling_layer'
,
'scaling_layer'
,
'scaling_projection'
,
'power_layer'
,
'power_layer'
,
'interpolation_layer'
,
'interpolation_layer'
,
'bilinear_interp_layer'
,
'bilinear_interp_layer'
,
...
@@ -458,7 +459,7 @@ def identity_projection(input, offset=None):
...
@@ -458,7 +459,7 @@ def identity_projection(input, offset=None):
:type input: LayerOutput
:type input: LayerOutput
:param offset: Offset, None if use default.
:param offset: Offset, None if use default.
:type offset: int
:type offset: int
:return: A IdentityProjection or IdentityOffsetProjection
O
bject
:return: A IdentityProjection or IdentityOffsetProjection
o
bject
:rtype: IdentityProjection or IdentityOffsetProjection
:rtype: IdentityProjection or IdentityOffsetProjection
"""
"""
if
offset
is
None
:
if
offset
is
None
:
...
@@ -471,6 +472,34 @@ def identity_projection(input, offset=None):
...
@@ -471,6 +472,34 @@ def identity_projection(input, offset=None):
return
proj
return
proj
@wrap_param_attr_default()
def scaling_projection(input, param_attr=None):
    """
    scaling_projection multiplies the input by a single scalar parameter and
    adds the result to the output.

    .. math::
       out += w * in

    The example usage is:

    .. code-block:: python

       proj = scaling_projection(input=layer)

    :param input: Input Layer.
    :type input: LayerOutput
    :param param_attr: Parameter config, None if use default.
    :type param_attr: ParameterAttribute
    :return: A ScalingProjection object
    :rtype: ScalingProjection
    """
    projection = ScalingProjection(
        input_layer_name=input.name, **param_attr.attr)
    # Keep a handle to the source layer on the projection object.
    projection.origin = input
    return projection
@
wrap_param_attr_default
()
@
wrap_param_attr_default
()
def
dotmul_projection
(
input
,
param_attr
=
None
):
def
dotmul_projection
(
input
,
param_attr
=
None
):
"""
"""
...
@@ -1426,11 +1455,11 @@ def bilinear_interp_layer(input,
...
@@ -1426,11 +1455,11 @@ def bilinear_interp_layer(input,
.. code-block:: python
.. code-block:: python
bilinear = bilinear_interp_layer(input=layer1, out_size_x=64, out_size_y=64)
bilinear = bilinear_interp_layer(input=layer1, out_size_x=64, out_size_y=64)
:param input: A input layer.
:param input: A input layer.
:type input: LayerOutput.
:type input: LayerOutput.
:param out_size_x: bilinear interpolation output width.
:param out_size_x: bilinear interpolation output width.
:type out_size_x: int|None
:type out_size_x: int|None
:param out_size_y: bilinear interpolation output height.
:param out_size_y: bilinear interpolation output height.
:type out_size_y: int|None
:type out_size_y: int|None
:param name: The layer's name, which cna not be specified.
:param name: The layer's name, which cna not be specified.
...
@@ -1742,11 +1771,11 @@ def img_conv_layer(input,
...
@@ -1742,11 +1771,11 @@ def img_conv_layer(input,
The details of convolution layer, please refer UFLDL's `convolution
The details of convolution layer, please refer UFLDL's `convolution
<http://ufldl.stanford.edu/tutorial/supervised/
<http://ufldl.stanford.edu/tutorial/supervised/
FeatureExtractionUsingConvolution/>`_ .
FeatureExtractionUsingConvolution/>`_ .
Convolution Transpose (deconv) layer for image. Paddle only support square
Convolution Transpose (deconv) layer for image. Paddle only support square
input currently and thus input image's width equals height.
input currently and thus input image's width equals height.
The details of convolution transpose layer,
The details of convolution transpose layer,
please refer to the following explanation and references therein
please refer to the following explanation and references therein
<http://datascience.stackexchange.com/questions/6107/
<http://datascience.stackexchange.com/questions/6107/
what-are-deconvolutional-layers/>`_ .
what-are-deconvolutional-layers/>`_ .
...
@@ -4392,7 +4421,7 @@ def cross_entropy(input, label, name=None, coeff=1.0, layer_attr=None):
...
@@ -4392,7 +4421,7 @@ def cross_entropy(input, label, name=None, coeff=1.0, layer_attr=None):
.. code-block:: python
.. code-block:: python
cost = cross_entropy(input=input_layer,
cost = cross_entropy(input=input_layer,
label=label_layer)
label=label_layer)
:param input: The first input layer.
:param input: The first input layer.
...
@@ -4432,7 +4461,7 @@ def cross_entropy_with_selfnorm(input,
...
@@ -4432,7 +4461,7 @@ def cross_entropy_with_selfnorm(input,
.. code-block:: python
.. code-block:: python
cost = cross_entropy_with_selfnorm(input=input_layer,
cost = cross_entropy_with_selfnorm(input=input_layer,
label=label_layer)
label=label_layer)
:param input: The first input layer.
:param input: The first input layer.
...
@@ -4502,7 +4531,7 @@ def huber_cost(input, label, name=None, coeff=1.0, layer_attr=None):
...
@@ -4502,7 +4531,7 @@ def huber_cost(input, label, name=None, coeff=1.0, layer_attr=None):
.. code-block:: python
.. code-block:: python
cost = huber_cost(input=input_layer,
cost = huber_cost(input=input_layer,
label=label_layer)
label=label_layer)
:param input: The first input layer.
:param input: The first input layer.
...
@@ -4542,7 +4571,7 @@ def multi_binary_label_cross_entropy(input,
...
@@ -4542,7 +4571,7 @@ def multi_binary_label_cross_entropy(input,
.. code-block:: python
.. code-block:: python
cost = multi_binary_label_cross_entropy(input=input_layer,
cost = multi_binary_label_cross_entropy(input=input_layer,
label=label_layer)
label=label_layer)
:param input: The first input layer.
:param input: The first input layer.
...
...
python/paddle/trainer_config_helpers/tests/configs/projections.py
浏览文件 @
2b841ec8
...
@@ -26,6 +26,7 @@ with mixed_layer() as m5:
...
@@ -26,6 +26,7 @@ with mixed_layer() as m5:
with
mixed_layer
()
as
m6
:
with
mixed_layer
()
as
m6
:
m6
+=
dotmul_operator
(
a
=
m3
,
b
=
m4
)
m6
+=
dotmul_operator
(
a
=
m3
,
b
=
m4
)
m6
+=
scaling_projection
(
m3
)
img
=
data_layer
(
name
=
'img'
,
size
=
32
*
32
)
img
=
data_layer
(
name
=
'img'
,
size
=
32
*
32
)
flt
=
data_layer
(
name
=
'filter'
,
size
=
3
*
3
*
1
*
64
)
flt
=
data_layer
(
name
=
'filter'
,
size
=
3
*
3
*
1
*
64
)
...
...
python/paddle/trainer_config_helpers/tests/configs/protostr/projections.protostr
浏览文件 @
2b841ec8
...
@@ -111,13 +111,23 @@ layers {
...
@@ -111,13 +111,23 @@ layers {
inputs {
inputs {
input_layer_name: "__mixed_2__"
input_layer_name: "__mixed_2__"
}
}
inputs {
input_layer_name: "__mixed_2__"
input_parameter_name: "___mixed_5__.w1"
proj_conf {
type: "scaling"
name: "___mixed_5__.w1"
input_size: 100
output_size: 100
}
}
inputs {
inputs {
input_layer_name: "__mixed_3__"
input_layer_name: "__mixed_3__"
}
}
operator_confs {
operator_confs {
type: "dot_mul"
type: "dot_mul"
input_indices: 0
input_indices: 0
input_indices:
1
input_indices:
2
input_sizes: 100
input_sizes: 100
input_sizes: 100
input_sizes: 100
output_size: 100
output_size: 100
...
@@ -258,6 +268,16 @@ parameters {
...
@@ -258,6 +268,16 @@ parameters {
initial_strategy: 0
initial_strategy: 0
initial_smart: false
initial_smart: false
}
}
parameters {
name: "___mixed_5__.w1"
size: 1
initial_mean: 0.0
initial_std: 1.0
dims: 1
dims: 1
initial_strategy: 0
initial_smart: true
}
parameters {
parameters {
name: "___mixed_7__.w0"
name: "___mixed_7__.w0"
size: 30000
size: 30000
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录