BaiXuePrincess / Paddle (fork of PaddlePaddle / Paddle)

Commit a7855d3e
Author: hedaoyuan
Date: Oct 21, 2016
Parent: 3a5d60bc

Lazy Assignment

9 changed files with 650 additions and 59 deletions (+650 −59)
CMakeLists.txt                                  +6    -0
paddle/math/TensorAssign.h                      +142  -0
paddle/math/TensorEvaluate.h                    +5    -3
paddle/math/TensorExpression.h                  +8    -0
paddle/math/TrainingAlgorithmOp.cu              +172  -0
paddle/math/tests/CMakeLists.txt                +6    -3
paddle/math/tests/TensorCheck.h                 +179  -0
paddle/math/tests/test_TrainingAlgorithm.cpp    +1    -53
paddle/math/tests/test_lazyAssign.cu            +131  -0
CMakeLists.txt
...
...
@@ -87,6 +87,12 @@ if(NOT WITH_GPU)
    add_definitions(-DHPPL_STUB_FUNC)
    list(APPEND CMAKE_CXX_SOURCE_FILE_EXTENSIONS cu)
else()
    if(${CUDA_VERSION_MAJOR} GREATER 6)
        if(COMPILER_SUPPORT_CXX11)
            LIST(APPEND CUDA_NVCC_FLAGS -std=c++11)
        endif()
    endif()

    # TODO(yuyang18): Change it to remove std=c++11 in cuda compile.
    set(CUDA_PROPAGATE_HOST_FLAGS OFF)

    if(NOT CUDNN_FOUND)
...
...
paddle/math/TensorAssign.h (new file, mode 100644)
/**
* TensorAssign.h
*
* Author: hedaoyuan (hedaoyuan@baidu.com)
* Created on: 2016-10-08
*
* Copyright (c) Baidu.com, Inc. All Rights Reserved
*
*/
#pragma once
#include <algorithm>
#include "paddle/utils/Logging.h"
namespace paddle {

template <typename LhsType, typename RhsType, class T>
class TensorAssignOp {
public:
  explicit TensorAssignOp(const LhsType& lhs, const RhsType& rhs)
      : lhs_(lhs), rhs_(rhs) {
#ifndef __CUDA_ARCH__
    CHECK_EQ(lhs_.getWidth(), rhs_.getWidth());
    CHECK_EQ(lhs_.getHeight(), rhs_.getHeight());
    CHECK_EQ(lhs_.useGpu(), rhs_.useGpu());
#endif
  }

  INLINE void apply(const int i, const int j) {
    lhs_.applyRef(i, j) = rhs_.apply(i, j);
  }
  INLINE void apply(const int index) {
    lhs_.applyRef(index) = rhs_.apply(index);
  }

  INLINE size_t getWidth() const { return lhs_.getWidth(); }
  INLINE size_t getHeight() const { return rhs_.getHeight(); }
  INLINE bool isContiguous() const {
    return lhs_.isContiguous() && rhs_.isContiguous();
  }
  INLINE bool useGpu() const { return lhs_.useGpu(); }

private:
  TensorApply<LhsType, T> lhs_;
  TensorApply<const RhsType, T> rhs_;
};

template <typename Assign, typename... AssignOp>
void AssignCpuEvaluate(int height, int width, bool isContiguous,
                       Assign&& assign, AssignOp&&... args) {
  if (isContiguous) {
    int size = height * width;
    for (int index = 0; index < size; index++) {
      assign.apply(index);
      __attribute__((unused)) int dummy[] = {(((args)).apply(index), 0)...};
    }
  } else {
    for (int i = 0; i < height; i++) {
      for (int j = 0; j < width; j++) {
        assign.apply(i, j);
        __attribute__((unused)) int dummy[] = {(((args)).apply(i, j), 0)...};
      }
    }
  }
}

#ifdef __NVCC__
template <typename Assign, typename... AssignOp>
__global__ void AssignGpuEvaluate1(const int border, Assign assign,
                                   AssignOp... args) {
  const int idx = blockIdx.x * blockDim.x + threadIdx.x;
  if (idx < border) {
    assign.apply(idx);
    __attribute__((unused)) int dummy[] = {(((args)).apply(idx), 0)...};
  }
}

template <typename Assign, typename... AssignOp>
__global__ void AssignGpuEvaluate2(const int height, const int width,
                                   Assign assign, AssignOp... args) {
  const int colIdx = blockIdx.x * blockDim.x + threadIdx.x;
  const int rowIdx = blockIdx.y * blockDim.y + threadIdx.y;
  for (int i = rowIdx; i < height; i += gridDim.y * blockDim.y) {
    for (int j = colIdx; j < width; j += gridDim.x * blockDim.x) {
      assign.apply(i, j);
      __attribute__((unused)) int dummy[] = {(((args)).apply(i, j), 0)...};
    }
  }
}
#endif

// At least one assignment expression is required
template <typename Assign, typename... AssignOp>
void AssignEvaluate(Assign&& assign, AssignOp&&... args) {
  const bool useGpu_ = assign.useGpu();
  bool isContiguous_ = assign.isContiguous();
  const size_t height = assign.getHeight();
  const size_t width = assign.getWidth();

  const int packSize = sizeof...(args);
  const bool packUseGpu[] = {((args)).useGpu()...};
  const bool packIsContiguous[] = {((args)).isContiguous()...};
  const size_t packHeight[] = {((args)).getHeight()...};
  const size_t packWidth[] = {((args)).getWidth()...};

  for (int i = 0; i < packSize; i++) {
    CHECK_EQ(useGpu_, packUseGpu[i]);
    CHECK_EQ(height, packHeight[i]);
    CHECK_EQ(width, packWidth[i]);
    isContiguous_ = isContiguous_ && packIsContiguous[i];
  }

  if (useGpu_) {
#ifdef __NVCC__
    if (isContiguous_) {
      int size = height * width;
      int blockSize = size <= 1024 ? size : 1024;
      int gridSize = (size + 1024 - 1) / 1024;
      AssignGpuEvaluate1<<<gridSize, blockSize, 0, STREAM_DEFAULT>>>(
          size, assign, args...);
    } else {
      int blockSizeY = std::min(32, (int)height);
      int blockSizeX = (32 / blockSizeY) * 32;
      int gridSizeX = std::min(32, (int)(width + blockSizeX - 1) / blockSizeX);
      int gridSizeY = std::min(32, (int)(height + blockSizeY - 1) / blockSizeY);
      dim3 threads(blockSizeX, blockSizeY);
      dim3 grid(gridSizeX, gridSizeY);
      AssignGpuEvaluate2<<<grid, threads, 0, STREAM_DEFAULT>>>(
          height, width, assign, args...);
    }
    CHECK_SYNC("AssignEvaluate failed");
#endif
  } else {
    AssignCpuEvaluate(height, width, isContiguous_, assign, args...);
  }
}

}  // namespace paddle
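A note on the __attribute__((unused)) int dummy[] arrays above: they are the pre-C++17 idiom for expanding a parameter pack of statements. Each args.apply(...) call is evaluated for its side effect, and the comma operator turns it into the 0 that initializes an otherwise unused array. A minimal standalone sketch of the same trick (the print/printAll names are only illustrative, not part of Paddle):

    #include <iostream>

    template <typename T>
    void print(const T& v) { std::cout << v << " "; }

    // Calls print() on every element of a parameter pack without C++17 fold
    // expressions; the unused array only provides a pack-expansion context.
    template <typename... Args>
    void printAll(const Args&... args) {
      __attribute__((unused)) int dummy[] = {(print(args), 0)...};
      std::cout << "\n";
    }

    int main() {
      printAll(1, 2.5, "three");  // prints: 1 2.5 three
      return 0;
    }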
paddle/math/TensorEvaluate.h
...
...
@@ -27,14 +27,16 @@ inline void TensorCpuApply(LeftType& lhs, const RightType& rhs) {
   CHECK_EQ(lhs_.getHeight(), rhs_.getHeight());
   CHECK_EQ(lhs_.useGpu(), rhs_.useGpu());
+  int height = lhs_.getHeight();
+  int width = lhs_.getWidth();
   if (lhs_.isContiguous() && rhs_.isContiguous()) {
-    int size = lhs_.getHeight() * lhs_.getWidth();
+    int size = height * width;
     for (int index = 0; index < size; index++) {
       lhs_.applyRef(index) = rhs_.apply(index);
     }
   } else {
-    for (size_t i = 0; i < lhs_.getHeight(); i++) {
-      for (size_t j = 0; j < lhs_.getWidth(); j++) {
+    for (int i = 0; i < height; i++) {
+      for (int j = 0; j < width; j++) {
         lhs_.applyRef(i, j) = rhs_.apply(i, j);
       }
     }
...
...
paddle/math/TensorExpression.h
...
...
@@ -27,6 +27,8 @@ typename ExprType2,
          typename ExprType3,
          class T>
class TensorTernaryOp;

template <typename LhsType, typename RhsType, class T>
class TensorAssignOp;
/**
* \brief Tensor base class.
*
...
...
@@ -318,6 +320,12 @@ public:
        (hppl::unary::constant<T>(p), derived());
  }

  template <typename ExpressionType>
  TensorAssignOp<Derived, ExpressionType, T> lazyAssign(
      const ExpressionType& expr) const {
    return TensorAssignOp<Derived, ExpressionType, T>(derived(), expr);
  }

protected:
  const Derived& derived() const {
    return *static_cast<const Derived*>(this);
  }
};
...
...
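lazyAssign only records the assignment as a TensorAssignOp expression; nothing is computed until the expressions are handed to AssignEvaluate, which runs them all in a single CPU loop or a single CUDA kernel launch. A minimal usage sketch, modeled on test_lazyAssign.cu further below (fusedUpdate is a made-up name; A, B, C, D are same-shaped matrices):

    // Builds two deferred assignments and evaluates both in one pass.
    // Within each element, expr2 reads the value expr1 just wrote, so the
    // result matches the eager sequence "A = B + C; A = A * D;".
    void fusedUpdate(BaseMatrix& A, BaseMatrix& B, BaseMatrix& C, BaseMatrix& D) {
      auto expr1 = A.lazyAssign(B + C);  // deferred: A = B + C
      auto expr2 = A.lazyAssign(A * D);  // deferred: A = A * D
      AssignEvaluate(expr1, expr2);      // one loop / one kernel launch
    }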
paddle/math/TrainingAlgorithmOp.cu
...
...
@@ -12,6 +12,175 @@
#include "BaseMatrix.h"
#include "TrainingAlgorithmOp.h"
#if __cplusplus > 199711L
#include "TensorAssign.h"
namespace paddle {

void sparseMomentumApply(BaseMatrix& value,
                         BaseMatrix& grad,
                         BaseMatrix& momU,
                         BaseMatrix& momV,
                         real alpha,
                         real beta,
                         real gamma,
                         real tau,
                         real learningRate) {
  auto expr1 = momU.lazyAssign(momU - (alpha * gamma * learningRate) * grad);
  auto expr2 =
      momV.lazyAssign(momV + (tau * alpha * gamma * learningRate) * grad);
  auto expr3 = value.lazyAssign(
      (tau / beta + (real)1 / alpha) * momU + ((real)1 / beta) * momV);
  AssignEvaluate(expr1, expr2, expr3);
}

void adadeltaApply(BaseMatrix& value,
                   BaseMatrix& grad,
                   BaseMatrix& mom,
                   BaseMatrix& accum,
                   BaseMatrix& accum_update,
                   BaseMatrix& lr,
                   real rou,
                   real epsilon,
                   real learningRate,
                   real momentum,
                   real decayRate) {
  auto expr1 = accum.lazyAssign(rou * accum + ((real)1 - rou) * grad.square());
  auto expr2 =
      lr.lazyAssign(((accum_update + epsilon) / (accum + epsilon)).sqrt());
  auto expr3 = accum_update.lazyAssign(
      rou * accum_update + ((real)1 - rou) * (grad * lr).square());
  auto expr4 = mom.lazyAssign(
      mom * momentum - learningRate * lr * (grad + value * decayRate));
  auto expr5 = value.lazyAssign(value + mom);
  AssignEvaluate(expr1, expr2, expr3, expr4, expr5);
}

void adagradApply(BaseMatrix& value,
                  BaseMatrix& grad,
                  BaseMatrix& mom,
                  BaseMatrix& accum_buffer,
                  BaseMatrix& accum,
                  BaseMatrix& lr,
                  real epsilon,
                  real learningRate,
                  real momentum,
                  real decayRate) {
  auto expr1 = accum.lazyAssign(accum + grad.square());
  auto expr2 =
      lr.lazyAssign((accum_buffer + accum + epsilon).sqrt().reciprocal());
  auto expr3 = mom.lazyAssign(
      mom * momentum - learningRate * lr * (grad + value * decayRate));
  auto expr4 = value.lazyAssign(value + mom);
  AssignEvaluate(expr1, expr2, expr3, expr4);
}

void rmspropApply(BaseMatrix& value,
                  BaseMatrix& grad,
                  BaseMatrix& mom,
                  BaseMatrix& g,
                  BaseMatrix& f,
                  BaseMatrix& lr,
                  real accumulatedRou,
                  real rou,
                  real epsilon,
                  real learningRate,
                  real momentum,
                  real decayRate,
                  bool firstTime) {
  auto expr2 = f.lazyAssign(accumulatedRou * f + ((real)1 - rou) * grad);
  auto expr3 = lr.lazyAssign((g - f.square() + epsilon).sqrt().reciprocal());
  auto expr4 = mom.lazyAssign(
      mom * momentum - learningRate * lr * (grad + value * decayRate));
  auto expr5 = value.lazyAssign(value + mom);

  if (firstTime) {
    auto expr1 = g.lazyAssign(accumulatedRou * g + grad.square());
    AssignEvaluate(expr1, expr2, expr3, expr4, expr5);
  } else {
    auto expr1 =
        g.lazyAssign(accumulatedRou * g + ((real)1 - rou) * grad.square());
    AssignEvaluate(expr1, expr2, expr3, expr4, expr5);
  }
}

void decayedAdagradApply(BaseMatrix& value,
                         BaseMatrix& grad,
                         BaseMatrix& mom,
                         BaseMatrix& accum,
                         BaseMatrix& lr,
                         real accumulatedRou,
                         real rou,
                         real epsilon,
                         real learningRate,
                         real momentum,
                         real decayRate,
                         bool firstTime) {
  auto expr2 = lr.lazyAssign((accum + epsilon).sqrt().reciprocal());
  auto expr3 = mom.lazyAssign(
      mom * momentum - learningRate * lr * (grad + value * decayRate));
  auto expr4 = value.lazyAssign(value + mom);

  if (firstTime) {
    auto expr1 = accum.lazyAssign(accumulatedRou * accum + grad.square());
    AssignEvaluate(expr1, expr2, expr3, expr4);
  } else {
    auto expr1 = accum.lazyAssign(
        accumulatedRou * accum + ((real)1 - rou) * grad.square());
    AssignEvaluate(expr1, expr2, expr3, expr4);
  }
}

void adamApply(BaseMatrix& value,
               BaseMatrix& grad,
               BaseMatrix& mom,  // first moment
               BaseMatrix& v,    // second moment
               real beta1,
               real beta2,
               real beta1_power,
               real beta2_power,
               real epsilon,
               real learningRate) {
  real alpha = learningRate *
      std::sqrt((real)1 - beta2_power) / ((real)1 - beta1_power);
  auto expr1 = mom.lazyAssign(beta1 * mom + ((real)1 - beta1) * grad);
  auto expr2 = v.lazyAssign(beta2 * v + ((real)1 - beta2) * grad.square());
  auto expr3 = value.lazyAssign(value - (mom * alpha) / (v.sqrt() + epsilon));
  AssignEvaluate(expr1, expr2, expr3);
}

void adamaxApply(BaseMatrix& value,
                 BaseMatrix& grad,
                 BaseMatrix& mom,  // first moment
                 BaseMatrix& u,    // weighted infinity norm
                 real beta1,
                 real beta2,
                 int64_t step,
                 real alpha) {
  auto expr1 = mom.lazyAssign(beta1 * mom + ((real)1 - beta1) * grad);
  auto expr2 =
      u.lazyAssign((beta2 * u > grad.abs()).condition(beta2 * u, grad.abs()));
  auto expr3 = value.lazyAssign(
      value - (alpha / ((real)1 - (real)std::pow(beta1, step))) * (mom / u));
  AssignEvaluate(expr1, expr2, expr3);
}

}  // namespace paddle

#else

namespace paddle {

void sparseMomentumApply(BaseMatrix& value,
...
...
@@ -180,3 +349,6 @@ void adamaxApply(BaseMatrix& value,
}
}  // namespace paddle

#endif
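For reference, adamApply above is the usual Adam update with the bias correction folded into a single step size (beta1_power and beta2_power are the accumulated powers beta1^t and beta2^t at step t); restated in the code's variable names:

    alpha = learningRate * sqrt(1 - beta2_power) / (1 - beta1_power)
    mom   = beta1 * mom + (1 - beta1) * grad
    v     = beta2 * v   + (1 - beta2) * grad^2
    value = value - alpha * mom / (sqrt(v) + epsilon)

Each line is an element-wise expression built with lazyAssign, and a single AssignEvaluate call evaluates all of them in one pass instead of one pass per assignment.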
paddle/math/tests/CMakeLists.txt
...
...
@@ -15,13 +15,16 @@ add_simple_unittest(test_perturbation)
add_simple_unittest(test_CpuGpuVector)
add_simple_unittest(test_Allocator)

if(WITH_GPU)
    CUDA_ADD_EXECUTABLE(test_Tensor test_Tensor.cu)
    link_paddle_test(test_Tensor)
    if(COMPILER_SUPPORT_CXX11)
        LIST(APPEND CUDA_NVCC_FLAGS -std=c++11)
        CUDA_ADD_EXECUTABLE(test_Tensor test_Tensor.cu)
        link_paddle_test(test_Tensor)
        CUDA_ADD_EXECUTABLE(test_lazyAssign test_lazyAssign.cu)
        link_paddle_test(test_lazyAssign)
    endif()
else()
    compile_cu_as_cpp(test_Tensor.cu)
    add_unittest(test_Tensor test_Tensor.cu)
    compile_cu_as_cpp(test_lazyAssign.cu)
    add_unittest(test_lazyAssign test_lazyAssign.cu)
endif(WITH_GPU)
paddle/math/tests/TensorCheck.h (new file, mode 100644)
/**
* test_Tensor.cpp
*
* Author: hedaoyuan (hedaoyuan@baidu.com)
* Created on: 2016-06-06
*
* Copyright (c) Baidu.com, Inc. All Rights Reserved
*/
#include <gtest/gtest.h>
#include "paddle/math/Matrix.h"
using namespace paddle;  // NOLINT
using namespace std;     // NOLINT

template <typename Tensor>
extern void TensorCheckEqual(const Tensor& tensor1, const Tensor& tensor2);

void TensorCheckEqual(const CpuMatrix& matrix1, const CpuMatrix& matrix2) {
  CHECK(matrix1.getHeight() == matrix2.getHeight());
  CHECK(matrix1.getWidth() == matrix2.getWidth());

  int height = matrix1.getHeight();
  int width = matrix1.getWidth();
  const real* data1 = matrix1.getData();
  const real* data2 = matrix2.getData();
  int count = 0;
  for (int i = 0; i < height; i++) {
    for (int j = 0; j < width; j++) {
      if (data1[i * width + j] != data2[i * width + j]) {
        count++;
      }
    }
  }
  EXPECT_EQ(count, 0) << "There are " << count << " different element.";
}

void TensorCheckEqual(const GpuMatrix& matrix1, const GpuMatrix& matrix2) {
  CpuMatrix cpu1(matrix1.getHeight(), matrix1.getWidth());
  CpuMatrix cpu2(matrix2.getHeight(), matrix2.getWidth());
  cpu1.copyFrom(matrix1);
  cpu2.copyFrom(matrix2);
  TensorCheckEqual(cpu1, cpu2);
}

void TensorCheckErr(const CpuMatrix& matrix1, const CpuMatrix& matrix2) {
  CHECK(matrix1.getHeight() == matrix2.getHeight());
  CHECK(matrix1.getWidth() == matrix2.getWidth());
#ifndef PADDLE_TYPE_DOUBLE
  real err = 1e-5;
#else
  real err = 1e-10;
#endif

  int height = matrix1.getHeight();
  int width = matrix1.getWidth();
  const real* data1 = matrix1.getData();
  const real* data2 = matrix2.getData();
  int count = 0;
  for (int i = 0; i < height; i++) {
    for (int j = 0; j < width; j++) {
      real a = data1[i * width + j];
      real b = data2[i * width + j];
      if (fabs(a - b) > err) {
        if ((fabsf(a - b) / fabsf(a)) > (err / 10.0f)) {
          count++;
        }
      }
    }
  }
  EXPECT_EQ(count, 0) << "There are " << count << " different element.";
}

void TensorCheckErr(const GpuMatrix& matrix1, const GpuMatrix& matrix2) {
  CpuMatrix cpu1(matrix1.getHeight(), matrix1.getWidth());
  CpuMatrix cpu2(matrix2.getHeight(), matrix2.getWidth());
  cpu1.copyFrom(matrix1);
  cpu2.copyFrom(matrix2);
  TensorCheckErr(cpu1, cpu2);
}

template <class T>
void TensorCheckEqual(const CpuVectorT<T>& vector1,
                      const CpuVectorT<T>& vector2) {
  CHECK(vector1.getSize() == vector2.getSize());

  const T* data1 = vector1.getData();
  const T* data2 = vector2.getData();
  size_t size = vector1.getSize();
  int count = 0;
  for (size_t i = 0; i < size; i++) {
    if (data1[i] != data2[i]) {
      count++;
    }
  }
  EXPECT_EQ(count, 0) << "There are " << count << " different element.";
}

template <class T>
void TensorCheckEqual(const GpuVectorT<T>& vector1,
                      const GpuVectorT<T>& vector2) {
  CpuVectorT<T> cpu1(vector1.getSize());
  CpuVectorT<T> cpu2(vector2.getSize());
  cpu1.copyFrom(vector1);
  cpu2.copyFrom(vector2);
  TensorCheckEqual(cpu1, cpu2);
}

int VectorCheckErr(const Vector& vector1, const Vector& vector2) {
  CHECK(vector1.getSize() == vector2.getSize());

  const real* data1 = vector1.getData();
  const real* data2 = vector2.getData();
  size_t size = vector1.getSize();
  int count = 0;
  for (size_t i = 0; i < size; i++) {
    real a = data1[i];
    real b = data2[i];
    if (fabs(a - b) > FLAGS_max_diff) {
      if ((fabsf(a - b) / fabsf(a)) > (FLAGS_max_diff / 10.0f)) {
        count++;
      }
    }
  }
  return count;
}

#define INIT_UNARY(A1, A2) \
Tensor A1(height, width); \
Tensor A2(height, width); \
A1.randomizeUniform(); \
A2.copyFrom(A1)
#define INIT_BINARY(A1, A2, B) \
INIT_UNARY(A1, A2); \
Tensor B(height, width); \
B.randomizeUniform()
#define INIT_TERNARY(A1, A2, B, C) \
INIT_BINARY(A1, A2, B); \
Tensor C(height, width); \
C.randomizeUniform()
#define INIT_QUATERNARY(A1, A2, B, C, D) \
INIT_TERNARY(A1, A2, B, C); \
Tensor D(height, width); \
D.randomizeUniform()
// Performance Check
#ifdef PADDLE_DISABLE_TIMER
#define CHECK_VECTORPTR(vector1, vector2) \
EXPECT_EQ(VectorCheckErr(vector1, vector2), 0)
#define EXPRESSION_PERFORMANCE(expression) \
expression;
#else
#include "paddle/utils/Stat.h"
#define CHECK_VECTORPTR(vector1, vector2)
#define EXPRESSION_PERFORMANCE(expression) \
do {\
char expr[30];\
strncpy(expr, #expression, 30);\
if (expr[29] != '\0') {\
expr[27] = '.'; expr[28] = '.'; expr[29] = '\0';\
}\
expression;\
for (int i = 0; i < 20; i++) {\
REGISTER_TIMER(expr);\
expression;\
}\
LOG(INFO) << std::setiosflags(std::ios::left) << std::setfill(' ')\
<< *globalStat.getStat(expr);\
globalStat.reset();\
} while (0)
#endif
paddle/math/tests/test_TrainingAlgorithm.cpp
...
...
@@ -11,6 +11,7 @@
#include "paddle/utils/Util.h"
#include "paddle/math/TrainingAlgorithmOp.h"
#include "OriginalOptimizerApi.h"
#include "TensorCheck.h"
using namespace paddle;  // NOLINT
...
...
@@ -33,26 +34,6 @@ private:
  double max_diff_;
};
int VectorCheckErr(const Vector& vector1, const Vector& vector2) {
  CHECK(vector1.getSize() == vector2.getSize());

  const real* data1 = vector1.getData();
  const real* data2 = vector2.getData();
  size_t size = vector1.getSize();
  int count = 0;
  for (size_t i = 0; i < size; i++) {
    real a = data1[i];
    real b = data2[i];
    if (fabs(a - b) > FLAGS_max_diff) {
      if ((fabsf(a - b) / fabsf(a)) > (FLAGS_max_diff / 10.0f)) {
        count++;
      }
    }
  }
  return count;
}
#define COPY_VECTOR_TO_CPU(cpuVec, vector) \
do {\
if (vector->useGpu()) {\
...
...
@@ -71,39 +52,6 @@ int VectorCheckErr(const VectorPtr& vector1, const VectorPtr& vector2) {
  return VectorCheckErr(*tmp1, *tmp2);
}
#ifdef PADDLE_DISABLE_TIMER
#define CHECK_VECTORPTR(vector1, vector2) \
EXPECT_EQ(VectorCheckErr(vector1, vector2), 0)
#define EXPRESSION_PERFORMANCE(expression) \
expression;
#else
#include "paddle/utils/Stat.h"
#define CHECK_VECTORPTR(vector1, vector2)
#define EXPRESSION_PERFORMANCE(expression) \
do {\
char expr[30];\
strncpy(expr, #expression, 30);\
if (expr[29] != '\0') {\
expr[27] = '.'; expr[28] = '.'; expr[29] = '\0';\
}\
expression;\
for (int i = 0; i < 20; i++) {\
REGISTER_TIMER(expr);\
expression;\
}\
LOG(INFO) << std::setiosflags(std::ios::left) << std::setfill(' ')\
<< *globalStat.getStat(expr);\
globalStat.reset();\
} while (0)
#endif
typedef std::function<void(size_t size, bool useGpu)> testMatrixFunc;

void testCase(testMatrixFunc matrixFunc) {
...
...
paddle/math/tests/test_lazyAssign.cu (new file, mode 100644)
/**
* test_lazyAssign.cpp
*
* Author: hedaoyuan (hedaoyuan@baidu.com)
* Created on: 2016-10-15
*
* Copyright (c) Baidu.com, Inc. All Rights Reserved
*/
#include <gtest/gtest.h>
#include "paddle/math/Matrix.h"
#include "paddle/math/TensorAssign.h"
#include "TensorCheck.h"
using namespace paddle;  // NOLINT
using namespace std;     // NOLINT

typedef std::function<void(int height, int width)> testMatrixFunc;

void testMatrixCase(testMatrixFunc matrixFunc) {
  for (auto height : {1}) {
    for (auto width : {1, 32, 64, 128, 512, 1024, 4096, 32768, 65536, 131072,
                       262144, 524288, 1048576, 2097152, 4194304, 8388608}) {
      matrixFunc(height, width);
    }
  }
}

template <typename Tensor>
void testLazyAssign(int height, int width) {
  INIT_QUATERNARY(A1, A2, B, C, D);

  EXPRESSION_PERFORMANCE(
    A1 = B + C;
    A1 = A1 * D;);

  EXPRESSION_PERFORMANCE(
    auto expr1 = A2.lazyAssign(B + C);
    auto expr2 = A2.lazyAssign(A2 * D);
    AssignEvaluate(expr1, expr2););

  TensorCheckErr(A1, A2);
}

TEST(lazyAssign, CPU) { testMatrixCase(testLazyAssign<CpuMatrix>); }

#ifndef PADDLE_ONLY_CPU
TEST(lazyAssign, GPU) { testMatrixCase(testLazyAssign<GpuMatrix>); }
#endif

template <typename Tensor>
void sgdUpdateTensor(
    Tensor& A, Tensor& B, Tensor& C, Tensor& D, real p1, real p2, real p3) {
  C = C * p2 - D * (B + A * p3) * p1;
  A += C;
}

void sgdUpdateLazyAssign(BaseMatrix& A,
                         BaseMatrix& B,
                         BaseMatrix& C,
                         BaseMatrix& D,
                         real p1,
                         real p2,
                         real p3) {
  auto expr1 = C.lazyAssign(C * p2 - D * (B + A * p3) * p1);
  auto expr2 = A.lazyAssign(A + C);
  AssignEvaluate(expr1, expr2);
}

template <typename Tensor>
void testSgdUpdate(int height, int width) {
  Tensor A1(height, width);
  Tensor A2(height, width);
  Tensor A3(height, width);
  A1.randomizeUniform();
  A2.copyFrom(A1);
  A3.copyFrom(A1);

  Tensor B(height, width);
  B.randomizeUniform();

  Tensor C1(height, width);
  Tensor C2(height, width);
  Tensor C3(height, width);
  C1.randomizeUniform();
  C2.copyFrom(C1);
  C3.copyFrom(C1);

  Tensor D(height, width);
  D.randomizeUniform();

  real p1 = 0.2;
  real p2 = 0.3;
  real p3 = 0.5;

  /**
   * c = p2 * c - p1 * (b + p3 * a);
   * a = a + c;
   */
  // BaseMatrix API
  EXPRESSION_PERFORMANCE(A1.sgdUpdate(B, C1, D, p1, p2, p3););

  // Tensor expression
  EXPRESSION_PERFORMANCE(sgdUpdateTensor(A2, B, C2, D, p1, p2, p3));

  // lazyAssign
  EXPRESSION_PERFORMANCE(sgdUpdateLazyAssign(A3, B, C3, D, p1, p2, p3));

  TensorCheckErr(A1, A2);
  TensorCheckErr(A1, A3);
  TensorCheckErr(C1, C2);
  TensorCheckErr(C1, C3);
}

TEST(sgdUpdate, CPU) { testMatrixCase(testSgdUpdate<CpuMatrix>); }

#ifndef PADDLE_ONLY_CPU
TEST(sgdUpdate, GPU) { testMatrixCase(testSgdUpdate<GpuMatrix>); }
#endif

int main(int argc, char** argv) {
  testing::InitGoogleTest(&argc, argv);
  hl_start();
  hl_init(0);
  return RUN_ALL_TESTS();
}