Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
PaddlePaddle
Paddle
提交
2537ac51
P
Paddle
项目概览
PaddlePaddle
/
Paddle
1 年多 前同步成功
通知
2302
Star
20931
Fork
5422
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
1423
列表
看板
标记
里程碑
合并请求
543
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
1,423
Issue
1,423
列表
看板
标记
里程碑
合并请求
543
合并请求
543
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
未验证
提交
2537ac51
编写于
12月 20, 2018
作者:
X
Xin Pan
提交者:
GitHub
12月 20, 2018
浏览文件
操作
浏览文件
下载
差异文件
Merge pull request #14983 from typhoonzero/revert_14850
Revert "[Feature] Fp16 training for resnet50 (#14850)"
上级
a015a8a3
da87f7a6
变更
25
隐藏空白更改
内联
并排
Showing
25 changed file
with
141 addition
and
356 deletion
+141
-356
paddle/fluid/framework/details/multi_devices_graph_pass.cc
paddle/fluid/framework/details/multi_devices_graph_pass.cc
+3
-5
paddle/fluid/framework/details/multi_devices_graph_pass.h
paddle/fluid/framework/details/multi_devices_graph_pass.h
+1
-2
paddle/fluid/framework/details/scale_loss_grad_op_handle.cc
paddle/fluid/framework/details/scale_loss_grad_op_handle.cc
+17
-44
paddle/fluid/framework/details/scale_loss_grad_op_handle.h
paddle/fluid/framework/details/scale_loss_grad_op_handle.h
+2
-3
paddle/fluid/operators/elementwise/elementwise_div_op.cu
paddle/fluid/operators/elementwise/elementwise_div_op.cu
+0
-5
paddle/fluid/operators/elementwise/elementwise_mul_op.cu
paddle/fluid/operators/elementwise/elementwise_mul_op.cu
+10
-12
paddle/fluid/operators/fill_zeros_like_op.cu.cc
paddle/fluid/operators/fill_zeros_like_op.cu.cc
+0
-3
paddle/fluid/operators/metrics/accuracy_op.cu
paddle/fluid/operators/metrics/accuracy_op.cu
+3
-5
paddle/fluid/operators/optimizers/momentum_op.cu
paddle/fluid/operators/optimizers/momentum_op.cu
+1
-4
paddle/fluid/operators/optimizers/momentum_op.h
paddle/fluid/operators/optimizers/momentum_op.h
+2
-4
paddle/fluid/operators/top_k_op.cu
paddle/fluid/operators/top_k_op.cu
+6
-9
paddle/fluid/platform/nccl_helper.h
paddle/fluid/platform/nccl_helper.h
+0
-3
python/paddle/fluid/data_feeder.py
python/paddle/fluid/data_feeder.py
+0
-2
python/paddle/fluid/initializer.py
python/paddle/fluid/initializer.py
+4
-50
python/paddle/fluid/layers/learning_rate_scheduler.py
python/paddle/fluid/layers/learning_rate_scheduler.py
+75
-100
python/paddle/fluid/layers/nn.py
python/paddle/fluid/layers/nn.py
+2
-6
python/paddle/fluid/optimizer.py
python/paddle/fluid/optimizer.py
+4
-13
python/paddle/fluid/tests/unittests/op_test.py
python/paddle/fluid/tests/unittests/op_test.py
+0
-2
python/paddle/fluid/tests/unittests/test_accuracy_op.py
python/paddle/fluid/tests/unittests/test_accuracy_op.py
+2
-15
python/paddle/fluid/tests/unittests/test_elementwise_div_op.py
...n/paddle/fluid/tests/unittests/test_elementwise_div_op.py
+2
-23
python/paddle/fluid/tests/unittests/test_elementwise_mul_op.py
...n/paddle/fluid/tests/unittests/test_elementwise_mul_op.py
+0
-5
python/paddle/fluid/tests/unittests/test_fill_zeros_like_op.py
...n/paddle/fluid/tests/unittests/test_fill_zeros_like_op.py
+1
-11
python/paddle/fluid/tests/unittests/test_learning_rate_scheduler.py
...dle/fluid/tests/unittests/test_learning_rate_scheduler.py
+1
-1
python/paddle/fluid/tests/unittests/test_momentum_op.py
python/paddle/fluid/tests/unittests/test_momentum_op.py
+4
-17
python/paddle/fluid/tests/unittests/test_top_k_op.py
python/paddle/fluid/tests/unittests/test_top_k_op.py
+1
-12
未找到文件。
paddle/fluid/framework/details/multi_devices_graph_pass.cc
浏览文件 @
2537ac51
...
...
@@ -355,9 +355,7 @@ std::unique_ptr<ir::Graph> MultiDevSSAGraphBuilder::ApplyImpl(
BuildStrategy
::
GradientScaleStrategy
::
kCustomized
)
{
// TODO(paddle-dev): Why is there no input for this op_handle?
auto
loss_grad_name
=
node
->
Op
()
->
OutputArgumentNames
()[
0
];
auto
out_dtype
=
all_vars_
.
at
(
loss_grad_name
)
->
GetDataType
();
CreateScaleLossGradOp
(
&
result
,
loss_grad_name
,
node
->
outputs
[
0
],
out_dtype
);
CreateScaleLossGradOp
(
&
result
,
loss_grad_name
,
node
->
outputs
[
0
]);
}
// This assumes the backward generating code will ensure IsScaleLossOp
// is true only for the op that scale the final scalar loss.
...
...
@@ -660,13 +658,13 @@ int MultiDevSSAGraphBuilder::GetVarDeviceID(
void
MultiDevSSAGraphBuilder
::
CreateScaleLossGradOp
(
ir
::
Graph
*
result
,
const
std
::
string
&
loss_grad_name
,
ir
::
Node
*
out_var_node
,
proto
::
VarType
::
Type
dtype
)
const
{
ir
::
Node
*
out_var_node
)
const
{
for
(
size_t
i
=
0
;
i
<
places_
.
size
();
++
i
)
{
// Insert ScaleCost OpHandle
auto
*
dev_ctx
=
platform
::
DeviceContextPool
::
Instance
().
Get
(
places_
[
i
]);
auto
*
op_handle
=
new
ScaleLossGradOpHandle
(
result
->
CreateEmptyNode
(
"scale_loss_grad"
,
ir
::
Node
::
Type
::
kOperation
),
local_scopes_
.
size
(),
local_scopes_
[
i
],
places_
[
i
],
dev_ctx
,
dtype
);
local_scopes_
.
size
(),
local_scopes_
[
i
],
places_
[
i
],
dev_ctx
);
result
->
Get
<
GraphOps
>
(
kGraphOps
).
emplace_back
(
op_handle
);
// FIXME: Currently ScaleLossGradOp only use device_count as scale
...
...
paddle/fluid/framework/details/multi_devices_graph_pass.h
浏览文件 @
2537ac51
...
...
@@ -68,8 +68,7 @@ class MultiDevSSAGraphBuilder : public ir::Pass {
void
CreateScaleLossGradOp
(
ir
::
Graph
*
result
,
const
std
::
string
&
loss_grad_name
,
ir
::
Node
*
out_var_node
,
proto
::
VarType
::
Type
dtype
)
const
;
ir
::
Node
*
out_var_node
)
const
;
VarHandle
*
CreateReduceOp
(
ir
::
Graph
*
result
,
const
std
::
string
&
og
,
int
dst_dev_id
)
const
;
...
...
paddle/fluid/framework/details/scale_loss_grad_op_handle.cc
浏览文件 @
2537ac51
...
...
@@ -22,66 +22,39 @@ namespace details {
ScaleLossGradOpHandle
::
ScaleLossGradOpHandle
(
ir
::
Node
*
node
,
size_t
num_dev
,
Scope
*
scope
,
platform
::
Place
place
,
platform
::
DeviceContext
*
dev_ctx
,
proto
::
VarType
::
Type
dtype
)
platform
::
DeviceContext
*
dev_ctx
)
:
OpHandleBase
(
node
),
coeff_
(
static_cast
<
float
>
(
1.0
/
num_dev
)),
scope_
(
scope
),
place_
(
place
),
out_dtype_
(
dtype
)
{
place_
(
place
)
{
this
->
SetDeviceContext
(
place_
,
dev_ctx
);
}
ScaleLossGradOpHandle
::~
ScaleLossGradOpHandle
()
{}
struct
ScaleLossGradFunctor
{
float
coeff_
;
Tensor
*
out_
;
platform
::
Place
place_
;
OpHandleBase
*
op_handle_
;
proto
::
VarType
::
Type
out_dtype_
;
platform
::
DeviceContext
*
ctx_
;
ScaleLossGradFunctor
(
float
coeff
,
Tensor
*
out
,
platform
::
Place
place
,
OpHandleBase
*
op_handle
,
proto
::
VarType
::
Type
dtype
,
platform
::
DeviceContext
*
ctx
)
:
coeff_
(
coeff
),
out_
(
out
),
place_
(
place
),
out_dtype_
(
dtype
),
ctx_
(
ctx
)
{}
template
<
typename
OutT
>
void
apply
()
const
{
auto
*
out_data
=
out_
->
mutable_data
<
OutT
>
(
place_
);
if
(
platform
::
is_cpu_place
(
place_
))
{
*
out_data
=
static_cast
<
OutT
>
(
coeff_
);
}
else
{
#ifdef PADDLE_WITH_CUDA
OutT
cast_coeff
=
static_cast
<
OutT
>
(
coeff_
);
auto
stream
=
static_cast
<
platform
::
CUDADeviceContext
*>
(
ctx_
)
->
stream
();
memory
::
Copy
(
boost
::
get
<
platform
::
CUDAPlace
>
(
place_
),
out_data
,
platform
::
CPUPlace
(),
&
cast_coeff
,
SizeOfType
(
out_dtype_
),
stream
);
VLOG
(
10
)
<<
place_
<<
"RUN Scale loss grad op"
;
#endif
}
}
};
void
ScaleLossGradOpHandle
::
RunImpl
()
{
// Doesn't wait any event
std
::
string
var_name
=
static_cast
<
VarHandle
*>
(
this
->
outputs_
[
0
])
->
name_
;
auto
&
local_scope
=
*
scope_
->
FindVar
(
kLocalExecScopeName
)
->
Get
<
Scope
*>
();
auto
*
tensor
=
local_scope
.
FindVar
(
var_name
)
->
GetMutable
<
LoDTensor
>
();
tensor
->
Resize
(
make_ddim
({
1
}));
float
*
tmp
=
local_scope
.
FindVar
(
var_name
)
->
GetMutable
<
LoDTensor
>
()
->
mutable_data
<
float
>
(
make_ddim
({
1
}),
place_
);
if
(
platform
::
is_cpu_place
(
place_
))
{
*
tmp
=
coeff_
;
}
else
{
#ifdef PADDLE_WITH_CUDA
ScaleLossGradFunctor
func
(
coeff_
,
tensor
,
place_
,
this
,
out_dtype_
,
this
->
dev_ctxes_
.
at
(
place_
));
this
->
RunAndRecordEvent
([
&
]
{
framework
::
VisitDataType
(
out_dtype_
,
func
);
});
#else
ScaleLossGradFunctor
func
(
coeff_
,
tensor
,
place_
,
this
,
out_dtype_
,
nullptr
);
framework
::
VisitDataType
(
out_dtype_
,
func
);
this
->
RunAndRecordEvent
([
&
]
{
auto
stream
=
static_cast
<
platform
::
CUDADeviceContext
*>
(
this
->
dev_ctxes_
.
at
(
place_
))
->
stream
();
memory
::
Copy
(
boost
::
get
<
platform
::
CUDAPlace
>
(
place_
),
tmp
,
platform
::
CPUPlace
(),
&
coeff_
,
sizeof
(
float
),
stream
);
VLOG
(
10
)
<<
place_
<<
"RUN Scale loss grad op"
;
});
#endif
}
}
std
::
string
ScaleLossGradOpHandle
::
Name
()
const
{
return
"Scale LossGrad"
;
}
...
...
paddle/fluid/framework/details/scale_loss_grad_op_handle.h
浏览文件 @
2537ac51
...
...
@@ -26,8 +26,8 @@ namespace details {
struct
ScaleLossGradOpHandle
:
public
OpHandleBase
{
ScaleLossGradOpHandle
(
ir
::
Node
*
node
,
size_t
num_dev
,
Scope
*
scope
,
platform
::
Place
place
,
platform
::
DeviceContext
*
context
,
p
roto
::
VarType
::
Type
dtype
);
platform
::
Place
place
,
p
latform
::
DeviceContext
*
context
);
~
ScaleLossGradOpHandle
()
final
;
...
...
@@ -40,7 +40,6 @@ struct ScaleLossGradOpHandle : public OpHandleBase {
float
coeff_
;
Scope
*
scope_
;
platform
::
Place
place_
;
proto
::
VarType
::
Type
out_dtype_
;
};
}
// namespace details
...
...
paddle/fluid/operators/elementwise/elementwise_div_op.cu
浏览文件 @
2537ac51
...
...
@@ -12,23 +12,18 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/fluid/operators/elementwise/elementwise_div_op.h"
#include "paddle/fluid/platform/float16.h"
namespace
ops
=
paddle
::
operators
;
REGISTER_OP_CUDA_KERNEL
(
elementwise_div
,
ops
::
ElementwiseDivKernel
<
paddle
::
platform
::
CUDADeviceContext
,
float
>
,
ops
::
ElementwiseDivKernel
<
paddle
::
platform
::
CUDADeviceContext
,
paddle
::
platform
::
float16
>
,
ops
::
ElementwiseDivKernel
<
paddle
::
platform
::
CUDADeviceContext
,
double
>
,
ops
::
ElementwiseDivKernel
<
paddle
::
platform
::
CUDADeviceContext
,
int
>
,
ops
::
ElementwiseDivKernel
<
paddle
::
platform
::
CUDADeviceContext
,
int64_t
>
);
REGISTER_OP_CUDA_KERNEL
(
elementwise_div_grad
,
ops
::
ElementwiseDivGradKernel
<
paddle
::
platform
::
CUDADeviceContext
,
float
>
,
ops
::
ElementwiseDivGradKernel
<
paddle
::
platform
::
CUDADeviceContext
,
paddle
::
platform
::
float16
>
,
ops
::
ElementwiseDivGradKernel
<
paddle
::
platform
::
CUDADeviceContext
,
double
>
,
ops
::
ElementwiseDivGradKernel
<
paddle
::
platform
::
CUDADeviceContext
,
int
>
,
ops
::
ElementwiseDivGradKernel
<
paddle
::
platform
::
CUDADeviceContext
,
...
...
paddle/fluid/operators/elementwise/elementwise_mul_op.cu
浏览文件 @
2537ac51
...
...
@@ -12,21 +12,19 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/fluid/operators/elementwise/elementwise_mul_op.h"
#include "paddle/fluid/platform/float16.h"
namespace
ops
=
paddle
::
operators
;
namespace
plat
=
paddle
::
platform
;
REGISTER_OP_CUDA_KERNEL
(
elementwise_mul
,
ops
::
ElementwiseMulKernel
<
plat
::
CUDADeviceContext
,
float
>
,
ops
::
ElementwiseMulKernel
<
p
lat
::
CUDADeviceContext
,
double
>
,
ops
::
ElementwiseMulKernel
<
p
lat
::
CUDADeviceContext
,
int
>
,
ops
::
ElementwiseMulKernel
<
p
lat
::
CUDADeviceContext
,
int64_
t
>
,
ops
::
ElementwiseMulKernel
<
p
lat
::
CUDADeviceContext
,
plat
::
float16
>
);
elementwise_mul
,
ops
::
ElementwiseMulKernel
<
p
addle
::
platform
::
CUDADeviceContext
,
float
>
,
ops
::
ElementwiseMulKernel
<
p
addle
::
platform
::
CUDADeviceContext
,
double
>
,
ops
::
ElementwiseMulKernel
<
p
addle
::
platform
::
CUDADeviceContext
,
in
t
>
,
ops
::
ElementwiseMulKernel
<
p
addle
::
platform
::
CUDADeviceContext
,
int64_t
>
);
REGISTER_OP_CUDA_KERNEL
(
elementwise_mul_grad
,
ops
::
ElementwiseMulGradKernel
<
p
lat
::
CUDADeviceContext
,
float
>
,
ops
::
ElementwiseMulGradKernel
<
p
lat
::
CUDADeviceContext
,
double
>
,
ops
::
ElementwiseMulGradKernel
<
p
lat
::
CUDADeviceContext
,
int
>
,
ops
::
ElementwiseMulGradKernel
<
p
lat
::
CUDADeviceContext
,
int64_t
>
,
ops
::
ElementwiseMulGradKernel
<
plat
::
CUDADeviceContext
,
plat
::
float16
>
);
ops
::
ElementwiseMulGradKernel
<
p
addle
::
platform
::
CUDADeviceContext
,
float
>
,
ops
::
ElementwiseMulGradKernel
<
p
addle
::
platform
::
CUDADeviceContext
,
double
>
,
ops
::
ElementwiseMulGradKernel
<
p
addle
::
platform
::
CUDADeviceContext
,
int
>
,
ops
::
ElementwiseMulGradKernel
<
p
addle
::
platform
::
CUDADeviceContext
,
int64_t
>
);
paddle/fluid/operators/fill_zeros_like_op.cu.cc
浏览文件 @
2537ac51
...
...
@@ -14,7 +14,6 @@ limitations under the License. */
#include "paddle/fluid/operators/fill_zeros_like_op.h"
#include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/platform/float16.h"
namespace
ops
=
paddle
::
operators
;
REGISTER_OP_CUDA_KERNEL
(
...
...
@@ -23,6 +22,4 @@ REGISTER_OP_CUDA_KERNEL(
ops
::
FillZerosLikeKernel
<
paddle
::
platform
::
CUDADeviceContext
,
int64_t
>
,
ops
::
FillZerosLikeKernel
<
paddle
::
platform
::
CUDADeviceContext
,
float
>
,
ops
::
FillZerosLikeKernel
<
paddle
::
platform
::
CUDADeviceContext
,
double
>
,
ops
::
FillZerosLikeKernel
<
paddle
::
platform
::
CUDADeviceContext
,
paddle
::
platform
::
float16
>
,
ops
::
FillZerosLikeKernel
<
paddle
::
platform
::
CUDADeviceContext
,
bool
>
);
paddle/fluid/operators/metrics/accuracy_op.cu
浏览文件 @
2537ac51
...
...
@@ -16,7 +16,6 @@ limitations under the License. */
#include <thrust/reduce.h>
#include "paddle/fluid/operators/metrics/accuracy_op.h"
#include "paddle/fluid/platform/cuda_primitives.h"
#include "paddle/fluid/platform/float16.h"
#include "paddle/fluid/platform/gpu_info.h"
namespace
paddle
{
...
...
@@ -95,7 +94,6 @@ class AccuracyOpCUDAKernel : public framework::OpKernel<T> {
// FIXME(typhoonzero): types of T is for inference data.
// label data is always int64
REGISTER_OP_CUDA_KERNEL
(
accuracy
,
paddle
::
operators
::
AccuracyOpCUDAKernel
<
float
>
,
paddle
::
operators
::
AccuracyOpCUDAKernel
<
double
>
,
paddle
::
operators
::
AccuracyOpCUDAKernel
<
paddle
::
platform
::
float16
>
);
REGISTER_OP_CUDA_KERNEL
(
accuracy
,
paddle
::
operators
::
AccuracyOpCUDAKernel
<
float
>
,
paddle
::
operators
::
AccuracyOpCUDAKernel
<
double
>
);
paddle/fluid/operators/optimizers/momentum_op.cu
浏览文件 @
2537ac51
...
...
@@ -14,11 +14,8 @@ limitations under the License. */
#include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/operators/optimizers/momentum_op.h"
#include "paddle/fluid/platform/float16.h"
namespace
ops
=
paddle
::
operators
;
REGISTER_OP_CUDA_KERNEL
(
momentum
,
ops
::
MomentumOpKernel
<
paddle
::
platform
::
CUDADeviceContext
,
float
>
,
ops
::
MomentumOpKernel
<
paddle
::
platform
::
CUDADeviceContext
,
double
>
,
ops
::
MomentumOpKernel
<
paddle
::
platform
::
CUDADeviceContext
,
paddle
::
platform
::
float16
>
);
ops
::
MomentumOpKernel
<
paddle
::
platform
::
CUDADeviceContext
,
double
>
);
paddle/fluid/operators/optimizers/momentum_op.h
浏览文件 @
2537ac51
...
...
@@ -237,8 +237,7 @@ class SparseMomentumFunctor<T, UseNesterov> {
inline
HOSTDEVICE
void
operator
()(
size_t
i
)
{
auto
row_idx
=
math
::
BinarySearch
<
int64_t
>
(
rows_
,
row_height_
,
i
/
row_numel_
);
T
g
=
row_idx
>=
0
?
g_
[
row_idx
*
row_numel_
+
i
%
row_numel_
]
:
static_cast
<
T
>
(
0
);
T
g
=
row_idx
>=
0
?
g_
[
row_idx
*
row_numel_
+
i
%
row_numel_
]
:
0
;
// put memory access in register
const
T
p
=
p_
[
i
];
const
T
lr
=
lr_
[
0
];
...
...
@@ -283,8 +282,7 @@ class SparseMomentumFunctor<T, NoNesterov> {
inline
HOSTDEVICE
void
operator
()(
size_t
i
)
{
auto
row_idx
=
math
::
BinarySearch
<
int64_t
>
(
rows_
,
row_height_
,
i
/
row_numel_
);
T
g
=
row_idx
>=
0
?
g_
[
row_idx
*
row_numel_
+
i
%
row_numel_
]
:
static_cast
<
T
>
(
0
);
T
g
=
row_idx
>=
0
?
g_
[
row_idx
*
row_numel_
+
i
%
row_numel_
]
:
0
;
// put memory access in register
const
T
p
=
p_
[
i
];
const
T
lr
=
lr_
[
0
];
...
...
paddle/fluid/operators/top_k_op.cu
浏览文件 @
2537ac51
...
...
@@ -16,7 +16,6 @@ limitations under the License. */
#include "paddle/fluid/operators/top_k_op.h"
#include "paddle/fluid/platform/assert.h"
#include "paddle/fluid/platform/cuda_device_function.h"
#include "paddle/fluid/platform/float16.h"
namespace
paddle
{
namespace
operators
{
...
...
@@ -151,7 +150,7 @@ __device__ __forceinline__ void ThreadGetTopK(Pair<T> topk[], int* beam,
if
(
k
<
MaxLength
-
(
*
beam
))
{
topk
[
k
]
=
topk
[
k
+
*
beam
];
}
else
{
topk
[
k
].
set
(
-
static_cast
<
T
>
(
INFINITY
)
,
-
1
);
topk
[
k
].
set
(
-
INFINITY
,
-
1
);
}
}
if
(
!
(
*
is_empty
))
{
...
...
@@ -161,7 +160,7 @@ __device__ __forceinline__ void ThreadGetTopK(Pair<T> topk[], int* beam,
}
*
max
=
topk
[
MaxLength
-
1
];
if
((
*
max
).
v
==
-
static_cast
<
T
>
(
1
)
)
*
is_empty
=
true
;
if
((
*
max
).
v
==
-
1
)
*
is_empty
=
true
;
*
beam
=
0
;
}
}
...
...
@@ -182,7 +181,7 @@ __device__ __forceinline__ void ThreadGetTopK(Pair<T> topk[], int* beam,
if
(
k
<
MaxLength
-
*
beam
)
{
topk
[
k
]
=
topk
[
k
+
*
beam
];
}
else
{
topk
[
k
].
set
(
-
static_cast
<
T
>
(
INFINITY
)
,
-
1
);
topk
[
k
].
set
(
-
INFINITY
,
-
1
);
}
}
if
(
!
(
*
is_empty
))
{
...
...
@@ -279,7 +278,7 @@ __global__ void KeMatrixTopK(T* output, int output_stride, int64_t* indices,
bool
firststep
=
true
;
for
(
int
j
=
0
;
j
<
MaxLength
;
j
++
)
{
topk
[
j
].
set
(
-
static_cast
<
T
>
(
INFINITY
)
,
-
1
);
topk
[
j
].
set
(
-
INFINITY
,
-
1
);
}
while
(
top_num
)
{
ThreadGetTopK
<
T
,
MaxLength
,
BlockSize
>
(
...
...
@@ -363,7 +362,5 @@ class TopkOpCUDAKernel : public framework::OpKernel<T> {
}
// namespace operators
}
// namespace paddle
REGISTER_OP_CUDA_KERNEL
(
top_k
,
paddle
::
operators
::
TopkOpCUDAKernel
<
float
>
,
paddle
::
operators
::
TopkOpCUDAKernel
<
double
>
,
paddle
::
operators
::
TopkOpCUDAKernel
<
paddle
::
platform
::
float16
>
);
REGISTER_OP_CUDA_KERNEL
(
top_k
,
paddle
::
operators
::
TopkOpCUDAKernel
<
float
>
,
paddle
::
operators
::
TopkOpCUDAKernel
<
double
>
);
paddle/fluid/platform/nccl_helper.h
浏览文件 @
2537ac51
...
...
@@ -23,7 +23,6 @@
#include "paddle/fluid/framework/data_type.h"
#include "paddle/fluid/platform/dynload/nccl.h"
#include "paddle/fluid/platform/enforce.h"
#include "paddle/fluid/platform/float16.h"
#define NCCL_ID_VARNAME "NCCLID"
...
...
@@ -39,8 +38,6 @@ inline ncclDataType_t ToNCCLDataType(framework::proto::VarType::Type type) {
return
ncclInt
;
}
else
if
(
type
==
framework
::
proto
::
VarType
::
INT64
)
{
return
ncclInt64
;
}
else
if
(
type
==
framework
::
proto
::
VarType
::
FP16
)
{
return
ncclFloat16
;
}
else
{
PADDLE_THROW
(
"Not supported"
);
}
...
...
python/paddle/fluid/data_feeder.py
浏览文件 @
2537ac51
...
...
@@ -44,8 +44,6 @@ class DataToLoDTensorConverter(object):
self
.
dtype
=
'int64'
elif
dtype
==
core
.
VarDesc
.
VarType
.
FP64
:
self
.
dtype
=
'float64'
elif
dtype
==
core
.
VarDesc
.
VarType
.
FP16
:
self
.
dtype
=
'float16'
elif
dtype
==
core
.
VarDesc
.
VarType
.
INT32
:
self
.
dtype
=
'int32'
elif
dtype
==
core
.
VarDesc
.
VarType
.
UINT8
:
...
...
python/paddle/fluid/initializer.py
浏览文件 @
2537ac51
...
...
@@ -18,7 +18,6 @@ from . import framework
import
numpy
as
np
import
contextlib
from
.core
import
VarDesc
from
.
import
unique_name
__all__
=
[
'Constant'
,
'Uniform'
,
'Normal'
,
'TruncatedNormal'
,
'Xavier'
,
'Bilinear'
,
...
...
@@ -208,39 +207,16 @@ class UniformInitializer(Initializer):
# Initialization Ops should be prepended and not appended
if
self
.
_seed
==
0
:
self
.
_seed
=
block
.
program
.
random_seed
# to be compatible of fp16 initalizers
if
var
.
dtype
==
VarDesc
.
VarType
.
FP16
:
out_dtype
=
VarDesc
.
VarType
.
FP32
out_var
=
block
.
create_var
(
name
=
unique_name
.
generate
(
"."
.
join
([
'gaussian_random'
,
'tmp'
])),
shape
=
var
.
shape
,
dtype
=
out_dtype
,
type
=
VarDesc
.
VarType
.
LOD_TENSOR
,
persistable
=
False
)
else
:
out_dtype
=
var
.
dtype
out_var
=
var
op
=
block
.
_prepend_op
(
type
=
"uniform_random"
,
outputs
=
{
"Out"
:
out_
var
},
outputs
=
{
"Out"
:
var
},
attrs
=
{
"shape"
:
var
.
shape
,
"dtype"
:
out_dtype
,
"dtype"
:
int
(
var
.
dtype
)
,
"min"
:
self
.
_low
,
"max"
:
self
.
_high
,
"seed"
:
self
.
_seed
})
if
var
.
dtype
==
VarDesc
.
VarType
.
FP16
:
block
.
append_op
(
type
=
"cast"
,
inputs
=
{
"X"
:
out_var
},
outputs
=
{
"Out"
:
var
},
attrs
=
{
"in_dtype"
:
out_var
.
dtype
,
"out_dtype"
:
var
.
dtype
})
var
.
op
=
op
return
op
...
...
@@ -285,39 +261,17 @@ class NormalInitializer(Initializer):
# Initialization Ops should be prepended and not appended
if
self
.
_seed
==
0
:
self
.
_seed
=
block
.
program
.
random_seed
# to be compatible of fp16 initalizers
if
var
.
dtype
==
VarDesc
.
VarType
.
FP16
:
out_dtype
=
VarDesc
.
VarType
.
FP32
out_var
=
block
.
create_var
(
name
=
unique_name
.
generate
(
"."
.
join
([
'gaussian_random'
,
'tmp'
])),
shape
=
var
.
shape
,
dtype
=
out_dtype
,
type
=
VarDesc
.
VarType
.
LOD_TENSOR
,
persistable
=
False
)
else
:
out_dtype
=
var
.
dtype
out_var
=
var
op
=
block
.
_prepend_op
(
type
=
"gaussian_random"
,
outputs
=
{
"Out"
:
out_
var
},
outputs
=
{
"Out"
:
var
},
attrs
=
{
"shape"
:
var
.
shape
,
"dtype"
:
out_dtype
,
"dtype"
:
int
(
var
.
dtype
)
,
"mean"
:
self
.
_mean
,
"std"
:
self
.
_std_dev
,
"seed"
:
self
.
_seed
,
"use_mkldnn"
:
False
})
if
var
.
dtype
==
VarDesc
.
VarType
.
FP16
:
block
.
append_op
(
type
=
"cast"
,
inputs
=
{
"X"
:
out_var
},
outputs
=
{
"Out"
:
var
},
attrs
=
{
"in_dtype"
:
out_var
.
dtype
,
"out_dtype"
:
var
.
dtype
})
var
.
op
=
op
return
op
...
...
python/paddle/fluid/layers/learning_rate_scheduler.py
浏览文件 @
2537ac51
...
...
@@ -63,18 +63,14 @@ def noam_decay(d_model, warmup_steps):
Returns:
The decayed learning rate.
"""
with
default_main_program
().
_lr_schedule_guard
():
global_step
=
_decay_step_counter
(
1
)
def
_lr_schedule
(
dtype
):
with
default_main_program
().
_lr_schedule_guard
():
global_step
=
_decay_step_counter
(
1
)
a
=
global_step
**-
0.5
b
=
(
warmup_steps
**-
1.5
)
*
global_step
lr_value
=
(
d_model
**-
0.5
)
*
nn
.
elementwise_min
(
a
,
b
)
a
=
global_step
**-
0.5
b
=
(
warmup_steps
**-
1.5
)
*
global_step
lr_value
=
(
d_model
**-
0.5
)
*
nn
.
elementwise_min
(
a
,
b
)
return
lr_value
return
_lr_schedule
return
lr_value
def
exponential_decay
(
learning_rate
,
decay_steps
,
decay_rate
,
staircase
=
False
):
...
...
@@ -113,19 +109,15 @@ def exponential_decay(learning_rate, decay_steps, decay_rate, staircase=False):
sgd_optimizer.minimize(avg_cost)
"""
with
default_main_program
().
_lr_schedule_guard
():
global_step
=
_decay_step_counter
()
def
_lr_schedule
(
dtype
):
with
default_main_program
().
_lr_schedule_guard
():
global_step
=
_decay_step_counter
()
div_res
=
global_step
/
decay_steps
if
staircase
:
div_res
=
ops
.
floor
(
div_res
)
decayed_lr
=
learning_rate
*
(
decay_rate
**
div_res
)
div_res
=
global_step
/
decay_steps
if
staircase
:
div_res
=
ops
.
floor
(
div_res
)
decayed_lr
=
learning_rate
*
(
decay_rate
**
div_res
)
return
decayed_lr
return
_lr_schedule
return
decayed_lr
def
natural_exp_decay
(
learning_rate
,
decay_steps
,
decay_rate
,
staircase
=
False
):
...
...
@@ -146,19 +138,15 @@ def natural_exp_decay(learning_rate, decay_steps, decay_rate, staircase=False):
Returns:
The decayed learning rate
"""
with
default_main_program
().
_lr_schedule_guard
():
global_step
=
_decay_step_counter
()
def
_lr_schedule
(
dtype
):
with
default_main_program
().
_lr_schedule_guard
():
global_step
=
_decay_step_counter
()
div_res
=
global_step
/
decay_steps
if
staircase
:
div_res
=
ops
.
floor
(
div_res
)
decayed_lr
=
learning_rate
*
ops
.
exp
(
-
1
*
decay_rate
*
div_res
)
return
decayed_lr
div_res
=
global_step
/
decay_steps
if
staircase
:
div_res
=
ops
.
floor
(
div_res
)
decayed_lr
=
learning_rate
*
ops
.
exp
(
-
1
*
decay_rate
*
div_res
)
return
_lr_schedule
return
decayed_lr
def
inverse_time_decay
(
learning_rate
,
decay_steps
,
decay_rate
,
staircase
=
False
):
...
...
@@ -196,20 +184,16 @@ def inverse_time_decay(learning_rate, decay_steps, decay_rate, staircase=False):
staircase=True))
sgd_optimizer.minimize(avg_cost)
"""
with
default_main_program
().
_lr_schedule_guard
():
global_step
=
_decay_step_counter
()
def
_lr_schedule
(
dtype
):
with
default_main_program
().
_lr_schedule_guard
()
:
global_step
=
_decay_step_counter
(
)
div_res
=
global_step
/
decay_steps
if
staircase
:
div_res
=
ops
.
floor
(
div_res
)
div_res
=
global_step
/
decay_steps
if
staircase
:
div_res
=
ops
.
floor
(
div_res
)
decayed_lr
=
learning_rate
/
(
1
+
decay_rate
*
div_res
)
decayed_lr
=
learning_rate
/
(
1
+
decay_rate
*
div_res
)
return
decayed_lr
return
_lr_schedule
return
decayed_lr
def
polynomial_decay
(
learning_rate
,
...
...
@@ -240,33 +224,28 @@ def polynomial_decay(learning_rate,
Returns:
Variable: The decayed learning rate
"""
with
default_main_program
().
_lr_schedule_guard
():
global_step
=
_decay_step_counter
()
def
_lr_schedule
(
dtype
,
decay_steps
=
decay_steps
):
with
default_main_program
().
_lr_schedule_guard
():
global_step
=
_decay_step_counter
()
if
cycle
:
div_res
=
ops
.
ceil
(
global_step
/
decay_steps
)
zero_var
=
tensor
.
fill_constant
(
shape
=
[
1
],
dtype
=
dtype
,
value
=
0.0
)
one_var
=
tensor
.
fill_constant
(
shape
=
[
1
],
dtype
=
dtype
,
value
=
1.0
)
with
control_flow
.
Switch
()
as
switch
:
with
switch
.
case
(
global_step
==
zero_var
):
tensor
.
assign
(
input
=
one_var
,
output
=
div_res
)
decay_steps
=
decay_steps
*
div_res
else
:
decay_steps_var
=
tensor
.
fill_constant
(
shape
=
[
1
],
dtype
=
dtype
,
value
=
float
(
decay_steps
))
global_step
=
nn
.
elementwise_min
(
x
=
global_step
,
y
=
decay_steps_var
)
if
cycle
:
div_res
=
ops
.
ceil
(
global_step
/
decay_steps
)
zero_var
=
tensor
.
fill_constant
(
shape
=
[
1
],
dtype
=
'float32'
,
value
=
0.0
)
one_var
=
tensor
.
fill_constant
(
shape
=
[
1
],
dtype
=
'float32'
,
value
=
1.0
)
decayed_lr
=
(
learning_rate
-
end_learning_rate
)
*
\
((
1
-
global_step
/
decay_steps
)
**
power
)
+
end_learning_rate
return
decayed_lr
with
control_flow
.
Switch
()
as
switch
:
with
switch
.
case
(
global_step
==
zero_var
):
tensor
.
assign
(
input
=
one_var
,
output
=
div_res
)
decay_steps
=
decay_steps
*
div_res
else
:
decay_steps_var
=
tensor
.
fill_constant
(
shape
=
[
1
],
dtype
=
'float32'
,
value
=
float
(
decay_steps
))
global_step
=
nn
.
elementwise_min
(
x
=
global_step
,
y
=
decay_steps_var
)
return
_lr_schedule
decayed_lr
=
(
learning_rate
-
end_learning_rate
)
*
\
((
1
-
global_step
/
decay_steps
)
**
power
)
+
end_learning_rate
return
decayed_lr
def
piecewise_decay
(
boundaries
,
values
):
...
...
@@ -294,42 +273,38 @@ def piecewise_decay(boundaries, values):
"""
def
_lr_schedule
(
dtype
):
with
default_main_program
().
_lr_schedule_guard
():
if
len
(
values
)
-
len
(
boundaries
)
!=
1
:
raise
ValueError
(
"len(values) - len(boundaries) should be 1"
)
global_step
=
_decay_step_counter
()
lr
=
tensor
.
create_global_var
(
shape
=
[
1
],
value
=
0.0
,
dtype
=
'float32'
,
persistable
=
True
,
name
=
"learning_rate"
)
with
control_flow
.
Switch
()
as
switch
:
for
i
in
range
(
len
(
boundaries
)):
boundary_val
=
tensor
.
fill_constant
(
shape
=
[
1
],
dtype
=
'float32'
,
value
=
float
(
boundaries
[
i
]),
force_cpu
=
True
)
value_var
=
tensor
.
fill_constant
(
shape
=
[
1
],
dtype
=
'float32'
,
value
=
float
(
values
[
i
]))
with
switch
.
case
(
global_step
<
boundary_val
):
tensor
.
assign
(
value_var
,
lr
)
last_value_var
=
tensor
.
fill_constant
(
with
default_main_program
().
_lr_schedule_guard
():
if
len
(
values
)
-
len
(
boundaries
)
!=
1
:
raise
ValueError
(
"len(values) - len(boundaries) should be 1"
)
global_step
=
_decay_step_counter
()
lr
=
tensor
.
create_global_var
(
shape
=
[
1
],
value
=
0.0
,
dtype
=
'float32'
,
persistable
=
True
,
name
=
"learning_rate"
)
with
control_flow
.
Switch
()
as
switch
:
for
i
in
range
(
len
(
boundaries
)):
boundary_val
=
tensor
.
fill_constant
(
shape
=
[
1
],
dtype
=
'float32'
,
value
=
float
(
values
[
len
(
values
)
-
1
]))
with
switch
.
default
():
tensor
.
assign
(
last_value_var
,
lr
)
return
lr
value
=
float
(
boundaries
[
i
]),
force_cpu
=
True
)
value_var
=
tensor
.
fill_constant
(
shape
=
[
1
],
dtype
=
'float32'
,
value
=
float
(
values
[
i
]))
with
switch
.
case
(
global_step
<
boundary_val
):
tensor
.
assign
(
value_var
,
lr
)
last_value_var
=
tensor
.
fill_constant
(
shape
=
[
1
],
dtype
=
'float32'
,
value
=
float
(
values
[
len
(
values
)
-
1
]))
with
switch
.
default
():
tensor
.
assign
(
last_value_var
,
lr
)
return
_lr_schedule
return
lr
def
append_LARS
(
params_grads
,
learning_rate
,
weight_decay
):
...
...
python/paddle/fluid/layers/nn.py
浏览文件 @
2537ac51
...
...
@@ -2798,10 +2798,6 @@ def batch_norm(input,
helper
=
LayerHelper
(
'batch_norm'
,
**
locals
())
dtype
=
helper
.
input_dtype
()
# use fp32 for bn parameter
if
dtype
==
core
.
VarDesc
.
VarType
.
FP16
:
dtype
=
core
.
VarDesc
.
VarType
.
FP32
input_shape
=
input
.
shape
if
data_layout
==
'NCHW'
:
channel_num
=
input_shape
[
1
]
...
...
@@ -2836,7 +2832,7 @@ def batch_norm(input,
trainable
=
False
,
do_model_average
=
do_model_average_for_mean_and_var
),
shape
=
param_shape
,
dtype
=
dtype
)
dtype
=
input
.
dtype
)
mean
.
stop_gradient
=
True
variance
=
helper
.
create_parameter
(
...
...
@@ -2846,7 +2842,7 @@ def batch_norm(input,
trainable
=
False
,
do_model_average
=
do_model_average_for_mean_and_var
),
shape
=
param_shape
,
dtype
=
dtype
)
dtype
=
input
.
dtype
)
variance
.
stop_gradient
=
True
# create output
...
...
python/paddle/fluid/optimizer.py
浏览文件 @
2537ac51
...
...
@@ -50,21 +50,17 @@ class Optimizer(object):
def
__init__
(
self
,
learning_rate
,
regularization
=
None
,
name
=
None
):
if
not
isinstance
(
learning_rate
,
float
)
and
\
not
isinstance
(
learning_rate
,
framework
.
Variable
)
and
\
not
callable
(
learning_rate
):
raise
TypeError
(
"learning rate should be float or Variable or callable(dtype)"
)
not
isinstance
(
learning_rate
,
framework
.
Variable
):
raise
TypeError
(
"learning rate should be float or Variable"
)
self
.
_name
=
name
self
.
regularization
=
regularization
self
.
_learning_rate
=
learning_rate
# the learning rate type should be inferenced from loss
self
.
_dtype
=
None
# each program should have a independent learning rate
# program -> Variable(learning_rate) or:
# program -> callable(return learning_rate Variable)
# program -> Variable(learning_rate)
self
.
_learning_rate_map
=
dict
()
if
isinstance
(
self
.
_learning_rate
,
framework
.
Variable
)
or
\
callable
(
self
.
_learning_rate
):
if
isinstance
(
self
.
_learning_rate
,
framework
.
Variable
):
self
.
_learning_rate_map
[
framework
.
default_main_program
(
)]
=
self
.
_learning_rate
# Dictionary of accumulators. Some optimizer subclasses need to
...
...
@@ -79,11 +75,6 @@ class Optimizer(object):
if
isinstance
(
lr
,
framework
.
Variable
):
return
elif
callable
(
lr
):
dtype
=
'float32'
if
self
.
_dtype
is
None
else
self
.
_dtype
self
.
_learning_rate_map
[
framework
.
default_main_program
()]
=
lr
(
dtype
)
return
else
:
if
not
isinstance
(
self
.
_learning_rate
,
float
):
raise
TypeError
(
...
...
python/paddle/fluid/tests/unittests/op_test.py
浏览文件 @
2537ac51
...
...
@@ -368,8 +368,6 @@ class OpTest(unittest.TestCase):
place
=
core
.
CUDAPlace
(
0
)
if
core
.
is_float16_supported
(
place
):
return
[
place
]
else
:
return
[]
else
:
return
[]
places
=
[
fluid
.
CPUPlace
()]
...
...
python/paddle/fluid/tests/unittests/test_accuracy_op.py
浏览文件 @
2537ac51
...
...
@@ -22,10 +22,8 @@ from op_test import OpTest
class
TestAccuracyOp
(
OpTest
):
def
setUp
(
self
):
self
.
op_type
=
"accuracy"
self
.
dtype
=
np
.
float32
self
.
init_dtype
()
n
=
8192
infer
=
np
.
random
.
random
((
n
,
1
)).
astype
(
self
.
dtype
)
infer
=
np
.
random
.
random
((
n
,
1
)).
astype
(
"float32"
)
indices
=
np
.
random
.
randint
(
0
,
2
,
(
n
,
1
))
label
=
np
.
random
.
randint
(
0
,
2
,
(
n
,
1
))
self
.
inputs
=
{
'Out'
:
infer
,
'Indices'
:
indices
,
"Label"
:
label
}
...
...
@@ -36,25 +34,14 @@ class TestAccuracyOp(OpTest):
num_correct
+=
1
break
self
.
outputs
=
{
'Accuracy'
:
np
.
array
([
num_correct
/
float
(
n
)]).
astype
(
self
.
dtype
),
'Accuracy'
:
np
.
array
([
num_correct
/
float
(
n
)]).
astype
(
"float32"
),
'Correct'
:
np
.
array
([
num_correct
]).
astype
(
"int32"
),
'Total'
:
np
.
array
([
n
]).
astype
(
"int32"
)
}
def
init_dtype
(
self
):
pass
def
test_check_output
(
self
):
self
.
check_output
()
class
TestAccuracyOpFp16
(
TestAccuracyOp
):
def
init_dtype
(
self
):
self
.
dtype
=
np
.
float16
def
test_check_output
(
self
):
self
.
check_output
(
atol
=
1e-3
)
if
__name__
==
'__main__'
:
unittest
.
main
()
python/paddle/fluid/tests/unittests/test_elementwise_div_op.py
浏览文件 @
2537ac51
...
...
@@ -21,16 +21,14 @@ from op_test import OpTest
class
ElementwiseDivOp
(
OpTest
):
def
setUp
(
self
):
self
.
op_type
=
"elementwise_div"
self
.
dtype
=
np
.
float32
self
.
init_dtype
()
""" Warning
CPU gradient check error!
'X': np.random.random((32,84)).astype("float32"),
'Y': np.random.random((32,84)).astype("float32")
"""
self
.
inputs
=
{
'X'
:
np
.
random
.
uniform
(
0.1
,
1
,
[
13
,
17
]).
astype
(
self
.
dtype
),
'Y'
:
np
.
random
.
uniform
(
0.1
,
1
,
[
13
,
17
]).
astype
(
self
.
dtype
)
'X'
:
np
.
random
.
uniform
(
0.1
,
1
,
[
13
,
17
]).
astype
(
"float32"
),
'Y'
:
np
.
random
.
uniform
(
0.1
,
1
,
[
13
,
17
]).
astype
(
"float32"
)
}
self
.
outputs
=
{
'Out'
:
np
.
divide
(
self
.
inputs
[
'X'
],
self
.
inputs
[
'Y'
])}
...
...
@@ -48,9 +46,6 @@ class ElementwiseDivOp(OpTest):
self
.
check_grad
(
[
'X'
],
'Out'
,
max_relative_error
=
0.05
,
no_grad_set
=
set
(
'Y'
))
def
init_dtype
(
self
):
pass
class
TestElementwiseDivOp_scalar
(
ElementwiseDivOp
):
def
setUp
(
self
):
...
...
@@ -131,21 +126,5 @@ class TestElementwiseDivOp_broadcast_3(ElementwiseDivOp):
}
class
TestElementwiseDivOpFp16
(
ElementwiseDivOp
):
def
init_dtype
(
self
):
self
.
dtype
=
np
.
float16
def
test_check_grad_normal
(
self
):
self
.
check_grad
([
'X'
,
'Y'
],
'Out'
,
max_relative_error
=
1
)
def
test_check_grad_ingore_x
(
self
):
self
.
check_grad
(
[
'Y'
],
'Out'
,
max_relative_error
=
1
,
no_grad_set
=
set
(
"X"
))
def
test_check_grad_ingore_y
(
self
):
self
.
check_grad
(
[
'X'
],
'Out'
,
max_relative_error
=
1
,
no_grad_set
=
set
(
'Y'
))
if
__name__
==
'__main__'
:
unittest
.
main
()
python/paddle/fluid/tests/unittests/test_elementwise_mul_op.py
浏览文件 @
2537ac51
...
...
@@ -135,10 +135,5 @@ class TestElementwiseMulOp_broadcast_3(ElementwiseMulOp):
}
class
TestElementwiseMulOpFp16
(
ElementwiseMulOp
):
def
init_dtype
(
self
):
self
.
dtype
=
np
.
float16
if
__name__
==
'__main__'
:
unittest
.
main
()
python/paddle/fluid/tests/unittests/test_fill_zeros_like_op.py
浏览文件 @
2537ac51
...
...
@@ -22,22 +22,12 @@ from op_test import OpTest
class
TestFillZerosLikeOp
(
OpTest
):
def
setUp
(
self
):
self
.
op_type
=
"fill_zeros_like"
self
.
dtype
=
np
.
float32
self
.
init_dtype
()
self
.
inputs
=
{
'X'
:
np
.
random
.
random
((
219
,
232
)).
astype
(
self
.
dtype
)}
self
.
inputs
=
{
'X'
:
np
.
random
.
random
((
219
,
232
)).
astype
(
"float32"
)}
self
.
outputs
=
{
'Out'
:
np
.
zeros_like
(
self
.
inputs
[
"X"
])}
def
init_dtype
(
self
):
pass
def
test_check_output
(
self
):
self
.
check_output
()
class
TestFillZerosLikeOpFp16
(
TestFillZerosLikeOp
):
def
init_dtype
(
self
):
self
.
dtype
=
np
.
float16
if
__name__
==
"__main__"
:
unittest
.
main
()
python/paddle/fluid/tests/unittests/test_learning_rate_scheduler.py
浏览文件 @
2537ac51
...
...
@@ -97,7 +97,7 @@ class TestLearningRateDecay(unittest.TestCase):
startup_prog
=
fluid
.
Program
()
with
fluid
.
program_guard
(
main_prog
,
startup_prog
):
decayed_lr
=
fluid_decay_fn
(
**
kwargs
)
(
"float32"
)
decayed_lr
=
fluid_decay_fn
(
**
kwargs
)
place
=
fluid
.
CPUPlace
()
exe
=
fluid
.
Executor
(
place
)
...
...
python/paddle/fluid/tests/unittests/test_momentum_op.py
浏览文件 @
2537ac51
...
...
@@ -24,13 +24,11 @@ from op_test import OpTest
class
TestMomentumOp1
(
OpTest
):
def
setUp
(
self
):
self
.
op_type
=
"momentum"
self
.
dtype
=
np
.
float32
self
.
init_dtype
()
param
=
np
.
random
.
random
((
123
,
321
)).
astype
(
self
.
dtype
)
grad
=
np
.
random
.
random
((
123
,
321
)).
astype
(
self
.
dtype
)
velocity
=
np
.
zeros
((
123
,
321
)).
astype
(
self
.
dtype
)
learning_rate
=
np
.
array
([
0.001
]).
astype
(
self
.
dtype
)
param
=
np
.
random
.
random
((
123
,
321
)).
astype
(
"float32"
)
grad
=
np
.
random
.
random
((
123
,
321
)).
astype
(
"float32"
)
velocity
=
np
.
zeros
((
123
,
321
)).
astype
(
"float32"
)
learning_rate
=
np
.
array
([
0.001
]).
astype
(
"float32"
)
mu
=
0.0001
use_nesterov
=
False
...
...
@@ -52,21 +50,10 @@ class TestMomentumOp1(OpTest):
self
.
outputs
=
{
'ParamOut'
:
param_out
,
'VelocityOut'
:
velocity_out
}
def
init_dtype
(
self
):
pass
def
test_check_output
(
self
):
self
.
check_output
()
class
TestMomentumOpFp16
(
TestMomentumOp1
):
def
init_dtype
(
self
):
self
.
dtype
=
np
.
float16
def
test_check_output
(
self
):
self
.
check_output
(
atol
=
1e-3
)
class
TestMomentumOp2
(
OpTest
):
'''Test Momentum with default values for attributes
'''
...
...
python/paddle/fluid/tests/unittests/test_top_k_op.py
浏览文件 @
2537ac51
...
...
@@ -23,11 +23,8 @@ class TestTopkOp(OpTest):
def
setUp
(
self
):
self
.
set_args
()
self
.
op_type
=
"top_k"
self
.
dtype
=
np
.
float32
self
.
init_dtype
()
k
=
self
.
top_k
input
=
np
.
random
.
random
((
self
.
row
,
k
)).
astype
(
self
.
dtype
)
input
=
np
.
random
.
random
((
self
.
row
,
k
)).
astype
(
"float32"
)
output
=
np
.
ndarray
((
self
.
row
,
k
))
indices
=
np
.
ndarray
((
self
.
row
,
k
)).
astype
(
"int64"
)
...
...
@@ -41,9 +38,6 @@ class TestTopkOp(OpTest):
self
.
outputs
=
{
'Out'
:
output
,
'Indices'
:
indices
}
def
init_dtype
(
self
):
pass
def
set_args
(
self
):
self
.
row
=
32
self
.
top_k
=
1
...
...
@@ -52,11 +46,6 @@ class TestTopkOp(OpTest):
self
.
check_output
()
class
TestTopkOpFp16
(
TestTopkOp
):
def
init_dtype
(
self
):
self
.
dtype
=
np
.
float16
class
TestTopkOp3d
(
OpTest
):
def
setUp
(
self
):
self
.
op_type
=
"top_k"
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录