Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
PaddlePaddle
Paddle
提交
da87f7a6
P
Paddle
项目概览
PaddlePaddle
/
Paddle
1 年多 前同步成功
通知
2302
Star
20931
Fork
5422
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
1423
列表
看板
标记
里程碑
合并请求
543
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
1,423
Issue
1,423
列表
看板
标记
里程碑
合并请求
543
合并请求
543
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
da87f7a6
编写于
12月 20, 2018
作者:
T
typhoonzero
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
Revert "[Feature] Fp16 training for resnet50 (#14850)"
This reverts commit
3d750f9c
.
上级
3babc801
变更
25
隐藏空白更改
内联
并排
Showing
25 changed file
with
141 addition
and
356 deletion
+141
-356
paddle/fluid/framework/details/multi_devices_graph_pass.cc
paddle/fluid/framework/details/multi_devices_graph_pass.cc
+3
-5
paddle/fluid/framework/details/multi_devices_graph_pass.h
paddle/fluid/framework/details/multi_devices_graph_pass.h
+1
-2
paddle/fluid/framework/details/scale_loss_grad_op_handle.cc
paddle/fluid/framework/details/scale_loss_grad_op_handle.cc
+17
-44
paddle/fluid/framework/details/scale_loss_grad_op_handle.h
paddle/fluid/framework/details/scale_loss_grad_op_handle.h
+2
-3
paddle/fluid/operators/elementwise/elementwise_div_op.cu
paddle/fluid/operators/elementwise/elementwise_div_op.cu
+0
-5
paddle/fluid/operators/elementwise/elementwise_mul_op.cu
paddle/fluid/operators/elementwise/elementwise_mul_op.cu
+10
-12
paddle/fluid/operators/fill_zeros_like_op.cu.cc
paddle/fluid/operators/fill_zeros_like_op.cu.cc
+0
-3
paddle/fluid/operators/metrics/accuracy_op.cu
paddle/fluid/operators/metrics/accuracy_op.cu
+3
-5
paddle/fluid/operators/optimizers/momentum_op.cu
paddle/fluid/operators/optimizers/momentum_op.cu
+1
-4
paddle/fluid/operators/optimizers/momentum_op.h
paddle/fluid/operators/optimizers/momentum_op.h
+2
-4
paddle/fluid/operators/top_k_op.cu
paddle/fluid/operators/top_k_op.cu
+6
-9
paddle/fluid/platform/nccl_helper.h
paddle/fluid/platform/nccl_helper.h
+0
-3
python/paddle/fluid/data_feeder.py
python/paddle/fluid/data_feeder.py
+0
-2
python/paddle/fluid/initializer.py
python/paddle/fluid/initializer.py
+4
-50
python/paddle/fluid/layers/learning_rate_scheduler.py
python/paddle/fluid/layers/learning_rate_scheduler.py
+75
-100
python/paddle/fluid/layers/nn.py
python/paddle/fluid/layers/nn.py
+2
-6
python/paddle/fluid/optimizer.py
python/paddle/fluid/optimizer.py
+4
-13
python/paddle/fluid/tests/unittests/op_test.py
python/paddle/fluid/tests/unittests/op_test.py
+0
-2
python/paddle/fluid/tests/unittests/test_accuracy_op.py
python/paddle/fluid/tests/unittests/test_accuracy_op.py
+2
-15
python/paddle/fluid/tests/unittests/test_elementwise_div_op.py
...n/paddle/fluid/tests/unittests/test_elementwise_div_op.py
+2
-23
python/paddle/fluid/tests/unittests/test_elementwise_mul_op.py
...n/paddle/fluid/tests/unittests/test_elementwise_mul_op.py
+0
-5
python/paddle/fluid/tests/unittests/test_fill_zeros_like_op.py
...n/paddle/fluid/tests/unittests/test_fill_zeros_like_op.py
+1
-11
python/paddle/fluid/tests/unittests/test_learning_rate_scheduler.py
...dle/fluid/tests/unittests/test_learning_rate_scheduler.py
+1
-1
python/paddle/fluid/tests/unittests/test_momentum_op.py
python/paddle/fluid/tests/unittests/test_momentum_op.py
+4
-17
python/paddle/fluid/tests/unittests/test_top_k_op.py
python/paddle/fluid/tests/unittests/test_top_k_op.py
+1
-12
未找到文件。
paddle/fluid/framework/details/multi_devices_graph_pass.cc
浏览文件 @
da87f7a6
...
@@ -355,9 +355,7 @@ std::unique_ptr<ir::Graph> MultiDevSSAGraphBuilder::ApplyImpl(
...
@@ -355,9 +355,7 @@ std::unique_ptr<ir::Graph> MultiDevSSAGraphBuilder::ApplyImpl(
BuildStrategy
::
GradientScaleStrategy
::
kCustomized
)
{
BuildStrategy
::
GradientScaleStrategy
::
kCustomized
)
{
// TODO(paddle-dev): Why is there no input for this op_handle?
// TODO(paddle-dev): Why is there no input for this op_handle?
auto
loss_grad_name
=
node
->
Op
()
->
OutputArgumentNames
()[
0
];
auto
loss_grad_name
=
node
->
Op
()
->
OutputArgumentNames
()[
0
];
auto
out_dtype
=
all_vars_
.
at
(
loss_grad_name
)
->
GetDataType
();
CreateScaleLossGradOp
(
&
result
,
loss_grad_name
,
node
->
outputs
[
0
]);
CreateScaleLossGradOp
(
&
result
,
loss_grad_name
,
node
->
outputs
[
0
],
out_dtype
);
}
}
// This assumes the backward generating code will ensure IsScaleLossOp
// This assumes the backward generating code will ensure IsScaleLossOp
// is true only for the op that scale the final scalar loss.
// is true only for the op that scale the final scalar loss.
...
@@ -660,13 +658,13 @@ int MultiDevSSAGraphBuilder::GetVarDeviceID(
...
@@ -660,13 +658,13 @@ int MultiDevSSAGraphBuilder::GetVarDeviceID(
void
MultiDevSSAGraphBuilder
::
CreateScaleLossGradOp
(
void
MultiDevSSAGraphBuilder
::
CreateScaleLossGradOp
(
ir
::
Graph
*
result
,
const
std
::
string
&
loss_grad_name
,
ir
::
Graph
*
result
,
const
std
::
string
&
loss_grad_name
,
ir
::
Node
*
out_var_node
,
proto
::
VarType
::
Type
dtype
)
const
{
ir
::
Node
*
out_var_node
)
const
{
for
(
size_t
i
=
0
;
i
<
places_
.
size
();
++
i
)
{
for
(
size_t
i
=
0
;
i
<
places_
.
size
();
++
i
)
{
// Insert ScaleCost OpHandle
// Insert ScaleCost OpHandle
auto
*
dev_ctx
=
platform
::
DeviceContextPool
::
Instance
().
Get
(
places_
[
i
]);
auto
*
dev_ctx
=
platform
::
DeviceContextPool
::
Instance
().
Get
(
places_
[
i
]);
auto
*
op_handle
=
new
ScaleLossGradOpHandle
(
auto
*
op_handle
=
new
ScaleLossGradOpHandle
(
result
->
CreateEmptyNode
(
"scale_loss_grad"
,
ir
::
Node
::
Type
::
kOperation
),
result
->
CreateEmptyNode
(
"scale_loss_grad"
,
ir
::
Node
::
Type
::
kOperation
),
local_scopes_
.
size
(),
local_scopes_
[
i
],
places_
[
i
],
dev_ctx
,
dtype
);
local_scopes_
.
size
(),
local_scopes_
[
i
],
places_
[
i
],
dev_ctx
);
result
->
Get
<
GraphOps
>
(
kGraphOps
).
emplace_back
(
op_handle
);
result
->
Get
<
GraphOps
>
(
kGraphOps
).
emplace_back
(
op_handle
);
// FIXME: Currently ScaleLossGradOp only use device_count as scale
// FIXME: Currently ScaleLossGradOp only use device_count as scale
...
...
paddle/fluid/framework/details/multi_devices_graph_pass.h
浏览文件 @
da87f7a6
...
@@ -68,8 +68,7 @@ class MultiDevSSAGraphBuilder : public ir::Pass {
...
@@ -68,8 +68,7 @@ class MultiDevSSAGraphBuilder : public ir::Pass {
void
CreateScaleLossGradOp
(
ir
::
Graph
*
result
,
void
CreateScaleLossGradOp
(
ir
::
Graph
*
result
,
const
std
::
string
&
loss_grad_name
,
const
std
::
string
&
loss_grad_name
,
ir
::
Node
*
out_var_node
,
ir
::
Node
*
out_var_node
)
const
;
proto
::
VarType
::
Type
dtype
)
const
;
VarHandle
*
CreateReduceOp
(
ir
::
Graph
*
result
,
const
std
::
string
&
og
,
VarHandle
*
CreateReduceOp
(
ir
::
Graph
*
result
,
const
std
::
string
&
og
,
int
dst_dev_id
)
const
;
int
dst_dev_id
)
const
;
...
...
paddle/fluid/framework/details/scale_loss_grad_op_handle.cc
浏览文件 @
da87f7a6
...
@@ -22,66 +22,39 @@ namespace details {
...
@@ -22,66 +22,39 @@ namespace details {
ScaleLossGradOpHandle
::
ScaleLossGradOpHandle
(
ir
::
Node
*
node
,
size_t
num_dev
,
ScaleLossGradOpHandle
::
ScaleLossGradOpHandle
(
ir
::
Node
*
node
,
size_t
num_dev
,
Scope
*
scope
,
Scope
*
scope
,
platform
::
Place
place
,
platform
::
Place
place
,
platform
::
DeviceContext
*
dev_ctx
,
platform
::
DeviceContext
*
dev_ctx
)
proto
::
VarType
::
Type
dtype
)
:
OpHandleBase
(
node
),
:
OpHandleBase
(
node
),
coeff_
(
static_cast
<
float
>
(
1.0
/
num_dev
)),
coeff_
(
static_cast
<
float
>
(
1.0
/
num_dev
)),
scope_
(
scope
),
scope_
(
scope
),
place_
(
place
),
place_
(
place
)
{
out_dtype_
(
dtype
)
{
this
->
SetDeviceContext
(
place_
,
dev_ctx
);
this
->
SetDeviceContext
(
place_
,
dev_ctx
);
}
}
ScaleLossGradOpHandle
::~
ScaleLossGradOpHandle
()
{}
ScaleLossGradOpHandle
::~
ScaleLossGradOpHandle
()
{}
struct
ScaleLossGradFunctor
{
float
coeff_
;
Tensor
*
out_
;
platform
::
Place
place_
;
OpHandleBase
*
op_handle_
;
proto
::
VarType
::
Type
out_dtype_
;
platform
::
DeviceContext
*
ctx_
;
ScaleLossGradFunctor
(
float
coeff
,
Tensor
*
out
,
platform
::
Place
place
,
OpHandleBase
*
op_handle
,
proto
::
VarType
::
Type
dtype
,
platform
::
DeviceContext
*
ctx
)
:
coeff_
(
coeff
),
out_
(
out
),
place_
(
place
),
out_dtype_
(
dtype
),
ctx_
(
ctx
)
{}
template
<
typename
OutT
>
void
apply
()
const
{
auto
*
out_data
=
out_
->
mutable_data
<
OutT
>
(
place_
);
if
(
platform
::
is_cpu_place
(
place_
))
{
*
out_data
=
static_cast
<
OutT
>
(
coeff_
);
}
else
{
#ifdef PADDLE_WITH_CUDA
OutT
cast_coeff
=
static_cast
<
OutT
>
(
coeff_
);
auto
stream
=
static_cast
<
platform
::
CUDADeviceContext
*>
(
ctx_
)
->
stream
();
memory
::
Copy
(
boost
::
get
<
platform
::
CUDAPlace
>
(
place_
),
out_data
,
platform
::
CPUPlace
(),
&
cast_coeff
,
SizeOfType
(
out_dtype_
),
stream
);
VLOG
(
10
)
<<
place_
<<
"RUN Scale loss grad op"
;
#endif
}
}
};
void
ScaleLossGradOpHandle
::
RunImpl
()
{
void
ScaleLossGradOpHandle
::
RunImpl
()
{
// Doesn't wait any event
// Doesn't wait any event
std
::
string
var_name
=
static_cast
<
VarHandle
*>
(
this
->
outputs_
[
0
])
->
name_
;
std
::
string
var_name
=
static_cast
<
VarHandle
*>
(
this
->
outputs_
[
0
])
->
name_
;
auto
&
local_scope
=
*
scope_
->
FindVar
(
kLocalExecScopeName
)
->
Get
<
Scope
*>
();
auto
&
local_scope
=
*
scope_
->
FindVar
(
kLocalExecScopeName
)
->
Get
<
Scope
*>
();
auto
*
tensor
=
local_scope
.
FindVar
(
var_name
)
->
GetMutable
<
LoDTensor
>
();
float
*
tmp
=
local_scope
.
FindVar
(
var_name
)
tensor
->
Resize
(
make_ddim
({
1
}));
->
GetMutable
<
LoDTensor
>
()
->
mutable_data
<
float
>
(
make_ddim
({
1
}),
place_
);
if
(
platform
::
is_cpu_place
(
place_
))
{
*
tmp
=
coeff_
;
}
else
{
#ifdef PADDLE_WITH_CUDA
#ifdef PADDLE_WITH_CUDA
ScaleLossGradFunctor
func
(
coeff_
,
tensor
,
place_
,
this
,
out_dtype_
,
this
->
RunAndRecordEvent
([
&
]
{
this
->
dev_ctxes_
.
at
(
place_
));
auto
stream
=
static_cast
<
platform
::
CUDADeviceContext
*>
(
this
->
RunAndRecordEvent
([
&
]
{
framework
::
VisitDataType
(
out_dtype_
,
func
);
});
this
->
dev_ctxes_
.
at
(
place_
))
#else
->
stream
();
ScaleLossGradFunctor
func
(
coeff_
,
tensor
,
place_
,
this
,
out_dtype_
,
nullptr
);
memory
::
Copy
(
boost
::
get
<
platform
::
CUDAPlace
>
(
place_
),
tmp
,
framework
::
VisitDataType
(
out_dtype_
,
func
);
platform
::
CPUPlace
(),
&
coeff_
,
sizeof
(
float
),
stream
);
VLOG
(
10
)
<<
place_
<<
"RUN Scale loss grad op"
;
});
#endif
#endif
}
}
}
std
::
string
ScaleLossGradOpHandle
::
Name
()
const
{
return
"Scale LossGrad"
;
}
std
::
string
ScaleLossGradOpHandle
::
Name
()
const
{
return
"Scale LossGrad"
;
}
...
...
paddle/fluid/framework/details/scale_loss_grad_op_handle.h
浏览文件 @
da87f7a6
...
@@ -26,8 +26,8 @@ namespace details {
...
@@ -26,8 +26,8 @@ namespace details {
struct
ScaleLossGradOpHandle
:
public
OpHandleBase
{
struct
ScaleLossGradOpHandle
:
public
OpHandleBase
{
ScaleLossGradOpHandle
(
ir
::
Node
*
node
,
size_t
num_dev
,
Scope
*
scope
,
ScaleLossGradOpHandle
(
ir
::
Node
*
node
,
size_t
num_dev
,
Scope
*
scope
,
platform
::
Place
place
,
platform
::
DeviceContext
*
context
,
platform
::
Place
place
,
p
roto
::
VarType
::
Type
dtype
);
p
latform
::
DeviceContext
*
context
);
~
ScaleLossGradOpHandle
()
final
;
~
ScaleLossGradOpHandle
()
final
;
...
@@ -40,7 +40,6 @@ struct ScaleLossGradOpHandle : public OpHandleBase {
...
@@ -40,7 +40,6 @@ struct ScaleLossGradOpHandle : public OpHandleBase {
float
coeff_
;
float
coeff_
;
Scope
*
scope_
;
Scope
*
scope_
;
platform
::
Place
place_
;
platform
::
Place
place_
;
proto
::
VarType
::
Type
out_dtype_
;
};
};
}
// namespace details
}
// namespace details
...
...
paddle/fluid/operators/elementwise/elementwise_div_op.cu
浏览文件 @
da87f7a6
...
@@ -12,23 +12,18 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
...
@@ -12,23 +12,18 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
See the License for the specific language governing permissions and
limitations under the License. */
limitations under the License. */
#include "paddle/fluid/operators/elementwise/elementwise_div_op.h"
#include "paddle/fluid/operators/elementwise/elementwise_div_op.h"
#include "paddle/fluid/platform/float16.h"
namespace
ops
=
paddle
::
operators
;
namespace
ops
=
paddle
::
operators
;
REGISTER_OP_CUDA_KERNEL
(
REGISTER_OP_CUDA_KERNEL
(
elementwise_div
,
elementwise_div
,
ops
::
ElementwiseDivKernel
<
paddle
::
platform
::
CUDADeviceContext
,
float
>
,
ops
::
ElementwiseDivKernel
<
paddle
::
platform
::
CUDADeviceContext
,
float
>
,
ops
::
ElementwiseDivKernel
<
paddle
::
platform
::
CUDADeviceContext
,
paddle
::
platform
::
float16
>
,
ops
::
ElementwiseDivKernel
<
paddle
::
platform
::
CUDADeviceContext
,
double
>
,
ops
::
ElementwiseDivKernel
<
paddle
::
platform
::
CUDADeviceContext
,
double
>
,
ops
::
ElementwiseDivKernel
<
paddle
::
platform
::
CUDADeviceContext
,
int
>
,
ops
::
ElementwiseDivKernel
<
paddle
::
platform
::
CUDADeviceContext
,
int
>
,
ops
::
ElementwiseDivKernel
<
paddle
::
platform
::
CUDADeviceContext
,
int64_t
>
);
ops
::
ElementwiseDivKernel
<
paddle
::
platform
::
CUDADeviceContext
,
int64_t
>
);
REGISTER_OP_CUDA_KERNEL
(
REGISTER_OP_CUDA_KERNEL
(
elementwise_div_grad
,
elementwise_div_grad
,
ops
::
ElementwiseDivGradKernel
<
paddle
::
platform
::
CUDADeviceContext
,
float
>
,
ops
::
ElementwiseDivGradKernel
<
paddle
::
platform
::
CUDADeviceContext
,
float
>
,
ops
::
ElementwiseDivGradKernel
<
paddle
::
platform
::
CUDADeviceContext
,
paddle
::
platform
::
float16
>
,
ops
::
ElementwiseDivGradKernel
<
paddle
::
platform
::
CUDADeviceContext
,
double
>
,
ops
::
ElementwiseDivGradKernel
<
paddle
::
platform
::
CUDADeviceContext
,
double
>
,
ops
::
ElementwiseDivGradKernel
<
paddle
::
platform
::
CUDADeviceContext
,
int
>
,
ops
::
ElementwiseDivGradKernel
<
paddle
::
platform
::
CUDADeviceContext
,
int
>
,
ops
::
ElementwiseDivGradKernel
<
paddle
::
platform
::
CUDADeviceContext
,
ops
::
ElementwiseDivGradKernel
<
paddle
::
platform
::
CUDADeviceContext
,
...
...
paddle/fluid/operators/elementwise/elementwise_mul_op.cu
浏览文件 @
da87f7a6
...
@@ -12,21 +12,19 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
...
@@ -12,21 +12,19 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
See the License for the specific language governing permissions and
limitations under the License. */
limitations under the License. */
#include "paddle/fluid/operators/elementwise/elementwise_mul_op.h"
#include "paddle/fluid/operators/elementwise/elementwise_mul_op.h"
#include "paddle/fluid/platform/float16.h"
namespace
ops
=
paddle
::
operators
;
namespace
ops
=
paddle
::
operators
;
namespace
plat
=
paddle
::
platform
;
REGISTER_OP_CUDA_KERNEL
(
REGISTER_OP_CUDA_KERNEL
(
elementwise_mul
,
ops
::
ElementwiseMulKernel
<
plat
::
CUDADeviceContext
,
float
>
,
elementwise_mul
,
ops
::
ElementwiseMulKernel
<
p
lat
::
CUDADeviceContext
,
double
>
,
ops
::
ElementwiseMulKernel
<
p
addle
::
platform
::
CUDADeviceContext
,
float
>
,
ops
::
ElementwiseMulKernel
<
p
lat
::
CUDADeviceContext
,
int
>
,
ops
::
ElementwiseMulKernel
<
p
addle
::
platform
::
CUDADeviceContext
,
double
>
,
ops
::
ElementwiseMulKernel
<
p
lat
::
CUDADeviceContext
,
int64_
t
>
,
ops
::
ElementwiseMulKernel
<
p
addle
::
platform
::
CUDADeviceContext
,
in
t
>
,
ops
::
ElementwiseMulKernel
<
p
lat
::
CUDADeviceContext
,
plat
::
float16
>
);
ops
::
ElementwiseMulKernel
<
p
addle
::
platform
::
CUDADeviceContext
,
int64_t
>
);
REGISTER_OP_CUDA_KERNEL
(
REGISTER_OP_CUDA_KERNEL
(
elementwise_mul_grad
,
elementwise_mul_grad
,
ops
::
ElementwiseMulGradKernel
<
p
lat
::
CUDADeviceContext
,
float
>
,
ops
::
ElementwiseMulGradKernel
<
p
addle
::
platform
::
CUDADeviceContext
,
float
>
,
ops
::
ElementwiseMulGradKernel
<
p
lat
::
CUDADeviceContext
,
double
>
,
ops
::
ElementwiseMulGradKernel
<
p
addle
::
platform
::
CUDADeviceContext
,
double
>
,
ops
::
ElementwiseMulGradKernel
<
p
lat
::
CUDADeviceContext
,
int
>
,
ops
::
ElementwiseMulGradKernel
<
p
addle
::
platform
::
CUDADeviceContext
,
int
>
,
ops
::
ElementwiseMulGradKernel
<
p
lat
::
CUDADeviceContext
,
int64_t
>
,
ops
::
ElementwiseMulGradKernel
<
p
addle
::
platform
::
CUDADeviceContext
,
ops
::
ElementwiseMulGradKernel
<
plat
::
CUDADeviceContext
,
plat
::
float16
>
);
int64_t
>
);
paddle/fluid/operators/fill_zeros_like_op.cu.cc
浏览文件 @
da87f7a6
...
@@ -14,7 +14,6 @@ limitations under the License. */
...
@@ -14,7 +14,6 @@ limitations under the License. */
#include "paddle/fluid/operators/fill_zeros_like_op.h"
#include "paddle/fluid/operators/fill_zeros_like_op.h"
#include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/platform/float16.h"
namespace
ops
=
paddle
::
operators
;
namespace
ops
=
paddle
::
operators
;
REGISTER_OP_CUDA_KERNEL
(
REGISTER_OP_CUDA_KERNEL
(
...
@@ -23,6 +22,4 @@ REGISTER_OP_CUDA_KERNEL(
...
@@ -23,6 +22,4 @@ REGISTER_OP_CUDA_KERNEL(
ops
::
FillZerosLikeKernel
<
paddle
::
platform
::
CUDADeviceContext
,
int64_t
>
,
ops
::
FillZerosLikeKernel
<
paddle
::
platform
::
CUDADeviceContext
,
int64_t
>
,
ops
::
FillZerosLikeKernel
<
paddle
::
platform
::
CUDADeviceContext
,
float
>
,
ops
::
FillZerosLikeKernel
<
paddle
::
platform
::
CUDADeviceContext
,
float
>
,
ops
::
FillZerosLikeKernel
<
paddle
::
platform
::
CUDADeviceContext
,
double
>
,
ops
::
FillZerosLikeKernel
<
paddle
::
platform
::
CUDADeviceContext
,
double
>
,
ops
::
FillZerosLikeKernel
<
paddle
::
platform
::
CUDADeviceContext
,
paddle
::
platform
::
float16
>
,
ops
::
FillZerosLikeKernel
<
paddle
::
platform
::
CUDADeviceContext
,
bool
>
);
ops
::
FillZerosLikeKernel
<
paddle
::
platform
::
CUDADeviceContext
,
bool
>
);
paddle/fluid/operators/metrics/accuracy_op.cu
浏览文件 @
da87f7a6
...
@@ -16,7 +16,6 @@ limitations under the License. */
...
@@ -16,7 +16,6 @@ limitations under the License. */
#include <thrust/reduce.h>
#include <thrust/reduce.h>
#include "paddle/fluid/operators/metrics/accuracy_op.h"
#include "paddle/fluid/operators/metrics/accuracy_op.h"
#include "paddle/fluid/platform/cuda_primitives.h"
#include "paddle/fluid/platform/cuda_primitives.h"
#include "paddle/fluid/platform/float16.h"
#include "paddle/fluid/platform/gpu_info.h"
#include "paddle/fluid/platform/gpu_info.h"
namespace
paddle
{
namespace
paddle
{
...
@@ -95,7 +94,6 @@ class AccuracyOpCUDAKernel : public framework::OpKernel<T> {
...
@@ -95,7 +94,6 @@ class AccuracyOpCUDAKernel : public framework::OpKernel<T> {
// FIXME(typhoonzero): types of T is for inference data.
// FIXME(typhoonzero): types of T is for inference data.
// label data is always int64
// label data is always int64
REGISTER_OP_CUDA_KERNEL
(
REGISTER_OP_CUDA_KERNEL
(
accuracy
,
accuracy
,
paddle
::
operators
::
AccuracyOpCUDAKernel
<
float
>
,
paddle
::
operators
::
AccuracyOpCUDAKernel
<
float
>
,
paddle
::
operators
::
AccuracyOpCUDAKernel
<
double
>
,
paddle
::
operators
::
AccuracyOpCUDAKernel
<
double
>
);
paddle
::
operators
::
AccuracyOpCUDAKernel
<
paddle
::
platform
::
float16
>
);
paddle/fluid/operators/optimizers/momentum_op.cu
浏览文件 @
da87f7a6
...
@@ -14,11 +14,8 @@ limitations under the License. */
...
@@ -14,11 +14,8 @@ limitations under the License. */
#include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/operators/optimizers/momentum_op.h"
#include "paddle/fluid/operators/optimizers/momentum_op.h"
#include "paddle/fluid/platform/float16.h"
namespace
ops
=
paddle
::
operators
;
namespace
ops
=
paddle
::
operators
;
REGISTER_OP_CUDA_KERNEL
(
REGISTER_OP_CUDA_KERNEL
(
momentum
,
ops
::
MomentumOpKernel
<
paddle
::
platform
::
CUDADeviceContext
,
float
>
,
momentum
,
ops
::
MomentumOpKernel
<
paddle
::
platform
::
CUDADeviceContext
,
float
>
,
ops
::
MomentumOpKernel
<
paddle
::
platform
::
CUDADeviceContext
,
double
>
,
ops
::
MomentumOpKernel
<
paddle
::
platform
::
CUDADeviceContext
,
double
>
);
ops
::
MomentumOpKernel
<
paddle
::
platform
::
CUDADeviceContext
,
paddle
::
platform
::
float16
>
);
paddle/fluid/operators/optimizers/momentum_op.h
浏览文件 @
da87f7a6
...
@@ -237,8 +237,7 @@ class SparseMomentumFunctor<T, UseNesterov> {
...
@@ -237,8 +237,7 @@ class SparseMomentumFunctor<T, UseNesterov> {
inline
HOSTDEVICE
void
operator
()(
size_t
i
)
{
inline
HOSTDEVICE
void
operator
()(
size_t
i
)
{
auto
row_idx
=
auto
row_idx
=
math
::
BinarySearch
<
int64_t
>
(
rows_
,
row_height_
,
i
/
row_numel_
);
math
::
BinarySearch
<
int64_t
>
(
rows_
,
row_height_
,
i
/
row_numel_
);
T
g
=
row_idx
>=
0
?
g_
[
row_idx
*
row_numel_
+
i
%
row_numel_
]
T
g
=
row_idx
>=
0
?
g_
[
row_idx
*
row_numel_
+
i
%
row_numel_
]
:
0
;
:
static_cast
<
T
>
(
0
);
// put memory access in register
// put memory access in register
const
T
p
=
p_
[
i
];
const
T
p
=
p_
[
i
];
const
T
lr
=
lr_
[
0
];
const
T
lr
=
lr_
[
0
];
...
@@ -283,8 +282,7 @@ class SparseMomentumFunctor<T, NoNesterov> {
...
@@ -283,8 +282,7 @@ class SparseMomentumFunctor<T, NoNesterov> {
inline
HOSTDEVICE
void
operator
()(
size_t
i
)
{
inline
HOSTDEVICE
void
operator
()(
size_t
i
)
{
auto
row_idx
=
auto
row_idx
=
math
::
BinarySearch
<
int64_t
>
(
rows_
,
row_height_
,
i
/
row_numel_
);
math
::
BinarySearch
<
int64_t
>
(
rows_
,
row_height_
,
i
/
row_numel_
);
T
g
=
row_idx
>=
0
?
g_
[
row_idx
*
row_numel_
+
i
%
row_numel_
]
T
g
=
row_idx
>=
0
?
g_
[
row_idx
*
row_numel_
+
i
%
row_numel_
]
:
0
;
:
static_cast
<
T
>
(
0
);
// put memory access in register
// put memory access in register
const
T
p
=
p_
[
i
];
const
T
p
=
p_
[
i
];
const
T
lr
=
lr_
[
0
];
const
T
lr
=
lr_
[
0
];
...
...
paddle/fluid/operators/top_k_op.cu
浏览文件 @
da87f7a6
...
@@ -16,7 +16,6 @@ limitations under the License. */
...
@@ -16,7 +16,6 @@ limitations under the License. */
#include "paddle/fluid/operators/top_k_op.h"
#include "paddle/fluid/operators/top_k_op.h"
#include "paddle/fluid/platform/assert.h"
#include "paddle/fluid/platform/assert.h"
#include "paddle/fluid/platform/cuda_device_function.h"
#include "paddle/fluid/platform/cuda_device_function.h"
#include "paddle/fluid/platform/float16.h"
namespace
paddle
{
namespace
paddle
{
namespace
operators
{
namespace
operators
{
...
@@ -151,7 +150,7 @@ __device__ __forceinline__ void ThreadGetTopK(Pair<T> topk[], int* beam,
...
@@ -151,7 +150,7 @@ __device__ __forceinline__ void ThreadGetTopK(Pair<T> topk[], int* beam,
if
(
k
<
MaxLength
-
(
*
beam
))
{
if
(
k
<
MaxLength
-
(
*
beam
))
{
topk
[
k
]
=
topk
[
k
+
*
beam
];
topk
[
k
]
=
topk
[
k
+
*
beam
];
}
else
{
}
else
{
topk
[
k
].
set
(
-
static_cast
<
T
>
(
INFINITY
)
,
-
1
);
topk
[
k
].
set
(
-
INFINITY
,
-
1
);
}
}
}
}
if
(
!
(
*
is_empty
))
{
if
(
!
(
*
is_empty
))
{
...
@@ -161,7 +160,7 @@ __device__ __forceinline__ void ThreadGetTopK(Pair<T> topk[], int* beam,
...
@@ -161,7 +160,7 @@ __device__ __forceinline__ void ThreadGetTopK(Pair<T> topk[], int* beam,
}
}
*
max
=
topk
[
MaxLength
-
1
];
*
max
=
topk
[
MaxLength
-
1
];
if
((
*
max
).
v
==
-
static_cast
<
T
>
(
1
)
)
*
is_empty
=
true
;
if
((
*
max
).
v
==
-
1
)
*
is_empty
=
true
;
*
beam
=
0
;
*
beam
=
0
;
}
}
}
}
...
@@ -182,7 +181,7 @@ __device__ __forceinline__ void ThreadGetTopK(Pair<T> topk[], int* beam,
...
@@ -182,7 +181,7 @@ __device__ __forceinline__ void ThreadGetTopK(Pair<T> topk[], int* beam,
if
(
k
<
MaxLength
-
*
beam
)
{
if
(
k
<
MaxLength
-
*
beam
)
{
topk
[
k
]
=
topk
[
k
+
*
beam
];
topk
[
k
]
=
topk
[
k
+
*
beam
];
}
else
{
}
else
{
topk
[
k
].
set
(
-
static_cast
<
T
>
(
INFINITY
)
,
-
1
);
topk
[
k
].
set
(
-
INFINITY
,
-
1
);
}
}
}
}
if
(
!
(
*
is_empty
))
{
if
(
!
(
*
is_empty
))
{
...
@@ -279,7 +278,7 @@ __global__ void KeMatrixTopK(T* output, int output_stride, int64_t* indices,
...
@@ -279,7 +278,7 @@ __global__ void KeMatrixTopK(T* output, int output_stride, int64_t* indices,
bool
firststep
=
true
;
bool
firststep
=
true
;
for
(
int
j
=
0
;
j
<
MaxLength
;
j
++
)
{
for
(
int
j
=
0
;
j
<
MaxLength
;
j
++
)
{
topk
[
j
].
set
(
-
static_cast
<
T
>
(
INFINITY
)
,
-
1
);
topk
[
j
].
set
(
-
INFINITY
,
-
1
);
}
}
while
(
top_num
)
{
while
(
top_num
)
{
ThreadGetTopK
<
T
,
MaxLength
,
BlockSize
>
(
ThreadGetTopK
<
T
,
MaxLength
,
BlockSize
>
(
...
@@ -363,7 +362,5 @@ class TopkOpCUDAKernel : public framework::OpKernel<T> {
...
@@ -363,7 +362,5 @@ class TopkOpCUDAKernel : public framework::OpKernel<T> {
}
// namespace operators
}
// namespace operators
}
// namespace paddle
}
// namespace paddle
REGISTER_OP_CUDA_KERNEL
(
REGISTER_OP_CUDA_KERNEL
(
top_k
,
paddle
::
operators
::
TopkOpCUDAKernel
<
float
>
,
top_k
,
paddle
::
operators
::
TopkOpCUDAKernel
<
float
>
,
paddle
::
operators
::
TopkOpCUDAKernel
<
double
>
);
paddle
::
operators
::
TopkOpCUDAKernel
<
double
>
,
paddle
::
operators
::
TopkOpCUDAKernel
<
paddle
::
platform
::
float16
>
);
paddle/fluid/platform/nccl_helper.h
浏览文件 @
da87f7a6
...
@@ -23,7 +23,6 @@
...
@@ -23,7 +23,6 @@
#include "paddle/fluid/framework/data_type.h"
#include "paddle/fluid/framework/data_type.h"
#include "paddle/fluid/platform/dynload/nccl.h"
#include "paddle/fluid/platform/dynload/nccl.h"
#include "paddle/fluid/platform/enforce.h"
#include "paddle/fluid/platform/enforce.h"
#include "paddle/fluid/platform/float16.h"
#define NCCL_ID_VARNAME "NCCLID"
#define NCCL_ID_VARNAME "NCCLID"
...
@@ -39,8 +38,6 @@ inline ncclDataType_t ToNCCLDataType(framework::proto::VarType::Type type) {
...
@@ -39,8 +38,6 @@ inline ncclDataType_t ToNCCLDataType(framework::proto::VarType::Type type) {
return
ncclInt
;
return
ncclInt
;
}
else
if
(
type
==
framework
::
proto
::
VarType
::
INT64
)
{
}
else
if
(
type
==
framework
::
proto
::
VarType
::
INT64
)
{
return
ncclInt64
;
return
ncclInt64
;
}
else
if
(
type
==
framework
::
proto
::
VarType
::
FP16
)
{
return
ncclFloat16
;
}
else
{
}
else
{
PADDLE_THROW
(
"Not supported"
);
PADDLE_THROW
(
"Not supported"
);
}
}
...
...
python/paddle/fluid/data_feeder.py
浏览文件 @
da87f7a6
...
@@ -44,8 +44,6 @@ class DataToLoDTensorConverter(object):
...
@@ -44,8 +44,6 @@ class DataToLoDTensorConverter(object):
self
.
dtype
=
'int64'
self
.
dtype
=
'int64'
elif
dtype
==
core
.
VarDesc
.
VarType
.
FP64
:
elif
dtype
==
core
.
VarDesc
.
VarType
.
FP64
:
self
.
dtype
=
'float64'
self
.
dtype
=
'float64'
elif
dtype
==
core
.
VarDesc
.
VarType
.
FP16
:
self
.
dtype
=
'float16'
elif
dtype
==
core
.
VarDesc
.
VarType
.
INT32
:
elif
dtype
==
core
.
VarDesc
.
VarType
.
INT32
:
self
.
dtype
=
'int32'
self
.
dtype
=
'int32'
elif
dtype
==
core
.
VarDesc
.
VarType
.
UINT8
:
elif
dtype
==
core
.
VarDesc
.
VarType
.
UINT8
:
...
...
python/paddle/fluid/initializer.py
浏览文件 @
da87f7a6
...
@@ -18,7 +18,6 @@ from . import framework
...
@@ -18,7 +18,6 @@ from . import framework
import
numpy
as
np
import
numpy
as
np
import
contextlib
import
contextlib
from
.core
import
VarDesc
from
.core
import
VarDesc
from
.
import
unique_name
__all__
=
[
__all__
=
[
'Constant'
,
'Uniform'
,
'Normal'
,
'TruncatedNormal'
,
'Xavier'
,
'Bilinear'
,
'Constant'
,
'Uniform'
,
'Normal'
,
'TruncatedNormal'
,
'Xavier'
,
'Bilinear'
,
...
@@ -208,39 +207,16 @@ class UniformInitializer(Initializer):
...
@@ -208,39 +207,16 @@ class UniformInitializer(Initializer):
# Initialization Ops should be prepended and not appended
# Initialization Ops should be prepended and not appended
if
self
.
_seed
==
0
:
if
self
.
_seed
==
0
:
self
.
_seed
=
block
.
program
.
random_seed
self
.
_seed
=
block
.
program
.
random_seed
# to be compatible of fp16 initalizers
if
var
.
dtype
==
VarDesc
.
VarType
.
FP16
:
out_dtype
=
VarDesc
.
VarType
.
FP32
out_var
=
block
.
create_var
(
name
=
unique_name
.
generate
(
"."
.
join
([
'gaussian_random'
,
'tmp'
])),
shape
=
var
.
shape
,
dtype
=
out_dtype
,
type
=
VarDesc
.
VarType
.
LOD_TENSOR
,
persistable
=
False
)
else
:
out_dtype
=
var
.
dtype
out_var
=
var
op
=
block
.
_prepend_op
(
op
=
block
.
_prepend_op
(
type
=
"uniform_random"
,
type
=
"uniform_random"
,
outputs
=
{
"Out"
:
out_
var
},
outputs
=
{
"Out"
:
var
},
attrs
=
{
attrs
=
{
"shape"
:
var
.
shape
,
"shape"
:
var
.
shape
,
"dtype"
:
out_dtype
,
"dtype"
:
int
(
var
.
dtype
)
,
"min"
:
self
.
_low
,
"min"
:
self
.
_low
,
"max"
:
self
.
_high
,
"max"
:
self
.
_high
,
"seed"
:
self
.
_seed
"seed"
:
self
.
_seed
})
})
if
var
.
dtype
==
VarDesc
.
VarType
.
FP16
:
block
.
append_op
(
type
=
"cast"
,
inputs
=
{
"X"
:
out_var
},
outputs
=
{
"Out"
:
var
},
attrs
=
{
"in_dtype"
:
out_var
.
dtype
,
"out_dtype"
:
var
.
dtype
})
var
.
op
=
op
var
.
op
=
op
return
op
return
op
...
@@ -285,39 +261,17 @@ class NormalInitializer(Initializer):
...
@@ -285,39 +261,17 @@ class NormalInitializer(Initializer):
# Initialization Ops should be prepended and not appended
# Initialization Ops should be prepended and not appended
if
self
.
_seed
==
0
:
if
self
.
_seed
==
0
:
self
.
_seed
=
block
.
program
.
random_seed
self
.
_seed
=
block
.
program
.
random_seed
# to be compatible of fp16 initalizers
if
var
.
dtype
==
VarDesc
.
VarType
.
FP16
:
out_dtype
=
VarDesc
.
VarType
.
FP32
out_var
=
block
.
create_var
(
name
=
unique_name
.
generate
(
"."
.
join
([
'gaussian_random'
,
'tmp'
])),
shape
=
var
.
shape
,
dtype
=
out_dtype
,
type
=
VarDesc
.
VarType
.
LOD_TENSOR
,
persistable
=
False
)
else
:
out_dtype
=
var
.
dtype
out_var
=
var
op
=
block
.
_prepend_op
(
op
=
block
.
_prepend_op
(
type
=
"gaussian_random"
,
type
=
"gaussian_random"
,
outputs
=
{
"Out"
:
out_
var
},
outputs
=
{
"Out"
:
var
},
attrs
=
{
attrs
=
{
"shape"
:
var
.
shape
,
"shape"
:
var
.
shape
,
"dtype"
:
out_dtype
,
"dtype"
:
int
(
var
.
dtype
)
,
"mean"
:
self
.
_mean
,
"mean"
:
self
.
_mean
,
"std"
:
self
.
_std_dev
,
"std"
:
self
.
_std_dev
,
"seed"
:
self
.
_seed
,
"seed"
:
self
.
_seed
,
"use_mkldnn"
:
False
"use_mkldnn"
:
False
})
})
if
var
.
dtype
==
VarDesc
.
VarType
.
FP16
:
block
.
append_op
(
type
=
"cast"
,
inputs
=
{
"X"
:
out_var
},
outputs
=
{
"Out"
:
var
},
attrs
=
{
"in_dtype"
:
out_var
.
dtype
,
"out_dtype"
:
var
.
dtype
})
var
.
op
=
op
var
.
op
=
op
return
op
return
op
...
...
python/paddle/fluid/layers/learning_rate_scheduler.py
浏览文件 @
da87f7a6
...
@@ -63,18 +63,14 @@ def noam_decay(d_model, warmup_steps):
...
@@ -63,18 +63,14 @@ def noam_decay(d_model, warmup_steps):
Returns:
Returns:
The decayed learning rate.
The decayed learning rate.
"""
"""
with
default_main_program
().
_lr_schedule_guard
():
global_step
=
_decay_step_counter
(
1
)
def
_lr_schedule
(
dtype
):
a
=
global_step
**-
0.5
with
default_main_program
().
_lr_schedule_guard
():
b
=
(
warmup_steps
**-
1.5
)
*
global_step
global_step
=
_decay_step_counter
(
1
)
lr_value
=
(
d_model
**-
0.5
)
*
nn
.
elementwise_min
(
a
,
b
)
a
=
global_step
**-
0.5
return
lr_value
b
=
(
warmup_steps
**-
1.5
)
*
global_step
lr_value
=
(
d_model
**-
0.5
)
*
nn
.
elementwise_min
(
a
,
b
)
return
lr_value
return
_lr_schedule
def
exponential_decay
(
learning_rate
,
decay_steps
,
decay_rate
,
staircase
=
False
):
def
exponential_decay
(
learning_rate
,
decay_steps
,
decay_rate
,
staircase
=
False
):
...
@@ -113,19 +109,15 @@ def exponential_decay(learning_rate, decay_steps, decay_rate, staircase=False):
...
@@ -113,19 +109,15 @@ def exponential_decay(learning_rate, decay_steps, decay_rate, staircase=False):
sgd_optimizer.minimize(avg_cost)
sgd_optimizer.minimize(avg_cost)
"""
"""
with
default_main_program
().
_lr_schedule_guard
():
global_step
=
_decay_step_counter
()
def
_lr_schedule
(
dtype
):
div_res
=
global_step
/
decay_steps
with
default_main_program
().
_lr_schedule_guard
():
if
staircase
:
global_step
=
_decay_step_counter
()
div_res
=
ops
.
floor
(
div_res
)
decayed_lr
=
learning_rate
*
(
decay_rate
**
div_res
)
div_res
=
global_step
/
decay_steps
if
staircase
:
div_res
=
ops
.
floor
(
div_res
)
decayed_lr
=
learning_rate
*
(
decay_rate
**
div_res
)
return
decayed_lr
return
decayed_lr
return
_lr_schedule
def
natural_exp_decay
(
learning_rate
,
decay_steps
,
decay_rate
,
staircase
=
False
):
def
natural_exp_decay
(
learning_rate
,
decay_steps
,
decay_rate
,
staircase
=
False
):
...
@@ -146,19 +138,15 @@ def natural_exp_decay(learning_rate, decay_steps, decay_rate, staircase=False):
...
@@ -146,19 +138,15 @@ def natural_exp_decay(learning_rate, decay_steps, decay_rate, staircase=False):
Returns:
Returns:
The decayed learning rate
The decayed learning rate
"""
"""
with
default_main_program
().
_lr_schedule_guard
():
global_step
=
_decay_step_counter
()
def
_lr_schedule
(
dtype
):
div_res
=
global_step
/
decay_steps
with
default_main_program
().
_lr_schedule_guard
():
if
staircase
:
global_step
=
_decay_step_counter
()
div_res
=
ops
.
floor
(
div_res
)
decayed_lr
=
learning_rate
*
ops
.
exp
(
-
1
*
decay_rate
*
div_res
)
div_res
=
global_step
/
decay_steps
if
staircase
:
div_res
=
ops
.
floor
(
div_res
)
decayed_lr
=
learning_rate
*
ops
.
exp
(
-
1
*
decay_rate
*
div_res
)
return
decayed_lr
return
_lr_schedule
return
decayed_lr
def
inverse_time_decay
(
learning_rate
,
decay_steps
,
decay_rate
,
staircase
=
False
):
def
inverse_time_decay
(
learning_rate
,
decay_steps
,
decay_rate
,
staircase
=
False
):
...
@@ -196,20 +184,16 @@ def inverse_time_decay(learning_rate, decay_steps, decay_rate, staircase=False):
...
@@ -196,20 +184,16 @@ def inverse_time_decay(learning_rate, decay_steps, decay_rate, staircase=False):
staircase=True))
staircase=True))
sgd_optimizer.minimize(avg_cost)
sgd_optimizer.minimize(avg_cost)
"""
"""
with
default_main_program
().
_lr_schedule_guard
():
global_step
=
_decay_step_counter
()
def
_lr_schedule
(
dtype
):
div_res
=
global_step
/
decay_steps
with
default_main_program
().
_lr_schedule_guard
()
:
if
staircase
:
global_step
=
_decay_step_counter
(
)
div_res
=
ops
.
floor
(
div_res
)
div_res
=
global_step
/
decay_steps
decayed_lr
=
learning_rate
/
(
1
+
decay_rate
*
div_res
)
if
staircase
:
div_res
=
ops
.
floor
(
div_res
)
decayed_lr
=
learning_rate
/
(
1
+
decay_rate
*
div_res
)
return
decayed_lr
return
decayed_lr
return
_lr_schedule
def
polynomial_decay
(
learning_rate
,
def
polynomial_decay
(
learning_rate
,
...
@@ -240,33 +224,28 @@ def polynomial_decay(learning_rate,
...
@@ -240,33 +224,28 @@ def polynomial_decay(learning_rate,
Returns:
Returns:
Variable: The decayed learning rate
Variable: The decayed learning rate
"""
"""
with
default_main_program
().
_lr_schedule_guard
():
global_step
=
_decay_step_counter
()
def
_lr_schedule
(
dtype
,
decay_steps
=
decay_steps
):
if
cycle
:
with
default_main_program
().
_lr_schedule_guard
():
div_res
=
ops
.
ceil
(
global_step
/
decay_steps
)
global_step
=
_decay_step_counter
()
zero_var
=
tensor
.
fill_constant
(
shape
=
[
1
],
dtype
=
'float32'
,
value
=
0.0
)
if
cycle
:
one_var
=
tensor
.
fill_constant
(
div_res
=
ops
.
ceil
(
global_step
/
decay_steps
)
shape
=
[
1
],
dtype
=
'float32'
,
value
=
1.0
)
zero_var
=
tensor
.
fill_constant
(
shape
=
[
1
],
dtype
=
dtype
,
value
=
0.0
)
one_var
=
tensor
.
fill_constant
(
shape
=
[
1
],
dtype
=
dtype
,
value
=
1.0
)
with
control_flow
.
Switch
()
as
switch
:
with
switch
.
case
(
global_step
==
zero_var
):
tensor
.
assign
(
input
=
one_var
,
output
=
div_res
)
decay_steps
=
decay_steps
*
div_res
else
:
decay_steps_var
=
tensor
.
fill_constant
(
shape
=
[
1
],
dtype
=
dtype
,
value
=
float
(
decay_steps
))
global_step
=
nn
.
elementwise_min
(
x
=
global_step
,
y
=
decay_steps_var
)
decayed_lr
=
(
learning_rate
-
end_learning_rate
)
*
\
with
control_flow
.
Switch
()
as
switch
:
((
1
-
global_step
/
decay_steps
)
**
power
)
+
end_learning_rate
with
switch
.
case
(
global_step
==
zero_var
):
return
decayed_lr
tensor
.
assign
(
input
=
one_var
,
output
=
div_res
)
decay_steps
=
decay_steps
*
div_res
else
:
decay_steps_var
=
tensor
.
fill_constant
(
shape
=
[
1
],
dtype
=
'float32'
,
value
=
float
(
decay_steps
))
global_step
=
nn
.
elementwise_min
(
x
=
global_step
,
y
=
decay_steps_var
)
return
_lr_schedule
decayed_lr
=
(
learning_rate
-
end_learning_rate
)
*
\
((
1
-
global_step
/
decay_steps
)
**
power
)
+
end_learning_rate
return
decayed_lr
def
piecewise_decay
(
boundaries
,
values
):
def
piecewise_decay
(
boundaries
,
values
):
...
@@ -294,42 +273,38 @@ def piecewise_decay(boundaries, values):
...
@@ -294,42 +273,38 @@ def piecewise_decay(boundaries, values):
"""
"""
with
default_main_program
().
_lr_schedule_guard
():
def
_lr_schedule
(
dtype
):
if
len
(
values
)
-
len
(
boundaries
)
!=
1
:
with
default_main_program
().
_lr_schedule_guard
():
raise
ValueError
(
"len(values) - len(boundaries) should be 1"
)
if
len
(
values
)
-
len
(
boundaries
)
!=
1
:
raise
ValueError
(
"len(values) - len(boundaries) should be 1"
)
global_step
=
_decay_step_counter
()
global_step
=
_decay_step_counter
()
lr
=
tensor
.
create_global_var
(
shape
=
[
1
],
lr
=
tensor
.
create_global_var
(
value
=
0.0
,
shape
=
[
1
],
dtype
=
'float32'
,
value
=
0.0
,
persistable
=
True
,
dtype
=
'float32'
,
name
=
"learning_rate"
)
persistable
=
True
,
name
=
"learning_rate"
)
with
control_flow
.
Switch
()
as
switch
:
for
i
in
range
(
len
(
boundaries
)):
with
control_flow
.
Switch
()
as
switch
:
boundary_val
=
tensor
.
fill_constant
(
for
i
in
range
(
len
(
boundaries
)):
boundary_val
=
tensor
.
fill_constant
(
shape
=
[
1
],
dtype
=
'float32'
,
value
=
float
(
boundaries
[
i
]),
force_cpu
=
True
)
value_var
=
tensor
.
fill_constant
(
shape
=
[
1
],
dtype
=
'float32'
,
value
=
float
(
values
[
i
]))
with
switch
.
case
(
global_step
<
boundary_val
):
tensor
.
assign
(
value_var
,
lr
)
last_value_var
=
tensor
.
fill_constant
(
shape
=
[
1
],
shape
=
[
1
],
dtype
=
'float32'
,
dtype
=
'float32'
,
value
=
float
(
values
[
len
(
values
)
-
1
]))
value
=
float
(
boundaries
[
i
]),
with
switch
.
default
():
force_cpu
=
True
)
tensor
.
assign
(
last_value_var
,
lr
)
value_var
=
tensor
.
fill_constant
(
shape
=
[
1
],
dtype
=
'float32'
,
value
=
float
(
values
[
i
]))
return
lr
with
switch
.
case
(
global_step
<
boundary_val
):
tensor
.
assign
(
value_var
,
lr
)
last_value_var
=
tensor
.
fill_constant
(
shape
=
[
1
],
dtype
=
'float32'
,
value
=
float
(
values
[
len
(
values
)
-
1
]))
with
switch
.
default
():
tensor
.
assign
(
last_value_var
,
lr
)
return
_lr_schedule
return
lr
def
append_LARS
(
params_grads
,
learning_rate
,
weight_decay
):
def
append_LARS
(
params_grads
,
learning_rate
,
weight_decay
):
...
...
python/paddle/fluid/layers/nn.py
浏览文件 @
da87f7a6
...
@@ -2798,10 +2798,6 @@ def batch_norm(input,
...
@@ -2798,10 +2798,6 @@ def batch_norm(input,
helper
=
LayerHelper
(
'batch_norm'
,
**
locals
())
helper
=
LayerHelper
(
'batch_norm'
,
**
locals
())
dtype
=
helper
.
input_dtype
()
dtype
=
helper
.
input_dtype
()
# use fp32 for bn parameter
if
dtype
==
core
.
VarDesc
.
VarType
.
FP16
:
dtype
=
core
.
VarDesc
.
VarType
.
FP32
input_shape
=
input
.
shape
input_shape
=
input
.
shape
if
data_layout
==
'NCHW'
:
if
data_layout
==
'NCHW'
:
channel_num
=
input_shape
[
1
]
channel_num
=
input_shape
[
1
]
...
@@ -2836,7 +2832,7 @@ def batch_norm(input,
...
@@ -2836,7 +2832,7 @@ def batch_norm(input,
trainable
=
False
,
trainable
=
False
,
do_model_average
=
do_model_average_for_mean_and_var
),
do_model_average
=
do_model_average_for_mean_and_var
),
shape
=
param_shape
,
shape
=
param_shape
,
dtype
=
dtype
)
dtype
=
input
.
dtype
)
mean
.
stop_gradient
=
True
mean
.
stop_gradient
=
True
variance
=
helper
.
create_parameter
(
variance
=
helper
.
create_parameter
(
...
@@ -2846,7 +2842,7 @@ def batch_norm(input,
...
@@ -2846,7 +2842,7 @@ def batch_norm(input,
trainable
=
False
,
trainable
=
False
,
do_model_average
=
do_model_average_for_mean_and_var
),
do_model_average
=
do_model_average_for_mean_and_var
),
shape
=
param_shape
,
shape
=
param_shape
,
dtype
=
dtype
)
dtype
=
input
.
dtype
)
variance
.
stop_gradient
=
True
variance
.
stop_gradient
=
True
# create output
# create output
...
...
python/paddle/fluid/optimizer.py
浏览文件 @
da87f7a6
...
@@ -50,21 +50,17 @@ class Optimizer(object):
...
@@ -50,21 +50,17 @@ class Optimizer(object):
def
__init__
(
self
,
learning_rate
,
regularization
=
None
,
name
=
None
):
def
__init__
(
self
,
learning_rate
,
regularization
=
None
,
name
=
None
):
if
not
isinstance
(
learning_rate
,
float
)
and
\
if
not
isinstance
(
learning_rate
,
float
)
and
\
not
isinstance
(
learning_rate
,
framework
.
Variable
)
and
\
not
isinstance
(
learning_rate
,
framework
.
Variable
):
not
callable
(
learning_rate
):
raise
TypeError
(
"learning rate should be float or Variable"
)
raise
TypeError
(
"learning rate should be float or Variable or callable(dtype)"
)
self
.
_name
=
name
self
.
_name
=
name
self
.
regularization
=
regularization
self
.
regularization
=
regularization
self
.
_learning_rate
=
learning_rate
self
.
_learning_rate
=
learning_rate
# the learning rate type should be inferenced from loss
# the learning rate type should be inferenced from loss
self
.
_dtype
=
None
self
.
_dtype
=
None
# each program should have a independent learning rate
# each program should have a independent learning rate
# program -> Variable(learning_rate) or:
# program -> Variable(learning_rate)
# program -> callable(return learning_rate Variable)
self
.
_learning_rate_map
=
dict
()
self
.
_learning_rate_map
=
dict
()
if
isinstance
(
self
.
_learning_rate
,
framework
.
Variable
)
or
\
if
isinstance
(
self
.
_learning_rate
,
framework
.
Variable
):
callable
(
self
.
_learning_rate
):
self
.
_learning_rate_map
[
framework
.
default_main_program
(
self
.
_learning_rate_map
[
framework
.
default_main_program
(
)]
=
self
.
_learning_rate
)]
=
self
.
_learning_rate
# Dictionary of accumulators. Some optimizer subclasses need to
# Dictionary of accumulators. Some optimizer subclasses need to
...
@@ -79,11 +75,6 @@ class Optimizer(object):
...
@@ -79,11 +75,6 @@ class Optimizer(object):
if
isinstance
(
lr
,
framework
.
Variable
):
if
isinstance
(
lr
,
framework
.
Variable
):
return
return
elif
callable
(
lr
):
dtype
=
'float32'
if
self
.
_dtype
is
None
else
self
.
_dtype
self
.
_learning_rate_map
[
framework
.
default_main_program
()]
=
lr
(
dtype
)
return
else
:
else
:
if
not
isinstance
(
self
.
_learning_rate
,
float
):
if
not
isinstance
(
self
.
_learning_rate
,
float
):
raise
TypeError
(
raise
TypeError
(
...
...
python/paddle/fluid/tests/unittests/op_test.py
浏览文件 @
da87f7a6
...
@@ -368,8 +368,6 @@ class OpTest(unittest.TestCase):
...
@@ -368,8 +368,6 @@ class OpTest(unittest.TestCase):
place
=
core
.
CUDAPlace
(
0
)
place
=
core
.
CUDAPlace
(
0
)
if
core
.
is_float16_supported
(
place
):
if
core
.
is_float16_supported
(
place
):
return
[
place
]
return
[
place
]
else
:
return
[]
else
:
else
:
return
[]
return
[]
places
=
[
fluid
.
CPUPlace
()]
places
=
[
fluid
.
CPUPlace
()]
...
...
python/paddle/fluid/tests/unittests/test_accuracy_op.py
浏览文件 @
da87f7a6
...
@@ -22,10 +22,8 @@ from op_test import OpTest
...
@@ -22,10 +22,8 @@ from op_test import OpTest
class
TestAccuracyOp
(
OpTest
):
class
TestAccuracyOp
(
OpTest
):
def
setUp
(
self
):
def
setUp
(
self
):
self
.
op_type
=
"accuracy"
self
.
op_type
=
"accuracy"
self
.
dtype
=
np
.
float32
self
.
init_dtype
()
n
=
8192
n
=
8192
infer
=
np
.
random
.
random
((
n
,
1
)).
astype
(
self
.
dtype
)
infer
=
np
.
random
.
random
((
n
,
1
)).
astype
(
"float32"
)
indices
=
np
.
random
.
randint
(
0
,
2
,
(
n
,
1
))
indices
=
np
.
random
.
randint
(
0
,
2
,
(
n
,
1
))
label
=
np
.
random
.
randint
(
0
,
2
,
(
n
,
1
))
label
=
np
.
random
.
randint
(
0
,
2
,
(
n
,
1
))
self
.
inputs
=
{
'Out'
:
infer
,
'Indices'
:
indices
,
"Label"
:
label
}
self
.
inputs
=
{
'Out'
:
infer
,
'Indices'
:
indices
,
"Label"
:
label
}
...
@@ -36,25 +34,14 @@ class TestAccuracyOp(OpTest):
...
@@ -36,25 +34,14 @@ class TestAccuracyOp(OpTest):
num_correct
+=
1
num_correct
+=
1
break
break
self
.
outputs
=
{
self
.
outputs
=
{
'Accuracy'
:
np
.
array
([
num_correct
/
float
(
n
)]).
astype
(
self
.
dtype
),
'Accuracy'
:
np
.
array
([
num_correct
/
float
(
n
)]).
astype
(
"float32"
),
'Correct'
:
np
.
array
([
num_correct
]).
astype
(
"int32"
),
'Correct'
:
np
.
array
([
num_correct
]).
astype
(
"int32"
),
'Total'
:
np
.
array
([
n
]).
astype
(
"int32"
)
'Total'
:
np
.
array
([
n
]).
astype
(
"int32"
)
}
}
def
init_dtype
(
self
):
pass
def
test_check_output
(
self
):
def
test_check_output
(
self
):
self
.
check_output
()
self
.
check_output
()
class
TestAccuracyOpFp16
(
TestAccuracyOp
):
def
init_dtype
(
self
):
self
.
dtype
=
np
.
float16
def
test_check_output
(
self
):
self
.
check_output
(
atol
=
1e-3
)
if
__name__
==
'__main__'
:
if
__name__
==
'__main__'
:
unittest
.
main
()
unittest
.
main
()
python/paddle/fluid/tests/unittests/test_elementwise_div_op.py
浏览文件 @
da87f7a6
...
@@ -21,16 +21,14 @@ from op_test import OpTest
...
@@ -21,16 +21,14 @@ from op_test import OpTest
class
ElementwiseDivOp
(
OpTest
):
class
ElementwiseDivOp
(
OpTest
):
def
setUp
(
self
):
def
setUp
(
self
):
self
.
op_type
=
"elementwise_div"
self
.
op_type
=
"elementwise_div"
self
.
dtype
=
np
.
float32
self
.
init_dtype
()
""" Warning
""" Warning
CPU gradient check error!
CPU gradient check error!
'X': np.random.random((32,84)).astype("float32"),
'X': np.random.random((32,84)).astype("float32"),
'Y': np.random.random((32,84)).astype("float32")
'Y': np.random.random((32,84)).astype("float32")
"""
"""
self
.
inputs
=
{
self
.
inputs
=
{
'X'
:
np
.
random
.
uniform
(
0.1
,
1
,
[
13
,
17
]).
astype
(
self
.
dtype
),
'X'
:
np
.
random
.
uniform
(
0.1
,
1
,
[
13
,
17
]).
astype
(
"float32"
),
'Y'
:
np
.
random
.
uniform
(
0.1
,
1
,
[
13
,
17
]).
astype
(
self
.
dtype
)
'Y'
:
np
.
random
.
uniform
(
0.1
,
1
,
[
13
,
17
]).
astype
(
"float32"
)
}
}
self
.
outputs
=
{
'Out'
:
np
.
divide
(
self
.
inputs
[
'X'
],
self
.
inputs
[
'Y'
])}
self
.
outputs
=
{
'Out'
:
np
.
divide
(
self
.
inputs
[
'X'
],
self
.
inputs
[
'Y'
])}
...
@@ -48,9 +46,6 @@ class ElementwiseDivOp(OpTest):
...
@@ -48,9 +46,6 @@ class ElementwiseDivOp(OpTest):
self
.
check_grad
(
self
.
check_grad
(
[
'X'
],
'Out'
,
max_relative_error
=
0.05
,
no_grad_set
=
set
(
'Y'
))
[
'X'
],
'Out'
,
max_relative_error
=
0.05
,
no_grad_set
=
set
(
'Y'
))
def
init_dtype
(
self
):
pass
class
TestElementwiseDivOp_scalar
(
ElementwiseDivOp
):
class
TestElementwiseDivOp_scalar
(
ElementwiseDivOp
):
def
setUp
(
self
):
def
setUp
(
self
):
...
@@ -131,21 +126,5 @@ class TestElementwiseDivOp_broadcast_3(ElementwiseDivOp):
...
@@ -131,21 +126,5 @@ class TestElementwiseDivOp_broadcast_3(ElementwiseDivOp):
}
}
class
TestElementwiseDivOpFp16
(
ElementwiseDivOp
):
def
init_dtype
(
self
):
self
.
dtype
=
np
.
float16
def
test_check_grad_normal
(
self
):
self
.
check_grad
([
'X'
,
'Y'
],
'Out'
,
max_relative_error
=
1
)
def
test_check_grad_ingore_x
(
self
):
self
.
check_grad
(
[
'Y'
],
'Out'
,
max_relative_error
=
1
,
no_grad_set
=
set
(
"X"
))
def
test_check_grad_ingore_y
(
self
):
self
.
check_grad
(
[
'X'
],
'Out'
,
max_relative_error
=
1
,
no_grad_set
=
set
(
'Y'
))
if
__name__
==
'__main__'
:
if
__name__
==
'__main__'
:
unittest
.
main
()
unittest
.
main
()
python/paddle/fluid/tests/unittests/test_elementwise_mul_op.py
浏览文件 @
da87f7a6
...
@@ -135,10 +135,5 @@ class TestElementwiseMulOp_broadcast_3(ElementwiseMulOp):
...
@@ -135,10 +135,5 @@ class TestElementwiseMulOp_broadcast_3(ElementwiseMulOp):
}
}
class
TestElementwiseMulOpFp16
(
ElementwiseMulOp
):
def
init_dtype
(
self
):
self
.
dtype
=
np
.
float16
if
__name__
==
'__main__'
:
if
__name__
==
'__main__'
:
unittest
.
main
()
unittest
.
main
()
python/paddle/fluid/tests/unittests/test_fill_zeros_like_op.py
浏览文件 @
da87f7a6
...
@@ -22,22 +22,12 @@ from op_test import OpTest
...
@@ -22,22 +22,12 @@ from op_test import OpTest
class
TestFillZerosLikeOp
(
OpTest
):
class
TestFillZerosLikeOp
(
OpTest
):
def
setUp
(
self
):
def
setUp
(
self
):
self
.
op_type
=
"fill_zeros_like"
self
.
op_type
=
"fill_zeros_like"
self
.
dtype
=
np
.
float32
self
.
inputs
=
{
'X'
:
np
.
random
.
random
((
219
,
232
)).
astype
(
"float32"
)}
self
.
init_dtype
()
self
.
inputs
=
{
'X'
:
np
.
random
.
random
((
219
,
232
)).
astype
(
self
.
dtype
)}
self
.
outputs
=
{
'Out'
:
np
.
zeros_like
(
self
.
inputs
[
"X"
])}
self
.
outputs
=
{
'Out'
:
np
.
zeros_like
(
self
.
inputs
[
"X"
])}
def
init_dtype
(
self
):
pass
def
test_check_output
(
self
):
def
test_check_output
(
self
):
self
.
check_output
()
self
.
check_output
()
class
TestFillZerosLikeOpFp16
(
TestFillZerosLikeOp
):
def
init_dtype
(
self
):
self
.
dtype
=
np
.
float16
if
__name__
==
"__main__"
:
if
__name__
==
"__main__"
:
unittest
.
main
()
unittest
.
main
()
python/paddle/fluid/tests/unittests/test_learning_rate_scheduler.py
浏览文件 @
da87f7a6
...
@@ -97,7 +97,7 @@ class TestLearningRateDecay(unittest.TestCase):
...
@@ -97,7 +97,7 @@ class TestLearningRateDecay(unittest.TestCase):
startup_prog
=
fluid
.
Program
()
startup_prog
=
fluid
.
Program
()
with
fluid
.
program_guard
(
main_prog
,
startup_prog
):
with
fluid
.
program_guard
(
main_prog
,
startup_prog
):
decayed_lr
=
fluid_decay_fn
(
**
kwargs
)
(
"float32"
)
decayed_lr
=
fluid_decay_fn
(
**
kwargs
)
place
=
fluid
.
CPUPlace
()
place
=
fluid
.
CPUPlace
()
exe
=
fluid
.
Executor
(
place
)
exe
=
fluid
.
Executor
(
place
)
...
...
python/paddle/fluid/tests/unittests/test_momentum_op.py
浏览文件 @
da87f7a6
...
@@ -24,13 +24,11 @@ from op_test import OpTest
...
@@ -24,13 +24,11 @@ from op_test import OpTest
class
TestMomentumOp1
(
OpTest
):
class
TestMomentumOp1
(
OpTest
):
def
setUp
(
self
):
def
setUp
(
self
):
self
.
op_type
=
"momentum"
self
.
op_type
=
"momentum"
self
.
dtype
=
np
.
float32
self
.
init_dtype
()
param
=
np
.
random
.
random
((
123
,
321
)).
astype
(
self
.
dtype
)
param
=
np
.
random
.
random
((
123
,
321
)).
astype
(
"float32"
)
grad
=
np
.
random
.
random
((
123
,
321
)).
astype
(
self
.
dtype
)
grad
=
np
.
random
.
random
((
123
,
321
)).
astype
(
"float32"
)
velocity
=
np
.
zeros
((
123
,
321
)).
astype
(
self
.
dtype
)
velocity
=
np
.
zeros
((
123
,
321
)).
astype
(
"float32"
)
learning_rate
=
np
.
array
([
0.001
]).
astype
(
self
.
dtype
)
learning_rate
=
np
.
array
([
0.001
]).
astype
(
"float32"
)
mu
=
0.0001
mu
=
0.0001
use_nesterov
=
False
use_nesterov
=
False
...
@@ -52,21 +50,10 @@ class TestMomentumOp1(OpTest):
...
@@ -52,21 +50,10 @@ class TestMomentumOp1(OpTest):
self
.
outputs
=
{
'ParamOut'
:
param_out
,
'VelocityOut'
:
velocity_out
}
self
.
outputs
=
{
'ParamOut'
:
param_out
,
'VelocityOut'
:
velocity_out
}
def
init_dtype
(
self
):
pass
def
test_check_output
(
self
):
def
test_check_output
(
self
):
self
.
check_output
()
self
.
check_output
()
class
TestMomentumOpFp16
(
TestMomentumOp1
):
def
init_dtype
(
self
):
self
.
dtype
=
np
.
float16
def
test_check_output
(
self
):
self
.
check_output
(
atol
=
1e-3
)
class
TestMomentumOp2
(
OpTest
):
class
TestMomentumOp2
(
OpTest
):
'''Test Momentum with default values for attributes
'''Test Momentum with default values for attributes
'''
'''
...
...
python/paddle/fluid/tests/unittests/test_top_k_op.py
浏览文件 @
da87f7a6
...
@@ -23,11 +23,8 @@ class TestTopkOp(OpTest):
...
@@ -23,11 +23,8 @@ class TestTopkOp(OpTest):
def
setUp
(
self
):
def
setUp
(
self
):
self
.
set_args
()
self
.
set_args
()
self
.
op_type
=
"top_k"
self
.
op_type
=
"top_k"
self
.
dtype
=
np
.
float32
self
.
init_dtype
()
k
=
self
.
top_k
k
=
self
.
top_k
input
=
np
.
random
.
random
((
self
.
row
,
k
)).
astype
(
self
.
dtype
)
input
=
np
.
random
.
random
((
self
.
row
,
k
)).
astype
(
"float32"
)
output
=
np
.
ndarray
((
self
.
row
,
k
))
output
=
np
.
ndarray
((
self
.
row
,
k
))
indices
=
np
.
ndarray
((
self
.
row
,
k
)).
astype
(
"int64"
)
indices
=
np
.
ndarray
((
self
.
row
,
k
)).
astype
(
"int64"
)
...
@@ -41,9 +38,6 @@ class TestTopkOp(OpTest):
...
@@ -41,9 +38,6 @@ class TestTopkOp(OpTest):
self
.
outputs
=
{
'Out'
:
output
,
'Indices'
:
indices
}
self
.
outputs
=
{
'Out'
:
output
,
'Indices'
:
indices
}
def
init_dtype
(
self
):
pass
def
set_args
(
self
):
def
set_args
(
self
):
self
.
row
=
32
self
.
row
=
32
self
.
top_k
=
1
self
.
top_k
=
1
...
@@ -52,11 +46,6 @@ class TestTopkOp(OpTest):
...
@@ -52,11 +46,6 @@ class TestTopkOp(OpTest):
self
.
check_output
()
self
.
check_output
()
class
TestTopkOpFp16
(
TestTopkOp
):
def
init_dtype
(
self
):
self
.
dtype
=
np
.
float16
class
TestTopkOp3d
(
OpTest
):
class
TestTopkOp3d
(
OpTest
):
def
setUp
(
self
):
def
setUp
(
self
):
self
.
op_type
=
"top_k"
self
.
op_type
=
"top_k"
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录