Unverified commit eafc9889, authored by D Difer, committed by GitHub

move some fluid apis (#55986)

* move fluid apis

* fix type error

* remove static exponential_decay

* fix some import error

* remove nn.py

* fix some error

* fix type error
Parent 1e5fec39
@@ -42,7 +42,6 @@ from .framework import disable_static # noqa: F401
 from .framework import enable_static # noqa: F401
 from .framework import in_dynamic_mode # noqa: F401
 from .fluid.dataset import * # noqa: F401, F403
-from .fluid.lazy_init import LazyGuard # noqa: F401
 from .framework.dtype import iinfo # noqa: F401
 from .framework.dtype import finfo # noqa: F401
@@ -437,6 +436,7 @@ import paddle.text # noqa: F401
 import paddle.vision # noqa: F401
 from .tensor.random import check_shape # noqa: F401
+from .nn.initializer.lazy_init import LazyGuard # noqa: F401
 # CINN has to set a flag to include a lib
 if is_compiled_with_cinn():
...
@@ -113,7 +113,7 @@ class LocalSGDOptimizer(MetaOptimizerBase):
 p2s = self.create_snapshot_vars(main_block.program)
 with program_guard(main_block.program, startup_program):
-step = paddle.fluid.layers.autoincreased_step_counter(begin=1)
+step = paddle.optimizer.lr.autoincreased_step_counter(begin=1)
 k_steps = paddle.static.create_global_var(
 name="k_steps",
 shape=[1],
@@ -330,7 +330,7 @@ class AdaptiveLocalSGDOptimizer(MetaOptimizerBase):
 p2s = self.create_snapshot_vars(main_block.program)
 with program_guard(main_block.program, startup_program):
-step = paddle.fluid.layers.autoincreased_step_counter(begin=1)
+step = paddle.optimizer.lr.autoincreased_step_counter(begin=1)
 k_steps = paddle.static.create_global_var(
 name="k_steps",
...
@@ -15,17 +15,15 @@
 import logging
 import paddle
-from paddle.fluid.layers.learning_rate_scheduler import (
-exponential_decay,
-inverse_time_decay,
-noam_decay,
-)
 from paddle.optimizer.lr import (
 ExponentialDecay,
 InverseTimeDecay,
 LRScheduler,
 NaturalExpDecay,
 NoamDecay,
+exponential_decay,
+inverse_time_decay,
+noam_decay,
 )
 from ..ps.utils.public import (
...
@@ -21,7 +21,6 @@ from .framework import (
 default_main_program,
 _current_expected_place,
 )
-from .lazy_init import lazy_init_helper
 from .framework import program_guard
 import numpy as np
 from .core import VarDesc
...
@@ -12,17 +12,12 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
-from . import nn
-from .nn import *
 from . import io
 from .io import *
 from . import math_op_patch
 from .math_op_patch import *
-from .learning_rate_scheduler import *
 from ..layer_helper import LayerHelper
 __all__ = []
-__all__ += nn.__all__
 __all__ += io.__all__
-__all__ += learning_rate_scheduler.__all__
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
All layers related to the neural network.
"""
import os
import inspect
import warnings
import numpy as np
import paddle
from ..layer_helper import LayerHelper
from ..framework import (
Variable,
OpProtoHolder,
dygraph_only,
_dygraph_tracer,
default_main_program,
_create_tensor,
static_only,
_global_flags,
in_dygraph_mode,
)
from ..framework import _current_expected_place
from .. import dygraph_utils
from ..param_attr import ParamAttr
from .layer_function_generator import (
autodoc,
templatedoc,
_generate_doc_string_,
)
from .. import unique_name
from .. import core
from ...utils import deprecated
from ..data_feeder import (
convert_dtype,
check_variable_and_dtype,
check_type,
check_dtype,
)
from paddle.utils import deprecated
from paddle import _C_ops, _legacy_C_ops
from collections.abc import Iterable
__all__ = [
'autoincreased_step_counter',
]
def autoincreased_step_counter(counter_name=None, begin=1, step=1):
"""
:api_attr: Static Graph
Create an auto-increasing variable, which will be automatically increased
by 1 in every iteration. By default, the first return value of this counter is 1,
and the step size is 1.
Args:
counter_name(str, optional): The counter name. Default '@STEP_COUNTER@'.
begin(int, optional): The first return value of this counter. Default 1.
step(int, optional): The step size. Default 1.
Returns:
Variable: The auto-increased Variable with data type int64.
Examples:
.. code-block:: python
import paddle.fluid as fluid
import paddle
paddle.enable_static()
global_step = fluid.layers.autoincreased_step_counter(
counter_name='@LR_DECAY_COUNTER@', begin=0, step=1)
"""
helper = LayerHelper('global_step_counter')
if counter_name is None:
counter_name = '@STEP_COUNTER@'
counter, is_new_var = helper.create_or_get_global_variable(
name=counter_name,
dtype='int64',
shape=[1],
persistable=True,
belong_to_optimizer=True,
)
if is_new_var:
helper.set_variable_initializer(
counter,
initializer=paddle.nn.initializer.ConstantInitializer(
value=begin - 1, force_cpu=True
),
)
helper.main_program.global_block()._prepend_op(
type='increment',
inputs={'X': [counter]},
outputs={'Out': [counter]},
attrs={'step': float(step)},
)
counter.stop_gradient = True
return counter
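For reference, below is a minimal usage sketch under the new import path that this commit switches callers to (paddle.optimizer.lr.autoincreased_step_counter, as used in the hunks above); the counter name and the begin/step values simply mirror the docstring example and are only illustrative:

    import paddle

    paddle.enable_static()

    # After this change the counter is reached via paddle.optimizer.lr
    # instead of paddle.fluid.layers; the arguments keep their old meaning.
    global_step = paddle.optimizer.lr.autoincreased_step_counter(
        counter_name='@LR_DECAY_COUNTER@', begin=0, step=1
    )
    # global_step is an int64 Variable: it returns 0 on the first iteration
    # here (begin=0) and is incremented by step=1 on every later iteration.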
@@ -1409,8 +1409,6 @@ def _get_lr_scheduler_program(lr_scheduler, lr_param_dict, lr_decay_steps):
 InverseTimeDecay,
 NaturalExpDecay,
 NoamDecay,
-)
-from paddle.static.learning_rate_scheduler import (
 exponential_decay,
 inverse_time_decay,
 natural_exp_decay,
...
@@ -18,7 +18,7 @@ import math
 import numpy as np
 from ...fluid.framework import default_main_program, in_dygraph_mode
-from ...fluid.lazy_init import lazy_init_helper
+from .lazy_init import lazy_init_helper
 __all__ = []
@@ -42,7 +42,7 @@ class Initializer:
 return self._lazy_init(param, block)
 def forward(self, param, block=None):
-"""Add corresponding initialization operations to the network"""
+"""Add corresponding initialization operations to the network."""
 raise NotImplementedError()
 def _lazy_init(self, param, block=None):
...
@@ -12,7 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
-from . import framework
+from ...fluid import framework
 __all__ = ["LazyGuard"]
...
This diff is collapsed.
@@ -72,8 +72,6 @@ from .nn.control_flow import Print # noqa: F401
 from ..fluid.param_attr import WeightNormParamAttr # noqa: F401
 from ..fluid.optimizer import Optimizer # noqa: F401
-from ..fluid.layers import exponential_decay # noqa: F401
-from ..fluid.layers import learning_rate_scheduler # noqa: F401
 from .nn.metric import auc # noqa: F401
 from .nn.metric import accuracy # noqa: F401
@@ -135,5 +133,4 @@ __all__ = [ # noqa
 'create_parameter',
 'set_ipu_shard',
 'ctr_metric_bundle',
-'exponential_decay',
 ]
@@ -24,7 +24,7 @@ from paddle.common_ops_import import (
 check_type,
 check_variable_and_dtype,
 )
-from paddle.fluid import core, layers, unique_name
+from paddle.fluid import core, unique_name
 from paddle.fluid.data_feeder import check_dtype
 from paddle.fluid.framework import (
 Program,
@@ -4210,7 +4210,7 @@ class ExponentialMovingAverage:
 Update Exponential Moving Average. Should only call this method in
 train program.
 """
-global_step = layers.autoincreased_step_counter(
+global_step = paddle.optimizer.lr.autoincreased_step_counter(
 counter_name=self._step_counter_name
 )
 param_master_emas = []
...
@@ -67,7 +67,7 @@ def optimizer_setting(params, parameter_list=None):
 )
 else:
 optimizer = paddle.optimizer.Momentum(
-learning_rate=fluid.layers.cosine_decay(
+learning_rate=paddle.optimizer.lr.cosine_decay(
 learning_rate=lr, step_each_epoch=step, epochs=num_epochs
 ),
 momentum=momentum_rate,
...
@@ -248,7 +248,7 @@ class DistSeResneXt2x2(TestDistRunnerBase):
 else:
 optimizer = (
 paddle.distributed.fleet.meta_optimizers.DGCMomentumOptimizer(
-learning_rate=fluid.layers.piecewise_decay(
+learning_rate=paddle.optimizer.lr.piecewise_decay(
 boundaries=bd, values=lr
 ),
 momentum=0.9,
...
@@ -477,7 +477,7 @@ class TestLRDecayConditional(TranspilerTest):
 cost = paddle.nn.functional.square_error_cost(input=y_predict, label=y)
 avg_cost = paddle.mean(cost)
 sgd_optimizer = paddle.optimizer.SGD(
-learning_rate=fluid.layers.piecewise_decay(
+learning_rate=paddle.optimizer.lr.piecewise_decay(
 [10000, 20000], [1.0, 0.5, 1.0]
 )
 )
@@ -581,7 +581,7 @@ class TestL2DecayWithPiecewise(TranspilerTest):
 bd = [1, 10, 20, 30]
 lr = [base_lr * (0.1**i) for i in range(len(bd) + 1)]
 sgd_optimizer = paddle.optimizer.Momentum(
-learning_rate=fluid.layers.piecewise_decay(
+learning_rate=paddle.optimizer.lr.piecewise_decay(
 boundaries=bd, values=lr
 ),
 momentum=0.9,
...
@@ -451,7 +451,7 @@ class TestDygraphOCRAttention(unittest.TestCase):
 ocr_attention = OCRAttention()
 if Config.learning_rate_decay == "piecewise_decay":
-learning_rate = fluid.layers.piecewise_decay(
+learning_rate = paddle.optimizer.lr.piecewise_decay(
 [50000], [Config.LR, Config.LR * 0.01]
 )
 else:
@@ -527,7 +527,7 @@ class TestDygraphOCRAttention(unittest.TestCase):
 ocr_attention = OCRAttention()
 if Config.learning_rate_decay == "piecewise_decay":
-learning_rate = fluid.layers.piecewise_decay(
+learning_rate = paddle.optimizer.lr.piecewise_decay(
 [50000], [Config.LR, Config.LR * 0.01]
 )
 else:
...
@@ -67,7 +67,7 @@ def optimizer_setting(params, parameter_list=None):
 # TODO(minqiyang): Add learning rate scheduler support to dygraph mode
 # optimizer = fluid.optimizer.Momentum(
 # learning_rate=params["lr"],
-# learning_rate=fluid.layers.piecewise_decay(
+# learning_rate=paddle.optimizer.lr.piecewise_decay(
 # boundaries=bd, values=lr),
 # momentum=0.9,
 # regularization=paddle.regularizer.L2Decay(1e-4))
...
@@ -63,7 +63,7 @@ def optimizer_setting(params, parameter_list=None):
 # TODO(minqiyang): Add learning rate scheduler support to dygraph mode
 # optimizer = fluid.optimizer.Momentum(
 # learning_rate=params["lr"],
-# learning_rate=fluid.layers.piecewise_decay(
+# learning_rate=paddle.optimizer.lr.piecewise_decay(
 # boundaries=bd, values=lr),
 # momentum=0.9,
 # regularization=paddle.regularizer.L2Decay(1e-4))
...
@@ -1137,7 +1137,7 @@ class TestDygraphTransformerSortGradient(unittest.TestCase):
 is_sparse=is_sparse,
 )
 if sync:
-lr_decay = fluid.layers.learning_rate_scheduler.noam_decay(
+lr_decay = paddle.optimizer.lr.noam_decay(
 ModelHyperParams.d_model, TrainTaskConfig.warmup_steps
 )
 with fluid.default_main_program()._lr_schedule_guard():
...
@@ -20,7 +20,7 @@ import numpy as np
 import paddle
 from paddle import fluid
-from paddle.fluid import core, framework, layers
+from paddle.fluid import core, framework
 def exponential_decay(
@@ -239,7 +239,9 @@ class TestLearningRateDecayDygraph(unittest.TestCase):
 d_model = 0.01
 warmup_steps = 200
 learning_rate = 2.0
-lr = fluid.layers.noam_decay(d_model, warmup_steps, learning_rate)
+lr = paddle.optimizer.lr.noam_decay(
+d_model, warmup_steps, learning_rate
+)
 for step in range(5):
 step += 1
 right_result = noam_decay(
@@ -278,7 +280,7 @@ class TestLearningRateDecayDygraph(unittest.TestCase):
 np.testing.assert_allclose(t, right_result[i], rtol=1e-05)
 with self.assertRaises(TypeError):
-lr = fluid.layers.linear_lr_warmup(
+lr = paddle.optimizer.lr.linear_lr_warmup(
 learning_rate="fake_lr",
 warmup_steps=2,
 start_lr=0.0,
@@ -443,39 +445,59 @@ class TestLearningRateDecay(unittest.TestCase):
 common_kwargs_false["staircase"] = False
 decay_fns = [
-(exponential_decay, layers.exponential_decay, common_kwargs_true),
-(exponential_decay, layers.exponential_decay, common_kwargs_false),
-(natural_exp_decay, layers.natural_exp_decay, common_kwargs_true),
-(natural_exp_decay, layers.natural_exp_decay, common_kwargs_false),
-(inverse_time_decay, layers.inverse_time_decay, common_kwargs_true),
+(
+exponential_decay,
+paddle.optimizer.lr.exponential_decay,
+common_kwargs_true,
+),
+(
+exponential_decay,
+paddle.optimizer.lr.exponential_decay,
+common_kwargs_false,
+),
+(
+natural_exp_decay,
+paddle.optimizer.lr.natural_exp_decay,
+common_kwargs_true,
+),
+(
+natural_exp_decay,
+paddle.optimizer.lr.natural_exp_decay,
+common_kwargs_false,
+),
+(
+inverse_time_decay,
+paddle.optimizer.lr.inverse_time_decay,
+common_kwargs_true,
+),
 (
 inverse_time_decay,
-layers.inverse_time_decay,
+paddle.optimizer.lr.inverse_time_decay,
 common_kwargs_false,
 ),
 (
 polynomial_decay,
-layers.polynomial_decay,
+paddle.optimizer.lr.polynomial_decay,
 {"learning_rate": 1.0, "decay_steps": 5, "cycle": True},
 ),
 (
 polynomial_decay,
-layers.polynomial_decay,
+paddle.optimizer.lr.polynomial_decay,
 {"learning_rate": 1.0, "decay_steps": 5, "cycle": False},
 ),
 (
 piecewise_decay,
-layers.piecewise_decay,
+paddle.optimizer.lr.piecewise_decay,
 {"boundaries": [3, 6, 9], "values": [0.1, 0.2, 0.3, 0.4]},
 ),
 (
 cosine_decay,
-layers.cosine_decay,
+paddle.optimizer.lr.cosine_decay,
 {"learning_rate": 0.1, "step_each_epoch": 100, "epochs": 120},
 ),
 (
 noam_decay,
-layers.noam_decay,
+paddle.optimizer.lr.noam_decay,
 {"d_model": 0.01, "warmup_steps": 200, "learning_rate": 2.0},
 ),
 ]
@@ -507,7 +529,7 @@ class TestLinearWamrupLearningRateDecay(unittest.TestCase):
 end_lr = 0.1
 with fluid.program_guard(main_prog, startup_prog):
-decayed_lr = layers.linear_lr_warmup(
+decayed_lr = paddle.optimizer.lr.linear_lr_warmup(
 fluid_decay_fn(**kwargs), warmup_steps, start_lr, end_lr
 )
@@ -548,7 +570,7 @@ class TestLinearWamrupLearningRateDecayWithScalarInput(unittest.TestCase):
 warmup_steps = 10
 with fluid.program_guard(main_prog, startup_prog):
-decayed_lr = layers.linear_lr_warmup(
+decayed_lr = paddle.optimizer.lr.linear_lr_warmup(
 lr, warmup_steps, start_lr, end_lr
 )
...