Commit 7e12c73e authored by HydrogenSulfate

polish progressive training code

Parent 8b8e0431
@@ -268,7 +268,7 @@ v2_xl_block = [  # only for 21k pretraining.
 ]
 efficientnetv2_params = {
     # params: (block, width, depth, dropout)
-    "efficientnetv2-s": (v2_s_block, 1.0, 1.0, 0.2),
+    "efficientnetv2-s": (v2_s_block, 1.0, 1.0, np.linspace(0.1, 0.3, 4)),
     "efficientnetv2-m": (v2_m_block, 1.0, 1.0, 0.3),
     "efficientnetv2-l": (v2_l_block, 1.0, 1.0, 0.4),
     "efficientnetv2-xl": (v2_xl_block, 1.0, 1.0, 0.4),
@@ -293,7 +293,7 @@ def efficientnetv2_config(model_name: str):
         act_fn="silu",
         survival_prob=0.8,
         local_pooling=False,
-        conv_dropout=None,
+        conv_dropout=0,
         num_classes=1000))
     return cfg
@@ -756,8 +756,10 @@ class Head(nn.Layer):
         self._avg_pooling = nn.AdaptiveAvgPool2D(output_size=1)
-        if self.dropout_rate > 0:
-            self._dropout = nn.Dropout(self.dropout_rate)
+        if isinstance(self.dropout_rate,
+                      (list, tuple)) or self.dropout_rate > 0:
+            self._dropout = nn.Dropout(self.dropout_rate[0] if isinstance(
+                self.dropout_rate, (list, tuple)) else self.dropout_rate)
         else:
             self._dropout = None
...
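With this change, Head accepts either a scalar rate or a per-stage list, seeding nn.Dropout from the first entry; the training loop later rewrites `_dropout.p` in place at each stage switch (see the train_epoch_progressive hunk below). A minimal sketch of the same pattern, using a hypothetical standalone helper rather than the Head class itself:

```python
import paddle.nn as nn

# Hypothetical helper mirroring the Head change above:
# accept a scalar dropout rate or a per-stage schedule.
def build_dropout(dropout_rate):
    if isinstance(dropout_rate, (list, tuple)) or dropout_rate > 0:
        # A schedule seeds Dropout with the stage-0 rate.
        return nn.Dropout(dropout_rate[0] if isinstance(
            dropout_rate, (list, tuple)) else dropout_rate)
    return None

drop = build_dropout([0.1, 0.2, 0.3])
print(drop.p)  # 0.1
drop.p = 0.3   # the progressive trainer mutates p in place at a stage switch
```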
@@ -4,16 +4,16 @@ Global:
   pretrained_model: null
   output_dir: ./output/
   device: gpu
-  save_interval: 100
+  save_interval: 1
   eval_during_train: True
   eval_interval: 1
   epochs: 350
   print_batch_step: 20
   use_visualdl: False
+  train_mode: progressive  # progressive training
   # used for static mode and model export
   image_shape: [3, 384, 384]
   save_inference_dir: ./inference
-  train_mode: efficientnetv2  # progressive training

 AMP:
   scale_loss: 65536
@@ -63,13 +63,15 @@ DataLoader:
             to_rgb: True
             channel_first: False
         - RandCropImage:
+            size: 171
+            progress_size: [171, 214, 257, 300]
             scale: [0.05, 1.0]
-            size: 224
         - RandFlipImage:
             flip_code: 1
         - RandAugmentV2:
             num_layers: 2
-            magnitude: 5
+            magnitude: 5.0
+            progress_magnitude: [5.0, 8.3333333333, 11.66666666667, 15.0]
         - NormalizeImage:
             scale: 1.0
             mean: [128.0, 128.0, 128.0]
...
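The two progress_* lists line up index-for-index with the four training stages: stage i trains at crop size progress_size[i] with RandAugment magnitude progress_magnitude[i]. A quick illustration (values copied from the YAML above; the loop itself is not commit code):

```python
# One (size, magnitude) pair per progressive stage, as configured above.
progress_size = [171, 214, 257, 300]
progress_magnitude = [5.0, 8.3333333333, 11.66666666667, 15.0]
for stage_id, (size, mag) in enumerate(zip(progress_size, progress_magnitude)):
    print(f"stage {stage_id}: crop size {size}, magnitude {mag:.2f}")
```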
@@ -439,6 +439,7 @@ class RandCropImage(object):
     def __init__(self,
                  size,
+                 progress_size=None,
                  scale=None,
                  ratio=None,
                  interpolation=None,
@@ -448,6 +449,7 @@ class RandCropImage(object):
         else:
             self.size = size
+        self.progress_size = progress_size
         self.scale = [0.08, 1.0] if scale is None else scale
         self.ratio = [3. / 4., 4. / 3.] if ratio is None else ratio
...
@@ -176,9 +176,14 @@ class RandomApply(object):
 class RandAugmentV2(RandAugment):
     """Customized RandAugment for EfficientNetV2"""

-    def __init__(self, num_layers=2, magnitude=5, fillcolor=(128, 128, 128)):
+    def __init__(self,
+                 num_layers=2,
+                 magnitude=5,
+                 progress_magnitude=None,
+                 fillcolor=(128, 128, 128)):
         super().__init__(num_layers, magnitude, fillcolor)
-        abso_level = self.magnitude / self.max_level  # [5.0~10.0/10.0]=[0.5, 1.0]
+        self.progress_magnitude = progress_magnitude
+        abso_level = self.magnitude / self.max_level
         self.level_map = {
             "shearX": 0.3 * abso_level,
             "shearY": 0.3 * abso_level,
...
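The deleted inline comment documented the scaling: abso_level = magnitude / max_level with max_level = 10, so magnitudes 5 through 10 map to levels 0.5 through 1.0, and each op's strength in level_map is a fixed fraction of that level. Illustrated with the scheduled magnitudes from the config (a standalone sketch, not commit code):

```python
# Scaling used by RandAugmentV2: each op's strength is driven by
# magnitude / max_level, with max_level = 10 per the removed comment.
max_level = 10.0
for magnitude in [5.0, 8.3333333333, 11.66666666667, 15.0]:
    abso_level = magnitude / max_level
    print(f"magnitude {magnitude:5.2f} -> shearX strength {0.3 * abso_level:.3f}")
```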
@@ -12,6 +12,6 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 from ppcls.engine.train.train import train_epoch
-from ppcls.engine.train.train_efficientnetv2 import train_epoch_efficientnetv2
 from ppcls.engine.train.train_fixmatch import train_epoch_fixmatch
 from ppcls.engine.train.train_fixmatch_ccssl import train_epoch_fixmatch_ccssl
+from ppcls.engine.train.train_progressive import train_epoch_progressive
@@ -13,29 +13,21 @@
 # limitations under the License.
 from __future__ import absolute_import, division, print_function

-import time
-import numpy as np
 from ppcls.data import build_dataloader
+from ppcls.engine.train.utils import type_name
 from ppcls.utils import logger
 from .train import train_epoch


-def train_epoch_efficientnetv2(engine, epoch_id, print_batch_step):
+def train_epoch_progressive(engine, epoch_id, print_batch_step):
     # 1. Build training hyper-parameters for different training stage
     num_stage = 4
     ratio_list = [(i + 1) / num_stage for i in range(num_stage)]
-    ram_list = np.linspace(5, 10, num_stage)
-    # dropout_rate_list = np.linspace(0.0, 0.2, num_stage)
     stones = [
         int(engine.config["Global"]["epochs"] * ratio_list[i])
         for i in range(num_stage)
     ]
-    image_size_list = [
-        int(128 + (300 - 128) * ratio_list[i]) for i in range(num_stage)
-    ]
     stage_id = 0
     for i in range(num_stage):
         if epoch_id > stones[i]:
@@ -43,10 +35,24 @@ def train_epoch_efficientnetv2(engine, epoch_id, print_batch_step):
     # 2. Adjust training hyper-parameters for different training stage
     if not hasattr(engine, 'last_stage') or engine.last_stage < stage_id:
+        cur_dropout_rate = 0.0
+
+        def _change_dp_func(m):
+            nonlocal cur_dropout_rate  # nonlocal, not global, so the logging below sees the updated rate
+            if type_name(m) == "Head" and hasattr(m, "_dropout"):
+                m._dropout.p = m.dropout_rate[stage_id]
+                cur_dropout_rate = m.dropout_rate[stage_id]
+
+        engine.model.apply(_change_dp_func)
+        cur_image_size = engine.config["DataLoader"]["Train"]["dataset"][
+            "transform_ops"][1]["RandCropImage"]["progress_size"][stage_id]
+        cur_magnitude = engine.config["DataLoader"]["Train"]["dataset"][
+            "transform_ops"][3]["RandAugment"]["progress_magnitude"][stage_id]
         engine.config["DataLoader"]["Train"]["dataset"]["transform_ops"][1][
-            "RandCropImage"]["size"] = image_size_list[stage_id]
+            "RandCropImage"]["size"] = cur_image_size
         engine.config["DataLoader"]["Train"]["dataset"]["transform_ops"][3][
-            "RandAugment"]["magnitude"] = ram_list[stage_id]
+            "RandAugment"]["magnitude"] = cur_magnitude
         engine.train_dataloader = build_dataloader(
             engine.config["DataLoader"],
             "Train",
@@ -55,9 +61,11 @@ def train_epoch_efficientnetv2(engine, epoch_id, print_batch_step):
             seed=epoch_id)
         engine.train_dataloader_iter = iter(engine.train_dataloader)
         engine.last_stage = stage_id
-        logger.info(
-            f"Training stage: [{stage_id+1}/{num_stage}](random_aug_magnitude={ram_list[stage_id]}, train_image_size={image_size_list[stage_id]})"
-        )
+        logger.info(f"Training stage: [{stage_id+1}/{num_stage}]("
+                    f"random_aug_magnitude={cur_magnitude}, "
+                    f"train_image_size={cur_image_size}, "
+                    f"dropout_rate={cur_dropout_rate}"
+                    f")")

     # 3. Train one epoch as usual at current stage
     train_epoch(engine, epoch_id, print_batch_step)
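For reference, the stage arithmetic in train_epoch_progressive splits the run into four equal spans of epochs, with `stones` as the stage boundaries. A standalone sketch (the `epochs` and `epoch_id` values are invented, and the loop body after the `if` truncated in the diff is assumed to be `stage_id = i + 1`):

```python
# Four equal stages over the full run; `stones` marks the stage boundaries.
num_stage = 4
epochs = 350  # matches the YAML above; any total works
ratio_list = [(i + 1) / num_stage for i in range(num_stage)]
stones = [int(epochs * r) for r in ratio_list]
print(stones)  # [87, 175, 262, 350]

epoch_id = 200  # invented example epoch
stage_id = 0
for i in range(num_stage):
    if epoch_id > stones[i]:
        stage_id = i + 1  # assumed loop body (truncated in the diff)
print(stage_id)  # 2 -> the third stage's size/magnitude/dropout are applied
```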
@@ -61,6 +61,7 @@ def load_dygraph_pretrain(model, path=None):
         m.set_dict(param_state_dict)
     else:
         model.set_dict(param_state_dict)
+    logger.info("Finish load pretrained model from {}".format(path))
     return
...