diff --git a/docs/en/extension/train_with_DALI_en.md b/docs/en/extension/train_with_DALI_en.md
index cc3e2868059a0f1d3776313172f0315ada09970d..a67a76166b0d890d69f5e2a8cd14c68b146c785b 100644
--- a/docs/en/extension/train_with_DALI_en.md
+++ b/docs/en/extension/train_with_DALI_en.md
@@ -49,8 +49,14 @@ python -m paddle.distributed.launch \
 
 ## Train with FP16
 
-On the basis of the above, using FP16 half-precision can further improve the training speed, just add fields in the start training command `AMP.use_pure_fp16=True`:
+On the basis of the above, using FP16 half-precision can further improve the training speed; you can refer to the following command.
 
 ```shell
-python tools/static/train.py -c configs/ResNet/ResNet50.yaml -o use_dali=True -o AMP.use_pure_fp16=True
+export CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7
+export FLAGS_fraction_of_gpu_memory_to_use=0.8
+
+python -m paddle.distributed.launch \
+    --gpus="0,1,2,3,4,5,6,7" \
+    tools/static/train.py \
+        -c configs/ResNet/ResNet50_fp16.yaml
 ```
diff --git a/docs/zh_CN/extension/train_with_DALI.md b/docs/zh_CN/extension/train_with_DALI.md
index 6e7e977c937ea3473c9dd7a085cc0982fd685afc..b31b5cfdac7ae8d942546ca52252462fd1272019 100644
--- a/docs/zh_CN/extension/train_with_DALI.md
+++ b/docs/zh_CN/extension/train_with_DALI.md
@@ -48,9 +48,14 @@ python -m paddle.distributed.launch \
 ```
 
 ## Train with FP16
-
-On the basis of the above, using FP16 half-precision training can further improve the speed; just add the field `AMP.use_pure_fp16=True` to the training launch command:
+On the basis of the above, using FP16 half-precision training can further improve the speed; you can refer to the configuration and launch command below.
 
 ```shell
-python tools/static/train.py -c configs/ResNet/ResNet50.yaml -o use_dali=True -o AMP.use_pure_fp16=True
+export CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7
+export FLAGS_fraction_of_gpu_memory_to_use=0.8
+
+python -m paddle.distributed.launch \
+    --gpus="0,1,2,3,4,5,6,7" \
+    tools/static/train.py \
+        -c configs/ResNet/ResNet50_fp16.yaml
 ```
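Both documents now point readers at `configs/ResNet/ResNet50_fp16.yaml` instead of passing `-o` overrides on the command line. That config file is not part of this diff; purely as a hedged sketch, the fields it would need in order to stand in for the removed overrides are the two the old command passed explicitly:

```yaml
# Hypothetical excerpt -- the real configs/ResNet/ResNet50_fp16.yaml is not
# shown in this diff; these fields simply mirror the removed -o overrides.
use_dali: True            # keep DALI data loading enabled
AMP:
    use_pure_fp16: True   # train in pure FP16 rather than the default FP32
```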
diff --git a/ppcls/modeling/architectures/swin_transformer.py b/ppcls/modeling/architectures/swin_transformer.py
index 15a1e24c2af6172e7bf14d6f6b71ea8cbff7c61e..4b65ab5528c9eaa13b605902e37ca9d7fafa336b 100644
--- a/ppcls/modeling/architectures/swin_transformer.py
+++ b/ppcls/modeling/architectures/swin_transformer.py
@@ -63,7 +63,7 @@ def window_partition(x, window_size):
     return windows
 
 
-def window_reverse(windows, window_size, H, W):
+def window_reverse(windows, window_size, H, W, C):
     """
     Args:
         windows: (num_windows*B, window_size, window_size, C)
@@ -74,10 +74,9 @@
     Returns:
         x: (B, H, W, C)
     """
-    B = int(windows.shape[0] / (H * W / window_size / window_size))
     x = windows.reshape(
-        [B, H // window_size, W // window_size, window_size, window_size, -1])
-    x = x.transpose([0, 1, 3, 2, 4, 5]).reshape([B, H, W, -1])
+        [-1, H // window_size, W // window_size, window_size, window_size, C])
+    x = x.transpose([0, 1, 3, 2, 4, 5]).reshape([-1, H, W, C])
     return x
 
 
@@ -334,8 +333,8 @@ class SwinTransformerBlock(nn.Layer):
         # merge windows
         attn_windows = attn_windows.reshape(
             [-1, self.window_size, self.window_size, C])
-        shifted_x = window_reverse(attn_windows, self.window_size, H,
-                                   W)  # B H' W' C
+        shifted_x = window_reverse(attn_windows, self.window_size, H, W,
+                                   C)  # B H' W' C
 
         # reverse cyclic shift
         if self.shift_size > 0:
@@ -406,7 +405,7 @@ class PatchMerging(nn.Layer):
         x2 = x[:, 0::2, 1::2, :]  # B H/2 W/2 C
         x3 = x[:, 1::2, 1::2, :]  # B H/2 W/2 C
         x = paddle.concat([x0, x1, x2, x3], -1)  # B H/2 W/2 4*C
-        x = x.reshape([B, -1, 4 * C])  # B H/2*W/2 4*C
+        x = x.reshape([B, H * W // 4, 4 * C])  # B H/2*W/2 4*C
 
         x = self.norm(x)
         x = self.reduction(x)
@@ -551,10 +550,8 @@ class PatchEmbed(nn.Layer):
 
     def forward(self, x):
         B, C, H, W = x.shape
-        # FIXME look at relaxing size constraints
-        assert H == self.img_size[0] and W == self.img_size[1], \
-            "Input image size ({H}*{W}) doesn't match model ({}*{}).".format(
-                H, W, self.img_size[0], self.img_size[1])
+        # TODO (littletomatodonkey): uncommenting this line causes jit.save to fail
+        # assert [H, W] == self.img_size[:2], "Input image size ({H}*{W}) doesn't match model ({}*{}).".format(H, W, self.img_size[0], self.img_size[1])
         x = self.proj(x)
         x = x.flatten(2).transpose([0, 2, 1])  # B Ph*Pw C
         if self.norm is not None:
diff --git a/tools/eval.py b/tools/eval.py
index 8e0bcf16b0d7e15c1472bdd904b6f4294c3e6403..b214e70c653af206bca0d4cc05e85067854dd78d 100644
--- a/tools/eval.py
+++ b/tools/eval.py
@@ -72,6 +72,10 @@ def main(args, return_dict={}):
     init_model(config, net, optimizer=None)
 
     valid_dataloader = Reader(config, 'valid', places=place)()
+    if len(valid_dataloader) <= 0:
+        logger.error(
+            "valid dataloader is empty, please check your data config again!")
+        sys.exit(-1)
     net.eval()
     with paddle.no_grad():
         if not multilabel:
diff --git a/tools/export_model.py b/tools/export_model.py
index 51b4fe2b6b8737fa2cd826b1708fc8c5d495cf79..5d6b338dbcd7071ccda93471c6f9531246927eeb 100644
--- a/tools/export_model.py
+++ b/tools/export_model.py
@@ -47,6 +47,12 @@ class Net(paddle.nn.Layer):
         self.pre_net = net(class_dim=class_dim)
         self.model = model
 
+    def eval(self):
+        self.training = False
+        for layer in self.sublayers():
+            layer.training = False
+            layer.eval()
+
     def forward(self, inputs):
         x = self.pre_net(inputs)
         if self.model == "GoogLeNet":
diff --git a/tools/train.py b/tools/train.py
index 48e15676c4cba08264305140b434d83c2ed250d8..b17175427ccf0f0fdb3629870cb82aa7c56c6e68 100644
--- a/tools/train.py
+++ b/tools/train.py
@@ -88,9 +88,18 @@ def main(args):
     init_model(config, net, optimizer)
 
     train_dataloader = Reader(config, 'train', places=place)()
+    if len(train_dataloader) <= 0:
+        logger.error(
+            "train dataloader is empty, please check your data config again!")
+        sys.exit(-1)
 
     if config.validate:
         valid_dataloader = Reader(config, 'valid', places=place)()
+        if len(valid_dataloader) <= 0:
+            logger.error(
+                "valid dataloader is empty, please check your data config again!"
+            )
+            sys.exit(-1)
 
     last_epoch_id = config.get("last_epoch", -1)
     best_top1_acc = 0.0  # best top1 acc record
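For context on the `window_reverse` change in `ppcls/modeling/architectures/swin_transformer.py` above: computing `B = int(windows.shape[0] / ...)` on the Python side bakes a concrete batch size into the graph, while leaving the batch dimension as `-1` (and passing `C` explicitly instead of a trailing `-1`) keeps the reshape shape-agnostic, which is what dynamic-batch export via `jit.save` needs (see the `PatchEmbed` TODO in the same file). A minimal round-trip sketch of the new signature, with `window_partition` reproduced as in the standard Swin implementation:

```python
# Round-trip check for the new window_reverse(windows, window_size, H, W, C).
import paddle


def window_partition(x, window_size):
    B, H, W, C = x.shape
    x = x.reshape(
        [B, H // window_size, window_size, W // window_size, window_size, C])
    return x.transpose([0, 1, 3, 2, 4, 5]).reshape(
        [-1, window_size, window_size, C])


def window_reverse(windows, window_size, H, W, C):
    # Batch dim stays -1: no host-side arithmetic on windows.shape[0].
    x = windows.reshape(
        [-1, H // window_size, W // window_size, window_size, window_size, C])
    return x.transpose([0, 1, 3, 2, 4, 5]).reshape([-1, H, W, C])


x = paddle.rand([2, 8, 8, 4])            # (B, H, W, C)
windows = window_partition(x, 4)         # (B * num_windows, 4, 4, C)
y = window_reverse(windows, 4, 8, 8, 4)  # back to (B, H, W, C)
assert bool(paddle.allclose(x, y))
```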
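Similarly, the `Net.eval` override added in `tools/export_model.py` forces `training=False` on the wrapper and every sublayer, so that mode-dependent layers (BatchNorm, Dropout) are traced with inference behavior during export. A sketch of the export flow it supports, assuming the usual `to_static` + `jit.save` pattern; the backbone, class count, and output path below are illustrative, not taken from this diff:

```python
# Illustrative export flow; backbone/class_dim/path are assumptions.
import paddle
from ppcls.modeling import architectures

net = architectures.ResNet50                  # hypothetical backbone choice
model = Net(net, class_dim=1000, model=None)  # Net from tools/export_model.py
model.eval()  # the override: training=False on the wrapper and all sublayers
model = paddle.jit.to_static(
    model,
    input_spec=[
        paddle.static.InputSpec(
            shape=[None, 3, 224, 224], dtype="float32")
    ])
paddle.jit.save(model, "./inference/model")   # hypothetical output path
```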
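One caveat on the new empty-dataloader guards in `tools/eval.py` and `tools/train.py`: the added `sys.exit(-1)` calls assume `sys` is already imported in those scripts (this diff adds no import). Both tools scripts normally import `sys` for their `sys.path` setup at the top, but if that ever changes, a matching `import sys` would need to accompany this patch.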