# Video classification on Kinetics-600 using MoViNet-A3-Stream backbone.
# --experiment_type=movinet_kinetics600
# Achieves 80.09% Top-1 accuracy.
# http://mldash/experiments/8515953265355959123
#
# NOTE(review): the nesting below was reconstructed from a copy of this file
# that had been flattened onto one line; verify it against the consumer's
# config schema before relying on it.

# Execution environment: TPU distribution with bfloat16 mixed precision.
runtime:
  distribution_strategy: 'tpu'
  mixed_precision_dtype: 'bfloat16'

# Task definition: loss settings, model architecture, and input pipelines.
task:
  losses:
    l2_weight_decay: 0.00003
    label_smoothing: 0.1
  model:
    backbone:
      movinet:
        model_id: 'a3'
        causal: true
        # Note: we train with '3d_2plus1d', but convert to '2plus1d' for
        # inference.
        conv_type: '3d_2plus1d'
        se_type: '2plus3d'
        activation: 'hard_swish'
        gating_activation: 'hard_sigmoid'
        use_positional_encoding: true
        stochastic_depth_drop_rate: 0.2
    # NOTE(review): the three keys below are assumed to be model-level
    # settings (siblings of `backbone`); placing `activation` inside
    # `movinet` would duplicate the key already defined there.
    norm_activation:
      use_sync_bn: true
    dropout_rate: 0.5
    activation: 'hard_swish'
  train_data:
    name: 'kinetics600'
    variant_name: 'rgb'
    # NOTE(review): `!!python/tuple` is a PyYAML-specific tag that safe
    # loaders reject; confirm the consumer's loader accepts it.
    # Presumably (frames, height, width, channels) - TODO confirm.
    feature_shape: !!python/tuple
      - 64
      - 256
      - 256
      - 3
    temporal_stride: 2
    random_stride_range: 0
    global_batch_size: 1024
    dtype: 'bfloat16'
    shuffle_buffer_size: 1024
    min_image_size: 288
    aug_max_area_ratio: 1.0
    aug_max_aspect_ratio: 2.0
    aug_min_area_ratio: 0.08
    aug_min_aspect_ratio: 0.5
  validation_data:
    name: 'kinetics600'
    # Same trailing dimensions as train_data, but the first dimension is
    # 120 instead of 64.
    feature_shape: !!python/tuple
      - 120
      - 256
      - 256
      - 3
    temporal_stride: 2
    num_test_clips: 1
    num_test_crops: 1
    global_batch_size: 64
    min_image_size: 288
    dtype: 'bfloat16'
    drop_remainder: false

# Training loop: optimizer, learning-rate schedule, and reporting cadence.
trainer:
  optimizer_config:
    learning_rate:
      cosine:
        initial_learning_rate: 1.8
        # Matches `train_steps` below, so the cosine schedule spans the
        # whole run.
        decay_steps: 85785
    warmup:
      linear:
        warmup_steps: 2145
    optimizer:
      type: 'rmsprop'
      rmsprop:
        rho: 0.9
        momentum: 0.9
        epsilon: 1.0
        clipnorm: 1.0
  train_steps: 85785
  steps_per_loop: 500
  summary_interval: 500
  validation_interval: 500