diff --git a/test_tipc/configs/rec_r45_visionlan/rec_r45_visionlan.yml b/test_tipc/configs/rec_r45_visionlan/rec_r45_visionlan.yml
new file mode 100644
index 0000000000000000000000000000000000000000..860e4f53043138e7434d71a816fdf051048be6f7
--- /dev/null
+++ b/test_tipc/configs/rec_r45_visionlan/rec_r45_visionlan.yml
@@ -0,0 +1,108 @@
+Global:  # train/eval settings for the VisionLAN text-recognition model under test_tipc
+  use_gpu: true
+  epoch_num: 8  # train_infer_python.txt lists per-mode values 2/300 for this key; presumably applied as overrides - confirm in the TIPC runner
+  log_smooth_window: 200
+  print_batch_step: 200
+  save_model_dir: ./output/rec/r45_visionlan
+  save_epoch_step: 1
+  # evaluation is run every 2000 iterations
+  eval_batch_step: [0, 2000]
+  cal_metric_during_train: True
+  pretrained_model:  # left empty (parses as null)
+  checkpoints:  # left empty (parses as null)
+  save_inference_dir:  # left empty (parses as null)
+  use_visualdl: False
+  infer_img: doc/imgs_words/en/word_2.png
+  # for data or label process
+  character_dict_path:  # empty here; the inference command in train_infer_python.txt passes ./ppocr/utils/ic15_dict.txt - NOTE(review): confirm the default dictionary matches
+  max_text_length: &max_text_length 25  # anchor; aliased by Architecture.Head.max_text_length
+  training_step: &training_step LA  # anchor; aliased by Optimizer.training_step, Architecture.Head.training_step and Loss.mode
+  infer_mode: False
+  use_space_char: False
+  save_res_path: ./output/rec/predicts_visionlan.txt
+
+Optimizer:
+  name: Adam
+  beta1: 0.9
+  beta2: 0.999
+  clip_norm: 20.0
+  group_lr: true
+  training_step: *training_step  # alias of Global.training_step (LA)
+  lr:
+    name: Piecewise
+    decay_epochs: [6]  # presumably the epoch at which lr moves to the second entry of values - confirm against the Piecewise scheduler
+    values: [0.0001, 0.00001]
+  regularizer:
+    name: 'L2'
+    factor: 0
+
+Architecture:
+  model_type: rec
+  algorithm: VisionLAN
+  Transform:  # left empty (parses as null)
+  Backbone:
+    name: ResNet45
+    strides: [2, 2, 2, 1, 1]
+  Head:
+    name: VLHead
+    n_layers: 3
+    n_position: 256
+    n_dim: 512
+    max_text_length: *max_text_length  # alias of Global.max_text_length (25)
+    training_step: *training_step  # alias of Global.training_step (LA)
+
+Loss:
+  name: VLLoss
+  mode: *training_step  # alias of Global.training_step (LA)
+  weight_res: 0.5
+  weight_mas: 0.5
+
+PostProcess:
+  name: VLLabelDecode
+
+Metric:
+  name: RecMetric
+  is_filter: true
+
+
+Train:
+  dataset:
+    name: SimpleDataSet
+    data_dir: ./train_data/ic15_data/
+    label_file_list: ["./train_data/ic15_data/rec_gt_train.txt"]
+    transforms:
+      - DecodeImage: # load image
+          img_mode: RGB
+          channel_first: False
+      - ABINetRecAug:  # only in the Train pipeline; the Eval transforms below omit it
+      - VLLabelEncode: # Class handling label
+      - VLRecResizeImg:
+          image_shape: [3, 64, 256]  # same shape as --rec_image_shape in train_infer_python.txt
+      - KeepKeys:
+          keep_keys: ['image', 'label', 'label_res', 'label_sub', 'label_id', 'length'] # dataloader will return list in this order
+  loader:
+    shuffle: True
+    batch_size_per_card: 220  # train_infer_python.txt lists per-mode values 32/64 for this key; presumably applied as overrides - confirm in the TIPC runner
+    drop_last: True
+    num_workers: 4
+
+Eval:
+  dataset:
+    name: SimpleDataSet
+    data_dir: ./train_data/ic15_data  # same directory as Train.dataset.data_dir, written without the trailing slash
+    label_file_list: ["./train_data/ic15_data/rec_gt_test.txt"]
+    transforms:
+      - DecodeImage: # load image
+          img_mode: RGB
+          channel_first: False
+      - VLLabelEncode: # Class handling label
+      - VLRecResizeImg:
+          image_shape: [3, 64, 256]  # same shape as the Train pipeline
+      - KeepKeys:
+          keep_keys: ['image', 'label', 'label_res', 'label_sub', 'label_id', 'length'] # dataloader will return list in this order
+  loader:
+    shuffle: False
+    drop_last: False
+    batch_size_per_card: 64
+    num_workers: 4
+
diff --git a/test_tipc/configs/rec_r45_visionlan/train_infer_python.txt b/test_tipc/configs/rec_r45_visionlan/train_infer_python.txt
new file mode 100644
index 0000000000000000000000000000000000000000..c08ae7beb6c867bf36283e60dc1e70cfd9ee06a7
--- /dev/null
+++ b/test_tipc/configs/rec_r45_visionlan/train_infer_python.txt
@@ -0,0 +1,53 @@
+===========================train_params===========================
+model_name:rec_r45_visionlan
+python:python3.7
+gpu_list:0|0,1
+Global.use_gpu:True|True
+Global.auto_cast:null
+Global.epoch_num:lite_train_lite_infer=2|whole_train_whole_infer=300
+Global.save_model_dir:./output/
+Train.loader.batch_size_per_card:lite_train_lite_infer=32|whole_train_whole_infer=64
+Global.pretrained_model:null
+train_model_name:latest
+train_infer_img_dir:./inference/rec_inference
+null:null
+##
+trainer:norm_train
+norm_train:tools/train.py -c test_tipc/configs/rec_r45_visionlan/rec_r45_visionlan.yml -o
+pact_train:null
+fpgm_train:null
+distill_train:null
+null:null
+null:null
+##
+===========================eval_params===========================
+eval:tools/eval.py -c test_tipc/configs/rec_r45_visionlan/rec_r45_visionlan.yml -o
+null:null
+##
+===========================infer_params===========================
+Global.save_inference_dir:./output/
+Global.checkpoints:
+norm_export:tools/export_model.py -c test_tipc/configs/rec_r45_visionlan/rec_r45_visionlan.yml -o
+quant_export:null
+fpgm_export:null
+distill_export:null
+export1:null
+export2:null
+##
+train_model:./inference/rec_r45_visionlan_train/best_accuracy
+infer_export:tools/export_model.py -c test_tipc/configs/rec_r45_visionlan/rec_r45_visionlan.yml -o
+infer_quant:False
+inference:tools/infer/predict_rec.py --rec_char_dict_path=./ppocr/utils/ic15_dict.txt --rec_image_shape="3,64,256" --rec_algorithm="VisionLAN" --use_space_char=False
+--use_gpu:True|False
+--enable_mkldnn:False
+--cpu_threads:6
+--rec_batch_num:1|6
+--use_tensorrt:False
+--precision:fp32
+--rec_model_dir:
+--image_dir:./inference/rec_inference
+--save_log_path:./test/output/
+--benchmark:True
+null:null
+===========================infer_benchmark_params==========================
+random_infer_input:[{float32,[3,64,256]}]