Merge pull request #7261 from andyjpaddle/add_tipc_vl

[TIPC] add tipc for visionlan

Merge pull request #7261 from andyjpaddle/add_tipc_vl
[TIPC] add tipc for visionlan
65d9d93e · andyjpaddle · GitHub · 77b0bbfe · 9dd51384 · 65d9d93e
2 changed files
--- a/test_tipc/configs/rec_r45_visionlan/rec_r45_visionlan.yml
+++ b/test_tipc/configs/rec_r45_visionlan/rec_r45_visionlan.yml
+Global:  # run-wide settings; the anchors defined here are aliased by Optimizer, Architecture.Head and Loss
+  use_gpu: true
+  epoch_num: 8  # NOTE(review): train_infer_python.txt lists Global.epoch_num as 2 / 300 per mode — presumably overrides of this value; confirm
+  log_smooth_window: 200
+  print_batch_step: 200
+  save_model_dir: ./output/rec/r45_visionlan  # NOTE(review): train_infer_python.txt lists ./output/ for this key — presumably an override; confirm
+  save_epoch_step: 1
+  # eval_batch_step below is [0, 2000]: presumably [start iteration, interval], i.e. evaluate every 2000 iterations — confirm
+  eval_batch_step: [0, 2000]
+  cal_metric_during_train: True
+  pretrained_model:  # left empty (parses as null)
+  checkpoints:  # left empty (parses as null)
+  save_inference_dir:  # left empty (parses as null)
+  use_visualdl: False
+  infer_img: doc/imgs_words/en/word_2.png
+  # settings for data and label processing
+  character_dict_path:  # left empty (null); NOTE(review): the inference command in train_infer_python.txt passes ./ppocr/utils/ic15_dict.txt — confirm both yield the same character set
+  max_text_length: &max_text_length 25  # anchor; aliased by Architecture.Head.max_text_length
+  training_step: &training_step LA  # anchor; aliased by Optimizer.training_step, Architecture.Head.training_step and Loss.mode
+  infer_mode: False
+  use_space_char: False  # the inference command in train_infer_python.txt also passes --use_space_char=False
+  save_res_path: ./output/rec/predicts_visionlan.txt
+
+Optimizer:  # Adam optimizer with a Piecewise learning-rate schedule and an L2 regularizer entry
+  name: Adam
+  beta1: 0.9
+  beta2: 0.999
+  clip_norm: 20.0
+  group_lr: true
+  training_step: *training_step  # alias of Global.training_step (LA)
+  lr:
+    name: Piecewise
+    decay_epochs: [6]  # presumably the epoch boundary between the entries of `values` — confirm
+    values: [0.0001, 0.00001]  # one more entry than decay_epochs
+  regularizer:
+    name: 'L2'
+    factor: 0  # NOTE(review): a factor of 0 presumably makes the L2 term a no-op — confirm this is intended
+
+Architecture:  # VisionLAN recognition model: ResNet45 backbone and VLHead head
+  model_type: rec
+  algorithm: VisionLAN
+  Transform:  # left empty (parses as null)
+  Backbone:
+    name: ResNet45
+    strides: [2, 2, 2, 1, 1]
+  Head:
+    name: VLHead
+    n_layers: 3
+    n_position: 256
+    n_dim: 512
+    max_text_length: *max_text_length  # alias of Global.max_text_length (25)
+    training_step: *training_step  # alias of Global.training_step (LA)
+
+Loss:  # VLLoss settings; what each weight applies to is defined by VLLoss, which is not visible here
+  name: VLLoss
+  mode: *training_step  # alias of Global.training_step (LA)
+  weight_res: 0.5
+  weight_mas: 0.5
+
+PostProcess:  # NOTE(review): VLLabelDecode presumably mirrors the VLLabelEncode transform used in Train/Eval — confirm
+  name: VLLabelDecode
+
+Metric:  # evaluation metric selection
+  name: RecMetric
+  is_filter: true  # NOTE(review): what gets filtered is defined by RecMetric, which is not visible here
+
+
+Train:  # training dataset and loader; reads the IC15 training label file
+  dataset:
+    name: SimpleDataSet
+    data_dir: ./train_data/ic15_data/
+    label_file_list: ["./train_data/ic15_data/rec_gt_train.txt"]
+    transforms:  # same entries as Eval.dataset.transforms plus ABINetRecAug
+      - DecodeImage: # load the image
+          img_mode: RGB
+          channel_first: False
+      - ABINetRecAug:  # no arguments given; listed only for Train, not for Eval
+      - VLLabelEncode: # class that handles the label
+      - VLRecResizeImg:
+          image_shape: [3, 64, 256]  # matches --rec_image_shape="3,64,256" in train_infer_python.txt
+      - KeepKeys:
+          keep_keys: ['image', 'label', 'label_res', 'label_sub', 'label_id', 'length'] # dataloader will return a list in this order
+  loader:
+    shuffle: True
+    batch_size_per_card: 220  # NOTE(review): train_infer_python.txt lists 32 / 64 for this key — presumably overrides; confirm
+    drop_last: True
+    num_workers: 4
+
+Eval:  # evaluation dataset and loader; reads the IC15 test label file
+  dataset:
+    name: SimpleDataSet
+    data_dir: ./train_data/ic15_data  # no trailing slash here, unlike Train.dataset.data_dir
+    label_file_list: ["./train_data/ic15_data/rec_gt_test.txt"]
+    transforms:  # same entries as Train.dataset.transforms except that ABINetRecAug is absent
+      - DecodeImage: # load the image
+          img_mode: RGB
+          channel_first: False
+      - VLLabelEncode: # class that handles the label
+      - VLRecResizeImg:
+          image_shape: [3, 64, 256]  # matches --rec_image_shape="3,64,256" in train_infer_python.txt
+      - KeepKeys:
+          keep_keys: ['image', 'label', 'label_res', 'label_sub', 'label_id', 'length'] # dataloader will return a list in this order
+  loader:
+    shuffle: False
+    drop_last: False
+    batch_size_per_card: 64
+    num_workers: 4
+  
--- a/test_tipc/configs/rec_r45_visionlan/train_infer_python.txt
+++ b/test_tipc/configs/rec_r45_visionlan/train_infer_python.txt
+===========================train_params===========================
+model_name:rec_r45_visionlan
+python:python3.7
+gpu_list:0|0,1
+Global.use_gpu:True|True
+Global.auto_cast:null
+Global.epoch_num:lite_train_lite_infer=2|whole_train_whole_infer=300
+Global.save_model_dir:./output/
+Train.loader.batch_size_per_card:lite_train_lite_infer=32|whole_train_whole_infer=64
+Global.pretrained_model:null
+train_model_name:latest
+train_infer_img_dir:./inference/rec_inference
+null:null
+##
+trainer:norm_train
+norm_train:tools/train.py -c test_tipc/configs/rec_r45_visionlan/rec_r45_visionlan.yml -o
+pact_train:null
+fpgm_train:null
+distill_train:null
+null:null
+null:null
+##
+===========================eval_params===========================
+eval:tools/eval.py -c test_tipc/configs/rec_r45_visionlan/rec_r45_visionlan.yml -o
+null:null
+##
+===========================infer_params===========================
+Global.save_inference_dir:./output/
+Global.checkpoints:
+norm_export:tools/export_model.py -c test_tipc/configs/rec_r45_visionlan/rec_r45_visionlan.yml -o
+quant_export:null
+fpgm_export:null
+distill_export:null
+export1:null
+export2:null
+##
+train_model:./inference/rec_r45_visionlan_train/best_accuracy
+infer_export:tools/export_model.py -c test_tipc/configs/rec_r45_visionlan/rec_r45_visionlan.yml -o
+infer_quant:False
+inference:tools/infer/predict_rec.py --rec_char_dict_path=./ppocr/utils/ic15_dict.txt --rec_image_shape="3,64,256" --rec_algorithm="VisionLAN" --use_space_char=False
+--use_gpu:True|False
+--enable_mkldnn:False
+--cpu_threads:6
+--rec_batch_num:1|6
+--use_tensorrt:False
+--precision:fp32
+--rec_model_dir:
+--image_dir:./inference/rec_inference
+--save_log_path:./test/output/
+--benchmark:True
+null:null
+===========================infer_benchmark_params==========================
+random_infer_input:[{float32,[3,64,256]}]