1. Bug fixes for online inference (#648)

Bourn3z · web-flow · commit 5120a2a6771c · 2024-01-08T09:05:29.000+08:00
2. Introduce online inference based on YAML.
diff --git a/configs/det/dbnet/db_r50_icdar15.yaml b/configs/det/dbnet/db_r50_icdar15.yaml
@@ -158,6 +158,7 @@ eval:
 
 predict:
   ckpt_load_path: tmp_det/best.ckpt
+  output_save_dir: ./output
   dataset_sink_mode: False
   dataset:
     type: PredictDataset
@@ -169,24 +170,24 @@ predict:
       - DecodeImage:
           img_mode: RGB
           to_float32: False
-#      - DetLabelEncode:
-      - DetResize:  # GridResize 32
-          target_size: [ 736, 1280 ]
-          keep_ratio: False
-          limit_type: none
-          divisor: 32
+          keep_ori: True
+      - DetResize:
+          keep_ratio: True
+          padding: False
+          limit_side_len: 960
+          limit_type: max
       - NormalizeImage:
           bgr_to_rgb: False
           is_hwc: True
           mean: imagenet
           std: imagenet
       - ToCHWImage:
     #  the order of the dataloader list, matching the network input and the labels for evaluation
-    output_columns: [ 'img_path', 'image', 'raw_img_shape' ]  # shape in h, w order
-#    num_keys_of_labels: 2 # num labels
+    output_columns: ["image", "img_path", "shape_list", "image_ori"]
+    net_input_column_index: [ 0 ] # input indices for network forward func in output_columns
 
   loader:
     shuffle: False
-    batch_size: 1 # TODO: due to dynamic shape of polygons (num of boxes varies), BS has to be 1
+    batch_size: 1
     drop_remainder: False
     num_workers: 2
diff --git a/configs/rec/crnn/crnn_resnet34.yaml b/configs/rec/crnn/crnn_resnet34.yaml
@@ -162,30 +162,23 @@ predict:
     shuffle: False
     transform_pipeline:
       - DecodeImage:
-          img_mode: BGR
+          img_mode: RGB
           to_float32: False
-#      - RecCTCLabelEncode:
-#          max_text_len: *max_text_len
-#          character_dict_path: *character_dict_path
-#          use_space_char: *use_space_char
-#          lower: True
-      - RecResizeImg: # different from paddle (paddle converts image from HWC to CHW and rescale to [-1, 1] after resize.
-          image_shape: [32, 100] # H, W
-          infer_mode: *infer_mode
-          character_dict_path: *character_dict_path
-          padding: False # aspect ratio will be preserved if true.
-      - NormalizeImage:  # different from paddle (paddle wrongly normalize BGR image with RGB mean/std from ImageNet for det, and simple rescale to [-1, 1] in rec.
-          bgr_to_rgb: True
-          is_hwc: True
-          mean : [127.0, 127.0, 127.0]
-          std : [127.0, 127.0, 127.0]
+      - RecResizeNormForInfer:
+          target_height: 32
+          target_width: 100
+          keep_ratio: False
+          padding: False
+          norm_before_pad: False
       - ToCHWImage:
     #  the order of the dataloader list, matching the network input and the input labels for the loss function, and optional data for debug/visaulize
-    output_columns: [ 'img_path', 'image', 'raw_img_shape' ]
+    output_columns: ['image', 'img_path']
+    net_input_column_index: [0] # input indices for network forward func in output_columns
+    # label_column_index: [1, 2] # input indices marked as label
 
   loader:
       shuffle: False # TODO: tbc
-      batch_size: 1
+      batch_size: 2
       drop_remainder: True
       max_rowsize: 12
       num_workers: 8
diff --git a/mindocr/data/predict_dataset.py b/mindocr/data/predict_dataset.py
@@ -43,7 +43,7 @@ def __init__(
             raise ValueError("No transform pipeline is specified!")
 
         # prefetch the data keys, to fit GeneratorDataset
-        _data = self.data_list[0]
+        _data = self.data_list[0].copy()
         _data = run_transforms(_data, transforms=self.transforms)
         _available_keys = list(_data.keys())
         if output_columns is None:
@@ -60,7 +60,7 @@ def __init__(
                     )
 
     def __getitem__(self, index):
-        data = self.data_list[index]
+        data = self.data_list[index].copy()
 
         # perform transformation on data
         data = run_transforms(data, transforms=self.transforms)
diff --git a/mindocr/models/cls_mv3.py b/mindocr/models/cls_mv3.py
@@ -27,8 +27,7 @@ def __init__(self, config):
 
 
 @register_model
-def cls_mobilenet_v3_small_100_model(pretrained=False, **kwargs):
-    pretrained_backbone = not pretrained
+def cls_mobilenet_v3_small_100_model(pretrained=False, pretrained_backbone=True, **kwargs):
     model_config = {
         "backbone": {
             'name': 'cls_mobilenet_v3_small_100',
diff --git a/mindocr/models/det_dbnet.py b/mindocr/models/det_dbnet.py
@@ -33,8 +33,8 @@ def __init__(self, config):
 
 
 @register_model
-def dbnet_mobilenetv3(pretrained=False, **kwargs):
-    pretrained_backbone = 'https://download.mindspore.cn/toolkits/mindcv/mobilenet/mobilenetv3' \
+def dbnet_mobilenetv3(pretrained=False, pretrained_backbone=True, **kwargs):
+    backbone_ckpt_url = 'https://download.mindspore.cn/toolkits/mindcv/mobilenet/mobilenetv3' \
                           '/mobilenet_v3_large_050_no_scale_se_v2_expand-3c4047ac.ckpt'
     model_config = {
         "backbone": {
@@ -43,7 +43,7 @@ def dbnet_mobilenetv3(pretrained=False, **kwargs):
             'alpha': 0.5,
             'out_stages': [5, 8, 14, 20],
             'bottleneck_params': {'se_version': 'SqueezeExciteV2', 'always_expand': True},
-            'pretrained': pretrained_backbone if not pretrained else False
+            'pretrained': backbone_ckpt_url if pretrained_backbone else False
         },
         "neck": {
             "name": 'DBFPN',
@@ -68,8 +68,7 @@ def dbnet_mobilenetv3(pretrained=False, **kwargs):
 
 
 @register_model
-def dbnet_resnet18(pretrained=False, **kwargs):
-    pretrained_backbone = not pretrained
+def dbnet_resnet18(pretrained=False, pretrained_backbone=True, **kwargs):
     model_config = {
         "backbone": {
             'name': 'det_resnet18',
@@ -98,8 +97,7 @@ def dbnet_resnet18(pretrained=False, **kwargs):
 
 
 @register_model
-def dbnet_resnet50(pretrained=False, **kwargs):
-    pretrained_backbone = not pretrained
+def dbnet_resnet50(pretrained=False, pretrained_backbone=True, **kwargs):
     model_config = {
         "backbone": {
             'name': 'det_resnet50',
@@ -128,8 +126,7 @@ def dbnet_resnet50(pretrained=False, **kwargs):
 
 
 @register_model
-def dbnetpp_resnet50(pretrained=False, **kwargs):
-    pretrained_backbone = not pretrained
+def dbnetpp_resnet50(pretrained=False, pretrained_backbone=True, **kwargs):
     model_config = {
         "backbone": {
             'name': 'det_resnet50',
diff --git a/mindocr/models/det_psenet.py b/mindocr/models/det_psenet.py
@@ -29,8 +29,7 @@ def __init__(self, config):
 
 
 @register_model
-def psenet_resnet152(pretrained=False, **kwargs):
-    pretrained_backbone = not pretrained
+def psenet_resnet152(pretrained=False, pretrained_backbone=True, **kwargs):
     model_config = {
         "backbone": {
             'name': 'det_resnet152',
diff --git a/mindocr/models/kie_layoutxlm.py b/mindocr/models/kie_layoutxlm.py
@@ -40,12 +40,18 @@ def construct(self, x):
 
 
 @register_model
-def layoutxlm_ser(pretrained: bool = True, use_visual_backbone: bool = True, use_float16: bool = False, **kwargs):
+def layoutxlm_ser(
+    pretrained: bool = True,
+    pretrained_backbone=False,
+    use_visual_backbone: bool = True,
+    use_float16: bool = False,
+    **kwargs
+):
     model_config = {
         "type": "kie",
         "backbone": {
             "name": "layoutxlm",
-            "pretrained": pretrained,  # backbone pretrained
+            "pretrained": pretrained_backbone,  # backbone pretrained
             "use_visual_backbone": use_visual_backbone,
             "use_float16": use_float16,
         },
diff --git a/tests/ut/test_mindir_export.py b/tests/ut/test_mindir_export.py
@@ -37,7 +37,7 @@ def test_mindir_infer(model_name):
     outputs_mindir = model(x)
 
     # get original ckpt outputs
-    net = build_model(model_name, pretrained=True)
+    net = build_model(model_name, pretrained=True, pretrained_backbone=False)
     outputs_ckpt = net(x)
 
     for i, o in enumerate(outputs_mindir):
diff --git a/tests/ut/test_models.py b/tests/ut/test_models.py
@@ -31,7 +31,8 @@
 @pytest.mark.parametrize("pretrained", [True, False])
 def test_model_by_name(model_name, pretrained):
     print(model_name)
-    build_model(model_name, pretrained=pretrained)
+    pretrained_backbone = not pretrained
+    build_model(model_name, pretrained=pretrained, pretrained_backbone=pretrained_backbone)
     print("model created")
 
 
diff --git a/tools/export.py b/tools/export.py
@@ -91,9 +91,11 @@ def export(model_name_or_config, data_shape, local_ckpt_path, save_dir, is_dynam
         amp_level = "O0"
 
     if local_ckpt_path:
-        net = build_model(model_cfg, pretrained=False, ckpt_load_path=local_ckpt_path, amp_level=amp_level)
+        net = build_model(
+            model_cfg, pretrained=False, pretrained_backbone=False, ckpt_load_path=local_ckpt_path, amp_level=amp_level
+        )
     else:
-        net = build_model(model_cfg, pretrained=True, amp_level=amp_level)
+        net = build_model(model_cfg, pretrained=True, pretrained_backbone=False, amp_level=amp_level)
 
     logger.info(f"Set the AMP level of the model to be `{amp_level}`.")
 
diff --git a/tools/infer/text/config.py b/tools/infer/text/config.py
@@ -5,8 +5,6 @@
 """
 import argparse
 
-import yaml
-
 
 def str2bool(v):
     if isinstance(v, bool):
@@ -20,24 +18,9 @@ def str2bool(v):
 
 
 def create_parser():
-    parser_config = argparse.ArgumentParser(description="Inference Config File", add_help=False)
-    parser_config.add_argument(
-        "-c", "--config", type=str, default="", help='YAML config file specifying default arguments (default="")'
-    )
-
     parser = argparse.ArgumentParser(description="Inference Config Args")
     # params for prediction engine
     parser.add_argument("--mode", type=int, default=0, help="0 for graph mode, 1 for pynative mode ")  # added
-    # parser.add_argument("--use_gpu", type=str2bool, default=True)
-    # parser.add_argument("--use_npu", type=str2bool, default=False)
-    # parser.add_argument("--ir_optim", type=str2bool, default=True)
-    # parser.add_argument("--min_subgraph_size", type=int, default=15)
-    # parser.add_argument("--precision", type=str, default="fp32")
-    # parser.add_argument("--gpu_mem", type=int, default=500)
-    # parser.add_argument("--gpu_id", type=int, default=0)
-
-    parser.add_argument("--det_model_config", type=str, help="path to det model yaml config")  # added
-    parser.add_argument("--rec_model_config", type=str, help="path to rec model yaml config")  # added
 
     # params for text detector
     parser.add_argument("--image_dir", type=str, help="image path or image directory")
@@ -165,21 +148,6 @@ def create_parser():
         help="Whether to visualize results and save the visualized image.",
     )
 
-    # multi-process
-    """
-    parser.add_argument("--use_mp", type=str2bool, default=False)
-    parser.add_argument("--total_process_num", type=int, default=1)
-    parser.add_argument("--process_id", type=int, default=0)
-
-    parser.add_argument("--benchmark", type=str2bool, default=False)
-    parser.add_argument("--save_log_path", type=str, default="./log_output/")
-
-    parser.add_argument("--show_log", type=str2bool, default=True)
-    parser.add_argument("--use_onnx", type=str2bool, default=False)
-
-    parser.add_argument("--enable_mkldnn", type=str2bool, default=False)
-    parser.add_argument("--cpu_threads", type=int, default=10)
-    """
     parser.add_argument("--warmup", type=str2bool, default=False)
     parser.add_argument("--ocr_result_dir", type=str, default=None, help="path or directory of ocr results")
     parser.add_argument(
@@ -203,29 +171,10 @@ def create_parser():
     )
     parser.add_argument("--kie_batch_num", type=int, default=8)
 
-    return parser_config, parser
-
-
-def _check_cfgs_in_parser(cfgs: dict, parser: argparse.ArgumentParser):
-    actions_dest = [action.dest for action in parser._actions]
-    defaults_key = parser._defaults.keys()
-    for k in cfgs.keys():
-        if k not in actions_dest and k not in defaults_key:
-            raise KeyError(f"{k} does not exist in ArgumentParser!")
-
+    return parser
 
-def parse_args(args=None):
-    parser_config, parser = create_parser()
-    # Do we have a config file to parse?
-    args_config, remaining = parser_config.parse_known_args(args)
-    if args_config.config:
-        with open(args_config.config, "r") as f:
-            cfg = yaml.safe_load(f)
-            _check_cfgs_in_parser(cfg, parser)
-            parser.set_defaults(**cfg)
-            parser.set_defaults(config=args_config.config)
 
-    # The main arg parser parses the rest of the args, the usual
-    # defaults will have been overridden if config file specified.
-    args = parser.parse_args(remaining)
+def parse_args():
+    parser = create_parser()
+    args = parser.parse_args()
     return args
diff --git a/tools/infer/text/postprocess.py b/tools/infer/text/postprocess.py
@@ -10,7 +10,7 @@
 
 
 class Postprocessor(object):
-    def __init__(self, task="det", algo="DB", **kwargs):
+    def __init__(self, task="det", algo="DB", rec_char_dict_path=None, **kwargs):
         # algo = algo.lower()
         if task == "det":
             if algo.startswith("DB"):
@@ -46,27 +46,33 @@ def __init__(self, task="det", algo="DB", **kwargs):
             self.rescale_internally = True
             self.round = True
         elif task == "rec":
+            rec_char_dict_path = (
+                rec_char_dict_path or "mindocr/utils/dict/ch_dict.txt"
+                if algo in ["CRNN_CH", "SVTR_PPOCRv3_CH"]
+                else rec_char_dict_path
+            )
             # TODO: update character_dict_path and use_space_char after CRNN trained using en_dict.txt released
             if algo.startswith("CRNN") or algo.startswith("SVTR"):
                 # TODO: allow users to input char dict path
-                dict_path = "mindocr/utils/dict/ch_dict.txt" if algo in ["CRNN_CH", "SVTR_PPOCRv3_CH"] else None
                 if algo == "SVTR_PPOCRv3_CH":
                     postproc_cfg = dict(
                         name="CTCLabelDecode",
-                        character_dict_path=dict_path,
+                        character_dict_path=rec_char_dict_path,
                         use_space_char=True,
                     )
                 else:
                     postproc_cfg = dict(
                         name="RecCTCLabelDecode",
-                        character_dict_path=dict_path,
+                        character_dict_path=rec_char_dict_path,
                         use_space_char=False,
                     )
             elif algo.startswith("RARE"):
-                dict_path = "mindocr/utils/dict/ch_dict.txt" if algo == "RARE_CH" else None
+                rec_char_dict_path = (
+                    rec_char_dict_path or "mindocr/utils/dict/ch_dict.txt" if algo == "RARE_CH" else rec_char_dict_path
+                )
                 postproc_cfg = dict(
                     name="RecAttnLabelDecode",
-                    character_dict_path=dict_path,
+                    character_dict_path=rec_char_dict_path,
                     use_space_char=False,
                 )
 
diff --git a/tools/infer/text/predict_det.py b/tools/infer/text/predict_det.py
@@ -62,7 +62,13 @@ def __init__(self, args):
                 "The program has switched to amp_level O2 automatically."
             )
             amp_level = "O2"
-        self.model = build_model(model_name, pretrained=pretrained, ckpt_load_path=ckpt_load_path, amp_level=amp_level)
+        self.model = build_model(
+            model_name,
+            pretrained=pretrained,
+            pretrained_backbone=False,
+            ckpt_load_path=ckpt_load_path,
+            amp_level=amp_level,
+        )
         self.model.set_train(False)
         logger.info(
             "Init detection model: {} --> {}. Model weights loaded from {}".format(
diff --git a/tools/infer/text/predict_from_yaml.py b/tools/infer/text/predict_from_yaml.py
diff --git a/tools/infer/text/predict_rec.py b/tools/infer/text/predict_rec.py
diff --git a/tools/infer/text/predict_ser.py b/tools/infer/text/predict_ser.py