Merge branch 'develop' into upgrade_result

PaddlePaddle · Jan 10, 2025 · e505b83 · e505b83
2 parents 60f2cf2 + 9b6dded
commit e505b83
Show file tree

Hide file tree

Showing 39 changed files with 1,651 additions and 252 deletions.
diff --git a/api_examples/pipelines/test_formula_recognition.py b/api_examples/pipelines/test_formula_recognition.py
@@ -0,0 +1,43 @@
+# copyright (c) 2024 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from paddlex import create_pipeline
+
+pipeline = create_pipeline(pipeline="formula_recognition")
+
+output = pipeline.predict(
+    "./test_samples/general_formula_recognition01.png", use_layout_detection=True
+)
+
+# output = pipeline.predict(
+#     "./test_samples/general_formula_recognition01.pdf",
+#     use_layout_detection=True,
+# )
+
+# output = pipeline.predict(
+#     "./test_samples/general_formula_recognition02.png",
+#     use_layout_detection=False,
+# )
+
+# img_list = [ "./test_samples/general_formula_recognition03.png", \
+#     "./test_samples/general_formula_recognition04.png", \
+#         "./test_samples/general_formula_recognition05.png",]
+# output = pipeline.predict(
+#     img_list,
+#     use_layout_detection=True,
+# )
+
+for res in output:
+    # res.save_to_img("./output/")
+    res.save_results("./output")
diff --git a/api_examples/pipelines/test_image_classification.py b/api_examples/pipelines/test_image_classification.py
@@ -16,7 +16,7 @@
 
 pipeline = create_pipeline(pipeline="image_classification")
 
-output = pipeline.predict("./test_samples/general_image_classification_001.jpg")
+output = pipeline.predict("./test_samples/general_image_classification_001.jpg", topk=5)
 
 # output = pipeline.predict("./test_samples/财报1.pdf")
 

diff --git a/api_examples/pipelines/test_ocr.py b/api_examples/pipelines/test_ocr.py
@@ -14,13 +14,15 @@
 
 from paddlex import create_pipeline
 
-pipeline = create_pipeline(pipeline="OCR")
+pipeline = create_pipeline(pipeline="OCR", limit_side_len=320)
 
 output = pipeline.predict(
     "./test_samples/general_ocr_002.png",
     use_doc_orientation_classify=True,
-    use_doc_unwarping=True,
-    use_textline_orientation=True,
+    use_doc_unwarping=False,
+    use_textline_orientation=False,
+    unclip_ratio=3.0,
+    limit_side_len=1920,
 )
 # output = pipeline.predict(
 #     "./test_samples/general_ocr_002.png",

diff --git a/api_examples/pipelines/test_pedestrian_attribute_rec.py b/api_examples/pipelines/test_pedestrian_attribute_rec.py
@@ -0,0 +1,26 @@
+# copyright (c) 2024 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from paddlex import create_pipeline
+
+pipeline = create_pipeline(pipeline="pedestrian_attribute_recognition")
+
+output = pipeline.predict(
+    "./test_samples/pedestrian_attribute_002.jpg", det_threshold=0.7, cls_threshold=0.7
+)
+
+for res in output:
+    res.print()  ## 打印预测的结构化输出
+    res.save_to_img("./output")  ## 保存结果可视化图像
+    res.save_to_json("./output/")  ## 保存预测的结构化输出
diff --git a/api_examples/pipelines/test_vehicle_attribute_rec.py b/api_examples/pipelines/test_vehicle_attribute_rec.py
@@ -0,0 +1,26 @@
+# copyright (c) 2024 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from paddlex import create_pipeline
+
+pipeline = create_pipeline(pipeline="vehicle_attribute_recognition")
+
+output = pipeline.predict(
+    "./test_samples/vehicle_attribute_002.jpg", det_threshold=0.7, cls_threshold=0.7
+)
+
+for res in output:
+    res.print()  ## 打印预测的结构化输出
+    res.save_to_img("./output")  ## 保存结果可视化图像
+    res.save_to_json("./output/")  ## 保存预测的结构化输出
diff --git a/docs/pipeline_usage/tutorials/cv_pipelines/pedestrian_attribute_recognition.en.md b/docs/pipeline_usage/tutorials/cv_pipelines/pedestrian_attribute_recognition.en.md
@@ -48,7 +48,7 @@ Pedestrian attribute recognition is a key function in computer vision systems, u
 <thead>
 <tr>
 <th>Model</th><th>Model Download Link</th>
-<th>mA (%)</th>
+<th>mAP (%)</th>
 <th>GPU Inference Time (ms)</th>
 <th>CPU Inference Time (ms)</th>
 <th>Model Size (M)</th>

diff --git a/docs/pipeline_usage/tutorials/cv_pipelines/pedestrian_attribute_recognition.md b/docs/pipeline_usage/tutorials/cv_pipelines/pedestrian_attribute_recognition.md
@@ -48,7 +48,7 @@ comments: true
 <thead>
 <tr>
 <th>模型</th><th>模型下载链接</th>
-<th>mA（%）</th>
+<th>mAP（%）</th>
 <th>GPU推理耗时（ms）</th>
 <th>CPU推理耗时 (ms)</th>
 <th>模型存储大小（M)</th>

diff --git a/docs/pipeline_usage/tutorials/cv_pipelines/vehicle_attribute_recognition.en.md b/docs/pipeline_usage/tutorials/cv_pipelines/vehicle_attribute_recognition.en.md
@@ -44,7 +44,7 @@ Vehicle attribute recognition is a crucial component in computer vision systems.
 <thead>
 <tr>
 <th>Model</th><th>Model Download Link</th>
-<th>mA (%)</th>
+<th>mAP (%)</th>
 <th>GPU Inference Time (ms)</th>
 <th>CPU Inference Time (ms)</th>
 <th>Model Size (M)</th>

diff --git a/docs/pipeline_usage/tutorials/cv_pipelines/vehicle_attribute_recognition.md b/docs/pipeline_usage/tutorials/cv_pipelines/vehicle_attribute_recognition.md
@@ -45,7 +45,7 @@ comments: true
 <thead>
 <tr>
 <th>模型</th><th>模型下载链接</th>
-<th>mA（%）</th>
+<th>mAP（%）</th>
 <th>GPU推理耗时（ms）</th>
 <th>CPU推理耗时 (ms)</th>
 <th>模型存储大小（M)</th>

diff --git a/paddlex/configs/pipelines/OCR.yaml b/paddlex/configs/pipelines/OCR.yaml
@@ -3,8 +3,8 @@ pipeline_name: OCR
 
 text_type: general
 
-use_doc_preprocessor: True
-use_textline_orientation: True
+use_doc_preprocessor: False
+use_textline_orientation: False
 
 SubPipelines:
   DocPreprocessor:
@@ -29,6 +29,13 @@ SubModules:
     model_name: PP-OCRv4_mobile_det
     model_dir: null
     batch_size: 1
+    limit_side_len: 960
+    limit_type: max
+    thresh: 0.3
+    box_thresh: 0.6
+    max_candidates: 1000
+    unclip_ratio: 2.0
+    use_dilation: False
   TextLineOrientation:
     module_name: textline_orientation
     model_name: PP-LCNet_x0_25_textline_ori 

diff --git a/paddlex/configs/pipelines/PP-ChatOCRv4-doc.yaml b/paddlex/configs/pipelines/PP-ChatOCRv4-doc.yaml
@@ -18,6 +18,13 @@ SubModules:
     ak: "api_key" # Set this to a real API key
     sk: "secret_key"  # Set this to a real secret key
 
+  MLLM_Chat:
+    module_name: chat_bot
+    model_name: PP-DocBee
+    base_url: "http://127.0.0.1/v1/chat/completions"
+    api_type: openai
+    api_key: "api_key"
+
   PromptEngneering:
     KIE_CommonText:
       module_name: prompt_engneering

diff --git a/paddlex/configs/pipelines/formula_recognition.yaml b/paddlex/configs/pipelines/formula_recognition.yaml
@@ -0,0 +1,35 @@
+
+pipeline_name: formula_recognition
+
+use_layout_detection: True
+use_doc_preprocessor: True
+
+SubModules:
+  LayoutDetection:
+    module_name: layout_detection
+    model_name: RT-DETR-H_layout_17cls
+    model_dir: null
+    batch_size: 1
+
+  FormulaRecognition:
+    module_name: formula_recognition
+    model_name: PP-FormulaNet-L
+    model_dir: null
+    batch_size: 5
+
+SubPipelines:
+  DocPreprocessor:
+    pipeline_name: doc_preprocessor
+    use_doc_orientation_classify: True
+    use_doc_unwarping: True
+    SubModules:
+      DocOrientationClassify:
+        module_name: doc_text_orientation
+        model_name: PP-LCNet_x1_0_doc_ori
+        model_dir: null
+        batch_size: 1
+      DocUnwarping:
+        module_name: image_unwarping
+        model_name: UVDoc
+        model_dir: null
+        batch_size: 1
diff --git a/paddlex/configs/pipelines/pedestrian_attribute_recognition.yaml b/paddlex/configs/pipelines/pedestrian_attribute_recognition.yaml
@@ -0,0 +1,15 @@
+pipeline_name: pedestrian_attribute_recognition
+
+SubModules:
+  Detection:
+    module_name: object_detection
+    model_name: PP-YOLOE-L_human
+    model_dir: null
+    batch_size: 1
+    threshold: 0.5 
+  Classification:
+    module_name: multilabel_classification
+    model_name: PP-LCNet_x1_0_pedestrian_attribute
+    model_dir: null
+    batch_size: 1
+    threshold: 0.5
diff --git a/paddlex/configs/pipelines/vehicle_attribute_recognition.yaml b/paddlex/configs/pipelines/vehicle_attribute_recognition.yaml
@@ -0,0 +1,15 @@
+pipeline_name: vehicle_attribute_recognition
+
+SubModules:
+  Detection:
+    module_name: object_detection
+    model_name: PP-YOLOE-L_vehicle
+    model_dir: null
+    batch_size: 1  
+    threshold: 0.5  
+  Classification:
+    module_name: multilabel_classification
+    model_name: PP-LCNet_x1_0_vehicle_attribute
+    model_dir: null
+    batch_size: 1
+    threshold: 0.5