Merge pull request #71 from kotaro-kinoshita/feature/export-figure-with-json-and-csv

feature export figure with csv and json
kotaro-kinoshita · Dec 31, 2024 · 7979ead · 7979ead
2 parents 0d37294 + 76d7c30
commit 7979ead
Show file tree

Hide file tree

Showing 9 changed files with 78 additions and 5 deletions.
diff --git a/README.md b/README.md
@@ -70,7 +70,7 @@ yomitoku ${path_data} -f md -o results -v --figure　--lite
 - `-d`, `--device` モデルを実行するためのデバイスを指定します。gpu が利用できない場合は cpu で推論が実行されます。(デフォルト: cuda)
 - `--ignore_line_break` 画像の改行位置を無視して、段落内の文章を連結して返します。（デフォルト：画像通りの改行位置位置で改行します。）
 - `--figure_letter` 検出した図表に含まれる文字も出力ファイルにエクスポートします。
-- `--figure` 検出した図、画像を出力ファイルにエクスポートします。(html と markdown のみ)
+- `--figure` 検出した図、画像を出力ファイルにエクスポートします。
 - `--encoding` エクスポートする出力ファイルの文字エンコーディングを指定します。サポートされていない文字コードが含まれる場合は、その文字を無視します。(utf-8, utf-8-sig, shift-jis, enc-jp, cp932)
 
 その他のオプションに関しては、ヘルプを参照

diff --git a/README_EN.md b/README_EN.md
@@ -71,7 +71,7 @@ yomitoku ${path_data} -f md -o results -v --figure
 - `-d`, `--device`: Specify the device for running the model. If a GPU is unavailable, inference will be executed on the CPU. (Default: cuda)
 - `--ignore_line_break`: Ignores line breaks in the image and concatenates sentences within a paragraph. (Default: respects line breaks as they appear in the image.)
 - `--figure_letter`: Exports characters contained within detected figures and tables to the output file.
-- `--figure`: Exports detected figures and images to the output file (supported only for html and markdown).
+- `--figure`: Exports detected figures and images to the output file.
 - `--encoding` Specifies the character encoding for the output file to be exported. If unsupported characters are included, they will be ignored. (utf-8, utf-8-sig, shift-jis, enc-jp, cp932)
 
 

diff --git a/docs/usage.en.md b/docs/usage.en.md
@@ -16,7 +16,7 @@ yomitoku ${path_data} -f md -o results -v
 - `-d`, `--device`: Specify the device for running the model. If a GPU is unavailable, inference will be executed on the CPU. (Default: cuda)
 - `--ignore_line_break`: Ignores line breaks in the image and concatenates sentences within a paragraph. (Default: respects line breaks as they appear in the image.)
 - `--figure_letter`: Exports characters contained within detected figures and tables to the output file.
-- `--figure`: Exports detected figures and images to the output file (supported only for html and markdown).
+- `--figure`: Exports detected figures and images to the output file.
 - `--encoding` Specifies the character encoding for the output file to be exported. If unsupported characters are included, they will be ignored. (utf-8, utf-8-sig, shift-jis, enc-jp, cp932)
 
 

diff --git a/docs/usage.ja.md b/docs/usage.ja.md
@@ -16,7 +16,7 @@ yomitoku ${path_data} -f md -o results -v
 - `-d`, `--device` モデルを実行するためのデバイスを指定します。gpu が利用できない場合は cpu で推論が実行されます。(デフォルト: cuda)
 - `--ignore_line_break` 画像の改行位置を無視して、段落内の文章を連結して返します。（デフォルト：画像通りの改行位置位置で改行します。）
 - `--figure_letter` 検出した図表に含まれる文字も出力ファイルにエクスポートします。
-- `--figure` 検出した図、画像を出力ファイルにエクスポートします。(html と markdown のみ)
+- `--figure` 検出した図、画像を出力ファイルにエクスポートします。
 - `--encoding` エクスポートする出力ファイルの文字エンコーディングを指定します。サポートされていない文字コードが含まれる場合は、その文字を無視します。(utf-8, utf-8-sig, shift-jis, enc-jp, cp932)
 
 その他のオプションに関しては、ヘルプを参照

diff --git a/src/yomitoku/cli/main.py b/src/yomitoku/cli/main.py
@@ -59,12 +59,18 @@ def process_single_file(args, analyzer, path, format):
                 out_path,
                 ignore_line_break=args.ignore_line_break,
                 encoding=args.encoding,
+                img=img,
+                export_figure=args.figure,
+                figure_dir=args.figure_dir,
             )
         elif format == "csv":
             results.to_csv(
                 out_path,
                 ignore_line_break=args.ignore_line_break,
                 encoding=args.encoding,
+                img=img,
+                export_figure=args.figure,
+                figure_dir=args.figure_dir,
             )
         elif format == "html":
             results.to_html(

diff --git a/src/yomitoku/export/export_csv.py b/src/yomitoku/export/export_csv.py
@@ -1,4 +1,6 @@
 import csv
+import cv2
+import os
 
 
 def table_to_csv(table, ignore_line_break):
@@ -33,11 +35,33 @@ def paragraph_to_csv(paragraph, ignore_line_break):
     return contents
 
 
+def save_figure(
+    figures,
+    img,
+    out_path,
+    figure_dir="figures",
+):
+    """Crop every detected figure out of ``img`` and write it as a PNG.
+
+    Files go to ``<dirname(out_path)>/<figure_dir>/`` and are named
+    ``<stem of out_path>_figure_<index>.png``, where ``index`` is the
+    position of the figure in ``figures``.
+
+    Args:
+        figures: Iterable of objects whose ``box`` attribute unpacks to
+            ``(x1, y1, x2, y2)``.
+        img: Image indexed as ``img[y, x, channel]``; assumed to be a
+            NumPy array as returned by OpenCV (it must expose ``shape``).
+        out_path: Path of the exported CSV file; only its directory and
+            base name are used here.
+        figure_dir: Sub-directory, relative to the directory of
+            ``out_path``, that receives the cropped images.
+
+    Raises:
+        ValueError: If there is at least one figure but ``img`` is None.
+    """
+    # Both values depend only on out_path, so compute them once.
+    save_dir = os.path.join(os.path.dirname(out_path), figure_dir)
+    filename = os.path.splitext(os.path.basename(out_path))[0]
+
+    for i, figure in enumerate(figures):
+        if img is None:
+            # Fail with a clear message instead of "NoneType is not subscriptable".
+            raise ValueError("img is required to export figures")
+
+        height, width = img.shape[:2]
+        x1, y1, x2, y2 = map(int, figure.box)
+        # Clamp to the image: a negative start index would wrap around in
+        # the slice and silently crop the wrong region.
+        x1, y1 = max(x1, 0), max(y1, 0)
+        x2, y2 = min(x2, width), min(y2, height)
+        if x2 <= x1 or y2 <= y1:
+            # An empty crop makes cv2.imwrite fail; skip it but keep the
+            # index so the remaining file names stay stable.
+            continue
+
+        # Created lazily so no directory appears when nothing is saved.
+        os.makedirs(save_dir, exist_ok=True)
+        figure_path = os.path.join(save_dir, f"{filename}_figure_{i}.png")
+        cv2.imwrite(figure_path, img[y1:y2, x1:x2, :])
+
+
 def export_csv(
     inputs,
     out_path: str,
     ignore_line_break: bool = False,
     encoding: str = "utf-8",
+    img=None,
+    export_figure: bool = True,
+    figure_dir="figures",
 ):
     elements = []
     for table in inputs.tables:
@@ -63,6 +87,14 @@ def export_csv(
             }
         )
 
+    if export_figure:
+        save_figure(
+            inputs.figures,
+            img,
+            out_path,
+            figure_dir=figure_dir,
+        )
+
     elements = sorted(elements, key=lambda x: x["order"])
 
     with open(out_path, "w", newline="", encoding=encoding, errors="ignore") as f:

diff --git a/src/yomitoku/export/export_json.py b/src/yomitoku/export/export_json.py
@@ -1,5 +1,8 @@
 import json
 
+import cv2
+import os
+
 
 def paragraph_to_json(paragraph, ignore_line_break):
     if ignore_line_break:
@@ -12,11 +15,33 @@ def table_to_json(table, ignore_line_break):
             cell.contents = cell.contents.replace("\n", "")
 
 
+def save_figure(
+    figures,
+    img,
+    out_path,
+    figure_dir="figures",
+):
+    """Crop every detected figure out of ``img`` and write it as a PNG.
+
+    Files go to ``<dirname(out_path)>/<figure_dir>/`` and are named
+    ``<stem of out_path>_figure_<index>.png``, where ``index`` is the
+    position of the figure in ``figures``.
+
+    Args:
+        figures: Iterable of objects whose ``box`` attribute unpacks to
+            ``(x1, y1, x2, y2)``.
+        img: Image indexed as ``img[y, x, channel]``; assumed to be a
+            NumPy array as returned by OpenCV (it must expose ``shape``).
+        out_path: Path of the exported JSON file; only its directory and
+            base name are used here.
+        figure_dir: Sub-directory, relative to the directory of
+            ``out_path``, that receives the cropped images.
+
+    Raises:
+        ValueError: If there is at least one figure but ``img`` is None.
+    """
+    # Both values depend only on out_path, so compute them once.
+    save_dir = os.path.join(os.path.dirname(out_path), figure_dir)
+    filename = os.path.splitext(os.path.basename(out_path))[0]
+
+    for i, figure in enumerate(figures):
+        if img is None:
+            # Fail with a clear message instead of "NoneType is not subscriptable".
+            raise ValueError("img is required to export figures")
+
+        height, width = img.shape[:2]
+        x1, y1, x2, y2 = map(int, figure.box)
+        # Clamp to the image: a negative start index would wrap around in
+        # the slice and silently crop the wrong region.
+        x1, y1 = max(x1, 0), max(y1, 0)
+        x2, y2 = min(x2, width), min(y2, height)
+        if x2 <= x1 or y2 <= y1:
+            # An empty crop makes cv2.imwrite fail; skip it but keep the
+            # index so the remaining file names stay stable.
+            continue
+
+        # Created lazily so no directory appears when nothing is saved.
+        os.makedirs(save_dir, exist_ok=True)
+        figure_path = os.path.join(save_dir, f"{filename}_figure_{i}.png")
+        cv2.imwrite(figure_path, img[y1:y2, x1:x2, :])
+
+
 def export_json(
     inputs,
     out_path,
     ignore_line_break=False,
     encoding: str = "utf-8",
+    img=None,
+    export_figure=False,
+    figure_dir="figures",
 ):
     from yomitoku.document_analyzer import DocumentAnalyzerSchema
 
@@ -28,6 +53,14 @@ def export_json(
         for paragraph in inputs.paragraphs:
             paragraph_to_json(paragraph, ignore_line_break)
 
+        if export_figure:
+            save_figure(
+                inputs.figures,
+                img,
+                out_path,
+                figure_dir=figure_dir,
+            )
+
     with open(out_path, "w", encoding=encoding, errors="ignore") as f:
         json.dump(
             inputs.model_dump(),

diff --git a/tests/test_cli.py b/tests/test_cli.py
@@ -141,6 +141,7 @@ def test_run_tiff_csv(monkeypatch, tmp_path):
             "--tsr_cfg",
             "tests/yaml/table_structure_recognizer.yaml",
             "--lite",
+            "--figure",
         ],
     )
     main.main()
@@ -193,6 +194,7 @@ def test_run_dir_json(monkeypatch, tmp_path):
             str(tmp_path),
             "-f",
             "json",
+            "--figure",
         ],
     )
     main.main()

diff --git a/tests/test_export.py b/tests/test_export.py
@@ -583,7 +583,7 @@ def test_export(tmp_path):
     with open(out_path, "r") as f:
         assert json.load(f) == document_analyzer.model_dump()
 
-    document_analyzer.to_csv(tmp_path / "document_analyzer.csv")
+    document_analyzer.to_csv(tmp_path / "document_analyzer.csv", img=img)
     document_analyzer.to_html(tmp_path / "document_analyzer.html", img=img)
     document_analyzer.to_markdown(tmp_path / "document_analyzer.md", img=img)