first commit

kotaro-kinoshita · Oct 30, 2024 · 6903468 · 6903468
commit 6903468
Show file tree

Hide file tree

Showing 35 changed files with 3,140 additions and 0 deletions.
diff --git a/.github/release-drafter.yml b/.github/release-drafter.yml
@@ -0,0 +1,72 @@
+---
+# Release Drafter configuration: release naming, changelog categories,
+# automatic PR labelling and next-version resolution.
+name-template: 'v$RESOLVED_VERSION'
+tag-template: 'v$RESOLVED_VERSION'
+
+# Changelog categories, each selected by PR label.
+categories:
+  - title: '🚀 機能追加'
+    labels:
+      - 'enhancement'
+  - title: '🔧  リファクタ'
+    labels:
+      - 'refactoring'
+  - title: '🐛 バグ修正'
+    labels:
+      - 'bug'
+  - title: '✅ テスト'
+    labels:
+      - 'test'
+  - title: '📖 ドキュメント'
+    labels:
+      - 'documentation'
+
+change-template: '- $TITLE @$AUTHOR (#$NUMBER)'
+# You can add # and @ to disable mentions, and add ` to disable code blocks.
+change-title-escapes: '\<*_&'
+
+# Labels added automatically, matched against the branch name or the PR title.
+autolabeler:
+  - label: 'enhancement'
+    branch:
+      - '/feature\/.+/'
+      - '/feat\/.+/'
+  - label: 'release'
+    branch:
+      - '/release\/.+/'
+  - label: 'refactoring'
+    branch:
+      - '/refactor\/.+/'
+    title:
+      - '/refactor/i'
+  - label: 'bug'
+    branch:
+      - '/fix\/.+/'
+      - '/bug\/.+/'
+    title:
+      - '/fix/i'
+      - '/bug/i'
+  - label: 'test'
+    branch:
+      - '/test\/.+/'
+  - label: 'documentation'
+    branch:
+      - '/doc\/.+/'
+    title:
+      - '/doc/i'
+
+# Next version ($RESOLVED_VERSION) is chosen from PR labels; patch by default.
+version-resolver:
+  major:
+    labels:
+      - 'breaking'
+  minor:
+    labels:
+      - 'enhancement'
+  default: patch
+
+template: |
+  ## 変更
+
+  $CHANGES
diff --git a/.github/workflows/build-and-publish.yml b/.github/workflows/build-and-publish.yml
@@ -0,0 +1,33 @@
+# Builds the package and uploads it when a GitHub release is published.
+name: Publish to PyPI
+
+on:
+  release:
+    types: [published]
+
+jobs:
+  publish:
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v4
+        with:
+          # Full history (including tags) so the VCS-derived version resolves.
+          fetch-depth: 0
+      - name: Set up Python 3.9
+        uses: actions/setup-python@v5
+        with:
+          python-version: "3.9"
+      - name: Install uv
+        uses: astral-sh/setup-uv@v3
+        with:
+          enable-cache: true
+      - name: Install dependencies
+        run: uv sync --dev
+      - name: build
+        run: uv build
+      # NOTE(review): the upload target is TestPyPI although the step says PyPI.
+      - name: Publish to PyPI
+        env:
+          # Read by `uv publish`; keeps the secret off the command line.
+          UV_PUBLISH_TOKEN: ${{ secrets.PYPI_TEST_TOKEN }}
+        run: uv publish --publish-url https://test.pypi.org/legacy/
diff --git a/.github/workflows/create-release.yml b/.github/workflows/create-release.yml
@@ -0,0 +1,28 @@
+---
+# Runs Release Drafter on pushes to main and on pull request events
+# (opened, reopened, synchronize).
+name: Release Drafter
+
+on:
+  push:
+    branches: [main]
+  pull_request_target:
+    types:
+      - opened
+      - reopened
+      - synchronize
+
+# Read-only by default; the job below widens only what it needs.
+permissions:
+  contents: read
+
+jobs:
+  update_release_draft:
+    runs-on: ubuntu-latest
+    permissions:
+      contents: write
+      pull-requests: write
+    steps:
+      - uses: release-drafter/release-drafter@v6
+        env:
+          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
diff --git a/.github/workflows/lint-and-test.yml b/.github/workflows/lint-and-test.yml
@@ -0,0 +1,33 @@
+# Installs the project with uv, then runs the linter (ruff) and the tests
+# (pytest) on a single Python version for every pull request targeting main.
+# For more information see: https://docs.github.com/en/actions/automating-builds-and-tests/building-and-testing-python
+
+name: Python Lint
+
+on:
+  pull_request:
+    branches: ["main"]
+
+permissions:
+  contents: read
+
+jobs:
+  lint-and-test:
+    runs-on: ubuntu-latest
+    steps:
+    - uses: actions/checkout@v4
+    - name: Set up Python 3.9
+      # v5 runs on a supported Node.js runtime (v3 used the deprecated Node 16).
+      uses: actions/setup-python@v5
+      with:
+        python-version: "3.9"
+    - name: Install uv
+      uses: astral-sh/setup-uv@v3
+      with:
+        enable-cache: true
+    - name: Install dependencies
+      run: uv sync --dev
+    - name: Run linter
+      run: uv run ruff check
+    - name: Run tests
+      run: uv run pytest
diff --git a/.gitignore b/.gitignore
@@ -0,0 +1,18 @@
+# Python-generated files
+__pycache__/
+*.py[oc]
+build/
+dist/
+wheels/
+*.egg-info
+
+# Virtual environments
+.venv
+
+# Dev tools cache
+.ruff_cache
+.pytest_cache
+
+dataset/
+weights/
+results/
diff --git a/.python-version b/.python-version
@@ -0,0 +1 @@
+3.9
diff --git a/README.md b/README.md
diff --git a/configs/ocr.yaml b/configs/ocr.yaml
@@ -0,0 +1,59 @@
+# OCR settings: one section for detection and one for recognition.
+DEVICE: "cuda"
+
+DETECTION:
+  WEIGHTS: "weights/dbnet_res50_20241024.pth"
+  BACKBONE:
+    NAME: "resnet50"
+    DILATION: true
+
+  DECODER:
+    IN_CHANNELS: [256, 512, 1024, 2048]
+    HIDDEN_DIM: 256
+    ADAPTIVE: true
+    SERIAL: false
+    SMOOTH: false
+    K: 50
+
+  DATA:
+    SHORTEST_SIZE: 768
+    LIMIT_SIZE: 1920
+
+  POST_PROCESS:
+    MIN_SIZE: 2
+    THRESH: 0.3
+    BOX_THRESH: 0.5
+    MAX_CANDIDATES: 1500
+    UNCLIP_RATIO: 2.5
+
+  VISUALIZE:
+    COLOR: [0, 255, 0]  # RGB
+    HEATMAP: false
+
+RECOGNITION:
+  WEIGHTS: "weights/checkpoint_4_79999.pth"
+  CHARSET: "resource/charset.txt"
+
+  DATA:
+    NUM_WORKERS: 4
+    BATCH_SIZE: 128
+    IMAGE_SIZE: [32, 800]
+
+  MODEL:
+    MAX_LEN: 100
+    PATCH_SIZE: [8, 8]
+    HIDDEN_DIM: 512
+    ENC_NUM_HEADS: 8
+    ENC_MLP_RATIO: 4
+    ENC_DEPTH: 12
+    DEC_NUM_HEADS: 8
+    DEC_MLP_RATIO: 4
+    DEC_DEPTH: 1
+    DECODE_AR: true
+    REFINE_ITERS: 1
+    DROPOUT: 0.1
+
+  VISUALIZE:
+    FONT: "resource/MPLUS1p-Medium.ttf"
+    COLOR: [0, 0, 255]  # RGB
+    FONT_SIZE: 12
diff --git a/dockerfile b/dockerfile
@@ -0,0 +1,40 @@
+# CUDA development image with the project's Python dependencies installed by uv.
+FROM nvidia/cuda:11.3.1-cudnn8-devel-ubuntu20.04
+
+ENV TZ=Asia/Tokyo
+ENV DEBIAN_FRONTEND=noninteractive
+RUN ln -snf /usr/share/zoneinfo/$TZ /etc/localtime && echo $TZ > /etc/timezone
+
+# Update and install in a single layer so a cached `apt-get update` layer is
+# never combined with a stale package index; drop the index afterwards.
+RUN apt-get -y update && apt-get -y upgrade \
+    && apt-get install -y curl wget unzip vim \
+        libopencv-dev build-essential clang \
+    && rm -rf /var/lib/apt/lists/*
+
+ENV UV_INDEX_STRATEGY="unsafe-best-match"
+
+RUN curl -LsSf https://astral.sh/uv/install.sh | sh
+
+COPY pyproject.toml .
+
+ENV UV_SYSTEM_PYTHON=true \
+    UV_COMPILE_BYTECODE=1 \
+    UV_CACHE_DIR=/root/.cache/uv \
+    UV_LINK_MODE=copy
+
+# Current uv installers place the binary in ~/.local/bin (older releases used
+# ~/.cargo/bin); keep both on PATH because the installer version is not pinned.
+ENV PATH="/root/.local/bin:/root/.cargo/bin/:$PATH"
+
+RUN --mount=type=bind,source=pyproject.toml,target=pyproject.toml \
+    --mount=type=bind,source=.python-version,target=.python-version \
+    --mount=type=bind,source=README.md,target=README.md \
+    uv sync
+
+# Sourcing the activate script in a RUN step only affects that step's shell;
+# putting the venv on PATH keeps it active in later layers and at runtime.
+# NOTE(review): assumes the working directory is / when `uv sync` runs -- confirm.
+ENV PATH="/.venv/bin:$PATH"
+
+WORKDIR /workspace
diff --git a/examples/simple_ocr.py b/examples/simple_ocr.py
@@ -0,0 +1,40 @@
+import argparse
+import os
+import cv2
+import json
+from yomitoku import OCR
+
+
+def main(args):
+    filename = os.path.basename(args.image)
+    name, ext = os.path.splitext(filename)
+
+    ocr = OCR(args.config, visualize=args.vis)
+    preds, vis = ocr(args.image)
+
+    os.makedirs(args.outdir, exist_ok=True)
+
+    if vis is not None:
+        out_vis = os.path.join(args.outdir, f"{name}_visualize.jpg")
+        cv2.imwrite(out_vis, vis)
+
+    with open(os.path.join(args.outdir, f"{name}_result.json"), "w") as f:
+        json.dump(
+            preds,
+            f,
+            ensure_ascii=False,
+            indent=4,
+            sort_keys=True,
+            separators=(",", ": "),
+        )
+
+
+if __name__ == "__main__":
+    parser = argparse.ArgumentParser()
+    parser.add_argument("--config", type=str, default="configs/ocr.yaml")
+    parser.add_argument("--image", type=str, default="dataset/00000528_0894389_62.jpg")
+    parser.add_argument("--vis", action="store_true")
+    parser.add_argument("--outdir", type=str, default="results")
+    args = parser.parse_args()
+
+    main(args)
diff --git a/pyproject.toml b/pyproject.toml
@@ -0,0 +1,39 @@
+# Built with hatchling; the version is taken from VCS tags through hatch-vcs.
+[build-system]
+requires = ["hatchling", "hatch-vcs"]
+build-backend = "hatchling.build"
+
+[project]
+name = "yomitoku"
+dynamic = ["version"]
+authors = [{ name = "Kotaro Kinoshita" }]
+description = "Yomitoku is a document image analysis package powered by AI technology for the Japanese language."
+readme = "README.md"
+requires-python = ">=3.9"
+keywords = ["Japanese", "OCR", "Deep Learning"]
+dependencies = [
+    "omegaconf>=2.3.0",
+    "opencv-python>=4.10.0.84",
+    "pyclipper>=1.3.0.post6",
+    "shapely>=2.0.6",
+    "timm>=1.0.11",
+    "torch>=2.5.0",
+    "torchvision>=0.20.0",
+]
+
+# The tag pattern accepts tags shaped like v1.2.3.
+[tool.hatch.version]
+source = "vcs"
+tag-pattern = '^v(?P<version>[0-9]+\.[0-9]+\.[0-9]+)$'
+
+[tool.hatch.version.raw-options]
+local_scheme = "no-local-version"
+version_scheme = "no-guess-dev"
+
+# Development-only tools: tests, coverage and linting.
+[tool.uv]
+dev-dependencies = [
+    "pytest-cov>=5.0.0",
+    "pytest>=8.3.3",
+    "ruff>=0.7.0",
+]
diff --git a/resource/MPLUS1p-Medium.ttf b/resource/MPLUS1p-Medium.ttf
diff --git a/resource/charset.txt b/resource/charset.txt
diff --git a/src/yomitoku/__init__.py b/src/yomitoku/__init__.py
@@ -0,0 +1,6 @@
+from importlib.metadata import version
+
+from .ocr import OCR
+
+__all__ = ["OCR"]
+__version__ = version(__package__)  # looked up from the installed package metadata
diff --git a/src/yomitoku/data/__init__.py b/src/yomitoku/data/__init__.py
diff --git a/src/yomitoku/data/dataset.py b/src/yomitoku/data/dataset.py
@@ -0,0 +1,35 @@
+from torchvision import transforms as T
+
+
+from torch.utils.data import Dataset
+
+from yomitoku.data.functions import (
+    resize_with_padding,
+    extract_roi_with_perspective,
+)
+
+
+class ParseqDataset(Dataset):
+    def __init__(self, cfg, img, quads):
+        self.img = img
+        self.quads = quads
+        self.cfg = cfg
+        self.img = img[:, :, ::-1]
+
+        self.transform = T.Compose(
+            [
+                T.ToTensor(),
+                T.Normalize(0.5, 0.5),
+            ]
+        )
+
+    def __len__(self):
+        return len(self.quads)
+
+    def __getitem__(self, index):
+        polygon = self.quads[index]
+        roi_img = extract_roi_with_perspective(self.img, polygon)
+        resized = resize_with_padding(roi_img, self.cfg.DATA.IMAGE_SIZE)
+        tensor = self.transform(resized)
+
+        return tensor