Skip to content

Commit

Permalink
first commit
Browse files Browse the repository at this point in the history
  • Loading branch information
kotaro-kinoshita committed Oct 30, 2024
0 parents commit 6903468
Show file tree
Hide file tree
Showing 35 changed files with 3,140 additions and 0 deletions.
62 changes: 62 additions & 0 deletions .github/release-drafter.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
---
# Release Drafter configuration: how draft releases are named, how merged
# pull requests are grouped, and how the next version number is chosen.
name-template: 'v$RESOLVED_VERSION'
tag-template: 'v$RESOLVED_VERSION'

# Release-note sections; a pull request is listed under the section whose
# label it carries.
categories:
  - title: '🚀 機能追加'
    labels:
      - 'enhancement'
  - title: '🔧 リファクタ'
    labels:
      - 'refactoring'
  - title: '🐛 バグ修正'
    labels:
      - 'bug'
  - title: '✅ テスト'
    labels:
      - 'test'
  - title: '📖 ドキュメント'
    labels:
      - 'documentation'

# One line per pull request in the generated notes.
change-template: '- $TITLE @$AUTHOR (#$NUMBER)'
# Characters escaped in pull request titles. You can add # and @ to disable
# mentions, and add ` to disable code blocks.
change-title-escapes: '\<*_&'

# Labels applied automatically, matched against the branch name and/or the
# pull request title (regular expressions).
autolabeler:
  - label: 'enhancement'
    branch:
      - '/feature\/.+/'
      - '/feat\/.+/'
  - label: 'release'
    branch:
      - '/release\/.+/'
  - label: 'refactoring'
    branch:
      - '/refactor\/.+/'
    title:
      - '/refactor/i'
  - label: 'bug'
    branch:
      - '/fix\/.+/'
      - '/bug\/.+/'
    title:
      - '/fix/i'
      - '/bug/i'
  - label: 'test'
    branch:
      - '/test\/.+/'
  - label: 'documentation'
    branch:
      - '/doc\/.+/'
    title:
      - '/doc/i'

# Picks the version bump that feeds $RESOLVED_VERSION: "breaking" bumps the
# major part, "enhancement" the minor part, anything else the patch part.
version-resolver:
  major:
    labels:
      - 'breaking'
  minor:
    labels:
      - 'enhancement'
  default: patch

# Body of the drafted release; $CHANGES expands to the categorized list.
template: |
  ## 変更
  $CHANGES
29 changes: 29 additions & 0 deletions .github/workflows/build-and-publish.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
---
# Builds the package with uv and uploads it when a GitHub release is published.
name: Publish to PyPI

on:
  release:
    types:
      - published

# The job only needs to read the repository; the upload uses its own token.
permissions:
  contents: read

jobs:
  publish:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4
        with:
          # Full history (which includes all tags) so the VCS-derived package
          # version can be resolved from the release tag. The former
          # `tags: true` entry is not an input of actions/checkout and was
          # ignored, so it has been dropped.
          fetch-depth: 0
      - name: Set up Python 3.9
        uses: actions/setup-python@v5
        with:
          python-version: "3.9"
      - name: Install uv
        uses: astral-sh/setup-uv@v3
        with:
          enable-cache: true
      - name: Install dependencies
        run: uv sync --dev
      - name: build
        run: uv build
      # NOTE: the upload target is the TestPyPI index, not production PyPI.
      - name: Publish to PyPI
        env:
          # Pass the token through the environment instead of interpolating
          # it into the command line.
          UV_PUBLISH_TOKEN: ${{ secrets.PYPI_TEST_TOKEN }}
        run: uv publish --publish-url https://test.pypi.org/legacy/

23 changes: 23 additions & 0 deletions .github/workflows/create-release.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
---
# Keeps the draft release up to date on every push to main and runs on pull
# request events as well.
name: Release Drafter

on:
  push:
    branches:
      - main
  pull_request_target:
    types:
      - opened
      - reopened
      - synchronize

# Workflow-wide default is read-only; the job below widens it explicitly.
permissions:
  contents: read

jobs:
  update_release_draft:
    permissions:
      # Write access to repository contents and to pull requests is granted
      # to this job only.
      contents: write
      pull-requests: write
    runs-on: ubuntu-latest
    steps:
      - uses: release-drafter/release-drafter@v6
        env:
          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
31 changes: 31 additions & 0 deletions .github/workflows/lint-and-test.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
# This workflow installs the Python dependencies with uv, then runs the linter
# (ruff) and the test suite (pytest) on a single Python version.
# For more information see: https://docs.github.com/en/actions/automating-builds-and-tests/building-and-testing-python

name: Python Lint

on:
  pull_request:
    branches:
      - "main"

permissions:
  contents: read

jobs:
  lint-and-test:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4
      - name: Set up Python 3.9
        # v3 of this action targets a retired runner runtime; v5 is the
        # maintained line and accepts the same `python-version` input.
        uses: actions/setup-python@v5
        with:
          python-version: "3.9"
      - name: Install uv
        uses: astral-sh/setup-uv@v3
        with:
          enable-cache: true
      - name: Install dependencies
        run: uv sync --dev
      - name: Run linter
        run: uv run ruff check
      - name: Run tests
        run: uv run pytest
18 changes: 18 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
# Python-generated files
__pycache__/
*.py[oc]
build/
dist/
wheels/
*.egg-info

# Virtual environments
.venv

# Dev tools cache
.ruff_cache
.pytest_cache

# Local input images, model weights and generated OCR outputs
dataset/
weights/
results/
1 change: 1 addition & 0 deletions .python-version
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
3.9
Empty file added README.md
Empty file.
61 changes: 61 additions & 0 deletions configs/ocr.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,61 @@
---
# OCR pipeline configuration: one section for text detection and one for
# text recognition, plus the device both run on.
DEVICE: "cuda"

DETECTION:
  WEIGHTS: "weights/dbnet_res50_20241024.pth"

  BACKBONE:
    NAME: "resnet50"
    DILATION: true

  DECODER:
    IN_CHANNELS: [256, 512, 1024, 2048]
    HIDDEN_DIM: 256
    ADAPTIVE: true
    SERIAL: false
    SMOOTH: false
    K: 50

  DATA:
    SHORTEST_SIZE: 768
    LIMIT_SIZE: 1920

  POST_PROCESS:
    MIN_SIZE: 2
    THRESH: 0.3
    BOX_THRESH: 0.5
    MAX_CANDIDATES: 1500
    UNCLIP_RATIO: 2.5

  VISUALIZE:
    COLOR: [0, 255, 0]  # RGB
    HEATMAP: false

RECOGNITION:
  WEIGHTS: "weights/checkpoint_4_79999.pth"
  CHARSET: "resource/charset.txt"

  DATA:
    NUM_WORKERS: 4
    BATCH_SIZE: 128
    IMAGE_SIZE: [32, 800]

  MODEL:
    MAX_LEN: 100
    PATCH_SIZE: [8, 8]
    HIDDEN_DIM: 512
    ENC_NUM_HEADS: 8
    ENC_MLP_RATIO: 4
    ENC_DEPTH: 12
    DEC_NUM_HEADS: 8
    DEC_MLP_RATIO: 4
    DEC_DEPTH: 1
    DECODE_AR: true
    REFINE_ITERS: 1
    DROPOUT: 0.1

  VISUALIZE:
    FONT: "resource/MPLUS1p-Medium.ttf"
    COLOR: [0, 0, 255]  # RGB
    FONT_SIZE: 12



32 changes: 32 additions & 0 deletions dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
# CUDA 11.3 / cuDNN 8 development image used as the base for the project
# environment; dependencies are installed with uv.
FROM nvidia/cuda:11.3.1-cudnn8-devel-ubuntu20.04

ENV TZ=Asia/Tokyo
ENV DEBIAN_FRONTEND=noninteractive
RUN ln -snf /usr/share/zoneinfo/$TZ /etc/localtime && echo $TZ > /etc/timezone

# Refresh the package index and install in the SAME layer: a cached
# `apt-get update` layer combined with a later, separate `install` layer can
# resolve against a stale index. The index files are removed afterwards to
# keep the layer small.
RUN apt-get -y update \
    && apt-get -y upgrade \
    && apt-get install -y \
        curl wget unzip vim \
        libopencv-dev build-essential clang \
    && rm -rf /var/lib/apt/lists/*

ENV UV_INDEX_STRATEGY="unsafe-best-match"

RUN curl -LsSf https://astral.sh/uv/install.sh | sh

COPY pyproject.toml .

ENV UV_SYSTEM_PYTHON=true \
    UV_COMPILE_BYTECODE=1 \
    UV_CACHE_DIR=/root/.cache/uv \
    UV_LINK_MODE=copy

# The uv installer has used two locations: ~/.cargo/bin (older releases) and
# ~/.local/bin (current releases). Both are listed so `uv` resolves either way.
ENV PATH="/root/.local/bin:/root/.cargo/bin/:$PATH"

RUN --mount=type=bind,source=pyproject.toml,target=pyproject.toml \
    --mount=type=bind,source=.python-version,target=.python-version \
    --mount=type=bind,source=README.md,target=README.md \
    uv sync

# `RUN . .venv/bin/activate` only affects the shell of that single build step
# and is lost afterwards. Exporting the variables makes the environment
# created by `uv sync` active in every later layer and in running containers.
# Assumes the build directory is `/` at this point (no WORKDIR has been set
# above) — adjust the path if the base image changes its working directory.
ENV VIRTUAL_ENV=/.venv
ENV PATH="/.venv/bin:$PATH"

WORKDIR /workspace
40 changes: 40 additions & 0 deletions examples/simple_ocr.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
import argparse
import os
import cv2
import json
from yomitoku import OCR


def main(args):
    """Run OCR on a single image and write the results into ``args.outdir``.

    The predictions are saved as ``<image stem>_result.json``. When the OCR
    call also returns a visualization, it is saved as
    ``<image stem>_visualize.jpg``.

    Args:
        args: Parsed command-line namespace providing ``image`` (input path),
            ``config`` (OCR config path), ``vis`` (request a visualization)
            and ``outdir`` (output directory, created if missing).
    """
    filename = os.path.basename(args.image)
    name, _ = os.path.splitext(filename)

    ocr = OCR(args.config, visualize=args.vis)
    preds, vis = ocr(args.image)

    os.makedirs(args.outdir, exist_ok=True)

    if vis is not None:
        out_vis = os.path.join(args.outdir, f"{name}_visualize.jpg")
        cv2.imwrite(out_vis, vis)

    # ensure_ascii=False writes non-ASCII characters verbatim, so the file
    # encoding must be pinned; the platform default may be unable to encode
    # them (or produce a file other tools cannot read back as UTF-8).
    out_json = os.path.join(args.outdir, f"{name}_result.json")
    with open(out_json, "w", encoding="utf-8") as f:
        json.dump(
            preds,
            f,
            ensure_ascii=False,
            indent=4,
            sort_keys=True,
            separators=(",", ": "),
        )


if __name__ == "__main__":
    # Command-line entry point: collect the options and hand them to main().
    cli = argparse.ArgumentParser()
    cli.add_argument(
        "--config",
        type=str,
        default="configs/ocr.yaml",
    )
    cli.add_argument(
        "--image",
        type=str,
        default="dataset/00000528_0894389_62.jpg",
    )
    cli.add_argument("--vis", action="store_true")
    cli.add_argument(
        "--outdir",
        type=str,
        default="results",
    )

    main(cli.parse_args())
38 changes: 38 additions & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
# Build backend: hatchling, with the hatch-vcs plugin available at build time.
[build-system]
requires = ["hatchling", "hatch-vcs"]
build-backend = "hatchling.build"

[project]
name = "yomitoku"
# The version is not written here; it is supplied dynamically (see
# [tool.hatch.version] below).
dynamic = ["version"]
authors = [{name = "Kotaro Kinoshita"}]
description = "Yomitoku is a document image analysis package powered by AI technology for the Japanese language."
readme = "README.md"
requires-python = ">=3.9"
keywords = ["Japanese", "OCR", "Deep Learning"]
# Runtime dependencies (lower bounds only).
dependencies = [
"omegaconf>=2.3.0",
"opencv-python>=4.10.0.84",
"pyclipper>=1.3.0.post6",
"shapely>=2.0.6",
"timm>=1.0.11",
"torch>=2.5.0",
"torchvision>=0.20.0",
]

# Version comes from version control; only tags of the form vMAJOR.MINOR.PATCH
# match the pattern below.
[tool.hatch.version]
source = "vcs"
tag-pattern = "^v(?P<version>[0-9]+\\.[0-9]+\\.[0-9]+)$"


# Options forwarded as-is to the underlying version scheme implementation.
[tool.hatch.version.raw-options]
local_scheme = "no-local-version"
version_scheme = "no-guess-dev"

# Development-only tools installed by `uv sync --dev`.
[tool.uv]
dev-dependencies = [
"pytest-cov>=5.0.0",
"pytest>=8.3.3",
"ruff>=0.7.0",
]

Binary file added resource/MPLUS1p-Medium.ttf
Binary file not shown.
1 change: 1 addition & 0 deletions resource/charset.txt

Large diffs are not rendered by default.

6 changes: 6 additions & 0 deletions src/yomitoku/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
from importlib.metadata import version

from .ocr import OCR

# Public API re-exported by the package.
__all__ = ["OCR"]
# Version string read from the installed distribution's metadata, looked up
# under this package's own name.
__version__ = version(__package__)
Empty file added src/yomitoku/data/__init__.py
Empty file.
35 changes: 35 additions & 0 deletions src/yomitoku/data/dataset.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
from torchvision import transforms as T


from torch.utils.data import Dataset

from yomitoku.data.functions import (
resize_with_padding,
extract_roi_with_perspective,
)


class ParseqDataset(Dataset):
    """Dataset yielding one normalized tensor per quadrilateral region.

    Each item is produced by cutting the region described by ``quads[index]``
    out of the image with a perspective transform, resizing it with padding
    to ``cfg.DATA.IMAGE_SIZE`` and applying ``ToTensor`` followed by
    ``Normalize(0.5, 0.5)``.

    Args:
        cfg: Configuration object; ``cfg.DATA.IMAGE_SIZE`` is read per item.
        img: Image array indexed as ``img[:, :, channel]``.
        quads: Sequence of region descriptions; its length is the dataset
            length.
    """

    def __init__(self, cfg, img, quads):
        self.cfg = cfg
        self.quads = quads
        # Reverse the channel axis once (presumably BGR -> RGB — confirm with
        # the caller). The result is copied so the stored image is a regular
        # contiguous array rather than a negative-stride view, which some
        # consumers (e.g. torch.from_numpy) reject.
        self.img = img[:, :, ::-1].copy()

        self.transform = T.Compose(
            [
                T.ToTensor(),
                T.Normalize(0.5, 0.5),
            ]
        )

    def __len__(self):
        """Return the number of regions."""
        return len(self.quads)

    def __getitem__(self, index):
        """Return the transformed tensor for the region at ``index``."""
        polygon = self.quads[index]
        roi_img = extract_roi_with_perspective(self.img, polygon)
        resized = resize_with_padding(roi_img, self.cfg.DATA.IMAGE_SIZE)
        tensor = self.transform(resized)

        return tensor
Loading

0 comments on commit 6903468

Please sign in to comment.