-
Notifications
You must be signed in to change notification settings - Fork 15
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
0 parents
commit 6903468
Showing
35 changed files
with
3,140 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,62 @@ | ||
---
# Release Drafter configuration: release name/tag templates, changelog
# categories, pull-request auto-labeling rules and version-bump rules.
name-template: "v$RESOLVED_VERSION"
tag-template: "v$RESOLVED_VERSION"
categories: # changelog sections; a PR is listed under the section whose label it carries
  - title: "🚀 機能追加"
    labels:
      - "enhancement"
  - title: "🔧 リファクタ"
    labels:
      - "refactoring"
  - title: "🐛 バグ修正"
    labels:
      - "bug"
  - title: "✅ テスト"
    labels:
      - "test"
  - title: "📖 ドキュメント"
    labels:
      - "documentation"
change-template: "- $TITLE @$AUTHOR (#$NUMBER)"
change-title-escapes: '\<*_&' # You can add # and @ to disable mentions, and add ` to disable code blocks.
autolabeler: # automatically add labels based on branch names or PR titles
  - label: "enhancement"
    branch:
      - '/feature\/.+/'
      - '/feat\/.+/'
  - label: "release"
    branch:
      - '/release\/.+/'
  - label: "refactoring"
    branch:
      - '/refactor\/.+/'
    title:
      - "/refactor/i"
  - label: "bug"
    branch:
      - '/fix\/.+/'
      - '/bug\/.+/'
    # NOTE(review): title patterns are unanchored substring matches, so e.g.
    # "prefix" also matches /fix/i - confirm this is intended.
    title:
      - "/fix/i"
      - "/bug/i"
  - label: "test"
    branch:
      - '/test\/.+/'
  - label: "documentation"
    branch:
      - '/doc\/.+/'
    title:
      - "/doc/i"
version-resolver: # choose the next version bump ($RESOLVED_VERSION) from PR labels
  major:
    labels:
      - "breaking"
  minor:
    labels:
      - "enhancement"
  default: patch # any other label set results in a patch bump
template: |
  ## 変更
  $CHANGES
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,29 @@ | ||
# Builds the package with uv and uploads it when a GitHub release is published.
name: Publish to PyPI

on:
  release:
    types: [published]

jobs:
  publish:
    runs-on: ubuntu-latest
    steps:
      # fetch-depth: 0 fetches the full history for all branches and tags, which
      # the VCS-derived package version needs. The previous `tags: true` input
      # is not defined by actions/checkout and was ignored with a warning.
      - uses: actions/checkout@v4
        with:
          fetch-depth: 0
      - name: Set up Python 3.9
        uses: actions/setup-python@v3
        with:
          python-version: "3.9"
      - name: Install uv
        uses: astral-sh/setup-uv@v3
        with:
          enable-cache: true
      - name: Install dependencies
        run: uv sync --dev
      - name: build
        run: uv build
      # NOTE(review): despite the step name, this uploads to TestPyPI using the
      # PYPI_TEST_TOKEN secret - confirm that this is the intended target.
      - name: Publish to PyPI
        run: uv publish --publish-url https://test.pypi.org/legacy/ --token ${{ secrets.PYPI_TEST_TOKEN }}
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,23 @@ | ||
---
# Runs the Release Drafter action on pushes to main and on pull-request events.
name: Release Drafter

on:
  push:
    branches:
      - main
  # NOTE(review): presumably the PR events are here so the autolabeler rules
  # from the Release Drafter config can label pull requests - confirm.
  pull_request_target:
    types: [opened, reopened, synchronize]

# Workflow-wide default is read-only; the job below widens only what it needs.
permissions:
  contents: read

jobs:
  update_release_draft:
    permissions:
      contents: write
      pull-requests: write
    runs-on: ubuntu-latest
    steps:
      - uses: release-drafter/release-drafter@v6
        env:
          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,31 @@ | ||
# This workflow will install Python dependencies, run tests and lint with a single version of Python
# For more information see: https://docs.github.com/en/actions/automating-builds-and-tests/building-and-testing-python

name: Python Lint

# Runs only for pull requests that target main.
on:
  pull_request:
    branches: [ "main" ]

# The job only needs to read the repository contents.
permissions:
  contents: read

jobs:
  lint-and-test:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4
      - name: Set up Python 3.9
        uses: actions/setup-python@v3
        with:
          python-version: "3.9"
      - name: Install uv
        uses: astral-sh/setup-uv@v3
        with:
          enable-cache: true
      # Installs the project together with its development dependencies.
      - name: Install dependencies
        run: uv sync --dev
      - name: Run linter
        run: uv run ruff check
      - name: Run tests
        run: uv run pytest
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,18 @@ | ||
# Python-generated files | ||
__pycache__/ | ||
*.py[oc] | ||
build/ | ||
dist/ | ||
wheels/ | ||
*.egg-info | ||
|
||
# Virtual environments | ||
.venv | ||
|
||
# Dev tools cache | ||
.ruff_cache | ||
.pytest_cache | ||
|
||
dataset/ | ||
weights/ | ||
results/ |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
3.9 |
Empty file.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,61 @@ | ||
# OCR pipeline configuration.
# NOTE(review): nesting below is reconstructed from the key names (DATA and
# VISUALIZE appear once per stage) - confirm against the consuming code.
DEVICE: "cuda"

# Settings for the detection stage.
DETECTION:
  WEIGHTS: "weights/dbnet_res50_20241024.pth"
  BACKBONE:
    NAME: "resnet50"
    DILATION: True

  DECODER:
    IN_CHANNELS: [256, 512, 1024, 2048]
    HIDDEN_DIM: 256
    ADAPTIVE: True
    SERIAL: False
    SMOOTH: False
    K: 50

  DATA:
    SHORTEST_SIZE: 768
    LIMIT_SIZE: 1920

  POST_PROCESS:
    MIN_SIZE: 2
    THRESH: 0.3
    BOX_THRESH: 0.5
    MAX_CANDIDATES: 1500
    UNCLIP_RATIO: 2.5

  VISUALIZE:
    COLOR: [0, 255, 0] # RGB
    HEATMAP: False

# Settings for the recognition stage.
RECOGNITION:
  WEIGHTS: weights/checkpoint_4_79999.pth
  CHARSET: "resource/charset.txt"

  DATA:
    NUM_WORKERS: 4
    BATCH_SIZE: 128
    IMAGE_SIZE: [32, 800] # presumably [height, width] - TODO confirm

  MODEL:
    MAX_LEN: 100
    PATCH_SIZE: [8, 8]
    HIDDEN_DIM: 512
    ENC_NUM_HEADS: 8
    ENC_MLP_RATIO: 4
    ENC_DEPTH: 12
    DEC_NUM_HEADS: 8
    DEC_MLP_RATIO: 4
    DEC_DEPTH: 1
    DECODE_AR: True
    REFINE_ITERS: 1
    DROPOUT: 0.1

  VISUALIZE:
    FONT: resource/MPLUS1p-Medium.ttf
    COLOR: [0, 0, 255] # RGB
    FONT_SIZE: 12
|
||
|
||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,32 @@ | ||
# CUDA development image with uv and the project's dependencies installed.
FROM nvidia/cuda:11.3.1-cudnn8-devel-ubuntu20.04

# Configure the timezone non-interactively so apt never prompts during build.
ENV TZ=Asia/Tokyo
ENV DEBIAN_FRONTEND=noninteractive
RUN ln -snf /usr/share/zoneinfo/$TZ /etc/localtime && echo $TZ > /etc/timezone

# Refresh the package index in the same layer as the installs; a separately
# cached `apt-get update` layer can leave later installs with a stale index.
RUN apt-get -y update && apt-get -y upgrade \
    && apt-get install -y curl wget unzip vim \
    && apt-get -y install libopencv-dev build-essential clang

ENV UV_INDEX_STRATEGY="unsafe-best-match"

RUN curl -LsSf https://astral.sh/uv/install.sh | sh

COPY pyproject.toml .

# UV_PROJECT_ENVIRONMENT pins the virtual environment to a fixed path so it
# can be put on PATH below. /.venv is where `uv sync` would create it when run
# from `/` - assumed to be the build's working directory here; confirm.
ENV UV_SYSTEM_PYTHON=true \
    UV_COMPILE_BYTECODE=1 \
    UV_CACHE_DIR=/root/.cache/uv \
    UV_LINK_MODE=copy \
    UV_PROJECT_ENVIRONMENT=/.venv

# The uv installer places the binary in ~/.local/bin on recent releases and in
# ~/.cargo/bin on older ones, so both are listed. Putting the environment's
# bin directory first "activates" it for every later layer and for containers;
# sourcing the activate script inside a RUN only affects that one shell.
ENV PATH="/.venv/bin:/root/.local/bin:/root/.cargo/bin/:$PATH"

RUN --mount=type=bind,source=pyproject.toml,target=pyproject.toml \
    --mount=type=bind,source=.python-version,target=.python-version \
    --mount=type=bind,source=README.md,target=README.md \
    uv sync

WORKDIR /workspace
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,40 @@ | ||
import argparse | ||
import os | ||
import cv2 | ||
import json | ||
from yomitoku import OCR | ||
|
||
|
||
def main(args):
    """Run OCR on a single image and write the results to ``args.outdir``.

    Args:
        args: Parsed command-line namespace providing ``image`` (input image
            path), ``config`` (OCR config path), ``vis`` (whether to request a
            visualization) and ``outdir`` (output directory).

    Writes ``<name>_result.json`` and, when the OCR call returns a
    visualization, ``<name>_visualize.jpg``, where ``<name>`` is the input
    file name without its extension.
    """
    filename = os.path.basename(args.image)
    name, _ = os.path.splitext(filename)

    ocr = OCR(args.config, visualize=args.vis)
    preds, vis = ocr(args.image)

    os.makedirs(args.outdir, exist_ok=True)

    if vis is not None:
        out_vis = os.path.join(args.outdir, f"{name}_visualize.jpg")
        cv2.imwrite(out_vis, vis)

    # ensure_ascii=False emits non-ASCII characters verbatim, so the file must
    # be opened as UTF-8 explicitly; the platform default encoding may be
    # unable to represent them.
    out_json = os.path.join(args.outdir, f"{name}_result.json")
    with open(out_json, "w", encoding="utf-8") as f:
        json.dump(
            preds,
            f,
            ensure_ascii=False,
            indent=4,
            sort_keys=True,
            separators=(",", ": "),
        )
|
||
|
||
if __name__ == "__main__":
    # Command-line options as (flag, keyword arguments) pairs.
    cli_options = (
        ("--config", {"type": str, "default": "configs/ocr.yaml"}),
        ("--image", {"type": str, "default": "dataset/00000528_0894389_62.jpg"}),
        ("--vis", {"action": "store_true"}),
        ("--outdir", {"type": str, "default": "results"}),
    )

    cli = argparse.ArgumentParser()
    for flag, options in cli_options:
        cli.add_argument(flag, **options)

    main(cli.parse_args())
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,38 @@ | ||
# Build with hatchling; hatch-vcs supplies the version from version control.
[build-system]
requires = ["hatchling", "hatch-vcs"]
build-backend = "hatchling.build"

[project]
name = "yomitoku"
# The version is not written here; it is resolved at build time (see
# [tool.hatch.version] below).
dynamic = ["version"]
authors = [{name = "Kotaro Kinoshita"}]
description = "Yomitoku is a document image analysis package powered by AI technology for the Japanese language."
readme = "README.md"
requires-python = ">=3.9"
keywords = ["Japanese", "OCR", "Deep Learning"]
dependencies = [
    "omegaconf>=2.3.0",
    "opencv-python>=4.10.0.84",
    "pyclipper>=1.3.0.post6",
    "shapely>=2.0.6",
    "timm>=1.0.11",
    "torch>=2.5.0",
    "torchvision>=0.20.0",
]

# Take the version from VCS tags; only tags of the form vMAJOR.MINOR.PATCH
# match the pattern below.
[tool.hatch.version]
source = "vcs"
tag-pattern = "^v(?P<version>[0-9]+\\.[0-9]+\\.[0-9]+)$"

# NOTE(review): presumably forwarded to the underlying VCS versioning backend
# to suppress local-version suffixes and guessed dev versions - confirm.
[tool.hatch.version.raw-options]
local_scheme = "no-local-version"
version_scheme = "no-guess-dev"

# Development-only tools; installed by `uv sync --dev`.
[tool.uv]
dev-dependencies = [
    "pytest-cov>=5.0.0",
    "pytest>=8.3.3",
    "ruff>=0.7.0",
]
|
Binary file not shown.
Large diffs are not rendered by default.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,6 @@ | ||
"""Package entry point: exposes ``OCR`` and the installed package version."""

from importlib.metadata import PackageNotFoundError, version

from .ocr import OCR

__all__ = ["OCR"]

try:
    __version__ = version(__package__)
except PackageNotFoundError:
    # No installed distribution metadata (e.g. imported straight from a source
    # checkout); keep the package importable with a placeholder version.
    __version__ = "0.0.0+unknown"
Empty file.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,35 @@ | ||
from torchvision import transforms as T | ||
|
||
|
||
from torch.utils.data import Dataset | ||
|
||
from yomitoku.data.functions import ( | ||
resize_with_padding, | ||
extract_roi_with_perspective, | ||
) | ||
|
||
|
||
class ParseqDataset(Dataset):
    """Dataset yielding one normalized crop tensor per quadrilateral.

    Each item is the region of ``img`` described by ``quads[index]``, extracted
    with ``extract_roi_with_perspective``, resized with padding to
    ``cfg.DATA.IMAGE_SIZE`` and converted to a normalized tensor.
    """

    def __init__(self, cfg, img, quads):
        self.cfg = cfg
        self.quads = quads
        # Store the image with its last axis reversed
        # (presumably a BGR -> RGB channel swap - TODO confirm).
        self.img = img[:, :, ::-1]
        # ToTensor followed by normalization with mean 0.5 and std 0.5.
        self.transform = T.Compose([T.ToTensor(), T.Normalize(0.5, 0.5)])

    def __len__(self):
        # One sample per quadrilateral.
        return len(self.quads)

    def __getitem__(self, index):
        crop = extract_roi_with_perspective(self.img, self.quads[index])
        padded = resize_with_padding(crop, self.cfg.DATA.IMAGE_SIZE)
        return self.transform(padded)
Oops, something went wrong.