From 3747ceada6c78f526aa846b5b90f3e665c64765e Mon Sep 17 00:00:00 2001 From: kotaro-kinoshita Date: Thu, 28 Nov 2024 23:18:05 +0900 Subject: [PATCH] add text normalize --- src/yomitoku/text_recognizer.py | 3 +++ uv.lock | 2 +- 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/src/yomitoku/text_recognizer.py b/src/yomitoku/text_recognizer.py index 38d6ff1..082d394 100644 --- a/src/yomitoku/text_recognizer.py +++ b/src/yomitoku/text_recognizer.py @@ -2,6 +2,7 @@ import numpy as np import torch +import unicodedata from pydantic import conlist from .base import BaseModelCatalog, BaseModule, BaseSchema @@ -72,6 +73,8 @@ def preprocess(self, img, polygons): def postprocess(self, p, points): pred, score = self.tokenizer.decode(p) + pred = [unicodedata.normalize("NFKC", x) for x in pred] + directions = [] for point in points: point = np.array(point) diff --git a/uv.lock b/uv.lock index b6372e6..77e07c7 100644 --- a/uv.lock +++ b/uv.lock @@ -1530,7 +1530,7 @@ wheels = [ [[package]] name = "yomitoku" -version = "0.4.1" +version = "0.5.1" source = { editable = "." } dependencies = [ { name = "huggingface-hub" },