OCR Server

將您的 iPhone 變成強大的本機 OCR 伺服器，採用 Apple 的 Vision Framework 技術。無需雲端依賴，無限制使用，完全隱私保護。

使用方法

啟動應用程式，伺服器將自動啟動
從同一網路上的任何裝置存取顯示的 IP 位址
上傳圖像即可獲得文字辨識結果
透過 API 將服務整合到您的應用程式中
為確保應用程式持續運行不中斷，請啟用 iOS 引導使用模式並保持螢幕開啟

OCR 測試：在您的電腦上開啟網頁瀏覽器，瀏覽應用程式顯示的 IP 位址來執行 OCR 測試。

API 範例 - 透過 upload API 上傳圖像：

curl -H "Accept: application/json" \
  -X POST http://<您的IP>:8000/upload \
  -F "file=@01.png"

Python 上傳範例：

import requests

url = "http://10.0.1.11:8000/upload"  # 替換為您的 IP 位址
file_path = "01.png"

with open(file_path, "rb") as f:
    files = {"file": f}
    headers = {"Accept": "application/json"}
    response = requests.post(url, files=files, headers=headers)

print("status code:", response.status_code)
print("response:", response.text)

JSON 回應格式如下：

{
  "success": true,
  "message": "File uploaded successfully",
  "ocr_result": "Hello\nWorld",
  "image_height": 648,
  "image_width": 1247,
  "ocr_boxes": [
    {
      "text": "Hello",
      "x": 429.6554479416482,
      "y": 268.0000001076923,
      "w": 201.83814102564105,
      "h": 72,
      "rect": {
        "topLeft_x": 429.6554479416482,
        "topLeft_y": 268.0000001076923,
        "topRight_x": 631.4935889672893,
        "topRight_y": 268.0000001076923,
        "bottomRight_x": 631.4935889672893,
        "bottomRight_y": 340.0000001076923,
        "bottomLeft_x": 429.6554479416482,
        "bottomLeft_y": 340.0000001076923
      }
    },
    {
      "text": "World",
      "x": 421.6618595738782,
      "y": 417.99999971428576,
      "w": 251.79807692307696,
      "h": 80,
      "rect": {
        "topLeft_x": 421.6618595738782,
        "topLeft_y": 417.99999971428576,
        "topRight_x": 673.4599364969552,
        "topRight_y": 417.99999971428576,
        "bottomRight_x": 673.4599364969552,
        "bottomRight_y": 497.99999971428576,
        "bottomLeft_x": 421.6618595738782,
        "bottomLeft_y": 497.99999971428576
      }
    }
  ]
}

image_width 和 image_height 代表圖像的寬度和高度（以像素為單位）， x 和 y 代表文字邊界框的左上角原點（以像素為單位）， w 和 h 代表文字邊界框的寬度和高度（以像素為單位）， rect 提供偵測到的文字區域四個角的座標，並保留其原始方向（非軸對齊）。

Python 範例 – 使用 ocr_boxes 資訊繪製文字邊界框：

#
# pip3 install requests pillow opencv-python numpy
#

import os
import sys
import requests
from PIL import Image, ImageDraw, ImageFont, ImageOps
import numpy as np
import cv2

url = "http://10.0.1.11:8000/upload"  # Replace with your IP address
file_path = "01.png"

# ===== Select font (supports Chinese and English), font size auto-scales with box height =====
def pick_font(box_h_px: float):
    font_candidates = [
        # macOS
        "/System/Library/Fonts/PingFang.ttc",
        "/System/Library/Fonts/STHeiti Light.ttc",
        # Windows
        r"C:\Windows\Fonts\msyh.ttc",
        r"C:\Windows\Fonts\msjh.ttc",
        r"C:\Windows\Fonts\arialuni.ttf",
        # Noto
        "/usr/share/fonts/opentype/noto/NotoSansCJK-Regular.ttc",
        "/usr/share/fonts/truetype/noto/NotoSansCJK-Regular.ttc",
    ]
    size = max(10, int(box_h_px * 0.25))  # Small font size = 25% of box height (minimum 10pt)
    for path in font_candidates:
        if os.path.exists(path):
            try:
                return ImageFont.truetype(path, size=size)
            except Exception:
                pass
    return ImageFont.load_default()

# ===== Draw box and small text =====
def draw_boxes(img_pil: Image.Image, boxes, line_thickness: int = 5) -> Image.Image:
    draw = ImageDraw.Draw(img_pil)
    for b in boxes:
        try:
            x = float(b["x"]); y = float(b["y"])
            w = float(b["w"]); h = float(b["h"])
            text = str(b.get("text", ""))
        except Exception:
            continue

        # Red bounding box
        x2, y2 = x + w, y + h
        draw.rectangle([x, y, x2, y2], outline=(255, 0, 0), width=line_thickness)

        # Top-right label
        font = pick_font(h)
        # Text size
        # textbbox returns (l, t, r, b)
        l, t, r, b = draw.textbbox((0, 0), text, font=font)
        tw, th = (r - l), (b - t)
        pad = max(2, int(h * 0.06))

        # Align label to top-right, not exceeding box or image edge
        tx = int(max(0, min(x2 - tw - pad, img_pil.width - tw - pad)))
        ty = int(max(0, min(y + pad, img_pil.height - th - pad)))

        # White background
        draw.rectangle([tx - pad, ty - pad, tx + tw + pad, ty + th + pad], fill=(255, 255, 255))
        draw.text((tx, ty), text, font=font, fill=(20, 20, 20))
    return img_pil

def main():
    if not os.path.exists(file_path):
        print(f"[ERROR] Image not found: {file_path}", file=sys.stderr)
        sys.exit(1)

    # 1) Upload
    with open(file_path, "rb") as f:
        files = {"file": f}
        headers = {"Accept": "application/json"}
        try:
            response = requests.post(url, files=files, headers=headers, timeout=60)
        except requests.RequestException as e:
            print(f"[ERROR] Request failed: {e}", file=sys.stderr)
            sys.exit(2)

    print("status code:", response.status_code)

    # 2) Check HTTP and JSON
    if response.status_code != 200:
        print("response:", response.text[:500])
        sys.exit(3)

    try:
        data = response.json()
    except ValueError:
        print("[ERROR] Not JSON response")
        print("response:", response.text[:500])
        sys.exit(4)

    if not data.get("success", False):
        print("[ERROR] Server returned failure:", data)
        sys.exit(5)

    print("response ok")

    # 3) Load original image (using PIL)
    img_pil = Image.open(file_path)
    img_pil = ImageOps.exif_transpose(img_pil).convert("RGB")

    # If server returns different dimensions (should usually match), use server dimensions
    W = int(data.get("image_width", img_pil.width))
    H = int(data.get("image_height", img_pil.height))
    if (W, H) != (img_pil.width, img_pil.height):
        img_pil = img_pil.resize((W, H), Image.BICUBIC)

    boxes = data.get("ocr_boxes", [])
    img_pil = draw_boxes(img_pil, boxes)

    # 4) Display
    img_cv = cv2.cvtColor(np.array(img_pil), cv2.COLOR_RGB2BGR)
    cv2.imshow("OCR Preview", img_cv)
    print("Press any key on the image window to exit...")
    cv2.waitKey(0)
    cv2.destroyAllWindows()

if __name__ == "__main__":
    main()

範例輸出：

功能特色

採用 Apple Vision Framework 的高精度 OCR
支援多語言自動偵測
透過網頁介面上傳並在數秒內獲得 OCR 結果
JSON API 便於整合到應用程式中
100% 本機處理，無雲端依賴，完全隱私保護

使用場景

無需雲端服務的本機 OCR
在同一網路內的裝置間共享 OCR 服務
使用多台 iPhone 建構 OCR 處理叢集

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

OCR Server

使用方法

功能特色

使用場景

FilesExpand file tree

README.zh-TW.md

Latest commit

History

README.zh-TW.md

File metadata and controls

OCR Server

使用方法

功能特色

使用場景