OCR Server

将您的 iPhone 变成强大的本地 OCR 服务器，采用 Apple 的 Vision Framework 技术。无需云端依赖，无限制使用，完全隐私保护。

使用方法

启动应用程序，服务器将自动启动
从同一网络上的任何设备访问显示的 IP 地址
上传图像即可获得文字识别结果
通过 API 将服务集成到您的应用程序中
为确保应用程序持续运行不中断，请启用 iOS 引导式访问模式并保持屏幕开启

OCR 测试：在计算机上打开网页浏览器，访问应用程序显示的 IP 地址，即可进行 OCR 测试。

API 示例 - 通过 upload API 上传图像：

# POST 01.png as the multipart form field "file" and ask for a JSON reply.
# Replace <您的IP> with the IP address shown in the app before running.
curl -H "Accept: application/json" \
  -X POST http://<您的IP>:8000/upload \
  -F "file=@01.png"

Python 上传示例：

import requests

url = "http://10.0.1.11:8000/upload"  # Replace with your IP address
file_path = "01.png"

# Send the image as the multipart form field "file" and ask for JSON back.
with open(file_path, "rb") as f:
    files = {"file": f}
    headers = {"Accept": "application/json"}
    # requests applies no timeout by default; without one, an unreachable
    # server would make this call block indefinitely.
    response = requests.post(url, files=files, headers=headers, timeout=60)

print("status code:", response.status_code)
print("response:", response.text)

JSON 响应格式如下：

{
  "success": true,
  "message": "File uploaded successfully",
  "ocr_result": "Hello\nWorld",
  "image_height": 648,
  "image_width": 1247,
  "ocr_boxes": [
    {
      "text": "Hello",
      "x": 429.6554479416482,
      "y": 268.0000001076923,
      "w": 201.83814102564105,
      "h": 72,
      "rect": {
        "topLeft_x": 429.6554479416482,
        "topLeft_y": 268.0000001076923,
        "topRight_x": 631.4935889672893,
        "topRight_y": 268.0000001076923,
        "bottomRight_x": 631.4935889672893,
        "bottomRight_y": 340.0000001076923,
        "bottomLeft_x": 429.6554479416482,
        "bottomLeft_y": 340.0000001076923
      }
    },
    {
      "text": "World",
      "x": 421.6618595738782,
      "y": 417.99999971428576,
      "w": 251.79807692307696,
      "h": 80,
      "rect": {
        "topLeft_x": 421.6618595738782,
        "topLeft_y": 417.99999971428576,
        "topRight_x": 673.4599364969552,
        "topRight_y": 417.99999971428576,
        "bottomRight_x": 673.4599364969552,
        "bottomRight_y": 497.99999971428576,
        "bottomLeft_x": 421.6618595738782,
        "bottomLeft_y": 497.99999971428576
      }
    }
  ]
}

image_width 和 image_height 表示图像的宽度和高度（以像素为单位）；x 和 y 表示文字边界框左上角的坐标（以像素为单位）；w 和 h 表示文字边界框的宽度和高度（以像素为单位）；rect 提供检测到的文字区域四个角的坐标，并保留其原始方向（可能不与坐标轴对齐）。

Python 示例 – 使用 ocr_boxes 信息绘制文字边界框：

#
# pip3 install requests pillow opencv-python numpy
#

import os
import sys
import requests
from PIL import Image, ImageDraw, ImageFont, ImageOps
import numpy as np
import cv2

# Upload endpoint of the OCR server.
url = "http://10.0.1.11:8000/upload"  # Replace with your IP address
# Local image that main() uploads and then reopens to draw the OCR boxes on.
file_path = "01.png"

# ===== Select font (supports Chinese and English), font size auto-scales with box height =====
def pick_font(box_h_px: float):
    """Return a label font whose size scales with the height of an OCR box.

    The requested size is 25% of ``box_h_px`` truncated to an integer, with
    a floor of 10. Candidate font files (macOS, Windows and Noto locations,
    chosen to cover Chinese and English text) are tried in order; the first
    one that exists and loads is returned. If none can be used, Pillow's
    built-in default font is returned instead.
    """
    point_size = max(10, int(box_h_px * 0.25))
    cjk_font_paths = (
        # macOS
        "/System/Library/Fonts/PingFang.ttc",
        "/System/Library/Fonts/STHeiti Light.ttc",
        # Windows
        r"C:\Windows\Fonts\msyh.ttc",
        r"C:\Windows\Fonts\msjh.ttc",
        r"C:\Windows\Fonts\arialuni.ttf",
        # Noto
        "/usr/share/fonts/opentype/noto/NotoSansCJK-Regular.ttc",
        "/usr/share/fonts/truetype/noto/NotoSansCJK-Regular.ttc",
    )
    for candidate in cjk_font_paths:
        if not os.path.exists(candidate):
            continue
        try:
            return ImageFont.truetype(candidate, size=point_size)
        except Exception:
            # Best effort: a file that exists but cannot be loaded as a font
            # is skipped so the next candidate gets a chance.
            continue
    return ImageFont.load_default()

# ===== Draw box and small text =====
def draw_boxes(img_pil: Image.Image, boxes, line_thickness: int = 5) -> Image.Image:
    """Draw every OCR box and a small text label onto ``img_pil``.

    Args:
        img_pil: Image to annotate. It is drawn on in place.
        boxes: Iterable of mappings carrying numeric ``"x"``, ``"y"``,
            ``"w"``, ``"h"`` entries and an optional ``"text"`` entry.
            Entries that cannot be read this way are skipped.
        line_thickness: Outline width of the red rectangle.

    Returns:
        The same ``img_pil`` object, after drawing.
    """
    draw = ImageDraw.Draw(img_pil)
    for box in boxes:
        try:
            x = float(box["x"])
            y = float(box["y"])
            w = float(box["w"])
            h = float(box["h"])
            text = str(box.get("text", ""))
        except Exception:
            # Best effort: one malformed entry must not abort the preview.
            continue

        # Red bounding box
        x2, y2 = x + w, y + h
        draw.rectangle([x, y, x2, y2], outline=(255, 0, 0), width=line_thickness)

        # Label font scales with the box height.
        font = pick_font(h)

        # textbbox returns (left, top, right, bottom) relative to the anchor
        # point; left/top may be non-zero, so they are kept to position the
        # glyphs precisely below. Distinct names also avoid clobbering the
        # loop variable.
        left, top, right, bottom = draw.textbbox((0, 0), text, font=font)
        tw, th = right - left, bottom - top
        pad = max(2, int(h * 0.06))

        # Align the label to the box's top-right corner, clamped so it stays
        # inside the image.
        tx = int(max(0, min(x2 - tw - pad, img_pil.width - tw - pad)))
        ty = int(max(0, min(y + pad, img_pil.height - th - pad)))

        # White background
        draw.rectangle([tx - pad, ty - pad, tx + tw + pad, ty + th + pad], fill=(255, 255, 255))
        # Shift the anchor by the bbox offset so the rendered glyphs occupy
        # exactly (tx, ty)-(tx + tw, ty + th), i.e. the padded background.
        draw.text((tx - left, ty - top), text, font=font, fill=(20, 20, 20))
    return img_pil

def main():
    """Upload ``file_path`` to the OCR server at ``url`` and preview the result.

    The image is posted as a multipart upload, the JSON reply is validated,
    the returned OCR boxes are drawn on the local image, and the annotated
    image is shown in an OpenCV window until a key is pressed.

    Exit codes:
        1: the image file does not exist.
        2: the HTTP request failed.
        3: the server answered with a status other than 200.
        4: the response body is not JSON.
        5: the JSON reply is not an object or does not report success.
    """
    if not os.path.exists(file_path):
        print(f"[ERROR] Image not found: {file_path}", file=sys.stderr)
        sys.exit(1)

    # 1) Upload
    with open(file_path, "rb") as f:
        files = {"file": f}
        headers = {"Accept": "application/json"}
        try:
            response = requests.post(url, files=files, headers=headers, timeout=60)
        except requests.RequestException as e:
            print(f"[ERROR] Request failed: {e}", file=sys.stderr)
            sys.exit(2)

    print("status code:", response.status_code)

    # 2) Check HTTP and JSON
    if response.status_code != 200:
        print("response:", response.text[:500])
        sys.exit(3)

    try:
        data = response.json()
    except ValueError:
        print("[ERROR] Not JSON response", file=sys.stderr)
        print("response:", response.text[:500])
        sys.exit(4)

    # A JSON array/string/number has no .get(); treat it like a failed reply
    # instead of crashing with AttributeError.
    if not isinstance(data, dict) or not data.get("success", False):
        print("[ERROR] Server returned failure:", data, file=sys.stderr)
        sys.exit(5)

    print("response ok")

    # 3) Load original image (using PIL). convert() yields an independent
    # in-memory copy, so the source file handle can be closed right away.
    with Image.open(file_path) as src:
        img_pil = ImageOps.exif_transpose(src).convert("RGB")

    # If server returns different dimensions (should usually match), use
    # server dimensions. Missing, null or zero values fall back to the local
    # image size.
    W = int(data.get("image_width") or img_pil.width)
    H = int(data.get("image_height") or img_pil.height)
    if (W, H) != (img_pil.width, img_pil.height):
        img_pil = img_pil.resize((W, H), Image.BICUBIC)

    # A null "ocr_boxes" is treated the same as an empty list.
    boxes = data.get("ocr_boxes") or []
    img_pil = draw_boxes(img_pil, boxes)

    # 4) Display (OpenCV expects BGR channel order)
    img_cv = cv2.cvtColor(np.array(img_pil), cv2.COLOR_RGB2BGR)
    cv2.imshow("OCR Preview", img_cv)
    print("Press any key on the image window to exit...")
    cv2.waitKey(0)
    cv2.destroyAllWindows()

if __name__ == "__main__":
    main()

示例输出：

功能特色

采用 Apple Vision Framework 的高精度 OCR
支持多语言自动检测
通过网页界面上传并在数秒内获得 OCR 结果
JSON API 便于集成到应用程序中
100% 本地处理，无云端依赖，完全隐私保护

使用场景

无需云端服务的本地 OCR
在同一网络内的设备间共享 OCR 服务
使用多台 iPhone 构建 OCR 处理集群

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

OCR Server

使用方法

功能特色

使用场景

FilesExpand file tree

README.zh-CN.md

Latest commit

History

README.zh-CN.md

File metadata and controls

OCR Server

使用方法

功能特色

使用场景