Skip to content

Latest commit

 

History

History
251 lines (203 loc) · 7.9 KB

File metadata and controls

251 lines (203 loc) · 7.9 KB

OCR Server

將您的 iPhone 變成強大的本機 OCR 伺服器,採用 Apple 的 Vision Framework 技術。 無需雲端依賴,無限制使用,完全隱私保護。

從 App Store 下載

English | 日本語 | 繁體中文 | 简体中文 | 한국어 | Français

image

使用方法

  1. 啟動應用程式,伺服器將自動啟動
  2. 從同一網路上的任何裝置存取顯示的 IP 位址
  3. 上傳圖像即可獲得文字辨識結果
  4. 透過 API 將服務整合到您的應用程式中
  5. 為確保應用程式持續運行不中斷,請啟用 iOS 引導使用模式並保持螢幕開啟
  • OCR 測試:在您的電腦上開啟網頁瀏覽器,瀏覽應用程式顯示的 IP 位址來執行 OCR 測試。

image2

  • API 範例 - 透過 upload API 上傳圖像:

    curl -H "Accept: application/json" \
      -X POST http://<您的IP>:8000/upload \
      -F "file=@01.png"
    
  • Python 上傳範例:

    import requests
    
    url = "http://10.0.1.11:8000/upload"  # 替換為您的 IP 位址
    file_path = "01.png"
    
    with open(file_path, "rb") as f:
        files = {"file": f}
        headers = {"Accept": "application/json"}
        response = requests.post(url, files=files, headers=headers)
    
    print("status code:", response.status_code)
    print("response:", response.text)
  • JSON 回應格式如下:

    {
      "success": true,
      "message": "File uploaded successfully",
      "ocr_result": "Hello\nWorld",
      "image_height": 648,
      "image_width": 1247,
      "ocr_boxes": [
        {
          "text": "Hello",
          "x": 429.6554479416482,
          "y": 268.0000001076923,
          "w": 201.83814102564105,
          "h": 72,
          "rect": {
            "topLeft_x": 429.6554479416482,
            "topLeft_y": 268.0000001076923,
            "topRight_x": 631.4935889672893,
            "topRight_y": 268.0000001076923,
            "bottomRight_x": 631.4935889672893,
            "bottomRight_y": 340.0000001076923,
            "bottomLeft_x": 429.6554479416482,
            "bottomLeft_y": 340.0000001076923
          }
        },
        {
          "text": "World",
          "x": 421.6618595738782,
          "y": 417.99999971428576,
          "w": 251.79807692307696,
          "h": 80,
          "rect": {
            "topLeft_x": 421.6618595738782,
            "topLeft_y": 417.99999971428576,
            "topRight_x": 673.4599364969552,
            "topRight_y": 417.99999971428576,
            "bottomRight_x": 673.4599364969552,
            "bottomRight_y": 497.99999971428576,
            "bottomLeft_x": 421.6618595738782,
            "bottomLeft_y": 497.99999971428576
          }
        }
      ]
    }

    image_widthimage_height 代表圖像的寬度和高度(以像素為單位), xy 代表文字邊界框的左上角原點(以像素為單位), wh 代表文字邊界框的寬度和高度(以像素為單位), rect 提供偵測到的文字區域四個角的座標,並保留其原始方向(非軸對齊)。

  • Python 範例 – 使用 ocr_boxes 資訊繪製文字邊界框:

    #
    # pip3 install requests pillow opencv-python numpy
    #
    
    import os
    import sys
    import requests
    from PIL import Image, ImageDraw, ImageFont, ImageOps
    import numpy as np
    import cv2
    
    url = "http://10.0.1.11:8000/upload"  # Replace with your IP address
    file_path = "01.png"
    
    # ===== Select font (supports Chinese and English), font size auto-scales with box height =====
    def pick_font(box_h_px: float):
        font_candidates = [
            # macOS
            "/System/Library/Fonts/PingFang.ttc",
            "/System/Library/Fonts/STHeiti Light.ttc",
            # Windows
            r"C:\Windows\Fonts\msyh.ttc",
            r"C:\Windows\Fonts\msjh.ttc",
            r"C:\Windows\Fonts\arialuni.ttf",
            # Noto
            "/usr/share/fonts/opentype/noto/NotoSansCJK-Regular.ttc",
            "/usr/share/fonts/truetype/noto/NotoSansCJK-Regular.ttc",
        ]
        size = max(10, int(box_h_px * 0.25))  # Small font size = 25% of box height (minimum 10pt)
        for path in font_candidates:
            if os.path.exists(path):
                try:
                    return ImageFont.truetype(path, size=size)
                except Exception:
                    pass
        return ImageFont.load_default()
    
    # ===== Draw box and small text =====
    def draw_boxes(img_pil: Image.Image, boxes, line_thickness: int = 5) -> Image.Image:
        draw = ImageDraw.Draw(img_pil)
        for b in boxes:
            try:
                x = float(b["x"]); y = float(b["y"])
                w = float(b["w"]); h = float(b["h"])
                text = str(b.get("text", ""))
            except Exception:
                continue
    
            # Red bounding box
            x2, y2 = x + w, y + h
            draw.rectangle([x, y, x2, y2], outline=(255, 0, 0), width=line_thickness)
    
            # Top-right label
            font = pick_font(h)
            # Text size
            # textbbox returns (l, t, r, b)
            l, t, r, b = draw.textbbox((0, 0), text, font=font)
            tw, th = (r - l), (b - t)
            pad = max(2, int(h * 0.06))
    
            # Align label to top-right, not exceeding box or image edge
            tx = int(max(0, min(x2 - tw - pad, img_pil.width - tw - pad)))
            ty = int(max(0, min(y + pad, img_pil.height - th - pad)))
    
            # White background
            draw.rectangle([tx - pad, ty - pad, tx + tw + pad, ty + th + pad], fill=(255, 255, 255))
            draw.text((tx, ty), text, font=font, fill=(20, 20, 20))
        return img_pil
    
    def main():
        if not os.path.exists(file_path):
            print(f"[ERROR] Image not found: {file_path}", file=sys.stderr)
            sys.exit(1)
    
        # 1) Upload
        with open(file_path, "rb") as f:
            files = {"file": f}
            headers = {"Accept": "application/json"}
            try:
                response = requests.post(url, files=files, headers=headers, timeout=60)
            except requests.RequestException as e:
                print(f"[ERROR] Request failed: {e}", file=sys.stderr)
                sys.exit(2)
    
        print("status code:", response.status_code)
    
        # 2) Check HTTP and JSON
        if response.status_code != 200:
            print("response:", response.text[:500])
            sys.exit(3)
    
        try:
            data = response.json()
        except ValueError:
            print("[ERROR] Not JSON response")
            print("response:", response.text[:500])
            sys.exit(4)
    
        if not data.get("success", False):
            print("[ERROR] Server returned failure:", data)
            sys.exit(5)
    
        print("response ok")
    
        # 3) Load original image (using PIL)
        img_pil = Image.open(file_path)
        img_pil = ImageOps.exif_transpose(img_pil).convert("RGB")
    
        # If server returns different dimensions (should usually match), use server dimensions
        W = int(data.get("image_width", img_pil.width))
        H = int(data.get("image_height", img_pil.height))
        if (W, H) != (img_pil.width, img_pil.height):
            img_pil = img_pil.resize((W, H), Image.BICUBIC)
    
        boxes = data.get("ocr_boxes", [])
        img_pil = draw_boxes(img_pil, boxes)
    
        # 4) Display
        img_cv = cv2.cvtColor(np.array(img_pil), cv2.COLOR_RGB2BGR)
        cv2.imshow("OCR Preview", img_cv)
        print("Press any key on the image window to exit...")
        cv2.waitKey(0)
        cv2.destroyAllWindows()
    
    if __name__ == "__main__":
        main()

    範例輸出:

    image3

功能特色

  • 採用 Apple Vision Framework 的高精度 OCR
  • 支援多語言自動偵測
  • 透過網頁介面上傳並在數秒內獲得 OCR 結果
  • JSON API 便於整合到應用程式中
  • 100% 本機處理,無雲端依賴,完全隱私保護

使用場景

  • 無需雲端服務的本機 OCR
  • 在同一網路內的裝置間共享 OCR 服務
  • 使用多台 iPhone 建構 OCR 處理叢集