將您的 iPhone 變成強大的本機 OCR 伺服器,採用 Apple 的 Vision Framework 技術。 無需雲端依賴,無限制使用,完全隱私保護。
English | 日本語 | 繁體中文 | 简体中文 | 한국어 | Français
- 啟動應用程式,伺服器將自動啟動
- 從同一網路上的任何裝置存取顯示的 IP 位址
- 上傳圖像即可獲得文字辨識結果
- 透過 API 將服務整合到您的應用程式中
- 為確保應用程式持續運行不中斷,請啟用 iOS 引導使用模式並保持螢幕開啟
- OCR 測試:在您的電腦上開啟網頁瀏覽器,瀏覽應用程式顯示的 IP 位址來執行 OCR 測試。
-
API 範例 - 透過
uploadAPI 上傳圖像:curl -H "Accept: application/json" \ -X POST http://<您的IP>:8000/upload \ -F "file=@01.png" -
Python 上傳範例:
import requests url = "http://10.0.1.11:8000/upload" # 替換為您的 IP 位址 file_path = "01.png" with open(file_path, "rb") as f: files = {"file": f} headers = {"Accept": "application/json"} response = requests.post(url, files=files, headers=headers) print("status code:", response.status_code) print("response:", response.text)
-
JSON 回應格式如下:
{ "success": true, "message": "File uploaded successfully", "ocr_result": "Hello\nWorld", "image_height": 648, "image_width": 1247, "ocr_boxes": [ { "text": "Hello", "x": 429.6554479416482, "y": 268.0000001076923, "w": 201.83814102564105, "h": 72, "rect": { "topLeft_x": 429.6554479416482, "topLeft_y": 268.0000001076923, "topRight_x": 631.4935889672893, "topRight_y": 268.0000001076923, "bottomRight_x": 631.4935889672893, "bottomRight_y": 340.0000001076923, "bottomLeft_x": 429.6554479416482, "bottomLeft_y": 340.0000001076923 } }, { "text": "World", "x": 421.6618595738782, "y": 417.99999971428576, "w": 251.79807692307696, "h": 80, "rect": { "topLeft_x": 421.6618595738782, "topLeft_y": 417.99999971428576, "topRight_x": 673.4599364969552, "topRight_y": 417.99999971428576, "bottomRight_x": 673.4599364969552, "bottomRight_y": 497.99999971428576, "bottomLeft_x": 421.6618595738782, "bottomLeft_y": 497.99999971428576 } } ] }image_width和image_height代表圖像的寬度和高度(以像素為單位),x和y代表文字邊界框的左上角原點(以像素為單位),w和h代表文字邊界框的寬度和高度(以像素為單位),rect提供偵測到的文字區域四個角的座標,並保留其原始方向(非軸對齊)。 -
Python 範例 – 使用
ocr_boxes資訊繪製文字邊界框:# # pip3 install requests pillow opencv-python numpy # import os import sys import requests from PIL import Image, ImageDraw, ImageFont, ImageOps import numpy as np import cv2 url = "http://10.0.1.11:8000/upload" # Replace with your IP address file_path = "01.png" # ===== Select font (supports Chinese and English), font size auto-scales with box height ===== def pick_font(box_h_px: float): font_candidates = [ # macOS "/System/Library/Fonts/PingFang.ttc", "/System/Library/Fonts/STHeiti Light.ttc", # Windows r"C:\Windows\Fonts\msyh.ttc", r"C:\Windows\Fonts\msjh.ttc", r"C:\Windows\Fonts\arialuni.ttf", # Noto "/usr/share/fonts/opentype/noto/NotoSansCJK-Regular.ttc", "/usr/share/fonts/truetype/noto/NotoSansCJK-Regular.ttc", ] size = max(10, int(box_h_px * 0.25)) # Small font size = 25% of box height (minimum 10pt) for path in font_candidates: if os.path.exists(path): try: return ImageFont.truetype(path, size=size) except Exception: pass return ImageFont.load_default() # ===== Draw box and small text ===== def draw_boxes(img_pil: Image.Image, boxes, line_thickness: int = 5) -> Image.Image: draw = ImageDraw.Draw(img_pil) for b in boxes: try: x = float(b["x"]); y = float(b["y"]) w = float(b["w"]); h = float(b["h"]) text = str(b.get("text", "")) except Exception: continue # Red bounding box x2, y2 = x + w, y + h draw.rectangle([x, y, x2, y2], outline=(255, 0, 0), width=line_thickness) # Top-right label font = pick_font(h) # Text size # textbbox returns (l, t, r, b) l, t, r, b = draw.textbbox((0, 0), text, font=font) tw, th = (r - l), (b - t) pad = max(2, int(h * 0.06)) # Align label to top-right, not exceeding box or image edge tx = int(max(0, min(x2 - tw - pad, img_pil.width - tw - pad))) ty = int(max(0, min(y + pad, img_pil.height - th - pad))) # White background draw.rectangle([tx - pad, ty - pad, tx + tw + pad, ty + th + pad], fill=(255, 255, 255)) draw.text((tx, ty), text, font=font, fill=(20, 20, 20)) return img_pil def main(): if not os.path.exists(file_path): print(f"[ERROR] Image not found: {file_path}", file=sys.stderr) sys.exit(1) # 1) Upload with open(file_path, "rb") as f: files = {"file": f} headers = {"Accept": "application/json"} try: response = requests.post(url, files=files, headers=headers, timeout=60) except requests.RequestException as e: print(f"[ERROR] Request failed: {e}", file=sys.stderr) sys.exit(2) print("status code:", response.status_code) # 2) Check HTTP and JSON if response.status_code != 200: print("response:", response.text[:500]) sys.exit(3) try: data = response.json() except ValueError: print("[ERROR] Not JSON response") print("response:", response.text[:500]) sys.exit(4) if not data.get("success", False): print("[ERROR] Server returned failure:", data) sys.exit(5) print("response ok") # 3) Load original image (using PIL) img_pil = Image.open(file_path) img_pil = ImageOps.exif_transpose(img_pil).convert("RGB") # If server returns different dimensions (should usually match), use server dimensions W = int(data.get("image_width", img_pil.width)) H = int(data.get("image_height", img_pil.height)) if (W, H) != (img_pil.width, img_pil.height): img_pil = img_pil.resize((W, H), Image.BICUBIC) boxes = data.get("ocr_boxes", []) img_pil = draw_boxes(img_pil, boxes) # 4) Display img_cv = cv2.cvtColor(np.array(img_pil), cv2.COLOR_RGB2BGR) cv2.imshow("OCR Preview", img_cv) print("Press any key on the image window to exit...") cv2.waitKey(0) cv2.destroyAllWindows() if __name__ == "__main__": main()
範例輸出:
- 採用 Apple Vision Framework 的高精度 OCR
- 支援多語言自動偵測
- 透過網頁介面上傳並在數秒內獲得 OCR 結果
- JSON API 便於整合到應用程式中
- 100% 本機處理,無雲端依賴,完全隱私保護
- 無需雲端服務的本機 OCR
- 在同一網路內的裝置間共享 OCR 服務
- 使用多台 iPhone 建構 OCR 處理叢集


