Skip to content
Open
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 9 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -399,6 +399,15 @@ MCP Client can access the following tools to interact with Windows:
- `Process`: List running processes or terminate them by PID or name.
- `Notification`: Send a Windows toast notification with a title and message.
- `Registry`: Read, write, delete, or list Windows Registry values and keys.
- `CursorPosition`: Get the current mouse cursor (x, y) coordinates.
- `PixelColor`: Get the RGB color value at screen coordinates with hex code and color name.
- `KeyHold`: Press or release keyboard keys independently for hold operations (e.g., hold Shift while clicking).
- `ScreenInfo`: Get information about all connected monitors (resolution, position, primary).
- `ScreenHighlight`: Highlight a screen region with a colored border for visual debugging.
- `MousePath`: Move the mouse cursor smoothly through a series of waypoints.
- `ScreenReader`: Read text from a screen region using OCR (Windows built-in engine, with a pytesseract fallback that requires `pip install 'windows-mcp[ocr]'`).
- `WaitForChange`: Wait until a screen region visually changes beyond a threshold.
- `FindImage`: Find a template image on screen using visual template matching (requires `pip install 'windows-mcp[vision]'`).

## 🤝 Connect with Us
Stay updated and join our community:
Expand Down
12 changes: 12 additions & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,18 @@ dev = [
"pytest>=8.0.0",
"pytest-asyncio>=0.24.0",
]
vision = [
"opencv-python-headless>=4.8.0",
"numpy>=1.26.0",
]
ocr = [
"pytesseract>=0.3.10",
]
all = [
"opencv-python-headless>=4.8.0",
"numpy>=1.26.0",
"pytesseract>=0.3.10",
]

[project.urls]
Homepage = "https://github.com/CursorTouch"
Expand Down
288 changes: 288 additions & 0 deletions src/windows_mcp/__main__.py
Original file line number Diff line number Diff line change
Expand Up @@ -71,6 +71,45 @@ async def lifespan(app: FastMCP):

mcp = FastMCP(name="windows-mcp", instructions=instructions, lifespan=lifespan)


def _to_physical(loc: list[int], coordinate_system: str) -> list[int]:
"""Convert coordinates to physical space if needed.

Args:
loc: [x, y] coordinates.
coordinate_system: "physical" (no conversion) or "logical" (multiply by DPI scale).

Returns:
[x, y] in physical coordinates ready for pyautogui.
"""
if coordinate_system == "logical":
if desktop is None:
raise RuntimeError("Desktop service is not initialized.")
scale = desktop.get_dpi_scaling()
return [round(loc[0] * scale), round(loc[1] * scale)]
return loc
Comment on lines +94 to +99
Copy link

Copilot AI Mar 8, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

_to_physical/_region_to_physical/_path_to_physical use Python's built-in round(), which performs banker's rounding (round-half-to-even) on exact .5 values (e.g., 4.5 -> 4). With DPI scales like 1.5 or 2.5 this can systematically shift some coordinates by 1px. Consider switching to a deterministic pixel conversion strategy (e.g., always round half-up) and apply it consistently across all three helpers.

Copilot uses AI. Check for mistakes.


def _region_to_physical(region: list[int], coordinate_system: str) -> list[int]:
"""Convert a region [x, y, width, height] to physical space if needed."""
if coordinate_system == "logical":
if desktop is None:
raise RuntimeError("Desktop service is not initialized.")
scale = desktop.get_dpi_scaling()
return [round(v * scale) for v in region]
return region


def _path_to_physical(path: list[list[int]], coordinate_system: str) -> list[list[int]]:
"""Convert a list of [x, y] waypoints to physical space if needed."""
if coordinate_system == "logical":
if desktop is None:
raise RuntimeError("Desktop service is not initialized.")
scale = desktop.get_dpi_scaling()
return [[round(p[0] * scale), round(p[1] * scale)] for p in path]
return path
Comment on lines +78 to +124
Copy link

Copilot AI Mar 8, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

In logical mode _to_physical and _path_to_physical index into loc / each waypoint without validating shape first. For invalid inputs (e.g., loc=[100] or a malformed waypoint), this raises IndexError and the tool wrapper returns a generic Error: list index out of range instead of the intended "loc must be [x, y]" / "waypoint must be [x, y]" messages. Consider validating lengths in the helpers (or before calling them) and raising a clear ValueError/returning an unchanged value so the downstream validation runs.

Copilot uses AI. Check for mistakes.


@mcp.tool(
name="App",
description="Manages Windows applications with three modes: 'launch' (opens the prescibed application), 'resize' (adjusts active window size/position), 'switch' (brings specific window into focus).",
Expand Down Expand Up @@ -676,6 +715,255 @@ def registry_tool(mode: Literal['get', 'set', 'delete', 'list'], path: str, name
except Exception as e:
return f'Error accessing registry: {str(e)}'


@mcp.tool(
    name="CursorPosition",
    description="Returns the current mouse cursor position as (x, y) coordinates.",
    annotations=ToolAnnotations(
        title="CursorPosition",
        openWorldHint=False,
        idempotentHint=True,
        destructiveHint=False,
        readOnlyHint=True,
    ),
)
@with_analytics(analytics, "CursorPosition-Tool")
def cursor_position_tool(ctx: Context = None) -> str:
    """Report the mouse cursor position via ``desktop.get_cursor_position()``.

    ``ctx`` is accepted but not used in this function. Any exception raised
    by the desktop call is returned as an "Error: ..." string instead of
    propagating.
    """
    try:
        position = desktop.get_cursor_position()
    except Exception as exc:
        return f"Error: {exc}"
    return position


@mcp.tool(
    name="PixelColor",
    description=(
        "Gets the RGB color value at specified screen coordinates [x, y]. "
        "Returns color as RGB values and hex code with approximate color name. "
        "Set coordinate_system='logical' to auto-convert from logical (DPI-scaled) coordinates to physical. "
        "Default is 'physical' (no conversion)."
    ),
    annotations=ToolAnnotations(
        title="PixelColor",
        openWorldHint=False,
        idempotentHint=True,
        destructiveHint=False,
        readOnlyHint=True,
    ),
)
@with_analytics(analytics, "PixelColor-Tool")
def pixel_color_tool(
    loc: list[int],
    coordinate_system: Literal["physical", "logical"] = "physical",
    ctx: Context = None,
) -> str:
    """Look up the pixel color at ``loc`` via ``desktop.get_pixel_color``.

    ``loc`` is first passed through ``_to_physical`` with the requested
    ``coordinate_system``. ``ctx`` is accepted but not used in this function.
    Any exception from the conversion or the desktop call is returned as an
    "Error: ..." string instead of propagating.
    """
    try:
        physical_loc = _to_physical(loc, coordinate_system)
        return desktop.get_pixel_color(physical_loc)
    except Exception as exc:
        return f"Error: {exc}"


@mcp.tool(
    name="KeyHold",
    description=(
        "Presses or releases keyboard keys independently, enabling key hold operations. "
        "Use action='down' to press and hold, 'up' to release. "
        "Supports modifier keys (shift, ctrl, alt, win) and special keys (f1-f12, enter, tab, escape, etc.). "
        "Release keys after use to avoid stuck keys."
    ),
    annotations=ToolAnnotations(
        title="KeyHold",
        openWorldHint=False,
        idempotentHint=False,
        destructiveHint=True,
        readOnlyHint=False,
    ),
)
@with_analytics(analytics, "KeyHold-Tool")
def key_hold_tool(action: Literal["down", "up"], keys: list[str], ctx: Context = None) -> str:
    """Forward ``action`` and ``keys`` to ``desktop.key_hold``.

    ``ctx`` is accepted but not used in this function. Any exception raised
    by the desktop call is returned as an "Error: ..." string instead of
    propagating.
    """
    try:
        outcome = desktop.key_hold(action, keys)
    except Exception as exc:
        return f"Error: {exc}"
    return outcome


@mcp.tool(
    name="ScreenInfo",
    description=(
        "Returns information about all connected monitors including resolution, position, and which is the primary display. "
        "Useful for multi-monitor setups and coordinate targeting."
    ),
    annotations=ToolAnnotations(
        title="ScreenInfo",
        openWorldHint=False,
        idempotentHint=True,
        destructiveHint=False,
        readOnlyHint=True,
    ),
)
@with_analytics(analytics, "ScreenInfo-Tool")
def screen_info_tool(ctx: Context = None) -> str:
    """Return the result of ``desktop.get_screen_info()``.

    ``ctx`` is accepted but not used in this function. Any exception raised
    by the desktop call is returned as an "Error: ..." string instead of
    propagating.
    """
    try:
        info = desktop.get_screen_info()
    except Exception as exc:
        return f"Error: {exc}"
    return info


@mcp.tool(
    name="ScreenHighlight",
    description=(
        "Highlights a rectangular region on screen with a colored border for visual identification. "
        "Useful for debugging automation targets. The highlight appears briefly then disappears. "
        "Set coordinate_system='logical' to auto-convert from logical (DPI-scaled) coordinates to physical. "
        "Default is 'physical' (no conversion)."
    ),
    annotations=ToolAnnotations(
        title="ScreenHighlight",
        openWorldHint=False,
        idempotentHint=True,
        destructiveHint=False,
        readOnlyHint=True,
    ),
)
@with_analytics(analytics, "ScreenHighlight-Tool")
def screen_highlight_tool(
    loc: list[int],
    size: list[int],
    duration: float = 2.0,
    color: Literal["red", "green", "blue", "yellow"] = "red",
    coordinate_system: Literal["physical", "logical"] = "physical",
    ctx: Context = None,
) -> str:
    """Highlight a screen rectangle via ``desktop.highlight_region``.

    Both ``loc`` and ``size`` are passed through ``_to_physical`` with the
    requested ``coordinate_system`` before the desktop call; ``duration`` and
    ``color`` are forwarded as given. ``size`` is presumably
    [width, height] -- confirm against ``highlight_region``. ``ctx`` is
    accepted but not used in this function. Any exception is returned as an
    "Error: ..." string instead of propagating.
    """
    try:
        origin = _to_physical(loc, coordinate_system)
        extent = _to_physical(size, coordinate_system)
        return desktop.highlight_region(origin, extent, duration, color)
    except Exception as exc:
        return f"Error: {exc}"


@mcp.tool(
    name="MousePath",
    description=(
        "Moves the mouse cursor smoothly through a series of waypoints. "
        "Each waypoint is [x, y]. The movement is interpolated over the specified duration for smooth animation. "
        "Set coordinate_system='logical' to auto-convert from logical (DPI-scaled) coordinates to physical. "
        "Default is 'physical' (no conversion)."
    ),
    annotations=ToolAnnotations(
        title="MousePath",
        openWorldHint=False,
        idempotentHint=False,
        destructiveHint=False,
        readOnlyHint=False,
    ),
)
@with_analytics(analytics, "MousePath-Tool")
def mouse_path_tool(
    path: list[list[int]],
    duration: float = 0.5,
    coordinate_system: Literal["physical", "logical"] = "physical",
    ctx: Context = None,
) -> str:
    """Move the cursor along ``path`` via ``desktop.mouse_path``.

    ``path`` is first passed through ``_path_to_physical`` with the requested
    ``coordinate_system``; ``duration`` is forwarded as given. ``ctx`` is
    accepted but not used in this function. Any exception is returned as an
    "Error: ..." string instead of propagating.
    """
    try:
        waypoints = _path_to_physical(path, coordinate_system)
        return desktop.mouse_path(waypoints, duration)
    except Exception as exc:
        return f"Error: {exc}"


@mcp.tool(
    name="ScreenReader",
    description=(
        "Reads text from a screen region using OCR (Optical Character Recognition). "
        "Uses Windows built-in OCR engine. Specify a region [x, y, width, height] to read from a specific area, "
        "or omit for the full screen. "
        "Set coordinate_system='logical' to auto-convert from logical (DPI-scaled) coordinates to physical. "
        "Default is 'physical' (no conversion)."
    ),
    annotations=ToolAnnotations(
        title="ScreenReader",
        openWorldHint=False,
        idempotentHint=True,
        destructiveHint=False,
        readOnlyHint=True,
    ),
)
@with_analytics(analytics, "ScreenReader-Tool")
def screen_reader_tool(
    region: list[int] | None = None,
    language: str = "en",
    coordinate_system: Literal["physical", "logical"] = "physical",
    ctx: Context = None,
) -> str:
    """Read on-screen text via ``desktop.read_screen_text``.

    When ``region`` is given it is first passed through
    ``_region_to_physical`` with the requested ``coordinate_system``; a
    ``None`` region is forwarded as ``None``. ``language`` is forwarded as
    given. ``ctx`` is accepted but not used in this function. Any exception
    is returned as an "Error: ..." string instead of propagating.
    """
    try:
        target = (
            None
            if region is None
            else _region_to_physical(region, coordinate_system)
        )
        return desktop.read_screen_text(target, language)
    except Exception as exc:
        return f"Error: {exc}"


@mcp.tool(
    name="WaitForChange",
    description=(
        "Waits until a screen region visually changes beyond a threshold. "
        "Useful for waiting for loading to complete, animations to finish, or content to update. "
        "Compares pixel data between captures. Returns when change is detected or timeout is reached. "
        "Set coordinate_system='logical' to auto-convert from logical (DPI-scaled) coordinates to physical. "
        "Default is 'physical' (no conversion)."
    ),
    annotations=ToolAnnotations(
        title="WaitForChange",
        openWorldHint=False,
        idempotentHint=True,
        destructiveHint=False,
        readOnlyHint=True,
    ),
)
@with_analytics(analytics, "WaitForChange-Tool")
def wait_for_change_tool(
    region: list[int],
    timeout: float = 30.0,
    threshold: float = 0.05,
    poll_interval: float = 0.5,
    coordinate_system: Literal["physical", "logical"] = "physical",
    ctx: Context = None,
) -> str:
    """Wait for a screen region to change via ``desktop.wait_for_change``.

    ``region`` is first passed through ``_region_to_physical`` with the
    requested ``coordinate_system``; ``timeout``, ``threshold`` and
    ``poll_interval`` are forwarded as given. ``ctx`` is accepted but not
    used in this function. Any exception is returned as an "Error: ..."
    string instead of propagating.
    """
    try:
        watched = _region_to_physical(region, coordinate_system)
        return desktop.wait_for_change(watched, timeout, threshold, poll_interval)
    except Exception as exc:
        return f"Error: {exc}"


@mcp.tool(
    name="FindImage",
    description=(
        "Finds a template image on screen using visual template matching. "
        "Returns the center coordinates and confidence score of the best match. "
        "Requires opencv-python-headless: pip install 'windows-mcp[vision]'. "
        "Optionally restrict search to a region [x, y, width, height]. "
        "Set coordinate_system='logical' to auto-convert from logical (DPI-scaled) coordinates to physical. "
        "Default is 'physical' (no conversion)."
    ),
    annotations=ToolAnnotations(
        title="FindImage",
        openWorldHint=False,
        idempotentHint=True,
        destructiveHint=False,
        readOnlyHint=True,
    ),
)
@with_analytics(analytics, "FindImage-Tool")
def find_image_tool(
    template_path: str,
    region: list[int] | None = None,
    threshold: float = 0.8,
    coordinate_system: Literal["physical", "logical"] = "physical",
    ctx: Context = None,
) -> str:
    """Search the screen for a template image via ``desktop.find_image``.

    When ``region`` is given it is first passed through
    ``_region_to_physical`` with the requested ``coordinate_system``; a
    ``None`` region is forwarded as ``None``. ``template_path`` and
    ``threshold`` are forwarded as given. ``ctx`` is accepted but not used in
    this function. Any exception is returned as an "Error: ..." string
    instead of propagating.
    """
    try:
        search_area = (
            None
            if region is None
            else _region_to_physical(region, coordinate_system)
        )
        return desktop.find_image(template_path, search_area, threshold)
    except Exception as exc:
        return f"Error: {exc}"


class Transport(Enum):
STDIO = "stdio"
SSE = "sse"
Expand Down
Loading
Loading