Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1,173 changes: 934 additions & 239 deletions pdm.lock

Large diffs are not rendered by default.

6 changes: 5 additions & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,6 @@ authors = [
dependencies = [
"anthropic>=0.54.0",
"gradio-client>=1.4.3",
"grpcio-tools>=1.67.0",
"grpcio>=1.67.0",
"httpx>=0.28.1",
"Jinja2>=3.1.4",
Expand Down Expand Up @@ -57,6 +56,7 @@ typecheck = "mypy"
"chat:api" = "uvicorn askui.chat.api.app:app --reload --port 9261"
"mcp:dev" = "mcp dev src/askui/mcp/__init__.py"
"grpc:gen" = "bash scripts/grpc-gen.sh"
"json:gen" = "datamodel-codegen --output-model-type pydantic_v2.BaseModel --input src/askui/tools/askui/askui_ui_controller_grpc/json_schema/ --input-file-type jsonschema --output src/askui/tools/askui/askui_ui_controller_grpc/generated/"

[dependency-groups]
test = [
Expand Down Expand Up @@ -237,3 +237,7 @@ pynput = [
web = [
"playwright>=1.41.0",
]
dev = [
"datamodel-code-generator>=0.31.2",
"grpcio-tools>=1.73.1",
]
3 changes: 2 additions & 1 deletion src/askui/mcp/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
from typing import Any

from fastmcp import FastMCP
from fastmcp.tools.tool import Tool

from askui.agent import VisionAgent

Expand All @@ -16,7 +17,7 @@ class AppContext:
@asynccontextmanager
async def mcp_lifespan(server: FastMCP[Any]) -> AsyncIterator[AppContext]: # noqa: ARG001
    """
    Lifespan hook that keeps a `VisionAgent` open while the MCP server runs.

    Opens a `VisionAgent` on display 2, registers the agent's `click` method
    as a tool on `server`, and yields an `AppContext` holding the agent. The
    agent's context manager is exited when the lifespan generator resumes and
    finishes.

    Args:
        server (FastMCP[Any]): The server the `click` tool is registered on.

    Yields:
        AppContext: Context object carrying the live `vision_agent`.
    """
    with VisionAgent(display=2) as vision_agent:
        # NOTE(review): the next two lines look like the removed/added pair of a
        # diff hunk (bare callable vs. `Tool.from_function(...)`); presumably only
        # the `Tool.from_function` form is meant to remain — confirm against the
        # real file before relying on this block.
        server.add_tool(vision_agent.click)
        server.add_tool(Tool.from_function(vision_agent.click))
        yield AppContext(vision_agent=vision_agent)


Expand Down
5 changes: 4 additions & 1 deletion src/askui/tools/__init__.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,12 @@
from .agent_os import AgentOs, ModifierKey, PcKey
from .agent_os import AgentOs, Coordinate, ModifierKey, PcKey
from .askui.command_helpers import create_style
from .toolbox import AgentToolbox

__all__ = [
"AgentOs",
"AgentToolbox",
"ModifierKey",
"PcKey",
"Coordinate",
"create_style",
]
115 changes: 114 additions & 1 deletion src/askui/tools/agent_os.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,15 @@
from abc import ABC, abstractmethod
from typing import Literal
from typing import TYPE_CHECKING, Literal

from PIL import Image
from pydantic import BaseModel

if TYPE_CHECKING:
from askui.tools.askui.askui_ui_controller_grpc.generated.AgentOS_Send_Request_2501 import ( # noqa: E501
RenderObjectStyle,
)


ModifierKey = Literal[
"command",
"alt",
Expand Down Expand Up @@ -148,6 +154,11 @@ class ClickEvent(BaseModel):
timestamp: float


class Coordinate(BaseModel):
    """
    A 2D point with integer `x` and `y` components.

    Used in this module as the return type of `get_mouse_position` and as the
    point type accepted by `render_line`.
    """

    # Horizontal component (presumably pixels, as in `set_mouse_position` — confirm).
    x: int
    # Vertical component (presumably pixels, as in `set_mouse_position` — confirm).
    y: int


# Alias for the input event type; at present it refers to `ClickEvent` only.
InputEvent = ClickEvent


Expand Down Expand Up @@ -359,3 +370,105 @@ def stop_listening(self) -> None:
change in the future.
"""
raise NotImplementedError

def get_mouse_position(self) -> Coordinate:
    """
    Report where the mouse cursor currently is.

    Returns:
        Coordinate: The cursor position as `x` / `y` values.

    Raises:
        NotImplementedError: Always; this default body provides no behavior.
    """
    raise NotImplementedError

def set_mouse_position(self, x: int, y: int) -> None:
    """
    Move the mouse cursor to the given coordinates.

    Args:
        x (int): Target horizontal coordinate, in pixels.
        y (int): Target vertical coordinate, in pixels.

    Raises:
        NotImplementedError: Always; this default body provides no behavior.
    """
    raise NotImplementedError

def render_quad(self, style: "RenderObjectStyle") -> int:
    """
    Render a quad object to the display.

    Args:
        style (RenderObjectStyle): The style properties for the quad.

    Returns:
        int: The ID of the newly created render object.

    Raises:
        NotImplementedError: Always; this default body provides no behavior.
    """
    raise NotImplementedError

def render_line(self, style: "RenderObjectStyle", points: list[Coordinate]) -> int:
    """
    Render a line object to the display.

    Args:
        style (RenderObjectStyle): The style properties for the line.
        points (list[Coordinate]): The points defining the line.

    Returns:
        int: The ID of the newly created render object.

    Raises:
        NotImplementedError: Always; this default body provides no behavior.
    """
    raise NotImplementedError

def render_image(self, style: "RenderObjectStyle", image_data: str) -> int:
    """
    Render an image object to the display.

    Args:
        style (RenderObjectStyle): The style properties for the image.
        image_data (str): The image data to display.
            NOTE(review): the expected string encoding is not visible
            here — confirm against the implementation.

    Returns:
        int: The ID of the newly created render object.

    Raises:
        NotImplementedError: Always; this default body provides no behavior.
    """
    raise NotImplementedError

def render_text(self, style: "RenderObjectStyle", content: str) -> int:
    """
    Render a text object to the display.

    Args:
        style (RenderObjectStyle): The style properties for the text.
        content (str): The text content to display.

    Returns:
        int: The ID of the newly created render object.

    Raises:
        NotImplementedError: Always; this default body provides no behavior.
    """
    raise NotImplementedError

def update_render_object(self, object_id: int, style: "RenderObjectStyle") -> None:
    """
    Apply new styling to a render object that already exists.

    Args:
        object_id (int): ID of the render object to restyle.
        style (RenderObjectStyle): Style properties to apply to it.

    Raises:
        NotImplementedError: Always; this default body provides no behavior.
    """
    raise NotImplementedError

def delete_render_object(self, object_id: int) -> None:
    """
    Delete an existing render object from the display.

    Args:
        object_id (int): The ID of the render object to delete.

    Raises:
        NotImplementedError: Always; this default body provides no behavior.
    """
    raise NotImplementedError

def clear_render_objects(self) -> None:
    """
    Clear all render objects from the display.

    Raises:
        NotImplementedError: Always; this default body provides no behavior.
    """
    raise NotImplementedError
Loading