-
Notifications
You must be signed in to change notification settings - Fork 218
Expand file tree
/
Copy pathkernel.py
More file actions
101 lines (73 loc) · 2.71 KB
/
kernel.py
File metadata and controls
101 lines (73 loc) · 2.71 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
"""
Android Action Kernel - Main Agent Loop
An AI agent that controls Android devices through the accessibility API.
Uses LLMs to make decisions based on screen context.
Usage:
python kernel.py
"""
import json
import os
import time
from typing import Any, Dict, List, Optional

from actions import execute_action, run_adb_command
from config import Config
from llm_providers import get_llm_provider
import sanitizer
def get_screen_state() -> str:
    """Dump the device's current UI hierarchy and return it as a sanitized JSON string.

    Returns:
        A JSON array (pretty-printed) of interactive elements, or an error
        message string if the dump could not be pulled from the device.
    """
    # Ask uiautomator on the device to write the UI XML, then copy it locally.
    run_adb_command(["shell", "uiautomator", "dump", Config.SCREEN_DUMP_PATH])
    run_adb_command(["pull", Config.SCREEN_DUMP_PATH, Config.LOCAL_DUMP_PATH])

    # The pull can fail silently; report rather than raise so the loop continues.
    if not os.path.exists(Config.LOCAL_DUMP_PATH):
        return "Error: Could not capture screen."

    with open(Config.LOCAL_DUMP_PATH, "r", encoding="utf-8") as dump_file:
        raw_xml = dump_file.read()

    # Reduce the raw XML to just the interactive elements for the LLM prompt.
    return json.dumps(sanitizer.get_interactive_elements(raw_xml), indent=2)
def run_agent(goal: str, max_steps: Optional[int] = None) -> None:
    """
    Main agent loop: Perceive -> Reason -> Act.

    Args:
        goal: The task to accomplish.
        max_steps: Maximum steps before stopping. When None, falls back to
            Config.MAX_STEPS. (Annotation fixed: `int = None` was an invalid
            implicit Optional.)
    """
    if max_steps is None:
        max_steps = Config.MAX_STEPS

    # Plain string: the original was an f-string with no placeholders.
    print("🚀 Android Action Kernel Started")
    print(f"📋 Goal: {goal}")
    print(f"🤖 Provider: {Config.LLM_PROVIDER} ({Config.get_model()})")

    # Initialize the configured LLM provider once, outside the loop.
    llm = get_llm_provider()
    action_history: List[Dict[str, Any]] = []

    for step in range(max_steps):
        print(f"\n--- Step {step + 1}/{max_steps} ---")

        # 1. Perception: capture and sanitize the current screen state.
        print("👀 Scanning Screen...")
        screen_context = get_screen_state()

        # 2. Reasoning: ask the LLM for the next action given goal + history.
        print("🧠 Thinking...")
        decision = llm.get_decision(goal, screen_context, action_history)
        print(f"💡 Decision: {decision.get('reason', 'No reason provided')}")

        # 3. Action: execute the decision on the device.
        execute_action(decision)

        # Keep the history so the LLM can see what it has already tried.
        action_history.append(decision)

        # Give the device UI time to settle before the next capture.
        time.sleep(Config.STEP_DELAY)

    print("\n⚠️ Max steps reached. Task may be incomplete.")
def main():
    """Entry point: validate configuration, read the user's goal, run the agent."""
    try:
        Config.validate()
    except ValueError as e:
        print(f"❌ Configuration Error: {e}")
    else:
        # Configuration is valid — ask the user what to do.
        goal = input("Enter your goal: ")
        if goal.strip():
            run_agent(goal)
        else:
            print("❌ No goal provided. Exiting.")
# Run interactively only when executed as a script, not on import.
if __name__ == "__main__":
    main()