Skip to content

Commit 94e5471

Browse files
authored
Merge pull request #235 from PredicateSystems/categorized_tasks
make agent categorize tasks
2 parents 87c7931 + 51226bb commit 94e5471

20 files changed

+4392
-64
lines changed

predicate/__init__.py

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -118,7 +118,17 @@
118118
# Ordinal support (Phase 3)
119119
from .ordinal import OrdinalIntent, boost_ordinal_elements, detect_ordinal_intent, select_by_ordinal
120120
from .overlay import clear_overlay, show_overlay
121+
from .overlay_dismissal import OverlayDismissResult, dismiss_overlays, dismiss_overlays_before_agent
121122
from .permissions import PermissionPolicy
123+
from .pruning import (
124+
CategoryDetectionResult,
125+
PrunedSnapshotContext,
126+
PruningTaskCategory,
127+
SkeletonDomNode,
128+
classify_task_category,
129+
prune_snapshot_for_task,
130+
serialize_pruned_snapshot,
131+
)
122132
from .query import find, query
123133
from .read import extract, extract_async, read, read_best_effort
124134
from .recorder import Recorder, Trace, TraceStep, record
@@ -250,6 +260,10 @@
250260
"screenshot",
251261
"show_overlay",
252262
"clear_overlay",
263+
# Overlay dismissal (proactive popup/banner removal)
264+
"OverlayDismissResult",
265+
"dismiss_overlays",
266+
"dismiss_overlays_before_agent",
253267
# Text Search
254268
"find_text_rect",
255269
"TextRectSearchResult",
@@ -313,6 +327,13 @@
313327
"save_storage_state",
314328
# Formatting (v0.12.0+)
315329
"format_snapshot_for_llm",
330+
"CategoryDetectionResult",
331+
"PrunedSnapshotContext",
332+
"PruningTaskCategory",
333+
"SkeletonDomNode",
334+
"classify_task_category",
335+
"prune_snapshot_for_task",
336+
"serialize_pruned_snapshot",
316337
# Agent Config (v0.12.0+)
317338
"AgentConfig",
318339
# Enums

predicate/agent_runtime.py

Lines changed: 40 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -326,6 +326,40 @@ async def get_url(self) -> str:
326326
self._cached_url = url
327327
return url
328328

329+
async def read_markdown(self, max_chars: int = 8000) -> str | None:
330+
"""
331+
Read page content as markdown for semantic understanding.
332+
333+
This extracts the page HTML and converts it to markdown format,
334+
which is useful for LLM planning to understand page context
335+
(e.g., product listings, form fields, navigation structure).
336+
337+
Args:
338+
max_chars: Maximum characters to return (default 8000).
339+
Content beyond this limit is cut off; the leading max_chars characters are kept.
340+
341+
Returns:
342+
Markdown string if successful; None if the backend exposes no page, the extraction does not report success, or any exception is raised.
343+
"""
344+
try:
345+
page = getattr(self.backend, "page", None)
346+
if page is None:
347+
return None
348+
349+
# Import here to avoid circular dependency
350+
from .read import _fallback_read_from_page_async
351+
352+
result = await _fallback_read_from_page_async(page, output_format="markdown")
353+
if result is None or result.status != "success":
354+
return None
355+
356+
content = result.content
357+
if len(content) > max_chars:
358+
content = content[:max_chars]
359+
return content
360+
except Exception:
361+
return None
362+
329363
async def get_viewport_height(self) -> int:
330364
"""
331365
Get current viewport height in pixels.
@@ -398,19 +432,23 @@ async def click(self, element_id: int) -> None:
398432

399433
await self.record_action(f"CLICK({element_id})")
400434

401-
async def type(self, element_id: int, text: str) -> None:
435+
async def type(self, element_id: int, text: str, *, delay_ms: float | None = None) -> None:
402436
"""
403437
Type text into an element.
404438
405439
Args:
406440
element_id: Element ID from snapshot
407441
text: Text to type
442+
delay_ms: Optional delay between keystrokes in milliseconds. When None, the backend's type_text is called without a delay argument.
408443
"""
409444
# First click to focus
410445
await self.click(element_id)
411446

412447
# Then type
413-
await self.backend.type_text(text)
448+
if delay_ms is None:
449+
await self.backend.type_text(text)
450+
else:
451+
await self.backend.type_text(text, delay_ms=delay_ms)
414452
await self.record_action(f"TYPE({element_id}, '{text[:20]}...')" if len(text) > 20 else f"TYPE({element_id}, '{text}')")
415453

416454
async def press(self, key: str) -> None:

predicate/agents/__init__.py

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,8 +6,9 @@
66
- RuntimeAgent (execution loop and bounded vision fallback)
77
88
Agent types:
9-
- PredicateBrowserAgent: Single-executor agent with manual step definitions
9+
- PredicateAgent: Branded alias for PlannerExecutorAgent (recommended for external use)
1010
- PlannerExecutorAgent: Two-tier agent with LLM-generated plans
11+
- PredicateBrowserAgent: Single-executor agent with manual step definitions
1112
1213
Task abstractions:
1314
- AutomationTask: Generic task model for browser automation
@@ -67,6 +68,9 @@
6768
get_config_preset,
6869
)
6970

71+
# Branded alias for PlannerExecutorAgent
72+
PredicateAgent = PlannerExecutorAgent
73+
7074
__all__ = [
7175
# Automation Task
7276
"AutomationTask",
@@ -95,6 +99,7 @@
9599
"PlanStep",
96100
"PlannerExecutorAgent",
97101
"PlannerExecutorConfig",
102+
"PredicateAgent", # Branded alias for PlannerExecutorAgent
98103
"PredicateSpec",
99104
"RecoveryNavigationConfig",
100105
"RetryConfig",

predicate/agents/automation_task.py

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -179,6 +179,32 @@ class AutomationTask:
179179
# Domain hints for heuristics (e.g., ["ecommerce", "amazon"])
180180
domain_hints: tuple[str, ...] = field(default_factory=tuple)
181181

182+
# Force a specific pruning category (overrides auto-detection)
183+
force_pruning_category: str | None = None
184+
185+
def pruning_category_hint(self):
186+
"""
187+
Return the pruning-oriented category for this task.
188+
189+
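If force_pruning_category is set to a valid PruningTaskCategory value, returns that category directly; an invalid value is ignored and auto-detection is used instead.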
190+
Otherwise, uses rule-based normalization from task text and hints.
191+
"""
192+
from ..pruning import PruningTaskCategory, classify_task_category
193+
194+
# If a category is forced, return it directly
195+
if self.force_pruning_category:
196+
try:
197+
return PruningTaskCategory(self.force_pruning_category)
198+
except ValueError:
199+
pass # Invalid category, fall through to auto-detection
200+
201+
return classify_task_category(
202+
task_text=self.task,
203+
current_url=self.starting_url,
204+
domain_hints=self.domain_hints,
205+
task_category=self.category,
206+
).category
207+
182208
@classmethod
183209
def from_webbench_task(cls, task: Any) -> "AutomationTask":
184210
"""

0 commit comments

Comments
 (0)