From b78b912a5154d6fa4d71f7a88a9efb5bd9c7cd7a Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Sun, 29 Mar 2026 00:06:16 +0000 Subject: [PATCH] feat: add /image command, /browse browser-view, and vision-message support - BaseCoder: add pending_images list; run() captures/clears images and passes them to _build_messages; _build_messages builds multi-part vision content when images are present (OpenAI/Anthropic vision API format) - web_scraper: add fetch_page_info() returning structured page data (title, description, status_code, headings, links, content) - terminal: add print_browse() rich browser-view panel and print_image_added() confirmation; update /help table with new commands - commands: add _cmd_browse() and _cmd_image() with full dispatch - repl: add /browse and /image to tab-completion list Agent-Logs-Url: https://github.com/Rahulchaube1/QGo/sessions/50b8c034-e924-4434-912d-f950eba6066b Co-authored-by: Rahulchaube1 <157899057+Rahulchaube1@users.noreply.github.com> --- qgo/coders/base_coder.py | 25 ++++++++++----- qgo/ui/commands.py | 65 +++++++++++++++++++++++++++++++++++++++ qgo/ui/repl.py | 4 +-- qgo/ui/terminal.py | 59 ++++++++++++++++++++++++++++++++++- qgo/utils/web_scraper.py | 66 +++++++++++++++++++++++++++++++++++++++- 5 files changed, 208 insertions(+), 11 deletions(-) diff --git a/qgo/coders/base_coder.py b/qgo/coders/base_coder.py index 3e22ad5..6c2b35f 100644 --- a/qgo/coders/base_coder.py +++ b/qgo/coders/base_coder.py @@ -113,6 +113,7 @@ def __init__( self.chat_files: list[FileContext] = [] self.messages: list[Message] = [] self.total_usage = TokenUsage() + self.pending_images: list[str] = [] # Images queued for the next message # ─── File management ────────────────────────────────────────────── @@ -164,14 +165,18 @@ def run(self, user_message: str) -> str: """ self.refresh_files() - # Build message list - messages = self._build_messages(user_message) + # Capture and clear any pending images + images = self.pending_images[:] + self.pending_images.clear() + + # Build message list (include images in the current turn if any) + messages = self._build_messages(user_message, images=images or None) # Send to LLM response = self._send(messages) - # Record in history - self.messages.append(Message(role="user", content=user_message)) + # Record in history (images attached to this user turn) + self.messages.append(Message(role="user", content=user_message, images=images)) self.messages.append(Message(role="assistant", content=response)) # Apply edits @@ -218,7 +223,7 @@ def _send(self, messages: list[dict]) -> str: # ─── Prompt building ────────────────────────────────────────────── - def _build_messages(self, user_message: str) -> list[dict]: + def _build_messages(self, user_message: str, images: list[str] | None = None) -> list[dict]: """Build the full message list to send to the LLM.""" result: list[dict] = [] @@ -230,8 +235,14 @@ def _build_messages(self, user_message: str) -> list[dict]: for msg in self.messages[-20:]: # Keep last 20 messages result.append(msg.to_dict()) - # Current user message - result.append({"role": "user", "content": user_message}) + # Current user message — include images if any (vision models) + if images: + content: list = [{"type": "text", "text": user_message}] + for img in images: + content.append({"type": "image_url", "image_url": {"url": img}}) + result.append({"role": "user", "content": content}) + else: + result.append({"role": "user", "content": user_message}) return result def _build_system_prompt(self) -> str: diff --git a/qgo/ui/commands.py b/qgo/ui/commands.py index a9f4ccc..a2dca7e 100644 --- a/qgo/ui/commands.py +++ b/qgo/ui/commands.py @@ -53,6 +53,8 @@ def handle(self, text: str) -> bool: "/map": self._cmd_map, "/run": self._cmd_run, "/web": self._cmd_web, + "/browse": self._cmd_browse, + "/image": self._cmd_image, "/git": self._cmd_git, "/paste": self._cmd_paste, "/ls": self._cmd_ls, @@ -211,6 +213,69 @@ def _cmd_web(self, args: str) -> None: except Exception as exc: self.io.print_error(f"Failed to fetch URL: {exc}") + def _cmd_browse(self, args: str) -> None: + """Fetch and display a web page with rich browser-like formatting.""" + if not args: + self.io.print_warning("Usage: /browse ") + return + url = args.strip() + if not url.startswith(("http://", "https://")): + url = "https://" + url + try: + from qgo.utils.web_scraper import fetch_page_info + self.io.print_info(f"Loading: {url}") + page_info = fetch_page_info(url) + self.io.print_browse(page_info) + content = page_info.get("content", "") + if content: + self.coder.messages.append({ + "role": "user", + "content": f"Web page content from {url}:\n\n{content[:8000]}", + }) + self.io.print_success( + f"Page content added to context ({len(content):,} chars)." + ) + except Exception as exc: + self.io.print_error(f"Failed to browse {url}: {exc}") + + def _cmd_image(self, args: str) -> None: + """Attach one or more images (local path or URL) to the next message.""" + if not args: + self.io.print_warning("Usage: /image [path2 ...]") + return + for src in args.split(): + src = src.strip() + if not src: + continue + p = Path(src) + if p.exists() and p.is_file(): + # Encode local file as a base64 data URL + try: + import base64 + ext = p.suffix.lower().lstrip(".") + mime = { + "jpg": "image/jpeg", "jpeg": "image/jpeg", + "png": "image/png", "gif": "image/gif", + "webp": "image/webp", "bmp": "image/bmp", + }.get(ext, "image/png") + data = base64.b64encode(p.read_bytes()).decode("ascii") + data_url = f"data:{mime};base64,{data}" + self.coder.pending_images.append(data_url) + self.io.print_image_added(src, len(self.coder.pending_images)) + except Exception as exc: + self.io.print_error(f"Failed to load image {src}: {exc}") + elif src.startswith(("http://", "https://")): + # Remote image — pass URL directly (vision models support this) + self.coder.pending_images.append(src) + self.io.print_image_added(src, len(self.coder.pending_images)) + else: + self.io.print_warning(f"Image not found: {src}") + count = len(self.coder.pending_images) + if count: + self.io.print_info( + f" {count} image(s) queued — they will be sent with your next message." + ) + def _cmd_git(self, args: str) -> None: if not args: self.io.print_warning("Usage: /git ") diff --git a/qgo/ui/repl.py b/qgo/ui/repl.py index 993d013..5c687dc 100644 --- a/qgo/ui/repl.py +++ b/qgo/ui/repl.py @@ -25,8 +25,8 @@ def _get_completer(coder: "BaseCoder"): commands = [ "/add", "/drop", "/files", "/diff", "/commit", "/undo", "/clear", - "/model", "/models", "/tokens", "/map", "/run", "/web", "/git", - "/paste", "/ls", "/config", "/help", "/exit", "/quit", + "/model", "/models", "/tokens", "/map", "/run", "/web", "/browse", + "/image", "/git", "/paste", "/ls", "/config", "/help", "/exit", "/quit", ] # Add current files as completions for /add and /drop file_names = [str(fc.path.name) for fc in coder.chat_files] diff --git a/qgo/ui/terminal.py b/qgo/ui/terminal.py index cc09e63..76d5fb3 100644 --- a/qgo/ui/terminal.py +++ b/qgo/ui/terminal.py @@ -216,6 +216,60 @@ def print_token_usage(self, prompt: int, completion: int, cost: float) -> None: f"[qgo.info]Cost:[/] ${cost:.4f}" ) + # ─── Browser view ───────────────────────────────────────────────── + + def print_browse(self, page_info: dict) -> None: + """Display a rich browser-like view of a web page.""" + url = page_info.get("url", "") + title = page_info.get("title", "") or "Untitled" + description = page_info.get("description", "") + headings: list = page_info.get("headings", []) + links: list = page_info.get("links", []) + status = page_info.get("status_code", 0) + + # Address-bar / header panel + header = Text() + header.append("🌐 ", style="bold cyan") + header.append(f"{title}\n", style="bold white") + header.append(f" {url}\n", style="dim blue underline") + if description: + header.append(f"\n {description}\n", style="dim white italic") + if status: + color = "bold green" if status == 200 else "bold red" + header.append(f"\n HTTP {status}", style=color) + self.console.print( + Panel(header, title="[bold cyan]Browser View[/]", border_style="cyan") + ) + + # Table of contents + if headings: + toc = Text() + for level, text in headings: + indent = " " * (level - 1) + prefix = "#" * level + " " + style = "bold cyan" if level == 1 else ("cyan" if level == 2 else "white") + toc.append(f"{indent}{prefix}{text}\n", style=style) + self.console.print( + Panel(toc, title="📑 Table of Contents", border_style="blue") + ) + + # Links + if links: + link_text = Text() + for i, (text, href) in enumerate(links, 1): + link_text.append(f" {i:2}. ", style="dim") + link_text.append(f"{text}", style="cyan") + link_text.append(f" → {href}\n", style="dim") + self.console.print(Panel(link_text, title="🔗 Links", border_style="blue")) + + # ─── Image support ──────────────────────────────────────────────── + + def print_image_added(self, source: str, index: int) -> None: + """Print confirmation that an image has been queued for the next message.""" + self.console.print( + f"[qgo.success]🖼 Image #{index} queued:[/] [dim]{source}[/]" + ) + def print_help(self) -> None: """Print the help text.""" help_md = """\ @@ -235,7 +289,9 @@ def print_help(self) -> None: | `/tokens` | Show token usage | | `/map` | Show repository map | | `/run ` | Run shell command | -| `/web ` | Fetch URL as context | +| `/web ` | Fetch URL as plain text context | +| `/browse ` | Open URL with browser-like view (title, TOC, links) | +| `/image ` | Attach an image to the next message (vision models) | | `/git ` | Run git command | | `/paste` | Paste clipboard content | | `/ls [path]` | List directory files | @@ -248,5 +304,6 @@ def print_help(self) -> None: - Press **Ctrl+C** to cancel current input - Press **Ctrl+D** to exit - Use `/add *.py` to add multiple files with glob patterns +- Use `/image` before your question to send images to vision-capable models """ self.console.print(Markdown(help_md)) diff --git a/qgo/utils/web_scraper.py b/qgo/utils/web_scraper.py index dc5f3fb..11248ee 100644 --- a/qgo/utils/web_scraper.py +++ b/qgo/utils/web_scraper.py @@ -95,7 +95,71 @@ def fetch_url(url: str, timeout: int = 15) -> str: return f"[Error fetching {url}: {exc}]" -def _fetch_plain(url: str, timeout: int = 15) -> str: +def fetch_page_info(url: str, timeout: int = 15) -> dict: + """Fetch a web page and return structured info for browser-view display. + + Returns a dict with: url, title, description, status_code, headings, links, content. + """ + result: dict = { + "url": url, + "title": "", + "description": "", + "status_code": 0, + "headings": [], + "links": [], + "content": "", + } + try: + import requests + from bs4 import BeautifulSoup + + headers = { + "User-Agent": ( + "Mozilla/5.0 (compatible; QGo/0.1; +https://github.com/Rahulchaube1/QGo)" + ) + } + response = requests.get(url, headers=headers, timeout=timeout) + response.raise_for_status() + result["status_code"] = response.status_code + + soup = BeautifulSoup(response.text, "html.parser") + + # Title + if soup.title: + result["title"] = soup.title.get_text(strip=True) + + # Meta description + meta = soup.find("meta", attrs={"name": "description"}) + if meta and isinstance(meta, object) and hasattr(meta, "get"): + result["description"] = meta.get("content", "") # type: ignore[union-attr] + + # Headings (h1–h3, max 15) + headings: list[tuple[int, str]] = [] + for tag in soup.find_all(["h1", "h2", "h3"]): + text = tag.get_text(strip=True) + if text: + headings.append((int(tag.name[1]), text)) + result["headings"] = headings[:15] + + # Links (max 20) + links: list[tuple[str, str]] = [] + for a in soup.find_all("a", href=True)[:30]: + text = a.get_text(strip=True) + href = a["href"] + if text and href and not href.startswith("#") and len(links) < 20: + links.append((text[:60], href)) + result["links"] = links + + # Readable content (reuse existing fetch_url) + result["content"] = fetch_url(url, timeout) + + except Exception as exc: + result["content"] = f"[Error loading {url}: {exc}]" + + return result + + + """Minimal fallback using only urllib (no requests/bs4).""" try: import urllib.request