From 7e66350d8b3080e12cb5dc8dc62bb41e4394044c Mon Sep 17 00:00:00 2001 From: Jay Sahnan Date: Fri, 24 Apr 2026 07:39:26 +0100 Subject: [PATCH 01/23] feat: add competitor-analysis skill --- skills/competitor-analysis/.gitignore | 2 + skills/competitor-analysis/SKILL.md | 254 ++++++ .../competitor-analysis/profiles/example.json | 12 + .../references/example-research.md | 111 +++ .../references/report-template.html | 105 +++ .../references/research-patterns.md | 208 +++++ .../references/workflow.md | 384 +++++++++ .../scripts/capture_screenshots.mjs | 146 ++++ .../scripts/compile_report.mjs | 745 ++++++++++++++++++ .../scripts/extract_vs_names.mjs | 112 +++ .../scripts/gate_candidates.mjs | 178 +++++ .../competitor-analysis/scripts/list_urls.mjs | 83 ++ .../scripts/merge_partials.mjs | 202 +++++ .../competitor-analysis/scripts/package.json | 6 + 14 files changed, 2548 insertions(+) create mode 100644 skills/competitor-analysis/.gitignore create mode 100644 skills/competitor-analysis/SKILL.md create mode 100644 skills/competitor-analysis/profiles/example.json create mode 100644 skills/competitor-analysis/references/example-research.md create mode 100644 skills/competitor-analysis/references/report-template.html create mode 100644 skills/competitor-analysis/references/research-patterns.md create mode 100644 skills/competitor-analysis/references/workflow.md create mode 100644 skills/competitor-analysis/scripts/capture_screenshots.mjs create mode 100644 skills/competitor-analysis/scripts/compile_report.mjs create mode 100644 skills/competitor-analysis/scripts/extract_vs_names.mjs create mode 100644 skills/competitor-analysis/scripts/gate_candidates.mjs create mode 100644 skills/competitor-analysis/scripts/list_urls.mjs create mode 100644 skills/competitor-analysis/scripts/merge_partials.mjs create mode 100644 skills/competitor-analysis/scripts/package.json diff --git a/skills/competitor-analysis/.gitignore b/skills/competitor-analysis/.gitignore new file mode 100644 index 
0000000..d4fcb2d --- /dev/null +++ b/skills/competitor-analysis/.gitignore @@ -0,0 +1,2 @@ +profiles/*.json +!profiles/example.json diff --git a/skills/competitor-analysis/SKILL.md b/skills/competitor-analysis/SKILL.md new file mode 100644 index 0000000..403781b --- /dev/null +++ b/skills/competitor-analysis/SKILL.md @@ -0,0 +1,254 @@ +--- +name: competitor-analysis +description: | + Competitor research and intelligence skill. Takes a user's company (with optional + seed competitor URLs), auto-discovers additional competitors via Browserbase Search API, + deeply researches each using a 4-lane pattern (marketing surface, external signal, + public benchmarks, strategic diff vs the user's company), and compiles the results + into an HTML report with four views: overview, per-competitor deep dive, side-by-side + feature/pricing matrix, and a chronological mentions feed (benchmarks, comparison + pages, news, Reddit, HN, LinkedIn posts, YouTube videos, reviews). + Use when the user wants to: (1) analyze competitors, (2) build a competitive matrix, + (3) extract competitor pricing / features, (4) find comparison pages and online + mentions of competitors, (5) surface public benchmarks. Triggers: "competitor analysis", + "analyze competitors", "competitive intel", "competitor research", "competitor pricing", + "feature comparison", "price comparison", "find comparisons", "who's comparing us", + "competitor mentions", "competitor benchmarks". +license: MIT +compatibility: Requires bb CLI (@browserbasehq/cli) and BROWSERBASE_API_KEY env var +allowed-tools: Bash Agent AskUserQuestion +metadata: + author: browserbase + version: "0.1.0" +--- + +# Competitor Analysis + +Analyze a user's competitors. Uses Browserbase Search API for discovery and a 4-lane Plan→Research→Synthesize pattern for enrichment — outputting an HTML report with overview, per-competitor deep dives, a side-by-side feature/pricing matrix, and a chronological mentions feed. 
+ +**Required**: `BROWSERBASE_API_KEY` env var and `bb` CLI installed. + +**First-run setup**: On the first run you'll be prompted to approve `bb fetch`, `bb search`, `cat`, `mkdir`, `sed`, etc. Select **"Yes, and don't ask again for: bb fetch:\*"** (or equivalent) for each. To permanently approve, add these to your `~/.claude/settings.json` under `permissions.allow`: +```json +"Bash(bb:*)", "Bash(bunx:*)", "Bash(bun:*)", "Bash(node:*)", +"Bash(cat:*)", "Bash(mkdir:*)", "Bash(sed:*)", "Bash(head:*)", "Bash(tr:*)", "Bash(rm:*)" +``` + +**Path rules**: Always use full literal paths in Bash — NOT `~` or `$HOME`. Resolve the home directory once and use it everywhere. When building subagent prompts, replace `{SKILL_DIR}` with the full literal path. + +**Output directory**: All output goes to `~/Desktop/{company_slug}_competitors_{YYYY-MM-DD}/`. This directory contains one `.md` file per competitor plus the generated HTML views and CSV. + +**CRITICAL — Tool restrictions (applies to main agent AND all subagents)**: +- All web searches: use `bb search`. NEVER WebSearch. +- All page fetches: use `bb fetch --allow-redirects`. NEVER WebFetch. Pipe through `sed ... | tr -s ' \n'` to extract text. 1 MB response limit — fall back to `bb browse` for JS-heavy pages. +- All research output: subagents write **one markdown file per competitor** to `{OUTPUT_DIR}/{competitor-slug}.md` using bash heredoc. NEVER use the Write tool or `python3 -c`. See `references/example-research.md` for the file format. +- Report compilation: use `node {SKILL_DIR}/scripts/compile_report.mjs {OUTPUT_DIR} --user-company "{user_company}" --open` — generates `index.html`, `competitors/*.html`, `matrix.html`, `mentions.html`, `results.csv` in one step and opens overview. +- URL deduplication: `node {SKILL_DIR}/scripts/list_urls.mjs /tmp --prefix competitor`. 
+- **Subagents must use ONLY the Bash tool.** +- **Main agent NEVER reads raw discovery JSON batch files.** + +**CRITICAL — Minimize permission prompts**: +- Subagents MUST batch ALL file writes into a SINGLE Bash call using chained heredocs. +- Batch ALL searches and ALL fetches into single Bash calls via `&&` chaining. + +## Pipeline Overview + +Follow these 7 steps in order. Do not skip or reorder. + +1. **User Company Research** — Deeply understand the user's company, produce `precise_category` + `category_include_keywords` + `exclusion_list` +2. **Depth Mode + Seed Input** — Choose depth, accept optional seed competitor URLs +3. **Discovery (3 parallel waves)** — Wave A (alternatives), Wave B (precise category), Wave C (comparison-page graph via "X vs Y" title parsing) +4. **Gate** — `scripts/gate_candidates.mjs` bb-fetches each candidate's hero text and drops wrong-category URLs +5. **Deep Enrichment (5 subagents per competitor in deep/deeper modes)** — Marketing, Discussion, Social, News, Technical — each lane a separate subagent writing to `partials/`; then `merge_partials.mjs` consolidates +6. **Screenshots** — `capture_screenshots.mjs` via the `browse` CLI captures homepage hero + full-page pricing for each competitor +7. **HTML Report** — Overview + per-competitor (with embedded screenshots) + matrix + mentions views + +--- + +## Step 0: Setup Output Directory + +```bash +OUTPUT_DIR=~/Desktop/{company_slug}_competitors_{YYYY-MM-DD} +mkdir -p "$OUTPUT_DIR" +``` + +Replace `{company_slug}` with the user's company name (lowercase, hyphenated) and `{YYYY-MM-DD}` with today's date. Pass `{OUTPUT_DIR}` as a full literal path to every subagent. + +Clean up discovery batch files from prior runs: +```bash +rm -f /tmp/competitor_discovery_batch_*.json +``` + +## Step 1: User Company Research + +This step sets the baseline for what "competitor" means. + +1. Ask the user for their company name or URL. + +2. 
**Check for an existing profile** at `{SKILL_DIR}/profiles/{company-slug}.json`. If it exists, load it and confirm with the user: "I have your profile from {researched_at}. Still accurate?" — if yes, skip to Step 2. + The profile format is shared with `company-research` (same shape). If a user already has a profile saved under `company-research/profiles/`, you may copy it into this skill's profiles directory rather than re-researching. + +3. **No profile exists** → run the self-research flow. See `references/research-patterns.md` → "Self-Research" for sub-questions and page-discovery rules. + +4. Synthesize into a profile: Company, Product, Existing Customers, Competitors (seed list), Use Cases, **precise_category**, **category_include_keywords**, **exclusion_list**. Do NOT include ICP — this skill doesn't need it. + - `precise_category`: one sentence describing the category. e.g., "cloud headless browser infrastructure for AI agents with CDP". Avoid vague words like "tools" / "platform". + - `category_include_keywords`: 8-15 phrases a direct competitor's marketing would likely contain (hero or title). Include semantic variants. + - `exclusion_list`: phrases that indicate a *different* category — used by the gate to reject false positives (e.g. `antidetect browser`, `scraping api`, `screenshot api`, `residential proxy`). + See `references/research-patterns.md` → "Synthesis Output" for the exact format and Browserbase as a worked example. + +5. Present the profile to the user. Do not proceed until confirmed. + +6. **Save the confirmed profile** to `{SKILL_DIR}/profiles/{company-slug}.json`. + +## Step 2: Depth Mode + Seed Input + +Ask clarifying questions via `AskUserQuestion` with checkboxes: +- **Known competitors?** Text area for URLs/names (optional — discovery will find more). 
+- **Depth mode?** + - `quick` — marketing surface only, many competitors, ~2-3 tool calls each + - `deep` — + external signal (mentions, reviews, news), ~5-8 tool calls each + - `deeper` — + public benchmarks + strategic diff vs user's company, ~10-15 tool calls each +- **Target count?** Rough number of competitors to research (e.g., 10 / 20 / 50). + +This is the ONLY user interaction. After this, execute silently until the report is ready. + +| Mode | Research per competitor | Best for | +|------|--------------------------|----------| +| `quick` | Lane 1 only (homepage + pricing) | Scanning ~30-50 competitors fast | +| `deep` | Lanes 1+2 | ~15-25 competitors with external signal | +| `deeper` | All 4 lanes (+ benchmarks + strategic diff) | ~5-15 competitors with full intel | + +## Step 3: Discovery (3 parallel waves) + +**Formula**: `ceil(target_count / 20)` queries per wave. Over-discover ~3x because the gate drops ~40-60%. + +Evaluation on Browserbase shows all three waves are additive — skip any and you lose real competitors: + +**Wave A — Generic alternatives** (broad; heavy aggregator noise, filtered out later) +- `"alternatives to {user_company}"` +- `"{user_company} competitors"` + +**Wave B — Precise category** (uses `precise_category` from the profile) +- `"{precise_category}"` verbatim +- 2-3 queries composed from the most distinctive tokens (e.g. `"cloud browser for ai agents"`, `"browser infrastructure API"`) + +**Wave C — Comparison-page graph** (highest precision) +- `"{user_company} vs"` +- `"{seed1} vs"`, `"{seed2} vs"`, `"{seed3} vs"` (seeds from the profile's `competitors` list) +- After the searches, run `scripts/extract_vs_names.mjs` to parse `"X vs Y"` patterns from result titles — this uniquely surfaces competitors that don't appear as URL hits. + +**Process**: +1. Launch discovery subagents in a single message (up to ~6), split across the three waves. 
Each subagent runs its queries in ONE Bash call: + ```bash + bb search "{query}" --num-results 25 --output /tmp/competitor_discovery_batch_{N}.json + ``` +2. After all waves complete: + ```bash + node {SKILL_DIR}/scripts/list_urls.mjs /tmp --prefix competitor > /tmp/competitor_urls.txt + node {SKILL_DIR}/scripts/extract_vs_names.mjs /tmp --prefix competitor \ + --seed "{user_company},{seed1},{seed2},{seed3}" \ + > /tmp/competitor_vs_names.jsonl + ``` +3. **Filter** `/tmp/competitor_urls.txt` — remove blog posts, news, AI-tool directories (seektool.ai, respan.ai, agentsindex.ai, toolradar.com, aitoolsatlas.ai, vibecodedthis.com, etc.), review aggregators (g2.com, capterra.com), databases (crunchbase.com, tracxn.com), user's own domain. See `references/workflow.md` for the full noise-domain list. +4. For `vs_names` entries that have a resolved `domain`, add them. For unresolved names, optionally run `bb search "{name}" --num-results 3` and pick the top root domain. +5. Merge with user-provided seed URLs. Dedup by hostname → `/tmp/competitor_candidates.txt`. + +## Step 4: Gate (category-fit filter) + +Drop candidates whose marketing identifies them as a *different* category before enrichment burns tool calls on them. 
+```bash +cat /tmp/competitor_candidates.txt \ + | node {SKILL_DIR}/scripts/gate_candidates.mjs \ + --include "{profile.category_include_keywords joined with commas}" \ + --exclude "{profile.exclusion_list joined with commas}" \ + --concurrency 6 \ + > /tmp/competitor_gated.jsonl + +grep '"status":"PASS"' /tmp/competitor_gated.jsonl \ + | node -e 'require("fs").readFileSync(0,"utf-8").split("\n").filter(Boolean).forEach(l => { try { console.log(JSON.parse(l).url); } catch {} })' \ + > /tmp/competitor_passed.txt +``` + +The gate fetches each candidate's homepage via `bb fetch --allow-redirects`, extracts the first 800 chars of visible text, and classifies position-aware: exclude keyword in `<title>` → REJECT; include keyword in `<title>` → PASS; hybrid title → hero200 tiebreak; otherwise fall through. + +**Review the PASS/REJECT split** in `/tmp/competitor_gated.jsonl`. Spot-check for miscategorizations. If a known direct competitor was REJECTED because their marketing straddles categories (e.g. browser + scraping), manually add their URL to `/tmp/competitor_passed.txt`. + +**Evaluated on Browserbase** with 12 mixed candidates: 7/7 real competitors passed, 4/4 wrong-category rejected, 1 known-hybrid edge case rejected. + +## Step 5: Deep Enrichment + +Two modes. See `references/workflow.md` for prompt templates and wave management. See `references/research-patterns.md` for the lane-by-lane methodology. + +### Quick mode — single subagent per batch +- Input: `/tmp/competitor_passed.txt` (gate survivors), ~8 competitors per subagent. +- One subagent runs Lane A only (marketing surface). 2-3 tool calls each. +- Writes directly to `{OUTPUT_DIR}/{slug}.md`. + +### Deep / Deeper mode — 5 subagents PER competitor (parallel lane fan-out) +For each competitor, launch 5 parallel subagents, one per lane: +- **A. Marketing** (`marketing`): pricing, features, positioning, integrations, customers, team, funding, HQ. Owns canonical frontmatter. +- **B.
Discussion** (`discussion`): Reddit, HN, forums, Dev.to, Hashnode. Broad queries beyond `site:` — also `"{competitor}" review 2026`, `"{competitor}" issues OR problems`, `"{competitor}" discussion`. +- **C. Social** (`social`): LinkedIn posts, YouTube videos, Twitter/X. Snippets only — do NOT fetch. +- **D. News & Comparisons** (`news`): TechCrunch, Verge, VentureBeat, Forbes, Businesswire, Substack, blog reviews. Every mention needs a date. +- **E. Technical & Benchmarks** (`technical`): GitHub benchmark repos/PRs, performance posts. Writes Benchmarks + technical Findings. + +Budget per lane: deep = 5-8 tool calls, deeper = 10-15. +Launch all 5 lane-subagents for ONE competitor in a single Agent tool call set (5 parallel). Across 5 competitors = 5 messages. + +Each subagent writes a partial to `{OUTPUT_DIR}/partials/{slug}.{lane}.md`. + +**Critical**: Pass the user's company name, product, and key features verbatim into every subagent prompt so the technical lane can do strategic diffing. Pass the full literal `{OUTPUT_DIR}` path to every subagent. + +### Merge partials → canonical per-competitor file +After all subagents for all competitors complete: +```bash +node {SKILL_DIR}/scripts/merge_partials.mjs {OUTPUT_DIR} +``` +Unions the 5 partials per competitor into one `{OUTPUT_DIR}/{slug}.md` — dedup'd Mentions (sorted by date desc), dedup'd Benchmarks, merged Findings, canonical frontmatter from the marketing lane. + +## Step 6: Screenshots + +Capture homepage hero + full-page pricing screenshots for each competitor: +```bash +node {SKILL_DIR}/scripts/capture_screenshots.mjs {OUTPUT_DIR} --env remote +``` + +Uses the `browse` CLI (`npm install -g @browserbasehq/browse-cli`) against a Browserbase remote session. Writes PNGs to `{OUTPUT_DIR}/screenshots/{slug}-{hero,pricing}.png`. The compile step in Step 7 auto-embeds them on each per-competitor HTML page. + +Cost: ~15-20s per competitor. ~90s for 5 competitors. + +## Step 7: HTML Report + +1. 
**Generate all views + CSV** (opens overview in browser): + ```bash + node {SKILL_DIR}/scripts/compile_report.mjs {OUTPUT_DIR} --user-company "{user_company}" --open + ``` + Produces: + - `{OUTPUT_DIR}/index.html` — overview: competitor table with tagline, pricing summary, key features, strategic diff + - `{OUTPUT_DIR}/competitors/{slug}.html` — per-competitor deep dive (all sections) + - `{OUTPUT_DIR}/matrix.html` — side-by-side feature/pricing matrix + - `{OUTPUT_DIR}/mentions.html` — chronological feed with source-type pills + client-side filter + - `{OUTPUT_DIR}/results.csv` — flat spreadsheet + +2. **Present a chat summary**: + +``` +## Competitor Analysis Complete + +- **Competitors researched**: {count} +- **Depth mode**: {mode} +- **Mentions collected**: {total mentions} across {source types count} source types +- **Public benchmarks found**: {count} +- **Opened in browser**: ~/Desktop/{company_slug}_competitors_{date}/index.html +``` + +3. Show the **overview table** in chat: + +``` +| Competitor | Positioning | Pricing | Key Features | Strategic Diff | +|------------|-------------|---------|--------------|----------------| +| Rival Co | AI-native headless browser | $99/mo entry | stealth, proxies, CAPTCHA | Similar infra; cheaper entry | +``` + +4. Call out the top 3-5 most interesting findings — e.g., "3 competitors have public benchmarks; Rival Co is cheapest; Foo Inc launched a session-replay feature 2 weeks ago." Offer to dig deeper into any specific competitor or re-run with different depth. 
diff --git a/skills/competitor-analysis/profiles/example.json b/skills/competitor-analysis/profiles/example.json new file mode 100644 index 0000000..f1d7203 --- /dev/null +++ b/skills/competitor-analysis/profiles/example.json @@ -0,0 +1,12 @@ +{ + "company": "", + "website": "", + "product": "", + "existing_customers": [], + "competitors": [], + "use_cases": [], + "precise_category": "", + "category_include_keywords": [], + "exclusion_list": [], + "researched_at": "" +} diff --git a/skills/competitor-analysis/references/example-research.md b/skills/competitor-analysis/references/example-research.md new file mode 100644 index 0000000..d40b8c9 --- /dev/null +++ b/skills/competitor-analysis/references/example-research.md @@ -0,0 +1,111 @@ +# Example Competitor Research File + +Each enrichment subagent writes one markdown file per competitor to `{OUTPUT_DIR}/{competitor-slug}.md`, where `{OUTPUT_DIR}` is the per-run Desktop directory set up by the main agent in Step 0 (e.g., `~/Desktop/acme_competitors_2026-04-23/`). The YAML frontmatter contains structured fields for report/matrix compilation. The body contains per-section research plus aggregated mentions and benchmarks. 
+ +## Template + +```markdown +--- +competitor_name: Rival Co +website: https://rivalco.com +tagline: The fastest way to ship browser agents +positioning: Developer-first headless browser API +product_description: Cloud-hosted headless browser infrastructure for AI agents and scrapers +target_customer: AI engineers, scraping teams, SaaS companies +pricing_model: Usage-based + seat tiers +pricing_tiers: Free (100 min) | Pro $99/mo | Scale $499/mo | Enterprise Contact +key_features: stealth proxy | session replay | CAPTCHA solving | CDP protocol | Playwright driver +integrations: Playwright | Puppeteer | Stagehand | LangChain +headquarters: San Francisco, CA +founded: 2023 +employee_estimate: 11-50 +funding_info: Seed, $5M (2024) +strategic_diff: Similar infra; weaker in stealth, but cheaper entry tier +--- + +## Product +Cloud-hosted headless browser infrastructure. Exposes CDP-compatible sessions with +built-in stealth, proxies, and CAPTCHA solving. Positioned at AI agents and scraping teams. + +## Pricing +- Free: 100 browser minutes/month, 1 concurrent session +- Pro ($99/mo): 10K minutes, 5 concurrent, basic proxies +- Scale ($499/mo): 100K minutes, 50 concurrent, residential proxies, session replay +- Enterprise: custom pricing, SSO, dedicated support + +## Features +- Stealth mode with fingerprint rotation +- Residential proxy pool (180+ countries) +- Auto-CAPTCHA solving +- Session replay / video recording +- CDP-compatible WebSocket API +- Playwright, Puppeteer, Selenium drivers + +## Positioning +Marketing emphasizes "AI-native" and developer-first DX. Landing page hero: +"Give your agents a browser." Targets solo devs through mid-market AI teams. 
+ +## Comparison vs {user_company} +- **Overlaps**: Headless browser cloud, CDP API, Playwright driver, proxy support +- **Gaps**: No session inspector UI, no Stagehand-equivalent high-level library, weaker stealth benchmarks +- **Where they win**: Lower entry price ($99 vs $199), simpler pricing tiers +- **Where you win**: Stronger stealth (per public benchmarks), better observability, larger integration ecosystem + +## Mentions +- **[Benchmark]** computesdk/benchmarks PR #92 — Rival Co 73% pass rate on stealth tests (source: https://github.com/computesdk/benchmarks/pull/92, 2026-03-14) +- **[Comparison]** Browserbase vs Rival Co — side-by-side review (source: https://example.com/browserbase-vs-rivalco, 2026-02-01) +- **[Reddit]** r/webscraping thread: "Moved from Rival Co to X after CAPTCHA issues" — 24 upvotes (source: https://reddit.com/r/webscraping/comments/abc123) +- **[HN]** "Show HN: Rival Co raises seed to build..." — 112 points, 48 comments (source: https://news.ycombinator.com/item?id=12345) +- **[LinkedIn]** CEO post on product launch — 412 reactions (source: https://linkedin.com/posts/rivalco-launch) +- **[YouTube]** "Rival Co vs Browserbase" review by Dev YouTuber — 8.2K views (source: https://youtube.com/watch?v=xyz) +- **[News]** TechCrunch coverage of seed round (source: https://techcrunch.com/2024/11/rival-co-seed) +- **[Review]** G2 4.3/5 (31 reviews), main complaint: flaky sessions (source: https://g2.com/products/rival-co) + +## Benchmarks +- **computesdk/benchmarks PR #92** — Rival Co 73% pass rate on stealth, 4th of 7 tested (https://github.com/computesdk/benchmarks/pull/92) +- **headless-bench blog** — Rival Co 1.8s cold start, 2nd fastest (https://example.com/headless-bench-2026) + +## Research Findings +- **[high]** Usage-based pricing starts at $99/mo for 10K minutes (source: rivalco.com/pricing) +- **[high]** Series seed, $5M raised Nov 2024 (source: TechCrunch) +- **[medium]** CEO LinkedIn emphasizes AI-agent use cases (source: 
linkedin.com/in/rivalco-ceo) +- **[low]** Possibly a team under 20 based on careers page (source: rivalco.com/careers) +``` + +## Field Rules + +- **YAML frontmatter**: All structured fields go here. Extracted for matrix + CSV compilation. +- **`pricing_tiers`**: Pipe-separated (`|`) with tier name + short price. `compile_report.mjs` parses on `|` for the matrix view. +- **`key_features`**, **`integrations`**: Pipe-separated lists. +- **`strategic_diff`**: One-line summary (shown in overview table). +- **Body sections**: `## Product`, `## Pricing`, `## Features`, `## Positioning`, `## Comparison vs {user_company}`, `## Mentions`, `## Benchmarks`, `## Research Findings`. +- **Mentions format**: `- **[SourceType]** title | snippet (source: url, date)` — `SourceType` is one of `Benchmark`, `Comparison`, `News`, `Reddit`, `HN`, `LinkedIn`, `YouTube`, `Review`, `Podcast`, `X`. +- **Findings format**: `- **[confidence]** fact (source: url)` — `confidence` is `high`, `medium`, or `low`. +- **Filename**: `{OUTPUT_DIR}/{competitor-slug}.md` where slug is lowercase, hyphenated. + +## Writing via Bash Heredoc + +Subagents write these files using bash heredoc to avoid security prompts. Use the full literal `{OUTPUT_DIR}` path — no `~` or `$HOME`: + +```bash +cat << 'COMPETITOR_MD' > {OUTPUT_DIR}/rival-co.md +--- +competitor_name: Rival Co +website: https://rivalco.com +... +--- + +## Product +... + +## Pricing +... + +## Mentions +- **[Benchmark]** ... +COMPETITOR_MD +``` + +Use `'COMPETITOR_MD'` (quoted) as the delimiter to prevent shell variable expansion. + +**IMPORTANT**: Write ALL competitor files in a SINGLE Bash call using chained heredocs to minimize permission prompts. 
diff --git a/skills/competitor-analysis/references/report-template.html b/skills/competitor-analysis/references/report-template.html new file mode 100644 index 0000000..4153389 --- /dev/null +++ b/skills/competitor-analysis/references/report-template.html @@ -0,0 +1,105 @@ +<!DOCTYPE html> +<html lang="en"> +<head> +<meta charset="UTF-8"> +<meta name="viewport" content="width=device-width, initial-scale=1.0"> +<title>Competitor Analysis — {{TITLE}} + + + + + +
+
+
+

+<header>
+  <h1>{{TITLE}}</h1>
+  <div class="meta">{{META}}</div>
+  <a class="powered-by" href="https://www.browserbase.com" target="_blank" rel="noopener">
+    Powered by Browserbase
+  </a>
+</header>
+ + + +
+<div class="stats">
+  <div class="stat">
+    <div class="stat-label">Competitors</div>
+    <div class="stat-value">{{TOTAL}}</div>
+  </div>
+  <div class="stat">
+    <div class="stat-label">Mentions</div>
+    <div class="stat-value">{{MENTION_COUNT}}</div>
+  </div>
+  <div class="stat">
+    <div class="stat-label">Benchmarks</div>
+    <div class="stat-value">{{BENCHMARK_COUNT}}</div>
+  </div>
+  <div class="stat">
+    <div class="stat-label">With Pricing</div>
+    <div class="stat-value">{{WITH_PRICING}}</div>
+  </div>
+</div>
+<table>
+  <thead>
+    <tr>
+      <th>Competitor</th>
+      <th>Positioning</th>
+      <th>Pricing</th>
+      <th>Key Features</th>
+      <th>Strategic Diff</th>
+    </tr>
+  </thead>
+  <tbody>
+    {{TABLE_ROWS}}
+  </tbody>
+</table>
+
+ + + + diff --git a/skills/competitor-analysis/references/research-patterns.md b/skills/competitor-analysis/references/research-patterns.md new file mode 100644 index 0000000..ed1ad3a --- /dev/null +++ b/skills/competitor-analysis/references/research-patterns.md @@ -0,0 +1,208 @@ +# Competitor Analysis — Research Patterns + +## Overview + +Two research contexts: +1. **Self-Research** (Step 1) — Deep research on the user's company so we know what "competitor" means for this run. +2. **Competitor Research** (Step 4) — For each discovered/seeded competitor, run the 4-lane enrichment below. + +Both use the Plan → Research → Synthesize pattern. Self-research is identical in shape to the one in `company-research`, so profiles can be reused across skills. + +## Self-Research (User's Company) + +### Sub-Questions +- "What does {company} sell and what specific problem does it solve?" +- "Who are {company}'s existing customers? What industries, company sizes, use cases?" +- "Who are {company}'s known competitors? What category do they compete in?" +- "What pricing model does {company} use?" +- "What features, integrations, and differentiators does {company}'s marketing emphasize?" + +### Page Discovery +Dynamic via sitemap — do NOT hardcode `/about` or `/pricing`: +1. `bb fetch --allow-redirects "{company website}/sitemap.xml"` — primary source +2. Scan for URLs with keywords: `pricing`, `customer`, `compare`, `vs`, `about`, `features`, `integrations` +3. Optionally fetch `/llms.txt` for page descriptions +4. Pick 3-5 most relevant URLs + +### External Research +- `bb search "{company} alternatives competitors vs"` +- `bb search "{company} review comparison"` +- Fetch 1-2 most informative third-party pages + +### Synthesis Output +Produce a profile with: +- **Company**, **Product**, **Existing Customers**, **Competitors** (seed list), **Use Cases** +- **precise_category** — one clear sentence that describes what category this product competes in. 
Avoid fuzzy words like "tools" or "platform". Good: "cloud headless browser infrastructure for AI agents exposing CDP". Bad: "browser automation tools". This becomes the anchor for discovery queries and the gate. +- **category_include_keywords** — 8-15 phrases that a *direct competitor's* marketing would very likely contain (title or hero). Include semantic variants. e.g. for Browserbase: `cloud browser`, `headless browser`, `browser infrastructure`, `browser infra`, `browser api`, `infra for ai agents`, `browser for agents`, `managed chromium`, `cdp`, `remote browser`, `infrastructure for computer use`, `agents and automations`. +- **exclusion_list** — phrases that indicate a *different* category, used by the gate to reject false positives. e.g. `antidetect browser`, `multilogin`, `scraping api`, `web scraping api`, `screenshot api`, `residential proxy`, `proxy rotation`, `open-source ai browser` (end-user local browsers, not cloud infra), `privacy-first browser`. + +The same `profiles/{company-slug}.json` shape used by `company-research`, extended with the three new fields. The `competitors` array becomes the seed list and the first inputs to the comparison-graph expansion in Step 3. + +--- + +## Competitor Research — 4 Research Lanes + +For each competitor, run these four lanes (depth-gated): + +### Lane 1 — Marketing Surface (ALL depth modes) +Goal: extract what the competitor says about themselves from their own site. + +**Sub-questions**: +- "What does {competitor} sell, who is it for, and how is it positioned?" +- "What are {competitor}'s pricing tiers and pricing model?" +- "What key features, integrations, and platforms does {competitor} list?" + +**Pages to fetch** (via sitemap discovery — do NOT hardcode): +1. Homepage +2. `/pricing` (or equivalent from sitemap) +3. `/features`, `/product`, `/platform`, `/solutions` +4. 
`/integrations`, `/customers`, `/case-studies` + +**Extract into frontmatter fields**: `tagline`, `positioning`, `product_description`, `target_customer`, `pricing_model`, `pricing_tiers`, `key_features`, `integrations`. + +### Lane 2 — External Signal (deep + deeper) +Goal: what the rest of the internet says about them. + +**Sub-questions**: +- "What third-party comparison pages mention {competitor}?" +- "What do users say on Reddit, HN, G2, Capterra?" +- "What recent news, launches, or announcements?" +- "Who is talking about them on LinkedIn or YouTube?" + +**Search queries**: +``` +"{competitor} vs" +"{competitor} alternatives" +"{competitor} review" +"{competitor} G2" / "{competitor} Capterra" +"site:reddit.com {competitor}" +"site:news.ycombinator.com {competitor}" +"site:linkedin.com/posts {competitor}" +"site:youtube.com {competitor}" +"{competitor} launch 2025 OR 2026" +"{competitor} funding announcement" +``` + +**Extraction rule**: From search results, harvest each hit as a `Mentions` entry. Classify source type from the URL: +- `reddit.com` → `Reddit` +- `news.ycombinator.com` → `HN` +- `linkedin.com` → `LinkedIn` +- `youtube.com` / `youtu.be` → `YouTube` +- `g2.com` / `capterra.com` / `trustradius.com` → `Review` +- `*vs*` in path or title → `Comparison` +- news domains (techcrunch, theverge, venturebeat, forbes, businesswire, globenewswire) → `News` +- `twitter.com` / `x.com` → `X` +- `spotify.com/episode` / transistor/simplecast → `Podcast` + +For LinkedIn and YouTube, the snippet + URL from `bb search` is enough. Do NOT try to deep-fetch individual LinkedIn posts (auth walls) — list them with title/snippet. + +### Lane 3 — Public Benchmarks (deeper only) +Goal: find third-party benchmarks that measured this competitor's product. + +**Sub-questions**: +- "Has {competitor} been included in any public benchmark?" +- "Are there GitHub repos, PRs, or blog posts comparing {competitor} head-to-head on a measured axis (speed, accuracy, cost, pass rate)?" 
+ +**Search queries**: +``` +"{competitor} benchmark" +"{competitor} performance test" +"site:github.com {competitor} benchmark" +"site:github.com {competitor} vs" +"{competitor} vs {seed_competitor} benchmark" # pairwise, use another known competitor as the seed +"{category} benchmark {competitor}" # e.g. "headless browser benchmark {competitor}" +``` + +**Extraction**: Add each hit to `Benchmarks` section with: title, source, URL, key finding (one line). Also mirror into `Mentions` with type `Benchmark`. + +**Known benchmark repos to check directly** (if domain is on-topic): +- `github.com/computesdk/benchmarks` +- Category-specific benchmark repos discovered via the first search wave + +### Lane 4 — Strategic Diff vs User's Company (deeper only) +Goal: explicitly compare this competitor to the user's company. + +**Inputs**: `{user_company_profile}` (from Step 1) — specifically `product`, `use_cases`, `key_features` if available. + +**Sub-questions**: +- "What features does {competitor} have that {user_company} does not?" +- "What features does {user_company} have that {competitor} does not?" +- "Who does {competitor} serve that {user_company} does not (and vice versa)?" +- "Where does each one win on the marketing surface (price, feature depth, DX, ecosystem)?" + +**No new fetches required** for this lane — it's a synthesis step over Lane 1 + 2 + 3 findings plus the user's profile. Write as: + +```markdown +## Comparison vs {user_company} +- **Overlaps**: ... +- **Gaps**: ... +- **Where they win**: ... +- **Where you win**: ... +``` + +Also populate the `strategic_diff` frontmatter field with a one-line summary for the overview table. 
+ +--- + +## Depth Mode Behavior + +### Quick Mode (~lots of competitors, cheap) +- **Lanes**: 1 only +- **Budget**: 2-3 tool calls per competitor (homepage + pricing page) +- **Fields populated**: tagline, product_description, pricing_tiers, key_features +- **Mentions / Benchmarks / Comparison**: skipped + +### Deep Mode (balanced, default) +- **Lanes**: 1 + 2 +- **Budget**: 5-8 tool calls per competitor +- **Everything in quick** + 5-10 mentions across source types + +### Deeper Mode (full intel) +- **Lanes**: 1 + 2 + 3 + 4 +- **Budget**: 10-15 tool calls per competitor +- **Everything in deep** + benchmarks section + strategic diff section + +--- + +## Finding Format (per lane) + +Every finding is a factual statement tied to a source: + +```json +{ + "lane": "marketing | external | benchmark | strategic", + "fact": "Rival Co charges $99/mo for 10K browser minutes", + "sourceUrl": "https://rivalco.com/pricing", + "confidence": "high" +} +``` + +**Confidence**: +- `high`: Directly stated on the competitor's own website or official press +- `medium`: Inferred from third-party articles, reviews, or job posts +- `low`: Speculative / outdated sources + +## Research Loop Rules + +1. **Lane 1 first** — always start with the competitor's own site +2. **Use sitemap, not hardcoded paths** — `/pricing` might be `/plans` or `/pricing-plans` +3. **Rephrase, don't retry** — if a search returns generic junk, switch keywords +4. **Fetch selectively** — pick the 1-2 most promising URLs per query +5. **For LinkedIn/YouTube: search only, don't fetch** — snippet is enough, avoid auth walls +6. **Respect step budget** per depth mode +7. **Deduplicate mentions** — same URL should only appear once in `## Mentions` + +## Synthesis Instructions + +After the research loop completes for a competitor: + +1. Fill frontmatter fields from Lane 1 findings +2. Write body sections: Product, Pricing, Features, Positioning (all from Lane 1) +3. Append `## Mentions` from Lane 2 classified hits +4. 
Append `## Benchmarks` from Lane 3 (deeper only) +5. Append `## Comparison vs {user_company}` from Lane 4 synthesis (deeper only) +6. Append `## Research Findings` as a raw-findings appendix with confidence tags + +No ICP score. No threat score. Pure intel. + +If a field has no supporting findings, leave it empty rather than guessing. diff --git a/skills/competitor-analysis/references/workflow.md b/skills/competitor-analysis/references/workflow.md new file mode 100644 index 0000000..7ba4dcd --- /dev/null +++ b/skills/competitor-analysis/references/workflow.md @@ -0,0 +1,384 @@ +# Competitor Analysis — Workflow Reference + +## Discovery Batch JSON Schema + +File: `/tmp/competitor_discovery_batch_{N}.json` + +`bb search --output` writes a JSON object: + +```json +{ + "requestId": "abc123", + "query": "alternatives to acme", + "results": [ + { "url": "https://example.com", "title": "Example Corp", "author": null, "publishedDate": null } + ] +} +``` + +The `list_urls.mjs` script (run with `--prefix competitor`) deduplicates across batches. + +## Competitor Research Markdown Format + +File: `{OUTPUT_DIR}/{competitor-slug}.md` — see `references/example-research.md` for the full template. 
 + +**YAML frontmatter fields** (used by `compile_report.mjs`): +- `competitor_name` (required) +- `website` (required) +- `tagline` +- `positioning` +- `product_description` +- `target_customer` +- `pricing_model` +- `pricing_tiers` (pipe-separated: `Free | Pro $99 | Enterprise Contact`) +- `key_features` (pipe-separated) +- `integrations` (pipe-separated) +- `headquarters` +- `founded` +- `employee_estimate` +- `funding_info` +- `strategic_diff` (one-line for overview table; deeper mode only) + +**Body sections** (in this order — `compile_report.mjs` parses by heading): +- `## Product` +- `## Pricing` +- `## Features` +- `## Positioning` +- `## Comparison vs {user_company}` (deeper only) +- `## Mentions` +- `## Benchmarks` (deeper only) +- `## Research Findings` + +**Mentions line format** (parsed into the mentions feed): +``` +- **[SourceType]** Title | Snippet (source: URL, YYYY-MM-DD) +``` +`SourceType` ∈ `Benchmark | Comparison | News | Reddit | HN | LinkedIn | YouTube | Review | Podcast | X`. Date is optional but preferred. + +## Extracting Text from HTML + +`bb fetch --allow-redirects` returns raw HTML. To extract readable text in one pipe: + +```bash +bb fetch --allow-redirects "https://rivalco.com/pricing" | sed 's/<script[^>]*>.*<\/script>//g; s/<style[^>]*>.*<\/style>//g; s/<[^>]*>//g; s/&amp;/\&/g; s/&lt;//g; s/&nbsp;/ /g; s/&#[0-9]*;//g' | tr -s ' \n' | head -c 3000 +``` + +Limit to ~3000 chars per page to keep subagent context manageable. For JS-heavy pages (client-rendered pricing tables), use `bb browse` instead of `bb fetch`. + +## Discovery Subagent Prompt Template + +``` +You are a competitor discovery subagent. Run search queries and save results. + +TOOL RULES — CRITICAL, FOLLOW EXACTLY: +1. You may ONLY use the Bash tool. No exceptions. +2. Run ALL searches in a SINGLE Bash call using && chaining. +3. BANNED TOOLS: WebFetch, WebSearch, Write, Read, Glob, Grep — ALL BANNED. +4. NEVER use ~ or $HOME in paths — use full literal paths. 
+ +TASK: +Run ALL of the following searches in ONE Bash command: + +bb search "{query1}" --num-results 25 --output /tmp/competitor_discovery_batch_{N1}.json && \ +bb search "{query2}" --num-results 25 --output /tmp/competitor_discovery_batch_{N2}.json && \ +bb search "{query3}" --num-results 25 --output /tmp/competitor_discovery_batch_{N3}.json && \ +echo "Discovery complete" + +After the command completes, report back ONLY the count of results per batch. +Do NOT analyze, summarize, or return the actual results. +``` + +### Discovery query patterns + +Discovery uses **three parallel waves** (evaluated — all three are additive): + +**Wave A — Generic alternatives** (broad net, lots of noise): +- `"alternatives to {user_company}"` +- `"{user_company} competitors"` + +**Wave B — Precise category queries** (uses `precise_category` from self-research): +- `"{precise_category}"` verbatim +- `"{precise_category_2_3_keywords}"` — pick the 3 most distinctive tokens +- Compose with "API", "cloud", "for agents": `"cloud {primary_noun} for ai agents"`, `"{primary_noun} infrastructure API"` + +**Wave C — Comparison-page graph** (highest-precision single wave): +- `"{user_company} vs"` +- For each seed competitor from the user's profile, also run `"{seed} vs"` +- After the searches, `scripts/extract_vs_names.mjs` parses `"X vs Y"` titles across all Wave C results to surface candidate names that don't appear as URLs. + +**Evaluation result** (tested on Browserbase): Wave A returns ~10% real competitors (mostly AI-tool-listicle aggregators). Wave B returns ~35%. Wave C uniquely surfaces named brands via title parsing that neither A nor B finds. Use all three. + +## Enrichment fan-out — 5 subagents PER competitor (deep/deeper modes) + +For each gated-PASS competitor, launch **five parallel subagents**, one per lane. Each subagent writes a *partial* to `{OUTPUT_DIR}/partials/{slug}.{lane}.md`. 
After all subagents complete, `scripts/merge_partials.mjs` unions the partials into one canonical `{OUTPUT_DIR}/{slug}.md` per competitor (dedup mentions by URL, sort by date desc). + +The 5 lanes: + +| Lane | Slug | Scope | +|------|------|-------| +| **A. Marketing** | `marketing` | Owns canonical frontmatter. Pricing, features, positioning, integrations, customers, target, team, funding, HQ. Homepage + sitemap-driven page discovery. | +| **B. Discussion** | `discussion` | Reddit, HN, forums, dev.to, hashnode. Broader queries beyond `site:` restrictions — also `"{competitor}" discussion`, `"{competitor}" review 2026`, `"{competitor}" issues OR problems`. Writes Mentions bullets with dates. | +| **C. Social** | `social` | LinkedIn posts, YouTube videos, Twitter/X threads. Search snippets only — do NOT fetch (auth walls). | +| **D. News & Comparisons** | `news` | Comparison pages ("X vs Y"), TechCrunch / Verge / Forbes / VentureBeat / Businesswire, independent blog reviews, Substack. Every mention MUST include a date. | +| **E. Technical & Benchmarks** | `technical` | GitHub benchmark repos/PRs, performance blog posts, independent tests. Writes Benchmarks bullets AND Findings on technical specifics (CDP support, uptime, concurrency limits, SDKs). | + +**Wave management for 5 competitors × 5 lanes = 25 subagents**: launch 5 subagents per competitor in ONE message (all 5 lanes parallel), sequentially per competitor across 5 messages. Or for ≤3 competitors, fit all 15 subagents in 3 messages. + +**Merge step** (once all partials exist): +```bash +node {SKILL_DIR}/scripts/merge_partials.mjs {OUTPUT_DIR} +``` +Produces one `{OUTPUT_DIR}/{slug}.md` per competitor with dedup'd Mentions (sorted date desc), Benchmarks, and Findings. + +## Legacy: Single-subagent template (quick mode only) + +In `quick` mode, keep a single subagent per batch of competitors (no fan-out — Lane 1 only, budget 2-3 calls each). + +``` +You are a competitor enrichment subagent. 
For each competitor URL, run the 4-lane research +pattern and write a single markdown file per competitor. + +CONTEXT: +- User's company: {user_company} +- User's product: {user_product} +- User's key features: {user_key_features} +- Depth mode: {depth_mode} (quick | deep | deeper) +- Output directory: {OUTPUT_DIR} ← write files HERE, as a full literal path + +COMPETITOR URLS TO PROCESS: +{url_list} + +TOOL RULES — CRITICAL, FOLLOW EXACTLY: +1. You may ONLY use the Bash tool. No exceptions. +2. All searches: Bash → bb search "..." --num-results 10 +3. All page fetches: Bash → bb fetch --allow-redirects "..." + bb fetch returns RAW HTML. To extract text, pipe through: + sed 's/<script[^>]*>.*<\/script>//g; s/<style[^>]*>.*<\/style>//g; s/<[^>]*>//g' | tr -s ' \n' | head -c 3000 + If a page returns thin content or "enable JavaScript", use bb browse instead. +4. BATCH all file writes: Write ALL markdown files in a SINGLE Bash call using chained heredocs. +5. BANNED TOOLS: WebFetch, WebSearch, Write, Read, Glob, Grep — ALL BANNED. +6. NEVER use ~ or $HOME in paths — use full literal paths. + +RESEARCH PATTERN (per competitor — lanes are depth-gated): + +LANE 1 — Marketing Surface (always run): + a. Fetch competitor homepage + b. Discover via sitemap: /sitemap.xml — find /pricing, /features, /integrations, /customers + c. Fetch 2-4 most relevant pages + d. 
Extract: tagline, positioning, product_description, target_customer, + pricing_model, pricing_tiers, key_features, integrations + +LANE 2 — External Signal (deep + deeper): + Run these searches: + bb search "{competitor} vs" + bb search "{competitor} alternatives review" + bb search "site:reddit.com {competitor}" + bb search "site:news.ycombinator.com {competitor}" + bb search "site:linkedin.com/posts {competitor}" + bb search "site:youtube.com {competitor}" + bb search "{competitor} G2 OR Capterra" + bb search "{competitor} launch OR funding 2025 OR 2026" + + For each search result, classify source type from URL: + reddit.com → Reddit + news.ycombinator.com → HN + linkedin.com → LinkedIn + youtube.com/youtu.be → YouTube + twitter.com/x.com → X (or Twitter — either works) + dev.to → DevTo + hashnode.dev, hashnode.com → Hashnode + *.substack.com → Substack + spotify.com/episode, transistor.fm, simplecast.com → Podcast + g2.com/capterra.com/trustradius.com → Review + url or title contains "vs" → Comparison + techcrunch/theverge/venturebeat/forbes/businesswire/wired/fortune → News + other blog domain → Blog + + Record each as a Mentions line with title + one-line snippet + URL + **date**. Always include + the date when available. `bb search` returns `publishedDate` in the JSON result — prefer it. + If absent, parse the year from title/URL (e.g. "2026" or `/2025/11/` in a news URL). + For LinkedIn and YouTube — use search snippet only, do NOT fetch the page. + +LANE 3 — Public Benchmarks (deeper only): + Run these searches: + bb search "{competitor} benchmark" + bb search "site:github.com {competitor} benchmark" + bb search "{category} benchmark {competitor}" + + Record each hit in ## Benchmarks with: title, source, URL, one-line key finding. + Also append to ## Mentions with type Benchmark. + +LANE 4 — Strategic Diff vs {user_company} (deeper only): + Using Lane 1-3 findings + the user's company profile, write: + ## Comparison vs {user_company} + - Overlaps: ... 
+ - Gaps: ... + - Where they win: ... + - Where you win: ... + Also fill the `strategic_diff` frontmatter field with a one-line summary. + +BUDGETS (respect strictly): + quick: 2-3 tool calls per competitor (homepage + 1-2 pages) + deep: 5-8 tool calls per competitor (Lane 1 + Lane 2) + deeper: 10-15 tool calls per competitor (all 4 lanes) + +OUTPUT — write ALL competitor files in a SINGLE Bash call using chained heredocs directly to {OUTPUT_DIR}: + +cat << 'COMPETITOR_MD' > {OUTPUT_DIR}/{slug1}.md +--- +competitor_name: {name} +website: {url} +tagline: {tagline} +positioning: {positioning} +product_description: {description} +target_customer: {audience} +pricing_model: {model} +pricing_tiers: {tier1} | {tier2} | {tier3} +key_features: {f1} | {f2} | {f3} +integrations: {i1} | {i2} +headquarters: {hq} +founded: {year} +employee_estimate: {estimate} +funding_info: {funding} +strategic_diff: {one line — deeper only} +--- + +## Product +{paragraph} + +## Pricing +{bullets per tier} + +## Features +{bullets} + +## Positioning +{paragraph} + +## Comparison vs {user_company} ← deeper only +- Overlaps: ... +- Gaps: ... +- Where they win: ... +- Where you win: ... + +## Mentions +- **[SourceType]** Title | Snippet (source: URL, YYYY-MM-DD) + +## Benchmarks ← deeper only +- Title | Source | URL | Key finding + +## Research Findings +- **[confidence]** Fact (source: URL) +COMPETITOR_MD +cat << 'COMPETITOR_MD' > {OUTPUT_DIR}/{slug2}.md +... +COMPETITOR_MD + +Use 'COMPETITOR_MD' (quoted) as the heredoc delimiter to prevent shell variable expansion. + +Report back ONLY: "Batch {batch_id}: {succeeded}/{total} competitors researched, {mentions_count} mentions, {benchmarks_count} benchmarks." +Do NOT return raw data to the main conversation. +``` + +## Wave Management + +### Key Principle: Maximize Parallelism, Minimize Prompts +Launch as many subagents as possible in a single message (up to ~6 per message). Each subagent MUST batch all its Bash operations. 
+ +### Discovery Phase +- Launch up to 6 discovery subagents in a single message, split by wave (A/B/C — see "Discovery query patterns" above) +- Each subagent runs ALL its queries in ONE Bash call with `&&` chaining +- After all waves complete, run the following in sequence: + ```bash + # 1. Dedup URLs from all batches + node {SKILL_DIR}/scripts/list_urls.mjs /tmp --prefix competitor > /tmp/competitor_urls.txt + + # 2. Extract candidate names from "X vs Y" titles (Wave C output) + node {SKILL_DIR}/scripts/extract_vs_names.mjs /tmp --prefix competitor \ + --seed "{user_company},{seed1},{seed2},{seed3}" \ + > /tmp/competitor_vs_names.jsonl + ``` +- **Filter URLs**: Remove blog posts, news articles, AI-tool directories (seektool.ai, respan.ai, agentsindex.ai, toolradar.com, aitoolsatlas.ai, aidirectory.com, vibecodedthis.com, aichief.com, openalternative.co, cbinsights.com, saasworthy.com, softwareworld.com), review aggregators (g2.com, capterra.com, trustradius.com), databases (crunchbase.com, tracxn.com), and the user's own domain. Keep only candidate company homepages. +- For names from `extract_vs_names.mjs` that didn't resolve to a domain, optionally run `bb search "{name}" --num-results 3` to resolve the top domain; skip if ambiguous. +- **Merge**: filtered-URL list ∪ resolved `vs_names` domains ∪ user-provided seed URLs. Dedup by hostname into `/tmp/competitor_candidates.txt`. + +### Gate Phase (between discovery and enrichment) + +Drop wrong-category candidates BEFORE enrichment burns tool calls on them. 
 + +```bash +cat /tmp/competitor_candidates.txt \ + | node {SKILL_DIR}/scripts/gate_candidates.mjs \ + --include "{category_include_keywords_csv}" \ + --exclude "{exclusion_list_csv}" \ + --concurrency 6 \ + > /tmp/competitor_gated.jsonl + +# Extract PASS-only URLs for enrichment +grep '"status":"PASS"' /tmp/competitor_gated.jsonl \ + | node -e 'require("fs").readFileSync(0,"utf-8").split("\n").filter(Boolean).forEach(l => { try { console.log(JSON.parse(l).url); } catch {} })' \ + > /tmp/competitor_passed.txt +``` + +**Keyword sources**: +- `--include` ← profile's `category_include_keywords` (comma-joined). +- `--exclude` ← profile's `exclusion_list`. + +**Gate logic** (position-aware): REJECT if exclude term in `<title>`; PASS if include term in `<title>`; for hybrid titles with both (e.g. "Browser Automation & Web Scraping API"), tiebreak by first 200 chars of hero text; otherwise fall through to hero-wide check. Conservative by default. + +**Review the output** — the main agent SHOULD spot-check both lists and MAY manually re-include a REJECT if it recognizes a known direct competitor whose own marketing is category-ambiguous. + +**Evaluation on Browserbase** (12 candidates): 7/7 real competitors PASSED; 4/4 wrong-category (antidetect, scraping API, screenshot API, local AI browser) REJECTED. One split-identity edge (Browserless) rejected — acceptable. + +### Enrichment Phase +Two modes: + +- **`quick` mode** — single subagent per batch of competitors. Lane A (marketing) only. ~8 competitors per subagent, 2-3 tool calls each. Writes directly to `{OUTPUT_DIR}/{slug}.md`. +- **`deep` / `deeper` modes** — 5-subagent fan-out PER competitor. Each subagent owns ONE lane (marketing / discussion / social / news / technical). Writes to `{OUTPUT_DIR}/partials/{slug}.{lane}.md`. Budget: 5-8 calls per subagent (deep), 10-15 (deeper). After all lanes complete, run `scripts/merge_partials.mjs` to consolidate. 
+- Launch the 5 lane-subagents for a competitor in ONE Agent tool message (5 parallel Agent calls). Across multiple competitors, batch into 3-5 messages depending on count. + +### Screenshots Phase (after merge, before compile) + +Capture homepage hero + full-page pricing screenshots for each competitor: +```bash +node {SKILL_DIR}/scripts/capture_screenshots.mjs {OUTPUT_DIR} --env remote --concurrency 1 +``` +Requires the `browse` CLI (`npm install -g @browserbasehq/browse-cli`). `--env remote` uses a Browserbase session. Writes PNGs to `{OUTPUT_DIR}/screenshots/{slug}-hero.png` and `{slug}-pricing.png`. `compile_report.mjs` auto-embeds them in per-competitor HTML pages when present. + +Cost: ~15-20s per competitor (serial). Total for 5 competitors ≈ 90s. + +### Sizing Formula +``` +search_queries = ceil(requested_competitors / 20) # discovery is narrower than lead gen +discovery_subagents = ceil(search_queries / 3) +expected_urls = search_queries * 15 + +quick: research_subagents = ceil(expected_urls / 8) +deep: research_subagents = ceil(expected_urls / 4) +deeper: research_subagents = ceil(expected_urls / 2) +``` + +### Error Handling +- If a subagent fails, log and continue with remaining batches +- If >50% of subagents fail in a wave, pause and inform the user +- If `bb fetch --allow-redirects` fails, try `bb browse` as fallback or skip that page + +## Report Compilation + +After all enrichment subagents complete, compile all HTML views in one command: + +```bash +node {SKILL_DIR}/scripts/compile_report.mjs {OUTPUT_DIR} --user-company "{user_company}" --open +``` + +The script: +- Reads all `.md` files in `{OUTPUT_DIR}` +- Parses YAML frontmatter + body sections +- Deduplicates by normalized competitor name +- Generates `{OUTPUT_DIR}/index.html` — overview table (name, tagline, pricing, key features, strategic diff) +- Generates `{OUTPUT_DIR}/competitors/{slug}.html` — per-competitor deep dive +- Generates `{OUTPUT_DIR}/matrix.html` — side-by-side 
feature/pricing grid across competitors +- Generates `{OUTPUT_DIR}/mentions.html` — chronological feed with source-type pills + client-side filter +- Generates `{OUTPUT_DIR}/results.csv` — flat spreadsheet +- Opens `index.html` in the default browser (`--open` flag) +- Prints a JSON summary to stderr diff --git a/skills/competitor-analysis/scripts/capture_screenshots.mjs b/skills/competitor-analysis/scripts/capture_screenshots.mjs new file mode 100644 index 0000000..855bbab --- /dev/null +++ b/skills/competitor-analysis/scripts/capture_screenshots.mjs @@ -0,0 +1,146 @@ +#!/usr/bin/env node + +// Capture hero + pricing screenshots for each competitor in the research directory. +// Reads per-competitor markdown files, extracts `website` and optional `pricing_url` +// frontmatter, navigates via `browse`, and writes PNGs to `{OUTPUT_DIR}/screenshots/`. +// +// Requires: `browse` CLI (`npm install -g @browserbasehq/browse-cli`), either local Chrome +// or a Browserbase remote session (`browse env remote`). +// +// Usage: node capture_screenshots.mjs <research-dir> [--env remote|local] [--concurrency 2] + +import { readdirSync, readFileSync, mkdirSync, existsSync } from 'fs'; +import { join } from 'path'; +import { spawnSync } from 'child_process'; + +const args = process.argv.slice(2); + +if (args.includes('--help') || args.includes('-h') || args.length === 0) { + console.error(`Usage: node capture_screenshots.mjs <research-dir> [options] + +Reads all .md files in <research-dir>, extracts website + pricing URLs from the YAML +frontmatter, and captures two screenshots per competitor: + - {slug}-hero.png — 1280x800 viewport of the homepage + - {slug}-pricing.png — full-page screenshot of the pricing page + +Output goes to <research-dir>/screenshots/. 
+ +Options: + --env <remote|local> Which browse env to use (default: remote) + --concurrency <n> How many competitors to capture in parallel (default: 1) + (screenshot takes ~3s; serial is usually fine) + --skip-existing Skip competitors that already have screenshots + --help, -h Show this help message`); + process.exit(args.includes('--help') || args.includes('-h') ? 0 : 1); +} + +const dir = args[0]; +const envIdx = args.indexOf('--env'); +const browseEnv = envIdx !== -1 ? args[envIdx + 1] : 'remote'; +const concurrencyIdx = args.indexOf('--concurrency'); +const concurrency = concurrencyIdx !== -1 ? parseInt(args[concurrencyIdx + 1], 10) : 1; +const skipExisting = args.includes('--skip-existing'); + +const shotsDir = join(dir, 'screenshots'); +mkdirSync(shotsDir, { recursive: true }); + +function parseFrontmatter(content) { + const m = content.match(/^---\n([\s\S]*?)\n---/); + if (!m) return null; + const fields = {}; + for (const line of m[1].split('\n')) { + const idx = line.indexOf(':'); + if (idx > 0) { + const k = line.slice(0, idx).trim(); + const v = line.slice(idx + 1).trim().replace(/^["']|["']$/g, ''); + if (k && v) fields[k] = v; + } + } + return fields; +} + +// Try common pricing URL patterns if the frontmatter doesn't list one explicitly. +function pricingCandidates(website) { + const base = website.replace(/\/$/, ''); + return [`${base}/pricing`, `${base}/plans`, `${base}/pricing-plans`, base]; +} + +function run(cmd, args, { timeout = 30000 } = {}) { + return spawnSync(cmd, args, { encoding: 'utf-8', timeout, maxBuffer: 4 * 1024 * 1024 }); +} + +// Ensure the browse env is set to the requested mode (one-time config). 
+const envRes = run('browse', ['env', browseEnv]); +if (envRes.status !== 0) { + console.error(`Warning: could not set browse env to ${browseEnv}: ${envRes.stderr || envRes.stdout}`); +} + +async function captureOne(slug, website, pricingUrl) { + const heroPath = join(shotsDir, `${slug}-hero.png`); + const pricingPath = join(shotsDir, `${slug}-pricing.png`); + const result = { slug, hero: null, pricing: null, errors: [] }; + + if (skipExisting && existsSync(heroPath) && existsSync(pricingPath)) { + return { ...result, hero: heroPath, pricing: pricingPath, skipped: true }; + } + + // Hero: viewport 1280x800, single-screen shot + try { + run('browse', ['goto', website], { timeout: 30000 }); + run('browse', ['viewport', '1280', '800']); + run('browse', ['wait', 'timeout', '1500']); // let the hero settle + const r = run('browse', ['screenshot', '--no-animations', heroPath]); + if (r.status === 0 && existsSync(heroPath)) result.hero = heroPath; + else result.errors.push(`hero: ${r.stderr || r.stdout}`); + } catch (err) { result.errors.push(`hero exception: ${err.message}`); } + + // Pricing: full-page; try explicit URL first, then common fallbacks + const urlsToTry = pricingUrl ? 
[pricingUrl, ...pricingCandidates(website)] : pricingCandidates(website); + let pricingOk = false; + for (const url of urlsToTry) { + try { + const gotoRes = run('browse', ['goto', url], { timeout: 30000 }); + if (gotoRes.status !== 0) continue; + run('browse', ['wait', 'timeout', '1500']); + const r = run('browse', ['screenshot', '--full-page', '--no-animations', pricingPath]); + if (r.status === 0 && existsSync(pricingPath)) { result.pricing = pricingPath; pricingOk = true; break; } + } catch {} + } + if (!pricingOk) result.errors.push('pricing: no candidate URL captured'); + + return result; +} + +// Load competitor records +const files = readdirSync(dir).filter(f => f.endsWith('.md')).sort(); +const jobs = []; +for (const f of files) { + const content = readFileSync(join(dir, f), 'utf-8'); + const fm = parseFrontmatter(content); + if (!fm || !fm.website) continue; + const slug = f.replace('.md', ''); + jobs.push({ slug, website: fm.website, pricingUrl: fm.pricing_url }); +} + +console.error(`Capturing screenshots for ${jobs.length} competitors → ${shotsDir}`); + +const results = []; +const queue = [...jobs]; +async function worker() { + while (queue.length > 0) { + const job = queue.shift(); + const started = Date.now(); + const r = await captureOne(job.slug, job.website, job.pricingUrl); + results.push(r); + const elapsed = ((Date.now() - started) / 1000).toFixed(1); + const marks = [r.hero ? 'H' : '-', r.pricing ? 'P' : '-'].join(''); + console.error(` [${marks}] ${job.slug.padEnd(24)} ${elapsed}s ${r.skipped ? '(skipped)' : ''}`); + if (r.errors.length) for (const e of r.errors) console.error(` ! 
${e.slice(0, 120)}`); + } +} +await Promise.all(Array(Math.min(concurrency, jobs.length || 1)).fill(0).map(worker)); + +const okHero = results.filter(r => r.hero).length; +const okPricing = results.filter(r => r.pricing).length; +console.error(`\nDone: ${okHero}/${jobs.length} hero · ${okPricing}/${jobs.length} pricing`); +console.log(JSON.stringify({ total: jobs.length, hero: okHero, pricing: okPricing, outputDir: shotsDir })); diff --git a/skills/competitor-analysis/scripts/compile_report.mjs b/skills/competitor-analysis/scripts/compile_report.mjs new file mode 100644 index 0000000..3752618 --- /dev/null +++ b/skills/competitor-analysis/scripts/compile_report.mjs @@ -0,0 +1,745 @@ +#!/usr/bin/env node + +// Compiles per-competitor markdown files into an HTML report + CSV. +// Produces four views: index.html (overview), competitors/*.html (deep dive), +// matrix.html (side-by-side feature/pricing grid), mentions.html (chronological feed). +// +// Usage: node compile_report.mjs <research-dir> [--user-company "Acme"] [--template <path>] [--open] + +import { readdirSync, readFileSync, writeFileSync, existsSync, mkdirSync } from 'fs'; +import { join, dirname } from 'path'; +import { fileURLToPath } from 'url'; + +const __filename = fileURLToPath(import.meta.url); +const __dirname = dirname(__filename); + +const args = process.argv.slice(2); + +if (args.includes('--help') || args.includes('-h') || args.length === 0) { + console.error(`Usage: node compile_report.mjs <research-dir> [--user-company "<name>"] [--template <path>] [--open] + +Reads all .md files from <research-dir>, generates: + - index.html — overview: competitor table with tagline, pricing, features, strategic diff + - competitors/<slug>.html — per-competitor deep dive pages + - matrix.html — side-by-side feature/pricing grid across competitors + - mentions.html — chronological feed of all external mentions with source-type filter + - results.csv — flat spreadsheet + +Options: + --user-company <name> Name 
of the user's company (used in comparison sections) + --template <path> Path to report-template.html (default: auto-detect) + --open Open index.html in the default browser after generation + --help, -h Show this help message`); + process.exit(args.includes('--help') || args.includes('-h') ? 0 : 1); +} + +const dir = args[0]; +const shouldOpen = args.includes('--open'); +const userCompanyIdx = args.indexOf('--user-company'); +const userCompany = userCompanyIdx !== -1 ? args[userCompanyIdx + 1] : ''; +const templateIdx = args.indexOf('--template'); +let templatePath = templateIdx !== -1 ? args[templateIdx + 1] : null; + +if (!templatePath) { + const candidates = [ + join(__dirname, '..', 'references', 'report-template.html'), + join(__dirname, 'report-template.html'), + ]; + templatePath = candidates.find(p => existsSync(p)); + if (!templatePath) { + console.error('Error: Could not find report-template.html. Use --template to specify path.'); + process.exit(1); + } +} + +const template = readFileSync(templatePath, 'utf-8'); + +let files; +try { + files = readdirSync(dir).filter(f => f.endsWith('.md')).sort(); +} catch (err) { + console.error(`Error reading directory ${dir}: ${err.message}`); + process.exit(1); +} + +if (files.length === 0) { + console.error(`No .md files found in ${dir}`); + process.exit(1); +} + +// ---------- Parsing ---------- + +function parseFrontmatter(content) { + const fmMatch = content.match(/^---\n([\s\S]*?)\n---/); + if (!fmMatch) return null; + const fields = {}; + for (const line of fmMatch[1].split('\n')) { + const idx = line.indexOf(':'); + if (idx > 0) { + const key = line.slice(0, idx).trim(); + const val = line.slice(idx + 1).trim().replace(/^["']|["']$/g, ''); + if (key && val) fields[key] = val; + } + } + return fields; +} + +function parseBody(content) { + const bodyMatch = content.match(/^---\n[\s\S]*?\n---\n([\s\S]*)/); + return bodyMatch ? 
bodyMatch[1].trim() : ''; +} + +function parseSections(body) { + const sections = {}; + const lines = body.split('\n'); + let currentKey = null; + let buffer = []; + for (const line of lines) { + const m = line.match(/^## (.+)$/); + if (m) { + if (currentKey !== null) sections[currentKey] = buffer.join('\n').trim(); + currentKey = m[1].trim(); + buffer = []; + } else if (currentKey !== null) { + buffer.push(line); + } + } + if (currentKey !== null) sections[currentKey] = buffer.join('\n').trim(); + return sections; +} + +// Parse Mentions section into structured entries. +// Format: `- **[SourceType]** Title | Snippet (source: URL, YYYY-MM-DD)` +function parseMentions(sectionText) { + if (!sectionText) return []; + const out = []; + for (const raw of sectionText.split('\n')) { + const line = raw.trim(); + if (!line.startsWith('- ')) continue; + const typeM = line.match(/^-\s*\*\*\[([^\]]+)\]\*\*\s*(.*)$/); + if (!typeM) continue; + const sourceType = typeM[1].trim(); + let rest = typeM[2]; + + let url = ''; + let date = ''; + const sourceM = rest.match(/\(source:\s*([^)]+)\)\s*$/); + if (sourceM) { + const sourceBlock = sourceM[1]; + const parts = sourceBlock.split(',').map(s => s.trim()).filter(Boolean); + url = parts[0] || ''; + const dateCandidate = parts.slice(1).join(', '); + if (dateCandidate && /\d{4}-\d{2}-\d{2}/.test(dateCandidate)) date = dateCandidate.match(/\d{4}-\d{2}-\d{2}/)[0]; + rest = rest.slice(0, sourceM.index).trim(); + } + + let title = rest; + let snippet = ''; + const pipeIdx = rest.indexOf('|'); + if (pipeIdx !== -1) { + title = rest.slice(0, pipeIdx).trim(); + snippet = rest.slice(pipeIdx + 1).trim(); + } + + out.push({ sourceType, title, snippet, url, date }); + } + return out; +} + +// Parse Benchmarks section into structured entries. 
 +// Format: `- Title | Source | URL | Key finding` or `- **Title** — Source (URL): finding` +function parseBenchmarks(sectionText) { + if (!sectionText) return []; + const out = []; + for (const raw of sectionText.split('\n')) { + const line = raw.trim(); + if (!line.startsWith('- ')) continue; + const rest = line.slice(2).trim(); + const parts = rest.split('|').map(s => s.trim()).filter(Boolean); + let title = '', source = '', url = '', finding = ''; + if (parts.length >= 4) { + [title, source, url, finding] = parts; + } else if (parts.length === 3) { + [title, url, finding] = parts; + } else { + title = rest; + const urlM = rest.match(/https?:\/\/\S+/); + if (urlM) url = urlM[0]; + } + out.push({ title, source, url, finding }); + } + return out; +} + +function splitPipes(s) { + return (s || '').split('|').map(x => x.trim()).filter(Boolean); +} + +function escapeHtml(str) { + return (str || '').replace(/&/g, '&amp;').replace(/</g, '&lt;').replace(/>/g, '&gt;').replace(/"/g, '&quot;'); +} + +function mdToHtml(md) { + const lines = md.split('\n'); + const out = []; + let inList = false; + let paraLines = []; + + function flushPara() { + if (paraLines.length > 0) { + let text = escapeHtml(paraLines.join(' ').trim()); + text = text.replace(/\*\*\[(\w+)\]\*\*/g, '<span class="confidence $1">[$1]</span>'); + text = text.replace(/\*\*([^*]+)\*\*/g, '<strong>$1</strong>'); + if (text) out.push(`<p>${text}</p>`); + paraLines = []; + } + } + function closeList() { if (inList) { out.push('</ul>'); inList = false; } } + + for (const line of lines) { + const trimmed = line.trim(); + if (!trimmed) { flushPara(); closeList(); continue; } + if (trimmed.startsWith('## ')) { flushPara(); closeList(); out.push(`<h2>${escapeHtml(trimmed.slice(3))}</h2>`); continue; } + if (trimmed.startsWith('### ')) { flushPara(); closeList(); out.push(`<h3>${escapeHtml(trimmed.slice(4))}</h3>`); continue; } + if (trimmed.startsWith('- ')) { + flushPara(); + if (!inList) { out.push('<ul>'); inList = true; } + let 
text = escapeHtml(trimmed.slice(2)); + text = text.replace(/\*\*\[(\w+)\]\*\*/g, '<span class="confidence $1">[$1]</span>'); + text = text.replace(/\*\*([^*]+)\*\*/g, '<strong>$1</strong>'); + text = text.replace(/(https?:\/\/\S+)/g, '<a href="$1" target="_blank">$1</a>'); + out.push(`<li>${text}</li>`); + continue; + } + closeList(); + paraLines.push(trimmed); + } + flushPara(); closeList(); + return out.join('\n'); +} + +// ---------- Load all competitor records ---------- + +const competitors = []; +for (const file of files) { + const content = readFileSync(join(dir, file), 'utf-8'); + const fields = parseFrontmatter(content); + if (!fields) continue; + const body = parseBody(content); + const sections = parseSections(body); + const mentions = parseMentions(sections['Mentions']); + const benchmarks = parseBenchmarks(sections['Benchmarks']); + const slug = file.replace('.md', ''); + competitors.push({ ...fields, body, sections, mentions, benchmarks, slug, file }); +} + +// Deduplicate by normalized competitor name (keep first occurrence — richer data tends to come first alphabetically) +const seen = new Map(); +for (const c of competitors) { + const name = (c.competitor_name || '').toLowerCase().replace(/\s*(inc|llc|ltd|corp|co)\s*\.?$/i, '').trim(); + if (!seen.has(name)) seen.set(name, c); +} +const deduped = [...seen.values()].sort((a, b) => (a.competitor_name || '').localeCompare(b.competitor_name || '')); + +// ---------- Aggregates ---------- + +const totalMentions = deduped.reduce((sum, c) => sum + c.mentions.length, 0); +const totalBenchmarks = deduped.reduce((sum, c) => sum + c.benchmarks.length, 0); +const withPricing = deduped.filter(c => c.pricing_tiers).length; + +const dirName = dir.split('/').pop(); +const title = dirName.replace(/_/g, ' ').replace(/-/g, ' ').replace(/\b\w/g, c => c.toUpperCase()); +const genDate = new Date().toLocaleDateString('en-US', { year: 'numeric', month: 'long', day: 'numeric' }); +const metaLine = `${deduped.length} 
competitors · ${totalMentions} mentions · ${totalBenchmarks} benchmarks · ${genDate}`; + +// ---------- index.html (overview) ---------- + +function featurePills(featuresStr, max = 4) { + const feats = splitPipes(featuresStr).slice(0, max); + return feats.map(f => `<span class="pill pill-feature">${escapeHtml(f)}</span>`).join(''); +} + +const tableRows = deduped.map(c => { + const hasDetail = c.body && c.body.length > 50; + const nameHtml = hasDetail + ? `<a href="competitors/${c.slug}.html">${escapeHtml(c.competitor_name)}</a>` + : escapeHtml(c.competitor_name); + const websiteHtml = c.website + ? `<span class="muted-line"><a href="${escapeHtml(c.website)}" target="_blank" style="color:var(--muted);">${escapeHtml(c.website.replace(/^https?:\/\/(www\.)?/, ''))}</a></span>` + : ''; + const pricingShort = splitPipes(c.pricing_tiers).slice(0, 3).join(' · ') || '—'; + return ` <tr> + <td><strong>${nameHtml}</strong>${websiteHtml}</td> + <td style="max-width:260px;">${escapeHtml(c.tagline || c.positioning || c.product_description || '')}</td> + <td style="max-width:180px;">${escapeHtml(pricingShort)}</td> + <td style="max-width:260px;">${featurePills(c.key_features)}</td> + <td class="muted-line" style="max-width:260px;color:var(--muted);font-size:0.8125rem;">${escapeHtml(c.strategic_diff || '')}</td> + </tr>`; +}).join('\n'); + +let indexHtml = template + .replace(/\{\{TITLE\}\}/g, escapeHtml(`${title}`)) + .replace(/\{\{META\}\}/g, escapeHtml(metaLine)) + .replace(/\{\{TOTAL\}\}/g, String(deduped.length)) + .replace(/\{\{MENTION_COUNT\}\}/g, String(totalMentions)) + .replace(/\{\{BENCHMARK_COUNT\}\}/g, String(totalBenchmarks)) + .replace(/\{\{WITH_PRICING\}\}/g, String(withPricing)) + .replace(/\{\{TABLE_ROWS\}\}/g, tableRows); + +writeFileSync(join(dir, 'index.html'), indexHtml); + +// ---------- competitors/{slug}.html ---------- + +try { mkdirSync(join(dir, 'competitors'), { recursive: true }); } catch {} + +const perCompetitorCss = ` + :root { --brand:#F03603; 
--blue:#4DA9E4; --black:#100D0D; --gray:#514F4F; --border:#edebeb; --bg:#F9F6F4; --card:#ffffff; --text:#100D0D; --muted:#514F4F; } + * { margin:0; padding:0; box-sizing:border-box; } + body { font-family:Inter,-apple-system,BlinkMacSystemFont,'Segoe UI',system-ui,sans-serif; background:var(--bg); color:var(--text); line-height:1.6; font-size:16px; } + .container { max-width:880px; margin:0 auto; padding:2rem 1.5rem; } + a { color:var(--brand); text-decoration:none; } + a:hover { text-decoration:underline; } + .back { font-size:0.875rem; color:var(--muted); margin-bottom:1.5rem; display:inline-block; } + .back:hover { color:var(--brand); } + header { margin-bottom:2rem; } + header h1 { font-size:1.5rem; font-weight:600; margin-bottom:0.25rem; } + header .meta { color:var(--muted); font-size:0.875rem; } + .fields { background:var(--card); border:1px solid var(--border); border-radius:4px; padding:1.25rem; margin-bottom:2rem; display:grid; grid-template-columns:auto 1fr; gap:0.375rem 1rem; font-size:0.875rem; } + .fields dt { color:var(--muted); font-weight:500; } + .fields dd { color:var(--text); } + .research { background:var(--card); border:1px solid var(--border); border-radius:4px; padding:1.5rem; margin-bottom:1.25rem; } + .research h2 { font-size:1.125rem; font-weight:600; margin:1.5rem 0 0.5rem 0; color:var(--black); } + .research h2:first-child { margin-top:0; } + .research p { margin-bottom:0.75rem; } + .research ul { margin:0.5rem 0 1rem 1.25rem; } + .research li { margin-bottom:0.375rem; font-size:0.875rem; } + .confidence { font-size:0.75rem; font-weight:600; padding:1px 6px; border-radius:2px; } + .confidence.high { background:rgba(144,201,77,0.12); color:#5a8a1a; } + .confidence.medium { background:rgba(244,186,65,0.12); color:#9a7520; } + .confidence.low { background:rgba(240,54,3,0.08); color:var(--brand); } + .mention-item { display:flex; gap:0.5rem; align-items:flex-start; padding:0.5rem 0; border-bottom:1px solid var(--border); font-size:0.875rem; 
} + .mention-item:last-child { border-bottom:none; } + .src-pill { font-size:0.6875rem; font-weight:600; padding:2px 8px; border-radius:999px; white-space:nowrap; border:1px solid; } + .src-Benchmark { background:rgba(77,169,228,0.12); color:#2172a3; border-color:rgba(77,169,228,0.4); } + .src-Comparison { background:rgba(240,54,3,0.10); color:var(--brand); border-color:rgba(240,54,3,0.4); } + .src-News { background:#f2f2f2; color:var(--black); border-color:#ddd; } + .src-Reddit { background:#fff2eb; color:#d84300; border-color:#ffd4b7; } + .src-HN { background:#fff4e5; color:#c95500; border-color:#ffcc99; } + .src-LinkedIn { background:#e7f1fa; color:#0a66c2; border-color:#b3d4ee; } + .src-YouTube { background:#ffebee; color:#c4302b; border-color:#f7b2ae; } + .src-Review { background:rgba(144,201,77,0.12); color:#5a8a1a; border-color:rgba(144,201,77,0.4); } + .src-Podcast { background:#efe7fa; color:#6236c2; border-color:#d1bde9; } + .src-X { background:#eef2f7; color:#111; border-color:#cfd9e5; } + .src-Twitter { background:#eef2f7; color:#111; border-color:#cfd9e5; } + .src-DevTo { background:#f3f3f6; color:#0a0a0a; border-color:#dcdce0; } + .src-Hashnode { background:#eef4ff; color:#2962ff; border-color:#c6d8ff; } + .src-Substack { background:#fff4e5; color:#ff6719; border-color:#ffd4b7; } + .src-Blog { background:#f6f3ee; color:#6a5d45; border-color:#e1dbcc; } + .shots { display:grid; grid-template-columns:1fr 1fr; gap:1rem; margin-bottom:1.5rem; } + @media (max-width:720px) { .shots { grid-template-columns:1fr; } } + .shot { background:var(--card); border:1px solid var(--border); border-radius:4px; overflow:hidden; } + .shot-label { font-size:0.6875rem; text-transform:uppercase; letter-spacing:0.05em; color:var(--muted); font-weight:600; padding:0.5rem 0.75rem; border-bottom:1px solid var(--border); background:#fafafa; } + .shot img { display:block; width:100%; height:auto; } + .shot-pricing img { max-height:560px; object-fit:cover; object-position:top; } + 
footer { margin-top:3rem; padding-top:1.5rem; border-top:1px solid var(--border); text-align:center; font-size:0.75rem; color:var(--muted); } + footer a { color:var(--brand); text-decoration:none; font-weight:500; } +`; + +for (const c of deduped) { + if (!c.body || c.body.length < 50) continue; + + const mentionsHtml = c.mentions.length + ? c.mentions.map(m => { + const dateStr = m.date ? `<span class="muted-line" style="color:var(--muted);font-size:0.75rem;margin-left:auto;">${escapeHtml(m.date)}</span>` : ''; + const linkText = m.url ? `<a href="${escapeHtml(m.url)}" target="_blank">${escapeHtml(m.title || m.url)}</a>` : escapeHtml(m.title); + const snippet = m.snippet ? ` — <span style="color:var(--muted);">${escapeHtml(m.snippet)}</span>` : ''; + return `<div class="mention-item"><span class="src-pill src-${escapeHtml(m.sourceType)}">${escapeHtml(m.sourceType)}</span><div style="flex:1;">${linkText}${snippet}</div>${dateStr}</div>`; + }).join('\n') + : '<p style="color:var(--muted);font-size:0.875rem;">No mentions collected.</p>'; + + const benchmarksHtml = c.benchmarks.length + ? `<ul>${c.benchmarks.map(b => { + const link = b.url ? `<a href="${escapeHtml(b.url)}" target="_blank">${escapeHtml(b.title || b.url)}</a>` : escapeHtml(b.title); + const src = b.source ? ` <span style="color:var(--muted);">(${escapeHtml(b.source)})</span>` : ''; + const finding = b.finding ? ` — ${escapeHtml(b.finding)}` : ''; + return `<li>${link}${src}${finding}</li>`; + }).join('')}</ul>` + : ''; + + const productHtml = c.sections['Product'] ? `<h2>Product</h2>${mdToHtml(c.sections['Product'])}` : ''; + const pricingHtml = c.sections['Pricing'] ? `<h2>Pricing</h2>${mdToHtml(c.sections['Pricing'])}` : ''; + const featuresHtml = c.sections['Features'] ? `<h2>Features</h2>${mdToHtml(c.sections['Features'])}` : ''; + const positioningHtml = c.sections['Positioning'] ? 
`<h2>Positioning</h2>${mdToHtml(c.sections['Positioning'])}` : ''; + const comparisonKey = Object.keys(c.sections).find(k => k.startsWith('Comparison')); + const comparisonHtml = comparisonKey ? `<h2>${escapeHtml(comparisonKey)}</h2>${mdToHtml(c.sections[comparisonKey])}` : ''; + const findingsHtml = c.sections['Research Findings'] ? `<h2>Research Findings</h2>${mdToHtml(c.sections['Research Findings'])}` : ''; + + // Screenshots — filenames match capture_screenshots.mjs output. + const heroShot = existsSync(join(dir, 'screenshots', `${c.slug}-hero.png`)); + const pricingShot = existsSync(join(dir, 'screenshots', `${c.slug}-pricing.png`)); + const screenshotsHtml = (heroShot || pricingShot) ? ` + <div class="shots"> + ${heroShot ? `<div class="shot shot-hero"><div class="shot-label">Homepage hero</div><img src="../screenshots/${escapeHtml(c.slug)}-hero.png" alt="${escapeHtml(c.competitor_name)} homepage hero" loading="lazy"></div>` : ''} + ${pricingShot ? `<div class="shot shot-pricing"><div class="shot-label">Pricing page</div><img src="../screenshots/${escapeHtml(c.slug)}-pricing.png" alt="${escapeHtml(c.competitor_name)} pricing page" loading="lazy"></div>` : ''} + </div>` : ''; + + const companyHtml = `<!DOCTYPE html> +<html lang="en"> +<head> +<meta charset="UTF-8"> +<meta name="viewport" content="width=device-width, initial-scale=1.0"> +<title>${escapeHtml(c.competitor_name)} — Competitor Analysis + + + + +
+ ← Back to overview +
+

${escapeHtml(c.competitor_name)}

+
+ ${c.website ? `${escapeHtml(c.website)}` : ''} + ${c.tagline ? ` · ${escapeHtml(c.tagline)}` : ''} +
+
${screenshotsHtml} +
+ ${c.positioning ? `
Positioning
${escapeHtml(c.positioning)}
` : ''} + ${c.product_description ? `
Product
${escapeHtml(c.product_description)}
` : ''} + ${c.target_customer ? `
Target Customer
${escapeHtml(c.target_customer)}
` : ''} + ${c.pricing_model ? `
Pricing Model
${escapeHtml(c.pricing_model)}
` : ''} + ${c.pricing_tiers ? `
Pricing Tiers
${escapeHtml(c.pricing_tiers)}
` : ''} + ${c.key_features ? `
Key Features
${escapeHtml(c.key_features)}
` : ''} + ${c.integrations ? `
Integrations
${escapeHtml(c.integrations)}
` : ''} + ${c.headquarters ? `
HQ
${escapeHtml(c.headquarters)}
` : ''} + ${c.founded ? `
Founded
${escapeHtml(c.founded)}
` : ''} + ${c.employee_estimate ? `
Employees
${escapeHtml(c.employee_estimate)}
` : ''} + ${c.funding_info ? `
Funding
${escapeHtml(c.funding_info)}
` : ''} + ${c.strategic_diff ? `
Strategic Diff
${escapeHtml(c.strategic_diff)}
` : ''} +
+
+ ${productHtml} + ${pricingHtml} + ${featuresHtml} + ${positioningHtml} + ${comparisonHtml} +
+
+

Mentions

+ ${mentionsHtml} +
+ ${c.benchmarks.length ? `

Benchmarks

${benchmarksHtml}
` : ''} + ${findingsHtml ? `
${findingsHtml}
` : ''} +
+ + +`; + + writeFileSync(join(dir, 'competitors', `${c.slug}.html`), companyHtml); +} + +// ---------- matrix.html (side-by-side) ---------- + +// Collect union of features and integrations across competitors +function buildMatrixAxis(field) { + const counts = new Map(); + for (const c of deduped) { + for (const item of splitPipes(c[field])) { + const key = item.toLowerCase(); + if (!counts.has(key)) counts.set(key, { label: item, count: 0 }); + counts.get(key).count += 1; + } + } + return [...counts.values()].sort((a, b) => b.count - a.count).slice(0, 18); +} +const featureAxis = buildMatrixAxis('key_features'); +const integrationAxis = buildMatrixAxis('integrations'); + +function competitorHas(c, field, label) { + return splitPipes(c[field]).some(x => x.toLowerCase() === label.toLowerCase()); +} + +function matrixSection(heading, axis, field) { + if (!axis.length) return ''; + // Competitor names tilted 35° (more legible than full vertical). Row label (feature name) is the sticky + // left column so users can scroll horizontally without losing context on many-competitor tables. + const header = ` + ${escapeHtml(heading)} + ${deduped.map(c => ``).join('')} + `; + const rows = axis.map(a => { + const cells = deduped.map(c => competitorHas(c, field, a.label) + ? `●` + : `·`).join(''); + return ` + ${escapeHtml(a.label)}${a.count} + ${cells} + `; + }).join('\n'); + return `
+

${escapeHtml(heading)}

+
+ ${header}${rows}
+
+
`; +} + +const pricingRows = deduped.map(c => `${escapeHtml(c.competitor_name)}${escapeHtml(c.pricing_model || '')}${escapeHtml(c.pricing_tiers || '—')}${escapeHtml(c.target_customer || '')}`).join(''); + +const matrixHtml = ` + + + + +Feature Matrix — ${escapeHtml(title)} + + + + +
+
+

Feature & Pricing Matrix

+
${escapeHtml(metaLine)}
+
+ + +
+

Pricing

+ + + ${pricingRows} +
CompetitorModelTiersTarget Customer
+
+ + ${matrixSection('Features', featureAxis, 'key_features')} + ${matrixSection('Integrations', integrationAxis, 'integrations')} +
+ + +`; + +writeFileSync(join(dir, 'matrix.html'), matrixHtml); + +// ---------- mentions.html (feed + filter) ---------- + +const allMentions = []; +for (const c of deduped) { + for (const m of c.mentions) { + allMentions.push({ ...m, competitor: c.competitor_name, slug: c.slug }); + } +} +// Sort by date desc (empty dates last) +allMentions.sort((a, b) => { + if (a.date && b.date) return b.date.localeCompare(a.date); + if (a.date) return -1; + if (b.date) return 1; + return 0; +}); + +const sourceTypes = [...new Set(allMentions.map(m => m.sourceType))].sort(); +const sourceFilterButtons = ['All', ...sourceTypes].map(t => + `` +).join(''); + +const mentionItems = allMentions.map(m => { + const link = m.url ? `${escapeHtml(m.title || m.url)}` : escapeHtml(m.title); + const snippet = m.snippet ? `
${escapeHtml(m.snippet)}
` : ''; + const date = m.date ? `${escapeHtml(m.date)}` : ''; + return `
+ ${escapeHtml(m.sourceType)} +
+ +
${link}
+ ${snippet} +
+
`; +}).join('\n'); + +const mentionsHtml = ` + + + + +Mentions Feed — ${escapeHtml(title)} + + + + +
+
+

Mentions Feed

+
${allMentions.length} mentions across ${deduped.length} competitors · ${escapeHtml(genDate)}
+
+ +
${sourceFilterButtons}
+
+ ${mentionItems || '
No mentions collected — try running in deep or deeper mode.
'} +
+
+ + + +`; + +writeFileSync(join(dir, 'mentions.html'), mentionsHtml); + +// ---------- CSV ---------- + +const priority = [ + 'competitor_name', 'website', 'tagline', 'positioning', 'product_description', + 'target_customer', 'pricing_model', 'pricing_tiers', 'key_features', 'integrations', + 'headquarters', 'founded', 'employee_estimate', 'funding_info', 'strategic_diff' +]; +const flatRows = deduped.map(c => { + const row = {}; + for (const k of Object.keys(c)) { + if (['body', 'sections', 'mentions', 'benchmarks', 'slug', 'file'].includes(k)) continue; + row[k] = c[k]; + } + row.mention_count = String(c.mentions.length); + row.benchmark_count = String(c.benchmarks.length); + return row; +}); +const allCols = [...new Set(flatRows.flatMap(r => Object.keys(r)))]; +const cols = [...priority.filter(c => allCols.includes(c)), ...allCols.filter(c => !priority.includes(c)).sort()]; + +function csvEscape(v) { + v = String(v || ''); + if (v.includes(',') || v.includes('"') || v.includes('\n')) return '"' + v.replace(/"/g, '""') + '"'; + return v; +} + +const csvLines = [cols.join(',')]; +for (const row of flatRows) csvLines.push(cols.map(c => csvEscape(row[c] || '')).join(',')); +writeFileSync(join(dir, 'results.csv'), csvLines.join('\n') + '\n'); + +// ---------- Summary ---------- + +console.error(JSON.stringify({ + total: deduped.length, + mentions: totalMentions, + benchmarks: totalBenchmarks, + with_pricing: withPricing, + user_company: userCompany, + files_generated: { + index: join(dir, 'index.html'), + matrix: join(dir, 'matrix.html'), + mentions: join(dir, 'mentions.html'), + competitors: deduped.filter(c => c.body && c.body.length > 50).length, + csv: join(dir, 'results.csv') + } +}, null, 2)); + +console.log(join(dir, 'index.html')); + +if (shouldOpen) { + const { execSync } = await import('child_process'); + try { execSync(`open "${join(dir, 'index.html')}"`); } catch {} +} diff --git a/skills/competitor-analysis/scripts/extract_vs_names.mjs 
b/skills/competitor-analysis/scripts/extract_vs_names.mjs new file mode 100644 index 0000000..cae3ce5 --- /dev/null +++ b/skills/competitor-analysis/scripts/extract_vs_names.mjs @@ -0,0 +1,112 @@ +#!/usr/bin/env node + +// Parses "X vs Y" patterns from bb search result titles across discovery batch files. +// Produces a ranked list of candidate competitor names, with an example title each, +// and attempts to resolve each name to a domain from the result URL pool. +// +// Usage: node extract_vs_names.mjs [--prefix competitor] [--seed "Browserbase,Hyperbrowser,Steel"] +// +// Output: newline-delimited JSON to stdout, one object per candidate: +// { "name": "anchor", "hits": 3, "domain": "anchorbrowser.io", "example": "Browserless vs Anchor..." } + +import { readdirSync, readFileSync } from 'fs'; +import { join } from 'path'; + +const args = process.argv.slice(2); + +if (args.includes('--help') || args.includes('-h') || args.length === 0) { + console.error(`Usage: node extract_vs_names.mjs [--prefix ] [--seed ""] + +Reads all _discovery_batch_*.json files, parses "X vs Y" patterns from result +titles, and outputs a ranked list of candidate competitor names as newline-delimited JSON. + +Options: + --prefix Batch file prefix (default: "competitor") + --seed "" Comma-separated list of seed names to exclude from output + (you already know these; want the OTHER side of the comparison) + --help, -h Show this help message`); + process.exit(args.includes('--help') || args.includes('-h') ? 0 : 1); +} + +const dir = args[0]; +const prefixIdx = args.indexOf('--prefix'); +const prefix = prefixIdx !== -1 && args[prefixIdx + 1] ? args[prefixIdx + 1] : 'competitor'; +const seedIdx = args.indexOf('--seed'); +const seeds = seedIdx !== -1 && args[seedIdx + 1] + ? 
args[seedIdx + 1].split(',').map(s => s.trim().toLowerCase()).filter(Boolean) + : []; +const seedSet = new Set(seeds); + +const pattern = new RegExp(`^${prefix}_discovery_batch_.*\\.json$`); + +let files; +try { + files = readdirSync(dir).filter(f => pattern.test(f)).sort(); +} catch (err) { + console.error(`Error reading directory ${dir}: ${err.message}`); + process.exit(1); +} + +if (files.length === 0) { + console.error(`No ${prefix}_discovery_batch_*.json files found in ${dir}`); + process.exit(1); +} + +const allResults = []; +for (const f of files) { + try { + const d = JSON.parse(readFileSync(join(dir, f), 'utf-8')); + const rs = Array.isArray(d) ? d : d.results || []; + allResults.push(...rs); + } catch {} +} + +// Build a lookup of hostname -> candidate root domain from all result URLs. +// Used later to try to resolve "anchor" -> "anchorbrowser.io". +const hostMap = new Map(); +for (const r of allResults) { + if (!r.url) continue; + try { + const h = new URL(r.url).hostname.replace(/^www\./, ''); + const root = h.split('.').slice(-2).join('.'); + if (!hostMap.has(root)) hostMap.set(root, h); + } catch {} +} + +// Extract names from "X vs Y" patterns. +const counts = new Map(); +for (const r of allResults) { + const title = (r.title || '').toLowerCase(); + const ms = [...title.matchAll(/\b([a-z][\w.\-]{2,})\s+(?:vs\.?|versus)\s+([a-z][\w.\-]{2,})/g)]; + for (const m of ms) { + for (const raw of [m[1], m[2]]) { + const name = raw.replace(/[^a-z0-9.\-]/g, '').trim(); + if (!name || name.length < 3) continue; + if (seedSet.has(name)) continue; + // Reject obvious non-product tokens + if (['the', 'and', 'for', 'with', 'best', 'top', 'better', 'using', 'choosing'].includes(name)) continue; + if (!counts.has(name)) counts.set(name, { name, hits: 0, example: r.title }); + counts.get(name).hits += 1; + } + } +} + +// Try to resolve each name to a domain. 
+function resolveDomain(name) { + const needle = name.replace(/\./g, ''); + for (const [root, host] of hostMap.entries()) { + const rootBase = root.split('.')[0]; + if (rootBase === needle || rootBase.startsWith(needle) || needle.startsWith(rootBase)) return host; + } + return null; +} + +const ranked = [...counts.values()] + .map(c => ({ ...c, domain: resolveDomain(c.name) })) + .sort((a, b) => b.hits - a.hits); + +for (const c of ranked) { + console.log(JSON.stringify(c)); +} + +console.error(`Extracted ${ranked.length} candidate names from ${files.length} batch files`); diff --git a/skills/competitor-analysis/scripts/gate_candidates.mjs b/skills/competitor-analysis/scripts/gate_candidates.mjs new file mode 100644 index 0000000..8017269 --- /dev/null +++ b/skills/competitor-analysis/scripts/gate_candidates.mjs @@ -0,0 +1,178 @@ +#!/usr/bin/env node + +// Category-fit gate. For each candidate URL, fetch the homepage hero via `bb fetch`, +// extract visible text, and decide whether the candidate is in the same category as +// the user's company based on include/exclude keyword rules. +// +// Usage: +// cat urls.txt | node gate_candidates.mjs \ +// --include "cloud browser,headless browser,browser infrastructure,CDP,agent" \ +// --exclude "antidetect,scraping api,screenshot api,multilogin,scraping platform,proxy rotation" \ +// --concurrency 6 +// +// Output: newline-delimited JSON to stdout with one object per URL: +// { "url": "https://foo.com", "status": "PASS" | "REJECT" | "UNKNOWN", +// "matched_includes": [...], "matched_excludes": [...], "title": "...", "hero": "..." } + +import { execSync, spawnSync } from 'child_process'; +import { readFileSync } from 'fs'; + +const args = process.argv.slice(2); + +if (args.includes('--help') || args.includes('-h')) { + console.error(`Usage: cat urls.txt | node gate_candidates.mjs [options] + +Reads URLs from stdin (one per line) OR from --input . 
For each URL, fetches +the homepage via \`bb fetch --allow-redirects\`, extracts the first N chars of visible +text (the hero / tagline area), and classifies against include/exclude keyword rules. + +Options: + --include "" Required. Comma-separated keywords; candidate PASSES if any match. + --exclude "" Comma-separated keywords; candidate REJECTS if any match. + --input Read URLs from file instead of stdin. + --concurrency Max parallel fetches (default: 6). + --hero-chars Chars of visible text to examine (default: 800). + --help, -h Show this help message.`); + process.exit(args.includes('--help') || args.includes('-h') ? 0 : 1); +} + +function flag(name) { + const i = args.indexOf(name); + return i !== -1 ? args[i + 1] : null; +} + +const includes = (flag('--include') || '').split(',').map(s => s.trim().toLowerCase()).filter(Boolean); +const excludes = (flag('--exclude') || '').split(',').map(s => s.trim().toLowerCase()).filter(Boolean); +const concurrency = parseInt(flag('--concurrency') || '6', 10); +const heroChars = parseInt(flag('--hero-chars') || '800', 10); +const inputFile = flag('--input'); + +if (includes.length === 0) { + console.error('Error: --include is required'); + process.exit(1); +} + +let urls; +if (inputFile) { + urls = readFileSync(inputFile, 'utf-8').split('\n').map(l => l.trim()).filter(Boolean); +} else { + const stdin = readFileSync(0, 'utf-8'); + urls = stdin.split('\n').map(l => l.trim()).filter(Boolean); +} + +if (urls.length === 0) { + console.error('Error: no URLs provided (pipe via stdin or use --input)'); + process.exit(1); +} + +function stripHtml(html) { + return html + .replace(/]*>[\s\S]*?<\/script>/gi, ' ') + .replace(/]*>[\s\S]*?<\/style>/gi, ' ') + .replace(/<[^>]*>/g, ' ') + .replace(/&/g, '&') + .replace(/</g, '<') + .replace(/>/g, '>') + .replace(/"/g, '"') + .replace(/'/g, "'") + .replace(/ /g, ' ') + .replace(/\s+/g, ' ') + .trim(); +} + +// Position-aware classification: +// 1. 
Exclude term in → REJECT (their primary identity is the excluded category) +// 2. Include term in <title> → PASS (their primary identity matches) +// 3. Include in early hero (200ch) → PASS iff no exclude in early hero +// 4. Otherwise → REJECT (default conservative) +// Rationale: <title> is the single strongest signal of what a company sells. +// Mid/late hero mentions (e.g. "we also support web scraping use cases") shouldn't +// disqualify a real competitor that self-identifies in its title as a cloud browser. +function classify(title, heroFull, includes, excludes) { + const titleLower = (title || '').toLowerCase(); + const heroLower = heroFull.toLowerCase(); + const heroEarly = heroLower.slice(0, 200); + + const incTitle = includes.filter(k => titleLower.includes(k)); + const excTitle = excludes.filter(k => titleLower.includes(k)); + const incEarly = includes.filter(k => heroEarly.includes(k)); + const excEarly = excludes.filter(k => heroEarly.includes(k)); + const incHero = includes.filter(k => heroLower.includes(k)); + const excHero = excludes.filter(k => heroLower.includes(k)); + + let status, reason; + if (incTitle.length > 0 && excTitle.length > 0) { + // Hybrid-identity title (e.g. "Browser Automation & Web Scraping API"). + // Break the tie by the early hero — whichever category has more mentions wins. 
+ if (incEarly.length > excEarly.length) { status = 'PASS'; reason = `title-hybrid→hero200 leans include(${incEarly[0] || incTitle[0]})`; } + else if (excEarly.length > incEarly.length) { status = 'REJECT'; reason = `title-hybrid→hero200 leans exclude(${excEarly[0] || excTitle[0]})`; } + else { status = 'PASS'; reason = `title-hybrid→tie, defaulting include(${incTitle[0]})`; } + } + else if (excTitle.length > 0) { status = 'REJECT'; reason = `title→exclude(${excTitle[0]})`; } + else if (incTitle.length > 0) { status = 'PASS'; reason = `title→include(${incTitle[0]})`; } + else if (incEarly.length > 0 && excEarly.length === 0) { status = 'PASS'; reason = `hero200→include(${incEarly[0]})`; } + else if (excEarly.length > 0) { status = 'REJECT'; reason = `hero200→exclude(${excEarly[0]})`; } + else if (incHero.length > 0 && excHero.length === 0) { status = 'PASS'; reason = `hero→include(${incHero[0]})`; } + else { status = 'REJECT'; reason = 'no category signal'; } + + return { + status, reason, + matched_includes: [...new Set([...incTitle, ...incEarly, ...incHero])], + matched_excludes: [...new Set([...excTitle, ...excEarly, ...excHero])], + }; +} + +async function gateOne(url) { + try { + const proc = spawnSync('bb', ['fetch', '--allow-redirects', url], { + encoding: 'utf-8', + maxBuffer: 4 * 1024 * 1024, + timeout: 20000, + }); + if (proc.status !== 0) { + return { url, status: 'UNKNOWN', reason: 'bb fetch failed', matched_includes: [], matched_excludes: [], title: '', hero: '' }; + } + let resp; + try { resp = JSON.parse(proc.stdout); } catch { + return { url, status: 'UNKNOWN', reason: 'non-JSON response', matched_includes: [], matched_excludes: [], title: '', hero: '' }; + } + const html = resp.content || ''; + const titleM = html.match(/<title[^>]*>([^<]*)<\/title>/i); + const title = titleM ? 
titleM[1].trim() : ''; + const heroFull = stripHtml(html).slice(0, heroChars); + const c = classify(title, heroFull, includes, excludes); + return { + url, + status: c.status, + reason: c.reason, + matched_includes: c.matched_includes, + matched_excludes: c.matched_excludes, + title, + hero: heroFull.slice(0, 240), + }; + } catch (err) { + return { url, status: 'UNKNOWN', reason: err.message, matched_includes: [], matched_excludes: [], title: '', hero: '' }; + } +} + +// Run with bounded concurrency +const results = []; +async function runAll() { + const queue = [...urls]; + const workers = Array(Math.min(concurrency, queue.length)).fill(0).map(async () => { + while (queue.length > 0) { + const u = queue.shift(); + const r = await gateOne(u); + results.push(r); + console.log(JSON.stringify(r)); + } + }); + await Promise.all(workers); +} + +await runAll(); + +const pass = results.filter(r => r.status === 'PASS').length; +const reject = results.filter(r => r.status === 'REJECT').length; +const unknown = results.filter(r => r.status === 'UNKNOWN').length; +console.error(`\nGate: ${pass} PASS / ${reject} REJECT / ${unknown} UNKNOWN (of ${results.length})`); diff --git a/skills/competitor-analysis/scripts/list_urls.mjs b/skills/competitor-analysis/scripts/list_urls.mjs new file mode 100644 index 0000000..a4bbcd3 --- /dev/null +++ b/skills/competitor-analysis/scripts/list_urls.mjs @@ -0,0 +1,83 @@ +#!/usr/bin/env node + +// Deduplicates discovery URLs from bb search JSON output files. +// Usage: node list_urls.mjs /tmp [--prefix competitor] +// Reads all {prefix}_discovery_batch_*.json files, deduplicates by domain, +// outputs one URL per line to stdout, stats to stderr. 
+ +import { readdirSync, readFileSync } from 'fs'; +import { join } from 'path'; + +const args = process.argv.slice(2); + +if (args.includes('--help') || args.includes('-h') || args.length === 0) { + console.error(`Usage: node list_urls.mjs <directory> [--prefix <prefix>] + +Reads all <prefix>_discovery_batch_*.json files from <directory>, +deduplicates URLs by domain, and outputs one URL per line to stdout. + +Options: + --prefix <prefix> Batch file prefix (default: "competitor") + --help, -h Show this help message + +Examples: + node list_urls.mjs /tmp + node list_urls.mjs /tmp --prefix competitor`); + process.exit(args.includes('--help') || args.includes('-h') ? 0 : 1); +} + +const dir = args[0]; +const prefixIdx = args.indexOf('--prefix'); +const prefix = prefixIdx !== -1 && args[prefixIdx + 1] ? args[prefixIdx + 1] : 'competitor'; + +const pattern = new RegExp(`^${prefix}_discovery_batch_.*\\.json$`); + +let files; +try { + files = readdirSync(dir) + .filter(f => pattern.test(f)) + .sort(); +} catch (err) { + console.error(`Error reading directory ${dir}: ${err.message}`); + process.exit(1); +} + +if (files.length === 0) { + console.error(`No ${prefix}_discovery_batch_*.json files found in ${dir}`); + process.exit(1); +} + +const seenDomains = new Set(); +const urls = []; +let totalResults = 0; + +for (const file of files) { + try { + const data = JSON.parse(readFileSync(join(dir, file), 'utf-8')); + const results = Array.isArray(data) ? 
data : (data.results || []); + totalResults += results.length; + + for (const result of results) { + const url = result.url; + if (!url) continue; + + try { + const hostname = new URL(url).hostname.replace(/^www\./, ''); + if (!seenDomains.has(hostname)) { + seenDomains.add(hostname); + urls.push(url); + } + } catch { + // Skip invalid URLs + } + } + } catch (err) { + console.error(`Warning: Failed to parse ${file}: ${err.message}`); + } +} + +for (const url of urls) { + console.log(url); +} + +console.error(`\n${files.length} files, ${totalResults} total results, ${urls.length} unique domains`); diff --git a/skills/competitor-analysis/scripts/merge_partials.mjs b/skills/competitor-analysis/scripts/merge_partials.mjs new file mode 100644 index 0000000..b42f3df --- /dev/null +++ b/skills/competitor-analysis/scripts/merge_partials.mjs @@ -0,0 +1,202 @@ +#!/usr/bin/env node + +// Merges per-lane partial markdown files into one consolidated file per competitor. +// +// The 5-lane subagent fan-out writes partials to: {OUTPUT_DIR}/partials/{slug}.{lane}.md +// lane ∈ { marketing, discussion, social, news, technical } +// +// Each partial has its own YAML frontmatter + sections. The marketing partial owns +// the canonical frontmatter (pricing, features, etc.); other lanes contribute only +// Mentions / Benchmarks / Findings bullets. The merge: +// 1. Starts from marketing.md's frontmatter as the canonical header +// 2. Appends body sections in the canonical order (Product, Pricing, Features, +// Positioning, Comparison, Mentions, Benchmarks, Research Findings) +// 3. Unions all Mentions bullets across lanes, dedups by URL, sorts by date desc +// 4. Unions all Research Findings bullets across lanes +// 5. Unions all Benchmarks bullets +// 6. 
Writes the consolidated file to {OUTPUT_DIR}/{slug}.md +// +// Usage: node merge_partials.mjs <research-dir> + +import { readdirSync, readFileSync, writeFileSync, mkdirSync } from 'fs'; +import { join } from 'path'; + +const args = process.argv.slice(2); +if (args.includes('--help') || args.includes('-h') || args.length === 0) { + console.error(`Usage: node merge_partials.mjs <research-dir> + +Reads {dir}/partials/{slug}.{lane}.md files and writes consolidated +{dir}/{slug}.md per competitor. Lanes: marketing, discussion, social, news, technical.`); + process.exit(args.includes('--help') || args.includes('-h') ? 0 : 1); +} + +const dir = args[0]; +const partialsDir = join(dir, 'partials'); + +const LANES = ['marketing', 'discussion', 'social', 'news', 'technical']; + +function parseFrontmatter(content) { + const m = content.match(/^---\n([\s\S]*?)\n---/); + if (!m) return { fm: null, body: content }; + const fields = {}; + for (const line of m[1].split('\n')) { + const idx = line.indexOf(':'); + if (idx > 0) { + const k = line.slice(0, idx).trim(); + const v = line.slice(idx + 1).trim().replace(/^["']|["']$/g, ''); + if (k && v) fields[k] = v; + } + } + const bodyMatch = content.match(/^---\n[\s\S]*?\n---\n([\s\S]*)/); + return { fm: fields, body: bodyMatch ? 
bodyMatch[1].trim() : '' }; +} + +function parseSections(body) { + const sections = {}; + const lines = body.split('\n'); + let currentKey = null; + let buffer = []; + for (const line of lines) { + const m = line.match(/^## (.+)$/); + if (m) { + if (currentKey !== null) sections[currentKey] = buffer.join('\n').trim(); + currentKey = m[1].trim(); + buffer = []; + } else if (currentKey !== null) { + buffer.push(line); + } + } + if (currentKey !== null) sections[currentKey] = buffer.join('\n').trim(); + return sections; +} + +function extractBullets(sectionText) { + if (!sectionText) return []; + return sectionText.split('\n').map(l => l.trim()).filter(l => l.startsWith('- ')); +} + +function urlOf(bullet) { + const m = bullet.match(/\(source:\s*([^,)]+)/); + return m ? m[1].trim() : null; +} + +function dateOf(bullet) { + const m = bullet.match(/\(source:\s*[^,)]+,\s*(\d{4}-\d{2}-\d{2})/); + return m ? m[1] : ''; +} + +let files; +try { files = readdirSync(partialsDir); } catch { + console.error(`No partials directory at ${partialsDir} — nothing to merge.`); + process.exit(0); +} + +// Group partials by slug +const bySlug = new Map(); +for (const f of files) { + if (!f.endsWith('.md')) continue; + const m = f.match(/^(.+)\.([a-z]+)\.md$/); + if (!m) continue; + const slug = m[1]; + const lane = m[2]; + if (!LANES.includes(lane)) continue; + if (!bySlug.has(slug)) bySlug.set(slug, {}); + const content = readFileSync(join(partialsDir, f), 'utf-8'); + bySlug.get(slug)[lane] = parseFrontmatter(content); +} + +let merged = 0; +for (const [slug, lanes] of bySlug.entries()) { + const marketing = lanes.marketing; + if (!marketing || !marketing.fm) { + console.error(`[skip] ${slug}: no marketing partial — cannot form canonical frontmatter`); + continue; + } + + // Union body sections + const allSections = {}; + for (const lane of LANES) { + if (!lanes[lane]) continue; + const secs = parseSections(lanes[lane].body); + for (const [k, v] of Object.entries(secs)) { + if 
(!allSections[k]) allSections[k] = []; + allSections[k].push(v); + } + } + + // Dedup Mentions by URL, sort by date desc + const mentionBullets = (allSections['Mentions'] || []).flatMap(s => extractBullets(s)); + const seenUrls = new Set(); + const dedupedMentions = []; + for (const b of mentionBullets) { + const u = urlOf(b); + const key = u || b; // fallback to bullet text if no URL + if (seenUrls.has(key)) continue; + seenUrls.add(key); + dedupedMentions.push(b); + } + dedupedMentions.sort((a, b) => { + const da = dateOf(a), db = dateOf(b); + if (da && db) return db.localeCompare(da); + if (da) return -1; + if (db) return 1; + return 0; + }); + + // Dedup Benchmarks by URL + const benchmarkBullets = (allSections['Benchmarks'] || []).flatMap(s => extractBullets(s)); + const seenBench = new Set(); + const dedupedBench = []; + for (const b of benchmarkBullets) { + const m = b.match(/https?:\/\/\S+/); + const key = m ? m[0] : b; + if (seenBench.has(key)) continue; + seenBench.add(key); + dedupedBench.push(b); + } + + // Dedup Findings loosely (by exact text) + const findingBullets = (allSections['Research Findings'] || []).flatMap(s => extractBullets(s)); + const dedupedFindings = [...new Set(findingBullets)]; + + // Merge/prefer marketing for Product/Pricing/Features/Positioning/Comparison + function first(key) { + const arr = allSections[key] || []; + return arr.length ? 
arr[0] : ''; + } + + // Rebuild frontmatter (marketing's FM wins; other lanes may add `pricing_url` or `strategic_diff`) + const mergedFm = { ...marketing.fm }; + for (const lane of LANES) { + if (!lanes[lane] || !lanes[lane].fm) continue; + for (const [k, v] of Object.entries(lanes[lane].fm)) { + if (!mergedFm[k] && v) mergedFm[k] = v; + } + } + + const fmLines = Object.entries(mergedFm).map(([k, v]) => `${k}: ${v}`).join('\n'); + + // Comparison heading may be "Comparison vs Browserbase" etc — find any key starting with "Comparison" + const comparisonKey = Object.keys(allSections).find(k => k.startsWith('Comparison')); + + const out = [ + '---', + fmLines, + '---', + '', + first('Product') ? `## Product\n${first('Product')}\n` : '', + first('Pricing') ? `## Pricing\n${first('Pricing')}\n` : '', + first('Features') ? `## Features\n${first('Features')}\n` : '', + first('Positioning') ? `## Positioning\n${first('Positioning')}\n` : '', + comparisonKey && allSections[comparisonKey].length ? `## ${comparisonKey}\n${allSections[comparisonKey][0]}\n` : '', + dedupedMentions.length ? `## Mentions\n${dedupedMentions.join('\n')}\n` : '', + dedupedBench.length ? `## Benchmarks\n${dedupedBench.join('\n')}\n` : '', + dedupedFindings.length ? 
`## Research Findings\n${dedupedFindings.join('\n')}\n` : '', + ].filter(Boolean).join('\n'); + + writeFileSync(join(dir, `${slug}.md`), out); + merged += 1; + console.error(`[ok] ${slug}: ${dedupedMentions.length} mentions, ${dedupedBench.length} benchmarks, ${dedupedFindings.length} findings`); +} + +console.log(JSON.stringify({ merged, competitors: bySlug.size })); diff --git a/skills/competitor-analysis/scripts/package.json b/skills/competitor-analysis/scripts/package.json new file mode 100644 index 0000000..c9e2383 --- /dev/null +++ b/skills/competitor-analysis/scripts/package.json @@ -0,0 +1,6 @@ +{ + "name": "competitor-analysis-scripts", + "version": "0.1.0", + "private": true, + "type": "module" +} From 96454335711082f7a2a5d938f6621ea0734d6ad6 Mon Sep 17 00:00:00 2001 From: Jay Sahnan <jay@browserbase.com> Date: Fri, 24 Apr 2026 12:54:31 +0100 Subject: [PATCH 02/23] fix(competitor-analysis): harden merge_partials against subagent format drift --- .../scripts/merge_partials.mjs | 35 +++++++++++++++---- 1 file changed, 29 insertions(+), 6 deletions(-) diff --git a/skills/competitor-analysis/scripts/merge_partials.mjs b/skills/competitor-analysis/scripts/merge_partials.mjs index b42f3df..f89cf54 100644 --- a/skills/competitor-analysis/scripts/merge_partials.mjs +++ b/skills/competitor-analysis/scripts/merge_partials.mjs @@ -72,7 +72,17 @@ function parseSections(body) { function extractBullets(sectionText) { if (!sectionText) return []; - return sectionText.split('\n').map(l => l.trim()).filter(l => l.startsWith('- ')); + const out = []; + for (const raw of sectionText.split('\n')) { + const line = raw.trim(); + // Accept either "- ..." or numbered-list "1. ..." — normalize both to "- ...". + if (line.startsWith('- ')) out.push(line); + else { + const m = line.match(/^\d+\.\s+(.*)$/); + if (m) out.push('- ' + m[1]); + } + } + return out; } function urlOf(bullet) { @@ -165,12 +175,25 @@ for (const [slug, lanes] of bySlug.entries()) { return arr.length ? 
arr[0] : ''; } - // Rebuild frontmatter (marketing's FM wins; other lanes may add `pricing_url` or `strategic_diff`) - const mergedFm = { ...marketing.fm }; + // Rebuild frontmatter — whitelist canonical fields only. Non-marketing lane subagents + // sometimes leak ad-hoc meta fields (notes, searches_run, lane, etc.) into their partial's + // frontmatter; those are debug/summary fields, not canonical data. Drop them here. + const CANONICAL_FIELDS = [ + 'competitor_name', 'website', 'pricing_url', + 'tagline', 'positioning', 'product_description', 'target_customer', + 'pricing_model', 'pricing_tiers', 'key_features', 'integrations', + 'headquarters', 'founded', 'employee_estimate', 'funding_info', + 'strategic_diff', + ]; + const mergedFm = {}; + for (const k of CANONICAL_FIELDS) { + if (marketing.fm[k]) mergedFm[k] = marketing.fm[k]; + } + // Other lanes may fill in canonical gaps (e.g. funding_info from news, strategic_diff from technical). for (const lane of LANES) { - if (!lanes[lane] || !lanes[lane].fm) continue; - for (const [k, v] of Object.entries(lanes[lane].fm)) { - if (!mergedFm[k] && v) mergedFm[k] = v; + if (lane === 'marketing' || !lanes[lane] || !lanes[lane].fm) continue; + for (const k of CANONICAL_FIELDS) { + if (!mergedFm[k] && lanes[lane].fm[k]) mergedFm[k] = lanes[lane].fm[k]; } } From 356c77707ebce95512315c3b34ae1243983dde02 Mon Sep 17 00:00:00 2001 From: Jay Sahnan <jay@browserbase.com> Date: Fri, 24 Apr 2026 15:17:09 +0100 Subject: [PATCH 03/23] =?UTF-8?q?feat(competitor-analysis):=20add=20mandat?= =?UTF-8?q?ory=20Step=204.5=20=E2=80=94=20confirm=20enrichment=20set=20wit?= =?UTF-8?q?h=20user?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The gate has known blind spots that silently drop real competitors: - JS-heavy homepages (Tavily, Firecrawl) — bb fetch returns near-empty hero text, keyword matcher has nothing to match on - Cloudflare challenge pages (Perplexity) — title becomes "Just a 
moment...", no category signal - Semantic variants — "search foundation" (Jina AI), "retrieval backbone", etc. don't lexically match a list centered on "search API" - Apex-vs-product domain confusion — brave.com (browser) vs api-dashboard.search.brave.com (actual API) Auto-promoting the PASS list to enrichment is the wrong default because enrichment is expensive (5 competitors × 5 lane-subagents = 25 subagents, ~10-15 min wall time, ~300 bb calls). Running that on a partly wrong set wastes all of it. Insert a mandatory Step 4.5 between Gate and Deep Enrichment: 1. Main agent groups /tmp/competitor_gated.jsonl into three buckets — PASS, UNKNOWN (fetch failed — surfaced separately, these are the silent misses), and rejected-brand-matches (top ~10 REJECTs whose title contains a seed token or shows up in Wave C "X vs Y" graph). 2. AskUserQuestion with a checkbox list + free-text "add more" field. 3. Write the confirmed set to /tmp/competitor_enrichment_set.txt — this is the input for Step 5, not /tmp/competitor_passed.txt. Surfaced while testing the skill on Exa (exa.ai): the gate passed 22/101 candidates but silently rejected Tavily, Jina AI, Firecrawl, and Perplexity — all real direct competitors. Step 4.5 catches them. SKILL.md pipeline overview is now 8 steps (was 7). Step 5 input path is updated. workflow.md gets a User-confirm phase section with the three buckets and the list of known gate blind spots. --- skills/competitor-analysis/SKILL.md | 44 ++++++++++++++++--- .../references/workflow.md | 17 +++++++ 2 files changed, 54 insertions(+), 7 deletions(-) diff --git a/skills/competitor-analysis/SKILL.md b/skills/competitor-analysis/SKILL.md index 403781b..e4d8a7c 100644 --- a/skills/competitor-analysis/SKILL.md +++ b/skills/competitor-analysis/SKILL.md @@ -53,15 +53,16 @@ Analyze a user's competitors. Uses Browserbase Search API for discovery and a 4- ## Pipeline Overview -Follow these 7 steps in order. Do not skip or reorder. +Follow these 8 steps in order. 
Do not skip or reorder. 1. **User Company Research** — Deeply understand the user's company, produce `precise_category` + `category_include_keywords` + `exclusion_list` 2. **Depth Mode + Seed Input** — Choose depth, accept optional seed competitor URLs 3. **Discovery (3 parallel waves)** — Wave A (alternatives), Wave B (precise category), Wave C (comparison-page graph via "X vs Y" title parsing) 4. **Gate** — `scripts/gate_candidates.mjs` bb-fetches each candidate's hero text and drops wrong-category URLs -5. **Deep Enrichment (5 subagents per competitor in deep/deeper modes)** — Marketing, Discussion, Social, News, Technical — each lane a separate subagent writing to `partials/`; then `merge_partials.mjs` consolidates -6. **Screenshots** — `capture_screenshots.mjs` via the `browse` CLI captures homepage hero + full-page pricing for each competitor -7. **HTML Report** — Overview + per-competitor (with embedded screenshots) + matrix + mentions views +5. **Confirm enrichment set with the user** — Present PASS / UNKNOWN / rejected-brand-matches via `AskUserQuestion`. User ticks the real ones, adds any the discovery missed. Skipping this step is wasteful because enrichment is expensive (25 subagents × depth budget) and the gate is imperfect (JS-heavy homepages, Cloudflare challenges, semantic-variant taglines) +6. **Deep Enrichment (5 subagents per competitor in deep/deeper modes)** — Marketing, Discussion, Social, News, Technical — each lane a separate subagent writing to `partials/`; then `merge_partials.mjs` consolidates +7. **Screenshots** — `capture_screenshots.mjs` via the `browse` CLI captures homepage hero + full-page pricing for each competitor +8. 
**HTML Report** — Overview + per-competitor (with embedded screenshots) + matrix + mentions views --- @@ -172,16 +173,45 @@ grep '"status":"PASS"' /tmp/competitor_gated.jsonl \ The gate fetches each candidate's homepage via `bb fetch --allow-redirects`, extracts the first 800 chars of visible text, and classifies position-aware: exclude in `<title>` → REJECT; include in `<title>` → PASS; hybrid title → hero200 tiebreak; otherwise fall through. -**Review the PASS/REJECT split** in `/tmp/competitor_gated.jsonl`. Spot-check for miscategorizations. If a known direct competitor was REJECTED because their marketing straddles categories (e.g. browser + scraping), manually add their URL to `/tmp/competitor_passed.txt`. - **Evaluated on Browserbase** with 12 mixed candidates: 7/7 real competitors passed, 4/4 wrong-category rejected, 1 known-hybrid edge case rejected. +## Step 4.5: Confirm enrichment set with the user + +**This step is mandatory. Do NOT skip to enrichment just because the gate ran.** + +Enrichment is expensive: 5 competitors × 5 lane-subagents = 25 subagents, ~10-15 minutes of wall clock, ~300 `bb` calls. Running it on the wrong set wastes all of that. The gate also has known blind spots: + +- **JS-heavy homepages** (e.g. Tavily, Firecrawl) — `bb fetch` returns near-empty text, so keyword matching has nothing to match on → REJECT or UNKNOWN +- **Cloudflare challenge pages** (e.g. Perplexity) — title becomes "Just a moment..." → no category signal +- **Semantic variants** — "search foundation" / "retrieval backbone" don't lexically match a list centered on "search API" +- **Domain ambiguity** — `brave.com` (the browser) vs `api-dashboard.search.brave.com` (the actual API product) can confuse classification + +The user almost always has domain knowledge the skill lacks. Ask them. + +**Process** — the main agent: + +1. Read `/tmp/competitor_gated.jsonl` and group rows: + - **PASS bucket**: everything with status=PASS. 
+ - **UNKNOWN bucket**: status=UNKNOWN (fetch failed — always surface, these are the silent misses). + - **Rejected-brand bucket**: top ~10 REJECT rows whose title mentions a well-known brand pattern (e.g. contains the token from a user-supplied seed list, or appears frequently in the Wave C "X vs Y" graph). + +2. Present the buckets to the user, one table per bucket, with URL + title + reason (for rejects). + +3. Use `AskUserQuestion` with a checkbox list of all candidates across the three buckets, plus a free-text "add more" field. The prompt should be explicit: + > "Here are the gate's picks plus a few it was unsure about. Tick the ones that are real competitors in your space, and paste any URLs I missed (comma-separated). Enrichment will run on ONLY the ticked set." + +4. Write the confirmed set to `/tmp/competitor_enrichment_set.txt` (one URL per line). This is the input for Step 5 — not `/tmp/competitor_passed.txt`. + +**If the user doesn't respond** or explicitly says "just run it", fall back to `/tmp/competitor_passed.txt` as-is, but warn in chat that the run may waste budget on wrong-category hits. + +**Exa test, 2026-04-24**: gate auto-passed 22 of 101 candidates but missed Tavily (generic title), Jina AI (semantic mismatch — "search foundation"), Firecrawl (JS-heavy fetch failure), and Perplexity (Cloudflare challenge). All four are real direct competitors. This step catches them. + ## Step 5: Deep Enrichment Two modes. See `references/workflow.md` for prompt templates and wave management. See `references/research-patterns.md` for the lane-by-lane methodology. ### Quick mode — single subagent per batch -- Input: `/tmp/competitor_passed.txt` (gate survivors), ~8 competitors per subagent. +- Input: `/tmp/competitor_enrichment_set.txt` (user-confirmed set from Step 4.5), ~8 competitors per subagent. - One subagent runs Lane A only (marketing surface). 2-3 tool calls each. - Writes directly to `{OUTPUT_DIR}/{slug}.md`. 
diff --git a/skills/competitor-analysis/references/workflow.md b/skills/competitor-analysis/references/workflow.md index 7ba4dcd..98038b6 100644 --- a/skills/competitor-analysis/references/workflow.md +++ b/skills/competitor-analysis/references/workflow.md @@ -302,6 +302,23 @@ Launch as many subagents as possible in a single message (up to ~6 per message). - For names from `extract_vs_names.mjs` that didn't resolve to a domain, optionally run `bb search "{name}" --num-results 3` to resolve the top domain; skip if ambiguous. - **Merge**: filtered-URL list ∪ resolved `vs_names` domains ∪ user-provided seed URLs. Dedup by hostname into `/tmp/competitor_candidates.txt`. +### User-confirm phase (between gate and enrichment — mandatory) + +After the gate writes `/tmp/competitor_gated.jsonl`, the main agent MUST ask the user to confirm the enrichment set before launching subagents. Enrichment is 25 subagents × depth budget per competitor — too expensive to run on guesses. + +Present three buckets to the user: +1. **PASS** — status=PASS rows with title +2. **UNKNOWN** — status=UNKNOWN (fetch failed; always a silent miss risk — JS-heavy homepages, Cloudflare challenges) +3. **Rejected-brand matches** — top ~10 REJECT rows whose title contains a seed token or that showed up repeatedly in the Wave C "X vs Y" graph + +Then `AskUserQuestion` with a checkbox list + free-text "add more". Write the confirmed set to `/tmp/competitor_enrichment_set.txt` (one URL per line). That file — not `/tmp/competitor_passed.txt` — is the input to the enrichment subagents. + +Known gate blind spots to surface aggressively: +- JS-heavy landing pages return near-empty hero text → gate's keyword matcher has nothing to bite on +- Cloudflare challenge titles ("Just a moment...") → obvious false negative +- "Search foundation" / "retrieval backbone" / "agent runtime" — semantic variants of the category don't lexically match +- Apex domain vs product subdomain (e.g. 
`brave.com` the browser vs `api-dashboard.search.brave.com` the actual API product) + ### Gate Phase (between discovery and enrichment) Drop wrong-category candidates BEFORE enrichment burns tool calls on them. From 583f58128ffd7a64b601ceabc03718390799fa2d Mon Sep 17 00:00:00 2001 From: Jay Sahnan <jay@browserbase.com> Date: Fri, 24 Apr 2026 15:42:24 +0100 Subject: [PATCH 04/23] fix(competitor-analysis): normalize mention-bullet format during merge MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Lane subagents don't consistently emit the canonical Mentions bullet format specified in workflow.md — they drift into variants per lane: - Discussion lane: `- **HN** — [Title](url) — snippet` - News lane: `- **2025-08-06** — [News] Outlet — "title" — url` - Technical lane: `- **[Benchmark]** ...` (canonical) compile_report.mjs' parseMentions only matches the canonical `- **[SourceType]** Title | Snippet (source: URL, YYYY-MM-DD)` shape, so non-canonical variants silently dropped from the mentions feed. On the Exa end-to-end test, merge reported 404 total mentions across 5 competitors but the rendered feed showed 0. Rather than fight prompt drift across 25 subagents, normalize at merge time. New normalizeMentionBullet() rewrites the three observed variants into canonical form before dedup, so downstream (CSV, per-competitor pages, mentions.html feed) stays clean. After fix on the Exa run: 294 mentions render, 81% with dates, distribution: 42 LinkedIn / 34 HN / 25 Blog / 24 YouTube / 20 Reddit / 12 Comparison / 11 DevTo / 8 News / 4 Substack. 
--- .../scripts/merge_partials.mjs | 62 ++++++++++++++++++- 1 file changed, 60 insertions(+), 2 deletions(-) diff --git a/skills/competitor-analysis/scripts/merge_partials.mjs b/skills/competitor-analysis/scripts/merge_partials.mjs index f89cf54..5c983f3 100644 --- a/skills/competitor-analysis/scripts/merge_partials.mjs +++ b/skills/competitor-analysis/scripts/merge_partials.mjs @@ -85,6 +85,63 @@ function extractBullets(sectionText) { return out; } +// Normalize Mentions bullet lines to the canonical format that `compile_report.mjs` +// parses: `- **[SourceType]** Title | Snippet (source: URL, YYYY-MM-DD)`. +// +// Lane subagents deviate in practice — we've observed at least three variants: +// A) discussion-style: `- **HN** — [Title](url) — snippet` +// B) news-style: `- **2025-08-06** — [News] Outlet — "title" — url` +// C) canonical: `- **[SourceType]** Title | Snippet (source: URL, YYYY-MM-DD)` +// Rather than fighting prompt drift, normalize at merge time so downstream stays clean. +function normalizeMentionBullet(line) { + // Already canonical — nothing to do. + if (/^-\s*\*\*\[\w+\]\*\*/.test(line)) return line; + + const urlMatch = line.match(/https?:\/\/\S+/); + const url = urlMatch ? urlMatch[0].replace(/[).,\]\s]+$/, '') : ''; + const dateMatch = line.match(/\b(\d{4}-\d{2}-\d{2})\b/); + const date = dateMatch ? dateMatch[1] : ''; + + // Pattern A — `- **SourceType** — [Title](url) — snippet` (e.g. discussion lane) + // **SourceType** is bold but without the brackets we want in canonical form. + let m = line.match(/^-\s*\*\*([^*]+)\*\*\s*[—\-]\s*\[([^\]]+)\]\(([^)]+)\)\s*(?:[—\-]\s*(.*))?$/); + if (m) { + const [, rawType, title, linkUrl, snippet] = m; + const sourceType = rawType.trim().replace(/^\[|\]$/g, ''); + const snippetStr = snippet && snippet.trim() ? ` | ${snippet.trim()}` : ''; + const dateStr = date ? 
`, ${date}` : ''; + return `- **[${sourceType}]** ${title.trim()}${snippetStr} (source: ${linkUrl}${dateStr})`; + } + + // Pattern B — `- **YYYY-MM-DD** — [SourceType] Outlet — "title" — url` (e.g. news lane) + m = line.match(/^-\s*\*\*(\d{4}-\d{2}-\d{2})\*\*\s*[—\-]\s*\[(\w+)\]\s+([^—]+?)\s*[—\-]\s*"?([^"]+?)"?\s*(?:[—\-]\s*(\S+))?\s*$/); + if (m) { + const [, dateStr, sourceType, outlet, title, trailingUrl] = m; + const finalUrl = trailingUrl && trailingUrl.startsWith('http') ? trailingUrl : url; + const snippet = outlet.trim(); + return `- **[${sourceType}]** ${title.trim()}${snippet ? ` | ${snippet}` : ''} (source: ${finalUrl || ''}, ${dateStr})`; + } + + // Pattern C — generic fallback: find any `**X**` tag + URL and format canonically. + m = line.match(/^-\s*\*\*([^*]+)\*\*\s*(.*)/); + if (m && url) { + const rawType = m[1].trim().replace(/^\[|\]$/g, ''); + // If the leading token is a date, try to pull a later **type** off the rest. + let sourceType = rawType; + if (/^\d{4}-\d{2}-\d{2}$/.test(rawType)) { + const innerType = m[2].match(/\[(\w+)\]/); + if (innerType) sourceType = innerType[1]; + } + const linkTextM = m[2].match(/\[([^\]]+)\]/); + const title = linkTextM ? linkTextM[1] : m[2].replace(url, '').replace(/[—"]+/g, '').replace(/^\W+|\W+$/g, '').slice(0, 100); + const dateStr = date ? `, ${date}` : ''; + return `- **[${sourceType}]** ${title.trim()} (source: ${url}${dateStr})`; + } + + // Last resort — leave line untouched (preserves data even if un-parseable). + return line; +} + function urlOf(bullet) { const m = bullet.match(/\(source:\s*([^,)]+)/); return m ? 
m[1].trim() : null; @@ -134,8 +191,9 @@ for (const [slug, lanes] of bySlug.entries()) { } } - // Dedup Mentions by URL, sort by date desc - const mentionBullets = (allSections['Mentions'] || []).flatMap(s => extractBullets(s)); + // Normalize → dedup Mentions by URL, sort by date desc + const rawBullets = (allSections['Mentions'] || []).flatMap(s => extractBullets(s)); + const mentionBullets = rawBullets.map(normalizeMentionBullet); const seenUrls = new Set(); const dedupedMentions = []; for (const b of mentionBullets) { From ab4d53f837d829740dc3fa51575f083e7abffdfa Mon Sep 17 00:00:00 2001 From: Jay Sahnan <jay@browserbase.com> Date: Fri, 24 Apr 2026 17:16:55 +0100 Subject: [PATCH 05/23] =?UTF-8?q?refactor(competitor-analysis):=20drop=20p?= =?UTF-8?q?ricing=20screenshot=20=E2=80=94=20hero=20only?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Pricing-page screenshots were adding ~300KB per competitor (Browsaur's was 580KB) and doubling the per-run browse-CLI cost, but the per-tier text already lives in the frontmatter (pricing_tiers, pricing_model) and renders in the Pricing section of the per-competitor page. The visual didn't add signal over the structured data — it was redundant. Homepage hero stays. That one is worth keeping: the tagline, visual brand identity, and positioning screenshot-vs-text diff surface things the fields can't (logo treatment, animation cues, hero copy voice). Changes: - capture_screenshots.mjs: drop pricingCandidates() + pricing capture loop, simplify result shape to {slug, hero, errors}, halve per-competitor wall time (~10-20s vs ~15-20s, no pricing fallback chain). - compile_report.mjs: remove 2-column .shots grid + .shot-pricing CSS, render single .shot-hero card per page. - SKILL.md Step 6 + references/workflow.md: doc sync. Also clarify that `browse` is a separate package from `bb` (@browserbasehq/browse-cli vs @browserbasehq/cli) — came up as a user question during test runs. 
Existing runs re-rendered without pricing shots; ~1.5MB of PNGs removed from the two test output dirs on Desktop. --- skills/competitor-analysis/SKILL.md | 10 ++-- .../references/workflow.md | 6 +- .../scripts/capture_screenshots.mjs | 57 ++++++------------- .../scripts/compile_report.mjs | 12 ++-- 4 files changed, 29 insertions(+), 56 deletions(-) diff --git a/skills/competitor-analysis/SKILL.md b/skills/competitor-analysis/SKILL.md index e4d8a7c..472df60 100644 --- a/skills/competitor-analysis/SKILL.md +++ b/skills/competitor-analysis/SKILL.md @@ -61,8 +61,8 @@ Follow these 8 steps in order. Do not skip or reorder. 4. **Gate** — `scripts/gate_candidates.mjs` bb-fetches each candidate's hero text and drops wrong-category URLs 5. **Confirm enrichment set with the user** — Present PASS / UNKNOWN / rejected-brand-matches via `AskUserQuestion`. User ticks the real ones, adds any the discovery missed. Skipping this step is wasteful because enrichment is expensive (25 subagents × depth budget) and the gate is imperfect (JS-heavy homepages, Cloudflare challenges, semantic-variant taglines) 6. **Deep Enrichment (5 subagents per competitor in deep/deeper modes)** — Marketing, Discussion, Social, News, Technical — each lane a separate subagent writing to `partials/`; then `merge_partials.mjs` consolidates -7. **Screenshots** — `capture_screenshots.mjs` via the `browse` CLI captures homepage hero + full-page pricing for each competitor -8. **HTML Report** — Overview + per-competitor (with embedded screenshots) + matrix + mentions views +7. **Screenshots** — `capture_screenshots.mjs` via the `browse` CLI captures a 1280×800 homepage hero per competitor +8. 
**HTML Report** — Overview + per-competitor (with embedded hero screenshot) + matrix + mentions views --- @@ -239,14 +239,14 @@ Unions the 5 partials per competitor into one `{OUTPUT_DIR}/{slug}.md` — dedup ## Step 6: Screenshots -Capture homepage hero + full-page pricing screenshots for each competitor: +Capture a homepage hero screenshot per competitor: ```bash node {SKILL_DIR}/scripts/capture_screenshots.mjs {OUTPUT_DIR} --env remote ``` -Uses the `browse` CLI (`npm install -g @browserbasehq/browse-cli`) against a Browserbase remote session. Writes PNGs to `{OUTPUT_DIR}/screenshots/{slug}-{hero,pricing}.png`. The compile step in Step 7 auto-embeds them on each per-competitor HTML page. +Uses the `browse` CLI — a separate package from `bb` (`npm install -g @browserbasehq/browse-cli`). Connects to a Browserbase remote session by default. Writes one PNG per competitor to `{OUTPUT_DIR}/screenshots/{slug}-hero.png`. The compile step in Step 7 auto-embeds the hero on each per-competitor HTML page. -Cost: ~15-20s per competitor. ~90s for 5 competitors. +Cost: ~10-20s per competitor. ~60s for 5 competitors. ## Step 7: HTML Report diff --git a/skills/competitor-analysis/references/workflow.md b/skills/competitor-analysis/references/workflow.md index 98038b6..c73013b 100644 --- a/skills/competitor-analysis/references/workflow.md +++ b/skills/competitor-analysis/references/workflow.md @@ -356,13 +356,13 @@ Two modes: ### Screenshots Phase (after merge, before compile) -Capture homepage hero + full-page pricing screenshots for each competitor: +Capture homepage hero screenshot for each competitor: ```bash node {SKILL_DIR}/scripts/capture_screenshots.mjs {OUTPUT_DIR} --env remote --concurrency 1 ``` -Requires the `browse` CLI (`npm install -g @browserbasehq/browse-cli`). `--env remote` uses a Browserbase session. Writes PNGs to `{OUTPUT_DIR}/screenshots/{slug}-hero.png` and `{slug}-pricing.png`. `compile_report.mjs` auto-embeds them in per-competitor HTML pages when present. 
+Requires the `browse` CLI (`npm install -g @browserbasehq/browse-cli` — separate package from `bb`). `--env remote` uses a Browserbase session. Writes one PNG per competitor to `{OUTPUT_DIR}/screenshots/{slug}-hero.png`. `compile_report.mjs` auto-embeds the hero in the per-competitor HTML page when present. -Cost: ~15-20s per competitor (serial). Total for 5 competitors ≈ 90s. +Cost: ~10-20s per competitor (serial). Total for 5 competitors ≈ 60s. ### Sizing Formula ``` diff --git a/skills/competitor-analysis/scripts/capture_screenshots.mjs b/skills/competitor-analysis/scripts/capture_screenshots.mjs index 855bbab..f559151 100644 --- a/skills/competitor-analysis/scripts/capture_screenshots.mjs +++ b/skills/competitor-analysis/scripts/capture_screenshots.mjs @@ -1,8 +1,8 @@ #!/usr/bin/env node -// Capture hero + pricing screenshots for each competitor in the research directory. -// Reads per-competitor markdown files, extracts `website` and optional `pricing_url` -// frontmatter, navigates via `browse`, and writes PNGs to `{OUTPUT_DIR}/screenshots/`. +// Capture homepage hero screenshot for each competitor in the research directory. +// Reads per-competitor markdown files, extracts `website` from frontmatter, navigates +// via `browse`, and writes one PNG per competitor to `{OUTPUT_DIR}/screenshots/`. // // Requires: `browse` CLI (`npm install -g @browserbasehq/browse-cli`), either local Chrome // or a Browserbase remote session (`browse env remote`). 
@@ -18,10 +18,9 @@ const args = process.argv.slice(2); if (args.includes('--help') || args.includes('-h') || args.length === 0) { console.error(`Usage: node capture_screenshots.mjs <research-dir> [options] -Reads all .md files in <research-dir>, extracts website + pricing URLs from the YAML -frontmatter, and captures two screenshots per competitor: - - {slug}-hero.png — 1280x800 viewport of the homepage - - {slug}-pricing.png — full-page screenshot of the pricing page +Reads all .md files in <research-dir>, extracts the `website` field from each +competitor's YAML frontmatter, and captures a 1280x800 viewport screenshot of the +homepage. Writes one PNG per competitor as {slug}-hero.png. Output goes to <research-dir>/screenshots/. @@ -59,12 +58,6 @@ function parseFrontmatter(content) { return fields; } -// Try common pricing URL patterns if the frontmatter doesn't list one explicitly. -function pricingCandidates(website) { - const base = website.replace(/\/$/, ''); - return [`${base}/pricing`, `${base}/plans`, `${base}/pricing-plans`, base]; -} - function run(cmd, args, { timeout = 30000 } = {}) { return spawnSync(cmd, args, { encoding: 'utf-8', timeout, maxBuffer: 4 * 1024 * 1024 }); } @@ -75,13 +68,12 @@ if (envRes.status !== 0) { console.error(`Warning: could not set browse env to ${browseEnv}: ${envRes.stderr || envRes.stdout}`); } -async function captureOne(slug, website, pricingUrl) { +async function captureOne(slug, website) { const heroPath = join(shotsDir, `${slug}-hero.png`); - const pricingPath = join(shotsDir, `${slug}-pricing.png`); - const result = { slug, hero: null, pricing: null, errors: [] }; + const result = { slug, hero: null, errors: [] }; - if (skipExisting && existsSync(heroPath) && existsSync(pricingPath)) { - return { ...result, hero: heroPath, pricing: pricingPath, skipped: true }; + if (skipExisting && existsSync(heroPath)) { + return { ...result, hero: heroPath, skipped: true }; } // Hero: viewport 1280x800, single-screen shot @@ -94,20 
+86,6 @@ async function captureOne(slug, website, pricingUrl) { else result.errors.push(`hero: ${r.stderr || r.stdout}`); } catch (err) { result.errors.push(`hero exception: ${err.message}`); } - // Pricing: full-page; try explicit URL first, then common fallbacks - const urlsToTry = pricingUrl ? [pricingUrl, ...pricingCandidates(website)] : pricingCandidates(website); - let pricingOk = false; - for (const url of urlsToTry) { - try { - const gotoRes = run('browse', ['goto', url], { timeout: 30000 }); - if (gotoRes.status !== 0) continue; - run('browse', ['wait', 'timeout', '1500']); - const r = run('browse', ['screenshot', '--full-page', '--no-animations', pricingPath]); - if (r.status === 0 && existsSync(pricingPath)) { result.pricing = pricingPath; pricingOk = true; break; } - } catch {} - } - if (!pricingOk) result.errors.push('pricing: no candidate URL captured'); - return result; } @@ -119,10 +97,10 @@ for (const f of files) { const fm = parseFrontmatter(content); if (!fm || !fm.website) continue; const slug = f.replace('.md', ''); - jobs.push({ slug, website: fm.website, pricingUrl: fm.pricing_url }); + jobs.push({ slug, website: fm.website }); } -console.error(`Capturing screenshots for ${jobs.length} competitors → ${shotsDir}`); +console.error(`Capturing hero screenshots for ${jobs.length} competitors → ${shotsDir}`); const results = []; const queue = [...jobs]; @@ -130,17 +108,16 @@ async function worker() { while (queue.length > 0) { const job = queue.shift(); const started = Date.now(); - const r = await captureOne(job.slug, job.website, job.pricingUrl); + const r = await captureOne(job.slug, job.website); results.push(r); const elapsed = ((Date.now() - started) / 1000).toFixed(1); - const marks = [r.hero ? 'H' : '-', r.pricing ? 'P' : '-'].join(''); - console.error(` [${marks}] ${job.slug.padEnd(24)} ${elapsed}s ${r.skipped ? '(skipped)' : ''}`); + const mark = r.hero ? 
'H' : '-'; + console.error(` [${mark}] ${job.slug.padEnd(24)} ${elapsed}s ${r.skipped ? '(skipped)' : ''}`); if (r.errors.length) for (const e of r.errors) console.error(` ! ${e.slice(0, 120)}`); } } await Promise.all(Array(Math.min(concurrency, jobs.length || 1)).fill(0).map(worker)); const okHero = results.filter(r => r.hero).length; -const okPricing = results.filter(r => r.pricing).length; -console.error(`\nDone: ${okHero}/${jobs.length} hero · ${okPricing}/${jobs.length} pricing`); -console.log(JSON.stringify({ total: jobs.length, hero: okHero, pricing: okPricing, outputDir: shotsDir })); +console.error(`\nDone: ${okHero}/${jobs.length} hero`); +console.log(JSON.stringify({ total: jobs.length, hero: okHero, outputDir: shotsDir })); diff --git a/skills/competitor-analysis/scripts/compile_report.mjs b/skills/competitor-analysis/scripts/compile_report.mjs index 3752618..b221226 100644 --- a/skills/competitor-analysis/scripts/compile_report.mjs +++ b/skills/competitor-analysis/scripts/compile_report.mjs @@ -335,12 +335,10 @@ const perCompetitorCss = ` .src-Hashnode { background:#eef4ff; color:#2962ff; border-color:#c6d8ff; } .src-Substack { background:#fff4e5; color:#ff6719; border-color:#ffd4b7; } .src-Blog { background:#f6f3ee; color:#6a5d45; border-color:#e1dbcc; } - .shots { display:grid; grid-template-columns:1fr 1fr; gap:1rem; margin-bottom:1.5rem; } - @media (max-width:720px) { .shots { grid-template-columns:1fr; } } + .shots { margin-bottom:1.5rem; } .shot { background:var(--card); border:1px solid var(--border); border-radius:4px; overflow:hidden; } .shot-label { font-size:0.6875rem; text-transform:uppercase; letter-spacing:0.05em; color:var(--muted); font-weight:600; padding:0.5rem 0.75rem; border-bottom:1px solid var(--border); background:#fafafa; } .shot img { display:block; width:100%; height:auto; } - .shot-pricing img { max-height:560px; object-fit:cover; object-position:top; } footer { margin-top:3rem; padding-top:1.5rem; border-top:1px solid 
var(--border); text-align:center; font-size:0.75rem; color:var(--muted); } footer a { color:var(--brand); text-decoration:none; font-weight:500; } `; @@ -374,13 +372,11 @@ for (const c of deduped) { const comparisonHtml = comparisonKey ? `<h2>${escapeHtml(comparisonKey)}</h2>${mdToHtml(c.sections[comparisonKey])}` : ''; const findingsHtml = c.sections['Research Findings'] ? `<h2>Research Findings</h2>${mdToHtml(c.sections['Research Findings'])}` : ''; - // Screenshots — filenames match capture_screenshots.mjs output. + // Screenshot — filename matches capture_screenshots.mjs output. const heroShot = existsSync(join(dir, 'screenshots', `${c.slug}-hero.png`)); - const pricingShot = existsSync(join(dir, 'screenshots', `${c.slug}-pricing.png`)); - const screenshotsHtml = (heroShot || pricingShot) ? ` + const screenshotsHtml = heroShot ? ` <div class="shots"> - ${heroShot ? `<div class="shot shot-hero"><div class="shot-label">Homepage hero</div><img src="../screenshots/${escapeHtml(c.slug)}-hero.png" alt="${escapeHtml(c.competitor_name)} homepage hero" loading="lazy"></div>` : ''} - ${pricingShot ? `<div class="shot shot-pricing"><div class="shot-label">Pricing page</div><img src="../screenshots/${escapeHtml(c.slug)}-pricing.png" alt="${escapeHtml(c.competitor_name)} pricing page" loading="lazy"></div>` : ''} + <div class="shot shot-hero"><div class="shot-label">Homepage</div><img src="../screenshots/${escapeHtml(c.slug)}-hero.png" alt="${escapeHtml(c.competitor_name)} homepage hero" loading="lazy"></div> </div>` : ''; const companyHtml = `<!DOCTYPE html> From b569de621c26aa015b647726785b3e9ca402c079 Mon Sep 17 00:00:00 2001 From: Jay Sahnan <jay@browserbase.com> Date: Fri, 24 Apr 2026 17:22:10 +0100 Subject: [PATCH 06/23] feat(competitor-analysis): render matrix from curated matrix.json taxonomy MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The old matrix view was broken on real runs. 
Subagents write key_features and integrations as prose (comma-separated or full sentences), not as pipe-separated atomic labels the matrix expected. Pipe-splitting gave one unique blob per competitor, so the matrix trivially rendered a diagonal — zero actual comparison across competitors. Fix is to synthesize a shared taxonomy after enrichment and render the matrix from that. New flow: - After merge, the main agent reads all per-competitor .md files, distills a canonical list of 12-20 atomic features and 10-20 integrations that apply across the category, and writes {OUTPUT_DIR}/matrix.json with a per-competitor yes/no mapping. - compile_report.mjs auto-detects matrix.json and renders the Features + Integrations axes from it. Falls back to the old pipe-split behavior when matrix.json is missing. Verified on the Exa test: before fix, Features axis was 5 one-off blobs with a diagonal of ●s. After: 19 atomic feature rows × 5 competitors with 36 ● cells showing real overlap (Web Search API, MCP server, Free tier, Structured JSON are universal; only Jina has Reranker+Embeddings; only Tavily has Site crawler; SerpAPI alone has CAPTCHA solving and hourly throughput SLA; etc.). SKILL.md Step 5 gets a new "Synthesize the comparison matrix" substep with the matrix.json schema and the rule "do not skip — without this the matrix view is trivially diagonal". 
--- skills/competitor-analysis/SKILL.md | 29 +++++++++++ .../scripts/compile_report.mjs | 49 ++++++++++++++++--- 2 files changed, 72 insertions(+), 6 deletions(-) diff --git a/skills/competitor-analysis/SKILL.md b/skills/competitor-analysis/SKILL.md index 472df60..39fe808 100644 --- a/skills/competitor-analysis/SKILL.md +++ b/skills/competitor-analysis/SKILL.md @@ -237,6 +237,35 @@ node {SKILL_DIR}/scripts/merge_partials.mjs {OUTPUT_DIR} ``` Unions the 5 partials per competitor into one `{OUTPUT_DIR}/{slug}.md` — dedup'd Mentions (sorted by date desc), dedup'd Benchmarks, merged Findings, canonical frontmatter from the marketing lane. +### Synthesize the comparison matrix (write `matrix.json`) + +**Subagents write `key_features` and `integrations` as prose**, not as pipe-separated atomic feature labels. So a naive `|`-split axis becomes one-blob-per-competitor with no overlap — the rendered matrix shows a useless diagonal. + +The main agent fixes this by synthesizing a **shared taxonomy** across competitors and writing `{OUTPUT_DIR}/matrix.json`. `compile_report.mjs` auto-detects this file and renders the matrix from it instead of from the pipe split. + +**Process** — main agent: +1. Read all `{slug}.md` files. Focus on the `key_features`, `integrations`, and `## Features` sections. +2. Produce a canonical list of 12-20 *atomic* features — each must be a yes/no proposition a competitor either has or doesn't (e.g. "MCP server", "SOC 2", "Site crawler", "Reranker"). Avoid sentence-length features. Avoid features only one competitor has. +3. Produce a canonical list of 10-20 integrations (frameworks, marketplaces, SDK languages). +4. For each competitor, map each taxonomy entry to `true` / `false` based on the enrichment data. Be conservative — if not mentioned, leave `false`. +5. Write the result to `{OUTPUT_DIR}/matrix.json` in this shape: + ```json + { + "category": "AI search APIs", + "features": [{ "name": "Web Search API", "description": "..." 
}, ...], + "integrations": [{ "name": "LangChain" }, ...], + "competitors": { + "tavily": { + "features": { "Web Search API": true, "Site crawler": true, ... }, + "integrations": { "LangChain": true, "Databricks Marketplace": true, ... } + }, + "serpapi": { "features": {...}, "integrations": {...} } + } + } + ``` + +If this step is skipped, the matrix view falls back to the raw pipe-split axis (mostly useless for atomic comparison). Do not skip. + ## Step 6: Screenshots Capture a homepage hero screenshot per competitor: diff --git a/skills/competitor-analysis/scripts/compile_report.mjs b/skills/competitor-analysis/scripts/compile_report.mjs index b221226..f3ce1d9 100644 --- a/skills/competitor-analysis/scripts/compile_report.mjs +++ b/skills/competitor-analysis/scripts/compile_report.mjs @@ -435,8 +435,33 @@ for (const c of deduped) { // ---------- matrix.html (side-by-side) ---------- -// Collect union of features and integrations across competitors -function buildMatrixAxis(field) { +// Prefer a curated taxonomy from `matrix.json` when present — subagents write +// heterogeneous prose into key_features/integrations frontmatter, so the raw +// split-by-pipe axis is one-blob-per-competitor (no overlap, no comparison). +// `matrix.json` defines a shared axis of atomic features and a yes/no mapping +// per competitor, producing a real comparison. +let curatedMatrix = null; +try { + const p = join(dir, 'matrix.json'); + if (existsSync(p)) curatedMatrix = JSON.parse(readFileSync(p, 'utf-8')); +} catch (err) { + console.error(`Warning: matrix.json present but unreadable — falling back to pipe split. 
${err.message}`); +} + +function buildMatrixAxisFromCurated(kind) { + if (!curatedMatrix || !curatedMatrix[kind]) return []; + return curatedMatrix[kind].map(entry => { + const label = entry.name; + let count = 0; + for (const c of deduped) { + const compKey = curatedMatrix.competitors[c.slug]; + if (compKey && compKey[kind] && compKey[kind][label]) count += 1; + } + return { label, count, description: entry.description || '' }; + }); +} + +function buildMatrixAxisFromPipes(field) { const counts = new Map(); for (const c of deduped) { for (const item of splitPipes(c[field])) { @@ -447,11 +472,23 @@ function buildMatrixAxis(field) { } return [...counts.values()].sort((a, b) => b.count - a.count).slice(0, 18); } -const featureAxis = buildMatrixAxis('key_features'); -const integrationAxis = buildMatrixAxis('integrations'); + +const featureAxis = curatedMatrix + ? buildMatrixAxisFromCurated('features') + : buildMatrixAxisFromPipes('key_features'); +const integrationAxis = curatedMatrix + ? buildMatrixAxisFromCurated('integrations') + : buildMatrixAxisFromPipes('integrations'); function competitorHas(c, field, label) { - return splitPipes(c[field]).some(x => x.toLowerCase() === label.toLowerCase()); + // Curated mode: look up in matrix.json (field is 'features' or 'integrations'). + if (curatedMatrix) { + const compEntry = curatedMatrix.competitors[c.slug]; + return !!(compEntry && compEntry[field] && compEntry[field][label]); + } + // Fallback: raw pipe-split match. + const rawField = field === 'features' ? 
'key_features' : field; + return splitPipes(c[rawField]).some(x => x.toLowerCase() === label.toLowerCase()); } function matrixSection(heading, axis, field) { @@ -548,7 +585,7 @@ const matrixHtml = `<!DOCTYPE html> </table> </section> - ${matrixSection('Features', featureAxis, 'key_features')} + ${matrixSection('Features', featureAxis, 'features')} ${matrixSection('Integrations', integrationAxis, 'integrations')} </div> <footer>Generated by <a href="https://github.com/anthropics/skills">competitor-analysis</a> · Powered by <a href="https://browserbase.com">Browserbase</a></footer> From 4c75bf042e61c4cf9a9ee48eb9dafe81f445e082 Mon Sep 17 00:00:00 2001 From: Jay Sahnan <jay@browserbase.com> Date: Fri, 24 Apr 2026 17:25:50 +0100 Subject: [PATCH 07/23] fix(competitor-analysis): realign matrix column headers MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Rotated competitor-name headers were misaligned with their data columns and the rightmost labels (Tavily, You.com in the Exa test) got cut off. Root cause: `transform-origin: left top` combined with a fixed `width: 160px` on the label made each label's horizontal extent run ~131px to the right of the column, so labels visually floated several columns to the right of their target and the last N labels overflowed off-screen. Fix: anchor the rotated label to the BOTTOM-RIGHT of its column cell (position:absolute right:4px bottom:8px, transform-origin:right bottom) and steepen rotation from -35° to -55° so horizontal extent is reduced. Drop the fixed width — label is now only as wide as its text, which shrinks short names (Jina AI, Serper) and tightens layout. Cell width 44→52px and header height 130→150px give rotated labels room to live inside the cell rather than overflowing. Result on the Exa run: all 5 competitor names visible, each label's bottom-right sits at the top-right of its column, leaning up-left toward the column — the shingled "hanging label" pattern. 
--- skills/competitor-analysis/scripts/compile_report.mjs | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/skills/competitor-analysis/scripts/compile_report.mjs b/skills/competitor-analysis/scripts/compile_report.mjs index f3ce1d9..a8ea210 100644 --- a/skills/competitor-analysis/scripts/compile_report.mjs +++ b/skills/competitor-analysis/scripts/compile_report.mjs @@ -549,15 +549,15 @@ const matrixHtml = `<!DOCTYPE html> .mx-table th, .mx-table td { border:1px solid var(--border); padding:0; } .mx-table tr:hover td:not(.mx-feature) { background:#fdf7f5; } .mx-table tr:hover .mx-feature { background:#fdfcfb; } - .mx-feature-h { position:sticky; left:0; z-index:3; background:#fafafa; text-align:left; min-width:220px; padding:0.5rem 0.75rem !important; border-bottom:1px solid var(--border); } - .mx-comp-h { height:130px; vertical-align:bottom; padding:0 !important; background:#fafafa; min-width:44px; max-width:44px; border-bottom:1px solid var(--border); } - .mx-comp-h-inner { transform:rotate(-35deg); transform-origin:left top; white-space:nowrap; font-size:0.75rem; font-weight:600; color:var(--text); text-transform:none; letter-spacing:0; padding:0.35rem 0.5rem; width:160px; margin-left:18px; margin-top:118px; } + .mx-feature-h { position:sticky; left:0; z-index:3; background:#fafafa; text-align:left; min-width:220px; padding:0.5rem 0.75rem !important; border-bottom:1px solid var(--border); vertical-align:bottom; } + .mx-comp-h { height:150px; vertical-align:bottom; padding:0 !important; background:#fafafa; min-width:52px; max-width:52px; border-bottom:1px solid var(--border); position:relative; overflow:visible; } + .mx-comp-h-inner { position:absolute; right:4px; bottom:8px; transform:rotate(-55deg); transform-origin:right bottom; white-space:nowrap; font-size:0.8125rem; font-weight:600; color:var(--text); letter-spacing:0; } .mx-comp-h-inner a { color:var(--text); text-decoration:none; } .mx-comp-h-inner a:hover { color:var(--brand); } 
.mx-feature { position:sticky; left:0; z-index:2; background:var(--card); min-width:220px; font-size:0.8125rem; padding:0.45rem 0.75rem !important; display:flex; align-items:center; justify-content:space-between; gap:0.5rem; } .mx-feature-label { flex:1; } .mx-count { color:var(--muted); font-size:0.7rem; font-weight:600; background:#f4f1ee; padding:0 6px; border-radius:999px; } - .mx-cell { text-align:center; font-weight:700; min-width:44px; max-width:44px; padding:0.45rem 0 !important; font-size:0.9rem; } + .mx-cell { text-align:center; font-weight:700; min-width:52px; max-width:52px; padding:0.45rem 0 !important; font-size:0.9rem; } .mx-yes { color:#5a8a1a; background:rgba(144,201,77,0.06); } .mx-no { color:#e0dcd7; } From 03108ef85da9955cd36d60272e024153c864f7ea Mon Sep 17 00:00:00 2001 From: Jay Sahnan <jay@browserbase.com> Date: Fri, 24 Apr 2026 17:33:44 +0100 Subject: [PATCH 08/23] fix(competitor-analysis): horizontal matrix column headers MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Rotated/diagonal competitor names (35° then 55°) kept producing awkward alignment: the rotation anchor vs the column's visual center never quite matched, and long names (You.com, LlamaIndex) overflowed off the right. Simpler fix: just make the headers horizontal. With 5 competitors × 110px each = 550px, plus the 240px feature column, the table is 790px wide — fits inside the 1200px container without scrolling. For >10 competitors the .mx-scroll wrapper already provides horizontal scroll. Drops the .mx-comp-h-inner rotation wrapper, bumps cell width from 52→110px and data font from 0.9→0.95rem for readability. Feature column grows 220→240px to fit longer taxonomy labels like "Hourly throughput SLA". 
--- .../scripts/compile_report.mjs | 19 +++++++++---------- 1 file changed, 9 insertions(+), 10 deletions(-) diff --git a/skills/competitor-analysis/scripts/compile_report.mjs b/skills/competitor-analysis/scripts/compile_report.mjs index a8ea210..67214a4 100644 --- a/skills/competitor-analysis/scripts/compile_report.mjs +++ b/skills/competitor-analysis/scripts/compile_report.mjs @@ -493,11 +493,11 @@ function competitorHas(c, field, label) { function matrixSection(heading, axis, field) { if (!axis.length) return ''; - // Competitor names tilted 35° (more legible than full vertical). Row label (feature name) is the sticky - // left column so users can scroll horizontally without losing context on many-competitor tables. + // Horizontal competitor-name headers — simpler to read than rotated. Row label (feature name) is + // the sticky left column so users can scroll horizontally without losing context on wide tables. const header = `<tr> <th class="mx-feature-h">${escapeHtml(heading)}</th> - ${deduped.map(c => `<th class="mx-comp-h"><div class="mx-comp-h-inner"><a href="competitors/${escapeHtml(c.slug)}.html">${escapeHtml(c.competitor_name)}</a></div></th>`).join('')} + ${deduped.map(c => `<th class="mx-comp-h"><a href="competitors/${escapeHtml(c.slug)}.html">${escapeHtml(c.competitor_name)}</a></th>`).join('')} </tr>`; const rows = axis.map(a => { const cells = deduped.map(c => competitorHas(c, field, a.label) @@ -549,15 +549,14 @@ const matrixHtml = `<!DOCTYPE html> .mx-table th, .mx-table td { border:1px solid var(--border); padding:0; } .mx-table tr:hover td:not(.mx-feature) { background:#fdf7f5; } .mx-table tr:hover .mx-feature { background:#fdfcfb; } - .mx-feature-h { position:sticky; left:0; z-index:3; background:#fafafa; text-align:left; min-width:220px; padding:0.5rem 0.75rem !important; border-bottom:1px solid var(--border); vertical-align:bottom; } - .mx-comp-h { height:150px; vertical-align:bottom; padding:0 !important; background:#fafafa; min-width:52px; 
max-width:52px; border-bottom:1px solid var(--border); position:relative; overflow:visible; } - .mx-comp-h-inner { position:absolute; right:4px; bottom:8px; transform:rotate(-55deg); transform-origin:right bottom; white-space:nowrap; font-size:0.8125rem; font-weight:600; color:var(--text); letter-spacing:0; } - .mx-comp-h-inner a { color:var(--text); text-decoration:none; } - .mx-comp-h-inner a:hover { color:var(--brand); } - .mx-feature { position:sticky; left:0; z-index:2; background:var(--card); min-width:220px; font-size:0.8125rem; padding:0.45rem 0.75rem !important; display:flex; align-items:center; justify-content:space-between; gap:0.5rem; } + .mx-feature-h { position:sticky; left:0; z-index:3; background:#fafafa; text-align:left; min-width:240px; padding:0.75rem !important; border-bottom:1px solid var(--border); font-size:0.6875rem; text-transform:uppercase; letter-spacing:0.05em; color:var(--muted); font-weight:600; } + .mx-comp-h { padding:0.75rem 0.5rem !important; background:#fafafa; min-width:110px; max-width:140px; border-bottom:1px solid var(--border); text-align:center; font-size:0.8125rem; font-weight:600; text-transform:none; letter-spacing:0; color:var(--text); white-space:nowrap; } + .mx-comp-h a { color:var(--text); text-decoration:none; } + .mx-comp-h a:hover { color:var(--brand); } + .mx-feature { position:sticky; left:0; z-index:2; background:var(--card); min-width:240px; font-size:0.8125rem; padding:0.45rem 0.75rem !important; display:flex; align-items:center; justify-content:space-between; gap:0.5rem; } .mx-feature-label { flex:1; } .mx-count { color:var(--muted); font-size:0.7rem; font-weight:600; background:#f4f1ee; padding:0 6px; border-radius:999px; } - .mx-cell { text-align:center; font-weight:700; min-width:52px; max-width:52px; padding:0.45rem 0 !important; font-size:0.9rem; } + .mx-cell { text-align:center; font-weight:700; min-width:110px; max-width:140px; padding:0.5rem 0 !important; font-size:0.95rem; } .mx-yes { color:#5a8a1a; 
background:rgba(144,201,77,0.06); } .mx-no { color:#e0dcd7; } From 64b63ff8e424074554369c03032af179bef13517 Mon Sep 17 00:00:00 2001 From: Jay Sahnan <jay@browserbase.com> Date: Fri, 24 Apr 2026 17:42:37 +0100 Subject: [PATCH 09/23] feat(competitor-analysis): add "Where you're winning / losing" on overview MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The overview page showed a list of competitors but no explicit view of the user's own strategic position. Hard to answer at a glance: what do I uniquely have, what are the table-stakes features I'm missing? Extend matrix.json with a `userCompany` entry (same shape as each competitor — features + integrations yes/no flags), and compute two buckets on the overview page: - Winning: features the user has where 0–1 competitors also have them. Ordered by rarity (unique features first). - Losing: features the user lacks where 3+ competitors have them. Ordered by gap size (most common features first). Each item shows who else has it ("only you" / "Tavily, SerpAPI" / "4 competitors"), so users can assess the strategic weight at a glance. Rendered as two cards (green-bordered "win", brand-red-bordered "loss") between the summary stats and the results table on index.html. Cards gracefully degrade to nothing if matrix.json lacks userCompany — a skill run that skipped Step 5b's matrix synthesis gets an overview without the strategic summary rather than an error. On the Exa test: 4 wins (Site crawler · Embeddings · 3+ SDK languages · CrewAI integration) and 3 losses (Image/visual search · Dedicated news endpoint · Hourly throughput SLA). Clear strategic picture in one screen. SKILL.md Step 5b "Synthesize the comparison matrix" now documents userCompany as a required field with the explicit note that without it the strategic summary doesn't render. 
--- .../references/report-template.html | 19 ++++ .../scripts/compile_report.mjs | 99 ++++++++++++++++--- 2 files changed, 106 insertions(+), 12 deletions(-) diff --git a/skills/competitor-analysis/references/report-template.html b/skills/competitor-analysis/references/report-template.html index 4153389..7d71034 100644 --- a/skills/competitor-analysis/references/report-template.html +++ b/skills/competitor-analysis/references/report-template.html @@ -38,6 +38,23 @@ .stat .label { font-size: 0.6875rem; text-transform: uppercase; letter-spacing: 0.05em; color: var(--muted); font-weight: 600; margin-bottom: 0.25rem; } .stat .value { font-size: 1.5rem; font-weight: 700; color: var(--black); } + /* Strategic win/loss cards */ + .strategic { display: grid; grid-template-columns: 1fr 1fr; gap: 1rem; margin-bottom: 2rem; } + @media (max-width: 720px) { .strategic { grid-template-columns: 1fr; } } + .strategic .card { background: var(--card); border: 1px solid var(--border); border-radius: 4px; padding: 1.25rem; } + .strategic .card.win { border-left: 3px solid var(--high); } + .strategic .card.loss { border-left: 3px solid var(--low); } + .strategic h3 { font-size: 0.9375rem; font-weight: 600; margin-bottom: 0.125rem; color: var(--black); display: flex; align-items: center; gap: 0.5rem; } + .strategic h3 .badge { font-size: 0.6875rem; font-weight: 700; padding: 2px 8px; border-radius: 999px; } + .strategic h3 .badge.win { background: rgba(144,201,77,0.12); color: #5a8a1a; border: 1px solid rgba(144,201,77,0.3); } + .strategic h3 .badge.loss { background: rgba(240,54,3,0.08); color: var(--low); border: 1px solid rgba(240,54,3,0.2); } + .strategic .sub { font-size: 0.8125rem; color: var(--muted); margin-bottom: 0.75rem; } + .strategic ul { list-style: none; } + .strategic li { padding: 0.375rem 0; font-size: 0.875rem; border-top: 1px solid var(--border); display: flex; justify-content: space-between; align-items: baseline; gap: 0.75rem; } + .strategic li:first-child { 
border-top: 0; } + .strategic li .who { color: var(--muted); font-size: 0.75rem; white-space: nowrap; } + .strategic .empty { color: var(--muted); font-size: 0.8125rem; padding: 0.5rem 0; } + .results-table { width: 100%; border-collapse: collapse; background: var(--card); border: 1px solid var(--border); border-radius: 4px; overflow: hidden; margin-bottom: 2rem; } .results-table th { text-align: left; font-size: 0.6875rem; text-transform: uppercase; letter-spacing: 0.05em; color: var(--muted); font-weight: 600; padding: 0.75rem 1rem; border-bottom: 1px solid var(--border); background: #fafafa; } .results-table td { padding: 0.75rem 1rem; border-bottom: 1px solid var(--border); font-size: 0.875rem; vertical-align: top; } @@ -81,6 +98,8 @@ <h1>{{TITLE}}</h1> <div class="stat"><div class="label">With Pricing</div><div class="value">{{WITH_PRICING}}</div></div> </div> + {{STRATEGIC_SUMMARY}} + <table class="results-table"> <thead> <tr> diff --git a/skills/competitor-analysis/scripts/compile_report.mjs b/skills/competitor-analysis/scripts/compile_report.mjs index 67214a4..1d9091f 100644 --- a/skills/competitor-analysis/scripts/compile_report.mjs +++ b/skills/competitor-analysis/scripts/compile_report.mjs @@ -277,6 +277,90 @@ const tableRows = deduped.map(c => { </tr>`; }).join('\n'); +// Prefer a curated taxonomy from `matrix.json` when present — subagents write +// heterogeneous prose into key_features/integrations frontmatter, so the raw +// split-by-pipe axis is one-blob-per-competitor (no overlap, no comparison). +// `matrix.json` defines a shared axis of atomic features and a yes/no mapping +// per competitor, producing a real comparison. +let curatedMatrix = null; +try { + const p = join(dir, 'matrix.json'); + if (existsSync(p)) curatedMatrix = JSON.parse(readFileSync(p, 'utf-8')); +} catch (err) { + console.error(`Warning: matrix.json present but unreadable — falling back to pipe split. ${err.message}`); +} + +// Strategic summary — "Where are you winning?" 
/ "Where are you losing?" +// Requires matrix.json to carry a `userCompany` entry with feature flags. We then +// compare the user's flag per feature against how many competitors also have it. +// - Winning: user has the feature + at most 1 competitor has it (differentiated). +// - Losing: user LACKS the feature + 3 or more competitors have it (common gap). +// If userCompany is absent we render nothing — a skill run that skipped Step 5's +// matrix synthesis shouldn't get a broken/empty block here. +function buildStrategicSummary() { + if (!curatedMatrix || !curatedMatrix.userCompany) return ''; + const user = curatedMatrix.userCompany; + const userName = user.name || userCompany || 'You'; + const userEsc = escapeHtml(userName); + + function analyze(kind) { + const axis = curatedMatrix[kind] || []; + const compMap = curatedMatrix.competitors || {}; + const userFlags = user[kind] || {}; + const wins = []; + const losses = []; + for (const entry of axis) { + const label = entry.name; + const userHas = !!userFlags[label]; + const whoElseHas = []; + for (const c of deduped) { + const compEntry = compMap[c.slug]; + if (compEntry && compEntry[kind] && compEntry[kind][label]) whoElseHas.push(c.competitor_name); + } + const competitorCount = whoElseHas.length; + if (userHas && competitorCount <= 1) { + wins.push({ label, whoElseHas }); + } else if (!userHas && competitorCount >= 3) { + losses.push({ label, whoElseHas }); + } + } + // Order wins by rarity (fewest competitors have it first → most differentiated). + wins.sort((a, b) => a.whoElseHas.length - b.whoElseHas.length); + // Order losses by how many competitors have it (more = bigger gap). 
+ losses.sort((a, b) => b.whoElseHas.length - a.whoElseHas.length); + return { wins, losses }; + } + + const featureAnalysis = analyze('features'); + const integrationAnalysis = analyze('integrations'); + const allWins = [...featureAnalysis.wins, ...integrationAnalysis.wins]; + const allLosses = [...featureAnalysis.losses, ...integrationAnalysis.losses]; + + function renderList(items, emptyMessage) { + if (!items.length) return `<div class="empty">${escapeHtml(emptyMessage)}</div>`; + return `<ul>${items.slice(0, 10).map(it => { + const n = it.whoElseHas.length; + const who = n === 0 ? 'only you' : (n <= 3 ? it.whoElseHas.join(', ') : `${n} competitors`); + return `<li><span class="label">${escapeHtml(it.label)}</span><span class="who">${escapeHtml(who)}</span></li>`; + }).join('')}</ul>`; + } + + return `<div class="strategic"> + <div class="card win"> + <h3>Where ${userEsc} is winning <span class="badge win">${allWins.length}</span></h3> + <div class="sub">Features and integrations ${userEsc} has that 0–1 competitors match.</div> + ${renderList(allWins, 'No clear differentiators found — user has no unique features in the current taxonomy.')} + </div> + <div class="card loss"> + <h3>Where ${userEsc} is losing <span class="badge loss">${allLosses.length}</span></h3> + <div class="sub">Features and integrations ${userEsc} lacks that 3+ competitors have.</div> + ${renderList(allLosses, 'No major gaps found — user keeps up on table-stakes features.')} + </div> + </div>`; +} + +const strategicSummary = buildStrategicSummary(); + let indexHtml = template .replace(/\{\{TITLE\}\}/g, escapeHtml(`${title}`)) .replace(/\{\{META\}\}/g, escapeHtml(metaLine)) @@ -284,6 +368,7 @@ let indexHtml = template .replace(/\{\{MENTION_COUNT\}\}/g, String(totalMentions)) .replace(/\{\{BENCHMARK_COUNT\}\}/g, String(totalBenchmarks)) .replace(/\{\{WITH_PRICING\}\}/g, String(withPricing)) + .replace(/\{\{STRATEGIC_SUMMARY\}\}/g, strategicSummary) .replace(/\{\{TABLE_ROWS\}\}/g, tableRows); 
writeFileSync(join(dir, 'index.html'), indexHtml); @@ -435,18 +520,8 @@ for (const c of deduped) { // ---------- matrix.html (side-by-side) ---------- -// Prefer a curated taxonomy from `matrix.json` when present — subagents write -// heterogeneous prose into key_features/integrations frontmatter, so the raw -// split-by-pipe axis is one-blob-per-competitor (no overlap, no comparison). -// `matrix.json` defines a shared axis of atomic features and a yes/no mapping -// per competitor, producing a real comparison. -let curatedMatrix = null; -try { - const p = join(dir, 'matrix.json'); - if (existsSync(p)) curatedMatrix = JSON.parse(readFileSync(p, 'utf-8')); -} catch (err) { - console.error(`Warning: matrix.json present but unreadable — falling back to pipe split. ${err.message}`); -} +// curatedMatrix is loaded earlier (before the index.html section) because the +// strategic summary on the overview page reads userCompany from it. function buildMatrixAxisFromCurated(kind) { if (!curatedMatrix || !curatedMatrix[kind]) return []; From 86b9888dc11912d0d73bf4d98cae55c81b15b4d5 Mon Sep 17 00:00:00 2001 From: Jay Sahnan <jay@browserbase.com> Date: Fri, 24 Apr 2026 17:43:10 +0100 Subject: [PATCH 10/23] docs(competitor-analysis): require userCompany in matrix.json schema MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Missed in the strategic-summary commit (652a9a4) — the SKILL.md block that defines the matrix.json schema was on an older revision of the file that didn't get re-staged. Re-add the userCompany field and flag it as required, with the explicit note that skipping it means the "Where you're winning / losing" cards don't render. 
--- skills/competitor-analysis/SKILL.md | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/skills/competitor-analysis/SKILL.md b/skills/competitor-analysis/SKILL.md index 39fe808..88c479d 100644 --- a/skills/competitor-analysis/SKILL.md +++ b/skills/competitor-analysis/SKILL.md @@ -254,6 +254,11 @@ The main agent fixes this by synthesizing a **shared taxonomy** across competito "category": "AI search APIs", "features": [{ "name": "Web Search API", "description": "..." }, ...], "integrations": [{ "name": "LangChain" }, ...], + "userCompany": { + "name": "Exa", + "features": { "Web Search API": true, "Site crawler": true, ... }, + "integrations": { "LangChain": true, ... } + }, "competitors": { "tavily": { "features": { "Web Search API": true, "Site crawler": true, ... }, @@ -264,7 +269,9 @@ The main agent fixes this by synthesizing a **shared taxonomy** across competito } ``` -If this step is skipped, the matrix view falls back to the raw pipe-split axis (mostly useless for atomic comparison). Do not skip. + **`userCompany` is required**. The overview page renders two cards — "Where {user} is winning" (features the user has that ≤1 competitor matches) and "Where {user} is losing" (features the user lacks that ≥3 competitors have). Populate `userCompany.features` and `userCompany.integrations` from the self-research profile (Step 1). Without this field those two cards don't render. + +If this step is skipped, the matrix view falls back to the raw pipe-split axis (useless for atomic comparison) and the strategic summary doesn't render. Do not skip. 
## Step 6: Screenshots From 6a8df89f8e89b6bcff9ba977299daaaf37b4940e Mon Sep 17 00:00:00 2001 From: Jay Sahnan <jay@browserbase.com> Date: Fri, 24 Apr 2026 18:51:02 +0100 Subject: [PATCH 11/23] feat(competitor-analysis): mandate fact-check subagent for matrix.json MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Step 5b (matrix synthesis) produces LLM inference from heterogeneous subagent prose. On the Browserbase run 2026-04-23 that inference confidently marked SOC 2 as a Browserbase moat — except Hyperbrowser, Kernel, AND Anchor Browser all have SOC 2 Type II (verified via their own trust portals and compliance blog posts). Shipping that to a GTM team would have made the whole report untrustworthy. Add Step 5c: a mandatory fact-check subagent that runs after the taxonomy synthesis and before compile. For every true/false cell in matrix.json, it: - If true: finds a concrete source URL (docs, trust portal, changelog, GitHub license) or flips to false. - If false: runs one targeted bb search to guard against misses. - Outputs a verified matrix.json with a per-cell `sources` field plus a matrix_fact_check.md delta log of every flip. The "Where you're winning / losing" cards are strategic claims. Without verification they hallucinate moats. The SKILL now labels this step MANDATORY with the Browserbase-SOC 2 example as proof of what skipping it costs. --- skills/competitor-analysis/SKILL.md | 35 +++++++++++++++++++++++++++++ 1 file changed, 35 insertions(+) diff --git a/skills/competitor-analysis/SKILL.md b/skills/competitor-analysis/SKILL.md index 88c479d..1c2d9bb 100644 --- a/skills/competitor-analysis/SKILL.md +++ b/skills/competitor-analysis/SKILL.md @@ -273,6 +273,41 @@ The main agent fixes this by synthesizing a **shared taxonomy** across competito If this step is skipped, the matrix view falls back to the raw pipe-split axis (useless for atomic comparison) and the strategic summary doesn't render. Do not skip.
+### Fact-check the matrix (MANDATORY) + +**Do not trust the taxonomy pass alone.** It is LLM inference from heterogeneous prose and will make false claims that survive into the "Where you're winning" card, damaging the report's credibility. Observed during Browserbase run 2026-04-23: matrix.json claimed SOC 2 was unique to Browserbase; verification showed Hyperbrowser, Kernel, and Anchor Browser ALL have SOC 2 Type II (confirmed via their own trust portals and compliance blog posts). That single error would have presented a hallucinated moat to a real GTM team. + +Launch a dedicated **fact-check subagent** (Bash-only) after the taxonomy pass and before compile: + +``` +You are a matrix-verification subagent. For EACH cell in {OUTPUT_DIR}/matrix.json +(userCompany + every competitor × every feature × every integration), verify the +boolean against a concrete source URL. + +TOOL RULES: Bash ONLY. bb search + bb fetch. + +For each cell: +1. If `true` — find a source that explicitly confirms the feature. Candidates: + - The company's own docs / pricing / feature pages + - Trust portals (trust.{company}.* / {company}.io/trust) + - Official changelog / blog announcements + - GitHub repo LICENSE / README for open-source claims + - SafeBase / Vanta trust portals for SOC 2 / HIPAA / ISO + If no source found, flip to `false` and record why. +2. If `false` — run ONE targeted bb search to check we didn't miss it. Flip to + `true` only on first-party evidence. +3. Be adversarial: "no mention" ≠ "not supported". But "status page exists" is NOT + proof of a published uptime SLA commitment — look for an explicit SLA % number. + +Output a verified matrix.json with an added `sources` field per cell: + { "Feature name": { "value": true, "source": "https://..." } } + +And write a cells-changed log to {OUTPUT_DIR}/matrix_fact_check.md listing every +flip (was true → now false, or vice versa) with the source URL and quoted evidence. 
+``` + +After the subagent completes, the main agent re-reads matrix.json, recompiles the report, and surfaces the `matrix_fact_check.md` delta to the user. **The strategic summary is worthless without this step** — it will confidently state "winning on X" where X is a hallucination. + ## Step 6: Screenshots Capture a homepage hero screenshot per competitor: From 09d904ca1ada3bf2aa70a947828a8f1b44df8893 Mon Sep 17 00:00:00 2001 From: Jay Sahnan <jay@browserbase.com> Date: Fri, 24 Apr 2026 18:55:07 +0100 Subject: [PATCH 12/23] feat(competitor-analysis): prose summaries for win/loss cards MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Bulleted lists of winning/losing features read like a spreadsheet, not the analyst-briefing the overview page is supposed to be. Extend matrix.json's userCompany with optional winningSummary / losingSummary prose fields (2-4 sentences each) and render them as paragraphs when present. Falls back to the existing bulleted list when absent so a partial run still shows the boolean comparison. SKILL.md flags these as strongly preferred and tells the main agent to write them AFTER the fact-check step so the prose is grounded in verified cells — otherwise the paragraph will state fluent but false moats. Updated the Exa example in the schema block to include the two summary fields. On the Browserbase run: two paragraphs replace the previous 12 bullets. Winning reads as enterprise moats (SLA, Stagehand, EU/APAC, Selenium, OpenAI Agents + n8n integrations). Losing reads as transparency + openness gaps — concrete competitor names cited (Anchor's Halluminate win, Steel's leaderboard, Browsaur MIT + Kernel + Steel AGPL). 
--- skills/competitor-analysis/SKILL.md | 6 +++++- .../references/report-template.html | 1 + .../scripts/compile_report.mjs | 16 ++++++++++++---- 3 files changed, 18 insertions(+), 5 deletions(-) diff --git a/skills/competitor-analysis/SKILL.md b/skills/competitor-analysis/SKILL.md index 1c2d9bb..197fd37 100644 --- a/skills/competitor-analysis/SKILL.md +++ b/skills/competitor-analysis/SKILL.md @@ -256,6 +256,8 @@ The main agent fixes this by synthesizing a **shared taxonomy** across competito "integrations": [{ "name": "LangChain" }, ...], "userCompany": { "name": "Exa", + "winningSummary": "Exa's moats are its first-party neural index and the integrated Research API — no one else in the set ships a semantic/embeddings-native retrieval primitive alongside a multi-step agentic research endpoint. It's also the only provider with a crawler product bundled in, and ties with SerpAPI on breadth of SDK language coverage.", + "losingSummary": "Exa trails competitors on operational transparency — SerpAPI, Serper, and Tavily all publish hourly throughput SLAs, and Exa lacks a dedicated news endpoint that SerpAPI, Serper, and You.com all ship. Image/visual search is also missing vs 4 of 5 competitors.", "features": { "Web Search API": true, "Site crawler": true, ... }, "integrations": { "LangChain": true, ... } }, @@ -269,7 +271,9 @@ The main agent fixes this by synthesizing a **shared taxonomy** across competito } ``` - **`userCompany` is required**. The overview page renders two cards — "Where {user} is winning" (features the user has that ≤1 competitor matches) and "Where {user} is losing" (features the user lacks that ≥3 competitors have). Populate `userCompany.features` and `userCompany.integrations` from the self-research profile (Step 1). Without this field those two cards don't render. + **`userCompany` is required**. The overview page renders two cards — "Where {user} is winning" and "Where {user} is losing". 
Populate `userCompany.features` and `userCompany.integrations` from the self-research profile (Step 1). Without this field those two cards don't render. + + **`userCompany.winningSummary` / `losingSummary` are strongly preferred** (analyst-style prose, 2-4 sentences each). When present, the cards render as paragraphs instead of bulleted lists — reads like a briefing, not a spreadsheet. Write these AFTER the fact-check step below so prose is grounded in verified cells, not raw inference. If absent, the cards fall back to a bulleted list of winning/losing items with who-else-has-it. If this step is skipped, the matrix view falls back to the raw pipe-split axis (useless for atomic comparison) and the strategic summary doesn't render. Do not skip. diff --git a/skills/competitor-analysis/references/report-template.html b/skills/competitor-analysis/references/report-template.html index 7d71034..129c48e 100644 --- a/skills/competitor-analysis/references/report-template.html +++ b/skills/competitor-analysis/references/report-template.html @@ -49,6 +49,7 @@ .strategic h3 .badge.win { background: rgba(144,201,77,0.12); color: #5a8a1a; border: 1px solid rgba(144,201,77,0.3); } .strategic h3 .badge.loss { background: rgba(240,54,3,0.08); color: var(--low); border: 1px solid rgba(240,54,3,0.2); } .strategic .sub { font-size: 0.8125rem; color: var(--muted); margin-bottom: 0.75rem; } + .strategic .prose { font-size: 0.9375rem; line-height: 1.6; color: var(--text); margin: 0.5rem 0 0; } .strategic ul { list-style: none; } .strategic li { padding: 0.375rem 0; font-size: 0.875rem; border-top: 1px solid var(--border); display: flex; justify-content: space-between; align-items: baseline; gap: 0.75rem; } .strategic li:first-child { border-top: 0; } diff --git a/skills/competitor-analysis/scripts/compile_report.mjs b/skills/competitor-analysis/scripts/compile_report.mjs index 1d9091f..f740c39 100644 --- a/skills/competitor-analysis/scripts/compile_report.mjs +++ 
b/skills/competitor-analysis/scripts/compile_report.mjs @@ -345,16 +345,24 @@ function buildStrategicSummary() { }).join('')}</ul>`; } + // Prefer the analyst-written prose from matrix.json when present — reads as narrative, + // not a spreadsheet. Falls back to the bulleted list when no prose is provided so a + // skill run that skipped the prose step still surfaces the boolean comparison. + function renderBody(prose, items, emptyMessage) { + if (prose && prose.trim()) return `<p class="prose">${escapeHtml(prose)}</p>`; + return renderList(items, emptyMessage); + } + return `<div class="strategic"> <div class="card win"> <h3>Where ${userEsc} is winning <span class="badge win">${allWins.length}</span></h3> - <div class="sub">Features and integrations ${userEsc} has that 0–1 competitors match.</div> - ${renderList(allWins, 'No clear differentiators found — user has no unique features in the current taxonomy.')} + ${user.winningSummary ? '' : `<div class="sub">Features and integrations ${userEsc} has that 0–1 competitors match.</div>`} + ${renderBody(user.winningSummary, allWins, 'No clear differentiators found — user has no unique features in the current taxonomy.')} </div> <div class="card loss"> <h3>Where ${userEsc} is losing <span class="badge loss">${allLosses.length}</span></h3> - <div class="sub">Features and integrations ${userEsc} lacks that 3+ competitors have.</div> - ${renderList(allLosses, 'No major gaps found — user keeps up on table-stakes features.')} + ${user.losingSummary ? 
'' : `<div class="sub">Features and integrations ${userEsc} lacks that 3+ competitors have.</div>`} + ${renderBody(user.losingSummary, allLosses, 'No major gaps found — user keeps up on table-stakes features.')} </div> </div>`; } From 73f573d3a773d04a921b8b5009ea27cbdc4817d9 Mon Sep 17 00:00:00 2001 From: Jay Sahnan <jay@browserbase.com> Date: Fri, 24 Apr 2026 19:06:14 +0100 Subject: [PATCH 13/23] fix(competitor-analysis): mandate user-company research parity with competitors MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Step 1 was doing light self-research on the user's company while Step 5 did deep 5-lane enrichment on every competitor. That asymmetry meant the userCompany row in matrix.json was filled from the main agent's memory rather than from verified partials, and the strategic summary printed fabricated moats about the user's OWN product. Concrete examples from the Browserbase run 2026-04-23, caught only when the user pushed back: - Claimed a "published uptime SLA" — no numeric SLA exists on browserbase.com, only a status page. - Marked open-source as false — Stagehand is MIT-licensed at github.com/browserbase/stagehand, plus Browserbase ships 10+ other OSS repos (sdk-node, sdk-python, create-browser-app, Arena, open-operator, mcp-server-browserbase, etc). The correct framing is "OSS at the SDK layer, cloud-only at the infra layer" — a split the skill wasn't capturing. Systemic fix: - Step 1 now mandates the same 5-lane partial enrichment on the user's company that Step 5 runs on competitors. Partials go to partials/{user-slug}.{lane}.md. merge_partials.mjs consolidates to {OUTPUT_DIR}/{user-slug}.md. - Step 5b (matrix synthesis) now explicitly reads {user-slug}.md as the source for userCompany flags. Every flag must be traceable to a Research Findings bullet with a cited URL — the rule applies identically to the user's company and every competitor. 
- Added the Browserbase-SLA + Browserbase-Stagehand errors to SKILL.md as the cautionary tale for why this parity matters. --- skills/competitor-analysis/SKILL.md | 26 ++++++++++++++++++-------- 1 file changed, 18 insertions(+), 8 deletions(-) diff --git a/skills/competitor-analysis/SKILL.md b/skills/competitor-analysis/SKILL.md index 197fd37..8a36978 100644 --- a/skills/competitor-analysis/SKILL.md +++ b/skills/competitor-analysis/SKILL.md @@ -82,24 +82,34 @@ rm -f /tmp/competitor_discovery_batch_*.json ## Step 1: User Company Research -This step sets the baseline for what "competitor" means. +This step sets the baseline for what "competitor" means AND produces the verified data the Step 5b matrix will use for the `userCompany` row. + +**Rule**: The user's company gets the same 5-lane research depth as competitors. Do NOT fill `userCompany` in matrix.json from memory — it will ship false claims to the user's own team. On the Browserbase run 2026-04-23, skipping this step produced a matrix that claimed Browserbase had a "published uptime SLA" (there is no numeric public SLA — only a status page) and marked Stagehand's MIT-licensed OSS SDK as `open-source: false` (the repo is github.com/browserbase/stagehand, LICENSE confirmed MIT). Both errors would have surfaced in the "Where you're winning" card as fabricated moats. + +Process: 1. Ask the user for their company name or URL. -2. **Check for an existing profile** at `{SKILL_DIR}/profiles/{company-slug}.json`. If it exists, load it and confirm with the user: "I have your profile from {researched_at}. Still accurate?" — if yes, skip to Step 2. +2. **Check for an existing profile** at `{SKILL_DIR}/profiles/{company-slug}.json`. If it exists, load it and confirm with the user: "I have your profile from {researched_at}. Still accurate?" — if yes, skip to Step 2 BUT still run the partial-lane enrichment below so matrix synthesis has fresh feature evidence. 
The profile format is shared with `company-research` (same shape). If a user already has a profile saved under `company-research/profiles/`, you may copy it into this skill's profiles directory rather than re-researching. -3. **No profile exists** → run the self-research flow. See `references/research-patterns.md` → "Self-Research" for sub-questions and page-discovery rules. +3. **Run the full 5-lane enrichment on the user's company** — identical to the competitor pattern in Step 5. For each lane, spawn a Bash-only subagent that writes to `{OUTPUT_DIR}/partials/{user-slug}.{lane}.md`: + - **marketing** — tagline, positioning, pricing tiers, features, integrations, open-source components (SDK repos + licenses), regions offered, compliance (SOC 2 / HIPAA / trust portal URL) + - **technical** — CDP / Playwright / Puppeteer / Selenium driver support (with docs URLs), SDK languages, MCP server URL, stealth product name + tier, session replay + video recording specifics, published uptime SLA (actual %, not status page), third-party benchmarks + - **discussion**, **social**, **news** — optional in quick mode, recommended in deep+ + See `references/research-patterns.md` → "Self-Research" for sub-questions. Each finding MUST cite a URL. + +4. Run `merge_partials.mjs` on the user's partials too — produces `{OUTPUT_DIR}/{user-slug}.md`, the canonical source Step 5b reads from for `userCompany` flags. -4. Synthesize into a profile: Company, Product, Existing Customers, Competitors (seed list), Use Cases, **precise_category**, **category_include_keywords**, **exclusion_list**. Do NOT include ICP — this skill doesn't need it. +5. Synthesize into a profile: Company, Product, Existing Customers, Competitors (seed list), Use Cases, **precise_category**, **category_include_keywords**, **exclusion_list**. Do NOT include ICP — this skill doesn't need it. - `precise_category`: one sentence describing the category. e.g., "cloud headless browser infrastructure for AI agents with CDP". 
Avoid vague words like "tools" / "platform". - `category_include_keywords`: 8-15 phrases a direct competitor's marketing would likely contain (hero or title). Include semantic variants. - `exclusion_list`: phrases that indicate a *different* category — used by the gate to reject false positives (e.g. `antidetect browser`, `scraping api`, `screenshot api`, `residential proxy`). See `references/research-patterns.md` → "Synthesis Output" for the exact format and Browserbase as a worked example. -5. Present the profile to the user. Do not proceed until confirmed. +6. Present the profile + the user-company `.md` to the user for confirmation. Do not proceed until confirmed. -6. **Save the confirmed profile** to `{SKILL_DIR}/profiles/{company-slug}.json`. +7. **Save the confirmed profile** to `{SKILL_DIR}/profiles/{company-slug}.json`. ## Step 2: Depth Mode + Seed Input @@ -244,10 +254,10 @@ Unions the 5 partials per competitor into one `{OUTPUT_DIR}/{slug}.md` — dedup The main agent fixes this by synthesizing a **shared taxonomy** across competitors and writing `{OUTPUT_DIR}/matrix.json`. `compile_report.mjs` auto-detects this file and renders the matrix from it instead of from the pipe split. **Process** — main agent: -1. Read all `{slug}.md` files. Focus on the `key_features`, `integrations`, and `## Features` sections. +1. Read ALL `{slug}.md` files, INCLUDING the user's company file `{user-slug}.md` produced in Step 1. The user is competitor #0 for matrix purposes — treat with identical rigor. 2. Produce a canonical list of 12-20 *atomic* features — each must be a yes/no proposition a competitor either has or doesn't (e.g. "MCP server", "SOC 2", "Site crawler", "Reranker"). Avoid sentence-length features. Avoid features only one competitor has. 3. Produce a canonical list of 10-20 integrations (frameworks, marketplaces, SDK languages). -4. For each competitor, map each taxonomy entry to `true` / `false` based on the enrichment data. 
Be conservative — if not mentioned, leave `false`. +4. For each company INCLUDING THE USER, map each taxonomy entry to `true` / `false` based on the enrichment data in their `.md` file. **Every flag must be traceable to a Research Findings bullet with a cited URL.** If the user's file says "Stagehand MIT-licensed (github.com/browserbase/stagehand)", the Open-source feature is `true` with that URL as the source. If not mentioned, leave `false`. 5. Write the result to `{OUTPUT_DIR}/matrix.json` in this shape: ```json { From 1f8d742364913830e206aa829a44a1f88951c912 Mon Sep 17 00:00:00 2001 From: Jay Sahnan <jay@browserbase.com> Date: Fri, 24 Apr 2026 19:51:38 +0100 Subject: [PATCH 14/23] feat(competitor-analysis): battle card lane (6th synthesis subagent) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Adds sales-enablement output grounded in the fact-checked matrix. Closes the single biggest gap surfaced by the v0.2 framework research: the skill was a Competitor Profiling Matrix but not a Battle Card tool — Klue/Crayon's most-requested artifact had no equivalent in our pipeline. Design: synthesis-only lane (no new bb calls), runs AFTER Step 5c fact-check so battle cards are grounded in verified cells, not fresh inference. Eliminates the failure mode where the skill's sales output would contradict the matrix it publishes. Changes: - scripts/merge_partials.mjs: add 'battle' to LANES; union the `## Battle Card` section into the merged {slug}.md between the Comparison and Mentions sections. - scripts/compile_report.mjs: parse the Battle Card section from c.sections, render as a brand-accented `.research.battle` card on the per-competitor HTML page (left border in brand orange, uppercase small-caps subheadings for Landmines / Objection Handlers / Talk Tracks). - references/battle-card.md (new): format spec — three sections, citation rules, adversarial self-check checklist.
- references/battle-card-subagent.md (new): standalone prompt template with placeholder list. Main agent substitutes per competitor and launches one subagent per competitor in parallel. - references/example-research.md: add a worked Battle Card section to the Rival Co example. - SKILL.md: new Step 5d (Battle synthesis) with explicit dependency on Step 5c fact-check; Pipeline Overview updated to mention the 6th lane in deep/deeper modes. Scope deliberately tight — this is Phase E of the approved v0.2 plan (/Users/jay/.claude/plans/you-can-figure-out-jaunty-pelican.md). Phases A/B/C/D/F/G deferred. Existing Browserbase + Exa compile runs verified unchanged (no battle partials → battle card card omitted). --- skills/competitor-analysis/SKILL.md | 16 ++- .../references/battle-card-subagent.md | 122 ++++++++++++++++++ .../references/battle-card.md | 91 +++++++++++++ .../references/example-research.md | 15 ++- .../scripts/compile_report.mjs | 9 ++ .../scripts/merge_partials.mjs | 6 +- 6 files changed, 255 insertions(+), 4 deletions(-) create mode 100644 skills/competitor-analysis/references/battle-card-subagent.md create mode 100644 skills/competitor-analysis/references/battle-card.md diff --git a/skills/competitor-analysis/SKILL.md b/skills/competitor-analysis/SKILL.md index 8a36978..a46e090 100644 --- a/skills/competitor-analysis/SKILL.md +++ b/skills/competitor-analysis/SKILL.md @@ -60,9 +60,9 @@ Follow these 8 steps in order. Do not skip or reorder. 3. **Discovery (3 parallel waves)** — Wave A (alternatives), Wave B (precise category), Wave C (comparison-page graph via "X vs Y" title parsing) 4. **Gate** — `scripts/gate_candidates.mjs` bb-fetches each candidate's hero text and drops wrong-category URLs 5. **Confirm enrichment set with the user** — Present PASS / UNKNOWN / rejected-brand-matches via `AskUserQuestion`. User ticks the real ones, adds any the discovery missed. 
Skipping this step is wasteful because enrichment is expensive (25 subagents × depth budget) and the gate is imperfect (JS-heavy homepages, Cloudflare challenges, semantic-variant taglines) -6. **Deep Enrichment (5 subagents per competitor in deep/deeper modes)** — Marketing, Discussion, Social, News, Technical — each lane a separate subagent writing to `partials/`; then `merge_partials.mjs` consolidates +6. **Deep Enrichment (5 subagents per competitor in deep/deeper modes)** — Marketing, Discussion, Social, News, Technical — each lane a separate subagent writing to `partials/`; then `merge_partials.mjs` consolidates. In deep/deeper modes, **Step 5d** adds a 6th Battle Card synthesis lane AFTER Step 5c fact-check completes — produces per-competitor Landmines / Objection Handlers / Talk Tracks grounded in cited evidence. 7. **Screenshots** — `capture_screenshots.mjs` via the `browse` CLI captures a 1280×800 homepage hero per competitor -8. **HTML Report** — Overview + per-competitor (with embedded hero screenshot) + matrix + mentions views +8. **HTML Report** — Overview + per-competitor (with embedded hero screenshot + Battle Card card) + matrix + mentions views --- @@ -322,6 +322,18 @@ flip (was true → now false, or vice versa) with the source URL and quoted evid After the subagent completes, the main agent re-reads matrix.json, recompiles the report, and surfaces the `matrix_fact_check.md` delta to the user. **The strategic summary is worthless without this step** — it will confidently state "winning on X" where X is a hallucination. +### Step 5d: Battle Card synthesis (deep/deeper only, after Step 5c) + +**Depends on fact-checked matrix.json from Step 5c.** This is a sales-enablement lane. For each competitor, launch a Bash-only synthesis subagent (no new `bb` calls) that reads all 5 existing partials + the user's merged `.md` + fact-checked `matrix.json`, and produces per-competitor Landmines / Objection Handlers / Talk Tracks grounded in cited evidence. 
+ +Prompt template: `references/battle-card-subagent.md` (substitute `{COMPETITOR_SLUG}` / `{COMPETITOR_NAME}` / `{USER_COMPANY_NAME}` / `{USER_WINNING_SUMMARY}` per competitor). Format spec: `references/battle-card.md`. + +Output: `{OUTPUT_DIR}/partials/{slug}.battle.md` with a `## Battle Card` section. `merge_partials.mjs` unions this into the consolidated `{slug}.md`. `compile_report.mjs` renders it as a brand-accented card on the per-competitor HTML page. + +**Why this lane is synthesis-only** — battle cards must be grounded in facts that already survived Step 5c. Letting the subagent do fresh `bb` searches would reintroduce the hallucinated-moat problem the fact-check step exists to prevent. The subagent's adversarial self-check explicitly rejects claims not traceable to an input partial bullet or a `sources`-backed matrix cell. + +Parallelism: 1 subagent per competitor, all in one Agent-tool message (synthesis is fast, ~3-5 Bash calls per subagent). Skip this step in `quick` mode — there isn't enough research depth to ground the cards credibly. + ## Step 6: Screenshots Capture a homepage hero screenshot per competitor: diff --git a/skills/competitor-analysis/references/battle-card-subagent.md b/skills/competitor-analysis/references/battle-card-subagent.md new file mode 100644 index 0000000..7e47b03 --- /dev/null +++ b/skills/competitor-analysis/references/battle-card-subagent.md @@ -0,0 +1,122 @@ +# Battle Card subagent prompt + +Main agent substitutes placeholders per competitor. Launch AFTER Step 5c fact-check completes — this lane depends on `matrix.json` cells having `sources` URLs. + +## Placeholders to substitute + +- `{OUTPUT_DIR}` → full literal path, e.g. `/Users/jay/Desktop/browserbase_competitors_2026-04-24-1930` +- `{COMPETITOR_SLUG}` → e.g. `hyperbrowser` +- `{COMPETITOR_NAME}` → e.g. `Hyperbrowser` +- `{USER_SLUG}` → e.g. `browserbase` +- `{USER_COMPANY_NAME}` → e.g. 
`Browserbase` +- `{USER_PRODUCT_ONE_LINER}` → pulled from Step 1 profile +- `{USER_WINNING_SUMMARY}` → matrix.json `userCompany.winningSummary` +- `{USER_LOSING_SUMMARY}` → matrix.json `userCompany.losingSummary` + +## Prompt + +``` +You are the Battle Card synthesis subagent. Produce an evidence-grounded +battle card a real AE would use on a call. + +TOOL RULES — CRITICAL, FOLLOW EXACTLY: +1. You may ONLY use the Bash tool. No exceptions. +2. BANNED TOOLS: WebFetch, WebSearch, Write, Read, Glob, Grep, bb search, + bb fetch — ALL BANNED. This is a SYNTHESIS lane, not a research lane. + You read files that already exist; you do not make new network calls. +3. Read ALL inputs in ONE Bash call via `cat`. Write output in ONE heredoc. +4. NEVER use ~ or $HOME — full literal paths only. + +INPUTS (all already exist on disk — read in one Bash call): +- {OUTPUT_DIR}/partials/{COMPETITOR_SLUG}.marketing.md +- {OUTPUT_DIR}/partials/{COMPETITOR_SLUG}.discussion.md +- {OUTPUT_DIR}/partials/{COMPETITOR_SLUG}.social.md +- {OUTPUT_DIR}/partials/{COMPETITOR_SLUG}.news.md +- {OUTPUT_DIR}/partials/{COMPETITOR_SLUG}.technical.md +- {OUTPUT_DIR}/{USER_SLUG}.md # user's own merged file +- {OUTPUT_DIR}/matrix.json # fact-checked matrix — cells + # must have a `sources` URL to + # be trustworthy; reject any + # cell without one + +CONTEXT: +- User's company: {USER_COMPANY_NAME} +- User's product: {USER_PRODUCT_ONE_LINER} +- User's verified moats (from matrix.json userCompany.winningSummary): + {USER_WINNING_SUMMARY} +- User's verified gaps (from matrix.json userCompany.losingSummary): + {USER_LOSING_SUMMARY} +- Competitor: {COMPETITOR_NAME} +- Competitor slug: {COMPETITOR_SLUG} + +TASK — produce three sections, every claim traceable to an input bullet +or matrix.sources URL: + +1. LANDMINES (3-5 items) — concrete verifiable facts that HURT + {COMPETITOR_NAME} in a deal. 
Each: + - States a specific, verifiable fact (not "they're slow" — "their + p50 was 3.4s on the Nov 2025 Halluminate benchmark") + - Cites a source URL pulled from an actual bullet in one of the + input partials (Mentions / Benchmarks / Research Findings) + - Includes a one-line "how to use it" talking point + - Prefers third-party sources over competitor's own marketing + - If no evidence exists for a potential landmine, OMIT it. 3 cited + landmines > 5 half-invented ones. + +2. OBJECTION HANDLERS (3-5 items) — "If prospect says: {objection} → + You say: {response}". Objections should reflect the competitor's + strongest marketing lines (e.g. if their homepage says "99.99% + uptime", the objection is "we hear {user} has no uptime guarantee"). + Responses must reference a real user moat from winningSummary — + never a hallucinated feature. + +3. TALK TRACKS (2-3 items) — 1-2 sentence opening pitches. Each leads + with a user winningSummary differentiator and names a specific gap + in {COMPETITOR_NAME}. Confident, factual, no hyperbole. + +ADVERSARIAL SELF-CHECK before writing: +- [ ] Every landmine cites a URL that appears in one of the input + partials. No invented URLs. +- [ ] No claim contradicts a fact-checked cell in matrix.json. +- [ ] No talk track claims a user feature where matrix.json shows + userCompany.features[X] = false. +- [ ] Objections are realistic (what a prospect would actually raise), + not strawmen. + +OUTPUT — write via a single heredoc to + {OUTPUT_DIR}/partials/{COMPETITOR_SLUG}.battle.md + +cat << 'BATTLE_MD' > {OUTPUT_DIR}/partials/{COMPETITOR_SLUG}.battle.md +--- +competitor_name: {COMPETITOR_NAME} +lane: battle +generated_at: {YYYY-MM-DD} +--- + +## Battle Card + +### Landmines + +- **{one-line fact}** — {how to use it in the call}. (source: {url}) + +### Objection Handlers + +- If they say: "{objection verbatim}" + You say: {response citing user's moat} (evidence: {url}) + +### Talk Tracks + +1. 
{1-2 sentence pitch} +BATTLE_MD + +REPORT BACK only one line: + "{COMPETITOR_SLUG} battle: {N} landmines, {M} objections, {K} tracks, all cited." + +Do NOT return the card content. +``` + +## Wave management + +- Launch 1 battle-card subagent per competitor. All can run in parallel (synthesis is fast and uses no shared state beyond already-written partials). +- Depth: only run in `deep` or `deeper` modes. `quick` mode does not have the research depth to ground battle cards credibly. +- Budget: ~3-5 Bash calls per subagent (1 big cat, 1 big heredoc, maybe 1-2 sanity checks). diff --git a/skills/competitor-analysis/references/battle-card.md b/skills/competitor-analysis/references/battle-card.md new file mode 100644 index 0000000..dbba15a --- /dev/null +++ b/skills/competitor-analysis/references/battle-card.md @@ -0,0 +1,91 @@ +# Battle Card — format spec + +The Battle lane is the **6th** subagent lane in deep/deeper mode. It runs AFTER Step 5c fact-check completes — it reads only existing partials + the fact-checked `matrix.json`, **never makes new `bb` calls**. This is a pure synthesis lane. + +Output file: `{OUTPUT_DIR}/partials/{slug}.battle.md`. `merge_partials.mjs` unions its `## Battle Card` section into the consolidated `{slug}.md`. `compile_report.mjs` renders it as a brand-accented card on the per-competitor HTML page. + +## The three sections + +### Landmines (3-5 items) + +Concrete, verifiable facts about the competitor that **hurt them in a deal**. Every item must cite a URL from an existing partial (Mentions, Benchmarks, or Research Findings). Prefer third-party evidence (benchmarks, reviews, news) over the competitor's own marketing — marketing claims are weak ammunition. + +Format: +``` +### Landmines + +- **{one-line factual claim}** — {how an AE uses it in the call}. 
(source: {url}) +``` + +Example: +``` +- **Anchor won Halluminate's November 2025 stealth benchmark (1.7% fail rate)** — use if prospect worries about detection, but only after confirming their volume tier; Anchor's CAPTCHA product is paywalled behind Starter ($20/mo). (source: https://halluminate.com/browserbench) +``` + +### Objection Handlers (3-5 items) + +Format: "if prospect says X → you say Y, citing a real user moat from `userCompany.winningSummary`." Every response must reference a feature/integration the fact-checked matrix confirms the user has. Never respond with a claim that contradicts a fact-checked matrix cell. + +Format: +``` +### Objection Handlers + +- If they say: "{objection verbatim}" + You say: {response citing user's moat} (evidence: {url}) +``` + +Example: +``` +- If they say: "Hyperbrowser is $99/mo cheaper than your Scale tier" + You say: "Hyperbrowser drops replay this quarter — you'll lose session video when you hit production. Our Scale tier includes session inspector + video recording; matrix.json confirms Hyperbrowser's feature set doesn't cover either." (evidence: https://docs.hyperbrowser.ai/changelog) +``` + +### Talk Tracks (2-3 items) + +One-to-two sentence opening pitches an AE can memorize. Lead with a user winningSummary differentiator; name the specific gap in the competitor. No hyperbole, no claims not grounded in fact-checked matrix cells. + +Format: +``` +### Talk Tracks + +1. {1-2 sentence pitch} +``` + +Example: +``` +1. For production observability, Browserbase is the only provider in the category with BOTH session video recording AND a session inspector UI — Hyperbrowser shipped neither, Anchor shipped neither, and Kernel replaced video replay with rrweb-only last quarter. +``` + +## Markdown file shape + +```markdown +--- +competitor_name: Hyperbrowser +lane: battle +generated_at: 2026-04-24 +--- + +## Battle Card + +### Landmines +- **Fact 1** — usage. (source: url) +- **Fact 2** — usage. 
(source: url) + +### Objection Handlers +- If they say: "..." + You say: ... (evidence: url) + +### Talk Tracks +1. Pitch 1 +2. Pitch 2 +``` + +## Quality gates — Adversarial self-check (subagent MUST run before writing) + +- [ ] Every landmine cites a URL that appears in one of the input partials (Mentions / Benchmarks / Research Findings). No invented URLs. +- [ ] No claim contradicts a fact-checked cell in `matrix.json` (cells must have a `sources` URL to be trustworthy). +- [ ] No talk track claims a user feature where `matrix.json` shows `userCompany.features[X] = false`. +- [ ] Objections are realistic — they're what a prospect would actually raise based on the competitor's strongest marketing lines, not strawmen. +- [ ] Third-party evidence preferred over competitor's own marketing (benchmarks, reviews, news > their docs/pricing). + +If a potential landmine has no evidence in the partials, OMIT it. It is better to ship 3 cited landmines than 5 half-invented ones. diff --git a/skills/competitor-analysis/references/example-research.md b/skills/competitor-analysis/references/example-research.md index d40b8c9..bd94cfd 100644 --- a/skills/competitor-analysis/references/example-research.md +++ b/skills/competitor-analysis/references/example-research.md @@ -70,6 +70,19 @@ Marketing emphasizes "AI-native" and developer-first DX. Landing page hero: - **[high]** Series seed, $5M raised Nov 2024 (source: TechCrunch) - **[medium]** CEO LinkedIn emphasizes AI-agent use cases (source: linkedin.com/in/rivalco-ceo) - **[low]** Possibly a team under 20 based on careers page (source: rivalco.com/careers) + +## Battle Card + +### Landmines +- **Rival Co scores 73% on the computesdk stealth benchmark (4th of 7 tested)** — use against stealth-forward prospects; they rank below Browserbase and Hyperbrowser on the same test. 
(source: https://github.com/computesdk/benchmarks/pull/92) +- **G2 average 4.3/5 with "flaky sessions" as top complaint across 31 reviews** — cite when prospect raises reliability concerns. (source: https://g2.com/products/rival-co) + +### Objection Handlers +- If they say: "Rival Co is $99/mo — cheaper than your Pro tier" + You say: "Cheaper upfront, but compare total cost of stealth incidents — their 73% benchmark pass rate means ~1 in 4 requests hits a challenge page you'll need to retry, and retries aren't free." (evidence: https://github.com/computesdk/benchmarks/pull/92) + +### Talk Tracks +1. For production workloads where session reliability matters, Browserbase ships session inspector + video recording as table stakes; Rival Co has neither in their 2024 product set. ``` ## Field Rules @@ -78,7 +91,7 @@ Marketing emphasizes "AI-native" and developer-first DX. Landing page hero: - **`pricing_tiers`**: Pipe-separated (`|`) with tier name + short price. `compile_report.mjs` parses on `|` for the matrix view. - **`key_features`**, **`integrations`**: Pipe-separated lists. - **`strategic_diff`**: One-line summary (shown in overview table). -- **Body sections**: `## Product`, `## Pricing`, `## Features`, `## Positioning`, `## Comparison vs {user_company}`, `## Mentions`, `## Benchmarks`, `## Research Findings`. +- **Body sections**: `## Product`, `## Pricing`, `## Features`, `## Positioning`, `## Comparison vs {user_company}`, `## Mentions`, `## Benchmarks`, `## Research Findings`, `## Battle Card` (deep/deeper modes only; synthesized by the Battle lane after fact-check). - **Mentions format**: `- **[SourceType]** title | snippet (source: url, date)` — `SourceType` is one of `Benchmark`, `Comparison`, `News`, `Reddit`, `HN`, `LinkedIn`, `YouTube`, `Review`, `Podcast`, `X`. - **Findings format**: `- **[confidence]** fact (source: url)` — `confidence` is `high`, `medium`, or `low`. 
- **Filename**: `{OUTPUT_DIR}/{competitor-slug}.md` where slug is lowercase, hyphenated. diff --git a/skills/competitor-analysis/scripts/compile_report.mjs b/skills/competitor-analysis/scripts/compile_report.mjs index f740c39..064c446 100644 --- a/skills/competitor-analysis/scripts/compile_report.mjs +++ b/skills/competitor-analysis/scripts/compile_report.mjs @@ -403,9 +403,13 @@ const perCompetitorCss = ` .research { background:var(--card); border:1px solid var(--border); border-radius:4px; padding:1.5rem; margin-bottom:1.25rem; } .research h2 { font-size:1.125rem; font-weight:600; margin:1.5rem 0 0.5rem 0; color:var(--black); } .research h2:first-child { margin-top:0; } + .research h3 { font-size:0.9375rem; font-weight:600; margin:1rem 0 0.375rem 0; color:var(--black); } .research p { margin-bottom:0.75rem; } .research ul { margin:0.5rem 0 1rem 1.25rem; } .research li { margin-bottom:0.375rem; font-size:0.875rem; } + .research.battle { border-left:3px solid var(--brand); } + .research.battle h2 { color:var(--brand); } + .research.battle h3 { text-transform:uppercase; letter-spacing:0.04em; font-size:0.75rem; color:var(--muted); margin-top:1.25rem; } .confidence { font-size:0.75rem; font-weight:600; padding:1px 6px; border-radius:2px; } .confidence.high { background:rgba(144,201,77,0.12); color:#5a8a1a; } .confidence.medium { background:rgba(244,186,65,0.12); color:#9a7520; } @@ -463,6 +467,10 @@ for (const c of deduped) { const positioningHtml = c.sections['Positioning'] ? `<h2>Positioning</h2>${mdToHtml(c.sections['Positioning'])}` : ''; const comparisonKey = Object.keys(c.sections).find(k => k.startsWith('Comparison')); const comparisonHtml = comparisonKey ? `<h2>${escapeHtml(comparisonKey)}</h2>${mdToHtml(c.sections[comparisonKey])}` : ''; + // Battle Card — synthesized by the Battle lane subagent (Step 5d) after fact-check completes. + // Contains Landmines / Objection Handlers / Talk Tracks — sales-enablement-grade output. 
+ const battleCardKey = Object.keys(c.sections).find(k => k === 'Battle Card' || k.startsWith('Battle')); + const battleCardHtml = battleCardKey ? `<h2>${escapeHtml(battleCardKey)}</h2>${mdToHtml(c.sections[battleCardKey])}` : ''; const findingsHtml = c.sections['Research Findings'] ? `<h2>Research Findings</h2>${mdToHtml(c.sections['Research Findings'])}` : ''; // Screenshot — filename matches capture_screenshots.mjs output. @@ -512,6 +520,7 @@ for (const c of deduped) { ${positioningHtml} ${comparisonHtml} </div> + ${battleCardHtml ? `<div class="research battle">${battleCardHtml}</div>` : ''} <div class="research"> <h2>Mentions</h2> ${mentionsHtml} diff --git a/skills/competitor-analysis/scripts/merge_partials.mjs b/skills/competitor-analysis/scripts/merge_partials.mjs index 5c983f3..5de2c99 100644 --- a/skills/competitor-analysis/scripts/merge_partials.mjs +++ b/skills/competitor-analysis/scripts/merge_partials.mjs @@ -33,7 +33,7 @@ Reads {dir}/partials/{slug}.{lane}.md files and writes consolidated const dir = args[0]; const partialsDir = join(dir, 'partials'); -const LANES = ['marketing', 'discussion', 'social', 'news', 'technical']; +const LANES = ['marketing', 'discussion', 'social', 'news', 'technical', 'battle']; function parseFrontmatter(content) { const m = content.match(/^---\n([\s\S]*?)\n---/); @@ -259,6 +259,9 @@ for (const [slug, lanes] of bySlug.entries()) { // Comparison heading may be "Comparison vs Browserbase" etc — find any key starting with "Comparison" const comparisonKey = Object.keys(allSections).find(k => k.startsWith('Comparison')); + // Battle lane produces a `## Battle Card` section — sales enablement synthesized from verified + // partials + fact-checked matrix (runs AFTER Step 5c fact-check, see SKILL.md Step 5d). + const battleCardKey = Object.keys(allSections).find(k => k === 'Battle Card' || k.startsWith('Battle')); const out = [ '---', @@ -270,6 +273,7 @@ for (const [slug, lanes] of bySlug.entries()) { first('Features') ? 
`## Features\n${first('Features')}\n` : '', first('Positioning') ? `## Positioning\n${first('Positioning')}\n` : '', comparisonKey && allSections[comparisonKey].length ? `## ${comparisonKey}\n${allSections[comparisonKey][0]}\n` : '', + battleCardKey && allSections[battleCardKey].length ? `## ${battleCardKey}\n${allSections[battleCardKey][0]}\n` : '', dedupedMentions.length ? `## Mentions\n${dedupedMentions.join('\n')}\n` : '', dedupedBench.length ? `## Benchmarks\n${dedupedBench.join('\n')}\n` : '', dedupedFindings.length ? `## Research Findings\n${dedupedFindings.join('\n')}\n` : '', From e8bb80b6f7b164c726c99b99401c7d32ae4134a9 Mon Sep 17 00:00:00 2001 From: Jay Sahnan <jay@browserbase.com> Date: Fri, 24 Apr 2026 20:36:22 +0100 Subject: [PATCH 15/23] fix(competitor-analysis): accept battle-lane format drift at merge time MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit First end-to-end run of the battle lane on Browserbase data: 4 of 5 subagents emitted their Battle Card content with format drift that parseSections() couldn't resolve — some led with `# Battle Card: X vs Y` (h1, not h2 and so invisible to the `## `-only section splitter), some skipped the wrapper heading entirely and led with `## 1. Landmines`. Only 1 of 5 battle cards made it into the merged {slug}.md. Same root cause as the earlier mention-bullet-format fix (commit 953f078): subagents will drift from any prompt-level format spec. Treat the entire battle partial body as the Battle Card content regardless of heading style. Strip any leading `# Battle Card …` h1 or `## Battle Card` h2 wrapper so we don't double-wrap, then emit the rest under our canonical `## Battle Card` heading in the merged file. After the fix: 5 of 5 battle cards rendered in per-competitor HTML on the Browserbase run, each with 5-6 cited landmines, 5 objection handlers with source links, and 2-3 talk tracks. 
Content quality spot-checked on Anchor (counters the Halluminate stealth benchmark loss with the Advanced Stealth update link) and Steel (flags US-only regions + self-benchmark bias). --- .../scripts/merge_partials.mjs | 18 ++++++++++++++---- 1 file changed, 14 insertions(+), 4 deletions(-) diff --git a/skills/competitor-analysis/scripts/merge_partials.mjs b/skills/competitor-analysis/scripts/merge_partials.mjs index 5de2c99..2435398 100644 --- a/skills/competitor-analysis/scripts/merge_partials.mjs +++ b/skills/competitor-analysis/scripts/merge_partials.mjs @@ -259,9 +259,19 @@ for (const [slug, lanes] of bySlug.entries()) { // Comparison heading may be "Comparison vs Browserbase" etc — find any key starting with "Comparison" const comparisonKey = Object.keys(allSections).find(k => k.startsWith('Comparison')); - // Battle lane produces a `## Battle Card` section — sales enablement synthesized from verified - // partials + fact-checked matrix (runs AFTER Step 5c fact-check, see SKILL.md Step 5d). - const battleCardKey = Object.keys(allSections).find(k => k === 'Battle Card' || k.startsWith('Battle')); + // Battle lane is format-drifty: subagents emit `## Battle Card`, `# Battle Card: X vs Y` + // (h1 — not picked up by parseSections), or skip the wrapper and lead with `## Landmines`. + // Treat the ENTIRE battle partial body as the Battle Card section regardless of heading style, + // so sales enablement content always lands in the merged file. + let battleCardBody = ''; + if (lanes.battle && lanes.battle.body) { + const body = lanes.battle.body.trim(); + // Strip any leading `# Battle Card ...` h1 line so we don't double-wrap. + battleCardBody = body.replace(/^#\s+Battle Card[^\n]*\n+/m, '').trim(); + // If the body already has `## Battle Card` as its first h2, drop that leading heading + // (we add our own below). 
+ battleCardBody = battleCardBody.replace(/^##\s+Battle Card\s*\n+/m, '').trim(); + } const out = [ '---', @@ -273,7 +283,7 @@ for (const [slug, lanes] of bySlug.entries()) { first('Features') ? `## Features\n${first('Features')}\n` : '', first('Positioning') ? `## Positioning\n${first('Positioning')}\n` : '', comparisonKey && allSections[comparisonKey].length ? `## ${comparisonKey}\n${allSections[comparisonKey][0]}\n` : '', - battleCardKey && allSections[battleCardKey].length ? `## ${battleCardKey}\n${allSections[battleCardKey][0]}\n` : '', + battleCardBody ? `## Battle Card\n${battleCardBody}\n` : '', dedupedMentions.length ? `## Mentions\n${dedupedMentions.join('\n')}\n` : '', dedupedBench.length ? `## Benchmarks\n${dedupedBench.join('\n')}\n` : '', dedupedFindings.length ? `## Research Findings\n${dedupedFindings.join('\n')}\n` : '', From 716516ee086ee4b84403c61640160e95a4c9ce7c Mon Sep 17 00:00:00 2001 From: Jay Sahnan <jay@browserbase.com> Date: Fri, 24 Apr 2026 22:28:39 +0100 Subject: [PATCH 16/23] fix(competitor-analysis): harden merge + capture scripts on fresh run MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Second end-to-end run on Browserbase (2026-04-24-1955) exposed two small-but-real bugs not caught on the 2026-04-23 run: 1) merge_partials.mjs — the Battle Card heading-stripper's regex required the first line to be exactly `## Battle Card\s*\n` or `# Battle Card[^\n]*\n`, so an h2-with-suffix line like `## Battle Card — Hyperbrowser` slipped through. The merged hyperbrowser.md got a duplicate `## Battle Card` heading and the HTML rendered the section twice. Generalize to strip any leading heading line (h1-h3) mentioning "Battle Card" with any suffix. One regex handles all observed drift patterns from the 5 subagents. 2) capture_screenshots.mjs — the --help template literal contained unescaped backticks around `website`, breaking the enclosing `\`...\`` literal and yielding a SyntaxError at load time. 
Never caught before because prior runs skipped --help. Replaced the inner backticks with double quotes. Verified on the fresh Browserbase run: all 5 battle cards merge with exactly one `## Battle Card` header each; 5/5 hero screenshots captured (anchor / browserbase / hyperbrowser / kernel / steel). --- .../competitor-analysis/scripts/capture_screenshots.mjs | 2 +- skills/competitor-analysis/scripts/merge_partials.mjs | 9 ++++----- 2 files changed, 5 insertions(+), 6 deletions(-) diff --git a/skills/competitor-analysis/scripts/capture_screenshots.mjs b/skills/competitor-analysis/scripts/capture_screenshots.mjs index f559151..84b743c 100644 --- a/skills/competitor-analysis/scripts/capture_screenshots.mjs +++ b/skills/competitor-analysis/scripts/capture_screenshots.mjs @@ -18,7 +18,7 @@ const args = process.argv.slice(2); if (args.includes('--help') || args.includes('-h') || args.length === 0) { console.error(`Usage: node capture_screenshots.mjs <research-dir> [options] -Reads all .md files in <research-dir>, extracts the `website` field from each +Reads all .md files in <research-dir>, extracts the "website" field from each competitor's YAML frontmatter, and captures a 1280x800 viewport screenshot of the homepage. Writes one PNG per competitor as {slug}-hero.png. diff --git a/skills/competitor-analysis/scripts/merge_partials.mjs b/skills/competitor-analysis/scripts/merge_partials.mjs index 2435398..0f56b88 100644 --- a/skills/competitor-analysis/scripts/merge_partials.mjs +++ b/skills/competitor-analysis/scripts/merge_partials.mjs @@ -266,11 +266,10 @@ for (const [slug, lanes] of bySlug.entries()) { let battleCardBody = ''; if (lanes.battle && lanes.battle.body) { const body = lanes.battle.body.trim(); - // Strip any leading `# Battle Card ...` h1 line so we don't double-wrap. - battleCardBody = body.replace(/^#\s+Battle Card[^\n]*\n+/m, '').trim(); - // If the body already has `## Battle Card` as its first h2, drop that leading heading - // (we add our own below). 
- battleCardBody = battleCardBody.replace(/^##\s+Battle Card\s*\n+/m, '').trim(); + // Strip the FIRST heading line if it mentions "Battle Card" — handles h1/h2/h3 and any + // suffix (e.g. `## Battle Card — Hyperbrowser`, `# Battle Card: Browsaur`). Otherwise the + // canonical `## Battle Card` wrapper added below produces duplicate headings. + battleCardBody = body.replace(/^#{1,3}\s+Battle\s*Card\b[^\n]*\n+/m, '').trim(); } const out = [ From 8eef24f7271c09413cb0d61ad1f25b46355ffed9 Mon Sep 17 00:00:00 2001 From: Jay Sahnan <jay@browserbase.com> Date: Fri, 24 Apr 2026 22:37:03 +0100 Subject: [PATCH 17/23] =?UTF-8?q?fix(competitor-analysis):=20tighten=20ove?= =?UTF-8?q?rview=20table=20=E2=80=94=20exclude=20user,=20truncate=20cells?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Three concrete bugs on the refreshed Browserbase run: 1) The user's own company leaked into the competitor table as the first row with '—' pricing. Filter it out by matching competitor_name AND slug (case-insensitive) against matrix.json userCompany.name (falling back to --user-company). Also rebuild metaLine + {{TOTAL}} + mentions-header count off the filtered list so "N competitors" is accurate. 2) Overview table cells rendered full 650-char pricing_tiers strings when subagents drifted into prose instead of pipe-separated tiers. Add truncate() helper (~140-160ch with word-boundary ellipsis) on tagline, pricing, and strategic_diff cells. 3) featurePills dropped all pills when key_features had no pipes — because splitPipes returned a single giant blob. Fall back to splitting on semicolons/commas, and cap each pill to 40 chars with a word-boundary ellipsis. Prevents wall-of-text pills. Also lifted the curatedMatrix load above the first use site to avoid a temporal dead zone (the filter needs userCompany.name; the matrix was previously loaded farther down for the renderer functions). 
After fix: 5-row table instead of 6, pricing/tagline/diff cells fit the intended max-widths, feature pills show as short capsules. --- .../scripts/compile_report.mjs | 74 +++++++++++++------ 1 file changed, 53 insertions(+), 21 deletions(-) diff --git a/skills/competitor-analysis/scripts/compile_report.mjs b/skills/competitor-analysis/scripts/compile_report.mjs index 064c446..8f2d261 100644 --- a/skills/competitor-analysis/scripts/compile_report.mjs +++ b/skills/competitor-analysis/scripts/compile_report.mjs @@ -250,16 +250,56 @@ const withPricing = deduped.filter(c => c.pricing_tiers).length; const dirName = dir.split('/').pop(); const title = dirName.replace(/_/g, ' ').replace(/-/g, ' ').replace(/\b\w/g, c => c.toUpperCase()); const genDate = new Date().toLocaleDateString('en-US', { year: 'numeric', month: 'long', day: 'numeric' }); -const metaLine = `${deduped.length} competitors · ${totalMentions} mentions · ${totalBenchmarks} benchmarks · ${genDate}`; +// Initial metaLine uses deduped.length as fallback; we rebuild it after filtering the user's +// own company out of `competitorRows` so the "N competitors" count is accurate. +let metaLine = `${deduped.length} competitors · ${totalMentions} mentions · ${totalBenchmarks} benchmarks · ${genDate}`; + +// Load the curated matrix EARLY — the overview table needs userCompany.name to filter the +// user's own company out of the competitor list, and the strategic summary card needs the +// whole matrix. Keep this block above the first use site to avoid temporal dead zones. +let curatedMatrix = null; +try { + const p = join(dir, 'matrix.json'); + if (existsSync(p)) curatedMatrix = JSON.parse(readFileSync(p, 'utf-8')); +} catch (err) { + console.error(`Warning: matrix.json present but unreadable — falling back to pipe split. 
${err.message}`); +} // ---------- index.html (overview) ---------- function featurePills(featuresStr, max = 4) { - const feats = splitPipes(featuresStr).slice(0, max); - return feats.map(f => `<span class="pill pill-feature">${escapeHtml(f)}</span>`).join(''); + // key_features is supposed to be pipe-separated but subagents drift into prose. + // If no pipes are present, split on commas as a fallback so we still show something + // and cap item length to avoid bleeding wall-of-text into the table. + let feats = splitPipes(featuresStr); + if (feats.length <= 1 && featuresStr) { + feats = featuresStr.split(/[;,]/).map(s => s.trim()).filter(Boolean); + } + return feats.slice(0, max).map(f => { + const short = f.length > 42 ? f.slice(0, 40).replace(/\s+\S*$/, '') + '…' : f; + return `<span class="pill pill-feature">${escapeHtml(short)}</span>`; + }).join(''); +} + +function truncate(str, n) { + if (!str) return ''; + if (str.length <= n) return str; + return str.slice(0, n - 1).replace(/\s+\S*$/, '') + '…'; } -const tableRows = deduped.map(c => { +// Exclude the user's own company from the competitor table. matrix.json's userCompany.name +// wins; fall back to the --user-company CLI arg. Match case-insensitively against the +// competitor_name AND the slug so we catch "Browserbase" vs "browserbase.md". +const userNameLower = ((curatedMatrix && curatedMatrix.userCompany && curatedMatrix.userCompany.name) || userCompany || '').toLowerCase(); +const competitorRows = deduped.filter(c => { + const nameLower = (c.competitor_name || '').toLowerCase(); + const slugLower = (c.slug || '').toLowerCase(); + return !userNameLower || (nameLower !== userNameLower && slugLower !== userNameLower); +}); +// Rebuild metaLine now that we know the true competitor count (excluding the user's company). 
+metaLine = `${competitorRows.length} competitors · ${totalMentions} mentions · ${totalBenchmarks} benchmarks · ${genDate}`; + +const tableRows = competitorRows.map(c => { const hasDetail = c.body && c.body.length > 50; const nameHtml = hasDetail ? `<a href="competitors/${c.slug}.html">${escapeHtml(c.competitor_name)}</a>` @@ -267,28 +307,20 @@ const tableRows = deduped.map(c => { const websiteHtml = c.website ? `<span class="muted-line"><a href="${escapeHtml(c.website)}" target="_blank" style="color:var(--muted);">${escapeHtml(c.website.replace(/^https?:\/\/(www\.)?/, ''))}</a></span>` : ''; - const pricingShort = splitPipes(c.pricing_tiers).slice(0, 3).join(' · ') || '—'; + // Pricing: prefer pipe-split summary; if there are no pipes (prose drift), truncate hard. + let pricingShort = splitPipes(c.pricing_tiers).slice(0, 3).join(' · '); + if (!pricingShort) pricingShort = truncate(c.pricing_tiers || '', 140) || '—'; return ` <tr> <td><strong>${nameHtml}</strong>${websiteHtml}</td> - <td style="max-width:260px;">${escapeHtml(c.tagline || c.positioning || c.product_description || '')}</td> + <td style="max-width:260px;">${escapeHtml(truncate(c.tagline || c.positioning || c.product_description || '', 140))}</td> <td style="max-width:180px;">${escapeHtml(pricingShort)}</td> <td style="max-width:260px;">${featurePills(c.key_features)}</td> - <td class="muted-line" style="max-width:260px;color:var(--muted);font-size:0.8125rem;">${escapeHtml(c.strategic_diff || '')}</td> + <td class="muted-line" style="max-width:260px;color:var(--muted);font-size:0.8125rem;">${escapeHtml(truncate(c.strategic_diff || '', 160))}</td> </tr>`; }).join('\n'); -// Prefer a curated taxonomy from `matrix.json` when present — subagents write -// heterogeneous prose into key_features/integrations frontmatter, so the raw -// split-by-pipe axis is one-blob-per-competitor (no overlap, no comparison). 
-// `matrix.json` defines a shared axis of atomic features and a yes/no mapping -// per competitor, producing a real comparison. -let curatedMatrix = null; -try { - const p = join(dir, 'matrix.json'); - if (existsSync(p)) curatedMatrix = JSON.parse(readFileSync(p, 'utf-8')); -} catch (err) { - console.error(`Warning: matrix.json present but unreadable — falling back to pipe split. ${err.message}`); -} +// curatedMatrix was loaded earlier (before the overview table renderer needed userCompany.name). +// Keeping this comment as a marker for the matrix-axis functions below. // Strategic summary — "Where are you winning?" / "Where are you losing?" // Requires matrix.json to carry a `userCompany` entry with feature flags. We then @@ -372,7 +404,7 @@ const strategicSummary = buildStrategicSummary(); let indexHtml = template .replace(/\{\{TITLE\}\}/g, escapeHtml(`${title}`)) .replace(/\{\{META\}\}/g, escapeHtml(metaLine)) - .replace(/\{\{TOTAL\}\}/g, String(deduped.length)) + .replace(/\{\{TOTAL\}\}/g, String(competitorRows.length)) .replace(/\{\{MENTION_COUNT\}\}/g, String(totalMentions)) .replace(/\{\{BENCHMARK_COUNT\}\}/g, String(totalBenchmarks)) .replace(/\{\{WITH_PRICING\}\}/g, String(withPricing)) @@ -782,7 +814,7 @@ const mentionsHtml = `<!DOCTYPE html> <div class="container"> <header> <h1>Mentions Feed</h1> - <div class="meta">${allMentions.length} mentions across ${deduped.length} competitors · ${escapeHtml(genDate)}</div> + <div class="meta">${allMentions.length} mentions across ${competitorRows.length} competitors · ${escapeHtml(genDate)}</div> </header> <nav class="views"> <a href="index.html">Overview</a> From d8702dfad30d61401a8897a40d38d61eec2b2d2d Mon Sep 17 00:00:00 2001 From: Jay Sahnan <jay@browserbase.com> Date: Fri, 24 Apr 2026 22:42:30 +0100 Subject: [PATCH 18/23] =?UTF-8?q?fix(competitor-analysis):=20mentions=20fe?= =?UTF-8?q?ed=20=E2=80=94=20alias=20frontmatter=20+=20normalize=20pills?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 
Content-Transfer-Encoding: 8bit Two bugs on the fresh Browserbase run that both traced to subagent format drift: 1) Browsaur missing entirely from mentions feed chips Root cause: browsaur.marketing.md wrote `competitor: Browsaur` instead of canonical `competitor_name: Browsaur`. merge_partials' CANONICAL_FIELDS whitelist dropped the field silently, leaving Browsaur's merged .md with an empty competitor_name. The overview table still rendered (by slug) but the mentions feed keys on competitor name for the chip label — blank chips filtered out. Fix: FIELD_ALIASES map in merge_partials — `competitor` and `name` and `company` all map to `competitor_name`; `homepage` and `url` to `website`; `price_tiers` and `pricing` to `pricing_tiers`. canonicalValue(fm, key) walks the alias table when the canonical key is absent. Silent fallback: subagents can drift on field names without us losing data. 2) Unstyled mention pills with invented source types Subagents emitted `[VendorBlog]`, `[HackerNews]`, `[GitHubIssue]`, `[CompetitorBlog]` — none matching the CSS classes. Rendered as unstyled spans. Fix: normalizeSourceType() in parseMentions. Canonical set (Benchmark/Comparison/News/Reddit/HN/LinkedIn/YouTube/Review/ Podcast/X/DevTo/Hashnode/Substack/Blog) stays. Aliases map HackerNews→HN, Twitter→X, VendorBlog/CompetitorBlog/GitHubIssue/ Medium/Docs→Blog. Unknown types keyword-scan for a canonical token; else fall back to Blog. Guarantees every pill gets styled. Also filter competitorRows (not deduped) when building allMentions, so the user's own company doesn't leak into the feed even if it has mentions. Fallback chip label is c.slug if competitor_name is blank. After fix on the Browserbase run: 5 competitor chips (Anchor 21, Browsaur 12, Hyperbrowser 22, Kernel 28, Steel 23), all source pills mapped to canonical palette. 
--- .../scripts/compile_report.mjs | 39 +++++++++++++++++-- .../scripts/merge_partials.mjs | 27 ++++++++++++- 2 files changed, 61 insertions(+), 5 deletions(-) diff --git a/skills/competitor-analysis/scripts/compile_report.mjs b/skills/competitor-analysis/scripts/compile_report.mjs index 8f2d261..2606dd1 100644 --- a/skills/competitor-analysis/scripts/compile_report.mjs +++ b/skills/competitor-analysis/scripts/compile_report.mjs @@ -108,6 +108,37 @@ function parseSections(body) { return sections; } +// Normalize subagent-invented source types onto the canonical taxonomy so the mentions +// feed CSS has a pill class for every entry. Observed drift: HackerNews→HN, VendorBlog→Blog, +// CompetitorBlog→Blog, GitHubIssue→Blog, Twitter→X. Unknown types fall back to "Blog" to +// guarantee styled rendering (catch-all). Also handles free-text leaking into the bracket +// slot (e.g. "Browsaur Blog — ..." — sourceType becomes "Blog" if we can find that token). +function normalizeSourceType(raw) { + if (!raw) return 'Blog'; + const t = raw.trim(); + const canonical = new Set([ + 'Benchmark','Comparison','News','Reddit','HN','LinkedIn','YouTube', + 'Review','Podcast','X','DevTo','Hashnode','Substack','Blog' + ]); + if (canonical.has(t)) return t; + // Alias table for common drifts + const aliases = { + 'Hacker News': 'HN', 'HackerNews': 'HN', 'Show HN': 'HN', 'Ask HN': 'HN', + 'Twitter': 'X', + 'Vendor Blog': 'Blog', 'VendorBlog': 'Blog', + 'Competitor Blog': 'Blog', 'CompetitorBlog': 'Blog', + 'GitHub Issue': 'Blog', 'GitHubIssue': 'Blog', 'GitHub': 'Blog', + 'Documentation': 'Blog', 'Docs': 'Blog', + 'Medium': 'Blog', 'Substack Post': 'Substack', + }; + if (aliases[t]) return aliases[t]; + // Keyword scan — if the raw contains a canonical token anywhere, use that. 
+ for (const c of canonical) { + if (new RegExp(`\\b${c}\\b`, 'i').test(t)) return c; + } + return 'Blog'; // catch-all for fully unknown types (styled via .src-Blog) +} + // Parse Mentions section into structured entries. // Format: `- **[SourceType]** Title | Snippet (source: URL, YYYY-MM-DD)` function parseMentions(sectionText) { @@ -118,7 +149,7 @@ function parseMentions(sectionText) { if (!line.startsWith('- ')) continue; const typeM = line.match(/^-\s*\*\*\[([^\]]+)\]\*\*\s*(.*)$/); if (!typeM) continue; - const sourceType = typeM[1].trim(); + const sourceType = normalizeSourceType(typeM[1].trim()); let rest = typeM[2]; let url = ''; @@ -719,10 +750,12 @@ writeFileSync(join(dir, 'matrix.html'), matrixHtml); // ---------- mentions.html (feed + filter) ---------- +// Mentions feed: iterate `competitorRows` (user's own company already filtered out earlier) +// so the chronological feed doesn't mix the user's own mentions with competitors'. const allMentions = []; -for (const c of deduped) { +for (const c of competitorRows) { for (const m of c.mentions) { - allMentions.push({ ...m, competitor: c.competitor_name, slug: c.slug }); + allMentions.push({ ...m, competitor: c.competitor_name || c.slug, slug: c.slug }); } } // Sort by date desc (empty dates last) diff --git a/skills/competitor-analysis/scripts/merge_partials.mjs b/skills/competitor-analysis/scripts/merge_partials.mjs index 0f56b88..e656c9a 100644 --- a/skills/competitor-analysis/scripts/merge_partials.mjs +++ b/skills/competitor-analysis/scripts/merge_partials.mjs @@ -243,15 +243,38 @@ for (const [slug, lanes] of bySlug.entries()) { 'headquarters', 'founded', 'employee_estimate', 'funding_info', 'strategic_diff', ]; + // Subagents drift on canonical field names too. Common aliases observed in real runs: + // `competitor` → `competitor_name` (browsaur marketing subagent), `homepage` → `website`, + // `price_tiers` / `pricing` → `pricing_tiers`. Accept aliases silently. 
+ const FIELD_ALIASES = { + 'competitor': 'competitor_name', + 'name': 'competitor_name', + 'company': 'competitor_name', + 'homepage': 'website', + 'url': 'website', + 'price_tiers': 'pricing_tiers', + 'pricing': 'pricing_tiers', + }; + function canonicalValue(fm, key) { + if (fm[key]) return fm[key]; + for (const [alias, canonical] of Object.entries(FIELD_ALIASES)) { + if (canonical === key && fm[alias]) return fm[alias]; + } + return undefined; + } const mergedFm = {}; for (const k of CANONICAL_FIELDS) { - if (marketing.fm[k]) mergedFm[k] = marketing.fm[k]; + const v = canonicalValue(marketing.fm, k); + if (v) mergedFm[k] = v; } // Other lanes may fill in canonical gaps (e.g. funding_info from news, strategic_diff from technical). for (const lane of LANES) { if (lane === 'marketing' || !lanes[lane] || !lanes[lane].fm) continue; for (const k of CANONICAL_FIELDS) { - if (!mergedFm[k] && lanes[lane].fm[k]) mergedFm[k] = lanes[lane].fm[k]; + if (!mergedFm[k]) { + const v = canonicalValue(lanes[lane].fm, k); + if (v) mergedFm[k] = v; + } } } From d83b7cc9da39aeeeb26b052f606f9b64614092e9 Mon Sep 17 00:00:00 2001 From: Jay Sahnan <jay@browserbase.com> Date: Fri, 24 Apr 2026 23:44:51 +0100 Subject: [PATCH 19/23] perf(competitor-analysis): fix 25-min Step 5 wall-clock + skill-creator nits MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit A real 10-competitor run on Browserbase clocked 40 minutes and never reached fact-check or screenshots before interrupt. Trace attribution: Step 5 enrichment alone burned 25 minutes by self-throttling to 10 agents per message (5 sequential rounds of 10), when the Agent tool happily runs 50+ in parallel. Wall clock collapses to the slowest single agent (~5 min) once we stop batching. Three classes of fix in this commit: 1. Parallelism guidance — workflow.md + SKILL.md - Drop the "up to ~6 per message" cap. Replaced with the explicit rule: launch ALL subagents needed for a phase in ONE Agent message. 
For 10 × 5 lanes = 50 parallel agents in one message. - Document the measured cost: splitting cost 20 minutes vs unsplit on the Apr 2026 Browserbase run. - Update Step 5 + Wave Management + the lane-fan-out section to match. No remaining contradictions in the docs. 2. Discovery is parallel Bash, not subagents Discovery is 6-12 `bb search` calls. Wrapping each wave in an Agent subagent costs more in cold-start + tool-reasoning overhead than the work itself (~1-2 min wasted). New "Discovery — parallel Bash, not subagents" section in workflow.md gives the exact 3-Bash-call recipe (Wave A/B/C). SKILL.md Step 3 points at it. 3. Skill-creator audit nits (rules from the skill-creator skill) - Add Tables of Contents to all 4 reference docs >100 lines (workflow.md, research-patterns.md, example-research.md, battle-card-subagent.md). battle-card.md is 91 lines so skipped per the rule. - Bump version "0.1.0" → "0.2.0". The skill picked up battle cards (df62374), fact-check (8502f71), prose summaries (9fd482f), user-company parity (845422d), matrix taxonomy (c74d229), Step 4.5 user-confirm (ae58982), and 7 more fixes since 0.1.0 — well past a minor bump. - Kept `allowed-tools` frontmatter field. Not in skill-creator's spec but harness-consumed in some Claude Code setups; harmless if ignored, useful if respected. Estimated next-run impact: 40 min → ~12-15 min through compile, dominated by per-subagent ceiling (3-5 min) + matrix synthesis (4 min) + fact-check (5-10 min if you want it). 
--- skills/competitor-analysis/SKILL.md | 9 +-- .../references/battle-card-subagent.md | 5 ++ .../references/example-research.md | 5 ++ .../references/research-patterns.md | 9 +++ .../references/workflow.md | 60 ++++++++++++------- 5 files changed, 61 insertions(+), 27 deletions(-) diff --git a/skills/competitor-analysis/SKILL.md b/skills/competitor-analysis/SKILL.md index a46e090..b6d7b41 100644 --- a/skills/competitor-analysis/SKILL.md +++ b/skills/competitor-analysis/SKILL.md @@ -19,7 +19,7 @@ compatibility: Requires bb CLI (@browserbasehq/cli) and BROWSERBASE_API_KEY env allowed-tools: Bash Agent AskUserQuestion metadata: author: browserbase - version: "0.1.0" + version: "0.2.0" --- # Competitor Analysis @@ -149,10 +149,7 @@ Evaluation on Browserbase shows all three waves are additive — skip any and yo - After the searches, run `scripts/extract_vs_names.mjs` to parse `"X vs Y"` patterns from result titles — this uniquely surfaces competitors that don't appear as URL hits. **Process**: -1. Launch discovery subagents in a single message (up to ~6), split across the three waves. Each subagent runs its queries in ONE Bash call: - ```bash - bb search "{query}" --num-results 25 --output /tmp/competitor_discovery_batch_{N}.json - ``` +1. Issue **3 parallel `bb search` Bash calls** (one per wave) in a SINGLE message — NOT subagents. Each Bash call chains its 2-4 queries with `&&`. See `references/workflow.md` → "Discovery — parallel Bash, not subagents" for the exact recipe. Subagents are too heavy for a workload of 6-12 `bb search` calls. 2. After all waves complete: ```bash node {SKILL_DIR}/scripts/list_urls.mjs /tmp --prefix competitor > /tmp/competitor_urls.txt @@ -234,7 +231,7 @@ For each competitor, launch 5 parallel subagents, one per lane: - **E. Technical & Benchmarks** (`technical`): GitHub benchmark repos/PRs, performance posts. Writes Benchmarks + technical Findings. Budget per lane: deep = 5-8 tool calls, deeper = 10-15. 
-Launch all 5 lane-subagents for ONE competitor in a single Agent tool call set (5 parallel). Across 5 competitors = 5 messages. +**Launch ALL competitor × lane subagents in a SINGLE Agent tool message.** For 10 competitors × 5 lanes = 50 parallel Agent calls in one message. Do NOT split into batches per competitor or per lane — wall clock collapses to the slowest single agent (~3-5 min). Splitting into 5 rounds of 10 cost 25 minutes of wall clock vs 5 minutes parallel on a real measured run; do not do it. Each subagent writes a partial to `{OUTPUT_DIR}/partials/{slug}.{lane}.md`. diff --git a/skills/competitor-analysis/references/battle-card-subagent.md b/skills/competitor-analysis/references/battle-card-subagent.md index 7e47b03..b367915 100644 --- a/skills/competitor-analysis/references/battle-card-subagent.md +++ b/skills/competitor-analysis/references/battle-card-subagent.md @@ -1,5 +1,10 @@ # Battle Card subagent prompt +## Contents +- [Placeholders to substitute](#placeholders-to-substitute) — `{OUTPUT_DIR}`, `{COMPETITOR_SLUG}`, etc. +- [Prompt](#prompt) — full subagent instruction template (paste with placeholders filled in) +- [Wave management](#wave-management) — launch policy: one Agent message per run, all competitors in parallel + Main agent substitutes placeholders per competitor. Launch AFTER Step 5c fact-check completes — this lane depends on `matrix.json` cells having `sources` URLs. 
## Placeholders to substitute diff --git a/skills/competitor-analysis/references/example-research.md b/skills/competitor-analysis/references/example-research.md index bd94cfd..687cb04 100644 --- a/skills/competitor-analysis/references/example-research.md +++ b/skills/competitor-analysis/references/example-research.md @@ -1,5 +1,10 @@ # Example Competitor Research File +## Contents +- [Template](#template) — full worked example for a fictional "Rival Co" +- [Field Rules](#field-rules) — frontmatter fields, body section order, mention/findings format +- [Writing via Bash Heredoc](#writing-via-bash-heredoc) — required pattern for subagents to avoid permission prompts + Each enrichment subagent writes one markdown file per competitor to `{OUTPUT_DIR}/{competitor-slug}.md`, where `{OUTPUT_DIR}` is the per-run Desktop directory set up by the main agent in Step 0 (e.g., `~/Desktop/acme_competitors_2026-04-23/`). The YAML frontmatter contains structured fields for report/matrix compilation. The body contains per-section research plus aggregated mentions and benchmarks. 
## Template diff --git a/skills/competitor-analysis/references/research-patterns.md b/skills/competitor-analysis/references/research-patterns.md index ed1ad3a..236319a 100644 --- a/skills/competitor-analysis/references/research-patterns.md +++ b/skills/competitor-analysis/references/research-patterns.md @@ -1,5 +1,14 @@ # Competitor Analysis — Research Patterns +## Contents +- [Overview](#overview) — two research contexts (self vs target) +- [Self-Research (User's Company)](#self-research-users-company) — sub-questions, page discovery, synthesis output (precise_category, include keywords, exclusion list) +- [Competitor Research — 4 Research Lanes](#competitor-research--4-research-lanes) — Marketing / External / Benchmarks / Strategic Diff +- [Depth Mode Behavior](#depth-mode-behavior) — quick / deep / deeper budgets and scope +- [Finding Format (per lane)](#finding-format-per-lane) — JSON shape, confidence levels +- [Research Loop Rules](#research-loop-rules) — 7 meta-rules for the research phase +- [Synthesis Instructions](#synthesis-instructions) — turn findings into matrix cells + ## Overview Two research contexts: diff --git a/skills/competitor-analysis/references/workflow.md b/skills/competitor-analysis/references/workflow.md index c73013b..d54f759 100644 --- a/skills/competitor-analysis/references/workflow.md +++ b/skills/competitor-analysis/references/workflow.md @@ -1,5 +1,15 @@ # Competitor Analysis — Workflow Reference +## Contents +- [Discovery Batch JSON Schema](#discovery-batch-json-schema) — bb search output format +- [Competitor Research Markdown Format](#competitor-research-markdown-format) — frontmatter + body section spec +- [Extracting Text from HTML](#extracting-text-from-html) — bb fetch | jq | sed pipeline +- [Discovery — parallel Bash, not subagents](#discovery--parallel-bash-not-subagents) — Wave A/B/C recipes +- [Enrichment fan-out — 5 subagents PER competitor](#enrichment-fan-out--5-subagents-per-competitor-deepdeeper-modes) +- [Legacy: 
Single-subagent template](#legacy-single-subagent-template-quick-mode-only) — quick mode only +- [Wave Management](#wave-management) — parallelism rule, gate phase, sizing formula +- [Report Compilation](#report-compilation) — compile_report.mjs invocation + ## Discovery Batch JSON Schema File: `/tmp/competitor_discovery_batch_{N}.json` @@ -65,29 +75,37 @@ bb fetch --allow-redirects "https://rivalco.com/pricing" | sed 's/<script[^>]*>. Limit to ~3000 chars per page to keep subagent context manageable. For JS-heavy pages (client-rendered pricing tables), use `bb browse` instead of `bb fetch`. -## Discovery Subagent Prompt Template +## Discovery — parallel Bash, not subagents -``` -You are a competitor discovery subagent. Run search queries and save results. +The main agent runs discovery as **3 parallel `bb search` Bash calls** (one per wave) in a SINGLE message. No subagent layer. Each wave chains its 2-4 queries with `&&` and writes results to `/tmp/competitor_discovery_batch_{wave}{N}.json`. -TOOL RULES — CRITICAL, FOLLOW EXACTLY: -1. You may ONLY use the Bash tool. No exceptions. -2. Run ALL searches in a SINGLE Bash call using && chaining. -3. BANNED TOOLS: WebFetch, WebSearch, Write, Read, Glob, Grep — ALL BANNED. -4. NEVER use ~ or $HOME in paths — use full literal paths. 
+Example — main agent issues these three Bash tool calls in parallel in one message: -TASK: -Run ALL of the following searches in ONE Bash command: +```bash +# Wave A — alternatives +bb search "alternatives to {user_company}" --num-results 25 --output /tmp/competitor_discovery_batch_A1.json && \ +bb search "{user_company} competitors" --num-results 25 --output /tmp/competitor_discovery_batch_A2.json && \ +echo "A done" +``` -bb search "{query1}" --num-results 25 --output /tmp/competitor_discovery_batch_{N1}.json && \ -bb search "{query2}" --num-results 25 --output /tmp/competitor_discovery_batch_{N2}.json && \ -bb search "{query3}" --num-results 25 --output /tmp/competitor_discovery_batch_{N3}.json && \ -echo "Discovery complete" +```bash +# Wave B — precise category +bb search "{precise_category}" --num-results 25 --output /tmp/competitor_discovery_batch_B1.json && \ +bb search "{compose 3 distinctive tokens}" --num-results 25 --output /tmp/competitor_discovery_batch_B2.json && \ +bb search "{primary_noun} for ai agents" --num-results 25 --output /tmp/competitor_discovery_batch_B3.json && \ +echo "B done" +``` -After the command completes, report back ONLY the count of results per batch. -Do NOT analyze, summarize, or return the actual results. +```bash +# Wave C — comparison-page graph +bb search "{user_company} vs" --num-results 25 --output /tmp/competitor_discovery_batch_C1.json && \ +bb search "{seed1} vs" --num-results 20 --output /tmp/competitor_discovery_batch_C2.json && \ +bb search "{seed2} vs" --num-results 20 --output /tmp/competitor_discovery_batch_C3.json && \ +echo "C done" ``` +Why direct Bash and not subagents: each wave is 2-4 `bb search` calls — agent cold-start + tool-reasoning overhead is bigger than the actual work. Using parallel Bash saves ~1-2 min per run with no quality loss. + ### Discovery query patterns Discovery uses **three parallel waves** (evaluated — all three are additive): @@ -122,7 +140,7 @@ The 5 lanes: | **D. 
News & Comparisons** | `news` | Comparison pages ("X vs Y"), TechCrunch / Verge / Forbes / VentureBeat / Businesswire, independent blog reviews, Substack. Every mention MUST include a date. | | **E. Technical & Benchmarks** | `technical` | GitHub benchmark repos/PRs, performance blog posts, independent tests. Writes Benchmarks bullets AND Findings on technical specifics (CDP support, uptime, concurrency limits, SDKs). | -**Wave management for 5 competitors × 5 lanes = 25 subagents**: launch 5 subagents per competitor in ONE message (all 5 lanes parallel), sequentially per competitor across 5 messages. Or for ≤3 competitors, fit all 15 subagents in 3 messages. +**Wave management — launch ALL subagents in ONE message**: for N competitors × 5 lanes = 5N subagents, fit them all in a single Agent-tool message. Wall clock then equals the slowest single subagent (~3-5 min) instead of `batches × slowest_per_batch`. On a real 10-competitor run we measured 25 minutes wasted by self-throttling to 10-per-message — the Agent tool happily runs 50+ in parallel; do not split into batches for "politeness". The only cap is that each subagent still batches its own Bash operations into a single call. **Merge step** (once all partials exist): ```bash @@ -283,11 +301,11 @@ Do NOT return raw data to the main conversation. ## Wave Management ### Key Principle: Maximize Parallelism, Minimize Prompts -Launch as many subagents as possible in a single message (up to ~6 per message). Each subagent MUST batch all its Bash operations. +**Launch ALL subagents needed for a phase in ONE message.** No "up to 6 per message" cap — the Agent tool runs them in parallel, so wall clock = slowest single agent regardless of count. On a 10-competitor × 5-lane = 50-subagent enrichment, splitting into 5 batches of 10 cost an extra 20 minutes of wall clock vs one batch of 50 (measured Apr 2026). Each subagent still MUST batch its own Bash operations into a single call. 
### Discovery Phase -- Launch up to 6 discovery subagents in a single message, split by wave (A/B/C — see "Discovery query patterns" above) -- Each subagent runs ALL its queries in ONE Bash call with `&&` chaining +- **Run discovery as parallel `bb search` Bash calls, not subagents.** Subagent overhead (cold start + tool reasoning) is bigger than the work. Three Bash tool calls in one message — one per wave (A/B/C) — chain each wave's searches with `&&`. +- Each wave's bash call writes its outputs as `/tmp/competitor_discovery_batch_{wave}{N}.json` - After all waves complete, run the following in sequence: ```bash # 1. Dedup URLs from all batches @@ -352,7 +370,7 @@ Two modes: - **`quick` mode** — single subagent per batch of competitors. Lane A (marketing) only. ~8 competitors per subagent, 2-3 tool calls each. Writes directly to `{OUTPUT_DIR}/{slug}.md`. - **`deep` / `deeper` modes** — 5-subagent fan-out PER competitor. Each subagent owns ONE lane (marketing / discussion / social / news / technical). Writes to `{OUTPUT_DIR}/partials/{slug}.{lane}.md`. Budget: 5-8 calls per subagent (deep), 10-15 (deeper). After all lanes complete, run `scripts/merge_partials.mjs` to consolidate. -- Launch the 5 lane-subagents for a competitor in ONE Agent tool message (5 parallel Agent calls). Across multiple competitors, batch into 3-5 messages depending on count. +- **Launch ALL competitor × lane subagents in a SINGLE Agent tool message.** For 10 competitors × 5 lanes = 50 parallel agents in one message. Do NOT split into batches — wall clock becomes the slowest single agent (~3-5 min) instead of batches-times-batch-max (~25 min on 10 competitors split into 5 rounds of 10). 
### Screenshots Phase (after merge, before compile) From 0468e0633a85aabd65cbd5e199abdf7c2e5cf382 Mon Sep 17 00:00:00 2001 From: Jay Sahnan <jay@browserbase.com> Date: Sat, 25 Apr 2026 00:39:13 +0100 Subject: [PATCH 20/23] perf(competitor-analysis): spot-check fact-check by default, 25-call budget MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The Apr 25 Browserbase run got stuck at 111+ bb tool calls in fact-check before user interrupt. Root cause: the previous Step 5c mandated verifying EVERY cell of matrix.json — for a 7-company × 33-axis matrix that's 231 cells. Most of those cells are universal table-stakes (Playwright, Puppeteer, CDP, Python SDK) where any cloud browser has them; verifying all of those is redundant work that blocks the pipeline from reaching battle cards / screenshots / compile. The original problem fact-check was solving (the SOC 2 hallucination on the Apr 23 run) was about a HANDFUL of high-stakes cells: the ones that drive the "Where you're winning" summary, plus compliance + license + pricing. Rest doesn't need verification. Switch the default to spot-check with a hard 25-call budget. Priority order is explicit and ranked: 1. Every cell that appears in userCompany.winningSummary/losingSummary 2. Compliance cells (SOC 2, HIPAA, ISO 27001) across all competitors 3. Open-source license cells (Steel was wrong as AGPL — actually Apache 2.0) 4. Pricing tiers + funding numbers cited in summaries Explicit skip list: - Universal cells (Playwright, Puppeteer, CDP, Python SDK, etc.) - `false` cells with no claim - Integration cells unless cited in summaries The subagent counts its own bb calls and STOPS at 25 — partial fact-check beats blocking the pipeline. Full-sweep mode (~80 calls, verifies every non-universal cell) is opt-in for board-deck-level deliverables. Estimated impact on next run: fact-check phase 15+ min → 3-5 min, no more pipeline stalls before battle cards. 
The summary stays trustworthy because we verify the cells that actually feed it. --- skills/competitor-analysis/SKILL.md | 81 +++++++++++++++++++---------- 1 file changed, 53 insertions(+), 28 deletions(-) diff --git a/skills/competitor-analysis/SKILL.md b/skills/competitor-analysis/SKILL.md index b6d7b41..940f545 100644 --- a/skills/competitor-analysis/SKILL.md +++ b/skills/competitor-analysis/SKILL.md @@ -284,40 +284,65 @@ The main agent fixes this by synthesizing a **shared taxonomy** across competito If this step is skipped, the matrix view falls back to the raw pipe-split axis (useless for atomic comparison) and the strategic summary doesn't render. Do not skip. -### Fact-check the matrix (MANDATORY) +### Fact-check the matrix — spot-check the high-stakes cells (default) -**Do not trust the taxonomy pass alone.** It is LLM inference from heterogeneous prose and will make false claims that survive into the "Where you're winning" card, damaging the report's credibility. Observed during Browserbase run 2026-04-23: matrix.json claimed SOC 2 was unique to Browserbase; verification showed Hyperbrowser, Kernel, and Anchor Browser ALL have SOC 2 Type II (confirmed via their own trust portals and compliance blog posts). That single error would have presented a hallucinated moat to a real GTM team. +**Do not trust the taxonomy pass alone for high-stakes cells.** It is LLM inference from prose and will hallucinate moats. Observed during Browserbase run 2026-04-23: matrix.json claimed SOC 2 was unique to Browserbase; verification showed Hyperbrowser, Kernel, and Anchor Browser all have SOC 2 Type II. -Launch a dedicated **fact-check subagent** (Bash-only) after the taxonomy pass and before compile: +But verifying every cell is the opposite mistake. A 7-company × 33-axis matrix has 231 cells. 
The Apr 2026 Browserbase run got stuck at 111+ tool calls in fact-check before interrupt — the subagent kept going on table-stakes cells (Playwright support, CDP, Python SDK) that are universal in the category. + +**Default = spot-check, not full sweep.** Only verify cells that meaningfully change the strategic narrative. + +Launch a single fact-check subagent (Bash-only) with **a hard 25-call budget** that targets ONLY these high-stakes axes: + +1. **Every `userCompany.features` and `userCompany.integrations` cell** (the user's own moats — these go straight into "Where you're winning" prose). Typical: 17 + 16 = 33 cells, but most are obvious (your own product). Focus on: + - Anything claimed as a *moat* in `winningSummary` + - Anything claimed as a *gap* in `losingSummary` + - Compliance (SOC 2, HIPAA, ISO 27001, GDPR) + - Open-source license claims (MIT / Apache 2.0 / AGPL — observed wrong on Steel) + - Published uptime SLA (status page ≠ SLA) + +2. **Across competitors, only the cells that drive the win/loss summary**: + - For each "Winning" claim, verify the user has it AND verify the competitors don't. + - For each "Losing" claim, verify the named competitors do have it. + - Compliance + license + SLA across all competitors (high-trust, frequently wrong). + +3. **Do NOT verify**: + - Universal table-stakes (Playwright, Puppeteer, CDP, Python SDK) — every cloud browser has these. + - `false` cells with no claim being made (no moat lost or won). + - Integration cells unless they appear in the win/loss summary. ``` -You are a matrix-verification subagent. For EACH cell in {OUTPUT_DIR}/matrix.json -(userCompany + every competitor × every feature × every integration), verify the -boolean against a concrete source URL. - -TOOL RULES: Bash ONLY. bb search + bb fetch. - -For each cell: -1. If `true` — find a source that explicitly confirms the feature. 
Candidates: - - The company's own docs / pricing / feature pages - - Trust portals (trust.{company}.* / {company}.io/trust) - - Official changelog / blog announcements - - GitHub repo LICENSE / README for open-source claims - - SafeBase / Vanta trust portals for SOC 2 / HIPAA / ISO - If no source found, flip to `false` and record why. -2. If `false` — run ONE targeted bb search to check we didn't miss it. Flip to - `true` only on first-party evidence. -3. Be adversarial: "no mention" ≠ "not supported". But "status page exists" is NOT - proof of a published uptime SLA commitment — look for an explicit SLA % number. - -Output a verified matrix.json with an added `sources` field per cell: - { "Feature name": { "value": true, "source": "https://..." } } - -And write a cells-changed log to {OUTPUT_DIR}/matrix_fact_check.md listing every -flip (was true → now false, or vice versa) with the source URL and quoted evidence. +You are a matrix spot-check subagent. Budget: 25 bb calls TOTAL across all cells. +Stop and return what you have when you hit the budget — partial fact-check is +better than blocking the rest of the pipeline. + +TOOL RULES: Bash ONLY. bb search + bb fetch. Count your calls; stop at 25. + +PRIORITY ORDER (highest-stakes first — work down until budget): +1. Every cell that appears in userCompany.winningSummary or losingSummary +2. Compliance cells (SOC 2, HIPAA, ISO 27001) for user + every competitor +3. Open-source / self-hostable + license cells across all competitors +4. Pricing tier numbers ($X/mo, /hr) for user + competitors named in summaries +5. Funding / employee_estimate fields (only if cited in summaries) + +Skip: +- Universal cells (Playwright, Puppeteer, CDP, Python SDK, etc.) +- `false` cells where no claim is being made +- Integration matrix cells unless they appear in summaries + +For each cell verified: +- If `true` — find one source URL (docs, trust portal, GitHub LICENSE, etc). +- If `false` — one targeted bb search. 
Flip ONLY on first-party evidence. + +Output: matrix.json with `sources: { "Feature": "https://..." }` on the +verified cells (other cells stay as-is). Cells-changed log to +{OUTPUT_DIR}/matrix_fact_check.md with each flip + URL + quoted evidence. +Report back: "spot-check: N cells verified, M flipped, B/25 budget used". ``` -After the subagent completes, the main agent re-reads matrix.json, recompiles the report, and surfaces the `matrix_fact_check.md` delta to the user. **The strategic summary is worthless without this step** — it will confidently state "winning on X" where X is a hallucination. +**Full-sweep mode (opt-in, slower)**: if the user explicitly says "full fact check" or for a high-stakes deliverable (board deck, press release), set the budget to 80 calls and verify every non-universal cell. Default is spot-check. + +After the subagent completes, re-read matrix.json, recompile, and surface `matrix_fact_check.md` delta to the user. The summary is much more trustworthy with spot-check than without — and ships in 3-5 minutes instead of stalling the pipeline. ### Step 5d: Battle Card synthesis (deep/deeper only, after Step 5c) From 9f882f2cfcdec9e1d8dce3411f7291fc173ec07c Mon Sep 17 00:00:00 2001 From: Jay Sahnan <jay@browserbase.com> Date: Sat, 25 Apr 2026 00:44:22 +0100 Subject: [PATCH 21/23] perf(competitor-analysis): hard-cap research lane tool calls + halve discovery results MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit User reported research phase still takes ~25min before fact-check even after prior parallelism fixes. Trace showed 2 lanes hitting 29-30 bb calls each against an 8-call advisory budget, dragging the 30-agent fan-out from 5→12min. - references/workflow.md: replace soft "BUDGETS (respect strictly)" with HARD CAP + per-call self-counter ("# bb call N/8") and explicit "stop and write what you have" instruction. Cite Apr 25 incident. 
- references/workflow.md: drop discovery searches from 25→12 results per query. Gate already filters most noise; 25 just inflated the candidate list and downstream gate calls. - profiles/example.json: drop redundant template (browserbase.json is the reference profile). --- .../competitor-analysis/profiles/example.json | 12 ------- .../references/workflow.md | 32 ++++++++++++------- 2 files changed, 20 insertions(+), 24 deletions(-) delete mode 100644 skills/competitor-analysis/profiles/example.json diff --git a/skills/competitor-analysis/profiles/example.json b/skills/competitor-analysis/profiles/example.json deleted file mode 100644 index f1d7203..0000000 --- a/skills/competitor-analysis/profiles/example.json +++ /dev/null @@ -1,12 +0,0 @@ -{ - "company": "", - "website": "", - "product": "", - "existing_customers": [], - "competitors": [], - "use_cases": [], - "precise_category": "", - "category_include_keywords": [], - "exclusion_list": [], - "researched_at": "" -} diff --git a/skills/competitor-analysis/references/workflow.md b/skills/competitor-analysis/references/workflow.md index d54f759..b593c4f 100644 --- a/skills/competitor-analysis/references/workflow.md +++ b/skills/competitor-analysis/references/workflow.md @@ -83,24 +83,24 @@ Example — main agent issues these three Bash tool calls in parallel in one mes ```bash # Wave A — alternatives -bb search "alternatives to {user_company}" --num-results 25 --output /tmp/competitor_discovery_batch_A1.json && \ -bb search "{user_company} competitors" --num-results 25 --output /tmp/competitor_discovery_batch_A2.json && \ +bb search "alternatives to {user_company}" --num-results 12 --output /tmp/competitor_discovery_batch_A1.json && \ +bb search "{user_company} competitors" --num-results 12 --output /tmp/competitor_discovery_batch_A2.json && \ echo "A done" ``` ```bash # Wave B — precise category -bb search "{precise_category}" --num-results 25 --output /tmp/competitor_discovery_batch_B1.json && \ -bb search 
"{compose 3 distinctive tokens}" --num-results 25 --output /tmp/competitor_discovery_batch_B2.json && \ -bb search "{primary_noun} for ai agents" --num-results 25 --output /tmp/competitor_discovery_batch_B3.json && \ +bb search "{precise_category}" --num-results 12 --output /tmp/competitor_discovery_batch_B1.json && \ +bb search "{compose 3 distinctive tokens}" --num-results 12 --output /tmp/competitor_discovery_batch_B2.json && \ +bb search "{primary_noun} for ai agents" --num-results 12 --output /tmp/competitor_discovery_batch_B3.json && \ echo "B done" ``` ```bash # Wave C — comparison-page graph -bb search "{user_company} vs" --num-results 25 --output /tmp/competitor_discovery_batch_C1.json && \ -bb search "{seed1} vs" --num-results 20 --output /tmp/competitor_discovery_batch_C2.json && \ -bb search "{seed2} vs" --num-results 20 --output /tmp/competitor_discovery_batch_C3.json && \ +bb search "{user_company} vs" --num-results 12 --output /tmp/competitor_discovery_batch_C1.json && \ +bb search "{seed1} vs" --num-results 12 --output /tmp/competitor_discovery_batch_C2.json && \ +bb search "{seed2} vs" --num-results 12 --output /tmp/competitor_discovery_batch_C3.json && \ echo "C done" ``` @@ -235,10 +235,18 @@ LANE 4 — Strategic Diff vs {user_company} (deeper only): - Where you win: ... Also fill the `strategic_diff` frontmatter field with a one-line summary. -BUDGETS (respect strictly): - quick: 2-3 tool calls per competitor (homepage + 1-2 pages) - deep: 5-8 tool calls per competitor (Lane 1 + Lane 2) - deeper: 10-15 tool calls per competitor (all 4 lanes) +HARD TOOL-CALL CAP — count your bb calls and STOP at the cap. Partial output beats blocking the pipeline. 
+ quick mode: 3 bb calls max per competitor + deep mode: 8 bb calls max per competitor + deeper mode: 12 bb calls max per competitor + +ENFORCEMENT — at the start of every Bash call, prepend a comment like + # bb call N/8 (deep mode) +After hitting the cap, write the output file with WHAT YOU HAVE — even if a section is thin. +NEVER do a 9th call in deep mode "to be thorough". The pipeline budgets time on this assumption. + +Observed cost of overshoot (Apr 25 Browserbase run): two lanes hit 29-30 calls each, drove +wall-clock for the whole 30-agent fan-out from 5 min → 12 min. Don't do this. OUTPUT — write ALL competitor files in a SINGLE Bash call using chained heredocs directly to {OUTPUT_DIR}: From fb58a517105147414075b1277081c063d5669727 Mon Sep 17 00:00:00 2001 From: Jay Sahnan <jay@browserbase.com> Date: Sat, 25 Apr 2026 08:25:11 +0100 Subject: [PATCH 22/23] fix(competitor-analysis): five Cursor Bugbot findings 1. matrix.html leaked user company as a column with all-false features. Move competitorRows definition above aggregates and replace `deduped` with `competitorRows` in matrix headers/cells, axis counts, pricing table, strategic-summary inner loop, per-competitor page generation, and CSV. Now a single filter applies consistently across all views. 2. report-template.html referenced undefined --high / --low CSS vars on strategic win/loss card border-lefts (and the loss badge text color), so the colored borders silently didn't render. Define both in :root (high=#5a8a1a green, low=#F03603 brand) so they match the existing palette tokens. 3. gate_candidates.mjs used spawnSync inside async gateOne, blocking the event loop and reducing the documented --concurrency 6 to N=1 in practice. Switch to promisified execFile so the worker pool actually parallelizes. 4. extract_vs_names.mjs used bidirectional startsWith for domain resolution, which mapped "steel" -> steelhead.com and "browse" -> browserbase.com. 
Restrict prefix matches to known branding suffixes (browser/ai/io/app/ labs/etc.), break ties by shortest suffix, and exclude seeds from the host map so the user's own domain can't shadow shorter extracted names. 5. capture_screenshots.mjs (also flagged): the underlying `browse` CLI shares a single session, so true async parallelism would race on the same tab. Clamp --concurrency to 1 with a stderr note rather than silently corrupting output. --- .../references/report-template.html | 2 + .../scripts/capture_screenshots.mjs | 10 ++- .../scripts/compile_report.mjs | 70 +++++++++---------- .../scripts/extract_vs_names.mjs | 27 ++++++- .../scripts/gate_candidates.mjs | 56 ++++++++------- 5 files changed, 103 insertions(+), 62 deletions(-) diff --git a/skills/competitor-analysis/references/report-template.html b/skills/competitor-analysis/references/report-template.html index 129c48e..023db53 100644 --- a/skills/competitor-analysis/references/report-template.html +++ b/skills/competitor-analysis/references/report-template.html @@ -19,6 +19,8 @@ --card: #ffffff; --text: #100D0D; --muted: #514F4F; + --high: #5a8a1a; + --low: #F03603; } * { margin: 0; padding: 0; box-sizing: border-box; } body { font-family: Inter, -apple-system, BlinkMacSystemFont, 'Segoe UI', system-ui, sans-serif; background: var(--bg); color: var(--text); line-height: 1.6; font-size: 16px; } diff --git a/skills/competitor-analysis/scripts/capture_screenshots.mjs b/skills/competitor-analysis/scripts/capture_screenshots.mjs index 84b743c..397db54 100644 --- a/skills/competitor-analysis/scripts/capture_screenshots.mjs +++ b/skills/competitor-analysis/scripts/capture_screenshots.mjs @@ -37,9 +37,17 @@ const dir = args[0]; const envIdx = args.indexOf('--env'); const browseEnv = envIdx !== -1 ? args[envIdx + 1] : 'remote'; const concurrencyIdx = args.indexOf('--concurrency'); -const concurrency = concurrencyIdx !== -1 ? parseInt(args[concurrencyIdx + 1], 10) : 1; +let concurrency = concurrencyIdx !== -1 ? 
parseInt(args[concurrencyIdx + 1], 10) : 1; const skipExisting = args.includes('--skip-existing'); +// `browse` maintains a single shared session; parallel `browse goto/screenshot` calls would +// race on the same tab. Clamp concurrency to 1 and warn rather than silently corrupt output. +// (Each capture is fast — ~3-4s — so serial is acceptable.) +if (concurrency > 1) { + console.error(`Note: clamping --concurrency ${concurrency} to 1 — \`browse\` shares a single session across calls, so parallel screenshots would race on the same tab.`); + concurrency = 1; +} + const shotsDir = join(dir, 'screenshots'); mkdirSync(shotsDir, { recursive: true }); diff --git a/skills/competitor-analysis/scripts/compile_report.mjs b/skills/competitor-analysis/scripts/compile_report.mjs index 2606dd1..f0d5576 100644 --- a/skills/competitor-analysis/scripts/compile_report.mjs +++ b/skills/competitor-analysis/scripts/compile_report.mjs @@ -272,19 +272,6 @@ for (const c of competitors) { } const deduped = [...seen.values()].sort((a, b) => (a.competitor_name || '').localeCompare(b.competitor_name || '')); -// ---------- Aggregates ---------- - -const totalMentions = deduped.reduce((sum, c) => sum + c.mentions.length, 0); -const totalBenchmarks = deduped.reduce((sum, c) => sum + c.benchmarks.length, 0); -const withPricing = deduped.filter(c => c.pricing_tiers).length; - -const dirName = dir.split('/').pop(); -const title = dirName.replace(/_/g, ' ').replace(/-/g, ' ').replace(/\b\w/g, c => c.toUpperCase()); -const genDate = new Date().toLocaleDateString('en-US', { year: 'numeric', month: 'long', day: 'numeric' }); -// Initial metaLine uses deduped.length as fallback; we rebuild it after filtering the user's -// own company out of `competitorRows` so the "N competitors" count is accurate. 
-let metaLine = `${deduped.length} competitors · ${totalMentions} mentions · ${totalBenchmarks} benchmarks · ${genDate}`; - // Load the curated matrix EARLY — the overview table needs userCompany.name to filter the // user's own company out of the competitor list, and the strategic summary card needs the // whole matrix. Keep this block above the first use site to avoid temporal dead zones. @@ -296,6 +283,31 @@ try { console.error(`Warning: matrix.json present but unreadable — falling back to pipe split. ${err.message}`); } +// Filter the user's own company out before computing any "competitor" totals or rendering +// any view. matrix.json's userCompany.name wins; fall back to the --user-company CLI arg. +// Match case-insensitively against competitor_name AND slug. EVERY downstream loop that +// represents "the competitor set" (matrix.html columns, mentions feed, totals, strategic +// summary, per-competitor pages, CSV) must iterate `competitorRows`, not `deduped` — +// otherwise the user appears as a phantom column with all-false features. 
+const userCompanyName = (curatedMatrix && curatedMatrix.userCompany && curatedMatrix.userCompany.name) || userCompany || ''; +const userNameLower = userCompanyName.toLowerCase(); +const competitorRows = deduped.filter(c => { + const nameLower = (c.competitor_name || '').toLowerCase(); + const slugLower = (c.slug || '').toLowerCase(); + return !userNameLower || (nameLower !== userNameLower && slugLower !== userNameLower); +}); + +// ---------- Aggregates ---------- + +const totalMentions = competitorRows.reduce((sum, c) => sum + c.mentions.length, 0); +const totalBenchmarks = competitorRows.reduce((sum, c) => sum + c.benchmarks.length, 0); +const withPricing = competitorRows.filter(c => c.pricing_tiers).length; + +const dirName = dir.split('/').pop(); +const title = dirName.replace(/_/g, ' ').replace(/-/g, ' ').replace(/\b\w/g, c => c.toUpperCase()); +const genDate = new Date().toLocaleDateString('en-US', { year: 'numeric', month: 'long', day: 'numeric' }); +const metaLine = `${competitorRows.length} competitors · ${totalMentions} mentions · ${totalBenchmarks} benchmarks · ${genDate}`; + // ---------- index.html (overview) ---------- function featurePills(featuresStr, max = 4) { @@ -318,18 +330,6 @@ function truncate(str, n) { return str.slice(0, n - 1).replace(/\s+\S*$/, '') + '…'; } -// Exclude the user's own company from the competitor table. matrix.json's userCompany.name -// wins; fall back to the --user-company CLI arg. Match case-insensitively against the -// competitor_name AND the slug so we catch "Browserbase" vs "browserbase.md". 
-const userNameLower = ((curatedMatrix && curatedMatrix.userCompany && curatedMatrix.userCompany.name) || userCompany || '').toLowerCase(); -const competitorRows = deduped.filter(c => { - const nameLower = (c.competitor_name || '').toLowerCase(); - const slugLower = (c.slug || '').toLowerCase(); - return !userNameLower || (nameLower !== userNameLower && slugLower !== userNameLower); -}); -// Rebuild metaLine now that we know the true competitor count (excluding the user's company). -metaLine = `${competitorRows.length} competitors · ${totalMentions} mentions · ${totalBenchmarks} benchmarks · ${genDate}`; - const tableRows = competitorRows.map(c => { const hasDetail = c.body && c.body.length > 50; const nameHtml = hasDetail @@ -376,7 +376,7 @@ function buildStrategicSummary() { const label = entry.name; const userHas = !!userFlags[label]; const whoElseHas = []; - for (const c of deduped) { + for (const c of competitorRows) { const compEntry = compMap[c.slug]; if (compEntry && compEntry[kind] && compEntry[kind][label]) whoElseHas.push(c.competitor_name); } @@ -503,7 +503,7 @@ const perCompetitorCss = ` footer a { color:var(--brand); text-decoration:none; font-weight:500; } `; -for (const c of deduped) { +for (const c of competitorRows) { if (!c.body || c.body.length < 50) continue; const mentionsHtml = c.mentions.length @@ -608,7 +608,7 @@ function buildMatrixAxisFromCurated(kind) { return curatedMatrix[kind].map(entry => { const label = entry.name; let count = 0; - for (const c of deduped) { + for (const c of competitorRows) { const compKey = curatedMatrix.competitors[c.slug]; if (compKey && compKey[kind] && compKey[kind][label]) count += 1; } @@ -618,7 +618,7 @@ function buildMatrixAxisFromCurated(kind) { function buildMatrixAxisFromPipes(field) { const counts = new Map(); - for (const c of deduped) { + for (const c of competitorRows) { for (const item of splitPipes(c[field])) { const key = item.toLowerCase(); if (!counts.has(key)) counts.set(key, { label: item, 
count: 0 }); @@ -652,10 +652,10 @@ function matrixSection(heading, axis, field) { // the sticky left column so users can scroll horizontally without losing context on wide tables. const header = `<tr> <th class="mx-feature-h">${escapeHtml(heading)}</th> - ${deduped.map(c => `<th class="mx-comp-h"><a href="competitors/${escapeHtml(c.slug)}.html">${escapeHtml(c.competitor_name)}</a></th>`).join('')} + ${competitorRows.map(c => `<th class="mx-comp-h"><a href="competitors/${escapeHtml(c.slug)}.html">${escapeHtml(c.competitor_name)}</a></th>`).join('')} </tr>`; const rows = axis.map(a => { - const cells = deduped.map(c => competitorHas(c, field, a.label) + const cells = competitorRows.map(c => competitorHas(c, field, a.label) ? `<td class="mx-cell mx-yes" title="${escapeHtml(c.competitor_name)} has ${escapeHtml(a.label)}">●</td>` : `<td class="mx-cell mx-no">·</td>`).join(''); return `<tr> @@ -671,7 +671,7 @@ function matrixSection(heading, axis, field) { </section>`; } -const pricingRows = deduped.map(c => `<tr><td style="font-weight:500;">${escapeHtml(c.competitor_name)}</td><td style="color:var(--muted);font-size:0.8125rem;">${escapeHtml(c.pricing_model || '')}</td><td style="font-size:0.8125rem;">${escapeHtml(c.pricing_tiers || '—')}</td><td style="font-size:0.8125rem;">${escapeHtml(c.target_customer || '')}</td></tr>`).join(''); +const pricingRows = competitorRows.map(c => `<tr><td style="font-weight:500;">${escapeHtml(c.competitor_name)}</td><td style="color:var(--muted);font-size:0.8125rem;">${escapeHtml(c.pricing_model || '')}</td><td style="font-size:0.8125rem;">${escapeHtml(c.pricing_tiers || '—')}</td><td style="font-size:0.8125rem;">${escapeHtml(c.target_customer || '')}</td></tr>`).join(''); const matrixHtml = `<!DOCTYPE html> <html lang="en"> @@ -886,7 +886,7 @@ const priority = [ 'target_customer', 'pricing_model', 'pricing_tiers', 'key_features', 'integrations', 'headquarters', 'founded', 'employee_estimate', 'funding_info', 'strategic_diff' ]; -const 
flatRows = deduped.map(c => { +const flatRows = competitorRows.map(c => { const row = {}; for (const k of Object.keys(c)) { if (['body', 'sections', 'mentions', 'benchmarks', 'slug', 'file'].includes(k)) continue; @@ -912,7 +912,7 @@ writeFileSync(join(dir, 'results.csv'), csvLines.join('\n') + '\n'); // ---------- Summary ---------- console.error(JSON.stringify({ - total: deduped.length, + total: competitorRows.length, mentions: totalMentions, benchmarks: totalBenchmarks, with_pricing: withPricing, @@ -921,7 +921,7 @@ console.error(JSON.stringify({ index: join(dir, 'index.html'), matrix: join(dir, 'matrix.html'), mentions: join(dir, 'mentions.html'), - competitors: deduped.filter(c => c.body && c.body.length > 50).length, + competitors: competitorRows.filter(c => c.body && c.body.length > 50).length, csv: join(dir, 'results.csv') } }, null, 2)); diff --git a/skills/competitor-analysis/scripts/extract_vs_names.mjs b/skills/competitor-analysis/scripts/extract_vs_names.mjs index cae3ce5..48243bc 100644 --- a/skills/competitor-analysis/scripts/extract_vs_names.mjs +++ b/skills/competitor-analysis/scripts/extract_vs_names.mjs @@ -63,12 +63,16 @@ for (const f of files) { // Build a lookup of hostname -> candidate root domain from all result URLs. // Used later to try to resolve "anchor" -> "anchorbrowser.io". +// Exclude any host whose root-base equals a seed name — otherwise a short extracted token +// like "browse" can match the user's own domain (browserbase.com). const hostMap = new Map(); for (const r of allResults) { if (!r.url) continue; try { const h = new URL(r.url).hostname.replace(/^www\./, ''); const root = h.split('.').slice(-2).join('.'); + const rootBase = root.split('.')[0]; + if (seedSet.has(rootBase)) continue; if (!hostMap.has(root)) hostMap.set(root, h); } catch {} } @@ -92,12 +96,33 @@ for (const r of allResults) { } // Try to resolve each name to a domain. +// Strategy: +// 1. Exact match on rootBase wins outright. +// 2. 
Otherwise allow rootBase.startsWith(needle) ONLY when the suffix is a known +// branding token (e.g. "anchor" → "anchorbrowser.io"). Bidirectional startsWith +// was too loose: "steel" matched steelhead.com, "browse" matched browserbase.com. +// 3. Among multiple suffix matches, prefer the shortest suffix (most specific — +// "anchor" should match "anchorbrowser" before "anchorbrowserlabs"). Deterministic. +const BRAND_SUFFIXES = ['browser','app','ai','io','hq','co','dev','tech','cloud','agent','agents','labs','lab']; + function resolveDomain(name) { const needle = name.replace(/\./g, ''); + let exact = null; + let bestSuffix = null; // { host, suffixLen } for (const [root, host] of hostMap.entries()) { const rootBase = root.split('.')[0]; - if (rootBase === needle || rootBase.startsWith(needle) || needle.startsWith(rootBase)) return host; + if (rootBase === needle) { exact = host; break; } + if (rootBase.length > needle.length && rootBase.startsWith(needle)) { + const suffix = rootBase.slice(needle.length).replace(/^[\-_]/, ''); + if (BRAND_SUFFIXES.includes(suffix)) { + if (!bestSuffix || suffix.length < bestSuffix.suffixLen) { + bestSuffix = { host, suffixLen: suffix.length }; + } + } + } } + if (exact) return exact; + if (bestSuffix) return bestSuffix.host; return null; } diff --git a/skills/competitor-analysis/scripts/gate_candidates.mjs b/skills/competitor-analysis/scripts/gate_candidates.mjs index 8017269..f7c7673 100644 --- a/skills/competitor-analysis/scripts/gate_candidates.mjs +++ b/skills/competitor-analysis/scripts/gate_candidates.mjs @@ -14,9 +14,16 @@ // { "url": "https://foo.com", "status": "PASS" | "REJECT" | "UNKNOWN", // "matched_includes": [...], "matched_excludes": [...], "title": "...", "hero": "..." } -import { execSync, spawnSync } from 'child_process'; +import { execFile } from 'child_process'; +import { promisify } from 'util'; import { readFileSync } from 'fs'; +// Async execFile so the worker pool actually parallelizes. 
spawnSync blocks the entire +// event loop, which silently turns --concurrency N into N=1 — every URL fetched serially +// regardless of the flag. With promisified execFile, N workers can wait on N pending +// `bb fetch` processes concurrently. +const execFileAsync = promisify(execFile); + const args = process.argv.slice(2); if (args.includes('--help') || args.includes('-h')) { @@ -123,36 +130,35 @@ function classify(title, heroFull, includes, excludes) { } async function gateOne(url) { + let stdout; try { - const proc = spawnSync('bb', ['fetch', '--allow-redirects', url], { - encoding: 'utf-8', + const r = await execFileAsync('bb', ['fetch', '--allow-redirects', url], { maxBuffer: 4 * 1024 * 1024, timeout: 20000, }); - if (proc.status !== 0) { - return { url, status: 'UNKNOWN', reason: 'bb fetch failed', matched_includes: [], matched_excludes: [], title: '', hero: '' }; - } - let resp; - try { resp = JSON.parse(proc.stdout); } catch { - return { url, status: 'UNKNOWN', reason: 'non-JSON response', matched_includes: [], matched_excludes: [], title: '', hero: '' }; - } - const html = resp.content || ''; - const titleM = html.match(/<title[^>]*>([^<]*)<\/title>/i); - const title = titleM ? titleM[1].trim() : ''; - const heroFull = stripHtml(html).slice(0, heroChars); - const c = classify(title, heroFull, includes, excludes); - return { - url, - status: c.status, - reason: c.reason, - matched_includes: c.matched_includes, - matched_excludes: c.matched_excludes, - title, - hero: heroFull.slice(0, 240), - }; + stdout = r.stdout; } catch (err) { - return { url, status: 'UNKNOWN', reason: err.message, matched_includes: [], matched_excludes: [], title: '', hero: '' }; + // Non-zero exit, timeout, or spawn failure all surface here. 
+ return { url, status: 'UNKNOWN', reason: `bb fetch failed: ${err.message}`, matched_includes: [], matched_excludes: [], title: '', hero: '' }; } + let resp; + try { resp = JSON.parse(stdout); } catch { + return { url, status: 'UNKNOWN', reason: 'non-JSON response', matched_includes: [], matched_excludes: [], title: '', hero: '' }; + } + const html = resp.content || ''; + const titleM = html.match(/<title[^>]*>([^<]*)<\/title>/i); + const title = titleM ? titleM[1].trim() : ''; + const heroFull = stripHtml(html).slice(0, heroChars); + const c = classify(title, heroFull, includes, excludes); + return { + url, + status: c.status, + reason: c.reason, + matched_includes: c.matched_includes, + matched_excludes: c.matched_excludes, + title, + hero: heroFull.slice(0, 240), + }; } // Run with bounded concurrency From 37087d24f9c1df796f1947c47424a21dc24d138a Mon Sep 17 00:00:00 2001 From: Jay Sahnan <jay@browserbase.com> Date: Sat, 25 Apr 2026 23:42:08 +0100 Subject: [PATCH 23/23] bugbot fixes --- .../scripts/compile_report.mjs | 17 ++++++++++++++--- 1 file changed, 14 insertions(+), 3 deletions(-) diff --git a/skills/competitor-analysis/scripts/compile_report.mjs b/skills/competitor-analysis/scripts/compile_report.mjs index f0d5576..dd30c06 100644 --- a/skills/competitor-analysis/scripts/compile_report.mjs +++ b/skills/competitor-analysis/scripts/compile_report.mjs @@ -238,7 +238,16 @@ function mdToHtml(md) { let text = escapeHtml(trimmed.slice(2)); text = text.replace(/\*\*\[(\w+)\]\*\*/g, '<span class="confidence $1">[$1]</span>'); text = text.replace(/\*\*([^*]+)\*\*/g, '<strong>$1</strong>'); - text = text.replace(/(https?:\/\/\S+)/g, '<a href="$1" target="_blank">$1</a>'); + text = text.replace(/(https?:\/\/\S+)/g, (_, raw) => { + let url = raw; + let trail = ''; + while (url && /[)\],.;:!?]$/.test(url)) { + trail = url.slice(-1) + trail; + url = url.slice(0, -1); + } + if (!url) return raw; + return `<a href="${url}" target="_blank">${url}</a>${trail}`; + }); 
out.push(`<li>${text}</li>`); continue; } @@ -605,11 +614,12 @@ for (const c of competitorRows) { function buildMatrixAxisFromCurated(kind) { if (!curatedMatrix || !curatedMatrix[kind]) return []; + const compMap = curatedMatrix.competitors || {}; return curatedMatrix[kind].map(entry => { const label = entry.name; let count = 0; for (const c of competitorRows) { - const compKey = curatedMatrix.competitors[c.slug]; + const compKey = compMap[c.slug]; if (compKey && compKey[kind] && compKey[kind][label]) count += 1; } return { label, count, description: entry.description || '' }; @@ -638,7 +648,8 @@ const integrationAxis = curatedMatrix function competitorHas(c, field, label) { // Curated mode: look up in matrix.json (field is 'features' or 'integrations'). if (curatedMatrix) { - const compEntry = curatedMatrix.competitors[c.slug]; + const compMap = curatedMatrix.competitors || {}; + const compEntry = compMap[c.slug]; return !!(compEntry && compEntry[field] && compEntry[field][label]); } // Fallback: raw pipe-split match.