From 985032abb554f55cee3a5bd6fa02ef8d62b3f37b Mon Sep 17 00:00:00 2001 From: Jay Sahnan Date: Fri, 3 Apr 2026 14:24:20 -0700 Subject: [PATCH 1/2] init commit --- skills/cold-outbound/SKILL.md | 287 +++++ .../cold-outbound/profiles/browserbase.json | 9 + skills/cold-outbound/profiles/example.json | 9 + .../references/email-templates.md | 125 ++ .../references/research-patterns.md | 179 +++ skills/cold-outbound/references/workflow.md | 243 ++++ skills/cold-outbound/scripts/bb_search.ts | 96 ++ .../cold-outbound/scripts/bb_smart_fetch.ts | 236 ++++ skills/cold-outbound/scripts/compile_csv.py | 169 +++ .../scripts/list_discovery_urls.py | 73 ++ .../cold-outbound/scripts/package-lock.json | 1047 +++++++++++++++++ skills/cold-outbound/scripts/package.json | 14 + skills/cold-outbound/scripts/write_batch.py | 42 + 13 files changed, 2529 insertions(+) create mode 100644 skills/cold-outbound/SKILL.md create mode 100644 skills/cold-outbound/profiles/browserbase.json create mode 100644 skills/cold-outbound/profiles/example.json create mode 100644 skills/cold-outbound/references/email-templates.md create mode 100644 skills/cold-outbound/references/research-patterns.md create mode 100644 skills/cold-outbound/references/workflow.md create mode 100644 skills/cold-outbound/scripts/bb_search.ts create mode 100644 skills/cold-outbound/scripts/bb_smart_fetch.ts create mode 100644 skills/cold-outbound/scripts/compile_csv.py create mode 100644 skills/cold-outbound/scripts/list_discovery_urls.py create mode 100644 skills/cold-outbound/scripts/package-lock.json create mode 100644 skills/cold-outbound/scripts/package.json create mode 100644 skills/cold-outbound/scripts/write_batch.py diff --git a/skills/cold-outbound/SKILL.md b/skills/cold-outbound/SKILL.md new file mode 100644 index 0000000..bbecd19 --- /dev/null +++ b/skills/cold-outbound/SKILL.md @@ -0,0 +1,287 @@ +--- +name: cold-outbound +description: | + Cold outbound lead generation skill for SDR prospecting at scale. 
Researches a + company's product and ICP, discovers target companies using Browserbase Search API, + deeply researches each using a Plan→Research→Synthesize pattern, scores ICP fit, + and generates personalized outbound emails — all compiled into a scored CSV. + Supports depth modes (quick/deep/deeper) for balancing scale vs intelligence. + Use when the user wants to: (1) generate outbound leads, (2) build a prospecting + list, (3) find companies matching an ICP, (4) create personalized cold emails at + scale, (5) do SDR research. Triggers: "outbound", "lead gen", "prospecting list", + "ICP leads", "cold email", "find companies to sell to", "SDR", "build a lead list", + "outbound campaign". +--- + +# Cold Outbound + +Generate enriched lead lists with personalized outbound emails. Uses Browserbase Search API for discovery, a deep research pattern for enrichment, and LLM-powered email personalization. + +**Required**: `BROWSERBASE_API_KEY` env var. + +**Scripts location**: `~/.claude/skills/cold-outbound/scripts/` +On first run, install dependencies: `npm install --prefix ~/.claude/skills/cold-outbound/scripts` + +**Path rules**: Always use the full literal path in all Bash commands — NOT `~` or `$HOME` (both trigger "shell expansion syntax" approval prompts). Resolve the home directory once and use it everywhere (e.g., `/Users/jay/.claude/skills/cold-outbound/...`). When constructing subagent prompts, replace `{SKILL_DIR}` with the full literal path. When writing files (like profiles), use the Write tool with the full expanded path. + +**CRITICAL — Tool restrictions (applies to main agent AND all subagents)**: +- All web searches: use `bb_search.ts`. NEVER use WebSearch. +- All page fetches: use `bb_smart_fetch.ts`. NEVER use WebFetch. +- All file writes to /tmp/: use `write_batch.py` (pipe JSON into it). NEVER use the Write tool or `python3 -c` — both trigger security prompts. +- All CSV compilation: use `compile_csv.py`. 
NEVER write inline `python3 -c` to process, merge, or compile batch files — it triggers security prompts AND causes bugs (inconsistent key names across batches). The bundled scripts handle normalization. +- Do NOT write ad-hoc scripts to parse, merge, or deduplicate JSON batch files — the bundled scripts handle this. +- **Subagents must use ONLY the Bash tool. No other tools allowed.** This is non-negotiable — WebFetch, WebSearch, Write, Read, Glob, and Grep all trigger permission prompts that interrupt the user. +- **Main agent NEVER reads raw JSON batch files.** After enrichment subagents complete, go straight to `compile_csv.py`. For discovery URL lists, use `list_discovery_urls.py`. + +## Pipeline Overview + +Follow these 8 steps in order. Do not skip steps or reorder. + +1. **Company Research** — Discover the user's product, ICP, and pitch angle +2. **Depth Mode Selection** — Choose research depth based on lead count +3. **Micro-Vertical Generation** — Expand ICP into diverse search queries +4. **Output Schema Design** — Define CSV columns with user input +5. **Batch Discovery** — Subagents search for target companies in parallel +6. **Deep Research & Enrichment** — Subagents research each company, score ICP fit (NO emails yet) +7. **Contact Discovery** — Find decision makers at high-fit companies +8. **Email Generation + CSV Compilation** — Write personalized emails with full context, compile final CSV + +--- + +## Step 1: Deep Company Research + Vertical Scoping + +This is the most important step. The quality of everything downstream depends on deeply understanding the user's company AND the specific vertical they want to target. + +**If the user specifies a target vertical** (e.g., "sell to UI testing companies"), run a quick research on that vertical too: +- Search: `bb_search.ts --query "{vertical} companies landscape types"` +- Use the `AskUserQuestion` tool to ask clarifying questions as checkboxes — NOT as a text wall. 
Combine all questions into a single AskUserQuestion call with multiple questions. Example: + - Question 1 (multiSelect: true): "Which segments?" with options like "E2E testing platforms", "Visual regression tools", "Cross-browser testing", "AI-powered testing" + - Question 2: "Company stage?" with options like "Startups", "Mid-market", "Enterprise", "All" + - Question 3: "How many leads / depth?" with options like "Quick (100+)", "Deep (25-50)", "Deeper (<25)" +- This is the ONLY user interaction after profile confirmation. Fold answers into ICP and sub-verticals, then execute Steps 3-7 silently. +- Do NOT save vertical targeting answers to the profile. These are per-run decisions held in memory only. The profile only stores company facts (product, customers, competitors, use cases). + +If the user doesn't specify a vertical, derive sub-verticals from the company research. Still use AskUserQuestion for depth mode selection. + +**Profiles directory**: `~/.claude/skills/cold-outbound/profiles/` +A blank template (`example.json`) ships with the skill. Completed profiles persist across sessions. + +1. Ask the user for their company name or URL + +2. **Check for an existing profile**: + - List files in `~/.claude/skills/cold-outbound/profiles/` (ignore `example.json`) + - If a matching profile exists → load it, present to user: "I have your profile from {researched_at}. Still accurate?" If yes → skip to Step 2. If changes needed → edit fields and re-save. + - If no profile exists → proceed with deep research below. After confirmation, save to `profiles/{company-slug}.json` (copy structure from `example.json`). + - To add a new company later, the user just says "outbound for {new company}" and a new profile is created. + +3. **Run a full deep research on the user's company** using the Plan→Research→Synthesize pattern. + See `references/research-patterns.md` for sub-question templates, research loop rules, and synthesis instructions. 
+ + **Key research steps:** + - Search: `bb_search.ts --query "{company name}" --num 10` + - Fetch homepage: `bb_smart_fetch.ts --url "{company website}"` + - **Discover site pages via sitemap** (do NOT hardcode paths like `/about` or `/customers`): + 1. `bb_smart_fetch.ts --url "{company website}/sitemap.xml" --raw` — primary source, has ALL pages + 2. Scan for URLs with keywords: `customer`, `case-stud`, `pricing`, `about`, `use-case`, `industry`, `solution` + 3. Optionally also fetch `/llms.txt --raw` for page descriptions (bonus context, often incomplete) + 4. Pick 3-5 most relevant URLs and fetch those (without `--raw`) + - Search for external context and competitors + - Accumulate findings with confidence levels + + **Synthesize into a profile** (about the COMPANY, not a specific vertical): + Company, Product, Existing Customers, Competitors, Use Cases. + Do NOT include ICP, pitch angle, or sub-verticals — those are per-run targeting decisions. + +4. Present the profile to the user for confirmation. Ask: "Does this capture your company accurately?" + + The user may adjust any field. Do not proceed until confirmed. + +5. **Save the confirmed profile** to `~/.claude/skills/cold-outbound/profiles/{company-slug}.json`: + ```json + { + "company": "Browserbase", + "website": "https://www.browserbase.com", + "product": "Cloud browser infrastructure for AI agents...", + "existing_customers": ["Firecrawl", "Ramp", "..."], + "competitors": ["Browserless", "Apify", "..."], + "use_cases": ["AI agent browser access", "web scraping", "E2E testing", "data extraction", "..."], + "researched_at": "2026-03-17" + } + ``` + This profile persists across sessions. Next time the user runs the skill for the same company, it loads instantly. + +If the user provides detailed company info directly, still run 2-3 searches to fill gaps (competitors, customer types, use cases) before confirming and saving. 
## Step 2: Depth Mode Selection + +Ask the user how many leads they want and recommend a depth mode: + +| Mode | Research per company | Best for | Default when | +|------|---------------------|----------|--------------| +| `quick` | Homepage + 1-2 searches | 100+ leads, broad discovery | User asks for 100+ leads | +| `deep` | 2-3 sub-questions, 5-8 tool calls | 25-50 leads, quality enrichment | User asks for 25-100 leads | +| `deeper` | 4-5 sub-questions, 10-15 tool calls | 10-25 leads, full intelligence | User asks for <25 leads | + +The user can override. Combine Step 1 profile confirmation and depth mode into a single prompt: +- Show the saved profile (or new research) +- Ask: "Still accurate? How many leads / what depth?" +- Once the user responds, go. No more questions. No status narration. Just execute Steps 3-7 silently and deliver results at the end. + +## Step 3: Micro-Vertical Generation + +Expand the confirmed ICP into search queries. Use the sub-verticals from Step 1's vertical scoping (they are per-run decisions, not stored in the profile) to guide query generation. + +**Formula**: `ceil(requested_leads / 35)` micro-verticals needed. Over-discover by ~2-3x because filtering (competitors, existing customers, poor fits) typically drops 50-70% of discovered companies. + +Generate search queries with these patterns: +- Industry + company stage + geography ("fintech startups series A Bay Area") +- Technology stack + use case ("companies using Selenium for web scraping") +- Competitor adjacency ("alternatives to {known company in ICP}") +- Buyer persona + pain point ("engineering teams struggling with browser automation") + +Each query: 4-8 descriptive keywords, non-overlapping with other queries. Proceed immediately — do not ask the user to approve queries. + +## Step 4: Output Schema Design (auto) + +Use default columns plus enrichment fields that make sense for the ICP. 
Do not ask the user to pick columns — use sensible defaults: + +| Column | Description | +|--------|-------------| +| `company_name` | Company name (5 words max) | +| `website` | Homepage URL | +| `product_description` | What they do (12 words max) | +| `icp_fit_score` | 1-10 integer | +| `icp_fit_reasoning` | Why this score, referencing specific findings (20 words max) | +| `personalized_email` | Ready-to-send email draft | + +Auto-select enrichment fields based on ICP context. Always include `industry` and `key_features`. Add `employee_estimate`, `funding_info`, `target_audience` when relevant. Do not ask the user to pick — just use sensible defaults and proceed. + +## Step 5: Batch Discovery + +Launch subagents to run search queries in parallel. See `references/workflow.md` for subagent prompt templates, batch JSON schemas, and wave management rules. + +**Process**: +1. Launch ALL discovery subagents at once (up to ~6 per single message, using multiple Agent tool calls in one message for parallelism) +2. Each subagent runs one query using ONLY Bash: + ```bash + npx tsx ~/.claude/skills/cold-outbound/scripts/bb_search.ts --query "{query}" --num 25 --output /tmp/cold_discovery_batch_{N}.json + ``` +3. Subagents report back counts only — no raw data in main context +4. If more than 6 subagents needed, launch next wave of ~6 after current wave completes +5. After all waves complete, run `list_discovery_urls.py` to get deduplicated URLs: + ```bash + python3 ~/.claude/skills/cold-outbound/scripts/list_discovery_urls.py /tmp + ``` +6. Use the output (one URL per line) to build the enrichment assignment list — do NOT read or parse batch JSON files yourself + +## Step 6: Deep Research & Enrichment + +This is the core intelligence step. Each company is researched using a **Plan → Research → Synthesize** pattern, adapted from deep research methodology. + +Launch subagents to research companies in parallel. 
See `references/workflow.md` for the enrichment subagent prompt template. See `references/research-patterns.md` for the full research methodology: sub-question templates, finding format, research loop rules, and synthesis instructions. + +**Important**: Enrichment subagents do NOT write emails. They only research, score ICP fit, and fill enrichment fields. Emails are written later in Step 8 after contacts are found. + +**Process**: +1. Use the URL list from `list_discovery_urls.py` output (Step 5) — do NOT read batch JSON files yourself +2. Split URLs into groups per subagent (size depends on depth mode — see `references/workflow.md`) +3. Launch ALL enrichment subagents at once (up to ~6 per single message, using multiple Agent tool calls in one message for parallelism) +4. Each subagent uses ONLY Bash — for each company: + + **Phase A — Plan** (skip in quick mode): + Decompose what needs to be known into 2-5 sub-questions based on ICP and enrichment fields. + + **Phase B — Research Loop**: + For each sub-question, search and fetch relevant pages, extract findings with confidence levels. Accumulate findings, respecting the step budget for the current depth mode. + + **Phase C — Synthesize**: + From all accumulated findings: score ICP fit 1-10 with evidence-based reasoning, fill all enrichment fields. Do NOT write emails yet — that happens in Step 8. + +5. Subagents write results to `/tmp/cold_enrichment_batch_{N}.json` using `write_batch.py` (NEVER `python3 -c` or the Write tool) +6. Subagents report back counts only — findings count + success rate +7. After ALL enrichment subagents complete, proceed to Step 7 + +**Critical**: Include the confirmed ICP description and pitch angle verbatim in every subagent prompt for consistent scoring. + +## Step 7: Contact Discovery + +Automatically find decision makers at high-fit companies. See `references/workflow.md` for the contact discovery subagent prompt template. + +**Process**: +1. 
Run `compile_csv.py` with `--no-cleanup` to deduplicate enrichment results (keeps batch files for Step 8): + ```bash + cd ~/Desktop && python3 ~/.claude/skills/cold-outbound/scripts/compile_csv.py /tmp "{company_name}" "{YYYY-MM-DD}" --no-cleanup + ``` +2. Show a quick interim summary (lead count, score distribution, top 10 by ICP score) so the user sees progress +3. Filter for companies with icp_fit_score >= 8 +4. **Pick 3-5 target titles** based on the sender's product and who the buyer would be: + - Selling dev tools/docs → Head of DevRel, Developer Advocate, VP Engineering + - Selling security → CISO, Head of Security, VP Engineering + - Selling infrastructure → CTO, VP Engineering, Head of Platform + - Selling to early-stage startups → Founder, CEO, CTO (small teams = founders decide) + - Selling marketing/GTM tools → VP Marketing, Head of Growth, CMO +5. Group companies into batches of ~6 +6. Launch contact discovery subagents in parallel (up to ~6 per message). Each subagent uses ONLY Bash: + - Search: `bb_search.ts --query "{company name} {target title} LinkedIn"` for each relevant title + - Search: `bb_search.ts --query "{company name} team leadership"` + - Extract names, titles, LinkedIn URLs from search results + - Estimate email using `first@company.com` pattern + - See `references/workflow.md` for the contact discovery subagent prompt template +7. After all contact subagents complete, present a **full contact table**: + +``` +| Company (Score) | Contact | Title | Email (estimated) | LinkedIn | +|-----------------|---------|-------|--------------------|----------| +| Baseten (9) | Philip Kiely | Head of DevRel | philip@baseten.co | link | +| ... | ... | ... | ... | ... | +``` + +Then proceed immediately to Step 8. + +## Step 8: Email Generation + CSV Compilation + +Now that we have company research, ICP scores, AND contact info — write emails once with the full picture. 
See `references/email-templates.md` for email structure, personalization signals, examples, and anti-patterns. + +**Process**: +1. Launch email generation subagents in parallel (up to ~6 per message). Each subagent uses ONLY Bash. For each company, the subagent has: + - All enrichment data (product, industry, ICP score, findings) + - Contact info (name, title) if found + - Sender's company profile and pitch angle +2. Each email should: + - Address the contact by first name ("Hi Philip,") — or "Hi team," if no contact found + - Reference the contact's role where relevant ("As Head of DevRel, you know...") + - Use the richest research findings for personalization (not generic) + - Follow the rules in `references/email-templates.md` +3. Subagents write updated results (with emails + contact columns) to `/tmp/cold_final_batch_{N}.json` +4. Re-run `compile_csv.py` to produce the final CSV with all columns: + ```bash + cd ~/Desktop && python3 ~/.claude/skills/cold-outbound/scripts/compile_csv.py /tmp "{company_name}" "{YYYY-MM-DD}" + ``` +5. Present the final results: + +``` +## Outbound Lead List Complete + +- **Total leads**: {count} +- **With contacts found**: {count} +- **Depth mode**: {mode} +- **Score distribution**: + - High fit (8-10): {count} + - Medium fit (5-7): {count} + - Low fit (1-4): {count} +- **Output file**: ~/Desktop/{CSV filename printed by compile_csv.py} +``` + +6. Show the **top 10 leads** with contacts in a table: + +``` +| Company | Score | Contact | Title | Product | Fit Reasoning | +|---------|-------|---------|-------|---------|---------------| +| Baseten | 9 | Philip Kiely | Head of DevRel | ML inference platform | $150M Series D, docs need... | +``` + +7. Show 3-5 sample personalized emails so the user can see the quality + +**Note**: Email addresses are estimated using common patterns (first@company.com). Recommend verifying through Apollo.io, Hunter.io, or LinkedIn Sales Navigator before sending. 
+ +Offer to filter the CSV, regenerate emails for specific companies, or search for additional contacts at lower-scored companies. diff --git a/skills/cold-outbound/profiles/browserbase.json b/skills/cold-outbound/profiles/browserbase.json new file mode 100644 index 0000000..12105a7 --- /dev/null +++ b/skills/cold-outbound/profiles/browserbase.json @@ -0,0 +1,9 @@ +{ + "company": "Browserbase", + "website": "https://www.browserbase.com", + "product": "Cloud browser infrastructure for AI agents and web automation. Run Playwright, Puppeteer, and Selenium at scale with stealth mode, CAPTCHA solving, session persistence, residential proxies, and debugging tools. Products include Browserbase (headless infra), Stagehand (browser automation SDK), and Director (workflow builder). Also offers MCP browser tool and Computer Use Agent support.", + "existing_customers": ["Firecrawl", "Ramp", "Exa", "Reducto", "Cerebras", "Cartesia", "Extend", "Polymarket"], + "competitors": ["Browserless", "Apify", "Scrapfly", "Surfsky", "BrowserTree", "Hyperbrowser", "Anchor Browser"], + "use_cases": ["AI agent browser access", "web scraping and data extraction", "automated testing", "form filling", "document downloading", "price monitoring", "lead research", "computer use agents"], + "researched_at": "2026-03-18" +} diff --git a/skills/cold-outbound/profiles/example.json b/skills/cold-outbound/profiles/example.json new file mode 100644 index 0000000..ae469f5 --- /dev/null +++ b/skills/cold-outbound/profiles/example.json @@ -0,0 +1,9 @@ +{ + "company": "", + "website": "", + "product": "", + "existing_customers": [], + "competitors": [], + "use_cases": [], + "researched_at": "" +} diff --git a/skills/cold-outbound/references/email-templates.md b/skills/cold-outbound/references/email-templates.md new file mode 100644 index 0000000..797a479 --- /dev/null +++ b/skills/cold-outbound/references/email-templates.md @@ -0,0 +1,125 @@ +# Cold Outbound — Email Templates Reference + +## Email Structure + 
+Every outbound email follows this structure: + +**Subject line**: Specific, references their company or product. Never generic ("Quick question" is bad). +Example: "Thought on {Company}'s {specific feature/challenge}" + +**Body** (100-150 words, 3-4 short paragraphs): + +1. **Opening** (1-2 sentences): Reference something specific from their website — a feature they ship, a blog post, a recent launch, a job posting. Show you actually looked. + +2. **Bridge** (2-3 sentences): Connect their situation to the sender's value prop. Use the confirmed pitch angle. Frame as "companies like yours" or "teams building X often need Y." + +3. **Ask** (1 sentence): Soft CTA. "Would a 15-min call make sense to explore this?" Never "buy now" or "schedule a demo." + +4. **Sign-off**: First name only. No title dumps. + +## Personalization Signals + +When enriching a company, look for these on their website to fuel personalization: + +| Signal | Where to find | How to use | +|--------|---------------|------------| +| What they sell | Homepage hero, product page | Opening + bridge | +| Recent launches | Blog, changelog, press page | Opening hook | +| Hiring signals | Careers page, job boards | "I noticed you're scaling your X team" | +| Tech stack | Docs, job descriptions, GitHub | Bridge to technical pitch | +| Customer base | Case studies, logos section | "Working with companies like {their customer}" | +| Pain indicators | Pricing page (complexity), docs (workarounds) | Bridge to how you solve it | +| Growth signals | New markets, new features, funding news | Opening or bridge | + +## Examples + +### Example 1: SaaS company selling to e-commerce + +**Context extracted**: +- Company: CartFlow +- Product: Checkout optimization for Shopify stores +- Recent: Launched A/B testing feature last month +- Stack: React, Node.js, Shopify API + +**Sender pitch angle**: "We help companies automate browser-based testing and monitoring" + +**Email**: +``` +Subject: CartFlow's new A/B testing — 
monitoring at scale? + +Hi Sarah, + +Saw CartFlow just shipped A/B testing for checkout flows — congrats. That's a big surface area to keep reliable across Shopify's theme ecosystem. + +Teams running checkout experiments at scale often hit a wall with monitoring — catching layout breaks, payment form regressions, or slow renders before customers do. We help companies like yours run automated browser checks across hundreds of store configurations without building the infra in-house. + +Would a 15-min call make sense to see if this fits where CartFlow is headed? + +Best, +Alex +``` + +### Example 2: Data analytics startup + +**Context extracted**: +- Company: InsightPipe +- Product: Real-time analytics for marketing teams +- Recent: Hiring 3 data engineers +- Customers: Mid-market DTC brands + +**Sender pitch angle**: "We provide reliable web data collection infrastructure" + +**Email**: +``` +Subject: InsightPipe's data pipeline — web sources? + +Hi Marcus, + +Noticed InsightPipe is scaling the data engineering team — makes sense given the push into real-time analytics for DTC brands. + +A challenge we hear from analytics companies: reliably collecting web data (pricing, inventory, ad placements) at the frequency your customers need without getting blocked or managing proxy infrastructure. We handle the browser infrastructure side so your team can focus on the analytics layer. + +Worth a quick chat to see if web data collection is on your roadmap? + +Best, +Alex +``` + +### Example 3: AI company + +**Context extracted**: +- Company: AgentKit +- Product: Framework for building AI agents +- Recent: Open-sourced their core library +- Stack: Python, LangChain + +**Sender pitch angle**: "We give AI agents reliable browser access" + +**Email**: +``` +Subject: Browser access for AgentKit agents + +Hi Priya, + +AgentKit's agent framework is impressive — especially the open-source move. 
One pattern we see in agent builders: giving agents reliable browser access (navigating, extracting, filling forms) without the pain of managing headless Chrome at scale. + +We provide managed browser infrastructure specifically for AI agents — handles anti-bot detection, session management, and scales with your users' workloads. Several agent frameworks have integrated us as their default browser layer. + +Would it be useful to chat about how this could plug into AgentKit? + +Best, +Alex +``` + +## Anti-Patterns + +Avoid these in generated emails: + +- **Generic opener**: "I came across your company and was impressed" — says nothing specific +- **Feature dump**: Listing 5+ features instead of connecting to their need +- **Multiple CTAs**: "Book a demo, check our docs, or reply here" — pick one +- **Over 200 words**: SDR emails get skimmed, not read +- **Mentioning competitors by name**: "Unlike {competitor}..." — unprofessional in cold outreach +- **Fake familiarity**: "Hope you're having a great week!" — transparent filler +- **Title/credential stuffing** in sign-off: "Alex Johnson, Senior Account Executive, ABC Corp, MBA, PMP" — just first name +- **Apologetic tone**: "Sorry to bother you" — undermines the value you're offering diff --git a/skills/cold-outbound/references/research-patterns.md b/skills/cold-outbound/references/research-patterns.md new file mode 100644 index 0000000..ee0ec97 --- /dev/null +++ b/skills/cold-outbound/references/research-patterns.md @@ -0,0 +1,179 @@ +# Cold Outbound — Deep Research Patterns + +## Overview + +This reference defines two research contexts: +1. **Self-Research** (Step 1) — Deep research on the user's own company to build a strong ICP foundation +2. **Target Research** (Step 6) — Research each discovered company using Plan→Research→Synthesize + +Both use the same 3-phase pattern but with different sub-questions and goals. + +## Self-Research (User's Company) + +This is the most important research in the pipeline. 
Every downstream decision depends on it. + +### Sub-Questions +- "What does {company} sell and what specific problem does it solve?" +- "Who are {company}'s existing customers? What industries, company sizes, and use cases?" +- "Who are {company}'s competitors and what differentiates them?" +- "What pricing model does {company} use and who is the typical buyer persona?" +- "What use cases and pain points does {company}'s marketing emphasize?" + +### Page Discovery +Discover site pages dynamically — do NOT hardcode paths like `/about` or `/customers`: +1. Fetch `bb_smart_fetch.ts --url "{company website}/sitemap.xml" --raw` — primary source, has ALL pages +2. Scan sitemap URLs for keywords: `customer`, `case-stud`, `pricing`, `about`, `use-case`, `blog`, `docs`, `industry`, `solution` +3. Optionally fetch `bb_smart_fetch.ts --url "{company website}/llms.txt" --raw` for page descriptions +4. Pick the 3-5 most relevant URLs from the sitemap and fetch those (without `--raw`) +5. Sitemap is the source of truth. llms.txt is bonus context but often incomplete. +6. The `--raw` flag outputs plain text instead of structured JSON — essential for reading sitemap.xml and llms.txt + +### External Research +- Search: `"{company} customers use cases reviews"` +- Search: `"{company} alternatives competitors vs"` +- Fetch 1-2 of the most informative third-party results (G2, blog posts, comparisons) + +### Synthesis Output +From all findings, produce a company profile: +- **Company**: name +- **Product**: what they sell, how it works, key capabilities (2-3 sentences, specific) +- **Existing Customers**: named customers or customer types found +- **Competitors**: who they compete with, key differentiators +- **Use Cases**: broad list of use cases the product serves (NOT tied to one vertical) + +Do NOT include ICP, pitch angle, or sub-verticals in the profile. Those are per-run targeting decisions made in Step 2 after the profile is confirmed. 
The profile is a general-purpose company fact sheet that works regardless of which vertical you target next. + +### Why This Matters +A thin profile produces generic search queries, weak lead scoring, and cookie-cutter emails. A rich profile with specific customers, competitors, and use cases produces targeted queries, accurate scoring, and emails that reference real pain points. + +--- + +## Target Company Research (Step 6) + +### Sub-Question Templates + +Generate sub-questions from these categories based on the ICP and enrichment fields requested. Not every category applies to every company — pick the most relevant. + +### Priority 1 (Always ask) +- **Product/Market**: "What does {company} sell and who are their customers?" +- **ICP Fit**: "How does {company}'s product/market relate to {sender's ICP description}?" + +### Priority 2 (Ask in deep/deeper) +- **Tech Stack**: "What technologies, frameworks, or infrastructure does {company} use?" +- **Growth Signals**: "Has {company} raised funding, launched products, or expanded recently?" +- **Pain Points**: "What challenges might {company} face that {sender's product} addresses?" + +### Priority 3 (Ask in deeper only) +- **Decision Makers**: "Who leads engineering, product, or growth at {company}?" +- **Competitive Landscape**: "Who are {company}'s competitors and how are they differentiated?" +- **Customers/Case Studies**: "Who are {company}'s notable customers and what results do they highlight?" 
+ +### Search Query Patterns + +For each sub-question, generate 2-3 search query variations: + +``` +# Product/Market +"{company name} what they do" +"{company name} product features customers" + +# Tech Stack +"{company name} tech stack engineering blog" +"{company name} careers software engineer" (job posts reveal stack) + +# Growth Signals +"{company name} funding round 2025 2026" +"{company name} launch announcement" +"{company name} hiring" + +# Pain Points +"{company name} challenges {relevant domain}" +"{company name} {problem sender solves}" + +# Decision Makers +"{company name} VP engineering CTO LinkedIn" +"{company name} head of growth product" +``` + +## Finding Format + +Each finding is a self-contained factual statement tied to a source: + +```json +{ + "subQuestion": "What does Acme sell and who are their customers?", + "fact": "Acme provides checkout optimization for Shopify stores, serving mid-market DTC brands with $5M-$50M revenue", + "sourceUrl": "https://acme.com/about", + "sourceTitle": "About Acme - Checkout Optimization", + "confidence": "high" +} +``` + +**Confidence levels**: +- `high`: Directly stated on the company's own website or official press +- `medium`: Inferred from job postings, third-party articles, or indirect signals +- `low`: Speculative based on industry/category, or from outdated sources + +## Research Loop Rules + +1. **Process sub-questions by priority** — Priority 1 first, then 2, then 3 +2. **3-5 findings per sub-question, then move on** — Don't exhaust a topic +3. **Use parallel tool calls** — Search multiple queries simultaneously when possible +4. **Rephrase, don't retry** — If a search returns poor results, try different keywords +5. **Fetch selectively** — Don't fetch every URL from search results. Pick the 1-2 most relevant based on title and URL +6. **Stop at step limit** — Respect the depth mode's step budget per company +7. 
**Homepage first** — Always fetch the company's homepage before branching to other pages +8. **Deduplicate findings** — Don't record the same fact twice from different sources + +## Depth Mode Behavior + +### Quick Mode (100+ leads) +- **Skip Phase A** — No sub-question decomposition +- **Phase B**: Fetch the company homepage. Run 1-2 supplementary searches if homepage data is thin. +- **Phase C**: Extract available data, score ICP, write email from what's available +- **Budget**: 2-3 total tool calls per company +- **Trade-off**: Fast and cheap, but emails may be less personalized + +### Deep Mode (25-50 leads) +- **Phase A**: Decompose into 2-3 sub-questions (Priority 1 + selected Priority 2) +- **Phase B**: For each sub-question, run 2-3 searches + fetch 1-2 URLs. Target 3-5 findings per sub-question. +- **Phase C**: Synthesize from all findings. ICP reasoning references specific evidence. Email uses the most specific/compelling finding. +- **Budget**: 5-8 total tool calls per company +- **Trade-off**: Good balance of depth and scale + +### Deeper Mode (10-25 leads) +- **Phase A**: Decompose into 4-5 sub-questions (Priority 1 + 2 + selected Priority 3) +- **Phase B**: Research exhaustively. Fetch multiple pages per company (homepage, about, blog, careers, product pages). Target 3-5 findings per sub-question. +- **Phase C**: Synthesize with cited evidence. ICP reasoning is detailed. Email references multiple specific signals. +- **Budget**: 10-15 total tool calls per company +- **Trade-off**: High quality intelligence, but slow and expensive + +## Synthesis Instructions + +After the research loop completes for a company, synthesize findings into the output record: + +### ICP Scoring +Score 1-10 using ALL accumulated findings as evidence: +- **8-10**: Strong match. Multiple high-confidence findings confirm right industry, company stage, and clear pain point alignment. The pitch angle directly addresses a visible need supported by evidence. 
+- **5-7**: Partial match. Some findings suggest relevance but key signals are missing or low-confidence. Adjacent industry or unclear pain point. +- **1-4**: Weak match. Findings indicate wrong segment, too large/small, or no apparent connection to sender's product. + +Write `icp_fit_reasoning` referencing specific findings: "Series A fintech (from Crunchbase), uses Selenium for scraping (from job posting), expanding to EU market (from blog) — strong fit for browser infrastructure." + +### Email Personalization +Use the **richest, most specific** findings for email context: +- Opening: Use the most concrete finding (a specific product feature, a recent launch, a job posting) +- Bridge: Connect a finding about their challenges/stack to the sender's pitch angle +- If only low-confidence findings exist, keep the email shorter and more general — don't fabricate specificity + +### Enrichment Fields +Map findings to enrichment fields: +- `product_description` → from Product/Market findings +- `industry` → inferred from Product/Market +- `employee_estimate` → from LinkedIn search or careers page findings +- `funding_info` → from Growth Signals findings +- `headquarters` → from company homepage or about page +- `target_audience` → from Product/Market findings +- `key_features` → from product page findings + +If a field has no supporting findings, leave it empty rather than guessing. diff --git a/skills/cold-outbound/references/workflow.md b/skills/cold-outbound/references/workflow.md new file mode 100644 index 0000000..c8ab2ba --- /dev/null +++ b/skills/cold-outbound/references/workflow.md @@ -0,0 +1,243 @@ +# Cold Outbound — Workflow Reference + +## Discovery Batch JSON Schema + +File: `/tmp/cold_discovery_batch_{N}.json` + +```json +[ + { "url": "https://example.com", "title": "Example Corp", "author": null, "publishedDate": null }, + ... +] +``` + +Output of `bb_search.ts --output`. Array of search results. Each subagent produces one file. 
+ +## Enrichment Batch JSON Schema + +File: `/tmp/cold_enrichment_batch_{N}.json` + +**CRITICAL: Use these exact field names.** Inconsistent keys across batches (e.g., `company` vs `company_name`) break compile_csv.py. + +```json +[ + { + "company_name": "Acme Inc", + "website": "https://acme.com", + "product_description": "AI-powered inventory management for e-commerce brands", + "industry": "E-commerce / SaaS", + "target_audience": "Mid-market e-commerce brands", + "key_features": ["demand forecasting", "automated reordering", "multi-warehouse sync"], + "icp_fit_score": 8, + "icp_fit_reasoning": "Series A e-commerce SaaS, uses Selenium for scraping, expanding to EU — strong fit", + "employee_estimate": "50-100", + "funding_info": "Series A, $12M", + "headquarters": "San Francisco, CA" + } +] +``` + +## Discovery Subagent Prompt Template + +``` +You are a lead discovery subagent. Run search queries and save results. + +TOOL RULES — CRITICAL, FOLLOW EXACTLY: +1. You may ONLY use the Bash tool. No exceptions. +2. All searches: Bash → npx tsx {SKILL_DIR}/scripts/bb_search.ts ... +3. BANNED TOOLS (these trigger permission prompts that break the flow): + - WebFetch — BANNED + - WebSearch — BANNED + - Write — BANNED + - Read — BANNED (for URLs; use bb_smart_fetch.ts) + - Glob, Grep — BANNED + If you use ANY banned tool, the entire run fails. Use ONLY Bash. +4. NEVER use ~ or $HOME in paths — they trigger "shell expansion" approval prompts. Use the full literal path provided in {SKILL_DIR}. + +TASK: +Run the following search queries using bb_search.ts and save results directly via --output: + +{for each query} +npx tsx {SKILL_DIR}/scripts/bb_search.ts --query "{query}" --num 25 --output /tmp/cold_discovery_batch_{batch_id}.json +{end for} + +After each search completes, report back ONLY the count of results found. +Do NOT analyze, summarize, or return the actual results. + +Example response: "Batch 1: 23 results. Batch 2: 25 results. Batch 3: 18 results." 
+``` + +## Research & Enrichment Subagent Prompt Template + +``` +You are a lead research & enrichment subagent. For each company URL, research the company using a 3-phase pattern and score ICP fit. Do NOT write emails — that happens later in Step 8 after contacts are found. + +CONTEXT: +- Sender's company: {sender_company} +- Sender's product: {sender_product} +- ICP description: {icp_description} +- Pitch angle: {pitch_angle} +- Depth mode: {depth_mode} +- Output schema columns: {columns} + +URLS TO PROCESS: +{url_list} + +TOOL RULES — CRITICAL, FOLLOW EXACTLY: +1. You may ONLY use the Bash tool. No exceptions. +2. All searches: Bash → npx tsx {SKILL_DIR}/scripts/bb_search.ts --query "..." --num 10 +3. All fetches: Bash → npx tsx {SKILL_DIR}/scripts/bb_smart_fetch.ts --url "..." +4. All file writes: Bash → pipe JSON into {SKILL_DIR}/scripts/write_batch.py (NEVER python3 -c) +5. BANNED TOOLS (these trigger permission prompts that break the flow): + - WebFetch — BANNED + - WebSearch — BANNED + - Write — BANNED (use Bash to write files) + - Read — BANNED (for URLs; use bb_smart_fetch.ts) + - Glob, Grep — BANNED + If you use ANY banned tool, the entire run fails. Use ONLY Bash. +6. NEVER use ~ or $HOME in paths — they trigger "shell expansion" approval prompts. Use the full literal path provided in {SKILL_DIR}. + +RESEARCH PATTERN (per company): +Follow the 3-phase deep research pattern from references/research-patterns.md. + +Phase A — Plan (skip in quick mode): +Decompose what you need to know into sub-questions based on ICP and enrichment fields. + +Phase B — Research Loop: +For each sub-question (or just the homepage in quick mode): +1. Run bb_search.ts with relevant query +2. Pick 1-2 most relevant URLs from results +3. Run bb_smart_fetch.ts on selected URLs +4. Smart page discovery: if you need deeper info on a company, try fetching their /llms.txt or /sitemap.xml first to find pages like customer stories, case studies, solutions pages — don't guess paths +5. 
Extract findings: factual statements with source, confidence level
6. Accumulate findings, move to next sub-question
7. Respect step budget: quick=2-3 calls, deep=5-8, deeper=10-15

Phase C — Synthesize:
From accumulated findings:
1. Score ICP fit 1-10 (see rubric below)
2. Fill enrichment fields from findings
3. Reference specific findings in icp_fit_reasoning
4. Do NOT write emails — that happens in Step 8 after contacts are discovered

ICP SCORING RUBRIC:
- 8-10: Strong match. Multiple high-confidence findings confirm fit. Pitch angle directly addresses a visible need.
- 5-7: Partial match. Some findings suggest relevance but key signals missing or low-confidence.
- 1-4: Weak match. Findings indicate wrong segment or no apparent connection.

OUTPUT — use the bundled write_batch.py script. NEVER use python3 -c or inline Python.
Inline Python triggers "shell metacharacters" and "consecutive quote characters" security prompts.

Use this exact pattern to write results:

echo '{json_data}' | python3 {SKILL_DIR}/scripts/write_batch.py /tmp/cold_enrichment_batch_{batch_id}.json

Where {json_data} is a valid JSON string with your results. Example:

echo '[{"company_name":"Acme","website":"https://acme.com","icp_fit_score":8,"icp_fit_reasoning":"Strong fit"}]' | python3 {SKILL_DIR}/scripts/write_batch.py /tmp/cold_enrichment_batch_{batch_id}.json

For larger payloads, write the JSON to a temp file first, then pipe it in. Use a quoted heredoc rather than echo '...' — a single quote (apostrophe) anywhere in the JSON would break the echo quoting, and very long echo arguments can exceed the shell's argument-length limit. The quoted delimiter ('JSON_EOF') stops the shell from interpreting anything inside the payload:

cat <<'JSON_EOF' > /tmp/cold_batch_{batch_id}_raw.json
[...]
JSON_EOF
python3 {SKILL_DIR}/scripts/write_batch.py /tmp/cold_enrichment_batch_{batch_id}.json < /tmp/cold_batch_{batch_id}_raw.json

CRITICAL: Do NOT use python3 -c "..." — it ALWAYS triggers security prompts. Use write_batch.py instead.

Report back ONLY: "Batch {batch_id}: {succeeded}/{total} enriched, {findings_count} total findings."
Do NOT return raw data to the main conversation.
+``` + +## Wave Management + +### Key Principle: Maximize Parallelism +Launch as many subagents as possible in a single message (up to ~6 Agent tool calls per message). This matches the Exa skill pattern and minimizes total wall-clock time. Do NOT run subagents sequentially when they can run in parallel. + +### Discovery Phase +- Launch up to 6 discovery subagents in a single message (multiple Agent tool calls) +- Each subagent runs 1 search query (or up to 3 if queries are small) +- If more than 6 subagents needed, launch the next wave of ~6 after the current wave completes +- BB Search rate limit: 120 req/min — with 6 concurrent subagents each making 1 call, pacing is safe +- After all discovery waves complete, run `list_discovery_urls.py` to get deduplicated URLs — do NOT read batch JSON files directly + +### Research & Enrichment Phase +- Companies per subagent varies by depth: + - `quick`: ~10 companies per subagent (light research per company) + - `deep`: ~5 companies per subagent (moderate research per company) + - `deeper`: ~2-3 companies per subagent (intensive research per company) +- Launch up to 6 subagents in a single message (multiple Agent tool calls) +- If more than 6 subagents needed, launch the next wave of ~6 after the current wave completes +- Browser fallbacks take 10-30s each — expect slower subagents when sites are JS-heavy +- After ALL enrichment subagents complete, run `compile_csv.py` directly — do NOT read or merge batch files yourself + +### Sizing Formula +``` +micro_verticals = ceil(requested_leads / 35) +discovery_subagents = micro_verticals +expected_urls = micro_verticals * 20 (avg yield ~20 per 25-result query after filtering) + +# Enrichment sizing depends on depth: +quick: enrichment_subagents = ceil(expected_urls / 10) +deep: enrichment_subagents = ceil(expected_urls / 5) +deeper: enrichment_subagents = ceil(expected_urls / 3) + +discovery_waves = ceil(discovery_subagents / 6) +enrichment_waves = ceil(enrichment_subagents / 6) 
+``` + +### Error Handling +- If a subagent fails, log the error and continue with remaining batches +- If >50% of subagents fail in a wave, pause and inform the user +- Never retry identical queries — adjust wording if a query returns poor results +- If bb_smart_fetch.ts fails on a URL, skip it and note in the stats + +## Contact Discovery Subagent Prompt Template + +``` +You are a contact discovery subagent. Find decision makers at target companies. + +TOOL RULES — CRITICAL, FOLLOW EXACTLY: +1. You may ONLY use the Bash tool. No exceptions. +2. All searches: Bash → npx tsx {SKILL_DIR}/scripts/bb_search.ts --query "..." --num 10 +3. BANNED TOOLS (these trigger permission prompts that break the flow): + - WebFetch, WebSearch, Write, Read, Glob, Grep — ALL BANNED + If you use ANY banned tool, the entire run fails. Use ONLY Bash. +4. NEVER use ~ or $HOME in paths — they trigger "shell expansion" approval prompts. Use the full literal path provided in {SKILL_DIR}. + +COMPANIES TO RESEARCH: +{company_list_with_websites} + +SENDER CONTEXT: +- Sender's company: {sender_company} +- Sender's product: {sender_product} +- ICP description: {icp_description} + +TARGET TITLES — choose the most relevant buyer personas based on the sender's product and ICP: +{target_titles} + +Examples of how to pick titles: +- Selling dev tools/docs → Head of DevRel, Developer Advocate, VP Engineering +- Selling security → CISO, Head of Security, VP Engineering +- Selling infrastructure → CTO, VP Engineering, Head of Platform +- Selling to early-stage startups → Founder, CEO, CTO (small teams = founders decide) +- Selling marketing/GTM tools → VP Marketing, Head of Growth, CMO +- Selling data tools → Head of Data, VP Engineering, CTO + +The main agent should pick 3-5 relevant titles based on the sender's product and ICP, and pass them in {target_titles}. + +RESEARCH PATTERN (per company): +1. Search: "{company name} {title} LinkedIn" for each target title +2. 
Search: "{company name} team leadership engineering about" +3. From results, extract: name, title, LinkedIn URL +4. Estimate email: first@companydomain.com (use the company's actual domain) + +OUTPUT: +Report back a table for each company: +- company_name +- contact_name (full name) +- contact_title (their actual title, not your search query) +- estimated_email (first@domain.com pattern) +- linkedin_url (if found, otherwise "—") + +If no contact found for a company, report "No contact found" and move on. +Do NOT return raw search results — only the extracted contact info. +``` diff --git a/skills/cold-outbound/scripts/bb_search.ts b/skills/cold-outbound/scripts/bb_search.ts new file mode 100644 index 0000000..0d056df --- /dev/null +++ b/skills/cold-outbound/scripts/bb_search.ts @@ -0,0 +1,96 @@ +// Browserbase Search API wrapper for cold outbound lead discovery. +// Usage: npx tsx bb_search.ts --query "fintech startups series A" --num 25 [--output /tmp/batch.json] + +import Browserbase from "@browserbasehq/sdk"; +import { writeFileSync } from "fs"; + +interface SearchResult { + url: string; + title: string; + author?: string; + publishedDate?: string; +} + +function parseArgs(argv: string[]): { query: string; num: number; output?: string } { + const args = argv.slice(2); + let query = ""; + let num = 10; + let output: string | undefined; + + for (let i = 0; i < args.length; i++) { + switch (args[i]) { + case "--query": + query = args[++i]; + break; + case "--num": + num = Math.min(25, Math.max(1, parseInt(args[++i], 10))); + break; + case "--output": + output = args[++i]; + break; + } + } + + if (!query) { + console.error("Usage: npx tsx bb_search.ts --query \"search terms\" --num 25 [--output path.json]"); + process.exit(1); + } + + return { query, num, output }; +} + +async function search(query: string, numResults: number): Promise { + const apiKey = process.env.BROWSERBASE_API_KEY; + if (!apiKey) { + console.error("Error: BROWSERBASE_API_KEY environment 
variable is required"); + process.exit(1); + } + + const bb = new Browserbase({ apiKey }); + + try { + const response = await bb.search.web({ query, numResults }); + return (response.results || []).map((r: any) => ({ + url: r.url, + title: r.title, + author: r.author || undefined, + publishedDate: r.publishedDate || undefined, + })); + } catch (error: any) { + // Retry once on rate limit (429) + if (error?.status === 429) { + console.error("[bb_search] Rate limited, retrying in 1s..."); + await new Promise((r) => setTimeout(r, 1000)); + const response = await bb.search.web({ query, numResults }); + return (response.results || []).map((r: any) => ({ + url: r.url, + title: r.title, + author: r.author || undefined, + publishedDate: r.publishedDate || undefined, + })); + } + throw error; + } +} + +async function main() { + const { query, num, output } = parseArgs(process.argv); + + console.error(`[bb_search] Searching: "${query}" (num=${num})`); + const results = await search(query, num); + console.error(`[bb_search] Found ${results.length} results`); + + const payload = JSON.stringify(results, null, 2); + + if (output) { + writeFileSync(output, payload, "utf-8"); + console.error(`[bb_search] Written to ${output}`); + } else { + console.log(payload); + } +} + +main().catch((err) => { + console.error("[bb_search] Error:", err.message || err); + process.exit(1); +}); diff --git a/skills/cold-outbound/scripts/bb_smart_fetch.ts b/skills/cold-outbound/scripts/bb_smart_fetch.ts new file mode 100644 index 0000000..fbddf07 --- /dev/null +++ b/skills/cold-outbound/scripts/bb_smart_fetch.ts @@ -0,0 +1,236 @@ +// Smart fetch with Browserbase Fetch API fast-path and Stagehand browser fallback. 
+// Usage: npx tsx bb_smart_fetch.ts --url "https://example.com" [--output /tmp/result.json] [--raw] +// --raw: Output raw text content instead of structured JSON (useful for llms.txt, sitemap.xml) + +import Browserbase from "@browserbasehq/sdk"; +import { z } from "zod"; +import { writeFileSync } from "fs"; +import { chromium } from "playwright"; + +// ============= CONFIGURATION ============= + +const MIN_CONTENT_LENGTH = 500; +const MIN_TEXT_DENSITY = 0.05; + +const JS_REQUIRED_PATTERNS = [ + /enable javascript/i, + /javascript is (required|disabled|not enabled)/i, + /please enable javascript/i, + /this (site|page|app) requires javascript/i, + /checking your browser/i, + /