From 4c5edd3b901113e4c73cea04a7855a70be4b187f Mon Sep 17 00:00:00 2001
From: semiventurero
Date: Thu, 9 Oct 2025 18:39:28 +0300
Subject: [PATCH 1/3] Add find_company_mail folder with placeholder

---
 task_examples/find_company_mail/.gitkeep | 1 +
 1 file changed, 1 insertion(+)
 create mode 100644 task_examples/find_company_mail/.gitkeep

diff --git a/task_examples/find_company_mail/.gitkeep b/task_examples/find_company_mail/.gitkeep
new file mode 100644
index 0000000..8d1c8b6
--- /dev/null
+++ b/task_examples/find_company_mail/.gitkeep
@@ -0,0 +1 @@
+

From 92a716b3a64751a2e468e23bcee985a325f6cf5a Mon Sep 17 00:00:00 2001
From: semiventurero
Date: Thu, 9 Oct 2025 18:57:35 +0300
Subject: [PATCH 2/3] feat: add find-company-mail contribution folder

---
 task_examples/find_company_mail/README.md     | 25 ++++
 .../find_company_mail/email_utils.py          | 15 +++
 .../find_company_mail/find_company_mail.py    | 93 ++++++++++++++++++
 .../find_company_mail/serper_client.py        | 26 +++++
 4 files changed, 159 insertions(+)
 create mode 100644 task_examples/find_company_mail/README.md
 create mode 100644 task_examples/find_company_mail/email_utils.py
 create mode 100644 task_examples/find_company_mail/find_company_mail.py
 create mode 100644 task_examples/find_company_mail/serper_client.py

diff --git a/task_examples/find_company_mail/README.md b/task_examples/find_company_mail/README.md
new file mode 100644
index 0000000..df7d17c
--- /dev/null
+++ b/task_examples/find_company_mail/README.md
@@ -0,0 +1,25 @@
+# Find Company Mail
+
+Find a company's email by querying the web with the format `mail: {company}` and extracting emails from search results.
+
+## Usage
+
+```bash
+python task_examples/find_company_mail/find_company_mail.py --company "Linktera"
+```
+
+Example output:
+
+```json
+{
+  "company": "Linktera",
+  "email": "info@linktera.com",
+  "source": "https://www.linktera.com/"
+}
+```
+
+## Notes
+
+- Uses Serper (`SERPER_API_KEY` in your `.env`) to perform the query `mail: {company}`.
+- Extracts emails from result titles, snippets, and links.
+
diff --git a/task_examples/find_company_mail/email_utils.py b/task_examples/find_company_mail/email_utils.py
new file mode 100644
index 0000000..e248268
--- /dev/null
+++ b/task_examples/find_company_mail/email_utils.py
@@ -0,0 +1,15 @@
+import re
+from typing import List, Set
+
+
+EMAIL_REGEX = re.compile(r"[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}")
+
+
+def extract_emails_from_text(text: str) -> List[str]:
+    """Return the unique email addresses found in ``text``, sorted; empty input gives []."""
+    if not text:
+        return []
+    emails: Set[str] = set(EMAIL_REGEX.findall(text))
+    return sorted(emails)
+
+
diff --git a/task_examples/find_company_mail/find_company_mail.py b/task_examples/find_company_mail/find_company_mail.py
new file mode 100644
index 0000000..eedfa67
--- /dev/null
+++ b/task_examples/find_company_mail/find_company_mail.py
@@ -0,0 +1,93 @@
+import sys
+import os
+import argparse
+from typing import Optional
+from urllib.parse import urlparse
+
+# Allow running as a script
+sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))))
+
+from pydantic import BaseModel
+
+try:
+    from task_examples.find_company_mail.serper_client import search_mail_query
+    from task_examples.find_company_mail.email_utils import extract_emails_from_text
+except ImportError:
+    from serper_client import search_mail_query
+    from email_utils import extract_emails_from_text
+
+
+class MailResponse(BaseModel):
+    """Lookup result: the input, the chosen email, and the result URL it was found in."""
+
+    company: str
+    email: Optional[str] = None
+    source: Optional[str] = None
+
+
+def _normalize_to_domain(company_or_url: str) -> str:
+    """Accepts a domain (e.g., linktera.com) or URL (https://linktera.com/) and returns the domain."""
+    text = company_or_url.strip()
+    if not text:
+        return text
+    # Match the scheme case-insensitively so "HTTPS://..." is not prefixed a second time
+    has_scheme = text.lower().startswith(("http://", "https://"))
+    to_parse = text if has_scheme else f"http://{text}"
+    parsed = urlparse(to_parse)
+    # hostname drops credentials and port (user@host:8080 -> host); keep the old fallbacks
+    host = parsed.hostname or parsed.netloc or parsed.path
+    host = host.strip().lower()
+    if host.startswith("www."):
+        host = host[4:]
+    # Remove trailing slashes if any leaked into host
+    host = host.split("/")[0]
+    return host
+
+
+def find_company_mail(company: str) -> MailResponse:
+    """Search the web for ``mail: <domain>`` and return the best email candidate.
+
+    Emails are collected from the title, snippet, and link of each organic result.
+    An address on the requested domain wins; otherwise the first one found is used.
+    Search failures are deliberately swallowed and reported as an empty response.
+    """
+    domain = _normalize_to_domain(company)
+    query = f"mail: {domain}" if domain else f"mail: {company}"
+
+    try:
+        data = search_mail_query(query)
+    except Exception:
+        # Best-effort lookup: a missing API key or a failed request means "nothing found"
+        return MailResponse(company=company, email=None, source=None)
+
+    # Map each email to the link of the first result it appeared in (dicts keep insertion order)
+    found = {}
+    for item in data.get("organic") or []:
+        link = item.get("link")
+        for field in ("title", "snippet", "link"):
+            for email in extract_emails_from_text(item.get(field) or ""):
+                found.setdefault(email, link)
+
+    # Prefer emails that match the provided domain
+    if domain:
+        suffixes = ("@" + domain, "@www." + domain)
+        for email, source in found.items():
+            if email.lower().endswith(suffixes):
+                return MailResponse(company=company, email=email, source=source)
+
+    # Otherwise fall back to the first address seen
+    if found:
+        email, source = next(iter(found.items()))
+        return MailResponse(company=company, email=email, source=source)
+    return MailResponse(company=company, email=None, source=None)
+
+
+if __name__ == "__main__":
+    parser = argparse.ArgumentParser(description="Find a company's email via web search")
+    parser.add_argument("--company", required=True, help="Company name")
+    args = parser.parse_args()
+
+    result = find_company_mail(args.company)
+    print(result.model_dump_json(indent=2))
+
+
diff --git a/task_examples/find_company_mail/serper_client.py b/task_examples/find_company_mail/serper_client.py
new file mode 100644
index 0000000..645cf2b
--- /dev/null
+++ b/task_examples/find_company_mail/serper_client.py
@@ -0,0 +1,26 @@
+import os
+import requests
+from dotenv import load_dotenv
+
+load_dotenv()
+
+SERPER_API_KEY = os.getenv("SERPER_API_KEY")
+SERPER_URL = "https://google.serper.dev/search"
+REQUEST_TIMEOUT_SECONDS = 15
+
+
+def search_mail_query(query: str) -> dict:
+    """POST ``query`` to the Serper search endpoint and return the decoded JSON body.
+
+    Raises ValueError when SERPER_API_KEY is unset, and a requests exception on
+    connection failure, timeout, or an HTTP error status.
+    """
+    if not SERPER_API_KEY:
+        raise ValueError("Missing SERPER_API_KEY in .env")
+    headers = {"X-API-KEY": SERPER_API_KEY, "Content-Type": "application/json"}
+    # requests never times out by default; bound the wait so a stalled call cannot hang the script
+    resp = requests.post(SERPER_URL, headers=headers, json={"q": query}, timeout=REQUEST_TIMEOUT_SECONDS)
+    resp.raise_for_status()
+    return resp.json()
+
+

From e0c53d9f7e3abc029ad2cbc4d2a7c0456dee09a7 Mon Sep 17 00:00:00 2001
From: semiventurero <63498905+venturero@users.noreply.github.com>
Date: Thu, 9 Oct 2025 19:09:07 +0300
Subject: [PATCH 3/3] Delete task_examples/find_company_mail/.gitkeep

---
 task_examples/find_company_mail/.gitkeep | 1 -
 1 file changed, 1 deletion(-)
 delete mode 100644 task_examples/find_company_mail/.gitkeep

diff --git a/task_examples/find_company_mail/.gitkeep b/task_examples/find_company_mail/.gitkeep
deleted file mode 100644
index 8d1c8b6..0000000
--- a/task_examples/find_company_mail/.gitkeep
+++ /dev/null
@@ -1 +0,0 @@
-