Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 8 additions & 0 deletions src/kurt/tools/core/tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,7 @@ def mock_fetch_registry(clean_registry):
``get_provider()`` returns a fresh mock on each call.
"""
from kurt.tools.fetch.providers.apify.mock import MockApifyFetcher
from kurt.tools.fetch.providers.composio.mock import MockComposioFetcher
from kurt.tools.fetch.providers.firecrawl.mock import MockFirecrawlFetcher
from kurt.tools.fetch.providers.httpx.mock import MockHttpxFetcher
from kurt.tools.fetch.providers.tavily.mock import MockTavilyFetcher
Expand All @@ -54,6 +55,7 @@ def mock_fetch_registry(clean_registry):
"firecrawl": MockFirecrawlFetcher,
"apify": MockApifyFetcher,
"twitterapi": MockTwitterApiFetcher,
"composio": MockComposioFetcher,
}
clean_registry._provider_meta["fetch"] = {
name: {
Expand Down Expand Up @@ -111,6 +113,7 @@ def mock_map_registry(clean_registry):
def mock_full_registry(clean_registry):
"""Registry pre-loaded with ALL mock providers (fetch + map)."""
from kurt.tools.fetch.providers.apify.mock import MockApifyFetcher as MockApifyFetchProvider
from kurt.tools.fetch.providers.composio.mock import MockComposioFetcher
from kurt.tools.fetch.providers.firecrawl.mock import MockFirecrawlFetcher
from kurt.tools.fetch.providers.httpx.mock import MockHttpxFetcher
from kurt.tools.fetch.providers.tavily.mock import MockTavilyFetcher
Expand All @@ -130,6 +133,7 @@ def mock_full_registry(clean_registry):
"firecrawl": MockFirecrawlFetcher,
"apify": MockApifyFetchProvider,
"twitterapi": MockTwitterApiFetcher,
"composio": MockComposioFetcher,
}
map_providers = {
"sitemap": MockSitemapMapper,
Expand Down Expand Up @@ -173,6 +177,8 @@ def clean_env(monkeypatch):
"FIRECRAWL_API_KEY",
"APIFY_API_KEY",
"TWITTERAPI_API_KEY",
"COMPOSIO_API_KEY",
"COMPOSIO_CONNECTION_ID",
"NOTION_TOKEN",
"SANITY_TOKEN",
]:
Expand All @@ -186,6 +192,8 @@ def all_provider_env(monkeypatch):
monkeypatch.setenv("FIRECRAWL_API_KEY", "test-firecrawl-key")
monkeypatch.setenv("APIFY_API_KEY", "test-apify-key")
monkeypatch.setenv("TWITTERAPI_API_KEY", "test-twitterapi-key")
monkeypatch.setenv("COMPOSIO_API_KEY", "test-composio-key")
monkeypatch.setenv("COMPOSIO_CONNECTION_ID", "test-composio-connection")


# ============================================================================
Expand Down
8 changes: 8 additions & 0 deletions src/kurt/tools/core/tests/test_provider_config_models.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,12 @@ def test_twitterapi_has_config_model(self):
assert hasattr(TwitterApiFetcher, "ConfigModel")
assert issubclass(TwitterApiFetcher.ConfigModel, BaseModel)

def test_composio_has_config_model(self):
    """ComposioFetcher exposes a ``ConfigModel`` that subclasses pydantic ``BaseModel``."""
    from kurt.tools.fetch.providers.composio.provider import ComposioFetcher

    assert hasattr(ComposioFetcher, "ConfigModel")
    assert issubclass(ComposioFetcher.ConfigModel, BaseModel)


class TestMapProviderConfigModels:
"""Verify all map providers have ConfigModel."""
Expand Down Expand Up @@ -102,6 +108,7 @@ class TestConfigModelDefaults:
"kurt.tools.fetch.providers.firecrawl.config.FirecrawlProviderConfig",
"kurt.tools.fetch.providers.apify.config.ApifyFetchProviderConfig",
"kurt.tools.fetch.providers.twitterapi.config.TwitterApiProviderConfig",
"kurt.tools.fetch.providers.composio.config.ComposioProviderConfig",
"kurt.tools.map.providers.sitemap.config.SitemapProviderConfig",
"kurt.tools.map.providers.rss.config.RssProviderConfig",
"kurt.tools.map.providers.crawl.config.CrawlProviderConfig",
Expand Down Expand Up @@ -131,6 +138,7 @@ def test_config_model_has_defaults(self, config_path: str):
"kurt.tools.fetch.providers.firecrawl.config.FirecrawlProviderConfig",
"kurt.tools.fetch.providers.apify.config.ApifyFetchProviderConfig",
"kurt.tools.fetch.providers.twitterapi.config.TwitterApiProviderConfig",
"kurt.tools.fetch.providers.composio.config.ComposioProviderConfig",
"kurt.tools.map.providers.sitemap.config.SitemapProviderConfig",
"kurt.tools.map.providers.rss.config.RssProviderConfig",
"kurt.tools.map.providers.crawl.config.CrawlProviderConfig",
Expand Down
8 changes: 5 additions & 3 deletions src/kurt/tools/core/tests/test_provider_fixtures.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ class TestMockFetchRegistry:

def test_has_all_fetch_providers(self, mock_fetch_registry):
providers = mock_fetch_registry._providers.get("fetch", {})
expected = {"trafilatura", "httpx", "tavily", "firecrawl", "apify", "twitterapi"}
expected = {"trafilatura", "httpx", "tavily", "firecrawl", "apify", "twitterapi", "composio"}
assert set(providers.keys()) == expected

def test_get_provider_returns_mock(self, mock_fetch_registry):
Expand All @@ -45,7 +45,7 @@ def test_mock_provider_works(self, mock_fetch_registry):

def test_list_providers(self, mock_fetch_registry):
providers = mock_fetch_registry.list_providers("fetch")
assert len(providers) == 6
assert len(providers) == 7

def test_metadata_populated(self, mock_fetch_registry):
meta = mock_fetch_registry._provider_meta.get("fetch", {})
Expand Down Expand Up @@ -83,7 +83,7 @@ def test_has_both_tools(self, mock_full_registry):
assert "map" in mock_full_registry._providers

def test_fetch_providers_count(self, mock_full_registry):
assert len(mock_full_registry._providers["fetch"]) == 6
assert len(mock_full_registry._providers["fetch"]) == 7

def test_map_providers_count(self, mock_full_registry):
assert len(mock_full_registry._providers["map"]) == 6
Expand Down Expand Up @@ -115,6 +115,8 @@ def test_all_provider_env_sets_tokens(self, all_provider_env):
assert os.environ.get("FIRECRAWL_API_KEY") == "test-firecrawl-key"
assert os.environ.get("APIFY_API_KEY") == "test-apify-key"
assert os.environ.get("TWITTERAPI_API_KEY") == "test-twitterapi-key"
assert os.environ.get("COMPOSIO_API_KEY") == "test-composio-key"
assert os.environ.get("COMPOSIO_CONNECTION_ID") == "test-composio-connection"


class TestIndividualMockFixtures:
Expand Down
13 changes: 10 additions & 3 deletions src/kurt/tools/core/tests/test_provider_registry.py
Original file line number Diff line number Diff line change
Expand Up @@ -810,7 +810,7 @@ def test_discovers_fetch_providers(self, monkeypatch):

providers = registry.list_providers("fetch")
names = sorted(p["name"] for p in providers)
assert names == ["apify", "firecrawl", "httpx", "tavily", "trafilatura", "twitterapi"]
assert names == ["apify", "composio", "firecrawl", "httpx", "tavily", "trafilatura", "twitterapi"]

def test_discovers_map_providers(self, monkeypatch):
"""Discovers all built-in map providers."""
Expand Down Expand Up @@ -855,6 +855,13 @@ def test_fetch_provider_metadata(self, monkeypatch):
assert "*x.com/*" in providers["twitterapi"]["url_patterns"]
assert providers["twitterapi"]["requires_env"] == ["TWITTERAPI_API_KEY"]

# composio - explicit-only Twitter/X provider
assert providers["composio"]["url_patterns"] == []
assert providers["composio"]["requires_env"] == [
"COMPOSIO_API_KEY",
"COMPOSIO_CONNECTION_ID",
]

def test_map_provider_metadata(self, monkeypatch):
"""Built-in map providers have correct metadata."""
monkeypatch.setenv("KURT_PROJECT_ROOT", "/nonexistent")
Expand All @@ -881,7 +888,7 @@ def test_map_provider_metadata(self, monkeypatch):
assert providers["cms"]["url_patterns"] == []

def test_url_matching_twitter_prefers_twitterapi(self, monkeypatch):
"""Twitter/X URLs must resolve to twitterapi, not apify (bd-21im.2)."""
"""Twitter/X URLs resolve to twitterapi by default; composio is explicit-only."""
monkeypatch.setenv("KURT_PROJECT_ROOT", "/nonexistent")
monkeypatch.setenv("HOME", "/nonexistent")

Expand Down Expand Up @@ -940,7 +947,7 @@ def test_list_tools_shows_fetch_and_map(self, monkeypatch):

assert "fetch" in tools
assert "map" in tools
assert len(tools["fetch"]) == 6
assert len(tools["fetch"]) == 7
assert len(tools["map"]) == 6

def test_validate_builtin_provider_missing_env(self, monkeypatch):
Expand Down
13 changes: 10 additions & 3 deletions src/kurt/tools/fetch/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -56,12 +56,16 @@ def _check_engine_status(engine: str) -> tuple[str, str]:
if os.getenv("TWITTERAPI_API_KEY"):
return "ready", "TwitterAPI.io extraction"
return "missing", "Set TWITTERAPI_API_KEY"
if engine == "composio":
if os.getenv("COMPOSIO_API_KEY") and os.getenv("COMPOSIO_CONNECTION_ID"):
return "ready", "Composio Twitter/X extraction"
return "missing", "Set COMPOSIO_API_KEY and COMPOSIO_CONNECTION_ID"
return "unknown", "Unknown engine"


def _list_engines(output_format: str) -> None:
"""List available fetch engines and their status."""
engines = ["trafilatura", "httpx", "firecrawl", "tavily", "apify", "twitterapi"]
engines = ["trafilatura", "httpx", "firecrawl", "tavily", "apify", "twitterapi", "composio"]
engine_info = []

for engine in engines:
Expand Down Expand Up @@ -106,11 +110,14 @@ def _list_engines(output_format: str) -> None:
@click.option("--files", "files_paths", help="Comma-separated list of local file paths")
@click.option(
"--provider",
help="Provider name for fetch (trafilatura, httpx, tavily, firecrawl, apify, twitterapi)",
help="Provider name for fetch (trafilatura, httpx, tavily, firecrawl, apify, twitterapi, composio)",
)
@click.option(
"--engine",
type=click.Choice(["firecrawl", "trafilatura", "httpx", "tavily", "apify", "twitterapi"], case_sensitive=False),
type=click.Choice(
["firecrawl", "trafilatura", "httpx", "tavily", "apify", "twitterapi", "composio"],
case_sensitive=False,
),
help="[Deprecated: use --provider] Fetch engine to use",
)
@click.option(
Expand Down
4 changes: 4 additions & 0 deletions src/kurt/tools/fetch/engines/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
- FirecrawlFetcher: Firecrawl API-based content extraction (handles JS rendering)
- ApifyFetcher: Apify-based social media content extraction
- TwitterApiFetcher: TwitterAPI.io-based Twitter/X content extraction (pay-as-you-go)
- ComposioFetcher: Composio-based Twitter/X content extraction

Usage:
from kurt.tools.fetch.engines import TrafilaturaFetcher
Expand All @@ -32,6 +33,7 @@
# Primary names are *Fetcher, *Engine aliases exist for backward compatibility
from kurt.tools.fetch.engines.trafilatura import TrafilaturaEngine, TrafilaturaFetcher
from kurt.tools.fetch.engines.twitterapi import TwitterApiEngine, TwitterApiFetcher
from kurt.tools.fetch.providers.composio.provider import ComposioFetcher


class EngineRegistry:
Expand Down Expand Up @@ -95,6 +97,7 @@ def is_available(cls, name: str) -> bool:
EngineRegistry.register("firecrawl", FirecrawlFetcher)
EngineRegistry.register("apify", ApifyFetcher)
EngineRegistry.register("twitterapi", TwitterApiFetcher)
EngineRegistry.register("composio", ComposioFetcher)


__all__ = [
Expand All @@ -107,6 +110,7 @@ def is_available(cls, name: str) -> bool:
"FirecrawlFetcher",
"ApifyFetcher",
"TwitterApiFetcher",
"ComposioFetcher",
# Backward compatibility aliases (*Engine names)
"TrafilaturaEngine",
"HttpxEngine",
Expand Down
4 changes: 4 additions & 0 deletions src/kurt/tools/fetch/providers/composio/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
"""Composio fetch provider for Twitter/X.

Twitter/X search via the Composio API, at no cost within the free tier (20,000 API calls/month).
"""
33 changes: 33 additions & 0 deletions src/kurt/tools/fetch/providers/composio/config.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
"""Configuration for Composio fetch provider."""

from __future__ import annotations

from pydantic import BaseModel, Field


class ComposioProviderConfig(BaseModel):
    """Tunable settings for the Composio Twitter/X fetch provider.

    Credentials are not fields of this model. The provider requires:
    - COMPOSIO_API_KEY environment variable
    - COMPOSIO_CONNECTION_ID environment variable (from Composio dashboard)

    Free tier: 20,000 API calls/month.
    """

    # Must be strictly positive (gt=0).
    timeout: float = Field(60.0, gt=0, description="Request timeout in seconds")

    # Constrained to the inclusive range 10..100; defaults to the upper bound.
    max_results: int = Field(
        100,
        ge=10,
        le=100,
        description="Maximum results per search (10-100)",
    )

    # Non-negative; per the description, 0 disables caching.
    cache_ttl_hours: int = Field(6, ge=0, description="Cache TTL in hours (0 to disable)")
8 changes: 8 additions & 0 deletions src/kurt/tools/fetch/providers/composio/fixtures/error.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
{
"content": "",
"metadata": {
"engine": "composio"
},
"success": false,
"error": "[Composio] Credentials not configured. Set COMPOSIO_API_KEY and COMPOSIO_CONNECTION_ID."
}
17 changes: 17 additions & 0 deletions src/kurt/tools/fetch/providers/composio/fixtures/success.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
{
"content": "# Tweet by Test User (@testuser)\n\n**Date:** 2026-02-15T10:00:00Z\n**URL:** https://x.com/testuser/status/123456789\n\nThis is a test tweet with some content.\n\n**Engagement:** 100 likes · 50 retweets · 10 replies · 5,000 views\n",
"metadata": {
"engine": "composio",
"url": "https://x.com/testuser/status/123456789",
"tweet_id": "123456789",
"author": "testuser",
"created_at": "2026-02-15T10:00:00Z",
"like_count": 100,
"retweet_count": 50,
"reply_count": 10,
"impression_count": 5000,
"fetched_at": "2026-02-15T12:00:00Z"
},
"success": true,
"error": null
}
97 changes: 97 additions & 0 deletions src/kurt/tools/fetch/providers/composio/mock.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,97 @@
"""Mock Composio fetcher for testing."""

from __future__ import annotations

import json
from pathlib import Path
from typing import Any, Callable

from kurt.tools.fetch.core.base import FetchResult


class MockComposioFetcher:
    """Mock Composio fetcher for testing.

    Provides call tracking, fixture loading, and configurable responses
    without requiring Composio credentials.

    ``fetch()`` resolves its result in this order: a configured error is
    raised first, then a response function is consulted, then a fixed
    response is returned, and finally the bundled ``success`` fixture is
    loaded. The ``with_*`` methods return ``self`` so calls can be chained.
    """

    name = "composio"
    version = "mock"
    # The mock declares no URL patterns and no required environment variables.
    url_patterns: list[str] = []
    requires_env: list[str] = []

    def __init__(self) -> None:
        self._calls: list[dict[str, Any]] = []
        self._response: FetchResult | None = None
        self._error: Exception | None = None
        self._response_fn: Callable[[str], FetchResult] | None = None

    @property
    def calls(self) -> list[dict[str, Any]]:
        """Record of all fetch() calls (one dict per call: ``url`` plus kwargs)."""
        return self._calls

    @property
    def call_count(self) -> int:
        """Number of fetch() calls recorded since creation or the last reset()."""
        return len(self._calls)

    def was_called_with(self, url: str) -> bool:
        """Check if fetch was called with specific URL."""
        return any(c["url"] == url for c in self._calls)

    def reset(self) -> None:
        """Clear call history and every configured response/error."""
        self._calls.clear()
        self._response = None
        self._error = None
        self._response_fn = None

    def with_error(self, error: Exception) -> MockComposioFetcher:
        """Configure mock to raise ``error`` from every fetch() call."""
        self._error = error
        return self

    def with_response(self, response: FetchResult) -> MockComposioFetcher:
        """Configure mock to return ``response`` from fetch()."""
        self._response = response
        return self

    def with_fixture(self, fixture_name: str) -> MockComposioFetcher:
        """Load the response from ``fixtures/<fixture_name>.json`` next to this module.

        Raises whatever reading or parsing the file raises (for example
        ``FileNotFoundError`` for an unknown fixture name).
        """
        fixture_path = Path(__file__).parent / "fixtures" / f"{fixture_name}.json"
        # Fixtures contain non-ASCII text (e.g. the "·" separator), so decode
        # explicitly as UTF-8 instead of relying on the platform locale.
        data = json.loads(fixture_path.read_text(encoding="utf-8"))
        self._response = FetchResult(**data)
        return self

    def with_response_fn(self, fn: Callable[[str], FetchResult]) -> MockComposioFetcher:
        """Configure mock to compute each response by calling ``fn(url)``."""
        self._response_fn = fn
        return self

    def fetch(self, url: str, **kwargs: Any) -> FetchResult:
        """Record the call and return (or raise) the configured outcome.

        The call is recorded before any error is raised, so failed calls
        still appear in ``calls``.
        """
        self._calls.append({"url": url, **kwargs})

        # Compare against None rather than relying on truthiness so that a
        # configured-but-falsy value is never mistaken for "not configured".
        if self._error is not None:
            raise self._error

        if self._response_fn is not None:
            return self._response_fn(url)

        if self._response is None:
            # Nothing configured: fall back to the bundled success fixture.
            # It is stored on the instance, so later calls reuse the same object.
            self.with_fixture("success")

        return self._response  # type: ignore[return-value]


def create_mock(**kwargs: Any) -> MockComposioFetcher:
    """Build a MockComposioFetcher pre-configured from keyword options.

    Recognised keys are ``response``, ``error`` and ``fixture``; they are
    applied in that order, and any other key is ignored.
    """
    fetcher = MockComposioFetcher()
    # Order matters: ``fixture`` is applied last, so it replaces a fixed
    # ``response`` when both are supplied.
    configurators = (
        ("response", fetcher.with_response),
        ("error", fetcher.with_error),
        ("fixture", fetcher.with_fixture),
    )
    for option, configure in configurators:
        if option in kwargs:
            configure(kwargs[option])
    return fetcher
Loading
Loading