Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
18 commits
Select commit Hold shift + click to select a range
4ccb1c7
docs: generate and verify full project documentation suite
May 8, 2026
a355b3d
fix: CR-01 add memory_guidance to allowed blackboard entry types
May 8, 2026
b34aef2
fix: CR-02 WR-01 WR-02 fix blackboard race, log write failures, remov…
May 8, 2026
51b0345
fix: CR-03 cancel in-flight asyncio task on DELETE /tasks/{task_id}
May 8, 2026
990bd37
fix: CR-04 switch RedisSessionStore to redis.asyncio for non-blocking…
May 8, 2026
f53faa3
fix: CR-05 remove duplicate timeout kwarg from OpenAI client call
May 8, 2026
b98b814
fix: WR-03 use get_settings() for embedding cache dir, reset in tests
May 8, 2026
54b4566
fix: WR-04 replace redis.keys() with scan_iter to avoid O(N) blocking…
May 8, 2026
6c71855
fix: WR-05 include input token cost in _estimate_cost
May 8, 2026
00883cc
fix: WR-06 remove dead activate_synthesizer guard in budget_tight branch
May 8, 2026
8e521d4
fix: WR-07 add bounded LRU eviction to _tasks dict
May 8, 2026
5e03851
fix: WR-08 replace datetime.utcnow() with datetime.now(timezone.utc)
May 8, 2026
3fa564d
fix: WR-09 remove dead _is_sentence_terminator function
May 8, 2026
05bada0
fix: WR-10 replace print to stderr with structured logger in corpus i…
May 8, 2026
bcc7286
fix: update test mock and await for aioredis migration in CR-04
May 8, 2026
584fb0f
chore: add code review report and uv lockfile
May 9, 2026
b624f12
fix(lint): move logger assignment below imports in thesis_flow.py
May 9, 2026
7a3993d
fix(lint): apply black formatting to sessions/store.py and tools/mcp/…
May 9, 2026
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
51 changes: 50 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -113,7 +113,7 @@ thesis research "..." --rag-collection my_papers # also retrieve from your own
thesis critique "Social media causes depression because teens spend too much time online"
thesis verify "10.1038/nature14539"
thesis papers "transformer attention" --source arxiv --limit 5
thesis corpus ingest thesis.pdf --title "My Thesis" --discipline cs --year 2024
thesis corpus thesis.pdf --title "My Thesis" --discipline cs --year 2024
thesis ingest add paper.pdf --collection my_papers # add to your RAG corpus
thesis ingest list
thesis sessions
Expand Down Expand Up @@ -187,3 +187,52 @@ See [CONTRIBUTING.md](CONTRIBUTING.md) for the full workflow.
## License

[MIT](LICENSE-MIT).

## FastAPI app

The FastAPI app (`apps/api/main.py`) provides an async HTTP interface for programmatic access. Start it with:

```bash
uvicorn apps.api.main:app --reload
# or
make dev
```

The server starts on `http://localhost:8000` by default. Interactive docs are available at `http://localhost:8000/docs`.

### Endpoints

| Method | Path | Description |
|--------|------|-------------|
| `GET` | `/health` | Health check — returns status and pending task count |
| `POST` | `/tasks/` | Submit a research task (responds with HTTP `202` and the new `task_id`) |
| `GET` | `/tasks/` | List all tasks with their current status |
| `GET` | `/tasks/{task_id}` | Poll a task for status and result |
| `DELETE` | `/tasks/{task_id}` | Remove a task; a task that is still running is cancelled first |

### Example: submit and poll a task

```bash
# Submit
curl -s -X POST http://localhost:8000/tasks/ \
-H "Content-Type: application/json" \
-d '{"query": "Does retrieval-augmented generation improve factuality?", "discipline": "computer_science", "mode": "balanced"}' \
| jq .
# {"task_id": "abc-123", "status": "queued", "created_at": "..."}

# Poll until complete
curl -s http://localhost:8000/tasks/abc-123 | jq '.status'
```

**Request fields:**

| Field | Required | Default | Description |
|-------|----------|---------|-------------|
| `query` | yes | — | Research question to investigate |
| `discipline` | no | `general` | Subject area (e.g. `computer_science`, `psychology`) |
| `topic_summary` | no | same as `query` | One-sentence context for the planner |
| `existing_knowledge` | no | `""` | What the user already knows |
| `what_they_need` | no | `""` | Specific output they are looking for |
| `mode` | no | `balanced` | LLM tier to use: `quality`, `balanced`, or `cheap` |

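Tasks run asynchronously in the background. Poll `GET /tasks/{task_id}` until `status` is `complete` or `failed`. The `result` field contains a full `ResearchSession` object in the same shape as the CLI JSON output. Task records are held in memory and capped at 1,000; once the cap is exceeded, the oldest records are evicted first.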
470 changes: 470 additions & 0 deletions REVIEW.md

Large diffs are not rendered by default.

24 changes: 22 additions & 2 deletions apps/api/main.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import asyncio
import logging
import uuid
from collections import OrderedDict
from datetime import datetime, timezone
from typing import Any, Dict, Optional

Expand All @@ -25,8 +26,11 @@
version="0.2.0",
)

_MAX_TASKS = 1000

# In-memory task store: task_id -> {status, result, error, created_at}
_tasks: Dict[str, Dict[str, Any]] = {}
_tasks: OrderedDict[str, Dict[str, Any]] = OrderedDict()
_task_handles: Dict[str, asyncio.Task] = {}


class TaskRequest(BaseModel):
Expand Down Expand Up @@ -70,6 +74,8 @@ def _make_orchestrator() -> ThesisOrchestrator:

async def _run_task(task_id: str, request: TaskRequest) -> None:
try:
if task_id not in _tasks:
return
_tasks[task_id]["status"] = "running"
orch = _make_orchestrator()
rc = ResearchContext(
Expand All @@ -80,14 +86,22 @@ async def _run_task(task_id: str, request: TaskRequest) -> None:
what_they_need=request.what_they_need or "",
)
session = await orch.execute(rc)
if task_id not in _tasks:
return
_tasks[task_id]["status"] = "complete"
_tasks[task_id]["result"] = session.model_dump()
_tasks[task_id]["completed_at"] = datetime.now(timezone.utc).isoformat()
except asyncio.CancelledError:
return
except Exception as exc:
logger.exception("Task %s failed", task_id)
if task_id not in _tasks:
return
_tasks[task_id]["status"] = "failed"
_tasks[task_id]["error"] = str(exc)
_tasks[task_id]["completed_at"] = datetime.now(timezone.utc).isoformat()
finally:
_task_handles.pop(task_id, None)


@app.get("/health")
Expand All @@ -106,7 +120,10 @@ async def submit_task(request: TaskRequest):
"error": None,
"completed_at": None,
}
asyncio.create_task(_run_task(task_id, request))
while len(_tasks) > _MAX_TASKS:
_tasks.popitem(last=False)
handle = asyncio.create_task(_run_task(task_id, request))
_task_handles[task_id] = handle
return TaskResponse(task_id=task_id, status="queued", created_at=created_at)


Expand Down Expand Up @@ -142,4 +159,7 @@ async def list_tasks():
async def delete_task(task_id: str):
    """Remove a task record, cancelling its background run if unfinished.

    Raises:
        HTTPException: with status 404 when ``task_id`` is not in the
            task store.
    """
    known = task_id in _tasks
    if not known:
        raise HTTPException(status_code=404, detail=f"Task {task_id!r} not found")

    # Forget the asyncio handle; cancel the task only if it has not finished.
    running = _task_handles.pop(task_id, None)
    needs_cancel = bool(running) and not running.done()
    if needs_cancel:
        running.cancel()

    _tasks.pop(task_id)
11 changes: 5 additions & 6 deletions core/blackboard/engine.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
import uuid
from datetime import datetime
from datetime import datetime, timezone
from typing import Any, Dict, List, Optional

import redis
Expand Down Expand Up @@ -34,6 +34,7 @@ def add_entry(
"critique",
"citation_audit",
"corpus_benchmarks",
"memory_guidance",
}
if entry_type not in allowed_types:
raise ValueError(f"Invalid entry_type '{entry_type}'. Must be one of {allowed_types}")
Expand All @@ -43,7 +44,7 @@ def add_entry(
entry_type=entry_type,
content=content,
metadata=metadata or {},
timestamp=datetime.utcnow().isoformat() + "Z",
timestamp=datetime.now(timezone.utc).isoformat(),
)

# Store in Redis as JSON string under session-specific key
Expand All @@ -52,9 +53,8 @@ def add_entry(
return entry

def get_entries_by_type(self, entry_type: str) -> List[BlackboardEntry]:
keys = self.redis.keys(f"{self.prefix}*")
entries = []
for k in keys:
for k in self.redis.scan_iter(f"{self.prefix}*"):
data = self.redis.get(k)
if data:
entry = BlackboardEntry.model_validate_json(data)
Expand All @@ -63,9 +63,8 @@ def get_entries_by_type(self, entry_type: str) -> List[BlackboardEntry]:
return sorted(entries, key=lambda x: x.timestamp)

def get_all_entries(self) -> List[BlackboardEntry]:
keys = self.redis.keys(f"{self.prefix}*")
entries = []
for k in keys:
for k in self.redis.scan_iter(f"{self.prefix}*"):
data = self.redis.get(k)
if data:
entries.append(BlackboardEntry.model_validate_json(data))
Expand Down
7 changes: 4 additions & 3 deletions core/corpus/ingest.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import logging
import re
import uuid
from typing import Any, List
Expand All @@ -7,6 +8,8 @@
from core.embeddings import EMBEDDING_MODEL
from core.schemas import CorpusSection

logger = logging.getLogger(__name__)

SECTION_PATTERNS = [
(r"(?i)^\s*(?:chapter\s+\d+[.:]\s*)?introduction\s*$", "introduction"),
(
Expand Down Expand Up @@ -231,9 +234,7 @@ def _ingest_raw(
ids=ids,
)
except Exception as e:
import sys

print(f"[CorpusIngest] add failed: {e}", file=sys.stderr)
logger.error("CorpusIngest.add failed: %s", e, exc_info=True)
raise

return corpus_sections
4 changes: 2 additions & 2 deletions core/embedding_cache.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,9 +42,9 @@ def _get_cache() -> Any | None:
logger.debug("diskcache not installed; embedding cache disabled")
return None

import os
from core.config import get_settings

cache_dir = os.environ.get("EMBEDDING_CACHE_DIR", "") or _DEFAULT_CACHE_DIR
cache_dir = get_settings().embedding_cache_dir or _DEFAULT_CACHE_DIR
_disk = diskcache.Cache(cache_dir)
return _disk

Expand Down
Loading
Loading