Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
20 changes: 20 additions & 0 deletions .github/workflows/tests.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
# CI workflow that installs the project with uv and runs the pytest suite.
name: Tests

# Trigger configuration: a push filter for the main branch plus pull requests.
on:
push:
branches: [main]
pull_request:

jobs:
pytest:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v4
- name: Set up uv
uses: astral-sh/setup-uv@v3
with:
# NOTE(review): confirm that the pinned setup-uv release actually accepts a
# `python-version` input; if it does not, the interpreter is chosen by uv's
# own resolution instead of this value.
python-version: "3.13"
# Sync the environment including the `dev` extra.
- name: Install dependencies
run: uv sync --extra dev
# Same invocation as the Makefile's `test` target.
- name: Run tests
run: uv run --extra dev pytest
6 changes: 5 additions & 1 deletion Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

# Create/refresh a local .venv and install deps from pyproject/uv.lock
install:
uv sync
uv sync --extra dev

# Optional: create venv explicitly (uv sync will also create one if missing)
venv:
Expand All @@ -15,6 +15,10 @@ ingest:
chat:
uv run chat.py

# Run the automated test suite
test:
uv run --extra dev pytest

# Create/update a lockfile explicitly (optional; uv sync also updates it)
lock:
uv lock
Expand Down
7 changes: 6 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
A fully local RAG pipeline using LlamaIndex + Ollama + Chroma to query your Logseq notes.

## Prereqs
- Python 3.10+
- Python 3.13+
- Ollama running (https://ollama.com)
- Pull a chat and embedding model:
```bash
Expand Down Expand Up @@ -36,6 +36,11 @@ make ingest
make chat
```

## Tests
```bash
make test
```

### Example questions
- Summarize tasks tagged #home in October 2025.
- Find notes referencing [[Team Topologies]] and list my pros/cons.
Expand Down
2 changes: 1 addition & 1 deletion ingest.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@
PAGE_LINK = re.compile(r"\[\[([^\]]+)\]\]") # [[Page]]
BLOCK_REF = re.compile(r"\(\(([a-zA-Z0-9_-]{6,})\)\)") # ((block-id))
TAG_HASH = re.compile(r"(?<!\w)#([A-Za-z0-9/_-]+)") # #tag
TAG_PROP = re.compile(r"^tags::\s*(.+)$", re.MULTILINE) # tags:: a, b
TAG_PROP = re.compile(r"^\s*tags::\s*(.+)$", re.MULTILINE) # tags:: a, b

def normalize_logseq_links(text: str) -> str:
"""Replace Logseq-specific link syntax with plain text.
Expand Down
12 changes: 12 additions & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -12,3 +12,15 @@ dependencies = [
"llama-index-vector-stores-chroma>=0.5.3",
"pyyaml>=6.0.3",
]

[project.optional-dependencies]
dev = [
"pytest>=8.3.3",
]

[tool.pytest.ini_options]
minversion = "8.0"
addopts = "-ra"
testpaths = [
"tests",
]
135 changes: 135 additions & 0 deletions tests/test_ingest.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,135 @@
import importlib
import sys
from pathlib import Path
import textwrap

import pytest


@pytest.fixture(scope="session")
def ingest_module():
    """Yield the project's ``ingest`` module once per test session.

    Setup:
      * If ``config.yaml`` is missing from the project root (the parent of
        this tests directory), a placeholder file is written there.
      * The project root is prepended to ``sys.path`` when it is not already
        listed, so ``ingest`` can be imported by name.
      * An ``ingest`` entry already present in ``sys.modules`` is reused;
        otherwise the module is imported.

    Teardown only undoes what this fixture itself did: the ``sys.path`` entry
    is removed and the placeholder ``config.yaml`` is deleted, each only if
    the fixture added/created it.
    """
    repo_root = Path(__file__).resolve().parent.parent
    repo_root_str = str(repo_root)
    config_path = repo_root / "config.yaml"

    # NOTE(review): every key below sits at the same indentation level, so
    # `models`, `storage`, `retrieval` and `chunk` parse as empty values.
    # Confirm this matches the structure ingest expects from config.yaml.
    placeholder_config = textwrap.dedent(
        """
        logseq_root: /tmp
        include_dirs: []
        file_exts: []
        exclude_globs: []
        models:
        llm: llama3.1
        embedding: nomic-embed-text
        storage:
        chroma_path: /tmp/chroma
        retrieval:
        top_k: 5
        mmr: false
        chunk:
        chunk_size: 512
        chunk_overlap: 50
        """
    ).strip()

    # Never overwrite a real config; remember whether we own the file.
    wrote_config = not config_path.exists()
    if wrote_config:
        config_path.write_text(placeholder_config)

    # Remember whether we own the sys.path entry for the same reason.
    path_injected = repo_root_str not in sys.path
    if path_injected:
        sys.path.insert(0, repo_root_str)

    try:
        try:
            loaded = sys.modules["ingest"]
        except KeyError:
            loaded = importlib.import_module("ingest")
        yield loaded
    finally:
        if path_injected and repo_root_str in sys.path:
            sys.path.remove(repo_root_str)
        if wrote_config and config_path.exists():
            config_path.unlink()


def test_normalize_logseq_links(ingest_module):
    """``[[Page]]`` links lose their brackets and ``((id))`` becomes ``[ref:id]``."""
    normalized = ingest_module.normalize_logseq_links(
        "Follow [[Page Name]] then see ((abc123))."
    )
    assert normalized == "Follow Page Name then see [ref:abc123]."


def test_parse_tags_combines_sources(ingest_module):
    """Hash tags and the ``tags::`` property merge into one sorted, unique list."""
    note_lines = [
        "",  # the note starts with a blank line
        "#alpha introduces the topic",
        "Another line with #beta and #alpha",
        "tags:: gamma, beta , delta",
        "",  # ...and ends with a trailing newline
    ]
    tags = ingest_module.parse_tags("\n".join(note_lines))
    assert tags == ["alpha", "beta", "delta", "gamma"]


def test_page_title_from_path(ingest_module):
    """The title is the file stem with underscores turned into hyphens."""
    title = ingest_module.page_title_from_path(
        "/tmp/logseq/pages/project_notes.md"
    )
    assert title == "project-notes"


def test_collect_files_respects_ext_and_excludes(tmp_path, ingest_module):
    """Only ``.md`` files outside the excluded archive folder are collected."""
    pages_dir = tmp_path / "pages"
    journals_dir = tmp_path / "journals"
    archive_dir = pages_dir / "archive"
    # Parents first: archive_dir lives inside pages_dir.
    for directory in (pages_dir, journals_dir, archive_dir):
        directory.mkdir()

    # Files that must be returned.
    expected_files = {
        pages_dir / "alpha.md": "alpha",
        journals_dir / "2025-01-01.md": "journal",
    }
    # Files that must be skipped: wrong extension, or under the excluded glob.
    skipped_files = {
        pages_dir / "ignore.txt": "nope",
        archive_dir / "old.md": "archive",
    }
    for file_path, body in {**expected_files, **skipped_files}.items():
        file_path.write_text(body)

    include_dirs = ["pages", "journals"]
    file_exts = [".md"]
    exclude_globs = ["pages/archive/*"]
    found = ingest_module.collect_files(
        str(tmp_path), include_dirs, file_exts, exclude_globs
    )

    assert set(found) == {str(file_path) for file_path in expected_files}


def test_load_documents_applies_metadata(monkeypatch, tmp_path, ingest_module):
    """``load_documents`` normalizes link syntax and fills in per-file metadata."""
    pages_dir = tmp_path / "pages"
    pages_dir.mkdir()
    note_path = pages_dir / "demo_page.md"
    note_body = "\n".join(
        [
            "",
            "#alpha tag at the top",
            "tags:: beta, alpha",
            "Content referencing [[Other Page]] and ((xyz789)).",
            "",
        ]
    )
    note_path.write_text(note_body)

    class StubDocument:
        """Minimal stand-in that just records what it was constructed with."""

        def __init__(self, text, metadata):
            self.text = text
            self.metadata = metadata

    # Swap out the module's Document so the test can inspect plain attributes.
    monkeypatch.setattr(ingest_module, "Document", StubDocument)

    docs = ingest_module.load_documents([str(note_path)])

    assert len(docs) == 1
    doc = docs[0]

    # Text keeps its content but no raw Logseq link/ref markers remain.
    assert doc.text.strip().startswith("#alpha tag at the top")
    assert not any(marker in doc.text for marker in ("[[", "(("))

    expected_metadata = {
        "source": str(note_path),
        "title": "demo-page",
        "tags": "alpha, beta",
        "basename": "demo_page.md",
        "dir": "pages",
    }
    for key, expected in expected_metadata.items():
        assert doc.metadata[key] == expected
assert doc.metadata["dir"] == "pages"
Loading