diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml new file mode 100644 index 0000000..4412783 --- /dev/null +++ b/.github/workflows/tests.yml @@ -0,0 +1,20 @@ +name: Tests + +on: + push: + branches: [main] + pull_request: + +jobs: + pytest: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + - name: Set up uv + uses: astral-sh/setup-uv@v3 + with: + python-version: "3.13" + - name: Install dependencies + run: uv sync --extra dev + - name: Run tests + run: uv run --extra dev pytest diff --git a/Makefile b/Makefile index 8f1ec3e..0a65ca5 100644 --- a/Makefile +++ b/Makefile @@ -2,7 +2,7 @@ # Create/refresh a local .venv and install deps from pyproject/uv.lock install: - uv sync + uv sync --extra dev # Optional: create venv explicitly (uv sync will also create one if missing) venv: @@ -15,6 +15,10 @@ ingest: chat: uv run chat.py +# Run the automated test suite +test: + uv run --extra dev pytest + # Create/update a lockfile explicitly (optional; uv sync also updates it) lock: uv lock diff --git a/README.md b/README.md index 86aa724..a887254 100644 --- a/README.md +++ b/README.md @@ -3,7 +3,7 @@ A fully local RAG pipeline using LlamaIndex + Ollama + Chroma to query your Logseq notes. ## Prereqs -- Python 3.10+ +- Python 3.13+ - Ollama running (https://ollama.com) - Pull a chat and embedding model: ```bash @@ -36,6 +36,11 @@ make ingest make chat ``` +## Tests +```bash +make test +``` + ### Example questions - Summarize tasks tagged #home in October 2025. - Find notes referencing [[Team Topologies]] and list my pros/cons. diff --git a/ingest.py b/ingest.py index 8908d7e..987b831 100644 --- a/ingest.py +++ b/ingest.py @@ -33,7 +33,7 @@ PAGE_LINK = re.compile(r"\[\[([^\]]+)\]\]") # [[Page]] BLOCK_REF = re.compile(r"\(\(([a-zA-Z0-9_-]{6,})\)\)") # ((block-id)) TAG_HASH = re.compile(r"(? str: """Replace Logseq-specific link syntax with plain text. 
diff --git a/pyproject.toml b/pyproject.toml
index d959981..c90fbcd 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -12,3 +12,15 @@ dependencies = [
     "llama-index-vector-stores-chroma>=0.5.3",
     "pyyaml>=6.0.3",
 ]
+
+[project.optional-dependencies]
+dev = [  # the extra selected by "uv sync --extra dev" / "uv run --extra dev"
+    "pytest>=8.3.3",
+]
+
+[tool.pytest.ini_options]
+minversion = "8.0"
+addopts = "-ra"
+testpaths = [
+    "tests",
+]
diff --git a/tests/test_ingest.py b/tests/test_ingest.py
new file mode 100644
index 0000000..121a40a
--- /dev/null
+++ b/tests/test_ingest.py
@@ -0,0 +1,135 @@
+import importlib
+import sys
+from pathlib import Path
+import textwrap
+
+import pytest
+
+
+@pytest.fixture(scope="session")  # created once and shared by every test in the run
+def ingest_module():  # yields the imported top-level ``ingest`` module
+    project_root = Path(__file__).resolve().parents[1]  # parent of tests/, i.e. the repo root
+    config_path = project_root / "config.yaml"
+    created = False  # becomes True only if this fixture writes config.yaml itself
+
+    if not config_path.exists():  # write a placeholder; an existing config is left untouched
+        config_path.write_text(  # NOTE(review): presumably ingest reads config.yaml at import time - confirm
+            textwrap.dedent(
+                """
+                logseq_root: /tmp
+                include_dirs: []
+                file_exts: []
+                exclude_globs: []
+                models:
+                  llm: llama3.1
+                  embedding: nomic-embed-text
+                storage:
+                  chroma_path: /tmp/chroma
+                retrieval:
+                  top_k: 5
+                  mmr: false
+                chunk:
+                  chunk_size: 512
+                  chunk_overlap: 50
+                """
+            ).strip()
+        )
+        created = True
+
+    added_to_path = False  # becomes True only if this fixture extends sys.path
+    if str(project_root) not in sys.path:
+        sys.path.insert(0, str(project_root))  # lets "ingest" resolve to the repo-root ingest.py
+        added_to_path = True
+
+    try:
+        if "ingest" in sys.modules:  # reuse the module if something already imported it
+            module = sys.modules["ingest"]
+        else:
+            module = importlib.import_module("ingest")
+        yield module
+    finally:  # teardown: undo only the changes this fixture made
+        if added_to_path and str(project_root) in sys.path:
+            sys.path.remove(str(project_root))
+        if created and config_path.exists():
+            config_path.unlink()
+
+
+def test_normalize_logseq_links(ingest_module):  # expects [[Page]] to become Page and ((id)) to become [ref:id]
+    text = "Follow [[Page Name]] then see ((abc123))."
+    result = ingest_module.normalize_logseq_links(text)
+    assert result == "Follow Page Name then see [ref:abc123]."
+
+
+def test_parse_tags_combines_sources(ingest_module):  # hashtags and the tags:: line feed one tag list
+    text = """
+    #alpha introduces the topic
+    Another line with #beta and #alpha
+    tags:: gamma, beta , delta
+    """
+    result = ingest_module.parse_tags(text)
+    assert result == ["alpha", "beta", "delta", "gamma"]  # expected de-duplicated, trimmed and sorted
+
+
+def test_page_title_from_path(ingest_module):  # expects the file stem with "_" replaced by "-"
+    path = "/tmp/logseq/pages/project_notes.md"
+    assert ingest_module.page_title_from_path(path) == "project-notes"
+
+
+def test_collect_files_respects_ext_and_excludes(tmp_path, ingest_module):  # only .md files outside the excluded glob
+    pages = tmp_path / "pages"
+    journals = tmp_path / "journals"
+    archive = pages / "archive"
+    pages.mkdir()
+    journals.mkdir()
+    archive.mkdir()
+
+    keep_pages = pages / "alpha.md"
+    keep_journal = journals / "2025-01-01.md"
+    ignore_ext = pages / "ignore.txt"  # extension is not in the allowed list below
+    excluded = archive / "old.md"  # allowed extension, but expected to be dropped by the exclude pattern
+
+    keep_pages.write_text("alpha")
+    keep_journal.write_text("journal")
+    ignore_ext.write_text("nope")
+    excluded.write_text("archive")
+
+    found = ingest_module.collect_files(  # NOTE(review): args presumably mirror the config keys - confirm
+        str(tmp_path),  # presumably logseq_root
+        ["pages", "journals"],  # presumably include_dirs
+        [".md"],  # presumably file_exts
+        ["pages/archive/*"],  # presumably exclude_globs
+    )
+
+    assert set(found) == {str(keep_pages), str(keep_journal)}  # compared as sets, so order is not checked
+
+
+def test_load_documents_applies_metadata(monkeypatch, tmp_path, ingest_module):  # checks normalized text and derived metadata
+    docs_dir = tmp_path / "pages"
+    docs_dir.mkdir()
+    doc_path = docs_dir / "demo_page.md"
+    doc_path.write_text(
+        """
+        #alpha tag at the top
+        tags:: beta, alpha
+        Content referencing [[Other Page]] and ((xyz789)).
+        """
+    )
+
+    class DummyDocument:  # minimal stand-in that only records what it is given
+        def __init__(self, text, metadata):
+            self.text = text
+            self.metadata = metadata
+
+    monkeypatch.setattr(ingest_module, "Document", DummyDocument)  # swap ingest's Document for the stub
+
+    docs = ingest_module.load_documents([str(doc_path)])
+
+    assert len(docs) == 1
+    doc = docs[0]
+    assert doc.text.strip().startswith("#alpha tag at the top")
+    assert "[[" not in doc.text and "((" not in doc.text  # link and block-ref markup must be gone
+    assert doc.metadata["source"] == str(doc_path)
+    assert doc.metadata["title"] == "demo-page"
+    assert doc.metadata["tags"] == "alpha, beta"  # tags stored as one comma-separated string
+    assert doc.metadata["basename"] == "demo_page.md"
+    assert doc.metadata["dir"] == "pages"