From 6f5d26e0153b7bcfc795e79c886059e960d8cca5 Mon Sep 17 00:00:00 2001 From: gitronald Date: Sat, 21 Mar 2026 14:28:57 -0700 Subject: [PATCH 1/5] version [prerelease]: 0.4.1a0 --- pyproject.toml | 2 +- uv.lock | 2 +- zotlib/__init__.py | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index ccf88c7..22cccb3 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "zotlib" -version = "0.4.0" +version = "0.4.1a0" description = "Extract and format bibliographic data from Zotero databases" readme = "README.md" requires-python = ">=3.10" diff --git a/uv.lock b/uv.lock index 089a7a9..dadc857 100644 --- a/uv.lock +++ b/uv.lock @@ -1080,7 +1080,7 @@ wheels = [ [[package]] name = "zotlib" -version = "0.4.0" +version = "0.4.1a0" source = { editable = "." } dependencies = [ { name = "pillow" }, diff --git a/zotlib/__init__.py b/zotlib/__init__.py index 747bb53..5fb96c2 100644 --- a/zotlib/__init__.py +++ b/zotlib/__init__.py @@ -1,6 +1,6 @@ """Zotlib - Extract and format bibliographic data from Zotero databases.""" -__version__ = "0.4.0" +__version__ = "0.4.1a0" from zotlib.database import ZoteroDatabase from zotlib.extractors import ( From 1e303b66421842e2742b8943f4cdefac81970c20 Mon Sep 17 00:00:00 2001 From: gitronald Date: Sat, 21 Mar 2026 14:29:52 -0700 Subject: [PATCH 2/5] add .ruff_cache to gitignore --- .gitignore | 3 +++ 1 file changed, 3 insertions(+) diff --git a/.gitignore b/.gitignore index fb9c498..10f2db2 100644 --- a/.gitignore +++ b/.gitignore @@ -18,6 +18,9 @@ build/ .pytest_cache .coverage +# Linting +.ruff_cache + # Data/output data output/ From 95f011a2a4c234291224684d3b98d31d7dd50e97 Mon Sep 17 00:00:00 2001 From: gitronald Date: Sat, 21 Mar 2026 14:41:16 -0700 Subject: [PATCH 3/5] remove redundant schema docs and generator script --- docs/schema.md | 177 -------------------------------- scripts/generate_schema_docs.py | 48 --------- 2 files changed, 225 deletions(-) delete mode 100644 docs/schema.md delete mode 100644 scripts/generate_schema_docs.py diff --git a/docs/schema.md b/docs/schema.md deleted file mode 100644 index 59a7cba..0000000 --- a/docs/schema.md +++ /dev/null @@ -1,177 +0,0 @@ -# Zotero Database Schema - -Schema definitions for Zotero SQLite database tables used by zotlib. - -## items - -Base table for all Zotero items (papers, books, etc.) - -| Name | Type | Description | -|------|------|-------------| -| itemID | INTEGER | Primary key | -| itemTypeID | INTEGER | Foreign key to itemTypes | -| dateAdded | TIMESTAMP | Timestamp when item was added | -| dateModified | TIMESTAMP | Timestamp of last modification | -| clientDateModified | TIMESTAMP | Client-side modification timestamp | -| libraryID | INTEGER | Foreign key to libraries | -| key | TEXT | Unique sync key | -| version | INTEGER | Sync version number | -| synced | INTEGER | Sync status flag (0 or 1) | - -## itemData - -Links items to their field values (title, date, DOI, etc.) - -| Name | Type | Description | -|------|------|-------------| -| itemID | INTEGER | Foreign key to items | -| fieldID | INTEGER | Foreign key to fieldsCombined | -| valueID | INTEGER | Foreign key to itemDataValues | - -## itemDataValues - -Stores actual field values (deduplicated) - -| Name | Type | Description | -|------|------|-------------| -| valueID | INTEGER | Primary key | -| value | TEXT | The actual field value text | - -## fieldsCombined - -Field definitions (title, date, DOI, volume, etc.) - -| Name | Type | Description | -|------|------|-------------| -| fieldID | INTEGER | Primary key | -| fieldName | TEXT | Internal field name (e.g., 'title', 'DOI') | -| label | TEXT | Display label | -| fieldFormatID | INTEGER | Format specification | -| custom | INTEGER | Whether this is a custom field (0 or 1) | - -## itemTypes - -Item type definitions (journalArticle, book, etc.) - -| Name | Type | Description | -|------|------|-------------| -| itemTypeID | INTEGER | Primary key | -| typeName | TEXT | Internal type name | -| templateItemTypeID | INTEGER | Template reference | -| display | INTEGER | Display order | - -## itemCreators - -Links items to their creators (authors, editors, etc.) - -| Name | Type | Description | -|------|------|-------------| -| itemID | INTEGER | Foreign key to items | -| creatorID | INTEGER | Foreign key to creators | -| creatorTypeID | INTEGER | Type of creator (author, editor, etc.) | -| orderIndex | INTEGER | Position in author list | - -## creators - -Creator (person) records - -| Name | Type | Description | -|------|------|-------------| -| creatorID | INTEGER | Primary key | -| firstName | TEXT | First name | -| lastName | TEXT | Last name | -| fieldMode | INTEGER | Name format mode (0=two-field, 1=single-field) | - -## collectionItems - -Links items to collections - -| Name | Type | Description | -|------|------|-------------| -| collectionID | INTEGER | Foreign key to collections | -| itemID | INTEGER | Foreign key to items | -| orderIndex | INTEGER | Position in collection | - -## collections - -Collection (folder) definitions - -| Name | Type | Description | -|------|------|-------------| -| collectionID | INTEGER | Primary key | -| collectionName | TEXT | Display name | -| parentCollectionID | INTEGER | Parent collection (for nesting) | -| clientDateModified | TIMESTAMP | Client-side modification timestamp | -| libraryID | INTEGER | Foreign key to libraries | -| key | TEXT | Unique sync key | -| version | INTEGER | Sync version number | -| synced | INTEGER | Sync status flag (0 or 1) | - -## libraries - -Library definitions (personal, group libraries) - -| Name | Type | Description | -|------|------|-------------| -| libraryID | INTEGER | Primary key | -| type | TEXT | Library type (user, group) | -| editable | INTEGER | Whether library is editable (0 or 1) | -| filesEditable | INTEGER | Whether files can be modified (0 or 1) | -| version | INTEGER | Sync version | -| storageVersion | INTEGER | Storage version | -| lastSync | TIMESTAMP | Last sync timestamp | -| archived | INTEGER | Archive status (0 or 1) | - -## itemAttachments - -PDF and file attachments linked to items - -| Name | Type | Description | -|------|------|-------------| -| itemID | INTEGER | Primary key (the attachment item) | -| parentItemID | INTEGER | Foreign key to the parent item | -| linkMode | INTEGER | How the file is stored (0=imported, 1=linked, 2=web) | -| contentType | TEXT | MIME type (e.g., application/pdf) | -| charsetID | INTEGER | Character set for text attachments | -| path | TEXT | File path (storage:, attachments:, or absolute) | -| syncState | INTEGER | File sync status | -| storageModTime | INTEGER | Storage modification timestamp | -| storageHash | TEXT | File content hash | -| lastProcessedModificationTime | INTEGER | Last processing timestamp | - -## itemAnnotations - -PDF annotations created in Zotero's built-in reader - -| Name | Type | Description | -|------|------|-------------| -| itemID | INTEGER | Primary key (the annotation item) | -| parentItemID | INTEGER | Foreign key to the PDF attachment item | -| type | INTEGER | Annotation type (1=highlight, 2=note, 3=image, 5=underline) | -| authorName | TEXT | Name of annotation author | -| text | TEXT | Highlighted or selected text | -| comment | TEXT | User comment on the annotation | -| color | TEXT | Hex color string (e.g., #ffd400) | -| pageLabel | TEXT | Page number label | -| sortIndex | TEXT | Lexicographic sort index for ordering | -| position | TEXT | JSON with pageIndex and rects/paths coordinates | -| isExternal | INTEGER | Whether annotation is external (0 or 1) | - -## itemTags - -Links items to tags - -| Name | Type | Description | -|------|------|-------------| -| itemID | INTEGER | Foreign key to items | -| tagID | INTEGER | Foreign key to tags | -| type | INTEGER | Tag type (0=manual, 1=automatic) | - -## tags - -Tag definitions - -| Name | Type | Description | -|------|------|-------------| -| tagID | INTEGER | Primary key | -| name | TEXT | Tag display name | diff --git a/scripts/generate_schema_docs.py b/scripts/generate_schema_docs.py deleted file mode 100644 index b2fa984..0000000 --- a/scripts/generate_schema_docs.py +++ /dev/null @@ -1,48 +0,0 @@ -#!/usr/bin/env python3 -"""Generate docs/schema.md from schema.py definitions.""" - -from pathlib import Path - -from zotlib.tables import ALL_SCHEMAS - - -def generate_schema_markdown() -> str: - """Generate markdown documentation for all schema tables.""" - lines = [ - "# Zotero Database Schema", - "", - "Schema definitions for Zotero SQLite database tables used by zotlib.", - "", - ] - - for table in ALL_SCHEMAS: - # Table header - lines.append(f"## {table.name}") - lines.append("") - lines.append(table.description) - lines.append("") - - # Column table - lines.append("| Name | Type | Description |") - lines.append("|------|------|-------------|") - for col_name, col in table.columns.items(): - lines.append(f"| {col_name} | {col.type} | {col.description} |") - lines.append("") - - return "\n".join(lines) - - -def main(): - """Generate and write schema documentation.""" - docs_dir = Path(__file__).parent.parent / "docs" - docs_dir.mkdir(exist_ok=True) - - output_path = docs_dir / "schema.md" - content = generate_schema_markdown() - output_path.write_text(content) - - print(f"Generated {output_path}") - - -if __name__ == "__main__": - main() From e62719798b3a4f4937fe4befc68c99b8fac4cc05 Mon Sep 17 00:00:00 2001 From: gitronald Date: Sat, 21 Mar 2026 14:41:29 -0700 Subject: [PATCH 4/5] polish readme: reorganize sections, add descriptions, expand install --- README.md | 49 +++++++++++++++++++++++++++++++++---------------- 1 file changed, 33 insertions(+), 16 deletions(-) diff --git a/README.md b/README.md index 7515b0e..a3793ae 100644 --- a/README.md +++ b/README.md @@ -2,6 +2,8 @@ Tools for extracting and formatting bibliographic data from Zotero databases. +Reads directly from Zotero's local SQLite database — no API key needed. Export collections as CSV or APA-formatted references, generate PDF cover images with thumbnails, and extract annotated PDFs with baked-in highlights and markdown notes. Includes a CLI for common workflows and a Python API for custom pipelines. + ## Project Structure ``` @@ -20,20 +22,27 @@ zotlib/ ├── scripts/ # Utility scripts │ ├── extract-annotations.js # Annotation extractor (interactive + headless) │ ├── create-parent-item.js # Create parents for standalone PDFs -│ ├── run-extract.sh # Shell wrapper for headless extraction -│ └── generate_schema_docs.py # Generate docs/schema.md -├── docs/ # Documentation -│ └── schema.md # Database schema reference +│ └── run-extract.sh # Shell wrapper for headless extraction ├── tests/ # Test suite └── pyproject.toml # Project configuration ``` ## Installation +From source: + ```bash +git clone https://github.com/gitronald/zotlib.git +cd zotlib uv sync ``` +As a dependency: + +```bash +uv add git+https://github.com/gitronald/zotlib.git +``` + ## Configuration Run `zotlib init` to auto-discover Zotero paths and save them to `zotlib.toml`: @@ -66,7 +75,22 @@ Path resolution priority (for both database and PDFs dir): ## CLI Commands -### Export data +### Explore + +Browse collections and inspect database schema. The `show-tables` command documents Zotero's largely undocumented SQLite table structure, including column descriptions and types. + +```bash +# List available collections +zotlib show-collections + +# Show database tables +zotlib show-tables +zotlib show-tables items +``` + +### Export + +Export collection data in multiple formats. Supports linked attachments via `--pdfs-dir` for PDFs stored outside Zotero's default storage. ```bash # Export all tables as CSV @@ -80,25 +104,18 @@ zotlib export-apa -c publications # Generate cover images and thumbnails zotlib export-covers -c publications -zotlib export-covers -c publications -p "/path/to/linked-pdfs/" # Export annotated PDFs and markdown notes zotlib export-annotations -c mycollection -zotlib export-annotations -c mycollection -p "/path/to/linked-pdfs/" ``` -### Explore and manage +### Backup -```bash -# List available collections -zotlib show-collections - -# Show database tables -zotlib show-tables -zotlib show-tables items +Archive the entire Zotero data directory as a compressed `.tar.bz2` file with a progress bar. Saves to `data/backups/zotero-YYYY-MM-DD.tar.bz2` by default. Use `-o` to specify a custom output path or `-d` to point to a different database. -# Back up the Zotero data directory +```bash zotlib backup +zotlib backup -o ~/backups/zotero-2026-03-21.tar.bz2 ``` ### Output structure From 6a8b29d66a436f976e06e8564bcb1f77e8cd2466 Mon Sep 17 00:00:00 2001 From: gitronald Date: Sat, 21 Mar 2026 14:41:51 -0700 Subject: [PATCH 5/5] version [patch]: 0.4.1 --- pyproject.toml | 2 +- uv.lock | 2 +- zotlib/__init__.py | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 22cccb3..31315e7 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "zotlib" -version = "0.4.1a0" +version = "0.4.1" description = "Extract and format bibliographic data from Zotero databases" readme = "README.md" requires-python = ">=3.10" diff --git a/uv.lock b/uv.lock index dadc857..71f9af3 100644 --- a/uv.lock +++ b/uv.lock @@ -1080,7 +1080,7 @@ wheels = [ [[package]] name = "zotlib" -version = "0.4.1a0" +version = "0.4.1" source = { editable = "." } dependencies = [ { name = "pillow" }, diff --git a/zotlib/__init__.py b/zotlib/__init__.py index 5fb96c2..35af45a 100644 --- a/zotlib/__init__.py +++ b/zotlib/__init__.py @@ -1,6 +1,6 @@ """Zotlib - Extract and format bibliographic data from Zotero databases.""" -__version__ = "0.4.1a0" +__version__ = "0.4.1" from zotlib.database import ZoteroDatabase from zotlib.extractors import (