sukanth · sukanth · Apr 18, 2026 · Apr 18, 2026 · Apr 18, 2026
diff --git a/.coverage b/.coverage
diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
@@ -83,8 +83,13 @@ src/mindmark/
 ├── __init__.py     # Package initialisation
 ├── __main__.py     # Entry point for `python -m mindmark`
 ├── cli.py          # CLI entry point (argparse)
-├── index.py        # Indexing logic
-└── parser.py       # Bookmark / document parser
+├── index.py        # Embedding index + incremental sync logic
+├── parser.py       # Netscape HTML bookmark parser
+└── browsers/       # Direct browser bookmark reading
+    ├── __init__.py  # detect + collect bookmarks from all browsers
+    ├── paths.py     # OS-specific browser path resolution
+    ├── chromium.py  # Chrome / Edge / Brave JSON parser
+    └── firefox.py   # Firefox places.sqlite parser
 ```
 
 ## Questions?

diff --git a/README.md b/README.md
@@ -38,13 +38,25 @@ Ask in natural language — mindmark remembers what you saved.
 
 | Command | What it does |
 |---|---|
-| `mindmark index <file>` | Parse an exported bookmarks HTML file, embed every bookmark locally, store vectors in SQLite |
+| `mindmark sync` | **Auto-detect** installed browsers and sync bookmarks directly — no export needed |
 | `mindmark find "query"` | Semantic search over titles, folders, domains, and URL slugs — returns top-K with similarity scores |
 | `mindmark open "query"` | Search and open the best match in your default browser |
 | `mindmark stats` | Show index size, model info, top domains, and top folders |
+| `mindmark index <file>` | Import bookmarks from an exported HTML file (legacy workflow) |
 
 > 🔌 **Works offline** after the first run. Embeddings run on-device via [fastembed](https://github.com/qdrant/fastembed) (ONNX Runtime, ~130 MB one-time model download).
 
+### Supported Browsers
+
+| Browser | macOS | Linux | Windows |
+|---|---|---|---|
+| **Chrome** | ✅ | ✅ | ✅ |
+| **Edge** | ✅ | ✅ | ✅ |
+| **Brave** | ✅ | ✅ | ✅ |
+| **Firefox** | ✅ | ✅ | ✅ |
+
+mindmark reads bookmark files directly from browser data directories — no export step, no browser extension.
+
 ---
 
 ## 📋 Prerequisites
@@ -121,16 +133,58 @@ pip install -e .[dev]
 
 ## ⚡ Quick Start
 
-### 1️⃣ Export your bookmarks
+### 1️⃣ Sync your bookmarks (no export needed!)
+
+```bash
+mindmark sync
+```
+
+That's it — mindmark auto-detects your installed browsers, reads their bookmark files directly, and builds a searchable index. **No manual export required.**
+
+> First run downloads the embedding model (~130 MB) and caches it locally. Every run after that is instant and fully offline.
+
+<details>
+<summary>💡 <strong>See which browsers were detected</strong></summary>
+
+```bash
+mindmark sync --list-browsers
+```
+
+Example output:
+
+```
+Browser      Profile                  Path
+-------      -------                  ----
+Chrome       Default                  ~/Library/.../Google/Chrome/Default/Bookmarks
+Chrome       Profile 3                ~/Library/.../Google/Chrome/Profile 3/Bookmarks
+Edge         Default                  ~/Library/.../Microsoft Edge/Default/Bookmarks
+```
+
+</details>
+
+<details>
+<summary>💡 <strong>Sync a specific browser only</strong></summary>
+
+```bash
+mindmark sync --browser chrome
+mindmark sync --browser firefox
+mindmark sync --browser edge
+mindmark sync --browser brave
+```
+
+</details>
 
-| Browser | How |
+<details>
+<summary>💡 <strong>Alternative — import from an exported HTML file</strong></summary>
+
+If you prefer the manual export workflow, or need to import bookmarks from an unsupported browser:
+
+| Browser | How to export |
 |---|---|
 | **Edge** | `edge://favorites` → `⋯` → **Export favorites** → save as HTML |
 | **Chrome** | `chrome://bookmarks` → `⋮` → **Export bookmarks** → save as HTML |
 | **Firefox** | `Ctrl+Shift+O` (`Cmd+Shift+O` on macOS) → **Import and Backup** → **Export Bookmarks to HTML** |
 
-### 2️⃣ Build the index
-
 ```bash
 # macOS / Linux
 mindmark index ~/Downloads/bookmarks.html
@@ -139,9 +193,9 @@ mindmark index ~/Downloads/bookmarks.html
 mindmark index "$env:USERPROFILE\Downloads\bookmarks.html"
 ```
 
-> First run downloads the embedding model (~130 MB) and caches it locally. Every run after that is instant and fully offline.
+</details>
 
-### 3️⃣ Search in natural language
+### 2️⃣ Search in natural language
 
 <p align="center">
   <img src="assets/mindmark-find.gif" alt="mindmark find demo" width="800" />
@@ -154,7 +208,7 @@ mindmark find "helm chart examples" --domain github.com
 mindmark find "docker compose setup" --folder devops
 ```
 
-### 4️⃣ Open a result directly
+### 3️⃣ Open a result directly
 
 ```bash
 mindmark open "k8s cheat sheet"           # opens the best match
@@ -179,7 +233,7 @@ mm open "docker setup"
 ```
 </details>
 
-### 5️⃣ JSON output for scripting
+### 4️⃣ JSON output for scripting
 
 Pipe results into **fzf**, **jq**, **Alfred**, **Raycast**, **PowerToys Run**, or any tool that accepts JSON:
 
@@ -195,6 +249,21 @@ mindmark find "istio service mesh" --json | ConvertFrom-Json | ForEach-Object {
 
 ## 📖 Usage
 
+### Syncing
+
+`mindmark sync` reads bookmarks directly from your browser data directories. It's **incremental** — only new or changed bookmarks are re-embedded, making re-syncs near-instant.
+
+```bash
+mindmark sync                         # sync all detected browsers
+mindmark sync --browser chrome        # sync only Chrome
+mindmark sync --browser firefox       # sync only Firefox
+mindmark sync --list-browsers         # list detected browsers and profiles
+```
+
+When you add new bookmarks in your browser, just run `mindmark sync` again — it will pick up only the changes.
+
+> 💡 **Note:** If you change the embedding model with `--model`, all bookmarks will be re-embedded on the next sync. Browser names are case-insensitive (e.g., `--browser Chrome` and `--browser chrome` both work).
+
 ### Filters
 
 Narrow down results without changing your query:
@@ -207,42 +276,47 @@ mindmark find "useful tools" -k 20                    # return top 20 instead of
 
 ### Re-indexing
 
-Just rerun `mindmark index <file>`. It clears and rebuilds the index. The model is cached, so re-indexing 800+ bookmarks takes only seconds.
+For the `sync` workflow, just rerun `mindmark sync`. It's incremental — only changed bookmarks are re-embedded.
+
+For the `index` workflow, rerun `mindmark index <file>`. It clears and rebuilds the index. The model is cached, so re-indexing 800+ bookmarks takes only seconds.
 
 ### Swap the embedding model
 
 ```bash
-mindmark index bookmarks.html --model BAAI/bge-small-en-v1.5              # default, 384-dim
-mindmark index bookmarks.html --model sentence-transformers/all-MiniLM-L6-v2
-mindmark index bookmarks.html --model BAAI/bge-base-en-v1.5               # 768-dim, higher quality
+mindmark sync --model BAAI/bge-small-en-v1.5                # default, 384-dim
+mindmark sync --model sentence-transformers/all-MiniLM-L6-v2
+mindmark sync --model BAAI/bge-base-en-v1.5                 # 768-dim, higher quality
 ```
 
-Switching models triggers a full re-embed automatically. See the [fastembed supported models list](https://qdrant.github.io/fastembed/examples/Supported_Models/).
+The `--model` flag also works with `mindmark index`. Switching models triggers a full re-embed automatically. See the [fastembed supported models list](https://qdrant.github.io/fastembed/examples/Supported_Models/).
 
 ---
 
 ## 🧠 How It Works
 
 ```
-Bookmarks HTML                                  "python async tutorial"
-      │                                                  │
-      ▼                                                  ▼
-  ┌────────┐    ┌──────────┐    ┌──────────┐     ┌──────────┐
-  │ Parse  │───▶│  Embed   │───▶│  Store   │     │  Embed   │
-  │  HTML  │    │ (ONNX)   │    │ (SQLite) │◀────│  query   │
-  └────────┘    └──────────┘    └──────────┘     └──────────┘
-                                      │                │
-                                      ▼                ▼
-                                ┌──────────────────────────┐
-                                │  Dot-product similarity  │
-                                │   → top-K results        │
+Browser data files                              "python async tutorial"
+(Chrome JSON / Firefox SQLite)                            │
+       │                                                  │
+       ▼                                                  ▼
+  ┌────────────┐  ┌──────────┐  ┌──────────┐     ┌──────────┐
+  │  Detect &  │─▶│  Embed   │─▶│  Store   │     │  Embed   │
+  │   Parse    │  │ (ONNX)   │  │ (SQLite) │◀────│  query   │
+  └────────────┘  └──────────┘  └──────────┘     └──────────┘
+                      ▲               │                │
+                      │               ▼                ▼
+                 only new/      ┌──────────────────────────┐
+                 changed        │  Dot-product similarity  │
+                 bookmarks      │   → top-K results        │
                                 └──────────────────────────┘
 ```
 
-1. **Parse** — A stateful tokenizer reads the Netscape bookmarks HTML and extracts every link with its full folder path.
-2. **Embed** — Each bookmark becomes a rich text string (`title | folder | domain | path`) and is passed through a BGE/MiniLM ONNX model. Vectors are L2-normalized.
-3. **Store** — Vectors live as `float32` blobs in a single SQLite file. For 800–10,000 bookmarks this is simpler than a vector DB and still sub-millisecond.
-4. **Search** — Encode the query, compute dot products against all vectors, return the top-K.
+1. **Detect** — Auto-discover installed browsers (Chrome, Edge, Brave, Firefox) and their profiles across macOS, Linux, and Windows.
+2. **Parse** — Read bookmark files natively: Chromium JSON format or Firefox `places.sqlite`. No export step needed.
+3. **Diff** — Hash each bookmark's content and compare against the existing index. Only new or changed bookmarks proceed to embedding.
+4. **Embed** — Each bookmark becomes a rich text string (`title | folder | domain | path`) and is passed through a BGE/MiniLM ONNX model. Vectors are L2-normalized.
+5. **Store** — Vectors live as `float32` blobs in a single SQLite file. A `bookmark_sources` table tracks which browser contributed each bookmark, so multi-browser syncs don't conflict.
+6. **Search** — Encode the query, compute dot products against all vectors, return the top-K.
 
 ---
 
@@ -357,6 +431,15 @@ ENTRYPOINT ["mindmark"]
 
 ```bash
 docker build -t mindmark .
+
+# Sync from browser bookmarks (mount browser data directories)
+# Note: browser data paths vary — this example is for macOS Chrome
+docker run --rm \
+    -v $HOME/.mindmark:/root/.mindmark \
+    -v "$HOME/Library/Application Support/Google/Chrome":/chrome:ro \
+    mindmark sync
+
+# Or import from an exported HTML file
 docker run --rm -v $HOME/.mindmark:/root/.mindmark \
     -v $HOME/Downloads:/downloads mindmark \
     index /downloads/bookmarks.html

diff --git a/src/mindmark/browsers/__init__.py b/src/mindmark/browsers/__init__.py
@@ -0,0 +1,56 @@
+"""Browser detection, path resolution, and bookmark parsing.
+
+Provides auto-detection of installed browsers and their bookmark files,
+with parsers that produce the same ``Bookmark`` dataclass used by the
+rest of mindmark.
+"""
+from __future__ import annotations
+
+from dataclasses import dataclass, field
+from pathlib import Path
+import json
+import sqlite3
+
+from ..parser import Bookmark
+from ..index import SyncResult
+from .paths import detect_browsers, BrowserProfile, SUPPORTED_BROWSERS
+
+
+def parse_browser_bookmarks(profile: BrowserProfile) -> list[Bookmark]:
+    """Parse bookmarks from a detected browser profile."""
+    if profile.browser_type == "chromium":
+        from .chromium import parse_chromium_json
+        return parse_chromium_json(profile.bookmark_path)
+    elif profile.browser_type == "firefox":
+        from .firefox import parse_firefox_places
+        return parse_firefox_places(profile.bookmark_path)
+    else:
+        raise ValueError(f"Unsupported browser type: {profile.browser_type}")
+
+
+def collect_all_bookmarks(
+    browser_filter: str | None = None,
+) -> list[tuple[BrowserProfile, list[Bookmark]]]:
+    """Detect browsers and parse bookmarks from all (or filtered) profiles.
+
+    Returns a list of (profile, bookmarks) pairs.
+    """
+    profiles = detect_browsers()
+    if browser_filter:
+        filt = browser_filter.lower()
+        profiles = [p for p in profiles if p.browser_name.lower() == filt]
+
+    results: list[tuple[BrowserProfile, list[Bookmark]]] = []
+    for profile in profiles:
+        try:
+            bookmarks = parse_browser_bookmarks(profile)
+            results.append((profile, bookmarks))
+        except (OSError, ValueError, KeyError, json.JSONDecodeError,
+                sqlite3.Error) as e:
+            import sys
+            print(
+                f"warning: failed to read {profile.browser_name} "
+                f"({profile.profile_name}): {e}",
+                file=sys.stderr,
+            )
+    return results
diff --git a/src/mindmark/browsers/chromium.py b/src/mindmark/browsers/chromium.py
@@ -0,0 +1,80 @@
+"""Parse Chromium-based browser bookmarks (Chrome, Edge, Brave).
+
+The ``Bookmarks`` file is JSON with this structure::
+
+    {
+      "roots": {
+        "bookmark_bar": { "children": [...] },
+        "other":        { "children": [...] },
+        "synced":       { "children": [...] }
+      }
+    }
+
+Each node is either a **folder** (``"type": "folder"``, has ``children``)
+or a **url** (``"type": "url"``, has ``url`` + ``name``).
+"""
+from __future__ import annotations
+
+import json
+from pathlib import Path
+
+from ..parser import Bookmark
+
+
+def parse_chromium_json(path: Path) -> list[Bookmark]:
+    """Parse a Chromium ``Bookmarks`` JSON file into a list of Bookmark objects.
+
+    Deduplicates by URL (keeps the first occurrence).
+    """
+    path = Path(path)
+    with open(path, "r", encoding="utf-8", errors="replace") as f:
+        data = json.load(f)
+
+    roots = data.get("roots", {})
+    bookmarks: list[Bookmark] = []
+    seen: set[str] = set()
+
+    for root_name in ("bookmark_bar", "other", "synced"):
+        node = roots.get(root_name)
+        if node and isinstance(node, dict):
+            _walk(node, [], bookmarks, seen)
+
+    return bookmarks
+
+
+def _walk(
+    node: dict,
+    folder_stack: list[str],
+    out: list[Bookmark],
+    seen: set[str],
+) -> None:
+    """Recursively walk a Chromium bookmark tree node."""
+    node_type = node.get("type", "")
+
+    if node_type == "url":
+        url = node.get("url", "")
+        if not url or url in seen:
+            return
+        seen.add(url)
+
+        name = node.get("name", url)
+        try:
+            add_date_str = node.get("date_added", "0")
+            # Chromium stores dates as microseconds since 1601-01-01
+            add_date = int(add_date_str) if add_date_str else 0
+        except (ValueError, TypeError):
+            add_date = 0
+
+        out.append(Bookmark(
+            title=name,
+            url=url,
+            folder_path="/".join(folder_stack),
+            add_date=add_date,
+            icon=None,
+        ))
+
+    elif node_type == "folder":
+        folder_name = node.get("name", "Unnamed")
+        children = node.get("children", [])
+        for child in children:
+            _walk(child, folder_stack + [folder_name], out, seen)