From 50f05dc43bcd56f6c59fa0f65809cc236e2ee0c1 Mon Sep 17 00:00:00 2001
From: Sukanth Gunda <contact.sukanth@gmail.com>
Date: Sat, 18 Apr 2026 13:51:26 -0400
Subject: [PATCH 1/2] Add browser sync, detection, and parsers

Introduce browser-based sync by adding a new mindmark.browsers package (paths, chromium, firefox, __init__) to detect installed browsers and parse bookmarks (Chromium JSON and Firefox places.sqlite). Add CLI `sync` command and related UI hints, plus README/CONTRIBUTING updates documenting the sync workflow. Extend Index to support incremental sync: content hashing, bookmark_sources table, schema migration to v2, sync/rebuild logic, and model-change handling; implement safe Firefox DB snapshotting and Chromium tree walking. Add comprehensive tests for detection, parsing, and orchestration and include .coverage capture.
---
 .coverage                         | Bin 0 -> 90112 bytes
 CONTRIBUTING.md                   |   9 +-
 README.md                         | 143 +++++++++---
 src/mindmark/browsers/__init__.py |  56 +++++
 src/mindmark/browsers/chromium.py |  80 +++++++
 src/mindmark/browsers/firefox.py  | 147 ++++++++++++
 src/mindmark/browsers/paths.py    | 165 +++++++++++++
 src/mindmark/cli.py               |  75 +++++-
 src/mindmark/index.py             | 265 ++++++++++++++++++++-
 tests/test_browser_detection.py   |  88 +++++++
 tests/test_browsers_init.py       | 143 ++++++++++++
 tests/test_chromium_parser.py     | 135 +++++++++++
 tests/test_firefox_parser.py      | 137 +++++++++++
 tests/test_incremental_sync.py    | 370 ++++++++++++++++++++++++++++++
 14 files changed, 1774 insertions(+), 39 deletions(-)
 create mode 100644 .coverage
 create mode 100644 src/mindmark/browsers/__init__.py
 create mode 100644 src/mindmark/browsers/chromium.py
 create mode 100644 src/mindmark/browsers/firefox.py
 create mode 100644 src/mindmark/browsers/paths.py
 create mode 100644 tests/test_browser_detection.py
 create mode 100644 tests/test_browsers_init.py
 create mode 100644 tests/test_chromium_parser.py
 create mode 100644 tests/test_firefox_parser.py
 create mode 100644 tests/test_incremental_sync.py
diff --git a/.coverage b/.coverage
new file mode 100644
index 0000000000000000000000000000000000000000..e65aedba98f9e50bdf88d37d0ec34efbb7b64453
GIT binary patch
literal 90112
zcmeHw33L?4op*Il&pp#ULZAZ))CdW2=;Sb9473CoaoAu^AwV-DX@q8xX5^WX0h^#6
zfgN6KZxV9Y1P91|S;zZwZX6#ujK{<;uxo#|le}cJ`##%p><!5t4mL5yLDqc#sy;^&
z31Q2dm%J`e|LgqKzkc<vs;=(o?#*|skESAGS3J=hPKonS05J@bL=hq6fxiOy)0`7L
zoE8AdnEcoBdQk0ySN*~^<a4wjq0K*8sPXObz2kkvx5&NK8+BH@YazjM76TRo76TRo
ze>DuG@9;Sbi;J1TkEO!xJ&{x*+!0AANB@dVs~Weg61OxqtzRW7apFvm2(G3^Y!nmm
zePUlEA$CQ3B4RYw8SMzCqOm<<sym|P?oUQK@tOs3uxjCqwWK`u1*4r%DizrS5PgYg
zZ#Z#4+#5MiO){vpr6T)NN(6vJqI+UEhd5I$K1)nQx*~~4tRs?Cw3r#~oaLEyc);T<
z%+F^&9wBWCCpzH2H>WiaVKik;7XWp{6P;N5_Cz?=(H%)vi($Qm9dXcee@ba>BHmk<
zii<tb7-?oK8BImwF)^|~($SxabdKA=YD}8ufIM?+04szW6jDm6b(z>P3XfA#&8`;i
z0_9?HjaH)|t{Msxon!sI?Lhh8^!N7R&Vdq2#i{tUN?xmMDy}y6y8C9FzBJmK)14aS
zxJQ*z{juns{Sm$Mt3{(^HEBjHUM=EAfCW@~9fgbY83a}t*`K_#2Tq-W;r>*dgoDue
z!3H?^j^w$Vg=J;T$L}PDg6RpiM^j1N<IXV}J^C7^gVV2VH+Yof8VMdnJw0N)iwqfD
zT2wj*tl2zf>NGOK2|UomxSA0({j*vdRu*&++k4{exwb|uCK-VN0&{_ut*bxQfrm;G
zx1l3`S0oYM6RA~Gm1*J6ZAf4`4Q@x_s<LaDj&1_M2HopP7dxDVB_+(Do0yT>TdHv8
zn2^?!<4mJkQg0-j>`!PD$sd{|wGzfGsf`ge1)5XibQCt0T&p>1zF>puaiyJ{vvATR
zCasJJJTKutdrl*8j4=d8Ed<xEeqT7%EsoV4M}y*3c1PioN!L;ws}O9!N5_!>OH@8;
zBkcFWq>uk>IV$5AL*;dm12uPrdtf%{3r7<$WMK|gRsa!a><mNZaC^KzMeG*`iFx9+
z$Iw>+Fv1%5kZTsn4#Rt6JeiEbvWl0j<jq>FSk)a);#nwJCAP=oJ&|xsSsS3lnk3X2
zO^QT^v8&Q@10#6=W9+%Bis+;*E_j;X0)tK)U*PHO4aYjw^$IV;3M$O~cyQu+lt}~%
zid8TulQ2X_VQQdiDxR@4{MtU?1f%wbJC)&%JH>>!uRAgZPFamshEtJLv^OH+EJPV%
zSo^|Bu_F<IM0G7CiUDKYqqS0CnjuA-LzULds<Qnd+reENY`{mnOLpN?xpW!hESxik
zN%yNGSecxHNjMYe4Qp*xgKUmvjY>3(Yy-^FaEfF$E=;(ILmX6Uz%!!K?d=M6S}yej
z$ur9jS~9{_1ph5(F<>!ZF<>!ZF<>!ZF<>!ZF<>!ZF<>!ZF<>$9*T;a3VHqdh|Fgmn
z65fV`<tzp)1}p|F1}p|F1}p|F1}p|F1}p|F1}p|F2L2)$@UgZjg7&kJ`T%1qDOP_2
zu&}maUhM+>BY#%-H4=U;y!RK`Kuc+h0gC~P0gC~P0gC~P0gC~P0gC~P0gC~P0gC~h
z0iSIOqg@7IJ+>0JdKJJ9IauK`&NorGDqQ9t;aB@F`9JGl?t9aB*jMR&$(!^Rc%Joy
zJ*@ki?#-_EU8h`<^MdnUXQku3Bj(7nKW=Z~u5e%EB-@bfh;0u0V>ZEhnXfV%(0k|+
zU4a|$Y_H}}dR|?;FICqa?~Sx4BKzuAM0$F{i8^??tM5SF+}ioI4Rt-y_PV|UsqT1e
zUTwqtI(Tj}Qqvdi*b7gACDn{LOE4IXMN`2bJXz8*2Y?sQ01iUMkz@-0MG}DAIvbD|
zQ$dCYM<aEhGXTfJJ%HUd3$Pn0Vt1wv^hJUl_?QIz$V|ZBKoLLO*%^#Q_I1WPlJJO2
z82SxYv%3naS^m*f1Mn>~0G>|Yk{(c=YQl<duLNW&<`^W%y%BhVst<r$D*%|H(d>z&
zdZNAfDNZn&>3~hqXtakdgYDsD1n_s31O5_vy%|m<!Uy27?N|!HcT5BDh4jFK;UvMw
zyohvp3HD=VpkrfQ(LGr8GRmsMb7kG}PTZxtrULX*daRI#yZd`1iD(C)@0<eY6gxE8
z9fnCpwU(Ar0H-)OVHwcWE}^S-BpOTh_u@Xq#xfbeDGs6_nRvrJQ5ZwGM?=MczlvUu
z29qfmL3@JTksf%F0-3vtpazO_SCFh7p!)7Y0H!#bt39f%N!tnlzJVSy{qUBfZcw{*
z65!IAa{E)!o+MtVlYNm6JZrU1q=rq_3H(HRBHBqT<&J#7uA;#Q=|F56yC(o7#U3P+
zH=MKKC-MP)S02DqoUMY9{T&e<eQN;F>uB(i1G`fISc;QiZ+}lJsx3V`cmSulsu?G?
zTm7`?2&zjNIbYu816VqJ*cruhQ#-sfBSF*#3u*HLI-S{79gDCE!<f-5u+;<jRL8nO
z_+~d?&!J&|!9DP{i*OGakuCtGGa@7V`=ap9hhR9BN+g4GgZ04oCMUqpqM@_8hXl;c
z4r)9Y<Vtn`wbyM;!WveW?B5%XrMma@$2!Ax`{IecN!a5>>U!bTEAWb+y>-b%hY=v}
zV*#}qIY4mU2tprhR>+QSy$v9y+z3QFJf%-mH|;!Euz=>h5i}FXS_VLhuM-4sO_O!_
z7*rJsc1B<`p0^DSfoc|xtgsUA&L;_97FG&Rcwgq5y(u=y`nZ$MUo&6xv@;v+JDm49
zXK){IjgEh^f5-NQ?Ot28x6l!HHM+NWZ0t`Q6I`c#GyI?POX#xiE#DE>75BG18T3W}
z?=bKUyB01zkD+0xqxS#RJGk@$D!`-n|8sV7>BV%A$L#-S2f1`3P3%$o|5;sJdIL@T
z@%R5T+qm@dkEj|L@>TTucGUiV1~qdWwg0cAY&7Qme+6Zu8NL6XzKcsQq1c-_`~UJ*
zF1?T<xM}}CjWREz_x~bo)s6js*={bqlp-s+`~RtwJ2YqiKZWw(H1_|cl-17J|Ci7<
zmQnlv$zd+NieitB-TxO;p1bt@e-Y)`eANEGa66Z7pva82|1Y37<&N3^PojrCX8%8N
z6PK={!iT>9&!^mjqxSz3D9={8`~SRcT)K`5A36K~0Oi>vXa6tIUK)%Y6Hkqfoc+I_
zULTIL|M$_GUB})3d+Ciw72hD-vxQ5~p<;ig{lA;uh#YVK@7m0zXHn7F*#A4}@nDea
zXynp%<~sMe<L>|MO<dY}LkRl*pSz7qPx-?Tu)h_k*%;ZX#`+FK<H2ZeAHMH<E8z1V
z)jDXs|L=ycPFMvk1}p|F1}p|F1}p|F1}p|F1}p|F1}p|F2L2KlurW5|$It&W!Z3pW
zma`bJ7_b<y7_b<y7_b<y7_b<y7_b<y7_b<y82Bq>z-IHiP0#-q;OGB^L4o71^8dmA
zl>ZU`4F4VeA^tA^34g!8)qlIc(O>Sj`!4%V`g(jjeJi{tJ-4`@az5ewTjwfgjboMl
ziv3CZpq*n5GwYc}=qdCldgume^H+0>yR@1O!KV~MjWxm?2#RyyVFkG|1dvY;%(gS`
z^r6{<@dxqGBLeClLec~Hi3V^p*GxdD{YP+h1GxerglzRP14#_RA7=!Izt4#OE@rde
z%w~h9@&?hvE(WLMI2X>}D07lXqLsoN5+}?8jb_XOL@^Jia1xRi%!Ig}nP(ltUqC3s
z9Dn7dlMgXRkBa5kH61w$xs(bRBtq4zH#V>n8`zrr4pl*89z0mZzJeSWOGYC6A(sUH
zBKZ3wPKHLwh?C(#4iT{d6hfL)eJ+e9BMJVAK$VD-Gx#vtk=m6=S<w^%AvIS<mB^-e
zGN@jSsn?QxI6~Dpg#C&OsZz<GG1Y<-4nL<R>osZNYe}tMJ^d(9D;go}!;O`-D2aGj
zY$qbHL_(aY2(|+ymW4tt@*gwiLQDVw5iI<}d5D{a(9k3Eit3AyErfhBV`xK~k8li$
z*4v5mAx8!d;7E3oq+~}Zh4>5%7aKvtSu>DLbEFm=FgiplSA?d>wtD2qAO%_N7o~q}
zAw-;^of1|H%M7Sc6arlX3Moa9QL{A;xWnWDMetvQf|wkL$aO%B>J;v@kRA_(vD3pq
zp-5uj5Yjv#B~lNTkRt#QP!OnxDkYhus=Wgv8ImA3^b%x&G!X*C$#7&5Bjd;r_=&4f
z7*U#4gpO6LIR<Z9fP&$o`_ke1dR#?c@;=|e8F(_;KjYmmoX(b=M^2@*2w8D%3Ds90
zfkMb7B3a{@WP#L1>;V`gV!d#IfWkixKp~u@I=!eXr%yt2D`D2?s(kW$pVV_`MHDHS
z<G~;$;H3g<j4FPzmu07Bv&ozS(o(MI@=i4L;e*GEr;`*=)P_olSMEQ_eCq-mST`Lb
z6ih$!)QgKvrHfvD<2k{1y7DtQI-(<{cBr}2$9%s7TyP^yI#Ld3hu4-ju_(67d5~4}
ztXJh|(X0_8_u(W7csqjoop^yYbd%f;fwz7!wpz?ISc!x)n;bV4Cm0SF)XZ5Ob84au
z75@?pDT74Th5~HIQ6#5*c-lnGBnK|3Sb(Gegh0nIPLhR!Y0y=*)6Two4(HMxw_3zx
z^5WoZqfV`fWT<01ARe`h(xVS6T>>_r8F;VFyZl?8$}*^6ec9Q2Mi!@!pFqlj1<lo`
zTI7%=$#HmKDpdIKLsJvqAO6a%Xb-}pLel01T*pz9f>OO&t0pkFP^oHGs%+3wQ8Dou
zVRL5i%ceH!W#;KV><K9~C~=~>;id#r1-ke792f|k)gsIdg041UDuwl!2TY^Ir4D5c
zMAj?9G3Jm2;UYcvn>bl<hDLe~V14_wWFlo7dLt(S@#u(24V<RcoWZ?Rq|#u317|3+
zC>I3P^P?j4;G1c73Ru7(HzjdG?G6=7*HSX<A*7eHnQY`!nu*9&qLv}BQt`=(2q0Nm
zTC{Xsa)?EN!C)yUaqqIyk^d$Mnb=5nho<wdRO5D4{ZOkYq%~0Tcr_%$nzSxe?NRA)
z!(I#@6z$TABUGi8Gomz~&IO4uTaZeFHUx6Vj#gZilDPutYSp<7Dg)*m`cUPDa8Ajj
zx6xJ=Lq5OSKv7LZS9<@UkwfY765P!zOMcn*FfJp)ABQ~vOpf@V(MzP7swRr693>c2
zt)r5Rph&#q<b=u0R7;?~U^1Q+C$p&j>7lo;o;Ww~($w_s*Iw`n#SkwRv&%p+Ji9aX
zMG(<m)Rg@sVqBbvL6k~xx&#)5T>uOCV^9;4wiQD2)76Da5+ukA5Zf)<)m{Kia2z>w
zb`pVO@{y@S)Zqc+i@5pc-YfeU>n{LJN()$cuL~N=mX%C`;G#)vC9=zKre{AN&a#i$
zrN9u(2#2Ojgv47Wwzb$^!z(3^ny;S2%|R!fPknttnKvJji}FFk=6|z!CP1KI0{ipq
zai%yA{PXizmU-mOyo;_y7hUkL2=ggd1R!ZGX{qb8u=*6A57_2nNN}eSr!-4Sw+PVs
z!}S6?nayOg=Z3QbBiVgtjpdrOm=uD8WDaSK))pZx6H<xGkO*=#FAmDgubxigEF=d=
zQY-`>gp_GR@$2bEPCSu;Q<`8x!jWp_5L?Cr0TR!CB97z14c*UNzSJaTgI72gCvCQs
z_#wk0f6<HX7qTNG!)Hd`KXs3-kz^GeP9G!&eC$Q17kj)-gI*7Kiacx!d+|<ok{bfm
zZWe8sX+w|QUyAy`+TbB~7etG$v)SQn_SEaaOUs>3T-x~-f6#%gY`;UdGK4O?^LN)Z
zNQg$mQx)5_fnY_%As2(#Ok*f>-|NiJve`R7_ZFOZt~n-4EMp&H`;fP61IpYj+a0KK
z1>3k3Hq7pW_RCw{7-i_E|LK1EMXt{E<KYwMe*2zRzWIY6{MVVsmoItp_`kl}^G?r-
zSL3Unz548<PYhgm`9d(8-8Yeg;<LHS!%Ghhzx?8>uP%M|_uh*e{5FU!wGF=Zn};%2
z2N(wnL7sI47|!jkXgdUI=WmC@glc^MKkVO$VDJAs;lG7n39k#k5PmHDv+zUV2f`D=
zH-xVUr-kFfG2wpUi10DtE+H;-3t^#E*et9URtd|5g+jeBODGpggo%P*aPV3F3jYpV
zKX{Y>fBet*m-ushhW{@AZT_qLm-#R9pXWcxALY~h-F%Yo;Ujz-zn$O6ui=~cCH#E8
zhM&Qg@kM+d@8NC!VgGynxBVCVL;ipD|HS`-|9Ss2{wMw4^grhRlK%_-&-fqk-|Ii*
z-{<f1NB!;41j|_rSPWPUSPWPUSPWPUSPWPUSPWPU{J9u#z@Cc<hlvP<hzJIWXlo;4
z_iiF~?IL35P9j=ciP*7&h?W*2wr?k5+cqM$ZY5&N79uvoyZ4}?O`C|g;|?M=ZX{yE
z1|n|1orv3RBVzq}BG#=VV(nTY)~q3-xtWO7tBJVvRw7odB4XuAB37&*qN$09#zrFG
zqBK@w`EnwbEhA#-QX+1-g@`3fh`9M?A{H+uV$mWZ7A_>>rkjXZuz-m9^NE-@kBGT*
ziD+mbqQ0Jpx;i3iYl*0-A)>mPh&gkJm_3__S+j_kIg^O0Dk5ggAfmF8h>8j#rcWoL
zyqt(>(})m7BFf5$m^zh+DN~3jEhVC)gow$Li6|~6qNs?7!a^bn3W%6AiHM04iOA0<
zV!{L>^74oX1c(p>B6yw%zn=)7j|i`q2#<#dx0?u;iwLKa2#13RyPXJ*Bf@4Qf@L|l
z7U0Iu|C6h8f38tt)oU?eF<>!ZF<>!ZF<>!ZF<>!ZF<>!ZF<>!ZG4PQyfbaiX`~Q!e
zW~%~=0gC~P0gC~P0gC~P0gC~P0gC~P0gC~Pfj=t)*8TrKEB#jG76TRo76TRo76TRo
z76TRo76TRo76TRo76bn)40zDb(JXW~6233=+Wy`qaQ6yx_+Rs1<d)hl*><tdvxoUx
z{qLZ={h#+Q^u6Ni_mz5o;BE7=p09XTx&PgLuY0EJ1y`@j@BBw+i{q-}OO7V{uk0VU
z*Kn^epJWyQ_%$81FD3@bH^YPBz9@V;vMUZ>OoUe(Fkh$gh07p(sW}?kQx}Z2M^nM}
za54h$3+R2t+6)iKH`DtRs)kIy^Be4m?!}rfrjOsz9Ztaa>^mb#z+ZGD_!5`4{oxO6
z*7ZhXoxS12-nvvInM#slQ2#<_a4nZEya9k{tRoTWjl@#no?!AotON9KThFCSt`{oj
zi=*v{_`YN$k*w>ACL&$&{Q$PRnM>ziFPN!eDuU6#U4Q%&qy|IdYt_-6i1$YOdqJ(8
zYq<2J8&FGwF!X9&&872h07>JDeCZW5+VQ7!;P~z93#YoV0k%;7aPvp{_4-5vzVF?M
z)t*7`i@)TP;iJF7O>eFp`$clfs*nBvJiWP=sII~T=cSZQCHEWOEnIpYofU|v-5crc
z4flc8m6W@3PbAe7?Tx0e6;|BH>|K|PrXn?c;f}rGJ&|M`@Y{Ew4i<z&q^_$!){%<F
zW7@hoo!)1}6?~Y}jQ*TTxj!S(ShBwtw*jarp$tw=S;!~_@N&w$2m7(s&<C-u=pMiq
zY2(AWMI;#RiNfg+wwuXeF1?DPu?Lf>1e|mQyCXe)u%to_#jRX=A<Y_kBdP8<nR?48
z4=7mFQjtU~jKADHo3g<NBl|ldeRxP=Qu1hbt8%K?fqNI@&!Rks1qnU?7t!v`aFUEC
zfS%UHr8m$tlyGNfFc#U@8Sh92lO5rnFtNv(lo!3AsTycS6>WmZnN~0e(EJuIJ%<iK
z2B>2jm#(7(s?1tMS3jH=7Q5@VlJn|h|K4yc)xD=b))}tb7f<X>!kQB?&U;NK(CYbR
zurrd1kOc_Z-NdDx*L$j(bBd{NGq^@BZNDK1*t|sWhQR(mKaUR@q}x_<Y3~i`V*+74
z%v-1oJ!4}ZW7jah;|_7tY-gPR?ED*N!10fc^|mng3!B4}^_=poa9?yEcT3Em>kZ+g
zu*4tmpJ%3`XI!&=Pw~ItEA8)lv)+HOowF}-3XbnMUvTbqJs9HBE2vmSPqe)b@5Cd)
zj_ydu-e9!14^OYR(XF66noPy_B*MK(t%9ZwF1?0c3v$kfv^pB8EuMrW<yuI8D%zvB
zLbA_80dp(pO#yp);_cxcyjZyD&4`ixeXxh_42DywL^3!xSP$?ndMjxsbaNuw-k*vm
z$Y26=AHDfjyX-<v!2tRedMAz=w6P2=p?3zLfh+c-UM`xyrG=xfzcGc)qqn*1JAfAe
z{1-_8?Q`h&D7h2>&<kA;GR1f}Q8P(AaIsBxMuKqc*o(*0Hmc}w>JsgROC;K)vy~b>
zSa&$%O~jLZkxnkX&OFaV_SLP3^uQ^Tx+|PpJHNJ}4wr!5>W;_e)i%r@fBUFeV+{9a
zXD}JLvp*8+z*jJDqBqazl~|W{b2slAF}as^+qv{=D!ohADRNGvn!9F|wced*3zaJF
zf^8%0lE7Y^)a-?*t?b*wotnxu)b>aO-k2Jzshv{i>=^@_xO5d2ipgdU&s6#JHqhj>
zr4C4d$)qC%*puija4I%zFTgmoEwq=q?nV!K{5{MrDpUvY;<lAb*HA&jh8F8P5XCpj
zHdC`QjM^LB4_ybao9LdH_r-e-#NhS?+^12`{MXYRcF}k$9w(=<>!=w78PA>JR5%&$
zPmrZ+EnkV~7zDx2Imcf^?KF*Gcf}KMx}dR*KaBG@HQQ6L!Ae%s<(k;Ert+<HSF2b!
zrfuHzZPhBeRgjehPF}-gU!rVqSAGP0!(=2C@6#Kx0{{NM8a;x9@4%b=r}3}xkMOJg
zm;9ggFZaFaJM63UzT{1M3p~$y!XDQBP4{Nk`>s<i$$7zfud~u|-Vt-;*&nyJa96l5
za*}PxcEmP^{V|(hz06mc4d^`p{vWvQ)f`ICgEQ?^U3a`U(w?~C6G~###?;(02Y?sQ
z047tjY2VR08;}=MK_<62Kxh4Ga@#DxZls7k>UwWvCg5+Nh;O>6X}Wg0y9%mV{?Swe
zvr5YhfTz>9qz8<fwA(8InTk2Wtr@sIrr*<Rg|E`lG#c|gy|(FqP0?tyX;8bTx3e7Z
zm(c4?<pdjU|KQWV9n%1OAwBS*aigG3q|3|bdu}_*D64MVvD!5ipqJ8Pr7Pz)>dq;E
zPO(FCenHVv3g8q6r*ZdpX9-=kbMF4O!FQr)4kG1Rws8YBR1Em5==JE>8>rihpazO_
zmwqpAcOd{%oXyo9g-a0R9^|$HfN!A3Oh3K~1~W_RB*3M!PLJ8NwoasmP1cDxY*_Kd
zLA<lNBOkD<Xz-!$-*-;{NQyl;>bFO`@&KOVY?XTtd}{#E>uB(ibD3nP0I(D%!JOX)
z?cf2N=Bj3#*lzXHq9X|BDEQ|i<VNf+AHdS-!*TAuw0QxY&MC^c7kOGefKPR-8-#Cm
z1NIym_7^nnrR|2#)lnLe<K0W!<OKLxG;}sDv%)tBXz^f>E7<|mex3LKb8gyecibB}
zKycm&f_~e7y$v9y+z5pBGuJxP{q_|spm}cu%>=TR0g&SB1o<Psi@^K;+2~V9_>!=U
zf1MxTr~A+Q_xioQulZJc|I7P;x7za)Prqlf`)T(U*A>_2U2~m3cE+7P$3Hl3vtPC!
zw=d=1;Er-twjbHLY<Bi*>>bSSnJ+Wz(c1ue18&zgXd9KO2JiVt-&NbSl_vV=yK39E
z06LYE6{5Rx%MKR(Et>(ihQ@%!xH0wG@77I#N@eN9sQR_v9d`gMmDLTyYPS~2-GJ>I
z0h-E5EQSsmvsZ8f0I#4$aM0NDXqO|~Zifn}EOA70^5X)0k6yzM-bS?n`0P9Pr#8X$
zR4W*JMSuG`sDNJI8dvmN*8(t=)t^wGb49;(4OQ%%EBZT|0h`JxDA7CjivHHs08VY}
z>sRz!Z>7oynW;1r53d63)zrr5cvtkpE2&m7-WC1O3aEn0PM`EDsUr7^ey|B@pf{I|
z{ZU+dBUG@4T1VtuP}Dj*B+;iSm@~XumqQKIrUlXuN(Hc9w=bj0P|it=&d^e-45jwP
zJEOhiqOMlMj$5DxDhs*(5T*8`xSdM?n#$a!LmNMeYq=S~>0D#feh7mf{)B#QTMWol
zR&iZz<ra+kbGx=h)bVqE!n0!`;M46<?MHDPHv#lI8k2%f&Dh_5?Op(t(794&?pf_t
zM%#Sq^egj%+PypH0X|);H}=Xq<^njCzU_&^!z=K_vm(LlTN?nH(p0HH8)~=J)8ZU1
z>?Wf7@v7Jv=~4oq`|dixr(!Y&zG){Ot_65{Cne+kC~kKRtri$(41LBA*p&PK4<X@g
z;a9@H2+s+F0>@wF|AYT2|0Dhx{yY3b{9XPN{(gU}|8{?)zua&4UG|;y_4s!BR(MZ(
zZgD^5e8Ty+&Q;DD$13|3`;+!TJI5Sm)-#LHQ|M9j&<$Jyz(xGk@HxrSYBt0n6l$yy
z=0H%K!y+VCh9LRrf!TJ(ojx>sF#h1J5Dut+2uTk>ChTUenSfCHj}T&zD-c4+RxdM<
z#2`S#&ZYic%x1rt%?3~94Wfr#3`sH{TsVKD%t<1NRSI)RoG=SCnlTFy#XOwCNl0EW
z6XJSio^=d=0ig_Y{FRqZKExb7Dwbo{bmS=HQYv7O2-UCN*uYL~U~BF>R0WNB@L(1D
z3UXj98Hw<RToU+;;O~z(85$)cPDY3o5gR}uq&d~+!e}y*;GYOoi8wif52GEaU5S(x
zO(76cb7fSCY>Fp?>eZNfEy;%?RE<N}uegvZmHZh~EjZ!ub850)lNP>~)aupKj{>!#
z5yC#)SXqmbh=;{?A_7Y!#F>g<TTo(IDC8plF=H;o1P~Cx!atmcxM>ItJu<JTz6jYu
z$R{&~Hl+Co$B<~foj4zIWZ(dfWG6{Vc7#%h&%ki85j31N1KBi3YQX`cL$q>5Xo_sB
zM~)0qkkx)s`o|VR#2MNtVYRT#fC@z+&^4ftQUn<_TjPK`Ode1K|3xT>$$^Mm2gImO
z;Z6(b@lY5$Jq#3zBnA#4%>z;*^<W7(0uTWOfqJM?l1ZxCJ1~+V335X(K^908AwZlA
zM;0+MjtqgHxC(_4rCCMjSjC!S@JkCQ7%sXm9j>p(RrDqA^BtUlCzJg%-u=SqY}t9_
zR7#7G73Y>vef1G2gj^z$HI7LZNNvO(fI%YG3l|6|{Nn%=!bz&ri@I|9Bs8}YW{s}O
zC%^YeJ%?6Ak&-zc3{nDKDzL_=;wO7qc4{`8%qbu(<%%xvL_;4wc)WNzNdZM|sFZl+
z{*%nNF0g@h(=kH9^fOPrxY$&>=+!r#6MUyDKa-;)I$~;vnmc{W_dCD^H^QVN<$!j0
zZFv)mV!NCNSvAjkRgM<T8ZmMoPLhDPBe>s*7g$3#$?Xt$>lb6I#Y}^hNI0{}aZ_=E
z;c!9CoYgU>CfZQ(FTs#9NMvm&z;+x(a@vQdP1H<s;F5|3ND4p*bPVGpStytWT~#~n
z?7Qc1F5PjfMNB3y4$d~})S5_!I<^DiQOhVj`moX^VDp)Q_u9P6zvZbcg9_G{oxNvd
zar*cPq%2s_Tz#rV4jDeRe0X3gRQT~jQxo4G{>rUr55l8D(&hzR$5E4lQoUNMCNQ^9
zscKfLY|v6sG4UB;b7t_%rZ(zj=IK7{2`M!waiY25rUX+3y7&1U7zmuzBFqhft~OyR
zh4q*ROryo64rL8Q)+@p>=8y#8B0cw;I9YLqMtTikefza!B4rzTBPRm!=!i)ToTk;B
z!M#+Z(qMoCXDG8M7X;PwqayU+n`w3mSim4RC2>OS4i!t+QZnozq?faqY~)j#iO5x=
zmLafG@yUt^AX!;jv~*o^h(&?HU@0hZ@3PX7|0W5U*hqGVrt_~<<91d3P^&1UHBj<+
zH6+8Dv@TWcQR#5QUJM@;?b3=PRHc<OqBNh*1&J_QkV=C#1ailYR$P^mxdP~F)wvBS
z1Lhq1Q00bjPRXRV(N+~hKEK*PQB6cwdjFx3L+SDo+|4UXe%bagE+fJphdlsHj`*O_
zOQf2rCW@*YB^Xn!qmqoENWA0ZgvrcQOQ61BGM*JDv#9>*p|`J|I5+Up)b#GxUhoRV
z5HA+9%Rn(ayEFAg5Yb-Ll>H=PT%3qOluB^A1Qvx|01NnIP!p226+-jV)rCqDB*+U8
z+b!DFUI0yS965A$5`km#k*Pz};Q`}|xcTSaEBhGhF91zS3t0GF7aGczl}v)*qDgEe
zvdeI$XFng#vX9xNz!1y`ho($~#9Jn|wb)+6D<zMbub#urK_{J0eSJciHy@IV@<GGq
zf3tZeK%ig(`}6E^rZ^A$^Yd7idF0K!i>^f%UGT36^C?#ZAZaaWsq3?_`V^lJ*ydtL
zaHkQcG)qdi2+;b&^#VJY&1AFZhO+}B*?nh?<(jmZ6oP|f4rz_n79lMYQi;ou2y!$p
z4$920o=)N{BnL=RECe2elxaip>*+>LJduG@nqWf0k!s}-TgC$c63>1jj^n`%-OpUU
z)FfqtS2!0ZZMK#8A;Thn(TnaEvLhqIXGY#Xb&su)WECAwA0!5R>_w*+d%R79UJrPR
zJZuYl@lJM<8v@mC7HyenLyz5Giu%CX5G%MKT6CSw4rjBcUJqVc?sVeP&bRo34s2!n
z9lDhvbm5)9yRJb(G#Z|&*scu(D<TfL7{q28Lz(+tXMUE=-ubz=;KXyyF<D|6`w-iQ
zyk#3u=5E>UK$R=l#-*@fb|17~-s;9ELqGjb_tP(Ob*>)|pE&p1_q_7WAN=6I&OE++
z$&<(b_1&I#dRDv|U;XUWXCHlH;KIung4yi8i5wK4&0QW|dT99N7hipK>9fE0Ufkff
zL2RjQ@U`DOl({;<I9LestRuj1Zg)l7p%2zA^S^!4arJEm@BinZy-0Xms1YjohxvPb
zPy0^z?(-#m?Y<qpg}!oMk&pK|yfJU3cZzqStJ(Q~oeQ1CjtcHI?&sWk+iz_5+e+A0
zCd*u4Rx{PibjFT8gkS#uA9Mm>&8~JkDABf^<%o%#CP~Jr9E`+V$HH4BK<RDl#;v$}
z#H}nkBa#6R`9O*wfG|;Aa1^1eFftKJ%u&VaGNj}XlmU(%TQKq#*0CAO&1~r=kT-AI
z#C~GdqmOPj&Se6JMotY(b>0D~fji2X;*H>$zmbh#keOssgFOe5j#V(}DH}k^!J9W+
zcK$$iBlJAZVshH@4^g9&a!%IFOGE}$ka<j75j1H4XffK*%8(jia?%$zMTMhG!R?@W
z?d?tZgbgJhq}A`Z4Rbs9woUtQQ|C;gsiFp1F+&Q3Qbgg>)vm<pN3C@u*#Q-0xXO`U
zwjNV*aD90U9a8Gm3*;=g<~EzjAg4-35zV(~YePOd`D=IS!<sSFpVH?$MM0!$feb|<
zW=KOI@|B|d8XmqAAY4sgMc|}kl2|U52V6&FGy#v~*gDM3u63KLAd&14M4S(Wu<1b=
zNk*(%K{Z5c+DIG(<WGN^QD*e|#7hq}C{v$9h7`p$LKd=>MVzEYF}v2{`WCGPXmlDb
zUg1KuAYiv*$9WXn7G+5cAdXy@!pjie9e_eas3GMPBrrh)B&-!yhzqa5=uK-zYPBVu
zaFszvFaR`2CISvhLpYRiAvG?fC7Gm}q@a`aJ&<-b;~wXmaSKMfi{Ci2|5G8%o_0!^
zQL}&^qV^xQNistc$hw3zhcwk?G!N?2eb|H9*PR4DpJO$cJHMJ8TyZOCbg$#qcL$zz
zqUUhS%UuDLNeJ}WQ3WF&178J_0`MM6@Qvg2;Up8S)EvfXAMOXlLKig4uxnNLQ!|Yf
z8DVVPs2I0qzoX)npgB>_UPI+^Oa7^mV4~pUbYc~-c(`*FTzC0cJ~UGzK-lEewh{uA
z%{)m`4rfO(@w^hPjdxMo{^Dq3zoe7hu4Fgu%WwpX%^B(otEpIQe6i3J(!CvLbg#+t
zt`mt$P=dC-X-YA9hZdEt1QWV>C0nf#EHN;zBXaS^5lH8+z@|Q91&c1v&^5;Ci{MJc
z_%5T-;X0y~P^BuA>0H#tC!k2gFO9|fk%pxt5lREL15Kb*I??p*&zqrVM8XqLMS2lJ
zq`M?6FK$p`0flrOj>C>sR8Rt=j^R;ZQ&2*Vd)G8V-S<yuJX01x86@|N*wCkzEZST1
z^b=oO@LZFrO<LP=d1c;)>B78g*Map}T?*-O#FH}&jlDH8Q<q!fS|NpCQ|g!qeW}tG
zIVH1V#AKo~Cup`!;xc66fc!wSEFw{vz)h8bSB5mO8a$R*;suYWkE@V*h(L<jjlDWh
zZ%)?Cmc;2y;*fd%kRU9>UjXH4&B)0MNhU^cPSu^&JW6>8$qk|rm*abY(OQ-htp%`!
zieyrepzSWi4&oFUABg2*&nWl&sPMQw-#$8%623;XZuxSNQ-Yx%kP^j?<#-uc&aPgD
z#}&V9@C6qW(orA4=gxn?i9jpq1qsQ@IQFF{EydHu?4{qfq0n=WXy<|%6~(EcHS6O=
z?TsNNMOGW0A;BWOk6$>$DT}_6%{VF<A#CbZwWIYUSd$&M;D+Avd)E?>6PA49N{Q=c
z?7MmPfRMi!d@~nsLqp}RzS~zy%}+lSbcu@~vT4!e*S`PZhtJ$Y^aw#M(0@bOk>9@l
z+sjVpLdX$VcoQSs1Rn9G+5b+G;Z9iI0*I(s@Oh=Rk3RYSYxm^Ohp3tJV}*C8FQG%F
z2OaOc{ytYS4<Z-Mo2}Xc5pdcgG7{TkJG^!-F!}L`bJ>gcDAsi`KQHeAJYLouyZEbT
z{{HpG3RL>s^l6;wVxcxVu_ng3VZBZxj(&U8sjA8RA=S8ubPY3tJbK>&8|JZQipLdU
zXX+It!Zev_5vo*rEmxb-*J5+31HO!5r&bg)>odHk_)O^NZGivcsJb8sKef&9Uza8;
zDXtuf@bo0&1SzM1rrM41X`<<9-dy&)f`>X)Nknm81Gb0;R*#f_mUh?UkustFd(#=k
zw8?hvw30;vYN37d%5p{oWn=`7!v|0a9G*=@?7;-!08BAhkb{IhB0k_$8GCg5#WdmA
z4E)L1a|LwrIqN{ZK;5?|r){-ZDM#&V1Nnv};6kU#89I1WqLtm$E1l<aPC)|Z8M+(x
z2dDoDDrrUmG8EwYfL6YyepE**Dgi#&?YbIJB3)3^R7f@f&@h=K00Fp>q@hX-gaA@^
z3PTW-wJ6Sq=P<ADG!nin>=5$!bNm6m-v6fmVgGjDRfx8n#el_t#el_t#el_t#el_t
z#el_t#lZg>13SVTrQfwD!m-YHZ(Xnt-mu$)UnScb0%$6~YuBKS-?c9f0=Df>e?(h<
z?0iid02Kb|0ONerYTa&tD!ERmoX6<(C)kaTTCLs%VENYxW_tE~%tx)(?*yR#*>m;U
z7pb5xn_B^G(x0xD24Q^E>ed~Al=r72X<QK-0FBnP0M7Mx;P|X-AGK=T4p3AKb?m3|
zJTQ_Ao!>^nDWQ=c;t%qZ{onQP@O|j}vai|uj`#E4<(}VoKJA(3{ulQ__Y~JN@TI?R
zyQ2SV!{4gNV!&d+V!&d+V!&d6FhK3o4cb|&enK3k_CjV2+BhK&ZRXOm7#a@<8?S9~
zQhPhR@o=a4;n8|(-wrVFO%IROQR~SukL=E+){`2v(UT2Szf6+zasxPR-nN}fH&C$?
z{J5+7*%QFs@)z~~e2SJa>K#gVP<`XS{^l3D?{d$5oyKyyFZ(y<kBt;Z3A|4#=lvpU
zsLf5}vF$N$e7ZgKQGZ;G?z`v5dBMn1x?fkznMt&lm@K19)7UqAZJ;_Ej((%uR=Vns
zeeKqIeE;9WzmJ5sgcsl$fG-J0g_y8Nm?bzN(sC9976TRo76TRo76TRo76TRo76TRo
z76TRof7uN1Vfab^)bPkM<tO*`VhF-bA@i^0p@Hz$1?=8|mk7@H2O&pka0WaRaOu)8
zGrtXjf75oR>p!x?hn9W!;Ia!3U6?;F`O?cT|I~qgQ*if&;PPjdFMmcDc;<tP|M|i2
z7lN1kuYceu+6|@V?jCHLk^b&dbgb^x4cYH+k*<E@io0+Zq}1<f`#180Mt%0cZDr`<
zC0oW8*a->cJ6Tlo#@m&z?0e@DQ_u=AwXJO<@4vsNYxzCDd}*L<!9Ax=Weay;h#hB|
y2Tqjrqv0!;KInbzXNxcW?!(C~kZ@DW<>7n8LD>=Dnvc)zABbLESAX@{y8jOkv?bpF

literal 0
HcmV?d00001

diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
index a4fc6a6..355c7b2 100644
--- a/CONTRIBUTING.md
+++ b/CONTRIBUTING.md
@@ -83,8 +83,13 @@ src/mindmark/
 ├── __init__.py     # Package initialisation
 ├── __main__.py     # Entry point for `python -m mindmark`
 ├── cli.py          # CLI entry point (argparse)
-├── index.py        # Indexing logic
-└── parser.py       # Bookmark / document parser
+├── index.py        # Embedding index + incremental sync logic
+├── parser.py       # Netscape HTML bookmark parser
+└── browsers/       # Direct browser bookmark reading
+    ├── __init__.py  # detect + collect bookmarks from all browsers
+    ├── paths.py     # OS-specific browser path resolution
+    ├── chromium.py  # Chrome / Edge / Brave JSON parser
+    └── firefox.py   # Firefox places.sqlite parser
 ```
 
 ## Questions?
diff --git a/README.md b/README.md
index a5c3055..f3474fd 100644
--- a/README.md
+++ b/README.md
@@ -38,13 +38,25 @@ Ask in natural language — mindmark remembers what you saved.
 
 | Command | What it does |
 |---|---|
-| `mindmark index <file>` | Parse an exported bookmarks HTML file, embed every bookmark locally, store vectors in SQLite |
+| `mindmark sync` | **Auto-detect** installed browsers and sync bookmarks directly — no export needed |
 | `mindmark find "query"` | Semantic search over titles, folders, domains, and URL slugs — returns top-K with similarity scores |
 | `mindmark open "query"` | Search and open the best match in your default browser |
 | `mindmark stats` | Show index size, model info, top domains, and top folders |
+| `mindmark index <file>` | Import bookmarks from an exported HTML file (legacy workflow) |
 
 > 🔌 **Works offline** after the first run. Embeddings run on-device via [fastembed](https://github.com/qdrant/fastembed) (ONNX Runtime, ~130 MB one-time model download).
 
+### Supported Browsers
+
+| Browser | macOS | Linux | Windows |
+|---|---|---|---|
+| **Chrome** | ✅ | ✅ | ✅ |
+| **Edge** | ✅ | ✅ | ✅ |
+| **Brave** | ✅ | ✅ | ✅ |
+| **Firefox** | ✅ | ✅ | ✅ |
+
+mindmark reads bookmark files directly from browser data directories — no export step, no browser extension.
+
 ---
 
 ## 📋 Prerequisites
@@ -121,16 +133,58 @@ pip install -e .[dev]
 
 ## ⚡ Quick Start
 
-### 1️⃣ Export your bookmarks
+### 1️⃣ Sync your bookmarks (no export needed!)
+
+```bash
+mindmark sync
+```
+
+That's it — mindmark auto-detects your installed browsers, reads their bookmark files directly, and builds a searchable index. **No manual export required.**
+
+> First run downloads the embedding model (~130 MB) and caches it locally. Every run after that is instant and fully offline.
+
+<details>
+<summary>💡 <strong>See which browsers were detected</strong></summary>
+
+```bash
+mindmark sync --list-browsers
+```
+
+Example output:
+
+```
+Browser      Profile                  Path
+-------      -------                  ----
+Chrome       Default                  ~/Library/.../Google/Chrome/Default/Bookmarks
+Chrome       Profile 3                ~/Library/.../Google/Chrome/Profile 3/Bookmarks
+Edge         Default                  ~/Library/.../Microsoft Edge/Default/Bookmarks
+```
+
+</details>
+
+<details>
+<summary>💡 <strong>Sync a specific browser only</strong></summary>
+
+```bash
+mindmark sync --browser chrome
+mindmark sync --browser firefox
+mindmark sync --browser edge
+mindmark sync --browser brave
+```
+
+</details>
 
-| Browser | How |
+<details>
+<summary>💡 <strong>Alternative — import from an exported HTML file</strong></summary>
+
+If you prefer the manual export workflow, or need to import bookmarks from an unsupported browser:
+
+| Browser | How to export |
 |---|---|
 | **Edge** | `edge://favorites` → `⋯` → **Export favorites** → save as HTML |
 | **Chrome** | `chrome://bookmarks` → `⋮` → **Export bookmarks** → save as HTML |
 | **Firefox** | `Ctrl+Shift+O` (`Cmd+Shift+O` on macOS) → **Import and Backup** → **Export Bookmarks to HTML** |
 
-### 2️⃣ Build the index
-
 ```bash
 # macOS / Linux
 mindmark index ~/Downloads/bookmarks.html
@@ -139,9 +193,9 @@ mindmark index ~/Downloads/bookmarks.html
 mindmark index "$env:USERPROFILE\Downloads\bookmarks.html"
 ```
 
-> First run downloads the embedding model (~130 MB) and caches it locally. Every run after that is instant and fully offline.
+</details>
 
-### 3️⃣ Search in natural language
+### 2️⃣ Search in natural language
 
 <p align="center">
   <img src="assets/mindmark-find.gif" alt="mindmark find demo" width="800" />
@@ -154,7 +208,7 @@ mindmark find "helm chart examples" --domain github.com
 mindmark find "docker compose setup" --folder devops
 ```
 
-### 4️⃣ Open a result directly
+### 3️⃣ Open a result directly
 
 ```bash
 mindmark open "k8s cheat sheet"           # opens the best match
@@ -179,7 +233,7 @@ mm open "docker setup"
 ```
 </details>
 
-### 5️⃣ JSON output for scripting
+### 4️⃣ JSON output for scripting
 
 Pipe results into **fzf**, **jq**, **Alfred**, **Raycast**, **PowerToys Run**, or any tool that accepts JSON:
 
@@ -195,6 +249,21 @@ mindmark find "istio service mesh" --json | ConvertFrom-Json | ForEach-Object {
 
 ## 📖 Usage
 
+### Syncing
+
+`mindmark sync` reads bookmarks directly from your browser data directories. It's **incremental** — only new or changed bookmarks are re-embedded, making re-syncs near-instant.
+
+```bash
+mindmark sync                         # sync all detected browsers
+mindmark sync --browser chrome        # sync only Chrome
+mindmark sync --browser firefox       # sync only Firefox
+mindmark sync --list-browsers         # list detected browsers and profiles
+```
+
+When you add new bookmarks in your browser, just run `mindmark sync` again — it will pick up only the changes.
+
+> 💡 **Note:** If you change the embedding model with `--model`, all bookmarks will be re-embedded on the next sync. Browser names are case-insensitive (e.g., `--browser Chrome` and `--browser chrome` both work).
+
 ### Filters
 
 Narrow down results without changing your query:
@@ -207,42 +276,47 @@ mindmark find "useful tools" -k 20                    # return top 20 instead of
 
 ### Re-indexing
 
-Just rerun `mindmark index <file>`. It clears and rebuilds the index. The model is cached, so re-indexing 800+ bookmarks takes only seconds.
+For the `sync` workflow, just rerun `mindmark sync`. It's incremental — only changed bookmarks are re-embedded.
+
+For the `index` workflow, rerun `mindmark index <file>`. It clears and rebuilds the index. The model is cached, so re-indexing 800+ bookmarks takes only seconds.
 
 ### Swap the embedding model
 
 ```bash
-mindmark index bookmarks.html --model BAAI/bge-small-en-v1.5              # default, 384-dim
-mindmark index bookmarks.html --model sentence-transformers/all-MiniLM-L6-v2
-mindmark index bookmarks.html --model BAAI/bge-base-en-v1.5               # 768-dim, higher quality
+mindmark sync --model BAAI/bge-small-en-v1.5                # default, 384-dim
+mindmark sync --model sentence-transformers/all-MiniLM-L6-v2
+mindmark sync --model BAAI/bge-base-en-v1.5                 # 768-dim, higher quality
 ```
 
-Switching models triggers a full re-embed automatically. See the [fastembed supported models list](https://qdrant.github.io/fastembed/examples/Supported_Models/).
+The `--model` flag also works with `mindmark index`. Switching models triggers a full re-embed automatically. See the [fastembed supported models list](https://qdrant.github.io/fastembed/examples/Supported_Models/).
 
 ---
 
 ## 🧠 How It Works
 
 ```
-Bookmarks HTML                                  "python async tutorial"
-      │                                                  │
-      ▼                                                  ▼
-  ┌────────┐    ┌──────────┐    ┌──────────┐     ┌──────────┐
-  │ Parse  │───▶│  Embed   │───▶│  Store   │     │  Embed   │
-  │  HTML  │    │ (ONNX)   │    │ (SQLite) │◀────│  query   │
-  └────────┘    └──────────┘    └──────────┘     └──────────┘
-                                      │                │
-                                      ▼                ▼
-                                ┌──────────────────────────┐
-                                │  Dot-product similarity  │
-                                │   → top-K results        │
+Browser data files                              "python async tutorial"
+(Chrome JSON / Firefox SQLite)                            │
+       │                                                  │
+       ▼                                                  ▼
+  ┌────────────┐  ┌──────────┐  ┌──────────┐     ┌──────────┐
+  │  Detect &  │─▶│  Embed   │─▶│  Store   │     │  Embed   │
+  │   Parse    │  │ (ONNX)   │  │ (SQLite) │◀────│  query   │
+  └────────────┘  └──────────┘  └──────────┘     └──────────┘
+                      ▲               │                │
+                      │               ▼                ▼
+                 only new/      ┌──────────────────────────┐
+                 changed        │  Dot-product similarity  │
+                 bookmarks      │   → top-K results        │
                                 └──────────────────────────┘
 ```
 
-1. **Parse** — A stateful tokenizer reads the Netscape bookmarks HTML and extracts every link with its full folder path.
-2. **Embed** — Each bookmark becomes a rich text string (`title | folder | domain | path`) and is passed through a BGE/MiniLM ONNX model. Vectors are L2-normalized.
-3. **Store** — Vectors live as `float32` blobs in a single SQLite file. For 800–10,000 bookmarks this is simpler than a vector DB and still sub-millisecond.
-4. **Search** — Encode the query, compute dot products against all vectors, return the top-K.
+1. **Detect** — Auto-discover installed browsers (Chrome, Edge, Brave, Firefox) and their profiles across macOS, Linux, and Windows.
+2. **Parse** — Read bookmark files natively: Chromium JSON format or Firefox `places.sqlite`. No export step needed.
+3. **Diff** — Hash each bookmark's content and compare against the existing index. Only new or changed bookmarks proceed to embedding.
+4. **Embed** — Each bookmark becomes a rich text string (`title | folder | domain | path`) and is passed through a BGE/MiniLM ONNX model. Vectors are L2-normalized.
+5. **Store** — Vectors live as `float32` blobs in a single SQLite file. A `bookmark_sources` table tracks which browser contributed each bookmark, so multi-browser syncs don't conflict.
+6. **Search** — Encode the query, compute dot products against all vectors, return the top-K.
 
 ---
 
@@ -357,6 +431,15 @@ ENTRYPOINT ["mindmark"]
 
 ```bash
 docker build -t mindmark .
+
+# Sync from browser bookmarks (mount browser data directories)
+# Note: browser data paths vary — this example is for macOS Chrome; the container-side mount point must be a location `mindmark sync` scans
+docker run --rm \
+    -v $HOME/.mindmark:/root/.mindmark \
+    -v "$HOME/Library/Application Support/Google/Chrome":/chrome:ro \
+    mindmark sync
+
+# Or import from an exported HTML file
 docker run --rm -v $HOME/.mindmark:/root/.mindmark \
     -v $HOME/Downloads:/downloads mindmark \
     index /downloads/bookmarks.html
diff --git a/src/mindmark/browsers/__init__.py b/src/mindmark/browsers/__init__.py
new file mode 100644
index 0000000..af265a8
--- /dev/null
+++ b/src/mindmark/browsers/__init__.py
@@ -0,0 +1,56 @@
+"""Browser detection, path resolution, and bookmark parsing.
+
+Provides auto-detection of installed browsers and their bookmark files,
+with parsers that produce the same ``Bookmark`` dataclass used by the
+rest of mindmark.
+"""
+from __future__ import annotations
+
+from dataclasses import dataclass, field
+from pathlib import Path
+import json
+import sqlite3
+
+from ..parser import Bookmark
+from ..index import SyncResult
+from .paths import detect_browsers, BrowserProfile, SUPPORTED_BROWSERS
+
+
+def parse_browser_bookmarks(profile: BrowserProfile) -> list[Bookmark]:
+    """Route *profile* to its parser; raise ValueError on unknown ``browser_type``."""
+    kind = profile.browser_type
+    if kind == "chromium":
+        from .chromium import parse_chromium_json
+        return parse_chromium_json(profile.bookmark_path)
+    if kind == "firefox":
+        from .firefox import parse_firefox_places
+        return parse_firefox_places(profile.bookmark_path)
+    raise ValueError(f"Unsupported browser type: {kind}")
+
+
+def collect_all_bookmarks(
+    browser_filter: str | None = None,
+) -> list[tuple[BrowserProfile, list[Bookmark]]]:
+    """Parse bookmarks from every detected profile, optionally one browser only.
+
+    ``browser_filter`` is matched case-insensitively against ``browser_name``.
+    Profiles whose parse raises a handled error are warned about on stderr
+    and left out of the returned list of (profile, bookmarks) pairs.
+    """
+    wanted = browser_filter.lower() if browser_filter else None
+    profiles = [
+        p for p in detect_browsers()
+        if wanted is None or p.browser_name.lower() == wanted
+    ]
+
+    collected: list[tuple[BrowserProfile, list[Bookmark]]] = []
+    for profile in profiles:
+        try:
+            parsed = parse_browser_bookmarks(profile)
+        except (OSError, ValueError, KeyError, json.JSONDecodeError,
+                sqlite3.Error) as e:
+            import sys
+            print(f"warning: failed to read {profile.browser_name} "
+                  f"({profile.profile_name}): {e}", file=sys.stderr)
+        else:
+            collected.append((profile, parsed))
+    return collected
diff --git a/src/mindmark/browsers/chromium.py b/src/mindmark/browsers/chromium.py
new file mode 100644
index 0000000..6e037f2
--- /dev/null
+++ b/src/mindmark/browsers/chromium.py
@@ -0,0 +1,80 @@
+"""Parse Chromium-based browser bookmarks (Chrome, Edge, Brave).
+
+The ``Bookmarks`` file is JSON with this structure::
+
+    {
+      "roots": {
+        "bookmark_bar": { "type": "folder", "children": [...] },
+        "other":        { "type": "folder", "children": [...] },
+        "synced":       { "type": "folder", "children": [...] }
+      }
+    }
+
+Each node is either a **folder** (``"type": "folder"``, has ``children``)
+or a **url** (``"type": "url"``, has ``url`` + ``name``).
+"""
+from __future__ import annotations
+
+import json
+from pathlib import Path
+
+from ..parser import Bookmark
+
+
+def parse_chromium_json(path: Path) -> list[Bookmark]:
+    """Parse a Chromium ``Bookmarks`` JSON file into a list of Bookmark objects.
+
+    The roots are walked in a fixed order (``bookmark_bar``, ``other``,
+    ``synced``).  Deduplicates by URL (keeps the first occurrence).  A file
+    without a ``roots`` entry yields an empty list.
+
+    Raises:
+        OSError: if the file cannot be opened or read.
+        json.JSONDecodeError: if the file is not valid JSON.
+        ValueError: if the top-level value or ``roots`` is not a JSON object.
+    """
+    path = Path(path)
+    with open(path, "r", encoding="utf-8", errors="replace") as f:
+        data = json.load(f)
+
+    # Reject unexpected shapes explicitly: calling ``.get`` on a list or a
+    # string would raise AttributeError instead of a parse error.
+    if not isinstance(data, dict):
+        raise ValueError(f"unexpected Bookmarks format (top level): {path}")
+    roots = data.get("roots") or {}
+    if not isinstance(roots, dict):
+        raise ValueError(f"unexpected Bookmarks format (roots): {path}")
+
+    bookmarks: list[Bookmark] = []
+    seen: set[str] = set()
+
+    for root_name in ("bookmark_bar", "other", "synced"):
+        node = roots.get(root_name)
+        if node and isinstance(node, dict):
+            _walk(node, [], bookmarks, seen)
+
+    return bookmarks
+
+
+def _walk(
+    node: dict,
+    folder_stack: list[str],
+    out: list[Bookmark],
+    seen: set[str],
+) -> None:
+    """Recursively walk a Chromium bookmark tree node.
+
+    ``url`` nodes are appended to *out* unless their URL is empty or already
+    in *seen*; ``folder`` nodes are descended into with their name pushed
+    onto the folder path.  Nodes of any other type, and entries that are
+    not JSON objects, are ignored.
+
+    Args:
+        node: the tree node to process.
+        folder_stack: names of the enclosing folders, outermost first.
+            Never mutated; children receive a new list.
+        out: accumulator the parsed bookmarks are appended to.
+        seen: URLs emitted so far, updated in place.
+    """
+    if not isinstance(node, dict):
+        # Malformed entry (null, string, number, ...): nothing to extract.
+        return
+    node_type = node.get("type", "")
+
+    if node_type == "url":
+        url = node.get("url", "")
+        if not isinstance(url, str) or not url or url in seen:
+            return
+        seen.add(url)
+
+        # Fall back to the URL for a missing *or empty* name so a bookmark
+        # never ends up with a blank title.
+        name = node.get("name") or url
+        try:
+            # Chromium stores dates as microseconds since 1601-01-01.
+            # NOTE(review): the raw value is stored as-is; confirm whether
+            # consumers of Bookmark.add_date expect a Unix-epoch value.
+            add_date = int(node.get("date_added") or 0)
+        except (ValueError, TypeError):
+            add_date = 0
+
+        out.append(Bookmark(
+            title=name,
+            url=url,
+            folder_path="/".join(folder_stack),
+            add_date=add_date,
+            icon=None,
+        ))
+
+    elif node_type == "folder":
+        folder_name = node.get("name", "Unnamed")
+        child_stack = folder_stack + [folder_name]
+        # ``"children": null`` is treated like an empty folder.
+        for child in node.get("children") or ():
+            _walk(child, child_stack, out, seen)
diff --git a/src/mindmark/browsers/firefox.py b/src/mindmark/browsers/firefox.py
new file mode 100644
index 0000000..e2a77e6
--- /dev/null
+++ b/src/mindmark/browsers/firefox.py
@@ -0,0 +1,147 @@
+"""Parse Firefox bookmarks from ``places.sqlite``.
+
+Firefox stores bookmarks in an SQLite database.  The browser holds a lock
+on the file while running, so we snapshot it into a temporary directory
+(SQLite backup API first, plain copy incl. WAL/SHM files as a fallback).
+"""
+from __future__ import annotations
+
+import shutil
+import sqlite3
+import tempfile
+from pathlib import Path
+
+from ..parser import Bookmark
+
+# Firefox bookmark types (moz_bookmarks.type)
+_TYPE_BOOKMARK = 1
+_TYPE_FOLDER = 2
+
+# Built-in root folder IDs to skip as folder-path components
+_ROOT_IDS = {1, 2, 3, 4, 5, 6}  # root, menu, toolbar, tags, unfiled, mobile
+
+
+def parse_firefox_places(path: Path) -> list[Bookmark]:
+    """Parse bookmarks from a Firefox ``places.sqlite`` file.
+
+    The database is first snapshotted into a temporary directory and only
+    the snapshot is queried.  The snapshot is taken with SQLite's backup
+    API; if that fails with ``sqlite3.OperationalError`` or ``OSError`` the
+    file (plus any ``-wal``/``-shm`` sidecars) is copied instead.
+
+    Raises:
+        FileNotFoundError: if *path* is not an existing file.
+    """
+    path = Path(path)
+    if not path.is_file():
+        raise FileNotFoundError(f"Firefox places.sqlite not found: {path}")
+
+    with tempfile.TemporaryDirectory(prefix="mindmark_ff_") as tmpdir:
+        dst = Path(tmpdir) / "places.sqlite"
+        try:
+            # SQLite backup API: creates a consistent snapshot even with WAL
+            src_con = sqlite3.connect(
+                path.resolve().as_uri() + "?mode=ro", uri=True
+            )
+            try:
+                dst_con = sqlite3.connect(str(dst))
+                try:
+                    src_con.backup(dst_con)
+                finally:
+                    # Close even when the backup fails, so the fallback copy
+                    # below never writes over a file we still hold open.
+                    dst_con.close()
+            finally:
+                src_con.close()
+        except (sqlite3.OperationalError, OSError):
+            # Fallback: filesystem copy if backup fails (e.g. locked by OS)
+            shutil.copy2(path, dst)
+            for suffix in ("-wal", "-shm"):
+                sidecar = path.parent / (path.name + suffix)
+                if sidecar.is_file():
+                    try:
+                        shutil.copy2(sidecar, Path(tmpdir) / (dst.name + suffix))
+                    except OSError:
+                        # Best effort: the main file alone is still readable.
+                        pass
+
+        return _read_places(dst)
+
+
+def _read_places(db_path: Path) -> list[Bookmark]:
+    """Open *db_path* read-only, query its bookmarks, and close it again."""
+    # Path.as_uri() yields a Windows-safe URI (drive letters, spaces).
+    read_only_uri = f"{db_path.resolve().as_uri()}?mode=ro"
+    snapshot = sqlite3.connect(read_only_uri, uri=True)
+    # Rows must be addressable by column name for the query helpers.
+    snapshot.row_factory = sqlite3.Row
+    try:
+        return _query_bookmarks(snapshot)
+    finally:
+        snapshot.close()
+
+
+def _build_folder_map(con: sqlite3.Connection) -> dict[int, str]:
+    """Build a mapping from folder id → full folder path string.
+
+    Paths are the folder titles from the outermost non-root ancestor down
+    to the folder itself, joined with ``/``.  Ids in ``_ROOT_IDS`` and
+    parents that are not folders contribute nothing; folders with an empty
+    title are skipped as path components.
+
+    The parent chain is resolved iteratively and stops when it revisits a
+    folder, so a parent cycle in a corrupt database cannot recurse forever.
+
+    *con* must yield rows addressable by column name (``sqlite3.Row``).
+    """
+    cur = con.execute(
+        "SELECT id, parent, title, type FROM moz_bookmarks WHERE type = ?",
+        (_TYPE_FOLDER,),
+    )
+    folders: dict[int, tuple[int, str]] = {}
+    for row in cur:
+        folders[row["id"]] = (row["parent"], row["title"] or "")
+
+    paths: dict[int, str] = {}
+    for fid in folders:
+        # Climb towards the root, remembering the unresolved folders.
+        chain: list[int] = []
+        on_chain: set[int] = set()
+        node = fid
+        base = ""
+        while True:
+            if node in paths:
+                base = paths[node]
+                break
+            if node not in folders or node in _ROOT_IDS:
+                paths[node] = ""
+                break
+            if node in on_chain:
+                # Parent cycle: treat the revisited folder as the top.
+                break
+            chain.append(node)
+            on_chain.add(node)
+            node = folders[node][0]
+
+        # Walk back down, extending the path one title at a time.
+        for node in reversed(chain):
+            title = folders[node][1]
+            if base and title:
+                base = f"{base}/{title}"
+            elif title:
+                base = title
+            paths[node] = base
+
+    return {fid: paths[fid] for fid in folders}
+
+
+def _query_bookmarks(con: sqlite3.Connection) -> list[Bookmark]:
+    """Return the bookmarks stored in an open places.sqlite connection.
+
+    Only rows of bookmark type with a non-NULL URL outside the ``place:``
+    scheme are considered; a URL is emitted once (first row wins).  A
+    bookmark without a title gets its URL as title.
+    """
+    folder_paths = _build_folder_map(con)
+
+    rows = con.execute("""
+        SELECT b.id, b.title, b.parent, b.dateAdded,
+               p.url
+        FROM moz_bookmarks b
+        JOIN moz_places p ON b.fk = p.id
+        WHERE b.type = ?
+          AND p.url IS NOT NULL
+          AND p.url NOT LIKE 'place:%'
+    """, (_TYPE_BOOKMARK,))
+
+    emitted: set[str] = set()
+    result: list[Bookmark] = []
+
+    for record in rows:
+        link = record["url"]
+        if not link or link in emitted:
+            continue
+        emitted.add(link)
+
+        try:
+            # Firefox stores dates as microseconds since epoch
+            added = int(record["dateAdded"] or 0)
+        except (ValueError, TypeError):
+            added = 0
+
+        result.append(Bookmark(
+            title=record["title"] or link,
+            url=link,
+            folder_path=folder_paths.get(record["parent"], ""),
+            add_date=added,
+            icon=None,
+        ))
+
+    return result
diff --git a/src/mindmark/browsers/paths.py b/src/mindmark/browsers/paths.py
new file mode 100644
index 0000000..4cf00ab
--- /dev/null
+++ b/src/mindmark/browsers/paths.py
@@ -0,0 +1,165 @@
+"""OS-specific browser bookmark path resolution and detection."""
+from __future__ import annotations
+
+import os
+import sys
+from dataclasses import dataclass
+from pathlib import Path
+
+
+@dataclass(frozen=True)
+class BrowserProfile:
+    """A detected browser profile with its bookmark file path.
+
+    When ``source_id`` is left empty it is derived as
+    ``"<browser_name lowercased>:<profile_name>"``.
+    """
+    # e.g. "Chrome", "Firefox"
+    browser_name: str
+    # "chromium" or "firefox"
+    browser_type: str
+    # e.g. "Default", "Profile 1", "default-release"
+    profile_name: str
+    # full path to the bookmark file
+    bookmark_path: Path
+    # unique id like "chrome:Default"
+    source_id: str = ""
+
+    def __post_init__(self):
+        if self.source_id:
+            return
+        derived = f"{self.browser_name.lower()}:{self.profile_name}"
+        # The dataclass is frozen, so bypass its __setattr__ guard.
+        object.__setattr__(self, "source_id", derived)
+
+
+# ---------------------------------------------------------------------------
+# Browser path definitions per OS
+# ---------------------------------------------------------------------------
+
+def _home() -> Path:
+    """Return the current user's home directory (``Path.home()``)."""
+    home_dir = Path.home()
+    return home_dir
+
+
+def _local_app_data() -> Path:
+    """Windows %LOCALAPPDATA%, or ``~/AppData/Local`` when unset or empty."""
+    configured = os.environ.get("LOCALAPPDATA")
+    if configured:
+        return Path(configured)
+    return Path.home() / "AppData" / "Local"
+
+
+def _app_data() -> Path:
+    """Windows %APPDATA%, or ``~/AppData/Roaming`` when unset or empty."""
+    configured = os.environ.get("APPDATA")
+    if configured:
+        return Path(configured)
+    return Path.home() / "AppData" / "Roaming"
+
+
+# File name of the bookmark store looked up inside each Chromium profile dir.
+_CHROMIUM_BOOKMARK_FILE = "Bookmarks"
+
+# Keyed by platform: "darwin", "linux", "win32".
+# Each entry: (browser_name, browser_type, path_parts_tuple)
+# Path parts are joined with Path.joinpath() — no OS-specific separators.
+# On darwin/linux the parts are relative to the home directory; on win32
+# the Chromium parts are relative to %LOCALAPPDATA% (see _chromium_base).
+_BROWSER_DEFS: dict[str, list[tuple[str, str, tuple[str, ...]]]] = {
+    "darwin": [
+        ("Chrome", "chromium",
+         ("Library", "Application Support", "Google", "Chrome")),
+        ("Edge", "chromium",
+         ("Library", "Application Support", "Microsoft Edge")),
+        ("Brave", "chromium",
+         ("Library", "Application Support", "BraveSoftware", "Brave-Browser")),
+        ("Firefox", "firefox",
+         ("Library", "Application Support", "Firefox", "Profiles")),
+    ],
+    "linux": [
+        ("Chrome", "chromium", (".config", "google-chrome")),
+        ("Edge", "chromium", (".config", "microsoft-edge")),
+        ("Brave", "chromium", (".config", "BraveSoftware", "Brave-Browser")),
+        ("Firefox", "firefox", (".mozilla", "firefox")),
+    ],
+    "win32": [
+        ("Chrome", "chromium", ("Google", "Chrome", "User Data")),
+        ("Edge", "chromium", ("Microsoft", "Edge", "User Data")),
+        ("Brave", "chromium", ("BraveSoftware", "Brave-Browser", "User Data")),
+        # Parts unused: _firefox_base builds the path from %APPDATA%.
+        ("Firefox", "firefox", ()),  # handled specially
+    ],
+}
+
+
+SUPPORTED_BROWSERS = ["chrome", "edge", "brave", "firefox"]
+
+
+def _chromium_base(path_parts: tuple[str, ...]) -> Path | None:
+    """Return the Chromium data directory for this OS, or None if absent.
+
+    *path_parts* is appended to %LOCALAPPDATA% on win32 and to the home
+    directory everywhere else.
+    """
+    root = _local_app_data() if sys.platform == "win32" else _home()
+    candidate = root.joinpath(*path_parts)
+    if candidate.is_dir():
+        return candidate
+    return None
+
+
+def _firefox_base(path_parts: tuple[str, ...]) -> Path | None:
+    """Return the Firefox profiles directory for this OS, or None if absent.
+
+    On win32 the location is fixed under %APPDATA% and *path_parts* is
+    ignored; elsewhere *path_parts* is appended to the home directory, and
+    an empty tuple means "no known location".
+    """
+    if sys.platform == "win32":
+        candidate = _app_data() / "Mozilla" / "Firefox" / "Profiles"
+    elif path_parts:
+        candidate = _home().joinpath(*path_parts)
+    else:
+        return None
+    return candidate if candidate.is_dir() else None
+
+
+def _discover_chromium_profiles(base: Path) -> list[BrowserProfile]:
+    """List the profiles under a Chromium data directory, sorted by path.
+
+    A profile is any direct sub-directory of *base* (e.g. "Default",
+    "Profile 1") that contains a bookmark file.  ``browser_name`` is left
+    empty here; the caller fills it in.
+    """
+    if not base.is_dir():
+        return []
+    return [
+        BrowserProfile(
+            browser_name="",  # filled by caller
+            browser_type="chromium",
+            profile_name=entry.name,
+            bookmark_path=entry / _CHROMIUM_BOOKMARK_FILE,
+        )
+        for entry in sorted(base.iterdir())
+        if entry.is_dir() and (entry / _CHROMIUM_BOOKMARK_FILE).is_file()
+    ]
+
+
+def _discover_firefox_profiles(base: Path) -> list[BrowserProfile]:
+    """List the Firefox profiles under *base*, sorted by path.
+
+    A profile is any direct sub-directory containing ``places.sqlite``;
+    the directory name becomes the profile name.
+    """
+    if not base.is_dir():
+        return []
+    hits: list[BrowserProfile] = []
+    for entry in sorted(base.iterdir()):
+        database = entry / "places.sqlite"
+        if entry.is_dir() and database.is_file():
+            hits.append(BrowserProfile(
+                browser_name="Firefox",
+                browser_type="firefox",
+                profile_name=entry.name,
+                bookmark_path=database,
+            ))
+    return hits
+
+
+def detect_browsers() -> list[BrowserProfile]:
+    """Auto-detect installed browsers and their profiles.
+
+    Looks up the browser definitions for the current platform (any
+    ``linux*`` platform string counts as "linux") and returns one
+    ``BrowserProfile`` per discovered profile that contains a bookmark
+    file.  Platforms without definitions yield an empty list.
+    """
+    os_key = "linux" if sys.platform.startswith("linux") else sys.platform
+    detected: list[BrowserProfile] = []
+
+    for name, kind, parts in _BROWSER_DEFS.get(os_key, []):
+        if kind == "chromium":
+            root = _chromium_base(parts)
+            if root is not None:
+                # Discovery leaves browser_name blank; rebuild each hit
+                # with the real name so source_id is derived from it.
+                detected.extend(
+                    BrowserProfile(
+                        browser_name=name,
+                        browser_type=hit.browser_type,
+                        profile_name=hit.profile_name,
+                        bookmark_path=hit.bookmark_path,
+                    )
+                    for hit in _discover_chromium_profiles(root)
+                )
+        elif kind == "firefox":
+            root = _firefox_base(parts)
+            if root is not None:
+                detected.extend(_discover_firefox_profiles(root))
+
+    return detected
diff --git a/src/mindmark/cli.py b/src/mindmark/cli.py
index 9df3251..79aeee5 100644
--- a/src/mindmark/cli.py
+++ b/src/mindmark/cli.py
@@ -9,7 +9,7 @@
 
 from . import __version__
 from .parser import parse_file
-from .index import Index, default_db_path, DEFAULT_MODEL
+from .index import Index, SyncResult, default_db_path, DEFAULT_MODEL
 
 
 def _cmd_index(args):
@@ -28,14 +28,25 @@ def _cmd_index(args):
     return 0
 
 
+def _auto_sync_hint(idx: Index) -> None:
+    """Tell the user how to populate the index; silent when it has entries."""
+    if idx.is_empty():
+        print("index is empty — run 'mindmark sync' to import bookmarks from your browsers,")
+        print("or run 'mindmark index <bookmarks.html>' to import from an exported file.")
+        print()
+
+
 def _cmd_find(args):
     idx = Index(db_path=args.db)
+    if not getattr(args, 'json', False):
+        _auto_sync_hint(idx)
     results = idx.search(
         query=args.query, k=args.top,
         domain=args.domain, folder=args.folder,
     )
     if not results:
-        print("no results (is the index empty? run: mindmark index <path-to-bookmarks.html>)")
+        print("no results (is the index empty? run: mindmark sync)")
         return 1
 
     if args.open is not None:
@@ -79,6 +90,7 @@ def _cmd_stats(args):
 
 def _cmd_open(args):
     idx = Index(db_path=args.db)
+    _auto_sync_hint(idx)
     results = idx.search(args.query, k=1)
     if not results:
         print("no results")
@@ -88,6 +100,49 @@ def _cmd_open(args):
     return 0
 
 
+def _cmd_sync(args):
+    """Handle ``mindmark sync``; returns the process exit code.
+
+    With ``--list-browsers`` the detected profiles are printed as a table
+    and nothing is indexed.  Otherwise every parsed profile is synced into
+    the index under its ``source_id`` and the summed counters are printed.
+    Returns 1 when nothing was detected or parsed, 0 otherwise.
+    """
+    from .browsers import collect_all_bookmarks, detect_browsers
+
+    if args.list_browsers:
+        detected = detect_browsers()
+        if not detected:
+            print("no supported browsers detected")
+            return 1
+        print(f"{'Browser':<12} {'Profile':<24} Path")
+        print(f"{'-------':<12} {'-------':<24} ----")
+        for prof in detected:
+            print(f"{prof.browser_name:<12} {prof.profile_name:<24} {prof.bookmark_path}")
+        return 0
+
+    print("detecting browsers...")
+    pairs = collect_all_bookmarks(browser_filter=args.browser)
+
+    if not pairs:
+        reason = (
+            f"no bookmarks found for browser: {args.browser}"
+            if args.browser
+            else "no supported browsers detected"
+        )
+        print(reason, file=sys.stderr)
+        return 1
+
+    idx = Index(db_path=args.db, model_name=args.model)
+    totals = SyncResult()
+
+    for profile, bookmarks in pairs:
+        print(f"syncing {profile.browser_name} ({profile.profile_name}): "
+              f"{len(bookmarks)} bookmarks...")
+        outcome = idx.sync(
+            bookmarks, source=profile.source_id, batch_size=args.batch_size
+        )
+        totals.added += outcome.added
+        totals.updated += outcome.updated
+        totals.removed += outcome.removed
+        totals.unchanged += outcome.unchanged
+        # Stay quiet for profiles where nothing changed.
+        if outcome.total_changed > 0:
+            print(f"  {outcome}")
+
+    print(f"\ndone. {totals}")
+    return 0
+
+
 def build_parser():
     p = argparse.ArgumentParser(
         prog="mindmark",
@@ -123,6 +178,22 @@ def build_parser():
     po.add_argument("query")
     po.set_defaults(func=_cmd_open)
 
+    psync = sub.add_parser(
+        "sync",
+        help="sync bookmarks directly from installed browsers (no export needed)",
+    )
+    psync.add_argument(
+        "--browser", type=str, default=None,
+        help="sync only this browser (chrome, edge, brave, firefox)",
+    )
+    psync.add_argument(
+        "--list-browsers", action="store_true",
+        help="list detected browsers and profiles, then exit",
+    )
+    psync.add_argument("--model", default=DEFAULT_MODEL)
+    psync.add_argument("--batch-size", type=int, default=64)
+    psync.set_defaults(func=_cmd_sync)
+
     return p
 
 
diff --git a/src/mindmark/index.py b/src/mindmark/index.py
index fd48de6..50fd430 100644
--- a/src/mindmark/index.py
+++ b/src/mindmark/index.py
@@ -1,6 +1,7 @@
 """Embedding + SQLite-backed vector index for bookmarks."""
 from __future__ import annotations
 
+import hashlib
 import os
 import sqlite3
 from dataclasses import dataclass
@@ -12,6 +13,8 @@
 
 DEFAULT_MODEL = "BAAI/bge-small-en-v1.5"
 
+_SCHEMA_VERSION = 2
+
 
 def default_db_path() -> Path:
     env = os.environ.get("MINDMARK_HOME")
@@ -41,7 +44,14 @@ def default_db_path() -> Path:
     add_date INTEGER NOT NULL,
     icon TEXT,
     embedding BLOB NOT NULL,
-    dim INTEGER NOT NULL
+    dim INTEGER NOT NULL,
+    content_hash TEXT NOT NULL DEFAULT ''
+);
+CREATE TABLE IF NOT EXISTS bookmark_sources (
+    url TEXT NOT NULL,
+    source TEXT NOT NULL,
+    content_hash TEXT NOT NULL DEFAULT '',
+    PRIMARY KEY (url, source)
 );
 CREATE INDEX IF NOT EXISTS idx_bookmarks_domain ON bookmarks(domain);
 CREATE INDEX IF NOT EXISTS idx_bookmarks_folder ON bookmarks(folder_path);
@@ -51,9 +61,40 @@ def default_db_path() -> Path:
 def _connect(db_path: Path) -> sqlite3.Connection:
     con = sqlite3.connect(db_path)
     con.executescript(_SCHEMA)
+    _migrate(con)
     return con
 
 
+def _migrate(con: sqlite3.Connection) -> None:
+    """Run schema migrations for existing databases.
+
+    A database without a ``schema_version`` row in ``meta`` is treated as
+    version 1.  The v1 → v2 step adds ``bookmarks.content_hash`` (when
+    missing), creates ``bookmark_sources``, records every bookmark that is
+    already indexed under the ``'html'`` source, and stamps the new version.
+    Databases already at version 2 or later are left untouched.
+    """
+    cur = con.cursor()
+    cur.execute("SELECT value FROM meta WHERE key = 'schema_version'")
+    row = cur.fetchone()
+    version = int(row[0]) if row else 1
+
+    if version < 2:
+        # Add content_hash column if missing (pre-v2 databases)
+        cols = {r[1] for r in cur.execute("PRAGMA table_info(bookmarks)")}
+        if "content_hash" not in cols:
+            cur.execute(
+                "ALTER TABLE bookmarks ADD COLUMN content_hash TEXT NOT NULL DEFAULT ''"
+            )
+        # Create bookmark_sources table
+        cur.execute("""
+            CREATE TABLE IF NOT EXISTS bookmark_sources (
+                url TEXT NOT NULL,
+                source TEXT NOT NULL,
+                content_hash TEXT NOT NULL DEFAULT '',
+                PRIMARY KEY (url, source)
+            )
+        """)
+        # Bookmarks indexed before v2 have no source row.  Without one, a
+        # later sync that drops the same URL from a browser would see zero
+        # references and delete the bookmark.  Register them under 'html',
+        # the source label rebuild() uses.
+        cur.execute(
+            "INSERT OR IGNORE INTO bookmark_sources (url, source, content_hash) "
+            "SELECT url, 'html', content_hash FROM bookmarks"
+        )
+        cur.execute(
+            "INSERT OR REPLACE INTO meta(key, value) VALUES ('schema_version', ?)",
+            (str(_SCHEMA_VERSION),),
+        )
+        con.commit()
+
+
 def _vec_to_blob(v: np.ndarray) -> bytes:
     return v.astype(np.float32).tobytes()
 
@@ -88,6 +129,32 @@ def embed_one(self, text: str) -> np.ndarray:
         return self.embed([text])[0]
 
 
+def _content_hash(b: Bookmark) -> str:
+    """Return a 16-hex-digit SHA-256 prefix over url, title, folder and domain.
+
+    The four fields are joined with NUL bytes before hashing.
+    """
+    fields = (b.url, b.title, b.folder_path, b.domain)
+    joined = "\0".join(f"{part}" for part in fields)
+    digest = hashlib.sha256(joined.encode())
+    return digest.hexdigest()[:16]
+
+
+@dataclass
+class SyncResult:
+    """Counters describing what one incremental sync did.
+
+    ``str()`` renders the four counters as a one-line summary; ``source``
+    is carried along but not part of that summary.
+    """
+    added: int = 0
+    updated: int = 0
+    removed: int = 0
+    unchanged: int = 0
+    source: str = ""
+
+    @property
+    def total_changed(self) -> int:
+        """Number of bookmarks added, updated or removed."""
+        return sum((self.added, self.updated, self.removed))
+
+    def __str__(self) -> str:
+        parts = (
+            f"{self.added} new",
+            f"{self.updated} updated",
+            f"{self.removed} removed",
+            f"{self.unchanged} unchanged",
+        )
+        return ", ".join(parts)
+
+
 class Index:
     def __init__(self, db_path: Path | None = None, model_name: str = DEFAULT_MODEL):
         self.db_path = Path(db_path) if db_path else default_db_path()
@@ -95,9 +162,189 @@ def __init__(self, db_path: Path | None = None, model_name: str = DEFAULT_MODEL)
         self.con = _connect(self.db_path)
         self.embedder = Embedder(model_name=model_name)
 
+    def is_empty(self) -> bool:
+        """Return True when the ``bookmarks`` table has no rows."""
+        (count,) = self.con.execute("SELECT COUNT(*) FROM bookmarks").fetchone()
+        return count == 0
+
+    def _model_changed(self) -> bool:
+        """Return True when ``meta`` records a model other than the current one.
+
+        A database with no recorded model counts as unchanged.
+        """
+        stored = self.con.execute(
+            "SELECT value FROM meta WHERE key = 'model'"
+        ).fetchone()
+        return stored is not None and stored[0] != self.model_name
+
+    def sync(
+        self,
+        bookmarks: list[Bookmark],
+        source: str = "html",
+        batch_size: int = 64,
+    ) -> SyncResult:
+        """Incrementally sync bookmarks from a source.
+
+        Only new or changed bookmarks (by content hash) are embedded.
+        Bookmarks removed from *this* source are deleted from the index
+        only if no other source references them.  An empty *bookmarks*
+        list removes everything recorded for *source*.
+
+        When the stored embedding model differs from the current one, every
+        bookmark of this source is re-embedded and the stored hashes of all
+        *other* sources are cleared, so their next sync re-embeds them too
+        instead of keeping vectors produced by the old model.
+
+        Args:
+            bookmarks: the complete current bookmark list of *source*;
+                duplicates by URL keep the first occurrence.
+            source: identifier the bookmarks are recorded under.
+            batch_size: number of texts passed to the embedder per call.
+
+        Returns:
+            A ``SyncResult`` with the added/updated/removed/unchanged counts.
+
+        Raises:
+            ValueError: if *bookmarks* is non-empty and ``batch_size < 1``.
+        """
+        result = SyncResult(source=source)
+
+        if not bookmarks:
+            # Delete all bookmarks from this source
+            result.removed = len(self._remove_source(source))
+            return result
+
+        if batch_size < 1:
+            # range() would reject 0 with an unrelated message and silently
+            # produce no batches for negative values.
+            raise ValueError(f"batch_size must be >= 1, got {batch_size}")
+
+        # If the embedding model changed, force full re-embed
+        force_reembed = self._model_changed()
+
+        # 1. Hash incoming bookmarks
+        new_map: dict[str, tuple[Bookmark, str]] = {}
+        for b in bookmarks:
+            if b.url not in new_map:  # dedup by URL
+                new_map[b.url] = (b, _content_hash(b))
+
+        # 2. Load existing hashes for this source
+        cur = self.con.cursor()
+        cur.execute(
+            "SELECT url, content_hash FROM bookmark_sources WHERE source = ?",
+            (source,),
+        )
+        existing: dict[str, str] = {r[0]: r[1] for r in cur.fetchall()}
+
+        # 3. Compute diff
+        new_urls = set(new_map.keys())
+        old_urls = set(existing.keys())
+
+        to_add_urls = new_urls - old_urls
+        to_delete_urls = old_urls - new_urls
+        common_urls = new_urls & old_urls
+
+        to_update_urls: set[str] = {
+            url for url in common_urls
+            if force_reembed or new_map[url][1] != existing[url]
+        }
+
+        result.unchanged = len(common_urls) - len(to_update_urls)
+        result.added = len(to_add_urls)
+        result.updated = len(to_update_urls)
+
+        # 4. Embed only what changed
+        to_embed_urls = to_add_urls | to_update_urls
+        embed_list = [new_map[u] for u in to_embed_urls]
+
+        embedded: dict[str, tuple[Bookmark, str, bytes, int]] = {}
+        for start in range(0, len(embed_list), batch_size):
+            chunk = embed_list[start:start + batch_size]
+            texts = [b.embedding_text() for b, _h in chunk]
+            vecs = self.embedder.embed(texts)
+            for (b, h), v in zip(chunk, vecs):
+                embedded[b.url] = (b, h, _vec_to_blob(v), int(v.shape[0]))
+
+        update_bookmark_sql = (
+            "UPDATE bookmarks SET title=?, folder_path=?, domain=?, "
+            "add_date=?, icon=?, embedding=?, dim=?, content_hash=? "
+            "WHERE url=?"
+        )
+
+        # 5. Apply all DB changes in a single transaction
+        cur = self.con.cursor()
+        try:
+            # Update model metadata
+            cur.execute(
+                "INSERT OR REPLACE INTO meta(key, value) VALUES ('model', ?)",
+                (self.model_name,),
+            )
+            if force_reembed:
+                # The recorded model is overwritten above, so later syncs of
+                # other sources no longer see a model change.  Clearing
+                # their hashes makes every one of their bookmarks compare as
+                # changed, which forces the re-embed on their next sync.
+                cur.execute(
+                    "UPDATE bookmark_sources SET content_hash='' "
+                    "WHERE source != ?",
+                    (source,),
+                )
+
+            # Insert/update bookmarks and sources
+            for url in to_add_urls:
+                b, h, vec_blob, dim = embedded[url]
+                # Check if URL exists from another source
+                cur.execute("SELECT 1 FROM bookmarks WHERE url = ?", (url,))
+                if cur.fetchone():
+                    # URL already indexed by another source — update metadata
+                    cur.execute(
+                        update_bookmark_sql,
+                        (b.title, b.folder_path, b.domain, b.add_date,
+                         b.icon, vec_blob, dim, h, url),
+                    )
+                else:
+                    cur.execute(
+                        "INSERT INTO bookmarks "
+                        "(url, title, folder_path, domain, add_date, icon, "
+                        "embedding, dim, content_hash) VALUES (?,?,?,?,?,?,?,?,?)",
+                        (url, b.title, b.folder_path, b.domain, b.add_date,
+                         b.icon, vec_blob, dim, h),
+                    )
+                cur.execute(
+                    "INSERT OR REPLACE INTO bookmark_sources (url, source, content_hash) "
+                    "VALUES (?, ?, ?)",
+                    (url, source, h),
+                )
+
+            for url in to_update_urls:
+                b, h, vec_blob, dim = embedded[url]
+                cur.execute(
+                    update_bookmark_sql,
+                    (b.title, b.folder_path, b.domain, b.add_date,
+                     b.icon, vec_blob, dim, h, url),
+                )
+                cur.execute(
+                    "UPDATE bookmark_sources SET content_hash=? "
+                    "WHERE url=? AND source=?",
+                    (h, url, source),
+                )
+
+            # Delete bookmarks removed from this source
+            for url in to_delete_urls:
+                cur.execute(
+                    "DELETE FROM bookmark_sources WHERE url=? AND source=?",
+                    (url, source),
+                )
+                # Only delete from bookmarks if no other source references it
+                cur.execute(
+                    "SELECT COUNT(*) FROM bookmark_sources WHERE url=?", (url,)
+                )
+                if cur.fetchone()[0] == 0:
+                    cur.execute("DELETE FROM bookmarks WHERE url=?", (url,))
+
+            result.removed = len(to_delete_urls)
+            self.con.commit()
+        except Exception:
+            self.con.rollback()
+            raise
+
+        return result
+
+    def _remove_source(self, source: str) -> list[str]:
+        """Drop every source row of *source* and return the affected URLs.
+
+        A bookmark is deleted from ``bookmarks`` as well once no source row
+        references its URL any more.  All deletions are committed together;
+        on any error they are rolled back and the error is re-raised.
+        """
+        con = self.con
+        cursor = con.cursor()
+        cursor.execute(
+            "SELECT url FROM bookmark_sources WHERE source = ?", (source,)
+        )
+        affected = [record[0] for record in cursor.fetchall()]
+        try:
+            for url in affected:
+                cursor.execute(
+                    "DELETE FROM bookmark_sources WHERE url=? AND source=?",
+                    (url, source),
+                )
+                cursor.execute(
+                    "SELECT COUNT(*) FROM bookmark_sources WHERE url=?", (url,)
+                )
+                (still_referenced,) = cursor.fetchone()
+                if still_referenced == 0:
+                    cursor.execute("DELETE FROM bookmarks WHERE url=?", (url,))
+            con.commit()
+        except Exception:
+            con.rollback()
+            raise
+        return affected
+
     def rebuild(self, bookmarks: list[Bookmark], batch_size: int = 64) -> dict:
         cur = self.con.cursor()
         cur.execute("DELETE FROM bookmarks")
+        cur.execute("DELETE FROM bookmark_sources")
         cur.execute("INSERT OR REPLACE INTO meta(key, value) VALUES ('model', ?)", (self.model_name,))
         self.con.commit()
 
@@ -106,24 +353,32 @@ def rebuild(self, bookmarks: list[Bookmark], batch_size: int = 64) -> dict:
             return {"indexed": 0, "model": self.model_name, "dim": 0}
 
         rows = []
+        source_rows = []
         for start in range(0, total, batch_size):
             chunk = bookmarks[start:start + batch_size]
             texts = [b.embedding_text() for b in chunk]
             vecs = self.embedder.embed(texts)
             for b, v in zip(chunk, vecs):
+                h = _content_hash(b)
                 rows.append((
                     b.url, b.title, b.folder_path, b.domain,
-                    b.add_date, b.icon, _vec_to_blob(v), int(v.shape[0]),
+                    b.add_date, b.icon, _vec_to_blob(v), int(v.shape[0]), h,
                 ))
+                source_rows.append((b.url, "html", h))
 
         cur.executemany(
             "INSERT OR REPLACE INTO bookmarks "
-            "(url, title, folder_path, domain, add_date, icon, embedding, dim) "
-            "VALUES (?,?,?,?,?,?,?,?)",
+            "(url, title, folder_path, domain, add_date, icon, embedding, dim, content_hash) "
+            "VALUES (?,?,?,?,?,?,?,?,?)",
             rows,
         )
+        cur.executemany(
+            "INSERT OR REPLACE INTO bookmark_sources (url, source, content_hash) "
+            "VALUES (?,?,?)",
+            source_rows,
+        )
         self.con.commit()
-        return {"indexed": total, "model": self.model_name, "dim": rows[0][-1]}
+        return {"indexed": total, "model": self.model_name, "dim": rows[0][-2]}
 
     def stats(self) -> dict:
         cur = self.con.cursor()
diff --git a/tests/test_browser_detection.py b/tests/test_browser_detection.py
new file mode 100644
index 0000000..f61a189
--- /dev/null
+++ b/tests/test_browser_detection.py
@@ -0,0 +1,88 @@
+"""Tests for browser detection and path resolution."""
+import sys
+from pathlib import Path
+from unittest.mock import patch
+
+from mindmark.browsers.paths import detect_browsers, BrowserProfile
+
+
+def test_browser_profile_source_id():
+    """An omitted source_id is derived as '<browser lowercased>:<profile>'."""
+    fields = {
+        "browser_name": "Chrome",
+        "browser_type": "chromium",
+        "profile_name": "Default",
+        "bookmark_path": Path("/fake/path"),
+    }
+    profile = BrowserProfile(**fields)
+    assert profile.source_id == "chrome:Default"
+
+
+def test_browser_profile_custom_source_id():
+    """An explicitly passed source_id is kept as given."""
+    fields = {
+        "browser_name": "Chrome",
+        "browser_type": "chromium",
+        "profile_name": "Default",
+        "bookmark_path": Path("/fake/path"),
+        "source_id": "custom:id",
+    }
+    profile = BrowserProfile(**fields)
+    assert profile.source_id == "custom:id"
+
+
+def test_detect_browsers_returns_list(tmp_path):
+    """detect_browsers returns a list even when every base dir is empty."""
+    module = "mindmark.browsers.paths"
+    # Redirect all three base directories into the (empty) tmp dir.
+    with patch(f"{module}._home", return_value=tmp_path), \
+         patch(f"{module}._local_app_data", return_value=tmp_path / "Local"), \
+         patch(f"{module}._app_data", return_value=tmp_path / "Roaming"):
+        found = detect_browsers()
+    assert isinstance(found, list)
+
+
+def test_detect_chromium_with_fake_profile(tmp_path):
+    """A fake Chrome 'Default' profile under tmp_path is detected."""
+    if sys.platform == "darwin":
+        parts = ("Library", "Application Support", "Google", "Chrome")
+    elif sys.platform.startswith("linux"):
+        parts = (".config", "google-chrome")
+    else:
+        parts = ("Google", "Chrome", "User Data")
+
+    profile_dir = tmp_path.joinpath(*parts, "Default")
+    profile_dir.mkdir(parents=True)
+    (profile_dir / "Bookmarks").write_text('{"roots":{}}')
+
+    module = "mindmark.browsers.paths"
+    with patch(f"{module}._home", return_value=tmp_path), \
+         patch(f"{module}._local_app_data", return_value=tmp_path):
+        detected = detect_browsers()
+
+    chrome = [p for p in detected if p.browser_name == "Chrome"]
+    assert len(chrome) >= 1
+    first = chrome[0]
+    assert first.profile_name == "Default"
+    assert first.browser_type == "chromium"
+
+
+def test_detect_firefox_with_fake_profile(tmp_path):
+    """A fake Firefox profile holding a places.sqlite is detected."""
+    import sqlite3
+
+    if sys.platform == "darwin":
+        parts = ("Library", "Application Support", "Firefox", "Profiles")
+    elif sys.platform.startswith("linux"):
+        parts = (".mozilla", "firefox")
+    else:
+        parts = ("Roaming", "Mozilla", "Firefox", "Profiles")
+
+    profile_dir = tmp_path.joinpath(*parts, "abc12345.default-release")
+    profile_dir.mkdir(parents=True)
+    # Detection only checks that the file exists; a minimal DB is enough.
+    db = sqlite3.connect(profile_dir / "places.sqlite")
+    db.execute("CREATE TABLE moz_places (id INTEGER PRIMARY KEY, url TEXT)")
+    db.close()
+
+    module = "mindmark.browsers.paths"
+    with patch(f"{module}._home", return_value=tmp_path), \
+         patch(f"{module}._app_data", return_value=tmp_path / "Roaming"):
+        detected = detect_browsers()
+
+    firefox = [p for p in detected if p.browser_name == "Firefox"]
+    assert len(firefox) >= 1
+    first = firefox[0]
+    assert first.browser_type == "firefox"
+    assert "default-release" in first.profile_name
diff --git a/tests/test_browsers_init.py b/tests/test_browsers_init.py
new file mode 100644
index 0000000..a04341b
--- /dev/null
+++ b/tests/test_browsers_init.py
@@ -0,0 +1,143 @@
+"""Tests for the browsers orchestration layer (__init__.py)."""
+import json
+import tempfile
+from pathlib import Path
+from unittest.mock import patch, MagicMock
+
+from mindmark.browsers import (
+    parse_browser_bookmarks,
+    collect_all_bookmarks,
+)
+from mindmark.browsers.paths import BrowserProfile
+
+
+def _make_chromium_profile(tmp_path: Path) -> BrowserProfile:
+    """Write a two-bookmark Chromium ``Bookmarks`` file and wrap it in a profile."""
+    def _folder(name: str, children: list) -> dict:
+        return {"children": children, "name": name, "type": "folder"}
+
+    # Two URL entries on the bookmarks bar; the other roots stay empty.
+    bar_links = [
+        {"name": "Example", "type": "url", "url": "https://example.com"},
+        {"name": "Test", "type": "url", "url": "https://test.com"},
+    ]
+    roots = {
+        "bookmark_bar": _folder("Bookmarks Bar", bar_links),
+        "other": _folder("Other", []),
+        "synced": _folder("Synced", []),
+    }
+
+    target = tmp_path / "Bookmarks"
+    target.write_text(json.dumps({"roots": roots}))
+    return BrowserProfile(
+        browser_name="Chrome",
+        browser_type="chromium",
+        profile_name="Default",
+        bookmark_path=target,
+    )
+
+
+def _make_firefox_profile(tmp_path: Path) -> BrowserProfile:
+    """Create a places.sqlite with one bookmark and return a profile pointing at it."""
+    import sqlite3
+    db_path = tmp_path / "places.sqlite"
+    script = """
+        CREATE TABLE moz_places (id INTEGER PRIMARY KEY, url TEXT);
+        CREATE TABLE moz_bookmarks (
+            id INTEGER PRIMARY KEY, type INTEGER, fk INTEGER,
+            parent INTEGER, title TEXT, dateAdded INTEGER
+        );
+        INSERT INTO moz_bookmarks (id, type, fk, parent, title) VALUES
+            (1, 2, NULL, 0, 'root'), (2, 2, NULL, 1, 'menu');
+        INSERT INTO moz_places (id, url) VALUES (1, 'https://firefox.example.com');
+        INSERT INTO moz_bookmarks (id, type, fk, parent, title, dateAdded) VALUES
+            (100, 1, 1, 2, 'Firefox Example', 0);
+    """
+    con = sqlite3.connect(db_path)
+    try:
+        con.executescript(script)
+    finally:
+        con.close()  # release the handle even if the script fails
+    return BrowserProfile(
+        browser_name="Firefox", browser_type="firefox",
+        profile_name="default-release", bookmark_path=db_path,
+    )
+
+
+def test_parse_browser_bookmarks_chromium(tmp_path):
+    """A chromium profile is parsed into its two bookmarked URLs."""
+    parsed = parse_browser_bookmarks(_make_chromium_profile(tmp_path))
+    assert len(parsed) == 2
+    found = {bm.url for bm in parsed}
+    assert "https://example.com" in found
+    assert "https://test.com" in found
+
+
+def test_parse_browser_bookmarks_firefox(tmp_path):
+    """A firefox profile is parsed into its single bookmarked URL."""
+    parsed = parse_browser_bookmarks(_make_firefox_profile(tmp_path))
+    assert len(parsed) == 1
+    assert parsed[0].url == "https://firefox.example.com"
+
+
+def test_parse_browser_bookmarks_unsupported():
+    """An unknown browser_type must raise ValueError mentioning 'Unsupported'."""
+    import pytest
+
+    profile = BrowserProfile(
+        browser_name="Safari",
+        browser_type="safari",
+        profile_name="Default",
+        bookmark_path=Path("/fake"),
+    )
+    with pytest.raises(ValueError, match="Unsupported"):
+        parse_browser_bookmarks(profile)
+
+
+def test_collect_all_bookmarks_with_filter(tmp_path):
+    """browser_filter narrows the collected profiles; None keeps them all."""
+    fake_profiles = []
+    for subdir, factory in (
+        ("chrome", _make_chromium_profile),
+        ("firefox", _make_firefox_profile),
+    ):
+        profile_dir = tmp_path / subdir
+        profile_dir.mkdir()
+        fake_profiles.append(factory(profile_dir))
+
+    # (filter passed in, browser_name expected back)
+    cases = (
+        ("Chrome", "Chrome"),    # exact-case filter
+        ("firefox", "Firefox"),  # lower-case filter still matches
+    )
+    with patch("mindmark.browsers.detect_browsers", return_value=fake_profiles):
+        for wanted, expected_name in cases:
+            matched = collect_all_bookmarks(browser_filter=wanted)
+            assert len(matched) == 1
+            assert matched[0][0].browser_name == expected_name
+
+        # No filter — gets all
+        everything = collect_all_bookmarks(browser_filter=None)
+        assert len(everything) == 2
+
+
+def test_collect_all_bookmarks_no_browsers():
+    """With no detected browsers the collected list is empty."""
+    with patch("mindmark.browsers.detect_browsers", return_value=[]):
+        assert collect_all_bookmarks() == []
+
+
+def test_collect_all_bookmarks_handles_parse_error(tmp_path, capsys):
+    """An unreadable profile yields no results and a warning on stderr."""
+    missing_file = tmp_path / "nonexistent"
+    broken = BrowserProfile(
+        browser_name="Chrome",
+        browser_type="chromium",
+        profile_name="Corrupt",
+        bookmark_path=missing_file,
+    )
+    with patch("mindmark.browsers.detect_browsers", return_value=[broken]):
+        assert collect_all_bookmarks() == []
+        stderr_text = capsys.readouterr().err
+    for needle in ("warning", "Chrome"):
+        assert needle in stderr_text
diff --git a/tests/test_chromium_parser.py b/tests/test_chromium_parser.py
new file mode 100644
index 0000000..fe6a709
--- /dev/null
+++ b/tests/test_chromium_parser.py
@@ -0,0 +1,135 @@
+"""Tests for the Chromium JSON bookmark parser."""
+import json
+import tempfile
+from pathlib import Path
+
+from mindmark.browsers.chromium import parse_chromium_json
+
+
+SAMPLE_CHROMIUM = {
+    "checksum": "abc123",
+    "roots": {
+        "bookmark_bar": {
+            "children": [
+                {
+                    "date_added": "13300000000000000",
+                    "name": "Python Docs",
+                    "type": "url",
+                    "url": "https://docs.python.org/3/",
+                },
+                {
+                    "children": [
+                        {
+                            "date_added": "13300000000000001",
+                            "name": "GitHub",
+                            "type": "url",
+                            "url": "https://github.com",
+                        },
+                        {
+                            "children": [
+                                {
+                                    "date_added": "13300000000000002",
+                                    "name": "Kusto Guide",
+                                    "type": "url",
+                                    "url": "https://eng.ms/docs/kusto",
+                                }
+                            ],
+                            "name": "Internal",
+                            "type": "folder",
+                        },
+                    ],
+                    "name": "Work",
+                    "type": "folder",
+                },
+            ],
+            "name": "Bookmarks Bar",
+            "type": "folder",
+        },
+        "other": {
+            "children": [
+                {
+                    "name": "Stack Overflow",
+                    "type": "url",
+                    "url": "https://stackoverflow.com",
+                }
+            ],
+            "name": "Other bookmarks",
+            "type": "folder",
+        },
+        "synced": {
+            "children": [],
+            "name": "Mobile bookmarks",
+            "type": "folder",
+        },
+    },
+    "version": 1,
+}
+
+
+def _write_json(data: dict) -> Path:
+    """Serialize *data* as JSON into a new temp file; the caller must delete it."""
+    with tempfile.NamedTemporaryFile(
+        mode="w", suffix=".json", delete=False, encoding="utf-8"
+    ) as handle:
+        json.dump(data, handle)
+    return Path(handle.name)
+
+
+def test_parses_urls_and_titles():
+    """URL entries from the bar and "other" roots come back with their titles."""
+    path = _write_json(SAMPLE_CHROMIUM)
+    try:
+        by_url = {b.url: b for b in parse_chromium_json(path)}
+    finally:
+        path.unlink()  # remove the temp file even when parsing raises
+    assert by_url["https://docs.python.org/3/"].title == "Python Docs"
+    assert {"https://github.com", "https://stackoverflow.com"} <= by_url.keys()
+
+
+def test_folder_paths():
+    """folder_path joins the root folder name and nested folder names with '/'."""
+    path = _write_json(SAMPLE_CHROMIUM)
+    try:
+        by_url = {b.url: b for b in parse_chromium_json(path)}
+    finally:
+        path.unlink()  # remove the temp file even when parsing raises
+    # Bookmarks-bar entries, from the top level down to Work/Internal
+    assert by_url["https://docs.python.org/3/"].folder_path == "Bookmarks Bar"
+    assert by_url["https://github.com"].folder_path == "Bookmarks Bar/Work"
+    assert by_url["https://eng.ms/docs/kusto"].folder_path == "Bookmarks Bar/Work/Internal"
+    # "Other" root
+    assert by_url["https://stackoverflow.com"].folder_path == "Other bookmarks"
+
+
+def test_deduplicates_by_url():
+    """A URL bookmarked under two roots is returned only once."""
+    data = json.loads(json.dumps(SAMPLE_CHROMIUM))  # deep copy via JSON round-trip
+    data["roots"]["other"]["children"].append(
+        {"name": "Python Docs Dup", "type": "url", "url": "https://docs.python.org/3/"}
+    )
+    path = _write_json(data)
+    try:
+        bms = parse_chromium_json(path)
+    finally:
+        path.unlink()  # remove the temp file even when parsing raises
+    urls = [b.url for b in bms]
+    assert urls.count("https://docs.python.org/3/") == 1
+
+
+def test_empty_roots():
+    path = _write_json({"roots": {"bookmark_bar": {"children": [], "name": "Bar", "type": "folder"}}})
+    try:
+        assert parse_chromium_json(path) == []  # a childless root yields no bookmarks
+    finally:
+        path.unlink()  # remove the temp file even when the assertion fails
+
+
+def test_embedding_text_contains_key_fields():
+    """embedding_text() mentions the title, the host and the folder path."""
+    path = _write_json(SAMPLE_CHROMIUM)
+    try:
+        bms = parse_chromium_json(path)
+    finally:
+        path.unlink()  # remove the temp file even when parsing raises
+    text = next(b for b in bms if "kusto" in b.url).embedding_text()
+    assert "Kusto Guide" in text and "eng.ms" in text and "Work/Internal" in text
diff --git a/tests/test_firefox_parser.py b/tests/test_firefox_parser.py
new file mode 100644
index 0000000..2e2feb9
--- /dev/null
+++ b/tests/test_firefox_parser.py
@@ -0,0 +1,137 @@
+"""Tests for the Firefox places.sqlite bookmark parser."""
+import sqlite3
+import tempfile
+from pathlib import Path
+
+from mindmark.browsers.firefox import parse_firefox_places
+
+
+def _create_places_db() -> Path:
+    """Create a temp places.sqlite with test bookmarks; the caller deletes it."""
+    tmp = tempfile.NamedTemporaryFile(suffix=".sqlite", delete=False, prefix="mindmark_test_ff_")
+    tmp.close()
+    db_path = Path(tmp.name)
+    script = """
+        CREATE TABLE moz_places (
+            id INTEGER PRIMARY KEY,
+            url TEXT
+        );
+        CREATE TABLE moz_bookmarks (
+            id INTEGER PRIMARY KEY,
+            type INTEGER,
+            fk INTEGER,
+            parent INTEGER,
+            title TEXT,
+            dateAdded INTEGER
+        );
+
+        -- Root folders (IDs 1-6 are built-in roots)
+        INSERT INTO moz_bookmarks (id, type, fk, parent, title) VALUES
+            (1, 2, NULL, 0, 'root'),
+            (2, 2, NULL, 1, 'menu'),
+            (3, 2, NULL, 1, 'toolbar'),
+            (4, 2, NULL, 1, 'tags'),
+            (5, 2, NULL, 1, 'unfiled'),
+            (6, 2, NULL, 1, 'mobile');
+
+        -- User folders
+        INSERT INTO moz_bookmarks (id, type, fk, parent, title) VALUES
+            (100, 2, NULL, 3, 'Work'),
+            (101, 2, NULL, 100, 'Internal');
+
+        -- Places (URLs)
+        INSERT INTO moz_places (id, url) VALUES
+            (1, 'https://docs.python.org/3/'),
+            (2, 'https://github.com'),
+            (3, 'https://eng.ms/docs/kusto'),
+            (4, 'https://stackoverflow.com');
+
+        -- Bookmarks referencing places
+        INSERT INTO moz_bookmarks (id, type, fk, parent, title, dateAdded) VALUES
+            (200, 1, 1, 3, 'Python Docs', 1700000000000000),
+            (201, 1, 2, 100, 'GitHub', 1700000000000001),
+            (202, 1, 3, 101, 'Kusto Guide', 1700000000000002),
+            (203, 1, 4, 5, 'Stack Overflow', 1700000000000003);
+    """
+    con = sqlite3.connect(db_path)
+    try:
+        con.executescript(script)
+    finally:
+        con.close()  # release the handle even if the script fails
+    return db_path
+
+
+def test_parses_urls_and_titles():
+    """Bookmarked URLs are returned together with their titles."""
+    path = _create_places_db()
+    try:
+        by_url = {b.url: b for b in parse_firefox_places(path)}
+    finally:
+        path.unlink()  # remove the temp database even when parsing raises
+    assert by_url["https://docs.python.org/3/"].title == "Python Docs"
+    assert {"https://github.com", "https://stackoverflow.com"} <= by_url.keys()
+
+
+def test_folder_paths():
+    """Nested user folders are joined with '/'; the toolbar root is omitted."""
+    path = _create_places_db()
+    try:
+        by_url = {b.url: b for b in parse_firefox_places(path)}
+    finally:
+        path.unlink()  # remove the temp database even when parsing raises
+    assert by_url["https://github.com"].folder_path == "Work"  # toolbar > Work
+    assert by_url["https://eng.ms/docs/kusto"].folder_path == "Work/Internal"
+
+
+def test_skips_place_urls():
+    """Firefox internal place: URLs should be excluded."""
+    with tempfile.NamedTemporaryFile(suffix=".sqlite", delete=False) as tmp:
+        db_path = Path(tmp.name)
+    con = sqlite3.connect(db_path)
+    con.executescript("""
+        CREATE TABLE moz_places (id INTEGER PRIMARY KEY, url TEXT);
+        CREATE TABLE moz_bookmarks (
+            id INTEGER PRIMARY KEY, type INTEGER, fk INTEGER,
+            parent INTEGER, title TEXT, dateAdded INTEGER
+        );
+        INSERT INTO moz_bookmarks (id, type, fk, parent, title) VALUES
+            (1, 2, NULL, 0, 'root'),
+            (2, 2, NULL, 1, 'menu');
+        INSERT INTO moz_places (id, url) VALUES
+            (1, 'place:sort=8&maxResults=10'),
+            (2, 'https://example.com');
+        INSERT INTO moz_bookmarks (id, type, fk, parent, title, dateAdded) VALUES
+            (100, 1, 1, 2, 'Recent Tags', 0),
+            (101, 1, 2, 2, 'Example', 0);
+    """)
+    con.close()
+    try:
+        urls = [b.url for b in parse_firefox_places(db_path)]
+    finally:
+        db_path.unlink()  # remove the temp database even when parsing raises
+    assert "https://example.com" in urls
+    assert not any(u.startswith("place:") for u in urls)
+
+
+def test_deduplicates_by_url():
+    with tempfile.NamedTemporaryFile(suffix=".sqlite", delete=False) as tmp:
+        db_path = Path(tmp.name)
+    con = sqlite3.connect(db_path)
+    con.executescript("""
+        CREATE TABLE moz_places (id INTEGER PRIMARY KEY, url TEXT);
+        CREATE TABLE moz_bookmarks (
+            id INTEGER PRIMARY KEY, type INTEGER, fk INTEGER,
+            parent INTEGER, title TEXT, dateAdded INTEGER
+        );
+        INSERT INTO moz_bookmarks (id, type, fk, parent, title) VALUES
+            (1, 2, NULL, 0, 'root'), (2, 2, NULL, 1, 'menu');
+        INSERT INTO moz_places (id, url) VALUES (1, 'https://example.com');
+        INSERT INTO moz_bookmarks (id, type, fk, parent, title, dateAdded) VALUES
+            (100, 1, 1, 2, 'Example A', 0),
+            (101, 1, 1, 2, 'Example B', 0);
+    """)
+    con.close()
+    try:  # two bookmark rows share one place row; only one result may come back
+        assert [b.url for b in parse_firefox_places(db_path)].count("https://example.com") == 1
+    finally:
+        db_path.unlink()  # remove the temp database even when the check fails
diff --git a/tests/test_incremental_sync.py b/tests/test_incremental_sync.py
new file mode 100644
index 0000000..66e9943
--- /dev/null
+++ b/tests/test_incremental_sync.py
@@ -0,0 +1,370 @@
+"""Tests for incremental sync logic in Index."""
+import sqlite3
+import tempfile
+from pathlib import Path
+from unittest.mock import patch, MagicMock
+
+import numpy as np
+
+from mindmark.parser import Bookmark
+from mindmark.index import Index, SyncResult, _content_hash
+
+
+def _make_bookmark(url: str, title: str = "T", folder: str = "") -> Bookmark:
+    return Bookmark(title=title, url=url, folder_path=folder, add_date=0, icon=None)
+
+
+def _make_index(tmp: Path) -> Index:
+    """Create an Index with a mock embedder to avoid loading the real model."""
+    idx = Index(db_path=tmp / "test.db")
+    # Replace embedder with a mock that returns deterministic vectors
+    mock_embedder = MagicMock()
+    dim = 4
+    def fake_embed(texts):
+        vecs = np.random.RandomState(42).randn(len(texts), dim).astype(np.float32)
+        norms = np.linalg.norm(vecs, axis=1, keepdims=True)
+        norms[norms == 0] = 1.0
+        return vecs / norms
+    mock_embedder.embed.side_effect = fake_embed
+    mock_embedder.embed_one.side_effect = lambda t: fake_embed([t])[0]
+    idx.embedder = mock_embedder
+    return idx
+
+
+def test_sync_adds_new_bookmarks():
+    with tempfile.TemporaryDirectory() as tmpdir:
+        idx = _make_index(Path(tmpdir))
+        bms = [
+            _make_bookmark("https://a.com", "A"),
+            _make_bookmark("https://b.com", "B"),
+        ]
+        result = idx.sync(bms, source="chrome:Default")
+        assert result.added == 2
+        assert result.updated == 0
+        assert result.removed == 0
+        assert result.unchanged == 0
+        assert not idx.is_empty()
+
+
+def test_sync_unchanged_skips_embedding():
+    with tempfile.TemporaryDirectory() as tmpdir:
+        idx = _make_index(Path(tmpdir))
+        bms = [_make_bookmark("https://a.com", "A")]
+        idx.sync(bms, source="test")
+
+        # Reset call count
+        idx.embedder.embed.reset_mock()
+
+        # Sync again with same data
+        result = idx.sync(bms, source="test")
+        assert result.added == 0
+        assert result.unchanged == 1
+        # embed should NOT be called for unchanged bookmarks
+        idx.embedder.embed.assert_not_called()
+
+
+def test_sync_updates_changed_bookmarks():
+    with tempfile.TemporaryDirectory() as tmpdir:
+        idx = _make_index(Path(tmpdir))
+        bms = [_make_bookmark("https://a.com", "A", "Folder1")]
+        idx.sync(bms, source="test")
+
+        # Change the title
+        bms2 = [_make_bookmark("https://a.com", "A Updated", "Folder1")]
+        result = idx.sync(bms2, source="test")
+        assert result.updated == 1
+        assert result.added == 0
+        assert result.unchanged == 0
+
+        # Verify the title was updated in the DB
+        cur = idx.con.cursor()
+        cur.execute("SELECT title FROM bookmarks WHERE url = ?", ("https://a.com",))
+        assert cur.fetchone()[0] == "A Updated"
+
+
+def test_sync_removes_deleted_bookmarks():
+    with tempfile.TemporaryDirectory() as tmpdir:
+        idx = _make_index(Path(tmpdir))
+        bms = [
+            _make_bookmark("https://a.com", "A"),
+            _make_bookmark("https://b.com", "B"),
+        ]
+        idx.sync(bms, source="test")
+
+        # Remove one bookmark
+        bms2 = [_make_bookmark("https://a.com", "A")]
+        result = idx.sync(bms2, source="test")
+        assert result.removed == 1
+        assert result.unchanged == 1
+
+        # Verify b.com is gone
+        cur = idx.con.cursor()
+        cur.execute("SELECT COUNT(*) FROM bookmarks WHERE url = ?", ("https://b.com",))
+        assert cur.fetchone()[0] == 0
+
+
+def test_multi_source_no_cross_deletion():
+    """Syncing source A should not delete bookmarks from source B."""
+    with tempfile.TemporaryDirectory() as tmpdir:
+        idx = _make_index(Path(tmpdir))
+
+        # Source A adds url X
+        bms_a = [_make_bookmark("https://shared.com", "Shared")]
+        idx.sync(bms_a, source="chrome:Default")
+
+        # Source B also adds url X
+        bms_b = [_make_bookmark("https://shared.com", "Shared")]
+        idx.sync(bms_b, source="firefox:default")
+
+        # Source A removes url X
+        result = idx.sync([], source="chrome:Default")
+        assert result.removed == 1  # removed from source A
+
+        # But the bookmark should still exist (source B still references it)
+        cur = idx.con.cursor()
+        cur.execute("SELECT COUNT(*) FROM bookmarks WHERE url = ?", ("https://shared.com",))
+        assert cur.fetchone()[0] == 1
+
+        # Now remove from source B too
+        result = idx.sync([], source="firefox:default")
+        cur.execute("SELECT COUNT(*) FROM bookmarks WHERE url = ?", ("https://shared.com",))
+        assert cur.fetchone()[0] == 0  # now truly gone
+
+
+def test_sync_result_str():
+    r = SyncResult(added=3, updated=1, removed=2, unchanged=10)
+    s = str(r)
+    assert "3 new" in s
+    assert "1 updated" in s
+    assert "2 removed" in s
+
+
+def test_content_hash_deterministic():
+    b = _make_bookmark("https://a.com", "A", "Work")
+    h1 = _content_hash(b)
+    h2 = _content_hash(b)
+    assert h1 == h2
+    assert len(h1) == 16  # truncated sha256
+
+
+def test_content_hash_changes_on_title_change():
+    b1 = _make_bookmark("https://a.com", "A", "Work")
+    b2 = _make_bookmark("https://a.com", "B", "Work")
+    assert _content_hash(b1) != _content_hash(b2)
+
+
+def test_schema_migration_on_old_db():
+    """Ensure opening a v1 database migrates cleanly."""
+    with tempfile.TemporaryDirectory() as tmpdir:
+        db_path = Path(tmpdir) / "old.db"
+        # Create a v1 database (no content_hash, no bookmark_sources)
+        con = sqlite3.connect(db_path)
+        con.executescript("""
+            CREATE TABLE meta (key TEXT PRIMARY KEY, value TEXT NOT NULL);
+            CREATE TABLE bookmarks (
+                id INTEGER PRIMARY KEY AUTOINCREMENT,
+                url TEXT UNIQUE NOT NULL,
+                title TEXT NOT NULL,
+                folder_path TEXT NOT NULL,
+                domain TEXT NOT NULL,
+                add_date INTEGER NOT NULL,
+                icon TEXT,
+                embedding BLOB NOT NULL,
+                dim INTEGER NOT NULL
+            );
+        """)
+        con.close()
+
+        # Opening with Index should trigger migration
+        idx = Index(db_path=db_path)
+        # Verify new columns/tables exist
+        cur = idx.con.cursor()
+        cols = {r[1] for r in cur.execute("PRAGMA table_info(bookmarks)")}
+        assert "content_hash" in cols
+
+        tables = {r[0] for r in cur.execute(
+            "SELECT name FROM sqlite_master WHERE type='table'"
+        )}
+        assert "bookmark_sources" in tables
+
+
+# ---- rebuild() tests ----
+
+def test_rebuild_populates_content_hash():
+    """rebuild() must set content_hash so sync() can do incremental diffs."""
+    with tempfile.TemporaryDirectory() as tmpdir:
+        idx = _make_index(Path(tmpdir))
+        bms = [_make_bookmark("https://a.com", "A", "Work")]
+        idx.rebuild(bms)
+
+        cur = idx.con.cursor()
+        cur.execute("SELECT content_hash FROM bookmarks WHERE url = ?", ("https://a.com",))
+        h = cur.fetchone()[0]
+        assert h and len(h) == 16  # non-empty, truncated sha256
+
+
+def test_rebuild_populates_bookmark_sources():
+    """rebuild() must populate bookmark_sources with source='html'."""
+    with tempfile.TemporaryDirectory() as tmpdir:
+        idx = _make_index(Path(tmpdir))
+        bms = [
+            _make_bookmark("https://a.com", "A"),
+            _make_bookmark("https://b.com", "B"),
+        ]
+        idx.rebuild(bms)
+
+        cur = idx.con.cursor()
+        cur.execute("SELECT url, source FROM bookmark_sources ORDER BY url")
+        rows = cur.fetchall()
+        assert len(rows) == 2
+        assert rows[0] == ("https://a.com", "html")
+        assert rows[1] == ("https://b.com", "html")
+
+
+def test_rebuild_clears_previous_data():
+    """rebuild() should clear old bookmarks and sources before inserting."""
+    with tempfile.TemporaryDirectory() as tmpdir:
+        idx = _make_index(Path(tmpdir))
+        idx.rebuild([_make_bookmark("https://old.com", "Old")])
+        idx.rebuild([_make_bookmark("https://new.com", "New")])
+
+        cur = idx.con.cursor()
+        cur.execute("SELECT COUNT(*) FROM bookmarks")
+        assert cur.fetchone()[0] == 1
+        cur.execute("SELECT url FROM bookmarks")
+        assert cur.fetchone()[0] == "https://new.com"
+        cur.execute("SELECT COUNT(*) FROM bookmark_sources")
+        assert cur.fetchone()[0] == 1
+
+
+def test_rebuild_empty_list():
+    with tempfile.TemporaryDirectory() as tmpdir:
+        idx = _make_index(Path(tmpdir))
+        result = idx.rebuild([])
+        assert result["indexed"] == 0
+        assert idx.is_empty()
+
+
+def test_rebuild_then_sync_detects_unchanged():
+    """rebuild() followed by sync() with the same data should show all unchanged."""
+    with tempfile.TemporaryDirectory() as tmpdir:
+        idx = _make_index(Path(tmpdir))
+        bms = [_make_bookmark("https://a.com", "A", "Work")]
+        idx.rebuild(bms)
+
+        idx.embedder.embed.reset_mock()
+        result = idx.sync(bms, source="html")
+        assert result.unchanged == 1
+        assert result.added == 0
+        idx.embedder.embed.assert_not_called()
+
+
+# ---- stats() tests ----
+
+def test_stats_on_populated_index():
+    with tempfile.TemporaryDirectory() as tmpdir:
+        idx = _make_index(Path(tmpdir))
+        bms = [
+            _make_bookmark("https://github.com/a", "Repo A", "Work"),
+            _make_bookmark("https://github.com/b", "Repo B", "Work"),
+            _make_bookmark("https://docs.python.org", "Python Docs", "Ref"),
+        ]
+        idx.rebuild(bms)
+
+        s = idx.stats()
+        assert s["total"] == 3
+        assert s["model"] is not None
+        assert str(idx.db_path) in s["db_path"]
+        # github.com should be top domain with count 2
+        domains = dict(s["top_domains"])
+        assert domains.get("github.com") == 2
+
+
+def test_stats_on_empty_index():
+    with tempfile.TemporaryDirectory() as tmpdir:
+        idx = _make_index(Path(tmpdir))
+        s = idx.stats()
+        assert s["total"] == 0
+
+
+# ---- search() tests ----
+
+def test_search_returns_results():
+    with tempfile.TemporaryDirectory() as tmpdir:
+        idx = _make_index(Path(tmpdir))
+        bms = [
+            _make_bookmark("https://a.com", "Alpha"),
+            _make_bookmark("https://b.com", "Beta"),
+        ]
+        idx.rebuild(bms)
+
+        results = idx.search("anything", k=10)
+        assert len(results) == 2
+        assert all("score" in r for r in results)
+        assert all("url" in r for r in results)
+
+
+def test_search_empty_index():
+    with tempfile.TemporaryDirectory() as tmpdir:
+        idx = _make_index(Path(tmpdir))
+        results = idx.search("test")
+        assert results == []
+
+
+def test_search_domain_filter():
+    with tempfile.TemporaryDirectory() as tmpdir:
+        idx = _make_index(Path(tmpdir))
+        bms = [
+            _make_bookmark("https://github.com/x", "GitHub"),
+            _make_bookmark("https://docs.python.org", "Docs"),
+        ]
+        idx.rebuild(bms)
+
+        results = idx.search("test", domain="github.com")
+        assert all("github.com" in r["domain"] for r in results)
+
+
+def test_search_folder_filter():
+    with tempfile.TemporaryDirectory() as tmpdir:
+        idx = _make_index(Path(tmpdir))
+        bms = [
+            _make_bookmark("https://a.com", "A", "Work/Internal"),
+            _make_bookmark("https://b.com", "B", "Personal"),
+        ]
+        idx.rebuild(bms)
+
+        results = idx.search("test", folder="work")
+        assert all("work" in r["folder_path"].lower() for r in results)
+
+
+def test_search_k_limit():
+    with tempfile.TemporaryDirectory() as tmpdir:
+        idx = _make_index(Path(tmpdir))
+        bms = [_make_bookmark(f"https://{i}.com", f"Site {i}") for i in range(20)]
+        idx.rebuild(bms)
+
+        results = idx.search("test", k=5)
+        assert len(results) == 5
+
+
+# ---- _remove_source() tests ----
+
+def test_remove_source_cleans_orphans():
+    with tempfile.TemporaryDirectory() as tmpdir:
+        idx = _make_index(Path(tmpdir))
+        bms = [_make_bookmark("https://a.com", "A")]
+        idx.sync(bms, source="chrome:Default")
+
+        removed = idx._remove_source("chrome:Default")
+        assert len(removed) == 1
+        assert idx.is_empty()
+
+
+def test_remove_source_preserves_other_sources():
+    with tempfile.TemporaryDirectory() as tmpdir:
+        idx = _make_index(Path(tmpdir))
+        idx.sync([_make_bookmark("https://a.com", "A")], source="chrome:Default")
+        idx.sync([_make_bookmark("https://a.com", "A")], source="firefox:default")
+
+        idx._remove_source("chrome:Default")
+        assert not idx.is_empty()  # firefox still references it

From 330f40dca59dc7ff1438b70d330dd9a1ddcef785 Mon Sep 17 00:00:00 2001
From: Sukanth Gunda <contact.sukanth@gmail.com>
Date: Sat, 18 Apr 2026 13:57:42 -0400
Subject: [PATCH 2/2] Add Index.close and use pytest fixture in tests

Add a close() method to Index to explicitly close the SQLite connection. Refactor tests to use pytest (import pytest) and a shared idx fixture that creates an Index via _make_index(tmp_path/"test.db") and ensures the DB is closed on teardown. Update _make_index signature to accept a db_path and convert many tests to use the idx fixture (removing tempfile usage and simplifying setup/teardown). Also ensure the migration test closes the index after assertions. These changes improve test hygiene and ensure DB connections are properly cleaned up.
---
 src/mindmark/index.py          |   4 +
 tests/test_incremental_sync.py | 469 +++++++++++++++------------------
 2 files changed, 223 insertions(+), 250 deletions(-)

diff --git a/src/mindmark/index.py b/src/mindmark/index.py
index 50fd430..5d0da1d 100644
--- a/src/mindmark/index.py
+++ b/src/mindmark/index.py
@@ -162,6 +162,10 @@ def __init__(self, db_path: Path | None = None, model_name: str = DEFAULT_MODEL)
         self.con = _connect(self.db_path)
         self.embedder = Embedder(model_name=model_name)
 
+    def close(self) -> None:
+        """Close ``self.con``, the connection opened by ``_connect`` in ``__init__``."""
+        self.con.close()
+
     def is_empty(self) -> bool:
         cur = self.con.cursor()
         cur.execute("SELECT COUNT(*) FROM bookmarks")
diff --git a/tests/test_incremental_sync.py b/tests/test_incremental_sync.py
index 66e9943..41e8f36 100644
--- a/tests/test_incremental_sync.py
+++ b/tests/test_incremental_sync.py
@@ -1,9 +1,9 @@
 """Tests for incremental sync logic in Index."""
 import sqlite3
-import tempfile
 from pathlib import Path
 from unittest.mock import patch, MagicMock
 
+import pytest
 import numpy as np
 
 from mindmark.parser import Bookmark
@@ -14,10 +14,9 @@ def _make_bookmark(url: str, title: str = "T", folder: str = "") -> Bookmark:
     return Bookmark(title=title, url=url, folder_path=folder, add_date=0, icon=None)
 
 
-def _make_index(tmp: Path) -> Index:
+def _make_index(db_path: Path) -> Index:
     """Create an Index with a mock embedder to avoid loading the real model."""
-    idx = Index(db_path=tmp / "test.db")
-    # Replace embedder with a mock that returns deterministic vectors
+    idx = Index(db_path=db_path)
     mock_embedder = MagicMock()
     dim = 4
     def fake_embed(texts):
@@ -31,104 +30,101 @@ def fake_embed(texts):
     return idx
 
 
-def test_sync_adds_new_bookmarks():
-    with tempfile.TemporaryDirectory() as tmpdir:
-        idx = _make_index(Path(tmpdir))
-        bms = [
-            _make_bookmark("https://a.com", "A"),
-            _make_bookmark("https://b.com", "B"),
-        ]
-        result = idx.sync(bms, source="chrome:Default")
-        assert result.added == 2
-        assert result.updated == 0
-        assert result.removed == 0
-        assert result.unchanged == 0
-        assert not idx.is_empty()
-
-
-def test_sync_unchanged_skips_embedding():
-    with tempfile.TemporaryDirectory() as tmpdir:
-        idx = _make_index(Path(tmpdir))
-        bms = [_make_bookmark("https://a.com", "A")]
-        idx.sync(bms, source="test")
-
-        # Reset call count
-        idx.embedder.embed.reset_mock()
-
-        # Sync again with same data
-        result = idx.sync(bms, source="test")
-        assert result.added == 0
-        assert result.unchanged == 1
-        # embed should NOT be called for unchanged bookmarks
-        idx.embedder.embed.assert_not_called()
-
-
-def test_sync_updates_changed_bookmarks():
-    with tempfile.TemporaryDirectory() as tmpdir:
-        idx = _make_index(Path(tmpdir))
-        bms = [_make_bookmark("https://a.com", "A", "Folder1")]
-        idx.sync(bms, source="test")
-
-        # Change the title
-        bms2 = [_make_bookmark("https://a.com", "A Updated", "Folder1")]
-        result = idx.sync(bms2, source="test")
-        assert result.updated == 1
-        assert result.added == 0
-        assert result.unchanged == 0
-
-        # Verify the title was updated in the DB
-        cur = idx.con.cursor()
-        cur.execute("SELECT title FROM bookmarks WHERE url = ?", ("https://a.com",))
-        assert cur.fetchone()[0] == "A Updated"
-
-
-def test_sync_removes_deleted_bookmarks():
-    with tempfile.TemporaryDirectory() as tmpdir:
-        idx = _make_index(Path(tmpdir))
-        bms = [
-            _make_bookmark("https://a.com", "A"),
-            _make_bookmark("https://b.com", "B"),
-        ]
-        idx.sync(bms, source="test")
-
-        # Remove one bookmark
-        bms2 = [_make_bookmark("https://a.com", "A")]
-        result = idx.sync(bms2, source="test")
-        assert result.removed == 1
-        assert result.unchanged == 1
-
-        # Verify b.com is gone
-        cur = idx.con.cursor()
-        cur.execute("SELECT COUNT(*) FROM bookmarks WHERE url = ?", ("https://b.com",))
-        assert cur.fetchone()[0] == 0
+@pytest.fixture
+def idx(tmp_path):
+    """Yield an Index with a mock embedder; close DB on teardown."""
+    index = _make_index(tmp_path / "test.db")
+    yield index
+    index.close()
 
 
-def test_multi_source_no_cross_deletion():
-    """Syncing source A should not delete bookmarks from source B."""
-    with tempfile.TemporaryDirectory() as tmpdir:
-        idx = _make_index(Path(tmpdir))
+def test_sync_adds_new_bookmarks(idx):
+    bms = [
+        _make_bookmark("https://a.com", "A"),
+        _make_bookmark("https://b.com", "B"),
+    ]
+    result = idx.sync(bms, source="chrome:Default")
+    assert result.added == 2
+    assert result.updated == 0
+    assert result.removed == 0
+    assert result.unchanged == 0
+    assert not idx.is_empty()
 
-        # Source A adds url X
-        bms_a = [_make_bookmark("https://shared.com", "Shared")]
-        idx.sync(bms_a, source="chrome:Default")
 
-        # Source B also adds url X
-        bms_b = [_make_bookmark("https://shared.com", "Shared")]
-        idx.sync(bms_b, source="firefox:default")
+def test_sync_unchanged_skips_embedding(idx):
+    bms = [_make_bookmark("https://a.com", "A")]
+    idx.sync(bms, source="test")
 
-        # Source A removes url X
-        result = idx.sync([], source="chrome:Default")
-        assert result.removed == 1  # removed from source A
+    # Reset call count
+    idx.embedder.embed.reset_mock()
 
-        # But the bookmark should still exist (source B still references it)
-        cur = idx.con.cursor()
-        cur.execute("SELECT COUNT(*) FROM bookmarks WHERE url = ?", ("https://shared.com",))
-        assert cur.fetchone()[0] == 1
+    # Sync again with same data
+    result = idx.sync(bms, source="test")
+    assert result.added == 0
+    assert result.unchanged == 1
+    # embed should NOT be called for unchanged bookmarks
+    idx.embedder.embed.assert_not_called()
+
+
+def test_sync_updates_changed_bookmarks(idx):
+    bms = [_make_bookmark("https://a.com", "A", "Folder1")]
+    idx.sync(bms, source="test")
+
+    # Change the title
+    bms2 = [_make_bookmark("https://a.com", "A Updated", "Folder1")]
+    result = idx.sync(bms2, source="test")
+    assert result.updated == 1
+    assert result.added == 0
+    assert result.unchanged == 0
+
+    # Verify the title was updated in the DB
+    cur = idx.con.cursor()
+    cur.execute("SELECT title FROM bookmarks WHERE url = ?", ("https://a.com",))
+    assert cur.fetchone()[0] == "A Updated"
+
+
+def test_sync_removes_deleted_bookmarks(idx):
+    bms = [
+        _make_bookmark("https://a.com", "A"),
+        _make_bookmark("https://b.com", "B"),
+    ]
+    idx.sync(bms, source="test")
+
+    # Remove one bookmark
+    bms2 = [_make_bookmark("https://a.com", "A")]
+    result = idx.sync(bms2, source="test")
+    assert result.removed == 1
+    assert result.unchanged == 1
 
-        # Now remove from source B too
-        result = idx.sync([], source="firefox:default")
-        cur.execute("SELECT COUNT(*) FROM bookmarks WHERE url = ?", ("https://shared.com",))
-        assert cur.fetchone()[0] == 0  # now truly gone
+    # Verify b.com is gone
+    cur = idx.con.cursor()
+    cur.execute("SELECT COUNT(*) FROM bookmarks WHERE url = ?", ("https://b.com",))
+    assert cur.fetchone()[0] == 0
+
+
+def test_multi_source_no_cross_deletion(idx):
+    """Syncing source A should not delete bookmarks from source B."""
+    # Source A adds url X
+    bms_a = [_make_bookmark("https://shared.com", "Shared")]
+    idx.sync(bms_a, source="chrome:Default")
+
+    # Source B also adds url X
+    bms_b = [_make_bookmark("https://shared.com", "Shared")]
+    idx.sync(bms_b, source="firefox:default")
+
+    # Source A removes url X
+    result = idx.sync([], source="chrome:Default")
+    assert result.removed == 1  # removed from source A
+
+    # But the bookmark should still exist (source B still references it)
+    cur = idx.con.cursor()
+    cur.execute("SELECT COUNT(*) FROM bookmarks WHERE url = ?", ("https://shared.com",))
+    assert cur.fetchone()[0] == 1
+
+    # Now remove from source B too
+    result = idx.sync([], source="firefox:default")
+    cur.execute("SELECT COUNT(*) FROM bookmarks WHERE url = ?", ("https://shared.com",))
+    assert cur.fetchone()[0] == 0  # now truly gone
 
 
 def test_sync_result_str():
@@ -153,31 +149,30 @@ def test_content_hash_changes_on_title_change():
     assert _content_hash(b1) != _content_hash(b2)
 
 
-def test_schema_migration_on_old_db():
+def test_schema_migration_on_old_db(tmp_path):
     """Ensure opening a v1 database migrates cleanly."""
-    with tempfile.TemporaryDirectory() as tmpdir:
-        db_path = Path(tmpdir) / "old.db"
-        # Create a v1 database (no content_hash, no bookmark_sources)
-        con = sqlite3.connect(db_path)
-        con.executescript("""
-            CREATE TABLE meta (key TEXT PRIMARY KEY, value TEXT NOT NULL);
-            CREATE TABLE bookmarks (
-                id INTEGER PRIMARY KEY AUTOINCREMENT,
-                url TEXT UNIQUE NOT NULL,
-                title TEXT NOT NULL,
-                folder_path TEXT NOT NULL,
-                domain TEXT NOT NULL,
-                add_date INTEGER NOT NULL,
-                icon TEXT,
-                embedding BLOB NOT NULL,
-                dim INTEGER NOT NULL
-            );
-        """)
-        con.close()
-
-        # Opening with Index should trigger migration
-        idx = Index(db_path=db_path)
-        # Verify new columns/tables exist
+    db_path = tmp_path / "old.db"
+    # Create a v1 database (no content_hash, no bookmark_sources)
+    con = sqlite3.connect(db_path)
+    con.executescript("""
+        CREATE TABLE meta (key TEXT PRIMARY KEY, value TEXT NOT NULL);
+        CREATE TABLE bookmarks (
+            id INTEGER PRIMARY KEY AUTOINCREMENT,
+            url TEXT UNIQUE NOT NULL,
+            title TEXT NOT NULL,
+            folder_path TEXT NOT NULL,
+            domain TEXT NOT NULL,
+            add_date INTEGER NOT NULL,
+            icon TEXT,
+            embedding BLOB NOT NULL,
+            dim INTEGER NOT NULL
+        );
+    """)
+    con.close()
+
+    # Opening with Index should trigger migration
+    idx = Index(db_path=db_path)
+    try:
         cur = idx.con.cursor()
         cols = {r[1] for r in cur.execute("PRAGMA table_info(bookmarks)")}
         assert "content_hash" in cols
@@ -186,185 +181,159 @@ def test_schema_migration_on_old_db():
             "SELECT name FROM sqlite_master WHERE type='table'"
         )}
         assert "bookmark_sources" in tables
+    finally:
+        idx.close()
 
 
 # ---- rebuild() tests ----
 
-def test_rebuild_populates_content_hash():
+def test_rebuild_populates_content_hash(idx):
     """rebuild() must set content_hash so sync() can do incremental diffs."""
-    with tempfile.TemporaryDirectory() as tmpdir:
-        idx = _make_index(Path(tmpdir))
-        bms = [_make_bookmark("https://a.com", "A", "Work")]
-        idx.rebuild(bms)
+    bms = [_make_bookmark("https://a.com", "A", "Work")]
+    idx.rebuild(bms)
 
-        cur = idx.con.cursor()
-        cur.execute("SELECT content_hash FROM bookmarks WHERE url = ?", ("https://a.com",))
-        h = cur.fetchone()[0]
-        assert h and len(h) == 16  # non-empty, truncated sha256
+    cur = idx.con.cursor()
+    cur.execute("SELECT content_hash FROM bookmarks WHERE url = ?", ("https://a.com",))
+    h = cur.fetchone()[0]
+    assert h and len(h) == 16  # non-empty, truncated sha256
 
 
-def test_rebuild_populates_bookmark_sources():
+def test_rebuild_populates_bookmark_sources(idx):
     """rebuild() must populate bookmark_sources with source='html'."""
-    with tempfile.TemporaryDirectory() as tmpdir:
-        idx = _make_index(Path(tmpdir))
-        bms = [
-            _make_bookmark("https://a.com", "A"),
-            _make_bookmark("https://b.com", "B"),
-        ]
-        idx.rebuild(bms)
+    bms = [
+        _make_bookmark("https://a.com", "A"),
+        _make_bookmark("https://b.com", "B"),
+    ]
+    idx.rebuild(bms)
 
-        cur = idx.con.cursor()
-        cur.execute("SELECT url, source FROM bookmark_sources ORDER BY url")
-        rows = cur.fetchall()
-        assert len(rows) == 2
-        assert rows[0] == ("https://a.com", "html")
-        assert rows[1] == ("https://b.com", "html")
+    cur = idx.con.cursor()
+    cur.execute("SELECT url, source FROM bookmark_sources ORDER BY url")
+    rows = cur.fetchall()
+    assert len(rows) == 2
+    assert rows[0] == ("https://a.com", "html")
+    assert rows[1] == ("https://b.com", "html")
 
 
-def test_rebuild_clears_previous_data():
+def test_rebuild_clears_previous_data(idx):
     """rebuild() should clear old bookmarks and sources before inserting."""
-    with tempfile.TemporaryDirectory() as tmpdir:
-        idx = _make_index(Path(tmpdir))
-        idx.rebuild([_make_bookmark("https://old.com", "Old")])
-        idx.rebuild([_make_bookmark("https://new.com", "New")])
+    idx.rebuild([_make_bookmark("https://old.com", "Old")])
+    idx.rebuild([_make_bookmark("https://new.com", "New")])
 
-        cur = idx.con.cursor()
-        cur.execute("SELECT COUNT(*) FROM bookmarks")
-        assert cur.fetchone()[0] == 1
-        cur.execute("SELECT url FROM bookmarks")
-        assert cur.fetchone()[0] == "https://new.com"
-        cur.execute("SELECT COUNT(*) FROM bookmark_sources")
-        assert cur.fetchone()[0] == 1
+    cur = idx.con.cursor()
+    cur.execute("SELECT COUNT(*) FROM bookmarks")
+    assert cur.fetchone()[0] == 1
+    cur.execute("SELECT url FROM bookmarks")
+    assert cur.fetchone()[0] == "https://new.com"
+    cur.execute("SELECT COUNT(*) FROM bookmark_sources")
+    assert cur.fetchone()[0] == 1
 
 
-def test_rebuild_empty_list():
-    with tempfile.TemporaryDirectory() as tmpdir:
-        idx = _make_index(Path(tmpdir))
-        result = idx.rebuild([])
-        assert result["indexed"] == 0
-        assert idx.is_empty()
+def test_rebuild_empty_list(idx):
+    result = idx.rebuild([])
+    assert result["indexed"] == 0
+    assert idx.is_empty()
 
 
-def test_rebuild_then_sync_detects_unchanged():
+def test_rebuild_then_sync_detects_unchanged(idx):
     """rebuild() followed by sync() with the same data should show all unchanged."""
-    with tempfile.TemporaryDirectory() as tmpdir:
-        idx = _make_index(Path(tmpdir))
-        bms = [_make_bookmark("https://a.com", "A", "Work")]
-        idx.rebuild(bms)
+    bms = [_make_bookmark("https://a.com", "A", "Work")]
+    idx.rebuild(bms)
 
-        idx.embedder.embed.reset_mock()
-        result = idx.sync(bms, source="html")
-        assert result.unchanged == 1
-        assert result.added == 0
-        idx.embedder.embed.assert_not_called()
+    idx.embedder.embed.reset_mock()
+    result = idx.sync(bms, source="html")
+    assert result.unchanged == 1
+    assert result.added == 0
+    idx.embedder.embed.assert_not_called()
 
 
 # ---- stats() tests ----
 
-def test_stats_on_populated_index():
-    with tempfile.TemporaryDirectory() as tmpdir:
-        idx = _make_index(Path(tmpdir))
-        bms = [
-            _make_bookmark("https://github.com/a", "Repo A", "Work"),
-            _make_bookmark("https://github.com/b", "Repo B", "Work"),
-            _make_bookmark("https://docs.python.org", "Python Docs", "Ref"),
-        ]
-        idx.rebuild(bms)
+def test_stats_on_populated_index(idx):
+    bms = [
+        _make_bookmark("https://github.com/a", "Repo A", "Work"),
+        _make_bookmark("https://github.com/b", "Repo B", "Work"),
+        _make_bookmark("https://docs.python.org", "Python Docs", "Ref"),
+    ]
+    idx.rebuild(bms)
 
-        s = idx.stats()
-        assert s["total"] == 3
-        assert s["model"] is not None
-        assert str(idx.db_path) in s["db_path"]
-        # github.com should be top domain with count 2
-        domains = dict(s["top_domains"])
-        assert domains.get("github.com") == 2
+    s = idx.stats()
+    assert s["total"] == 3
+    assert s["model"] is not None
+    assert str(idx.db_path) in s["db_path"]
+    # github.com should be top domain with count 2
+    domains = dict(s["top_domains"])
+    assert domains.get("github.com") == 2
 
 
-def test_stats_on_empty_index():
-    with tempfile.TemporaryDirectory() as tmpdir:
-        idx = _make_index(Path(tmpdir))
-        s = idx.stats()
-        assert s["total"] == 0
+def test_stats_on_empty_index(idx):
+    s = idx.stats()
+    assert s["total"] == 0
 
 
 # ---- search() tests ----
 
-def test_search_returns_results():
-    with tempfile.TemporaryDirectory() as tmpdir:
-        idx = _make_index(Path(tmpdir))
-        bms = [
-            _make_bookmark("https://a.com", "Alpha"),
-            _make_bookmark("https://b.com", "Beta"),
-        ]
-        idx.rebuild(bms)
+def test_search_returns_results(idx):
+    bms = [
+        _make_bookmark("https://a.com", "Alpha"),
+        _make_bookmark("https://b.com", "Beta"),
+    ]
+    idx.rebuild(bms)
 
-        results = idx.search("anything", k=10)
-        assert len(results) == 2
-        assert all("score" in r for r in results)
-        assert all("url" in r for r in results)
+    results = idx.search("anything", k=10)
+    assert len(results) == 2
+    assert all("score" in r for r in results)
+    assert all("url" in r for r in results)
 
 
-def test_search_empty_index():
-    with tempfile.TemporaryDirectory() as tmpdir:
-        idx = _make_index(Path(tmpdir))
-        results = idx.search("test")
-        assert results == []
+def test_search_empty_index(idx):
+    results = idx.search("test")
+    assert results == []
 
 
-def test_search_domain_filter():
-    with tempfile.TemporaryDirectory() as tmpdir:
-        idx = _make_index(Path(tmpdir))
-        bms = [
-            _make_bookmark("https://github.com/x", "GitHub"),
-            _make_bookmark("https://docs.python.org", "Docs"),
-        ]
-        idx.rebuild(bms)
+def test_search_domain_filter(idx):
+    bms = [
+        _make_bookmark("https://github.com/x", "GitHub"),
+        _make_bookmark("https://docs.python.org", "Docs"),
+    ]
+    idx.rebuild(bms)
 
-        results = idx.search("test", domain="github.com")
-        assert all("github.com" in r["domain"] for r in results)
+    results = idx.search("test", domain="github.com")
+    assert all("github.com" in r["domain"] for r in results)
 
 
-def test_search_folder_filter():
-    with tempfile.TemporaryDirectory() as tmpdir:
-        idx = _make_index(Path(tmpdir))
-        bms = [
-            _make_bookmark("https://a.com", "A", "Work/Internal"),
-            _make_bookmark("https://b.com", "B", "Personal"),
-        ]
-        idx.rebuild(bms)
+def test_search_folder_filter(idx):
+    bms = [
+        _make_bookmark("https://a.com", "A", "Work/Internal"),
+        _make_bookmark("https://b.com", "B", "Personal"),
+    ]
+    idx.rebuild(bms)
 
-        results = idx.search("test", folder="work")
-        assert all("work" in r["folder_path"].lower() for r in results)
+    results = idx.search("test", folder="work")
+    assert all("work" in r["folder_path"].lower() for r in results)
 
 
-def test_search_k_limit():
-    with tempfile.TemporaryDirectory() as tmpdir:
-        idx = _make_index(Path(tmpdir))
-        bms = [_make_bookmark(f"https://{i}.com", f"Site {i}") for i in range(20)]
-        idx.rebuild(bms)
+def test_search_k_limit(idx):
+    bms = [_make_bookmark(f"https://{i}.com", f"Site {i}") for i in range(20)]
+    idx.rebuild(bms)
 
-        results = idx.search("test", k=5)
-        assert len(results) == 5
+    results = idx.search("test", k=5)
+    assert len(results) == 5
 
 
 # ---- _remove_source() tests ----
 
-def test_remove_source_cleans_orphans():
-    with tempfile.TemporaryDirectory() as tmpdir:
-        idx = _make_index(Path(tmpdir))
-        bms = [_make_bookmark("https://a.com", "A")]
-        idx.sync(bms, source="chrome:Default")
+def test_remove_source_cleans_orphans(idx):
+    bms = [_make_bookmark("https://a.com", "A")]
+    idx.sync(bms, source="chrome:Default")
 
-        removed = idx._remove_source("chrome:Default")
-        assert len(removed) == 1
-        assert idx.is_empty()
+    removed = idx._remove_source("chrome:Default")
+    assert len(removed) == 1
+    assert idx.is_empty()
 
 
-def test_remove_source_preserves_other_sources():
-    with tempfile.TemporaryDirectory() as tmpdir:
-        idx = _make_index(Path(tmpdir))
-        idx.sync([_make_bookmark("https://a.com", "A")], source="chrome:Default")
-        idx.sync([_make_bookmark("https://a.com", "A")], source="firefox:default")
+def test_remove_source_preserves_other_sources(idx):
+    idx.sync([_make_bookmark("https://a.com", "A")], source="chrome:Default")
+    idx.sync([_make_bookmark("https://a.com", "A")], source="firefox:default")
 
-        idx._remove_source("chrome:Default")
-        assert not idx.is_empty()  # firefox still references it
+    idx._remove_source("chrome:Default")
+    assert not idx.is_empty()  # firefox still references it