diff --git a/lib/kith/contacts.ex b/lib/kith/contacts.ex
index 1427759..d92f9e4 100644
--- a/lib/kith/contacts.ex
+++ b/lib/kith/contacts.ex
@@ -1740,14 +1740,28 @@ defmodule Kith.Contacts do
end,
set: [contact_id: survivor.id]
)
- # Remap photos
- |> Ecto.Multi.update_all(
- :remap_photos,
- fn _changes ->
+ # Remap photos (delete duplicates by content_hash first, then move remaining)
+ |> Ecto.Multi.run(:remap_photos, fn repo, _changes ->
+ # Delete photos from non-survivor that already exist on survivor (same content_hash).
+ # A failed DELETE returns {:error, _} so the Multi aborts instead of ignoring it.
+ dedupe_sql = """
+ DELETE FROM photos
+ WHERE contact_id = $1
+ AND content_hash IS NOT NULL
+ AND content_hash IN (
+ SELECT content_hash FROM photos WHERE contact_id = $2 AND content_hash IS NOT NULL
+ )
+ """
+
+ with {:ok, _} <- repo.query(dedupe_sql, [non_survivor.id, survivor.id]) do
+ # Move remaining photos
+ {count, _} =
from(p in Photo, where: p.contact_id == ^non_survivor.id)
- end,
- set: [contact_id: survivor.id]
- )
+ |> repo.update_all(set: [contact_id: survivor.id])
+
+ {:ok, count}
+ end
+ end)
# Remap addresses
|> Ecto.Multi.update_all(
:remap_addresses,
diff --git a/lib/kith/duplicate_detection.ex b/lib/kith/duplicate_detection.ex
index a7599d0..359417e 100644
--- a/lib/kith/duplicate_detection.ex
+++ b/lib/kith/duplicate_detection.ex
@@ -4,13 +4,27 @@ defmodule Kith.DuplicateDetection do
alias Kith.Contacts.DuplicateCandidate
alias Kith.Repo
+ @default_page_size 20
+
+ @doc """
+ Lists duplicate candidates for `account_id`, highest score first.
+
+ Options: `:status` (default `"pending"`), `:limit` (default 20) and
+ `:offset` (default `0`).
+ """
def list_candidates(account_id, opts \\ []) do
status = Keyword.get(opts, :status, "pending")
+ limit = Keyword.get(opts, :limit, @default_page_size)
+ offset = Keyword.get(opts, :offset, 0)
DuplicateCandidate
|> scope_to_account(account_id)
|> where([d], d.status == ^status)
- |> order_by([d], desc: d.score)
+ # Tie-break on id: scores repeat, and LIMIT/OFFSET paging needs a total
+ # order so consecutive pages never repeat or skip a row.
+ |> order_by([d], desc: d.score, asc: d.id)
+ |> limit(^limit)
+ |> offset(^offset)
|> Repo.all()
|> Repo.preload([:contact, :duplicate_contact])
end
diff --git a/lib/kith/workers/duplicate_detection_worker.ex b/lib/kith/workers/duplicate_detection_worker.ex
index 8e67e29..efea61d 100644
--- a/lib/kith/workers/duplicate_detection_worker.ex
+++ b/lib/kith/workers/duplicate_detection_worker.ex
@@ -5,9 +5,18 @@ defmodule Kith.Workers.DuplicateDetectionWorker do
Detection algorithm:
1. Name similarity via pg_trgm similarity() on display_name (threshold: 0.5)
- 2. Exact email match across contact_fields
- 3. Exact phone match across contact_fields
- 4. Weighted score: name(0.4) + email(0.35) + phone(0.25)
+ 2. Case-insensitive email match across contact_fields
+ 3. Normalized phone match across contact_fields (digits only)
+ 4. Address match on line1 + postal_code (case-insensitive, whitespace-trimmed)
+
+ Scoring (max-signal + bonus):
+ Each signal has an independent base score:
+ - email_match: 0.85
+ - phone_match: 0.75
+ - address_match: 0.60
+ - name_match: the raw pg_trgm similarity (> 0.5)
+ Final score = max(base scores) + 0.05 per additional signal, capped at 1.0
+ Threshold: >= 0.5
"""
use Oban.Worker,
@@ -15,7 +24,7 @@ defmodule Kith.Workers.DuplicateDetectionWorker do
max_attempts: 3
import Ecto.Query
- alias Kith.Contacts.{Contact, ContactField, DuplicateCandidate}
+ alias Kith.Contacts.{Address, Contact, ContactField, ContactFieldType, DuplicateCandidate}
alias Kith.Repo
@impl Oban.Worker
@@ -36,33 +45,26 @@ defmodule Kith.Workers.DuplicateDetectionWorker do
end
defp detect_duplicates(account_id) do
- # Get active contacts for this account
- contacts =
+ # Pairs need at least two active (non-deleted) contacts; otherwise succeed with :ok.
+ contact_count =
Contact
|> where([c], c.account_id == ^account_id)
|> where([c], is_nil(c.deleted_at))
- |> select([c], %{id: c.id, display_name: c.display_name})
- |> Repo.all()
+ |> Repo.aggregate(:count)
- if length(contacts) < 2, do: :ok, else: find_duplicates(account_id, contacts)
+ if contact_count >= 2, do: find_duplicates(account_id), else: :ok
end
- defp find_duplicates(account_id, _contacts) do
- # Find name-based duplicates using pg_trgm
+ defp find_duplicates(account_id) do
name_matches = find_name_matches(account_id)
-
- # Find email-based duplicates
email_matches = find_email_matches(account_id)
-
- # Find phone-based duplicates
phone_matches = find_phone_matches(account_id)
+ address_matches = find_address_matches(account_id)
- # Merge and score all matches
all_pairs =
- merge_matches(name_matches, email_matches, phone_matches)
- |> Enum.filter(fn {_pair, score, _reasons} -> score >= 0.4 end)
+ merge_matches(name_matches, email_matches, phone_matches, address_matches)
+ |> Enum.filter(fn {_pair, score, _reasons} -> score >= 0.5 end)
- # Get existing pending/dismissed candidates to avoid re-inserting
existing =
DuplicateCandidate
|> where([d], d.account_id == ^account_id)
@@ -73,9 +74,7 @@ defmodule Kith.Workers.DuplicateDetectionWorker do
now = DateTime.utc_now() |> DateTime.truncate(:second)
- # Insert new candidates
Enum.each(all_pairs, fn {{id1, id2}, score, reasons} ->
- # Canonicalize: smaller id first
{contact_id, dup_id} = if id1 < id2, do: {id1, id2}, else: {id2, id1}
unless MapSet.member?(existing, {contact_id, dup_id}) do
@@ -93,7 +92,6 @@ defmodule Kith.Workers.DuplicateDetectionWorker do
end
defp find_name_matches(account_id) do
- # Use pg_trgm similarity for fuzzy name matching
query = """
SELECT c1.id AS id1, c2.id AS id2, similarity(c1.display_name, c2.display_name) AS sim
FROM contacts c1
@@ -102,6 +100,8 @@ defmodule Kith.Workers.DuplicateDetectionWorker do
WHERE c1.account_id = $1
AND c1.deleted_at IS NULL
AND c2.deleted_at IS NULL
+ AND c1.display_name IS NOT NULL AND c1.display_name != ''
+ AND c2.display_name IS NOT NULL AND c2.display_name != ''
AND similarity(c1.display_name, c2.display_name) > 0.5
ORDER BY sim DESC
LIMIT 500
@@ -119,55 +119,107 @@ defmodule Kith.Workers.DuplicateDetectionWorker do
end
defp find_email_matches(account_id) do
- # Find contacts that share an exact email address
+ # Case-insensitive email match between non-deleted contacts, both fields email-typed
query =
from cf1 in ContactField,
join: cf2 in ContactField,
- on: cf1.value == cf2.value and cf1.id < cf2.id,
- join: cft in assoc(cf1, :contact_field_type),
+ on:
+ fragment("LOWER(?)", cf1.value) == fragment("LOWER(?)", cf2.value) and
+ cf1.id < cf2.id,
+ join: cft1 in ContactFieldType,
+ on: cf1.contact_field_type_id == cft1.id,
+ join: cft2 in ContactFieldType,
+ on: cf2.contact_field_type_id == cft2.id,
+ join: c1 in Contact,
+ on: c1.id == cf1.contact_id and is_nil(c1.deleted_at),
+ join: c2 in Contact,
+ on: c2.id == cf2.contact_id and is_nil(c2.deleted_at),
where: cf1.account_id == ^account_id,
where: cf2.account_id == ^account_id,
- where: cft.protocol == "mailto:",
+ where: fragment("? LIKE 'mailto%'", cft1.protocol),
+ where: fragment("? LIKE 'mailto%'", cft2.protocol),
where: cf1.contact_id != cf2.contact_id,
+ where: cf1.value != "" and not is_nil(cf1.value),
select: {cf1.contact_id, cf2.contact_id}
query
|> Repo.all()
- |> Enum.uniq()
|> Enum.map(fn {id1, id2} ->
- {id1, id2} = if id1 < id2, do: {id1, id2}, else: {id2, id1}
- {{id1, id2}, 1.0, ["email_match"]}
+ if id1 < id2, do: {id1, id2}, else: {id2, id1}
end)
- |> Enum.uniq_by(fn {pair, _, _} -> pair end)
+ |> Enum.uniq()
+ |> Enum.map(fn {id1, id2} -> {{id1, id2}, 1.0, ["email_match"]} end)
end
defp find_phone_matches(account_id) do
- # Find contacts that share an exact phone number (normalized: digits only)
+ # Normalized phone match (digits only) between non-deleted contacts, both fields phone-typed
query =
from cf1 in ContactField,
join: cf2 in ContactField,
on:
fragment("regexp_replace(?, '[^0-9]', '', 'g')", cf1.value) ==
- fragment("regexp_replace(?, '[^0-9]', '', 'g')", cf2.value) and cf1.id < cf2.id,
- join: cft in assoc(cf1, :contact_field_type),
+ fragment("regexp_replace(?, '[^0-9]', '', 'g')", cf2.value) and
+ cf1.id < cf2.id,
+ join: cft1 in ContactFieldType,
+ on: cf1.contact_field_type_id == cft1.id,
+ join: cft2 in ContactFieldType,
+ on: cf2.contact_field_type_id == cft2.id,
+ join: c1 in Contact,
+ on: c1.id == cf1.contact_id and is_nil(c1.deleted_at),
+ join: c2 in Contact,
+ on: c2.id == cf2.contact_id and is_nil(c2.deleted_at),
where: cf1.account_id == ^account_id,
where: cf2.account_id == ^account_id,
- where: cft.protocol == "tel:",
+ where: fragment("? LIKE 'tel%'", cft1.protocol),
+ where: fragment("? LIKE 'tel%'", cft2.protocol),
where: cf1.contact_id != cf2.contact_id,
+ # Check the normalized digits: values without any digit all collapse to "".
+ where: fragment("regexp_replace(?, '[^0-9]', '', 'g')", cf1.value) != "",
select: {cf1.contact_id, cf2.contact_id}
query
|> Repo.all()
+ |> Enum.map(fn {id1, id2} ->
+ if id1 < id2, do: {id1, id2}, else: {id2, id1}
+ end)
|> Enum.uniq()
+ |> Enum.map(fn {id1, id2} -> {{id1, id2}, 1.0, ["phone_match"]} end)
+ end
+
+ defp find_address_matches(account_id) do
+ # Match on normalized line1 + postal_code between non-deleted contacts
+ query =
+ from a1 in Address,
+ join: a2 in Address,
+ on:
+ fragment("LOWER(TRIM(?))", a1.line1) == fragment("LOWER(TRIM(?))", a2.line1) and
+ fragment("LOWER(TRIM(?))", a1.postal_code) ==
+ fragment("LOWER(TRIM(?))", a2.postal_code) and
+ a1.id < a2.id,
+ join: c1 in Contact,
+ on: c1.id == a1.contact_id and is_nil(c1.deleted_at),
+ join: c2 in Contact,
+ on: c2.id == a2.contact_id and is_nil(c2.deleted_at),
+ where: a1.account_id == ^account_id,
+ where: a2.account_id == ^account_id,
+ where: a1.contact_id != a2.contact_id,
+ where: a1.line1 != "" and not is_nil(a1.line1),
+ where: a1.postal_code != "" and not is_nil(a1.postal_code),
+ where: a2.line1 != "" and not is_nil(a2.line1),
+ where: a2.postal_code != "" and not is_nil(a2.postal_code),
+ select: {a1.contact_id, a2.contact_id}
+
+ query
+ |> Repo.all()
|> Enum.map(fn {id1, id2} ->
- {id1, id2} = if id1 < id2, do: {id1, id2}, else: {id2, id1}
- {{id1, id2}, 1.0, ["phone_match"]}
+ if id1 < id2, do: {id1, id2}, else: {id2, id1}
end)
- |> Enum.uniq_by(fn {pair, _, _} -> pair end)
+ |> Enum.uniq()
+ |> Enum.map(fn {id1, id2} -> {{id1, id2}, 1.0, ["address_match"]} end)
end
- defp merge_matches(name_matches, email_matches, phone_matches) do
- (name_matches ++ email_matches ++ phone_matches)
+ defp merge_matches(name_matches, email_matches, phone_matches, address_matches) do
+ (name_matches ++ email_matches ++ phone_matches ++ address_matches)
|> Enum.group_by(fn {pair, _score, _reasons} -> pair end)
|> Enum.map(&compute_merged_score/1)
end
@@ -176,9 +215,19 @@ defmodule Kith.Workers.DuplicateDetectionWorker do
reasons = matches |> Enum.flat_map(fn {_, _, r} -> r end) |> Enum.uniq()
name_sim = Enum.find_value(matches, 0.0, &extract_name_score/1)
- email_weight = if "email_match" in reasons, do: 0.35, else: 0.0
- phone_weight = if "phone_match" in reasons, do: 0.25, else: 0.0
- score = min(name_sim * 0.4 + email_weight + phone_weight, 1.0)
+ # Base score for each signal type
+ base_scores =
+ []
+ |> then(fn acc -> if "email_match" in reasons, do: [0.85 | acc], else: acc end)
+ |> then(fn acc -> if "phone_match" in reasons, do: [0.75 | acc], else: acc end)
+ |> then(fn acc -> if "address_match" in reasons, do: [0.60 | acc], else: acc end)
+ |> then(fn acc -> if name_sim > 0.0, do: [name_sim | acc], else: acc end)
+
+ signal_count = length(base_scores)
+ max_score = Enum.max(base_scores, fn -> 0.0 end)
+ bonus = max(signal_count - 1, 0) * 0.05
+
+ score = min(max_score + bonus, 1.0)
{pair, Float.round(score, 2), reasons}
end
diff --git a/lib/kith/workers/import_source_worker.ex b/lib/kith/workers/import_source_worker.ex
index 6cdf883..d5feaa6 100644
--- a/lib/kith/workers/import_source_worker.ex
+++ b/lib/kith/workers/import_source_worker.ex
@@ -11,6 +11,7 @@ defmodule Kith.Workers.ImportSourceWorker do
alias Kith.Imports
alias Kith.Storage
+ alias Kith.Workers.DuplicateDetectionWorker
@impl Oban.Worker
def perform(%Oban.Job{args: %{"import_id" => import_id}}) do
@@ -33,6 +34,9 @@ defmodule Kith.Workers.ImportSourceWorker do
topic = "import:#{import.account_id}"
Phoenix.PubSub.broadcast(Kith.PubSub, topic, {:import_complete, summary_map})
+ # Trigger duplicate detection for newly imported contacts
+ Oban.insert(DuplicateDetectionWorker.new(%{account_id: import.account_id}))
+
Logger.info("Import #{import_id} completed: #{inspect(summary_map)}")
:ok
else
diff --git a/lib/kith/workers/import_worker.ex b/lib/kith/workers/import_worker.ex
index 790620e..68dde77 100644
--- a/lib/kith/workers/import_worker.ex
+++ b/lib/kith/workers/import_worker.ex
@@ -11,6 +11,7 @@ defmodule Kith.Workers.ImportWorker do
alias Kith.Contacts
alias Kith.VCard.Parser
+ alias Kith.Workers.DuplicateDetectionWorker
@impl Oban.Worker
def perform(%Oban.Job{
@@ -42,6 +43,9 @@ defmodule Kith.Workers.ImportWorker do
{:import_complete, results}
)
+ # Trigger duplicate detection for newly imported contacts
+ Oban.insert(DuplicateDetectionWorker.new(%{account_id: account_id}))
+
Logger.info(
"vCard import complete for account #{account_id}: " <>
"#{results.imported} imported, #{results.skipped} skipped"
diff --git a/lib/kith/workers/monica_api_crawl_worker.ex b/lib/kith/workers/monica_api_crawl_worker.ex
index b5355ba..f366140 100644
--- a/lib/kith/workers/monica_api_crawl_worker.ex
+++ b/lib/kith/workers/monica_api_crawl_worker.ex
@@ -15,6 +15,7 @@ defmodule Kith.Workers.MonicaApiCrawlWorker do
alias Kith.Imports
alias Kith.Imports.Sources.MonicaApi
+ alias Kith.Workers.DuplicateDetectionWorker
@impl Oban.Worker
def perform(%Oban.Job{args: %{"import_id" => import_id}}) do
@@ -47,6 +48,9 @@ defmodule Kith.Workers.MonicaApiCrawlWorker do
topic = "import:#{import_job.account_id}"
Phoenix.PubSub.broadcast(Kith.PubSub, topic, {:import_complete, summary_map})
+ # Trigger duplicate detection for newly imported contacts
+ Oban.insert(DuplicateDetectionWorker.new(%{account_id: import_job.account_id}))
+
Logger.info("MonicaApi import #{import_id} completed: #{inspect(summary_map)}")
:ok
else
diff --git a/lib/kith_web/live/contact_live/duplicates.ex b/lib/kith_web/live/contact_live/duplicates.ex
index 1dad4f5..36c951b 100644
--- a/lib/kith_web/live/contact_live/duplicates.ex
+++ b/lib/kith_web/live/contact_live/duplicates.ex
@@ -7,12 +7,14 @@ defmodule KithWeb.ContactLive.Duplicates do
alias Kith.Policy
alias Kith.Workers.DuplicateDetectionWorker
+ @page_size 20
+
@impl true
def mount(_params, _session, socket) do
{:ok,
socket
|> assign(:page_title, "Duplicate Contacts")
- |> assign(:candidates, [])}
+ |> assign(candidates: [], has_more: false, total_count: 0)}
end
@impl true
@@ -20,12 +24,15 @@ defmodule KithWeb.ContactLive.Duplicates do
scope = socket.assigns.current_scope
account_id = scope.account.id
- candidates = DuplicateDetection.list_candidates(account_id)
+ candidates = DuplicateDetection.list_candidates(account_id, limit: @page_size)
+ total_count = DuplicateDetection.pending_count(account_id)
{:noreply,
socket
|> assign(:account_id, account_id)
- |> assign(:candidates, candidates)}
+ |> assign(:candidates, candidates)
+ |> assign(:total_count, total_count)
+ |> assign(:has_more, length(candidates) >= @page_size)}
end
@impl true
@@ -35,15 +42,32 @@ defmodule KithWeb.ContactLive.Duplicates do
{:ok, _} = DuplicateDetection.dismiss_candidate(candidate)
- candidates = DuplicateDetection.list_candidates(socket.assigns.account_id)
+ candidates = Enum.reject(socket.assigns.candidates, &(&1.id == candidate.id))
+ total_count = socket.assigns.total_count - 1
{:noreply,
socket
|> assign(:candidates, candidates)
- |> assign(:pending_duplicates_count, length(candidates))
+ |> assign(:total_count, total_count)
+ |> assign(:pending_duplicates_count, total_count)
|> put_flash(:info, "Duplicate dismissed.")}
end
+ def handle_event("load_more", _params, socket) do
+ %{account_id: account_id, candidates: loaded} = socket.assigns
+ page_opts = [limit: @page_size, offset: length(loaded)]
+
+ # Rows already shown double as the offset of the next page.
+ next_page = DuplicateDetection.list_candidates(account_id, page_opts)
+
+ socket =
+ socket
+ |> assign(:candidates, loaded ++ next_page)
+ |> assign(:has_more, length(next_page) >= @page_size)
+
+ {:noreply, socket}
+ end
+
def handle_event("scan", _params, socket) do
user = socket.assigns.current_scope.user
@@ -79,7 +103,7 @@ defmodule KithWeb.ContactLive.Duplicates do
Duplicate Contacts
- {length(@candidates)} potential duplicate{if length(@candidates) != 1, do: "s"} found
+ {@total_count} potential duplicate{if @total_count != 1, do: "s"} found
<% else %>
- <%= for candidate <- @candidates do %>
-
-
-
- = 0.8 &&
- "bg-red-100 text-red-700 dark:bg-red-900/30 dark:text-red-400",
- candidate.score >= 0.5 && candidate.score < 0.8 &&
- "bg-yellow-100 text-yellow-700 dark:bg-yellow-900/30 dark:text-yellow-400",
- candidate.score < 0.5 &&
- "bg-blue-100 text-blue-700 dark:bg-blue-900/30 dark:text-blue-400"
- ]}>
- {Float.round(candidate.score * 100, 0)}% match
-
-
- {Enum.join(candidate.reasons, ", ")}
-
-
+
+
+
+ = 0.8 &&
+ "bg-red-100 text-red-700 dark:bg-red-900/30 dark:text-red-400",
+ candidate.score >= 0.5 && candidate.score < 0.8 &&
+ "bg-yellow-100 text-yellow-700 dark:bg-yellow-900/30 dark:text-yellow-400",
+ candidate.score < 0.5 &&
+ "bg-blue-100 text-blue-700 dark:bg-blue-900/30 dark:text-blue-400"
+ ]}>
+ {Float.round(candidate.score * 100, 0)}% match
+
- Detected <.date_display date={candidate.detected_at} />
+ {Enum.join(candidate.reasons, ", ")}
+
+ Detected <.date_display date={candidate.detected_at} />
+
+
-
-
- <.link
- navigate={~p"/contacts/#{candidate.contact.id}"}
- class="text-sm font-semibold text-[var(--color-accent)] hover:underline"
- >
- {candidate.contact.display_name}
-
-
-
- <.link
- navigate={~p"/contacts/#{candidate.duplicate_contact.id}"}
- class="text-sm font-semibold text-[var(--color-accent)] hover:underline"
- >
- {candidate.duplicate_contact.display_name}
-
-
-
-
-
+
+
<.link
- navigate={
- ~p"/contacts/#{candidate.contact.id}/merge?with=#{candidate.duplicate_contact.id}&candidate_id=#{candidate.id}"
- }
- class="inline-flex items-center gap-1.5 rounded-[var(--radius-md)] bg-[var(--color-accent)] text-[var(--color-accent-foreground)] px-3 py-1.5 text-xs font-medium hover:bg-[var(--color-accent-hover)] transition-colors"
+ navigate={~p"/contacts/#{candidate.contact.id}"}
+ class="text-sm font-semibold text-[var(--color-accent)] hover:underline"
>
- <.icon name="hero-arrows-right-left" class="size-4" /> Merge
+ {candidate.contact.display_name}
-
- <% end %>
+
+
+ <.link
+ navigate={
+ ~p"/contacts/#{candidate.contact.id}/merge?with=#{candidate.duplicate_contact.id}&candidate_id=#{candidate.id}"
+ }
+ class="inline-flex items-center gap-1.5 rounded-[var(--radius-md)] bg-[var(--color-accent)] text-[var(--color-accent-foreground)] px-3 py-1.5 text-xs font-medium hover:bg-[var(--color-accent-hover)] transition-colors"
+ >
+ <.icon name="hero-arrows-right-left" class="size-4" /> Merge
+
+
+
+
+
+
+
+
<% end %>
diff --git a/lib/kith_web/live/contact_live/index.ex b/lib/kith_web/live/contact_live/index.ex
index c1f3b55..491b9b4 100644
--- a/lib/kith_web/live/contact_live/index.ex
+++ b/lib/kith_web/live/contact_live/index.ex
@@ -33,6 +33,8 @@ defmodule KithWeb.ContactLive.Index do
|> assign(:meta, nil)
|> assign(:tags, Contacts.list_tags(account_id))
|> assign(:candidates, [])
+ |> assign(:duplicates_total, 0)
+ |> assign(:duplicates_has_more, false)
|> assign(:trashed_contacts, [])}
end
@@ -55,12 +57,18 @@ defmodule KithWeb.ContactLive.Index do
|> load_contacts()
end
+ @duplicates_page_size 20
+
defp apply_action(socket, :duplicates, _params) do
- candidates = DuplicateDetection.list_candidates(socket.assigns.account_id)
+ %{account_id: account_id} = socket.assigns
+ candidates = DuplicateDetection.list_candidates(account_id, limit: @duplicates_page_size)
+ has_more? = length(candidates) >= @duplicates_page_size
socket
|> assign(:page_title, "Duplicate Contacts")
|> assign(:candidates, candidates)
+ |> assign(:duplicates_total, DuplicateDetection.pending_count(account_id))
+ |> assign(:duplicates_has_more, has_more?)
end
defp apply_action(socket, :trash, _params) do
@@ -245,15 +253,32 @@ defmodule KithWeb.ContactLive.Index do
{:ok, _} = DuplicateDetection.dismiss_candidate(candidate)
- candidates = DuplicateDetection.list_candidates(socket.assigns.account_id)
+ candidates = Enum.reject(socket.assigns.candidates, &(&1.id == candidate.id))
+ total = socket.assigns.duplicates_total - 1
{:noreply,
socket
|> assign(:candidates, candidates)
- |> assign(:pending_duplicates_count, length(candidates))
+ |> assign(:duplicates_total, total)
+ |> assign(:pending_duplicates_count, total)
|> put_flash(:info, "Duplicate dismissed.")}
end
+ def handle_event("load_more_duplicates", _params, socket) do
+ %{account_id: account_id, candidates: loaded} = socket.assigns
+ page_opts = [limit: @duplicates_page_size, offset: length(loaded)]
+
+ # Rows already shown double as the offset of the next page.
+ next_page = DuplicateDetection.list_candidates(account_id, page_opts)
+
+ socket =
+ socket
+ |> assign(:candidates, loaded ++ next_page)
+ |> assign(:duplicates_has_more, length(next_page) >= @duplicates_page_size)
+
+ {:noreply, socket}
+ end
+
def handle_event("scan", _params, socket) do
user = socket.assigns.current_scope.user
diff --git a/lib/kith_web/live/contact_live/index.html.heex b/lib/kith_web/live/contact_live/index.html.heex
index 7e08df4..3db183f 100644
--- a/lib/kith_web/live/contact_live/index.html.heex
+++ b/lib/kith_web/live/contact_live/index.html.heex
@@ -381,7 +381,7 @@
- {length(@candidates)} potential duplicate{if length(@candidates) != 1, do: "s"} found
+ {@duplicates_total} potential duplicate{if @duplicates_total != 1, do: "s"} found
<% end %>
+
+ <%= if @duplicates_has_more do %>
+
+
+
+ <% end %>
<% end %>
<% end %>
diff --git a/lib/kith_web/live/contact_live/merge.ex b/lib/kith_web/live/contact_live/merge.ex
index 6bdad13..f45dbbc 100644
--- a/lib/kith_web/live/contact_live/merge.ex
+++ b/lib/kith_web/live/contact_live/merge.ex
@@ -150,13 +150,7 @@ defmodule KithWeb.ContactLive.Merge do
end
end
- # ── Step 3: Preview ────────────────────────────────────────────────────
-
- def handle_event("confirm-merge", _params, socket) do
- {:noreply, assign(socket, :step, 4)}
- end
-
- # ── Step 4: Execute ────────────────────────────────────────────────────
+ # ── Step 3: Preview & Execute ───────────────────────────────────────────
def handle_event("execute-merge", _params, socket) do
contact_a = socket.assigns.contact_a
@@ -233,14 +227,14 @@ defmodule KithWeb.ContactLive.Merge do
Merge Contacts
- Step {@step} of 4 — {step_label(@step)}
+ Step {@step} of 3 — {step_label(@step)}
<%!-- Step indicator (horizontal stepper) --%>
- Back
- Confirm Merge
-
-
-
-
- <%!-- Step 4: Confirm & Execute --%>
-
-
-
-
- Final Confirmation
-
-
- This action cannot be easily undone. Are you sure you want to merge
-
- {@contact_b.display_name}
-
- into {@contact_a.display_name}?
-
-
-
-
- Go Back
-
-
- {if @merging, do: "Merging...", else: "Merge Contacts"}
-
-
+
Back
+
+ {if @merging, do: "Merging...", else: "Merge Contacts"}
+
@@ -463,8 +432,7 @@ defmodule KithWeb.ContactLive.Merge do
defp step_label(1), do: "Select contact"
defp step_label(2), do: "Choose fields"
- defp step_label(3), do: "Preview"
- defp step_label(4), do: "Confirm"
+ defp step_label(3), do: "Review & merge"
defp default_field_choices do
@mergeable_fields
diff --git a/test/kith/workers/duplicate_detection_worker_test.exs b/test/kith/workers/duplicate_detection_worker_test.exs
new file mode 100644
index 0000000..8f2728e
--- /dev/null
+++ b/test/kith/workers/duplicate_detection_worker_test.exs
@@ -0,0 +1,601 @@
+defmodule Kith.Workers.DuplicateDetectionWorkerTest do
+ use Kith.DataCase, async: true
+ use Oban.Testing, repo: Kith.Repo
+
+ import Kith.Factory
+ import Kith.ContactsFixtures
+
+ alias Kith.Contacts.DuplicateCandidate
+ alias Kith.Workers.DuplicateDetectionWorker
+
+ setup do
+ seed_reference_data!()
+ {account, _user} = setup_account()
+
+ # Field-type ids are looked up by protocol with a schemaless query; the rows
+ # are presumably created by seed_reference_data!/0 above.
+ field_type_id = fn protocol ->
+ Repo.one!(
+ from t in "contact_field_types",
+ where: t.protocol == ^protocol,
+ select: t.id,
+ limit: 1
+ )
+ end
+
+ # Keys below are what the tests pattern-match out of the ExUnit context.
+ %{
+ account: account,
+ email_type_id: field_type_id.("mailto:"),
+ phone_type_id: field_type_id.("tel:")
+ }
+ end
+
+ defp run_detection(account_id) do
+ job_args = %{account_id: account_id}
+ perform_job(DuplicateDetectionWorker, job_args)
+ end
+
+ # Pending candidates for the account, highest score first.
+ defp pending_candidates(account_id) do
+ from(d in DuplicateCandidate, where: d.account_id == ^account_id and d.status == "pending")
+ |> order_by([d], desc: d.score)
+ |> Repo.all()
+ end
+
+ describe "name matching" do
+ test "detects contacts with similar display names", %{account: account} do
+ insert(:contact,
+ account: account,
+ display_name: "John Smith",
+ first_name: "John",
+ last_name: "Smith"
+ )
+
+ insert(:contact,
+ account: account,
+ display_name: "John Smithe",
+ first_name: "John",
+ last_name: "Smithe"
+ )
+
+ assert :ok = run_detection(account.id)
+
+ # Exactly one pending pair is expected; the match fails the test otherwise.
+ assert [candidate] = pending_candidates(account.id)
+ assert "name_match" in candidate.reasons
+ assert candidate.score >= 0.5
+ end
+
+ test "does not match dissimilar names", %{account: account} do
+ insert(:contact,
+ account: account,
+ display_name: "Alice Johnson",
+ first_name: "Alice",
+ last_name: "Johnson"
+ )
+
+ insert(:contact,
+ account: account,
+ display_name: "Bob Williams",
+ first_name: "Bob",
+ last_name: "Williams"
+ )
+
+ assert :ok = run_detection(account.id)
+
+ assert Enum.empty?(pending_candidates(account.id))
+ end
+ end
+
+ describe "email matching" do
+ test "detects contacts sharing the same email", %{
+ account: account,
+ email_type_id: email_type_id
+ } do
+ c1 =
+ insert(:contact,
+ account: account,
+ display_name: "Alice Johnson",
+ first_name: "Alice",
+ last_name: "Johnson"
+ )
+
+ c2 =
+ insert(:contact,
+ account: account,
+ display_name: "Bob Williams",
+ first_name: "Bob",
+ last_name: "Williams"
+ )
+
+ contact_field_fixture(c1, email_type_id, %{"value" => "shared@example.com"})
+ contact_field_fixture(c2, email_type_id, %{"value" => "shared@example.com"})
+
+ assert :ok = run_detection(account.id)
+
+ # The shared address must yield exactly one pending pair.
+ assert [candidate] = pending_candidates(account.id)
+ assert "email_match" in candidate.reasons
+ assert candidate.score >= 0.8
+ end
+
+ test "email matching is case-insensitive", %{account: account, email_type_id: email_type_id} do
+ c1 =
+ insert(:contact,
+ account: account,
+ display_name: "Alice Johnson",
+ first_name: "Alice",
+ last_name: "Johnson"
+ )
+
+ c2 =
+ insert(:contact,
+ account: account,
+ display_name: "Bob Williams",
+ first_name: "Bob",
+ last_name: "Williams"
+ )
+
+ contact_field_fixture(c1, email_type_id, %{"value" => "SHARED@Example.COM"})
+ contact_field_fixture(c2, email_type_id, %{"value" => "shared@example.com"})
+
+ assert :ok = run_detection(account.id)
+
+ # Differently-cased values still produce a single pair.
+ assert [candidate] = pending_candidates(account.id)
+ assert "email_match" in candidate.reasons
+ end
+
+ test "email-only match scores around 0.85", %{account: account, email_type_id: email_type_id} do
+ c1 =
+ insert(:contact,
+ account: account,
+ display_name: "Completely Different",
+ first_name: "Completely",
+ last_name: "Different"
+ )
+
+ c2 =
+ insert(:contact,
+ account: account,
+ display_name: "Totally Unique",
+ first_name: "Totally",
+ last_name: "Unique"
+ )
+
+ contact_field_fixture(c1, email_type_id, %{"value" => "same@email.com"})
+ contact_field_fixture(c2, email_type_id, %{"value" => "same@email.com"})
+
+ assert :ok = run_detection(account.id)
+
+ # With no other signal the score is the bare email base score.
+ assert [candidate] = pending_candidates(account.id)
+ assert candidate.score == 0.85
+ end
+ end
+
+ describe "phone matching" do
+ test "detects contacts sharing the same phone number", %{
+ account: account,
+ phone_type_id: phone_type_id
+ } do
+ c1 =
+ insert(:contact,
+ account: account,
+ display_name: "Alice Johnson",
+ first_name: "Alice",
+ last_name: "Johnson"
+ )
+
+ c2 =
+ insert(:contact,
+ account: account,
+ display_name: "Bob Williams",
+ first_name: "Bob",
+ last_name: "Williams"
+ )
+
+ contact_field_fixture(c1, phone_type_id, %{"value" => "+1-555-1234"})
+ contact_field_fixture(c2, phone_type_id, %{"value" => "+1-555-1234"})
+
+ assert :ok = run_detection(account.id)
+
+ # The shared number must yield exactly one pending pair.
+ assert [candidate] = pending_candidates(account.id)
+ assert "phone_match" in candidate.reasons
+ assert candidate.score >= 0.7
+ end
+
+ test "phone matching normalizes formatting", %{account: account, phone_type_id: phone_type_id} do
+ c1 =
+ insert(:contact,
+ account: account,
+ display_name: "Alice Johnson",
+ first_name: "Alice",
+ last_name: "Johnson"
+ )
+
+ c2 =
+ insert(:contact,
+ account: account,
+ display_name: "Bob Williams",
+ first_name: "Bob",
+ last_name: "Williams"
+ )
+
+ contact_field_fixture(c1, phone_type_id, %{"value" => "+1-555-1234"})
+ contact_field_fixture(c2, phone_type_id, %{"value" => "15551234"})
+
+ assert :ok = run_detection(account.id)
+
+ # Both values reduce to the same digits, so one pair is expected.
+ assert [candidate] = pending_candidates(account.id)
+ assert "phone_match" in candidate.reasons
+ end
+
+ test "phone-only match scores 0.75", %{account: account, phone_type_id: phone_type_id} do
+ c1 =
+ insert(:contact,
+ account: account,
+ display_name: "Completely Different",
+ first_name: "Completely",
+ last_name: "Different"
+ )
+
+ c2 =
+ insert(:contact,
+ account: account,
+ display_name: "Totally Unique",
+ first_name: "Totally",
+ last_name: "Unique"
+ )
+
+ contact_field_fixture(c1, phone_type_id, %{"value" => "5559876"})
+ contact_field_fixture(c2, phone_type_id, %{"value" => "5559876"})
+
+ assert :ok = run_detection(account.id)
+
+ # With no other signal the score is the bare phone base score.
+ assert [candidate] = pending_candidates(account.id)
+ assert candidate.score == 0.75
+ end
+ end
+
+ describe "address matching" do
+ test "detects contacts sharing the same address", %{account: account} do
+ c1 =
+ insert(:contact,
+ account: account,
+ display_name: "Alice Johnson",
+ first_name: "Alice",
+ last_name: "Johnson"
+ )
+
+ c2 =
+ insert(:contact,
+ account: account,
+ display_name: "Bob Williams",
+ first_name: "Bob",
+ last_name: "Williams"
+ )
+
+ address_fixture(c1, %{"line1" => "123 Main St", "postal_code" => "90210"})
+ address_fixture(c2, %{"line1" => "123 Main St", "postal_code" => "90210"})
+
+ assert :ok = run_detection(account.id)
+
+ # The shared address must yield exactly one pending pair.
+ assert [candidate] = pending_candidates(account.id)
+ assert "address_match" in candidate.reasons
+ assert candidate.score == 0.6
+ end
+
+ test "address matching is case-insensitive and trims whitespace", %{account: account} do
+ c1 =
+ insert(:contact,
+ account: account,
+ display_name: "Alice Johnson",
+ first_name: "Alice",
+ last_name: "Johnson"
+ )
+
+ c2 =
+ insert(:contact,
+ account: account,
+ display_name: "Bob Williams",
+ first_name: "Bob",
+ last_name: "Williams"
+ )
+
+ address_fixture(c1, %{"line1" => " 123 Main St ", "postal_code" => "90210"})
+ address_fixture(c2, %{"line1" => "123 MAIN ST", "postal_code" => "90210"})
+
+ assert :ok = run_detection(account.id)
+
+ # Padding and letter case differ, yet one pair is expected.
+ assert [candidate] = pending_candidates(account.id)
+ assert "address_match" in candidate.reasons
+ end
+
+ test "does not match on postal_code alone", %{account: account} do
+ c1 =
+ insert(:contact,
+ account: account,
+ display_name: "Alice Johnson",
+ first_name: "Alice",
+ last_name: "Johnson"
+ )
+
+ c2 =
+ insert(:contact,
+ account: account,
+ display_name: "Bob Williams",
+ first_name: "Bob",
+ last_name: "Williams"
+ )
+
+ address_fixture(c1, %{"line1" => "123 Main St", "postal_code" => "90210"})
+ address_fixture(c2, %{"line1" => "456 Oak Ave", "postal_code" => "90210"})
+
+ assert :ok = run_detection(account.id)
+
+ assert Enum.empty?(pending_candidates(account.id))
+ end
+ end
+
+ describe "combined signals" do
+ test "email + name match scores higher than email alone", %{
+ account: account,
+ email_type_id: email_type_id
+ } do
+ c1 =
+ insert(:contact,
+ account: account,
+ display_name: "John Smith",
+ first_name: "John",
+ last_name: "Smith"
+ )
+
+ c2 =
+ insert(:contact,
+ account: account,
+ display_name: "John Smithe",
+ first_name: "John",
+ last_name: "Smithe"
+ )
+
+ contact_field_fixture(c1, email_type_id, %{"value" => "john@example.com"})
+ contact_field_fixture(c2, email_type_id, %{"value" => "john@example.com"})
+
+ assert :ok = run_detection(account.id)
+
+ # Both signals must land on the same single pair.
+ assert [candidate] = pending_candidates(account.id)
+ assert "name_match" in candidate.reasons
+ assert "email_match" in candidate.reasons
+ # email base (0.85) + bonus for name signal (0.05) = 0.90
+ assert candidate.score > 0.85
+ end
+
+ test "email + phone match boosts score", %{
+ account: account,
+ email_type_id: email_type_id,
+ phone_type_id: phone_type_id
+ } do
+ c1 =
+ insert(:contact,
+ account: account,
+ display_name: "Completely Different",
+ first_name: "Completely",
+ last_name: "Different"
+ )
+
+ c2 =
+ insert(:contact,
+ account: account,
+ display_name: "Totally Unique",
+ first_name: "Totally",
+ last_name: "Unique"
+ )
+
+ contact_field_fixture(c1, email_type_id, %{"value" => "same@email.com"})
+ contact_field_fixture(c2, email_type_id, %{"value" => "same@email.com"})
+ contact_field_fixture(c1, phone_type_id, %{"value" => "5551234"})
+ contact_field_fixture(c2, phone_type_id, %{"value" => "5551234"})
+
+ assert :ok = run_detection(account.id)
+
+ # Both signals must land on the same single pair.
+ assert [candidate] = pending_candidates(account.id)
+ assert "email_match" in candidate.reasons
+ assert "phone_match" in candidate.reasons
+ # email base (0.85) + 1 bonus (0.05) = 0.90
+ assert candidate.score == 0.9
+ end
+ end
+
+ describe "edge cases" do
+    # One of the two contacts sharing an email has `deleted_at` set; detection
+    # must not report the pair.
+    test "skips soft-deleted contacts", %{account: account, email_type_id: email_type_id} do
+      live_attrs = [
+        account: account,
+        display_name: "Alice Johnson",
+        first_name: "Alice",
+        last_name: "Johnson"
+      ]
+
+      deleted_attrs = [
+        account: account,
+        display_name: "Bob Williams",
+        first_name: "Bob",
+        last_name: "Williams",
+        deleted_at: DateTime.utc_now(:second)
+      ]
+
+      live = insert(:contact, live_attrs)
+      deleted = insert(:contact, deleted_attrs)
+
+      # Both contacts get the same email value.
+      Enum.each([live, deleted], fn contact ->
+        contact_field_fixture(contact, email_type_id, %{"value" => "shared@example.com"})
+      end)
+
+      assert :ok = run_detection(account.id)
+
+      assert [] = pending_candidates(account.id)
+    end
+
+    # Running detection twice over the same data must leave exactly one
+    # pending candidate for the pair.
+    test "does not re-insert existing pending candidates", %{
+      account: account,
+      email_type_id: email_type_id
+    } do
+      alice =
+        insert(
+          :contact,
+          account: account,
+          display_name: "Alice Johnson",
+          first_name: "Alice",
+          last_name: "Johnson"
+        )
+
+      bob =
+        insert(
+          :contact,
+          account: account,
+          display_name: "Bob Williams",
+          first_name: "Bob",
+          last_name: "Williams"
+        )
+
+      shared_email = %{"value" => "shared@example.com"}
+      contact_field_fixture(alice, email_type_id, shared_email)
+      contact_field_fixture(bob, email_type_id, shared_email)
+
+      # First run
+      assert :ok = run_detection(account.id)
+      assert [_first_run_candidate] = pending_candidates(account.id)
+
+      # Second run should not create duplicates
+      assert :ok = run_detection(account.id)
+      assert [_second_run_candidate] = pending_candidates(account.id)
+    end
+
+    # After the only pending candidate is dismissed, a second detection run
+    # must not bring the pair back as pending.
+    test "does not re-insert dismissed candidates", %{
+      account: account,
+      email_type_id: email_type_id
+    } do
+      alice_attrs = [
+        account: account,
+        display_name: "Alice Johnson",
+        first_name: "Alice",
+        last_name: "Johnson"
+      ]
+
+      bob_attrs = [
+        account: account,
+        display_name: "Bob Williams",
+        first_name: "Bob",
+        last_name: "Williams"
+      ]
+
+      alice = insert(:contact, alice_attrs)
+      bob = insert(:contact, bob_attrs)
+
+      Enum.each([alice, bob], fn contact ->
+        contact_field_fixture(contact, email_type_id, %{"value" => "shared@example.com"})
+      end)
+
+      # First run, then dismiss
+      assert :ok = run_detection(account.id)
+      [pending] = pending_candidates(account.id)
+      # NOTE(review): the return value of dismiss_candidate/1 is discarded here;
+      # consider asserting on it once its return shape is confirmed.
+      Kith.DuplicateDetection.dismiss_candidate(pending)
+
+      # Second run should not re-create dismissed candidate
+      assert :ok = run_detection(account.id)
+      assert [] = pending_candidates(account.id)
+    end
+
+    # Contacts in two different accounts share an email value; neither account
+    # may end up with a pending candidate.
+    test "account isolation — only detects within same account", %{email_type_id: email_type_id} do
+      # Both accounts are created here instead of using the shared context account.
+      {first_account, _} = setup_account()
+      {second_account, _} = setup_account()
+
+      alice =
+        insert(
+          :contact,
+          account: first_account,
+          display_name: "Alice Johnson",
+          first_name: "Alice",
+          last_name: "Johnson"
+        )
+
+      bob =
+        insert(
+          :contact,
+          account: second_account,
+          display_name: "Bob Williams",
+          first_name: "Bob",
+          last_name: "Williams"
+        )
+
+      shared_email = %{"value" => "shared@example.com"}
+      contact_field_fixture(alice, email_type_id, shared_email)
+      contact_field_fixture(bob, email_type_id, shared_email)
+
+      # Run detection for each account before inspecting either result.
+      assert :ok = run_detection(first_account.id)
+      assert :ok = run_detection(second_account.id)
+
+      assert [] = pending_candidates(first_account.id)
+      assert [] = pending_candidates(second_account.id)
+    end
+
+    # With a single contact in the account there is nothing to pair up:
+    # detection must return :ok and leave no pending candidates.
+    test "handles fewer than 2 contacts gracefully", %{account: account} do
+      lone_contact_attrs = [
+        account: account,
+        display_name: "Only Contact",
+        first_name: "Only",
+        last_name: "Contact"
+      ]
+
+      insert(:contact, lone_contact_attrs)
+
+      assert :ok = run_detection(account.id)
+
+      assert [] = pending_candidates(account.id)
+    end
+
+    # No contacts are inserted by this test; detection must still return :ok
+    # and report no pending candidates.
+    test "handles zero contacts gracefully", %{account: account} do
+      %{id: account_id} = account
+
+      assert :ok = run_detection(account_id)
+
+      assert [] = pending_candidates(account_id)
+    end
+ end
+
+ describe "cron mode" do
+    # The worker is performed with empty args (no account_id); each of the two
+    # accounts, seeded with a near-identical name pair, must end up with one
+    # pending candidate.
+    test "runs for all accounts when no account_id provided" do
+      {first_account, _} = setup_account()
+      {second_account, _} = setup_account()
+
+      # One near-identical name pair per account, inserted in this order.
+      contacts_to_seed = [
+        [account: first_account, display_name: "John Smith", first_name: "John", last_name: "Smith"],
+        [account: first_account, display_name: "John Smithe", first_name: "John", last_name: "Smithe"],
+        [account: second_account, display_name: "Jane Doe", first_name: "Jane", last_name: "Doe"],
+        [account: second_account, display_name: "Jane Doee", first_name: "Jane", last_name: "Doee"]
+      ]
+
+      Enum.each(contacts_to_seed, fn attrs -> insert(:contact, attrs) end)
+
+      assert :ok = perform_job(DuplicateDetectionWorker, %{})
+
+      assert [_first_account_candidate] = pending_candidates(first_account.id)
+      assert [_second_account_candidate] = pending_candidates(second_account.id)
+    end
+ end
+end