From 9d1f725479c65572db4aecd65bb5f1a220bc6736 Mon Sep 17 00:00:00 2001
From: jvoisin
Date: Mon, 1 Jun 2026 10:33:02 +0200
Subject: [PATCH] perf: skip mapNodes dedup map in FindNodes

Each source node passed to FindNodes maps to at most itself (when it is a
descendant of the current selection), so there is nothing to deduplicate
across distinct sources. Replace the mapNodes call and its per-node
callback/slice allocations with a direct append loop.

Duplicate input nodes are still discarded with a cheap isInSlice check
(run before the more expensive sliceContains tree walk) so the resulting
Selection stays a proper set, matching the previous behaviour.

benchstat: sec/op -43.68% B/op -82.00% allocs 85 -> 9

Add TestFindNodesDuplicateInput to cover the duplicate-input case.
---
 traversal.go      | 17 ++++++++++++-----
 traversal_test.go | 10 ++++++++++
 2 files changed, 22 insertions(+), 5 deletions(-)

diff --git a/traversal.go b/traversal.go
index c495eea4..948e68a9 100644
--- a/traversal.go
+++ b/traversal.go
@@ -51,12 +51,19 @@ func (s *Selection) FindSelection(sel *Selection) *Selection {
 // Selection, filtered by some nodes. It returns a new Selection object
 // containing these matched elements.
 func (s *Selection) FindNodes(nodes ...*html.Node) *Selection {
-	return pushStack(s, mapNodes(nodes, func(i int, n *html.Node) []*html.Node {
-		if sliceContains(s.Nodes, n) {
-			return []*html.Node{n}
+	// Each source node maps to at most itself (when it is a descendant of the
+	// current selection), so there is nothing to deduplicate across distinct
+	// sources. Skip mapNodes' callback indirection and append matches directly,
+	// while still discarding duplicate input nodes so the resulting Selection
+	// stays a proper set. The cheap isInSlice check runs first so duplicate
+	// inputs short-circuit before the more expensive sliceContains tree walk.
+	var result []*html.Node
+	for _, n := range nodes {
+		if !isInSlice(result, n) && sliceContains(s.Nodes, n) {
+			result = append(result, n)
 		}
-		return nil
-	}))
+	}
+	return pushStack(s, result)
 }
 
 // Contents gets the children of each element in the Selection,
diff --git a/traversal_test.go b/traversal_test.go
index 04383a41..627bcc57 100644
--- a/traversal_test.go
+++ b/traversal_test.go
@@ -36,6 +36,16 @@ func TestFindBig(t *testing.T) {
 	assertLength(t, sel3.Nodes, 248)
 }
 
+func TestFindNodesDuplicateInput(t *testing.T) {
+	doc := DocW()
+	sel := doc.Find("body")
+	span := doc.Find("span").Nodes[0]
+	// Duplicate input nodes must be discarded so the resulting Selection
+	// stays a proper set.
+	sel2 := sel.FindNodes(span, span)
+	assertLength(t, sel2.Nodes, 1)
+}
+
 func TestChainedFind(t *testing.T) {
 	sel := Doc().Find("div.hero-unit").Find(".row-fluid")
 	assertLength(t, sel.Nodes, 4)