diff --git a/changelog/unreleased/SOLR_18197_nest_path_easy_of_use.yml b/changelog/unreleased/SOLR_18197_nest_path_easy_of_use.yml new file mode 100644 index 000000000000..b2b1678da6cb --- /dev/null +++ b/changelog/unreleased/SOLR_18197_nest_path_easy_of_use.yml @@ -0,0 +1,8 @@ +title: Add root document query shortcut support to NestPathField +type: added +authors: + - name: Abhishek Umarjikar + nick: abumarjikar +links: + - name: SOLR-18197 + url: https://issues.apache.org/jira/browse/SOLR-18197 diff --git a/solr/core/src/java/org/apache/solr/schema/NestPathField.java b/solr/core/src/java/org/apache/solr/schema/NestPathField.java index d34e532abbb5..136e0493e3cb 100644 --- a/solr/core/src/java/org/apache/solr/schema/NestPathField.java +++ b/solr/core/src/java/org/apache/solr/schema/NestPathField.java @@ -22,8 +22,13 @@ import org.apache.lucene.analysis.core.KeywordTokenizerFactory; import org.apache.lucene.analysis.custom.CustomAnalyzer; import org.apache.lucene.analysis.pattern.PatternReplaceFilterFactory; +import org.apache.lucene.search.BooleanClause; +import org.apache.lucene.search.BooleanQuery; +import org.apache.lucene.search.MatchAllDocsQuery; +import org.apache.lucene.search.Query; import org.apache.solr.analysis.TokenizerChain; import org.apache.solr.common.SolrException; +import org.apache.solr.search.QParser; /** * To be used for field {@link IndexSchema#NEST_PATH_FIELD_NAME} for enhanced nested doc @@ -62,4 +67,22 @@ public void setArgs(IndexSchema schema, Map args) { setIndexAnalyzer(new TokenizerChain(customAnalyzer)); // leave queryAnalyzer as literal } + + @Override + public Query getFieldQuery(QParser parser, SchemaField field, String externalVal) { + if (externalVal == null) { + throw new SolrException( + SolrException.ErrorCode.BAD_REQUEST, + "Field " + field.getName() + " cannot be queried with a null."); + } + + if (externalVal.isEmpty() || "/".equals(externalVal)) { + return new BooleanQuery.Builder() + .add(MatchAllDocsQuery.INSTANCE, 
BooleanClause.Occur.MUST) + .add(field.getType().getExistenceQuery(parser, field), BooleanClause.Occur.MUST_NOT) + .build(); + } + + return super.getFieldQuery(parser, field, externalVal); + } } diff --git a/solr/core/src/java/org/apache/solr/search/join/BlockJoinParentQParser.java b/solr/core/src/java/org/apache/solr/search/join/BlockJoinParentQParser.java index 6e7dbaf4c63e..4a6176483495 100644 --- a/solr/core/src/java/org/apache/solr/search/join/BlockJoinParentQParser.java +++ b/solr/core/src/java/org/apache/solr/search/join/BlockJoinParentQParser.java @@ -171,7 +171,7 @@ protected Query parseUsingParentPath(String parentPath, String childPath) throws if (parsedChildQuery.clauses().isEmpty()) { // i.e. all children // no block-join needed; just return all "parent" docs at this level - return wrapWithParentPathConstraint(parentPath, new MatchAllDocsQuery()); + return wrapWithParentPathConstraint(parentPath, MatchAllDocsQuery.INSTANCE); } // allParents filter: (*:* -{!prefix f="_nest_path_" v="/"}) @@ -209,15 +209,16 @@ protected Query parseUsingParentPath(String parentPath, String childPath) throws * /}): {@code (*:* -_nest_path_:*)} */ protected Query buildAllParentsFilterFromPath(String parentPath) { - final Query excludeQuery; - if (parentPath.equals("/")) { - excludeQuery = newNestPathExistsQuery(); - } else { - excludeQuery = new PrefixQuery(new Term(IndexSchema.NEST_PATH_FIELD_NAME, parentPath + "/")); + if (parentPath.equals("/") || parentPath.isEmpty()) { + final SchemaField nestPathField = req.getSchema().getField(IndexSchema.NEST_PATH_FIELD_NAME); + return nestPathField.getType().getFieldQuery(this, nestPathField, parentPath); } + return new BooleanQuery.Builder() - .add(new MatchAllDocsQuery(), Occur.MUST) - .add(excludeQuery, Occur.MUST_NOT) + .add(MatchAllDocsQuery.INSTANCE, Occur.MUST) + .add( + new PrefixQuery(new Term(IndexSchema.NEST_PATH_FIELD_NAME, parentPath + "/")), + Occur.MUST_NOT) .build(); } diff --git 
a/solr/core/src/test/org/apache/solr/search/QueryEqualityTest.java b/solr/core/src/test/org/apache/solr/search/QueryEqualityTest.java index ecad83f39de0..3c9bdc39261d 100644 --- a/solr/core/src/test/org/apache/solr/search/QueryEqualityTest.java +++ b/solr/core/src/test/org/apache/solr/search/QueryEqualityTest.java @@ -24,6 +24,7 @@ import org.apache.lucene.search.BooleanClause; import org.apache.lucene.search.BooleanQuery; import org.apache.lucene.search.FuzzyQuery; +import org.apache.lucene.search.MatchAllDocsQuery; import org.apache.lucene.search.NamedMatches; import org.apache.lucene.search.Query; import org.apache.lucene.search.TermInSetQuery; @@ -2000,6 +2001,30 @@ public void testHashRangeQuery() throws Exception { "{!hash_range l='107347968' u='214695935' f='x_id'}"); } + /** Both root shortcuts ('' and '/') must parse to the same query: match-all minus the _nest_path_ existence query. */ + @Test + public void testNestPathRootShortcut() throws Exception { + try (SolrQueryRequest req = req("df", "_nest_path_")) { + Query parsedQ = + assertQueryEqualsAndReturn( + null, req, "{!field f=_nest_path_ v=''}", "{!field f=_nest_path_}/"); + + var schemaField = req.getSchema().getField("_nest_path_"); + Query expectedQ = + new BooleanQuery.Builder() + .add(MatchAllDocsQuery.INSTANCE, BooleanClause.Occur.MUST) + .add( + schemaField.getType().getExistenceQuery(null, schemaField), + BooleanClause.Occur.MUST_NOT) + .build(); + + assertEquals( + "The root shortcut query did not form the expected match-all minus existence structure", + expectedQ, + parsedQ); + } + } + // Override req to add df param public static SolrQueryRequest req(String... 
q) { return SolrTestCaseJ4.req(q, "df", "text"); diff --git a/solr/solr-ref-guide/modules/query-guide/pages/dense-vector-search.adoc b/solr/solr-ref-guide/modules/query-guide/pages/dense-vector-search.adoc index 2681cc598409..99392789c8fe 100644 --- a/solr/solr-ref-guide/modules/query-guide/pages/dense-vector-search.adoc +++ b/solr/solr-ref-guide/modules/query-guide/pages/dense-vector-search.adoc @@ -537,7 +537,7 @@ Here is an example of a `knn` search using a `childrenOf`: [source,text] ?q={!knn f=vector topK=3 childrenOf=$allParents}[1.0, 2.0, 3.0, 4.0] -&allParents=*:* -_nest_path_:* +&allParents={!field f=_nest_path_ v='/'} The search results retrieved are the k=3 nearest documents to the vector in input `[1.0, 2.0, 3.0, 4.0]`, each of them with a different parent. The 'childrenOf' parameter must return all valid parents to guarantee the correct functioning of the query. 
@@ -558,7 +558,7 @@ Here is an example of a `knn` search using a `parents.preFilter`: [source,text] ?q={!knn f=vector topK=3 parents.preFilter=$someParents childrenOf=$allParents}[1.0, 2.0, 3.0, 4.0] -&allParents=*:* -_nest_path_:* +&allParents={!field f=_nest_path_ v='/'} &someParents=color_s:RED The search results retrieved are the k=3 nearest documents to the vector in input `[1.0, 2.0, 3.0, 4.0]`, each of them with a different parent. Only the documents with a parent that satisfy the 'color_s:RED' condition are considered candidates for the ANN search. @@ -603,7 +603,7 @@ So you should query a multivalued vector fields following the same syntax: [source,text] ?q={!parent which=$allParents score=max v=$children.q} &children.q={!knn f=vector_multivalued topK=3 parents.preFilter=$someParents childrenOf=$allParents}[1.0, 2.0, 3.0, 4.0] -&allParents=*:* -_nest_path_:* +&allParents={!field f=_nest_path_ v='/'} &someParents=color_s:RED In terms of rendering the results, you need the child transformer if you want to output them flat (you can choose to only return the best vector per result or all vectors): diff --git a/solr/solr-ref-guide/modules/query-guide/pages/searching-nested-documents.adoc b/solr/solr-ref-guide/modules/query-guide/pages/searching-nested-documents.adoc index 81220d51318a..879663b17acf 100644 --- a/solr/solr-ref-guide/modules/query-guide/pages/searching-nested-documents.adoc +++ b/solr/solr-ref-guide/modules/query-guide/pages/searching-nested-documents.adoc @@ -274,6 +274,21 @@ $ curl 'http://localhost:8983/solr/gettingstarted/select' -d 'omitHeader=true' - }} ---- +=== Root Document Query Shortcut + +When working with hierarchical nested documents, you may frequently need to restrict your search results to top-level "root" documents (documents that are not nested under any parent document). 
+ +Instead of using a verbose negative field existence filter (like `*:* -_nest_path_:*`), the `NestPathField` type supports an explicit root document query shortcut. Passing either a single forward slash (`/`) or an empty string (`''`) automatically constructs a query that matches all documents except nested child documents (that is, any document that has a `_nest_path_` value): + +[source,text] +---- +# Targets only top-level root documents via the slash shortcut +fq={!field f=_nest_path_}/ + +# Alternative equivalent syntax using an empty parameter string +fq={!field f=_nest_path_ v=''} +---- + === Nested Vectors search through Block Join Query Parsers and Child Doc Transformer @@ -293,7 +308,7 @@ An example: [source,text] ?q={!parent which=$allParents score=max v=$children.q}& children.q={!knn f=vector topK=3 parents.preFilter=$someParents childrenOf=$allParents}[1.0, 2.0, 3.0, 4.0]& -allParents=*:* -_nest_path_:*& +allParents={!field f=_nest_path_ v=''}& someParents=color_s:RED& fl=id,score,vectors,vector,[child fl=vector childFilter=$children.q]