Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 8 additions & 0 deletions changelog/unreleased/SOLR_18197_nest_path_easy_of_use.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
title: Add root document query shortcut support to NestPathField
type: added
authors:
- name: Abhishek Umarjikar
nick: abumarjikar
links:
- name: SOLR-18197
url: https://issues.apache.org/jira/browse/SOLR-18197
23 changes: 23 additions & 0 deletions solr/core/src/java/org/apache/solr/schema/NestPathField.java
Original file line number Diff line number Diff line change
Expand Up @@ -22,8 +22,13 @@
import org.apache.lucene.analysis.core.KeywordTokenizerFactory;
import org.apache.lucene.analysis.custom.CustomAnalyzer;
import org.apache.lucene.analysis.pattern.PatternReplaceFilterFactory;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.MatchAllDocsQuery;
import org.apache.lucene.search.Query;
import org.apache.solr.analysis.TokenizerChain;
import org.apache.solr.common.SolrException;
import org.apache.solr.search.QParser;

/**
* To be used for field {@link IndexSchema#NEST_PATH_FIELD_NAME} for enhanced nested doc
Expand Down Expand Up @@ -62,4 +67,22 @@ public void setArgs(IndexSchema schema, Map<String, String> args) {
setIndexAnalyzer(new TokenizerChain(customAnalyzer));
// leave queryAnalyzer as literal
}

/**
 * Builds the query for {@code externalVal} against this field, treating the empty string and
 * {@code "/"} as a shortcut rather than as literal values.
 *
 * <p>For either shortcut value the result is a boolean query that requires a match-all clause and
 * prohibits whatever this field type's existence query matches. Every other value is handed to
 * the superclass implementation unchanged.
 *
 * @param parser the query parser in use; passed through to the existence query and to the
 *     superclass
 * @param field the schema field being queried
 * @param externalVal the value to query for; must not be {@code null}
 * @return the shortcut query for {@code ""} or {@code "/"}, otherwise the superclass's field query
 * @throws SolrException with {@code BAD_REQUEST} if {@code externalVal} is {@code null}
 */
@Override
public Query getFieldQuery(QParser parser, SchemaField field, String externalVal) {
  if (externalVal == null) {
    throw new SolrException(
        SolrException.ErrorCode.BAD_REQUEST,
        "Field " + field.getName() + " cannot be queried with a null.");
  }

  final boolean isRootShortcut = externalVal.isEmpty() || "/".equals(externalVal);
  if (!isRootShortcut) {
    // Ordinary path value: no special handling needed.
    return super.getFieldQuery(parser, field, externalVal);
  }

  // Shortcut: everything (MUST) minus documents matched by the existence query (MUST_NOT).
  final BooleanQuery.Builder shortcutQuery = new BooleanQuery.Builder();
  shortcutQuery.add(MatchAllDocsQuery.INSTANCE, BooleanClause.Occur.MUST);
  final Query hasNestPath = field.getType().getExistenceQuery(parser, field);
  shortcutQuery.add(hasNestPath, BooleanClause.Occur.MUST_NOT);
  return shortcutQuery.build();
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -171,7 +171,7 @@ protected Query parseUsingParentPath(String parentPath, String childPath) throws

if (parsedChildQuery.clauses().isEmpty()) { // i.e. all children
// no block-join needed; just return all "parent" docs at this level
return wrapWithParentPathConstraint(parentPath, new MatchAllDocsQuery());
return wrapWithParentPathConstraint(parentPath, MatchAllDocsQuery.INSTANCE);
}

// allParents filter: (*:* -{!prefix f="_nest_path_" v="<parentPath>/"})
Expand Down Expand Up @@ -209,15 +209,16 @@ protected Query parseUsingParentPath(String parentPath, String childPath) throws
* /}): {@code (*:* -_nest_path_:*)}
*/
protected Query buildAllParentsFilterFromPath(String parentPath) {
final Query excludeQuery;
if (parentPath.equals("/")) {
excludeQuery = newNestPathExistsQuery();
} else {
excludeQuery = new PrefixQuery(new Term(IndexSchema.NEST_PATH_FIELD_NAME, parentPath + "/"));
if (parentPath.equals("/") || parentPath.isEmpty()) {
final SchemaField nestPathField = req.getSchema().getField(IndexSchema.NEST_PATH_FIELD_NAME);
return nestPathField.getType().getFieldQuery(this, nestPathField, parentPath);
}

return new BooleanQuery.Builder()
.add(new MatchAllDocsQuery(), Occur.MUST)
.add(excludeQuery, Occur.MUST_NOT)
.add(MatchAllDocsQuery.INSTANCE, Occur.MUST)
.add(
new PrefixQuery(new Term(IndexSchema.NEST_PATH_FIELD_NAME, parentPath + "/")),
Occur.MUST_NOT)
.build();
}

Expand Down
26 changes: 25 additions & 1 deletion solr/core/src/test/org/apache/solr/search/QueryEqualityTest.java
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.FuzzyQuery;
import org.apache.lucene.search.MatchAllDocsQuery;
import org.apache.lucene.search.NamedMatches;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.TermInSetQuery;
Expand Down Expand Up @@ -697,7 +698,7 @@ public void testBlockJoin() throws Exception {
final String parent_path = "/aa/bb";
try (SolrQueryRequest req =
req(
"parent_filt", "(*:* -{!prefix f='_nest_path_' v='" + parent_path + "/'})",
"parent_filt", "({!field f='_nest_path_' v='" + parent_path + "/'})",
Comment on lines -700 to +701

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

That change has a different underlying query & semantic effect. Formerly it found all docs separate from that parent path, thus matched not only the root doc but also /aa and /ff if that exists, say. You changed it to only match documents at /aa exactly.

"child_q", "(+foo +{!prefix f='_nest_path_' v='" + parent_path + "/'})",
"parent_q", "(+bar +{!field f='_nest_path_' v='" + parent_path + "'})")) {

Expand Down Expand Up @@ -2000,6 +2001,29 @@ public void testHashRangeQuery() throws Exception {
"{!hash_range l='107347968' u='214695935' f='x_id'}");
}

@Test
public void testNestPathRootShortcut() throws Exception {
try (SolrQueryRequest req = req("df", "_nest_path_")) {
Query parsedQ =
assertQueryEqualsAndReturn(
null, req, "{!field f=_nest_path_ v=''}", "{!field f=_nest_path_}/");

var schemaField = req.getSchema().getField("_nest_path_");
Query expectedQ =
new BooleanQuery.Builder()
.add(new MatchAllDocsQuery(), BooleanClause.Occur.MUST)
.add(
schemaField.getType().getExistenceQuery(null, schemaField),
BooleanClause.Occur.MUST_NOT)
.build();

assertEquals(
"The root shortcut query did not form the expected match-all minus existence structure",
expectedQ,
parsedQ);
}
Comment thread
abumarjikar marked this conversation as resolved.
}

// Override req to add df param
public static SolrQueryRequest req(String... q) {
return SolrTestCaseJ4.req(q, "df", "text");
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -793,7 +793,7 @@ private SolrParams childQueryMaker(String parent_path, String inner_parent_query
if (verbose) {
return params(
"q", "{!child of=$parent_filt v=$parent_q})",
"parent_filt", "(*:* -{!prefix f='_nest_path_' v='" + parent_path + "/'})",
"parent_filt", "({!field f='_nest_path_' v='" + parent_path + "/'})",
"parent_q",
"(+" + inner_parent_query + " +{!field f='_nest_path_' v='" + parent_path + "'})");
} else {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -537,7 +537,7 @@ Here is an example of a `knn` search using a `childrenOf`:

[source,text]
?q={!knn f=vector topK=3 childrenOf=$allParents}[1.0, 2.0, 3.0, 4.0]
&allParents=*:* -_nest_path_:*
&allParents={!field f=_nest_path_ v='/'}

The search results retrieved are the k=3 nearest documents to the vector in input `[1.0, 2.0, 3.0, 4.0]`, each of them with a different parent. The 'childrenOf' parameter must return all valid parents to guarantee the correct functioning of the query.

Expand All @@ -558,7 +558,7 @@ Here is an example of a `knn` search using a `parents.preFilter`:

[source,text]
?q={!knn f=vector topK=3 parents.preFilter=$someParents childrenOf=$allParents}[1.0, 2.0, 3.0, 4.0]
&allParents=*:* -_nest_path_:*
&allParents={!field f=_nest_path_ v='/'}
&someParents=color_s:RED

The search results retrieved are the k=3 nearest documents to the vector in input `[1.0, 2.0, 3.0, 4.0]`, each of them with a different parent. Only the documents with a parent that satisfy the 'color_s:RED' condition are considered candidates for the ANN search.
Expand Down Expand Up @@ -603,7 +603,7 @@ So you should query a multivalued vector fields following the same syntax:
[source,text]
?q={!parent which=$allParents score=max v=$children.q}
&children.q={!knn f=vector_multivalued topK=3 parents.preFilter=$someParents childrenOf=$allParents}[1.0, 2.0, 3.0, 4.0]
&allParents=*:* -_nest_path_:*
&allParents={!field f=_nest_path_ v='/'}
&someParents=color_s:RED

In terms of rendering the results, you need the child transformer if you want to output them flat (you can choose to only return the best vector per result or all vectors):
Expand Down
Comment thread
abumarjikar marked this conversation as resolved.
Original file line number Diff line number Diff line change
Expand Up @@ -274,6 +274,21 @@ $ curl 'http://localhost:8983/solr/gettingstarted/select' -d 'omitHeader=true' -
}}
----

=== Root Document Query Shortcut

When working with nested documents, you often need to restrict search results to top-level "root" documents, that is, documents that have no `_nest_path_` value because they are not nested under any parent.

Instead of writing the verbose negated existence filter `*:* -_nest_path_:*`, you can use the root document shortcut supported by the `NestPathField` type. Querying the field with either a single forward slash (`/`) or an empty string (`''`) produces the equivalent query: all documents minus those that have a `_nest_path_` value (i.e., minus all nested child documents):

[source,text]
----
# Targets only top-level root documents via the slash shortcut
fq={!field f=_nest_path_}/

# Alternative equivalent syntax using an empty parameter string
fq={!field f=_nest_path_ v=''}
----

=== Nested Vectors search through Block Join Query Parsers and Child Doc Transformer


Expand All @@ -293,7 +308,7 @@ An example:
[source,text]
?q={!parent which=$allParents score=max v=$children.q}&
children.q={!knn f=vector topK=3 parents.preFilter=$someParents childrenOf=$allParents}[1.0, 2.0, 3.0, 4.0]&
allParents=*:* -_nest_path_:*&
allParents={!field f=_nest_path_ v=''}&
someParents=color_s:RED&
fl=id,score,vectors,vector,[child fl=vector childFilter=$children.q]

Expand Down