Skip to content

[SOLR-17726] Fix CloudMLTQParser to support copyField in qf #3328

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 4 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -67,7 +67,8 @@ protected Query likeDoc(MoreLikeThis moreLikeThis, SolrDocument doc) throws IOEx
Map<String, Collection<Object>> filteredDocument = new HashMap<>();

for (String field : moreLikeThis.getFieldNames()) {
Collection<Object> fieldValues = doc.getFieldValues(field);
Collection<Object> fieldValues = getFieldOrCopyFieldValues(doc, field);

if (fieldValues != null) {
Collection<Object> values = new ArrayList<>();
for (Object val : fieldValues) {
Expand Down Expand Up @@ -110,4 +111,22 @@ private SolrDocument getDocument(String id) {

return (SolrDocument) response.get("doc");
}

/**
 * Resolves the values to feed into the MLT query for {@code field}.
 *
 * <p>If the document carries values for {@code field} itself, those are returned as-is.
 * Otherwise the values of every copyField source of {@code field} (as reported by the request
 * schema) that is present on the document are concatenated.
 *
 * @param doc the source document to read values from
 * @param field the field name requested in the MLT query (qf)
 * @return the field's own values, or the combined values of its copyField sources, or {@code
 *     null} when neither yields any value
 */
private Collection<Object> getFieldOrCopyFieldValues(SolrDocument doc, String field) {
  Collection<Object> directValues = doc.getFieldValues(field);
  if (directValues != null) {
    return directValues;
  }

  // Fields created using copyField are not included in documents returned by RealTime Get.
  // When a copyField destination is named in the MLT query (qf), fall back to the values of
  // its source field(s); with several sources, all of their values are merged together.
  Collection<Object> mergedValues = new ArrayList<>();
  for (String sourceField : req.getSchema().getCopySources(field)) {
    Collection<Object> valuesOfSource = doc.getFieldValues(sourceField);
    if (valuesOfSource == null) {
      continue;
    }
    mergedValues.addAll(valuesOfSource);
  }

  if (mergedValues.isEmpty()) {
    // Keep the "no values" signal identical to a missing field.
    return null;
  }
  return mergedValues;
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -230,6 +230,18 @@
<field name="payload" type="sortable_binary" indexed="false"
stored="true" multiValued="false"/>

<!-- to test copyField in MLT qf -->
<!-- case 1: stored copyField destination fed by a single source (copyfield_source) -->
<field name="copyfield_dest" type="nametext" indexed="true" stored="true" multiValued="true"/>
<copyField source="copyfield_source" dest="copyfield_dest" />

<!-- case 2: same single source, but the destination is indexed only (stored="false") -->
<field name="copyfield_dest_not_stored" type="nametext" indexed="true" stored="false" multiValued="true"/>
<copyField source="copyfield_source" dest="copyfield_dest_not_stored" />

<!-- case 3: one destination fed by two sources (copyfield_source and copyfield_source_multiple) -->
<field name="copyfield_source_multiple" type="string" indexed="true" stored="true" multiValued="true"/>
<field name="copyfield_dest_multiple" type="nametext" indexed="true" stored="true" multiValued="true"/>
<copyField source="copyfield_source" dest="copyfield_dest_multiple" />
<copyField source="copyfield_source_multiple" dest="copyfield_dest_multiple" />

<!-- for versioning -->
<field name="_version_" type="long" indexed="true" stored="true"/>
<!-- points to the root document of a block of nested documents -->
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,9 @@ static void indexDocs()
String id = "id";
String FIELD1 = "lowerfilt_u";
String FIELD2 = "lowerfilt1_u";
String FIELD3 = "copyfield_source";
String FIELD4 = "copyfield_source_not_stored";
String FIELD5 = "copyfield_source_multiple";

new UpdateRequest()
.add(sdoc(id, "1", FIELD1, "toyota"))
Expand Down Expand Up @@ -119,6 +122,13 @@ static void indexDocs()
"The slim red fox jumped over the lazy brown dogs.",
FIELD2,
"yellow white black"))
.add(
sdoc(
id, "33", FIELD3, "hard rock", FIELD4, "hard rock", FIELD5, "instrumental version"))
.add(
sdoc(
id, "34", FIELD3, "hard rock", FIELD4, "hard rock", FIELD5, "instrumental version"))
.add(sdoc(id, "35", FIELD3, "pop rock", FIELD4, "pop rock", FIELD5, "full version"))
.commit(client, COLLECTION);
}

Expand Down Expand Up @@ -339,4 +349,84 @@ public void testInvalidSourceDocument() {
.getSolrClient()
.query(COLLECTION, new SolrQuery("{!mlt qf=lowerfilt_u}999999")));
}

@Test
public void testUsesACopyFieldInQf_shouldUseTheSourceFieldAndReturnResults() throws Exception {
  // When qf names a copyField destination, the MLT query must be built from the values of the
  // source field(s) and still return the relevant documents.
  SolrQuery mltQuery = new SolrQuery("{!mlt qf=copyfield_dest mindf=0 mintf=1}33");
  QueryResponse response = cluster.getSolrClient().query(COLLECTION, mltQuery);
  SolrDocumentList hits = response.getResults();

  // Collect the ids of all returned documents; order is irrelevant, so sort before comparing.
  int[] actualIds =
      hits.stream()
          .mapToInt(hit -> Integer.parseInt(String.valueOf(hit.getFieldValue("id"))))
          .toArray();
  Arrays.sort(actualIds);

  // Already in ascending order, so it can be compared to the sorted actual ids directly.
  int[] expectedIds = {34, 35};
  assertArrayEquals(expectedIds, actualIds);
}

@Test
public void testCopyFieldSourceMissing_shouldReturnNoResults() throws Exception {
  // If the source document has no value in the copyField source field, there is nothing to
  // build the similarity query from, so the result list must be empty.
  SolrQuery mltQuery = new SolrQuery("{!mlt qf=copyfield_dest mindf=0 mintf=1}30");
  QueryResponse response = cluster.getSolrClient().query(COLLECTION, mltQuery);

  int hitCount = response.getResults().size();
  assertEquals("Expected no results if source field is missing", 0, hitCount);
}

@Test
public void testCopyFieldDestinationNotStored_shouldReturnResults() throws Exception {
  // A copyField destination used in qf does not have to be stored: as long as its source field
  // holds the text used to build the similarity query, matching documents are still returned.
  SolrQuery mltQuery = new SolrQuery("{!mlt qf=copyfield_dest_not_stored mindf=0 mintf=1}33");
  QueryResponse response = cluster.getSolrClient().query(COLLECTION, mltQuery);
  SolrDocumentList hits = response.getResults();

  // Collect the ids of all returned documents; order is irrelevant, so sort before comparing.
  int[] actualIds =
      hits.stream()
          .mapToInt(hit -> Integer.parseInt(String.valueOf(hit.getFieldValue("id"))))
          .toArray();
  Arrays.sort(actualIds);

  // Already in ascending order, so it can be compared to the sorted actual ids directly.
  int[] expectedIds = {34, 35};
  assertArrayEquals(expectedIds, actualIds);
}

@Test
public void testCopyFieldDestinationMultiple_shouldReturnResults() throws Exception {
  // When several source fields feed one copyField destination, their values must be combined
  // to construct the MLT query, and the appropriate documents must be returned.
  SolrQuery mltQuery = new SolrQuery("{!mlt qf=copyfield_dest_multiple mindf=0 mintf=1}33");
  QueryResponse response = cluster.getSolrClient().query(COLLECTION, mltQuery);
  SolrDocumentList hits = response.getResults();

  // Collect the ids of all returned documents; order is irrelevant, so sort before comparing.
  int[] actualIds =
      hits.stream()
          .mapToInt(hit -> Integer.parseInt(String.valueOf(hit.getFieldValue("id"))))
          .toArray();
  Arrays.sort(actualIds);

  // Already in ascending order, so it can be compared to the sorted actual ids directly.
  int[] expectedIds = {34, 35};
  assertArrayEquals(expectedIds, actualIds);
}
}
Loading