Skip to content

Commit a4acdec

Browse files
committed
OpenSearch: omit explicit IDs when manageDocumentIds=false; add unit/ITs; AWS Serverless compat.
- Update OpenSearchVectorStore#doAdd to omit explicit document IDs when manageDocumentIds=false, enabling AWS OpenSearch Serverless compatibility - Add unit tests for document ID management logic in doAdd - Add integration tests covering explicit/non-explicit ID modes and delete-by-ID behavior Closes gh-3818 Signed-off-by: sanghun <vitash1215@gmail.com>
1 parent 374c09e commit a4acdec

File tree

2 files changed

+258
-2
lines changed

2 files changed

+258
-2
lines changed

vector-stores/spring-ai-opensearch-store/src/main/java/org/springframework/ai/vectorstore/opensearch/OpenSearchVectorStore.java

Lines changed: 52 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -102,6 +102,16 @@
102102
* }</pre>
103103
*
104104
* <p>
105+
* AWS OpenSearch Serverless usage example:
106+
* </p>
107+
* <pre>{@code
108+
* OpenSearchVectorStore vectorStore = OpenSearchVectorStore.builder(openSearchClient, embeddingModel)
109+
* .initializeSchema(true)
110+
* .manageDocumentIds(false) // Required for AWS OpenSearch Serverless
111+
* .build();
112+
* }</pre>
113+
*
114+
* <p>
105115
* Advanced configuration example:
106116
* </p>
107117
* <pre>{@code
@@ -174,6 +184,8 @@ public class OpenSearchVectorStore extends AbstractObservationVectorStore implem
174184

175185
private final int dimensions;
176186

187+
private final boolean manageDocumentIds;
188+
177189
/**
178190
* Creates a new OpenSearchVectorStore using the builder pattern.
179191
* @param builder The configured builder instance
@@ -193,6 +205,7 @@ protected OpenSearchVectorStore(Builder builder) {
193205
this.initializeSchema = builder.initializeSchema;
194206
this.useApproximateKnn = builder.useApproximateKnn;
195207
this.dimensions = builder.dimensions;
208+
this.manageDocumentIds = builder.manageDocumentIds;
196209
}
197210

198211
/**
@@ -216,14 +229,27 @@ public void doAdd(List<Document> documents) {
216229
for (Document document : documents) {
217230
OpenSearchDocument openSearchDocument = new OpenSearchDocument(document.getId(), document.getText(),
218231
document.getMetadata(), embedding.get(documents.indexOf(document)));
219-
bulkRequestBuilder.operations(op -> op
220-
.index(idx -> idx.index(this.index).id(openSearchDocument.id()).document(openSearchDocument)));
232+
233+
// Conditionally set document ID based on manageDocumentIds flag
234+
if (this.manageDocumentIds) {
235+
bulkRequestBuilder.operations(op -> op
236+
.index(idx -> idx.index(this.index).id(openSearchDocument.id()).document(openSearchDocument)));
237+
}
238+
else {
239+
bulkRequestBuilder
240+
.operations(op -> op.index(idx -> idx.index(this.index).document(openSearchDocument)));
241+
}
221242
}
222243
bulkRequest(bulkRequestBuilder.build());
223244
}
224245

225246
@Override
226247
public void doDelete(List<String> idList) {
248+
if (!this.manageDocumentIds) {
249+
logger.warn("Document ID management is disabled. Delete operations may not work as expected "
250+
+ "since document IDs are auto-generated by OpenSearch. Consider using filter-based deletion instead.");
251+
}
252+
227253
BulkRequest.Builder bulkRequestBuilder = new BulkRequest.Builder();
228254
for (String id : idList) {
229255
bulkRequestBuilder.operations(op -> op.delete(idx -> idx.index(this.index).id(id)));
@@ -481,6 +507,8 @@ public static class Builder extends AbstractVectorStoreBuilder<Builder> {
481507

482508
private int dimensions = 1536;
483509

510+
private boolean manageDocumentIds = true;
511+
484512
/**
485513
* Sets the OpenSearch client.
486514
* @param openSearchClient The OpenSearch client to use
@@ -585,6 +613,28 @@ public Builder dimensions(int dimensions) {
585613
return this;
586614
}
587615

616+
/**
617+
* Sets whether to manage document IDs during indexing operations.
618+
* <p>
619+
* When set to {@code true} (default), document IDs will be explicitly set during
620+
* indexing operations. When set to {@code false}, OpenSearch will auto-generate
621+
* document IDs, which is required for AWS OpenSearch Serverless vector search
622+
* collections.
623+
* </p>
624+
* <p>
625+
* Note: When document ID management is disabled, the {@link #doDelete(List)}
626+
* method may not work as expected since document IDs are auto-generated by
627+
* OpenSearch.
628+
* </p>
629+
* @param manageDocumentIds true to manage document IDs (default), false to let
630+
* OpenSearch auto-generate IDs
631+
* @return The builder instance
632+
*/
633+
public Builder manageDocumentIds(boolean manageDocumentIds) {
634+
this.manageDocumentIds = manageDocumentIds;
635+
return this;
636+
}
637+
588638
/**
589639
* Builds a new OpenSearchVectorStore instance with the configured properties.
590640
* @return A new OpenSearchVectorStore instance
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,206 @@
1+
/*
2+
* Copyright 2023-2025 the original author or authors.
3+
*
4+
* Licensed under the Apache License, Version 2.0 (the "License");
5+
* you may not use this file except in compliance with the License.
6+
* You may obtain a copy of the License at
7+
*
8+
* https://www.apache.org/licenses/LICENSE-2.0
9+
*
10+
* Unless required by applicable law or agreed to in writing, software
11+
* distributed under the License is distributed on an "AS IS" BASIS,
12+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
* See the License for the specific language governing permissions and
14+
* limitations under the License.
15+
*/
16+
17+
package org.springframework.ai.vectorstore.opensearch;
18+
19+
import static org.assertj.core.api.Assertions.assertThat;
20+
import static org.assertj.core.api.Assertions.assertThatThrownBy;
21+
import static org.mockito.ArgumentMatchers.any;
22+
import static org.mockito.Mockito.lenient;
23+
import static org.mockito.Mockito.verify;
24+
import static org.mockito.Mockito.when;
25+
26+
import java.io.IOException;
27+
import java.util.List;
28+
import java.util.Map;
29+
30+
import org.junit.jupiter.api.BeforeEach;
31+
import org.junit.jupiter.api.DisplayName;
32+
import org.junit.jupiter.api.Test;
33+
import org.junit.jupiter.api.extension.ExtendWith;
34+
import org.junit.jupiter.params.ParameterizedTest;
35+
import org.junit.jupiter.params.provider.ValueSource;
36+
import org.mockito.ArgumentCaptor;
37+
import org.mockito.Mock;
38+
import org.mockito.junit.jupiter.MockitoExtension;
39+
import org.opensearch.client.opensearch.OpenSearchClient;
40+
import org.opensearch.client.opensearch.core.BulkRequest;
41+
import org.opensearch.client.opensearch.core.BulkResponse;
42+
43+
import org.springframework.ai.document.Document;
44+
import org.springframework.ai.embedding.EmbeddingModel;
45+
46+
/**
47+
* Unit tests for OpenSearchVectorStore.doAdd() method.
48+
*
49+
* Focuses on testing the manageDocumentIds functionality and document ID handling.
50+
*/
51+
@ExtendWith(MockitoExtension.class)
52+
@DisplayName("OpenSearchVectorStore.doAdd() Tests")
53+
class OpenSearchVectorStoreTest {
54+
55+
@Mock
56+
private OpenSearchClient mockOpenSearchClient;
57+
58+
@Mock
59+
private EmbeddingModel mockEmbeddingModel;
60+
61+
@Mock
62+
private BulkResponse mockBulkResponse;
63+
64+
@BeforeEach
65+
void setUp() throws IOException {
66+
// Use lenient to avoid UnnecessaryStubbingException
67+
lenient().when(mockEmbeddingModel.dimensions()).thenReturn(3);
68+
lenient().when(mockOpenSearchClient.bulk(any(BulkRequest.class))).thenReturn(mockBulkResponse);
69+
lenient().when(mockBulkResponse.errors()).thenReturn(false);
70+
}
71+
72+
@ParameterizedTest(name = "manageDocumentIds={0}")
73+
@ValueSource(booleans = { true, false })
74+
@DisplayName("Should handle document ID management setting correctly")
75+
void shouldHandleDocumentIdManagementSetting(boolean manageDocumentIds) throws IOException {
76+
// Given
77+
when(mockEmbeddingModel.embed(any(), any(), any()))
78+
.thenReturn(List.of(new float[] { 0.1f, 0.2f, 0.3f }, new float[] { 0.4f, 0.5f, 0.6f }));
79+
80+
OpenSearchVectorStore vectorStore = createVectorStore(manageDocumentIds);
81+
List<Document> documents = List.of(new Document("doc1", "content1", Map.of()),
82+
new Document("doc2", "content2", Map.of()));
83+
84+
// When
85+
vectorStore.add(documents);
86+
87+
// Then
88+
BulkRequest capturedRequest = captureBulkRequest();
89+
assertThat(capturedRequest.operations()).hasSize(2);
90+
91+
verifyDocumentIdHandling(capturedRequest, manageDocumentIds);
92+
}
93+
94+
@Test
95+
@DisplayName("Should handle single document correctly")
96+
void shouldHandleSingleDocumentCorrectly() throws IOException {
97+
// Given
98+
when(mockEmbeddingModel.embed(any(), any(), any())).thenReturn(List.of(new float[] { 0.1f, 0.2f, 0.3f }));
99+
100+
OpenSearchVectorStore vectorStore = createVectorStore(true);
101+
Document document = new Document("test-id", "test content", Map.of("key", "value"));
102+
103+
// When
104+
vectorStore.add(List.of(document));
105+
106+
// Then
107+
BulkRequest capturedRequest = captureBulkRequest();
108+
var operation = capturedRequest.operations().get(0);
109+
110+
assertThat(operation.isIndex()).isTrue();
111+
assertThat(operation.index().id()).isEqualTo("test-id");
112+
assertThat(operation.index().document()).isNotNull();
113+
}
114+
115+
@Test
116+
@DisplayName("Should handle multiple documents with explicit IDs")
117+
void shouldHandleMultipleDocumentsWithExplicitIds() throws IOException {
118+
// Given
119+
when(mockEmbeddingModel.embed(any(), any(), any())).thenReturn(List.of(new float[] { 0.1f, 0.2f, 0.3f },
120+
new float[] { 0.4f, 0.5f, 0.6f }, new float[] { 0.7f, 0.8f, 0.9f }));
121+
122+
OpenSearchVectorStore vectorStore = createVectorStore(true);
123+
List<Document> documents = List.of(new Document("doc1", "content1", Map.of()),
124+
new Document("doc2", "content2", Map.of()), new Document("doc3", "content3", Map.of()));
125+
126+
// When
127+
vectorStore.add(documents);
128+
129+
// Then
130+
BulkRequest capturedRequest = captureBulkRequest();
131+
assertThat(capturedRequest.operations()).hasSize(3);
132+
133+
for (int i = 0; i < 3; i++) {
134+
var operation = capturedRequest.operations().get(i);
135+
assertThat(operation.isIndex()).isTrue();
136+
assertThat(operation.index().id()).isEqualTo("doc" + (i + 1));
137+
}
138+
}
139+
140+
@Test
141+
@DisplayName("Should handle multiple documents without explicit IDs")
142+
void shouldHandleMultipleDocumentsWithoutExplicitIds() throws IOException {
143+
// Given
144+
when(mockEmbeddingModel.embed(any(), any(), any()))
145+
.thenReturn(List.of(new float[] { 0.1f, 0.2f, 0.3f }, new float[] { 0.4f, 0.5f, 0.6f }));
146+
147+
OpenSearchVectorStore vectorStore = createVectorStore(false);
148+
List<Document> documents = List.of(new Document("doc1", "content1", Map.of()),
149+
new Document("doc2", "content2", Map.of()));
150+
151+
// When
152+
vectorStore.add(documents);
153+
154+
// Then
155+
BulkRequest capturedRequest = captureBulkRequest();
156+
assertThat(capturedRequest.operations()).hasSize(2);
157+
158+
for (var operation : capturedRequest.operations()) {
159+
assertThat(operation.isIndex()).isTrue();
160+
assertThat(operation.index().id()).isNull();
161+
}
162+
}
163+
164+
@Test
165+
@DisplayName("Should handle embedding model error")
166+
void shouldHandleEmbeddingModelError() {
167+
// Given
168+
when(mockEmbeddingModel.embed(any(), any(), any())).thenThrow(new RuntimeException("Embedding failed"));
169+
170+
OpenSearchVectorStore vectorStore = createVectorStore(true);
171+
List<Document> documents = List.of(new Document("doc1", "content", Map.of()));
172+
173+
// When & Then
174+
assertThatThrownBy(() -> vectorStore.add(documents)).isInstanceOf(RuntimeException.class)
175+
.hasMessageContaining("Embedding failed");
176+
}
177+
178+
// Helper methods
179+
180+
private OpenSearchVectorStore createVectorStore(boolean manageDocumentIds) {
181+
return OpenSearchVectorStore.builder(mockOpenSearchClient, mockEmbeddingModel)
182+
.manageDocumentIds(manageDocumentIds)
183+
.build();
184+
}
185+
186+
private BulkRequest captureBulkRequest() throws IOException {
187+
ArgumentCaptor<BulkRequest> captor = ArgumentCaptor.forClass(BulkRequest.class);
188+
verify(mockOpenSearchClient).bulk(captor.capture());
189+
return captor.getValue();
190+
}
191+
192+
private void verifyDocumentIdHandling(BulkRequest request, boolean shouldHaveExplicitIds) {
193+
for (int i = 0; i < request.operations().size(); i++) {
194+
var operation = request.operations().get(i);
195+
assertThat(operation.isIndex()).isTrue();
196+
197+
if (shouldHaveExplicitIds) {
198+
assertThat(operation.index().id()).isEqualTo("doc" + (i + 1));
199+
}
200+
else {
201+
assertThat(operation.index().id()).isNull();
202+
}
203+
}
204+
}
205+
206+
}

0 commit comments

Comments
 (0)