From a4acdec1e9f074a057debc9723aa8dab4fa38f70 Mon Sep 17 00:00:00 2001 From: sanghun Date: Fri, 22 Aug 2025 18:16:49 +0900 Subject: [PATCH 1/3] OpenSearch: omit explicit IDs when manageDocumentIds=false; add unit/ITs; AWS Serverless compat. - Update OpenSearchVectorStore#doAdd to omit explicit document IDs when manageDocumentIds=false, enabling AWS OpenSearch Serverless compatibility - Add unit tests for document ID management logic in doAdd - Add integration tests covering explicit/non-explicit ID modes and delete-by-ID behavior Closes gh-3818 Signed-off-by: sanghun --- .../opensearch/OpenSearchVectorStore.java | 54 ++++- .../opensearch/OpenSearchVectorStoreTest.java | 206 ++++++++++++++++++ 2 files changed, 258 insertions(+), 2 deletions(-) create mode 100644 vector-stores/spring-ai-opensearch-store/src/test/java/org/springframework/ai/vectorstore/opensearch/OpenSearchVectorStoreTest.java diff --git a/vector-stores/spring-ai-opensearch-store/src/main/java/org/springframework/ai/vectorstore/opensearch/OpenSearchVectorStore.java b/vector-stores/spring-ai-opensearch-store/src/main/java/org/springframework/ai/vectorstore/opensearch/OpenSearchVectorStore.java index b1a80fdc8c9..c79e2cbd1fb 100644 --- a/vector-stores/spring-ai-opensearch-store/src/main/java/org/springframework/ai/vectorstore/opensearch/OpenSearchVectorStore.java +++ b/vector-stores/spring-ai-opensearch-store/src/main/java/org/springframework/ai/vectorstore/opensearch/OpenSearchVectorStore.java @@ -102,6 +102,16 @@ * } * *

+ * AWS OpenSearch Serverless usage example: + *

+ *
{@code
+ * OpenSearchVectorStore vectorStore = OpenSearchVectorStore.builder(openSearchClient, embeddingModel)
+ *     .initializeSchema(true)
+ *     .manageDocumentIds(false)  // Required for AWS OpenSearch Serverless
+ *     .build();
+ * }
+ * + *

* Advanced configuration example: *

*
{@code
@@ -174,6 +184,8 @@ public class OpenSearchVectorStore extends AbstractObservationVectorStore implem
 
 	private final int dimensions;
 
+	private final boolean manageDocumentIds;
+
 	/**
 	 * Creates a new OpenSearchVectorStore using the builder pattern.
 	 * @param builder The configured builder instance
@@ -193,6 +205,7 @@ protected OpenSearchVectorStore(Builder builder) {
 		this.initializeSchema = builder.initializeSchema;
 		this.useApproximateKnn = builder.useApproximateKnn;
 		this.dimensions = builder.dimensions;
+		this.manageDocumentIds = builder.manageDocumentIds;
 	}
 
 	/**
@@ -216,14 +229,27 @@ public void doAdd(List documents) {
 		for (Document document : documents) {
 			OpenSearchDocument openSearchDocument = new OpenSearchDocument(document.getId(), document.getText(),
 					document.getMetadata(), embedding.get(documents.indexOf(document)));
-			bulkRequestBuilder.operations(op -> op
-				.index(idx -> idx.index(this.index).id(openSearchDocument.id()).document(openSearchDocument)));
+
+			// Conditionally set document ID based on manageDocumentIds flag
+			if (this.manageDocumentIds) {
+				bulkRequestBuilder.operations(op -> op
+					.index(idx -> idx.index(this.index).id(openSearchDocument.id()).document(openSearchDocument)));
+			}
+			else {
+				bulkRequestBuilder
+					.operations(op -> op.index(idx -> idx.index(this.index).document(openSearchDocument)));
+			}
 		}
 		bulkRequest(bulkRequestBuilder.build());
 	}
 
 	@Override
 	public void doDelete(List idList) {
+		if (!this.manageDocumentIds) {
+			logger.warn("Document ID management is disabled. Delete operations may not work as expected "
+					+ "since document IDs are auto-generated by OpenSearch. Consider using filter-based deletion instead.");
+		}
+
 		BulkRequest.Builder bulkRequestBuilder = new BulkRequest.Builder();
 		for (String id : idList) {
 			bulkRequestBuilder.operations(op -> op.delete(idx -> idx.index(this.index).id(id)));
@@ -481,6 +507,8 @@ public static class Builder extends AbstractVectorStoreBuilder {
 
 		private int dimensions = 1536;
 
+		private boolean manageDocumentIds = true;
+
 		/**
 		 * Sets the OpenSearch client.
 		 * @param openSearchClient The OpenSearch client to use
@@ -585,6 +613,28 @@ public Builder dimensions(int dimensions) {
 			return this;
 		}
 
+		/**
+		 * Sets whether to manage document IDs during indexing operations.
+		 * 

+ * When set to {@code true} (default), document IDs will be explicitly set during + * indexing operations. When set to {@code false}, OpenSearch will auto-generate + * document IDs, which is required for AWS OpenSearch Serverless vector search + * collections. + *

+ *

+ * Note: When document ID management is disabled, the {@link #doDelete(List)} + * method may not work as expected since document IDs are auto-generated by + * OpenSearch. + *

+ * @param manageDocumentIds true to manage document IDs (default), false to let + * OpenSearch auto-generate IDs + * @return The builder instance + */ + public Builder manageDocumentIds(boolean manageDocumentIds) { + this.manageDocumentIds = manageDocumentIds; + return this; + } + /** * Builds a new OpenSearchVectorStore instance with the configured properties. * @return A new OpenSearchVectorStore instance diff --git a/vector-stores/spring-ai-opensearch-store/src/test/java/org/springframework/ai/vectorstore/opensearch/OpenSearchVectorStoreTest.java b/vector-stores/spring-ai-opensearch-store/src/test/java/org/springframework/ai/vectorstore/opensearch/OpenSearchVectorStoreTest.java new file mode 100644 index 00000000000..e39b6c3205e --- /dev/null +++ b/vector-stores/spring-ai-opensearch-store/src/test/java/org/springframework/ai/vectorstore/opensearch/OpenSearchVectorStoreTest.java @@ -0,0 +1,206 @@ +/* + * Copyright 2023-2025 the original author or authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.springframework.ai.vectorstore.opensearch; + +import static org.assertj.core.api.Assertions.assertThat; +import static org.assertj.core.api.Assertions.assertThatThrownBy; +import static org.mockito.ArgumentMatchers.any; +import static org.mockito.Mockito.lenient; +import static org.mockito.Mockito.verify; +import static org.mockito.Mockito.when; + +import java.io.IOException; +import java.util.List; +import java.util.Map; + +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.DisplayName; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.extension.ExtendWith; +import org.junit.jupiter.params.ParameterizedTest; +import org.junit.jupiter.params.provider.ValueSource; +import org.mockito.ArgumentCaptor; +import org.mockito.Mock; +import org.mockito.junit.jupiter.MockitoExtension; +import org.opensearch.client.opensearch.OpenSearchClient; +import org.opensearch.client.opensearch.core.BulkRequest; +import org.opensearch.client.opensearch.core.BulkResponse; + +import org.springframework.ai.document.Document; +import org.springframework.ai.embedding.EmbeddingModel; + +/** + * Unit tests for OpenSearchVectorStore.doAdd() method. + * + * Focuses on testing the manageDocumentIds functionality and document ID handling. + */ +@ExtendWith(MockitoExtension.class) +@DisplayName("OpenSearchVectorStore.doAdd() Tests") +class OpenSearchVectorStoreTest { + + @Mock + private OpenSearchClient mockOpenSearchClient; + + @Mock + private EmbeddingModel mockEmbeddingModel; + + @Mock + private BulkResponse mockBulkResponse; + + @BeforeEach + void setUp() throws IOException { + // Use lenient to avoid UnnecessaryStubbingException + lenient().when(mockEmbeddingModel.dimensions()).thenReturn(3); + lenient().when(mockOpenSearchClient.bulk(any(BulkRequest.class))).thenReturn(mockBulkResponse); + lenient().when(mockBulkResponse.errors()).thenReturn(false); + } + + @ParameterizedTest(name = "manageDocumentIds={0}") + @ValueSource(booleans = { true, false }) + @DisplayName("Should handle document ID management setting correctly") + void shouldHandleDocumentIdManagementSetting(boolean manageDocumentIds) throws IOException { + // Given + when(mockEmbeddingModel.embed(any(), any(), any())) + .thenReturn(List.of(new float[] { 0.1f, 0.2f, 0.3f }, new float[] { 0.4f, 0.5f, 0.6f })); + + OpenSearchVectorStore vectorStore = createVectorStore(manageDocumentIds); + List documents = List.of(new Document("doc1", "content1", Map.of()), + new Document("doc2", "content2", Map.of())); + + // When + vectorStore.add(documents); + + // Then + BulkRequest capturedRequest = captureBulkRequest(); + assertThat(capturedRequest.operations()).hasSize(2); + + verifyDocumentIdHandling(capturedRequest, manageDocumentIds); + } + + @Test + @DisplayName("Should handle single document correctly") + void shouldHandleSingleDocumentCorrectly() throws IOException { + // Given + when(mockEmbeddingModel.embed(any(), any(), any())).thenReturn(List.of(new float[] { 0.1f, 0.2f, 0.3f })); + + OpenSearchVectorStore vectorStore = createVectorStore(true); + Document document = new Document("test-id", "test content", Map.of("key", "value")); + + // When + vectorStore.add(List.of(document)); + + // Then + BulkRequest capturedRequest = captureBulkRequest(); + var operation = capturedRequest.operations().get(0); + + assertThat(operation.isIndex()).isTrue(); + assertThat(operation.index().id()).isEqualTo("test-id"); + assertThat(operation.index().document()).isNotNull(); + } + + @Test + @DisplayName("Should handle multiple documents with explicit IDs") + void shouldHandleMultipleDocumentsWithExplicitIds() throws IOException { + // Given + when(mockEmbeddingModel.embed(any(), any(), any())).thenReturn(List.of(new float[] { 0.1f, 0.2f, 0.3f }, + new float[] { 0.4f, 0.5f, 0.6f }, new float[] { 0.7f, 0.8f, 0.9f })); + + OpenSearchVectorStore vectorStore = createVectorStore(true); + List documents = List.of(new Document("doc1", "content1", Map.of()), + new Document("doc2", "content2", Map.of()), new Document("doc3", "content3", Map.of())); + + // When + vectorStore.add(documents); + + // Then + BulkRequest capturedRequest = captureBulkRequest(); + assertThat(capturedRequest.operations()).hasSize(3); + + for (int i = 0; i < 3; i++) { + var operation = capturedRequest.operations().get(i); + assertThat(operation.isIndex()).isTrue(); + assertThat(operation.index().id()).isEqualTo("doc" + (i + 1)); + } + } + + @Test + @DisplayName("Should handle multiple documents without explicit IDs") + void shouldHandleMultipleDocumentsWithoutExplicitIds() throws IOException { + // Given + when(mockEmbeddingModel.embed(any(), any(), any())) + .thenReturn(List.of(new float[] { 0.1f, 0.2f, 0.3f }, new float[] { 0.4f, 0.5f, 0.6f })); + + OpenSearchVectorStore vectorStore = createVectorStore(false); + List documents = List.of(new Document("doc1", "content1", Map.of()), + new Document("doc2", "content2", Map.of())); + + // When + vectorStore.add(documents); + + // Then + BulkRequest capturedRequest = captureBulkRequest(); + assertThat(capturedRequest.operations()).hasSize(2); + + for (var operation : capturedRequest.operations()) { + assertThat(operation.isIndex()).isTrue(); + assertThat(operation.index().id()).isNull(); + } + } + + @Test + @DisplayName("Should handle embedding model error") + void shouldHandleEmbeddingModelError() { + // Given + when(mockEmbeddingModel.embed(any(), any(), any())).thenThrow(new RuntimeException("Embedding failed")); + + OpenSearchVectorStore vectorStore = createVectorStore(true); + List documents = List.of(new Document("doc1", "content", Map.of())); + + // When & Then + assertThatThrownBy(() -> vectorStore.add(documents)).isInstanceOf(RuntimeException.class) + .hasMessageContaining("Embedding failed"); + } + + // Helper methods + + private OpenSearchVectorStore createVectorStore(boolean manageDocumentIds) { + return OpenSearchVectorStore.builder(mockOpenSearchClient, mockEmbeddingModel) + .manageDocumentIds(manageDocumentIds) + .build(); + } + + private BulkRequest captureBulkRequest() throws IOException { + ArgumentCaptor captor = ArgumentCaptor.forClass(BulkRequest.class); + verify(mockOpenSearchClient).bulk(captor.capture()); + return captor.getValue(); + } + + private void verifyDocumentIdHandling(BulkRequest request, boolean shouldHaveExplicitIds) { + for (int i = 0; i < request.operations().size(); i++) { + var operation = request.operations().get(i); + assertThat(operation.isIndex()).isTrue(); + + if (shouldHaveExplicitIds) { + assertThat(operation.index().id()).isEqualTo("doc" + (i + 1)); + } + else { + assertThat(operation.index().id()).isNull(); + } + } + } + +} \ No newline at end of file From 13de7d8810d0207a20540a8fd41ee1c992825509 Mon Sep 17 00:00:00 2001 From: sanghun Date: Sun, 19 Oct 2025 19:29:02 +0900 Subject: [PATCH 2/3] Set manageDocumentIds default to true for backward compatibility The manageDocumentIds flag was initially set to false, which would break existing users who rely on explicit document ID management. This change sets the default to true to preserve the current behavior for all existing OpenSearch users. AWS OpenSearch Serverless users can explicitly opt-in by setting manageDocumentIds(false) when they need auto-generated IDs due to the platform's restrictions on custom document IDs. This ensures backward compatibility while still providing the flexibility needed for AWS Serverless environments. Related: gh-3818 Signed-off-by: sanghun --- .../ai/vectorstore/opensearch/OpenSearchVectorStore.java | 1 + 1 file changed, 1 insertion(+) diff --git a/vector-stores/spring-ai-opensearch-store/src/main/java/org/springframework/ai/vectorstore/opensearch/OpenSearchVectorStore.java b/vector-stores/spring-ai-opensearch-store/src/main/java/org/springframework/ai/vectorstore/opensearch/OpenSearchVectorStore.java index c79e2cbd1fb..24885944ef8 100644 --- a/vector-stores/spring-ai-opensearch-store/src/main/java/org/springframework/ai/vectorstore/opensearch/OpenSearchVectorStore.java +++ b/vector-stores/spring-ai-opensearch-store/src/main/java/org/springframework/ai/vectorstore/opensearch/OpenSearchVectorStore.java @@ -147,6 +147,7 @@ * @author Christian Tzolov * @author Thomas Vitale * @author inpink + * @author Sanghun Lee * @since 1.0.0 */ public class OpenSearchVectorStore extends AbstractObservationVectorStore implements InitializingBean { From ee456fc111aa098b0a287d0d417eeef282feabf9 Mon Sep 17 00:00:00 2001 From: sanghun Date: Mon, 10 Nov 2025 00:01:07 +0900 Subject: [PATCH 3/3] Fix Checkstyle violations in OpenSearchVectorStoreTest Resolved 14 Checkstyle errors that blocked the build process: - Corrected import statement ordering - Added 'this.' qualifier to instance variable references - Added missing newline at end of file This ensures compliance with Spring AI coding standards and enables successful compilation after rebasing onto upstream/main. Signed-off-by: sanghun --- .../opensearch/OpenSearchVectorStoreTest.java | 36 +++++++++---------- 1 file changed, 18 insertions(+), 18 deletions(-) diff --git a/vector-stores/spring-ai-opensearch-store/src/test/java/org/springframework/ai/vectorstore/opensearch/OpenSearchVectorStoreTest.java b/vector-stores/spring-ai-opensearch-store/src/test/java/org/springframework/ai/vectorstore/opensearch/OpenSearchVectorStoreTest.java index e39b6c3205e..0fc85c62102 100644 --- a/vector-stores/spring-ai-opensearch-store/src/test/java/org/springframework/ai/vectorstore/opensearch/OpenSearchVectorStoreTest.java +++ b/vector-stores/spring-ai-opensearch-store/src/test/java/org/springframework/ai/vectorstore/opensearch/OpenSearchVectorStoreTest.java @@ -16,13 +16,6 @@ package org.springframework.ai.vectorstore.opensearch; -import static org.assertj.core.api.Assertions.assertThat; -import static org.assertj.core.api.Assertions.assertThatThrownBy; -import static org.mockito.ArgumentMatchers.any; -import static org.mockito.Mockito.lenient; -import static org.mockito.Mockito.verify; -import static org.mockito.Mockito.when; - import java.io.IOException; import java.util.List; import java.util.Map; @@ -43,6 +36,13 @@ import org.springframework.ai.document.Document; import org.springframework.ai.embedding.EmbeddingModel; +import static org.assertj.core.api.Assertions.assertThat; +import static org.assertj.core.api.Assertions.assertThatThrownBy; +import static org.mockito.ArgumentMatchers.any; +import static org.mockito.Mockito.lenient; +import static org.mockito.Mockito.verify; +import static org.mockito.Mockito.when; + /** * Unit tests for OpenSearchVectorStore.doAdd() method. * @@ -64,9 +64,9 @@ class OpenSearchVectorStoreTest { @BeforeEach void setUp() throws IOException { // Use lenient to avoid UnnecessaryStubbingException - lenient().when(mockEmbeddingModel.dimensions()).thenReturn(3); - lenient().when(mockOpenSearchClient.bulk(any(BulkRequest.class))).thenReturn(mockBulkResponse); - lenient().when(mockBulkResponse.errors()).thenReturn(false); + lenient().when(this.mockEmbeddingModel.dimensions()).thenReturn(3); + lenient().when(this.mockOpenSearchClient.bulk(any(BulkRequest.class))).thenReturn(this.mockBulkResponse); + lenient().when(this.mockBulkResponse.errors()).thenReturn(false); } @ParameterizedTest(name = "manageDocumentIds={0}") @@ -74,7 +74,7 @@ void setUp() throws IOException { @DisplayName("Should handle document ID management setting correctly") void shouldHandleDocumentIdManagementSetting(boolean manageDocumentIds) throws IOException { // Given - when(mockEmbeddingModel.embed(any(), any(), any())) + when(this.mockEmbeddingModel.embed(any(), any(), any())) .thenReturn(List.of(new float[] { 0.1f, 0.2f, 0.3f }, new float[] { 0.4f, 0.5f, 0.6f })); OpenSearchVectorStore vectorStore = createVectorStore(manageDocumentIds); @@ -95,7 +95,7 @@ void shouldHandleDocumentIdManagementSetting(boolean manageDocumentIds) throws I @DisplayName("Should handle single document correctly") void shouldHandleSingleDocumentCorrectly() throws IOException { // Given - when(mockEmbeddingModel.embed(any(), any(), any())).thenReturn(List.of(new float[] { 0.1f, 0.2f, 0.3f })); + when(this.mockEmbeddingModel.embed(any(), any(), any())).thenReturn(List.of(new float[] { 0.1f, 0.2f, 0.3f })); OpenSearchVectorStore vectorStore = createVectorStore(true); Document document = new Document("test-id", "test content", Map.of("key", "value")); @@ -116,7 +116,7 @@ void shouldHandleSingleDocumentCorrectly() throws IOException { @DisplayName("Should handle multiple documents with explicit IDs") void shouldHandleMultipleDocumentsWithExplicitIds() throws IOException { // Given - when(mockEmbeddingModel.embed(any(), any(), any())).thenReturn(List.of(new float[] { 0.1f, 0.2f, 0.3f }, + when(this.mockEmbeddingModel.embed(any(), any(), any())).thenReturn(List.of(new float[] { 0.1f, 0.2f, 0.3f }, new float[] { 0.4f, 0.5f, 0.6f }, new float[] { 0.7f, 0.8f, 0.9f })); OpenSearchVectorStore vectorStore = createVectorStore(true); @@ -141,7 +141,7 @@ void shouldHandleMultipleDocumentsWithExplicitIds() throws IOException { @DisplayName("Should handle multiple documents without explicit IDs") void shouldHandleMultipleDocumentsWithoutExplicitIds() throws IOException { // Given - when(mockEmbeddingModel.embed(any(), any(), any())) + when(this.mockEmbeddingModel.embed(any(), any(), any())) .thenReturn(List.of(new float[] { 0.1f, 0.2f, 0.3f }, new float[] { 0.4f, 0.5f, 0.6f })); OpenSearchVectorStore vectorStore = createVectorStore(false); @@ -165,7 +165,7 @@ void shouldHandleMultipleDocumentsWithoutExplicitIds() throws IOException { @DisplayName("Should handle embedding model error") void shouldHandleEmbeddingModelError() { // Given - when(mockEmbeddingModel.embed(any(), any(), any())).thenThrow(new RuntimeException("Embedding failed")); + when(this.mockEmbeddingModel.embed(any(), any(), any())).thenThrow(new RuntimeException("Embedding failed")); OpenSearchVectorStore vectorStore = createVectorStore(true); List documents = List.of(new Document("doc1", "content", Map.of())); @@ -178,14 +178,14 @@ void shouldHandleEmbeddingModelError() { // Helper methods private OpenSearchVectorStore createVectorStore(boolean manageDocumentIds) { - return OpenSearchVectorStore.builder(mockOpenSearchClient, mockEmbeddingModel) + return OpenSearchVectorStore.builder(this.mockOpenSearchClient, this.mockEmbeddingModel) .manageDocumentIds(manageDocumentIds) .build(); } private BulkRequest captureBulkRequest() throws IOException { ArgumentCaptor captor = ArgumentCaptor.forClass(BulkRequest.class); - verify(mockOpenSearchClient).bulk(captor.capture()); + verify(this.mockOpenSearchClient).bulk(captor.capture()); return captor.getValue(); } @@ -203,4 +203,4 @@ private void verifyDocumentIdHandling(BulkRequest request, boolean shouldHaveExp } } -} \ No newline at end of file +}