diff --git a/driver-sync/src/test/functional/com/mongodb/client/AbstractAtlasSearchIndexManagementProseTest.java b/driver-sync/src/test/functional/com/mongodb/client/AbstractAtlasSearchIndexManagementProseTest.java index 17c007e14ba..1685a79fce2 100644 --- a/driver-sync/src/test/functional/com/mongodb/client/AbstractAtlasSearchIndexManagementProseTest.java +++ b/driver-sync/src/test/functional/com/mongodb/client/AbstractAtlasSearchIndexManagementProseTest.java @@ -31,8 +31,11 @@ import org.junit.jupiter.api.Assertions; import org.junit.jupiter.api.Assumptions; import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Disabled; import org.junit.jupiter.api.DisplayName; import org.junit.jupiter.api.Test; +import org.junit.jupiter.params.ParameterizedTest; +import org.junit.jupiter.params.provider.ValueSource; import java.util.List; import java.util.Map; @@ -79,6 +82,8 @@ public abstract class AbstractAtlasSearchIndexManagementProseTest { "{" + " mappings: { dynamic: true }" + "}"); + private static final String AUTO_EMBED_FIELD_PATH = "plot"; + private static final String AUTO_EMBED_INDEX_NAME = "voyage_4"; private static final Document VECTOR_SEARCH_DEFINITION = Document.parse( "{" + " fields: [" @@ -281,6 +286,218 @@ public void shouldRequireExplicitTypeToCreateVectorSearchIndex() { VECTOR_SEARCH_DEFINITION)); } + @Test + @DisplayName("should fail when invalid model name was used for auto embedding index") + void shouldFailWhenInvalidModelNameWasUsed() { + //TODO-JAVA-6059 remove this assumption when auto embedding is generally available + Assumptions.assumeTrue(false); + + assertThrows( + MongoCommandException.class, + () -> createAutoEmbeddingIndex("test"), + "Valid voyage model name was not used" + ); + } + + @Test + @DisplayName("should fail to create auto embedding index without model") + void shouldFailToCreateAutoEmbeddingIndexWithoutModel() { + //TODO-JAVA-6059 remove this assumption when auto embedding is generally available + 
Assumptions.assumeTrue(false); + + SearchIndexModel indexModel = new SearchIndexModel( + AUTO_EMBED_INDEX_NAME, + new Document( + "fields", + singletonList( + new Document("type", "autoEmbed") + .append("modality", "text") + .append("path", AUTO_EMBED_FIELD_PATH) + )), + SearchIndexType.vectorSearch() + ); + assertThrows( + MongoCommandException.class, + () -> collection.createSearchIndexes(singletonList(indexModel)), + "Expected index creation to fail because model is not specified" + ); + } + + @ParameterizedTest(name = "should create auto embedding index with {0} quantization") + @ValueSource(strings = {"float", "scalar", "binary", "binaryNoRescore"}) + void shouldCreateAutoEmbeddingIndexWithQuantization(final String quantization) { + //TODO-JAVA-6059 remove this assumption when auto embedding is generally available + Assumptions.assumeTrue(false); + + final String indexName = AUTO_EMBED_INDEX_NAME + "_" + quantization; + SearchIndexModel indexModel = new SearchIndexModel( + indexName, + new Document( + "fields", + singletonList( + new Document("type", "autoEmbed") + .append("modality", "text") + .append("path", AUTO_EMBED_FIELD_PATH) + .append("model", "voyage-4-large") + .append("quantization", quantization) + )), + SearchIndexType.vectorSearch() + ); + List result = collection.createSearchIndexes(singletonList(indexModel)); + Assertions.assertFalse(result.isEmpty()); + } + + @Test + @DisplayName("should create auto embedding index with custom numDimensions") + @Disabled("Currently numDimensions can't be used, it fails with server error:" + + " 'Invalid numDimensions value for autoEmbed field. 
Expected an integer.'") + void shouldCreateAutoEmbeddingIndexWithCustomNumDimensions() { + //TODO-JAVA-6059 remove this assumption when auto embedding is generally available + Assumptions.assumeTrue(false); + + SearchIndexModel indexModel = new SearchIndexModel( + AUTO_EMBED_INDEX_NAME, + new Document( + "fields", + singletonList( + new Document("type", "autoEmbed") + .append("modality", "text") + .append("path", AUTO_EMBED_FIELD_PATH) + .append("model", "voyage-4-large") + .append("numDimensions", 512) + )), + SearchIndexType.vectorSearch() + ); + List result = collection.createSearchIndexes(singletonList(indexModel)); + Assertions.assertFalse(result.isEmpty()); + } + + @Test + @DisplayName("should create auto embedding index with filter field") + void shouldCreateAutoEmbeddingIndexWithFilterField() { + //TODO-JAVA-6059 remove this assumption when auto embedding is generally available + Assumptions.assumeTrue(false); + + SearchIndexModel indexModel = new SearchIndexModel( + AUTO_EMBED_INDEX_NAME, + new Document( + "fields", + asList( + new Document("type", "autoEmbed") + .append("modality", "text") + .append("path", AUTO_EMBED_FIELD_PATH) + .append("model", "voyage-4-large"), + new Document("type", "filter") + .append("path", "director") + )), + SearchIndexType.vectorSearch() + ); + List result = collection.createSearchIndexes(singletonList(indexModel)); + Assertions.assertFalse(result.isEmpty()); + } + + @Test + @DisplayName("should fail when mixing vector and autoEmbed types in the same index") + void shouldFailWhenMixingVectorAndAutoEmbedTypes() { + //TODO-JAVA-6059 remove this assumption when auto embedding is generally available + Assumptions.assumeTrue(false); + + SearchIndexModel indexModel = new SearchIndexModel( + AUTO_EMBED_INDEX_NAME, + new Document( + "fields", + asList( + new Document("type", "autoEmbed") + .append("modality", "text") + .append("path", AUTO_EMBED_FIELD_PATH) + .append("model", "voyage-4-large"), + new Document("type", "vector") + 
.append("path", "plot_embedding") + .append("numDimensions", 1024) + .append("similarity", "cosine") + )), + SearchIndexType.vectorSearch() + ); + assertThrows( + MongoCommandException.class, + () -> collection.createSearchIndexes(singletonList(indexModel)), + "Expected index creation to fail because vector and autoEmbed types cannot be mixed" + ); + } + + @Test + @DisplayName("should fail when duplicate paths are used in auto embedding index") + void shouldFailWhenDuplicatePathsAreUsed() { + //TODO-JAVA-6059 remove this assumption when auto embedding is generally available + Assumptions.assumeTrue(false); + + SearchIndexModel indexModel = new SearchIndexModel( + AUTO_EMBED_INDEX_NAME, + new Document( + "fields", + asList( + new Document("type", "autoEmbed") + .append("modality", "text") + .append("path", AUTO_EMBED_FIELD_PATH) + .append("model", "voyage-4-large"), + new Document("type", "autoEmbed") + .append("modality", "text") + .append("path", AUTO_EMBED_FIELD_PATH) + .append("model", "voyage-4-large") + )), + SearchIndexType.vectorSearch() + ); + assertThrows( + MongoCommandException.class, + () -> collection.createSearchIndexes(singletonList(indexModel)), + "Expected index creation to fail because of duplicate paths" + ); + } + + @Test + @DisplayName("should fail when autoEmbed field is used as filter field") + void shouldFailWhenAutoEmbedFieldUsedAsFilterField() { + //TODO-JAVA-6059 remove this assumption when auto embedding is generally available + Assumptions.assumeTrue(false); + + SearchIndexModel indexModel = new SearchIndexModel( + AUTO_EMBED_INDEX_NAME, + new Document( + "fields", + asList( + new Document("type", "autoEmbed") + .append("modality", "text") + .append("path", AUTO_EMBED_FIELD_PATH) + .append("model", "voyage-4-large"), + new Document("type", "filter") + .append("path", AUTO_EMBED_FIELD_PATH) + )), + SearchIndexType.vectorSearch() + ); + assertThrows( + MongoCommandException.class, + () -> 
collection.createSearchIndexes(singletonList(indexModel)), + "Expected index creation to fail because autoEmbed field cannot be used as a filter field" + ); + } + + private void createAutoEmbeddingIndex(final String modelName) { + SearchIndexModel indexModel = new SearchIndexModel( + AUTO_EMBED_INDEX_NAME, + new Document( + "fields", + singletonList( + new Document("type", "autoEmbed") + .append("modality", "text") + .append("model", modelName) + .append("path", AUTO_EMBED_FIELD_PATH) + )), + SearchIndexType.vectorSearch() + ); + List result = collection.createSearchIndexes(singletonList(indexModel)); + Assertions.assertFalse(result.isEmpty()); + } + private void assertIndexDeleted() throws InterruptedException { int attempts = MAX_WAIT_ATTEMPTS; while (collection.listSearchIndexes().first() != null && checkAttempt(attempts--)) { diff --git a/driver-sync/src/test/functional/com/mongodb/client/vector/AbstractAutomatedEmbeddingVectorSearchFunctionalTest.java b/driver-sync/src/test/functional/com/mongodb/client/model/search/AbstractAutoEmbeddingVectorSearchFunctionalTest.java similarity index 65% rename from driver-sync/src/test/functional/com/mongodb/client/vector/AbstractAutomatedEmbeddingVectorSearchFunctionalTest.java rename to driver-sync/src/test/functional/com/mongodb/client/model/search/AbstractAutoEmbeddingVectorSearchFunctionalTest.java index 0331ed563c9..2d1e1feec12 100644 --- a/driver-sync/src/test/functional/com/mongodb/client/vector/AbstractAutomatedEmbeddingVectorSearchFunctionalTest.java +++ b/driver-sync/src/test/functional/com/mongodb/client/model/search/AbstractAutoEmbeddingVectorSearchFunctionalTest.java @@ -14,10 +14,9 @@ * limitations under the License. 
*/ -package com.mongodb.client.vector; +package com.mongodb.client.model.search; import com.mongodb.MongoClientSettings; -import com.mongodb.MongoCommandException; import com.mongodb.client.Fixture; import com.mongodb.client.MongoClient; import com.mongodb.client.MongoCollection; @@ -28,12 +27,13 @@ import org.bson.codecs.configuration.CodecRegistry; import org.bson.codecs.pojo.PojoCodecProvider; import org.bson.conversions.Bson; -import org.junit.jupiter.api.AfterEach; +import org.junit.jupiter.api.AfterAll; import org.junit.jupiter.api.Assertions; import org.junit.jupiter.api.Assumptions; -import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.BeforeAll; import org.junit.jupiter.api.DisplayName; import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.TestInstance; import java.util.ArrayList; import java.util.Collections; @@ -44,36 +44,33 @@ import static com.mongodb.client.model.Aggregates.vectorSearch; import static com.mongodb.client.model.search.SearchPath.fieldPath; import static com.mongodb.client.model.search.VectorSearchOptions.approximateVectorSearchOptions; +import static com.mongodb.client.model.search.VectorSearchOptions.exactVectorSearchOptions; import static com.mongodb.client.model.search.VectorSearchQuery.textQuery; import static java.util.Arrays.asList; import static org.bson.codecs.configuration.CodecRegistries.fromProviders; import static org.bson.codecs.configuration.CodecRegistries.fromRegistries; /** - * The test cases were borrowed from - * this repository. + * Tests for auto-embedding vector search queries. + * Index creation and validation tests are in {@link com.mongodb.client.AbstractAtlasSearchIndexManagementProseTest}. 
*/ -public abstract class AbstractAutomatedEmbeddingVectorSearchFunctionalTest extends OperationTest { +@TestInstance(TestInstance.Lifecycle.PER_CLASS) +public abstract class AbstractAutoEmbeddingVectorSearchFunctionalTest extends OperationTest { private static final String FIELD_SEARCH_PATH = "plot"; - // as of 2025-01-13 only voyage 4 is supported for automated embedding - // it might change in the future so for now we are only testing with voyage-4-large model private static final String INDEX_NAME = "voyage_4"; - private static final String MOVIE_NAME = "Breathe"; private static final CodecRegistry CODEC_REGISTRY = fromRegistries(getDefaultCodecRegistry(), fromProviders(PojoCodecProvider .builder() .automatic(true).build())); - private MongoCollection documentCollection; private MongoClient mongoClient; + private MongoCollection documentCollection; - @BeforeEach - public void setUp() { - //TODO-JAVA-6059 remove this line when Atlas Vector Search with automated embedding is generally available - // right now atlas search with automated embedding is in private preview and - // only available via a custom docker image + @BeforeAll + void setUpOnce() throws InterruptedException { + //TODO-JAVA-6059 remove this assumption when Atlas Vector Search with automated embedding is generally available Assumptions.assumeTrue(false); super.beforeEach(); @@ -83,11 +80,22 @@ public void setUp() { documentCollection = mongoClient .getDatabase(getDatabaseName()) .getCollection(getCollectionName()); + + mongoClient.getDatabase(getDatabaseName()).createCollection(getCollectionName()); + createAutoEmbeddingIndex("voyage-4-large"); + // TODO-JAVA-6063 + // community search with automated embedding doesn't support queryable field yet + // once supported remove the sleep and uncomment waitForIndex + TimeUnit.SECONDS.sleep(2L); + //waitForIndex(documentCollection, INDEX_NAME); + insertDocumentsForEmbedding(); + // TODO-JAVA-6063 wait for embeddings to be generated + 
TimeUnit.SECONDS.sleep(2L); } - @AfterEach + @AfterAll @SuppressWarnings("try") - public void afterEach() { + void tearDownOnce() { try (MongoClient ignore = mongoClient) { super.afterEach(); } @@ -99,52 +107,61 @@ private static MongoClientSettings.Builder getMongoClientSettingsBuilder() { protected abstract MongoClient getMongoClient(MongoClientSettings settings); - /** - * Happy path for automated embedding with Voyage-4 model. - * - * <p>Steps: - * <ol> - * <li>Create empty collection</li> - * <li>Create auto-embedding search index with voyage-4-large model</li> - * <li>Insert movie documents</li> - * <li>Run vector search query using query text</li> - * </ol> - * - * <p>Expected: Query returns "Breathe" as the top match for "movies about love" - */ @Test - @DisplayName("should create auto embedding index and run vector search query using query text") - void shouldCreateAutoEmbeddingIndexAndRunVectorSearchQuery() throws InterruptedException { - mongoClient.getDatabase(getDatabaseName()).createCollection(getCollectionName()); - createAutoEmbeddingIndex("voyage-4-large"); - // TODO-JAVA-6063 - // community search with automated embedding doesn't support queryable field yet - // once supported remove the sleep and uncomment waitForIndex - TimeUnit.SECONDS.sleep(2L); - //waitForIndex(documentCollection, INDEX_NAME); - insertDocumentsForEmbedding(); - // TODO-JAVA-6063 wait for embeddings to be generated - // once there is an official way to check the index status, we should use it instead of sleep - // there is a workaround to pass a feature flag `internalListAllIndexesForTesting` but it's not official yet - TimeUnit.SECONDS.sleep(2L); - runEmbeddingQuery(); + @DisplayName("should execute vector search query using query text") + void shouldExecuteVectorSearchQuery() { + List pipeline = asList( + vectorSearch( + fieldPath(FIELD_SEARCH_PATH), + textQuery("movies about love"), + INDEX_NAME, + 5L, + approximateVectorSearchOptions(5L) + ) + ); + List documents = documentCollection.aggregate(pipeline).into(new ArrayList<>()); + + Assertions.assertFalse(documents.isEmpty(), "Expected to get some results from vector search query"); + Assertions.assertEquals(MOVIE_NAME, documents.get(0).getString("title")); } @Test - @DisplayName("should fail when invalid model name was used") - void shouldFailWhenInvalidModelNameWasUsed() { - mongoClient.getDatabase(getDatabaseName()).createCollection(getCollectionName()); - Assertions.assertThrows( - MongoCommandException.class, - () -> createAutoEmbeddingIndex("test"), - "Valid voyage model name was not used" + @DisplayName("should execute vector search query with model override") + void 
shouldExecuteVectorSearchWithModelOverride() { + List pipeline = asList( + vectorSearch( + fieldPath(FIELD_SEARCH_PATH), + textQuery("movies about love").model("voyage-4"), + INDEX_NAME, + 5L, + approximateVectorSearchOptions(5L) + ) ); + List documents = documentCollection.aggregate(pipeline).into(new ArrayList<>()); + + Assertions.assertFalse(documents.isEmpty(), "Expected to get some results from vector search query"); + Assertions.assertEquals(MOVIE_NAME, documents.get(0).getString("title")); } @Test - @DisplayName("should fail to create auto embedding index without model") - void shouldFailToCreateAutoEmbeddingIndexWithoutModel() { - mongoClient.getDatabase(getDatabaseName()).createCollection(getCollectionName()); + @DisplayName("should execute exact vector search query") + void shouldExecuteExactVectorSearchQuery() { + List pipeline = asList( + vectorSearch( + fieldPath(FIELD_SEARCH_PATH), + textQuery("movies about love"), + INDEX_NAME, + 5L, + exactVectorSearchOptions() + ) + ); + List documents = documentCollection.aggregate(pipeline).into(new ArrayList<>()); + + Assertions.assertFalse(documents.isEmpty(), "Expected to get some results from exact vector search query"); + Assertions.assertEquals(MOVIE_NAME, documents.get(0).getString("title")); + } + + private void createAutoEmbeddingIndex(final String modelName) { SearchIndexModel indexModel = new SearchIndexModel( INDEX_NAME, new Document( @@ -152,35 +169,17 @@ void shouldFailToCreateAutoEmbeddingIndexWithoutModel() { Collections.singletonList( new Document("type", "autoEmbed") .append("modality", "text") + .append("model", modelName) .append("path", FIELD_SEARCH_PATH) )), SearchIndexType.vectorSearch() ); - Assertions.assertThrows( - MongoCommandException.class, - () -> documentCollection.createSearchIndexes(Collections.singletonList(indexModel)), - "Expected index creation to fail because model is not specified" - ); - } - - private void runEmbeddingQuery() { - List pipeline = asList( - vectorSearch( - 
fieldPath(FIELD_SEARCH_PATH), - textQuery("movies about love"), - INDEX_NAME, - 5L, // limit - approximateVectorSearchOptions(5L) // numCandidates - ) - ); - final List documents = documentCollection.aggregate(pipeline).into(new ArrayList<>()); - - Assertions.assertFalse(documents.isEmpty(), "Expected to get some results from vector search query"); - Assertions.assertEquals(MOVIE_NAME, documents.get(0).getString("title")); + List result = documentCollection.createSearchIndexes(Collections.singletonList(indexModel)); + Assertions.assertFalse(result.isEmpty()); } /** - * All the documents were borrowed from + * Documents borrowed from * here */ private void insertDocumentsForEmbedding() { @@ -209,22 +208,4 @@ private void insertDocumentsForEmbedding() { .append("year", 2017) )); } - - private void createAutoEmbeddingIndex(final String modelName) { - SearchIndexModel indexModel = new SearchIndexModel( - INDEX_NAME, - new Document( - "fields", - Collections.singletonList( - new Document("type", "autoEmbed") // type autoEmbed accepts a text - .append("modality", "text") - .append("model", modelName) - .append("path", FIELD_SEARCH_PATH) - )), - SearchIndexType.vectorSearch() - ); - List result = documentCollection.createSearchIndexes(Collections.singletonList(indexModel)); - - Assertions.assertFalse(result.isEmpty()); - } } diff --git a/driver-sync/src/test/functional/com/mongodb/client/vector/AutomatedEmbeddingVectorFunctionalTest.java b/driver-sync/src/test/functional/com/mongodb/client/model/search/AutoEmbeddingVectorSearchFunctionalTest.java similarity index 85% rename from driver-sync/src/test/functional/com/mongodb/client/vector/AutomatedEmbeddingVectorFunctionalTest.java rename to driver-sync/src/test/functional/com/mongodb/client/model/search/AutoEmbeddingVectorSearchFunctionalTest.java index 8f7db557440..1798162f17d 100644 --- a/driver-sync/src/test/functional/com/mongodb/client/vector/AutomatedEmbeddingVectorFunctionalTest.java +++ 
b/driver-sync/src/test/functional/com/mongodb/client/model/search/AutoEmbeddingVectorSearchFunctionalTest.java @@ -14,13 +14,13 @@ * limitations under the License. */ -package com.mongodb.client.vector; +package com.mongodb.client.model.search; import com.mongodb.MongoClientSettings; import com.mongodb.client.MongoClient; import com.mongodb.client.MongoClients; -public class AutomatedEmbeddingVectorFunctionalTest extends AbstractAutomatedEmbeddingVectorSearchFunctionalTest { +public class AutoEmbeddingVectorSearchFunctionalTest extends AbstractAutoEmbeddingVectorSearchFunctionalTest { @Override protected MongoClient getMongoClient(final MongoClientSettings settings) { return MongoClients.create(settings);