From ff19012b8d1df7508f4d8552ef087721bb0a2413 Mon Sep 17 00:00:00 2001 From: "manthapavankumar11@gmail.com" Date: Sun, 24 May 2026 10:49:17 +0530 Subject: [PATCH 1/2] fix for the issue#42 --- src/qql/executor.py | 58 +++++++++++++++++++++++++++++++-------------- 1 file changed, 40 insertions(+), 18 deletions(-) diff --git a/src/qql/executor.py b/src/qql/executor.py index d0a43cf..b8cbbb3 100644 --- a/src/qql/executor.py +++ b/src/qql/executor.py @@ -131,6 +131,18 @@ class CollectionTopology: has_unnamed_dense: bool = False dense_names: tuple[str, ...] = () sparse_names: tuple[str, ...] = () + # Sizes fetched once in _resolve_topology() so _ensure_collection() never + # needs a second get_collection() call. + dense_sizes: tuple[tuple[str, int], ...] = () + + def dense_size_map(self) -> dict[str, int]: + """Return {vector_name: size} for every dense vector whose size was fetched. + + Unnamed single-vector collections appear under the ``""`` key, matching + ``dense_config_key()``. Returns an empty dict when ``exists`` is False or + sizes were not available (e.g. when a mock omits the size attribute). + """ + return dict(self.dense_sizes) @property def has_dense(self) -> bool: @@ -239,14 +251,23 @@ def _resolve_topology(self, name: str) -> CollectionTopology: if isinstance(vectors, dict): dense_names = tuple(vectors.keys()) + dense_sizes: tuple[tuple[str, int], ...] = tuple( + (k, v.size) + for k, v in vectors.items() + if getattr(v, "size", None) is not None + ) has_unnamed_dense = False is_named_dense = True elif vectors is None: dense_names = () + dense_sizes = () has_unnamed_dense = False is_named_dense = False else: + # Single unnamed dense vector dense_names = () + unnamed_size = getattr(vectors, "size", None) + dense_sizes = (("", unnamed_size),) if unnamed_size is not None else () has_unnamed_dense = True is_named_dense = False @@ -259,6 +280,7 @@ def _resolve_topology(self, name: str) -> CollectionTopology: has_unnamed_dense=has_unnamed_dense, dense_names=dense_names, sparse_names=sparse_names, + dense_sizes=dense_sizes, ) def _default_dense_vector_name(self) -> str: @@ -565,9 +587,9 @@ def _execute_create(self, node: CreateCollectionStmt) -> ExecutionResult: ) def _execute_alter_collection(self, node: AlterCollectionStmt) -> ExecutionResult: - if not self._client.collection_exists(node.collection): - raise QQLRuntimeError(f"Collection '{node.collection}' does not exist") topology = self._resolve_topology(node.collection) + if not topology.exists: + raise QQLRuntimeError(f"Collection '{node.collection}' does not exist") update_kwargs: dict[str, Any] = {"collection_name": node.collection} vectors_config = self._build_vectors_config_diff(topology, node.config) @@ -828,9 +850,9 @@ def _execute_select(self, node: SelectStmt) -> ExecutionResult: ) def _execute_search(self, node: SearchStmt) -> ExecutionResult: - if not self._client.collection_exists(node.collection): - raise QQLRuntimeError(f"Collection '{node.collection}' does not exist") topology = self._resolve_topology(node.collection) + if not topology.exists: + raise QQLRuntimeError(f"Collection '{node.collection}' does not exist") # Build WHERE filter (shared by both hybrid and dense-only paths) qdrant_filter: Filter | None = None @@ -1711,9 +1733,9 @@ def _execute_search_groups( def _execute_update_vector(self, node: UpdateVectorStmt) -> ExecutionResult: """Execute UPDATE ... SET VECTOR using update_vectors().""" - if not self._client.collection_exists(node.collection): - raise QQLRuntimeError(f"Collection '{node.collection}' does not exist") topology = self._resolve_topology(node.collection) + if not topology.exists: + raise QQLRuntimeError(f"Collection '{node.collection}' does not exist") vector_name = topology.dense_payload_name(node.vector_name) vector_struct: Any = ( {vector_name: list(node.vector)} if vector_name else list(node.vector) @@ -1915,20 +1937,20 @@ def _ensure_collection( topology: CollectionTopology, explicit_vector: str | None, ) -> None: - """Create the collection if it doesn't exist. Raises on dimension mismatch. + """Create the collection if needed, or validate dimension compatibility. QQL-created dense collections use the configured dense vector name. Externally created unnamed collections still accept plain dense vectors. + All validation is done against pre-fetched ``topology`` data; no extra + Qdrant API calls are made. """ if topology.exists: - info = self._client.get_collection(name) - vectors = info.config.params.vectors # type: ignore[union-attr] - if isinstance(vectors, dict): + sizes = topology.dense_size_map() + if topology.is_named_dense: + # dense_using() raises QQLRuntimeError on bad/ambiguous names, + # and always returns a non-None string in the named-dense branch. vector_name = topology.dense_using(explicit_vector) - if vector_name is None: - raise QQLRuntimeError("Collection has no dense vector") - vector_config = vectors[vector_name] - expected_size = getattr(vector_config, "size", None) + expected_size = sizes.get(vector_name) # type: ignore[arg-type] if expected_size is not None and expected_size != vector_size: raise QQLRuntimeError( f"Vector dimension mismatch: collection '{name}' vector " @@ -1936,12 +1958,12 @@ def _ensure_collection( f"model produces {vector_size} dims. Specify a compatible " "model with USING MODEL ''." ) - elif vectors is not None: - # Unnamed single-vector collection: validate dimensions - if vectors.size != vector_size: + elif topology.has_unnamed_dense: + expected_size = sizes.get("") + if expected_size is not None and expected_size != vector_size: raise QQLRuntimeError( f"Vector dimension mismatch: collection '{name}' expects " - f"{vectors.size} dims, but model produces {vector_size} dims. " + f"{expected_size} dims, but model produces {vector_size} dims. " f"Specify a compatible model with USING MODEL ''." ) else: From 0275e851a07f1fb08e75c9dc28a509ee626524e1 Mon Sep 17 00:00:00 2001 From: "manthapavankumar11@gmail.com" Date: Sun, 24 May 2026 17:48:47 +0530 Subject: [PATCH 2/2] fix for the issue: https://github.com/pavanjava/qql/pull/44 --- src/qql/executor.py | 41 +++++++++++--- tests/test_executor.py | 119 +++++++++++++++++++++++++++++++++++++++++ 2 files changed, 153 insertions(+), 7 deletions(-) diff --git a/src/qql/executor.py b/src/qql/executor.py index b8cbbb3..975947d 100644 --- a/src/qql/executor.py +++ b/src/qql/executor.py @@ -240,11 +240,28 @@ def execute(self, node: ASTNode) -> ExecutionResult: # ── Statement executors ─────────────────────────────────────────────── - def _resolve_topology(self, name: str) -> CollectionTopology: - if not self._client.collection_exists(name): - return CollectionTopology(exists=False, is_named_dense=False) + def _fetch_collection_info(self, name: str): + """Fetch full CollectionInfo for *name* in a single API call. + + Returns the CollectionInfo object when the collection exists, or + ``None`` when the collection is not found (HTTP 404). Any other + Qdrant error is re-raised as :class:`QQLRuntimeError`. + """ + try: + return self._client.get_collection(name) + except UnexpectedResponse as e: + if e.status_code == 404: + return None + raise QQLRuntimeError( + f"Qdrant error fetching collection '{name}': {e}" + ) from e + + def _topology_from_collection_info(self, info: Any) -> CollectionTopology: + """Parse a CollectionInfo object into a :class:`CollectionTopology`. - info = self._client.get_collection(name) + Separates API access (handled by :meth:`_fetch_collection_info`) from + topology parsing so each concern can be tested independently. + """ params = info.config.params vectors = params.vectors # type: ignore[union-attr] sparse_vectors = params.sparse_vectors or {} @@ -283,6 +300,17 @@ def _resolve_topology(self, name: str) -> CollectionTopology: dense_sizes=dense_sizes, ) + def _resolve_topology(self, name: str) -> CollectionTopology: + """Return the topology for *name* using exactly one Qdrant API call. + + Calls :meth:`_fetch_collection_info` once. A 404 response is treated + as ``exists=False``; any other error is propagated. + """ + info = self._fetch_collection_info(name) + if info is None: + return CollectionTopology(exists=False, is_named_dense=False) + return self._topology_from_collection_info(info) + def _default_dense_vector_name(self) -> str: return self._config.default_dense_vector_name @@ -682,10 +710,9 @@ def _execute_show(self, node: ShowCollectionsStmt) -> ExecutionResult: ) def _execute_show_collection(self, node: ShowCollectionStmt) -> ExecutionResult: - if not self._client.collection_exists(node.collection): + info = self._fetch_collection_info(node.collection) + if info is None: raise QQLRuntimeError(f"Collection '{node.collection}' does not exist") - - info = self._client.get_collection(node.collection) config = info.config params = config.params diff --git a/tests/test_executor.py b/tests/test_executor.py index a2f4c3f..0a2a74f 100644 --- a/tests/test_executor.py +++ b/tests/test_executor.py @@ -1,4 +1,7 @@ import pytest +from unittest.mock import DEFAULT + +from qdrant_client.http.exceptions import UnexpectedResponse from qql.ast_nodes import ( AlterCollectionStmt, @@ -52,8 +55,20 @@ def collection_exists(_name): def create_collection(**_kwargs): state["exists"] = True + def get_collection(_name): + if not (state["exists"] or bool(client.collection_exists.return_value)): + raise UnexpectedResponse( + status_code=404, + reason_phrase="Not Found", + content=b"Not Found", + headers={}, + ) + # Collection exists — fall through to the configured return_value + return DEFAULT + client.collection_exists.side_effect = collection_exists client.create_collection.side_effect = create_collection + client.get_collection.side_effect = get_collection return client @@ -3492,3 +3507,107 @@ def test_grouped_hybrid_search_uses_build_hybrid_vectors(self, executor, mock_cl ) executor.execute(node) helper.assert_called_once() + + +# ── Regression: single metadata fetch path ──────────────────────────────────── + + +class TestCollectionMetadataFetchCount: + """Verify that every high-level operation issues exactly ONE get_collection() + call (Issue #42 — eliminate duplicate Qdrant API calls). + + _resolve_topology() must delegate to _fetch_collection_info() which calls + get_collection() once and handles 404 as exists=False without a prior + collection_exists() round-trip. + """ + + def test_insert_existing_dense_collection_fetches_metadata_once( + self, executor, mock_client + ): + """Dense INSERT to an existing collection must call get_collection() exactly once.""" + mock_client.collection_exists.return_value = True + mock_client.get_collection.return_value.config.params.vectors.size = 384 + # Use a plain unnamed-dense mock (non-dict vectors) + mock_client.get_collection.return_value.config.params.sparse_vectors = None + + node = InsertStmt(collection="docs", values={"text": "hello"}, model=None) + executor.execute(node) + + mock_client.get_collection.assert_called_once() + + def test_insert_existing_hybrid_collection_fetches_metadata_once( + self, executor, mock_client + ): + """Hybrid auto-detect INSERT must call get_collection() exactly once.""" + from qdrant_client.models import Distance, SparseVectorParams, VectorParams + + mock_client.collection_exists.return_value = True + mock_client.get_collection.return_value.config.params.vectors = { + "dense": VectorParams(size=384, distance=Distance.COSINE) + } + mock_client.get_collection.return_value.config.params.sparse_vectors = { + "sparse": SparseVectorParams() + } + + node = InsertStmt(collection="docs", values={"text": "hello"}, model=None) + executor.execute(node) + + mock_client.get_collection.assert_called_once() + + def test_dimension_mismatch_fetches_metadata_once(self, executor, mock_client): + """A dimension-mismatch error must be raised without a second metadata fetch.""" + from qdrant_client.models import Distance, VectorParams + + mock_client.collection_exists.return_value = True + mock_client.get_collection.return_value.config.params.vectors = { + "dense": VectorParams(size=768, distance=Distance.COSINE) + } + mock_client.get_collection.return_value.config.params.sparse_vectors = None + + node = InsertStmt( + collection="docs", + values={"text": "hello"}, + model=None, + dense_vector="dense", + ) + from qql.exceptions import QQLRuntimeError + + with pytest.raises(QQLRuntimeError, match="dimension mismatch"): + executor.execute(node) + + # Only one metadata call despite the error + mock_client.get_collection.assert_called_once() + + def test_insert_nonexistent_collection_issues_one_get_collection_call( + self, executor, mock_client + ): + """Inserting into a brand-new collection calls get_collection() once + (which returns 404 and is handled as exists=False) then creates the collection.""" + mock_client.collection_exists.return_value = False + + node = InsertStmt(collection="new_col", values={"text": "hello"}, model=None) + executor.execute(node) + + # _fetch_collection_info() must have been called once (got 404 → new collection) + mock_client.get_collection.assert_called_once() + mock_client.create_collection.assert_called_once() + + def test_search_existing_collection_fetches_metadata_once( + self, executor, mock_client + ): + """SEARCH on an existing dense collection must call get_collection() exactly once.""" + from qdrant_client.models import Distance, VectorParams + + mock_client.collection_exists.return_value = True + mock_client.get_collection.return_value.config.params.vectors = { + "dense": VectorParams(size=384, distance=Distance.COSINE) + } + mock_client.get_collection.return_value.config.params.sparse_vectors = None + mock_client.query_points.return_value.points = [] + + node = SearchStmt( + collection="docs", query_text="hello", limit=5, model=None + ) + executor.execute(node) + + mock_client.get_collection.assert_called_once()