From 934649108d22039584ec7187cf498d86a0bf3f3f Mon Sep 17 00:00:00 2001 From: Peng Ren Date: Thu, 9 Apr 2026 17:55:37 -0400 Subject: [PATCH 1/5] Fix sql parser bugs --- pymongosql/__init__.py | 2 +- pymongosql/sql/handler.py | 34 +++- tests/test_sql_parser_comprehensive.py | 217 +++++++++++++++++++++++++ 3 files changed, 247 insertions(+), 6 deletions(-) create mode 100644 tests/test_sql_parser_comprehensive.py diff --git a/pymongosql/__init__.py b/pymongosql/__init__.py index c7b0e18..af3c27c 100644 --- a/pymongosql/__init__.py +++ b/pymongosql/__init__.py @@ -6,7 +6,7 @@ if TYPE_CHECKING: from .connection import Connection -__version__: str = "0.4.5" +__version__: str = "0.4.6" # Globals https://www.python.org/dev/peps/pep-0249/#globals apilevel: str = "2.0" diff --git a/pymongosql/sql/handler.py b/pymongosql/sql/handler.py index 19461fe..d42f480 100644 --- a/pymongosql/sql/handler.py +++ b/pymongosql/sql/handler.py @@ -26,6 +26,13 @@ "NOT IN": "$nin", } +# Pattern to count all comparison operations in ANTLR getText() output (no spaces). +# Includes standard operators and SQL keywords (IN, LIKE, BETWEEN, IS [NOT] NULL). +_COMPARISON_COUNT_PATTERN = re.compile( + r">=|<=|!=|<>|=|<|>|IN\(|LIKE['\"]|BETWEEN|ISNOTNULL|ISNULL", + re.IGNORECASE, +) + class ContextUtilsMixin: """Mixin providing common context utility methods""" @@ -194,8 +201,10 @@ def can_handle(self, ctx: Any) -> bool: text = self.get_context_text(ctx) text_upper = text.upper() - # Count comparison operators - comparison_count = sum(1 for op in COMPARISON_OPERATORS if op in text) + # Count comparison operator *occurrences* (not distinct types) so that + # "B=trueANDA=1" is correctly seen as having 2 comparisons even though + # both use the same "=" operator. Also counts IN(, LIKE, BETWEEN, etc. + comparison_count = len(_COMPARISON_COUNT_PATTERN.findall(text)) # If there are multiple comparisons and logical operators, it's a logical expression has_logical_ops = any(op in text_upper for op in LOGICAL_OPERATORS) @@ -550,8 +559,23 @@ def _find_operator_positions(self, text: str, operator: str) -> List[int]: and i + len(operator) < len(text) and text[i + len(operator)].isalpha() ): - i += len(operator) - continue + # ANTLR getText() concatenates tokens without spaces, so + # "B=true AND A=1" becomes "B=trueANDA=1". We must still + # recognise AND/OR here when the preceding text ends with a + # SQL literal keyword (true/false/null) that is itself + # preceded by a comparison operator (=, <, >, !, etc.). + # This avoids false positives like "category" containing "OR". + before_upper = text[:i].upper() + is_value_boundary = False + for keyword in ("TRUE", "FALSE", "NULL"): + if before_upper.endswith(keyword): + prefix = text[: i - len(keyword)] + if prefix and prefix[-1] in ("=", "<", ">", "!"): + is_value_boundary = True + break + if not is_value_boundary: + i += len(operator) + continue # Check parentheses and quote depth if self._is_at_valid_split_position(text, i): @@ -583,7 +607,7 @@ def _has_logical_operators(self, ctx: Any) -> bool: # Count comparison operator occurrences, not just distinct operator types # so that "a = 1 OR b = 2" counts as 2 comparisons and is treated # as a logical expression instead of a single comparison. - comparison_count = len(re.findall(r"(>=|<=|!=|<>|=|<|>)", text)) + comparison_count = len(_COMPARISON_COUNT_PATTERN.findall(text)) has_logical_ops = any(op in text for op in ["AND", "OR"]) return has_logical_ops and comparison_count >= 2 except Exception: diff --git a/tests/test_sql_parser_comprehensive.py b/tests/test_sql_parser_comprehensive.py new file mode 100644 index 0000000..48ef87c --- /dev/null +++ b/tests/test_sql_parser_comprehensive.py @@ -0,0 +1,217 @@ +# -*- coding: utf-8 -*- +import datetime + +from pymongosql.sql.parser import SQLParser + + +class TestWhereClauseFieldOrdering: + """Test that WHERE clause conditions produce correct filters regardless of field order. + + Regression tests for a bug where boolean values (true/false/null) adjacent to + AND/OR operators in ANTLR getText() output caused the logical operator to be + missed, e.g. "B=trueANDA=1" was not split on AND because the word boundary + check treated "trueAND" as a single word. + """ + + # --- Boolean + Numeric (various orderings) --- + + def test_bool_first_3_conditions(self): + sql = "SELECT * FROM col WHERE active=true AND age=30 AND score=100" + plan = SQLParser(sql).get_execution_plan() + assert plan.filter_stage == {"$and": [{"active": True}, {"age": 30}, {"score": 100}]} + + def test_bool_middle_3_conditions(self): + sql = "SELECT * FROM col WHERE age=30 AND active=true AND score=100" + plan = SQLParser(sql).get_execution_plan() + assert plan.filter_stage == {"$and": [{"age": 30}, {"active": True}, {"score": 100}]} + + def test_bool_last_3_conditions(self): + sql = "SELECT * FROM col WHERE age=30 AND score=100 AND active=true" + plan = SQLParser(sql).get_execution_plan() + assert plan.filter_stage == {"$and": [{"age": 30}, {"score": 100}, {"active": True}]} + + # --- Boolean + String --- + + def test_bool_and_string_bool_first(self): + sql = "SELECT * FROM col WHERE active=true AND name='John'" + plan = SQLParser(sql).get_execution_plan() + assert plan.filter_stage == {"$and": [{"active": True}, {"name": "John"}]} + + def test_bool_and_string_string_first(self): + sql = "SELECT * FROM col WHERE name='John' AND active=true" + plan = SQLParser(sql).get_execution_plan() + assert plan.filter_stage == {"$and": [{"name": "John"}, {"active": True}]} + + def test_false_and_string(self): + sql = "SELECT * FROM col WHERE deleted=false AND status='pending'" + plan = SQLParser(sql).get_execution_plan() + assert plan.filter_stage == {"$and": [{"deleted": False}, {"status": "pending"}]} + + # --- Boolean + String + Numeric (4 conditions) --- + + def test_4_mixed_types(self): + sql = "SELECT * FROM col WHERE active=true AND name='Alice' AND age=25 AND score>90" + plan = SQLParser(sql).get_execution_plan() + assert plan.filter_stage == {"$and": [{"active": True}, {"name": "Alice"}, {"age": 25}, {"score": {"$gt": 90}}]} + + def test_4_mixed_types_bool_last(self): + sql = "SELECT * FROM col WHERE name='Alice' AND age=25 AND score>90 AND active=true" + plan = SQLParser(sql).get_execution_plan() + assert plan.filter_stage == {"$and": [{"name": "Alice"}, {"age": 25}, {"score": {"$gt": 90}}, {"active": True}]} + + # --- Datetime with value function --- + + def test_bool_and_datetime_func(self): + sql = "SELECT * FROM col WHERE active=true AND created_at>str_to_datetime('2024-01-01','%Y-%m-%d')" + plan = SQLParser(sql).get_execution_plan() + expected_dt = datetime.datetime(2024, 1, 1, 0, 0, tzinfo=datetime.timezone.utc) + assert plan.filter_stage == {"$and": [{"active": True}, {"created_at": {"$gt": expected_dt}}]} + + def test_datetime_func_and_bool(self): + sql = "SELECT * FROM col WHERE created_at>str_to_datetime('2024-01-01','%Y-%m-%d') AND active=true" + plan = SQLParser(sql).get_execution_plan() + expected_dt = datetime.datetime(2024, 1, 1, 0, 0, tzinfo=datetime.timezone.utc) + assert plan.filter_stage == {"$and": [{"created_at": {"$gt": expected_dt}}, {"active": True}]} + + # --- Bracketed / parenthesized groups --- + + def test_brackets_bool_and_num_or_string(self): + sql = "SELECT * FROM col WHERE (active=true AND age>25) OR status='admin'" + plan = SQLParser(sql).get_execution_plan() + assert plan.filter_stage == {"$or": [{"$and": [{"active": True}, {"age": {"$gt": 25}}]}, {"status": "admin"}]} + + def test_brackets_string_or_bool_and_num(self): + sql = "SELECT * FROM col WHERE status='admin' OR (active=true AND age>25)" + plan = SQLParser(sql).get_execution_plan() + assert plan.filter_stage == {"$or": [{"status": "admin"}, {"$and": [{"active": True}, {"age": {"$gt": 25}}]}]} + + def test_brackets_bool_and_string_and_num(self): + sql = "SELECT * FROM col WHERE (active=true AND name='John') AND age>30" + plan = SQLParser(sql).get_execution_plan() + assert plan.filter_stage == {"$and": [{"$and": [{"active": True}, {"name": "John"}]}, {"age": {"$gt": 30}}]} + + def test_nested_brackets(self): + sql = "SELECT * FROM col WHERE ((active=true AND age>25) OR status='admin') AND score>50" + plan = SQLParser(sql).get_execution_plan() + assert plan.filter_stage == { + "$and": [ + {"$or": [{"$and": [{"active": True}, {"age": {"$gt": 25}}]}, {"status": "admin"}]}, + {"score": {"$gt": 50}}, + ] + } + + # --- 5+ conditions --- + + def test_5_conditions_all_and(self): + sql = "SELECT * FROM col WHERE active=true AND name='Bob' AND age>20 AND score<100 AND tier=3" + plan = SQLParser(sql).get_execution_plan() + assert plan.filter_stage == { + "$and": [ + {"active": True}, + {"name": "Bob"}, + {"age": {"$gt": 20}}, + {"score": {"$lt": 100}}, + {"tier": 3}, + ] + } + + def test_5_conditions_bool_scattered(self): + sql = "SELECT * FROM col WHERE name='Eve' AND active=true AND age=28 AND deleted=false AND score>=50" + plan = SQLParser(sql).get_execution_plan() + assert plan.filter_stage == { + "$and": [ + {"name": "Eve"}, + {"active": True}, + {"age": 28}, + {"deleted": False}, + {"score": {"$gte": 50}}, + ] + } + + # --- OR with multiple booleans --- + + def test_or_with_3_bool_conditions(self): + sql = "SELECT * FROM col WHERE active=true OR deleted=false OR verified=true" + plan = SQLParser(sql).get_execution_plan() + assert plan.filter_stage == {"$or": [{"active": True}, {"deleted": False}, {"verified": True}]} + + # --- Complex mixed AND/OR with brackets --- + + def test_brackets_two_and_groups_with_or(self): + sql = "SELECT * FROM col WHERE (active=true AND age>25) OR (deleted=false AND status='archived')" + plan = SQLParser(sql).get_execution_plan() + assert plan.filter_stage == { + "$or": [ + {"$and": [{"active": True}, {"age": {"$gt": 25}}]}, + {"$and": [{"deleted": False}, {"status": "archived"}]}, + ] + } + + def test_bool_and_bracketed_or(self): + sql = "SELECT * FROM col WHERE active=true AND (name='John' OR age>30)" + plan = SQLParser(sql).get_execution_plan() + assert plan.filter_stage == {"$and": [{"active": True}, {"$or": [{"name": "John"}, {"age": {"$gt": 30}}]}]} + + # --- Comparison operators with booleans --- + + def test_bool_not_equal_and_comparison(self): + sql = "SELECT * FROM col WHERE active!=false AND age>25" + plan = SQLParser(sql).get_execution_plan() + assert plan.filter_stage == {"$and": [{"active": {"$ne": False}}, {"age": {"$gt": 25}}]} + + # --- null mixed with bool --- + + def test_null_bool_and_string(self): + sql = "SELECT * FROM col WHERE deleted_at=null AND active=true AND name='test'" + plan = SQLParser(sql).get_execution_plan() + assert plan.filter_stage == {"$and": [{"deleted_at": None}, {"active": True}, {"name": "test"}]} + + # --- LIKE with bool --- + + def test_like_and_bool(self): + sql = "SELECT * FROM col WHERE name LIKE '%john%' AND active=true" + plan = SQLParser(sql).get_execution_plan() + f = plan.filter_stage + assert "$and" in f + assert {"active": True} in f["$and"] + assert any("name" in cond and "$regex" in cond.get("name", {}) for cond in f["$and"]) + + def test_bool_and_like(self): + sql = "SELECT * FROM col WHERE active=true AND name LIKE '%john%'" + plan = SQLParser(sql).get_execution_plan() + f = plan.filter_stage + assert "$and" in f + assert {"active": True} in f["$and"] + assert any("name" in cond and "$regex" in cond.get("name", {}) for cond in f["$and"]) + + # --- IN with bool --- + + def test_in_and_bool(self): + sql = "SELECT * FROM col WHERE status IN ('a','b','c') AND active=true" + plan = SQLParser(sql).get_execution_plan() + f = plan.filter_stage + assert "$and" in f + assert {"active": True} in f["$and"] + assert any("status" in cond and "$in" in cond.get("status", {}) for cond in f["$and"]) + + def test_bool_and_in(self): + sql = "SELECT * FROM col WHERE active=true AND status IN ('a','b','c')" + plan = SQLParser(sql).get_execution_plan() + f = plan.filter_stage + assert "$and" in f + assert {"active": True} in f["$and"] + assert any("status" in cond and "$in" in cond.get("status", {}) for cond in f["$and"]) + + # --- 6 conditions with brackets --- + + def test_6_conditions_with_brackets(self): + sql = ( + "SELECT * FROM col WHERE (active=true AND deleted=false) " + "AND (age>20 AND age<60) AND (name='X' OR name='Y')" + ) + plan = SQLParser(sql).get_execution_plan() + f = plan.filter_stage + flat = str(f) + for key in ["active", "deleted", "age", "name"]: + assert key in flat, f"Missing expected key '{key}' in filter: {f}" + assert "$or" in flat From 8650b288086133d232d7ef7a9df0fe3cc2d7c66e Mon Sep 17 00:00:00 2001 From: Peng Ren Date: Thu, 9 Apr 2026 17:59:55 -0400 Subject: [PATCH 2/5] Update CI dependencies version --- .github/workflows/ci.yml | 8 ++++---- .github/workflows/release.yml | 10 +++++----- 2 files changed, 9 insertions(+), 9 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 0fc12ed..a2158e5 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -32,15 +32,15 @@ jobs: --health-retries 5 steps: - - uses: actions/checkout@v4 + - uses: actions/checkout@v5 - name: Set up Python ${{ matrix.python-version }} - uses: actions/setup-python@v4 + uses: actions/setup-python@v5 with: python-version: ${{ matrix.python-version }} - name: Cache pip dependencies - uses: actions/cache@v3 + uses: actions/cache@v4 with: path: ~/.cache/pip key: ${{ runner.os }}-py${{ matrix.python-version }}-mongo${{ matrix.mongodb-version }}-sqlalchemy-${{ matrix.sqlalchemy-version }}-pip-${{ hashFiles('**/requirements-test.txt', 'pyproject.toml') }} @@ -101,7 +101,7 @@ jobs: python -m pytest tests/ --cov=pymongosql --cov-report=term-missing --cov-report=xml - name: Upload coverage reports - uses: codecov/codecov-action@v4 + uses: codecov/codecov-action@v5 with: env_vars: OS,PYTHON token: ${{ secrets.CODECOV_TOKEN }} diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index 785075b..bbc14a9 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -14,15 +14,15 @@ jobs: runs-on: ubuntu-latest steps: - - uses: actions/checkout@v4 + - uses: actions/checkout@v5 - name: Set up Python ${{ env.PYTHON_VERSION }} - uses: actions/setup-python@v4 + uses: actions/setup-python@v5 with: python-version: ${{ env.PYTHON_VERSION }} - name: Cache pip dependencies - uses: actions/cache@v3 + uses: actions/cache@v4 with: path: ~/.cache/pip key: ${{ runner.os }}-pip-lint-${{ hashFiles('**/requirements-test.txt', 'pyproject.toml') }} @@ -57,13 +57,13 @@ jobs: needs: [lint-and-format, test] steps: - - uses: actions/checkout@v4 + - uses: actions/checkout@v5 with: # Fetch full history for setuptools_scm fetch-depth: 0 - name: Set up Python ${{ env.PYTHON_VERSION }} - uses: actions/setup-python@v4 + uses: actions/setup-python@v5 with: python-version: ${{ env.PYTHON_VERSION }} From 1d6800e045f1a085ff7e78980e5cbd4e406399ed Mon Sep 17 00:00:00 2001 From: Peng Ren Date: Thu, 9 Apr 2026 18:01:19 -0400 Subject: [PATCH 3/5] Update requirements --- requirements-optional.txt | 2 +- requirements-test.txt | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/requirements-optional.txt b/requirements-optional.txt index 7729cbc..c57c163 100644 --- a/requirements-optional.txt +++ b/requirements-optional.txt @@ -1,2 +1,2 @@ # SQLAlchemy support (optional) - supports 1.4+ and 2.x -sqlalchemy>=1.4.0,<3.0.0 \ No newline at end of file +sqlalchemy>=1.4.0,<3.0.0 diff --git a/requirements-test.txt b/requirements-test.txt index fc68ae1..7b57803 100644 --- a/requirements-test.txt +++ b/requirements-test.txt @@ -5,4 +5,4 @@ pytest>=7.0.0 pytest-cov>=4.0.0 flake8>=6.0.0 -flake8-pyproject>=1.2.0 \ No newline at end of file +flake8-pyproject>=1.2.0 From 21dc1c8f8c360461471e83f1cae6d6ca74292383 Mon Sep 17 00:00:00 2001 From: Peng Ren Date: Thu, 9 Apr 2026 18:13:39 -0400 Subject: [PATCH 4/5] Upgrade CI dependencies --- .github/workflows/ci.yml | 4 ++-- .github/workflows/release.yml | 14 +++++++------- 2 files changed, 9 insertions(+), 9 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index a2158e5..9688fff 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -35,12 +35,12 @@ jobs: - uses: actions/checkout@v5 - name: Set up Python ${{ matrix.python-version }} - uses: actions/setup-python@v5 + uses: actions/setup-python@v6 with: python-version: ${{ matrix.python-version }} - name: Cache pip dependencies - uses: actions/cache@v4 + uses: actions/cache@v5 with: path: ~/.cache/pip key: ${{ runner.os }}-py${{ matrix.python-version }}-mongo${{ matrix.mongodb-version }}-sqlalchemy-${{ matrix.sqlalchemy-version }}-pip-${{ hashFiles('**/requirements-test.txt', 'pyproject.toml') }} diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index bbc14a9..11a3f7c 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -17,12 +17,12 @@ jobs: - uses: actions/checkout@v5 - name: Set up Python ${{ env.PYTHON_VERSION }} - uses: actions/setup-python@v5 + uses: actions/setup-python@v6 with: python-version: ${{ env.PYTHON_VERSION }} - name: Cache pip dependencies - uses: actions/cache@v4 + uses: actions/cache@v5 with: path: ~/.cache/pip key: ${{ runner.os }}-pip-lint-${{ hashFiles('**/requirements-test.txt', 'pyproject.toml') }} @@ -63,7 +63,7 @@ jobs: fetch-depth: 0 - name: Set up Python ${{ env.PYTHON_VERSION }} - uses: actions/setup-python@v5 + uses: actions/setup-python@v6 with: python-version: ${{ env.PYTHON_VERSION }} @@ -85,7 +85,7 @@ jobs: ls -la dist/ - name: Upload build artifacts - uses: actions/upload-artifact@v4 + uses: actions/upload-artifact@v6 with: name: dist path: dist/ @@ -100,7 +100,7 @@ jobs: steps: - name: Download build artifacts - uses: actions/download-artifact@v4 + uses: actions/download-artifact@v7 with: name: dist path: dist/ @@ -118,12 +118,12 @@ jobs: contents: write steps: - - uses: actions/checkout@v4 + - uses: actions/checkout@v5 with: fetch-depth: 0 - name: Download build artifacts - uses: actions/download-artifact@v4 + uses: actions/download-artifact@v7 with: name: dist path: dist/ From 7442979b1da8dc9133ebc6a5c75ee5fea3a56d65 Mon Sep 17 00:00:00 2001 From: Peng Ren Date: Thu, 9 Apr 2026 18:18:29 -0400 Subject: [PATCH 5/5] Force nodejs 24 --- .github/workflows/ci.yml | 3 +++ .github/workflows/release.yml | 1 + 2 files changed, 4 insertions(+) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 9688fff..a38f3fb 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -7,6 +7,9 @@ on: branches: [ main ] workflow_call: +env: + FORCE_JAVASCRIPT_ACTIONS_TO_NODE24: true + jobs: test: runs-on: ubuntu-latest diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index 11a3f7c..be5395e 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -7,6 +7,7 @@ on: env: PYTHON_VERSION: "3.11" MONGODB_VERSION: "8.0" + FORCE_JAVASCRIPT_ACTIONS_TO_NODE24: true jobs: lint-and-format: