From f786b92f5b32a48f2861905298a14657a9a93628 Mon Sep 17 00:00:00 2001 From: jrmccluskey Date: Thu, 26 Mar 2026 11:16:11 -0400 Subject: [PATCH 1/6] Swap out pylint and flake8 with ruff --- .../apache_beam/typehints/typehints_test.py | 3 +- sdks/python/ruff.toml | 96 +++++++++++++++++++ sdks/python/scripts/run_pylint.sh | 7 +- sdks/python/tox.ini | 7 +- 4 files changed, 102 insertions(+), 11 deletions(-) create mode 100644 sdks/python/ruff.toml diff --git a/sdks/python/apache_beam/typehints/typehints_test.py b/sdks/python/apache_beam/typehints/typehints_test.py index 1377bea6d56d..992c129fd8a5 100644 --- a/sdks/python/apache_beam/typehints/typehints_test.py +++ b/sdks/python/apache_beam/typehints/typehints_test.py @@ -712,7 +712,8 @@ def test_type_checks_not_dict(self): def test_type_check_collection(self): hint = typehints.Dict[str, int] - l = collections.defaultdict(list[("blue", 2)]) + element = ("blue", 2) + l = collections.defaultdict(list[element]) self.assertIsNone(hint.type_check(l)) def test_type_check_invalid_key_type(self): diff --git a/sdks/python/ruff.toml b/sdks/python/ruff.toml new file mode 100644 index 000000000000..5a862e013f12 --- /dev/null +++ b/sdks/python/ruff.toml @@ -0,0 +1,96 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. +# + +exclude = [ + ".bzr", + ".direnv", + ".eggs", + ".git", + ".git-rewrite", + ".hg", + ".ipynb_checkpoints", + ".mypy_cache", + ".nox", + ".pants.d", + ".pyenv", + ".pytest_cache", + ".pytype", + ".ruff_cache", + ".svn", + ".tox", + ".venv", + ".vscode", + "__pypackages__", + "_build", + "buck-out", + "build", + "dist", + "node_modules", + "site-packages", + "venv", + "*.pxd", + "*.pyx", + "*pb2*.py", + "**/examples/**/*.py", + "**/examples/**/*.ipynb", + "**/portability/api/**/*.py", + "**/portability/api/__init__.py", +] + +target-version = "py310" + +src = ["apache_beam"] + +[lint] +select = ["E9", "PL", "F821", "F822", "F823"] +ignore = [ + # Ignored Pylint Checks + "PLC0415", # import-outside-toplevel + "PLR2004", # magic-value-comparison + "PLR0913", # too-many-arguments + "PLR0912", # too-many-branches + "PLW0108", # unnecessary-lambda + "PLW2901", # redefined-loop-name + "PLR0915", # too-many-statements + "PLR1714", # repeated-equality-comparison + "PLR0911", # too-many-return-statements + "PLR5501", # collapsible-else-if + "PLW0603", # global-statement + "PLR1730", # if-stmt-min-max + "PLW1641", # eq-without-hash + "PLW0602", # global-variable-not-assigned + "PLC1802", # len-test + "PLC3002", # unnecessary-direct-lambda-call + "PLW0642", # self-or-cls-assignment + "PLR1733", # unnecessary-dict-index-lookup + "PLR0402", # manual-from-import + "PLC0132", # type-param-name-mismatch + "PLC0206", # dict-index-missing-items + "PLC0207", # missing-maxsplit-arg + "PLR1704", # redefined-argument-from-local + "PLR1711", # useless-return + "PLW0406", # import-self + "PLW3301", # nested-min-max + "PLR2044", # empty-comment +] + +# Allow fix for all enabled rules (when `--fix` is provided). +fixable = ["ALL"] +unfixable = [] + +# Allow unused variables when underscore-prefixed. 
+dummy-variable-rgx = "^(_+|(_+[a-zA-Z0-9_]*[a-zA-Z0-9]+?))$" \ No newline at end of file diff --git a/sdks/python/scripts/run_pylint.sh b/sdks/python/scripts/run_pylint.sh index 4a6bf4c2ef06..a9a0b0ec50d5 100755 --- a/sdks/python/scripts/run_pylint.sh +++ b/sdks/python/scripts/run_pylint.sh @@ -78,11 +78,8 @@ done echo -e "Skipping lint for files:\n${FILES_TO_IGNORE}" echo -e "Linting modules:\n${MODULE}" -echo "Running pylint..." -pylint -j8 ${MODULE} --ignore-patterns="$FILES_TO_IGNORE" -echo "Running flake8..." -flake8 ${MODULE} --count --select=E9,F821,F822,F823 --show-source --statistics \ - --exclude="${FILES_TO_IGNORE}" +echo "Running ruff..." +ruff check ${MODULE} --extend-exclude="$FILES_TO_IGNORE" echo "Running isort..." # Skip files where isort is behaving weirdly diff --git a/sdks/python/tox.ini b/sdks/python/tox.ini index 7efb68b5b707..deef75543a6e 100644 --- a/sdks/python/tox.ini +++ b/sdks/python/tox.ini @@ -192,13 +192,10 @@ commands = setenv = # keep the version of pylint in sync with the 'rev' in .pre-commit-config.yaml deps = - astroid<4.1.0,>=4.0.1 - pycodestyle==2.8.0 - pylint==4.0.2 + ruff==0.15.7 isort==7.0.0 - flake8==4.0.1 commands = - pylint --version + ruff --version time {toxinidir}/scripts/run_pylint.sh [testenv:whitespacelint] From eb88bac5e927953598cec9b308ab03c354562684 Mon Sep 17 00:00:00 2001 From: jrmccluskey Date: Thu, 26 Mar 2026 11:47:30 -0400 Subject: [PATCH 2/6] time the mypy run --- sdks/python/tox.ini | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sdks/python/tox.ini b/sdks/python/tox.ini index deef75543a6e..8a8c21e7936b 100644 --- a/sdks/python/tox.ini +++ b/sdks/python/tox.ini @@ -215,7 +215,7 @@ extras = gcp commands = mypy --version - python setup.py mypy + time python setup.py mypy [testenv:docs] From b1a4e5bcab5aa2e88bf081df2ec1bd59c40d81e1 Mon Sep 17 00:00:00 2001 From: jrmccluskey Date: Thu, 26 Mar 2026 12:11:02 -0400 Subject: [PATCH 3/6] consolidate mypy checks into linting --- 
sdks/python/setup.py | 8 ++++---- sdks/python/test-suites/tox/pycommon/build.gradle | 4 ++-- sdks/python/tox.ini | 7 +++++++ 3 files changed, 13 insertions(+), 6 deletions(-) diff --git a/sdks/python/setup.py b/sdks/python/setup.py index bf95dbb9bb11..d68fd24d333d 100644 --- a/sdks/python/setup.py +++ b/sdks/python/setup.py @@ -49,7 +49,7 @@ def normalize_path(filename): return os.path.normcase(os.path.realpath(os.path.normpath(filename))) -class mypy(Command): +class pyrefly(Command): user_options = [] def initialize_options(self): @@ -71,10 +71,10 @@ def get_project_path(self): return os.path.join(project_path, to_filename(ei_cmd.egg_name)) def run(self): - args = ['mypy', self.get_project_path()] + args = ['pyrefly', 'check', self.get_project_path()] result = subprocess.call(args) if result != 0: - raise DistutilsError("mypy exited with status %d" % result) + raise DistutilsError("pyrefly exited with status %d" % result) def get_version(): @@ -659,6 +659,6 @@ def get_portability_package_data(): license='Apache License, Version 2.0', keywords=PACKAGE_KEYWORDS, cmdclass={ - 'mypy': mypy, + 'pyrefly': pyrefly, }, ) diff --git a/sdks/python/test-suites/tox/pycommon/build.gradle b/sdks/python/test-suites/tox/pycommon/build.gradle index 38cf39941d62..08cfe23916d2 100644 --- a/sdks/python/test-suites/tox/pycommon/build.gradle +++ b/sdks/python/test-suites/tox/pycommon/build.gradle @@ -41,5 +41,5 @@ check.dependsOn formatter toxTask "lint", "lint", "${posargs}" linter.dependsOn lint -toxTask "mypy", "mypy", "${posargs}" -linter.dependsOn mypy +// toxTask "mypy", "mypy", "${posargs}" +// linter.dependsOn mypy diff --git a/sdks/python/tox.ini b/sdks/python/tox.ini index 8a8c21e7936b..0b6e1a90686d 100644 --- a/sdks/python/tox.ini +++ b/sdks/python/tox.ini @@ -194,9 +194,16 @@ setenv = deps = ruff==0.15.7 isort==7.0.0 + pyrefly==0.54.0 + dask==2022.01.0 + distributed==2022.01.0 +extras = + gcp commands = ruff --version time {toxinidir}/scripts/run_pylint.sh + pyrefly 
--version + time python setup.py pyrefly [testenv:whitespacelint] setenv = From 2684530a4f97319083ce76d81af610f259f94bd6 Mon Sep 17 00:00:00 2001 From: jrmccluskey Date: Wed, 8 Apr 2026 12:10:28 -0400 Subject: [PATCH 4/6] Migrate pre-commit hook, pin versions in setup.py --- .agent/skills/python-development/SKILL.md | 4 ++-- .pre-commit-config.yaml | 15 +++++++-------- sdks/python/setup.py | 6 ++++++ sdks/python/tox.ini | 5 +---- 4 files changed, 16 insertions(+), 14 deletions(-) diff --git a/.agent/skills/python-development/SKILL.md b/.agent/skills/python-development/SKILL.md index ed71bf9acc2b..f7a8e7c491bb 100644 --- a/.agent/skills/python-development/SKILL.md +++ b/.agent/skills/python-development/SKILL.md @@ -176,10 +176,10 @@ Use `--requirements_file=requirements.txt` or custom containers. ## Code Quality Tools ```bash # Linting -pylint apache_beam/ +ruff check apache_beam/ # Type checking -mypy apache_beam/ +pyrefly check apache_beam/ # Formatting (via yapf) yapf -i apache_beam/file.py diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index f84f6b9e7418..4fa6ccd6bf85 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -31,12 +31,11 @@ repos: sdks/python/apache_beam/portability/api/.*pb2.*.py )$ - - repo: https://github.com/pycqa/pylint - # this rev is a release tag in the repo above and corresponds with a pylint - # version. make sure this matches the version of pylint in tox.ini. - rev: v4.0.2 + - repo: https://github.com/astral-sh/ruff-pre-commit + # this rev is a release tag in the repo above and corresponds with a ruff + # version. 
make sure this matches the version of ruff in setup.py + rev: v0.15.7 hooks: - - id: pylint - args: ["--rcfile=sdks/python/.pylintrc"] - files: ^sdks/python/apache_beam/ - exclude: *exclude + - id: ruff-check + files: "sdks/python/apache_beam" + args: ["--config=sdks/python/ruff.toml"] diff --git a/sdks/python/setup.py b/sdks/python/setup.py index d68fd24d333d..1505f7013bb7 100644 --- a/sdks/python/setup.py +++ b/sdks/python/setup.py @@ -424,6 +424,12 @@ def get_portability_package_data(): python_requires=python_requires, # BEAM-8840: Do NOT use tests_require or setup_requires. extras_require={ + 'dev': [ + 'isort==7.0.0', + 'pyrefly==0.54.0', + 'ruff==0.15.7', + 'yapf==0.43.0', + ], 'dill': [ # Dill doesn't have forwards-compatibility guarantees within minor # version. Pickles created with a new version of dill may not diff --git a/sdks/python/tox.ini b/sdks/python/tox.ini index 0b6e1a90686d..0ce4753752f9 100644 --- a/sdks/python/tox.ini +++ b/sdks/python/tox.ini @@ -190,15 +190,12 @@ commands = [testenv:lint] # Don't set TMPDIR to avoid "AF_UNIX path too long" errors in pylint. 
setenv = -# keep the version of pylint in sync with the 'rev' in .pre-commit-config.yaml deps = - ruff==0.15.7 - isort==7.0.0 - pyrefly==0.54.0 dask==2022.01.0 distributed==2022.01.0 extras = gcp + dev commands = ruff --version time {toxinidir}/scripts/run_pylint.sh From a8b3cda49155edbeed7f41af2a6d6b4aa91b0c05 Mon Sep 17 00:00:00 2001 From: jrmccluskey Date: Wed, 8 Apr 2026 12:44:56 -0400 Subject: [PATCH 5/6] fix new breakages --- sdks/python/apache_beam/io/gcp/bigquery_change_history_test.py | 2 +- sdks/python/apache_beam/ml/inference/agent_development_kit.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/sdks/python/apache_beam/io/gcp/bigquery_change_history_test.py b/sdks/python/apache_beam/io/gcp/bigquery_change_history_test.py index 04cc84e6ef9e..0b0044d9ab71 100644 --- a/sdks/python/apache_beam/io/gcp/bigquery_change_history_test.py +++ b/sdks/python/apache_beam/io/gcp/bigquery_change_history_test.py @@ -43,7 +43,7 @@ def _ts(*args, **kwargs) -> Timestamp: """Create a UTC datetime and return a Beam Timestamp.""" - dt = datetime.datetime(*args, tzinfo=datetime.timezone.utc, **kwargs) + dt = datetime.datetime(*args, tzinfo=datetime.timezone.utc, **kwargs) # pyrefly: ignore[bad-keyword-argument] return Timestamp(dt.timestamp()) diff --git a/sdks/python/apache_beam/ml/inference/agent_development_kit.py b/sdks/python/apache_beam/ml/inference/agent_development_kit.py index 2e0dcba9de1b..f3ad7b898390 100644 --- a/sdks/python/apache_beam/ml/inference/agent_development_kit.py +++ b/sdks/python/apache_beam/ml/inference/agent_development_kit.py @@ -237,7 +237,7 @@ def run_inference( # Wrap plain strings in a Content object if isinstance(element, str): - message = genai_Content(role="user", parts=[genai_Part(text=element)]) + message = genai_Content(role="user", parts=[genai_Part(text=element)]) # pyrefly: ignore[bad-instantiation] else: # Assume the caller has already constructed a types.Content object message = element From 
c86fdbb91606a593a76cdcba9555d0b93fe60569 Mon Sep 17 00:00:00 2001 From: jrmccluskey Date: Wed, 8 Apr 2026 13:07:43 -0400 Subject: [PATCH 6/6] lint/format adjustments --- sdks/python/apache_beam/io/gcp/bigquery_change_history_test.py | 3 ++- sdks/python/apache_beam/ml/inference/agent_development_kit.py | 3 ++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/sdks/python/apache_beam/io/gcp/bigquery_change_history_test.py b/sdks/python/apache_beam/io/gcp/bigquery_change_history_test.py index 0b0044d9ab71..11b467f26d49 100644 --- a/sdks/python/apache_beam/io/gcp/bigquery_change_history_test.py +++ b/sdks/python/apache_beam/io/gcp/bigquery_change_history_test.py @@ -43,7 +43,8 @@ def _ts(*args, **kwargs) -> Timestamp: """Create a UTC datetime and return a Beam Timestamp.""" - dt = datetime.datetime(*args, tzinfo=datetime.timezone.utc, **kwargs) # pyrefly: ignore[bad-keyword-argument] + # pyrefly: ignore[bad-keyword-argument] + dt = datetime.datetime(*args, tzinfo=datetime.timezone.utc, **kwargs) return Timestamp(dt.timestamp()) diff --git a/sdks/python/apache_beam/ml/inference/agent_development_kit.py b/sdks/python/apache_beam/ml/inference/agent_development_kit.py index f3ad7b898390..8f3f046c7f26 100644 --- a/sdks/python/apache_beam/ml/inference/agent_development_kit.py +++ b/sdks/python/apache_beam/ml/inference/agent_development_kit.py @@ -237,7 +237,8 @@ def run_inference( # Wrap plain strings in a Content object if isinstance(element, str): - message = genai_Content(role="user", parts=[genai_Part(text=element)]) # pyrefly: ignore[bad-instantiation] + # pyrefly: ignore[bad-instantiation] + message = genai_Content(role="user", parts=[genai_Part(text=element)]) else: # Assume the caller has already constructed a types.Content object message = element