From 77958849468c2946496e300343c4a9fca310fd01 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Thu, 11 Jun 2026 05:30:20 +0000 Subject: [PATCH 01/16] Initial plan From a4585d8d948ea028eb91335536458c0e5bf50727 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Thu, 11 Jun 2026 05:48:40 +0000 Subject: [PATCH 02/16] Add test documenting missing PEP249 alerts for connection stored in self attribute --- .../library-tests/frameworks/hdbcli/pep249.py | 28 +++++++++++++++++++ 1 file changed, 28 insertions(+) diff --git a/python/ql/test/library-tests/frameworks/hdbcli/pep249.py b/python/ql/test/library-tests/frameworks/hdbcli/pep249.py index 713f15cb6d4f..f317a4958169 100644 --- a/python/ql/test/library-tests/frameworks/hdbcli/pep249.py +++ b/python/ql/test/library-tests/frameworks/hdbcli/pep249.py @@ -7,3 +7,31 @@ cursor.executemany("some sql", (42,)) # $ getSql="some sql" cursor.close() + + +# Connection stored in a class attribute (`self._conn`) and used in another method. +# +# This is currently NOT detected: the `Connection::instance()`/`execute()` predicates in +# PEP249.qll are based on type tracking, which cannot follow a value that is stored into a +# `self` attribute in one method and read from a `self` attribute in another method (see the +# `MISSING` markers below). Regular (global) data flow handles this case correctly, so the +# limitation is specific to the type-tracking-based modeling. +class Database: + def __init__(self): + self._conn = dbapi.connect(address="hostname", port=300, user="username") + + def get_connection(self): + return self._conn + + def run_via_getter(self): + conn = self.get_connection() + cursor = conn.cursor() + cursor.execute("getter sql") # $ MISSING: getSql="getter sql" + + def run_direct(self): + self._conn.execute("direct sql") # $ MISSING: getSql="direct sql" + + +db = Database() +db.run_via_getter() +db.run_direct() From 73bc2d70ae1067b8bc0ee0d8f55c7e9a0c38326e Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Thu, 11 Jun 2026 06:08:49 +0000 Subject: [PATCH 03/16] Model instance-attribute type flow Use a field level step like JS and Ruby. --- .../new/internal/TypeTrackingImpl.qll | 47 +++++++++++++++++++ .../dataflow/typetracking/attribute_tests.py | 4 +- .../library-tests/frameworks/hdbcli/pep249.py | 13 +++-- 3 files changed, 55 insertions(+), 9 deletions(-) diff --git a/python/ql/lib/semmle/python/dataflow/new/internal/TypeTrackingImpl.qll b/python/ql/lib/semmle/python/dataflow/new/internal/TypeTrackingImpl.qll index 215c7906e655..a242c1d8e50c 100644 --- a/python/ql/lib/semmle/python/dataflow/new/internal/TypeTrackingImpl.qll +++ b/python/ql/lib/semmle/python/dataflow/new/internal/TypeTrackingImpl.qll @@ -172,6 +172,8 @@ module TypeTrackingInput implements Shared::TypeTrackingInput { /** Holds if there is a level step from `nodeFrom` to `nodeTo`, which does not depend on the call graph. */ predicate levelStepNoCall(Node nodeFrom, LocalSourceNode nodeTo) { TypeTrackerSummaryFlow::levelStepNoCall(nodeFrom, nodeTo) + or + localFieldStep(nodeFrom, nodeTo) } /** @@ -317,6 +319,51 @@ module TypeTrackingInput implements Shared::TypeTrackingInput { ) } + /** + * Holds if `ref` accesses attribute `attr` of `self`, where `self` is the first + * parameter of an instance method of `cls` (i.e. an access of the form `self.attr`). + * + * Static methods and class methods are excluded, since their first parameter is not a + * `self` instance reference. + */ + private predicate selfAttrRef(Class cls, string attr, DataFlowPublic::AttrRef ref) { + exists(Function method, Name selfUse | + method = cls.getAMethod() and + not DataFlowDispatch::isStaticmethod(method) and + not DataFlowDispatch::isClassmethod(method) and + selfUse.getVariable() = method.getArg(0).(Name).getVariable() and + ref.getObject().asCfgNode().getNode() = selfUse and + ref.mayHaveAttributeName(attr) + ) + } + + /** + * Holds if `nodeFrom` is written to attribute `self.attr` in some instance method of a + * class, and `nodeTo` reads attribute `self.attr` in some (possibly different) instance + * method of the same class. + * + * This models flow through instance attributes (`self.foo`): a value stored into + * `self.foo` in one method can be read from `self.foo` in another method. Type-tracking + * handles the store and read steps via `AttrWrite`/`AttrRead`, but on its own it cannot + * relate the `self` of the writing method to the `self` of the reading method. Following + * the approach used for Ruby and JavaScript, we model this directly as a level step from + * the written value to the read reference, for any pair of methods on the class (not + * just from `__init__`). + * + * This is an over-approximation: it is instance-insensitive (it does not distinguish + * between different instances of the same class) and order-insensitive (it does not + * require the write to happen before the read), matching the precision of + * instance-attribute handling for Ruby and JavaScript. + */ + private predicate localFieldStep(Node nodeFrom, LocalSourceNode nodeTo) { + exists(Class cls, string attr, DataFlowPublic::AttrWrite write, DataFlowPublic::AttrRead read | + selfAttrRef(cls, attr, write) and + nodeFrom = write.getValue() and + selfAttrRef(cls, attr, read) and + nodeTo = read + ) + } + /** * Holds if data can flow from `node1` to `node2` in a way that discards call contexts. */ diff --git a/python/ql/test/library-tests/dataflow/typetracking/attribute_tests.py b/python/ql/test/library-tests/dataflow/typetracking/attribute_tests.py index c49cdf77fcdf..05496ad74d09 100644 --- a/python/ql/test/library-tests/dataflow/typetracking/attribute_tests.py +++ b/python/ql/test/library-tests/dataflow/typetracking/attribute_tests.py @@ -151,10 +151,10 @@ def __init__(self): # $ tracked=foo self.foo = tracked # $ tracked=foo tracked def print_foo(self): # $ MISSING: tracked=foo - print(self.foo) # $ MISSING: tracked=foo tracked + print(self.foo) # $ tracked MISSING: tracked=foo def possibly_uncalled_method(self): # $ MISSING: tracked=foo - print(self.foo) # $ MISSING: tracked=foo tracked + print(self.foo) # $ tracked MISSING: tracked=foo instance = MyClass2() print(instance.foo) # $ MISSING: tracked=foo tracked diff --git a/python/ql/test/library-tests/frameworks/hdbcli/pep249.py b/python/ql/test/library-tests/frameworks/hdbcli/pep249.py index f317a4958169..0c6c39086482 100644 --- a/python/ql/test/library-tests/frameworks/hdbcli/pep249.py +++ b/python/ql/test/library-tests/frameworks/hdbcli/pep249.py @@ -11,11 +11,10 @@ # Connection stored in a class attribute (`self._conn`) and used in another method. # -# This is currently NOT detected: the `Connection::instance()`/`execute()` predicates in -# PEP249.qll are based on type tracking, which cannot follow a value that is stored into a -# `self` attribute in one method and read from a `self` attribute in another method (see the -# `MISSING` markers below). Regular (global) data flow handles this case correctly, so the -# limitation is specific to the type-tracking-based modeling. +# This is detected because type tracking includes a level step modelling flow through +# instance attributes: a value written to `self._conn` in one method (here `__init__`) can +# be read back from `self._conn` (directly or via a getter) in any other method on the same +# class. This follows the same approach used for instance fields in Ruby and JavaScript. class Database: def __init__(self): self._conn = dbapi.connect(address="hostname", port=300, user="username") @@ -26,10 +25,10 @@ def get_connection(self): def run_via_getter(self): conn = self.get_connection() cursor = conn.cursor() - cursor.execute("getter sql") # $ MISSING: getSql="getter sql" + cursor.execute("getter sql") # $ getSql="getter sql" def run_direct(self): - self._conn.execute("direct sql") # $ MISSING: getSql="direct sql" + self._conn.execute("direct sql") # $ getSql="direct sql" db = Database() From befb557bfd8b0c3d1800177c155a580765abbe22 Mon Sep 17 00:00:00 2001 From: Owen Mansel-Chan Date: Thu, 11 Jun 2026 15:44:20 +0200 Subject: [PATCH 04/16] Accept fixed MISSING tests --- .../library-tests/CallGraph/InlineCallGraphTest.expected | 3 --- .../library-tests/CallGraph/code/class_attr_assign.py | 6 +++--- 2 files changed, 3 insertions(+), 6 deletions(-) diff --git a/python/ql/test/experimental/library-tests/CallGraph/InlineCallGraphTest.expected b/python/ql/test/experimental/library-tests/CallGraph/InlineCallGraphTest.expected index b353309e852f..1cd62c6de347 100644 --- a/python/ql/test/experimental/library-tests/CallGraph/InlineCallGraphTest.expected +++ b/python/ql/test/experimental/library-tests/CallGraph/InlineCallGraphTest.expected @@ -1,9 +1,6 @@ testFailures debug_callableNotUnique pointsTo_found_typeTracker_notFound -| code/class_attr_assign.py:10:9:10:27 | ControlFlowNode for Attribute() | my_func | -| code/class_attr_assign.py:11:9:11:25 | ControlFlowNode for Attribute() | my_func | -| code/class_attr_assign.py:26:9:26:25 | ControlFlowNode for Attribute() | DummyObject.method | | code/class_super.py:50:1:50:6 | ControlFlowNode for Attribute() | outside_def | | code/conditional_in_argument.py:18:5:18:11 | ControlFlowNode for Attribute() | X.bar | | code/func_defined_outside_class.py:21:1:21:11 | ControlFlowNode for Attribute() | A.foo | diff --git a/python/ql/test/experimental/library-tests/CallGraph/code/class_attr_assign.py b/python/ql/test/experimental/library-tests/CallGraph/code/class_attr_assign.py index 605375925f72..714e27dba1a0 100644 --- a/python/ql/test/experimental/library-tests/CallGraph/code/class_attr_assign.py +++ b/python/ql/test/experimental/library-tests/CallGraph/code/class_attr_assign.py @@ -7,8 +7,8 @@ def __init__(self, func): self.direct_ref = my_func def later(self): - self.indirect_ref() # $ pt=my_func MISSING: tt=my_func - self.direct_ref() # $ pt=my_func MISSING: tt=my_func + self.indirect_ref() # $ pt=my_func tt=my_func + self.direct_ref() # $ pt=my_func tt=my_func foo = Foo(my_func) # $ tt=Foo.__init__ foo.later() # $ pt,tt=Foo.later @@ -23,7 +23,7 @@ def __init__(self): self.obj = DummyObject() def later(self): - self.obj.method() # $ pt=DummyObject.method MISSING: tt=DummyObject.method + self.obj.method() # $ pt=DummyObject.method tt=DummyObject.method bar = Bar(my_func) # $ tt=Bar.__init__ From 913dcb119059b42918661e644333e5052f7f6a6b Mon Sep 17 00:00:00 2001 From: Owen Mansel-Chan Date: Thu, 11 Jun 2026 22:20:52 +0200 Subject: [PATCH 05/16] Add change note --- .../2026-06-11-fix-type-tracking-instance-attributes.md | 4 ++++ 1 file changed, 4 insertions(+) create mode 100644 python/ql/lib/change-notes/2026-06-11-fix-type-tracking-instance-attributes.md diff --git a/python/ql/lib/change-notes/2026-06-11-fix-type-tracking-instance-attributes.md b/python/ql/lib/change-notes/2026-06-11-fix-type-tracking-instance-attributes.md new file mode 100644 index 000000000000..25bc0e0f31f9 --- /dev/null +++ b/python/ql/lib/change-notes/2026-06-11-fix-type-tracking-instance-attributes.md @@ -0,0 +1,4 @@ +--- +category: minorAnalysis +--- +* Python type tracking now follows values stored in instance attributes such as `self.attr` across instance methods on the same class. As a result, analysis is more likely to recognize user-defined objects that are stored on `self` and used later in other methods, which may produce additional results. From d389ea4039e28f1876b2ea962f210d3cfaecae7a Mon Sep 17 00:00:00 2001 From: Owen Mansel-Chan Date: Fri, 12 Jun 2026 13:28:00 +0100 Subject: [PATCH 06/16] Convert sql-injection test to inline expectations --- .../SqlInjection.expected | 34 +++++++++---------- .../CWE-089-SqlInjection/SqlInjection.qlref | 3 +- .../CWE-089-SqlInjection/sql_injection.py | 10 +++--- .../sqlalchemy_textclause.py | 28 +++++++-------- 4 files changed, 38 insertions(+), 37 deletions(-) diff --git a/python/ql/test/query-tests/Security/CWE-089-SqlInjection/SqlInjection.expected b/python/ql/test/query-tests/Security/CWE-089-SqlInjection/SqlInjection.expected index 9ff8b1d718c1..c1958c23858d 100644 --- a/python/ql/test/query-tests/Security/CWE-089-SqlInjection/SqlInjection.expected +++ b/python/ql/test/query-tests/Security/CWE-089-SqlInjection/SqlInjection.expected @@ -1,3 +1,20 @@ +#select +| sql_injection.py:21:24:21:77 | ControlFlowNode for BinaryExpr | sql_injection.py:14:15:14:22 | ControlFlowNode for username | sql_injection.py:21:24:21:77 | ControlFlowNode for BinaryExpr | This SQL query depends on a $@. | sql_injection.py:14:15:14:22 | ControlFlowNode for username | user-provided value | +| sql_injection.py:24:38:24:95 | ControlFlowNode for BinaryExpr | sql_injection.py:14:15:14:22 | ControlFlowNode for username | sql_injection.py:24:38:24:95 | ControlFlowNode for BinaryExpr | This SQL query depends on a $@. | sql_injection.py:14:15:14:22 | ControlFlowNode for username | user-provided value | +| sql_injection.py:25:26:25:83 | ControlFlowNode for BinaryExpr | sql_injection.py:14:15:14:22 | ControlFlowNode for username | sql_injection.py:25:26:25:83 | ControlFlowNode for BinaryExpr | This SQL query depends on a $@. | sql_injection.py:14:15:14:22 | ControlFlowNode for username | user-provided value | +| sql_injection.py:26:28:26:85 | ControlFlowNode for BinaryExpr | sql_injection.py:14:15:14:22 | ControlFlowNode for username | sql_injection.py:26:28:26:85 | ControlFlowNode for BinaryExpr | This SQL query depends on a $@. | sql_injection.py:14:15:14:22 | ControlFlowNode for username | user-provided value | +| sqlalchemy_textclause.py:27:28:27:87 | ControlFlowNode for Attribute() | sqlalchemy_textclause.py:23:15:23:22 | ControlFlowNode for username | sqlalchemy_textclause.py:27:28:27:87 | ControlFlowNode for Attribute() | This SQL query depends on a $@. | sqlalchemy_textclause.py:23:15:23:22 | ControlFlowNode for username | user-provided value | +| sqlalchemy_textclause.py:31:50:31:72 | ControlFlowNode for Attribute() | sqlalchemy_textclause.py:23:15:23:22 | ControlFlowNode for username | sqlalchemy_textclause.py:31:50:31:72 | ControlFlowNode for Attribute() | This SQL query depends on a $@. | sqlalchemy_textclause.py:23:15:23:22 | ControlFlowNode for username | user-provided value | +| sqlalchemy_textclause.py:41:26:41:33 | ControlFlowNode for username | sqlalchemy_textclause.py:23:15:23:22 | ControlFlowNode for username | sqlalchemy_textclause.py:41:26:41:33 | ControlFlowNode for username | This SQL query depends on a $@. | sqlalchemy_textclause.py:23:15:23:22 | ControlFlowNode for username | user-provided value | +| sqlalchemy_textclause.py:42:31:42:38 | ControlFlowNode for username | sqlalchemy_textclause.py:23:15:23:22 | ControlFlowNode for username | sqlalchemy_textclause.py:42:31:42:38 | ControlFlowNode for username | This SQL query depends on a $@. | sqlalchemy_textclause.py:23:15:23:22 | ControlFlowNode for username | user-provided value | +| sqlalchemy_textclause.py:43:30:43:37 | ControlFlowNode for username | sqlalchemy_textclause.py:23:15:23:22 | ControlFlowNode for username | sqlalchemy_textclause.py:43:30:43:37 | ControlFlowNode for username | This SQL query depends on a $@. | sqlalchemy_textclause.py:23:15:23:22 | ControlFlowNode for username | user-provided value | +| sqlalchemy_textclause.py:44:35:44:42 | ControlFlowNode for username | sqlalchemy_textclause.py:23:15:23:22 | ControlFlowNode for username | sqlalchemy_textclause.py:44:35:44:42 | ControlFlowNode for username | This SQL query depends on a $@. | sqlalchemy_textclause.py:23:15:23:22 | ControlFlowNode for username | user-provided value | +| sqlalchemy_textclause.py:45:41:45:48 | ControlFlowNode for username | sqlalchemy_textclause.py:23:15:23:22 | ControlFlowNode for username | sqlalchemy_textclause.py:45:41:45:48 | ControlFlowNode for username | This SQL query depends on a $@. | sqlalchemy_textclause.py:23:15:23:22 | ControlFlowNode for username | user-provided value | +| sqlalchemy_textclause.py:46:46:46:53 | ControlFlowNode for username | sqlalchemy_textclause.py:23:15:23:22 | ControlFlowNode for username | sqlalchemy_textclause.py:46:46:46:53 | ControlFlowNode for username | This SQL query depends on a $@. | sqlalchemy_textclause.py:23:15:23:22 | ControlFlowNode for username | user-provided value | +| sqlalchemy_textclause.py:47:47:47:54 | ControlFlowNode for username | sqlalchemy_textclause.py:23:15:23:22 | ControlFlowNode for username | sqlalchemy_textclause.py:47:47:47:54 | ControlFlowNode for username | This SQL query depends on a $@. | sqlalchemy_textclause.py:23:15:23:22 | ControlFlowNode for username | user-provided value | +| sqlalchemy_textclause.py:48:52:48:59 | ControlFlowNode for username | sqlalchemy_textclause.py:23:15:23:22 | ControlFlowNode for username | sqlalchemy_textclause.py:48:52:48:59 | ControlFlowNode for username | This SQL query depends on a $@. | sqlalchemy_textclause.py:23:15:23:22 | ControlFlowNode for username | user-provided value | +| sqlalchemy_textclause.py:50:18:50:25 | ControlFlowNode for username | sqlalchemy_textclause.py:23:15:23:22 | ControlFlowNode for username | sqlalchemy_textclause.py:50:18:50:25 | ControlFlowNode for username | This SQL query depends on a $@. | sqlalchemy_textclause.py:23:15:23:22 | ControlFlowNode for username | user-provided value | +| sqlalchemy_textclause.py:51:24:51:31 | ControlFlowNode for username | sqlalchemy_textclause.py:23:15:23:22 | ControlFlowNode for username | sqlalchemy_textclause.py:51:24:51:31 | ControlFlowNode for username | This SQL query depends on a $@. | sqlalchemy_textclause.py:23:15:23:22 | ControlFlowNode for username | user-provided value | edges | sql_injection.py:14:15:14:22 | ControlFlowNode for username | sql_injection.py:21:24:21:77 | ControlFlowNode for BinaryExpr | provenance | | | sql_injection.py:14:15:14:22 | ControlFlowNode for username | sql_injection.py:24:38:24:95 | ControlFlowNode for BinaryExpr | provenance | | @@ -35,20 +52,3 @@ nodes | sqlalchemy_textclause.py:50:18:50:25 | ControlFlowNode for username | semmle.label | ControlFlowNode for username | | sqlalchemy_textclause.py:51:24:51:31 | ControlFlowNode for username | semmle.label | ControlFlowNode for username | subpaths -#select -| sql_injection.py:21:24:21:77 | ControlFlowNode for BinaryExpr | sql_injection.py:14:15:14:22 | ControlFlowNode for username | sql_injection.py:21:24:21:77 | ControlFlowNode for BinaryExpr | This SQL query depends on a $@. | sql_injection.py:14:15:14:22 | ControlFlowNode for username | user-provided value | -| sql_injection.py:24:38:24:95 | ControlFlowNode for BinaryExpr | sql_injection.py:14:15:14:22 | ControlFlowNode for username | sql_injection.py:24:38:24:95 | ControlFlowNode for BinaryExpr | This SQL query depends on a $@. | sql_injection.py:14:15:14:22 | ControlFlowNode for username | user-provided value | -| sql_injection.py:25:26:25:83 | ControlFlowNode for BinaryExpr | sql_injection.py:14:15:14:22 | ControlFlowNode for username | sql_injection.py:25:26:25:83 | ControlFlowNode for BinaryExpr | This SQL query depends on a $@. | sql_injection.py:14:15:14:22 | ControlFlowNode for username | user-provided value | -| sql_injection.py:26:28:26:85 | ControlFlowNode for BinaryExpr | sql_injection.py:14:15:14:22 | ControlFlowNode for username | sql_injection.py:26:28:26:85 | ControlFlowNode for BinaryExpr | This SQL query depends on a $@. | sql_injection.py:14:15:14:22 | ControlFlowNode for username | user-provided value | -| sqlalchemy_textclause.py:27:28:27:87 | ControlFlowNode for Attribute() | sqlalchemy_textclause.py:23:15:23:22 | ControlFlowNode for username | sqlalchemy_textclause.py:27:28:27:87 | ControlFlowNode for Attribute() | This SQL query depends on a $@. | sqlalchemy_textclause.py:23:15:23:22 | ControlFlowNode for username | user-provided value | -| sqlalchemy_textclause.py:31:50:31:72 | ControlFlowNode for Attribute() | sqlalchemy_textclause.py:23:15:23:22 | ControlFlowNode for username | sqlalchemy_textclause.py:31:50:31:72 | ControlFlowNode for Attribute() | This SQL query depends on a $@. | sqlalchemy_textclause.py:23:15:23:22 | ControlFlowNode for username | user-provided value | -| sqlalchemy_textclause.py:41:26:41:33 | ControlFlowNode for username | sqlalchemy_textclause.py:23:15:23:22 | ControlFlowNode for username | sqlalchemy_textclause.py:41:26:41:33 | ControlFlowNode for username | This SQL query depends on a $@. | sqlalchemy_textclause.py:23:15:23:22 | ControlFlowNode for username | user-provided value | -| sqlalchemy_textclause.py:42:31:42:38 | ControlFlowNode for username | sqlalchemy_textclause.py:23:15:23:22 | ControlFlowNode for username | sqlalchemy_textclause.py:42:31:42:38 | ControlFlowNode for username | This SQL query depends on a $@. | sqlalchemy_textclause.py:23:15:23:22 | ControlFlowNode for username | user-provided value | -| sqlalchemy_textclause.py:43:30:43:37 | ControlFlowNode for username | sqlalchemy_textclause.py:23:15:23:22 | ControlFlowNode for username | sqlalchemy_textclause.py:43:30:43:37 | ControlFlowNode for username | This SQL query depends on a $@. | sqlalchemy_textclause.py:23:15:23:22 | ControlFlowNode for username | user-provided value | -| sqlalchemy_textclause.py:44:35:44:42 | ControlFlowNode for username | sqlalchemy_textclause.py:23:15:23:22 | ControlFlowNode for username | sqlalchemy_textclause.py:44:35:44:42 | ControlFlowNode for username | This SQL query depends on a $@. | sqlalchemy_textclause.py:23:15:23:22 | ControlFlowNode for username | user-provided value | -| sqlalchemy_textclause.py:45:41:45:48 | ControlFlowNode for username | sqlalchemy_textclause.py:23:15:23:22 | ControlFlowNode for username | sqlalchemy_textclause.py:45:41:45:48 | ControlFlowNode for username | This SQL query depends on a $@. | sqlalchemy_textclause.py:23:15:23:22 | ControlFlowNode for username | user-provided value | -| sqlalchemy_textclause.py:46:46:46:53 | ControlFlowNode for username | sqlalchemy_textclause.py:23:15:23:22 | ControlFlowNode for username | sqlalchemy_textclause.py:46:46:46:53 | ControlFlowNode for username | This SQL query depends on a $@. | sqlalchemy_textclause.py:23:15:23:22 | ControlFlowNode for username | user-provided value | -| sqlalchemy_textclause.py:47:47:47:54 | ControlFlowNode for username | sqlalchemy_textclause.py:23:15:23:22 | ControlFlowNode for username | sqlalchemy_textclause.py:47:47:47:54 | ControlFlowNode for username | This SQL query depends on a $@. | sqlalchemy_textclause.py:23:15:23:22 | ControlFlowNode for username | user-provided value | -| sqlalchemy_textclause.py:48:52:48:59 | ControlFlowNode for username | sqlalchemy_textclause.py:23:15:23:22 | ControlFlowNode for username | sqlalchemy_textclause.py:48:52:48:59 | ControlFlowNode for username | This SQL query depends on a $@. | sqlalchemy_textclause.py:23:15:23:22 | ControlFlowNode for username | user-provided value | -| sqlalchemy_textclause.py:50:18:50:25 | ControlFlowNode for username | sqlalchemy_textclause.py:23:15:23:22 | ControlFlowNode for username | sqlalchemy_textclause.py:50:18:50:25 | ControlFlowNode for username | This SQL query depends on a $@. | sqlalchemy_textclause.py:23:15:23:22 | ControlFlowNode for username | user-provided value | -| sqlalchemy_textclause.py:51:24:51:31 | ControlFlowNode for username | sqlalchemy_textclause.py:23:15:23:22 | ControlFlowNode for username | sqlalchemy_textclause.py:51:24:51:31 | ControlFlowNode for username | This SQL query depends on a $@. | sqlalchemy_textclause.py:23:15:23:22 | ControlFlowNode for username | user-provided value | diff --git a/python/ql/test/query-tests/Security/CWE-089-SqlInjection/SqlInjection.qlref b/python/ql/test/query-tests/Security/CWE-089-SqlInjection/SqlInjection.qlref index d1d02cbe8d37..444c0e5f46aa 100644 --- a/python/ql/test/query-tests/Security/CWE-089-SqlInjection/SqlInjection.qlref +++ b/python/ql/test/query-tests/Security/CWE-089-SqlInjection/SqlInjection.qlref @@ -1 +1,2 @@ -Security/CWE-089/SqlInjection.ql +query: Security/CWE-089/SqlInjection.ql +postprocess: utils/test/InlineExpectationsTestQuery.ql diff --git a/python/ql/test/query-tests/Security/CWE-089-SqlInjection/sql_injection.py b/python/ql/test/query-tests/Security/CWE-089-SqlInjection/sql_injection.py index c79bee16cb21..52aa3169616e 100644 --- a/python/ql/test/query-tests/Security/CWE-089-SqlInjection/sql_injection.py +++ b/python/ql/test/query-tests/Security/CWE-089-SqlInjection/sql_injection.py @@ -11,19 +11,19 @@ class User(models.Model): pass @app.route("/users/") -def show_user(username): +def show_user(username): # $ Source with connection.cursor() as cursor: # GOOD -- Using parameters cursor.execute("SELECT * FROM users WHERE username = %s", username) User.objects.raw("SELECT * FROM users WHERE username = %s", (username,)) # BAD -- Using string formatting - cursor.execute("SELECT * FROM users WHERE username = '%s'" % username) + cursor.execute("SELECT * FROM users WHERE username = '%s'" % username) # $ Alert # BAD -- other ways of executing raw SQL code with string interpolation - User.objects.annotate(RawSQL("insert into names_file ('name') values ('%s')" % username)) - User.objects.raw("insert into names_file ('name') values ('%s')" % username) - User.objects.extra("insert into names_file ('name') values ('%s')" % username) + User.objects.annotate(RawSQL("insert into names_file ('name') values ('%s')" % username)) # $ Alert + User.objects.raw("insert into names_file ('name') values ('%s')" % username) # $ Alert + User.objects.extra("insert into names_file ('name') values ('%s')" % username) # $ Alert # BAD (but currently no custom query to find this) # diff --git a/python/ql/test/query-tests/Security/CWE-089-SqlInjection/sqlalchemy_textclause.py b/python/ql/test/query-tests/Security/CWE-089-SqlInjection/sqlalchemy_textclause.py index a54d64517d42..f35b1325366c 100644 --- a/python/ql/test/query-tests/Security/CWE-089-SqlInjection/sqlalchemy_textclause.py +++ b/python/ql/test/query-tests/Security/CWE-089-SqlInjection/sqlalchemy_textclause.py @@ -20,15 +20,15 @@ class User(Base): @app.route("/users/") -def show_user(username): +def show_user(username): # $ Source session = sqlalchemy.orm.Session(engine) # BAD, normal SQL injection - stmt = sqlalchemy.text("SELECT * FROM users WHERE username = '{}'".format(username)) + stmt = sqlalchemy.text("SELECT * FROM users WHERE username = '{}'".format(username)) # $ Alert results = session.execute(stmt).fetchall() # BAD, allows SQL injection - username_formatted_for_sql = sqlalchemy.text("'{}'".format(username)) + username_formatted_for_sql = sqlalchemy.text("'{}'".format(username)) # $ Alert stmt = sqlalchemy.select(User).where(User.username == username_formatted_for_sql) results = session.execute(stmt).scalars().all() @@ -38,14 +38,14 @@ def show_user(username): # All of these should be flagged by query - t1 = sqlalchemy.text(username) - t2 = sqlalchemy.text(text=username) - t3 = sqlalchemy.sql.text(username) - t4 = sqlalchemy.sql.text(text=username) - t5 = sqlalchemy.sql.expression.text(username) - t6 = sqlalchemy.sql.expression.text(text=username) - t7 = sqlalchemy.sql.expression.TextClause(username) - t8 = sqlalchemy.sql.expression.TextClause(text=username) - - t9 = db.text(username) - t10 = db.text(text=username) + t1 = sqlalchemy.text(username) # $ Alert + t2 = sqlalchemy.text(text=username) # $ Alert + t3 = sqlalchemy.sql.text(username) # $ Alert + t4 = sqlalchemy.sql.text(text=username) # $ Alert + t5 = sqlalchemy.sql.expression.text(username) # $ Alert + t6 = sqlalchemy.sql.expression.text(text=username) # $ Alert + t7 = sqlalchemy.sql.expression.TextClause(username) # $ Alert + t8 = sqlalchemy.sql.expression.TextClause(text=username) # $ Alert + + t9 = db.text(username) # $ Alert + t10 = db.text(text=username) # $ Alert From 434a99447e087334c31a81d085ab42dcda283bb7 Mon Sep 17 00:00:00 2001 From: Owen Mansel-Chan Date: Fri, 12 Jun 2026 13:29:27 +0100 Subject: [PATCH 07/16] Add thorough tests, including one MISSING alert --- .../Security/CWE-089-SqlInjection/app.py | 52 +++++++++++++++++++ .../CWE-089-SqlInjection/db_connection.py | 24 +++++++++ 2 files changed, 76 insertions(+) create mode 100644 python/ql/test/query-tests/Security/CWE-089-SqlInjection/app.py create mode 100644 python/ql/test/query-tests/Security/CWE-089-SqlInjection/db_connection.py diff --git a/python/ql/test/query-tests/Security/CWE-089-SqlInjection/app.py b/python/ql/test/query-tests/Security/CWE-089-SqlInjection/app.py new file mode 100644 index 000000000000..8046f1ef52ed --- /dev/null +++ b/python/ql/test/query-tests/Security/CWE-089-SqlInjection/app.py @@ -0,0 +1,52 @@ +from fastapi import FastAPI +from hdbcli import dbapi +from db_connection import get_conn +from db_connection import hdb_con +from db_connection import hdb_con2 +from db_connection import hdb_con3 +app = FastAPI() + +class DatabaseConnection: + + def __init__(self): + self._conn = dbapi.connect(address='localhost', port=30015, user='system', password='Password123') + + def get_conn(self): + return self._conn + +db_connection = DatabaseConnection() + +@app.get("/unsafe1/") +async def unsafe(name: str): # $ Source + query = "select * from users where name=" + name + cursor = hdb_con.cursor() + cursor.execute(query) # $ Alert + cursor.close() + +@app.get("/unsafe2/") +async def unsafe2(name: str): # $ Source + query = "select * from users where name=" + name + cursor = hdb_con2.cursor() + cursor.execute(query) # $ Alert + cursor.close() + +@app.get("/unsafe3/") +async def unsafe3(name: str): # $ MISSING: Source + query = "select * from users where name=" + name + cursor = hdb_con3.cursor() + cursor.execute(query) # $ MISSING: Alert + cursor.close() + +@app.get("/unsafe4/") +async def unsafe4(name: str): # $ Source + query = "select * from users where name=" + name + cursor = get_conn().cursor() + cursor.execute(query) # $ Alert + cursor.close() + +@app.get("/unsafe5/") +async def unsafe5(name: str): # $ Source + query = "select * from users where name=" + name + cursor = db_connection.get_conn().cursor() + cursor.execute(query) # $ Alert + cursor.close() diff --git a/python/ql/test/query-tests/Security/CWE-089-SqlInjection/db_connection.py b/python/ql/test/query-tests/Security/CWE-089-SqlInjection/db_connection.py new file mode 100644 index 000000000000..b05a43bdebba --- /dev/null +++ b/python/ql/test/query-tests/Security/CWE-089-SqlInjection/db_connection.py @@ -0,0 +1,24 @@ +from hdbcli import dbapi +from typing import Optional + +hdb_con = dbapi.connect(address='localhost', port=30015, user='system', password='Password123') + + +class DatabaseConnection: + + def __init__(self): + self._conn = dbapi.connect(address='localhost', port=30015, user='system', password='Password123') + + def get_conn(self): + return self._conn + + +hdb_con2 = DatabaseConnection().get_conn() +hdb_con3 = DatabaseConnection()._conn + +_hana_connection: Optional[DatabaseConnection] = None +def get_conn(): + global _hana_connection + if _hana_connection is None: + _hana_connection = DatabaseConnection() + return _hana_connection.get_conn() From 9c65082189ca248b76d652048c634cb61faa9386 Mon Sep 17 00:00:00 2001 From: Owen Mansel-Chan Date: Fri, 12 Jun 2026 13:43:24 +0100 Subject: [PATCH 08/16] Fix MISSING alert --- .../new/internal/TypeTrackingImpl.qll | 40 ++++++++++++++++++- .../dataflow/typetracking/attribute_tests.py | 2 +- .../SqlInjection.expected | 30 ++++++++++++++ .../Security/CWE-089-SqlInjection/app.py | 4 +- 4 files changed, 72 insertions(+), 4 deletions(-) diff --git a/python/ql/lib/semmle/python/dataflow/new/internal/TypeTrackingImpl.qll b/python/ql/lib/semmle/python/dataflow/new/internal/TypeTrackingImpl.qll index a242c1d8e50c..b76f8aeb9a57 100644 --- a/python/ql/lib/semmle/python/dataflow/new/internal/TypeTrackingImpl.qll +++ b/python/ql/lib/semmle/python/dataflow/new/internal/TypeTrackingImpl.qll @@ -167,7 +167,9 @@ module TypeTrackingInput implements Shared::TypeTrackingInput { } /** Holds if there is a level step from `nodeFrom` to `nodeTo`, which may depend on the call graph. */ - predicate levelStepCall(Node nodeFrom, LocalSourceNode nodeTo) { none() } + predicate levelStepCall(Node nodeFrom, LocalSourceNode nodeTo) { + instanceFieldStep(nodeFrom, nodeTo) + } /** Holds if there is a level step from `nodeFrom` to `nodeTo`, which does not depend on the call graph. */ predicate levelStepNoCall(Node nodeFrom, LocalSourceNode nodeTo) { @@ -337,6 +339,20 @@ module TypeTrackingInput implements Shared::TypeTrackingInput { ) } + /** + * Holds if `read` reads attribute `attr` from an instance of `cls`, where the instance + * is referred to from outside the methods of `cls` (i.e. an access of the form + * `instance.attr`, where `instance` is a reference to an instance of `cls`). + * + * This complements `selfAttrRef`, which only handles `self.attr` accesses inside the + * methods of `cls`. Unlike `selfAttrRef`, this depends on the call graph (via + * `classInstanceTracker`), so steps using it must be reported as `levelStepCall`. + */ + private predicate instanceAttrRead(Class cls, string attr, DataFlowPublic::AttrRead read) { + read.getObject() = DataFlowDispatch::classInstanceTracker(cls) and + read.mayHaveAttributeName(attr) + } + /** * Holds if `nodeFrom` is written to attribute `self.attr` in some instance method of a * class, and `nodeTo` reads attribute `self.attr` in some (possibly different) instance @@ -364,6 +380,28 @@ module TypeTrackingInput implements Shared::TypeTrackingInput { ) } + /** + * Holds if `nodeFrom` is written to attribute `self.attr` in some instance method of a + * class, and `nodeTo` reads attribute `attr` from an instance of the same class outside + * its methods (e.g. `instance.attr`). + * + * This is the cross-instance counterpart of `localFieldStep`: it relates a write of + * `self.attr` inside the class to a read of `attr` on a reference to an instance of the + * class. Identifying instances relies on the call graph (via `classInstanceTracker`), so + * this step is reported as `levelStepCall` rather than `levelStepNoCall`. + * + * Like `localFieldStep`, this is an over-approximation: it is both instance-insensitive + * and order-insensitive. + */ + private predicate instanceFieldStep(Node nodeFrom, LocalSourceNode nodeTo) { + exists(Class cls, string attr, DataFlowPublic::AttrWrite write, DataFlowPublic::AttrRead read | + selfAttrRef(cls, attr, write) and + nodeFrom = write.getValue() and + instanceAttrRead(cls, attr, read) and + nodeTo = read + ) + } + /** * Holds if data can flow from `node1` to `node2` in a way that discards call contexts. */ diff --git a/python/ql/test/library-tests/dataflow/typetracking/attribute_tests.py b/python/ql/test/library-tests/dataflow/typetracking/attribute_tests.py index 05496ad74d09..55f2edcac2ad 100644 --- a/python/ql/test/library-tests/dataflow/typetracking/attribute_tests.py +++ b/python/ql/test/library-tests/dataflow/typetracking/attribute_tests.py @@ -157,7 +157,7 @@ def possibly_uncalled_method(self): # $ MISSING: tracked=foo print(self.foo) # $ tracked MISSING: tracked=foo instance = MyClass2() -print(instance.foo) # $ MISSING: tracked=foo tracked +print(instance.foo) # $ tracked MISSING: tracked=foo instance.print_foo() # $ MISSING: tracked=foo diff --git a/python/ql/test/query-tests/Security/CWE-089-SqlInjection/SqlInjection.expected b/python/ql/test/query-tests/Security/CWE-089-SqlInjection/SqlInjection.expected index c1958c23858d..8f60394d8a2b 100644 --- a/python/ql/test/query-tests/Security/CWE-089-SqlInjection/SqlInjection.expected +++ b/python/ql/test/query-tests/Security/CWE-089-SqlInjection/SqlInjection.expected @@ -1,4 +1,9 @@ #select +| app.py:23:20:23:24 | ControlFlowNode for query | app.py:20:18:20:21 | ControlFlowNode for name | app.py:23:20:23:24 | ControlFlowNode for query | This SQL query depends on a $@. | app.py:20:18:20:21 | ControlFlowNode for name | user-provided value | +| app.py:30:20:30:24 | ControlFlowNode for query | app.py:27:19:27:22 | ControlFlowNode for name | app.py:30:20:30:24 | ControlFlowNode for query | This SQL query depends on a $@. | app.py:27:19:27:22 | ControlFlowNode for name | user-provided value | +| app.py:37:20:37:24 | ControlFlowNode for query | app.py:34:19:34:22 | ControlFlowNode for name | app.py:37:20:37:24 | ControlFlowNode for query | This SQL query depends on a $@. | app.py:34:19:34:22 | ControlFlowNode for name | user-provided value | +| app.py:44:20:44:24 | ControlFlowNode for query | app.py:41:19:41:22 | ControlFlowNode for name | app.py:44:20:44:24 | ControlFlowNode for query | This SQL query depends on a $@. | app.py:41:19:41:22 | ControlFlowNode for name | user-provided value | +| app.py:51:20:51:24 | ControlFlowNode for query | app.py:48:19:48:22 | ControlFlowNode for name | app.py:51:20:51:24 | ControlFlowNode for query | This SQL query depends on a $@. | app.py:48:19:48:22 | ControlFlowNode for name | user-provided value | | sql_injection.py:21:24:21:77 | ControlFlowNode for BinaryExpr | sql_injection.py:14:15:14:22 | ControlFlowNode for username | sql_injection.py:21:24:21:77 | ControlFlowNode for BinaryExpr | This SQL query depends on a $@. | sql_injection.py:14:15:14:22 | ControlFlowNode for username | user-provided value | | sql_injection.py:24:38:24:95 | ControlFlowNode for BinaryExpr | sql_injection.py:14:15:14:22 | ControlFlowNode for username | sql_injection.py:24:38:24:95 | ControlFlowNode for BinaryExpr | This SQL query depends on a $@. | sql_injection.py:14:15:14:22 | ControlFlowNode for username | user-provided value | | sql_injection.py:25:26:25:83 | ControlFlowNode for BinaryExpr | sql_injection.py:14:15:14:22 | ControlFlowNode for username | sql_injection.py:25:26:25:83 | ControlFlowNode for BinaryExpr | This SQL query depends on a $@. | sql_injection.py:14:15:14:22 | ControlFlowNode for username | user-provided value | @@ -16,6 +21,16 @@ | sqlalchemy_textclause.py:50:18:50:25 | ControlFlowNode for username | sqlalchemy_textclause.py:23:15:23:22 | ControlFlowNode for username | sqlalchemy_textclause.py:50:18:50:25 | ControlFlowNode for username | This SQL query depends on a $@. | sqlalchemy_textclause.py:23:15:23:22 | ControlFlowNode for username | user-provided value | | sqlalchemy_textclause.py:51:24:51:31 | ControlFlowNode for username | sqlalchemy_textclause.py:23:15:23:22 | ControlFlowNode for username | sqlalchemy_textclause.py:51:24:51:31 | ControlFlowNode for username | This SQL query depends on a $@. | sqlalchemy_textclause.py:23:15:23:22 | ControlFlowNode for username | user-provided value | edges +| app.py:20:18:20:21 | ControlFlowNode for name | app.py:21:5:21:9 | ControlFlowNode for query | provenance | | +| app.py:21:5:21:9 | ControlFlowNode for query | app.py:23:20:23:24 | ControlFlowNode for query | provenance | | +| app.py:27:19:27:22 | ControlFlowNode for name | app.py:28:5:28:9 | ControlFlowNode for query | provenance | | +| app.py:28:5:28:9 | ControlFlowNode for query | app.py:30:20:30:24 | ControlFlowNode for query | provenance | | +| app.py:34:19:34:22 | ControlFlowNode for name | app.py:35:5:35:9 | ControlFlowNode for query | provenance | | +| app.py:35:5:35:9 | ControlFlowNode for query | app.py:37:20:37:24 | ControlFlowNode for query | provenance | | +| app.py:41:19:41:22 | ControlFlowNode for name | app.py:42:5:42:9 | ControlFlowNode for query | provenance | | +| app.py:42:5:42:9 | ControlFlowNode for query | app.py:44:20:44:24 | ControlFlowNode for query | provenance | | +| app.py:48:19:48:22 | ControlFlowNode for name | app.py:49:5:49:9 | ControlFlowNode for query | provenance | | +| app.py:49:5:49:9 | ControlFlowNode for query | app.py:51:20:51:24 | ControlFlowNode for query | provenance | | | sql_injection.py:14:15:14:22 | ControlFlowNode for username | sql_injection.py:21:24:21:77 | ControlFlowNode for BinaryExpr | provenance | | | sql_injection.py:14:15:14:22 | ControlFlowNode for username | sql_injection.py:24:38:24:95 | ControlFlowNode for BinaryExpr | provenance | | | sql_injection.py:14:15:14:22 | ControlFlowNode for username | sql_injection.py:25:26:25:83 | ControlFlowNode for BinaryExpr | provenance | | @@ -33,6 +48,21 @@ edges | sqlalchemy_textclause.py:23:15:23:22 | ControlFlowNode for username | sqlalchemy_textclause.py:50:18:50:25 | ControlFlowNode for username | provenance | | | sqlalchemy_textclause.py:23:15:23:22 | ControlFlowNode for username | sqlalchemy_textclause.py:51:24:51:31 | ControlFlowNode for username | provenance | | nodes +| app.py:20:18:20:21 | ControlFlowNode for name | semmle.label | ControlFlowNode for name | +| app.py:21:5:21:9 | ControlFlowNode for query | semmle.label | ControlFlowNode for query | +| app.py:23:20:23:24 | ControlFlowNode for query | semmle.label | ControlFlowNode for query | +| app.py:27:19:27:22 | ControlFlowNode for name | semmle.label | ControlFlowNode for name | +| app.py:28:5:28:9 | ControlFlowNode for query | semmle.label | ControlFlowNode for query | +| app.py:30:20:30:24 | ControlFlowNode for query | semmle.label | ControlFlowNode for query | +| app.py:34:19:34:22 | ControlFlowNode for name | semmle.label | ControlFlowNode for name | +| app.py:35:5:35:9 | ControlFlowNode for query | semmle.label | ControlFlowNode for query | +| app.py:37:20:37:24 | ControlFlowNode for query | semmle.label | ControlFlowNode for query | +| app.py:41:19:41:22 | ControlFlowNode for name | semmle.label | ControlFlowNode for name | +| app.py:42:5:42:9 | ControlFlowNode for query | semmle.label | ControlFlowNode for query | +| app.py:44:20:44:24 | ControlFlowNode for query | semmle.label | ControlFlowNode for query | +| app.py:48:19:48:22 | ControlFlowNode for name | semmle.label | ControlFlowNode for name | +| app.py:49:5:49:9 | ControlFlowNode for query | semmle.label | ControlFlowNode for query | +| app.py:51:20:51:24 | ControlFlowNode for query | semmle.label | ControlFlowNode for query | | sql_injection.py:14:15:14:22 | ControlFlowNode for username | semmle.label | ControlFlowNode for username | | sql_injection.py:21:24:21:77 | ControlFlowNode for BinaryExpr | semmle.label | ControlFlowNode for BinaryExpr | | sql_injection.py:24:38:24:95 | ControlFlowNode for BinaryExpr | semmle.label | ControlFlowNode for BinaryExpr | diff --git a/python/ql/test/query-tests/Security/CWE-089-SqlInjection/app.py b/python/ql/test/query-tests/Security/CWE-089-SqlInjection/app.py index 8046f1ef52ed..4de61346d8f5 100644 --- a/python/ql/test/query-tests/Security/CWE-089-SqlInjection/app.py +++ b/python/ql/test/query-tests/Security/CWE-089-SqlInjection/app.py @@ -31,10 +31,10 @@ async def unsafe2(name: str): # $ Source cursor.close() @app.get("/unsafe3/") -async def unsafe3(name: str): # $ MISSING: Source +async def unsafe3(name: str): # $ Source query = "select * from users where name=" + name cursor = hdb_con3.cursor() - cursor.execute(query) # $ MISSING: Alert + cursor.execute(query) # $ Alert cursor.close() @app.get("/unsafe4/") From 890969433f313c2704cbe8fd58130d880e19c991 Mon Sep 17 00:00:00 2001 From: Owen Mansel-Chan Date: Wed, 17 Jun 2026 12:19:03 +0100 Subject: [PATCH 09/16] Add test for FP for py/file-not-closed --- .../FileNotAlwaysClosed.expected | 19 ++++++------ .../FileNotAlwaysClosed/resources_test.py | 29 +++++++++++++++++++ 2 files changed, 39 insertions(+), 9 deletions(-) diff --git a/python/ql/test/query-tests/Resources/FileNotAlwaysClosed/FileNotAlwaysClosed.expected b/python/ql/test/query-tests/Resources/FileNotAlwaysClosed/FileNotAlwaysClosed.expected index 7f48feb72eb9..6f17eab01aa1 100644 --- a/python/ql/test/query-tests/Resources/FileNotAlwaysClosed/FileNotAlwaysClosed.expected +++ b/python/ql/test/query-tests/Resources/FileNotAlwaysClosed/FileNotAlwaysClosed.expected @@ -1,9 +1,10 @@ -| resources_test.py:4:10:4:25 | ControlFlowNode for open() | File may not be closed if $@ raises an exception. | resources_test.py:5:5:5:33 | ControlFlowNode for Attribute() | this operation | -| resources_test.py:9:10:9:25 | ControlFlowNode for open() | File is opened but is not closed. | file://:0:0:0:0 | (none) | this operation | -| resources_test.py:108:11:108:20 | ControlFlowNode for open() | File is opened but is not closed. | file://:0:0:0:0 | (none) | this operation | -| resources_test.py:112:11:112:28 | ControlFlowNode for opener_func2() | File may not be closed if $@ raises an exception. | resources_test.py:113:5:113:22 | ControlFlowNode for Attribute() | this operation | -| resources_test.py:123:11:123:24 | ControlFlowNode for opener_func2() | File is opened but is not closed. | file://:0:0:0:0 | (none) | this operation | -| resources_test.py:129:15:129:24 | ControlFlowNode for open() | File may not be closed if $@ raises an exception. | resources_test.py:130:9:130:26 | ControlFlowNode for Attribute() | this operation | -| resources_test.py:248:11:248:25 | ControlFlowNode for open() | File is opened but is not closed. | file://:0:0:0:0 | (none) | this operation | -| resources_test.py:269:10:269:27 | ControlFlowNode for Attribute() | File may not be closed if $@ raises an exception. | resources_test.py:271:5:271:19 | ControlFlowNode for Attribute() | this operation | -| resources_test.py:285:11:285:20 | ControlFlowNode for open() | File may not be closed if $@ raises an exception. | resources_test.py:287:5:287:31 | ControlFlowNode for Attribute() | this operation | +| resources_test.py:6:10:6:25 | ControlFlowNode for open() | File may not be closed if $@ raises an exception. | resources_test.py:7:5:7:33 | ControlFlowNode for Attribute() | this operation | +| resources_test.py:11:10:11:25 | ControlFlowNode for open() | File is opened but is not closed. | file://:0:0:0:0 | (none) | this operation | +| resources_test.py:110:11:110:20 | ControlFlowNode for open() | File is opened but is not closed. | file://:0:0:0:0 | (none) | this operation | +| resources_test.py:114:11:114:28 | ControlFlowNode for opener_func2() | File may not be closed if $@ raises an exception. | resources_test.py:115:5:115:22 | ControlFlowNode for Attribute() | this operation | +| resources_test.py:125:11:125:24 | ControlFlowNode for opener_func2() | File is opened but is not closed. | file://:0:0:0:0 | (none) | this operation | +| resources_test.py:131:15:131:24 | ControlFlowNode for open() | File may not be closed if $@ raises an exception. | resources_test.py:132:9:132:26 | ControlFlowNode for Attribute() | this operation | +| resources_test.py:250:11:250:25 | ControlFlowNode for open() | File is opened but is not closed. | file://:0:0:0:0 | (none) | this operation | +| resources_test.py:271:10:271:27 | ControlFlowNode for Attribute() | File may not be closed if $@ raises an exception. | resources_test.py:273:5:273:19 | ControlFlowNode for Attribute() | this operation | +| resources_test.py:287:11:287:20 | ControlFlowNode for open() | File may not be closed if $@ raises an exception. | resources_test.py:289:5:289:31 | ControlFlowNode for Attribute() | this operation | +| resources_test.py:361:13:361:27 | ControlFlowNode for Attribute() | File is opened but is not closed. | file://:0:0:0:0 | (none) | this operation | diff --git a/python/ql/test/query-tests/Resources/FileNotAlwaysClosed/resources_test.py b/python/ql/test/query-tests/Resources/FileNotAlwaysClosed/resources_test.py index f4bd33eb12cd..4f9b067d773b 100644 --- a/python/ql/test/query-tests/Resources/FileNotAlwaysClosed/resources_test.py +++ b/python/ql/test/query-tests/Resources/FileNotAlwaysClosed/resources_test.py @@ -1,5 +1,7 @@ #File not always closed +import os + def not_close1(): f1 = open("filename") # $ Alert # not closed on exception f1.write("Error could occur") @@ -332,3 +334,30 @@ def closed32(path): # due to a check that an operation is lexically contained within a `with` block (with `expr.getParent*()`) # not detecting this case. return list(wrap.read()) + + +class FdHolder33(): + # Mirrors CPython's `_pyio.FileIO`: it opens a file descriptor with `os.open`, + # stores it in an instance attribute, and exposes it again via `fileno()`. + def __init__(self, path): + fd = os.open(path, os.O_RDONLY) + self._fd = fd + + def fileno(self): + return self._fd + + def close(self): + os.close(self._fd) + +def closed33(path): + # False positive mirroring CPython's `_pyio.open`. + # `holder.fileno()` merely returns the existing file descriptor; it does not + # open a new resource. With instance-attribute type tracking, the `os.open` + # source flows through `self._fd` and back out of `fileno()`, so the call + # `holder.fileno()` is wrongly treated as a fresh file-open whose result is + # never closed. The descriptor is in fact owned and closed by `holder.close()`. + holder = FdHolder33(path) + try: + n = holder.fileno() # $ SPURIOUS: Alert + finally: + holder.close() From 199fd864add1babc4c5e8196b7f5b619d0f80fdd Mon Sep 17 00:00:00 2001 From: Owen Mansel-Chan Date: Wed, 17 Jun 2026 12:36:04 +0100 Subject: [PATCH 10/16] Fix FP for py/file-not-closed --- .../Resources/FileNotAlwaysClosedQuery.qll | 29 ++++++++++++++++++- .../FileNotAlwaysClosed.expected | 1 - .../FileNotAlwaysClosed/resources_test.py | 10 +++---- 3 files changed, 33 insertions(+), 7 deletions(-) diff --git a/python/ql/src/Resources/FileNotAlwaysClosedQuery.qll b/python/ql/src/Resources/FileNotAlwaysClosedQuery.qll index 9d91e4f523c2..bda0ee5dbdc6 100644 --- a/python/ql/src/Resources/FileNotAlwaysClosedQuery.qll +++ b/python/ql/src/Resources/FileNotAlwaysClosedQuery.qll @@ -21,12 +21,39 @@ private DataFlow::TypeTrackingNode fileOpenInstance(DataFlow::TypeTracker t) { exists(DataFlow::TypeTracker t2 | result = fileOpenInstance(t2).track(t2, t)) } +/** + * Holds if `read` is an attribute read that re-exposes an already-open file held in an + * instance attribute, for example `FileIO.fileno` returning `self._fd`. + * + * Instance-attribute type tracking can launder an open file out of such an accessor, which + * would otherwise be mistaken for a fresh file open. The underlying open is tracked, and its + * lifetime handled, separately at its real creation site. + */ +private predicate launderedAttrRead(DataFlow::AttrRead read) { + fileOpenInstance(DataFlow::TypeTracker::end()).flowsTo(read) +} + +/** Type tracking forward from an attribute read that re-exposes a file held in a field. */ +private DataFlow::TypeTrackingNode launderedFileInstance(DataFlow::TypeTracker t) { + t.start() and + launderedAttrRead(result) + or + exists(DataFlow::TypeTracker t2 | result = launderedFileInstance(t2).track(t2, t)) +} + /** * A call that returns an instance of an open file object. * This includes calls to methods that transitively call `open` or similar. */ class FileOpen extends DataFlow::CallCfgNode { - FileOpen() { fileOpenInstance(DataFlow::TypeTracker::end()).flowsTo(this) } + FileOpen() { + fileOpenInstance(DataFlow::TypeTracker::end()).flowsTo(this) and + // Don't treat an accessor that merely re-exposes a file held in an instance attribute + // (e.g. `FileIO.fileno` returning `self._fd`) as opening a new file. Such flow is + // introduced by instance-attribute type tracking; the underlying open is tracked at its + // real creation site. + not launderedFileInstance(DataFlow::TypeTracker::end()).flowsTo(this) + } } /** A call that may wrap a file object in a wrapper class or `os.fdopen`. */ diff --git a/python/ql/test/query-tests/Resources/FileNotAlwaysClosed/FileNotAlwaysClosed.expected b/python/ql/test/query-tests/Resources/FileNotAlwaysClosed/FileNotAlwaysClosed.expected index 6f17eab01aa1..b7d9d37785b0 100644 --- a/python/ql/test/query-tests/Resources/FileNotAlwaysClosed/FileNotAlwaysClosed.expected +++ b/python/ql/test/query-tests/Resources/FileNotAlwaysClosed/FileNotAlwaysClosed.expected @@ -7,4 +7,3 @@ | resources_test.py:250:11:250:25 | ControlFlowNode for open() | File is opened but is not closed. | file://:0:0:0:0 | (none) | this operation | | resources_test.py:271:10:271:27 | ControlFlowNode for Attribute() | File may not be closed if $@ raises an exception. | resources_test.py:273:5:273:19 | ControlFlowNode for Attribute() | this operation | | resources_test.py:287:11:287:20 | ControlFlowNode for open() | File may not be closed if $@ raises an exception. | resources_test.py:289:5:289:31 | ControlFlowNode for Attribute() | this operation | -| resources_test.py:361:13:361:27 | ControlFlowNode for Attribute() | File is opened but is not closed. | file://:0:0:0:0 | (none) | this operation | diff --git a/python/ql/test/query-tests/Resources/FileNotAlwaysClosed/resources_test.py b/python/ql/test/query-tests/Resources/FileNotAlwaysClosed/resources_test.py index 4f9b067d773b..57568a6eb118 100644 --- a/python/ql/test/query-tests/Resources/FileNotAlwaysClosed/resources_test.py +++ b/python/ql/test/query-tests/Resources/FileNotAlwaysClosed/resources_test.py @@ -350,14 +350,14 @@ def close(self): os.close(self._fd) def closed33(path): - # False positive mirroring CPython's `_pyio.open`. + # Regression test mirroring CPython's `_pyio.open`. # `holder.fileno()` merely returns the existing file descriptor; it does not # open a new resource. With instance-attribute type tracking, the `os.open` - # source flows through `self._fd` and back out of `fileno()`, so the call - # `holder.fileno()` is wrongly treated as a fresh file-open whose result is - # never closed. The descriptor is in fact owned and closed by `holder.close()`. + # source flows through `self._fd` and back out of `fileno()`. The query must + # not treat that re-exposed descriptor as a fresh file-open whose result is + # never closed. The descriptor is owned and closed by `holder.close()`. holder = FdHolder33(path) try: - n = holder.fileno() # $ SPURIOUS: Alert + n = holder.fileno() # No alert: this re-exposes an existing descriptor, not a new open. finally: holder.close() From d72144646aa79a21c21712e38d29167d7de2e65b Mon Sep 17 00:00:00 2001 From: Owen Mansel-Chan Date: Wed, 17 Jun 2026 12:55:17 +0100 Subject: [PATCH 11/16] Add test for FP for py/should-use-with --- .../general/ShouldUseWithStatement.expected | 1 + .../ql/test/query-tests/Statements/general/test.py | 14 ++++++++++++++ 2 files changed, 15 insertions(+) diff --git a/python/ql/test/query-tests/Statements/general/ShouldUseWithStatement.expected b/python/ql/test/query-tests/Statements/general/ShouldUseWithStatement.expected index 50ff6cc1f914..0ad3faa00327 100644 --- a/python/ql/test/query-tests/Statements/general/ShouldUseWithStatement.expected +++ b/python/ql/test/query-tests/Statements/general/ShouldUseWithStatement.expected @@ -1 +1,2 @@ | test.py:168:9:168:17 | Attribute() | Instance of context-manager class $@ is closed in a finally block. Consider using 'with' statement. | test.py:151:1:151:17 | Class CM | CM | +| test.py:182:13:182:26 | Attribute() | Instance of context-manager class $@ is closed in a finally block. Consider using 'with' statement. | test.py:151:1:151:17 | Class CM | CM | diff --git a/python/ql/test/query-tests/Statements/general/test.py b/python/ql/test/query-tests/Statements/general/test.py index a5848f7c718d..77398958a5df 100644 --- a/python/ql/test/query-tests/Statements/general/test.py +++ b/python/ql/test/query-tests/Statements/general/test.py @@ -167,6 +167,20 @@ def no_with(): finally: f.close() +# Should use context manager, with the resource held in an instance attribute +# (caught via instance-attribute type tracking). +class HoldsCM(object): + + def __init__(self): + self.f = CM() + + def no_with_attribute(self): + try: + self.f.write("Hello ") + self.f.write(" World\n") + finally: + self.f.close() + #Assert without side-effect def assert_ok(seq): assert all(isinstance(element, (str, unicode)) for element in seq) From 415857cacb36da15f524ebf28cfb12b7d492ec6b Mon Sep 17 00:00:00 2001 From: Owen Mansel-Chan Date: Wed, 17 Jun 2026 13:01:36 +0100 Subject: [PATCH 12/16] Fix FP for py/should-use-with --- .../src/Statements/ShouldUseWithStatement.ql | 31 +++++++++++++++++-- .../general/ShouldUseWithStatement.expected | 1 - .../query-tests/Statements/general/test.py | 8 +++-- 3 files changed, 34 insertions(+), 6 deletions(-) diff --git a/python/ql/src/Statements/ShouldUseWithStatement.ql b/python/ql/src/Statements/ShouldUseWithStatement.ql index 20bf053f6daa..3536a017d2b6 100644 --- a/python/ql/src/Statements/ShouldUseWithStatement.ql +++ b/python/ql/src/Statements/ShouldUseWithStatement.ql @@ -13,6 +13,7 @@ */ import python +private import semmle.python.dataflow.new.DataFlow private import semmle.python.dataflow.new.internal.DataFlowDispatch predicate calls_close(Call c) { exists(Attribute a | c.getFunc() = a and a.getName() = "close") } @@ -23,11 +24,37 @@ predicate only_stmt_in_finally(Try t, Call c) { ) } -from Call close, Try t, Class cls +/** + * Holds if `read` is an attribute read that re-exposes an instance of `cls` held in an + * instance attribute, for example `BufferedRWPair.reader`. + * + * Instance-attribute type tracking can launder such an instance out of a field. The object + * is owned by the enclosing instance, so its lifetime spans that instance and cannot be + * expressed with a `with` statement; closing it in a `finally` block is therefore not a + * candidate for refactoring. + */ +private predicate launderedAttrRead(Class cls, DataFlow::AttrRead read) { + read = classInstanceTracker(cls) +} + +/** Type tracking forward from an attribute read that re-exposes an instance held in a field. */ +private DataFlow::TypeTrackingNode launderedInstance(Class cls, DataFlow::TypeTracker t) { + t.start() and + launderedAttrRead(cls, result) + or + exists(DataFlow::TypeTracker t2 | result = launderedInstance(cls, t2).track(t2, t)) +} + +from Call close, Try t, Class cls, DataFlow::Node closeTarget where only_stmt_in_finally(t, close) and calls_close(close) and - classInstanceTracker(cls).asExpr() = close.getFunc().(Attribute).getObject() and + closeTarget.asExpr() = close.getFunc().(Attribute).getObject() and + closeTarget = classInstanceTracker(cls) and + // Don't report closing a resource that is held in an instance attribute (e.g. `self.reader`). + // Such flow is introduced by instance-attribute type tracking; the object's lifetime is tied + // to the enclosing instance and cannot be expressed with a `with` statement. + not launderedInstance(cls, DataFlow::TypeTracker::end()).flowsTo(closeTarget) and DuckTyping::isContextManager(cls) select close, "Instance of context-manager class $@ is closed in a finally block. Consider using 'with' statement.", diff --git a/python/ql/test/query-tests/Statements/general/ShouldUseWithStatement.expected b/python/ql/test/query-tests/Statements/general/ShouldUseWithStatement.expected index 0ad3faa00327..50ff6cc1f914 100644 --- a/python/ql/test/query-tests/Statements/general/ShouldUseWithStatement.expected +++ b/python/ql/test/query-tests/Statements/general/ShouldUseWithStatement.expected @@ -1,2 +1 @@ | test.py:168:9:168:17 | Attribute() | Instance of context-manager class $@ is closed in a finally block. Consider using 'with' statement. | test.py:151:1:151:17 | Class CM | CM | -| test.py:182:13:182:26 | Attribute() | Instance of context-manager class $@ is closed in a finally block. Consider using 'with' statement. | test.py:151:1:151:17 | Class CM | CM | diff --git a/python/ql/test/query-tests/Statements/general/test.py b/python/ql/test/query-tests/Statements/general/test.py index 77398958a5df..326c3f2a1120 100644 --- a/python/ql/test/query-tests/Statements/general/test.py +++ b/python/ql/test/query-tests/Statements/general/test.py @@ -167,8 +167,10 @@ def no_with(): finally: f.close() -# Should use context manager, with the resource held in an instance attribute -# (caught via instance-attribute type tracking). +# Should not use a 'with' statement here: the resource is held in an instance +# attribute, so its lifetime spans the enclosing instance and cannot be expressed +# with a 'with' statement. Instance-attribute type tracking can launder the +# instance out of the field, but this must not be reported. class HoldsCM(object): def __init__(self): @@ -179,7 +181,7 @@ def no_with_attribute(self): self.f.write("Hello ") self.f.write(" World\n") finally: - self.f.close() + self.f.close() # No alert: re-exposes a field, not a local resource. #Assert without side-effect def assert_ok(seq): From ea7510bf724d45688ebb7471696622703e653b6c Mon Sep 17 00:00:00 2001 From: Owen Mansel-Chan Date: Wed, 17 Jun 2026 13:10:47 +0100 Subject: [PATCH 13/16] Refactor ReExposedInstance logic into one place --- .../new/internal/ReExposedInstance.qll | 46 +++++++++++++++++++ .../Resources/FileNotAlwaysClosedQuery.qll | 22 +++------ .../src/Statements/ShouldUseWithStatement.ql | 25 ++-------- 3 files changed, 57 insertions(+), 36 deletions(-) create mode 100644 python/ql/lib/semmle/python/dataflow/new/internal/ReExposedInstance.qll diff --git a/python/ql/lib/semmle/python/dataflow/new/internal/ReExposedInstance.qll b/python/ql/lib/semmle/python/dataflow/new/internal/ReExposedInstance.qll new file mode 100644 index 000000000000..b080610f512a --- /dev/null +++ b/python/ql/lib/semmle/python/dataflow/new/internal/ReExposedInstance.qll @@ -0,0 +1,46 @@ +/** + * Provides a parameterized module for identifying values that instance-attribute type tracking + * has re-exposed (laundered) out of an instance attribute, rather than freshly created. + */ + +private import python +private import semmle.python.dataflow.new.DataFlow + +/** Holds if `node` should be treated as an instance source. */ +signature predicate instanceNodeSig(DataFlow::Node node); + +/** + * Provides a predicate for identifying values that instance-attribute type tracking has + * re-exposed (laundered) out of an instance attribute, rather than freshly created. + * + * Instance-attribute type tracking can flow a value into an instance attribute and back out at + * a later attribute read, for example `BufferedRWPair.reader` or `FileIO.fileno` returning + * `self._fd`. Such a re-exposed value is owned by the enclosing instance and is not a fresh + * resource; queries that reason about resource creation or lifetime should not treat it as one. + * + * The parameter `isInstance` defines which nodes count as instance sources (typically the result + * of a class- or resource-instance type tracker). + */ +module ReExposedInstance { + /** + * Holds if `read` is an attribute read that re-exposes an instance held in an instance + * attribute. + */ + private predicate launderedAttrRead(DataFlow::AttrRead read) { isInstance(read) } + + /** Type tracking forward from an attribute read that re-exposes an instance held in a field. */ + private DataFlow::TypeTrackingNode launderedInstance(DataFlow::TypeTracker t) { + t.start() and + launderedAttrRead(result) + or + exists(DataFlow::TypeTracker t2 | result = launderedInstance(t2).track(t2, t)) + } + + /** + * Holds if `node` is a value that has been re-exposed (laundered) out of an instance attribute, + * rather than being a freshly created instance. + */ + predicate isReExposed(DataFlow::Node node) { + launderedInstance(DataFlow::TypeTracker::end()).flowsTo(node) + } +} diff --git a/python/ql/src/Resources/FileNotAlwaysClosedQuery.qll b/python/ql/src/Resources/FileNotAlwaysClosedQuery.qll index bda0ee5dbdc6..2dfda7044d92 100644 --- a/python/ql/src/Resources/FileNotAlwaysClosedQuery.qll +++ b/python/ql/src/Resources/FileNotAlwaysClosedQuery.qll @@ -3,6 +3,7 @@ import python import semmle.python.dataflow.new.internal.DataFlowDispatch import semmle.python.ApiGraphs +private import semmle.python.dataflow.new.internal.ReExposedInstance /** A CFG node where a file is opened. */ abstract class FileOpenSource extends DataFlow::CfgNode { } @@ -22,24 +23,13 @@ private DataFlow::TypeTrackingNode fileOpenInstance(DataFlow::TypeTracker t) { } /** - * Holds if `read` is an attribute read that re-exposes an already-open file held in an - * instance attribute, for example `FileIO.fileno` returning `self._fd`. - * - * Instance-attribute type tracking can launder an open file out of such an accessor, which - * would otherwise be mistaken for a fresh file open. The underlying open is tracked, and its - * lifetime handled, separately at its real creation site. + * Holds if `node` is tracked to be an instance of an open file object. */ -private predicate launderedAttrRead(DataFlow::AttrRead read) { - fileOpenInstance(DataFlow::TypeTracker::end()).flowsTo(read) +private predicate fileInstanceNode(DataFlow::Node node) { + fileOpenInstance(DataFlow::TypeTracker::end()).flowsTo(node) } -/** Type tracking forward from an attribute read that re-exposes a file held in a field. */ -private DataFlow::TypeTrackingNode launderedFileInstance(DataFlow::TypeTracker t) { - t.start() and - launderedAttrRead(result) - or - exists(DataFlow::TypeTracker t2 | result = launderedFileInstance(t2).track(t2, t)) -} +private module FileReExposed = ReExposedInstance; /** * A call that returns an instance of an open file object. @@ -52,7 +42,7 @@ class FileOpen extends DataFlow::CallCfgNode { // (e.g. `FileIO.fileno` returning `self._fd`) as opening a new file. Such flow is // introduced by instance-attribute type tracking; the underlying open is tracked at its // real creation site. - not launderedFileInstance(DataFlow::TypeTracker::end()).flowsTo(this) + not FileReExposed::isReExposed(this) } } diff --git a/python/ql/src/Statements/ShouldUseWithStatement.ql b/python/ql/src/Statements/ShouldUseWithStatement.ql index 3536a017d2b6..8ead51fa6b34 100644 --- a/python/ql/src/Statements/ShouldUseWithStatement.ql +++ b/python/ql/src/Statements/ShouldUseWithStatement.ql @@ -15,6 +15,7 @@ import python private import semmle.python.dataflow.new.DataFlow private import semmle.python.dataflow.new.internal.DataFlowDispatch +private import semmle.python.dataflow.new.internal.ReExposedInstance predicate calls_close(Call c) { exists(Attribute a | c.getFunc() = a and a.getName() = "close") } @@ -24,26 +25,10 @@ predicate only_stmt_in_finally(Try t, Call c) { ) } -/** - * Holds if `read` is an attribute read that re-exposes an instance of `cls` held in an - * instance attribute, for example `BufferedRWPair.reader`. - * - * Instance-attribute type tracking can launder such an instance out of a field. The object - * is owned by the enclosing instance, so its lifetime spans that instance and cannot be - * expressed with a `with` statement; closing it in a `finally` block is therefore not a - * candidate for refactoring. - */ -private predicate launderedAttrRead(Class cls, DataFlow::AttrRead read) { - read = classInstanceTracker(cls) -} +/** Holds if `node` is tracked to be an instance of some class. */ +private predicate classInstanceNode(DataFlow::Node node) { node = classInstanceTracker(_) } -/** Type tracking forward from an attribute read that re-exposes an instance held in a field. */ -private DataFlow::TypeTrackingNode launderedInstance(Class cls, DataFlow::TypeTracker t) { - t.start() and - launderedAttrRead(cls, result) - or - exists(DataFlow::TypeTracker t2 | result = launderedInstance(cls, t2).track(t2, t)) -} +private module ClassReExposed = ReExposedInstance; from Call close, Try t, Class cls, DataFlow::Node closeTarget where @@ -54,7 +39,7 @@ where // Don't report closing a resource that is held in an instance attribute (e.g. `self.reader`). // Such flow is introduced by instance-attribute type tracking; the object's lifetime is tied // to the enclosing instance and cannot be expressed with a `with` statement. - not launderedInstance(cls, DataFlow::TypeTracker::end()).flowsTo(closeTarget) and + not ClassReExposed::isReExposed(closeTarget) and DuckTyping::isContextManager(cls) select close, "Instance of context-manager class $@ is closed in a finally block. Consider using 'with' statement.", From 47c2c9e7635a8fb8dfae17049ba82ec748a2eedf Mon Sep 17 00:00:00 2001 From: Owen Mansel-Chan Date: Wed, 17 Jun 2026 14:22:42 +0100 Subject: [PATCH 14/16] Add test for FP for py/modification-of-locals --- .../general/ModificationOfLocals.expected | 7 ++++ .../query-tests/Statements/general/test.py | 38 +++++++++++++++++++ 2 files changed, 45 insertions(+) diff --git a/python/ql/test/query-tests/Statements/general/ModificationOfLocals.expected b/python/ql/test/query-tests/Statements/general/ModificationOfLocals.expected index 547cec0c3b81..5575d3930c14 100644 --- a/python/ql/test/query-tests/Statements/general/ModificationOfLocals.expected +++ b/python/ql/test/query-tests/Statements/general/ModificationOfLocals.expected @@ -3,3 +3,10 @@ | test.py:101:5:101:14 | Attribute() | Modification of the locals() dictionary will have no effect on the local variables. | | test.py:102:9:102:14 | Subscript | Modification of the locals() dictionary will have no effect on the local variables. | | test.py:103:5:103:13 | Attribute() | Modification of the locals() dictionary will have no effect on the local variables. | +| test.py:206:5:206:11 | Subscript | Modification of the locals() dictionary will have no effect on the local variables. | +| test.py:207:5:207:23 | Attribute() | Modification of the locals() dictionary will have no effect on the local variables. | +| test.py:208:5:208:15 | Attribute() | Modification of the locals() dictionary will have no effect on the local variables. | +| test.py:209:9:209:15 | Subscript | Modification of the locals() dictionary will have no effect on the local variables. | +| test.py:210:5:210:14 | Attribute() | Modification of the locals() dictionary will have no effect on the local variables. | +| test.py:228:9:228:24 | Subscript | Modification of the locals() dictionary will have no effect on the local variables. | +| test.py:229:9:229:35 | Attribute() | Modification of the locals() dictionary will have no effect on the local variables. | diff --git a/python/ql/test/query-tests/Statements/general/test.py b/python/ql/test/query-tests/Statements/general/test.py index 326c3f2a1120..eb38acd89b1d 100644 --- a/python/ql/test/query-tests/Statements/general/test.py +++ b/python/ql/test/query-tests/Statements/general/test.py @@ -196,3 +196,41 @@ class false_positive: class MyClass: locals()['x'] = 43 # OK y = x + + +# Once a `locals()` dictionary is passed out of the scope that created it, it is +# just an ordinary mapping. Modifying it in a different scope is meaningful and +# effective, so these modifications must NOT be flagged: the "no effect on local +# variables" gotcha only applies within the scope that called `locals()`. +def modify_passed_dict(ns): + ns['k'] = 1 # OK: `ns` is a parameter here, not this scope's locals() + ns.update({'j': 2}) # OK + ns.pop('k') # OK + del ns['j'] # OK + ns.clear() # OK + + +def pass_locals_to_function(): + y = 1 + modify_passed_dict(locals()) + return y + + +# The same situation, but where the `locals()` dictionary is laundered through an +# instance attribute (as instance-attribute type tracking now models). These must +# also not be flagged. +class NamespaceHolder(object): + + def __init__(self, ns): + self.ns = ns + + def populate(self): + self.ns['extra'] = 1 # OK: different scope from the `locals()` call + self.ns.update({'more': 2}) # OK + + +def launder_locals_through_instance(): + x = 1 + holder = NamespaceHolder(locals()) + holder.populate() + return x From dd61dd2d74793a36f15f339b0f181fb7bbb57a48 Mon Sep 17 00:00:00 2001 From: Owen Mansel-Chan Date: Wed, 17 Jun 2026 14:24:18 +0100 Subject: [PATCH 15/16] Fix FP for py/modification-of-locals --- python/ql/src/Statements/ModificationOfLocals.ql | 12 +++++++++++- .../2026-06-17-modification-of-locals-cross-scope.md | 4 ++++ .../Statements/general/ModificationOfLocals.expected | 7 ------- 3 files changed, 15 insertions(+), 8 deletions(-) create mode 100644 python/ql/src/change-notes/2026-06-17-modification-of-locals-cross-scope.md diff --git a/python/ql/src/Statements/ModificationOfLocals.ql b/python/ql/src/Statements/ModificationOfLocals.ql index f32ddcf78849..886a49c763e9 100644 --- a/python/ql/src/Statements/ModificationOfLocals.ql +++ b/python/ql/src/Statements/ModificationOfLocals.ql @@ -13,9 +13,19 @@ import python private import semmle.python.ApiGraphs +private import semmle.python.dataflow.new.DataFlow predicate originIsLocals(ControlFlowNode n) { - API::builtin("locals").getReturn().getAValueReachableFromSource().asCfgNode() = n + // Only consider the `locals()` dictionary within the scope that called `locals()`. + // Once the dictionary is passed to another scope (e.g. as an argument or via an + // instance attribute) it is just an ordinary mapping, and modifying it is both + // meaningful and effective. Restricting to local (intraprocedural) flow ensures we + // only report modifications in the scope where the `locals()` gotcha actually applies. + exists(DataFlow::LocalSourceNode src, DataFlow::Node use | + src = API::builtin("locals").getReturn().asSource() and + src.flowsTo(use) and + use.asCfgNode() = n + ) } predicate modification_of_locals(ControlFlowNode f) { diff --git a/python/ql/src/change-notes/2026-06-17-modification-of-locals-cross-scope.md b/python/ql/src/change-notes/2026-06-17-modification-of-locals-cross-scope.md new file mode 100644 index 000000000000..5a625a95511d --- /dev/null +++ b/python/ql/src/change-notes/2026-06-17-modification-of-locals-cross-scope.md @@ -0,0 +1,4 @@ +--- +category: minorAnalysis +--- +* The `py/modification-of-locals` query no longer flags modifications of a `locals()` dictionary that has been passed out of the scope in which `locals()` was called (for example, by passing it to another function or storing it in an instance attribute). In such cases the dictionary is used as an ordinary mapping and modifying it is meaningful, so these were false positives. The "modification has no effect" claim only applies within the scope that called `locals()`, which is now the only case reported. diff --git a/python/ql/test/query-tests/Statements/general/ModificationOfLocals.expected b/python/ql/test/query-tests/Statements/general/ModificationOfLocals.expected index 5575d3930c14..547cec0c3b81 100644 --- a/python/ql/test/query-tests/Statements/general/ModificationOfLocals.expected +++ b/python/ql/test/query-tests/Statements/general/ModificationOfLocals.expected @@ -3,10 +3,3 @@ | test.py:101:5:101:14 | Attribute() | Modification of the locals() dictionary will have no effect on the local variables. | | test.py:102:9:102:14 | Subscript | Modification of the locals() dictionary will have no effect on the local variables. | | test.py:103:5:103:13 | Attribute() | Modification of the locals() dictionary will have no effect on the local variables. | -| test.py:206:5:206:11 | Subscript | Modification of the locals() dictionary will have no effect on the local variables. | -| test.py:207:5:207:23 | Attribute() | Modification of the locals() dictionary will have no effect on the local variables. | -| test.py:208:5:208:15 | Attribute() | Modification of the locals() dictionary will have no effect on the local variables. | -| test.py:209:9:209:15 | Subscript | Modification of the locals() dictionary will have no effect on the local variables. | -| test.py:210:5:210:14 | Attribute() | Modification of the locals() dictionary will have no effect on the local variables. | -| test.py:228:9:228:24 | Subscript | Modification of the locals() dictionary will have no effect on the local variables. | -| test.py:229:9:229:35 | Attribute() | Modification of the locals() dictionary will have no effect on the local variables. | From 1f9899d7dbd77fa65583d9c67bd9258c35e77daa Mon Sep 17 00:00:00 2001 From: Owen Mansel-Chan Date: Wed, 17 Jun 2026 15:04:53 +0100 Subject: [PATCH 16/16] Extend added type tracking step to related types --- ...1-fix-type-tracking-instance-attributes.md | 2 +- .../new/internal/TypeTrackingImpl.qll | 66 ++++++++++++++++--- .../dataflow/typetracking/attribute_tests.py | 47 +++++++++++++ 3 files changed, 105 insertions(+), 10 deletions(-) diff --git a/python/ql/lib/change-notes/2026-06-11-fix-type-tracking-instance-attributes.md b/python/ql/lib/change-notes/2026-06-11-fix-type-tracking-instance-attributes.md index 25bc0e0f31f9..da7b752ad670 100644 --- a/python/ql/lib/change-notes/2026-06-11-fix-type-tracking-instance-attributes.md +++ b/python/ql/lib/change-notes/2026-06-11-fix-type-tracking-instance-attributes.md @@ -1,4 +1,4 @@ --- category: minorAnalysis --- -* Python type tracking now follows values stored in instance attributes such as `self.attr` across instance methods on the same class. As a result, analysis is more likely to recognize user-defined objects that are stored on `self` and used later in other methods, which may produce additional results. +* Python type tracking now follows values stored in instance attributes such as `self.attr` across instance methods, including across a class hierarchy (for example, a value stored on `self.attr` in a base class and read in a subclass, or vice versa). As a result, analysis is more likely to recognize user-defined objects that are stored on `self` and used later in other methods, which may produce additional results. diff --git a/python/ql/lib/semmle/python/dataflow/new/internal/TypeTrackingImpl.qll b/python/ql/lib/semmle/python/dataflow/new/internal/TypeTrackingImpl.qll index b76f8aeb9a57..4d9e95b4c7a3 100644 --- a/python/ql/lib/semmle/python/dataflow/new/internal/TypeTrackingImpl.qll +++ b/python/ql/lib/semmle/python/dataflow/new/internal/TypeTrackingImpl.qll @@ -169,6 +169,8 @@ module TypeTrackingInput implements Shared::TypeTrackingInput { /** Holds if there is a level step from `nodeFrom` to `nodeTo`, which may depend on the call graph. */ predicate levelStepCall(Node nodeFrom, LocalSourceNode nodeTo) { instanceFieldStep(nodeFrom, nodeTo) + or + inheritedFieldStep(nodeFrom, nodeTo) } /** Holds if there is a level step from `nodeFrom` to `nodeTo`, which does not depend on the call graph. */ @@ -366,6 +368,11 @@ module TypeTrackingInput implements Shared::TypeTrackingInput { * the written value to the read reference, for any pair of methods on the class (not * just from `__init__`). * + * Flow across the class hierarchy (a write in one class observed in a method inherited + * from, or contributed by, a related class) is handled separately by + * `inheritedFieldStep`, because resolving superclasses depends on the call graph and so + * cannot appear in this call-graph-independent step. + * * This is an over-approximation: it is instance-insensitive (it does not distinguish * between different instances of the same class) and order-insensitive (it does not * require the write to happen before the read), matching the precision of @@ -380,25 +387,66 @@ module TypeTrackingInput implements Shared::TypeTrackingInput { ) } + /** + * Holds if `nodeFrom` is written to attribute `self.attr` in an instance method of one + * class, and `nodeTo` reads attribute `self.attr` in an instance method of a different + * class that is related to it by inheritance (one is a transitive superclass of the + * other). + * + * This is the cross-hierarchy counterpart of `localFieldStep`: at runtime the receiver + * of both methods may be an instance of the more-derived class, whose behaviour is made + * up of the methods it declares together with those inherited from all of its ancestors. + * It therefore models the common pattern of a base class storing `self.attr` that a + * subclass reads, and vice versa. Resolving the superclass relationship depends on the + * call graph (via `getADirectSuperclass`), so this step is reported as `levelStepCall` + * rather than `levelStepNoCall`. + * + * Like `localFieldStep`, this is an over-approximation: it is both instance-insensitive + * and order-insensitive. + */ + private predicate inheritedFieldStep(Node nodeFrom, LocalSourceNode nodeTo) { + exists( + Class writeCls, Class readCls, string attr, DataFlowPublic::AttrWrite write, + DataFlowPublic::AttrRead read + | + selfAttrRef(writeCls, attr, write) and + nodeFrom = write.getValue() and + selfAttrRef(readCls, attr, read) and + nodeTo = read and + writeCls != readCls and + ( + writeCls = DataFlowDispatch::getADirectSuperclass*(readCls) + or + readCls = DataFlowDispatch::getADirectSuperclass*(writeCls) + ) + ) + } + /** * Holds if `nodeFrom` is written to attribute `self.attr` in some instance method of a - * class, and `nodeTo` reads attribute `attr` from an instance of the same class outside - * its methods (e.g. `instance.attr`). + * class, and `nodeTo` reads attribute `attr` from an instance of that class (or a + * subclass of it) outside its methods (e.g. `instance.attr`). * * This is the cross-instance counterpart of `localFieldStep`: it relates a write of - * `self.attr` inside the class to a read of `attr` on a reference to an instance of the - * class. Identifying instances relies on the call graph (via `classInstanceTracker`), so - * this step is reported as `levelStepCall` rather than `levelStepNoCall`. + * `self.attr` inside a class to a read of `attr` on a reference to an instance of that + * class or one of its subclasses. Identifying instances relies on the call graph (via + * `classInstanceTracker`), so this step is reported as `levelStepCall` rather than + * `levelStepNoCall`. The write may occur in the instance's own class or in any of its + * superclasses, since those methods are inherited. * * Like `localFieldStep`, this is an over-approximation: it is both instance-insensitive * and order-insensitive. */ private predicate instanceFieldStep(Node nodeFrom, LocalSourceNode nodeTo) { - exists(Class cls, string attr, DataFlowPublic::AttrWrite write, DataFlowPublic::AttrRead read | - selfAttrRef(cls, attr, write) and + exists( + Class writeCls, Class instanceCls, string attr, DataFlowPublic::AttrWrite write, + DataFlowPublic::AttrRead read + | + selfAttrRef(writeCls, attr, write) and nodeFrom = write.getValue() and - instanceAttrRead(cls, attr, read) and - nodeTo = read + instanceAttrRead(instanceCls, attr, read) and + nodeTo = read and + writeCls = DataFlowDispatch::getADirectSuperclass*(instanceCls) ) } diff --git a/python/ql/test/library-tests/dataflow/typetracking/attribute_tests.py b/python/ql/test/library-tests/dataflow/typetracking/attribute_tests.py index 55f2edcac2ad..b6bca72507f6 100644 --- a/python/ql/test/library-tests/dataflow/typetracking/attribute_tests.py +++ b/python/ql/test/library-tests/dataflow/typetracking/attribute_tests.py @@ -177,3 +177,50 @@ def possibly_uncalled_method(self): # $ MISSING: tracked=foo instance.print_self() # $ tracked=foo instance.foo = tracked # $ tracked=foo tracked instance.print_foo() # $ tracked=foo + + +# ------------------------------------------------------------------------------ +# Tracking of instance attribute across the class hierarchy +# ------------------------------------------------------------------------------ + +# attribute written in a base class method, read in a subclass method + +class Base1(object): + def __init__(self): # $ tracked=foo + self.foo = tracked # $ tracked=foo tracked + +class Sub1(Base1): + def read_foo(self): # $ MISSING: tracked=foo + print(self.foo) # $ tracked MISSING: tracked=foo + +sub1 = Sub1() +sub1.read_foo() +print(sub1.foo) # $ tracked MISSING: tracked=foo + + +# attribute written in a subclass method, read in an inherited base class method + +class Base2(object): + def read_bar(self): # $ MISSING: tracked=bar + print(self.bar) # $ tracked MISSING: tracked=bar + +class Sub2(Base2): + def __init__(self): # $ tracked=bar + self.bar = tracked # $ tracked=bar tracked + +sub2 = Sub2() +sub2.read_bar() +print(sub2.bar) # $ tracked MISSING: tracked=bar + + +# attribute written in a base class method, read on an instance of the subclass + +class Base3(object): + def __init__(self): # $ tracked=baz + self.baz = tracked # $ tracked=baz tracked + +class Sub3(Base3): + pass + +sub3 = Sub3() +print(sub3.baz) # $ tracked MISSING: tracked=baz