diff --git a/python/ql/lib/change-notes/2026-06-11-fix-type-tracking-instance-attributes.md b/python/ql/lib/change-notes/2026-06-11-fix-type-tracking-instance-attributes.md new file mode 100644 index 000000000000..da7b752ad670 --- /dev/null +++ b/python/ql/lib/change-notes/2026-06-11-fix-type-tracking-instance-attributes.md @@ -0,0 +1,4 @@ +--- +category: minorAnalysis +--- +* Python type tracking now follows values stored in instance attributes such as `self.attr` across instance methods, including across a class hierarchy (for example, a value stored on `self.attr` in a base class and read in a subclass, or vice versa). As a result, analysis is more likely to recognize user-defined objects that are stored on `self` and used later in other methods, which may produce additional results. diff --git a/python/ql/lib/semmle/python/dataflow/new/internal/ReExposedInstance.qll b/python/ql/lib/semmle/python/dataflow/new/internal/ReExposedInstance.qll new file mode 100644 index 000000000000..b080610f512a --- /dev/null +++ b/python/ql/lib/semmle/python/dataflow/new/internal/ReExposedInstance.qll @@ -0,0 +1,46 @@ +/** + * Provides a parameterized module for identifying values that instance-attribute type tracking + * has re-exposed (laundered) out of an instance attribute, rather than freshly created. + */ + +private import python +private import semmle.python.dataflow.new.DataFlow + +/** Holds if `node` should be treated as an instance source. */ +signature predicate instanceNodeSig(DataFlow::Node node); + +/** + * Provides a predicate for identifying values that instance-attribute type tracking has + * re-exposed (laundered) out of an instance attribute, rather than freshly created. + * + * Instance-attribute type tracking can flow a value into an instance attribute and back out at + * a later attribute read, for example `BufferedRWPair.reader` or `FileIO.fileno` returning + * `self._fd`. Such a re-exposed value is owned by the enclosing instance and is not a fresh + * resource; queries that reason about resource creation or lifetime should not treat it as one. + * + * The parameter `isInstance` defines which nodes count as instance sources (typically the result + * of a class- or resource-instance type tracker). + */ +module ReExposedInstance { + /** + * Holds if `read` is an attribute read that re-exposes an instance held in an instance + * attribute. + */ + private predicate launderedAttrRead(DataFlow::AttrRead read) { isInstance(read) } + + /** Type tracking forward from an attribute read that re-exposes an instance held in a field. */ + private DataFlow::TypeTrackingNode launderedInstance(DataFlow::TypeTracker t) { + t.start() and + launderedAttrRead(result) + or + exists(DataFlow::TypeTracker t2 | result = launderedInstance(t2).track(t2, t)) + } + + /** + * Holds if `node` is a value that has been re-exposed (laundered) out of an instance attribute, + * rather than being a freshly created instance. + */ + predicate isReExposed(DataFlow::Node node) { + launderedInstance(DataFlow::TypeTracker::end()).flowsTo(node) + } +} diff --git a/python/ql/lib/semmle/python/dataflow/new/internal/TypeTrackingImpl.qll b/python/ql/lib/semmle/python/dataflow/new/internal/TypeTrackingImpl.qll index 215c7906e655..4d9e95b4c7a3 100644 --- a/python/ql/lib/semmle/python/dataflow/new/internal/TypeTrackingImpl.qll +++ b/python/ql/lib/semmle/python/dataflow/new/internal/TypeTrackingImpl.qll @@ -167,11 +167,17 @@ module TypeTrackingInput implements Shared::TypeTrackingInput { } /** Holds if there is a level step from `nodeFrom` to `nodeTo`, which may depend on the call graph. */ - predicate levelStepCall(Node nodeFrom, LocalSourceNode nodeTo) { none() } + predicate levelStepCall(Node nodeFrom, LocalSourceNode nodeTo) { + instanceFieldStep(nodeFrom, nodeTo) + or + inheritedFieldStep(nodeFrom, nodeTo) + } /** Holds if there is a level step from `nodeFrom` to `nodeTo`, which does not depend on the call graph. */ predicate levelStepNoCall(Node nodeFrom, LocalSourceNode nodeTo) { TypeTrackerSummaryFlow::levelStepNoCall(nodeFrom, nodeTo) + or + localFieldStep(nodeFrom, nodeTo) } /** @@ -317,6 +323,133 @@ module TypeTrackingInput implements Shared::TypeTrackingInput { ) } + /** + * Holds if `ref` accesses attribute `attr` of `self`, where `self` is the first + * parameter of an instance method of `cls` (i.e. an access of the form `self.attr`). + * + * Static methods and class methods are excluded, since their first parameter is not a + * `self` instance reference. + */ + private predicate selfAttrRef(Class cls, string attr, DataFlowPublic::AttrRef ref) { + exists(Function method, Name selfUse | + method = cls.getAMethod() and + not DataFlowDispatch::isStaticmethod(method) and + not DataFlowDispatch::isClassmethod(method) and + selfUse.getVariable() = method.getArg(0).(Name).getVariable() and + ref.getObject().asCfgNode().getNode() = selfUse and + ref.mayHaveAttributeName(attr) + ) + } + + /** + * Holds if `read` reads attribute `attr` from an instance of `cls`, where the instance + * is referred to from outside the methods of `cls` (i.e. an access of the form + * `instance.attr`, where `instance` is a reference to an instance of `cls`). + * + * This complements `selfAttrRef`, which only handles `self.attr` accesses inside the + * methods of `cls`. Unlike `selfAttrRef`, this depends on the call graph (via + * `classInstanceTracker`), so steps using it must be reported as `levelStepCall`. + */ + private predicate instanceAttrRead(Class cls, string attr, DataFlowPublic::AttrRead read) { + read.getObject() = DataFlowDispatch::classInstanceTracker(cls) and + read.mayHaveAttributeName(attr) + } + + /** + * Holds if `nodeFrom` is written to attribute `self.attr` in some instance method of a + * class, and `nodeTo` reads attribute `self.attr` in some (possibly different) instance + * method of the same class. + * + * This models flow through instance attributes (`self.foo`): a value stored into + * `self.foo` in one method can be read from `self.foo` in another method. Type-tracking + * handles the store and read steps via `AttrWrite`/`AttrRead`, but on its own it cannot + * relate the `self` of the writing method to the `self` of the reading method. Following + * the approach used for Ruby and JavaScript, we model this directly as a level step from + * the written value to the read reference, for any pair of methods on the class (not + * just from `__init__`). + * + * Flow across the class hierarchy (a write in one class observed in a method inherited + * from, or contributed by, a related class) is handled separately by + * `inheritedFieldStep`, because resolving superclasses depends on the call graph and so + * cannot appear in this call-graph-independent step. + * + * This is an over-approximation: it is instance-insensitive (it does not distinguish + * between different instances of the same class) and order-insensitive (it does not + * require the write to happen before the read), matching the precision of + * instance-attribute handling for Ruby and JavaScript. + */ + private predicate localFieldStep(Node nodeFrom, LocalSourceNode nodeTo) { + exists(Class cls, string attr, DataFlowPublic::AttrWrite write, DataFlowPublic::AttrRead read | + selfAttrRef(cls, attr, write) and + nodeFrom = write.getValue() and + selfAttrRef(cls, attr, read) and + nodeTo = read + ) + } + + /** + * Holds if `nodeFrom` is written to attribute `self.attr` in an instance method of one + * class, and `nodeTo` reads attribute `self.attr` in an instance method of a different + * class that is related to it by inheritance (one is a transitive superclass of the + * other). + * + * This is the cross-hierarchy counterpart of `localFieldStep`: at runtime the receiver + * of both methods may be an instance of the more-derived class, whose behaviour is made + * up of the methods it declares together with those inherited from all of its ancestors. + * It therefore models the common pattern of a base class storing `self.attr` that a + * subclass reads, and vice versa. Resolving the superclass relationship depends on the + * call graph (via `getADirectSuperclass`), so this step is reported as `levelStepCall` + * rather than `levelStepNoCall`. + * + * Like `localFieldStep`, this is an over-approximation: it is both instance-insensitive + * and order-insensitive. + */ + private predicate inheritedFieldStep(Node nodeFrom, LocalSourceNode nodeTo) { + exists( + Class writeCls, Class readCls, string attr, DataFlowPublic::AttrWrite write, + DataFlowPublic::AttrRead read + | + selfAttrRef(writeCls, attr, write) and + nodeFrom = write.getValue() and + selfAttrRef(readCls, attr, read) and + nodeTo = read and + writeCls != readCls and + ( + writeCls = DataFlowDispatch::getADirectSuperclass*(readCls) + or + readCls = DataFlowDispatch::getADirectSuperclass*(writeCls) + ) + ) + } + + /** + * Holds if `nodeFrom` is written to attribute `self.attr` in some instance method of a + * class, and `nodeTo` reads attribute `attr` from an instance of that class (or a + * subclass of it) outside its methods (e.g. `instance.attr`). + * + * This is the cross-instance counterpart of `localFieldStep`: it relates a write of + * `self.attr` inside a class to a read of `attr` on a reference to an instance of that + * class or one of its subclasses. Identifying instances relies on the call graph (via + * `classInstanceTracker`), so this step is reported as `levelStepCall` rather than + * `levelStepNoCall`. The write may occur in the instance's own class or in any of its + * superclasses, since those methods are inherited. + * + * Like `localFieldStep`, this is an over-approximation: it is both instance-insensitive + * and order-insensitive. + */ + private predicate instanceFieldStep(Node nodeFrom, LocalSourceNode nodeTo) { + exists( + Class writeCls, Class instanceCls, string attr, DataFlowPublic::AttrWrite write, + DataFlowPublic::AttrRead read + | + selfAttrRef(writeCls, attr, write) and + nodeFrom = write.getValue() and + instanceAttrRead(instanceCls, attr, read) and + nodeTo = read and + writeCls = DataFlowDispatch::getADirectSuperclass*(instanceCls) + ) + } + /** * Holds if data can flow from `node1` to `node2` in a way that discards call contexts. */ diff --git a/python/ql/src/Resources/FileNotAlwaysClosedQuery.qll b/python/ql/src/Resources/FileNotAlwaysClosedQuery.qll index 9d91e4f523c2..2dfda7044d92 100644 --- a/python/ql/src/Resources/FileNotAlwaysClosedQuery.qll +++ b/python/ql/src/Resources/FileNotAlwaysClosedQuery.qll @@ -3,6 +3,7 @@ import python import semmle.python.dataflow.new.internal.DataFlowDispatch import semmle.python.ApiGraphs +private import semmle.python.dataflow.new.internal.ReExposedInstance /** A CFG node where a file is opened. */ abstract class FileOpenSource extends DataFlow::CfgNode { } @@ -21,12 +22,28 @@ private DataFlow::TypeTrackingNode fileOpenInstance(DataFlow::TypeTracker t) { exists(DataFlow::TypeTracker t2 | result = fileOpenInstance(t2).track(t2, t)) } +/** + * Holds if `node` is tracked to be an instance of an open file object. + */ +private predicate fileInstanceNode(DataFlow::Node node) { + fileOpenInstance(DataFlow::TypeTracker::end()).flowsTo(node) +} + +private module FileReExposed = ReExposedInstance; + /** * A call that returns an instance of an open file object. * This includes calls to methods that transitively call `open` or similar. */ class FileOpen extends DataFlow::CallCfgNode { - FileOpen() { fileOpenInstance(DataFlow::TypeTracker::end()).flowsTo(this) } + FileOpen() { + fileOpenInstance(DataFlow::TypeTracker::end()).flowsTo(this) and + // Don't treat an accessor that merely re-exposes a file held in an instance attribute + // (e.g. `FileIO.fileno` returning `self._fd`) as opening a new file. Such flow is + // introduced by instance-attribute type tracking; the underlying open is tracked at its + // real creation site. + not FileReExposed::isReExposed(this) + } } /** A call that may wrap a file object in a wrapper class or `os.fdopen`. */ diff --git a/python/ql/src/Statements/ModificationOfLocals.ql b/python/ql/src/Statements/ModificationOfLocals.ql index f32ddcf78849..886a49c763e9 100644 --- a/python/ql/src/Statements/ModificationOfLocals.ql +++ b/python/ql/src/Statements/ModificationOfLocals.ql @@ -13,9 +13,19 @@ import python private import semmle.python.ApiGraphs +private import semmle.python.dataflow.new.DataFlow predicate originIsLocals(ControlFlowNode n) { - API::builtin("locals").getReturn().getAValueReachableFromSource().asCfgNode() = n + // Only consider the `locals()` dictionary within the scope that called `locals()`. + // Once the dictionary is passed to another scope (e.g. as an argument or via an + // instance attribute) it is just an ordinary mapping, and modifying it is both + // meaningful and effective. Restricting to local (intraprocedural) flow ensures we + // only report modifications in the scope where the `locals()` gotcha actually applies. + exists(DataFlow::LocalSourceNode src, DataFlow::Node use | + src = API::builtin("locals").getReturn().asSource() and + src.flowsTo(use) and + use.asCfgNode() = n + ) } predicate modification_of_locals(ControlFlowNode f) { diff --git a/python/ql/src/Statements/ShouldUseWithStatement.ql b/python/ql/src/Statements/ShouldUseWithStatement.ql index 20bf053f6daa..8ead51fa6b34 100644 --- a/python/ql/src/Statements/ShouldUseWithStatement.ql +++ b/python/ql/src/Statements/ShouldUseWithStatement.ql @@ -13,7 +13,9 @@ */ import python +private import semmle.python.dataflow.new.DataFlow private import semmle.python.dataflow.new.internal.DataFlowDispatch +private import semmle.python.dataflow.new.internal.ReExposedInstance predicate calls_close(Call c) { exists(Attribute a | c.getFunc() = a and a.getName() = "close") } @@ -23,11 +25,21 @@ predicate only_stmt_in_finally(Try t, Call c) { ) } -from Call close, Try t, Class cls +/** Holds if `node` is tracked to be an instance of some class. */ +private predicate classInstanceNode(DataFlow::Node node) { node = classInstanceTracker(_) } + +private module ClassReExposed = ReExposedInstance; + +from Call close, Try t, Class cls, DataFlow::Node closeTarget where only_stmt_in_finally(t, close) and calls_close(close) and - classInstanceTracker(cls).asExpr() = close.getFunc().(Attribute).getObject() and + closeTarget.asExpr() = close.getFunc().(Attribute).getObject() and + closeTarget = classInstanceTracker(cls) and + // Don't report closing a resource that is held in an instance attribute (e.g. `self.reader`). + // Such flow is introduced by instance-attribute type tracking; the object's lifetime is tied + // to the enclosing instance and cannot be expressed with a `with` statement. + not ClassReExposed::isReExposed(closeTarget) and DuckTyping::isContextManager(cls) select close, "Instance of context-manager class $@ is closed in a finally block. Consider using 'with' statement.", diff --git a/python/ql/src/change-notes/2026-06-17-modification-of-locals-cross-scope.md b/python/ql/src/change-notes/2026-06-17-modification-of-locals-cross-scope.md new file mode 100644 index 000000000000..5a625a95511d --- /dev/null +++ b/python/ql/src/change-notes/2026-06-17-modification-of-locals-cross-scope.md @@ -0,0 +1,4 @@ +--- +category: minorAnalysis +--- +* The `py/modification-of-locals` query no longer flags modifications of a `locals()` dictionary that has been passed out of the scope in which `locals()` was called (for example, by passing it to another function or storing it in an instance attribute). In such cases the dictionary is used as an ordinary mapping and modifying it is meaningful, so these were false positives. The "modification has no effect" claim only applies within the scope that called `locals()`, which is now the only case reported. diff --git a/python/ql/test/experimental/library-tests/CallGraph/InlineCallGraphTest.expected b/python/ql/test/experimental/library-tests/CallGraph/InlineCallGraphTest.expected index b353309e852f..1cd62c6de347 100644 --- a/python/ql/test/experimental/library-tests/CallGraph/InlineCallGraphTest.expected +++ b/python/ql/test/experimental/library-tests/CallGraph/InlineCallGraphTest.expected @@ -1,9 +1,6 @@ testFailures debug_callableNotUnique pointsTo_found_typeTracker_notFound -| code/class_attr_assign.py:10:9:10:27 | ControlFlowNode for Attribute() | my_func | -| code/class_attr_assign.py:11:9:11:25 | ControlFlowNode for Attribute() | my_func | -| code/class_attr_assign.py:26:9:26:25 | ControlFlowNode for Attribute() | DummyObject.method | | code/class_super.py:50:1:50:6 | ControlFlowNode for Attribute() | outside_def | | code/conditional_in_argument.py:18:5:18:11 | ControlFlowNode for Attribute() | X.bar | | code/func_defined_outside_class.py:21:1:21:11 | ControlFlowNode for Attribute() | A.foo | diff --git a/python/ql/test/experimental/library-tests/CallGraph/code/class_attr_assign.py b/python/ql/test/experimental/library-tests/CallGraph/code/class_attr_assign.py index 605375925f72..714e27dba1a0 100644 --- a/python/ql/test/experimental/library-tests/CallGraph/code/class_attr_assign.py +++ b/python/ql/test/experimental/library-tests/CallGraph/code/class_attr_assign.py @@ -7,8 +7,8 @@ def __init__(self, func): self.direct_ref = my_func def later(self): - self.indirect_ref() # $ pt=my_func MISSING: tt=my_func - self.direct_ref() # $ pt=my_func MISSING: tt=my_func + self.indirect_ref() # $ pt=my_func tt=my_func + self.direct_ref() # $ pt=my_func tt=my_func foo = Foo(my_func) # $ tt=Foo.__init__ foo.later() # $ pt,tt=Foo.later @@ -23,7 +23,7 @@ def __init__(self): self.obj = DummyObject() def later(self): - self.obj.method() # $ pt=DummyObject.method MISSING: tt=DummyObject.method + self.obj.method() # $ pt=DummyObject.method tt=DummyObject.method bar = Bar(my_func) # $ tt=Bar.__init__ diff --git a/python/ql/test/library-tests/dataflow/typetracking/attribute_tests.py b/python/ql/test/library-tests/dataflow/typetracking/attribute_tests.py index c49cdf77fcdf..b6bca72507f6 100644 --- a/python/ql/test/library-tests/dataflow/typetracking/attribute_tests.py +++ b/python/ql/test/library-tests/dataflow/typetracking/attribute_tests.py @@ -151,13 +151,13 @@ def __init__(self): # $ tracked=foo self.foo = tracked # $ tracked=foo tracked def print_foo(self): # $ MISSING: tracked=foo - print(self.foo) # $ MISSING: tracked=foo tracked + print(self.foo) # $ tracked MISSING: tracked=foo def possibly_uncalled_method(self): # $ MISSING: tracked=foo - print(self.foo) # $ MISSING: tracked=foo tracked + print(self.foo) # $ tracked MISSING: tracked=foo instance = MyClass2() -print(instance.foo) # $ MISSING: tracked=foo tracked +print(instance.foo) # $ tracked MISSING: tracked=foo instance.print_foo() # $ MISSING: tracked=foo @@ -177,3 +177,50 @@ def possibly_uncalled_method(self): # $ MISSING: tracked=foo instance.print_self() # $ tracked=foo instance.foo = tracked # $ tracked=foo tracked instance.print_foo() # $ tracked=foo + + +# ------------------------------------------------------------------------------ +# Tracking of instance attribute across the class hierarchy +# ------------------------------------------------------------------------------ + +# attribute written in a base class method, read in a subclass method + +class Base1(object): + def __init__(self): # $ tracked=foo + self.foo = tracked # $ tracked=foo tracked + +class Sub1(Base1): + def read_foo(self): # $ MISSING: tracked=foo + print(self.foo) # $ tracked MISSING: tracked=foo + +sub1 = Sub1() +sub1.read_foo() +print(sub1.foo) # $ tracked MISSING: tracked=foo + + +# attribute written in a subclass method, read in an inherited base class method + +class Base2(object): + def read_bar(self): # $ MISSING: tracked=bar + print(self.bar) # $ tracked MISSING: tracked=bar + +class Sub2(Base2): + def __init__(self): # $ tracked=bar + self.bar = tracked # $ tracked=bar tracked + +sub2 = Sub2() +sub2.read_bar() +print(sub2.bar) # $ tracked MISSING: tracked=bar + + +# attribute written in a base class method, read on an instance of the subclass + +class Base3(object): + def __init__(self): # $ tracked=baz + self.baz = tracked # $ tracked=baz tracked + +class Sub3(Base3): + pass + +sub3 = Sub3() +print(sub3.baz) # $ tracked MISSING: tracked=baz diff --git a/python/ql/test/library-tests/frameworks/hdbcli/pep249.py b/python/ql/test/library-tests/frameworks/hdbcli/pep249.py index 713f15cb6d4f..0c6c39086482 100644 --- a/python/ql/test/library-tests/frameworks/hdbcli/pep249.py +++ b/python/ql/test/library-tests/frameworks/hdbcli/pep249.py @@ -7,3 +7,30 @@ cursor.executemany("some sql", (42,)) # $ getSql="some sql" cursor.close() + + +# Connection stored in a class attribute (`self._conn`) and used in another method. +# +# This is detected because type tracking includes a level step modelling flow through +# instance attributes: a value written to `self._conn` in one method (here `__init__`) can +# be read back from `self._conn` (directly or via a getter) in any other method on the same +# class. This follows the same approach used for instance fields in Ruby and JavaScript. +class Database: + def __init__(self): + self._conn = dbapi.connect(address="hostname", port=300, user="username") + + def get_connection(self): + return self._conn + + def run_via_getter(self): + conn = self.get_connection() + cursor = conn.cursor() + cursor.execute("getter sql") # $ getSql="getter sql" + + def run_direct(self): + self._conn.execute("direct sql") # $ getSql="direct sql" + + +db = Database() +db.run_via_getter() +db.run_direct() diff --git a/python/ql/test/query-tests/Resources/FileNotAlwaysClosed/FileNotAlwaysClosed.expected b/python/ql/test/query-tests/Resources/FileNotAlwaysClosed/FileNotAlwaysClosed.expected index 7f48feb72eb9..b7d9d37785b0 100644 --- a/python/ql/test/query-tests/Resources/FileNotAlwaysClosed/FileNotAlwaysClosed.expected +++ b/python/ql/test/query-tests/Resources/FileNotAlwaysClosed/FileNotAlwaysClosed.expected @@ -1,9 +1,9 @@ -| resources_test.py:4:10:4:25 | ControlFlowNode for open() | File may not be closed if $@ raises an exception. | resources_test.py:5:5:5:33 | ControlFlowNode for Attribute() | this operation | -| resources_test.py:9:10:9:25 | ControlFlowNode for open() | File is opened but is not closed. | file://:0:0:0:0 | (none) | this operation | -| resources_test.py:108:11:108:20 | ControlFlowNode for open() | File is opened but is not closed. | file://:0:0:0:0 | (none) | this operation | -| resources_test.py:112:11:112:28 | ControlFlowNode for opener_func2() | File may not be closed if $@ raises an exception. | resources_test.py:113:5:113:22 | ControlFlowNode for Attribute() | this operation | -| resources_test.py:123:11:123:24 | ControlFlowNode for opener_func2() | File is opened but is not closed. | file://:0:0:0:0 | (none) | this operation | -| resources_test.py:129:15:129:24 | ControlFlowNode for open() | File may not be closed if $@ raises an exception. | resources_test.py:130:9:130:26 | ControlFlowNode for Attribute() | this operation | -| resources_test.py:248:11:248:25 | ControlFlowNode for open() | File is opened but is not closed. | file://:0:0:0:0 | (none) | this operation | -| resources_test.py:269:10:269:27 | ControlFlowNode for Attribute() | File may not be closed if $@ raises an exception. | resources_test.py:271:5:271:19 | ControlFlowNode for Attribute() | this operation | -| resources_test.py:285:11:285:20 | ControlFlowNode for open() | File may not be closed if $@ raises an exception. | resources_test.py:287:5:287:31 | ControlFlowNode for Attribute() | this operation | +| resources_test.py:6:10:6:25 | ControlFlowNode for open() | File may not be closed if $@ raises an exception. | resources_test.py:7:5:7:33 | ControlFlowNode for Attribute() | this operation | +| resources_test.py:11:10:11:25 | ControlFlowNode for open() | File is opened but is not closed. | file://:0:0:0:0 | (none) | this operation | +| resources_test.py:110:11:110:20 | ControlFlowNode for open() | File is opened but is not closed. | file://:0:0:0:0 | (none) | this operation | +| resources_test.py:114:11:114:28 | ControlFlowNode for opener_func2() | File may not be closed if $@ raises an exception. | resources_test.py:115:5:115:22 | ControlFlowNode for Attribute() | this operation | +| resources_test.py:125:11:125:24 | ControlFlowNode for opener_func2() | File is opened but is not closed. | file://:0:0:0:0 | (none) | this operation | +| resources_test.py:131:15:131:24 | ControlFlowNode for open() | File may not be closed if $@ raises an exception. | resources_test.py:132:9:132:26 | ControlFlowNode for Attribute() | this operation | +| resources_test.py:250:11:250:25 | ControlFlowNode for open() | File is opened but is not closed. | file://:0:0:0:0 | (none) | this operation | +| resources_test.py:271:10:271:27 | ControlFlowNode for Attribute() | File may not be closed if $@ raises an exception. | resources_test.py:273:5:273:19 | ControlFlowNode for Attribute() | this operation | +| resources_test.py:287:11:287:20 | ControlFlowNode for open() | File may not be closed if $@ raises an exception. | resources_test.py:289:5:289:31 | ControlFlowNode for Attribute() | this operation | diff --git a/python/ql/test/query-tests/Resources/FileNotAlwaysClosed/resources_test.py b/python/ql/test/query-tests/Resources/FileNotAlwaysClosed/resources_test.py index f4bd33eb12cd..57568a6eb118 100644 --- a/python/ql/test/query-tests/Resources/FileNotAlwaysClosed/resources_test.py +++ b/python/ql/test/query-tests/Resources/FileNotAlwaysClosed/resources_test.py @@ -1,5 +1,7 @@ #File not always closed +import os + def not_close1(): f1 = open("filename") # $ Alert # not closed on exception f1.write("Error could occur") @@ -332,3 +334,30 @@ def closed32(path): # due to a check that an operation is lexically contained within a `with` block (with `expr.getParent*()`) # not detecting this case. return list(wrap.read()) + + +class FdHolder33(): + # Mirrors CPython's `_pyio.FileIO`: it opens a file descriptor with `os.open`, + # stores it in an instance attribute, and exposes it again via `fileno()`. + def __init__(self, path): + fd = os.open(path, os.O_RDONLY) + self._fd = fd + + def fileno(self): + return self._fd + + def close(self): + os.close(self._fd) + +def closed33(path): + # Regression test mirroring CPython's `_pyio.open`. + # `holder.fileno()` merely returns the existing file descriptor; it does not + # open a new resource. With instance-attribute type tracking, the `os.open` + # source flows through `self._fd` and back out of `fileno()`. The query must + # not treat that re-exposed descriptor as a fresh file-open whose result is + # never closed. The descriptor is owned and closed by `holder.close()`. + holder = FdHolder33(path) + try: + n = holder.fileno() # No alert: this re-exposes an existing descriptor, not a new open. + finally: + holder.close() diff --git a/python/ql/test/query-tests/Security/CWE-089-SqlInjection/SqlInjection.expected b/python/ql/test/query-tests/Security/CWE-089-SqlInjection/SqlInjection.expected index 9ff8b1d718c1..8f60394d8a2b 100644 --- a/python/ql/test/query-tests/Security/CWE-089-SqlInjection/SqlInjection.expected +++ b/python/ql/test/query-tests/Security/CWE-089-SqlInjection/SqlInjection.expected @@ -1,4 +1,36 @@ +#select +| app.py:23:20:23:24 | ControlFlowNode for query | app.py:20:18:20:21 | ControlFlowNode for name | app.py:23:20:23:24 | ControlFlowNode for query | This SQL query depends on a $@. | app.py:20:18:20:21 | ControlFlowNode for name | user-provided value | +| app.py:30:20:30:24 | ControlFlowNode for query | app.py:27:19:27:22 | ControlFlowNode for name | app.py:30:20:30:24 | ControlFlowNode for query | This SQL query depends on a $@. | app.py:27:19:27:22 | ControlFlowNode for name | user-provided value | +| app.py:37:20:37:24 | ControlFlowNode for query | app.py:34:19:34:22 | ControlFlowNode for name | app.py:37:20:37:24 | ControlFlowNode for query | This SQL query depends on a $@. | app.py:34:19:34:22 | ControlFlowNode for name | user-provided value | +| app.py:44:20:44:24 | ControlFlowNode for query | app.py:41:19:41:22 | ControlFlowNode for name | app.py:44:20:44:24 | ControlFlowNode for query | This SQL query depends on a $@. | app.py:41:19:41:22 | ControlFlowNode for name | user-provided value | +| app.py:51:20:51:24 | ControlFlowNode for query | app.py:48:19:48:22 | ControlFlowNode for name | app.py:51:20:51:24 | ControlFlowNode for query | This SQL query depends on a $@. | app.py:48:19:48:22 | ControlFlowNode for name | user-provided value | +| sql_injection.py:21:24:21:77 | ControlFlowNode for BinaryExpr | sql_injection.py:14:15:14:22 | ControlFlowNode for username | sql_injection.py:21:24:21:77 | ControlFlowNode for BinaryExpr | This SQL query depends on a $@. | sql_injection.py:14:15:14:22 | ControlFlowNode for username | user-provided value | +| sql_injection.py:24:38:24:95 | ControlFlowNode for BinaryExpr | sql_injection.py:14:15:14:22 | ControlFlowNode for username | sql_injection.py:24:38:24:95 | ControlFlowNode for BinaryExpr | This SQL query depends on a $@. | sql_injection.py:14:15:14:22 | ControlFlowNode for username | user-provided value | +| sql_injection.py:25:26:25:83 | ControlFlowNode for BinaryExpr | sql_injection.py:14:15:14:22 | ControlFlowNode for username | sql_injection.py:25:26:25:83 | ControlFlowNode for BinaryExpr | This SQL query depends on a $@. | sql_injection.py:14:15:14:22 | ControlFlowNode for username | user-provided value | +| sql_injection.py:26:28:26:85 | ControlFlowNode for BinaryExpr | sql_injection.py:14:15:14:22 | ControlFlowNode for username | sql_injection.py:26:28:26:85 | ControlFlowNode for BinaryExpr | This SQL query depends on a $@. | sql_injection.py:14:15:14:22 | ControlFlowNode for username | user-provided value | +| sqlalchemy_textclause.py:27:28:27:87 | ControlFlowNode for Attribute() | sqlalchemy_textclause.py:23:15:23:22 | ControlFlowNode for username | sqlalchemy_textclause.py:27:28:27:87 | ControlFlowNode for Attribute() | This SQL query depends on a $@. | sqlalchemy_textclause.py:23:15:23:22 | ControlFlowNode for username | user-provided value | +| sqlalchemy_textclause.py:31:50:31:72 | ControlFlowNode for Attribute() | sqlalchemy_textclause.py:23:15:23:22 | ControlFlowNode for username | sqlalchemy_textclause.py:31:50:31:72 | ControlFlowNode for Attribute() | This SQL query depends on a $@. | sqlalchemy_textclause.py:23:15:23:22 | ControlFlowNode for username | user-provided value | +| sqlalchemy_textclause.py:41:26:41:33 | ControlFlowNode for username | sqlalchemy_textclause.py:23:15:23:22 | ControlFlowNode for username | sqlalchemy_textclause.py:41:26:41:33 | ControlFlowNode for username | This SQL query depends on a $@. | sqlalchemy_textclause.py:23:15:23:22 | ControlFlowNode for username | user-provided value | +| sqlalchemy_textclause.py:42:31:42:38 | ControlFlowNode for username | sqlalchemy_textclause.py:23:15:23:22 | ControlFlowNode for username | sqlalchemy_textclause.py:42:31:42:38 | ControlFlowNode for username | This SQL query depends on a $@. | sqlalchemy_textclause.py:23:15:23:22 | ControlFlowNode for username | user-provided value | +| sqlalchemy_textclause.py:43:30:43:37 | ControlFlowNode for username | sqlalchemy_textclause.py:23:15:23:22 | ControlFlowNode for username | sqlalchemy_textclause.py:43:30:43:37 | ControlFlowNode for username | This SQL query depends on a $@. | sqlalchemy_textclause.py:23:15:23:22 | ControlFlowNode for username | user-provided value | +| sqlalchemy_textclause.py:44:35:44:42 | ControlFlowNode for username | sqlalchemy_textclause.py:23:15:23:22 | ControlFlowNode for username | sqlalchemy_textclause.py:44:35:44:42 | ControlFlowNode for username | This SQL query depends on a $@. | sqlalchemy_textclause.py:23:15:23:22 | ControlFlowNode for username | user-provided value | +| sqlalchemy_textclause.py:45:41:45:48 | ControlFlowNode for username | sqlalchemy_textclause.py:23:15:23:22 | ControlFlowNode for username | sqlalchemy_textclause.py:45:41:45:48 | ControlFlowNode for username | This SQL query depends on a $@. | sqlalchemy_textclause.py:23:15:23:22 | ControlFlowNode for username | user-provided value | +| sqlalchemy_textclause.py:46:46:46:53 | ControlFlowNode for username | sqlalchemy_textclause.py:23:15:23:22 | ControlFlowNode for username | sqlalchemy_textclause.py:46:46:46:53 | ControlFlowNode for username | This SQL query depends on a $@. | sqlalchemy_textclause.py:23:15:23:22 | ControlFlowNode for username | user-provided value | +| sqlalchemy_textclause.py:47:47:47:54 | ControlFlowNode for username | sqlalchemy_textclause.py:23:15:23:22 | ControlFlowNode for username | sqlalchemy_textclause.py:47:47:47:54 | ControlFlowNode for username | This SQL query depends on a $@. | sqlalchemy_textclause.py:23:15:23:22 | ControlFlowNode for username | user-provided value | +| sqlalchemy_textclause.py:48:52:48:59 | ControlFlowNode for username | sqlalchemy_textclause.py:23:15:23:22 | ControlFlowNode for username | sqlalchemy_textclause.py:48:52:48:59 | ControlFlowNode for username | This SQL query depends on a $@. | sqlalchemy_textclause.py:23:15:23:22 | ControlFlowNode for username | user-provided value | +| sqlalchemy_textclause.py:50:18:50:25 | ControlFlowNode for username | sqlalchemy_textclause.py:23:15:23:22 | ControlFlowNode for username | sqlalchemy_textclause.py:50:18:50:25 | ControlFlowNode for username | This SQL query depends on a $@. | sqlalchemy_textclause.py:23:15:23:22 | ControlFlowNode for username | user-provided value | +| sqlalchemy_textclause.py:51:24:51:31 | ControlFlowNode for username | sqlalchemy_textclause.py:23:15:23:22 | ControlFlowNode for username | sqlalchemy_textclause.py:51:24:51:31 | ControlFlowNode for username | This SQL query depends on a $@. | sqlalchemy_textclause.py:23:15:23:22 | ControlFlowNode for username | user-provided value | edges +| app.py:20:18:20:21 | ControlFlowNode for name | app.py:21:5:21:9 | ControlFlowNode for query | provenance | | +| app.py:21:5:21:9 | ControlFlowNode for query | app.py:23:20:23:24 | ControlFlowNode for query | provenance | | +| app.py:27:19:27:22 | ControlFlowNode for name | app.py:28:5:28:9 | ControlFlowNode for query | provenance | | +| app.py:28:5:28:9 | ControlFlowNode for query | app.py:30:20:30:24 | ControlFlowNode for query | provenance | | +| app.py:34:19:34:22 | ControlFlowNode for name | app.py:35:5:35:9 | ControlFlowNode for query | provenance | | +| app.py:35:5:35:9 | ControlFlowNode for query | app.py:37:20:37:24 | ControlFlowNode for query | provenance | | +| app.py:41:19:41:22 | ControlFlowNode for name | app.py:42:5:42:9 | ControlFlowNode for query | provenance | | +| app.py:42:5:42:9 | ControlFlowNode for query | app.py:44:20:44:24 | ControlFlowNode for query | provenance | | +| app.py:48:19:48:22 | ControlFlowNode for name | app.py:49:5:49:9 | ControlFlowNode for query | provenance | | +| app.py:49:5:49:9 | ControlFlowNode for query | app.py:51:20:51:24 | ControlFlowNode for query | provenance | | | sql_injection.py:14:15:14:22 | ControlFlowNode for username | sql_injection.py:21:24:21:77 | ControlFlowNode for BinaryExpr | provenance | | | sql_injection.py:14:15:14:22 | ControlFlowNode for username | sql_injection.py:24:38:24:95 | ControlFlowNode for BinaryExpr | provenance | | | sql_injection.py:14:15:14:22 | ControlFlowNode for username | sql_injection.py:25:26:25:83 | ControlFlowNode for BinaryExpr | provenance | | @@ -16,6 +48,21 @@ edges | sqlalchemy_textclause.py:23:15:23:22 | ControlFlowNode for username | sqlalchemy_textclause.py:50:18:50:25 | ControlFlowNode for username | provenance | | | sqlalchemy_textclause.py:23:15:23:22 | ControlFlowNode for username | sqlalchemy_textclause.py:51:24:51:31 | ControlFlowNode for username | provenance | | nodes +| app.py:20:18:20:21 | ControlFlowNode for name | semmle.label | ControlFlowNode for name | +| app.py:21:5:21:9 | ControlFlowNode for query | semmle.label | ControlFlowNode for query | +| app.py:23:20:23:24 | ControlFlowNode for query | semmle.label | ControlFlowNode for query | +| app.py:27:19:27:22 | ControlFlowNode for name | semmle.label | ControlFlowNode for name | +| app.py:28:5:28:9 | ControlFlowNode for query | semmle.label | ControlFlowNode for query | +| app.py:30:20:30:24 | ControlFlowNode for query | semmle.label | ControlFlowNode for query | +| app.py:34:19:34:22 | ControlFlowNode for name | semmle.label | ControlFlowNode for name | +| app.py:35:5:35:9 | ControlFlowNode for query | semmle.label | ControlFlowNode for query | +| app.py:37:20:37:24 | ControlFlowNode for query | semmle.label | ControlFlowNode for query | +| app.py:41:19:41:22 | ControlFlowNode for name | semmle.label | ControlFlowNode for name | +| app.py:42:5:42:9 | ControlFlowNode for query | semmle.label | ControlFlowNode for query | +| app.py:44:20:44:24 | ControlFlowNode for query | semmle.label | ControlFlowNode for query | +| app.py:48:19:48:22 | ControlFlowNode for name | semmle.label | ControlFlowNode for name | +| app.py:49:5:49:9 | ControlFlowNode for query | semmle.label | ControlFlowNode for query | +| app.py:51:20:51:24 | ControlFlowNode for query | semmle.label | ControlFlowNode for query | | sql_injection.py:14:15:14:22 | ControlFlowNode for username | semmle.label | ControlFlowNode for username | | sql_injection.py:21:24:21:77 | ControlFlowNode for BinaryExpr | semmle.label | ControlFlowNode for BinaryExpr | | sql_injection.py:24:38:24:95 | ControlFlowNode for BinaryExpr | semmle.label | ControlFlowNode for BinaryExpr | @@ -35,20 +82,3 @@ nodes | sqlalchemy_textclause.py:50:18:50:25 | ControlFlowNode for username | semmle.label | ControlFlowNode for username | | sqlalchemy_textclause.py:51:24:51:31 | ControlFlowNode for username | semmle.label | ControlFlowNode for username | subpaths -#select -| sql_injection.py:21:24:21:77 | ControlFlowNode for BinaryExpr | sql_injection.py:14:15:14:22 | ControlFlowNode for username | sql_injection.py:21:24:21:77 | ControlFlowNode for BinaryExpr | This SQL query depends on a $@. | sql_injection.py:14:15:14:22 | ControlFlowNode for username | user-provided value | -| sql_injection.py:24:38:24:95 | ControlFlowNode for BinaryExpr | sql_injection.py:14:15:14:22 | ControlFlowNode for username | sql_injection.py:24:38:24:95 | ControlFlowNode for BinaryExpr | This SQL query depends on a $@. | sql_injection.py:14:15:14:22 | ControlFlowNode for username | user-provided value | -| sql_injection.py:25:26:25:83 | ControlFlowNode for BinaryExpr | sql_injection.py:14:15:14:22 | ControlFlowNode for username | sql_injection.py:25:26:25:83 | ControlFlowNode for BinaryExpr | This SQL query depends on a $@. | sql_injection.py:14:15:14:22 | ControlFlowNode for username | user-provided value | -| sql_injection.py:26:28:26:85 | ControlFlowNode for BinaryExpr | sql_injection.py:14:15:14:22 | ControlFlowNode for username | sql_injection.py:26:28:26:85 | ControlFlowNode for BinaryExpr | This SQL query depends on a $@. | sql_injection.py:14:15:14:22 | ControlFlowNode for username | user-provided value | -| sqlalchemy_textclause.py:27:28:27:87 | ControlFlowNode for Attribute() | sqlalchemy_textclause.py:23:15:23:22 | ControlFlowNode for username | sqlalchemy_textclause.py:27:28:27:87 | ControlFlowNode for Attribute() | This SQL query depends on a $@. | sqlalchemy_textclause.py:23:15:23:22 | ControlFlowNode for username | user-provided value | -| sqlalchemy_textclause.py:31:50:31:72 | ControlFlowNode for Attribute() | sqlalchemy_textclause.py:23:15:23:22 | ControlFlowNode for username | sqlalchemy_textclause.py:31:50:31:72 | ControlFlowNode for Attribute() | This SQL query depends on a $@. | sqlalchemy_textclause.py:23:15:23:22 | ControlFlowNode for username | user-provided value | -| sqlalchemy_textclause.py:41:26:41:33 | ControlFlowNode for username | sqlalchemy_textclause.py:23:15:23:22 | ControlFlowNode for username | sqlalchemy_textclause.py:41:26:41:33 | ControlFlowNode for username | This SQL query depends on a $@. | sqlalchemy_textclause.py:23:15:23:22 | ControlFlowNode for username | user-provided value | -| sqlalchemy_textclause.py:42:31:42:38 | ControlFlowNode for username | sqlalchemy_textclause.py:23:15:23:22 | ControlFlowNode for username | sqlalchemy_textclause.py:42:31:42:38 | ControlFlowNode for username | This SQL query depends on a $@. | sqlalchemy_textclause.py:23:15:23:22 | ControlFlowNode for username | user-provided value | -| sqlalchemy_textclause.py:43:30:43:37 | ControlFlowNode for username | sqlalchemy_textclause.py:23:15:23:22 | ControlFlowNode for username | sqlalchemy_textclause.py:43:30:43:37 | ControlFlowNode for username | This SQL query depends on a $@. | sqlalchemy_textclause.py:23:15:23:22 | ControlFlowNode for username | user-provided value | -| sqlalchemy_textclause.py:44:35:44:42 | ControlFlowNode for username | sqlalchemy_textclause.py:23:15:23:22 | ControlFlowNode for username | sqlalchemy_textclause.py:44:35:44:42 | ControlFlowNode for username | This SQL query depends on a $@. | sqlalchemy_textclause.py:23:15:23:22 | ControlFlowNode for username | user-provided value | -| sqlalchemy_textclause.py:45:41:45:48 | ControlFlowNode for username | sqlalchemy_textclause.py:23:15:23:22 | ControlFlowNode for username | sqlalchemy_textclause.py:45:41:45:48 | ControlFlowNode for username | This SQL query depends on a $@. | sqlalchemy_textclause.py:23:15:23:22 | ControlFlowNode for username | user-provided value | -| sqlalchemy_textclause.py:46:46:46:53 | ControlFlowNode for username | sqlalchemy_textclause.py:23:15:23:22 | ControlFlowNode for username | sqlalchemy_textclause.py:46:46:46:53 | ControlFlowNode for username | This SQL query depends on a $@. | sqlalchemy_textclause.py:23:15:23:22 | ControlFlowNode for username | user-provided value | -| sqlalchemy_textclause.py:47:47:47:54 | ControlFlowNode for username | sqlalchemy_textclause.py:23:15:23:22 | ControlFlowNode for username | sqlalchemy_textclause.py:47:47:47:54 | ControlFlowNode for username | This SQL query depends on a $@. | sqlalchemy_textclause.py:23:15:23:22 | ControlFlowNode for username | user-provided value | -| sqlalchemy_textclause.py:48:52:48:59 | ControlFlowNode for username | sqlalchemy_textclause.py:23:15:23:22 | ControlFlowNode for username | sqlalchemy_textclause.py:48:52:48:59 | ControlFlowNode for username | This SQL query depends on a $@. | sqlalchemy_textclause.py:23:15:23:22 | ControlFlowNode for username | user-provided value | -| sqlalchemy_textclause.py:50:18:50:25 | ControlFlowNode for username | sqlalchemy_textclause.py:23:15:23:22 | ControlFlowNode for username | sqlalchemy_textclause.py:50:18:50:25 | ControlFlowNode for username | This SQL query depends on a $@. | sqlalchemy_textclause.py:23:15:23:22 | ControlFlowNode for username | user-provided value | -| sqlalchemy_textclause.py:51:24:51:31 | ControlFlowNode for username | sqlalchemy_textclause.py:23:15:23:22 | ControlFlowNode for username | sqlalchemy_textclause.py:51:24:51:31 | ControlFlowNode for username | This SQL query depends on a $@. | sqlalchemy_textclause.py:23:15:23:22 | ControlFlowNode for username | user-provided value | diff --git a/python/ql/test/query-tests/Security/CWE-089-SqlInjection/SqlInjection.qlref b/python/ql/test/query-tests/Security/CWE-089-SqlInjection/SqlInjection.qlref index d1d02cbe8d37..444c0e5f46aa 100644 --- a/python/ql/test/query-tests/Security/CWE-089-SqlInjection/SqlInjection.qlref +++ b/python/ql/test/query-tests/Security/CWE-089-SqlInjection/SqlInjection.qlref @@ -1 +1,2 @@ -Security/CWE-089/SqlInjection.ql +query: Security/CWE-089/SqlInjection.ql +postprocess: utils/test/InlineExpectationsTestQuery.ql diff --git a/python/ql/test/query-tests/Security/CWE-089-SqlInjection/app.py b/python/ql/test/query-tests/Security/CWE-089-SqlInjection/app.py new file mode 100644 index 000000000000..4de61346d8f5 --- /dev/null +++ b/python/ql/test/query-tests/Security/CWE-089-SqlInjection/app.py @@ -0,0 +1,52 @@ +from fastapi import FastAPI +from hdbcli import dbapi +from db_connection import get_conn +from db_connection import hdb_con +from db_connection import hdb_con2 +from db_connection import hdb_con3 +app = FastAPI() + +class DatabaseConnection: + + def __init__(self): + self._conn = dbapi.connect(address='localhost', port=30015, user='system', password='Password123') + + def get_conn(self): + return self._conn + +db_connection = DatabaseConnection() + +@app.get("/unsafe1/") +async def unsafe(name: str): # $ Source + query = "select * from users where name=" + name + cursor = hdb_con.cursor() + cursor.execute(query) # $ Alert + cursor.close() + +@app.get("/unsafe2/") +async def unsafe2(name: str): # $ Source + query = "select * from users where name=" + name + cursor = hdb_con2.cursor() + cursor.execute(query) # $ Alert + cursor.close() + +@app.get("/unsafe3/") +async def unsafe3(name: str): # $ Source + query = "select * from users where name=" + name + cursor = hdb_con3.cursor() + cursor.execute(query) # $ Alert + cursor.close() + +@app.get("/unsafe4/") +async def unsafe4(name: str): # $ Source + query = "select * from users where name=" + name + cursor = get_conn().cursor() + cursor.execute(query) # $ Alert + cursor.close() + +@app.get("/unsafe5/") +async def unsafe5(name: str): # $ Source + query = "select * from users where name=" + name + cursor = db_connection.get_conn().cursor() + cursor.execute(query) # $ Alert + cursor.close() diff --git a/python/ql/test/query-tests/Security/CWE-089-SqlInjection/db_connection.py b/python/ql/test/query-tests/Security/CWE-089-SqlInjection/db_connection.py new file mode 100644 index 000000000000..b05a43bdebba --- /dev/null +++ b/python/ql/test/query-tests/Security/CWE-089-SqlInjection/db_connection.py @@ -0,0 +1,24 @@ +from hdbcli import dbapi +from typing import Optional + +hdb_con = dbapi.connect(address='localhost', port=30015, user='system', password='Password123') + + +class DatabaseConnection: + + def __init__(self): + self._conn = dbapi.connect(address='localhost', port=30015, user='system', password='Password123') + + def get_conn(self): + return self._conn + + +hdb_con2 = DatabaseConnection().get_conn() +hdb_con3 = DatabaseConnection()._conn + +_hana_connection: Optional[DatabaseConnection] = None +def get_conn(): + global _hana_connection + if _hana_connection is None: + _hana_connection = DatabaseConnection() + return _hana_connection.get_conn() diff --git a/python/ql/test/query-tests/Security/CWE-089-SqlInjection/sql_injection.py b/python/ql/test/query-tests/Security/CWE-089-SqlInjection/sql_injection.py index c79bee16cb21..52aa3169616e 100644 --- a/python/ql/test/query-tests/Security/CWE-089-SqlInjection/sql_injection.py +++ b/python/ql/test/query-tests/Security/CWE-089-SqlInjection/sql_injection.py @@ -11,19 +11,19 @@ class User(models.Model): pass @app.route("/users/") -def show_user(username): +def show_user(username): # $ Source with connection.cursor() as cursor: # GOOD -- Using parameters cursor.execute("SELECT * FROM users WHERE username = %s", username) User.objects.raw("SELECT * FROM users WHERE username = %s", (username,)) # BAD -- Using string formatting - cursor.execute("SELECT * FROM users WHERE username = '%s'" % username) + cursor.execute("SELECT * FROM users WHERE username = '%s'" % username) # $ Alert # BAD -- other ways of executing raw SQL code with string interpolation - User.objects.annotate(RawSQL("insert into names_file ('name') values ('%s')" % username)) - User.objects.raw("insert into names_file ('name') values ('%s')" % username) - User.objects.extra("insert into names_file ('name') values ('%s')" % username) + User.objects.annotate(RawSQL("insert into names_file ('name') values ('%s')" % username)) # $ Alert + User.objects.raw("insert into names_file ('name') values ('%s')" % username) # $ Alert + User.objects.extra("insert into names_file ('name') values ('%s')" % username) # $ Alert # BAD (but currently no custom query to find this) # diff --git a/python/ql/test/query-tests/Security/CWE-089-SqlInjection/sqlalchemy_textclause.py b/python/ql/test/query-tests/Security/CWE-089-SqlInjection/sqlalchemy_textclause.py index a54d64517d42..f35b1325366c 100644 --- a/python/ql/test/query-tests/Security/CWE-089-SqlInjection/sqlalchemy_textclause.py +++ b/python/ql/test/query-tests/Security/CWE-089-SqlInjection/sqlalchemy_textclause.py @@ -20,15 +20,15 @@ class User(Base): @app.route("/users/") -def show_user(username): +def show_user(username): # $ Source session = sqlalchemy.orm.Session(engine) # BAD, normal SQL injection - stmt = sqlalchemy.text("SELECT * FROM users WHERE username = '{}'".format(username)) + stmt = sqlalchemy.text("SELECT * FROM users WHERE username = '{}'".format(username)) # $ Alert results = session.execute(stmt).fetchall() # BAD, allows SQL injection - username_formatted_for_sql = sqlalchemy.text("'{}'".format(username)) + username_formatted_for_sql = sqlalchemy.text("'{}'".format(username)) # $ Alert stmt = sqlalchemy.select(User).where(User.username == username_formatted_for_sql) results = session.execute(stmt).scalars().all() @@ -38,14 +38,14 @@ def show_user(username): # All of these should be flagged by query - t1 = sqlalchemy.text(username) - t2 = sqlalchemy.text(text=username) - t3 = sqlalchemy.sql.text(username) - t4 = sqlalchemy.sql.text(text=username) - t5 = sqlalchemy.sql.expression.text(username) - t6 = sqlalchemy.sql.expression.text(text=username) - t7 = sqlalchemy.sql.expression.TextClause(username) - t8 = sqlalchemy.sql.expression.TextClause(text=username) - - t9 = db.text(username) - t10 = db.text(text=username) + t1 = sqlalchemy.text(username) # $ Alert + t2 = sqlalchemy.text(text=username) # $ Alert + t3 = sqlalchemy.sql.text(username) # $ Alert + t4 = sqlalchemy.sql.text(text=username) # $ Alert + t5 = sqlalchemy.sql.expression.text(username) # $ Alert + t6 = sqlalchemy.sql.expression.text(text=username) # $ Alert + t7 = sqlalchemy.sql.expression.TextClause(username) # $ Alert + t8 = sqlalchemy.sql.expression.TextClause(text=username) # $ Alert + + t9 = db.text(username) # $ Alert + t10 = db.text(text=username) # $ Alert diff --git a/python/ql/test/query-tests/Statements/general/test.py b/python/ql/test/query-tests/Statements/general/test.py index a5848f7c718d..eb38acd89b1d 100644 --- a/python/ql/test/query-tests/Statements/general/test.py +++ b/python/ql/test/query-tests/Statements/general/test.py @@ -167,6 +167,22 @@ def no_with(): finally: f.close() +# Should not use a 'with' statement here: the resource is held in an instance +# attribute, so its lifetime spans the enclosing instance and cannot be expressed +# with a 'with' statement. Instance-attribute type tracking can launder the +# instance out of the field, but this must not be reported. +class HoldsCM(object): + + def __init__(self): + self.f = CM() + + def no_with_attribute(self): + try: + self.f.write("Hello ") + self.f.write(" World\n") + finally: + self.f.close() # No alert: re-exposes a field, not a local resource. + #Assert without side-effect def assert_ok(seq): assert all(isinstance(element, (str, unicode)) for element in seq) @@ -180,3 +196,41 @@ class false_positive: class MyClass: locals()['x'] = 43 # OK y = x + + +# Once a `locals()` dictionary is passed out of the scope that created it, it is +# just an ordinary mapping. Modifying it in a different scope is meaningful and +# effective, so these modifications must NOT be flagged: the "no effect on local +# variables" gotcha only applies within the scope that called `locals()`. +def modify_passed_dict(ns): + ns['k'] = 1 # OK: `ns` is a parameter here, not this scope's locals() + ns.update({'j': 2}) # OK + ns.pop('k') # OK + del ns['j'] # OK + ns.clear() # OK + + +def pass_locals_to_function(): + y = 1 + modify_passed_dict(locals()) + return y + + +# The same situation, but where the `locals()` dictionary is laundered through an +# instance attribute (as instance-attribute type tracking now models). These must +# also not be flagged. +class NamespaceHolder(object): + + def __init__(self, ns): + self.ns = ns + + def populate(self): + self.ns['extra'] = 1 # OK: different scope from the `locals()` call + self.ns.update({'more': 2}) # OK + + +def launder_locals_through_instance(): + x = 1 + holder = NamespaceHolder(locals()) + holder.populate() + return x