Skip to content

Commit e618883

Browse files
authored
Merge pull request #21969 from github/copilot/investigate-missing-alerts
Python: Track instance attributes through type tracking
2 parents 3dd3e2c + c7c1eca commit e618883

20 files changed

Lines changed: 547 additions & 60 deletions

File tree

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
---
2+
category: minorAnalysis
3+
---
4+
* Python type tracking now follows values stored in instance attributes such as `self.attr` across instance methods, including across a class hierarchy (for example, a value stored on `self.attr` in a base class and read in a subclass, or vice versa). As a result, analysis is more likely to recognize user-defined objects that are stored on `self` and used later in other methods, which may produce additional results.
Lines changed: 46 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,46 @@
1+
/**
2+
* Provides a parameterized module for identifying values that instance-attribute type tracking
3+
* has re-exposed (laundered) out of an instance attribute, rather than freshly created.
4+
*/
5+
6+
private import python
7+
private import semmle.python.dataflow.new.DataFlow
8+
9+
/** Holds if `node` should be treated as an instance source. */
10+
signature predicate instanceNodeSig(DataFlow::Node node);
11+
12+
/**
13+
* Provides a predicate for identifying values that instance-attribute type tracking has
14+
* re-exposed (laundered) out of an instance attribute, rather than freshly created.
15+
*
16+
* Instance-attribute type tracking can flow a value into an instance attribute and back out at
17+
* a later attribute read, for example `BufferedRWPair.reader` or `FileIO.fileno` returning
18+
* `self._fd`. Such a re-exposed value is owned by the enclosing instance and is not a fresh
19+
* resource; queries that reason about resource creation or lifetime should not treat it as one.
20+
*
21+
* The parameter `isInstance` defines which nodes count as instance sources (typically the result
22+
* of a class- or resource-instance type tracker).
23+
*/
24+
module ReExposedInstance<instanceNodeSig/1 isInstance> {
25+
/**
26+
* Holds if `read` is an attribute read that re-exposes an instance held in an instance
27+
* attribute.
28+
*/
29+
private predicate launderedAttrRead(DataFlow::AttrRead read) { isInstance(read) }
30+
31+
/** Type tracking forward from an attribute read that re-exposes an instance held in a field. */
32+
private DataFlow::TypeTrackingNode launderedInstance(DataFlow::TypeTracker t) {
33+
t.start() and
34+
launderedAttrRead(result)
35+
or
36+
exists(DataFlow::TypeTracker t2 | result = launderedInstance(t2).track(t2, t))
37+
}
38+
39+
/**
40+
* Holds if `node` is a value that has been re-exposed (laundered) out of an instance attribute,
41+
* rather than being a freshly created instance.
42+
*/
43+
predicate isReExposed(DataFlow::Node node) {
44+
launderedInstance(DataFlow::TypeTracker::end()).flowsTo(node)
45+
}
46+
}

python/ql/lib/semmle/python/dataflow/new/internal/TypeTrackingImpl.qll

Lines changed: 134 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -167,11 +167,17 @@ module TypeTrackingInput implements Shared::TypeTrackingInput<Location> {
167167
}
168168

169169
/** Holds if there is a level step from `nodeFrom` to `nodeTo`, which may depend on the call graph. */
170-
predicate levelStepCall(Node nodeFrom, LocalSourceNode nodeTo) { none() }
170+
predicate levelStepCall(Node nodeFrom, LocalSourceNode nodeTo) {
171+
instanceFieldStep(nodeFrom, nodeTo)
172+
or
173+
inheritedFieldStep(nodeFrom, nodeTo)
174+
}
171175

172176
/** Holds if there is a level step from `nodeFrom` to `nodeTo`, which does not depend on the call graph. */
173177
predicate levelStepNoCall(Node nodeFrom, LocalSourceNode nodeTo) {
174178
TypeTrackerSummaryFlow::levelStepNoCall(nodeFrom, nodeTo)
179+
or
180+
localFieldStep(nodeFrom, nodeTo)
175181
}
176182

177183
/**
@@ -317,6 +323,133 @@ module TypeTrackingInput implements Shared::TypeTrackingInput<Location> {
317323
)
318324
}
319325

326+
/**
327+
* Holds if `ref` accesses attribute `attr` of `self`, where `self` is the first
328+
* parameter of an instance method of `cls` (i.e. an access of the form `self.attr`).
329+
*
330+
* Static methods and class methods are excluded, since their first parameter is not a
331+
* `self` instance reference.
332+
*/
333+
private predicate selfAttrRef(Class cls, string attr, DataFlowPublic::AttrRef ref) {
334+
exists(Function method, Name selfUse |
335+
method = cls.getAMethod() and
336+
not DataFlowDispatch::isStaticmethod(method) and
337+
not DataFlowDispatch::isClassmethod(method) and
338+
selfUse.getVariable() = method.getArg(0).(Name).getVariable() and
339+
ref.getObject().asCfgNode().getNode() = selfUse and
340+
ref.mayHaveAttributeName(attr)
341+
)
342+
}
343+
344+
/**
345+
* Holds if `read` reads attribute `attr` from an instance of `cls`, where the instance
346+
* is referred to from outside the methods of `cls` (i.e. an access of the form
347+
* `instance.attr`, where `instance` is a reference to an instance of `cls`).
348+
*
349+
* This complements `selfAttrRef`, which only handles `self.attr` accesses inside the
350+
* methods of `cls`. Unlike `selfAttrRef`, this depends on the call graph (via
351+
* `classInstanceTracker`), so steps using it must be reported as `levelStepCall`.
352+
*/
353+
private predicate instanceAttrRead(Class cls, string attr, DataFlowPublic::AttrRead read) {
354+
read.getObject() = DataFlowDispatch::classInstanceTracker(cls) and
355+
read.mayHaveAttributeName(attr)
356+
}
357+
358+
/**
359+
* Holds if `nodeFrom` is written to attribute `self.attr` in some instance method of a
360+
* class, and `nodeTo` reads attribute `self.attr` in some (possibly different) instance
361+
* method of the same class.
362+
*
363+
* This models flow through instance attributes (`self.foo`): a value stored into
364+
* `self.foo` in one method can be read from `self.foo` in another method. Type-tracking
365+
* handles the store and read steps via `AttrWrite`/`AttrRead`, but on its own it cannot
366+
* relate the `self` of the writing method to the `self` of the reading method. Following
367+
* the approach used for Ruby and JavaScript, we model this directly as a level step from
368+
* the written value to the read reference, for any pair of methods on the class (not
369+
* just from `__init__`).
370+
*
371+
* Flow across the class hierarchy (a write in one class observed in a method inherited
372+
* from, or contributed by, a related class) is handled separately by
373+
* `inheritedFieldStep`, because resolving superclasses depends on the call graph and so
374+
* cannot appear in this call-graph-independent step.
375+
*
376+
* This is an over-approximation: it is instance-insensitive (it does not distinguish
377+
* between different instances of the same class) and order-insensitive (it does not
378+
* require the write to happen before the read), matching the precision of
379+
* instance-attribute handling for Ruby and JavaScript.
380+
*/
381+
private predicate localFieldStep(Node nodeFrom, LocalSourceNode nodeTo) {
382+
exists(Class cls, string attr, DataFlowPublic::AttrWrite write, DataFlowPublic::AttrRead read |
383+
selfAttrRef(cls, attr, write) and
384+
nodeFrom = write.getValue() and
385+
selfAttrRef(cls, attr, read) and
386+
nodeTo = read
387+
)
388+
}
389+
390+
/**
391+
* Holds if `nodeFrom` is written to attribute `self.attr` in an instance method of one
392+
* class, and `nodeTo` reads attribute `self.attr` in an instance method of a different
393+
* class that is related to it by inheritance (one is a transitive superclass of the
394+
* other).
395+
*
396+
* This is the cross-hierarchy counterpart of `localFieldStep`: at runtime the receiver
397+
* of both methods may be an instance of the more-derived class, whose behaviour is made
398+
* up of the methods it declares together with those inherited from all of its ancestors.
399+
* It therefore models the common pattern of a base class storing `self.attr` that a
400+
* subclass reads, and vice versa. Resolving the superclass relationship depends on the
401+
* call graph (via `getADirectSuperclass`), so this step is reported as `levelStepCall`
402+
* rather than `levelStepNoCall`.
403+
*
404+
* Like `localFieldStep`, this is an over-approximation: it is both instance-insensitive
405+
* and order-insensitive.
406+
*/
407+
private predicate inheritedFieldStep(Node nodeFrom, LocalSourceNode nodeTo) {
408+
exists(
409+
Class writeCls, Class readCls, string attr, DataFlowPublic::AttrWrite write,
410+
DataFlowPublic::AttrRead read
411+
|
412+
selfAttrRef(writeCls, attr, write) and
413+
nodeFrom = write.getValue() and
414+
selfAttrRef(readCls, attr, read) and
415+
nodeTo = read and
416+
writeCls != readCls and
417+
(
418+
writeCls = DataFlowDispatch::getADirectSuperclass*(readCls)
419+
or
420+
readCls = DataFlowDispatch::getADirectSuperclass*(writeCls)
421+
)
422+
)
423+
}
424+
425+
/**
426+
* Holds if `nodeFrom` is written to attribute `self.attr` in some instance method of a
427+
* class, and `nodeTo` reads attribute `attr` from an instance of that class (or a
428+
* subclass of it) outside its methods (e.g. `instance.attr`).
429+
*
430+
* This is the cross-instance counterpart of `localFieldStep`: it relates a write of
431+
* `self.attr` inside a class to a read of `attr` on a reference to an instance of that
432+
* class or one of its subclasses. Identifying instances relies on the call graph (via
433+
* `classInstanceTracker`), so this step is reported as `levelStepCall` rather than
434+
* `levelStepNoCall`. The write may occur in the instance's own class or in any of its
435+
* superclasses, since those methods are inherited.
436+
*
437+
* Like `localFieldStep`, this is an over-approximation: it is both instance-insensitive
438+
* and order-insensitive.
439+
*/
440+
private predicate instanceFieldStep(Node nodeFrom, LocalSourceNode nodeTo) {
441+
exists(
442+
Class writeCls, Class instanceCls, string attr, DataFlowPublic::AttrWrite write,
443+
DataFlowPublic::AttrRead read
444+
|
445+
selfAttrRef(writeCls, attr, write) and
446+
nodeFrom = write.getValue() and
447+
instanceAttrRead(instanceCls, attr, read) and
448+
nodeTo = read and
449+
writeCls = DataFlowDispatch::getADirectSuperclass*(instanceCls)
450+
)
451+
}
452+
320453
/**
321454
* Holds if data can flow from `node1` to `node2` in a way that discards call contexts.
322455
*/

python/ql/src/Resources/FileNotAlwaysClosedQuery.qll

Lines changed: 18 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
import python
44
import semmle.python.dataflow.new.internal.DataFlowDispatch
55
import semmle.python.ApiGraphs
6+
private import semmle.python.dataflow.new.internal.ReExposedInstance
67

78
/** A CFG node where a file is opened. */
89
abstract class FileOpenSource extends DataFlow::CfgNode { }
@@ -21,12 +22,28 @@ private DataFlow::TypeTrackingNode fileOpenInstance(DataFlow::TypeTracker t) {
2122
exists(DataFlow::TypeTracker t2 | result = fileOpenInstance(t2).track(t2, t))
2223
}
2324

25+
/**
26+
* Holds if `node` is tracked to be an instance of an open file object.
27+
*/
28+
private predicate fileInstanceNode(DataFlow::Node node) {
29+
fileOpenInstance(DataFlow::TypeTracker::end()).flowsTo(node)
30+
}
31+
32+
private module FileReExposed = ReExposedInstance<fileInstanceNode/1>;
33+
2434
/**
2535
* A call that returns an instance of an open file object.
2636
* This includes calls to methods that transitively call `open` or similar.
2737
*/
2838
class FileOpen extends DataFlow::CallCfgNode {
29-
FileOpen() { fileOpenInstance(DataFlow::TypeTracker::end()).flowsTo(this) }
39+
FileOpen() {
40+
fileOpenInstance(DataFlow::TypeTracker::end()).flowsTo(this) and
41+
// Don't treat an accessor that merely re-exposes a file held in an instance attribute
42+
// (e.g. `FileIO.fileno` returning `self._fd`) as opening a new file. Such flow is
43+
// introduced by instance-attribute type tracking; the underlying open is tracked at its
44+
// real creation site.
45+
not FileReExposed::isReExposed(this)
46+
}
3047
}
3148

3249
/** A call that may wrap a file object in a wrapper class or `os.fdopen`. */

python/ql/src/Statements/ModificationOfLocals.ql

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,9 +13,19 @@
1313

1414
import python
1515
private import semmle.python.ApiGraphs
16+
private import semmle.python.dataflow.new.DataFlow
1617

1718
predicate originIsLocals(ControlFlowNode n) {
18-
API::builtin("locals").getReturn().getAValueReachableFromSource().asCfgNode() = n
19+
// Only consider the `locals()` dictionary within the scope that called `locals()`.
20+
// Once the dictionary is passed to another scope (e.g. as an argument or via an
21+
// instance attribute) it is just an ordinary mapping, and modifying it is both
22+
// meaningful and effective. Restricting to local (intraprocedural) flow ensures we
23+
// only report modifications in the scope where the `locals()` gotcha actually applies.
24+
exists(DataFlow::LocalSourceNode src, DataFlow::Node use |
25+
src = API::builtin("locals").getReturn().asSource() and
26+
src.flowsTo(use) and
27+
use.asCfgNode() = n
28+
)
1929
}
2030

2131
predicate modification_of_locals(ControlFlowNode f) {

python/ql/src/Statements/ShouldUseWithStatement.ql

Lines changed: 14 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,9 @@
1313
*/
1414

1515
import python
16+
private import semmle.python.dataflow.new.DataFlow
1617
private import semmle.python.dataflow.new.internal.DataFlowDispatch
18+
private import semmle.python.dataflow.new.internal.ReExposedInstance
1719

1820
predicate calls_close(Call c) { exists(Attribute a | c.getFunc() = a and a.getName() = "close") }
1921

@@ -23,11 +25,21 @@ predicate only_stmt_in_finally(Try t, Call c) {
2325
)
2426
}
2527

26-
from Call close, Try t, Class cls
28+
/** Holds if `node` is tracked to be an instance of some class. */
29+
private predicate classInstanceNode(DataFlow::Node node) { node = classInstanceTracker(_) }
30+
31+
private module ClassReExposed = ReExposedInstance<classInstanceNode/1>;
32+
33+
from Call close, Try t, Class cls, DataFlow::Node closeTarget
2734
where
2835
only_stmt_in_finally(t, close) and
2936
calls_close(close) and
30-
classInstanceTracker(cls).asExpr() = close.getFunc().(Attribute).getObject() and
37+
closeTarget.asExpr() = close.getFunc().(Attribute).getObject() and
38+
closeTarget = classInstanceTracker(cls) and
39+
// Don't report closing a resource that is held in an instance attribute (e.g. `self.reader`).
40+
// Such flow is introduced by instance-attribute type tracking; the object's lifetime is tied
41+
// to the enclosing instance and cannot be expressed with a `with` statement.
42+
not ClassReExposed::isReExposed(closeTarget) and
3143
DuckTyping::isContextManager(cls)
3244
select close,
3345
"Instance of context-manager class $@ is closed in a finally block. Consider using 'with' statement.",
Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
---
2+
category: minorAnalysis
3+
---
4+
* The `py/modification-of-locals` query no longer flags modifications of a `locals()` dictionary that has been passed out of the scope in which `locals()` was called (for example, by passing it to another function or storing it in an instance attribute). In such cases the dictionary is used as an ordinary mapping and modifying it is meaningful, so these were false positives. The "modification has no effect" claim only applies within the scope that called `locals()`, which is now the only case reported.

python/ql/test/experimental/library-tests/CallGraph/InlineCallGraphTest.expected

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,6 @@
11
testFailures
22
debug_callableNotUnique
33
pointsTo_found_typeTracker_notFound
4-
| code/class_attr_assign.py:10:9:10:27 | ControlFlowNode for Attribute() | my_func |
5-
| code/class_attr_assign.py:11:9:11:25 | ControlFlowNode for Attribute() | my_func |
6-
| code/class_attr_assign.py:26:9:26:25 | ControlFlowNode for Attribute() | DummyObject.method |
74
| code/class_super.py:50:1:50:6 | ControlFlowNode for Attribute() | outside_def |
85
| code/conditional_in_argument.py:18:5:18:11 | ControlFlowNode for Attribute() | X.bar |
96
| code/func_defined_outside_class.py:21:1:21:11 | ControlFlowNode for Attribute() | A.foo |

python/ql/test/experimental/library-tests/CallGraph/code/class_attr_assign.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -7,8 +7,8 @@ def __init__(self, func):
77
self.direct_ref = my_func
88

99
def later(self):
10-
self.indirect_ref() # $ pt=my_func MISSING: tt=my_func
11-
self.direct_ref() # $ pt=my_func MISSING: tt=my_func
10+
self.indirect_ref() # $ pt=my_func tt=my_func
11+
self.direct_ref() # $ pt=my_func tt=my_func
1212

1313
foo = Foo(my_func) # $ tt=Foo.__init__
1414
foo.later() # $ pt,tt=Foo.later
@@ -23,7 +23,7 @@ def __init__(self):
2323
self.obj = DummyObject()
2424

2525
def later(self):
26-
self.obj.method() # $ pt=DummyObject.method MISSING: tt=DummyObject.method
26+
self.obj.method() # $ pt=DummyObject.method tt=DummyObject.method
2727

2828

2929
bar = Bar(my_func) # $ tt=Bar.__init__

0 commit comments

Comments
 (0)