Skip to content

Commit 57fa3ee

Browse files
yoffCopilot
andcommitted
Python: SSA: handle closure variables via per-scope entry defs
The new SSA's implicit entry-def predicate previously placed entries in the variable's defining scope. For closure variables that's the outer function, so inner functions had no entry def for the captured variable — reads in the inner scope failed to resolve to any definition. Mirrors legacy ESSA's 'NonLocalVariable.getScopeEntryDefinition()': place an implicit entry def at every reading scope's entry block, independently of where the variable is *defined*. A closure variable accessed in two nested functions and the outer one gets three entry defs (one per reading scope). Also makes 'ScopeEntryDefinition' extend 'EssaNodeDefinition' (matching legacy ESSA), with 'getDefiningNode()' returning the scope's entry CFG node. This requires extending the private 'writeDefNode' helper to project i=-1 entries to bb.getNode(0). Updates the new-vs-legacy comparison snapshot: closure-variable reads ('x:32:5'), nested global reads ('GLOBAL:52:1') now resolve. New 'def-only-new' entries appear for unbound names ('sum', 'open', 'compute') — the new SSA uniformly creates scope-entry defs for all non-local reads, including those that legacy ESSA classifies as builtin and excludes. This is a more uniform semantic and arguably cleaner. Updates the SsaTest 'some_undefined' annotation: previously documented as a known limitation, now correctly resolves to a scope-entry def. Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
1 parent ac468c8 commit 57fa3ee

3 files changed

Lines changed: 40 additions & 25 deletions

File tree

python/ql/lib/semmle/python/dataflow/new/internal/SsaImpl.qll

Lines changed: 36 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -83,6 +83,10 @@ class SsaSourceVariable extends TSsaSourceVariable {
8383
* Holds if `v` is a non-local read in scope `s`, in the sense that `s`
8484
* uses `v` but does not write it within `s`. This includes globals,
8585
* builtins, and variables captured from an enclosing function scope.
86+
*
87+
* The `Py::Variable` `v` lives in some defining scope (the module for
88+
* globals, an outer function for closures, etc.); the reading scope
89+
* `s` is the scope where the use of `v` occurs.
8690
*/
8791
private predicate nonLocalReadIn(Py::Variable v, Py::Scope s) {
8892
exists(Cfg::NameNode n |
@@ -93,17 +97,23 @@ private predicate nonLocalReadIn(Py::Variable v, Py::Scope s) {
9397
}
9498

9599
/**
96-
* Holds if `v` should have an implicit entry definition at the start of
97-
* scope `s`. This covers:
98-
* - non-local / global / builtin variables (defined outside `s`), and
99-
* - captured variables (defined in an enclosing scope but read here).
100+
* Holds if `bb` is the entry basic block of a scope where `v` should
101+
* have an implicit entry definition. This covers:
102+
* - non-local / global / builtin variables read in `s`, and
103+
* - captured variables (defined in an enclosing scope but read in `s`).
104+
*
105+
* Each reading scope gets its own entry def, so a closure variable can
106+
* have multiple entry defs across all functions/methods that read it.
100107
*
101108
* Parameters are *not* included: their bound `Name` is itself a CFG
102109
* node (per the C#-style parameter wiring), so `variableWrite` fires at
103110
* the parameter's natural CFG index.
104111
*/
105-
private predicate hasEntryDef(SsaSourceVariable v, Py::Scope s) {
106-
nonLocalReadIn(v.getVariable(), s)
112+
private predicate hasEntryDefIn(SsaSourceVariable v, CfgImpl::BasicBlock bb) {
113+
exists(Py::Scope s |
114+
nonLocalReadIn(v.getVariable(), s) and
115+
bb = entryBlock(s)
116+
)
107117
}
108118

109119
/**
@@ -144,9 +154,11 @@ private module SsaImplInput implements SsaImplCommon::InputSig<Py::Location, Cfg
144154
)
145155
or
146156
// Implicit entry definition for non-local / captured / global /
147-
// builtin variables read in the scope.
148-
bb = entryBlock(v.getVariable().getScope()) and
149-
hasEntryDef(v, v.getVariable().getScope()) and
157+
// builtin variables read in some scope. Each reading scope's entry
158+
// block gets one such write, allowing closures: e.g. when `x` is a
159+
// parameter of an outer function and read inside a nested
160+
// function, both scopes get entry defs for `x`.
161+
hasEntryDefIn(v, bb) and
150162
i = -1 and
151163
certain = true
152164
}
@@ -197,14 +209,16 @@ final class PhiNode = Ssa::PhiNode;
197209
/**
198210
* Gets the CFG node at which a write definition's binding takes place.
199211
*
200-
* This is the `Cfg::ControlFlowNode` whose index in `def`'s basic block
201-
* is the same as `def`'s defining index. Phi definitions have no
202-
* defining CFG node and are excluded.
212+
* For ordinary writes (assignment, deletion, parameter) this is the
213+
* canonical CFG node of the bound Name. For implicit entry definitions
214+
* (synthesised at position `-1` of a scope's entry BB) this is the
215+
* scope's entry node.
203216
*/
204217
private Cfg::ControlFlowNode writeDefNode(Ssa::WriteDefinition def) {
205-
exists(CfgImpl::BasicBlock bb, int i |
206-
def.definesAt(_, bb, i) and
207-
result = bb.getNode(i)
218+
exists(CfgImpl::BasicBlock bb, int i | def.definesAt(_, bb, i) |
219+
i >= 0 and result = bb.getNode(i)
220+
or
221+
i = -1 and result = bb.getNode(0)
208222
)
209223
}
210224

@@ -290,23 +304,23 @@ class WithDefinition extends EssaNodeDefinition {
290304
/**
291305
* An implicit entry definition for a non-local / captured / global /
292306
* builtin variable read in a scope but not defined there.
307+
*
308+
* Inherits from `EssaNodeDefinition` and exposes the scope's entry node
309+
* as its defining node (matching legacy ESSA semantics).
293310
*/
294-
class ScopeEntryDefinition extends Ssa::Definition {
311+
class ScopeEntryDefinition extends EssaNodeDefinition {
295312
ScopeEntryDefinition() {
296313
exists(CfgImpl::BasicBlock bb |
297314
this.definesAt(_, bb, -1) and
298315
bb instanceof CfgImpl::Cfg::EntryBasicBlock
299316
)
300317
}
301318

302-
/** Gets the variable being entered. */
303-
SsaSourceVariable getVariable() { result = this.getSourceVariable() }
304-
305-
/** Gets the enclosing scope. */
306-
Py::Scope getScope() {
319+
/** Gets the enclosing scope (the scope whose entry block this def is in). */
320+
override Py::Scope getScope() {
307321
exists(CfgImpl::BasicBlock bb |
308322
this.definesAt(_, bb, -1) and
309-
result = this.getSourceVariable().getVariable().getScope()
323+
result = bb.getNode(0).(Cfg::ControlFlowNode).getScope()
310324
)
311325
}
312326
}

python/ql/test/library-tests/dataflow-new-ssa-vs-legacy/CmpTest.expected

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,8 @@
1+
| def-only-new | compute:37:1 |
2+
| def-only-new | open:44:1 |
3+
| def-only-new | sum:27:1 |
14
| def-only-old | $:0:0 |
25
| def-only-old | GLOBAL:49:1 |
3-
| def-only-old | GLOBAL:52:1 |
46
| def-only-old | __name__:0:0 |
57
| def-only-old | __package__:0:0 |
68
| def-only-old | closure:31:5 |
@@ -17,4 +19,3 @@
1719
| def-only-old | with_binding:44:5 |
1820
| def-only-old | x:20:1 |
1921
| def-only-old | x:31:13 |
20-
| def-only-old | x:32:5 |

python/ql/test/library-tests/dataflow-new-ssa/test.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,6 @@ def if_else_phi(cond): # $ def=cond
3535

3636

3737
def use_global():
38-
return some_undefined # known limitation: undefined globals not resolved here
38+
return some_undefined # $ use=some_undefined
3939

4040

0 commit comments

Comments
 (0)