Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2,446 changes: 1,237 additions & 1,209 deletions Include/internal/pycore_uop_ids.h

Large diffs are not rendered by default.

133 changes: 133 additions & 0 deletions Include/internal/pycore_uop_metadata.h

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

165 changes: 165 additions & 0 deletions Lib/test/test_capi/test_opt.py
Original file line number Diff line number Diff line change
Expand Up @@ -3072,6 +3072,171 @@ def testfunc(args):
uops = get_opnames(ex)
self.assertIn("_POP_TOP_NOP", uops)

def test_float_add_inplace_unique_lhs(self):
    # The product x * y is a unique float sitting on the left of "+";
    # adding z is expected to reuse that float in place.
    def testfunc(args):
        x, y, z, count = args
        acc = 0.0
        for _ in range(count):
            acc += x * y + z
        return acc

    inputs = (2.0, 3.0, 4.0, TIER2_THRESHOLD)
    result, executor = self._run_with_optimizer(testfunc, inputs)
    # Each iteration contributes 2.0 * 3.0 + 4.0 == 10.0.
    self.assertAlmostEqual(result, TIER2_THRESHOLD * 10.0)
    self.assertIsNotNone(executor)
    opnames = get_opnames(executor)
    self.assertIn("_BINARY_OP_ADD_FLOAT_INPLACE", opnames)

def test_float_add_inplace_unique_rhs(self):
    # a * b produces a unique float on the right side of +, so
    # c + (a * b) is expected to reuse the product in place.
    def testfunc(args):
        a, b, c, n = args
        total = 0.0
        for _ in range(n):
            total += c + a * b
        return total

    res, ex = self._run_with_optimizer(testfunc, (2.0, 3.0, 4.0, TIER2_THRESHOLD))
    # Each iteration contributes 4.0 + 2.0 * 3.0 == 10.0.
    self.assertAlmostEqual(res, TIER2_THRESHOLD * 10.0)
    self.assertIsNotNone(ex)
    uops = get_opnames(ex)
    self.assertIn("_BINARY_OP_ADD_FLOAT_INPLACE_RIGHT", uops)
    # The accumulation "total += <unique float>" emits
    # _BINARY_OP_ADD_FLOAT_INPLACE_RIGHT by itself, so the check above
    # would pass even if c + a * b were left unoptimized.  Both adds in
    # the loop have a unique right operand, so no regular float add may
    # remain in the trace.
    self.assertNotIn("_BINARY_OP_ADD_FLOAT", uops)

def test_float_add_no_inplace_non_unique(self):
    # x + y adds two locals, so neither operand is unique and the
    # inner add stays a regular one.  Its result is unique, though,
    # so the accumulation acc += (x + y) uses _INPLACE_RIGHT.
    def testfunc(args):
        x, y, count = args
        acc = 0.0
        for _ in range(count):
            acc += x + y
        return acc

    inputs = (2.0, 3.0, TIER2_THRESHOLD)
    result, executor = self._run_with_optimizer(testfunc, inputs)
    # Each iteration contributes 2.0 + 3.0 == 5.0.
    self.assertAlmostEqual(result, TIER2_THRESHOLD * 5.0)
    self.assertIsNotNone(executor)
    opnames = get_opnames(executor)
    # Inner add: both operands are locals, so it is a plain add.
    self.assertIn("_BINARY_OP_ADD_FLOAT", opnames)
    # Accumulation: the right-hand operand is the unique sum.
    self.assertIn("_BINARY_OP_ADD_FLOAT_INPLACE_RIGHT", opnames)
    # The left-hand in-place variant must not be used for the inner add.
    self.assertNotIn("_BINARY_OP_ADD_FLOAT_INPLACE", opnames)

def test_float_subtract_inplace_unique_lhs(self):
    # The product x * y is a unique float on the left of "-";
    # subtracting z is expected to reuse it.
    def testfunc(args):
        x, y, z, count = args
        acc = 0.0
        for _ in range(count):
            acc += x * y - z
        return acc

    inputs = (2.0, 3.0, 1.0, TIER2_THRESHOLD)
    result, executor = self._run_with_optimizer(testfunc, inputs)
    # Each iteration contributes 2.0 * 3.0 - 1.0 == 5.0.
    self.assertAlmostEqual(result, TIER2_THRESHOLD * 5.0)
    self.assertIsNotNone(executor)
    opnames = get_opnames(executor)
    self.assertIn("_BINARY_OP_SUBTRACT_FLOAT_INPLACE", opnames)

def test_float_subtract_inplace_unique_rhs(self):
    # The product x * y is a unique float on the right of "-".
    # Subtraction is not commutative: the result has to be
    # z - (x * y), so the sign of the total is checked below.
    def testfunc(args):
        x, y, z, count = args
        acc = 0.0
        for _ in range(count):
            acc += z - x * y
        return acc

    inputs = (2.0, 3.0, 1.0, TIER2_THRESHOLD)
    result, executor = self._run_with_optimizer(testfunc, inputs)
    # Each iteration contributes 1.0 - 2.0 * 3.0 == -5.0.
    self.assertAlmostEqual(result, TIER2_THRESHOLD * -5.0)
    self.assertIsNotNone(executor)
    opnames = get_opnames(executor)
    self.assertIn("_BINARY_OP_SUBTRACT_FLOAT_INPLACE_RIGHT", opnames)

def test_float_multiply_inplace_unique_lhs(self):
    # The sum (x + y) is a unique float on the left of "*";
    # multiplying by z is expected to reuse it.
    def testfunc(args):
        x, y, z, count = args
        acc = 0.0
        for _ in range(count):
            acc += (x + y) * z
        return acc

    inputs = (2.0, 3.0, 4.0, TIER2_THRESHOLD)
    result, executor = self._run_with_optimizer(testfunc, inputs)
    # Each iteration contributes (2.0 + 3.0) * 4.0 == 20.0.
    self.assertAlmostEqual(result, TIER2_THRESHOLD * 20.0)
    self.assertIsNotNone(executor)
    opnames = get_opnames(executor)
    self.assertIn("_BINARY_OP_MULTIPLY_FLOAT_INPLACE", opnames)

def test_float_multiply_inplace_unique_rhs(self):
    # The sum (x + y) is a unique float on the right side of "*".
    def testfunc(args):
        x, y, z, count = args
        acc = 0.0
        for _ in range(count):
            acc += z * (x + y)
        return acc

    inputs = (2.0, 3.0, 4.0, TIER2_THRESHOLD)
    result, executor = self._run_with_optimizer(testfunc, inputs)
    # Each iteration contributes 4.0 * (2.0 + 3.0) == 20.0.
    self.assertAlmostEqual(result, TIER2_THRESHOLD * 20.0)
    self.assertIsNotNone(executor)
    opnames = get_opnames(executor)
    self.assertIn("_BINARY_OP_MULTIPLY_FLOAT_INPLACE_RIGHT", opnames)

def test_float_inplace_chain_propagation(self):
    # a * b + c * d: both products are unique, so the + between them
    # reuses one of them; the result of + is unique as well, so the
    # subsequent += is expected to run in place too.
    def testfunc(args):
        a, b, c, d, n = args
        total = 0.0
        for _ in range(n):
            total += a * b + c * d
        return total

    res, ex = self._run_with_optimizer(testfunc, (2.0, 3.0, 4.0, 5.0, TIER2_THRESHOLD))
    # Each iteration contributes 2.0 * 3.0 + 4.0 * 5.0 == 26.0.
    self.assertAlmostEqual(res, TIER2_THRESHOLD * 26.0)
    self.assertIsNotNone(ex)
    uops = get_opnames(ex)
    # At least one add in the trace must use an inplace variant
    inplace_add = (
        "_BINARY_OP_ADD_FLOAT_INPLACE" in uops
        or "_BINARY_OP_ADD_FLOAT_INPLACE_RIGHT" in uops
    )
    self.assertTrue(inplace_add,
                    "Expected an inplace add for unique intermediate results")
    # The accumulation "total += <unique float>" already satisfies the
    # check above on its own, so it cannot tell whether the + between
    # the two products was optimized.  Every add in the loop has a
    # unique operand, so no regular float add may remain in the trace.
    self.assertNotIn("_BINARY_OP_ADD_FLOAT", uops)

def test_float_negate_inplace_unique(self):
    # -(x * y): the product is a unique float, so the negation is
    # expected to happen in place.
    def testfunc(args):
        x, y, count = args
        acc = 0.0
        for _ in range(count):
            acc += -(x * y)
        return acc

    inputs = (2.0, 3.0, TIER2_THRESHOLD)
    result, executor = self._run_with_optimizer(testfunc, inputs)
    # Each iteration contributes -(2.0 * 3.0) == -6.0.
    self.assertAlmostEqual(result, TIER2_THRESHOLD * -6.0)
    self.assertIsNotNone(executor)
    opnames = get_opnames(executor)
    self.assertIn("_UNARY_NEGATIVE_FLOAT_INPLACE", opnames)

def test_float_negate_no_inplace_non_unique(self):
    # -x negates a local, which is not unique, so the in-place
    # negation must not be used.
    def testfunc(args):
        x, count = args
        acc = 0.0
        for _ in range(count):
            acc += -x
        return acc

    inputs = (2.0, TIER2_THRESHOLD)
    result, executor = self._run_with_optimizer(testfunc, inputs)
    # Each iteration contributes -2.0.
    self.assertAlmostEqual(result, TIER2_THRESHOLD * -2.0)
    self.assertIsNotNone(executor)
    opnames = get_opnames(executor)
    self.assertNotIn("_UNARY_NEGATIVE_FLOAT_INPLACE", opnames)

def test_load_attr_instance_value(self):
def testfunc(n):
class C():
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
Optimize float arithmetic in the JIT by mutating uniquely referenced
operands in place, avoiding allocation of a new float object. Speeds up
the pyperformance ``nbody`` benchmark by ~19%.
Loading
Loading