Skip to content

Commit 42d12b7

Browse files
eendebakpt and claude committed
gh-146306: Optimize float arithmetic in JIT by mutating uniquely-referenced operands in place
When the tier 2 optimizer can prove that an operand to a float operation is uniquely referenced (refcount 1), mutate it in place instead of allocating a new PyFloatObject.

New tier 2 micro-ops:
- _BINARY_OP_{ADD,SUBTRACT,MULTIPLY}_FLOAT_INPLACE (unique LHS)
- _BINARY_OP_{ADD,SUBTRACT,MULTIPLY}_FLOAT_INPLACE_RIGHT (unique RHS)
- _UNARY_NEGATIVE_FLOAT_INPLACE (unique operand)

Speeds up the pyperformance nbody benchmark by ~19%.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
1 parent a17301a commit 42d12b7

File tree

8 files changed

+2469
-1229
lines changed

8 files changed

+2469
-1229
lines changed

Include/internal/pycore_uop_ids.h

Lines changed: 1237 additions & 1209 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

Include/internal/pycore_uop_metadata.h

Lines changed: 133 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

Lib/test/test_capi/test_opt.py

Lines changed: 165 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -3072,6 +3072,171 @@ def testfunc(args):
30723072
uops = get_opnames(ex)
30733073
self.assertIn("_POP_TOP_NOP", uops)
30743074

3075+
def test_float_add_inplace_unique_lhs(self):
3076+
# a * b produces a unique float; adding c reuses it in place
3077+
def testfunc(args):
3078+
a, b, c, n = args
3079+
total = 0.0
3080+
for _ in range(n):
3081+
total += a * b + c
3082+
return total
3083+
3084+
res, ex = self._run_with_optimizer(testfunc, (2.0, 3.0, 4.0, TIER2_THRESHOLD))
3085+
self.assertAlmostEqual(res, TIER2_THRESHOLD * 10.0)
3086+
self.assertIsNotNone(ex)
3087+
uops = get_opnames(ex)
3088+
self.assertIn("_BINARY_OP_ADD_FLOAT_INPLACE", uops)
3089+
3090+
def test_float_add_inplace_unique_rhs(self):
3091+
# a * b produces a unique float on the right side of +
3092+
def testfunc(args):
3093+
a, b, c, n = args
3094+
total = 0.0
3095+
for _ in range(n):
3096+
total += c + a * b
3097+
return total
3098+
3099+
res, ex = self._run_with_optimizer(testfunc, (2.0, 3.0, 4.0, TIER2_THRESHOLD))
3100+
self.assertAlmostEqual(res, TIER2_THRESHOLD * 10.0)
3101+
self.assertIsNotNone(ex)
3102+
uops = get_opnames(ex)
3103+
self.assertIn("_BINARY_OP_ADD_FLOAT_INPLACE_RIGHT", uops)
3104+
3105+
def test_float_add_no_inplace_non_unique(self):
3106+
# Both operands of a + b are locals — neither is unique,
3107+
# so the first add is regular. But total += (a+b) has a
3108+
# unique RHS, so it uses _INPLACE_RIGHT.
3109+
def testfunc(args):
3110+
a, b, n = args
3111+
total = 0.0
3112+
for _ in range(n):
3113+
total += a + b
3114+
return total
3115+
3116+
res, ex = self._run_with_optimizer(testfunc, (2.0, 3.0, TIER2_THRESHOLD))
3117+
self.assertAlmostEqual(res, TIER2_THRESHOLD * 5.0)
3118+
self.assertIsNotNone(ex)
3119+
uops = get_opnames(ex)
3120+
# a + b: both are locals, no inplace
3121+
self.assertIn("_BINARY_OP_ADD_FLOAT", uops)
3122+
# total += result: result is unique RHS
3123+
self.assertIn("_BINARY_OP_ADD_FLOAT_INPLACE_RIGHT", uops)
3124+
# No LHS inplace variant for the first add
3125+
self.assertNotIn("_BINARY_OP_ADD_FLOAT_INPLACE", uops)
3126+
3127+
def test_float_subtract_inplace_unique_lhs(self):
3128+
# a * b produces a unique float; subtracting c reuses it
3129+
def testfunc(args):
3130+
a, b, c, n = args
3131+
total = 0.0
3132+
for _ in range(n):
3133+
total += a * b - c
3134+
return total
3135+
3136+
res, ex = self._run_with_optimizer(testfunc, (2.0, 3.0, 1.0, TIER2_THRESHOLD))
3137+
self.assertAlmostEqual(res, TIER2_THRESHOLD * 5.0)
3138+
self.assertIsNotNone(ex)
3139+
uops = get_opnames(ex)
3140+
self.assertIn("_BINARY_OP_SUBTRACT_FLOAT_INPLACE", uops)
3141+
3142+
def test_float_subtract_inplace_unique_rhs(self):
3143+
# a * b produces a unique float on the right of -;
3144+
# result is c - (a * b), must get the sign correct
3145+
def testfunc(args):
3146+
a, b, c, n = args
3147+
total = 0.0
3148+
for _ in range(n):
3149+
total += c - a * b
3150+
return total
3151+
3152+
res, ex = self._run_with_optimizer(testfunc, (2.0, 3.0, 1.0, TIER2_THRESHOLD))
3153+
self.assertAlmostEqual(res, TIER2_THRESHOLD * -5.0)
3154+
self.assertIsNotNone(ex)
3155+
uops = get_opnames(ex)
3156+
self.assertIn("_BINARY_OP_SUBTRACT_FLOAT_INPLACE_RIGHT", uops)
3157+
3158+
def test_float_multiply_inplace_unique_lhs(self):
3159+
# (a + b) produces a unique float; multiplying by c reuses it
3160+
def testfunc(args):
3161+
a, b, c, n = args
3162+
total = 0.0
3163+
for _ in range(n):
3164+
total += (a + b) * c
3165+
return total
3166+
3167+
res, ex = self._run_with_optimizer(testfunc, (2.0, 3.0, 4.0, TIER2_THRESHOLD))
3168+
self.assertAlmostEqual(res, TIER2_THRESHOLD * 20.0)
3169+
self.assertIsNotNone(ex)
3170+
uops = get_opnames(ex)
3171+
self.assertIn("_BINARY_OP_MULTIPLY_FLOAT_INPLACE", uops)
3172+
3173+
def test_float_multiply_inplace_unique_rhs(self):
3174+
# (a + b) produces a unique float on the right side of *
3175+
def testfunc(args):
3176+
a, b, c, n = args
3177+
total = 0.0
3178+
for _ in range(n):
3179+
total += c * (a + b)
3180+
return total
3181+
3182+
res, ex = self._run_with_optimizer(testfunc, (2.0, 3.0, 4.0, TIER2_THRESHOLD))
3183+
self.assertAlmostEqual(res, TIER2_THRESHOLD * 20.0)
3184+
self.assertIsNotNone(ex)
3185+
uops = get_opnames(ex)
3186+
self.assertIn("_BINARY_OP_MULTIPLY_FLOAT_INPLACE_RIGHT", uops)
3187+
3188+
def test_float_inplace_chain_propagation(self):
3189+
# a * b + c * d: both products are unique, the + reuses one;
3190+
# result of + is also unique for the subsequent +=
3191+
def testfunc(args):
3192+
a, b, c, d, n = args
3193+
total = 0.0
3194+
for _ in range(n):
3195+
total += a * b + c * d
3196+
return total
3197+
3198+
res, ex = self._run_with_optimizer(testfunc, (2.0, 3.0, 4.0, 5.0, TIER2_THRESHOLD))
3199+
self.assertAlmostEqual(res, TIER2_THRESHOLD * 26.0)
3200+
self.assertIsNotNone(ex)
3201+
uops = get_opnames(ex)
3202+
# The + between the two products should use an inplace variant
3203+
inplace_add = (
3204+
"_BINARY_OP_ADD_FLOAT_INPLACE" in uops
3205+
or "_BINARY_OP_ADD_FLOAT_INPLACE_RIGHT" in uops
3206+
)
3207+
self.assertTrue(inplace_add,
3208+
"Expected an inplace add for unique intermediate results")
3209+
3210+
def test_float_negate_inplace_unique(self):
3211+
# -(a * b): the product is unique, negate it in place
3212+
def testfunc(args):
3213+
a, b, n = args
3214+
total = 0.0
3215+
for _ in range(n):
3216+
total += -(a * b)
3217+
return total
3218+
3219+
res, ex = self._run_with_optimizer(testfunc, (2.0, 3.0, TIER2_THRESHOLD))
3220+
self.assertAlmostEqual(res, TIER2_THRESHOLD * -6.0)
3221+
self.assertIsNotNone(ex)
3222+
uops = get_opnames(ex)
3223+
self.assertIn("_UNARY_NEGATIVE_FLOAT_INPLACE", uops)
3224+
3225+
def test_float_negate_no_inplace_non_unique(self):
3226+
# -a where a is a local — not unique, no inplace
3227+
def testfunc(args):
3228+
a, n = args
3229+
total = 0.0
3230+
for _ in range(n):
3231+
total += -a
3232+
return total
3233+
3234+
res, ex = self._run_with_optimizer(testfunc, (2.0, TIER2_THRESHOLD))
3235+
self.assertAlmostEqual(res, TIER2_THRESHOLD * -2.0)
3236+
self.assertIsNotNone(ex)
3237+
uops = get_opnames(ex)
3238+
self.assertNotIn("_UNARY_NEGATIVE_FLOAT_INPLACE", uops)
3239+
30753240
def test_load_attr_instance_value(self):
30763241
def testfunc(n):
30773242
class C():
Lines changed: 3 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,3 @@
1+
Optimize float arithmetic in the JIT by mutating uniquely-referenced
2+
operands in place, avoiding allocation of a new float object. Speeds up
3+
the pyperformance ``nbody`` benchmark by ~19%.

0 commit comments

Comments
 (0)