Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@ yacctab.py
virtualenv-[0-9]*[0-9]

*.so
.venv

.asv

Expand Down
47 changes: 47 additions & 0 deletions loopy/type_inference.py
Original file line number Diff line number Diff line change
Expand Up @@ -429,6 +429,53 @@ def map_quotient(self, expr: p.Quotient):
else:
return self.combine([n_dtype_set, d_dtype_set])

def _map_int_div_modulo(self, expr: p.FloorDiv | p.Remainder):
# This is pretty gross, but generally appears to lack alternatives.
# See https://github.com/inducer/loopy/pull/1000 for some discussion.
# In general, for array // array, numpy is very eager to infer
# float dtypes (for example for u64/i32), which doesn't work for us:
# integers should stay integers to stay usable as array indices.

n_dtype_set = self.rec(expr.numerator)
d_dtype_set = self.rec(expr.denominator)

if not (n_dtype_set and d_dtype_set):
return cast("list[NumpyType]", [])

n_dtype = n_dtype_set[0].numpy_dtype
d_dtype = d_dtype_set[0].numpy_dtype
num = (
np.empty(0, dtype=n_dtype)
if not is_integer(expr.numerator)
else expr.numerator
Comment on lines +449 to +450
Copy link
Copy Markdown
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

NumPy no longer does value-dependent type promotion AFAIK (as of 2.0), so IMO this `if` isn't necessary. Just use 1/1 unconditionally.

Copy link
Copy Markdown
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't think we can avoid the conditional.

>>> ary_u64 = np.empty(0, dtype=np.uint64)
>>> (ary_u64 // 2).dtype
dtype('uint64')
>>> (ary_u64 // np.empty(0, np.int32)).dtype  # if we don't use the if.
dtype('float64')

)
denom = (
np.empty(0, dtype=d_dtype)
Copy link
Copy Markdown
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Isn't there a risk here of using a zero denominator in a carried-out calculation?

Copy link
Copy Markdown
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What do you think of: 3cdca14?

if not is_integer(expr.denominator)
else expr.denominator
)
denom = (
cast("int | np.integer", denom + 1)
if is_integer(denom) and denom == 0
else denom
) # avoid divide by zero.

if is_integer(num) and is_integer(denom):
return self.rec(num // denom)

floor_div_np = num // denom
assert isinstance(floor_div_np, np.ndarray)

return [NumpyType(floor_div_np.dtype)]

@override
def map_floor_div(self, expr: p.FloorDiv):
    """Handle a floor-division node by delegating to ``_map_int_div_modulo``."""
    # Floor division and remainder share a single inference routine.
    inferred = self._map_int_div_modulo(expr)
    return inferred

@override
def map_remainder(self, expr: p.Remainder):
    """Handle a remainder node by delegating to ``_map_int_div_modulo``."""
    # Remainder and floor division share a single inference routine.
    inferred = self._map_int_div_modulo(expr)
    return inferred

@override
def map_constant(self, expr: object):
if isinstance(expr, np.generic):
Expand Down
14 changes: 14 additions & 0 deletions test/test_loopy.py
Original file line number Diff line number Diff line change
Expand Up @@ -3733,6 +3733,20 @@ def test_type_cast_parse_stringify_roundtrip():
assert expr == parsed


def test_floor_div_modulo_with_uint_index():
    """Check that code generation succeeds for ``//`` and ``%`` on a uint64 index array."""
    # See <https://github.com/inducer/loopy/issues/999>
    kernel_args = [
        lp.GlobalArg("map", dtype=np.uint64, shape=lp.auto),
        lp.GlobalArg("a", dtype=np.float64, shape=(10, 4)),
    ]
    kernel = lp.make_kernel(
        "{[i]: 0<=i<10}",
        "a[map[i] // 2, map[i] % 35] = i",
        kernel_args,
    )

    # check the codegen is successful
    generated = lp.generate_code_v2(kernel)
    generated.device_code()


if __name__ == "__main__":
import sys
if len(sys.argv) > 1:
Expand Down
Loading