diff --git a/pyiceberg/expressions/visitors.py b/pyiceberg/expressions/visitors.py index e4ab3befa3..9145c30d25 100644 --- a/pyiceberg/expressions/visitors.py +++ b/pyiceberg/expressions/visitors.py @@ -1973,6 +1973,12 @@ def residual_for(self, partition_data: Record) -> BooleanExpression: def residual_evaluator_of( spec: PartitionSpec, expr: BooleanExpression, case_sensitive: bool, schema: Schema ) -> ResidualEvaluator: + """Create a residual evaluator. + + Returns a fresh evaluator instance because evaluators are stateful + (they set `self.struct` during evaluation) and may be used from multiple + threads. + """ return ( UnpartitionedResidualEvaluator(schema=schema, expr=expr) if spec.is_unpartitioned() diff --git a/tests/expressions/test_residual_evaluator.py b/tests/expressions/test_residual_evaluator.py index ba0a0da2e5..f175e49f66 100644 --- a/tests/expressions/test_residual_evaluator.py +++ b/tests/expressions/test_residual_evaluator.py @@ -88,6 +88,19 @@ def test_identity_transform_residual() -> None: assert residual == AlwaysFalse() +def test_residual_evaluator_of_returns_fresh_instance() -> None: + schema = Schema(NestedField(50, "dateint", IntegerType()), NestedField(51, "hour", IntegerType())) + spec = PartitionSpec(PartitionField(50, 1050, IdentityTransform(), "dateint_part")) + predicate = LessThan("dateint", 20170815) + + res_eval_1 = residual_evaluator_of(spec=spec, expr=predicate, case_sensitive=True, schema=schema) + res_eval_2 = residual_evaluator_of(spec=spec, expr=predicate, case_sensitive=True, schema=schema) + + assert res_eval_1 is not res_eval_2 + assert res_eval_1.residual_for(Record(20170814)) == AlwaysTrue() + assert res_eval_2.residual_for(Record(20170816)) == AlwaysFalse() + + def test_case_insensitive_identity_transform_residuals() -> None: schema = Schema(NestedField(50, "dateint", IntegerType()), NestedField(51, "hour", IntegerType()))