
Commit 9b2dbc9

GH-47348: [Python] Include retry_strategy in S3FileSystem.__reduce__ method
S3FileSystem's retry_strategy should survive serialization and deserialization.

Signed-off-by: Kit Lee <7000003+wingkitlee0@users.noreply.github.com>
1 parent: ed77d25 · commit: 9b2dbc9
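With this change, a configured retry strategy round-trips through pickle instead of being silently dropped. A minimal sketch of the intended behavior, assuming a pyarrow build with S3 support (retry_strategy here is the property added by this commit):

import pickle
from pyarrow.fs import S3FileSystem, AwsStandardS3RetryStrategy

# Construct a filesystem with a non-default retry strategy.
fs = S3FileSystem(retry_strategy=AwsStandardS3RetryStrategy(max_attempts=5))

# Serialize and deserialize; the retry strategy now survives the round trip.
restored = pickle.loads(pickle.dumps(fs))
assert restored == fs
assert restored.retry_strategy.max_attempts == 5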

File tree

2 files changed: +18 -1 lines changed

python/pyarrow/_s3fs.pyx

Lines changed: 14 additions & 0 deletions
@@ -120,6 +120,9 @@ class S3RetryStrategy:
     def __init__(self, max_attempts=3):
         self.max_attempts = max_attempts
 
+    def __reduce__(self):
+        return (self.__class__, (self.max_attempts,))
+
 
 class AwsStandardS3RetryStrategy(S3RetryStrategy):
     """
@@ -281,6 +284,7 @@ cdef class S3FileSystem(FileSystem):
 
     cdef:
         CS3FileSystem* s3fs
+        object _retry_strategy
 
     def __init__(self, *, access_key=None, secret_key=None, session_token=None,
                  bint anonymous=False, region=None, request_timeout=None,
@@ -412,9 +416,11 @@ cdef class S3FileSystem(FileSystem):
         if isinstance(retry_strategy, AwsStandardS3RetryStrategy):
             options.value().retry_strategy = CS3RetryStrategy.GetAwsStandardRetryStrategy(
                 retry_strategy.max_attempts)
+            self._retry_strategy = retry_strategy
         elif isinstance(retry_strategy, AwsDefaultS3RetryStrategy):
             options.value().retry_strategy = CS3RetryStrategy.GetAwsDefaultRetryStrategy(
                 retry_strategy.max_attempts)
+            self._retry_strategy = retry_strategy
         else:
             raise ValueError(f'Invalid retry_strategy {retry_strategy!r}')
         if tls_ca_file_path is not None:
@@ -470,6 +476,7 @@ cdef class S3FileSystem(FileSystem):
             allow_bucket_creation=opts.allow_bucket_creation,
             allow_bucket_deletion=opts.allow_bucket_deletion,
             check_directory_existence_before_creation=opts.check_directory_existence_before_creation,
+            retry_strategy=self._retry_strategy,
             default_metadata=pyarrow_wrap_metadata(opts.default_metadata),
             proxy_options={'scheme': frombytes(opts.proxy_options.scheme),
                            'host': frombytes(opts.proxy_options.host),
@@ -489,3 +496,10 @@ cdef class S3FileSystem(FileSystem):
         The AWS region this filesystem connects to.
         """
         return frombytes(self.s3fs.region())
+
+    @property
+    def retry_strategy(self):
+        """
+        The retry strategy currently configured for this S3 filesystem.
+        """
+        return self._retry_strategy
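
The __reduce__ hook added to S3RetryStrategy above is the standard pickle protocol: it returns a (callable, args) pair, and pickle rebuilds the object by calling callable(*args). The later hunks then thread the stored strategy through S3FileSystem.__reduce__ via the new retry_strategy keyword. A self-contained sketch of the pattern, using a hypothetical plain-Python stand-in rather than the Cython class:

import pickle

class RetryStrategy:
    # Hypothetical stand-in for pyarrow's S3RetryStrategy, for illustration.
    def __init__(self, max_attempts=3):
        self.max_attempts = max_attempts

    def __reduce__(self):
        # On load, pickle calls self.__class__(self.max_attempts).
        return (self.__class__, (self.max_attempts,))

strategy = RetryStrategy(max_attempts=5)
restored = pickle.loads(pickle.dumps(strategy))
assert restored.max_attempts == 5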

python/pyarrow/tests/test_fs.py

Lines changed: 4 additions & 1 deletion
@@ -1201,14 +1201,17 @@ def test_s3_options(pickle_module):
     assert isinstance(fs, S3FileSystem)
     assert pickle_module.loads(pickle_module.dumps(fs)) == fs
 
-    # Note that the retry strategy won't survive pickling for now
     fs = S3FileSystem(
         retry_strategy=AwsStandardS3RetryStrategy(max_attempts=5))
     assert isinstance(fs, S3FileSystem)
+    assert pickle_module.loads(pickle_module.dumps(fs)) == fs
+    assert fs.retry_strategy.max_attempts == 5
 
     fs = S3FileSystem(
         retry_strategy=AwsDefaultS3RetryStrategy(max_attempts=5))
     assert isinstance(fs, S3FileSystem)
+    assert pickle_module.loads(pickle_module.dumps(fs)) == fs
+    assert fs.retry_strategy.max_attempts == 5
 
     fs2 = S3FileSystem(role_arn='role')
     assert isinstance(fs2, S3FileSystem)
