-
Notifications
You must be signed in to change notification settings - Fork 454
fix: sigv4 auth to use base64-encoded content sha256 and custom canonical request #3120
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -678,6 +678,8 @@ def _split_identifier_for_json(self, identifier: str | Identifier) -> dict[str, | |
| return {"namespace": identifier_tuple[:-1], "name": identifier_tuple[-1]} | ||
|
|
||
| def _init_sigv4(self, session: Session) -> None: | ||
| import base64 | ||
| import hashlib | ||
| from urllib import parse | ||
|
|
||
| import boto3 | ||
|
|
@@ -686,6 +688,12 @@ def _init_sigv4(self, session: Session) -> None: | |
| from requests import PreparedRequest | ||
| from requests.adapters import HTTPAdapter | ||
|
|
||
| class _IcebergSigV4Auth(SigV4Auth): | ||
| def canonical_request(self, request: Any) -> str: | ||
| cr = super().canonical_request(request) | ||
| # Replace the last line (body_checksum) with hex-encoded payload hash. | ||
| return cr.rsplit("\n", 1)[0] + "\n" + self.payload(request) | ||
|
|
||
| class SigV4Adapter(HTTPAdapter): | ||
| def __init__(self, **properties: str): | ||
| super().__init__() | ||
|
|
@@ -710,17 +718,27 @@ def add_headers(self, request: PreparedRequest, **kwargs: Any) -> None: # pylin | |
| # remove the connection header as it will be updated after signing | ||
| if "connection" in request.headers: | ||
| del request.headers["connection"] | ||
| # For empty bodies, explicitly set the content hash header to the SHA256 of an empty string | ||
| if not request.body: | ||
| request.headers["x-amz-content-sha256"] = EMPTY_BODY_SHA256 | ||
|
|
||
| # Compute the x-amz-content-sha256 header to match Iceberg Java SDK: | ||
| # - empty body → hex (EMPTY_BODY_SHA256) | ||
| # - non-empty body → base64 | ||
| if request.body: | ||
| body_bytes = request.body.encode("utf-8") if isinstance(request.body, str) else request.body | ||
| content_sha256_header = base64.b64encode(hashlib.sha256(body_bytes).digest()).decode() | ||
|
Comment on lines
+725
to
+727
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. i dont think we need to base64 encode this. the java implementation doesnt do this. neither does the python reference implementation |
||
| else: | ||
| content_sha256_header = EMPTY_BODY_SHA256 | ||
|
|
||
| signing_headers = dict(request.headers) | ||
| signing_headers["x-amz-content-sha256"] = content_sha256_header | ||
|
|
||
| aws_request = AWSRequest( | ||
| method=request.method, url=url, params=params, data=request.body, headers=dict(request.headers) | ||
| method=request.method, url=url, params=params, data=request.body, headers=signing_headers | ||
| ) | ||
|
|
||
| SigV4Auth(credentials, service, region).add_auth(aws_request) | ||
| original_header = request.headers | ||
| signed_headers = aws_request.headers | ||
| _IcebergSigV4Auth(credentials, service, region).add_auth(aws_request) | ||
|
|
||
| original_header = dict(request.headers) | ||
| signed_headers = dict(aws_request.headers) | ||
| relocated_headers = {} | ||
|
|
||
| # relocate headers if there is a conflict with signed headers | ||
|
|
||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. ideally we should test against an integration test before merging. unit tests can only do so much |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
using
rsplitfeels fragile, can we reuse the same logic fromcanonical_requestbut just replace the last part withself.payload(request)There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
im confused why the _IcebergSigV4Auth class is needed