Skip to content

Commit 3f37a9b

Browse files
committed
More
1 parent cf737e1 commit 3f37a9b

File tree

2 files changed

+38
-17
lines changed

2 files changed

+38
-17
lines changed

bin/recompress-raw-mime.py

Lines changed: 2 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -10,7 +10,6 @@
1010
from concurrent.futures import ThreadPoolExecutor
1111

1212
import click
13-
import zstandard
1413
from sqlalchemy.orm import Query
1514
from sqlalchemy.sql import func
1615

@@ -100,12 +99,8 @@ def recompress_batch(recompress_sha256s: "set[str]", dry_run: bool) -> None:
10099
if data is None:
101100
continue
102101

103-
if data.startswith(blockstore.ZSTD_MAGIC_NUMBER_PREFIX):
104-
decompressed_raw_mime = zstandard.decompress(data)
105-
else:
106-
decompressed_raw_mime = data
107-
108-
compressed_raw_mime = blockstore.get_maybe_compressed_mime(
102+
decompressed_raw_mime = blockstore.maybe_decompress_raw_mime(data)
103+
compressed_raw_mime = blockstore.maybe_compress_raw_mime(
109104
decompressed_raw_mime, compress=True
110105
)
111106
compressed_raw_mimes_by_sha256[data_sha256] = (

inbox/util/blockstore.py

Lines changed: 36 additions & 10 deletions
Original file line number | Diff line number | Diff line change
@@ -36,9 +36,22 @@ def _data_file_path(h):
3636
return os.path.join(_data_file_directory(h), h)
3737

3838

39-
def get_maybe_compressed_mime(
39+
def maybe_compress_raw_mime(
4040
decompressed_raw_mime: bytes, *, compress: "bool | None" = None
4141
) -> bytes:
42+
"""
43+
Optionally compress the raw MIME data.
44+
45+
Args:
46+
decompressed_raw_mime: The raw MIME data, always *decompressed*.
47+
compress:
48+
Whether to compress the data.
49+
If None, the value of `config["COMPRESS_RAW_MIME"]` is used
50+
which defaults to False.
51+
52+
Returns:
53+
The optionally compressed raw MIME data.
54+
"""
4255
if compress is None:
4356
compress = config.get("COMPRESS_RAW_MIME", False)
4457

@@ -81,7 +94,7 @@ def save_raw_mime(
8194
Returns:
8295
The length of the data in the datastore.
8396
"""
84-
compressed_raw_mime = get_maybe_compressed_mime(
97+
compressed_raw_mime = maybe_compress_raw_mime(
8598
decompressed_raw_mime, compress=compress
8699
)
87100

@@ -174,6 +187,26 @@ def get_from_blockstore(data_sha256, *, check_sha=True) -> Optional[bytes]:
174187
return value
175188

176189

190+
def maybe_decompress_raw_mime(compressed_raw_mime: bytes) -> bytes:
191+
"""
192+
Decompress the raw MIME data if it's compressed.
193+
194+
Args:
195+
compressed_raw_mime: The raw MIME data, either compressed or not.
196+
197+
Returns:
198+
The decompressed raw MIME data.
199+
"""
200+
# Raw MIME data will never start with the ZSTD magic number,
201+
# because email messages always start with headers in 7-bit ASCII.
202+
# ZSTD magic number contains bytes with the highest bit set to 1,
203+
# so we can use it as a marker to check if the data is compressed.
204+
if compressed_raw_mime.startswith(ZSTD_MAGIC_NUMBER_PREFIX):
205+
return zstandard.decompress(compressed_raw_mime)
206+
else:
207+
return compressed_raw_mime
208+
209+
177210
def get_raw_mime(data_sha256: str) -> "bytes | None":
178211
"""
179212
Get the raw MIME data from the blockstore.
@@ -190,14 +223,7 @@ def get_raw_mime(data_sha256: str) -> "bytes | None":
190223
if compressed_raw_mime is None:
191224
return None
192225

193-
# Raw MIME data will never start with the ZSTD magic number,
194-
# because email messages always start with headers in 7-bit ASCII.
195-
# ZSTD magic number contains bytes with the highest bit set to 1,
196-
# so we can use it as a marker to check if the data is compressed.
197-
if compressed_raw_mime.startswith(ZSTD_MAGIC_NUMBER_PREFIX):
198-
decompressed_raw_mime = zstandard.decompress(compressed_raw_mime)
199-
else:
200-
decompressed_raw_mime = compressed_raw_mime
226+
decompressed_raw_mime = maybe_decompress_raw_mime(compressed_raw_mime)
201227

202228
assert (
203229
sha256(decompressed_raw_mime).hexdigest() == data_sha256

0 commit comments

Comments
 (0)