Skip to content

Commit 1cea9bb

Browse files
committed
Fix #41: Fix PDF binary detection and apply formatting
Signed-off-by: Kushagar Garg <dreamstick909@gmail.com>
1 parent 3f5d6e3 commit 1cea9bb

File tree

2 files changed

+15 -0 lines changed

2 files changed

+15 -0 lines changed

src/typecode/contenttype.py

Lines changed: 8 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -362,6 +362,14 @@ def is_binary(self):
362 362
self._is_binary = False
363 363
if self.is_file is True:
364 364
self._is_binary = is_binary(self.location)
365+
if not self._is_binary:
366+
try:
367+
with open(self.location, "rb") as f:
368+
if f.read(5) == b"%PDF-":
369+
self._is_binary = True
370+
except Exception:
371+
pass
372+
365 373
return self._is_binary
366 374

367 375
@property

tests/test_contenttype.py

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -395,3 +395,10 @@ def test_size(self):
395 395
test_dir = self.get_test_loc("contenttype/size")
396 396
result = size(test_dir)
397 397
assert result == 18
398+
399+
def test_is_binary_handles_pdf_signature(self):
400+
test_dir = self.get_temp_dir()
401+
test_file = os.path.join(test_dir, "test_pdf.pdf")
402+
with open(test_file, "wb") as f:
403+
f.write(b"%PDF-1.4\nSome binary content \x00\xff")
404+
assert is_binary(test_file) is True

0 commit comments

Comments
 (0)