Skip to content

Commit a1cca0c

Browse files
committed
CM-60540: remove binaryornot dep
1 parent 6419aaf commit a1cca0c

File tree

6 files changed

+56
-41
lines changed

6 files changed

+56
-41
lines changed

cycode/cli/utils/binary_utils.py

Lines changed: 42 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,42 @@
1+
import logging
2+
3+
logger = logging.getLogger(__name__)
4+
5+
# Whitespace-like control bytes that routinely appear in text files.
_CONTROL_CHARS = b'\n\r\t\f\b'
# Bytes treated as ordinary text: the controls above plus 0x20-0x7E.
_PRINTABLE_ASCII = _CONTROL_CHARS + bytes(range(32, 127))
# DEL (0x7F) and every byte with the high bit set (0x80-0xFF).
_PRINTABLE_HIGH_ASCII = bytes(range(127, 256))


def is_binary_string(bytes_to_check: bytes) -> bool:
    """Check if a chunk of bytes appears to be binary content.

    Uses a simplified version of the Perl detection algorithm.

    Two ratios are computed over the chunk:

    * ``nontext_ratio``: share of bytes that are not in ``_PRINTABLE_ASCII``
      (unusual control bytes plus everything from 0x7F upwards).
    * ``low_byte_ratio``: share of bytes that are not in
      ``_PRINTABLE_HIGH_ASCII`` (i.e. bytes below 0x7F).

    Args:
        bytes_to_check: The raw chunk to classify.

    Returns:
        ``True`` when the chunk looks binary, ``False`` when it looks like
        text. An empty chunk is reported as text.
    """
    if not bytes_to_check:
        return False

    # Null bytes are a strong binary indicator
    if b'\x00' in bytes_to_check:
        return True

    total = len(bytes_to_check)

    # translate(None, delete) drops the listed bytes, so what remains is
    # everything that is *not* printable ASCII.
    nontext_ratio = len(bytes_to_check.translate(None, _PRINTABLE_ASCII)) / total

    # Same trick: what remains after deleting 0x7F-0xFF is the low bytes.
    low_byte_ratio = len(bytes_to_check.translate(None, _PRINTABLE_HIGH_ASCII)) / total

    # Mostly non-printable AND mostly low bytes => dominated by ASCII control
    # characters. Such bytes always decode as UTF-8, so a decode attempt
    # cannot tell text from binary here; classify as binary directly.
    if nontext_ratio > 0.8 and low_byte_ratio > 0.8:
        return True

    # Anything else is only suspicious when almost every byte is 0x7F+.
    if not (nontext_ratio > 0.3 and low_byte_ratio < 0.05):
        return False

    # High-byte-heavy chunks may still be valid non-ASCII text (e.g. CJK):
    # rescue them if they decode cleanly as UTF-8.
    try:
        bytes_to_check.decode('utf-8')
    except UnicodeDecodeError:
        return True
    return False

cycode/cli/utils/path_utils.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@
44
from typing import TYPE_CHECKING, AnyStr, Optional, Union
55

66
import typer
7-
from binaryornot.helpers import is_binary_string
7+
from cycode.cli.utils.binary_utils import is_binary_string
88

99
from cycode.cli.logger import logger
1010

cycode/cli/utils/string_utils.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@
55
import string
66
from sys import getsizeof
77

8-
from binaryornot.check import is_binary_string
8+
from cycode.cli.utils.binary_utils import is_binary_string
99

1010
from cycode.cli.consts import SCA_SHORTCUT_DEPENDENCY_PATHS
1111

cycode/logger.py

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -31,8 +31,6 @@ def _set_io_encodings() -> None:
3131
logging.getLogger('werkzeug').setLevel(logging.WARNING)
3232
logging.getLogger('schedule').setLevel(logging.WARNING)
3333
logging.getLogger('kubernetes').setLevel(logging.WARNING)
34-
logging.getLogger('binaryornot').setLevel(logging.WARNING)
35-
logging.getLogger('chardet').setLevel(logging.WARNING)
3634
logging.getLogger('git.cmd').setLevel(logging.WARNING)
3735
logging.getLogger('git.util').setLevel(logging.WARNING)
3836

poetry.lock

Lines changed: 12 additions & 36 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

pyproject.toml

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -39,7 +39,6 @@ pyyaml = ">=6.0,<7.0"
3939
marshmallow = ">=3.15.0,<4.0.0"
4040
gitpython = ">=3.1.30,<3.2.0"
4141
arrow = ">=1.0.0,<1.4.0"
42-
binaryornot = ">=0.4.4,<0.5.0"
4342
requests = ">=2.32.4,<3.0"
4443
urllib3 = ">=2.4.0,<3.0.0"
4544
pyjwt = ">=2.8.0,<3.0"

0 commit comments

Comments
 (0)