Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 15 additions & 1 deletion modelopt/onnx/quantization/quantize.py
Original file line number Diff line number Diff line change
Expand Up @@ -176,7 +176,21 @@ def _preprocess_onnx(
)

if original_opset_version < target_opset and original_opset_version != 1:
onnx_model = onnx.version_converter.convert_version(onnx_model, target_opset)
try:
onnx_model = onnx.version_converter.convert_version(onnx_model, target_opset)
except Exception as e:
logger.warning(
"onnx.version_converter failed (%s). Performing lightweight opset update.", e
)
current_opset = {opset.domain: opset.version for opset in onnx_model.opset_import}
new_opset_imports = [onnx.helper.make_opsetid("", target_opset)]
if "com.microsoft" not in current_opset:
new_opset_imports.append(onnx.helper.make_opsetid("com.microsoft", 1))
for domain, version in current_opset.items():
if domain not in ["", "ai.onnx"]:
new_opset_imports.append(onnx.helper.make_opsetid(domain, version))
onnx_model.ClearField("opset_import")
onnx_model.opset_import.extend(new_opset_imports)
onnx_path = os.path.join(output_dir, f"{model_name}_opset{target_opset}.onnx")
save_onnx(onnx_model, onnx_path, use_external_data_format)
logger.info(f"Model is cloned to {onnx_path} with opset_version {target_opset}")
Expand Down
18 changes: 16 additions & 2 deletions modelopt/onnx/trt_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -322,8 +322,22 @@ def load_onnx_model(

# Load the model and weights
onnx_model = onnx.load(onnx_path, load_external_data=True)
size_threshold = 2 * (1024**3) # 2GB
use_external_data_format = onnx_model.ByteSize() > size_threshold or use_external_data_format
if not use_external_data_format:
try:
model_size = onnx_model.ByteSize()
except Exception as e:
logger.warning(
"Failed to compute model size with ByteSize (%s). Saving tensors as external data.",
e,
)
use_external_data_format = True
else:
if model_size <= 0 or model_size >= onnx.checker.MAXIMUM_PROTOBUF:
use_external_data_format = True
logger.debug(
"Model is too large to save as a single file but 'use_external_data_format'"
" is False. Saving tensors as external data, regardless."
)

# If inputs are dynamic and override shapes are given, set them as static
dynamic_inputs = get_dynamic_graph_inputs(onnx_model)
Expand Down
51 changes: 36 additions & 15 deletions modelopt/onnx/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -558,7 +558,19 @@ def _get_unique_name(old_name):

def check_model(model: onnx.ModelProto) -> None:
"""Checks if the given model is valid."""
if model.ByteSize() > (2 * (1024**3)): # 2GB limit
save_as_external_data = False
try:
model_size = model.ByteSize()
except Exception as e:
logger.warning(
"Failed to compute model size with ByteSize (%s). Using external data path.", e
)
save_as_external_data = True
else:
if model_size <= 0 or model_size > (2 * (1024**3)):
save_as_external_data = True

if save_as_external_data:
with tempfile.TemporaryDirectory() as temp_dir:
# ONNX also looks in CWD, so we need to use a unique id
unique_id = str(uuid.uuid4())[:8]
Expand Down Expand Up @@ -642,21 +654,18 @@ def get_variable_inputs(node: Node) -> list[Variable]:
def save_onnx(model: onnx.ModelProto, onnx_path: str, save_as_external_data: bool = False):
"""Save an ONNX model to given path. If a model is larger than 2GB, will save with external data."""
size_threshold = 2 * (1024**3) # 2GB
try:
model_proto = model.SerializeToString()
model_size = len(model_proto)
save_as_external_data = save_as_external_data or model_size > size_threshold
logger.debug(
f"Model size: {model_size} bytes, using external data: {save_as_external_data}"
)

except ValueError as e:
if "Message onnx.ModelProto exceeds maximum protobuf size of 2GB" in str(e):
logger.warning("Model exceeds 2GB limit, switching to external data storage")
if not save_as_external_data:
try:
model_proto = model.SerializeToString()
except Exception as e:
logger.warning("Failed to serialize model. Saving tensors as external data. (%s)", e)
save_as_external_data = True
else:
logger.error(f"Failed to serialize model: {e!s}")
raise
model_size = len(model_proto)
save_as_external_data = model_size > size_threshold
logger.debug(
f"Model size: {model_size} bytes, using external data: {save_as_external_data}"
)
Comment on lines +657 to +668
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🟠 Major

🧩 Analysis chain

🏁 Script executed:

#!/bin/bash
# Expected: the protobuf 7.34 workaround should cover all large-model size/serialization probes.
rg -n -C1 '\bByteSize\s*\(|SerializeToString\s*\(' modelopt/onnx/utils.py modelopt/onnx/trt_utils.py

Repository: NVIDIA/Model-Optimizer

Length of output: 1773


🏁 Script executed:

#!/bin/bash
# Search for ByteSize() and SerializeToString() calls in the specified files
rg -n 'ByteSize\s*\(|SerializeToString\s*\(' modelopt/onnx/utils.py modelopt/onnx/trt_utils.py

Repository: NVIDIA/Model-Optimizer

Length of output: 1858


Apply this large-model fallback consistently across the remaining probes.

This fixes save_onnx(), but the same raw size/serialization checks remain unguarded in check_model() (Line 561), infer_shapes() (Line 1162), and modelopt/onnx/trt_utils.py::get_custom_layers() when passed a ModelProto (Line 129). On models >2 GiB, those paths can still fail before switching to alternative handling. Factor this guarded large-model decision into a shared helper function and apply it consistently across all three locations.

🤖 Prompt for AI Agents
Verify each finding against the current code and only fix it if needed.

In `@modelopt/onnx/utils.py` around lines 645 - 656, Create a shared helper (e.g.
determine_use_external_data or should_use_external_data) that accepts a
ModelProto, size_threshold, and logger, attempts model.SerializeToString()
inside a try/except, falls back to True on exception, otherwise checks
len(serialized) > size_threshold and returns the boolean; then replace the raw
serialize/size logic in save_onnx(), check_model(), infer_shapes(), and
modelopt/onnx/trt_utils.py::get_custom_layers() (when receiving a ModelProto) to
call this helper to decide save_as_external_data so all large-model probes
consistently use the guarded fallback.


# Set ir_version to 10, remove it once ORT supports ir_version 11
model.ir_version = 10
Expand Down Expand Up @@ -1162,7 +1171,19 @@ def infer_types_verification(model: onnx.ModelProto) -> onnx.ModelProto:

def infer_shapes(model: onnx.ModelProto, **kwargs):
"""Infers shapes of the onnx graph, handles large models."""
if model.ByteSize() > (2 * (1024**3)): # 2GB limit
save_as_external_data = False
try:
model_size = model.ByteSize()
except Exception as e:
logger.warning(
"Failed to compute model size with ByteSize (%s). Using external data path.", e
)
save_as_external_data = True
else:
if model_size <= 0 or model_size > (2 * (1024**3)):
save_as_external_data = True

if save_as_external_data:
with tempfile.TemporaryDirectory() as temp_dir:
# ONNX also looks in CWD, so we need to use a unique id
unique_id = str(uuid.uuid4())[:8]
Expand Down
Loading