Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 7 additions & 0 deletions centml/sdk/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
CreateComputeDeploymentRequest,
CreateCServeV3DeploymentRequest,
CreateJobDeploymentRequest,
CreateHardwareInstanceRequest,
ApiException,
InviteUserRequest,
Metric,
Expand Down Expand Up @@ -121,6 +122,12 @@ def get_hardware_instances(self, cluster_id=None):
cluster_id=cluster_id if cluster_id else None
).results

def create_hardware_instance(self, request: CreateHardwareInstanceRequest):
    """Create a hardware instance by delegating to the platform API client.

    Args:
        request: The creation payload, handed to the underlying client
            unchanged.

    Returns:
        The value returned by the platform client's create call.
    """
    platform_api = self._api
    return platform_api.create_hardware_instance_hardware_instances_post(request)

def delete_hardware_instance(self, hardware_instance_id: int):
    """Delete a hardware instance by delegating to the platform API client.

    Args:
        hardware_instance_id: Identifier of the hardware instance to remove,
            passed positionally to the underlying client.

    Returns:
        The value returned by the platform client's delete call.
    """
    delete_endpoint = (
        self._api.delete_hardware_instance_hardware_instances_hardware_instance_id_delete
    )
    return delete_endpoint(hardware_instance_id)

def get_prebuilt_images(self, depl_type: DeploymentType):
    """Fetch prebuilt images for a deployment type from the platform API.

    Args:
        depl_type: Deployment type, forwarded to the underlying client as
            its ``type`` keyword argument.

    Returns:
        The value returned by the platform client's prebuilt-images call.
    """
    platform_api = self._api
    return platform_api.get_prebuilt_images_prebuilt_images_get(type=depl_type)

Expand Down
68 changes: 68 additions & 0 deletions examples/sdk/manage_hardware_instances.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,68 @@
#!/usr/bin/env python3
"""
Example showing how to manage hardware instances with the CentML SDK.

Covers listing, creating and deleting hardware instances. Running this script
lists the hardware instances you have access to; the create/delete helpers show
the call pattern and are not invoked automatically.

This uses the centml CLI authentication, so make sure you are logged in to the
centml CLI before running it. Creating and deleting hardware instances requires
admin privileges (PERM_ADMIN_MANAGE_HARDWARE) on your CentML organization.
"""

from centml.sdk import CreateHardwareInstanceRequest
from centml.sdk.api import get_centml_client


def list_hardware_instances():
    """Print every visible hardware instance, ordered by instance ID.

    Each instance's cluster is shown by its display name when the cluster
    appears in the client's cluster listing; otherwise the raw cluster ID is
    shown. Prints a short notice and returns early when there are no
    instances.
    """
    with get_centml_client() as client:
        # Index clusters by ID so each instance can be mapped to a name.
        cluster_by_id = {}
        for known_cluster in client.get_clusters().results:
            cluster_by_id[known_cluster.id] = known_cluster
        instances = client.get_hardware_instances()

        if not instances:
            print("No hardware instances found.")
            return

        print(f"\nFound {len(instances)} hardware instance(s)\n")
        for hw in sorted(instances, key=lambda inst: inst.id):
            owner = cluster_by_id.get(hw.cluster_id)
            if owner:
                cluster_name = owner.display_name
            else:
                # Unknown cluster: fall back to showing its numeric ID.
                cluster_name = f"cluster {hw.cluster_id}"
            print(f"Name: {hw.name}")
            print(f"Cluster: {cluster_name}")
            print(f"GPU Type: {hw.gpu_type}")
            print(f"Num GPUs: {hw.num_gpu}")
            print(f"CPU: {hw.cpu}")
            print(f"Memory: {hw.memory}")
            print("-" * 40)


def create_hardware_instance():
    """Create a sample hardware instance and return its ID.

    Requires admin privileges. The field values below are illustrative
    placeholders for the call pattern; replace them with values that match
    your own cluster.

    Returns:
        The ID of the newly created hardware instance.
    """
    spec = {
        "cluster_id": 1,
        "name": "h100-8x",
        "gpu_type": "H100",
        "num_gpu": 8,
        "cpu": 64000,
        "memory": 128000,
        "accelerator_resource_key": "nvidia.com/gpu",
        "node_affinity_labels": {"gpu": "h100"},
        "accelerator_memory": 80000,
    }
    request = CreateHardwareInstanceRequest(**spec)
    with get_centml_client() as client:
        created = client.create_hardware_instance(request)
        print(f"Created hardware instance '{created.name}' with ID {created.id}")
        return created.id


def delete_hardware_instance(hardware_instance_id):
    """Remove the hardware instance with the given ID and report it.

    Requires admin privileges.

    Args:
        hardware_instance_id: ID of the hardware instance to delete.
    """
    confirmation = f"Deleted hardware instance {hardware_instance_id}"
    with get_centml_client() as centml:
        centml.delete_hardware_instance(hardware_instance_id)
        # Only reached when the delete call did not raise.
        print(confirmation)


# Running the script directly only lists hardware instances; the create and
# delete helpers above are not called from here.
if __name__ == "__main__":
    list_hardware_instances()
49 changes: 48 additions & 1 deletion tests/test_sdk_api.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
from types import SimpleNamespace
from unittest.mock import MagicMock

from platform_api_python_client import CreateJobDeploymentRequest
from platform_api_python_client import CreateJobDeploymentRequest, CreateHardwareInstanceRequest

from centml.sdk import ApiException
from centml.sdk.api import CentMLClient
Expand Down Expand Up @@ -72,3 +72,50 @@ def test_create_job_delegates_to_platform_client():

assert response is expected_response
api.create_job_deployment_deployments_job_post.assert_called_once_with(request)


def test_get_hardware_instances_returns_results():
    """get_hardware_instances returns the response's ``results`` and forwards ``cluster_id``."""
    instances = [SimpleNamespace(id=1), SimpleNamespace(id=2)]
    platform_api = MagicMock()
    list_endpoint = platform_api.get_hardware_instances_hardware_instances_get
    list_endpoint.return_value = SimpleNamespace(results=instances)

    returned = CentMLClient(platform_api).get_hardware_instances(cluster_id=5)

    # Identity check: the client must hand back the very same list object.
    assert returned is instances
    list_endpoint.assert_called_once_with(cluster_id=5)


def test_create_hardware_instance_delegates_to_platform_client():
    """create_hardware_instance passes the request through and returns the client's response."""
    platform_api = MagicMock()
    create_endpoint = platform_api.create_hardware_instance_hardware_instances_post
    canned_reply = MagicMock()
    create_endpoint.return_value = canned_reply
    payload = CreateHardwareInstanceRequest(
        cluster_id=1,
        name="h100-test",
        gpu_type="H100",
        num_gpu=8,
        cpu=64000,
        memory=128000,
        accelerator_resource_key="nvidia.com/gpu",
        node_affinity_labels={"gpu": "h100"},
        accelerator_memory=80000,
    )

    returned = CentMLClient(platform_api).create_hardware_instance(payload)

    assert returned is canned_reply
    create_endpoint.assert_called_once_with(payload)


def test_delete_hardware_instance_delegates_to_platform_client():
    """delete_hardware_instance passes the ID through and returns the client's response."""
    platform_api = MagicMock()
    delete_endpoint = (
        platform_api.delete_hardware_instance_hardware_instances_hardware_instance_id_delete
    )
    canned_reply = MagicMock()
    delete_endpoint.return_value = canned_reply

    returned = CentMLClient(platform_api).delete_hardware_instance(123)

    assert returned is canned_reply
    delete_endpoint.assert_called_once_with(123)
Loading