diff --git a/centml/sdk/api.py b/centml/sdk/api.py
index 0bff87d..4fe0ecc 100644
--- a/centml/sdk/api.py
+++ b/centml/sdk/api.py
@@ -8,6 +8,7 @@
     CreateComputeDeploymentRequest,
     CreateCServeV3DeploymentRequest,
     CreateJobDeploymentRequest,
+    CreateHardwareInstanceRequest,
     ApiException,
     InviteUserRequest,
     Metric,
@@ -121,6 +122,12 @@ def get_hardware_instances(self, cluster_id=None):
             cluster_id=cluster_id if cluster_id else None
         ).results
 
+    def create_hardware_instance(self, request: CreateHardwareInstanceRequest):
+        return self._api.create_hardware_instance_hardware_instances_post(request)
+
+    def delete_hardware_instance(self, hardware_instance_id: int):
+        return self._api.delete_hardware_instance_hardware_instances_hardware_instance_id_delete(hardware_instance_id)
+
     def get_prebuilt_images(self, depl_type: DeploymentType):
         return self._api.get_prebuilt_images_prebuilt_images_get(type=depl_type)
 
diff --git a/examples/sdk/manage_hardware_instances.py b/examples/sdk/manage_hardware_instances.py
new file mode 100644
index 0000000..3ba57ff
--- /dev/null
+++ b/examples/sdk/manage_hardware_instances.py
@@ -0,0 +1,68 @@
+#!/usr/bin/env python3
+"""
+Example showing how to manage hardware instances with the CentML SDK.
+
+Covers listing, creating and deleting hardware instances. Running this script
+lists the hardware instances you have access to; the create/delete helpers show
+the call pattern and are not invoked automatically.
+
+This uses the centml CLI authentication, so make sure you are logged in to the
+centml CLI before running it. Creating and deleting hardware instances requires
+admin privileges (PERM_ADMIN_MANAGE_HARDWARE) on your CentML organization.
+"""
+
+from centml.sdk import CreateHardwareInstanceRequest
+from centml.sdk.api import get_centml_client
+
+
+def list_hardware_instances():
+    """List hardware instances, showing the cluster they belong to by name."""
+    with get_centml_client() as client:
+        clusters = {c.id: c for c in client.get_clusters().results}
+        instances = client.get_hardware_instances()
+
+        if not instances:
+            print("No hardware instances found.")
+            return
+
+        print(f"\nFound {len(instances)} hardware instance(s)\n")
+        for hw in sorted(instances, key=lambda x: x.id):
+            cluster = clusters.get(hw.cluster_id)
+            cluster_name = cluster.display_name if cluster else f"cluster {hw.cluster_id}"
+            print(f"Name: {hw.name}")
+            print(f"Cluster: {cluster_name}")
+            print(f"GPU Type: {hw.gpu_type}")
+            print(f"Num GPUs: {hw.num_gpu}")
+            print(f"CPU: {hw.cpu}")
+            print(f"Memory: {hw.memory}")
+            print("-" * 40)
+
+
+def create_hardware_instance():
+    """Create a hardware instance (requires admin privileges)."""
+    request = CreateHardwareInstanceRequest(
+        cluster_id=1,
+        name="h100-8x",
+        gpu_type="H100",
+        num_gpu=8,
+        cpu=64000,
+        memory=128000,
+        accelerator_resource_key="nvidia.com/gpu",
+        node_affinity_labels={"gpu": "h100"},
+        accelerator_memory=80000,
+    )
+    with get_centml_client() as client:
+        instance = client.create_hardware_instance(request)
+        print(f"Created hardware instance '{instance.name}' with ID {instance.id}")
+        return instance.id
+
+
+def delete_hardware_instance(hardware_instance_id):
+    """Delete a hardware instance by ID (requires admin privileges)."""
+    with get_centml_client() as client:
+        client.delete_hardware_instance(hardware_instance_id)
+        print(f"Deleted hardware instance {hardware_instance_id}")
+
+
+if __name__ == "__main__":
+    list_hardware_instances()
diff --git a/tests/test_sdk_api.py b/tests/test_sdk_api.py
index 42933b1..97fcda3 100644
--- a/tests/test_sdk_api.py
+++ b/tests/test_sdk_api.py
@@ -1,7 +1,7 @@
 from types import SimpleNamespace
 from unittest.mock import MagicMock
 
-from platform_api_python_client import CreateJobDeploymentRequest
+from platform_api_python_client import CreateJobDeploymentRequest, CreateHardwareInstanceRequest
 
 from centml.sdk import ApiException
 from centml.sdk.api import CentMLClient
@@ -72,3 +72,50 @@ def test_create_job_delegates_to_platform_client():
 
     assert response is expected_response
     api.create_job_deployment_deployments_job_post.assert_called_once_with(request)
+
+
+def test_get_hardware_instances_returns_results():
+    api = MagicMock()
+    expected_results = [SimpleNamespace(id=1), SimpleNamespace(id=2)]
+    api.get_hardware_instances_hardware_instances_get.return_value = SimpleNamespace(results=expected_results)
+    client = CentMLClient(api)
+
+    response = client.get_hardware_instances(cluster_id=5)
+
+    assert response is expected_results
+    api.get_hardware_instances_hardware_instances_get.assert_called_once_with(cluster_id=5)
+
+
+def test_create_hardware_instance_delegates_to_platform_client():
+    api = MagicMock()
+    expected_response = MagicMock()
+    api.create_hardware_instance_hardware_instances_post.return_value = expected_response
+    request = CreateHardwareInstanceRequest(
+        cluster_id=1,
+        name="h100-test",
+        gpu_type="H100",
+        num_gpu=8,
+        cpu=64000,
+        memory=128000,
+        accelerator_resource_key="nvidia.com/gpu",
+        node_affinity_labels={"gpu": "h100"},
+        accelerator_memory=80000,
+    )
+    client = CentMLClient(api)
+
+    response = client.create_hardware_instance(request)
+
+    assert response is expected_response
+    api.create_hardware_instance_hardware_instances_post.assert_called_once_with(request)
+
+
+def test_delete_hardware_instance_delegates_to_platform_client():
+    api = MagicMock()
+    expected_response = MagicMock()
+    api.delete_hardware_instance_hardware_instances_hardware_instance_id_delete.return_value = expected_response
+    client = CentMLClient(api)
+
+    response = client.delete_hardware_instance(123)
+
+    assert response is expected_response
+    api.delete_hardware_instance_hardware_instances_hardware_instance_id_delete.assert_called_once_with(123)