From 87f6f4de1afdb12f98028b2cef85a67c8d823b1d Mon Sep 17 00:00:00 2001 From: Yazan Date: Mon, 15 Jun 2026 10:46:58 -0700 Subject: [PATCH 1/4] add api to sdk --- centml/sdk/api.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/centml/sdk/api.py b/centml/sdk/api.py index 0bff87d..4fe0ecc 100644 --- a/centml/sdk/api.py +++ b/centml/sdk/api.py @@ -8,6 +8,7 @@ CreateComputeDeploymentRequest, CreateCServeV3DeploymentRequest, CreateJobDeploymentRequest, + CreateHardwareInstanceRequest, ApiException, InviteUserRequest, Metric, @@ -121,6 +122,12 @@ def get_hardware_instances(self, cluster_id=None): cluster_id=cluster_id if cluster_id else None ).results + def create_hardware_instance(self, request: CreateHardwareInstanceRequest): + return self._api.create_hardware_instance_hardware_instances_post(request) + + def delete_hardware_instance(self, hardware_instance_id: int): + return self._api.delete_hardware_instance_hardware_instances_hardware_instance_id_delete(hardware_instance_id) + def get_prebuilt_images(self, depl_type: DeploymentType): return self._api.get_prebuilt_images_prebuilt_images_get(type=depl_type) From 70abab6ff255edc184344fc71b95096b17203d24 Mon Sep 17 00:00:00 2001 From: Yazan Date: Mon, 15 Jun 2026 10:47:16 -0700 Subject: [PATCH 2/4] tests --- tests/test_sdk_api.py | 49 ++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 48 insertions(+), 1 deletion(-) diff --git a/tests/test_sdk_api.py b/tests/test_sdk_api.py index 42933b1..97fcda3 100644 --- a/tests/test_sdk_api.py +++ b/tests/test_sdk_api.py @@ -1,7 +1,7 @@ from types import SimpleNamespace from unittest.mock import MagicMock -from platform_api_python_client import CreateJobDeploymentRequest +from platform_api_python_client import CreateJobDeploymentRequest, CreateHardwareInstanceRequest from centml.sdk import ApiException from centml.sdk.api import CentMLClient @@ -72,3 +72,50 @@ def test_create_job_delegates_to_platform_client(): assert response is expected_response api.create_job_deployment_deployments_job_post.assert_called_once_with(request) + + +def test_get_hardware_instances_returns_results(): + api = MagicMock() + expected_results = [SimpleNamespace(id=1), SimpleNamespace(id=2)] + api.get_hardware_instances_hardware_instances_get.return_value = SimpleNamespace(results=expected_results) + client = CentMLClient(api) + + response = client.get_hardware_instances(cluster_id=5) + + assert response is expected_results + api.get_hardware_instances_hardware_instances_get.assert_called_once_with(cluster_id=5) + + +def test_create_hardware_instance_delegates_to_platform_client(): + api = MagicMock() + expected_response = MagicMock() + api.create_hardware_instance_hardware_instances_post.return_value = expected_response + request = CreateHardwareInstanceRequest( + cluster_id=1, + name="h100-test", + gpu_type="H100", + num_gpu=8, + cpu=64000, + memory=128000, + accelerator_resource_key="nvidia.com/gpu", + node_affinity_labels={"gpu": "h100"}, + accelerator_memory=80000, + ) + client = CentMLClient(api) + + response = client.create_hardware_instance(request) + + assert response is expected_response + api.create_hardware_instance_hardware_instances_post.assert_called_once_with(request) + + +def test_delete_hardware_instance_delegates_to_platform_client(): + api = MagicMock() + expected_response = MagicMock() + api.delete_hardware_instance_hardware_instances_hardware_instance_id_delete.return_value = expected_response + client = CentMLClient(api) + + response = client.delete_hardware_instance(123) + + assert response is expected_response + api.delete_hardware_instance_hardware_instances_hardware_instance_id_delete.assert_called_once_with(123) From ec6d2819ed2a55e33d99e7af80883cb3422f2fd2 Mon Sep 17 00:00:00 2001 From: Yazan Date: Mon, 15 Jun 2026 10:47:52 -0700 Subject: [PATCH 3/4] examples --- examples/sdk/manage_hardware_instances.py | 124 ++++++++++++++++++++++ 1 file changed, 124 insertions(+) create mode 100644 examples/sdk/manage_hardware_instances.py diff --git a/examples/sdk/manage_hardware_instances.py b/examples/sdk/manage_hardware_instances.py new file mode 100644 index 0000000..a898ade --- /dev/null +++ b/examples/sdk/manage_hardware_instances.py @@ -0,0 +1,124 @@ +#!/usr/bin/env python3 +""" +Script to manage hardware instances via the CentML SDK. + +Demonstrates the hardware instance lifecycle: +- Listing hardware instances (optionally filtered by cluster) +- Creating a new hardware instance +- Deleting a hardware instance + +Note: creating and deleting hardware instances require admin privileges +(PERM_ADMIN_MANAGE_HARDWARE) on your CentML organization. +""" + +import click + +from centml.sdk import CreateHardwareInstanceRequest +from centml.sdk.api import get_centml_client + + +def display_hardware_instances(instances): + """Display hardware instance information in a formatted list.""" + if not instances: + click.echo("No hardware instances found.") + return + + click.echo(f"\nFound {len(instances)} hardware instance(s)\n") + + for hw in sorted(instances, key=lambda x: x.id): + click.echo(f"ID: {hw.id}") + click.echo(f"Name: {hw.name}") + click.echo(f"Cluster ID: {hw.cluster_id}") + click.echo(f"GPU Type: {hw.gpu_type}") + click.echo(f"Num GPUs: {hw.num_gpu}") + click.echo(f"CPU: {hw.cpu}") + click.echo(f"Memory: {hw.memory}") + click.echo(f"Cost / hr: {hw.cost_per_hr}") + click.echo("-" * 40) + + +@click.group() +def cli(): + """Manage hardware instances. + + These commands use the centml CLI authentication, so make sure you are + logged in to the centml CLI before running this script. + """ + + +@cli.command(name="list") +@click.option("--cluster-id", type=int, default=None, help="Filter to a specific cluster") +def list_instances(cluster_id): + """List hardware instances, optionally filtered by cluster. + + \b + Examples: + python manage_hardware_instances.py list + python manage_hardware_instances.py list --cluster-id 1 + """ + with get_centml_client() as client: + instances = client.get_hardware_instances(cluster_id) + display_hardware_instances(instances) + + +@cli.command() +@click.option("--cluster-id", type=int, required=True, help="Cluster the hardware belongs to") +@click.option("--name", required=True, help="Display name for the hardware instance") +@click.option("--gpu-type", required=True, help="GPU type identifier (e.g. H100, A100)") +@click.option("--num-gpu", type=int, required=True, help="Number of GPUs") +@click.option("--cpu", type=int, required=True, help="CPU in millicores") +@click.option("--memory", type=int, required=True, help="Memory in MB") +@click.option("--accelerator-resource-key", required=True, help="Kubernetes accelerator resource key") +@click.option("--accelerator-memory", type=int, required=True, help="Accelerator memory in MB") +@click.option( + "--node-affinity-label", + "node_affinity_labels", + type=(str, str), + multiple=True, + help="Node affinity label as KEY VALUE (repeatable)", +) +def create( + cluster_id, name, gpu_type, num_gpu, cpu, memory, accelerator_resource_key, accelerator_memory, node_affinity_labels +): + """Create a new hardware instance (requires admin privileges). + + \b + Examples: + python manage_hardware_instances.py create \\ + --cluster-id 1 --name h100-8x --gpu-type H100 --num-gpu 8 \\ + --cpu 64000 --memory 128000 \\ + --accelerator-resource-key nvidia.com/gpu --accelerator-memory 80000 \\ + --node-affinity-label gpu h100 + """ + request = CreateHardwareInstanceRequest( + cluster_id=cluster_id, + name=name, + gpu_type=gpu_type, + num_gpu=num_gpu, + cpu=cpu, + memory=memory, + accelerator_resource_key=accelerator_resource_key, + node_affinity_labels=dict(node_affinity_labels), + accelerator_memory=accelerator_memory, + ) + with get_centml_client() as client: + instance = client.create_hardware_instance(request) + click.echo(f"Created hardware instance '{instance.name}' with ID {instance.id}") + + +@cli.command() +@click.argument("hardware_instance_id", type=int) +def delete(hardware_instance_id): + """Delete a hardware instance by ID (requires admin privileges). + + \b + Examples: + python manage_hardware_instances.py delete 123 + """ + with get_centml_client() as client: + client.delete_hardware_instance(hardware_instance_id) + click.echo(f"Deleted hardware instance {hardware_instance_id}") + + +if __name__ == "__main__": + cli() From fe27fd513932b211ea39dbc469158b0ac97a76d4 Mon Sep 17 00:00:00 2001 From: Yazan Date: Mon, 15 Jun 2026 11:50:10 -0700 Subject: [PATCH 4/4] hardware instance example per review fix --- examples/sdk/manage_hardware_instances.py | 142 +++++++--------------- 1 file changed, 43 insertions(+), 99 deletions(-) diff --git a/examples/sdk/manage_hardware_instances.py b/examples/sdk/manage_hardware_instances.py index a898ade..3ba57ff 100644 --- a/examples/sdk/manage_hardware_instances.py +++ b/examples/sdk/manage_hardware_instances.py @@ -1,124 +1,68 @@ #!/usr/bin/env python3 """ -Script to manage hardware instances via the CentML SDK. +Example showing how to manage hardware instances with the CentML SDK. -Demonstrates the hardware instance lifecycle: -- Listing hardware instances (optionally filtered by cluster) -- Creating a new hardware instance -- Deleting a hardware instance +Covers listing, creating and deleting hardware instances. Running this script +lists the hardware instances you have access to; the create/delete helpers show +the call pattern and are not invoked automatically. -Note: creating and deleting hardware instances require admin privileges -(PERM_ADMIN_MANAGE_HARDWARE) on your CentML organization. +This uses the centml CLI authentication, so make sure you are logged in to the +centml CLI before running it. Creating and deleting hardware instances requires +admin privileges (PERM_ADMIN_MANAGE_HARDWARE) on your CentML organization. """ -import click - from centml.sdk import CreateHardwareInstanceRequest from centml.sdk.api import get_centml_client -def display_hardware_instances(instances): - """Display hardware instance information in a formatted list.""" +def list_hardware_instances(): + """List hardware instances, showing the cluster they belong to by name.""" + with get_centml_client() as client: + clusters = {c.id: c for c in client.get_clusters().results} + instances = client.get_hardware_instances() + if not instances: - click.echo("No hardware instances found.") + print("No hardware instances found.") return - click.echo(f"\nFound {len(instances)} hardware instance(s)\n") - + print(f"\nFound {len(instances)} hardware instance(s)\n") for hw in sorted(instances, key=lambda x: x.id): - click.echo(f"ID: {hw.id}") - click.echo(f"Name: {hw.name}") - click.echo(f"Cluster ID: {hw.cluster_id}") - click.echo(f"GPU Type: {hw.gpu_type}") - click.echo(f"Num GPUs: {hw.num_gpu}") - click.echo(f"CPU: {hw.cpu}") - click.echo(f"Memory: {hw.memory}") - click.echo(f"Cost / hr: {hw.cost_per_hr}") - click.echo("-" * 40) - - -@click.group() -def cli(): - """Manage hardware instances. - - These commands use the centml CLI authentication, so make sure you are - logged in to the centml CLI before running this script. - """ - - -@cli.command(name="list") -@click.option("--cluster-id", type=int, default=None, help="Filter to a specific cluster") -def list_instances(cluster_id): - """List hardware instances, optionally filtered by cluster. - - \b - Examples: - python manage_hardware_instances.py list - python manage_hardware_instances.py list --cluster-id 1 - """ - with get_centml_client() as client: - instances = client.get_hardware_instances(cluster_id) - display_hardware_instances(instances) - - -@cli.command() -@click.option("--cluster-id", type=int, required=True, help="Cluster the hardware belongs to") -@click.option("--name", required=True, help="Display name for the hardware instance") -@click.option("--gpu-type", required=True, help="GPU type identifier (e.g. H100, A100)") -@click.option("--num-gpu", type=int, required=True, help="Number of GPUs") -@click.option("--cpu", type=int, required=True, help="CPU in millicores") -@click.option("--memory", type=int, required=True, help="Memory in MB") -@click.option("--accelerator-resource-key", required=True, help="Kubernetes accelerator resource key") -@click.option("--accelerator-memory", type=int, required=True, help="Accelerator memory in MB") -@click.option( - "--node-affinity-label", - "node_affinity_labels", - type=(str, str), - multiple=True, - help="Node affinity label as KEY VALUE (repeatable)", -) -def create( - cluster_id, name, gpu_type, num_gpu, cpu, memory, accelerator_resource_key, accelerator_memory, node_affinity_labels -): - """Create a new hardware instance (requires admin privileges). - - \b - Examples: - python manage_hardware_instances.py create \\ - --cluster-id 1 --name h100-8x --gpu-type H100 --num-gpu 8 \\ - --cpu 64000 --memory 128000 \\ - --accelerator-resource-key nvidia.com/gpu --accelerator-memory 80000 \\ - --node-affinity-label gpu h100 - """ + cluster = clusters.get(hw.cluster_id) + cluster_name = cluster.display_name if cluster else f"cluster {hw.cluster_id}" + print(f"Name: {hw.name}") + print(f"Cluster: {cluster_name}") + print(f"GPU Type: {hw.gpu_type}") + print(f"Num GPUs: {hw.num_gpu}") + print(f"CPU: {hw.cpu}") + print(f"Memory: {hw.memory}") + print("-" * 40) + + +def create_hardware_instance(): + """Create a hardware instance (requires admin privileges).""" request = CreateHardwareInstanceRequest( - cluster_id=cluster_id, - name=name, - gpu_type=gpu_type, - num_gpu=num_gpu, - cpu=cpu, - memory=memory, - accelerator_resource_key=accelerator_resource_key, - node_affinity_labels=dict(node_affinity_labels), - accelerator_memory=accelerator_memory, + cluster_id=1, + name="h100-8x", + gpu_type="H100", + num_gpu=8, + cpu=64000, + memory=128000, + accelerator_resource_key="nvidia.com/gpu", + node_affinity_labels={"gpu": "h100"}, + accelerator_memory=80000, ) with get_centml_client() as client: instance = client.create_hardware_instance(request) - click.echo(f"Created hardware instance '{instance.name}' with ID {instance.id}") - + print(f"Created hardware instance '{instance.name}' with ID {instance.id}") + return instance.id -@cli.command() -@click.argument("hardware_instance_id", type=int) -def delete(hardware_instance_id): - """Delete a hardware instance by ID (requires admin privileges). - \b - Examples: - python manage_hardware_instances.py delete 123 - """ +def delete_hardware_instance(hardware_instance_id): + """Delete a hardware instance by ID (requires admin privileges).""" with get_centml_client() as client: client.delete_hardware_instance(hardware_instance_id) - click.echo(f"Deleted hardware instance {hardware_instance_id}") + print(f"Deleted hardware instance {hardware_instance_id}") if __name__ == "__main__": - cli() + list_hardware_instances()