From 6c36986edc23b569713864cc21738fd11efc86d3 Mon Sep 17 00:00:00 2001 From: Michael Choi Date: Fri, 23 Jan 2026 09:54:13 -0800 Subject: [PATCH] Add new parameters to LaunchClient methods - Add concurrent_requests_per_worker to create_model_endpoint and edit_model_endpoint - Add forwarder_type, routes, extra_routes, worker_command, worker_env to create_model_bundle_from_runnable_image_v2 Co-Authored-By: Claude Opus 4.5 --- launch/client.py | 31 +++++++++++++++++++++++++++++++ 1 file changed, 31 insertions(+) diff --git a/launch/client.py b/launch/client.py index 89a686a9..db96c54e 100644 --- a/launch/client.py +++ b/launch/client.py @@ -679,6 +679,11 @@ def create_model_bundle_from_runnable_image_v2( env: Dict[str, str], readiness_initial_delay_seconds: int, metadata: Optional[Dict[str, Any]] = None, + forwarder_type: Optional[str] = None, + routes: Optional[List[str]] = None, + extra_routes: Optional[List[str]] = None, + worker_command: Optional[List[str]] = None, + worker_env: Optional[Dict[str, str]] = None, ) -> CreateModelBundleV2Response: """ Create a model bundle from a runnable image. The specified ``command`` must start a process @@ -711,6 +716,16 @@ def create_model_bundle_from_runnable_image_v2( metadata: Metadata to record with the bundle. + forwarder_type: The type of forwarder to use for the bundle. + + routes: A list of routes that the bundle will serve. + + extra_routes: A list of additional routes that the bundle will serve. + + worker_command: The command to start worker processes. + + worker_env: A dictionary of environment variables for worker processes. + Returns: An object containing the following keys: @@ -728,6 +743,11 @@ def create_model_bundle_from_runnable_image_v2( env=env, protocol="http", readiness_initial_delay_seconds=readiness_initial_delay_seconds, + forwarder_type=forwarder_type, + routes=routes, + extra_routes=extra_routes, + worker_command=worker_command, + worker_env=worker_env, ) ) create_model_bundle_request = CreateModelBundleV2Request( @@ -1400,6 +1420,7 @@ def create_model_endpoint( min_workers: int = 1, max_workers: int = 1, per_worker: int = 10, + concurrent_requests_per_worker: Optional[int] = None, gpu_type: Optional[str] = None, endpoint_type: str = "sync", high_priority: Optional[bool] = False, @@ -1464,6 +1485,9 @@ def create_model_endpoint( concurrent requests in the workload. Divide this number by ``max_workers``. Doing this ensures that the number of workers will "climb" to ``max_workers``. + concurrent_requests_per_worker: The maximum number of concurrent requests that each + worker can handle. If not specified, the server will use a default value. + gpu_type: If specifying a non-zero number of gpus, this controls the type of gpu requested. Here are the supported values: @@ -1530,6 +1554,7 @@ def create_model_endpoint( min_workers=min_workers, max_workers=max_workers, per_worker=per_worker, + concurrent_requests_per_worker=concurrent_requests_per_worker, gpu_type=gpu_type, high_priority=high_priority, default_callback_url=default_callback_url, @@ -1584,6 +1609,7 @@ def create_model_endpoint( model_bundle_id=model_bundle.id, name=endpoint_name, per_worker=per_worker, + concurrent_requests_per_worker=concurrent_requests_per_worker, high_priority=high_priority, post_inference_hooks=post_inference_hooks_strs, default_callback_url=default_callback_url, @@ -1621,6 +1647,7 @@ def edit_model_endpoint( min_workers: Optional[int] = None, max_workers: Optional[int] = None, per_worker: Optional[int] = None, + concurrent_requests_per_worker: Optional[int] = None, gpu_type: Optional[str] = None, high_priority: Optional[bool] = None, post_inference_hooks: Optional[List[PostInferenceHooks]] = None, @@ -1671,6 +1698,9 @@ def edit_model_endpoint( ``per_worker``, then the number of workers will be increased to meet the elevated traffic. + concurrent_requests_per_worker: The maximum number of concurrent requests that each + worker can handle. If not specified, the server will use a default value. + gpu_type: If specifying a non-zero number of gpus, this controls the type of gpu requested. Here are the supported values: @@ -1764,6 +1794,7 @@ def edit_model_endpoint( min_workers=min_workers, model_bundle_id=model_bundle_id, per_worker=per_worker, + concurrent_requests_per_worker=concurrent_requests_per_worker, high_priority=high_priority, post_inference_hooks=post_inference_hooks_strs, default_callback_url=default_callback_url,