From 49d12c63c73ca7d1a2c89388678e166ea322e306 Mon Sep 17 00:00:00 2001 From: Jose Angel Morena Date: Wed, 20 May 2026 00:54:03 +0200 Subject: [PATCH] feat(profile): add --operator-channel and --catalog-source flags Allow overriding the OLM subscription channel and catalog source per operator when creating SNC clusters. This lets QE teams test specific operator versions or custom index images without modifying profile code. Validation runs before any infrastructure is provisioned to fail fast on invalid inputs. --- cmd/mapt/cmd/aws/services/snc.go | 11 +++- docs/aws/openshift-snc.md | 50 ++++++++++++++++ pkg/provider/aws/action/snc/snc.go | 27 ++++++--- pkg/target/service/snc/api.go | 4 +- pkg/target/service/snc/profile/operator.go | 10 ++++ pkg/target/service/snc/profile/profile.go | 68 ++++++++++++++++++++++ 6 files changed, 159 insertions(+), 11 deletions(-) diff --git a/cmd/mapt/cmd/aws/services/snc.go b/cmd/mapt/cmd/aws/services/snc.go index 0e1d07665..8019e1a7b 100644 --- a/cmd/mapt/cmd/aws/services/snc.go +++ b/cmd/mapt/cmd/aws/services/snc.go @@ -27,6 +27,11 @@ const ( sncProfile = "profile" sncProfileDesc = "comma separated list of profiles to apply on the SNC cluster. Profiles available: virtualization, serverless-serving, serverless-eventing, serverless, servicemesh, ai, nvidia. The ai profile automatically includes servicemesh and serverless-serving as prerequisites and raises the minimum instance size to 16 vCPUs. 
The nvidia profile installs NFD and the NVIDIA GPU Operator" + + operatorChannel = "operator-channel" + operatorChannelDesc = "override the OLM subscription channel for an operator (--operator-channel serverless-operator=preview,nfd=4.17)" + catalogSource = "catalog-source" + catalogSourceDesc = "override the OLM catalog source with a custom index image (--catalog-source serverless-operator=quay.io/my-org/my-index:latest)" ) func GetOpenshiftSNCCmd() *cobra.Command { @@ -92,7 +97,9 @@ func createSNC() *cobra.Command { PullSecretFile: viper.GetString(pullSecretFile), Timeout: viper.GetString(params.Timeout), ServiceEndpoints: params.NetworkServiceEndpoints(), - Profiles: profiles}); err != nil { + Profiles: profiles, + OperatorChannels: viper.GetStringMapString(operatorChannel), + CatalogSources: viper.GetStringMapString(catalogSource)}); err != nil { return err } return nil @@ -107,6 +114,8 @@ func createSNC() *cobra.Command { flagSet.StringP(params.Timeout, "", "", params.TimeoutDesc) flagSet.StringToStringP(params.Tags, "", nil, params.TagsDesc) flagSet.StringSliceP(sncProfile, "", []string{}, sncProfileDesc) + flagSet.StringToStringP(operatorChannel, "", nil, operatorChannelDesc) + flagSet.StringToStringP(catalogSource, "", nil, catalogSourceDesc) params.AddComputeRequestFlags(flagSet) params.AddSpotFlags(flagSet) params.AddNetworkFlags(flagSet, awsParams.ServiceEndpointsDesc) diff --git a/docs/aws/openshift-snc.md b/docs/aws/openshift-snc.md index 3528bd9d5..9c68967c0 100644 --- a/docs/aws/openshift-snc.md +++ b/docs/aws/openshift-snc.md @@ -77,6 +77,56 @@ Multiple profiles can be specified as a comma-separated list (e.g., `--profile v | `nvidia` | Installs the [NVIDIA GPU Operator](https://docs.nvidia.com/datacenter/cloud-native/openshift/latest/install-gpu-ocp.html) on the cluster. 
Automatically installs [Node Feature Discovery](https://docs.redhat.com/en/documentation/openshift_container_platform/latest/html/specialized_hardware_and_driver_enablement/psap-node-feature-discovery-operator) (NFD) as a prerequisite and creates a ClusterPolicy with the recommended OpenShift defaults (CRI-O runtime, OCP driver toolkit). The cluster must run on a GPU-capable instance type (e.g. `g4dn`, `g5`, `p4d`).| +### Operator overrides + +Profiles install operators using the default OLM channel (`stable`) and catalog (`redhat-operators`). Two flags allow overriding these per operator, which is useful for testing pre-release operator builds: + +#### `--operator-channel` + +Override the OLM subscription channel for a specific operator: + +```bash +mapt aws openshift-snc create \ + --profile serverless-serving \ + --operator-channel serverless-operator=candidate +``` + +Multiple operators can be overridden at once: + +```bash +--operator-channel serverless-operator=preview,nfd=4.17 +``` + +#### `--catalog-source` + +Use a custom index image instead of the default catalog. This creates a `CatalogSource` CR in `openshift-marketplace` and points the operator's subscription to it: + +```bash +mapt aws openshift-snc create \ + --profile nvidia \ + --catalog-source gpu-operator-certified=quay.io/my-team/gpu-operator-index:test-v1.0 +``` + +Both flags can be combined: + +```bash +mapt aws openshift-snc create \ + --profile ai \ + --operator-channel serverless-operator=candidate \ + --catalog-source rhods-operator=quay.io/my-team/rhoai-index:nightly +``` + +When neither flag is provided, operators use the defaults: channel `stable` and catalog `redhat-operators` (unless overridden in the profile definition, e.g. `gpu-operator-certified` and `nfd` use `certified-operators`). + +The keys are operator package names as they appear in OLM. 
The operators installed by each profile are: + +| Profile | Operator package names | +|---------|----------------------| +| `serverless-serving` / `serverless-eventing` / `serverless` | `serverless-operator` | +| `servicemesh` | `servicemeshoperator3` | +| `ai` | `rhods-operator`, `servicemeshoperator`, `authorino-operator`, `serverless-operator` | +| `nvidia` | `gpu-operator-certified`, `nfd` | + ### Adding new profiles To add a new profile: diff --git a/pkg/provider/aws/action/snc/snc.go b/pkg/provider/aws/action/snc/snc.go index df643c51e..e702344f7 100644 --- a/pkg/provider/aws/action/snc/snc.go +++ b/pkg/provider/aws/action/snc/snc.go @@ -45,8 +45,10 @@ type openshiftSNCRequest struct { pullSecretFile *string serviceEndpoints []string allocationData *allocation.AllocationResult - profiles []string - diskSize *int + profiles []string + operatorChannels map[string]string + catalogSources map[string]string + diskSize *int } func (r *openshiftSNCRequest) validate() error { @@ -67,10 +69,13 @@ func Create(mCtxArgs *mc.ContextArgs, args *apiSNC.SNCArgs) (_ *apiSNC.SNCResult if err != nil { return nil, err } - // Validate profiles + // Validate profiles and operator overrides if err := profile.Validate(args.Profiles); err != nil { return nil, err } + if err := profile.ValidateOperatorOverrides(args.OperatorChannels, args.CatalogSources); err != nil { + return nil, err + } // Compose request prefix := util.If(len(args.Prefix) > 0, args.Prefix, "main") r := openshiftSNCRequest{ @@ -82,8 +87,10 @@ func Create(mCtxArgs *mc.ContextArgs, args *apiSNC.SNCArgs) (_ *apiSNC.SNCResult pullSecretFile: &args.PullSecretFile, timeout: &args.Timeout, serviceEndpoints: args.ServiceEndpoints, - profiles: args.Profiles, - diskSize: args.ComputeRequest.DiskSize} + profiles: args.Profiles, + operatorChannels: args.OperatorChannels, + catalogSources: args.CatalogSources, + diskSize: args.ComputeRequest.DiskSize} if args.Spot != nil { r.spot = args.Spot.Spot } @@ -290,10 +297,12 @@ func (r 
*openshiftSNCRequest) deploy(ctx *pulumi.Context) error { deletedWith = c.AutoscalingGroup } if err := profile.Deploy(ctx, r.profiles, &profile.DeployArgs{ - K8sProvider: k8sProvider, - Kubeconfig: kubeconfig, - Prefix: *r.prefix, - DeletedWith: deletedWith, + K8sProvider: k8sProvider, + Kubeconfig: kubeconfig, + Prefix: *r.prefix, + DeletedWith: deletedWith, + OperatorChannels: r.operatorChannels, + CatalogSources: r.catalogSources, }); err != nil { return err } diff --git a/pkg/target/service/snc/api.go b/pkg/target/service/snc/api.go index cc1a71325..c3de13b36 100644 --- a/pkg/target/service/snc/api.go +++ b/pkg/target/service/snc/api.go @@ -50,7 +50,9 @@ type SNCArgs struct { Spot *spotTypes.SpotArgs Timeout string ServiceEndpoints []string - Profiles []string + Profiles []string + OperatorChannels map[string]string + CatalogSources map[string]string } type SNCResults struct { diff --git a/pkg/target/service/snc/profile/operator.go b/pkg/target/service/snc/profile/operator.go index f13b85956..58e5979d7 100644 --- a/pkg/target/service/snc/profile/operator.go +++ b/pkg/target/service/snc/profile/operator.go @@ -59,8 +59,18 @@ func installOperator(ctx *pulumi.Context, args *DeployArgs, oi operatorInstall) catalogSource = catalogSourceRedHat } + if override, ok := args.OperatorChannels[oi.packageName]; ok { + channel = override + } + if cs, ok := args.catalogSourceCRs[oi.packageName]; ok { + catalogSource = cs.Name + } + deps := append([]pulumi.Resource{}, args.Deps...) deps = append(deps, oi.extraDeps...) + if cs, ok := args.catalogSourceCRs[oi.packageName]; ok { + deps = append(deps, cs.Resource) + } // If ogName is provided, create a dedicated namespace and OperatorGroup. 
if oi.ogName != "" { diff --git a/pkg/target/service/snc/profile/profile.go b/pkg/target/service/snc/profile/profile.go index a7f7f59f4..9fab4db06 100644 --- a/pkg/target/service/snc/profile/profile.go +++ b/pkg/target/service/snc/profile/profile.go @@ -1,11 +1,13 @@ package profile import ( + "crypto/sha256" "fmt" "maps" "slices" "github.com/pulumi/pulumi-kubernetes/sdk/v4/go/kubernetes" + "github.com/pulumi/pulumi-kubernetes/sdk/v4/go/kubernetes/apiextensions" corev1 "github.com/pulumi/pulumi-kubernetes/sdk/v4/go/kubernetes/core/v1" metav1 "github.com/pulumi/pulumi-kubernetes/sdk/v4/go/kubernetes/meta/v1" "github.com/pulumi/pulumi/sdk/v3/go/pulumi" @@ -63,6 +65,18 @@ type DeployArgs struct { // so that Pulumi skips deleting them individually during destroy — the // resources disappear when the VM is terminated. DeletedWith pulumi.Resource + // OperatorChannels maps operator packageName to an OLM channel override. + OperatorChannels map[string]string + // CatalogSources maps operator packageName to a custom index image URL. + CatalogSources map[string]string + + // catalogSourceCRs maps packageName to the CatalogSource CR info. + catalogSourceCRs map[string]catalogSourceInfo +} + +type catalogSourceInfo struct { + Name string + Resource pulumi.Resource } // Validate checks that all requested profiles are supported and @@ -88,6 +102,10 @@ func Validate(profiles []string) error { // The AI profile implicitly brings in Service Mesh v2 (Maistra) and // serverless-serving as prerequisites for Kserve. func Deploy(ctx *pulumi.Context, profiles []string, args *DeployArgs) error { + if err := args.ensureCatalogSources(ctx); err != nil { + return err + } + needServing := false needEventing := false needAI := false @@ -194,6 +212,56 @@ func (a *DeployArgs) newNamespace(ctx *pulumi.Context, name string, nsName pulum a.k8sOpts(extra...)...) 
}

+// ValidateOperatorOverrides checks the --operator-channel and --catalog-source
+// override maps. Every entry must have a non-empty operator package name
+// (the key) and a non-empty value (the channel or the index image). Entries
+// are inspected in sorted key order, so the same invalid input always yields
+// the same error. Nil or empty maps are valid.
+//
+// NOTE(review): keys are not checked against the package names the selected
+// profiles actually install. A misspelled package is accepted here; for
+// --catalog-source, ensureCatalogSources still creates a CatalogSource for it.
+// Confirm whether unknown packages should be rejected up front.
+func ValidateOperatorOverrides(channels, catalogs map[string]string) error {
+	if err := validateOverrides("operator-channel", "channel", channels); err != nil {
+		return err
+	}
+	return validateOverrides("catalog-source", "index image", catalogs)
+}
+
+// validateOverrides returns an error for the first entry of overrides (in
+// sorted key order) whose key or value is empty. flag is the CLI flag name
+// without the leading dashes and valueDesc names the value in the message.
+func validateOverrides(flag, valueDesc string, overrides map[string]string) error {
+	for _, pkg := range sortedKeys(overrides) {
+		if value := overrides[pkg]; pkg == "" || value == "" {
+			return fmt.Errorf("invalid --%s: both package name and %s must be non-empty (got %q=%q)", flag, valueDesc, pkg, value)
+		}
+	}
+	return nil
+}
+
+// sortedKeys returns the keys of m in ascending order. Map iteration order is
+// random, so callers use this wherever a stable order matters.
+func sortedKeys(m map[string]string) []string {
+	keys := make([]string, 0, len(m))
+	for k := range m {
+		keys = append(keys, k)
+	}
+	slices.Sort(keys)
+	return keys
+}
+
+// ensureCatalogSources creates one CatalogSource CR in openshift-marketplace
+// for every custom index image given via --catalog-source and records it in
+// a.catalogSourceCRs, keyed by operator package name, so that operator
+// subscriptions can reference it. It does nothing when no catalog sources
+// were requested.
+func (a *DeployArgs) ensureCatalogSources(ctx *pulumi.Context) error {
+	if len(a.CatalogSources) == 0 {
+		return nil
+	}
+	a.catalogSourceCRs = make(map[string]catalogSourceInfo, len(a.CatalogSources))
+	// Walk the packages in sorted order so the resources are registered in
+	// the same order on every run.
+	for _, pkg := range sortedKeys(a.CatalogSources) {
+		indexImage := a.CatalogSources[pkg]
+		// The name carries the first 4 bytes (8 hex characters) of the image
+		// reference's SHA-256, so a different image yields a different name.
+		sum := sha256.Sum256([]byte(indexImage))
+		csName := fmt.Sprintf("mapt-cs-%s-%x", pkg, sum[:4])
+		cs, err := apiextensions.NewCustomResource(ctx, csName,
+			&apiextensions.CustomResourceArgs{
+				ApiVersion: pulumi.String("operators.coreos.com/v1alpha1"),
+				Kind:       pulumi.String("CatalogSource"),
+				Metadata: &metav1.ObjectMetaArgs{
+					Name:      pulumi.String(csName),
+					Namespace: pulumi.String("openshift-marketplace"),
+				},
+				OtherFields: map[string]interface{}{
+					"spec": map[string]interface{}{
+						"sourceType":  "grpc",
+						"image":       indexImage,
+						"displayName": fmt.Sprintf("MAPT custom catalog for %s", pkg),
+						"publisher":   "MAPT",
+					},
+				},
+			},
+			a.k8sOpts(pulumi.DependsOn(a.Deps))...)
+		if err != nil {
+			return fmt.Errorf("creating CatalogSource %q for operator %q: %w", csName, pkg, err)
+		}
+		a.catalogSourceCRs[pkg] = catalogSourceInfo{Name: csName, Resource: cs}
+	}
+	return nil
+}
+
 // k8sOpts returns the common Pulumi resource options for K8s resources:
 // the K8s provider and (when set) the DeletedWith option. Extra options
 // (e.g. DependsOn) can be appended.