Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -110,5 +110,30 @@
"source": "openshift:payload:cluster-version-operator",
"lifecycle": "informing",
"environmentSelector": {}
},
{
"name": "[Jira:\"Cluster Version Operator\"] cluster-version-operator must get the APIServer when the TLS profile manager is created",
"labels": {
"Lifecycle:informing": {}
},
"resources": {
"isolation": {}
},
"source": "openshift:payload:cluster-version-operator",
"lifecycle": "informing",
"environmentSelector": {}
},
{
"name": "[Jira:\"Cluster Version Operator\"] cluster-version-operator should update TLS profile",
"labels": {
"Local": {},
"OTA-1996": {}
},
"resources": {
"isolation": {}
},
"source": "openshift:payload:cluster-version-operator",
"lifecycle": "blocking",
"environmentSelector": {}
}
]
9 changes: 7 additions & 2 deletions pkg/tls/tls.go
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,11 @@ type Settings struct {
CipherSuites []uint16
}

// Log message keywords emitted by the TLS profile manager. They are exported
// so that code outside this package can search the operator logs for the
// exact phrases.
const (
// APIServerNotAvailableAtStartupLogKeyword is the leading phrase of the
// warning NewProfileManager logs when the APIServer resource cannot be
// fetched from the lister.
APIServerNotAvailableAtStartupLogKeyword = "APIServer resource not available at startup"
// SyncedCachedTLSProfileLogKeyword is the message updateSettings logs at
// verbosity 2 just before it returns successfully.
SyncedCachedTLSProfileLogKeyword = "Synced cached TLS profile"
)

// NewProfileManager creates a new TLS profile manager and performs initial resolution.
// Falls back to safe defaults on any error to prioritize availability.
func NewProfileManager(apiServerInformer configinformersv1.APIServerInformer, overrides *Settings) (*ProfileManager, error) {
Expand All @@ -39,7 +44,7 @@ func NewProfileManager(apiServerInformer configinformersv1.APIServerInformer, ov

apiServer, err := apiServerInformer.Lister().Get(tlsprofile.APIServerName)
if err != nil {
klog.Warningf("APIServer resource not available at startup: %v, using fallback defaults", err)
klog.Warningf("%s: %v, using fallback defaults", APIServerNotAvailableAtStartupLogKeyword, err)
apiServer = nil
}

Expand Down Expand Up @@ -106,7 +111,7 @@ func (m *ProfileManager) updateSettings(apiServer *configv1.APIServer) error {
m.applyProfile = applyFunc
m.mu.Unlock()

klog.V(2).Info("Synced cached TLS profile")
klog.V(2).Info(SyncedCachedTLSProfileLogKeyword)
return nil
}

Expand Down
2 changes: 1 addition & 1 deletion test/cvo/cvo.go
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ import (
"github.com/openshift/cluster-version-operator/test/util"
)

var logger = g.GinkgoLogr.WithName("cluster-version-operator-tests")
var logger = util.Logger

var _ = g.Describe(`[Jira:"Cluster Version Operator"] cluster-version-operator-tests`, func() {
g.It("should support passing tests", func() {
Expand Down
45 changes: 45 additions & 0 deletions test/cvo/prometheus.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
package cvo

import (
"fmt"
"regexp"
)

// prometheusTarget is one entry of the active targets list decoded from the
// Prometheus "api/v1/targets" response.
type prometheusTarget struct {
	Labels    map[string]string // label set attached to the target
	Health    string            // health state reported for the target, e.g. "up"
	ScrapeUrl string            // URL recorded for the target's scrape endpoint
}

// prometheusTargets is the subset of the Prometheus "api/v1/targets" response
// that the tests decode.
// Ref. https://github.com/openshift/origin/blob/f4d1c208855b7216452041276a7f909c3cf477ce/test/extended/prometheus/prometheus.go#L970
type prometheusTargets struct {
	Data struct {
		ActiveTargets []prometheusTarget
	}
	Status string
}

// labels is a set of label name/value pairs a target is required to carry.
type labels map[string]string

// Expect reports whether at least one active target satisfies all of:
//   - every key/value pair in l equals the target's label of the same name
//     (a label missing on the target compares as the empty string),
//   - the target's health equals health,
//   - the target's scrape URL matches the regular expression scrapeURLPattern.
//
// It returns nil on the first such target. It returns an error when no target
// qualifies or when scrapeURLPattern is not a valid regular expression.
func (t *prometheusTargets) Expect(l labels, health, scrapeURLPattern string) error {
	// Compile once up front: the pattern does not change between targets, and
	// a malformed pattern is reported as an error instead of a panic.
	scrapeURLRegexp, err := regexp.Compile(scrapeURLPattern)
	if err != nil {
		return fmt.Errorf("invalid scrape URL pattern %q: %w", scrapeURLPattern, err)
	}

nextTarget:
	for _, target := range t.Data.ActiveTargets {
		if target.Health != health {
			continue
		}
		for k, v := range l {
			if target.Labels[k] != v {
				continue nextTarget
			}
		}
		if scrapeURLRegexp.MatchString(target.ScrapeUrl) {
			return nil
		}
	}
	return fmt.Errorf("no match for %v with health %s and scrape URL %s", l, health, scrapeURLPattern)
}
263 changes: 263 additions & 0 deletions test/cvo/tls.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,263 @@
package cvo

import (
"bufio"
"context"
"encoding/json"
"fmt"
"io"
"strings"
"time"

g "github.com/onsi/ginkgo/v2"
o "github.com/onsi/gomega"

corev1 "k8s.io/api/core/v1"
kerrors "k8s.io/apimachinery/pkg/api/errors"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/util/wait"
"k8s.io/client-go/kubernetes"
"k8s.io/client-go/rest"

oteginkgo "github.com/openshift-eng/openshift-tests-extension/pkg/ginkgo"
configv1 "github.com/openshift/api/config/v1"
configv1client "github.com/openshift/client-go/config/clientset/versioned/typed/config/v1"
routev1client "github.com/openshift/client-go/route/clientset/versioned"
tlsprofile "github.com/openshift/controller-runtime-common/pkg/tls"
"github.com/openshift/library-go/pkg/crypto"

"github.com/openshift/cluster-version-operator/pkg/external"
"github.com/openshift/cluster-version-operator/pkg/tls"
"github.com/openshift/cluster-version-operator/test/util"
)

var _ = g.Describe(`[Jira:"Cluster Version Operator"] cluster-version-operator`, func() {

// Shared state for the specs in this container. The clients, the Prometheus
// access details and the backup are assigned in BeforeEach.
var (
// c is the REST config returned by util.GetRestConfig.
c *rest.Config
kubeClient kubernetes.Interface
configClient *configv1client.ConfigV1Client
routeClient *routev1client.Clientset
// err receives the setup errors in BeforeEach. Several closures below
// declare their own err with := and therefore do not write to this one.
err error

ctx = context.Background()
// needRecover is set to true by a spec after it has updated the APIServer
// resource; AfterEach restores the spec from backup when it is true.
needRecover bool
// backup holds a copy of the APIServer spec taken in BeforeEach.
backup configv1.APIServerSpec

// prometheusURL and bearerToken are used to query the Prometheus API.
prometheusURL, bearerToken string
)

// Before every spec: build the clients, skip on HyperShift and MicroShift,
// look up how to reach Prometheus, and keep a copy of the APIServer spec so
// that AfterEach can restore it.
g.BeforeEach(func() {
	c, err = util.GetRestConfig()
	o.Expect(err).NotTo(o.HaveOccurred())

	o.Expect(util.SkipIfHypershift(ctx, c)).To(o.BeNil())
	o.Expect(util.SkipIfMicroshift(ctx, c)).To(o.BeNil())

	kubeClient, err = util.GetKubeClient(c)
	o.Expect(err).NotTo(o.HaveOccurred())

	configClient, err = configv1client.NewForConfig(c)
	o.Expect(err).NotTo(o.HaveOccurred())

	routeClient, err = routev1client.NewForConfig(c)
	o.Expect(err).NotTo(o.HaveOccurred())

	prometheusURL, err = util.PrometheusRouteURL(ctx, routeClient)
	o.Expect(err).NotTo(o.HaveOccurred(), "Failed to get public url of prometheus")
	bearerToken, err = util.RequestPrometheusServiceAccountAPIToken(ctx, kubeClient)
	o.Expect(err).NotTo(o.HaveOccurred(), "Failed to request Prometheus service account API token")

	// getErr is deliberately local: := here would otherwise shadow the
	// container-level err for the rest of this function.
	apiServer, getErr := configClient.APIServers().Get(ctx, tlsprofile.APIServerName, metav1.GetOptions{})
	o.Expect(getErr).NotTo(o.HaveOccurred())
	backup = *apiServer.Spec.DeepCopy()
	// An empty TLSAdherence is replaced by the legacy policy in the backup.
	// NOTE(review): presumably because the field cannot be reset to empty
	// once a spec has set it — confirm against the API validation.
	if backup.TLSAdherence == "" {
		backup.TLSAdherence = configv1.TLSAdherencePolicyLegacyAdheringComponentsOnly
	}
})

// After every spec: if the spec changed the APIServer resource, write the
// spec captured before the spec started back to the cluster.
g.AfterEach(func() {
	if !needRecover {
		return
	}

	apiServer, err := configClient.APIServers().Get(ctx, tlsprofile.APIServerName, metav1.GetOptions{})
	o.Expect(err).NotTo(o.HaveOccurred())
	apiServer.Spec = backup
	_, err = configClient.APIServers().Update(ctx, apiServer, metav1.UpdateOptions{})
	o.Expect(err).NotTo(o.HaveOccurred())

	// The flag is shared by all specs in this container; clear it once the
	// restore succeeded so later specs that change nothing skip the update.
	needRecover = false
})

// Automate the manual verification in https://github.com/openshift/cluster-version-operator/pull/1338#issuecomment-4593397211
//
// The spec reads the logs of one CVO pod and fails if they contain the
// warning that the APIServer resource was not available at startup.
g.It("must get the APIServer when the TLS profile manager is created", oteginkgo.Informing(), func() {
	g.By("Checking if the APIServer exists on the cluster")
	_, err := configClient.APIServers().Get(ctx, tlsprofile.APIServerName, metav1.GetOptions{})
	if kerrors.IsNotFound(err) {
		g.Skip("Skipping test: APIServer/cluster not found on the cluster")
	}
	o.Expect(err).NotTo(o.HaveOccurred())

	g.By("Checking if CVO failed to load the APIServer when the TLS profile manager is created")
	podList, err := kubeClient.CoreV1().Pods(external.DefaultCVONamespace).List(ctx, metav1.ListOptions{
		LabelSelector: "k8s-app=cluster-version-operator",
	})
	o.Expect(err).NotTo(o.HaveOccurred())

	// Only the first pod returned by the selector is inspected.
	var podName string
	if len(podList.Items) > 0 {
		podName = podList.Items[0].Name
	}
	o.Expect(podName).NotTo(o.BeEmpty(), "Failed to find the CVO pod")

	req := kubeClient.CoreV1().Pods(external.DefaultCVONamespace).GetLogs(podName, &corev1.PodLogOptions{
		Follow: false,
	})

	podStream, err := req.Stream(ctx)
	o.Expect(err).NotTo(o.HaveOccurred())
	defer func() {
		err := podStream.Close()
		o.Expect(err).NotTo(o.HaveOccurred())
	}()

	buf := new(strings.Builder)
	_, err = io.Copy(buf, podStream)
	o.Expect(err).NotTo(o.HaveOccurred())
	o.Expect(strings.Contains(buf.String(), tls.APIServerNotAvailableAtStartupLogKeyword)).To(o.BeFalse(),
		"The logs of CVO pod %s contain %q", podName, tls.APIServerNotAvailableAtStartupLogKeyword)
})

// Local as it updates APIServer/cluster on the cluster which is very destructive and impacts many monitor tests
//
// The spec switches the cluster to the modern TLS profile with strict
// adherence, verifies that Prometheus keeps reporting the CVO target as up,
// and then looks for the CVO log line announcing the synced TLS profile.
g.It("should update TLS profile", g.Label("Local"), g.Label("OTA-1996"), func() {
	controlPlaneTopology, err := util.GetControlPlaneTopology(ctx, configClient)
	o.Expect(err).NotTo(o.HaveOccurred())
	if controlPlaneTopology == configv1.ExternalTopologyMode {
		g.Skip("Skipping test: running on External cluster!")
	}

	g.By("Checking if the CVO target is up in Prometheus")

	// promTargets fetches and decodes the current target list from Prometheus.
	promTargets := func() (*prometheusTargets, error) {
		contents, err := util.GetURLWithToken(util.MustJoinUrlPath(prometheusURL, "api/v1/targets"), bearerToken)
		if err != nil {
			return nil, err
		}
		targets := &prometheusTargets{}
		if err := json.Unmarshal([]byte(contents), targets); err != nil {
			return nil, err
		}
		// sanity check.
		if len(targets.Data.ActiveTargets) < 5 {
			return nil, fmt.Errorf("only got %d targets, something is wrong", len(targets.Data.ActiveTargets))
		}
		return targets, nil
	}

	targets, err := promTargets()
	o.Expect(err).NotTo(o.HaveOccurred())
	// ref. https://github.com/openshift/origin/blob/f4d1c208855b7216452041276a7f909c3cf477ce/test/extended/prometheus/prometheus.go#L722
	err = targets.Expect(labels{"job": "cluster-version-operator"}, "up", "^https://.*/metrics$")
	o.Expect(err).NotTo(o.HaveOccurred())

	g.By("Setting up modern TLS profile and strict TLS adherence")
	// Only log lines stamped after this instant count as evidence of the update.
	// NOTE(review): this compares the test runner's clock with the timestamps
	// in the pod logs — assumes the two clocks are reasonably in sync.
	profileUpdateStart := time.Now()
	apiServer, err := configClient.APIServers().Get(ctx, tlsprofile.APIServerName, metav1.GetOptions{})
	o.Expect(err).NotTo(o.HaveOccurred())
	apiServer.Spec.TLSAdherence = configv1.TLSAdherencePolicyStrictAllComponents
	apiServer.Spec.TLSSecurityProfile = &configv1.TLSSecurityProfile{
		Type:   configv1.TLSProfileModernType,
		Modern: &configv1.ModernTLSProfile{},
	}

	_, err = configClient.APIServers().Update(ctx, apiServer, metav1.UpdateOptions{})
	o.Expect(err).NotTo(o.HaveOccurred())
	needRecover = true

	g.By("Checking if the CVO target is still up in Prometheus")
	// Three rounds, 30 seconds apart; each round polls for up to two minutes.
	count := 3
	for i := 0; i < count; i++ {
		time.Sleep(30 * time.Second)
		var errUp error
		errWait := wait.PollUntilContextTimeout(ctx, 10*time.Second, 2*time.Minute, true, func(context.Context) (bool, error) {
			// A failed fetch is treated like a target that is not up: it is
			// recorded and retried instead of aborting the spec on the
			// first transient error.
			current, fetchErr := promTargets()
			if fetchErr != nil {
				errUp = fetchErr
			} else {
				errUp = current.Expect(labels{"job": "cluster-version-operator"}, "up", "^https://.*/metrics$")
			}
			if errUp != nil {
				logger.Error(errUp, "The CVO target is not up in Prometheus, retrying...", "count", i)
			}
			return errUp == nil, nil
		})
		o.Expect(errWait).NotTo(o.HaveOccurred(), "The CVO target is not up in Prometheus with count=%d and errUp=%v", i, errUp)
		logger.Info("The CVO target is still up in Prometheus", "count", i, "at", time.Now().Format(time.RFC3339))
	}

	g.By("Checking if CVO updates TLS profile")
	podList, err := kubeClient.CoreV1().Pods(external.DefaultCVONamespace).List(ctx, metav1.ListOptions{
		LabelSelector: "k8s-app=cluster-version-operator",
	})
	o.Expect(err).NotTo(o.HaveOccurred())

	// Only the first pod returned by the selector is inspected.
	var podName string
	if len(podList.Items) > 0 {
		podName = podList.Items[0].Name
	}
	o.Expect(podName).NotTo(o.BeEmpty(), "Failed to find the CVO pod")

	// Timestamps are requested so that every line starts with a timestamp
	// parseLogTimestamp can split off.
	req := kubeClient.CoreV1().Pods(external.DefaultCVONamespace).GetLogs(podName, &corev1.PodLogOptions{
		Follow:     false,
		Timestamps: true,
	})

	podStream, err := req.Stream(ctx)
	o.Expect(err).NotTo(o.HaveOccurred())
	defer func() {
		err := podStream.Close()
		o.Expect(err).NotTo(o.HaveOccurred())
	}()

	scanner := bufio.NewScanner(podStream)
	// Allow lines up to 1 MiB; with the default limit a single oversized log
	// line would stop the scan before later lines are examined.
	scanner.Buffer(make([]byte, 0, bufio.MaxScanTokenSize), 1024*1024)
	var found bool
	for scanner.Scan() {
		line := scanner.Text()
		if !strings.Contains(line, tls.SyncedCachedTLSProfileLogKeyword) {
			continue
		}
		timeInLog, logMessage, err := parseLogTimestamp(line)
		if err != nil || !timeInLog.After(profileUpdateStart) {
			continue
		}
		logger.Info("Found log", "logMessage", logMessage, "timestamp", timeInLog.Format(time.RFC3339))
		found = true
		break
	}
	// Distinguish "log line absent" from "log could not be read completely".
	o.Expect(scanner.Err()).NotTo(o.HaveOccurred(), "Failed to read the logs of the CVO pod")
	o.Expect(found).To(o.BeTrue(), "Failed to find logs about updating TLS profile when ShouldHonorClusterTLSProfile=%t after %s",
		crypto.ShouldHonorClusterTLSProfile(apiServer.Spec.TLSAdherence), profileUpdateStart.Format(time.RFC3339))
})
})

// parseLogTimestamp splits a log line of the form "<timestamp> <message>" at
// the first space and parses the leading timestamp.
//
// It returns the parsed time and the remainder of the line after the first
// space. An error is returned when the line contains no space or when the
// text before the first space is not an RFC 3339 timestamp.
func parseLogTimestamp(logLine string) (time.Time, string, error) {
	timestampStr, logMessage, found := strings.Cut(logLine, " ")
	if !found {
		return time.Time{}, "", fmt.Errorf("invalid log format, no space separator found")
	}

	// One layout is enough: when parsing, the fractional seconds in
	// RFC3339Nano are optional, so timestamps with and without a fraction are
	// both accepted.
	t, err := time.Parse(time.RFC3339Nano, timestampStr)
	if err != nil {
		return time.Time{}, "", fmt.Errorf("failed to parse timestamp '%s': %w", timestampStr, err)
	}

	return t, logMessage, nil
}
Loading