From aeed5e66379bfcdf3b4c1393f132f90096bf7f8d Mon Sep 17 00:00:00 2001 From: Sebastiaan van Stijn Date: Mon, 15 Jul 2024 13:39:09 +0200 Subject: [PATCH] use sync.OnceValue for various regular expressions, require go1.21 Using regexp.MustCompile consumes a significant amount of memory when importing the package, even if those regular expressions are not used. This changes compiling the regular expressions to use a sync.OnceValue so that they're only compiled the first time they're used. There are various regular expressions remaining that are still compiled on import, but these are exported, so changing them to a sync.OnceValue would be a breaking change; we can still decide to do so, but leaving that for a follow-up. It's worth noting that sync.OnceValue requires go1.21 or up, so raising the minimum version accordingly. Signed-off-by: Sebastiaan van Stijn --- normalize.go | 4 ++-- reference.go | 10 +++++----- regexp.go | 17 +++++++++++++---- regexp_test.go | 8 ++++---- 4 files changed, 24 insertions(+), 15 deletions(-) diff --git a/normalize.go b/normalize.go index 4979eec..7c83bd4 100644 --- a/normalize.go +++ b/normalize.go @@ -64,7 +64,7 @@ type normalizedNamed interface { // _ "crypto/sha256" // ) func ParseNormalizedNamed(s string) (Named, error) { - if ok := anchoredIdentifierRegexp.MatchString(s); ok { + if ok := anchoredIdentifierRegexp().MatchString(s); ok { return nil, fmt.Errorf("invalid repository name (%s), cannot specify 64-byte hexadecimal strings", s) } domain, remainder := splitDockerDomain(s) @@ -274,7 +274,7 @@ func TagNameOnly(ref Named) Named { // _ "crypto/sha256" // ) func ParseAnyReference(ref string) (Reference, error) { - if ok := anchoredIdentifierRegexp.MatchString(ref); ok { + if ok := anchoredIdentifierRegexp().MatchString(ref); ok { return digestReference("sha256:" + ref), nil } if dgst, err := digest.Parse(ref); err == nil { diff --git a/reference.go b/reference.go index 14d5b46..2fceb46 100644 --- a/reference.go +++ b/reference.go @@ 
-207,7 +207,7 @@ func Path(named Named) (name string) { // If no valid hostname is found, the hostname is empty and the full value // is returned as name func splitDomain(name string) (string, string) { - match := anchoredNameRegexp.FindStringSubmatch(name) + match := anchoredNameRegexp().FindStringSubmatch(name) if len(match) != 3 { return "", name } @@ -241,7 +241,7 @@ func Parse(s string) (Reference, error) { var repo repository - nameMatch := anchoredNameRegexp.FindStringSubmatch(matches[1]) + nameMatch := anchoredNameRegexp().FindStringSubmatch(matches[1]) if len(nameMatch) == 3 { repo.domain = nameMatch[1] repo.path = nameMatch[2] @@ -292,7 +292,7 @@ func ParseNamed(s string) (Named, error) { // WithName returns a named object representing the given string. If the input // is invalid ErrReferenceInvalidFormat will be returned. func WithName(name string) (Named, error) { - match := anchoredNameRegexp.FindStringSubmatch(name) + match := anchoredNameRegexp().FindStringSubmatch(name) if match == nil || len(match) != 3 { return nil, ErrReferenceInvalidFormat } @@ -310,7 +310,7 @@ func WithName(name string) (Named, error) { // WithTag combines the name from "name" and the tag from "tag" to form a // reference incorporating both the name and the tag. func WithTag(name Named, tag string) (NamedTagged, error) { - if !anchoredTagRegexp.MatchString(tag) { + if !anchoredTagRegexp().MatchString(tag) { return nil, ErrTagInvalidFormat } var repo repository @@ -336,7 +336,7 @@ func WithTag(name Named, tag string) (NamedTagged, error) { // WithDigest combines the name from "name" and the digest from "digest" to form // a reference incorporating both the name and the digest. 
func WithDigest(name Named, digest digest.Digest) (Canonical, error) { - if !anchoredDigestRegexp.MatchString(digest.String()) { + if !anchoredDigestRegexp().MatchString(digest.String()) { return nil, ErrDigestInvalidFormat } var repo repository diff --git a/regexp.go b/regexp.go index 65bc49d..3d65c39 100644 --- a/regexp.go +++ b/regexp.go @@ -3,6 +3,7 @@ package reference import ( "regexp" "strings" + "sync" ) // DigestRegexp matches well-formed digests, including algorithm (e.g. "sha256:"). @@ -111,11 +112,15 @@ var ( // anchoredTagRegexp matches valid tag names, anchored at the start and // end of the matched string. - anchoredTagRegexp = regexp.MustCompile(anchored(tag)) + anchoredTagRegexp = sync.OnceValue(func() *regexp.Regexp { + return regexp.MustCompile(anchored(tag)) + }) // anchoredDigestRegexp matches valid digests, anchored at the start and // end of the matched string. - anchoredDigestRegexp = regexp.MustCompile(anchored(digestPat)) + anchoredDigestRegexp = sync.OnceValue(func() *regexp.Regexp { + return regexp.MustCompile(anchored(digestPat)) + }) // pathComponent restricts path-components to start with an alphanumeric // character, with following parts able to be separated by a separator @@ -131,13 +136,17 @@ var ( // anchoredNameRegexp is used to parse a name value, capturing the // domain and trailing components. - anchoredNameRegexp = regexp.MustCompile(anchored(optional(capture(domainAndPort), `/`), capture(remoteName))) + anchoredNameRegexp = sync.OnceValue(func() *regexp.Regexp { + return regexp.MustCompile(anchored(optional(capture(domainAndPort), `/`), capture(remoteName))) + }) referencePat = anchored(capture(namePat), optional(`:`, capture(tag)), optional(`@`, capture(digestPat))) // anchoredIdentifierRegexp is used to check or match an // identifier value, anchored at start and end of string. 
- anchoredIdentifierRegexp = regexp.MustCompile(anchored(identifier)) + anchoredIdentifierRegexp = sync.OnceValue(func() *regexp.Regexp { + return regexp.MustCompile(anchored(identifier)) + }) ) // optional wraps the expression in a non-capturing group and makes the diff --git a/regexp_test.go b/regexp_test.go index ca4680d..b92ed6a 100644 --- a/regexp_test.go +++ b/regexp_test.go @@ -176,9 +176,9 @@ func TestDomainRegexp(t *testing.T) { func TestFullNameRegexp(t *testing.T) { t.Parallel() - if anchoredNameRegexp.NumSubexp() != 2 { + if anchoredNameRegexp().NumSubexp() != 2 { t.Fatalf("anchored name regexp should have two submatches: %v, %v != 2", - anchoredNameRegexp, anchoredNameRegexp.NumSubexp()) + anchoredNameRegexp(), anchoredNameRegexp().NumSubexp()) } tests := []regexpMatch{ @@ -469,7 +469,7 @@ func TestFullNameRegexp(t *testing.T) { tc := tc t.Run(tc.input, func(t *testing.T) { t.Parallel() - checkRegexp(t, anchoredNameRegexp, tc) + checkRegexp(t, anchoredNameRegexp(), tc) }) } } @@ -580,7 +580,7 @@ func TestIdentifierRegexp(t *testing.T) { tc := tc t.Run(tc.input, func(t *testing.T) { t.Parallel() - match := anchoredIdentifierRegexp.MatchString(tc.input) + match := anchoredIdentifierRegexp().MatchString(tc.input) if match != tc.match { t.Errorf("Expected match=%t, got %t", tc.match, match) }