diff --git a/docs/reference/server-json/official-registry-requirements.md b/docs/reference/server-json/official-registry-requirements.md index e03b6598..886aee0d 100644 --- a/docs/reference/server-json/official-registry-requirements.md +++ b/docs/reference/server-json/official-registry-requirements.md @@ -33,6 +33,7 @@ Only trusted public registries are supported. Private registries and alternative - **NPM**: `https://registry.npmjs.org` only - **PyPI**: `https://pypi.org` only - **NuGet**: `https://api.nuget.org/v3/index.json` only +- **Cargo**: `https://crates.io` only - **Docker/OCI**: - Docker Hub (`docker.io`) - GitHub Container Registry (`ghcr.io`) diff --git a/internal/validators/registries/cargo.go b/internal/validators/registries/cargo.go index 18f7c0f1..d90af778 100644 --- a/internal/validators/registries/cargo.go +++ b/internal/validators/registries/cargo.go @@ -8,7 +8,6 @@ import ( "io" "net/http" "net/url" - "strings" "time" "github.com/modelcontextprotocol/registry/pkg/model" @@ -19,6 +18,19 @@ var ( ErrMissingVersionForCargo = errors.New("package version is required for Cargo packages") ) +// cargoUserAgent identifies the validator to crates.io. crates.io's crawler +// policy expects a non-generic User-Agent with a contact URL; a bare UA may be +// rate-limited or blocked. (Distinct from the package-level userAgent constant +// used by the NuGet validator, which has no contact URL.) +const cargoUserAgent = "MCP-Registry-Validator/1.0 (https://registry.modelcontextprotocol.io)" + +// cargoStaticHost is the CDN host crates.io serves rendered READMEs from. +const cargoStaticHost = "static.crates.io" + +// maxCargoReadmeBytes caps how much of a rendered README we buffer, so a hostile +// or oversized response cannot exhaust validator memory. +const maxCargoReadmeBytes = 5 << 20 // 5 MiB + // CargoReadmeMetaResponse is the structure returned by the crates.io readme metadata endpoint. 
// // With `Accept: application/json`, crates.io's /api/v1/crates/{name}/{version}/readme @@ -74,14 +86,134 @@ func ValidateCargo(ctx context.Context, pkg model.Package, serverName string) er return validateCargoREADME(ctx, pkg, serverName) } +// cargoAllowedHosts returns the set of hosts the validator is permitted to talk +// to for a given base URL. For the real crates.io base this is crates.io (the API) +// plus static.crates.io (the rendered-README CDN). For any other base — only the +// httptest-driven tests, since the public ValidateCargo pins the base to +// crates.io — it is the base host itself, so mock servers keep working. +// +// This is the allowlist enforced both on the README pointer (step 2 URL) and on +// every redirect hop, so a metadata response or redirect cannot steer the +// validator at an internal or attacker-chosen host (SSRF). +func cargoAllowedHosts(baseURL string) map[string]struct{} { + hosts := map[string]struct{}{} + if u, err := url.Parse(baseURL); err == nil && u.Hostname() != "" { + hosts[u.Hostname()] = struct{}{} + } + if baseURL == model.RegistryURLCrates { + hosts[cargoStaticHost] = struct{}{} + } + return hosts +} + +// newCargoHTTPClient builds the client used for all crates.io calls. The +// CheckRedirect policy pins every redirect hop to allowedHosts, so even though +// the initial URL is host-pinned, an upstream 3xx cannot redirect the validator +// to an unexpected host. 
+func newCargoHTTPClient(allowedHosts map[string]struct{}) *http.Client { + return &http.Client{ + Timeout: 10 * time.Second, + CheckRedirect: func(req *http.Request, via []*http.Request) error { + if _, ok := allowedHosts[req.URL.Hostname()]; !ok { + return fmt.Errorf("refusing redirect to unexpected host %q", req.URL.Hostname()) + } + if len(via) >= 10 { + return errors.New("stopped after 10 redirects") + } + return nil + }, + } +} + +// cargoVersionExists checks whether a specific crate version exists on crates.io, +// used to disambiguate a 403 from the README CDN. static.crates.io (S3) returns +// 403 both for a genuinely-missing crate/version AND for a crate that exists but +// has no rendered README, so a 403 alone cannot tell a publisher which it is. +// +// Returns (exists, determined): determined is false if the existence endpoint +// itself was unreachable or returned an unexpected status, in which case the +// caller should fall back to a generic message rather than assert existence. +func cargoVersionExists(ctx context.Context, client *http.Client, baseURL, identifier, version string) (exists, determined bool) { + versionURL := fmt.Sprintf("%s/api/v1/crates/%s/%s", + baseURL, url.PathEscape(identifier), url.PathEscape(version)) + req, err := http.NewRequestWithContext(ctx, http.MethodGet, versionURL, nil) + if err != nil { + return false, false + } + req.Header.Set("User-Agent", cargoUserAgent) + req.Header.Set("Accept", "application/json") + + resp, err := client.Do(req) + if err != nil { + return false, false + } + defer resp.Body.Close() + + switch resp.StatusCode { + case http.StatusOK: + return true, true + case http.StatusNotFound: + return false, true + default: + return false, false + } +} + +// cargoMetadataStatusError maps a non-200 status from the metadata endpoint to a +// caller-actionable error, distinguishing transient upstream conditions (429, +// 5xx) from a genuine fetch failure. 
+func cargoMetadataStatusError(identifier string, status int) error { + switch { + case status == http.StatusTooManyRequests: + return fmt.Errorf("crates.io rate-limited the metadata request for cargo package '%s' (status: 429) — likely transient, retry later", identifier) + case status >= 500 && status < 600: + return fmt.Errorf("crates.io upstream error fetching metadata for cargo package '%s' (status: %d) — likely transient, retry later", identifier, status) + default: + return fmt.Errorf("cargo package '%s' metadata fetch failed (status: %d)", identifier, status) + } +} + +// cargoReadmeStatusError maps a non-200 status from the README CDN to a +// caller-actionable error. 429/5xx are transient; 403 is disambiguated (see +// cargoReadme403Error) because static.crates.io returns it both for a missing +// crate/version and for a crate that has no rendered README. +func cargoReadmeStatusError(ctx context.Context, client *http.Client, pkg model.Package, serverName string, status int) error { + switch { + case status == http.StatusTooManyRequests: + return fmt.Errorf("crates.io rate-limited the README fetch for cargo package '%s' version '%s' (status: 429) — likely transient, retry later", pkg.Identifier, pkg.Version) + case status >= 500 && status < 600: + return fmt.Errorf("crates.io upstream error fetching README for cargo package '%s' version '%s' (status: %d) — likely transient, retry later", pkg.Identifier, pkg.Version, status) + case status == http.StatusForbidden: + return cargoReadme403Error(ctx, client, pkg, serverName) + default: + return fmt.Errorf("cargo package '%s' version '%s' README fetch failed (status: %d)", pkg.Identifier, pkg.Version, status) + } +} + +// cargoReadme403Error disambiguates a 403 from static.crates.io (S3's default +// for a missing key): a genuinely-missing crate/version versus a crate that +// exists but has no rendered README. 
It probes the crate-version metadata +// endpoint so the publisher gets an actionable message rather than a blanket +// "not found". +func cargoReadme403Error(ctx context.Context, client *http.Client, pkg model.Package, serverName string) error { + exists, determined := cargoVersionExists(ctx, client, pkg.RegistryBaseURL, pkg.Identifier, pkg.Version) + switch { + case determined && exists: + return fmt.Errorf("cargo package '%s' version '%s' exists on crates.io but has no rendered README. Add a README containing 'mcp-name: %s' and publish a new version", pkg.Identifier, pkg.Version, serverName) + case determined && !exists: + return fmt.Errorf("cargo package '%s' version '%s' not found on crates.io", pkg.Identifier, pkg.Version) + default: + return fmt.Errorf("cargo package '%s' version '%s' not found on crates.io (status: 403)", pkg.Identifier, pkg.Version) + } +} + // validateCargoREADME performs the two-call README fetch and the mcp-name token // check. It is split out from ValidateCargo so that httptest-based tests can // drive the HTTP pipeline against a mock server (exposed via export_test.go), // bypassing the exact-baseURL guard that ValidateCargo enforces for callers. func validateCargoREADME(ctx context.Context, pkg model.Package, serverName string) error { - client := &http.Client{Timeout: 10 * time.Second} - // crates.io's crawler policy expects a non-generic User-Agent identifying the source. - userAgent := "MCP-Registry-Validator/1.0 (https://registry.modelcontextprotocol.io)" + allowedHosts := cargoAllowedHosts(pkg.RegistryBaseURL) + client := newCargoHTTPClient(allowedHosts) // Step 1: fetch the README pointer from the documented API endpoint. 
metaURL := fmt.Sprintf("%s/api/v1/crates/%s/%s/readme", @@ -93,7 +225,7 @@ func validateCargoREADME(ctx context.Context, pkg model.Package, serverName stri if err != nil { return fmt.Errorf("failed to create crates.io metadata request: %w", err) } - metaReq.Header.Set("User-Agent", userAgent) + metaReq.Header.Set("User-Agent", cargoUserAgent) metaReq.Header.Set("Accept", "application/json") metaResp, err := client.Do(metaReq) @@ -103,11 +235,7 @@ func validateCargoREADME(ctx context.Context, pkg model.Package, serverName stri defer metaResp.Body.Close() if metaResp.StatusCode != http.StatusOK { - // 5xx from the metadata endpoint is upstream availability, not a missing crate. - if metaResp.StatusCode >= 500 && metaResp.StatusCode < 600 { - return fmt.Errorf("crates.io upstream error fetching metadata for cargo package '%s' (status: %d) — likely transient, retry later", pkg.Identifier, metaResp.StatusCode) - } - return fmt.Errorf("cargo package '%s' metadata fetch failed (status: %d)", pkg.Identifier, metaResp.StatusCode) + return cargoMetadataStatusError(pkg.Identifier, metaResp.StatusCode) } var meta CargoReadmeMetaResponse @@ -118,12 +246,22 @@ func validateCargoREADME(ctx context.Context, pkg model.Package, serverName stri return fmt.Errorf("cargo package '%s' metadata response missing 'url' field", pkg.Identifier) } - // Step 2: fetch the rendered README from the URL the API gave us. + // Pin the README pointer to an allowed host before fetching it, so a metadata + // response cannot steer the validator at an internal or attacker-chosen host. 
+ readmeParsed, err := url.Parse(meta.URL) + if err != nil || readmeParsed.Hostname() == "" { + return fmt.Errorf("cargo package '%s': crates.io returned an unparseable README URL", pkg.Identifier) + } + if _, ok := allowedHosts[readmeParsed.Hostname()]; !ok { + return fmt.Errorf("cargo package '%s': crates.io returned a README URL on unexpected host %q — refusing to fetch", pkg.Identifier, readmeParsed.Hostname()) + } + + // Step 2: fetch the rendered README from the (now host-validated) URL. readmeReq, err := http.NewRequestWithContext(ctx, http.MethodGet, meta.URL, nil) if err != nil { return fmt.Errorf("failed to create crates.io readme request: %w", err) } - readmeReq.Header.Set("User-Agent", userAgent) + readmeReq.Header.Set("User-Agent", cargoUserAgent) readmeReq.Header.Set("Accept", "text/html") readmeResp, err := client.Do(readmeReq) @@ -132,27 +270,20 @@ func validateCargoREADME(ctx context.Context, pkg model.Package, serverName stri } defer readmeResp.Body.Close() - // Missing crates and missing versions surface as 403 from static.crates.io - // (S3's default for missing keys), not 404. 5xx from the CDN is upstream - // availability — surface it as transient so callers can distinguish retryable - // failures from genuinely missing crates. 
if readmeResp.StatusCode != http.StatusOK { - if readmeResp.StatusCode >= 500 && readmeResp.StatusCode < 600 { - return fmt.Errorf("crates.io upstream error fetching README for cargo package '%s' version '%s' (status: %d) — likely transient, retry later", pkg.Identifier, pkg.Version, readmeResp.StatusCode) - } - return fmt.Errorf("cargo package '%s' version '%s' not found on crates.io (status: %d)", pkg.Identifier, pkg.Version, readmeResp.StatusCode) + return cargoReadmeStatusError(ctx, client, pkg, serverName, readmeResp.StatusCode) } - body, err := io.ReadAll(readmeResp.Body) + body, err := io.ReadAll(io.LimitReader(readmeResp.Body, maxCargoReadmeBytes)) if err != nil { return fmt.Errorf("failed to read rendered README: %w", err) } - // Search for the mcp-name: token. The token contains no characters - // that get HTML-escaped during README rendering (no <, >, &, ", '), so a direct - // substring match against the rendered HTML is reliable. - mcpNamePattern := "mcp-name: " + serverName - if strings.Contains(string(body), mcpNamePattern) { + // Search for the mcp-name: ownership token. The token contains no + // characters that get HTML-escaped during README rendering (no <, >, &, ", '), + // so matching against the rendered HTML is reliable; containsMCPNameToken + // additionally requires a trailing boundary to avoid prefix confusion. + if containsMCPNameToken(string(body), serverName) { return nil } diff --git a/internal/validators/registries/cargo_test.go b/internal/validators/registries/cargo_test.go index e06d0993..d2c8128d 100644 --- a/internal/validators/registries/cargo_test.go +++ b/internal/validators/registries/cargo_test.go @@ -151,10 +151,11 @@ func TestValidateCargo_RejectsMCPBOnlyFields(t *testing.T) { // Server names follow io.github.OWNER/REPO and may contain dots, slashes, // hyphens, underscores, and digits. None of these get HTML-escaped during -// README rendering, so substring match against the rendered HTML is reliable. 
-// These tests exercise format variations against a real crate that doesn't -// declare any mcp-name (serde) — every case fails ownership, but we verify -// the failure error preserves the exact server name unchanged. +// README rendering, so a boundary-anchored match against the rendered HTML is +// reliable. This is a hermetic POSITIVE test: each format variation is placed in +// a mock README as the exact mcp-name token and must validate successfully, so +// it actually exercises the match (the earlier version used a token-less live +// crate, where every case failed and the assertion was satisfied trivially). func TestValidateCargo_ServerNameFormats(t *testing.T) { ctx := context.Background() @@ -166,19 +167,33 @@ func TestValidateCargo_ServerNameFormats(t *testing.T) { {name: "multiple hyphens", serverName: "io.github.example/multi-hyphen-test-name"}, {name: "underscore", serverName: "io.github.example/snake_case_name"}, {name: "numeric suffix", serverName: "io.github.example/server-v2"}, + {name: "dotted name segment", serverName: "io.github.example/group.tool"}, } for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { + serverName := tt.serverName + var mock *httptest.Server + mock = httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + if strings.HasSuffix(r.URL.Path, "/readme") { + w.Header().Set("Content-Type", "application/json") + _ = json.NewEncoder(w).Encode(map[string]string{"url": mock.URL + "/static-readme"}) + return + } + // Token on its own line; the trailing newline is the boundary. 
+ fmt.Fprintf(w, "\nmcp-name: %s\n", serverName) + })) + defer mock.Close() + pkg := model.Package{ - RegistryType: model.RegistryTypeCargo, - Identifier: "serde", - Version: "1.0.219", + RegistryType: model.RegistryTypeCargo, + RegistryBaseURL: mock.URL, + Identifier: "fmt-crate", + Version: "0.1.0", } - err := registries.ValidateCargo(ctx, pkg, tt.serverName) - assert.Error(t, err) - assert.Contains(t, err.Error(), tt.serverName) + err := registries.ValidateCargoREADME(ctx, pkg, serverName) + assert.NoError(t, err, "server name %q should validate when present as an exact mcp-name token", serverName) }) } } @@ -197,11 +212,10 @@ func TestValidateCargo_PositivePathMock(t *testing.T) { mock = httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { if strings.HasSuffix(r.URL.Path, "/readme") { w.Header().Set("Content-Type", "application/json") - if err := json.NewEncoder(w).Encode(map[string]string{ - "url": mock.URL + "/static-readme", - }); err != nil { - t.Fatalf("encode meta response: %v", err) - } + // Encode of a static map cannot fail; ignore the error rather than + // calling t.Fatalf from this (server) goroutine, which is not the test + // goroutine and would truncate the response instead of failing cleanly. + _ = json.NewEncoder(w).Encode(map[string]string{"url": mock.URL + "/static-readme"}) return } // Rendered README HTML containing the mcp-name token. @@ -280,6 +294,34 @@ func TestValidateCargo_TransientUpstreamError(t *testing.T) { assert.NotContains(t, err.Error(), "not found", "transient upstream errors should not be reported as 'not found'") } +// TestValidateCargo_RejectsForeignReadmeHost is the SSRF guard: the README +// pointer returned by the metadata endpoint must be on an allowed host. Here the +// (mock) metadata endpoint points the README at an unrelated host; the validator +// must refuse to fetch it rather than follow the pointer anywhere crates.io names. 
+// The ".invalid" host never resolves, so a regression that dropped the host check +// would fail to connect rather than silently pass — but the assertion targets the +// explicit "unexpected host" refusal, which happens before any fetch. +func TestValidateCargo_RejectsForeignReadmeHost(t *testing.T) { + ctx := context.Background() + + mock := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, _ *http.Request) { + w.Header().Set("Content-Type", "application/json") + _ = json.NewEncoder(w).Encode(map[string]string{"url": "http://internal.invalid/secret-readme"}) + })) + defer mock.Close() + + pkg := model.Package{ + RegistryType: model.RegistryTypeCargo, + RegistryBaseURL: mock.URL, + Identifier: "evil-crate", + Version: "0.1.0", + } + + err := registries.ValidateCargoREADME(ctx, pkg, "io.github.test/evil") + assert.Error(t, err) + assert.Contains(t, err.Error(), "unexpected host", "a README URL on a foreign host must be refused (SSRF guard)") +} + // TestValidateCargoCombinedFixture exercises path-encoding and the full status // matrix in one httptest fixture — the same pattern praised by @P4ST4S on // PR #1321 for Go modules. One server dispatches on the crate identifier @@ -297,6 +339,7 @@ func TestValidateCargoCombinedFixture(t *testing.T) { metaStatus int readmeStatus int readmeBody string + versionExists bool // response for the /api/v1/crates/{n}/{v} existence probe (403 disambiguation) wantErr bool wantContains []string wantNotContains []string @@ -310,7 +353,11 @@ func TestValidateCargoCombinedFixture(t *testing.T) { readmeBody: fmt.Sprintf("

mcp-name: %s

", serverName), }, { - name: "metadata_404", + // Defensive branch: crates.io's metadata endpoint returns 200 (with a + // CDN url) even for missing crates, so it does NOT 404 in practice for a + // missing crate — the real missing-crate path is readme_403_missing below. + // This case only covers what we'd report if the API ever did 404. + name: "metadata_404_defensive", crateName: "combined-meta404", version: "0.1.0", metaStatus: http.StatusNotFound, @@ -318,13 +365,39 @@ func TestValidateCargoCombinedFixture(t *testing.T) { wantContains: []string{"metadata fetch failed", "status: 404"}, }, { - name: "readme_403_s3_not_found", - crateName: "combined-readme403", - version: "0.1.0", - metaStatus: http.StatusOK, - readmeStatus: http.StatusForbidden, - wantErr: true, - wantContains: []string{"not found", "status: 403"}, + // Real missing-crate/version path: CDN 403 + existence probe 404. + name: "readme_403_missing", + crateName: "combined-readme403-missing", + version: "0.1.0", + metaStatus: http.StatusOK, + readmeStatus: http.StatusForbidden, + versionExists: false, + wantErr: true, + wantContains: []string{"not found"}, + wantNotContains: []string{"has no rendered README"}, + }, + { + // Crate/version exists but has no rendered README: CDN 403 + existence + // probe 200. Must NOT be reported as "not found". 
+ name: "readme_403_no_readme", + crateName: "combined-readme403-noreadme", + version: "0.1.0", + metaStatus: http.StatusOK, + readmeStatus: http.StatusForbidden, + versionExists: true, + wantErr: true, + wantContains: []string{"has no rendered README"}, + wantNotContains: []string{"not found"}, + }, + { + name: "readme_429_transient", + crateName: "combined-readme429", + version: "0.1.0", + metaStatus: http.StatusOK, + readmeStatus: http.StatusTooManyRequests, + wantErr: true, + wantContains: []string{"transient"}, + wantNotContains: []string{"not found"}, }, { name: "readme_502_transient", @@ -336,6 +409,18 @@ func TestValidateCargoCombinedFixture(t *testing.T) { wantContains: []string{"transient"}, wantNotContains: []string{"not found"}, }, + { + // Prefix confusion: README declares a LONGER name; a claim for the + // shorter serverName must be rejected by the boundary-anchored match. + name: "prefix_confusion_rejected", + crateName: "combined-prefix", + version: "0.1.0", + metaStatus: http.StatusOK, + readmeStatus: http.StatusOK, + readmeBody: fmt.Sprintf("

mcp-name: %s-extended

", serverName), + wantErr: true, + wantContains: []string{"ownership validation failed"}, + }, } // lastMetaPath captures the metadata request path seen by the handler so @@ -349,6 +434,8 @@ func TestValidateCargoCombinedFixture(t *testing.T) { tt := &tests[i] metaPath := fmt.Sprintf("/api/v1/crates/%s/%s/readme", url.PathEscape(tt.crateName), url.PathEscape(tt.version)) + versionPath := fmt.Sprintf("/api/v1/crates/%s/%s", + url.PathEscape(tt.crateName), url.PathEscape(tt.version)) staticPath := "/readme-static/" + url.PathEscape(tt.crateName) if r.URL.Path == metaPath { @@ -361,6 +448,16 @@ func TestValidateCargoCombinedFixture(t *testing.T) { _ = json.NewEncoder(w).Encode(map[string]string{"url": srv.URL + staticPath}) return } + // Existence probe used to disambiguate a README 403. + if r.URL.Path == versionPath { + if tt.versionExists { + w.Header().Set("Content-Type", "application/json") + _ = json.NewEncoder(w).Encode(map[string]any{"version": map[string]string{"num": tt.version}}) + } else { + http.Error(w, "not found", http.StatusNotFound) + } + return + } if r.URL.Path == staticPath { if tt.readmeStatus != http.StatusOK { http.Error(w, "simulated non-200", tt.readmeStatus) diff --git a/internal/validators/registries/mcpname.go b/internal/validators/registries/mcpname.go new file mode 100644 index 00000000..a757db3b --- /dev/null +++ b/internal/validators/registries/mcpname.go @@ -0,0 +1,53 @@ +package registries + +import "strings" + +// isServerNameChar reports whether c can appear in an MCP server name. +// +// Server names follow the schema pattern ^[a-zA-Z0-9.-]+/[a-zA-Z0-9._-]+$ +// (reverse-DNS namespace + "/" + name), so the full set of characters that may +// continue a server name is [A-Za-z0-9._/-]. 
+func isServerNameChar(c byte) bool { + switch { + case c >= 'a' && c <= 'z', c >= 'A' && c <= 'Z', c >= '0' && c <= '9': + return true + case c == '.', c == '-', c == '_', c == '/': + return true + default: + return false + } +} + +// containsMCPNameToken reports whether the package README/description contains +// the ownership token "mcp-name: <serverName>" as a complete token — i.e. the +// matched server name is not merely a prefix of a longer declared name. +// +// A bare strings.Contains check is vulnerable to prefix confusion: a README that +// legitimately declares `mcp-name: io.github.acme/widget-pro` would otherwise +// satisfy an ownership claim for the shorter `io.github.acme/widget`, because the +// shorter string is a substring of the longer one. This is contained by namespace +// authorization (a publisher can only claim names within a namespace they own), +// but it still weakens the crate↔server-name binding the token is meant to prove, +// so we require a trailing boundary: the character following the server name must +// be the end of the content or any non-server-name character (whitespace, a +// newline, or an HTML tag delimiter from a rendered README such as `<`). +// +// Shared by the README-token validators (PyPI, NuGet, Cargo). NPM is unaffected +// because it compares an exact metadata field rather than scanning README text. +func containsMCPNameToken(content, serverName string) bool { + token := "mcp-name: " + serverName + searchFrom := 0 + for { + idx := strings.Index(content[searchFrom:], token) + if idx < 0 { + return false + } + tokenEnd := searchFrom + idx + len(token) + if tokenEnd >= len(content) || !isServerNameChar(content[tokenEnd]) { + return true + } + // This occurrence is a prefix of a longer name; keep scanning in case a + // properly-terminated occurrence appears later in the content. 
+ searchFrom = searchFrom + idx + 1 + } +} diff --git a/internal/validators/registries/mcpname_internal_test.go b/internal/validators/registries/mcpname_internal_test.go new file mode 100644 index 00000000..0d693523 --- /dev/null +++ b/internal/validators/registries/mcpname_internal_test.go @@ -0,0 +1,35 @@ +package registries + +import "testing" + +// TestContainsMCPNameToken covers the boundary-anchored ownership-token match +// shared by the PyPI, NuGet, and Cargo validators — in particular that a server +// name which is a prefix of a longer declared name does not satisfy the match. +func TestContainsMCPNameToken(t *testing.T) { + const name = "io.github.acme/widget" + + cases := []struct { + desc string + content string + want bool + }{ + {"exact on its own line", "intro text\nmcp-name: io.github.acme/widget\nmore text", true}, + {"exact at end of content", "mcp-name: io.github.acme/widget", true}, + {"followed by HTML tag", "

mcp-name: io.github.acme/widget

", true}, + {"followed by space", "mcp-name: io.github.acme/widget is the name", true}, + {"longer hyphenated name not a match", "mcp-name: io.github.acme/widget-pro\n", false}, + {"longer dotted name not a match", "mcp-name: io.github.acme/widget.core\n", false}, + {"longer slashed name not a match", "mcp-name: io.github.acme/widget/sub\n", false}, + {"absent", "nothing to see here", false}, + {"different name", "mcp-name: io.github.other/thing\n", false}, + {"prefix occurrence before a real one still matches", "mcp-name: io.github.acme/widget-pro then mcp-name: io.github.acme/widget\n", true}, + } + + for _, tc := range cases { + t.Run(tc.desc, func(t *testing.T) { + if got := containsMCPNameToken(tc.content, name); got != tc.want { + t.Fatalf("containsMCPNameToken(%q, %q) = %v, want %v", tc.content, name, got, tc.want) + } + }) + } +} diff --git a/pkg/model/types.go b/pkg/model/types.go index b0bf8e43..e9ade020 100644 --- a/pkg/model/types.go +++ b/pkg/model/types.go @@ -22,12 +22,13 @@ type Transport struct { // - NPM: RegistryType, Identifier (package name), Version, RegistryBaseURL (optional) // - PyPI: RegistryType, Identifier (package name), Version, RegistryBaseURL (optional) // - NuGet: RegistryType, Identifier (package ID), Version, RegistryBaseURL (optional) +// - Cargo: RegistryType, Identifier (crate name), Version, RegistryBaseURL (optional) // - OCI: RegistryType, Identifier (full image reference like "ghcr.io/owner/repo:tag") // - MCPB: RegistryType, Identifier (download URL), Version (optional), FileSHA256 (required) type Package struct { - // RegistryType indicates how to download packages (e.g., "npm", "pypi", "oci", "nuget", "mcpb") - RegistryType string `json:"registryType" minLength:"1" doc:"Registry type indicating how to download packages (e.g., 'npm', 'pypi', 'oci', 'nuget', 'mcpb')" example:"npm"` - // RegistryBaseURL is the base URL of the package registry (used by npm, pypi, nuget; not used by oci, mcpb) + // RegistryType indicates how 
to download packages (e.g., "npm", "pypi", "cargo", "oci", "nuget", "mcpb") + RegistryType string `json:"registryType" minLength:"1" doc:"Registry type indicating how to download packages (e.g., 'npm', 'pypi', 'cargo', 'oci', 'nuget', 'mcpb')" example:"npm"` + // RegistryBaseURL is the base URL of the package registry (used by npm, pypi, nuget, cargo; not used by oci, mcpb) RegistryBaseURL string `json:"registryBaseUrl,omitempty" format:"uri" doc:"Base URL of the package registry" example:"https://registry.npmjs.org"` // Identifier is the package identifier: // - For NPM/PyPI/NuGet: package name or ID