From 14e321ed7c29b3a29c47f5fbce929acf5133a3ad Mon Sep 17 00:00:00 2001 From: Thomas Honeyman Date: Fri, 16 Jan 2026 23:16:13 -0500 Subject: [PATCH 1/3] add license matching verification --- app/src/App/API.purs | 105 +++++++++++++++++++++++++++++++++ app/test/App/API.purs | 49 +++++++++++++++ lib/src/License.js | 31 ++++++++++ lib/src/License.purs | 9 +++ lib/test/Registry/License.purs | 43 ++++++++++++++ 5 files changed, 237 insertions(+) diff --git a/app/src/App/API.purs b/app/src/App/API.purs index afabd0bcc..74a1cc45a 100644 --- a/app/src/App/API.purs +++ b/app/src/App/API.purs @@ -2,6 +2,7 @@ module Registry.App.API ( AuthenticatedEffects , COMPILER_CACHE , CompilerCache(..) + , LicenseValidationError(..) , PackageSetUpdateEffects , PublishEffects , _compilerCache @@ -12,8 +13,10 @@ module Registry.App.API , getPacchettiBotti , packageSetUpdate , packagingTeam + , printLicenseValidationError , publish , removeIgnoredTarballFiles + , validateLicense ) where import Registry.App.Prelude @@ -53,6 +56,7 @@ import Parsing.Combinators.Array as Parsing.Combinators.Array import Parsing.String as Parsing.String import Registry.API.V1 (PackageSetJobData) import Registry.App.Auth as Auth +import Registry.App.CLI.Licensee as Licensee import Registry.App.CLI.Purs (CompilerFailure(..), compilerFailureCodec) import Registry.App.CLI.Purs as Purs import Registry.App.CLI.PursVersions as PursVersions @@ -90,6 +94,7 @@ import Registry.Foreign.Octokit (Team) import Registry.Foreign.Tmp as Tmp import Registry.Internal.Codec as Internal.Codec import Registry.Internal.Path as Internal.Path +import Registry.License as License import Registry.Location as Location import Registry.Manifest as Manifest import Registry.Metadata as Metadata @@ -519,6 +524,16 @@ publish maybeLegacyIndex payload = do when (Operation.Validation.isMetadataPackage (Manifest receivedManifest)) do Except.throw "The `metadata` package cannot be uploaded to the registry because it is a protected package." + -- Validate that the manifest license is consistent with licenses detected + -- in the repository (LICENSE file, package.json, bower.json). We skip this + -- check for legacy imports because they may have inconsistent licenses that + -- we've already accepted. + when (isNothing maybeLegacyIndex) do + Log.notice "Verifying license consistency..." + validateLicense downloadedPackage receivedManifest.license >>= case _ of + Nothing -> Log.debug "License validation passed." + Just err -> Except.throw $ printLicenseValidationError err + for_ (Operation.Validation.isNotUnpublished (Manifest receivedManifest) (Metadata metadata)) \info -> do Except.throw $ String.joinWith "\n" [ "You tried to upload a version that has been unpublished: " <> Version.print receivedManifest.version @@ -1322,3 +1337,93 @@ instance FsEncodable CompilerCache where } Exists.mkExists $ Cache.AsJson cacheKey codec next + +-- | Errors that can occur when validating license consistency +data LicenseValidationError = LicenseMismatch + { manifestLicense :: License + , detectedLicenses :: Array License + } + +derive instance Eq LicenseValidationError + +printLicenseValidationError :: LicenseValidationError -> String +printLicenseValidationError = case _ of + LicenseMismatch { manifestLicense, detectedLicenses } -> Array.fold + [ "License mismatch: The manifest specifies license '" + , License.print manifestLicense + , "' but the following license(s) were detected in your repository: " + , String.joinWith ", " (map License.print detectedLicenses) + , ". Please ensure your manifest license accurately represents all licenses " + , "in your repository. If multiple licenses apply, join them using SPDX " + , "conjunctions (e.g., 'MIT AND Apache-2.0' or 'MIT OR Apache-2.0')." + ] + +-- | Validate that the license in the manifest is consistent with licenses +-- | detected in the repository (LICENSE file, package.json, bower.json). +-- | +-- | This check ensures that the SPDX identifier asserted in the package +-- | manifest accurately represents the licenses present in the repository. +-- | If multiple distinct licenses are detected, they must all be represented +-- | in the manifest license (e.g., joined with AND or OR). +validateLicense :: forall r. FilePath -> License -> Run (LOG + AFF + r) (Maybe LicenseValidationError) +validateLicense packageDir manifestLicense = do + Log.debug "Detecting licenses from repository files..." + detected <- Run.liftAff $ Licensee.detect packageDir + case detected of + Left err -> do + -- If license detection fails, we let the package through. + -- The manifest license is the source of truth. + Log.warn $ "License detection failed, relying on manifest: " <> err + pure Nothing + Right detectedStrings -> do + let + -- Parse detected license strings into License values + parsedLicenses :: Array License + parsedLicenses = Array.mapMaybe (hush <<< License.parse) detectedStrings + + Log.debug $ "Detected licenses: " <> String.joinWith ", " detectedStrings + + -- If no licenses were detected, we can't validate - allow the package through + -- (the manifest license is the source of truth) + if Array.null parsedLicenses then do + Log.debug "No licenses detected from repository files, skipping validation." + pure Nothing + else do + -- Extract all license IDs from the manifest license expression. + -- This properly handles compound expressions like "MIT AND Apache-2.0". + case License.extractIds manifestLicense of + Left err -> do + -- If we can't parse the manifest license, log a warning but allow through + -- (the manifest was already validated during parsing) + Log.warn $ "Could not extract license IDs from manifest: " <> err + pure Nothing + Right manifestIds -> do + -- Convert manifest IDs to a Set for efficient lookup + let manifestIdSet = Set.fromFoldable manifestIds + + -- Extract and uppercase each detected license for case-insensitive comparison + let + getDetectedId :: License -> Maybe String + getDetectedId license = case License.extractIds license of + Right ids -> Array.head ids + Left _ -> Nothing + + -- A detected license is covered if its ID (uppercased) is in the manifest IDs + isCovered :: License -> Boolean + isCovered license = case getDetectedId license of + Just detectedId -> Set.member detectedId manifestIdSet + Nothing -> false + + uncoveredLicenses = Array.filter (not <<< isCovered) parsedLicenses + + if Array.null uncoveredLicenses then do + Log.debug "All detected licenses are covered by the manifest license." + pure Nothing + else do + Log.warn $ "License mismatch detected: manifest has '" <> License.print manifestLicense + <> "' but detected " + <> String.joinWith ", " (map License.print parsedLicenses) + pure $ Just $ LicenseMismatch + { manifestLicense + , detectedLicenses: uncoveredLicenses + } diff --git a/app/test/App/API.purs b/app/test/App/API.purs index 28f17f90e..36d9a4886 100644 --- a/app/test/App/API.purs +++ b/app/test/App/API.purs @@ -14,6 +14,7 @@ import Effect.Ref as Ref import Node.FS.Aff as FS.Aff import Node.Path as Path import Node.Process as Process +import Registry.App.API (LicenseValidationError(..), validateLicense) import Registry.App.API as API import Registry.App.CLI.Tar as Tar import Registry.App.Effect.Env as Env @@ -27,6 +28,7 @@ import Registry.Foreign.FSExtra as FS.Extra import Registry.Foreign.FastGlob as FastGlob import Registry.Foreign.Tmp as Tmp import Registry.Internal.Codec as Internal.Codec +import Registry.License as License import Registry.Manifest as Manifest import Registry.ManifestIndex as ManifestIndex import Registry.PackageName as PackageName @@ -57,6 +59,9 @@ spec = do Spec.describe "Verifies build plans" do checkBuildPlanToResolutions + Spec.describe "Validates licenses match" do + licenseValidation + Spec.describe "Includes correct files in tarballs" do removeIgnoredTarballFiles copySourceFiles @@ -441,3 +446,47 @@ copySourceFiles = Spec.hoistSpec identity (\_ -> Assert.Run.runBaseEffects) $ Sp writeFiles = Run.liftAff <<< traverse_ (\path -> FS.Aff.writeTextFile UTF8 (inTmp path) "module Module where") pure { source: tmp, destination: destTmp, writeDirectories, writeFiles } + +licenseValidation :: Spec.Spec Unit +licenseValidation = do + let fixtures = Path.concat [ "app", "fixtures", "licenses", "halogen-hooks" ] + + Spec.describe "validateLicense" do + Spec.it "Passes when manifest license covers all detected licenses" do + -- The halogen-hooks fixture has MIT in LICENSE and Apache-2.0 in package.json + let manifestLicense = unsafeLicense "MIT AND Apache-2.0" + result <- Assert.Run.runBaseEffects $ validateLicense fixtures manifestLicense + Assert.shouldEqual Nothing result + + Spec.it "Fails when manifest license does not cover a detected license" do + -- Manifest says MIT only, but Apache-2.0 is also in package.json + let manifestLicense = unsafeLicense "MIT" + result <- Assert.Run.runBaseEffects $ validateLicense fixtures manifestLicense + case result of + Just (LicenseMismatch { detectedLicenses }) -> + -- Should detect that Apache-2.0 is not covered + Assert.shouldContain (map License.print detectedLicenses) "Apache-2.0" + _ -> + Assert.fail "Expected LicenseMismatch error" + + Spec.it "Fails when manifest has completely different license" do + -- Manifest says BSD-3-Clause, but fixture has MIT and Apache-2.0 + let manifestLicense = unsafeLicense "BSD-3-Clause" + result <- Assert.Run.runBaseEffects $ validateLicense fixtures manifestLicense + case result of + Just (LicenseMismatch { manifestLicense: ml, detectedLicenses }) -> do + Assert.shouldEqual "BSD-3-Clause" (License.print ml) + -- Both MIT and Apache-2.0 should be in the detected licenses + Assert.shouldContain (map License.print detectedLicenses) "MIT" + Assert.shouldContain (map License.print detectedLicenses) "Apache-2.0" + _ -> + Assert.fail "Expected LicenseMismatch error" + + Spec.it "Passes when manifest uses OR conjunction" do + -- OR conjunction is also valid - means either license applies + let manifestLicense = unsafeLicense "MIT OR Apache-2.0" + result <- Assert.Run.runBaseEffects $ validateLicense fixtures manifestLicense + Assert.shouldEqual Nothing result + +unsafeLicense :: String -> License +unsafeLicense str = unsafeFromRight $ License.parse str diff --git a/lib/src/License.js b/lib/src/License.js index 451fc7749..1faaccfe8 100644 --- a/lib/src/License.js +++ b/lib/src/License.js @@ -8,3 +8,34 @@ export const parseSPDXLicenseIdImpl = (onError, onSuccess, identifier) => { return onError(`Invalid SPDX identifier ${identifier}`); } }; + +// Extract all license IDs from a parsed SPDX expression AST. +// The AST structure from spdx-expression-parse is: +// - Simple: { license: 'MIT' } +// - With exception: { license: 'GPL-2.0', exception: 'Classpath-exception-2.0' } +// - Compound: { left: {...}, conjunction: 'and'|'or', right: {...} } +const extractLicenseIds = (ast) => { + const ids = new Set(); + + const walk = (node) => { + if (!node) return; + if (node.license) { + // Normalize to uppercase for case-insensitive comparison + ids.add(node.license.toUpperCase()); + } + if (node.left) walk(node.left); + if (node.right) walk(node.right); + }; + + walk(ast); + return Array.from(ids); +}; + +export const extractLicenseIdsImpl = (onError, onSuccess, expression) => { + try { + const ast = parse(expression); + return onSuccess(extractLicenseIds(ast)); + } catch (_) { + return onError(`Invalid SPDX expression: ${expression}`); + } +}; diff --git a/lib/src/License.purs b/lib/src/License.purs index fa5ab9723..8e8d16783 100644 --- a/lib/src/License.purs +++ b/lib/src/License.purs @@ -9,6 +9,7 @@ module Registry.License ( License , SPDXConjunction(..) , codec + , extractIds , joinWith , parse , print @@ -52,6 +53,14 @@ foreign import parseSPDXLicenseIdImpl :: forall r. Fn3 (String -> r) (String -> parse :: String -> Either String License parse = runFn3 parseSPDXLicenseIdImpl Left (Right <<< License) +foreign import extractLicenseIdsImpl :: forall r. Fn3 (String -> r) (Array String -> r) String r + +-- | Extract all license identifiers from a SPDX expression. +-- | Returns an array of uppercase license IDs for case-insensitive comparison. +-- | For example, "MIT AND Apache-2.0" returns ["MIT", "APACHE-2.0"]. +extractIds :: License -> Either String (Array String) +extractIds (License expr) = runFn3 extractLicenseIdsImpl Left Right expr + -- | A valid conjunction for SPDX license identifiers. AND means that both -- | licenses must be satisfied; OR means that at least one license must be -- | satisfied. diff --git a/lib/test/Registry/License.purs b/lib/test/Registry/License.purs index d2a6a7cd1..41e914a89 100644 --- a/lib/test/Registry/License.purs +++ b/lib/test/Registry/License.purs @@ -44,6 +44,49 @@ spec = do Left err -> Assert.fail $ "joinWith created unparseable expression: " <> License.print joined <> " - Error: " <> err Right _ -> pure unit + Spec.describe "extractIds" do + Spec.it "extracts single license ID" do + case License.parse "MIT" of + Left err -> Assert.fail err + Right license -> case License.extractIds license of + Left err -> Assert.fail err + Right ids -> Assert.shouldEqual [ "MIT" ] ids + + Spec.it "extracts IDs from AND expression" do + case License.parse "MIT AND Apache-2.0" of + Left err -> Assert.fail err + Right license -> case License.extractIds license of + Left err -> Assert.fail err + Right ids -> do + Assert.shouldContain ids "MIT" + Assert.shouldContain ids "APACHE-2.0" + + Spec.it "extracts IDs from OR expression" do + case License.parse "MIT OR BSD-3-Clause" of + Left err -> Assert.fail err + Right license -> case License.extractIds license of + Left err -> Assert.fail err + Right ids -> do + Assert.shouldContain ids "MIT" + Assert.shouldContain ids "BSD-3-CLAUSE" + + Spec.it "extracts IDs from nested expression" do + case License.parse "MIT AND (Apache-2.0 OR BSD-3-Clause)" of + Left err -> Assert.fail err + Right license -> case License.extractIds license of + Left err -> Assert.fail err + Right ids -> do + Assert.shouldContain ids "MIT" + Assert.shouldContain ids "APACHE-2.0" + Assert.shouldContain ids "BSD-3-CLAUSE" + + Spec.it "normalizes license IDs to uppercase" do + case License.parse "mit" of + Left err -> Assert.fail err + Right license -> case License.extractIds license of + Left err -> Assert.fail err + Right ids -> Assert.shouldEqual [ "MIT" ] ids + valid :: Array String valid = [ "MIT" From 80ceae97190f64c9c930af9eb12c8ad9f13a98d7 Mon Sep 17 00:00:00 2001 From: Thomas Honeyman Date: Sat, 17 Jan 2026 08:28:15 -0500 Subject: [PATCH 2/3] tweak messaging, apply to all imports --- app/src/App/API.purs | 13 ++++--------- 1 file changed, 4 insertions(+), 9 deletions(-) diff --git a/app/src/App/API.purs b/app/src/App/API.purs index 74a1cc45a..98983a52e 100644 --- a/app/src/App/API.purs +++ b/app/src/App/API.purs @@ -524,15 +524,10 @@ publish maybeLegacyIndex payload = do when (Operation.Validation.isMetadataPackage (Manifest receivedManifest)) do Except.throw "The `metadata` package cannot be uploaded to the registry because it is a protected package." - -- Validate that the manifest license is consistent with licenses detected - -- in the repository (LICENSE file, package.json, bower.json). We skip this - -- check for legacy imports because they may have inconsistent licenses that - -- we've already accepted. - when (isNothing maybeLegacyIndex) do - Log.notice "Verifying license consistency..." - validateLicense downloadedPackage receivedManifest.license >>= case _ of - Nothing -> Log.debug "License validation passed." - Just err -> Except.throw $ printLicenseValidationError err + Log.info "Verifying licenses are consistent among manifest files..." + validateLicense downloadedPackage receivedManifest.license >>= case _ of + Nothing -> Log.debug "License validation passed." + Just err -> Except.throw $ printLicenseValidationError err for_ (Operation.Validation.isNotUnpublished (Manifest receivedManifest) (Metadata metadata)) \info -> do Except.throw $ String.joinWith "\n" From b54d2438e02faf315b58cd81158dbdba4531de59 Mon Sep 17 00:00:00 2001 From: Thomas Honeyman Date: Sat, 17 Jan 2026 08:43:31 -0500 Subject: [PATCH 3/3] tweaks --- app/src/App/API.purs | 77 +++++++++++++++++++------------------------- 1 file changed, 33 insertions(+), 44 deletions(-) diff --git a/app/src/App/API.purs b/app/src/App/API.purs index 98983a52e..7fb542bab 100644 --- a/app/src/App/API.purs +++ b/app/src/App/API.purs @@ -1366,59 +1366,48 @@ validateLicense packageDir manifestLicense = do detected <- Run.liftAff $ Licensee.detect packageDir case detected of Left err -> do - -- If license detection fails, we let the package through. - -- The manifest license is the source of truth. Log.warn $ "License detection failed, relying on manifest: " <> err pure Nothing Right detectedStrings -> do let - -- Parse detected license strings into License values parsedLicenses :: Array License parsedLicenses = Array.mapMaybe (hush <<< License.parse) detectedStrings Log.debug $ "Detected licenses: " <> String.joinWith ", " detectedStrings - -- If no licenses were detected, we can't validate - allow the package through - -- (the manifest license is the source of truth) if Array.null parsedLicenses then do - Log.debug "No licenses detected from repository files, skipping validation." + Log.debug "No licenses detected from repository files, nothing to validate." pure Nothing - else do - -- Extract all license IDs from the manifest license expression. - -- This properly handles compound expressions like "MIT AND Apache-2.0". - case License.extractIds manifestLicense of - Left err -> do - -- If we can't parse the manifest license, log a warning but allow through - -- (the manifest was already validated during parsing) - Log.warn $ "Could not extract license IDs from manifest: " <> err + else case License.extractIds manifestLicense of + Left err -> do + -- This shouldn't be possible (we have already validated the license) + -- as part of constructing the manifest + Log.warn $ "Could not extract license IDs from manifest: " <> err + pure Nothing + Right manifestIds -> do + let + manifestIdSet = Set.fromFoldable manifestIds + + -- A detected license is covered if all its IDs are in the manifest IDs + isCovered :: License -> Boolean + isCovered license = case License.extractIds license of + Left _ -> false + Right ids -> Array.all (\id -> Set.member id manifestIdSet) ids + + uncoveredLicenses :: Array License + uncoveredLicenses = Array.filter (not <<< isCovered) parsedLicenses + + if Array.null uncoveredLicenses then do + Log.debug "All detected licenses are covered by the manifest license." pure Nothing - Right manifestIds -> do - -- Convert manifest IDs to a Set for efficient lookup - let manifestIdSet = Set.fromFoldable manifestIds - - -- Extract and uppercase each detected license for case-insensitive comparison - let - getDetectedId :: License -> Maybe String - getDetectedId license = case License.extractIds license of - Right ids -> Array.head ids - Left _ -> Nothing - - -- A detected license is covered if its ID (uppercased) is in the manifest IDs - isCovered :: License -> Boolean - isCovered license = case getDetectedId license of - Just detectedId -> Set.member detectedId manifestIdSet - Nothing -> false - - uncoveredLicenses = Array.filter (not <<< isCovered) parsedLicenses - - if Array.null uncoveredLicenses then do - Log.debug "All detected licenses are covered by the manifest license." - pure Nothing - else do - Log.warn $ "License mismatch detected: manifest has '" <> License.print manifestLicense - <> "' but detected " - <> String.joinWith ", " (map License.print parsedLicenses) - pure $ Just $ LicenseMismatch - { manifestLicense - , detectedLicenses: uncoveredLicenses - } + else do + Log.warn $ Array.fold + [ "License mismatch detected: manifest has '" + , License.print manifestLicense + , "' but detected " + , String.joinWith ", " (map License.print parsedLicenses) + ] + pure $ Just $ LicenseMismatch + { manifestLicense + , detectedLicenses: uncoveredLicenses + }