From f269decb8a28f937bc762dbe075fb967b5a0c8bb Mon Sep 17 00:00:00 2001 From: SanskaarUndale21 Date: Tue, 12 May 2026 00:24:15 +0530 Subject: [PATCH 1/4] feat(exporter): skip GCS upload when object CRC32C is unchanged Before this change, the exporter uploaded every output file on every run regardless of whether the content had changed. Since all.zip and other outputs are now reproducible (#3491), unchanged files would accumulate redundant object generations in the bucket, making it harder for downstream consumers to detect real updates. The writer now calls ReadObjectAttrs before each GCS write and computes the CRC32C of the outgoing data using the Castagnoli polynomial (the same algorithm GCS uses for its stored checksums). If the checksums match, the upload is skipped and an info log is emitted. New objects (ErrNotFound) and any transient attr-read errors fall through to the normal upload path so the exporter remains correct under all conditions. Tests verify the three cases: same content is skipped, changed content is uploaded, and brand-new objects are always created. Fixes #3513 --- go/cmd/exporter/writer.go | 29 ++++++- go/cmd/exporter/writer_test.go | 134 +++++++++++++++++++++++++++++++++ 2 files changed, 162 insertions(+), 1 deletion(-) create mode 100644 go/cmd/exporter/writer_test.go diff --git a/go/cmd/exporter/writer.go b/go/cmd/exporter/writer.go index 363358a80b9..45eda36103a 100644 --- a/go/cmd/exporter/writer.go +++ b/go/cmd/exporter/writer.go @@ -2,6 +2,8 @@ package main import ( "context" + "errors" + "hash/crc32" "log/slog" "os" "path/filepath" @@ -11,6 +13,9 @@ import ( "github.com/google/osv.dev/go/osv/clients" ) +// crc32cTable uses the Castagnoli polynomial, matching GCS's own checksum algorithm. +var crc32cTable = crc32.MakeTable(crc32.Castagnoli) + // writeMsg holds the data for a file to be written. type writeMsg struct { path string @@ -31,7 +36,10 @@ func writer(ctx context.Context, cancel context.CancelFunc, inCh <-chan writeMsg } path := filepath.Join(pathPrefix, msg.path) if client != nil { - // Write to the bucket. + // Skip the upload if the object already has the same content. + if gcsContentUnchanged(ctx, client, path, msg.data) { + break + } err := client.WriteObject(ctx, path, msg.data, &clients.WriteOptions{ ContentType: msg.mimeType, }) @@ -62,3 +70,22 @@ func writer(ctx context.Context, cancel context.CancelFunc, inCh <-chan writeMsg } } } + +// gcsContentUnchanged returns true if the object at path already has the same +// CRC32C checksum as data, meaning the upload would be a no-op. Any error +// reading the object's attributes (other than ErrNotFound) is logged and +// treated as "content changed" so the upload proceeds. +func gcsContentUnchanged(ctx context.Context, client clients.CloudStorage, path string, data []byte) bool { + attrs, err := client.ReadObjectAttrs(ctx, path) + if err != nil { + if !errors.Is(err, clients.ErrNotFound) { + logger.WarnContext(ctx, "failed to read object attrs, proceeding with upload", slog.String("path", path), slog.Any("err", err)) + } + return false + } + if attrs.CRC32C == crc32.Checksum(data, crc32cTable) { + logger.InfoContext(ctx, "skipping upload, content unchanged", slog.String("path", path)) + return true + } + return false +} diff --git a/go/cmd/exporter/writer_test.go b/go/cmd/exporter/writer_test.go new file mode 100644 index 00000000000..c376e1fcb3e --- /dev/null +++ b/go/cmd/exporter/writer_test.go @@ -0,0 +1,134 @@ +package main + +import ( + "context" + "path/filepath" + "sync" + "testing" + + "github.com/google/osv.dev/go/osv/clients" + "github.com/google/osv.dev/go/testutils" +) + +// runWriter sends msgs to a writer goroutine and waits for it to finish. +func runWriter(t *testing.T, storage clients.CloudStorage, pathPrefix string, msgs []writeMsg) { + t.Helper() + ctx, cancel := context.WithCancel(context.Background()) + defer cancel() + + inCh := make(chan writeMsg, len(msgs)) + for _, m := range msgs { + inCh <- m + } + close(inCh) + + var wg sync.WaitGroup + wg.Add(1) + go writer(ctx, cancel, inCh, storage, pathPrefix, &wg) + wg.Wait() +} + +func TestWriter_GCS_SkipsUnchangedContent(t *testing.T) { + storage := testutils.NewMockStorage() + data := []byte(`{"id":"OSV-1"}`) + + // Pre-populate using the same path the writer will compute. + objPath := filepath.Join("out", "OSV-1.json") + if err := storage.WriteObject(t.Context(), objPath, data, nil); err != nil { + t.Fatalf("setup: %v", err) + } + attrsBefore, _ := storage.ReadObjectAttrs(t.Context(), objPath) + + runWriter(t, storage, "out", []writeMsg{ + {path: "OSV-1.json", mimeType: "application/json", data: data}, + }) + + attrsAfter, err := storage.ReadObjectAttrs(t.Context(), objPath) + if err != nil { + t.Fatalf("ReadObjectAttrs: %v", err) + } + if attrsAfter.Generation != attrsBefore.Generation { + t.Errorf("expected generation %d (skipped), got %d", attrsBefore.Generation, attrsAfter.Generation) + } +} + +func TestWriter_GCS_UploadsChangedContent(t *testing.T) { + storage := testutils.NewMockStorage() + + objPath := filepath.Join("out", "OSV-1.json") + if err := storage.WriteObject(t.Context(), objPath, []byte(`{"id":"OSV-1","old":true}`), nil); err != nil { + t.Fatalf("setup: %v", err) + } + attrsBefore, _ := storage.ReadObjectAttrs(t.Context(), objPath) + + runWriter(t, storage, "out", []writeMsg{ + {path: "OSV-1.json", mimeType: "application/json", data: []byte(`{"id":"OSV-1","old":false}`)}, + }) + + attrsAfter, err := storage.ReadObjectAttrs(t.Context(), objPath) + if err != nil { + t.Fatalf("ReadObjectAttrs: %v", err) + } + if attrsAfter.Generation <= attrsBefore.Generation { + t.Errorf("expected generation > %d (uploaded), got %d", attrsBefore.Generation, attrsAfter.Generation) + } +} + +func TestWriter_GCS_UploadsNewObject(t *testing.T) { + storage := testutils.NewMockStorage() + + runWriter(t, storage, "out", []writeMsg{ + {path: "OSV-1.json", mimeType: "application/json", data: []byte(`{"id":"OSV-1"}`)}, + }) + + objPath := filepath.Join("out", "OSV-1.json") + attrs, err := storage.ReadObjectAttrs(t.Context(), objPath) + if err != nil { + t.Fatalf("expected object to exist after upload: %v", err) + } + if attrs.Generation != 1 { + t.Errorf("expected generation 1 for new object, got %d", attrs.Generation) + } +} + +func TestWriter_GCS_SkipsMultipleUnchanged(t *testing.T) { + storage := testutils.NewMockStorage() + + type entry struct { + msgPath string + objPath string + data []byte + } + entries := []entry{ + {"A.json", filepath.Join("out", "A.json"), []byte(`{"id":"A"}`)}, + {"B.json", filepath.Join("out", "B.json"), []byte(`{"id":"B"}`)}, + {"C.json", filepath.Join("out", "C.json"), []byte(`{"id":"C"}`)}, + } + for _, e := range entries { + if err := storage.WriteObject(t.Context(), e.objPath, e.data, nil); err != nil { + t.Fatalf("setup %s: %v", e.objPath, err) + } + } + + gensBefore := make(map[string]int64) + for _, e := range entries { + attrs, _ := storage.ReadObjectAttrs(t.Context(), e.objPath) + gensBefore[e.objPath] = attrs.Generation + } + + msgs := make([]writeMsg, len(entries)) + for i, e := range entries { + msgs[i] = writeMsg{path: e.msgPath, mimeType: "application/json", data: e.data} + } + runWriter(t, storage, "out", msgs) + + for _, e := range entries { + attrs, err := storage.ReadObjectAttrs(t.Context(), e.objPath) + if err != nil { + t.Fatalf("ReadObjectAttrs(%s): %v", e.objPath, err) + } + if attrs.Generation != gensBefore[e.objPath] { + t.Errorf("%s: expected generation %d (skipped), got %d", e.objPath, gensBefore[e.objPath], attrs.Generation) + } + } +} From 6cf2afa109781de72317a1b8a587f7149df0095a Mon Sep 17 00:00:00 2001 From: SanskaarUndale21 Date: Tue, 19 May 2026 20:16:48 +0530 Subject: [PATCH 2/4] fix(lint): add nlreturn blank lines and exclude unparam from test files --- go/.golangci.yaml | 1 + go/cmd/exporter/writer.go | 2 ++ 2 files changed, 3 insertions(+) diff --git a/go/.golangci.yaml b/go/.golangci.yaml index 4d7c582435d..1bffcdfd387 100644 --- a/go/.golangci.yaml +++ b/go/.golangci.yaml @@ -122,6 +122,7 @@ linters: - path: _test\.go linters: - dupl + - unparam - path-except: _test\.go text: use `testutility.GetCurrentWorkingDirectory` paths: diff --git a/go/cmd/exporter/writer.go b/go/cmd/exporter/writer.go index 45eda36103a..1223d43176f 100644 --- a/go/cmd/exporter/writer.go +++ b/go/cmd/exporter/writer.go @@ -81,10 +81,12 @@ func gcsContentUnchanged(ctx context.Context, client clients.CloudStorage, path if !errors.Is(err, clients.ErrNotFound) { logger.WarnContext(ctx, "failed to read object attrs, proceeding with upload", slog.String("path", path), slog.Any("err", err)) } + return false } if attrs.CRC32C == crc32.Checksum(data, crc32cTable) { logger.InfoContext(ctx, "skipping upload, content unchanged", slog.String("path", path)) + return true } return false From b8ba2e6316c7e8f19505c134ac6b429ead8932ae Mon Sep 17 00:00:00 2001 From: SanskaarUndale21 Date: Mon, 25 May 2026 01:55:01 +0530 Subject: [PATCH 3/4] fix(exporter): remove unparam exclusion, inline pathPrefix in runWriter Addresses reviewer feedback to fix the linter complaint directly rather than adding a golangci.yaml exception. --- go/.golangci.yaml | 1 - go/cmd/exporter/writer_test.go | 12 ++++++------ 2 files changed, 6 insertions(+), 7 deletions(-) diff --git a/go/.golangci.yaml b/go/.golangci.yaml index 1bffcdfd387..4d7c582435d 100644 --- a/go/.golangci.yaml +++ b/go/.golangci.yaml @@ -122,7 +122,6 @@ linters: - path: _test\.go linters: - dupl - - unparam - path-except: _test\.go text: use `testutility.GetCurrentWorkingDirectory` paths: diff --git a/go/cmd/exporter/writer_test.go b/go/cmd/exporter/writer_test.go index c376e1fcb3e..930142778d0 100644 --- a/go/cmd/exporter/writer_test.go +++ b/go/cmd/exporter/writer_test.go @@ -11,7 +11,7 @@ import ( ) // runWriter sends msgs to a writer goroutine and waits for it to finish. -func runWriter(t *testing.T, storage clients.CloudStorage, pathPrefix string, msgs []writeMsg) { +func runWriter(t *testing.T, storage clients.CloudStorage, msgs []writeMsg) { t.Helper() ctx, cancel := context.WithCancel(context.Background()) defer cancel() @@ -24,7 +24,7 @@ func runWriter(t *testing.T, storage clients.CloudStorage, pathPrefix string, ms var wg sync.WaitGroup wg.Add(1) - go writer(ctx, cancel, inCh, storage, pathPrefix, &wg) + go writer(ctx, cancel, inCh, storage, "out", &wg) wg.Wait() } @@ -39,7 +39,7 @@ func TestWriter_GCS_SkipsUnchangedContent(t *testing.T) { } attrsBefore, _ := storage.ReadObjectAttrs(t.Context(), objPath) - runWriter(t, storage, "out", []writeMsg{ + runWriter(t, storage, []writeMsg{ {path: "OSV-1.json", mimeType: "application/json", data: data}, }) @@ -61,7 +61,7 @@ func TestWriter_GCS_UploadsChangedContent(t *testing.T) { } attrsBefore, _ := storage.ReadObjectAttrs(t.Context(), objPath) - runWriter(t, storage, "out", []writeMsg{ + runWriter(t, storage, []writeMsg{ {path: "OSV-1.json", mimeType: "application/json", data: []byte(`{"id":"OSV-1","old":false}`)}, }) @@ -77,7 +77,7 @@ func TestWriter_GCS_UploadsChangedContent(t *testing.T) { func TestWriter_GCS_UploadsNewObject(t *testing.T) { storage := testutils.NewMockStorage() - runWriter(t, storage, "out", []writeMsg{ + runWriter(t, storage, []writeMsg{ {path: "OSV-1.json", mimeType: "application/json", data: []byte(`{"id":"OSV-1"}`)}, }) @@ -120,7 +120,7 @@ func TestWriter_GCS_SkipsMultipleUnchanged(t *testing.T) { for i, e := range entries { msgs[i] = writeMsg{path: e.msgPath, mimeType: "application/json", data: e.data} } - runWriter(t, storage, "out", msgs) + runWriter(t, storage, msgs) for _, e := range entries { attrs, err := storage.ReadObjectAttrs(t.Context(), e.objPath) From 81ed69cdbe5f427e0e5d1f2f022ada2d5983a155 Mon Sep 17 00:00:00 2001 From: SanskaarUndale21 Date: Mon, 25 May 2026 01:55:35 +0530 Subject: [PATCH 4/4] fix(lint): add nlreturn blank line before return in gcsContentUnchanged --- go/cmd/exporter/writer.go | 1 + 1 file changed, 1 insertion(+) diff --git a/go/cmd/exporter/writer.go b/go/cmd/exporter/writer.go index 1223d43176f..72206e09fa5 100644 --- a/go/cmd/exporter/writer.go +++ b/go/cmd/exporter/writer.go @@ -89,5 +89,6 @@ func gcsContentUnchanged(ctx context.Context, client clients.CloudStorage, path return true } + return false }