diff --git a/handwritten/storage/internal-tooling/README.md b/handwritten/storage/internal-tooling/README.md index 9a40bb4c97a1..1d66c707af9e 100644 --- a/handwritten/storage/internal-tooling/README.md +++ b/handwritten/storage/internal-tooling/README.md @@ -40,4 +40,34 @@ For each invocation of the benchmark, write a new object of random size between | ElapsedTimeUs | the elapsed time in microseconds the operation took | | Status | completion state of the operation [OK, FAIL] | | AppBufferSize | N/A | -| CpuTimeUs | N/A | \ No newline at end of file +| CpuTimeUs | N/A | + +--- + +## Comparative Latency & Memory Benchmarking (`benchmark.ts`) + +This benchmark compares the current codebase build against a specified baseline NPM version of `@google-cloud/storage` (e.g. comparing Gaxios migration vs baseline `7.19.0`). It measures latency stats for upload, metadata lookup, and download scenarios, while tracking heap memory footprint changes. + +### Run Example: + +1. **Compile the codebase:** + ```bash + cd handwritten/storage + npm run compile + ``` + +2. 
**Execute the benchmark comparison:** + ```bash + node build/esm/internal-tooling/benchmark.js --projectid YOUR_PROJECT_ID --bucket YOUR_BUCKET_NAME --iterations 100 --baseline 7.19.0 --fileSize 10485760 --resumable + ``` + +### CLI Parameters: + +| Parameter | Description | Requirement | Default | +| --------- | ----------- | :---: | :---: | +| `--projectid` | Google Cloud Project ID | **Required** | - | +| `--bucket` | Cloud Storage Bucket Name to upload/download files | **Required** | - | +| `--iterations` | Number of iterations for each workload scenario | Optional | `100` | +| `--baseline` | Stable baseline NPM version of `@google-cloud/storage` to compare against | Optional | - | +| `--fileSize` | File size in bytes for benchmark uploads/downloads | Optional | `1024` (1KB) | +| `--resumable` | Force resumable upload for the upload scenario | Optional | - (default behavior) | \ No newline at end of file diff --git a/handwritten/storage/internal-tooling/benchmark.ts b/handwritten/storage/internal-tooling/benchmark.ts new file mode 100644 index 000000000000..b32d57baf59a --- /dev/null +++ b/handwritten/storage/internal-tooling/benchmark.ts @@ -0,0 +1,232 @@ +/*! + * Copyright 2026 Google LLC. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
/**
 * @fileoverview Comparative latency and memory benchmark for
 * `@google-cloud/storage`.
 *
 * Runs three sequential scenarios (upload, metadata lookup, download) against
 * the local build and, when `--baseline` is given, against that published npm
 * version installed into a temporary directory. Latency statistics and heap
 * usage are printed to stdout.
 */

import {Storage, File, Bucket} from '../src/index.js';
import {performance} from 'perf_hooks';
import * as path from 'path';
import * as fs from 'fs';
import {execSync} from 'child_process';
import * as os from 'os';
import yargs from 'yargs';

/**
 * Parsed command-line options.
 *
 * Property names must match the yargs option keys exactly: the parse result
 * is cast to this interface, so a mismatched name still compiles but reads
 * `undefined` at runtime.
 */
interface Args {
  projectid: string;
  bucket: string;
  iterations: number;
  baseline?: string;
  fileSize: number;
  resumable?: boolean;
}

const argv = yargs(process.argv.slice(2))
  .option('projectid', {
    type: 'string',
    demandOption: true,
    description: 'Google Cloud Project ID',
  })
  .option('bucket', {
    type: 'string',
    demandOption: true,
    description: 'Cloud Storage Bucket Name',
  })
  .option('iterations', {
    type: 'number',
    default: 100,
    description: 'Number of iterations for each test',
  })
  .option('baseline', {
    type: 'string',
    description:
      'Baseline version of @google-cloud/storage to compare against (e.g., 7.19.0)',
  })
  .option('fileSize', {
    type: 'number',
    default: 1024,
    description: 'File size in bytes for benchmark uploads',
  })
  .option('resumable', {
    type: 'boolean',
    description: 'Force resumable upload for the upload scenario',
  })
  .parseSync() as unknown as Args;

/** Temporary install directory created by `loadBaseline`, removed in `main`. */
let tempDirToDelete: string | undefined;

/**
 * Strict SemVer pattern. The version string is interpolated into a shell
 * command, so anything that does not match is rejected up front.
 */
const SEMVER_PATTERN = /^\d+\.\d+\.\d+(-[a-zA-Z0-9.]+)?$/;

/** Number of bytes in one mebibyte, used when printing heap figures. */
const BYTES_PER_MB = 1024 * 1024;

/**
 * Installs the requested published version of `@google-cloud/storage` into a
 * fresh temporary directory and loads its `Storage` export.
 *
 * @param version Baseline npm version, e.g. `7.19.0`.
 * @returns The `Storage` constructor exported by the baseline package.
 * @throws Error if `version` is not a plain semver string, or if the install
 *     or the module load fails.
 */
async function loadBaseline(version: string): Promise<typeof Storage> {
  if (!SEMVER_PATTERN.test(version)) {
    throw new Error(
      `Invalid baseline version format: "${version}". Must be a valid semver string (e.g. 7.19.0).`,
    );
  }

  const tempDir = fs.mkdtempSync(path.join(os.tmpdir(), 'storage-benchmark-'));
  tempDirToDelete = tempDir; // Track for cleanup

  console.log(`Installing baseline version ${version} in ${tempDir}...`);
  // A minimal package.json makes npm install into tempDir itself.
  fs.writeFileSync(
    path.join(tempDir, 'package.json'),
    JSON.stringify({name: 'bench-temp'}),
  );
  // `version` was validated against SEMVER_PATTERN above, so interpolating it
  // into the shell command is safe.
  execSync(`npm install @google-cloud/storage@${version} --silent`, {
    cwd: tempDir,
  });
  const baselinePath = path.join(
    tempDir,
    'node_modules',
    '@google-cloud/storage',
  );

  const pkgJson = JSON.parse(
    fs.readFileSync(path.join(baselinePath, 'package.json'), 'utf8'),
  ) as {main?: string};
  // `||` rather than `??` so an empty "main" also falls back to the default.
  const mainEntry = pkgJson.main || './build/src/index.js';
  const entry = path.join(baselinePath, mainEntry);

  console.log(`Loading baseline from ${entry}`);
  // NOTE(review): `entry` is an absolute filesystem path. Node's ESM loader
  // expects file:// URLs for absolute paths on Windows, so this may need
  // `pathToFileURL` there - confirm against the build's module format.
  const pkg = await import(entry);
  return pkg.Storage || pkg.default?.Storage || pkg.default;
}

/**
 * Prints current heap usage, in MB with two decimals, tagged with `prefix`.
 *
 * @param prefix Label identifying the point at which the sample was taken.
 */
const logMemory = (prefix: string): void => {
  const {heapUsed, heapTotal} = process.memoryUsage();
  const usedMb = (heapUsed / BYTES_PER_MB).toFixed(2);
  const totalMb = (heapTotal / BYTES_PER_MB).toFixed(2);
  console.log(`${prefix} - Heap Used: ${usedMb} MB / Heap Total: ${totalMb} MB`);
};

/**
 * Scenario 1: uploads `content` to a new object `argv.iterations` times.
 *
 * @param bucket Bucket that receives the benchmark objects.
 * @param content Payload written on every iteration.
 * @param name Label of the client under test; embedded in the object names.
 * @param uploadedFiles Output list; each successfully uploaded file is
 *     appended so the caller can delete it afterwards.
 * @returns Elapsed time of each upload in milliseconds.
 */
async function runUploadScenario(
  bucket: Bucket,
  content: Buffer,
  name: string,
  uploadedFiles: File[],
): Promise<number[]> {
  console.log(`Starting Scenario 1: Upload (${argv.fileSize} bytes)...`);
  const uploadTimes: number[] = [];
  // Only pass `resumable` when the flag was given, so the library default
  // applies otherwise.
  const options =
    argv.resumable !== undefined ? {resumable: argv.resumable} : {};

  for (let i = 0; i < argv.iterations; i++) {
    if (i % 10 === 0) logMemory(`  Upload iteration ${i}`);
    const iterFilename = `bench-${name}-${Date.now()}-${i}.bin`;
    const iterFile = bucket.file(iterFilename);
    const start = performance.now();
    await iterFile.save(content, options);
    uploadTimes.push(performance.now() - start);
    uploadedFiles.push(iterFile);
  }
  return uploadTimes;
}

/**
 * Scenario 2: fetches the metadata of `mainFile` `argv.iterations` times.
 *
 * @param mainFile Existing object to query.
 * @returns Elapsed time of each request in milliseconds.
 */
async function runMetadataScenario(mainFile: File): Promise<number[]> {
  console.log('Starting Scenario 2: Get Metadata...');
  const metadataTimes: number[] = [];
  for (let i = 0; i < argv.iterations; i++) {
    if (i % 10 === 0) logMemory(`  Metadata iteration ${i}`);
    const start = performance.now();
    await mainFile.getMetadata();
    metadataTimes.push(performance.now() - start);
  }
  return metadataTimes;
}

/**
 * Scenario 3: downloads `mainFile` into memory `argv.iterations` times.
 *
 * @param mainFile Existing object to download.
 * @returns Elapsed time of each download in milliseconds.
 */
async function runDownloadScenario(mainFile: File): Promise<number[]> {
  console.log(`Starting Scenario 3: Download (${argv.fileSize} bytes)...`);
  const downloadTimes: number[] = [];
  for (let i = 0; i < argv.iterations; i++) {
    if (i % 10 === 0) logMemory(`  Download iteration ${i}`);
    const start = performance.now();
    await mainFile.download();
    downloadTimes.push(performance.now() - start);
  }
  return downloadTimes;
}

/**
 * Runs all three scenarios with one client implementation, prints the
 * results, and deletes every object it uploaded, even if a scenario fails.
 *
 * @param StorageClass `Storage` constructor to benchmark (local or baseline).
 * @param name Human-readable label for this run.
 * @param bucketName Bucket used for the benchmark objects.
 */
async function runBenchmark(
  StorageClass: typeof Storage,
  name: string,
  bucketName: string,
): Promise<void> {
  // The option key is `projectid`; reading `argv.projectId` would always be
  // undefined and the flag would be silently ignored.
  const storage = new StorageClass({projectId: argv.projectid});
  const bucket = storage.bucket(bucketName);
  const content = Buffer.alloc(argv.fileSize, 'a');
  const uploadedFiles: File[] = [];

  console.log(`\n=== Running benchmark for ${name} ===`);

  try {
    const uploadTimes = await runUploadScenario(
      bucket,
      content,
      name,
      uploadedFiles,
    );
    reportResults(`Upload (${argv.fileSize} bytes)`, uploadTimes, true);
    logMemory('After Upload');

    // The remaining scenarios reuse the first uploaded object.
    const mainFile = uploadedFiles[0];
    if (!mainFile) {
      throw new Error(
        'No file was uploaded; cannot run metadata and download scenarios',
      );
    }

    const metadataTimes = await runMetadataScenario(mainFile);
    reportResults('Get Metadata', metadataTimes);
    logMemory('After Metadata');

    const downloadTimes = await runDownloadScenario(mainFile);
    reportResults(`Download (${argv.fileSize} bytes)`, downloadTimes, true);
    logMemory('After Download');
  } finally {
    // Best-effort cleanup: never throw from here, but report leftovers so
    // stray objects in the bucket do not go unnoticed.
    console.log('Cleaning up cloud files...');
    const outcomes = await Promise.allSettled(
      uploadedFiles.map(f => f.delete()),
    );
    const failed = outcomes.filter(o => o.status === 'rejected').length;
    if (failed > 0) {
      console.warn(
        `Warning: failed to delete ${failed} of ${uploadedFiles.length} benchmark objects`,
      );
    }
    logMemory('After Cleanup');
  }
}

/**
 * Prints iteration count and average/min/max latency for one scenario, and
 * optionally an approximate throughput derived from `argv.fileSize`.
 *
 * @param operation Scenario label used as the heading.
 * @param times Per-iteration latencies in milliseconds.
 * @param includeThroughput Whether to also print KB/s.
 */
function reportResults(
  operation: string,
  times: number[],
  includeThroughput = false,
): void {
  console.log(`\n${operation}:`);
  console.log(`  Iterations: ${times.length}`);
  if (times.length === 0) {
    // Without samples, min/max/avg would print Infinity/-Infinity/NaN.
    console.log('  No samples recorded');
    return;
  }

  // Single pass instead of Math.min(...times): spreading a very large array
  // into call arguments can exceed the engine's argument limit.
  let min = Infinity;
  let max = -Infinity;
  let total = 0;
  for (const t of times) {
    if (t < min) min = t;
    if (t > max) max = t;
    total += t;
  }
  const avg = total / times.length;

  console.log(`  Average Latency: ${avg.toFixed(2)} ms`);
  console.log(`  Min Latency: ${min.toFixed(2)} ms`);
  console.log(`  Max Latency: ${max.toFixed(2)} ms`);
  if (includeThroughput) {
    const throughput = (argv.fileSize / 1024) * (1000 / avg); // KB/s
    console.log(`  Approx. Throughput: ${throughput.toFixed(2)} KB/s`);
  }
}

/**
 * Entry point: validates the numeric options, benchmarks the local build,
 * then the baseline if one was requested. Any failure is logged and sets a
 * non-zero exit code; the temporary install directory is always removed.
 */
async function main(): Promise<void> {
  try {
    // Number.isInteger also rejects NaN (e.g. `--iterations abc`), which
    // would otherwise pass a plain `< 1` check and run zero iterations.
    if (!Number.isInteger(argv.iterations) || argv.iterations < 1) {
      throw new Error(
        'Iterations parameter must be an integer greater than or equal to 1',
      );
    }

    if (!Number.isInteger(argv.fileSize) || argv.fileSize < 0) {
      throw new Error(
        'fileSize parameter must be an integer greater than or equal to 0',
      );
    }

    // Run for local version
    await runBenchmark(Storage, 'Current (Gaxios)', argv.bucket);

    // Run for baseline if specified
    if (argv.baseline) {
      const BaselineStorage = await loadBaseline(argv.baseline);
      await runBenchmark(
        BaselineStorage,
        `Baseline (${argv.baseline})`,
        argv.bucket,
      );
    }
  } catch (error) {
    console.error('Error running benchmark:', error);
    // Exit with non-zero code on failures for CI integration
    process.exitCode = 1;
  } finally {
    // Guaranteed local directory cleanup
    if (tempDirToDelete) {
      console.log(
        `Cleaning up local temporary directory: ${tempDirToDelete}`,
      );
      try {
        fs.rmSync(tempDirToDelete, {recursive: true, force: true});
      } catch (cleanupErr) {
        console.error(
          'Failed to clean up local temporary directory:',
          cleanupErr,
        );
      }
    }
  }
}

// main() handles its own errors; `void` marks the promise as intentionally
// not awaited.
void main();