diff --git a/docs/astro.config.mjs b/docs/astro.config.mjs index 6b6fdb600c3..6e6bdba40d4 100644 --- a/docs/astro.config.mjs +++ b/docs/astro.config.mjs @@ -126,8 +126,10 @@ export default defineConfig({ '/patterns/siderepoops/': '/gh-aw/patterns/side-repo-ops/', '/patterns/specops/': '/gh-aw/patterns/spec-ops/', '/patterns/researchplanassignops/': '/gh-aw/patterns/research-plan-assign-ops/', + '/patterns/batchops/': '/gh-aw/patterns/batch-ops/', '/patterns/taskops/': '/gh-aw/patterns/task-ops/', '/patterns/trialops/': '/gh-aw/patterns/trial-ops/', + '/patterns/workqueueops/': '/gh-aw/patterns/workqueue-ops/', }, integrations: [ sitemap(), @@ -270,6 +272,7 @@ export default defineConfig({ { label: 'Design Patterns', items: [ + { label: 'BatchOps', link: '/patterns/batch-ops/' }, { label: 'CentralRepoOps', link: '/patterns/central-repo-ops/' }, { label: 'ChatOps', link: '/patterns/chat-ops/' }, { label: 'DailyOps', link: '/patterns/daily-ops/' }, @@ -286,6 +289,7 @@ export default defineConfig({ { label: 'SpecOps', link: '/patterns/spec-ops/' }, { label: 'TaskOps', link: '/patterns/task-ops/' }, { label: 'TrialOps', link: '/patterns/trial-ops/' }, + { label: 'WorkQueueOps', link: '/patterns/workqueue-ops/' }, ], }, { diff --git a/docs/src/content/docs/patterns/batch-ops.md b/docs/src/content/docs/patterns/batch-ops.md new file mode 100644 index 00000000000..84983210a65 --- /dev/null +++ b/docs/src/content/docs/patterns/batch-ops.md @@ -0,0 +1,268 @@ +--- +title: BatchOps +description: Process large volumes of work in parallel or chunked batches using matrix jobs, rate-limit-aware throttling, and result aggregation +sidebar: + badge: { text: 'Batch processing', variant: 'caution' } +--- + +BatchOps is a pattern for processing large volumes of work items efficiently. 
Instead of iterating sequentially through hundreds of items in a single workflow run, BatchOps splits work into chunks, parallelizes where possible, handles partial failures gracefully, and aggregates results into a consolidated report. + +## When to Use BatchOps vs Sequential Processing + +| Scenario | Recommendation | +|----------|----------------| +| < 50 items, order matters | Sequential ([WorkQueueOps](/gh-aw/patterns/workqueue-ops/)) | +| 50–500 items, order doesn't matter | BatchOps with chunked processing | +| > 500 items, high parallelism safe | BatchOps with matrix fan-out | +| Items have dependencies on each other | Sequential (WorkQueueOps) | +| Items are fully independent | BatchOps (any strategy) | +| Strict rate limits or quotas | Rate-limit-aware batching | + +## Batch Strategy 1: Chunked Processing + +Split work into fixed-size pages using `GITHUB_RUN_NUMBER`. Each run processes one page, picking up the next slice on the next scheduled run. Items must have a stable sort key (creation date, issue number) so pagination is deterministic. + +```aw wrap +--- +on: + schedule: + - cron: "0 2 * * 1-5" # Weekdays at 2 AM + workflow_dispatch: + +tools: + github: + toolsets: [issues] + bash: + - "jq" + - "date" + +safe-outputs: + add-labels: + allowed: [stale, needs-triage, archived] + max: 30 + add-comment: + max: 30 + +steps: + - name: compute-page + id: compute-page + run: | + PAGE_SIZE=25 + # Use run number mod to cycle through pages; reset every 1000 runs + PAGE=$(( (GITHUB_RUN_NUMBER % 1000) * PAGE_SIZE )) + echo "page_offset=$PAGE" >> "$GITHUB_OUTPUT" + echo "page_size=$PAGE_SIZE" >> "$GITHUB_OUTPUT" +--- + +# Chunked Issue Processor + +This run covers offset ${{ steps.compute-page.outputs.page_offset }} with page size ${{ steps.compute-page.outputs.page_size }}. + +1. List issues sorted by creation date (oldest first), skipping the first ${{ steps.compute-page.outputs.page_offset }} and taking ${{ steps.compute-page.outputs.page_size }}. +2. 
For each issue: add `stale` if last updated > 90 days ago with no recent comments; add `needs-triage` if it has no labels; post a stale warning comment if applicable.
+3. Summarize: issues labeled, comments posted, any errors.
+```
+
+## Batch Strategy 2: Fan-Out with Matrix
+
+Use GitHub Actions matrix to run multiple batch workers in parallel, each responsible for a non-overlapping shard. Use `fail-fast: false` so one shard failure doesn't cancel the others. Each matrix job receives its own `GITHUB_TOKEN`, but API rate limits are enforced per repository, so all shards draw from a shared quota — size shards with the combined request volume in mind. Keep the matrix `shard` list in sync with `total_shards` (e.g. `[0, 1, 2, 3]` for 4 shards); otherwise some items are never assigned to any worker.
+
+```aw wrap
+---
+on:
+  workflow_dispatch:
+    inputs:
+      total_shards:
+        description: "Number of parallel workers"
+        default: "4"
+        required: false
+
+jobs:
+  batch:
+    strategy:
+      matrix:
+        shard: [0, 1, 2, 3]
+      fail-fast: false # Continue other shards even if one fails
+
+tools:
+  github:
+    toolsets: [issues, pull_requests]
+
+safe-outputs:
+  add-labels:
+    allowed: [reviewed, duplicate, wontfix]
+    max: 50
+---
+
+# Matrix Batch Worker — Shard ${{ matrix.shard }} of ${{ inputs.total_shards }}
+
+Process only issues where `(issue_number % ${{ inputs.total_shards }}) == ${{ matrix.shard }}` — this ensures no two shards process the same issue.
+
+1. List all open issues (up to 500) and keep only those assigned to this shard.
+2. For each issue: check for duplicates (similar title/content); add label `reviewed`; if a duplicate is found, add `duplicate` and reference the original.
+3. Report: issues in this shard, how many labeled, any failures.
+```
+
+## Batch Strategy 3: Rate-Limit-Aware Batching
+
+Throttle API calls by processing items in small sub-batches with explicit pauses. Slower than unbounded processing but dramatically reduces rate-limit errors. Use [Rate Limiting Controls](/gh-aw/reference/rate-limiting-controls/) for built-in throttling.
+ +```aw wrap +--- +on: + workflow_dispatch: + inputs: + batch_size: + description: "Items per sub-batch" + default: "10" + pause_seconds: + description: "Seconds to pause between sub-batches" + default: "30" + +tools: + github: + toolsets: [repos, issues] + bash: + - "sleep" + - "jq" + +safe-outputs: + add-comment: + max: 100 + add-labels: + allowed: [labeled-by-bot] + max: 100 +--- + +# Rate-Limited Batch Processor + +Process all open issues in sub-batches of ${{ inputs.batch_size }}, pausing ${{ inputs.pause_seconds }} seconds between batches. + +1. Fetch all open issue numbers (paginate if needed). +2. For each sub-batch: read each issue body, determine the correct label, add the label, then pause before the next sub-batch. +3. On HTTP 429: pause 60 seconds and retry once before marking the item as failed. +4. Report: total processed, failed, skipped. +``` + +## Batch Strategy 4: Result Aggregation + +Collect results from multiple batch workers or runs and aggregate them into a single summary issue. Use [cache-memory](/gh-aw/reference/cache-memory/) to store intermediate results when runs span multiple days. 
+ +```aw wrap +--- +on: + workflow_dispatch: + inputs: + report_issue: + description: "Issue number to aggregate results into" + required: true + +tools: + cache-memory: true + github: + toolsets: [issues, repos] + bash: + - "jq" + +safe-outputs: + add-comment: + max: 1 + update-issue: + body: true + +steps: + - name: collect-results + run: | + # Aggregate results from all result files written by previous batch runs + RESULTS_DIR="/tmp/gh-aw/cache-memory/batch-results" + if [ -d "$RESULTS_DIR" ]; then + jq -s ' + { + total_processed: (map(.processed) | add // 0), + total_failed: (map(.failed) | add // 0), + total_skipped: (map(.skipped) | add // 0), + runs: length, + errors: (map(.errors // []) | add // []) + } + ' "$RESULTS_DIR"/*.json > /tmp/gh-aw/cache-memory/aggregate.json + cat /tmp/gh-aw/cache-memory/aggregate.json + else + echo '{"total_processed":0,"total_failed":0,"total_skipped":0,"runs":0,"errors":[]}' \ + > /tmp/gh-aw/cache-memory/aggregate.json + fi +--- + +# Batch Result Aggregator + +Aggregate results from previous batch runs stored in `/tmp/gh-aw/cache-memory/batch-results/` into issue #${{ inputs.report_issue }}. + +1. Read `/tmp/gh-aw/cache-memory/aggregate.json` for totals and each individual result file for per-run breakdowns. +2. Update issue #${{ inputs.report_issue }} body with a Markdown table: summary row (processed/failed/skipped) plus per-run breakdown. List any errors requiring manual intervention. +3. Add a comment: "Batch complete ✅" if no failures, or "Batch complete with failures ⚠️" with a list of failed items. +4. For each failed item, create a sub-issue so it can be retried. +``` + +## Error Handling and Partial Failures + +Batch workflows must be resilient to individual item failures. + +**Retry pattern**: When using cache-memory queues, track `retry_count` per failed item. Retry items where `retry_count < 3`; after three failures move them to `permanently_failed` for human review. 
Increment the count and save the queue after each attempt. + +**Failure isolation**: + +- Use `fail-fast: false` in matrix jobs so one shard failure doesn't cancel others +- Write per-item results before moving to the next item +- Store errors with enough context to diagnose and retry + +## Real-World Example: Updating Labels Across 100+ Issues + +This example processes a label migration (rename `bug` to `type:bug`) across all open and closed issues. + +```aw wrap +--- +on: + workflow_dispatch: + inputs: + dry_run: + description: "Preview changes without applying them" + default: "true" + +tools: + github: + toolsets: [issues] + bash: + - "jq" + +safe-outputs: + add-labels: + allowed: [type:bug] + max: 200 + remove-labels: + allowed: [bug] + max: 200 + add-comment: + max: 1 + +concurrency: + group: label-migration + cancel-in-progress: false +--- + +# Label Migration: `bug` → `type:bug` + +Migrate all issues with the label `bug` to use `type:bug`. List all issues (open and closed) with label `bug`, paginating to retrieve all of them. + +- If `${{ inputs.dry_run }}` is `true`: report how many issues would be updated and add a preview comment. Make no changes. +- If `${{ inputs.dry_run }}` is `false`: for each issue add `type:bug` then remove `bug`. Process in sub-batches of 20 with 15-second pauses. Track successes and failures. + +Add a final comment with totals and a search link to verify no `bug` labels remain. 
+``` + +## Related Pages + +- [WorkQueueOps](/gh-aw/patterns/workqueue-ops/) — Sequential queue processing with issue checklists, sub-issues, cache-memory, and Discussions +- [TaskOps](/gh-aw/patterns/task-ops/) — Research → Plan → Assign for developer-supervised work +- [Cache Memory](/gh-aw/reference/cache-memory/) — Persistent state storage across workflow runs +- [Repo Memory](/gh-aw/reference/repo-memory/) — Git-committed persistent state +- [Rate Limiting Controls](/gh-aw/reference/rate-limiting-controls/) — Built-in throttling for API-heavy workflows +- [Concurrency](/gh-aw/reference/concurrency/) — Prevent overlapping batch runs diff --git a/docs/src/content/docs/patterns/workqueue-ops.md b/docs/src/content/docs/patterns/workqueue-ops.md new file mode 100644 index 00000000000..6006dfe5aad --- /dev/null +++ b/docs/src/content/docs/patterns/workqueue-ops.md @@ -0,0 +1,188 @@ +--- +title: WorkQueueOps +description: Process a queue of work items using GitHub issues, sub-issues, cache-memory, or Discussions as durable queue backends +sidebar: + badge: { text: 'Queue-based', variant: 'note' } +--- + +WorkQueueOps is a pattern for systematically processing a large backlog of work items. Instead of processing everything at once, work is queued, tracked, and consumed incrementally — surviving interruptions, rate limits, and multi-day horizons. Use it when operations are idempotent and progress visibility matters. + +## Queue Strategy 1: Issue Checklist as Queue + +Use GitHub issue checkboxes as a lightweight, human-readable queue. The agent reads the issue body, finds unchecked items, processes each one, and checks it off. Best for small-to-medium batches (< 100 items). Use [Concurrency](/gh-aw/reference/concurrency/) controls to prevent race conditions between parallel runs. 
+ +```aw wrap +--- +on: + workflow_dispatch: + inputs: + queue_issue: + description: "Issue number containing the checklist queue" + required: true + +tools: + github: + toolsets: [issues] + +safe-outputs: + update-issue: + body: true + add-comment: + max: 1 + +concurrency: + group: workqueue-${{ inputs.queue_issue }} + cancel-in-progress: false +--- + +# Checklist Queue Processor + +You are processing a work queue stored as checkboxes in issue #${{ inputs.queue_issue }}. + +1. Read issue #${{ inputs.queue_issue }} and find all unchecked items (`- [ ]`). +2. For each unchecked item (at most 10 per run): perform the required work, then edit the issue body to change `- [ ]` to `- [x]`. +3. Add a comment summarizing what was completed and what remains. +4. If all items are checked, close the issue with a summary comment. +``` + +## Queue Strategy 2: Sub-Issues as Queue + +Create one sub-issue per work item. The agent queries open sub-issues of a parent tracking issue, processes each one, and closes it when done. Scales to hundreds of items with individual discussion threads per item. Use `max:` limits on `close-issue` to avoid notification storms. + +```aw wrap +--- +on: + schedule: + - cron: "0 * * * *" # Every hour + workflow_dispatch: + +tools: + github: + toolsets: [issues] + +safe-outputs: + add-comment: + max: 5 + close-issue: + max: 5 + +concurrency: + group: sub-issue-queue + cancel-in-progress: false +--- + +# Sub-Issue Queue Processor + +You are processing a queue of open sub-issues. The parent tracking issue is labeled `queue-tracking`. + +1. Find the open issue labeled `queue-tracking` — this is the queue parent. +2. List its open sub-issues and process at most 5 per run. +3. For each sub-issue: read the body, perform the work, add a result comment, then close the issue. +4. Add a progress comment on the parent issue showing how many items remain. + +If no sub-issues are open, post a comment on the parent issue saying the queue is empty. 
+``` + +## Queue Strategy 3: Cache-Memory Queue + +Store queue state as a JSON file in [cache-memory](/gh-aw/reference/cache-memory/). Each run loads the file, picks up where the last run left off, and saves the updated state. Best for large queues and multi-day processing horizons where items are generated programmatically. Cache-memory is scoped to a single branch; use filesystem-safe timestamps in filenames (no colons — e.g., `YYYY-MM-DD-HH-MM-SS-sss`). + +```aw wrap +--- +on: + schedule: + - cron: "0 6 * * 1-5" # Weekdays at 6 AM + workflow_dispatch: + +tools: + cache-memory: true + github: + toolsets: [repos, issues] + bash: + - "jq" + +safe-outputs: + add-comment: + max: 10 + add-labels: + allowed: [processed, needs-review] + max: 10 +--- + +# Cache-Memory Queue Processor + +You process items from a persistent JSON queue at `/tmp/gh-aw/cache-memory/workqueue.json`: + +```json +{ + "pending": ["item-1", "item-2"], + "in_progress": [], + "completed": ["item-0"], + "failed": [], + "last_run": "2026-04-07-06-00-00" +} +``` + +1. Load the queue file. If it doesn't exist, initialize it by listing all open issues without the label `processed` and populating `pending` with their numbers. +2. Move up to 10 items from `pending` to `in_progress`. +3. For each item: perform the required operation, then move it to `completed` on success or `failed` (with an error note) on failure. +4. Save the updated queue JSON and report: X completed, Y failed, Z remaining. + +If `pending` is empty, announce that the queue is exhausted. +``` + +## Queue Strategy 4: Discussion-Based Queue + +Use a GitHub Discussion to track pending work items. Unresolved replies represent pending work; processing an item means resolving its reply. Best for community-sourced queues and async collaboration where humans need to inspect items before or after processing. Requires `discussions` in the GitHub toolset. 
+ +```aw wrap +--- +on: + schedule: + - cron: "0 8 * * *" # Daily at 8 AM + workflow_dispatch: + +tools: + github: + toolsets: [discussions] + +safe-outputs: + add-comment: + max: 5 + create-discussion: + title-prefix: "[queue-log] " + category: "General" + +concurrency: + group: discussion-queue + cancel-in-progress: false +--- + +# Discussion Queue Processor + +A GitHub Discussion titled "Work Queue" (category "General") tracks pending items. +Each unresolved top-level reply is a work item. + +1. Find the "Work Queue" discussion and list all unresolved replies (`isAnswered: false`). +2. For each unresolved reply (at most 5 per run): parse the work description, perform the work, then reply with the result. +3. Create a summary discussion post documenting what was processed today. +``` + +## Idempotency and Concurrency + +All WorkQueueOps patterns should be **idempotent**: running the same item twice should not cause double processing. + +| Technique | How | +|-----------|-----| +| Check before acting | Query current state (label present? comment exists?) before making changes | +| Atomic state updates | Write queue state in a single step; avoid partial updates | +| Concurrency groups | Use `concurrency.group` with `cancel-in-progress: false` to prevent parallel runs | +| Retry budgets | Track failed items separately; set a retry limit before giving up | + +## Related Pages + +- [BatchOps](/gh-aw/patterns/batch-ops/) — Process large volumes in parallel chunks rather than sequentially +- [TaskOps](/gh-aw/patterns/task-ops/) — Research → Plan → Assign pattern for developer-supervised work +- [Cache Memory](/gh-aw/reference/cache-memory/) — Persistent state storage across workflow runs +- [Repo Memory](/gh-aw/reference/repo-memory/) — Git-committed persistent state for cross-branch sharing +- [Concurrency](/gh-aw/reference/concurrency/) — Prevent race conditions in queue-based workflows