From 28089bd6416f70006f3ae0695a9e43e51e08653d Mon Sep 17 00:00:00 2001 From: MoonBoi9001 Date: Mon, 9 Mar 2026 20:50:17 -0500 Subject: [PATCH 01/10] feat: full DIPs dev environment with local source mounts Co-Authored-By: Claude Opus 4.6 --- .claude/skills/deploy-test-subgraphs/SKILL.md | 15 + .claude/skills/fresh-deploy/SKILL.md | 162 +++++++++++ .claude/skills/network-status/SKILL.md | 8 + .claude/skills/send-indexing-request/SKILL.md | 86 ++++++ .env | 135 +-------- .environment | 151 ++++++++++ .gitignore | 3 +- BUGS.md | 71 +++++ CLAUDE.md | 45 +++ TESTING-STATUS.md | 142 +++++++++ compose/dev/README.md | 1 + compose/dev/dips.yaml | 190 ++++++++++++ compose/dev/eligibility-oracle.yaml | 7 +- containers/core/chain/run.sh | 2 + containers/core/graph-contracts/run.sh | 5 + containers/core/subgraph-deploy/Dockerfile | 2 +- containers/core/subgraph-deploy/run.sh | 41 ++- .../indexer/indexer-agent/dev/run-dips.sh | 89 ++++++ .../indexer/indexer-agent/dev/run-override.sh | 8 +- containers/indexer/indexer-agent/run.sh | 8 +- .../indexer/indexer-service/dev/run-dips.sh | 88 ++++++ containers/indexer/start-indexing/run.sh | 6 + containers/indexing-payments/dipper/run.sh | 30 +- containers/indexing-payments/iisa/Dockerfile | 38 +++ .../indexing-payments/iisa/Dockerfile.scoring | 11 - .../indexing-payments/iisa/run-cronjob.sh | 20 ++ containers/indexing-payments/iisa/run-iisa.sh | 18 ++ containers/indexing-payments/iisa/scoring.py | 175 ----------- .../indexing-payments/iisa/seed_scores.json | 26 -- containers/oracles/block-oracle/Dockerfile | 28 +- containers/oracles/block-oracle/run.sh | 4 +- .../eligibility-oracle-node/dev/Dockerfile | 17 ++ .../eligibility-oracle-node/run-reo.sh | 113 +++++++ .../query-payments/tap-escrow-manager/run.sh | 2 +- docker-compose.yaml | 26 +- scripts/deploy-test-subgraph.py | 275 ++++++++++++++++++ scripts/network-status.py | 231 +++++++++++++++ 37 files changed, 1890 insertions(+), 389 deletions(-) create mode 100644 .claude/skills/deploy-test-subgraphs/SKILL.md create mode 100644 .claude/skills/fresh-deploy/SKILL.md create mode 100644 .claude/skills/network-status/SKILL.md create mode 100644 .claude/skills/send-indexing-request/SKILL.md mode change 100644 => 120000 .env create mode 100644 .environment create mode 100644 BUGS.md create mode 100644 CLAUDE.md create mode 100644 TESTING-STATUS.md create mode 100644 compose/dev/dips.yaml create mode 100755 containers/indexer/indexer-agent/dev/run-dips.sh create mode 100755 containers/indexer/indexer-service/dev/run-dips.sh create mode 100644 containers/indexing-payments/iisa/Dockerfile delete mode 100644 containers/indexing-payments/iisa/Dockerfile.scoring create mode 100755 containers/indexing-payments/iisa/run-cronjob.sh create mode 100755 containers/indexing-payments/iisa/run-iisa.sh delete mode 100644 containers/indexing-payments/iisa/scoring.py delete mode 100644 containers/indexing-payments/iisa/seed_scores.json create mode 100644 containers/oracles/eligibility-oracle-node/dev/Dockerfile create mode 100755 containers/oracles/eligibility-oracle-node/run-reo.sh create mode 100755 scripts/deploy-test-subgraph.py create mode 100755 scripts/network-status.py diff --git a/.claude/skills/deploy-test-subgraphs/SKILL.md b/.claude/skills/deploy-test-subgraphs/SKILL.md new file mode 100644 index 0000000..f3b830d --- /dev/null +++ b/.claude/skills/deploy-test-subgraphs/SKILL.md @@ -0,0 +1,15 @@ +--- +name: deploy-test-subgraphs +description: Publish test subgraphs to GNS on the local network. 
Use when the user asks to "deploy subgraphs", "add subgraphs", "deploy 50 subgraphs", "create test subgraphs", or wants to populate the network with subgraphs for testing. Also trigger when the user says a number followed by "subgraphs" (e.g. "deploy 500 subgraphs").
+argument-hint: "[count] [prefix]"
+---
+
+Run `python3 scripts/deploy-test-subgraph.py [count] [prefix]` from the local-network repo root.
+
+- `count` defaults to 1 if the user doesn't specify a number
+- `prefix` defaults to `test-subgraph` -- each subgraph is named `[prefix]-1`, `[prefix]-2`, etc.
+- Subgraphs are published to GNS on-chain only -- they are NOT deployed to graph-node and will not be indexed
+
+The script builds once (~10s), then each publish is sub-second. 100 subgraphs take ~30s total.
+
+After publishing, run `python3 scripts/network-status.py` and output the result in a code block so the user can see the updated network state.
diff --git a/.claude/skills/fresh-deploy/SKILL.md b/.claude/skills/fresh-deploy/SKILL.md
new file mode 100644
index 0000000..85d4ad8
--- /dev/null
+++ b/.claude/skills/fresh-deploy/SKILL.md
@@ -0,0 +1,162 @@
+---
+name: fresh-deploy
+description: Full stack reset and fresh deploy of the local-network Docker Compose environment. Use when the user asks to tear down and redeploy, do a fresh deploy, reset the stack, or bring everything up from scratch. Also use after merging PRs that change container code, or when debugging stuck state.
+---
+
+# Fresh Deploy
+
+Reset the local-network Docker Compose environment to a clean state and bring all services up ready for DIPs testing.
+
+## Prerequisites
+
+The contracts repo at `$CONTRACTS_SOURCE_ROOT` (typically `/Users/samuel/Documents/github/contracts`) must be on `indexing-payments-management-audit` (PR #1301) with three local commits applied on top:
+
+1. Cherry-pick `02b6996e` from `escrow-management` -- adds RecurringCollector Ignition module, wires it into SubgraphService deployment, and links external libraries
+2. Cherry-pick `d2a0d30e` from `escrow-management` -- adds `RecurringCollector` to `GraphHorizonContractNameList` in toolshed so it gets written to horizon.json
+3. Local fix for BUG-007 -- adds `{ after: [GraphPeripheryModule, HorizonProxiesModule] }` to the `deployImplementation` call in `packages/horizon/ignition/modules/core/HorizonStaking.ts`
+
+After applying these, the toolshed package must be compiled: `cd packages/toolshed && pnpm build:self`.
+
+To verify the local commits are present, check: `cd $CONTRACTS_SOURCE_ROOT && git log --oneline -5`. The top 3 commits should be the fix and two cherry-picks.
+
+## Steps
+
+### 1. Tear down everything including volumes
+
+```bash
+DOCKER_DEFAULT_PLATFORM= docker compose -f docker-compose.yaml -f compose/dev/dips.yaml down -v
+```
+
+This destroys all data: chain state, postgres, subgraph deployments, config volume with contract addresses.
+
+### 2. Clear stale Ignition journals
+
+If a previous deployment failed (especially `graph-contracts`), the Hardhat Ignition journal at `$CONTRACTS_SOURCE_ROOT/packages/subgraph-service/ignition/deployments/chain-1337/` will contain partial state that prevents a clean redeploy. Delete it:
+
+```bash
+rm -rf $CONTRACTS_SOURCE_ROOT/packages/subgraph-service/ignition/deployments/chain-1337
+```
+
+This is safe after a `down -v` since the chain state it references no longer exists.
+
+### 3. 
Bring everything up + +```bash +DOCKER_DEFAULT_PLATFORM= docker compose -f docker-compose.yaml -f compose/dev/dips.yaml up -d --build +``` + +The `--build` flag ensures any changes to `run.sh` scripts or Dockerfiles are picked up (e.g. chain's `--block-time` flag, config changes baked into images). Without it, Docker reuses cached images and local changes are silently ignored. + +Wait for containers to stabilize. The `graph-contracts` container runs first (deploys all Solidity contracts and writes addresses to the config volume), then `subgraph-deploy` deploys three subgraphs (network, TAP, block-oracle). Other services start as their health check dependencies are met. + +**Note:** The initial `up -d` may exit with an error if `start-indexing` fails. This is expected -- see step 5. If `graph-contracts` itself fails, check its logs -- the most likely cause is a missing prerequisite commit (see Prerequisites) or a stale Ignition journal (see step 2). + +### 4. Verify RecurringCollector was written to horizon.json + +```bash +DOCKER_DEFAULT_PLATFORM= docker compose -f docker-compose.yaml -f compose/dev/dips.yaml exec indexer-agent \ + jq '.["1337"].RecurringCollector' /opt/config/horizon.json +``` + +If this returns null, the contracts toolshed wasn't rebuilt after cherry-picking the whitelist fix. Run `cd $CONTRACTS_SOURCE_ROOT/packages/toolshed && pnpm build:self` and repeat from step 1. + +### 5. Fix nonce race failures + +Multiple containers use ACCOUNT0 concurrently after `graph-contracts` finishes (`start-indexing`, `tap-escrow-manager`). This causes "nonce too low" errors that can fail either container. The cascade is the real problem: if `start-indexing` fails, `dipper` and `ready` never start because they depend on it. + +Check whether `start-indexing` exited successfully: + +```bash +DOCKER_DEFAULT_PLATFORM= docker compose -f docker-compose.yaml -f compose/dev/dips.yaml ps -a start-indexing --format '{{.Status}}' +``` + +If it shows `Exited (1)`, restart it: + +```bash +DOCKER_DEFAULT_PLATFORM= docker compose -f docker-compose.yaml -f compose/dev/dips.yaml start start-indexing +``` + +Always restart `tap-escrow-manager` regardless of whether `start-indexing` succeeded. Even when authorization succeeds, the deposit step can hit "nonce too low" from competing with `start-indexing`. The `AlreadyAuthorized` error on restart is harmless -- it re-runs the deposit with a fresh nonce. + +```bash +DOCKER_DEFAULT_PLATFORM= docker compose -f docker-compose.yaml -f compose/dev/dips.yaml restart tap-escrow-manager +``` + +### 6. Bring up any cascade-failed containers + +If `start-indexing` failed on the initial `up -d`, containers that depend on it (`dipper`, `ready`) will be stuck in `Created` state. Run `up -d` again to catch them: + +```bash +DOCKER_DEFAULT_PLATFORM= docker compose -f docker-compose.yaml -f compose/dev/dips.yaml up -d --build +``` + +This is idempotent -- already-running containers are left alone. + +### 7. Verify signer authorization + +```bash +DOCKER_DEFAULT_PLATFORM= docker compose -f docker-compose.yaml -f compose/dev/dips.yaml logs tap-escrow-manager --since 60s 2>&1 | grep -i "authorized" +``` + +Expected: either `authorized signer=0x70997970C51812dc3A010C7d01b50e0d17dc79C8` (fresh auth) or `AuthorizableSignerAlreadyAuthorized` (already done on first run). Both are fine. + +### 8. Wait for TAP subgraph indexing, then verify dipper + +The TAP subgraph needs to index the `SignerAuthorized` event before the indexer-service will accept paid queries. 
Dipper may restart once or twice with "bad indexers: BadResponse(402)" during this window -- this is normal and self-resolves. + +Check: + +```bash +DOCKER_DEFAULT_PLATFORM= docker compose -f docker-compose.yaml -f compose/dev/dips.yaml ps dipper --format '{{.Name}} {{.Status}}' +``` + +Should show `dipper Up ... (healthy)`. If still restarting after 60 seconds, check gateway logs for persistent 402s. + +### 9. Full status check + +```bash +DOCKER_DEFAULT_PLATFORM= docker compose -f docker-compose.yaml -f compose/dev/dips.yaml ps --format '{{.Name}} {{.Status}}' | sort +``` + +All services should be Up. The key health-checked services are: chain, graph-node, postgres, ipfs, redpanda, indexer-agent, indexer-service, gateway, iisa-scoring, iisa, block-oracle, dipper. + +## Architecture notes + +The authorization chain that makes gateway queries work: + +1. `graph-contracts` deploys all contracts, writes addresses to config volume (`horizon.json`, `tap-contracts.json`) +2. `subgraph-deploy` deploys the TAP subgraph pointing at the Horizon PaymentsEscrow address (from `horizon.json`) +3. `tap-escrow-manager` authorizes ACCOUNT1 (gateway signer) on the PaymentsEscrow contract +4. The TAP subgraph indexes the `SignerAuthorized` event +5. `indexer-service` queries the TAP subgraph, sees ACCOUNT1 is authorized for ACCOUNT0 (the payer) +6. Gateway queries signed by ACCOUNT1 are accepted with 200 instead of 402 + +## Known issues + +- **ACCOUNT0 nonce race**: `start-indexing` and `tap-escrow-manager` both use ACCOUNT0 concurrently after `graph-contracts` finishes. Either can fail with "nonce too low". If `start-indexing` fails, `dipper` and `ready` never start (cascade). The fix is to restart the failed container and run `up -d` again. +- **Stale Ignition journals**: After a failed `graph-contracts` deployment, the journal at `packages/subgraph-service/ignition/deployments/chain-1337/` contains partial state. A fresh `down -v` destroys the chain but not the journal (it's in the mounted source). Always delete it before retrying (step 2). +- The contracts toolshed must be compiled (JS, not just TS) for the RecurringCollector whitelist to take effect. Use `pnpm build:self` in `packages/toolshed` (not `pnpm build` which fails on the `interfaces` package). + +## Key contract addresses (change each deploy) + +Read from the config volume: + +```bash +# All Horizon contracts +docker compose exec indexer-agent cat /opt/config/horizon.json | jq '.["1337"]' + +# TAP contracts +docker compose exec indexer-agent cat /opt/config/tap-contracts.json + +# Important ones for manual testing: +# GRT Token: jq '.["1337"].L2GraphToken.address' horizon.json +# PaymentsEscrow: jq '.["1337"].PaymentsEscrow.address' horizon.json +# RecurringCollector: jq '.["1337"].RecurringCollector.address' horizon.json +# GraphTallyCollector: jq '.["1337"].GraphTallyCollector.address' horizon.json +``` + +## Accounts + +- ACCOUNT0 (`0xf39Fd6e51aad88F6F4ce6aB8827279cffFb92266`): deployer, admin, payer +- ACCOUNT1 (`0x70997970C51812dc3A010C7d01b50e0d17dc79C8`): gateway signer +- RECEIVER (`0xf4EF6650E48d099a4972ea5B414daB86e1998Bd3`): indexer (mnemonic index 0 of "test...zero") diff --git a/.claude/skills/network-status/SKILL.md b/.claude/skills/network-status/SKILL.md new file mode 100644 index 0000000..0a2f725 --- /dev/null +++ b/.claude/skills/network-status/SKILL.md @@ -0,0 +1,8 @@ +--- +name: network-status +description: Show the current state of the local Graph protocol network. 
Use when the user asks for "network status", "show me the network", "what's deployed", "which indexers", "which subgraphs", "what's running", or wants to see allocations, sync status, or the network tree. +--- + +Run `python3 scripts/network-status.py` from the local-network repo root to fetch the current network state. + +Output the result directly as text in a code block so it renders inline without the user needing to expand tool results. diff --git a/.claude/skills/send-indexing-request/SKILL.md b/.claude/skills/send-indexing-request/SKILL.md new file mode 100644 index 0000000..f5d7563 --- /dev/null +++ b/.claude/skills/send-indexing-request/SKILL.md @@ -0,0 +1,86 @@ +--- +name: send-indexing-request +description: Send a test indexing request to dipper via the CLI. Use when testing the DIPs flow end-to-end, when the user asks to register an indexing request, send a test agreement, trigger the DIPs pipeline, or test dipper proposals. +--- + +# Send Indexing Request + +Register an indexing request with dipper and monitor the full DIPs pipeline: IISA candidate selection, RCA proposal signing, and indexer-service accept/reject. + +## Steps + +### 1. Build the dipper CLI (if not already built) + +```bash +cd /Users/samuel/Documents/github/dipper && cargo build --bin dipper-cli --release +``` + +### 2. Verify dipper is healthy + +```bash +DOCKER_DEFAULT_PLATFORM= docker compose -f docker-compose.yaml -f compose/dev/dips.yaml ps dipper --format '{{.Status}}' +``` + +Should show `Up ... (healthy)`. If not, use the `fresh-deploy` skill first. + +### 3. Send the indexing request + +```bash +cd /Users/samuel/Documents/github/dipper && ./target/release/dipper-cli indexings register \ + --server-url http://localhost:9000 \ + --signing-key "0x2ee789a68207020b45607f5adb71933de0946baebbaaab74af7cbd69c8a90573" \ + QmPdbQaRCMhgouSZSW3sHZxU3M8KwcngWASvreAexzmmrh \ + 1337 +``` + +The signing key belongs to RECEIVER (`0xf4EF6650E48d099a4972ea5B414daB86e1998Bd3`). The admin RPC allowlist only accepts this address. ACCOUNT0's key will return 403. + +On success, the CLI prints a UUID -- the indexing request ID. + +To use a different deployment, query graph-node for available ones: + +```bash +DOCKER_DEFAULT_PLATFORM= docker compose -f docker-compose.yaml -f compose/dev/dips.yaml exec graph-node \ + curl -s -X POST -H "Content-Type: application/json" \ + -d '{"query":"{ indexingStatuses { subgraph chains { network } } }"}' \ + http://localhost:8030/graphql +``` + +### 4. Monitor the pipeline + +Check logs from all three services involved in the flow: + +```bash +DOCKER_DEFAULT_PLATFORM= docker compose -f docker-compose.yaml -f compose/dev/dips.yaml logs -f dipper iisa indexer-service --since 30s 2>&1 +``` + +The expected sequence: + +1. **dipper** receives the request and calls IISA for candidate selection +2. **iisa** scores indexers and returns candidates (only 1 indexer in local-network) +3. **dipper** constructs an RCA, signs it via EIP-712, sends a proposal to indexer-service +4. **indexer-service** validates the RCA and accepts or rejects + +### 5. 
Check request status + +```bash +cd /Users/samuel/Documents/github/dipper && ./target/release/dipper-cli indexings status \ + --server-url http://localhost:9000 \ + --signing-key "0x2ee789a68207020b45607f5adb71933de0946baebbaaab74af7cbd69c8a90573" \ + +``` + +## Reference + +| Detail | Value | +|--------|-------| +| Admin RPC port | 9000 | +| Signing key | RECEIVER: `0x2ee789a68207020b45607f5adb71933de0946baebbaaab74af7cbd69c8a90573` | +| Signing address | `0xf4EF6650E48d099a4972ea5B414daB86e1998Bd3` | +| Chain ID | 1337 (hardhat) | +| Default deployment | `QmPdbQaRCMhgouSZSW3sHZxU3M8KwcngWASvreAexzmmrh` | + +## Common rejection reasons + +- **SIGNER_NOT_AUTHORISED**: The payer (ACCOUNT0) isn't authorized as a signer on the RecurringCollector contract. The escrow manager authorizes signers on PaymentsEscrow (for TAP) but not on RecurringCollector. +- **PRICE_TOO_LOW**: Dipper's pricing config doesn't meet indexer-service's minimum. Compare `pricing_table` in dipper's run.sh with `min_grt_per_30_days` in indexer-service's config. diff --git a/.env b/.env deleted file mode 100644 index 7d9edc9..0000000 --- a/.env +++ /dev/null @@ -1,134 +0,0 @@ -# Local Network Configuration -# -# This file is read by: -# - docker-compose (YAML variable substitution, plain key=value only) -# - host scripts (source .env) -# - containers (volume-mounted at /opt/config/.env, sourced by run.sh) -# -# Local overrides: create .env.local (gitignored) to override values for host -# scripts. Host scripts source .env.local after .env. Note: .env.local does NOT -# affect containers or docker-compose — those always use .env directly. -# -# Host scripts use ${VAR_HOST:-localhost} for service hostnames, allowing -# devcontainer environments to set *_HOST env vars (e.g. CHAIN_HOST=chain) -# to reach services on the Docker network instead of localhost. - -# --- Service profiles --- -# Controls which optional service groups are started. -# Available profiles: -# block-oracle epoch block oracle -# explorer block explorer UI -# rewards-eligibility REO eligibility oracle node -# indexing-payments dipper + iisa (requires GHCR auth — see README) -# Default: profiles that work out of the box. -COMPOSE_PROFILES=block-oracle,explorer -# All profiles (indexing-payments requires GHCR auth — see README): -#COMPOSE_PROFILES=rewards-eligibility,block-oracle,explorer,indexing-payments - -# --- Dev overrides --- -# Uncomment and extend to build services from local source. -# See compose/dev/README.md for available overrides. -#COMPOSE_FILE=docker-compose.yaml:compose/dev/graph-node.yaml - -# indexer components versions -GRAPH_NODE_VERSION=v0.37.0 -INDEXER_AGENT_VERSION=v0.25.4 -INDEXER_SERVICE_RS_VERSION=v1.8.0 -INDEXER_TAP_AGENT_VERSION=v1.12.2 - -# indexing-payments image versions (requires GHCR auth — see README) -# Set real tags in .env.local when enabling the indexing-payments profile. 
-DIPPER_VERSION=sha-24d10d4 -IISA_VERSION= - -# gateway components versions -GATEWAY_COMMIT=b37acb4976313316a2bc0a488ca98749da51c61d -TAP_AGGREGATOR_VERSION=sha-d38d0b9 -TAP_ESCROW_MANAGER_COMMIT=530a5a72da7592b8d442b94d82a5a5f57d4a2b40 - -# eligibility oracle (clone-and-build — requires published repo) -ELIGIBILITY_ORACLE_COMMIT=84710857394d3419f83dcbf6687a91f415cc1625 - -# network components versions -BLOCK_ORACLE_COMMIT=3a3a425ff96130c3842cee7e43d06bbe3d729aed -CONTRACTS_COMMIT=511cd70563593122f556c7b35469ec185574769a -NETWORK_SUBGRAPH_COMMIT=5b6c22089a2e55db16586a19cbf6e1d73a93c7b9 -TAP_CONTRACTS_COMMIT=e3351e70b3e5d9821bc0aaa90bb2173ca2a77af7 -TAP_SUBGRAPH_COMMIT=cf7279f60433bf9a9d897ec2548c13c0607234cc - -# service ports -CHAIN_RPC_PORT=8545 -IPFS_RPC_PORT=5001 -POSTGRES_PORT=5432 -GRAPH_NODE_GRAPHQL_PORT=8000 -GRAPH_NODE_ADMIN_PORT=8020 -GRAPH_NODE_STATUS_PORT=8030 -GRAPH_NODE_METRICS_PORT=8040 -INDEXER_MANAGEMENT_PORT=7600 -INDEXER_SERVICE_PORT=7601 -GATEWAY_PORT=7700 -REDPANDA_KAFKA_PORT=9092 -REDPANDA_KAFKA_EXTERNAL_PORT=29092 -REDPANDA_ADMIN_PORT=9644 -REDPANDA_PANDAPROXY_PORT=8082 -REDPANDA_SCHEMA_REGISTRY_PORT=8081 -TAP_AGGREGATOR_PORT=7610 -BLOCK_EXPLORER_PORT=3000 - -# backward compat: old names without _PORT suffix (shell-only, uses ${} expansion) -# docker-compose sees these as literal strings — use _PORT names in docker-compose.yaml -# TODO: remove once all consumers (other repos) are migrated to _PORT names -CHAIN_RPC=${CHAIN_RPC_PORT} -IPFS_RPC=${IPFS_RPC_PORT} -POSTGRES=${POSTGRES_PORT} -GRAPH_NODE_GRAPHQL=${GRAPH_NODE_GRAPHQL_PORT} -GRAPH_NODE_ADMIN=${GRAPH_NODE_ADMIN_PORT} -GRAPH_NODE_STATUS=${GRAPH_NODE_STATUS_PORT} -GRAPH_NODE_METRICS=${GRAPH_NODE_METRICS_PORT} -INDEXER_MANAGEMENT=${INDEXER_MANAGEMENT_PORT} -INDEXER_SERVICE=${INDEXER_SERVICE_PORT} -GATEWAY=${GATEWAY_PORT} -REDPANDA_KAFKA=${REDPANDA_KAFKA_PORT} -REDPANDA_KAFKA_EXTERNAL=${REDPANDA_KAFKA_EXTERNAL_PORT} -REDPANDA_ADMIN=${REDPANDA_ADMIN_PORT} -REDPANDA_PANDAPROXY=${REDPANDA_PANDAPROXY_PORT} -REDPANDA_SCHEMA_REGISTRY=${REDPANDA_SCHEMA_REGISTRY_PORT} -TAP_AGGREGATOR=${TAP_AGGREGATOR_PORT} -BLOCK_EXPLORER=${BLOCK_EXPLORER_PORT} - -# Indexing Payments (used with indexing-payments override) -DIPPER_ADMIN_RPC_PORT=9000 -DIPPER_INDEXER_RPC_PORT=9001 - -## Chain config -CHAIN_ID=1337 -CHAIN_NAME="hardhat" - -## Wallet -## - Account 0 used by: EBO, admin actions (deploy contracts, transfer ETH/GRT), gateway payer for PaymentsEscrow -## - Account 1 used by: Gateway signer for PaymentsEscrow -MNEMONIC="test test test test test test test test test test test junk" -ACCOUNT0_ADDRESS="0xf39Fd6e51aad88F6F4ce6aB8827279cffFb92266" -ACCOUNT0_SECRET="0xac0974bec39a17e36ba4a6b4d238ff944bacb478cbed5efcae784d7bf4f2ff80" -ACCOUNT1_ADDRESS="0x70997970C51812dc3A010C7d01b50e0d17dc79C8" -ACCOUNT1_SECRET="0x59c6995e998f97a5a0044966f0945389dc9e86dae88c7a8412f4603b6b78690d" - -# receiver of Scalar payments (receiver is index 0 of mnemonic) -INDEXER_MNEMONIC="test test test test test test test test test test test zero" -RECEIVER_ADDRESS="0xf4EF6650E48d099a4972ea5B414daB86e1998Bd3" -RECEIVER_SECRET="0x2ee789a68207020b45607f5adb71933de0946baebbaaab74af7cbd69c8a90573" - -# subgraphs -SUBGRAPH="BFr2mx7FgkJ36Y6pE5BiXs1KmNUmVDCnL82KUSdcLW1g" -SUBGRAPH_2="9p1TRzaccKzWBN4P6YEwEUxYwJn6HwPxf5dKXK2NYxgS" - -# REO (Rewards Eligibility Oracle) -# Set to 1 to deploy and configure the REO contract (Phase 4). Unset or 0 to skip. 
-REO_ENABLED=0 -# eligibilityPeriod: how long an indexer stays eligible after renewal (seconds) -REO_ELIGIBILITY_PERIOD=300 -# oracleUpdateTimeout: fail-safe — if no oracle update for this long, all indexers eligible (seconds) -REO_ORACLE_UPDATE_TIMEOUT=86400 - -# Gateway -GATEWAY_API_KEY="deadbeefdeadbeefdeadbeefdeadbeef" diff --git a/.env b/.env new file mode 120000 index 0000000..9bbb577 --- /dev/null +++ b/.env @@ -0,0 +1 @@ +.environment \ No newline at end of file diff --git a/.environment b/.environment new file mode 100644 index 0000000..2b49bc2 --- /dev/null +++ b/.environment @@ -0,0 +1,151 @@ +# Local Network Configuration +# +# This file is read by: +# - docker-compose (YAML variable substitution, plain key=value only) +# - host scripts (source .env) +# - containers (volume-mounted at /opt/config/.env, sourced by run.sh) +# +# Local overrides: create .env.local (gitignored) to override values for host +# scripts. Host scripts source .env.local after .env. Note: .env.local does NOT +# affect containers or docker-compose — those always use .env directly. +# +# Host scripts use ${VAR_HOST:-localhost} for service hostnames, allowing +# devcontainer environments to set *_HOST env vars (e.g. CHAIN_HOST=chain) +# to reach services on the Docker network instead of localhost. + +# --- Service profiles --- +# Controls which optional service groups are started. +# Available profiles: +# block-oracle epoch block oracle +# explorer block explorer UI +# rewards-eligibility REO eligibility oracle node +# indexing-payments dipper + iisa (requires GHCR auth — see README) +# rewards-eligibility disabled: REO contract not deployed (REO_ENABLED=0) +COMPOSE_PROFILES=block-oracle,explorer,indexing-payments + +# --- Dev overrides --- +# DIPs development: mount local source and build inside containers +# All components built from local checkouts - no stubs or GHCR images +# contracts repo must be on escrow-management branch with `pnpm install && pnpm build` done + +COMPOSE_FILE=docker-compose.yaml:compose/dev/dips.yaml + +# Local source directories (mounted into containers, built from source) +CONTRACTS_SOURCE_ROOT=/Users/samuel/Documents/github/contracts +INDEXER_SERVICE_SOURCE_ROOT=/Users/samuel/Documents/github/indexer-rs +INDEXER_AGENT_SOURCE_ROOT=/Users/samuel/Documents/github/indexer +DIPPER_SOURCE_ROOT=/Users/samuel/Documents/github/dipper +IISA_SOURCE_ROOT=/Users/samuel/Documents/github/subgraph-dips-indexer-selection +REO_SOURCE_ROOT=/Users/samuel/Documents/github/eligibility-oracle-node + +# Legacy binary mounts (unused when dips.yaml is active) +INDEXER_SERVICE_BINARY=/Users/samuel/Documents/github/indexer-rs/target/release/indexer-service-rs +TAP_AGENT_BINARY=/Users/samuel/Documents/github/indexer-rs/target/release/indexer-tap-agent + +# indexer components versions (checked Feb 2026) +# Note: INDEXER_SERVICE_RS_VERSION and INDEXER_TAP_AGENT_VERSION are unused when dev overrides are active +GRAPH_NODE_VERSION=v0.41.2 +INDEXER_SERVICE_RS_VERSION=main +INDEXER_TAP_AGENT_VERSION=main +INDEXER_AGENT_VERSION=v0.25.5 + +# indexing-payments image versions (requires GHCR auth — see README) +DIPPER_VERSION=latest +IISA_VERSION=latest + +# gateway components versions +GATEWAY_COMMIT=b37acb4976313316a2bc0a488ca98749da51c61d +TAP_AGGREGATOR_VERSION=sha-d38d0b9 +TAP_ESCROW_MANAGER_COMMIT=530a5a72da7592b8d442b94d82a5a5f57d4a2b40 + +# eligibility oracle (clone-and-build — requires published repo) +ELIGIBILITY_ORACLE_COMMIT=84710857394d3419f83dcbf6687a91f415cc1625 + +# network components versions 
+BLOCK_ORACLE_COMMIT=3a3a425ff96130c3842cee7e43d06bbe3d729aed +# CONTRACTS_COMMIT is unused when dips.yaml mounts CONTRACTS_SOURCE_ROOT +# Set to escrow-management branch for non-dev builds +CONTRACTS_COMMIT=rem-baseline-merge +NETWORK_SUBGRAPH_COMMIT=5b6c22089a2e55db16586a19cbf6e1d73a93c7b9 +TAP_CONTRACTS_COMMIT=e3351e70b3e5d9821bc0aaa90bb2173ca2a77af7 +TAP_SUBGRAPH_COMMIT=cf7279f60433bf9a9d897ec2548c13c0607234cc + +# service ports +CHAIN_RPC_PORT=8545 +IPFS_RPC_PORT=5001 +POSTGRES_PORT=5432 +GRAPH_NODE_GRAPHQL_PORT=8000 +GRAPH_NODE_ADMIN_PORT=8020 +GRAPH_NODE_STATUS_PORT=8030 +GRAPH_NODE_METRICS_PORT=8040 +INDEXER_MANAGEMENT_PORT=7600 +INDEXER_SERVICE_PORT=7601 +GATEWAY_PORT=7700 +REDPANDA_KAFKA_PORT=9092 +REDPANDA_KAFKA_EXTERNAL_PORT=29092 +REDPANDA_ADMIN_PORT=9644 +REDPANDA_PANDAPROXY_PORT=8082 +REDPANDA_SCHEMA_REGISTRY_PORT=8081 +TAP_AGGREGATOR_PORT=7610 +BLOCK_EXPLORER_PORT=3000 + +# backward compat: old names without _PORT suffix (shell-only, uses ${} expansion) +# docker-compose sees these as literal strings — use _PORT names in docker-compose.yaml +# TODO: remove once all consumers (other repos) are migrated to _PORT names +CHAIN_RPC=${CHAIN_RPC_PORT} +IPFS_RPC=${IPFS_RPC_PORT} +POSTGRES=${POSTGRES_PORT} +GRAPH_NODE_GRAPHQL=${GRAPH_NODE_GRAPHQL_PORT} +GRAPH_NODE_ADMIN=${GRAPH_NODE_ADMIN_PORT} +GRAPH_NODE_STATUS=${GRAPH_NODE_STATUS_PORT} +GRAPH_NODE_METRICS=${GRAPH_NODE_METRICS_PORT} +INDEXER_MANAGEMENT=${INDEXER_MANAGEMENT_PORT} +INDEXER_SERVICE=${INDEXER_SERVICE_PORT} +GATEWAY=${GATEWAY_PORT} +REDPANDA_KAFKA=${REDPANDA_KAFKA_PORT} +REDPANDA_KAFKA_EXTERNAL=${REDPANDA_KAFKA_EXTERNAL_PORT} +REDPANDA_ADMIN=${REDPANDA_ADMIN_PORT} +REDPANDA_PANDAPROXY=${REDPANDA_PANDAPROXY_PORT} +REDPANDA_SCHEMA_REGISTRY=${REDPANDA_SCHEMA_REGISTRY_PORT} +TAP_AGGREGATOR=${TAP_AGGREGATOR_PORT} +BLOCK_EXPLORER=${BLOCK_EXPLORER_PORT} + +# Indexing Payments (used with indexing-payments override) +DIPPER_ADMIN_RPC_PORT=9000 +DIPPER_INDEXER_RPC_PORT=9001 +INDEXER_SERVICE_DIPS_RPC_PORT=7602 + +## Chain config +CHAIN_ID=1337 +CHAIN_NAME="hardhat" + +## Wallet +## - Account 0 used by: EBO, admin actions (deploy contracts, transfer ETH/GRT), gateway payer for PaymentsEscrow +## - Account 1 used by: Gateway signer for PaymentsEscrow +MNEMONIC="test test test test test test test test test test test junk" +ACCOUNT0_ADDRESS="0xf39Fd6e51aad88F6F4ce6aB8827279cffFb92266" +ACCOUNT0_SECRET="0xac0974bec39a17e36ba4a6b4d238ff944bacb478cbed5efcae784d7bf4f2ff80" +ACCOUNT1_ADDRESS="0x70997970C51812dc3A010C7d01b50e0d17dc79C8" +ACCOUNT1_SECRET="0x59c6995e998f97a5a0044966f0945389dc9e86dae88c7a8412f4603b6b78690d" + +# receiver of Scalar payments (receiver is index 0 of mnemonic) +INDEXER_MNEMONIC="test test test test test test test test test test test zero" +RECEIVER_ADDRESS="0xf4EF6650E48d099a4972ea5B414daB86e1998Bd3" +RECEIVER_SECRET="0x2ee789a68207020b45607f5adb71933de0946baebbaaab74af7cbd69c8a90573" + +# subgraphs +SUBGRAPH="BFr2mx7FgkJ36Y6pE5BiXs1KmNUmVDCnL82KUSdcLW1g" +SUBGRAPH_2="9p1TRzaccKzWBN4P6YEwEUxYwJn6HwPxf5dKXK2NYxgS" + +# REO (Rewards Eligibility Oracle) +# Set to 1 to deploy and configure the REO contract (Phase 4). Unset or 0 to skip. 
+REO_ENABLED=0 +# eligibilityPeriod: how long an indexer stays eligible after renewal (seconds) +REO_ELIGIBILITY_PERIOD=300 +# oracleUpdateTimeout: fail-safe — if no oracle update for this long, all indexers eligible (seconds) +REO_ORACLE_UPDATE_TIMEOUT=86400 + +# Gateway +GATEWAY_API_KEY="deadbeefdeadbeefdeadbeefdeadbeef" + +REO_BINARY=/Users/samuel/Documents/github/eligibility-oracle-node/target/release/eligibility-oracle diff --git a/.gitignore b/.gitignore index 9a4e456..6cb9add 100644 --- a/.gitignore +++ b/.gitignore @@ -1,6 +1,7 @@ # IDEs .vscode -.claude +.claude/* +!.claude/skills/ .idea # Environment overrides diff --git a/BUGS.md b/BUGS.md new file mode 100644 index 0000000..ea43f96 --- /dev/null +++ b/BUGS.md @@ -0,0 +1,71 @@ +# DIPs Local Testing - Bug Tracker + +## BUG-001: dipper migration not embedded in service binary + +**Symptom**: `column "num_candidates" of relation "dipper_reg_indexing_requests" does not exist` on any fresh dipper deployment. + +**Root cause**: Migration `20260205000000_add_num_candidates_to_indexing_requests.sql` lives in `dipper-pgregistry/migrations/` but `dipper-service` only embeds migrations from `bin/dipper-service/migrations/`. The embedded migrator never sees it. + +**Repo**: `dipper` +**Fix**: Either move the migration into `bin/dipper-service/migrations/` or change the embedded migrator to include `dipper-pgregistry/migrations/`. +**PR**: fixed locally on `fix/delegate-migrations-to-subcrates` branch + +## BUG-002: dipper run.sh hardcodes RecurringCollector as zero address + +**Symptom**: dipper returns 503 on all admin RPC calls because it can't interact with the RecurringCollector contract. + +**Root cause**: `containers/indexing-payments/dipper/run.sh` has `"recurring_collector": "0x0000000000000000000000000000000000000000"` instead of reading the deployed address from the config volume. + +**Repo**: `local-network` +**Fix**: Read address from horizon.json via `contract_addr RecurringCollector.address horizon`. Already applied locally. +**PR**: not submitted + +## BUG-003: indexer-service run-dips.sh uses stale config field names + +**Symptom**: `Ignoring unknown configuration field: dips.?.allowed_payers`, `dips.?.price_per_entity`, `dips.?.price_per_epoch`. Then: `DIPs enabled but no networks in dips.supported_networks. All proposals will be rejected.` + +**Root cause**: `containers/indexer/indexer-service/dev/run-dips.sh` uses old config fields (`allowed_payers`, `price_per_entity`, `price_per_epoch`) that no longer exist in the indexer-rs `DipsConfig` struct. The current fields are `supported_networks`, `min_grt_per_30_days`, `min_grt_per_billion_entities_per_30_days`. + +**Repo**: `local-network` +**Fix**: Replace old fields with `supported_networks = ["hardhat"]` and `[dips.min_grt_per_30_days]`. Already applied locally. +**PR**: not submitted + +## BUG-004: register_new_indexing_request does not accept num_candidates + +**Symptom**: Studio has no way to specify how many indexers should index a given subgraph. The `num_candidates` value is hardcoded to 3 at the database default level. + +**Root cause**: The `register_new_indexing_request` JSON-RPC method and EIP-712 message struct only accept `deployment_id` and `chain_id`. There is no parameter to pass `num_candidates` through from the caller. + +**Repo**: `dipper` +**Fix**: Add an optional `num_candidates` field to the EIP-712 message struct, the RPC handler, and the CLI `--num-candidates` flag. Default to 3 when not provided. 
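+
+For illustration only -- a minimal sketch of the shape the optional parameter could take on the dipper side, assuming Rust; the struct and field names here are hypothetical, not dipper's actual definitions:
+
+```rust
+// Hypothetical sketch: an indexing-request message carrying an optional
+// candidate count. Names and types are illustrative, not dipper's real ones.
+#[derive(Clone, Debug)]
+pub struct RegisterIndexingRequest {
+    pub deployment_id: String,
+    pub chain_id: u64,
+    /// Optional; when absent, fall back to the current default of 3.
+    pub num_candidates: Option<u32>,
+}
+
+impl RegisterIndexingRequest {
+    /// Resolve the effective candidate count for this request.
+    pub fn effective_num_candidates(&self) -> u32 {
+        self.num_candidates.unwrap_or(3)
+    }
+}
+```
+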
+**PR**: https://github.com/edgeandnode/dipper/pull/572 + +## BUG-005: TAP subgraph pointed at old Escrow contract instead of Horizon PaymentsEscrow + +**Symptom**: Gateway returns 402 for all queries. Indexer-service rejects with "No sender found for signer 0x7099...". Dipper crashes on bootstrap meta query. + +**Root cause**: `containers/core/subgraph-deploy/run.sh` deployed the TAP subgraph (`semiotic/tap`) pointing at the old TAP Escrow from `tap-contracts.json`. The `tap-escrow-manager` correctly authorizes signers on the Horizon PaymentsEscrow from `horizon.json`. The subgraph never indexes the Horizon authorization events, so the indexer-service sees no authorized signers. + +**Repo**: `local-network` +**Fix**: Changed `contract_addr Escrow tap-contracts` to `contract_addr PaymentsEscrow.address horizon` in subgraph-deploy/run.sh. Applied locally. +**PR**: not submitted + +## BUG-006: RecurringCollector address missing from horizon.json on fresh deploy + +**Symptom**: Dipper restart loop with `"1337".RecurringCollector.address not found in /opt/config/horizon.json`. + +**Root cause**: The `saveToAddressBook` function in contracts toolshed (`packages/toolshed/src/deployments/horizon/contracts.ts`) has a `GraphHorizonContractNameList` whitelist. `RecurringCollector` was deployed on-chain by Ignition but silently dropped from the address book because it wasn't in the whitelist. The fix exists on the `mde/dips-ignition-deployment` branch. + +**Repo**: `contracts` +**Fix**: Cherry-picked commits `3998337a` (adds RecurringCollector ignition module) and `15380514` (adds to whitelist) onto `escrow-management`. Also requires `pnpm build:self` in `packages/toolshed` to compile the TS change to JS. +**PR**: exists on `mde/dips-ignition-deployment` branch (not yet merged to `escrow-management`) + +## BUG-007: HorizonStaking Ignition module missing dependency on GraphPeripheryModule + +**Symptom**: `graph-contracts` fails with `GraphDirectoryInvalidZeroAddress("GraphToken")` during contract deployment. Nondeterministic -- may work on some branches and fail on others. + +**Root cause**: `packages/horizon/ignition/modules/core/HorizonStaking.ts` deploys HorizonStaking without an `after` dependency on `GraphPeripheryModule`. The HorizonStaking constructor extends `GraphDirectory`, which queries the Controller for GraphToken, EpochManager, RewardsManager, etc. These are registered in the Controller by `GraphPeripheryModule`. Without the explicit dependency, Ignition may schedule HorizonStaking before the periphery registrations, causing the constructor to read `address(0)` and revert. Every other core module (GraphPayments, PaymentsEscrow, GraphTallyCollector, RecurringCollector) has `{ after: [GraphPeripheryModule, HorizonProxiesModule] }` but HorizonStaking was missing it. + +**Repo**: `contracts` +**Fix**: Add `{ after: [GraphPeripheryModule, HorizonProxiesModule] }` to the `deployImplementation` call in `HorizonStaking.ts`. Applied locally on `indexing-payments-management-audit`. +**PR**: not submitted \ No newline at end of file diff --git a/CLAUDE.md b/CLAUDE.md new file mode 100644 index 0000000..afedbd4 --- /dev/null +++ b/CLAUDE.md @@ -0,0 +1,45 @@ +# Local Network + +A Docker Compose environment that runs the full Graph protocol stack locally for development and integration testing. + +## Current Objective + +Systematic end-to-end testing of DIPs (Direct Indexer Payments) before testnet deployment. Every bug found here must be fixed at the source with a proper PR to the relevant repo. 
No hack fixes, no workarounds that won't survive a fresh deployment. + +When something breaks, document the root cause, identify which repo owns the fix, and describe what the PR should do. The goal is that testnet deployment encounters zero issues because every problem was already caught and patched here. + +## Bug Tracking + +When a bug is found during testing, log it in `BUGS.md` @BUGS.md with: + +- What broke (symptom) +- Root cause +- Which repo needs the fix +- What the fix should be +- Whether a PR has been submitted + +## Architecture + +The stack has these layers: + +- **Chain**: local Hardhat EVM node (chain ID 1337) with all Graph protocol contracts +- **Indexing**: graph-node, indexer-agent, indexer-service +- **Gateway**: routes paid queries to indexers +- **Payments (TAP)**: tap-aggregator, tap-escrow-manager, tap-agent +- **DIPs**: dipper (orchestrator), iisa (indexing indexer selection algorithm - subgraph-dips-indexer-selection) +- **Oracles**: block-oracle, eligibility-oracle-node (REO) + +Dev overrides (`compose/dev/dips.yaml`) mount local source for: contracts, indexer-rs, dipper, iisa, eligibility-oracle-node. Everything else uses pinned versions or clones at build time. + +## Key Config + +- `.environment` is the canonical config file. `.env` is a symlink to it. +- `COMPOSE_FILE=docker-compose.yaml:compose/dev/dips.yaml` activates dev overrides. +- `DOCKER_DEFAULT_PLATFORM=` must prefix docker compose commands to avoid conflicts with per-service `platform: linux/arm64` in dips.yaml. We are testing on MacOS, production on linux. + +## Rules + +- Never apply hack fixes to unblock testing. If something is broken, find the root cause and document it properly in bugs. +- Every fix that touches another repo (dipper, indexer-rs, contracts, iisa, etc.) needs a PR to that repo. +- Fixes to local-network config/scripts should be committed to this repo. +- When restarting containers that build from source, expect cargo build time. Don't assume instant restarts. diff --git a/TESTING-STATUS.md b/TESTING-STATUS.md new file mode 100644 index 0000000..15241b9 --- /dev/null +++ b/TESTING-STATUS.md @@ -0,0 +1,142 @@ +# DIPs Testing Status + +Tracking what has and hasn't been tested end-to-end in local-network before testnet deployment. + +## What works + +### 1. Proposal happy path + +1. Dipper receives an indexing request via admin RPC (`indexings register`) +2. IISA scores available indexers and returns candidates (single indexer in local-network) +3. Dipper constructs a RecurringCollectionAgreement, signs it via EIP-712, and sends the proposal to indexer-service over gRPC +4. Indexer-service validates the proposal (signature, pricing, network, deadline) and accepts +5. The signed RCA is stored in `pending_rca_proposals` with status `pending` +6. The indexer-agent consumer (PR #1174) picks up the proposal and checks whether an indexing rule exists for the deployment + +### 2. Supporting infrastructure + +TAP subgraph correctly points at Horizon PaymentsEscrow, signer authorization events are indexed, gateway queries return 200, RecurringCollector address is written to horizon.json. + +### 3. Indexer-service rejection paths + +Five of the eight rejection paths have been tested end-to-end. + +**PriceTooLow**: Temporarily set `min_grt_per_30_days["hardhat"] = "999999"` in indexer-service config. Dipper's pricing (`174000000000000` wei/s, ~450 GRT/30d) fell below the inflated minimum. Indexer-service rejected with `PRICE_TOO_LOW`, dipper recorded it correctly. 
The indexer enters a 1-day lookback exclusion for that deployment. + +**UnsupportedNetwork**: Set `supported_networks = []` in indexer-service config. The deployment's network (`hardhat`, resolved from the IPFS manifest) had no matching entry. Indexer-service rejected with `UNSUPPORTED_NETWORK`, dipper recorded it correctly. The indexer enters a 30-day lookback exclusion. + +**SubgraphManifestUnavailable**: Sent a request for a non-existent deployment ID (`QmWmyoMoctfbAaiEs2G46gpeUmhqFRDW6KWo64y5r581Vz`). The indexer-service attempted to fetch the manifest from IPFS (190-second timeout), failed, and rejected with `SUBGRAPH_MANIFEST_UNAVAILABLE`. Dipper recorded it correctly. The indexer enters a 5-minute lookback exclusion. + +**DeadlineExpired**: Set `deadline_seconds: 0` in dipper config and added 2-second network delay on the indexer-service gRPC port using `tc netem`. The delay is necessary because the local pipeline delivers proposals in under 6ms -- well within the same second -- so without it, the second-precision deadline check (`deadline < now`) always passes. With the delay, the indexer-service received the proposal 2 seconds after dipper computed the deadline, and rejected with `DEADLINE_EXPIRED` (`agreement deadline 1772672762 has already passed (current time: 1772672764)`). Dipper recorded the rejection correctly. The technique requires `NET_ADMIN` capability on the indexer-service container and `iproute2` installed. Port-specific delay (`tc filter` on port 7602) avoids disrupting the rest of the indexer-service's network traffic. + +**SignerNotAuthorised**: Changed dipper's DIPs signer key to an arbitrary unauthorized key (`0x0123...`, address `0xFCAd0B19bB29D4674531d6f115237E16AfCE377c`) while leaving the TAP signer unchanged. The indexer-service checked the recovered signer against the RecurringCollector's authorized signers, found no match, and rejected with `SIGNER_NOT_AUTHORISED`. Dipper recorded the rejection correctly. Previously blocked by the topology crash-on-restart bug (dipper PR #578), which has since been fixed. + +### 4. Dipper status and listing commands + +All CLI read commands work correctly. `indexings list` returns all requests with correct metadata. `indexings status` accepts both UUIDs and deployment IDs, returning 404 for unknown UUIDs. `agreements list` returns agreements per request, with an empty array when none exist. A duplicate request for the same deployment+indexer correctly fails with a unique constraint (`idx_unique_active_agreement_per_indexer_deployment`) -- the request is created but no duplicate agreement is added. + +### 5. Multiple requests and concurrent proposals + +A second request for the same deployment (`QmPdb`) was accepted -- dipper does not deduplicate requests. However, the `idx_unique_active_agreement_per_indexer_deployment` constraint prevented a duplicate agreement for the same indexer+deployment. The second request sat in OPEN with zero agreements. The constraint violation is now handled gracefully (dipper PR #579) -- the handler logs a warning and skips the candidate instead of failing the job. + +Requests for different deployments worked independently. All three local-network deployments received separate requests and agreements without interference. + +Multiple agreements for the same indexer worked as expected. With a single indexer in local-network, every agreement targets `0xf4EF...`. Three concurrent agreements (one per deployment) coexisted without issues. + +### 6. 
Cancellation flows + +**Request cancellation** (`indexings cancel`): Cancelling an OPEN request transitions it to `CANCELED` and cascades to all active agreements, marking them `CANCELED_BY_REQUESTER`. Cancelling an already-cancelled request is idempotent (no error). Cancelling a non-existent request returns 404. + +**Agreement cancellation** (`agreements cancel`): Cancelling a specific `CREATED` agreement marks it `CANCELED_BY_REQUESTER` and immediately triggers reassessment. IISA returns new candidates, and dipper creates a replacement agreement for the same request. In local-network with one indexer, the replacement agreement targets the same indexer -- the unique constraint allows it because the original agreement is no longer active. Cancelling the parent request after agreement cancellation cascades to both the original and the reassessment-created agreement. + +### 7. Agreement expiration and reassessment + +Enabled the expiration service (`interval: 10s, batch_size: 100`) and set `deadline_seconds: 5` to create agreements that expire quickly. The proposal was accepted by the indexer within milliseconds (pipeline completes in <6ms). Seven seconds after creation, the expiration service found the agreement past its deadline, marked it `Expired`, and queued a reassessment job. The reassessment handler ran but determined "no changes needed" -- the only candidate was the same indexer that already had the expired agreement. No replacement agreement was created, leaving the request in OPEN with one expired agreement. This is correct for a single-indexer environment; with multiple indexers, reassessment would find alternative candidates. + +## Indexer-agent + +PR #1174 (`feat/dips-pending-rca-consumer`) adds the migration and consumer that reads `pending_rca_proposals` and creates indexing rules. PR #1175 (`feat/dips-on-chain-accept`, targeting #1174) adds `acceptPendingProposals()` which calls `acceptIndexingAgreement` on SubgraphService on-chain. If no allocation exists for the deployment, it atomically creates one via `multicall(startService + acceptIndexingAgreement)`. The local-network indexer-agent now runs on `feat/dips-on-chain-accept`. + +### Payment collection + +The `DipsCollector` still operates on the old `IndexingAgreement` model, not `pending_rca_proposals`. The full collection flow (agent calls dipper's `CollectPayment` RPC, dipper calls `collect()` on RecurringCollector on-chain, funds move from payer's escrow to the indexer) can't be exercised until the collector is updated to work with the new table. + +### RecurringCollector contract operations + +The contract has several functions beyond `accept()` that are part of the full lifecycle: `collect()` (payment collection), `update()` (update agreement terms), `cancel()` (on-chain cancellation by either party), and collection window enforcement (`minSecondsPerCollection` / `maxSecondsPerCollection` validation during collect). Collection cannot be tested until the collector is updated. + +## What hasn't been tested + +### #1 Indexer-service rejection paths (remaining) + +Five of eight rejection paths were tested end-to-end (see "What works" section 3). The remaining three are defensive guards against malformed or misrouted traffic that correct clients cannot produce. All three are covered by unit tests in indexer-rs (`test_validate_and_create_rca_wrong_service_provider`, `test_validate_and_create_rca_malformed_abi`, `test_validate_and_create_rca_invalid_metadata_version`). E2E testing is not warranted. 
+ +- **UnexpectedServiceProvider** -- guards against misrouted proposals. Correct clients always set the right `service_provider` from network topology. +- **InvalidSignature** -- catches corrupted or truncated signature bytes. No correct client produces these. +- **UnsupportedMetadataVersion** -- catches future protocol versions. Dipper always sends version 1. + +### #2 Dipper lifecycle beyond proposal delivery + +Most lifecycle paths have been tested (see "What works" sections 6 and 7). Remaining: + +- **On-chain cancellation of rejected agreements**: If an agreement was rejected off-chain but somehow accepted on-chain, dipper calls `cancelIndexingAgreementByPayer` on SubgraphService to prevent payment. Edge case, untested and blocked on indexer-agent on-chain acceptance support. + +### #3 Restart resilience + +Dipper was killed (`docker kill`) after processing a request and restarted. All state survived -- requests, agreements, and metadata were fully preserved in Postgres. Dipper has no in-memory state recovery mechanism; it reconnects to the database, runs migrations (idempotent), and resumes. The expiration service catches any `Created` agreements that expire while dipper is down. + +The pipeline completes so fast (<6ms from request registration to indexer acceptance) that simulating a crash between request registration and IISA candidate selection is impractical in local-network. If dipper crashes mid-pipeline, the request sits in `OPEN` with no agreements. There is no explicit recovery for in-flight jobs -- the request would need manual reassessment or a new request. + +Untested scenarios that depend on indexer-agent changes: +- Indexer-agent restarts mid-reconciliation while processing pending proposals (blocked on PR #1174) +- Indexer-service accepts a proposal but crashes before writing to `pending_rca_proposals` (out-of-sync risk between dipper and indexer) + +### #4 Gateway awareness of DIPs + +The gateway has no DIPs-specific code. It routes queries to indexers via TAP regardless of whether a DIPs agreement exists. This is expected (DIPs is a payment mechanism, not a query routing mechanism), but it means there's no way to verify from the gateway side that a DIPs-funded query is being served correctly. The indexer just indexes and serves -- payment happens separately. + +### #5 IISA scoring cronjob — degraded mode only + +The `iisa-cronjob` container runs the real IISA scoring pipeline from the IISA repo (`cronjobs/compute_scores/`). Without GeoIP databases (no MaxMind license key in local-network) and with minimal Redpanda data, the full pipeline (latency regression, geographic distance, iterative filtering) cannot run. The cronjob falls back to degraded mode: it discovers indexers from the network subgraph, fetches `/dips/info` from each indexer-service to collect real pricing data, and writes scores with equal quality metrics. All indexers get identical latency/uptime/success scores (0.5) but carry their actual `min_grt_per_30_days` and `supported_networks` from `/dips/info`. + +This enables the per-indexer pricing path through IISA and dipper. What remains untested is the full scoring pipeline's differentiation between indexers — latency regression, GeoIP-based distance calculation, and stake-to-fees ratios. These require production-scale Redpanda data and MaxMind GeoIP databases. + +**Verification (not yet done — requires fresh deploy):** + +1. Fresh deploy (`down -v`, `up -d --build`) +2. 
Cronjob container starts, fails the full pipeline (no GeoIP, minimal data), degrades to equal-score mode +3. Cronjob fetches `/dips/info` from indexer-service, writes scores file with `dips_info_available: true` and real `dips_min_grt_per_30_days` values +4. IISA loads scores — verify pricing is populated +5. Send indexing request via dipper CLI +6. Check dipper logs: `iisa_price=true` in "Creating agreement with pricing" log (confirms IISA pricing used, not static fallback) +7. Indexer-service accepts the proposal + +### #6 Scale to 10+ indexer network + +Local-network runs one indexer, so IISA candidate selection is trivial (always picks the only option). Multi-indexer scoring, tiebreaking, and reassignment to a different indexer after rejection can't be tested without scaling up. A full indexer stack (graph-node ~68MB, postgres ~200MB, indexer-agent ~300MB, indexer-service ~45MB) is roughly 600MB per indexer. On a 64GB machine, 10 full indexer stacks would use around 6GB -- well within budget. This would give us a realistic local network where different indexers index different subgraphs, IISA selects from a real candidate pool, and dipper delivers proposals to genuinely independent indexers. + +## Testing environment limitations + +**Instant finality**: Anvil mines blocks with `--block-time 1` (dev override) or `--block-time 30` (default) with no reorg risk. Timing-sensitive flows like collection window enforcement behave differently than on a real chain. Deadline expiry testing required artificial network delay (`tc netem`) because the local pipeline completes in under 6ms. + +**No real escrow funding**: The payer (ACCOUNT0) has unlimited hardhat ETH/GRT. Escrow balance checks, insufficient funds scenarios, and deposit flows aren't meaningfully tested. + +**Degraded IISA scoring**: The iisa-cronjob runs in degraded mode (no GeoIP, minimal Redpanda data) and assigns equal quality metrics to all indexers. Real per-indexer pricing is fetched from `/dips/info`, but quality differentiation between indexers is not available. See item #5. + +## Issues we encountered + +### Dipper topology crash on restart (fixed) + +Dipper's initial topology fetch used `?` to propagate errors, which crashed the process if the gateway was temporarily unavailable. After the chain went idle (no new blocks), the gateway returned 402, causing dipper to crash-loop on every restart. Fixed in dipper PR #578 -- the initial fetch now retries with indefinite exponential backoff (capped at 32 seconds). + +### Chain staleness causing gateway 402s (fixed) + +Anvil in automine mode only produced blocks on transaction submission. Once the chain went idle, the gateway considered the network subgraph stale and returned 402 for all queries. Fixed by adding `--block-time` to the chain's `run.sh`, which mines blocks periodically regardless of transaction activity. The dev compose override sets `BLOCK_TIME=1` for fast Ignition deploys; the default is 30 seconds. + +### UnexpectedServiceProvider not testable via pipeline + +Changing `indexer_address` in indexer-service config breaks query serving entirely (the indexer can't find its allocations), so IISA never finds candidates. This is expected behaviour -- the validation exists to catch misrouted proposals, not misconfigured indexers. Testing this path requires a raw gRPC call bypassing dipper's pipeline. + +### Indexer-service rejection logging + +Indexer-service previously logged rejections at WARN level without the deployment ID. 
Fixed in indexer-rs PR #968 -- rejections are now logged at INFO level with the deployment ID and specific rejection reason. diff --git a/compose/dev/README.md b/compose/dev/README.md index b21b5cc..106dafc 100644 --- a/compose/dev/README.md +++ b/compose/dev/README.md @@ -31,5 +31,6 @@ Then `docker compose up -d` applies the overrides automatically. | `eligibility-oracle.yaml` | eligibility-oracle-node | `REO_BINARY` | | `dipper.yaml` | dipper | `DIPPER_BINARY` | | `iisa.yaml` | iisa | `IISA_VERSION=local` | +| `dips.yaml` | indexer-service, dipper, iisa, eligibility-oracle-node | `INDEXER_SERVICE_SOURCE_ROOT`, `DIPPER_SOURCE_ROOT`, `IISA_SOURCE_ROOT`, `REO_SOURCE_ROOT` | See each file's header comments for details. diff --git a/compose/dev/dips.yaml b/compose/dev/dips.yaml new file mode 100644 index 0000000..18ccea3 --- /dev/null +++ b/compose/dev/dips.yaml @@ -0,0 +1,190 @@ +# DIPs Development Override +# +# DIPs stack with local source mounts for development components. +# +# Services overridden: +# - graph-contracts: uses local contracts source (with Ignition fix) +# - indexer-agent: built from local source with DIPs config +# - indexer-service: built from local source with [dips] config +# - dipper: built from local source +# - iisa-cronjob: scoring pipeline from local source with /dips/info fetching +# - iisa: built from local source (replaces GHCR image) +# - eligibility-oracle-node: built from local source +# +# Prerequisites: +# - Local checkouts at ~/Documents/github/: +# contracts, indexer, indexer-rs, dipper, subgraph-dips-indexer-selection, eligibility-oracle-node +# +# Activate via .env: +# COMPOSE_PROFILES=indexing-payments,block-oracle,rewards-eligibility +# COMPOSE_FILE=docker-compose.yaml:compose/dev/dips.yaml +# CONTRACTS_SOURCE_ROOT=~/Documents/github/contracts +# INDEXER_AGENT_SOURCE_ROOT=~/Documents/github/indexer +# INDEXER_SERVICE_SOURCE_ROOT=~/Documents/github/indexer-rs +# DIPPER_SOURCE_ROOT=~/Documents/github/dipper +# IISA_SOURCE_ROOT=~/Documents/github/subgraph-dips-indexer-selection +# REO_SOURCE_ROOT=~/Documents/github/eligibility-oracle-node + +services: + chain: + volumes: + - ./containers/core/chain/run.sh:/opt/run.sh:ro + + graph-contracts: + volumes: + - ${CONTRACTS_SOURCE_ROOT:?Set CONTRACTS_SOURCE_ROOT to local contracts repo}:/opt/contracts + + indexer-service: + cap_add: + - NET_ADMIN + platform: linux/arm64 + build: + target: "wrapper" + dockerfile_inline: | + FROM rust:1-slim-bookworm AS wrapper + RUN apt-get update \ + && apt-get install -y --no-install-recommends \ + build-essential curl git jq pkg-config \ + protobuf-compiler libssl-dev libsasl2-dev \ + && rm -rf /var/lib/apt/lists/* + entrypoint: ["bash", "/opt/run-dips.sh"] + volumes: + - ${INDEXER_SERVICE_SOURCE_ROOT:?Set INDEXER_SERVICE_SOURCE_ROOT to local indexer-rs checkout}:/opt/source + - ./containers/indexer/indexer-service/dev/run-dips.sh:/opt/run-dips.sh:ro + - ./containers/shared:/opt/shared:ro + - ./.env:/opt/config/.env:ro + - config-local:/opt/config:ro + ports: + - "${INDEXER_SERVICE_PORT}:7601" + - "${INDEXER_SERVICE_DIPS_RPC_PORT}:7602" + environment: + RUST_LOG: info,indexer_service_rs=info,indexer_service_rs::middleware::tap_receipt=error,indexer_monitor=warn,indexer_dips=debug + RUST_BACKTRACE: 1 + SQLX_OFFLINE: "true" + healthcheck: + interval: 10s + retries: 600 + test: curl -f http://127.0.0.1:7601/ + + indexer-agent: + platform: linux/arm64 + build: + target: "wrapper" + dockerfile_inline: | + FROM node:22-slim AS wrapper + RUN apt-get update \ + && apt-get install -y 
--no-install-recommends \ + build-essential curl git jq python3 \ + && rm -rf /var/lib/apt/lists/* + COPY --from=ghcr.io/foundry-rs/foundry:v1.0.0 \ + /usr/local/bin/forge /usr/local/bin/cast /usr/local/bin/anvil /usr/local/bin/chisel /usr/local/bin/ + RUN npm install -g tsx nodemon + entrypoint: ["bash", "/opt/run-dips.sh"] + volumes: + - ${INDEXER_AGENT_SOURCE_ROOT:?Set INDEXER_AGENT_SOURCE_ROOT to local indexer checkout}:/opt/indexer-agent-source-root + - ./containers/indexer/indexer-agent/dev/run-dips.sh:/opt/run-dips.sh:ro + - ./containers/shared:/opt/shared:ro + - ./.env:/opt/config/.env:ro + - config-local:/opt/config:ro + + dipper: + profiles: [] + platform: linux/arm64 + environment: + RUST_LOG: info,dipper_service=debug,dipper_rpc=debug,dipper_pgregistry=debug,dipper_service::network=info,sqlx::query=warn + build: + dockerfile_inline: | + FROM rust:1-slim-bookworm + RUN apt-get update \ + && apt-get install -y --no-install-recommends \ + build-essential ca-certificates clang cmake curl git jq lld \ + pkg-config libssl-dev protobuf-compiler \ + && rm -rf /var/lib/apt/lists/* + ENV CC=clang CXX=clang++ RUSTFLAGS="-C link-arg=-fuse-ld=lld" + entrypoint: ["bash", "/opt/run.sh"] + depends_on: + block-oracle: { condition: service_healthy } + postgres: { condition: service_healthy } + gateway: { condition: service_healthy } + iisa: { condition: service_healthy } + volumes: + - ${DIPPER_SOURCE_ROOT:?Set DIPPER_SOURCE_ROOT to local dipper checkout}:/opt/source + - ${INDEXER_SERVICE_SOURCE_ROOT:?Set INDEXER_SERVICE_SOURCE_ROOT to local indexer-rs checkout}:/opt/source-indexer-rs:ro + - ./containers/indexing-payments/dipper/run.sh:/opt/run.sh:ro + - ./containers/shared:/opt/shared:ro + - ./.env:/opt/config/.env:ro + - config-local:/opt/config:ro + + # Real IISA cronjob from source - runs scoring pipeline with /dips/info fetching + iisa-cronjob: + platform: linux/arm64 + build: + dockerfile_inline: | + FROM python:3.11-slim + RUN apt-get update \ + && apt-get install -y --no-install-recommends \ + build-essential gcc curl protobuf-compiler \ + && rm -rf /var/lib/apt/lists/* + RUN pip install --no-cache-dir uv + entrypoint: ["bash", "/opt/run-cronjob.sh"] + volumes: + - ${IISA_SOURCE_ROOT:?Set IISA_SOURCE_ROOT to local subgraph-dips-indexer-selection checkout}/cronjobs/compute_scores:/opt/source:ro + - ./containers/indexing-payments/iisa/run-cronjob.sh:/opt/run-cronjob.sh:ro + - iisa-scores:/app/scores + environment: + PYTHONUNBUFFERED: "1" + SCORING_INTERVAL: "120" + + # Real IISA from source - replaces GHCR image + iisa: + profiles: [] + platform: linux/arm64 + image: iisa:local + pull_policy: never + build: + dockerfile_inline: | + FROM python:3.12-slim + RUN apt-get update \ + && apt-get install -y --no-install-recommends \ + build-essential gcc curl \ + && rm -rf /var/lib/apt/lists/* + RUN pip install --no-cache-dir uv + entrypoint: ["bash", "/opt/run-iisa.sh"] + depends_on: + postgres: { condition: service_healthy } + gateway: { condition: service_healthy } + ports: + - "8080:8080" + volumes: + - ${IISA_SOURCE_ROOT:?Set IISA_SOURCE_ROOT to local subgraph-dips-indexer-selection checkout}:/opt/source + - ./containers/indexing-payments/iisa/run-iisa.sh:/opt/run-iisa.sh:ro + - ./.env:/opt/config/.env:ro + - config-local:/opt/config:ro + environment: + PYTHONUNBUFFERED: "1" + healthcheck: + test: ["CMD", "curl", "-f", "http://localhost:8080/health"] + interval: 10s + retries: 30 + start_period: 60s + + # Real eligibility oracle from source + eligibility-oracle-node: + platform: linux/arm64 + 
build: + dockerfile_inline: | + FROM rust:1-slim-bookworm + RUN apt-get update \ + && apt-get install -y --no-install-recommends \ + build-essential ca-certificates curl git pkg-config libssl-dev \ + && rm -rf /var/lib/apt/lists/* + entrypoint: ["bash", "/opt/run-reo.sh"] + volumes: + - ${REO_SOURCE_ROOT:?Set REO_SOURCE_ROOT to local eligibility-oracle-node checkout}:/opt/source + - ./containers/oracles/eligibility-oracle-node/run-reo.sh:/opt/run-reo.sh:ro + - ./containers/shared:/opt/shared:ro + - ./.env:/opt/config/.env:ro + - config-local:/opt/config:ro + environment: + RUST_LOG: info,eligibility_oracle=debug + RUST_BACKTRACE: "1" diff --git a/compose/dev/eligibility-oracle.yaml b/compose/dev/eligibility-oracle.yaml index 032ef55..2ae82b5 100644 --- a/compose/dev/eligibility-oracle.yaml +++ b/compose/dev/eligibility-oracle.yaml @@ -1,8 +1,8 @@ # Eligibility Oracle Dev Override -# Mounts a locally-built binary for WIP development (skip image rebuild). +# Uses a minimal runtime image with locally-built binary (skips private repo clone). # # Set REO_BINARY to the path of the locally-built binary, e.g.: -# REO_BINARY=/git/local/eligibility-oracle-node/eligibility-oracle-node/target/release/eligibility-oracle +# REO_BINARY=/git/local/eligibility-oracle-node/target/release/eligibility-oracle # # Build the binary locally first: # cargo build --release -p eligibility-oracle @@ -13,5 +13,8 @@ services: eligibility-oracle-node: + build: + context: containers/oracles/eligibility-oracle-node/dev volumes: - ${REO_BINARY:?Set REO_BINARY to locally-built eligibility-oracle binary}:/usr/local/bin/eligibility-oracle:ro + - ./containers/oracles/eligibility-oracle-node/run.sh:/opt/run.sh:ro diff --git a/containers/core/chain/run.sh b/containers/core/chain/run.sh index ffd0996..4ad958d 100644 --- a/containers/core/chain/run.sh +++ b/containers/core/chain/run.sh @@ -9,4 +9,6 @@ fi exec anvil --host=0.0.0.0 --chain-id=1337 --base-fee=0 \ --state /data/anvil-state.json \ + --disable-code-size-limit \ + --hardfork cancun \ $FORK_ARG diff --git a/containers/core/graph-contracts/run.sh b/containers/core/graph-contracts/run.sh index 79a0e54..bb77bff 100644 --- a/containers/core/graph-contracts/run.sh +++ b/containers/core/graph-contracts/run.sh @@ -67,6 +67,11 @@ fi if [ "$phase1_skip" = "false" ]; then echo "Deploying new version of the protocol" cd /opt/contracts/packages/subgraph-service + + # Clear stale Ignition deployment state (may be baked into the image) + rm -rf ./ignition/deployments/chain-1337 + rm -rf /opt/contracts/packages/horizon/ignition/deployments/chain-1337 + npx hardhat deploy:protocol --network localNetwork --subgraph-service-config localNetwork # Add legacy contract stubs (gateway needs these) diff --git a/containers/core/subgraph-deploy/Dockerfile b/containers/core/subgraph-deploy/Dockerfile index 6196e49..7ecaa61 100644 --- a/containers/core/subgraph-deploy/Dockerfile +++ b/containers/core/subgraph-deploy/Dockerfile @@ -24,7 +24,7 @@ RUN git clone https://github.com/graphprotocol/graph-network-subgraph && \ cd graph-network-subgraph && git checkout ${NETWORK_SUBGRAPH_COMMIT} && \ pnpm install && pnpm add -D ts-node -# 2. TAP subgraph +# 2. 
TAP subgraph (patched at deploy time for Horizon event names) RUN git clone https://github.com/semiotic-ai/timeline-aggregation-protocol-subgraph --recursive && \ cd timeline-aggregation-protocol-subgraph && git checkout ${TAP_SUBGRAPH_COMMIT} && yarn diff --git a/containers/core/subgraph-deploy/run.sh b/containers/core/subgraph-deploy/run.sh index fa4f9b9..130858e 100644 --- a/containers/core/subgraph-deploy/run.sh +++ b/containers/core/subgraph-deploy/run.sh @@ -44,13 +44,52 @@ deploy_tap() { return fi - escrow=$(contract_addr Escrow tap-contracts) + # Horizon moved signer authorization from PaymentsEscrow to GraphTallyCollector + escrow=$(contract_addr GraphTallyCollector.address horizon) cd /opt/timeline-aggregation-protocol-subgraph sed -i "s/127.0.0.1:5001/ipfs:${IPFS_RPC_PORT}/g" package.json sed -i "s/127.0.0.1:8020/graph-node:${GRAPH_NODE_ADMIN_PORT}/g" package.json yq ".dataSources[].source.address=\"${escrow}\"" -i subgraph.yaml yq ".dataSources[].network |= \"hardhat\"" -i subgraph.yaml + + # Horizon renamed events: AuthorizeSigner -> SignerAuthorized, + # RevokeAuthorizedSigner -> SignerRevoked, and swapped the parameter order + # from (signer, sender) to (authorizer, signer). Patch all three layers. + + # 1. subgraph.yaml event signatures + sed -i 's/AuthorizeSigner(indexed address,indexed address)/SignerAuthorized(indexed address,indexed address)/g' subgraph.yaml + sed -i 's/RevokeAuthorizedSigner(indexed address,indexed address)/SignerRevoked(indexed address,indexed address)/g' subgraph.yaml + + # 2. ABI: rename events and swap parameter order so codegen accessors match + # the mapping code (event.params.signer = actual signer, event.params.sender = authorizer) + node -e " +const fs = require('fs'); +const abi = JSON.parse(fs.readFileSync('abis/Escrow.abi.json')); +for (const e of abi) { + if (e.type !== 'event') continue; + if (e.name === 'AuthorizeSigner') { + e.name = 'SignerAuthorized'; + e.inputs = [ + {indexed: true, internalType: 'address', name: 'sender', type: 'address'}, + {indexed: true, internalType: 'address', name: 'signer', type: 'address'} + ]; + } else if (e.name === 'RevokeAuthorizedSigner') { + e.name = 'SignerRevoked'; + e.inputs = [ + {indexed: true, internalType: 'address', name: 'sender', type: 'address'}, + {indexed: true, internalType: 'address', name: 'authorizedSigner', type: 'address'} + ]; + } +} +fs.writeFileSync('abis/Escrow.abi.json', JSON.stringify(abi, null, 2)); +" + + # 3. Mapping imports and type annotations + sed -i 's/AuthorizeSigner, RevokeAuthorizedSigner/SignerAuthorized, SignerRevoked/g' src/mappings/escrow.ts + sed -i 's/event: AuthorizeSigner/event: SignerAuthorized/g' src/mappings/escrow.ts + sed -i 's/event: RevokeAuthorizedSigner/event: SignerRevoked/g' src/mappings/escrow.ts + yarn codegen yarn build yarn create-local diff --git a/containers/indexer/indexer-agent/dev/run-dips.sh b/containers/indexer/indexer-agent/dev/run-dips.sh new file mode 100755 index 0000000..b6ff3af --- /dev/null +++ b/containers/indexer/indexer-agent/dev/run-dips.sh @@ -0,0 +1,89 @@ +#!/bin/bash +set -xeu +. /opt/config/.env + +. 
/opt/shared/lib.sh + +token_address=$(contract_addr L2GraphToken.address horizon) +staking_address=$(contract_addr HorizonStaking.address horizon) +indexer_stake="$(cast call "--rpc-url=http://chain:${CHAIN_RPC_PORT}" \ + "${staking_address}" 'getStake(address) (uint256)' "${RECEIVER_ADDRESS}")" +echo "indexer_stake=${indexer_stake}" +if [ "${indexer_stake}" = "0" ]; then + cast send "--rpc-url=http://chain:${CHAIN_RPC_PORT}" --confirmations=0 "--mnemonic=${MNEMONIC}" \ + --value=1ether "${RECEIVER_ADDRESS}" + cast send "--rpc-url=http://chain:${CHAIN_RPC_PORT}" --confirmations=0 "--mnemonic=${MNEMONIC}" \ + "${token_address}" 'transfer(address,uint256)' "${RECEIVER_ADDRESS}" '100000000000000000000000' + cast send "--rpc-url=http://chain:${CHAIN_RPC_PORT}" --confirmations=0 "--private-key=${RECEIVER_SECRET}" \ + "${token_address}" 'approve(address,uint256)' "${staking_address}" '100000000000000000000000' + cast send "--rpc-url=http://chain:${CHAIN_RPC_PORT}" --confirmations=0 "--private-key=${RECEIVER_SECRET}" \ + "${staking_address}" 'stake(uint256)' '100000000000000000000000' +fi + +export INDEXER_AGENT_HORIZON_ADDRESS_BOOK=/opt/config/horizon.json +export INDEXER_AGENT_SUBGRAPH_SERVICE_ADDRESS_BOOK=/opt/config/subgraph-service.json +export INDEXER_AGENT_TAP_ADDRESS_BOOK=/opt/config/tap-contracts.json +export INDEXER_AGENT_EPOCH_SUBGRAPH_ENDPOINT="http://graph-node:${GRAPH_NODE_GRAPHQL_PORT}/subgraphs/name/block-oracle" +export INDEXER_AGENT_GATEWAY_ENDPOINT="http://gateway:${GATEWAY_PORT}" +export INDEXER_AGENT_GRAPH_NODE_QUERY_ENDPOINT="http://graph-node:${GRAPH_NODE_GRAPHQL_PORT}" +export INDEXER_AGENT_GRAPH_NODE_ADMIN_ENDPOINT="http://graph-node:${GRAPH_NODE_ADMIN_PORT}" +export INDEXER_AGENT_GRAPH_NODE_STATUS_ENDPOINT="http://graph-node:${GRAPH_NODE_STATUS_PORT}/graphql" +export INDEXER_AGENT_IPFS_ENDPOINT="http://ipfs:${IPFS_RPC_PORT}" +export INDEXER_AGENT_INDEXER_ADDRESS="${RECEIVER_ADDRESS}" +export INDEXER_AGENT_INDEXER_MANAGEMENT_PORT="${INDEXER_MANAGEMENT_PORT}" +export INDEXER_AGENT_INDEX_NODE_IDS=default +export INDEXER_AGENT_INDEXER_GEO_COORDINATES="1 1" +export INDEXER_AGENT_VOUCHER_REDEMPTION_THRESHOLD=0.01 +export INDEXER_AGENT_NETWORK_SUBGRAPH_ENDPOINT="http://graph-node:${GRAPH_NODE_GRAPHQL_PORT}/subgraphs/name/graph-network" +export INDEXER_AGENT_NETWORK_PROVIDER="http://chain:${CHAIN_RPC_PORT}" +export INDEXER_AGENT_MNEMONIC="${INDEXER_MNEMONIC}" +export INDEXER_AGENT_POSTGRES_DATABASE=indexer_components_1 +export INDEXER_AGENT_POSTGRES_HOST=postgres +export INDEXER_AGENT_POSTGRES_PORT="${POSTGRES_PORT}" +export INDEXER_AGENT_POSTGRES_USERNAME=postgres +export INDEXER_AGENT_POSTGRES_PASSWORD= +export INDEXER_AGENT_PUBLIC_INDEXER_URL="http://indexer-service:${INDEXER_SERVICE_PORT}" +export INDEXER_AGENT_TAP_SUBGRAPH_ENDPOINT="http://graph-node:${GRAPH_NODE_GRAPHQL_PORT}/subgraphs/name/semiotic/tap" +export INDEXER_AGENT_MAX_PROVISION_INITIAL_SIZE=200000 +export INDEXER_AGENT_CONFIRMATION_BLOCKS=1 +export INDEXER_AGENT_LOG_LEVEL=trace + +# DIPs configuration +export INDEXER_AGENT_ENABLE_DIPS=true +export INDEXER_AGENT_DIPS_EPOCHS_MARGIN=1 +export INDEXER_AGENT_DIPPER_ENDPOINT="http://dipper:${DIPPER_INDEXER_RPC_PORT}" +export INDEXER_AGENT_DIPS_ALLOCATION_AMOUNT=1 + +cd /opt/indexer-agent-source-root +yarn install --frozen-lockfile +mkdir -p ./config/ +cat >./config/config.yaml <<-EOF +networkIdentifier: "hardhat" +indexerOptions: + geoCoordinates: [48.4682, -123.524] + defaultAllocationAmount: 10000 + allocationManagementMode: "auto" + restakeRewards: true + 
poiDisputeMonitoring: false + voucherRedemptionThreshold: 0.00001 + voucherRedemptionBatchThreshold: 10 + rebateClaimThreshold: 0.00001 + rebateClaimBatchThreshold: 10 +subgraphs: + maxBlockDistance: 5000 + freshnessSleepMilliseconds: 1000 +enableDips: true +dipperEndpoint: "http://dipper:${DIPPER_INDEXER_RPC_PORT}" +dipsAllocationAmount: 1 +dipsEpochsMargin: 1 +EOF +cat config/config.yaml + +nodemon --watch . \ +--ext ts \ +--legacy-watch \ +--delay 4 \ +--verbose \ +--exec " +NODE_OPTIONS=\"--inspect=0.0.0.0:9230\" +tsx packages/indexer-agent/src/index.ts start" diff --git a/containers/indexer/indexer-agent/dev/run-override.sh b/containers/indexer/indexer-agent/dev/run-override.sh index 52631a9..4a93b94 100755 --- a/containers/indexer/indexer-agent/dev/run-override.sh +++ b/containers/indexer/indexer-agent/dev/run-override.sh @@ -6,10 +6,10 @@ set -xeu token_address=$(contract_addr L2GraphToken.address horizon) staking_address=$(contract_addr HorizonStaking.address horizon) -indexer_staked="$(cast call "--rpc-url=http://chain:${CHAIN_RPC_PORT}" \ - "${staking_address}" 'hasStake(address) (bool)' "${RECEIVER_ADDRESS}")" -echo "indexer_staked=${indexer_staked}" -if [ "${indexer_staked}" = "false" ]; then +indexer_stake="$(cast call "--rpc-url=http://chain:${CHAIN_RPC_PORT}" \ + "${staking_address}" 'getStake(address) (uint256)' "${RECEIVER_ADDRESS}")" +echo "indexer_stake=${indexer_stake}" +if [ "${indexer_stake}" = "0" ]; then # transfer ETH to receiver cast send "--rpc-url=http://chain:${CHAIN_RPC_PORT}" --confirmations=0 "--mnemonic=${MNEMONIC}" \ --value=1ether "${RECEIVER_ADDRESS}" diff --git a/containers/indexer/indexer-agent/run.sh b/containers/indexer/indexer-agent/run.sh index 5c2e7a1..bdd47cd 100755 --- a/containers/indexer/indexer-agent/run.sh +++ b/containers/indexer/indexer-agent/run.sh @@ -6,10 +6,10 @@ set -eu token_address=$(contract_addr L2GraphToken.address horizon) staking_address=$(contract_addr HorizonStaking.address horizon) -indexer_staked="$(cast call "--rpc-url=http://chain:${CHAIN_RPC_PORT}" \ - "${staking_address}" 'hasStake(address) (bool)' "${RECEIVER_ADDRESS}")" -echo "indexer_staked=${indexer_staked}" -if [ "${indexer_staked}" = "false" ]; then +indexer_stake="$(cast call "--rpc-url=http://chain:${CHAIN_RPC_PORT}" \ + "${staking_address}" 'getStake(address) (uint256)' "${RECEIVER_ADDRESS}")" +echo "indexer_stake=${indexer_stake}" +if [ "${indexer_stake}" = "0" ]; then # transfer ETH to receiver cast send "--rpc-url=http://chain:${CHAIN_RPC_PORT}" --confirmations=0 "--mnemonic=${MNEMONIC}" \ --value=1ether "${RECEIVER_ADDRESS}" diff --git a/containers/indexer/indexer-service/dev/run-dips.sh b/containers/indexer/indexer-service/dev/run-dips.sh new file mode 100755 index 0000000..be043fb --- /dev/null +++ b/containers/indexer/indexer-service/dev/run-dips.sh @@ -0,0 +1,88 @@ +#!/bin/bash +set -eu + +. /opt/config/.env +. 
/opt/shared/lib.sh + +tap_verifier=$(contract_addr TAPVerifier tap-contracts) +graph_tally_verifier=$(contract_addr GraphTallyCollector.address horizon) +subgraph_service=$(contract_addr SubgraphService.address subgraph-service) + +# RecurringCollector may not be deployed yet (contracts repo work pending) +recurring_collector=$(contract_addr RecurringCollector.address horizon 2>/dev/null) || recurring_collector="" +if [ -z "$recurring_collector" ]; then + echo "WARNING: RecurringCollector not deployed - DIPs will be disabled" + dips_enabled=false +else + dips_enabled=true +fi + +cat >/opt/config.toml <<-EOF +[indexer] +indexer_address = "${RECEIVER_ADDRESS}" +operator_mnemonic = "${INDEXER_MNEMONIC}" + +[database] +postgres_url = "postgresql://postgres@postgres:${POSTGRES_PORT}/indexer_components_1" + +[graph_node] +query_url = "http://graph-node:${GRAPH_NODE_GRAPHQL_PORT}" +status_url = "http://graph-node:${GRAPH_NODE_STATUS_PORT}/graphql" + +[subgraphs.network] +query_url = "http://graph-node:${GRAPH_NODE_GRAPHQL_PORT}/subgraphs/name/graph-network" +recently_closed_allocation_buffer_secs = 60 +syncing_interval_secs = 30 + +[subgraphs.escrow] +query_url = "http://graph-node:${GRAPH_NODE_GRAPHQL_PORT}/subgraphs/name/semiotic/tap" +syncing_interval_secs = 30 + +[blockchain] +chain_id = ${CHAIN_ID} +receipts_verifier_address = "${tap_verifier}" +receipts_verifier_address_v2 = "${graph_tally_verifier}" +subgraph_service_address = "${subgraph_service}" + +[service] +free_query_auth_token = "freestuff" +host_and_port = "0.0.0.0:${INDEXER_SERVICE_PORT}" +url_prefix = "/" +serve_network_subgraph = false +serve_escrow_subgraph = false +ipfs_url = "http://ipfs:${IPFS_RPC_PORT}" + +[tap] +max_amount_willing_to_lose_grt = 1 + +[tap.rav_request] +timestamp_buffer_secs = 15 + +[tap.sender_aggregator_endpoints] +${ACCOUNT0_ADDRESS} = "http://tap-aggregator:${TAP_AGGREGATOR_PORT}" + +[horizon] +enabled = true +EOF + +if [ "$dips_enabled" = "true" ]; then +cat >>/opt/config.toml <<-EOF + +[dips] +host = "0.0.0.0" +port = "${INDEXER_SERVICE_DIPS_RPC_PORT}" +recurring_collector = "${recurring_collector}" +supported_networks = ["hardhat"] + +[dips.min_grt_per_30_days] +"hardhat" = "450" + +[dips.additional_networks] +"hardhat" = "eip155:1337" +EOF +fi +cat /opt/config.toml + +cd /opt/source +cargo build --bin indexer-service-rs +exec ./target/debug/indexer-service-rs --config=/opt/config.toml diff --git a/containers/indexer/start-indexing/run.sh b/containers/indexer/start-indexing/run.sh index 48f15f1..92f6e33 100755 --- a/containers/indexer/start-indexing/run.sh +++ b/containers/indexer/start-indexing/run.sh @@ -157,4 +157,10 @@ do sleep 2 done +# Switch from automine to interval mining now that all deployments are done. +# Services like block-oracle and graph-node need regular blocks to function. +block_time="${BLOCK_TIME:-1}" +elapsed "Enabling interval mining (${block_time}s blocks)..." +cast rpc --rpc-url="http://chain:${CHAIN_RPC_PORT}" evm_setIntervalMining "${block_time}" > /dev/null + elapsed "Allocations active, done" diff --git a/containers/indexing-payments/dipper/run.sh b/containers/indexing-payments/dipper/run.sh index edd9f9d..0be6497 100755 --- a/containers/indexing-payments/dipper/run.sh +++ b/containers/indexing-payments/dipper/run.sh @@ -1,4 +1,4 @@ -#!/bin/env sh +#!/usr/bin/env sh set -eu . 
/opt/config/.env @@ -13,13 +13,14 @@ network_subgraph_deployment=$(wait_for_gql \ tap_verifier=$(contract_addr TAPVerifier tap-contracts) subgraph_service=$(contract_addr SubgraphService.address subgraph-service) +recurring_collector=$(contract_addr RecurringCollector.address horizon) ## Config cat >config.json <<-EOF { "dips": { "data_service": "${subgraph_service}", - "recurring_collector": "0x0000000000000000000000000000000000000000", + "recurring_collector": "${recurring_collector}", "max_initial_tokens": "1000000000000000000", "max_ongoing_tokens_per_second": "1000000000000000", "max_seconds_per_collection": 86400, @@ -28,8 +29,8 @@ cat >config.json <<-EOF "deadline_seconds": 300, "pricing_table": { "${CHAIN_ID}": { - "tokens_per_second": "101", - "tokens_per_entity_per_second": "1001" + "tokens_per_second": "174000000000000", + "tokens_per_entity_per_second": "78000" } } }, @@ -59,11 +60,11 @@ cat >config.json <<-EOF }, "signer": { "secret_key": "${ACCOUNT0_SECRET}", - "chain_id": 1337 + "chain_id": ${CHAIN_ID} }, "tap_signer": { "secret_key": "${ACCOUNT0_SECRET}", - "chain_id": 1337, + "chain_id": ${CHAIN_ID}, "verifier": "${tap_verifier}" }, "iisa": { @@ -71,6 +72,14 @@ cat >config.json <<-EOF "request_timeout": 30, "connect_timeout": 10, "max_retries": 3 + }, + "expiration": { + "enabled": true, + "interval": 10, + "batch_size": 100 + }, + "additional_networks": { + "${CHAIN_ID}": "${CHAIN_NAME}" } } EOF @@ -79,4 +88,11 @@ echo "=== Generated config.json ===" >&2 cat config.json >&2 echo "===========================" >&2 -dipper-service ./config.json +# Build from source if mounted, otherwise use pre-built binary +if [ -d /opt/source ] && [ -f /opt/source/Cargo.toml ]; then + cd /opt/source + cargo build --bin dipper-service --release + exec ./target/release/dipper-service "$OLDPWD/config.json" +else + exec dipper-service ./config.json +fi diff --git a/containers/indexing-payments/iisa/Dockerfile b/containers/indexing-payments/iisa/Dockerfile new file mode 100644 index 0000000..8b02eaa --- /dev/null +++ b/containers/indexing-payments/iisa/Dockerfile @@ -0,0 +1,38 @@ +# IISA scoring cronjob — clones from git for non-dev deployments. +# Dev overlay mounts local source instead (see compose/dev/dips.yaml). + +FROM python:3.11-slim AS builder + +WORKDIR /app + +ARG IISA_COMMIT=main + +RUN apt-get update && apt-get install -y --no-install-recommends \ + gcc git protobuf-compiler \ + && rm -rf /var/lib/apt/lists/* + +# Clone cronjob source at specified commit +RUN git clone https://github.com/edgeandnode/subgraph-dips-indexer-selection.git /tmp/iisa \ + && cd /tmp/iisa && git checkout ${IISA_COMMIT} \ + && cp cronjobs/compute_scores/*.py cronjobs/compute_scores/requirements.txt /app/ \ + && cp -r cronjobs/compute_scores/proto /app/proto \ + && rm -rf /tmp/iisa + +RUN protoc -I proto --python_out=. proto/gateway_queries.proto +RUN pip install --no-cache-dir --prefix=/install -r requirements.txt + +# Runtime stage +FROM python:3.11-slim + +WORKDIR /app + +RUN apt-get update && apt-get install -y --no-install-recommends curl \ + && rm -rf /var/lib/apt/lists/* + +COPY --from=builder /install /usr/local +COPY --from=builder /app/*.py . 
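+# The *.py files copied above already include gateway_queries_pb2.py, generated
+# by protoc in the builder stage, so the runtime image needs no protobuf tooling.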
+ +RUN useradd -m appuser +USER appuser + +CMD ["python", "main.py"] diff --git a/containers/indexing-payments/iisa/Dockerfile.scoring b/containers/indexing-payments/iisa/Dockerfile.scoring deleted file mode 100644 index a1a50c4..0000000 --- a/containers/indexing-payments/iisa/Dockerfile.scoring +++ /dev/null @@ -1,11 +0,0 @@ -FROM python:3.12-slim - -WORKDIR /app - -# Install confluent-kafka for Redpanda connectivity -RUN pip install --no-cache-dir confluent-kafka - -COPY seed_scores.json ./ -COPY scoring.py ./ - -CMD ["python", "scoring.py"] diff --git a/containers/indexing-payments/iisa/run-cronjob.sh b/containers/indexing-payments/iisa/run-cronjob.sh new file mode 100755 index 0000000..d16c894 --- /dev/null +++ b/containers/indexing-payments/iisa/run-cronjob.sh @@ -0,0 +1,20 @@ +#!/bin/bash +set -eu + +# Copy source to writable working directory (source mount is :ro) +cp -r /opt/source/* /app/ + +cd /app + +# Install dependencies +uv pip install --system -r requirements.txt + +# Generate protobuf code +protoc -I proto --python_out=. proto/gateway_queries.proto + +echo "=== Starting IISA scoring service ===" +echo " Scores file: ${SCORES_FILE_PATH:-/app/scores/indexer_scores.json}" +echo " Interval: ${SCORING_INTERVAL:-86400}s" +echo " HTTP port: ${SCORING_HTTP_PORT:-9090}" + +exec python main.py diff --git a/containers/indexing-payments/iisa/run-iisa.sh b/containers/indexing-payments/iisa/run-iisa.sh new file mode 100755 index 0000000..374d4c5 --- /dev/null +++ b/containers/indexing-payments/iisa/run-iisa.sh @@ -0,0 +1,18 @@ +#!/bin/bash +set -eu +. /opt/config/.env + +cd /opt/source + +# Install dependencies with uv +uv pip install --system -e . + +echo "=== Starting IISA service ===" +echo " Host: 0.0.0.0" +echo " Port: 8080" + +export IISA_HOST="0.0.0.0" +export IISA_PORT="8080" +export IISA_LOG_LEVEL="${IISA_LOG_LEVEL:-INFO}" + +exec uvicorn iisa.iisa_http_endpoints:app --host $IISA_HOST --port $IISA_PORT --reload diff --git a/containers/indexing-payments/iisa/scoring.py b/containers/indexing-payments/iisa/scoring.py deleted file mode 100644 index a10ae6c..0000000 --- a/containers/indexing-payments/iisa/scoring.py +++ /dev/null @@ -1,175 +0,0 @@ -""" -IISA scoring service for local network. - -Long-running service that ensures indexer scores are available for the -IISA HTTP service. On startup writes seed scores so IISA can start -immediately, then periodically checks Redpanda for real query data -and refreshes scores when available. - -Modelled after the eligibility-oracle-node polling pattern. 
-""" - -import json -import logging -import os -import shutil -import signal -import sys -import time -from pathlib import Path - -logging.basicConfig( - level=logging.INFO, - format="%(asctime)s - %(name)s - %(levelname)s - %(message)s", -) -logger = logging.getLogger("iisa-scoring") - -SCORES_FILE_PATH = os.environ.get("SCORES_FILE_PATH", "/app/scores/indexer_scores.json") -SEED_SCORES_PATH = "/app/seed_scores.json" -REDPANDA_BOOTSTRAP_SERVERS = os.environ.get("REDPANDA_BOOTSTRAP_SERVERS", "") -REDPANDA_TOPIC = os.environ.get("REDPANDA_TOPIC", "gateway_queries") -REFRESH_INTERVAL = int(os.environ.get("IISA_SCORING_INTERVAL", "600")) # 10 minutes - -# Graceful shutdown -shutdown_requested = False - - -def handle_signal(signum, frame): - global shutdown_requested - logger.info(f"Received signal {signum}, shutting down") - shutdown_requested = True - - -signal.signal(signal.SIGTERM, handle_signal) -signal.signal(signal.SIGINT, handle_signal) - - -def count_redpanda_messages() -> int: - """Count messages in the Redpanda gateway_queries topic. Returns 0 on error.""" - if not REDPANDA_BOOTSTRAP_SERVERS: - return 0 - - try: - from confluent_kafka import Consumer, TopicPartition - - consumer = Consumer({ - "bootstrap.servers": REDPANDA_BOOTSTRAP_SERVERS, - "group.id": "iisa-scoring-check", - "auto.offset.reset": "earliest", - "enable.auto.commit": False, - }) - - metadata = consumer.list_topics(topic=REDPANDA_TOPIC, timeout=10) - topic_metadata = metadata.topics.get(REDPANDA_TOPIC) - - if topic_metadata is None or topic_metadata.error is not None: - consumer.close() - return 0 - - partitions = topic_metadata.partitions - if not partitions: - consumer.close() - return 0 - - total = 0 - for partition_id in partitions: - tp = TopicPartition(REDPANDA_TOPIC, partition_id) - low, high = consumer.get_watermark_offsets(tp, timeout=10) - total += high - low - - consumer.close() - return total - - except Exception as e: - logger.warning(f"Failed to check Redpanda: {e}") - return 0 - - -def write_seed_scores() -> bool: - """Copy seed scores file to the scores output path. Returns True on success.""" - scores_path = Path(SCORES_FILE_PATH) - scores_path.parent.mkdir(parents=True, exist_ok=True) - - if not Path(SEED_SCORES_PATH).exists(): - logger.error(f"Seed scores file not found: {SEED_SCORES_PATH}") - return False - - shutil.copy2(SEED_SCORES_PATH, SCORES_FILE_PATH) - - with open(SCORES_FILE_PATH) as f: - data = json.load(f) - - logger.info(f"Wrote seed scores ({len(data)} indexers) to {SCORES_FILE_PATH}") - return True - - -def ensure_scores_exist() -> bool: - """Ensure a scores file exists. Returns True if scores are available.""" - if Path(SCORES_FILE_PATH).exists(): - try: - with open(SCORES_FILE_PATH) as f: - data = json.load(f) - if data: - logger.info(f"Scores file exists with {len(data)} indexers") - return True - except (json.JSONDecodeError, OSError): - logger.warning("Existing scores file is invalid, will overwrite") - - return write_seed_scores() - - -def try_compute_scores() -> bool: - """ - Attempt to compute real scores from Redpanda data. - - TODO: Integrate the actual CronJob score computation pipeline here. - For now, logs the message count and returns False (uses seed scores). - """ - msg_count = count_redpanda_messages() - - if msg_count == 0: - logger.info("No messages in Redpanda yet, keeping current scores") - return False - - # TODO: Run actual score computation from Redpanda data when the - # CronJob pipeline is integrated into this container. 
The pipeline - # needs: protobuf decoding, linear regression, GeoIP resolution. - logger.info( - f"Redpanda has ~{msg_count} messages. " - "CronJob integration pending, keeping current scores." - ) - return False - - -def main() -> int: - logger.info("IISA scoring service starting") - logger.info(f"Refresh interval: {REFRESH_INTERVAL}s") - logger.info(f"Scores file: {SCORES_FILE_PATH}") - logger.info(f"Redpanda: {REDPANDA_BOOTSTRAP_SERVERS or '(not configured)'}") - - # Phase 1: Ensure scores exist so IISA can start - if not ensure_scores_exist(): - logger.error("Failed to initialize scores, exiting") - return 1 - - logger.info("Initial scores ready, entering refresh loop") - - # Phase 2: Periodic refresh loop - while not shutdown_requested: - for _ in range(REFRESH_INTERVAL): - if shutdown_requested: - break - time.sleep(1) - - if shutdown_requested: - break - - logger.info("Running periodic score refresh") - try_compute_scores() - - logger.info("IISA scoring service stopped") - return 0 - - -if __name__ == "__main__": - sys.exit(main()) diff --git a/containers/indexing-payments/iisa/seed_scores.json b/containers/indexing-payments/iisa/seed_scores.json deleted file mode 100644 index 8fe8ed2..0000000 --- a/containers/indexing-payments/iisa/seed_scores.json +++ /dev/null @@ -1,26 +0,0 @@ -[ - { - "indexer": "0xf4ef6650e48d099a4972ea5b414dab86e1998bd3", - "url": "http://indexer-service:7601", - "lat_lin_reg_coefficient": 0.002, - "lat_coefficient_std_error": 0.001, - "lat_coefficient_upper_bound": 0.004, - "lat_normalized_score": 0.85, - "uptime_score": 0.98, - "observed_duration_seconds": 86400, - "uptime_duration_seconds": 84672, - "success_rate": 0.95, - "stake_to_fees": 500.0, - "stake_to_fees_iqr_deviation": 0.3, - "norm_uptime_score": 0.9, - "norm_success_rate": 0.88, - "norm_stake_to_fees": 0.7, - "org": "local-network", - "dst_lat": 37.7749, - "dst_lon": -122.4194, - "existing_dips_agreements": 0, - "avg_sync_duration": 5.0, - "computed_at": "2026-02-20T00:00:00+00:00", - "query_count": 1000 - } -] diff --git a/containers/oracles/block-oracle/Dockerfile b/containers/oracles/block-oracle/Dockerfile index 930bc5e..c75337c 100644 --- a/containers/oracles/block-oracle/Dockerfile +++ b/containers/oracles/block-oracle/Dockerfile @@ -1,22 +1,30 @@ -FROM debian:bookworm-slim +FROM debian:bookworm-slim AS builder ARG BLOCK_ORACLE_COMMIT -# Runtime + build dependencies RUN apt-get update \ - && apt-get install -y curl git jq libssl-dev pkg-config build-essential \ + && apt-get install -y curl git libssl-dev pkg-config build-essential \ && rm -rf /var/lib/apt/lists/* -# Install Rust and build block-oracle binary RUN curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y --default-toolchain stable --profile minimal +ENV PATH="/root/.cargo/bin:${PATH}" -WORKDIR /opt +WORKDIR /build RUN git clone https://github.com/graphprotocol/block-oracle && \ - cd block-oracle && git checkout ${BLOCK_ORACLE_COMMIT} && . ~/.bashrc && cargo build -p block-oracle && \ - cp target/debug/block-oracle . 
&& rm -rf target + cd block-oracle && git checkout ${BLOCK_ORACLE_COMMIT} -# Clean up build-only dependencies -RUN apt-get purge -y pkg-config build-essential git && apt-get autoremove -y && \ - rm -rf /var/lib/apt/lists/* +WORKDIR /build/block-oracle +RUN --mount=type=cache,target=/root/.cargo/registry \ + --mount=type=cache,target=/root/.cargo/git \ + --mount=type=cache,target=/build/block-oracle/target \ + cargo build -p block-oracle && \ + cp target/debug/block-oracle /usr/local/bin/block-oracle +FROM debian:bookworm-slim +RUN apt-get update \ + && apt-get install -y --no-install-recommends curl jq libssl3 ca-certificates \ + && rm -rf /var/lib/apt/lists/* + +COPY --from=builder /usr/local/bin/block-oracle /usr/local/bin/block-oracle +WORKDIR /opt COPY --chmod=755 ./run.sh /opt/run.sh ENTRYPOINT ["bash", "/opt/run.sh"] diff --git a/containers/oracles/block-oracle/run.sh b/containers/oracles/block-oracle/run.sh index 8b1d8f3..48d9a94 100755 --- a/containers/oracles/block-oracle/run.sh +++ b/containers/oracles/block-oracle/run.sh @@ -7,7 +7,7 @@ graph_epoch_manager=$(contract_addr EpochManager.address horizon) data_edge=$(contract_addr DataEdge block-oracle) echo "=== Configuring block-oracle service ===" -cd /opt/block-oracle +mkdir -p /opt/block-oracle && cd /opt/block-oracle cat >config.toml <<-EOF blockmeta_auth_token = "" owner_address = "${ACCOUNT0_ADDRESS#0x}" @@ -31,4 +31,4 @@ cat config.toml echo "=== Starting block-oracle service ===" sleep 5 -exec /opt/block-oracle/block-oracle run config.toml +exec block-oracle run config.toml diff --git a/containers/oracles/eligibility-oracle-node/dev/Dockerfile b/containers/oracles/eligibility-oracle-node/dev/Dockerfile new file mode 100644 index 0000000..1383c65 --- /dev/null +++ b/containers/oracles/eligibility-oracle-node/dev/Dockerfile @@ -0,0 +1,17 @@ +# Dev image for eligibility-oracle - runtime only (binary mounted from host) +FROM debian:bookworm-slim + +RUN apt-get update \ + && apt-get install -y --no-install-recommends \ + curl jq unzip ca-certificates \ + libssl3 librdkafka1 \ + && rm -rf /var/lib/apt/lists/* + +# rpk CLI for Redpanda topic management +RUN curl -sLO https://github.com/redpanda-data/redpanda/releases/latest/download/rpk-linux-amd64.zip \ + && unzip rpk-linux-amd64.zip -d /usr/local/bin/ \ + && rm rpk-linux-amd64.zip + +WORKDIR /opt +# run.sh is mounted via compose override +ENTRYPOINT ["bash", "/opt/run.sh"] diff --git a/containers/oracles/eligibility-oracle-node/run-reo.sh b/containers/oracles/eligibility-oracle-node/run-reo.sh new file mode 100755 index 0000000..aef92ab --- /dev/null +++ b/containers/oracles/eligibility-oracle-node/run-reo.sh @@ -0,0 +1,113 @@ +#!/bin/bash +set -eu +. /opt/config/.env +. /opt/shared/lib.sh + +# Build from source +cd /opt/source +cargo build --release --bin eligibility-oracle +BINARY=/opt/source/target/release/eligibility-oracle + +# Wait for the REO contract address to be available in issuance.json +reo_address="" +for f in issuance.json; do + reo_address=$(jq -r '.["1337"].RewardsEligibilityOracle.address // empty' "/opt/config/$f" 2>/dev/null || true) + [ -n "$reo_address" ] && break +done + +if [ -z "$reo_address" ]; then + echo "ERROR: RewardsEligibilityOracle address not found in issuance.json" + echo "The REO contract must be deployed before starting the oracle node." 
+ exit 1 +fi + +echo "=== Configuring eligibility-oracle-node ===" +echo " REO contract: ${reo_address}" +echo " Chain ID: ${CHAIN_ID}" +echo " Redpanda: redpanda:${REDPANDA_KAFKA_PORT}" + +cd /tmp + +# Create compacted output topic (idempotent) +rpk topic create indexer_daily_metrics \ + --brokers="redpanda:${REDPANDA_KAFKA_PORT}" \ + -c cleanup.policy=compact,delete \ + -c retention.ms=7776000000 \ + 2>/dev/null || true + +# Reset consumer group to the start of the topic +rpk group seek eligibility-oracle --to start \ + --topics gateway_queries \ + --brokers="redpanda:${REDPANDA_KAFKA_PORT}" \ + 2>/dev/null || true + +# Generate config.toml with local network values +cat >config.toml <&2 +cat config.toml >&2 +echo "=============================" >&2 + +INTERVAL=10 +CHAIN_RPC="http://chain:${CHAIN_RPC_PORT}" + +child=0 +trap 'kill -TERM "$child" 2>/dev/null; wait "$child"; exit 0' SIGTERM SIGINT + +get_block_number() { + curl -sf -X POST "$CHAIN_RPC" \ + -H "Content-Type: application/json" \ + -d '{"jsonrpc":"2.0","method":"eth_blockNumber","params":[],"id":1}' \ + | jq -r '.result // empty' 2>/dev/null || true +} + +echo "=== Running eligibility-oracle-node (one-shot, polling every ${INTERVAL}s) ===" +last_block="" +while true; do + current_block=$(get_block_number) + + if [ -z "$current_block" ]; then + echo "Could not fetch block number, retrying in ${INTERVAL}s" + sleep "$INTERVAL" & + child=$! + wait "$child" + continue + fi + + if [ "$current_block" = "$last_block" ]; then + sleep "$INTERVAL" & + child=$! + wait "$child" + continue + fi + + echo "--- New block: ${last_block:-none} -> ${current_block}, running oracle ---" + "$BINARY" --config config.toml & + child=$! + wait "$child" && echo "--- Oracle finished (ok) ---" \ + || echo "--- Oracle finished (exit $?) ---" + last_block=$current_block + + sleep "$INTERVAL" & + child=$! 
+ wait "$child" +done diff --git a/containers/query-payments/tap-escrow-manager/run.sh b/containers/query-payments/tap-escrow-manager/run.sh index dc757f2..043288f 100755 --- a/containers/query-payments/tap-escrow-manager/run.sh +++ b/containers/query-payments/tap-escrow-manager/run.sh @@ -29,7 +29,7 @@ cat >config.json <<-EOF "network_subgraph": "http://graph-node:${GRAPH_NODE_GRAPHQL_PORT}/subgraphs/name/graph-network", "query_auth": "freestuff", "rpc_url": "http://chain:${CHAIN_RPC_PORT}", - "signers": ["${ACCOUNT1_SECRET}"], + "signers": ["${ACCOUNT0_SECRET}", "${ACCOUNT1_SECRET}"], "secret_key": "${ACCOUNT0_SECRET}", "update_interval_seconds": 10 } diff --git a/docker-compose.yaml b/docker-compose.yaml index 4f1c823..8a5da95 100644 --- a/docker-compose.yaml +++ b/docker-compose.yaml @@ -210,7 +210,7 @@ services: args: TAP_ESCROW_MANAGER_COMMIT: ${TAP_ESCROW_MANAGER_COMMIT} depends_on: - subgraph-deploy: { condition: service_completed_successfully } + start-indexing: { condition: service_completed_successfully } redpanda: { condition: service_healthy } stop_signal: SIGKILL volumes: @@ -307,25 +307,31 @@ services: BLOCKCHAIN_PRIVATE_KEY: ${ACCOUNT0_SECRET} restart: unless-stopped - iisa-scoring: - container_name: iisa-scoring + iisa-cronjob: + container_name: iisa-cronjob profiles: [indexing-payments] build: context: containers/indexing-payments/iisa - dockerfile: Dockerfile.scoring + args: + IISA_COMMIT: ${IISA_COMMIT:-main} + MAXMIND_LICENSE_KEY: "skip" depends_on: redpanda: { condition: service_healthy } environment: REDPANDA_BOOTSTRAP_SERVERS: "redpanda:${REDPANDA_KAFKA_PORT}" REDPANDA_TOPIC: gateway_queries SCORES_FILE_PATH: /app/scores/indexer_scores.json - IISA_SCORING_INTERVAL: "600" + GRAPH_NETWORK_SUBGRAPH_URL: "http://graph-node:8000/subgraphs/name/graph-network" + SCORING_INTERVAL: "600" + SCORING_HTTP_PORT: "9090" + DEGRADED_ALERT_THRESHOLD: "999" volumes: - iisa-scores:/app/scores healthcheck: - test: ["CMD", "test", "-f", "/app/scores/indexer_scores.json"] - interval: 5s - retries: 10 + test: ["CMD", "curl", "-f", "http://localhost:9090/health"] + interval: 10s + retries: 60 + start_period: 30s restart: unless-stopped iisa: @@ -334,12 +340,12 @@ services: image: ghcr.io/edgeandnode/subgraph-dips-indexer-selection:${IISA_VERSION} pull_policy: if_not_present depends_on: - iisa-scoring: { condition: service_healthy } + iisa-cronjob: { condition: service_healthy } ports: ["8080:8080"] environment: IISA_HOST: "0.0.0.0" IISA_PORT: "8080" - IISA_LOG_LEVEL: INFO + IISA_LOG_LEVEL: DEBUG SCORES_FILE_PATH: /app/scores/indexer_scores.json volumes: - iisa-scores:/app/scores diff --git a/scripts/deploy-test-subgraph.py b/scripts/deploy-test-subgraph.py new file mode 100755 index 0000000..5865d3a --- /dev/null +++ b/scripts/deploy-test-subgraph.py @@ -0,0 +1,275 @@ +#!/usr/bin/env python3 +"""Publish test subgraphs to GNS on the local network. + +Builds a minimal block-tracker subgraph once, then creates N unique manifests +(varying startBlock), uploads each to IPFS, and publishes to GNS on-chain. + +Does NOT deploy to graph-node (no indexing), curate, or allocate. 
+ +Usage: + python3 scripts/deploy-test-subgraph.py # publish 1 + python3 scripts/deploy-test-subgraph.py 50 # publish 50 + python3 scripts/deploy-test-subgraph.py 10 myname # publish myname-1..myname-10 +""" + +import json +import subprocess +import sys +import tempfile +import time +from pathlib import Path +from urllib.request import Request, urlopen + +IPFS_API = "http://localhost:5001" +CHAIN_RPC = "http://localhost:8545" +MNEMONIC = "test test test test test test test test test test test junk" + +SCHEMA = """\ +type Block @entity(immutable: true) { + id: ID! + number: BigInt! + timestamp: BigInt! + gasUsed: BigInt! +} +""" + +MAPPING = """\ +import { ethereum } from "@graphprotocol/graph-ts" +import { Block } from "../generated/schema" + +export function handleBlock(block: ethereum.Block): void { + let entity = new Block(block.hash.toHexString()) + entity.number = block.number + entity.timestamp = block.timestamp + entity.gasUsed = block.gasUsed + entity.save() +} +""" + +PACKAGE_JSON = """\ +{ + "name": "test-subgraph", + "version": "0.1.0", + "dependencies": { + "@graphprotocol/graph-cli": "0.97.0", + "@graphprotocol/graph-ts": "0.35.1" + } +} +""" + + +def ipfs_add(content: str | bytes) -> str: + """Upload content to IPFS, return the CID.""" + from urllib.request import urlopen as _urlopen + + if isinstance(content, str): + content = content.encode() + + boundary = b"----PythonBoundary" + body = ( + b"--" + boundary + b"\r\n" + b'Content-Disposition: form-data; name="file"; filename="file"\r\n' + b"Content-Type: application/octet-stream\r\n\r\n" + + content + b"\r\n" + b"--" + boundary + b"--\r\n" + ) + req = Request( + f"{IPFS_API}/api/v0/add?pin=true", + data=body, + headers={"Content-Type": f"multipart/form-data; boundary={boundary.decode()}"}, + method="POST", + ) + with _urlopen(req, timeout=30) as resp: + return json.loads(resp.read())["Hash"] + + +def run(cmd: str, cwd: str = None) -> str: + result = subprocess.run(cmd, shell=True, cwd=cwd, capture_output=True, text=True) + if result.returncode != 0: + print(f"FAILED: {cmd}", file=sys.stderr) + print(result.stderr, file=sys.stderr) + sys.exit(1) + return result.stdout.strip() + + +def get_contract_address(contract_path: str, config_file: str) -> str: + repo_root = Path(__file__).resolve().parent.parent + output = run( + f'DOCKER_DEFAULT_PLATFORM= docker compose -f docker-compose.yaml -f compose/dev/dips.yaml ' + f'exec -T indexer-agent jq -r \'.["1337"].{contract_path}\' /opt/config/{config_file}', + cwd=str(repo_root), + ) + if not output or output == "null": + print(f"Could not read {contract_path} from {config_file}", file=sys.stderr) + sys.exit(1) + return output + + +def cid_to_hex(cid: str) -> str: + """Convert an IPFS CIDv0 (Qm...) to the 32-byte hex used by GNS.""" + output = json.loads(run(f'curl -s -X POST "{IPFS_API}/api/v0/cid/format?arg={cid}&b=base16"')) + return output["Formatted"][len("f01701220"):] + + +def build_once(source_address: str) -> tuple[str, str, str]: + """Build the subgraph once, upload shared artifacts to IPFS. + + Returns (schema_cid, abi_cid, wasm_cid). 
+ """ + with tempfile.TemporaryDirectory() as tmpdir: + Path(tmpdir, "schema.graphql").write_text(SCHEMA) + Path(tmpdir, "package.json").write_text(PACKAGE_JSON) + Path(tmpdir, "abis").mkdir() + Path(tmpdir, "abis", "Dummy.json").write_text("[]") + Path(tmpdir, "src").mkdir() + Path(tmpdir, "src", "mapping.ts").write_text(MAPPING) + + # Manifest just for building -- startBlock doesn't matter here + Path(tmpdir, "subgraph.yaml").write_text( + make_manifest("build", source_address, start_block=0) + ) + + print("Building subgraph (one-time)...") + print(" npm install...") + run("npm install --silent 2>&1", cwd=tmpdir) + print(" codegen + build...") + run("npx graph codegen 2>&1", cwd=tmpdir) + run("npx graph build 2>&1", cwd=tmpdir) + + # Upload the three shared artifacts to IPFS + schema_cid = ipfs_add(SCHEMA) + abi_cid = ipfs_add("[]") + wasm_path = Path(tmpdir, "build", next( + p.name for p in Path(tmpdir, "build").iterdir() if p.is_dir() + )) + wasm_file = next(wasm_path.glob("*.wasm")) + wasm_cid = ipfs_add(wasm_file.read_bytes()) + + print(f" schema={schema_cid} abi={abi_cid} wasm={wasm_cid}") + return schema_cid, abi_cid, wasm_cid + + +def make_manifest(name: str, source_address: str, start_block: int) -> str: + return f"""\ +specVersion: 0.0.4 +schema: + file: ./schema.graphql +dataSources: + - kind: ethereum + name: {name} + network: hardhat + source: + abi: Dummy + address: "{source_address}" + startBlock: {start_block} + mapping: + apiVersion: 0.0.6 + language: wasm/assemblyscript + kind: ethereum/events + entities: + - Block + abis: + - name: Dummy + file: ./abis/Dummy.json + blockHandlers: + - handler: handleBlock + file: ./src/mapping.ts +""" + + +def make_ipfs_manifest( + name: str, source_address: str, start_block: int, + schema_cid: str, abi_cid: str, wasm_cid: str, +) -> str: + """Produce the resolved manifest that graph-node expects from IPFS. + + File references become IPFS links: {/: /ipfs/CID} + """ + return json.dumps({ + "specVersion": "0.0.4", + "schema": {"file": {"/": f"/ipfs/{schema_cid}"}}, + "dataSources": [{ + "kind": "ethereum", + "name": name, + "network": "hardhat", + "source": { + "abi": "Dummy", + "address": source_address, + "startBlock": start_block, + }, + "mapping": { + "apiVersion": "0.0.6", + "language": "wasm/assemblyscript", + "kind": "ethereum/events", + "entities": ["Block"], + "abis": [{"name": "Dummy", "file": {"/": f"/ipfs/{abi_cid}"}}], + "blockHandlers": [{"handler": "handleBlock"}], + "file": {"/": f"/ipfs/{wasm_cid}"}, + }, + }], + }) + + +def get_nonce() -> int: + output = run(f'cast nonce 0xf39Fd6e51aad88F6F4ce6aB8827279cffFb92266 --rpc-url "{CHAIN_RPC}"') + return int(output) + + +def publish_to_gns(deployment_hex: str, gns_address: str, nonce: int) -> str: + """Publish to GNS with explicit nonce. 
Uses --async to avoid timeout.""" + tx_hash = run( + f'cast send "{gns_address}" ' + f'"publishNewSubgraph(bytes32,bytes32,bytes32)" ' + f'"0x{deployment_hex}" ' + f'"0x0000000000000000000000000000000000000000000000000000000000000000" ' + f'"0x0000000000000000000000000000000000000000000000000000000000000000" ' + f'--rpc-url "{CHAIN_RPC}" --async ' + f'--nonce {nonce} ' + f'--mnemonic "{MNEMONIC}"' + ) + return tx_hash + + +def main(): + count = int(sys.argv[1]) if len(sys.argv) > 1 else 1 + prefix = sys.argv[2] if len(sys.argv) > 2 else "test-subgraph" + + source_address = get_contract_address("L2GraphToken.address", "horizon.json") + gns_address = get_contract_address("L2GNS.address", "subgraph-service.json") + + schema_cid, abi_cid, wasm_cid = build_once(source_address) + + print(f"\nPublishing {count} subgraph(s) to GNS: {prefix}-1..{prefix}-{count}\n") + + # Upload unique manifests to IPFS and collect deployment hashes + to_publish = [] + for i in range(count): + idx = i + 1 + name = f"{prefix}-{idx}" + start_block = idx + + manifest_content = make_ipfs_manifest( + name, source_address, start_block, schema_cid, abi_cid, wasm_cid + ) + manifest_cid = ipfs_add(manifest_content) + dep_hex = cid_to_hex(manifest_cid) + to_publish.append((name, manifest_cid, dep_hex)) + print(f" {name} {manifest_cid}") + + # Batch-publish all to GNS with sequential nonces and --async + if to_publish: + print(f"\nPublishing {len(to_publish)} subgraph(s) to GNS...") + nonce = get_nonce() + for name, manifest_cid, dep_hex in to_publish: + publish_to_gns(dep_hex, gns_address, nonce) + nonce += 1 + # Wait for the last tx to confirm + time.sleep(2) + print(" done") + + print(f"\n{len(to_publish)}/{count} subgraph(s) published to GNS.") + print("Not deployed to graph-node, curated, or allocated.") + + +if __name__ == "__main__": + main() diff --git a/scripts/network-status.py b/scripts/network-status.py new file mode 100755 index 0000000..9011e01 --- /dev/null +++ b/scripts/network-status.py @@ -0,0 +1,231 @@ +#!/usr/bin/env python3 +"""Print the local network state as a tree: network > subgraph > indexer.""" + +import json +import sys +from urllib.request import Request, urlopen + +GRAPH_NODE_STATUS = "http://localhost:8030/graphql" +GRAPH_NODE_QUERY = "http://localhost:8000" +NAMED_SUBGRAPHS = ["graph-network", "semiotic/tap", "block-oracle"] + + +def gql(url: str, query: str) -> dict: + req = Request(url, json.dumps({"query": query}).encode(), {"Content-Type": "application/json"}) + with urlopen(req, timeout=5) as resp: + data = json.loads(resp.read()) + if "errors" in data: + raise RuntimeError(f"GraphQL error from {url}: {data['errors']}") + return data["data"] + + +def fetch_indexing_statuses() -> dict: + """deployment_id -> {network, health, latest_block, chain_head}""" + data = gql(GRAPH_NODE_STATUS, """{ + indexingStatuses { + subgraph + health + fatalError { message } + chains { network latestBlock { number } chainHeadBlock { number } } + } + }""") + out = {} + for s in data["indexingStatuses"]: + chain = s["chains"][0] if s["chains"] else {} + out[s["subgraph"]] = { + "network": chain.get("network", "unknown"), + "health": s["health"], + "latest_block": int(chain.get("latestBlock", {}).get("number", 0)), + "chain_head": int(chain.get("chainHeadBlock", {}).get("number", 0)), + "fatal_error": (s.get("fatalError") or {}).get("message"), + } + return out + + +def fetch_subgraph_names() -> dict: + """deployment_id -> name for known named subgraphs.""" + names = {} + for name in NAMED_SUBGRAPHS: + try: + data 
= gql(f"{GRAPH_NODE_QUERY}/subgraphs/name/{name}", "{ _meta { deployment } }") + dep = data["_meta"]["deployment"] + names[dep] = name + except Exception: + pass + return names + + +def fetch_network_subgraph_id(names: dict) -> str | None: + for dep, name in names.items(): + if name == "graph-network": + return dep + return None + + +def fetch_allocations(ns_id: str) -> list[dict]: + """Fetch indexers and their active allocations from the network subgraph.""" + data = gql(f"{GRAPH_NODE_QUERY}/subgraphs/id/{ns_id}", """{ + indexers(first: 100) { + id + url + stakedTokens + allocations(where: {status: Active}) { + subgraphDeployment { ipfsHash } + allocatedTokens + } + } + }""") + return data["indexers"] + + +def fetch_gns_subgraphs(ns_id: str) -> list[dict]: + """Fetch all subgraphs published to GNS from the network subgraph.""" + all_subgraphs = [] + skip = 0 + while True: + data = gql(f"{GRAPH_NODE_QUERY}/subgraphs/id/{ns_id}", f"""{{ + subgraphs(first: 100, skip: {skip}, orderBy: createdAt) {{ + id + currentVersion {{ + subgraphDeployment {{ ipfsHash }} + }} + }} + }}""") + batch = data["subgraphs"] + all_subgraphs.extend(batch) + if len(batch) < 100: + break + skip += 100 + return all_subgraphs + + +def format_tokens(raw: str) -> str: + grt = int(raw) / 1e18 + if grt >= 1_000_000: + return f"{grt / 1_000_000:.1f}M GRT" + if grt >= 1_000: + return f"{grt / 1_000:.1f}k GRT" + if grt == int(grt): + return f"{int(grt)} GRT" + return f"{grt:.4f} GRT" + + +def health_indicator(status: dict) -> str: + if status["fatal_error"]: + return " FATAL" + if status["health"] == "healthy": + lag = status["chain_head"] - status["latest_block"] + if lag <= 1: + return " synced" + return f" {lag} blocks behind" + return f" {status['health']}" + + +def main(): + statuses = fetch_indexing_statuses() + names = fetch_subgraph_names() + ns_id = fetch_network_subgraph_id(names) + + if not ns_id: + print("network subgraph not found", file=sys.stderr) + return 1 + + indexers = fetch_allocations(ns_id) + gns_subgraphs = fetch_gns_subgraphs(ns_id) + + # All deployment IDs published to GNS + gns_deployments = set() + for sg in gns_subgraphs: + cv = sg.get("currentVersion") + if cv and cv.get("subgraphDeployment"): + gns_deployments.add(cv["subgraphDeployment"]["ipfsHash"]) + + # Build tree: network -> [(deployment, name, status, [(indexer_id, alloc_tokens)])] + tree: dict[str, list] = {} + for idx in indexers: + for alloc in idx["allocations"]: + dep = alloc["subgraphDeployment"]["ipfsHash"] + status = statuses.get(dep, {}) + network = status.get("network", "unknown") + + if network not in tree: + tree[network] = {} + if dep not in tree[network]: + tree[network][dep] = [] + tree[network][dep].append({ + "id": idx["id"], + "url": idx.get("url", ""), + "staked": idx["stakedTokens"], + "allocated": alloc["allocatedTokens"], + }) + + # Print summary + total_indexers = len(indexers) + total_on_gns = len(gns_subgraphs) + total_indexed = len(statuses) + total_networks = len(tree) + print(f"{total_indexers} indexer(s), {total_on_gns} subgraph(s) on GNS, {total_indexed} indexed by graph-node, {total_networks} network(s)\n") + + # Print tree + networks = sorted(tree.keys()) + for ni, network in enumerate(networks): + is_last_network = ni == len(networks) - 1 + print(f"{network}") + + deployments = sorted(tree[network].keys(), key=lambda d: names.get(d, d)) + for di, dep in enumerate(deployments): + is_last_dep = di == len(deployments) - 1 + branch = "\u2514\u2500" if is_last_dep else "\u251c\u2500" + cont = " " if is_last_dep 
else "\u2502 " + + name = names.get(dep, "") + status = statuses.get(dep, {}) + label = name if name else dep + if name: + label += f" {dep}" + label += health_indicator(status) + + print(f" {branch} {label}") + + idx_list = tree[network][dep] + for ii, idx in enumerate(idx_list): + is_last_idx = ii == len(idx_list) - 1 + idx_branch = "\u2514\u2500" if is_last_idx else "\u251c\u2500" + addr = idx["id"] + alloc = format_tokens(idx["allocated"]) + print(f" {cont} {idx_branch} {addr} {alloc}") + + if not is_last_network: + print() + + # Unallocated subgraphs (indexed by graph-node but no active allocation) + allocated_deps = {dep for net in tree.values() for dep in net} + unallocated = [dep for dep in statuses if dep not in allocated_deps] + if unallocated: + print(f"\nunallocated (indexed but no active allocation)") + for i, dep in enumerate(unallocated): + is_last = i == len(unallocated) - 1 + branch = "\u2514\u2500" if is_last else "\u251c\u2500" + name = names.get(dep, "") + status = statuses[dep] + network = status.get("network", "unknown") + label = name if name else dep + if name: + label += f" {dep}" + label += f" ({network}){health_indicator(status)}" + print(f" {branch} {label}") + + # GNS-only subgraphs (published on-chain but not deployed to graph-node) + gns_only = sorted(gns_deployments - set(statuses.keys())) + if gns_only: + print(f"\nGNS-only ({len(gns_only)} published on-chain, not indexed)") + for i, dep in enumerate(gns_only): + is_last = i == len(gns_only) - 1 + branch = "\u2514\u2500" if is_last else "\u251c\u2500" + print(f" {branch} {dep}") + + return 0 + + +if __name__ == "__main__": + sys.exit(main()) From 661b6ca6b0ba47ba350646e8db304bf37231ef1c Mon Sep 17 00:00:00 2001 From: MoonBoi9001 Date: Tue, 10 Mar 2026 16:21:18 -0500 Subject: [PATCH 02/10] feat: multi-indexer support with generator and reliability hardening Parameterize run scripts (indexer-agent, indexer-service, tap-agent, graph-node) with env var overrides so the same scripts work for both the primary indexer and N extras. Add gen-extra-indexers.py which produces a compose override file with per-indexer postgres, graph-node, agent, service, and tap-agent stacks. Protocol subgraph reads go to the primary graph-node via PROTOCOL_GRAPH_NODE_HOST. 
Harden startup reliability for concurrent multi-indexer launches: - Replace nodemon with a retry loop (nodemon hangs forever on crash) - Serialize yarn install and cargo build via flock across shared mounts - Add wait_for_rpc readiness check with curl fallback for non-foundry containers - Use unless-stopped restart policy and retry_cast wrapper in registration - Tune healthchecks with start_period for graph-node and agent containers Co-Authored-By: Claude Opus 4.6 --- .claude/skills/add-indexers/SKILL.md | 92 +++++ .environment | 4 +- .gitignore | 3 + TESTING-STATUS.md | 3 +- containers/indexer/graph-node/run.sh | 5 +- .../indexer/indexer-agent/dev/run-dips.sh | 80 ++-- .../indexer/indexer-service/dev/run-dips.sh | 31 +- containers/query-payments/tap-agent/run.sh | 24 +- containers/shared/lib.sh | 32 ++ scripts/gen-extra-indexers.py | 369 ++++++++++++++++++ 10 files changed, 595 insertions(+), 48 deletions(-) create mode 100644 .claude/skills/add-indexers/SKILL.md create mode 100755 scripts/gen-extra-indexers.py diff --git a/.claude/skills/add-indexers/SKILL.md b/.claude/skills/add-indexers/SKILL.md new file mode 100644 index 0000000..bfc3dd6 --- /dev/null +++ b/.claude/skills/add-indexers/SKILL.md @@ -0,0 +1,92 @@ +--- +name: add-indexers +description: "Add extra indexers to the local Graph protocol network. Use when the user asks to add indexers, spin up another indexer, get more indexers up, bring up new indexers, or wants extra indexers for testing. Also trigger when user says a number followed by 'indexers' (e.g. 'add 3 indexers', 'spin up 2 more')." +argument-hint: "[count]" +allowed-tools: + - Bash + - Read + - Grep +--- + +# Add Extra Indexers + +Add N extra indexers to the running local network. Each extra indexer gets a fully isolated stack: postgres, graph-node, indexer-agent, indexer-service, and tap-agent. Protocol subgraphs (network, epoch, TAP) are read from the primary graph-node -- extra graph-nodes only handle actual indexing work. + +The argument is the number of NEW indexers to add (defaults to 1). + +## Accounts + +Extra indexers use hardhat "junk" mnemonic accounts starting at index 2. Maximum 18 extra (indices 2-19). + +| Suffix | Mnemonic Index | Address | +|--------|---------------|---------| +| 2 | 2 | 0x3C44CdDdB6a900fa2b585dd299e03d12FA4293BC | +| 3 | 3 | 0x90F79bf6EB2c4f870365E785982E1f101E93b906 | +| 4 | 4 | 0x15d34AAf54267DB7D7c367839AAf71A00a2C6A65 | +| 5 | 5 | 0x9965507D1a55bcC2695C58ba16FB37d819B0A4dc | + +## Steps + +### 1. Determine current extra indexer count + +```bash +docker ps --format '{{.Names}}' | grep -oP 'indexer-agent-\K\d+' | sort -n | tail -1 +``` + +If no matches, current extra count is 0. Otherwise the highest suffix minus 1 gives the count (suffix 2 = 1 extra, suffix 3 = 2 extras, etc.). + +### 2. Calculate new total + +New total = current extra count + number requested by user. + +Cap at 18. If the user asks for more than available slots, warn and cap. + +### 3. Regenerate compose file + +```bash +python3 scripts/gen-extra-indexers.py +``` + +This regenerates the full compose file for ALL extras (existing + new). It's idempotent -- running it with the same number produces the same file. + +### 4. Bring up new containers + +Only start the NEW containers plus the shared init. 
For each new suffix N, combine all into a single `up -d` command: + +```bash +DOCKER_DEFAULT_PLATFORM= docker compose \ + -f docker-compose.yaml \ + -f compose/dev/dips.yaml \ + -f compose/extra-indexers.yaml \ + up -d postgres-N graph-node-N indexer-agent-N indexer-service-N tap-agent-N start-indexing-extra +``` + +Do NOT use `--force-recreate` -- it bounces shared services (chain, postgres) causing DNS failures in already-running containers. + +### 5. Verify health + +Indexer-services share a `flock`-serialized cargo build, so they come up sequentially. The first service to start builds the binary (~2-3 minutes if not cached); subsequent services acquire the lock, find the binary already built, and start immediately. + +Wait 30 seconds after `up -d` completes, then check status: + +```bash +docker ps --format '{{.Names}}\t{{.Status}}' | grep -E '(indexer-agent|indexer-service|tap-agent)-[0-9]' | sort +``` + +All agents and services should show `(healthy)`. If a service is still `(health: starting)`, it may be waiting for the cargo build lock -- wait another 60 seconds and recheck. + +If an agent is stuck retrying (check `docker logs indexer-agent-N 2>&1 | tail -5`), the retry loop will show attempt counts. Common causes: `start-indexing-extra` hasn't completed yet (check `docker logs start-indexing-extra`), or a wrong address in JUNK_ACCOUNTS. + +### 6. Report + +Show a summary of all running indexers (primary + extras) with their container names, addresses, and health status. + +## Constraints + +- Always prefix docker compose with `DOCKER_DEFAULT_PLATFORM=` +- Always use all three compose files: `-f docker-compose.yaml -f compose/dev/dips.yaml -f compose/extra-indexers.yaml` +- Never use `--force-recreate` when adding indexers to a running stack +- The generator script is at `scripts/gen-extra-indexers.py` +- The `start-indexing-extra` container handles on-chain GRT staking and operator authorization +- Agents retry automatically (30 attempts, 10s delay) -- don't manually restart unless the error is persistent and non-transient +- If COMPOSE_FILE in .environment doesn't include `compose/extra-indexers.yaml`, warn the user to add it diff --git a/.environment b/.environment index 2b49bc2..53a092a 100644 --- a/.environment +++ b/.environment @@ -28,7 +28,9 @@ COMPOSE_PROFILES=block-oracle,explorer,indexing-payments # All components built from local checkouts - no stubs or GHCR images # contracts repo must be on escrow-management branch with `pnpm install && pnpm build` done -COMPOSE_FILE=docker-compose.yaml:compose/dev/dips.yaml +# Extra indexers: python3 scripts/gen-extra-indexers.py N +# Then add :compose/extra-indexers.yaml to COMPOSE_FILE below +COMPOSE_FILE=docker-compose.yaml:compose/dev/dips.yaml:compose/extra-indexers.yaml # Local source directories (mounted into containers, built from source) CONTRACTS_SOURCE_ROOT=/Users/samuel/Documents/github/contracts diff --git a/.gitignore b/.gitignore index 6cb9add..7e8d842 100644 --- a/.gitignore +++ b/.gitignore @@ -21,5 +21,8 @@ Thumbs.db # Rust build artifacts tests/target/ +# Generated compose overrides +compose/extra-indexers.yaml + # Legacy local config directory (now uses config-local Docker volume) config/local/ diff --git a/TESTING-STATUS.md b/TESTING-STATUS.md index 15241b9..345dfaf 100644 --- a/TESTING-STATUS.md +++ b/TESTING-STATUS.md @@ -19,7 +19,7 @@ TAP subgraph correctly points at Horizon PaymentsEscrow, signer authorization ev ### 3. 
Indexer-service rejection paths -Five of the eight rejection paths have been tested end-to-end. +Five of the eight rejection paths have been tested end-to-end: **PriceTooLow**: Temporarily set `min_grt_per_30_days["hardhat"] = "999999"` in indexer-service config. Dipper's pricing (`174000000000000` wei/s, ~450 GRT/30d) fell below the inflated minimum. Indexer-service rejected with `PRICE_TOO_LOW`, dipper recorded it correctly. The indexer enters a 1-day lookback exclusion for that deployment. @@ -88,6 +88,7 @@ Dipper was killed (`docker kill`) after processing a request and restarted. All The pipeline completes so fast (<6ms from request registration to indexer acceptance) that simulating a crash between request registration and IISA candidate selection is impractical in local-network. If dipper crashes mid-pipeline, the request sits in `OPEN` with no agreements. There is no explicit recovery for in-flight jobs -- the request would need manual reassessment or a new request. Untested scenarios that depend on indexer-agent changes: + - Indexer-agent restarts mid-reconciliation while processing pending proposals (blocked on PR #1174) - Indexer-service accepts a proposal but crashes before writing to `pending_rca_proposals` (out-of-sync risk between dipper and indexer) diff --git a/containers/indexer/graph-node/run.sh b/containers/indexer/graph-node/run.sh index a63f0ca..8c258e1 100755 --- a/containers/indexer/graph-node/run.sh +++ b/containers/indexer/graph-node/run.sh @@ -2,6 +2,9 @@ set -eu . /opt/config/.env +# Allow env var overrides for multi-indexer support +POSTGRES_HOST="${POSTGRES_HOST:-postgres}" + # graph-node has issues if there isn't at least one block on the chain curl -sf "http://chain:${CHAIN_RPC_PORT}" \ -H 'content-type: application/json' \ @@ -11,5 +14,5 @@ export ETHEREUM_RPC="hardhat:http://chain:${CHAIN_RPC_PORT}/" export GRAPH_ALLOW_NON_DETERMINISTIC_FULLTEXT_SEARCH="true" unset GRAPH_NODE_CONFIG export IPFS="http://ipfs:${IPFS_RPC_PORT}" -export POSTGRES_URL="postgresql://postgres:@postgres:${POSTGRES_PORT}/graph_node_1" +export POSTGRES_URL="postgresql://postgres:@${POSTGRES_HOST}:${POSTGRES_PORT}/graph_node_1" graph-node diff --git a/containers/indexer/indexer-agent/dev/run-dips.sh b/containers/indexer/indexer-agent/dev/run-dips.sh index b6ff3af..9c98705 100755 --- a/containers/indexer/indexer-agent/dev/run-dips.sh +++ b/containers/indexer/indexer-agent/dev/run-dips.sh @@ -4,46 +4,52 @@ set -xeu . 
/opt/shared/lib.sh -token_address=$(contract_addr L2GraphToken.address horizon) +# Allow env var overrides for multi-indexer support +INDEXER_ADDRESS="${INDEXER_ADDRESS:-$RECEIVER_ADDRESS}" +INDEXER_SECRET="${INDEXER_SECRET:-$RECEIVER_SECRET}" +INDEXER_OPERATOR_MNEMONIC="${INDEXER_OPERATOR_MNEMONIC:-$INDEXER_MNEMONIC}" +INDEXER_DB_NAME="${INDEXER_DB_NAME:-indexer_components_1}" +INDEXER_SVC_HOST="${INDEXER_SVC_HOST:-indexer-service}" +GRAPH_NODE_HOST="${GRAPH_NODE_HOST:-graph-node}" +PROTOCOL_GRAPH_NODE_HOST="${PROTOCOL_GRAPH_NODE_HOST:-graph-node}" +POSTGRES_HOST="${POSTGRES_HOST:-postgres}" + +wait_for_rpc + +# Verify this indexer is staked (registration handled by start-indexing or start-indexing-extra) staking_address=$(contract_addr HorizonStaking.address horizon) indexer_stake="$(cast call "--rpc-url=http://chain:${CHAIN_RPC_PORT}" \ - "${staking_address}" 'getStake(address) (uint256)' "${RECEIVER_ADDRESS}")" + "${staking_address}" 'getStake(address) (uint256)' "${INDEXER_ADDRESS}")" echo "indexer_stake=${indexer_stake}" if [ "${indexer_stake}" = "0" ]; then - cast send "--rpc-url=http://chain:${CHAIN_RPC_PORT}" --confirmations=0 "--mnemonic=${MNEMONIC}" \ - --value=1ether "${RECEIVER_ADDRESS}" - cast send "--rpc-url=http://chain:${CHAIN_RPC_PORT}" --confirmations=0 "--mnemonic=${MNEMONIC}" \ - "${token_address}" 'transfer(address,uint256)' "${RECEIVER_ADDRESS}" '100000000000000000000000' - cast send "--rpc-url=http://chain:${CHAIN_RPC_PORT}" --confirmations=0 "--private-key=${RECEIVER_SECRET}" \ - "${token_address}" 'approve(address,uint256)' "${staking_address}" '100000000000000000000000' - cast send "--rpc-url=http://chain:${CHAIN_RPC_PORT}" --confirmations=0 "--private-key=${RECEIVER_SECRET}" \ - "${staking_address}" 'stake(uint256)' '100000000000000000000000' + echo "ERROR: Indexer ${INDEXER_ADDRESS} has no stake. Run start-indexing-extra first." 
+ exit 1 fi export INDEXER_AGENT_HORIZON_ADDRESS_BOOK=/opt/config/horizon.json export INDEXER_AGENT_SUBGRAPH_SERVICE_ADDRESS_BOOK=/opt/config/subgraph-service.json export INDEXER_AGENT_TAP_ADDRESS_BOOK=/opt/config/tap-contracts.json -export INDEXER_AGENT_EPOCH_SUBGRAPH_ENDPOINT="http://graph-node:${GRAPH_NODE_GRAPHQL_PORT}/subgraphs/name/block-oracle" +export INDEXER_AGENT_EPOCH_SUBGRAPH_ENDPOINT="http://${PROTOCOL_GRAPH_NODE_HOST}:${GRAPH_NODE_GRAPHQL_PORT}/subgraphs/name/block-oracle" export INDEXER_AGENT_GATEWAY_ENDPOINT="http://gateway:${GATEWAY_PORT}" -export INDEXER_AGENT_GRAPH_NODE_QUERY_ENDPOINT="http://graph-node:${GRAPH_NODE_GRAPHQL_PORT}" -export INDEXER_AGENT_GRAPH_NODE_ADMIN_ENDPOINT="http://graph-node:${GRAPH_NODE_ADMIN_PORT}" -export INDEXER_AGENT_GRAPH_NODE_STATUS_ENDPOINT="http://graph-node:${GRAPH_NODE_STATUS_PORT}/graphql" +export INDEXER_AGENT_GRAPH_NODE_QUERY_ENDPOINT="http://${GRAPH_NODE_HOST}:${GRAPH_NODE_GRAPHQL_PORT}" +export INDEXER_AGENT_GRAPH_NODE_ADMIN_ENDPOINT="http://${GRAPH_NODE_HOST}:${GRAPH_NODE_ADMIN_PORT}" +export INDEXER_AGENT_GRAPH_NODE_STATUS_ENDPOINT="http://${GRAPH_NODE_HOST}:${GRAPH_NODE_STATUS_PORT}/graphql" export INDEXER_AGENT_IPFS_ENDPOINT="http://ipfs:${IPFS_RPC_PORT}" -export INDEXER_AGENT_INDEXER_ADDRESS="${RECEIVER_ADDRESS}" +export INDEXER_AGENT_INDEXER_ADDRESS="${INDEXER_ADDRESS}" export INDEXER_AGENT_INDEXER_MANAGEMENT_PORT="${INDEXER_MANAGEMENT_PORT}" export INDEXER_AGENT_INDEX_NODE_IDS=default export INDEXER_AGENT_INDEXER_GEO_COORDINATES="1 1" export INDEXER_AGENT_VOUCHER_REDEMPTION_THRESHOLD=0.01 -export INDEXER_AGENT_NETWORK_SUBGRAPH_ENDPOINT="http://graph-node:${GRAPH_NODE_GRAPHQL_PORT}/subgraphs/name/graph-network" +export INDEXER_AGENT_NETWORK_SUBGRAPH_ENDPOINT="http://${PROTOCOL_GRAPH_NODE_HOST}:${GRAPH_NODE_GRAPHQL_PORT}/subgraphs/name/graph-network" export INDEXER_AGENT_NETWORK_PROVIDER="http://chain:${CHAIN_RPC_PORT}" -export INDEXER_AGENT_MNEMONIC="${INDEXER_MNEMONIC}" -export INDEXER_AGENT_POSTGRES_DATABASE=indexer_components_1 -export INDEXER_AGENT_POSTGRES_HOST=postgres +export INDEXER_AGENT_MNEMONIC="${INDEXER_OPERATOR_MNEMONIC}" +export INDEXER_AGENT_POSTGRES_DATABASE="${INDEXER_DB_NAME}" +export INDEXER_AGENT_POSTGRES_HOST="${POSTGRES_HOST}" export INDEXER_AGENT_POSTGRES_PORT="${POSTGRES_PORT}" export INDEXER_AGENT_POSTGRES_USERNAME=postgres export INDEXER_AGENT_POSTGRES_PASSWORD= -export INDEXER_AGENT_PUBLIC_INDEXER_URL="http://indexer-service:${INDEXER_SERVICE_PORT}" -export INDEXER_AGENT_TAP_SUBGRAPH_ENDPOINT="http://graph-node:${GRAPH_NODE_GRAPHQL_PORT}/subgraphs/name/semiotic/tap" +export INDEXER_AGENT_PUBLIC_INDEXER_URL="http://${INDEXER_SVC_HOST}:${INDEXER_SERVICE_PORT}" +export INDEXER_AGENT_TAP_SUBGRAPH_ENDPOINT="http://${PROTOCOL_GRAPH_NODE_HOST}:${GRAPH_NODE_GRAPHQL_PORT}/subgraphs/name/semiotic/tap" export INDEXER_AGENT_MAX_PROVISION_INITIAL_SIZE=200000 export INDEXER_AGENT_CONFIRMATION_BLOCKS=1 export INDEXER_AGENT_LOG_LEVEL=trace @@ -55,7 +61,13 @@ export INDEXER_AGENT_DIPPER_ENDPOINT="http://dipper:${DIPPER_INDEXER_RPC_PORT}" export INDEXER_AGENT_DIPS_ALLOCATION_AMOUNT=1 cd /opt/indexer-agent-source-root -yarn install --frozen-lockfile +( + flock -x 200 + if [ ! 
-f node_modules/.yarn-install-stamp ] || [ yarn.lock -nt node_modules/.yarn-install-stamp ]; then + yarn install --frozen-lockfile + touch node_modules/.yarn-install-stamp + fi +) 200>/opt/indexer-agent-source-root/.yarn-install.lock mkdir -p ./config/ cat >./config/config.yaml <<-EOF networkIdentifier: "hardhat" @@ -79,11 +91,19 @@ dipsEpochsMargin: 1 EOF cat config/config.yaml -nodemon --watch . \ ---ext ts \ ---legacy-watch \ ---delay 4 \ ---verbose \ ---exec " -NODE_OPTIONS=\"--inspect=0.0.0.0:9230\" -tsx packages/indexer-agent/src/index.ts start" +MAX_RETRIES=30 +RETRY_DELAY=10 +attempt=0 +while [ $attempt -lt $MAX_RETRIES ]; do + attempt=$((attempt + 1)) + echo "=== Starting indexer-agent (attempt $attempt/$MAX_RETRIES) ===" + NODE_OPTIONS="--inspect=0.0.0.0:9230" \ + tsx packages/indexer-agent/src/index.ts start && break + echo "Agent exited with code $?, retrying in ${RETRY_DELAY}s..." + sleep $RETRY_DELAY +done + +if [ $attempt -ge $MAX_RETRIES ]; then + echo "Agent failed after $MAX_RETRIES attempts" + exit 1 +fi diff --git a/containers/indexer/indexer-service/dev/run-dips.sh b/containers/indexer/indexer-service/dev/run-dips.sh index be043fb..0e888fb 100755 --- a/containers/indexer/indexer-service/dev/run-dips.sh +++ b/containers/indexer/indexer-service/dev/run-dips.sh @@ -4,6 +4,16 @@ set -eu . /opt/config/.env . /opt/shared/lib.sh +# Allow env var overrides for multi-indexer support +INDEXER_ADDRESS="${INDEXER_ADDRESS:-$RECEIVER_ADDRESS}" +INDEXER_OPERATOR_MNEMONIC="${INDEXER_OPERATOR_MNEMONIC:-$INDEXER_MNEMONIC}" +INDEXER_DB_NAME="${INDEXER_DB_NAME:-indexer_components_1}" +GRAPH_NODE_HOST="${GRAPH_NODE_HOST:-graph-node}" +PROTOCOL_GRAPH_NODE_HOST="${PROTOCOL_GRAPH_NODE_HOST:-graph-node}" +POSTGRES_HOST="${POSTGRES_HOST:-postgres}" + +wait_for_rpc + tap_verifier=$(contract_addr TAPVerifier tap-contracts) graph_tally_verifier=$(contract_addr GraphTallyCollector.address horizon) subgraph_service=$(contract_addr SubgraphService.address subgraph-service) @@ -19,23 +29,23 @@ fi cat >/opt/config.toml <<-EOF [indexer] -indexer_address = "${RECEIVER_ADDRESS}" -operator_mnemonic = "${INDEXER_MNEMONIC}" +indexer_address = "${INDEXER_ADDRESS}" +operator_mnemonic = "${INDEXER_OPERATOR_MNEMONIC}" [database] -postgres_url = "postgresql://postgres@postgres:${POSTGRES_PORT}/indexer_components_1" +postgres_url = "postgresql://postgres@${POSTGRES_HOST}:${POSTGRES_PORT}/${INDEXER_DB_NAME}" [graph_node] -query_url = "http://graph-node:${GRAPH_NODE_GRAPHQL_PORT}" -status_url = "http://graph-node:${GRAPH_NODE_STATUS_PORT}/graphql" +query_url = "http://${GRAPH_NODE_HOST}:${GRAPH_NODE_GRAPHQL_PORT}" +status_url = "http://${GRAPH_NODE_HOST}:${GRAPH_NODE_STATUS_PORT}/graphql" [subgraphs.network] -query_url = "http://graph-node:${GRAPH_NODE_GRAPHQL_PORT}/subgraphs/name/graph-network" +query_url = "http://${PROTOCOL_GRAPH_NODE_HOST}:${GRAPH_NODE_GRAPHQL_PORT}/subgraphs/name/graph-network" recently_closed_allocation_buffer_secs = 60 syncing_interval_secs = 30 [subgraphs.escrow] -query_url = "http://graph-node:${GRAPH_NODE_GRAPHQL_PORT}/subgraphs/name/semiotic/tap" +query_url = "http://${PROTOCOL_GRAPH_NODE_HOST}:${GRAPH_NODE_GRAPHQL_PORT}/subgraphs/name/semiotic/tap" syncing_interval_secs = 30 [blockchain] @@ -84,5 +94,10 @@ fi cat /opt/config.toml cd /opt/source -cargo build --bin indexer-service-rs +( + flock -x 200 + if [ ! 
-f ./target/debug/indexer-service-rs ]; then + cargo build --bin indexer-service-rs + fi +) 200>/opt/source/.cargo-build.lock exec ./target/debug/indexer-service-rs --config=/opt/config.toml diff --git a/containers/query-payments/tap-agent/run.sh b/containers/query-payments/tap-agent/run.sh index e783680..a916350 100755 --- a/containers/query-payments/tap-agent/run.sh +++ b/containers/query-payments/tap-agent/run.sh @@ -4,6 +4,16 @@ set -eu . /opt/shared/lib.sh +# Allow env var overrides for multi-indexer support +INDEXER_ADDRESS="${INDEXER_ADDRESS:-$RECEIVER_ADDRESS}" +INDEXER_OPERATOR_MNEMONIC="${INDEXER_OPERATOR_MNEMONIC:-$INDEXER_MNEMONIC}" +INDEXER_DB_NAME="${INDEXER_DB_NAME:-indexer_components_1}" +GRAPH_NODE_HOST="${GRAPH_NODE_HOST:-graph-node}" +PROTOCOL_GRAPH_NODE_HOST="${PROTOCOL_GRAPH_NODE_HOST:-graph-node}" +POSTGRES_HOST="${POSTGRES_HOST:-postgres}" + +wait_for_rpc + cd /opt tap_verifier=$(contract_addr TAPVerifier tap-contracts) graph_tally_verifier=$(contract_addr GraphTallyCollector.address horizon) @@ -15,23 +25,23 @@ EOF cat >config.toml <<-EOF [indexer] -indexer_address = "${RECEIVER_ADDRESS}" -operator_mnemonic = "${INDEXER_MNEMONIC}" +indexer_address = "${INDEXER_ADDRESS}" +operator_mnemonic = "${INDEXER_OPERATOR_MNEMONIC}" [database] -postgres_url = "postgresql://postgres@postgres:${POSTGRES_PORT}/indexer_components_1" +postgres_url = "postgresql://postgres@${POSTGRES_HOST}:${POSTGRES_PORT}/${INDEXER_DB_NAME}" [graph_node] -query_url = "http://graph-node:${GRAPH_NODE_GRAPHQL_PORT}" -status_url = "http://graph-node:${GRAPH_NODE_STATUS_PORT}/graphql" +query_url = "http://${GRAPH_NODE_HOST}:${GRAPH_NODE_GRAPHQL_PORT}" +status_url = "http://${GRAPH_NODE_HOST}:${GRAPH_NODE_STATUS_PORT}/graphql" [subgraphs.network] -query_url = "http://graph-node:${GRAPH_NODE_GRAPHQL_PORT}/subgraphs/name/graph-network" +query_url = "http://${PROTOCOL_GRAPH_NODE_HOST}:${GRAPH_NODE_GRAPHQL_PORT}/subgraphs/name/graph-network" recently_closed_allocation_buffer_secs = 60 syncing_interval_secs = 30 [subgraphs.escrow] -query_url = "http://graph-node:${GRAPH_NODE_GRAPHQL_PORT}/subgraphs/name/semiotic/tap" +query_url = "http://${PROTOCOL_GRAPH_NODE_HOST}:${GRAPH_NODE_GRAPHQL_PORT}/subgraphs/name/semiotic/tap" syncing_interval_secs = 30 [blockchain] diff --git a/containers/shared/lib.sh b/containers/shared/lib.sh index c17694e..2b6ec41 100644 --- a/containers/shared/lib.sh +++ b/containers/shared/lib.sh @@ -34,3 +34,35 @@ wait_for_gql() { echo "Error: timed out waiting for $_url after ${_timeout}s" >&2 exit 1 } + +wait_for_rpc() { + echo "Waiting for chain RPC at http://chain:${CHAIN_RPC_PORT}..." + if command -v cast > /dev/null 2>&1; then + until cast block-number --rpc-url="http://chain:${CHAIN_RPC_PORT}" > /dev/null 2>&1; do + sleep 2 + done + else + until curl -sf "http://chain:${CHAIN_RPC_PORT}" -X POST \ + -H 'content-type: application/json' \ + -d '{"jsonrpc":"2.0","method":"eth_blockNumber","params":[],"id":1}' > /dev/null 2>&1; do + sleep 2 + done + fi + echo "Chain RPC available" +} + +retry_cmd() { + _rc_max="${1}"; shift + _rc_delay="${1}"; shift + _rc_attempt=0 + while [ "$_rc_attempt" -lt "$_rc_max" ]; do + _rc_attempt=$((_rc_attempt + 1)) + if "$@"; then + return 0 + fi + echo "Attempt $_rc_attempt/$_rc_max failed, retrying in ${_rc_delay}s..." 
+ sleep "$_rc_delay" + done + echo "Command failed after $_rc_max attempts: $*" + return 1 +} diff --git a/scripts/gen-extra-indexers.py b/scripts/gen-extra-indexers.py new file mode 100755 index 0000000..7ad9ca6 --- /dev/null +++ b/scripts/gen-extra-indexers.py @@ -0,0 +1,369 @@ +#!/usr/bin/env python3 +"""Generate a compose override file with N extra indexer stacks. + +Each extra indexer gets its own postgres, graph-node, indexer-agent, +indexer-service, and tap-agent. Protocol subgraphs (network, epoch, TAP) +are read from the primary graph-node -- extra graph-nodes only handle +actual indexing work. On-chain registration (GRT stake, operator auth) +is handled by a shared init container. + +Shared across all indexers: chain (hardhat), ipfs, gateway, dipper, iisa, +redpanda, contract addresses, protocol subgraphs (on primary graph-node). + +Accounts come from the "junk" mnemonic starting at index 2 (indices 0-1 are +ACCOUNT0/ACCOUNT1). Hardhat pre-funds these with 10,000 ETH. + +Usage: + python3 scripts/gen-extra-indexers.py 3 # generate 3 extra indexers + python3 scripts/gen-extra-indexers.py 0 # remove the file +""" + +import sys +from pathlib import Path + +# Hardhat "junk" mnemonic accounts starting at index 2. +# Deterministic and pre-funded with 10,000 ETH by Hardhat. +JUNK_ACCOUNTS = [ + ("0x3C44CdDdB6a900fa2b585dd299e03d12FA4293BC", "0x5de4111afa1a4b94908f83103eb1f1706367c2e68ca870fc3fb9a804cdab365a"), + ("0x90F79bf6EB2c4f870365E785982E1f101E93b906", "0x7c852118294e51e653712a81e05800f419141751be58f605c371e15141b007a6"), + ("0x15d34AAf54267DB7D7c367839AAf71A00a2C6A65", "0x47e179ec197488593b187f80a00eb0da91f1b9d0b13f8733639f19c30a34926a"), + ("0x9965507D1a55bcC2695C58ba16FB37d819B0A4dc", "0x8b3a350cf5c34c9194ca85829a2df0ec3153be0318b5e2d3348e872092edffba"), + ("0x976EA74026E726554dB657fA54763abd0C3a0aa9", "0x92db14e403b83dfe3df233f83dfa3a0d7096f21ca9b0d6d6b8d88b2b4ec1564e"), + ("0x14dC79964da2C08b23698B3D3cc7Ca32193d9955", "0x4bbbf85ce3377467afe5d46f804f221813b2bb87f24d81f60f1fcdbf7cbf4356"), + ("0x23618e81E3f5cdF7f54C3d65f7FBc0aBf5B21E8f", "0xdbda1821b80551c9d65939329250298aa3472ba22feea921c0cf5d620ea67b97"), + ("0xa0Ee7A142d267C1f36714E4a8F75612F20a79720", "0x2a871d0798f97d79848a013d4936a73bf4cc922c825d33c1cf7073dff6d409c6"), + ("0xBcd4042DE499D14e55001CcbB24a551F3b954096", "0xf214f2b2cd398c806f84e317254e0f0b801d0643303237d97a22a48e01628897"), + ("0x71bE63f3384f5fb98995898A86B02Fb2426c5788", "0x701b615bbdfb9de65240bc28bd21bbc0d996645a3dd57e7b12bc2bdf6f192c82"), + ("0xFABB0ac9d68B0B445fB7357272Ff202C5651694a", "0xa267530f49f8280200edf313ee7af6b827f2a8bce2897751d06a843f644967b1"), + ("0x1CBd3b2770909D4e10f157cABC84C7264073C9Ec", "0x47c99abed3324a2707c28affff1267e45918ec8c3f20b8aa892e8b065d2942dd"), + ("0xdF3e18d64BC6A983f673Ab319CCaE4f1a57C7097", "0xc526ee95bf44d8fc405a158bb884d9d1238d99f0612e9f33d006bb0789009aaa"), + ("0xcd3B766CCDd6AE721141F452C550Ca635964ce71", "0x8166f546bab6da521a8369cab06c5d2b9e46670292d85c875ee9ec20e84ffb61"), + ("0x2546BcD3c84621e976D8185a91A922aE77ECEc30", "0xea6c44ac03bff858b476bba40716402b03e41b8e97e276d1baec7c37d42484a0"), + ("0xbDA5747bFD65F08deb54cb465eB87D40e51B197E", "0x689af8efa8c651a91ad287602527f3af2fe9f6501a7ac4b061667b5a93e037fd"), + ("0xdD2FD4581271e230360230F9337D5c0430Bf44C0", "0xde9be858da4a475276426320d5e9262ecfc3ba460bfac56360bfa6c4c28b4ee0"), + ("0x8626f6940E2eb28930eFb4CeF49B2d1F2C9C1199", "0xdf57089febbacf7ba0bc227dafbffa9fc08a93fdc68e1e42411a14efcf23656e"), +] + +MAX_EXTRA = len(JUNK_ACCOUNTS) # 18 +JUNK_MNEMONIC = "test test test 
test test test test test test test test junk" + +OUTPUT_FILE = Path(__file__).resolve().parent.parent / "compose" / "extra-indexers.yaml" + + +def postgres_service(n: int) -> str: + return f"""\ + postgres-{n}: + container_name: postgres-{n} + image: postgres:17-alpine + command: postgres -c 'max_connections=1000' -c 'shared_preload_libraries=pg_stat_statements' + volumes: + - postgres-{n}-data:/var/lib/postgresql/data + - ./containers/core/postgres/setup.sql:/docker-entrypoint-initdb.d/setup.sql:ro + environment: + POSTGRES_INITDB_ARGS: "--encoding UTF8 --locale=C" + POSTGRES_HOST_AUTH_METHOD: trust + POSTGRES_USER: postgres + healthcheck: + interval: 1s + retries: 20 + test: pg_isready -U postgres + restart: unless-stopped +""" + + +def graph_node_service(n: int) -> str: + return f"""\ + graph-node-{n}: + container_name: graph-node-{n} + build: + context: containers/indexer/graph-node + args: + GRAPH_NODE_VERSION: ${{GRAPH_NODE_VERSION}} + depends_on: + chain: + condition: service_healthy + ipfs: + condition: service_healthy + postgres-{n}: + condition: service_healthy + stop_signal: SIGKILL + volumes: + - ./containers/shared:/opt/shared:ro + - ./.env:/opt/config/.env:ro + - config-local:/opt/config:ro + environment: + POSTGRES_HOST: "postgres-{n}" + healthcheck: + interval: 2s + retries: 60 + start_period: 10s + test: curl -f http://127.0.0.1:8030 + restart: unless-stopped +""" + + +def agent_service(n: int, address: str, secret: str) -> str: + return f"""\ + indexer-agent-{n}: + container_name: indexer-agent-{n} + platform: linux/arm64 + build: + target: "wrapper" + dockerfile_inline: | + FROM node:22-slim AS wrapper + RUN apt-get update \\ + && apt-get install -y --no-install-recommends \\ + build-essential curl git jq python3 \\ + && rm -rf /var/lib/apt/lists/* + COPY --from=ghcr.io/foundry-rs/foundry:v1.0.0 \\ + /usr/local/bin/forge /usr/local/bin/cast /usr/local/bin/anvil /usr/local/bin/chisel /usr/local/bin/ + RUN npm install -g tsx nodemon + entrypoint: ["bash", "/opt/run-dips.sh"] + depends_on: + start-indexing-extra: + condition: service_completed_successfully + graph-node-{n}: + condition: service_healthy + ports: + - "{7600 + n * 10}:7600" + stop_signal: SIGKILL + volumes: + - ${{INDEXER_AGENT_SOURCE_ROOT:?Set INDEXER_AGENT_SOURCE_ROOT}}:/opt/indexer-agent-source-root + - ./containers/indexer/indexer-agent/dev/run-dips.sh:/opt/run-dips.sh:ro + - ./containers/shared:/opt/shared:ro + - ./.env:/opt/config/.env:ro + - config-local:/opt/config:ro + environment: + INDEXER_ADDRESS: "{address}" + INDEXER_SECRET: "{secret}" + INDEXER_OPERATOR_MNEMONIC: "{JUNK_MNEMONIC}" + INDEXER_DB_NAME: "indexer_components_1" + INDEXER_SVC_HOST: "indexer-service-{n}" + GRAPH_NODE_HOST: "graph-node-{n}" + PROTOCOL_GRAPH_NODE_HOST: "graph-node" + POSTGRES_HOST: "postgres-{n}" + INDEXER_MANAGEMENT_PORT: "7600" + healthcheck: + interval: 10s + retries: 600 + start_period: 30s + test: curl -f http://127.0.0.1:7600/ + restart: unless-stopped +""" + + +def service_service(n: int, address: str, secret: str) -> str: + return f"""\ + indexer-service-{n}: + container_name: indexer-service-{n} + cap_add: + - NET_ADMIN + platform: linux/arm64 + build: + target: "wrapper" + dockerfile_inline: | + FROM rust:1-slim-bookworm AS wrapper + RUN apt-get update \\ + && apt-get install -y --no-install-recommends \\ + build-essential curl git jq pkg-config \\ + protobuf-compiler libssl-dev libsasl2-dev \\ + && rm -rf /var/lib/apt/lists/* + entrypoint: ["bash", "/opt/run-dips.sh"] + depends_on: + indexer-agent-{n}: + 
condition: service_healthy + ports: + - "{7601 + n * 10}:7601" + - "{7602 + n * 10}:7602" + stop_signal: SIGKILL + volumes: + - ${{INDEXER_SERVICE_SOURCE_ROOT:?Set INDEXER_SERVICE_SOURCE_ROOT}}:/opt/source + - ./containers/indexer/indexer-service/dev/run-dips.sh:/opt/run-dips.sh:ro + - ./containers/shared:/opt/shared:ro + - ./.env:/opt/config/.env:ro + - config-local:/opt/config:ro + environment: + INDEXER_ADDRESS: "{address}" + INDEXER_SECRET: "{secret}" + INDEXER_OPERATOR_MNEMONIC: "{JUNK_MNEMONIC}" + INDEXER_DB_NAME: "indexer_components_1" + GRAPH_NODE_HOST: "graph-node-{n}" + PROTOCOL_GRAPH_NODE_HOST: "graph-node" + POSTGRES_HOST: "postgres-{n}" + RUST_LOG: info,indexer_service_rs=info,indexer_monitor=warn,indexer_dips=debug + RUST_BACKTRACE: "1" + SQLX_OFFLINE: "true" + healthcheck: + interval: 10s + retries: 600 + test: curl -f http://127.0.0.1:7601/ + restart: unless-stopped +""" + + +def tap_service(n: int, address: str, secret: str) -> str: + return f"""\ + tap-agent-{n}: + container_name: tap-agent-{n} + build: + context: containers/query-payments/tap-agent + args: + INDEXER_TAP_AGENT_VERSION: ${{INDEXER_TAP_AGENT_VERSION}} + depends_on: + indexer-agent-{n}: + condition: service_healthy + stop_signal: SIGKILL + volumes: + - ./containers/shared:/opt/shared:ro + - ./.env:/opt/config/.env:ro + - config-local:/opt/config:ro + environment: + INDEXER_ADDRESS: "{address}" + INDEXER_SECRET: "{secret}" + INDEXER_OPERATOR_MNEMONIC: "{JUNK_MNEMONIC}" + INDEXER_DB_NAME: "indexer_components_1" + GRAPH_NODE_HOST: "graph-node-{n}" + PROTOCOL_GRAPH_NODE_HOST: "graph-node" + POSTGRES_HOST: "postgres-{n}" + RUST_LOG: info,indexer_tap_agent=trace + RUST_BACKTRACE: "1" + restart: unless-stopped +""" + + +def registration_block(n: int, address: str, secret: str) -> str: + return f"""\ + # --- Extra indexer {n}: {address} --- + ADDR="{address}" + KEY="{secret}" + STAKE=$$(cast call --rpc-url="$$RPC" "$$STAKING" 'getStake(address)(uint256)' "$$ADDR") + if [ "$$STAKE" != "0" ]; then + echo "Extra indexer {n} ($$ADDR) already staked ($$STAKE) -- skipping" + else + echo "Registering extra indexer {n}: {address}" + + # Transfer ETH + GRT from deployer + retry_cast cast send --rpc-url="$$RPC" --confirmations=1 --mnemonic="$$MNEMONIC" \\ + --value=1ether "$$ADDR" + retry_cast cast send --rpc-url="$$RPC" --confirmations=1 --mnemonic="$$MNEMONIC" \\ + "$$TOKEN" 'transfer(address,uint256)' "$$ADDR" '100000000000000000000000' + + # Stake GRT + retry_cast cast send --rpc-url="$$RPC" --confirmations=1 --private-key="$$KEY" \\ + "$$TOKEN" 'approve(address,uint256)' "$$STAKING" '100000000000000000000000' + retry_cast cast send --rpc-url="$$RPC" --confirmations=1 --private-key="$$KEY" \\ + "$$STAKING" 'stake(uint256)' '100000000000000000000000' + + # Authorize as own operator for SubgraphService + retry_cast cast send --rpc-url="$$RPC" --confirmations=1 --private-key="$$KEY" \\ + "$$STAKING" 'setOperator(address,address,bool)' "$$ADDR" "$$SSA" "true" + + echo "Extra indexer {n} registered" + fi +""" + + +def init_indexers_service(registrations: str) -> str: + return f"""\ + start-indexing-extra: + container_name: start-indexing-extra + build: + context: containers/indexer/start-indexing + depends_on: + start-indexing: + condition: service_completed_successfully + restart: on-failure:5 + volumes: + - ./containers/shared:/opt/shared:ro + - ./.env:/opt/config/.env:ro + - config-local:/opt/config:ro + entrypoint: ["bash", "-c"] + command: + - | + set -eu + . /opt/config/.env + . 
/opt/shared/lib.sh + + retry_cast() {{ for i in 1 2 3 4 5; do "$$@" && return 0; echo "Attempt $$i failed, retrying in 3s..."; sleep 3; done; echo "Failed after 5 attempts: $$*"; return 1; }} + + RPC="http://chain:$${{CHAIN_RPC_PORT}}" + MNEMONIC="$${{MNEMONIC}}" + TOKEN=$$(contract_addr L2GraphToken.address horizon) + STAKING=$$(contract_addr HorizonStaking.address horizon) + SSA=$$(contract_addr SubgraphService.address subgraph-service) + +{registrations} + echo "All extra indexers registered" +""" + + +def generate(count: int) -> str: + parts = [] + reg_blocks = [] + volume_names = [] + + for i in range(count): + n = i + 2 # service suffix: postgres-2, graph-node-2, etc. + address, secret = JUNK_ACCOUNTS[i] + volume_names.append(f"postgres-{n}-data") + + parts.append(postgres_service(n)) + parts.append(graph_node_service(n)) + parts.append(agent_service(n, address, secret)) + parts.append(service_service(n, address, secret)) + parts.append(tap_service(n, address, secret)) + reg_blocks.append(registration_block(n, address, secret)) + + parts.append(init_indexers_service("\n".join(reg_blocks))) + + header = """\ +# Auto-generated by scripts/gen-extra-indexers.py -- do not edit manually +# +# Usage: +# python3 scripts/gen-extra-indexers.py N +# COMPOSE_FILE=docker-compose.yaml:compose/dev/dips.yaml:compose/extra-indexers.yaml + +""" + + volumes = "\nvolumes:\n" + for v in volume_names: + volumes += f" {v}:\n" + + return header + "services:\n" + "\n".join(parts) + volumes + + +def main(): + if len(sys.argv) < 2: + print(f"Usage: {sys.argv[0]} N", file=sys.stderr) + print(f" N=1..{MAX_EXTRA}: generate compose/extra-indexers.yaml with N extra indexers", file=sys.stderr) + print(f" N=0: remove the generated file", file=sys.stderr) + sys.exit(1) + + count = int(sys.argv[1]) + + if count == 0: + if OUTPUT_FILE.exists(): + OUTPUT_FILE.unlink() + print(f"Removed {OUTPUT_FILE}") + else: + print("Nothing to remove") + return + + if count < 0 or count > MAX_EXTRA: + print(f"Count must be 0..{MAX_EXTRA}, got {count}", file=sys.stderr) + sys.exit(1) + + yaml_content = generate(count) + OUTPUT_FILE.parent.mkdir(parents=True, exist_ok=True) + OUTPUT_FILE.write_text(yaml_content) + print(f"Generated {OUTPUT_FILE} with {count} extra indexer(s)") + print(f"Service suffixes: {', '.join(str(i+2) for i in range(count))}") + print(f"\nPer-indexer stack: postgres, graph-node, indexer-agent, indexer-service, tap-agent") + print(f"Protocol subgraphs read from primary graph-node (no deploy-subgraphs needed)") + print(f"Plus: start-indexing-extra (shared on-chain init)") + print(f"\nTo activate, set in .environment:") + print(f" COMPOSE_FILE=docker-compose.yaml:compose/dev/dips.yaml:compose/extra-indexers.yaml") + + +if __name__ == "__main__": + main() From 007b9e8138e40302effe4447c38f8f9d7914c858 Mon Sep 17 00:00:00 2001 From: MoonBoi9001 Date: Tue, 10 Mar 2026 17:26:14 -0500 Subject: [PATCH 03/10] fix: decouple extra indexers from compose dependency chain Remove start-indexing-extra from agent depends_on so compose no longer walks the full init chain (graph-contracts -> start-indexing -> start- indexing-extra) on every `up -d`. This was causing chain container bounces and cascading DNS failures when adding indexers to a running stack. Agents now poll for on-chain staking (90 attempts, 5s interval) instead of hard-failing, allowing registration to run in parallel. 
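For reference, a condensed sketch of that stake-polling loop (the full version lands in run-dips.sh in a later patch):

```bash
# Poll HorizonStaking until this indexer shows non-zero stake (90 x 5s = 450s budget).
attempt=0
while [ "$attempt" -lt 90 ]; do
  attempt=$((attempt + 1))
  indexer_stake="$(cast call "--rpc-url=http://chain:${CHAIN_RPC_PORT}" \
    "${staking_address}" 'getStake(address) (uint256)' "${INDEXER_ADDRESS}" 2>/dev/null || echo "0")"
  [ "${indexer_stake}" != "0" ] && break
  sleep 5
done
[ "${indexer_stake}" = "0" ] && { echo "ERROR: not staked after 450s"; exit 1; }
```
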
Additional reliability and resource improvements: - Use --no-deps --no-recreate in add-indexers skill - Add dns_opt (timeout:2, attempts:5) to all long-running services - Add mem_limit to chain (512m) and all generated services - Cap chain Node.js heap at 384MB via NODE_OPTIONS - Reduce extra postgres max_connections from 1000 to 200 Co-Authored-By: Claude Opus 4.6 --- .claude/skills/add-indexers/SKILL.md | 19 +++++++++++++--- .../indexer/indexer-agent/dev/run-dips.sh | 22 ++++++++++++++----- docker-compose.yaml | 17 ++++++++++++++ scripts/gen-extra-indexers.py | 21 +++++++++++++++--- 4 files changed, 68 insertions(+), 11 deletions(-) diff --git a/.claude/skills/add-indexers/SKILL.md b/.claude/skills/add-indexers/SKILL.md index bfc3dd6..22bdf45 100644 --- a/.claude/skills/add-indexers/SKILL.md +++ b/.claude/skills/add-indexers/SKILL.md @@ -51,17 +51,29 @@ This regenerates the full compose file for ALL extras (existing + new). It's ide ### 4. Bring up new containers -Only start the NEW containers plus the shared init. For each new suffix N, combine all into a single `up -d` command: +Two-step process to avoid bouncing shared services. + +First, run `start-indexing-extra` to register new indexers on-chain: + +```bash +DOCKER_DEFAULT_PLATFORM= docker compose \ + -f docker-compose.yaml \ + -f compose/dev/dips.yaml \ + -f compose/extra-indexers.yaml \ + run --rm start-indexing-extra +``` + +Then start the actual containers with `--no-deps --no-recreate`. For each new suffix N: ```bash DOCKER_DEFAULT_PLATFORM= docker compose \ -f docker-compose.yaml \ -f compose/dev/dips.yaml \ -f compose/extra-indexers.yaml \ - up -d postgres-N graph-node-N indexer-agent-N indexer-service-N tap-agent-N start-indexing-extra + up -d --no-deps --no-recreate postgres-N graph-node-N indexer-agent-N indexer-service-N tap-agent-N ``` -Do NOT use `--force-recreate` -- it bounces shared services (chain, postgres) causing DNS failures in already-running containers. +`--no-deps` prevents compose from walking the dependency tree and bouncing shared services. `--no-recreate` prevents touching already-running containers. ### 5. 
Verify health @@ -88,5 +100,6 @@ Show a summary of all running indexers (primary + extras) with their container n - Never use `--force-recreate` when adding indexers to a running stack - The generator script is at `scripts/gen-extra-indexers.py` - The `start-indexing-extra` container handles on-chain GRT staking and operator authorization +- Agents poll for on-chain staking automatically (up to 450s), so `start-indexing-extra` can run in parallel with container startup - Agents retry automatically (30 attempts, 10s delay) -- don't manually restart unless the error is persistent and non-transient - If COMPOSE_FILE in .environment doesn't include `compose/extra-indexers.yaml`, warn the user to add it diff --git a/containers/indexer/indexer-agent/dev/run-dips.sh b/containers/indexer/indexer-agent/dev/run-dips.sh index 9c98705..64cd1b8 100755 --- a/containers/indexer/indexer-agent/dev/run-dips.sh +++ b/containers/indexer/indexer-agent/dev/run-dips.sh @@ -16,13 +16,25 @@ POSTGRES_HOST="${POSTGRES_HOST:-postgres}" wait_for_rpc -# Verify this indexer is staked (registration handled by start-indexing or start-indexing-extra) +# Wait for this indexer to be staked on-chain staking_address=$(contract_addr HorizonStaking.address horizon) -indexer_stake="$(cast call "--rpc-url=http://chain:${CHAIN_RPC_PORT}" \ - "${staking_address}" 'getStake(address) (uint256)' "${INDEXER_ADDRESS}")" -echo "indexer_stake=${indexer_stake}" +echo "Waiting for indexer ${INDEXER_ADDRESS} to be staked..." +_stake_attempt=0 +while [ "$_stake_attempt" -lt 90 ]; do + _stake_attempt=$((_stake_attempt + 1)) + indexer_stake="$(cast call "--rpc-url=http://chain:${CHAIN_RPC_PORT}" \ + "${staking_address}" 'getStake(address) (uint256)' "${INDEXER_ADDRESS}" 2>/dev/null || echo "0")" + if [ "${indexer_stake}" != "0" ]; then + echo "Indexer staked: ${indexer_stake}" + break + fi + if [ $((_stake_attempt % 12)) -eq 0 ]; then + echo " still waiting for staking (attempt ${_stake_attempt}/90)..." + fi + sleep 5 +done if [ "${indexer_stake}" = "0" ]; then - echo "ERROR: Indexer ${INDEXER_ADDRESS} has no stake. Run start-indexing-extra first." 
+ echo "ERROR: Indexer ${INDEXER_ADDRESS} not staked after 450s" exit 1 fi diff --git a/docker-compose.yaml b/docker-compose.yaml index 8a5da95..b0fcb6d 100644 --- a/docker-compose.yaml +++ b/docker-compose.yaml @@ -7,9 +7,11 @@ services: - chain-data:/data healthcheck: { interval: 1s, retries: 10, test: cast block } stop_grace_period: 30s + mem_limit: 512m restart: on-failure:3 environment: - FORK_RPC_URL=${FORK_RPC_URL:-} + - NODE_OPTIONS=--max-old-space-size=384 block-explorer: container_name: block-explorer @@ -67,6 +69,9 @@ services: - config-local:/opt/config:ro healthcheck: { interval: 1s, retries: 20, test: curl -f http://127.0.0.1:8030 } + dns_opt: + - timeout:2 + - attempts:5 restart: on-failure:3 graph-contracts: @@ -126,6 +131,9 @@ services: - config-local:/opt/config:ro healthcheck: { interval: 10s, retries: 600, test: curl -f http://127.0.0.1:7600/ } + dns_opt: + - timeout:2 + - attempts:5 restart: on-failure:3 subgraph-deploy: @@ -240,6 +248,9 @@ services: environment: RUST_LOG: info,graph_gateway=trace RUST_BACKTRACE: 1 + dns_opt: + - timeout:2 + - attempts:5 restart: on-failure:3 healthcheck: { interval: 1s, retries: 100, test: curl -f http://127.0.0.1:7700/ } @@ -265,6 +276,9 @@ services: RUST_BACKTRACE: 1 healthcheck: { interval: 1s, retries: 100, test: curl -f http://127.0.0.1:7601/ } + dns_opt: + - timeout:2 + - attempts:5 restart: on-failure:3 tap-agent: @@ -284,6 +298,9 @@ services: environment: RUST_LOG: info,indexer_tap_agent=trace RUST_BACKTRACE: 1 + dns_opt: + - timeout:2 + - attempts:5 restart: on-failure:3 # --- Profiled components (activated via COMPOSE_PROFILES in .env) --- diff --git a/scripts/gen-extra-indexers.py b/scripts/gen-extra-indexers.py index 7ad9ca6..87896ea 100755 --- a/scripts/gen-extra-indexers.py +++ b/scripts/gen-extra-indexers.py @@ -55,7 +55,7 @@ def postgres_service(n: int) -> str: postgres-{n}: container_name: postgres-{n} image: postgres:17-alpine - command: postgres -c 'max_connections=1000' -c 'shared_preload_libraries=pg_stat_statements' + command: postgres -c 'max_connections=200' -c 'shared_buffers=64MB' volumes: - postgres-{n}-data:/var/lib/postgresql/data - ./containers/core/postgres/setup.sql:/docker-entrypoint-initdb.d/setup.sql:ro @@ -67,6 +67,7 @@ def postgres_service(n: int) -> str: interval: 1s retries: 20 test: pg_isready -U postgres + mem_limit: 256m restart: unless-stopped """ @@ -98,6 +99,10 @@ def graph_node_service(n: int) -> str: retries: 60 start_period: 10s test: curl -f http://127.0.0.1:8030 + dns_opt: + - timeout:2 + - attempts:5 + mem_limit: 256m restart: unless-stopped """ @@ -120,8 +125,6 @@ def agent_service(n: int, address: str, secret: str) -> str: RUN npm install -g tsx nodemon entrypoint: ["bash", "/opt/run-dips.sh"] depends_on: - start-indexing-extra: - condition: service_completed_successfully graph-node-{n}: condition: service_healthy ports: @@ -148,6 +151,10 @@ def agent_service(n: int, address: str, secret: str) -> str: retries: 600 start_period: 30s test: curl -f http://127.0.0.1:7600/ + dns_opt: + - timeout:2 + - attempts:5 + mem_limit: 512m restart: unless-stopped """ @@ -197,6 +204,10 @@ def service_service(n: int, address: str, secret: str) -> str: interval: 10s retries: 600 test: curl -f http://127.0.0.1:7601/ + dns_opt: + - timeout:2 + - attempts:5 + mem_limit: 192m restart: unless-stopped """ @@ -227,6 +238,10 @@ def tap_service(n: int, address: str, secret: str) -> str: POSTGRES_HOST: "postgres-{n}" RUST_LOG: info,indexer_tap_agent=trace RUST_BACKTRACE: "1" + dns_opt: + - timeout:2 + - 
attempts:5 + mem_limit: 128m restart: unless-stopped """ From b0dd8cbe57f65ca3bba462f099ed95df80621e62 Mon Sep 17 00:00:00 2001 From: MoonBoi9001 Date: Tue, 10 Mar 2026 18:24:40 -0500 Subject: [PATCH 04/10] update yaml --- compose/dev/dips.yaml | 8 ++++---- docker-compose.yaml | 3 +-- 2 files changed, 5 insertions(+), 6 deletions(-) diff --git a/compose/dev/dips.yaml b/compose/dev/dips.yaml index 18ccea3..701cf27 100644 --- a/compose/dev/dips.yaml +++ b/compose/dev/dips.yaml @@ -62,7 +62,7 @@ services: RUST_BACKTRACE: 1 SQLX_OFFLINE: "true" healthcheck: - interval: 10s + interval: 2s retries: 600 test: curl -f http://127.0.0.1:7601/ @@ -164,9 +164,9 @@ services: PYTHONUNBUFFERED: "1" healthcheck: test: ["CMD", "curl", "-f", "http://localhost:8080/health"] - interval: 10s - retries: 30 - start_period: 60s + interval: 3s + retries: 100 + start_period: 15s # Real eligibility oracle from source eligibility-oracle-node: diff --git a/docker-compose.yaml b/docker-compose.yaml index b0fcb6d..dc1b7d1 100644 --- a/docker-compose.yaml +++ b/docker-compose.yaml @@ -130,7 +130,7 @@ services: - ./.env:/opt/config/.env:ro - config-local:/opt/config:ro healthcheck: - { interval: 10s, retries: 600, test: curl -f http://127.0.0.1:7600/ } + { interval: 2s, retries: 600, test: curl -f http://127.0.0.1:7600/ } dns_opt: - timeout:2 - attempts:5 @@ -157,7 +157,6 @@ services: build: { context: containers/indexer/start-indexing } depends_on: subgraph-deploy: { condition: service_completed_successfully } - indexer-agent: { condition: service_healthy } volumes: - ./containers/shared:/opt/shared:ro - ./.env:/opt/config/.env:ro From 5d422f39599681582663540768be510e4876dd53 Mon Sep 17 00:00:00 2001 From: MoonBoi9001 Date: Tue, 10 Mar 2026 18:32:39 -0500 Subject: [PATCH 05/10] fix --- .../indexer/indexer-agent/dev/run-dips.sh | 55 +++++++++++++------ 1 file changed, 39 insertions(+), 16 deletions(-) diff --git a/containers/indexer/indexer-agent/dev/run-dips.sh b/containers/indexer/indexer-agent/dev/run-dips.sh index 64cd1b8..49506b5 100755 --- a/containers/indexer/indexer-agent/dev/run-dips.sh +++ b/containers/indexer/indexer-agent/dev/run-dips.sh @@ -16,26 +16,49 @@ POSTGRES_HOST="${POSTGRES_HOST:-postgres}" wait_for_rpc -# Wait for this indexer to be staked on-chain +token_address=$(contract_addr L2GraphToken.address horizon) staking_address=$(contract_addr HorizonStaking.address horizon) -echo "Waiting for indexer ${INDEXER_ADDRESS} to be staked..." -_stake_attempt=0 -while [ "$_stake_attempt" -lt 90 ]; do - _stake_attempt=$((_stake_attempt + 1)) + +if [ "${INDEXER_ADDRESS}" = "${RECEIVER_ADDRESS}" ]; then + # Primary indexer: self-stake using RECEIVER's own key (no nonce collision + # with ACCOUNT0). Idempotent -- skips if already staked. indexer_stake="$(cast call "--rpc-url=http://chain:${CHAIN_RPC_PORT}" \ - "${staking_address}" 'getStake(address) (uint256)' "${INDEXER_ADDRESS}" 2>/dev/null || echo "0")" - if [ "${indexer_stake}" != "0" ]; then - echo "Indexer staked: ${indexer_stake}" - break + "${staking_address}" 'getStake(address) (uint256)' "${INDEXER_ADDRESS}")" + if [ "${indexer_stake}" = "0" ]; then + echo "Staking primary indexer ${INDEXER_ADDRESS}..." 
+ cast send "--rpc-url=http://chain:${CHAIN_RPC_PORT}" --confirmations=0 "--mnemonic=${MNEMONIC}" \ + --value=1ether "${INDEXER_ADDRESS}" + cast send "--rpc-url=http://chain:${CHAIN_RPC_PORT}" --confirmations=0 "--mnemonic=${MNEMONIC}" \ + "${token_address}" 'transfer(address,uint256)' "${INDEXER_ADDRESS}" '100000000000000000000000' + cast send "--rpc-url=http://chain:${CHAIN_RPC_PORT}" --confirmations=0 "--private-key=${INDEXER_SECRET}" \ + "${token_address}" 'approve(address,uint256)' "${staking_address}" '100000000000000000000000' + cast send "--rpc-url=http://chain:${CHAIN_RPC_PORT}" --confirmations=0 "--private-key=${INDEXER_SECRET}" \ + "${staking_address}" 'stake(uint256)' '100000000000000000000000' + echo "Primary indexer staked" + else + echo "Primary indexer already staked: ${indexer_stake}" fi - if [ $((_stake_attempt % 12)) -eq 0 ]; then - echo " still waiting for staking (attempt ${_stake_attempt}/90)..." +else + # Extra indexers: wait for start-indexing-extra to stake them on-chain. + echo "Waiting for indexer ${INDEXER_ADDRESS} to be staked..." + _stake_attempt=0 + while [ "$_stake_attempt" -lt 90 ]; do + _stake_attempt=$((_stake_attempt + 1)) + indexer_stake="$(cast call "--rpc-url=http://chain:${CHAIN_RPC_PORT}" \ + "${staking_address}" 'getStake(address) (uint256)' "${INDEXER_ADDRESS}" 2>/dev/null || echo "0")" + if [ "${indexer_stake}" != "0" ]; then + echo "Indexer staked: ${indexer_stake}" + break + fi + if [ $((_stake_attempt % 12)) -eq 0 ]; then + echo " still waiting for staking (attempt ${_stake_attempt}/90)..." + fi + sleep 5 + done + if [ "${indexer_stake}" = "0" ]; then + echo "ERROR: Indexer ${INDEXER_ADDRESS} not staked after 450s" + exit 1 fi - sleep 5 -done -if [ "${indexer_stake}" = "0" ]; then - echo "ERROR: Indexer ${INDEXER_ADDRESS} not staked after 450s" - exit 1 fi export INDEXER_AGENT_HORIZON_ADDRESS_BOOK=/opt/config/horizon.json From 4cc0cbdc463b26f93c3881a0de48dafaf71f51e9 Mon Sep 17 00:00:00 2001 From: MoonBoi9001 Date: Tue, 10 Mar 2026 19:00:10 -0500 Subject: [PATCH 06/10] remove unnecessary dependancies --- docker-compose.yaml | 4 ---- 1 file changed, 4 deletions(-) diff --git a/docker-compose.yaml b/docker-compose.yaml index dc1b7d1..8c23f4a 100644 --- a/docker-compose.yaml +++ b/docker-compose.yaml @@ -331,14 +331,11 @@ services: args: IISA_COMMIT: ${IISA_COMMIT:-main} MAXMIND_LICENSE_KEY: "skip" - depends_on: - redpanda: { condition: service_healthy } environment: REDPANDA_BOOTSTRAP_SERVERS: "redpanda:${REDPANDA_KAFKA_PORT}" REDPANDA_TOPIC: gateway_queries SCORES_FILE_PATH: /app/scores/indexer_scores.json GRAPH_NETWORK_SUBGRAPH_URL: "http://graph-node:8000/subgraphs/name/graph-network" - SCORING_INTERVAL: "600" SCORING_HTTP_PORT: "9090" DEGRADED_ALERT_THRESHOLD: "999" volumes: @@ -383,7 +380,6 @@ services: block-oracle: { condition: service_healthy } postgres: { condition: service_healthy } gateway: { condition: service_healthy } - iisa: { condition: service_healthy } ports: - "${DIPPER_ADMIN_RPC_PORT}:${DIPPER_ADMIN_RPC_PORT}" - "${DIPPER_INDEXER_RPC_PORT}:${DIPPER_INDEXER_RPC_PORT}" From 748726efeafea7a04642b1c980012fb409911720 Mon Sep 17 00:00:00 2001 From: MoonBoi9001 Date: Tue, 10 Mar 2026 19:30:23 -0500 Subject: [PATCH 07/10] fixes --- .claude/skills/network-status/SKILL.md | 2 +- scripts/gen-extra-indexers.py | 6 +++--- scripts/network-status.py | 12 ++++++++++++ 3 files changed, 16 insertions(+), 4 deletions(-) diff --git a/.claude/skills/network-status/SKILL.md b/.claude/skills/network-status/SKILL.md index 0a2f725..4783213 100644 --- 
a/.claude/skills/network-status/SKILL.md +++ b/.claude/skills/network-status/SKILL.md @@ -5,4 +5,4 @@ description: Show the current state of the local Graph protocol network. Use whe Run `python3 scripts/network-status.py` from the local-network repo root to fetch the current network state. -Output the result directly as text in a code block so it renders inline without the user needing to expand tool results. +Output the FULL result directly as text in a code block so it renders inline without the user needing to expand tool results. Do NOT truncate, summarize, or abbreviate any part of the output -- show every line including all deployment hashes. diff --git a/scripts/gen-extra-indexers.py b/scripts/gen-extra-indexers.py index 87896ea..dce4524 100755 --- a/scripts/gen-extra-indexers.py +++ b/scripts/gen-extra-indexers.py @@ -128,7 +128,7 @@ def agent_service(n: int, address: str, secret: str) -> str: graph-node-{n}: condition: service_healthy ports: - - "{7600 + n * 10}:7600" + - "{17600 + n * 10}:7600" stop_signal: SIGKILL volumes: - ${{INDEXER_AGENT_SOURCE_ROOT:?Set INDEXER_AGENT_SOURCE_ROOT}}:/opt/indexer-agent-source-root @@ -180,8 +180,8 @@ def service_service(n: int, address: str, secret: str) -> str: indexer-agent-{n}: condition: service_healthy ports: - - "{7601 + n * 10}:7601" - - "{7602 + n * 10}:7602" + - "{17601 + n * 10}:7601" + - "{17602 + n * 10}:7602" stop_signal: SIGKILL volumes: - ${{INDEXER_SERVICE_SOURCE_ROOT:?Set INDEXER_SERVICE_SOURCE_ROOT}}:/opt/source diff --git a/scripts/network-status.py b/scripts/network-status.py index 9011e01..80cf1ee 100755 --- a/scripts/network-status.py +++ b/scripts/network-status.py @@ -198,6 +198,18 @@ def main(): if not is_last_network: print() + # Idle indexers (registered on-chain but no active allocations) + active_indexer_ids = {idx["id"] for idx in indexers if idx["allocations"]} + idle_indexers = [idx for idx in indexers if not idx["allocations"]] + if idle_indexers: + print(f"\nidle indexers ({len(idle_indexers)} registered, no allocations)") + idle_indexers.sort(key=lambda x: x["id"]) + for i, idx in enumerate(idle_indexers): + is_last = i == len(idle_indexers) - 1 + branch = "\u2514\u2500" if is_last else "\u251c\u2500" + staked = format_tokens(idx["stakedTokens"]) + print(f" {branch} {idx['id']} staked {staked}") + # Unallocated subgraphs (indexed by graph-node but no active allocation) allocated_deps = {dep for net in tree.values() for dep in net} unallocated = [dep for dep in statuses if dep not in allocated_deps] From 50563444c39b48df08e1251e3bffbe0a6fcb8c81 Mon Sep 17 00:00:00 2001 From: MoonBoi9001 Date: Wed, 11 Mar 2026 11:09:20 -0500 Subject: [PATCH 08/10] feat: per-indexer operators with BIP39 mnemonic derivation Each extra indexer now gets a unique operator derived from a mnemonic of the form "test*11 {bip39_word}" instead of sharing ACCOUNT0. This matches production topology where each indexer has an independent operator. The generator validates BIP39 checksums at import time, derives operator addresses via eth_account, and threads them through compose services and the on-chain registration block. Two bugs fixed in the registration block: - setOperator argument order was (operator, verifier) but the contract expects (verifier, operator). The SubgraphService verifier triggers a legacy code path in HorizonStaking that reads _legacyOperatorAuth, which is only written when the verifier is the first argument. - Operator auth was inside the staking if/else, so re-runs after a partial failure would skip authorization. 
Now runs unconditionally. Co-Authored-By: Claude Opus 4.6 --- .claude/skills/add-indexers/SKILL.md | 2 + scripts/gen-extra-indexers.py | 101 +++++++++++++++++++-------- 2 files changed, 73 insertions(+), 30 deletions(-) diff --git a/.claude/skills/add-indexers/SKILL.md b/.claude/skills/add-indexers/SKILL.md index 22bdf45..f7dd8e4 100644 --- a/.claude/skills/add-indexers/SKILL.md +++ b/.claude/skills/add-indexers/SKILL.md @@ -18,6 +18,8 @@ The argument is the number of NEW indexers to add (defaults to 1). Extra indexers use hardhat "junk" mnemonic accounts starting at index 2. Maximum 18 extra (indices 2-19). +Each indexer gets a unique operator derived from a mnemonic of the form `test test test ... test {bip39_word}` (11 "test" + 1 valid checksum word). The generator handles mnemonic validation, operator address derivation, ETH funding, and on-chain `setOperator` authorization for both SubgraphService and HorizonStaking. + | Suffix | Mnemonic Index | Address | |--------|---------------|---------| | 2 | 2 | 0x3C44CdDdB6a900fa2b585dd299e03d12FA4293BC | diff --git a/scripts/gen-extra-indexers.py b/scripts/gen-extra-indexers.py index dce4524..ee4cf75 100755 --- a/scripts/gen-extra-indexers.py +++ b/scripts/gen-extra-indexers.py @@ -10,8 +10,13 @@ Shared across all indexers: chain (hardhat), ipfs, gateway, dipper, iisa, redpanda, contract addresses, protocol subgraphs (on primary graph-node). -Accounts come from the "junk" mnemonic starting at index 2 (indices 0-1 are -ACCOUNT0/ACCOUNT1). Hardhat pre-funds these with 10,000 ETH. +Indexer accounts come from the "junk" mnemonic starting at index 2 +(indices 0-1 are ACCOUNT0/ACCOUNT1). Hardhat pre-funds these with 10k ETH. + +Each extra indexer gets a unique operator derived from a mnemonic of the +form "test test test test test test test test test test test {word}" where +{word} is a BIP39 word that passes the 12-word checksum. This gives each +indexer an independent operator, matching production topology. Usage: python3 scripts/gen-extra-indexers.py 3 # generate 3 extra indexers @@ -21,6 +26,11 @@ import sys from pathlib import Path +from eth_account import Account +from mnemonic import Mnemonic + +Account.enable_unaudited_hdwallet_features() + # Hardhat "junk" mnemonic accounts starting at index 2. # Deterministic and pre-funded with 10,000 ETH by Hardhat. JUNK_ACCOUNTS = [ @@ -47,6 +57,19 @@ MAX_EXTRA = len(JUNK_ACCOUNTS) # 18 JUNK_MNEMONIC = "test test test test test test test test test test test junk" +# Operator mnemonics: "test*11 {word}" for each BIP39 word that passes +# the 12-word checksum. Skip "junk" (ACCOUNT0) and "zero" (RECEIVER). 
+_bip39 = Mnemonic("english") +_prefix = "test " * 11 +OPERATOR_MNEMONICS: list[tuple[str, str]] = [] # (mnemonic, address) +for _word in _bip39.wordlist: + if _word in ("junk", "zero"): + continue + _candidate = _prefix + _word + if _bip39.check(_candidate): + _addr = Account.from_mnemonic(_candidate).address + OPERATOR_MNEMONICS.append((_candidate, _addr)) + OUTPUT_FILE = Path(__file__).resolve().parent.parent / "compose" / "extra-indexers.yaml" @@ -107,7 +130,7 @@ def graph_node_service(n: int) -> str: """ -def agent_service(n: int, address: str, secret: str) -> str: +def agent_service(n: int, address: str, secret: str, operator_mnemonic: str) -> str: return f"""\ indexer-agent-{n}: container_name: indexer-agent-{n} @@ -139,7 +162,7 @@ def agent_service(n: int, address: str, secret: str) -> str: environment: INDEXER_ADDRESS: "{address}" INDEXER_SECRET: "{secret}" - INDEXER_OPERATOR_MNEMONIC: "{JUNK_MNEMONIC}" + INDEXER_OPERATOR_MNEMONIC: "{operator_mnemonic}" INDEXER_DB_NAME: "indexer_components_1" INDEXER_SVC_HOST: "indexer-service-{n}" GRAPH_NODE_HOST: "graph-node-{n}" @@ -159,7 +182,7 @@ def agent_service(n: int, address: str, secret: str) -> str: """ -def service_service(n: int, address: str, secret: str) -> str: +def service_service(n: int, address: str, secret: str, operator_mnemonic: str) -> str: return f"""\ indexer-service-{n}: container_name: indexer-service-{n} @@ -192,7 +215,7 @@ def service_service(n: int, address: str, secret: str) -> str: environment: INDEXER_ADDRESS: "{address}" INDEXER_SECRET: "{secret}" - INDEXER_OPERATOR_MNEMONIC: "{JUNK_MNEMONIC}" + INDEXER_OPERATOR_MNEMONIC: "{operator_mnemonic}" INDEXER_DB_NAME: "indexer_components_1" GRAPH_NODE_HOST: "graph-node-{n}" PROTOCOL_GRAPH_NODE_HOST: "graph-node" @@ -212,7 +235,7 @@ def service_service(n: int, address: str, secret: str) -> str: """ -def tap_service(n: int, address: str, secret: str) -> str: +def tap_service(n: int, address: str, secret: str, operator_mnemonic: str) -> str: return f"""\ tap-agent-{n}: container_name: tap-agent-{n} @@ -231,7 +254,7 @@ def tap_service(n: int, address: str, secret: str) -> str: environment: INDEXER_ADDRESS: "{address}" INDEXER_SECRET: "{secret}" - INDEXER_OPERATOR_MNEMONIC: "{JUNK_MNEMONIC}" + INDEXER_OPERATOR_MNEMONIC: "{operator_mnemonic}" INDEXER_DB_NAME: "indexer_components_1" GRAPH_NODE_HOST: "graph-node-{n}" PROTOCOL_GRAPH_NODE_HOST: "graph-node" @@ -246,35 +269,44 @@ def tap_service(n: int, address: str, secret: str) -> str: """ -def registration_block(n: int, address: str, secret: str) -> str: +def registration_block(n: int, address: str, secret: str, operator_mnemonic: str) -> str: return f"""\ # --- Extra indexer {n}: {address} --- ADDR="{address}" KEY="{secret}" + OP_MNEMONIC="{operator_mnemonic}" + + # Derive this indexer's unique operator address from its mnemonic + OPERATOR=$$(cast wallet address --mnemonic="$$OP_MNEMONIC") + echo "Extra indexer {n}: $$ADDR operator: $$OPERATOR" + + # Staking (idempotent -- skip if already staked) STAKE=$$(cast call --rpc-url="$$RPC" "$$STAKING" 'getStake(address)(uint256)' "$$ADDR") if [ "$$STAKE" != "0" ]; then - echo "Extra indexer {n} ($$ADDR) already staked ($$STAKE) -- skipping" + echo " already staked ($$STAKE)" else - echo "Registering extra indexer {n}: {address}" + # Fund indexer with ETH + GRT, then stake + retry_cast cast send --rpc-url="$$RPC" --confirmations=1 --mnemonic="$$MNEMONIC" \\ + --value=1ether "$$ADDR" + retry_cast cast send --rpc-url="$$RPC" --confirmations=1 --mnemonic="$$MNEMONIC" \\ + "$$TOKEN" 
'transfer(address,uint256)' "$$ADDR" '100000000000000000000000' + retry_cast cast send --rpc-url="$$RPC" --confirmations=1 --private-key="$$KEY" \\ + "$$TOKEN" 'approve(address,uint256)' "$$STAKING" '100000000000000000000000' + retry_cast cast send --rpc-url="$$RPC" --confirmations=1 --private-key="$$KEY" \\ + "$$STAKING" 'stake(uint256)' '100000000000000000000000' + echo " staked" + fi - # Transfer ETH + GRT from deployer - retry_cast cast send --rpc-url="$$RPC" --confirmations=1 --mnemonic="$$MNEMONIC" \\ - --value=1ether "$$ADDR" + # Operator auth (always run -- idempotent on-chain, ensures auth + # even if a previous run staked but failed on the auth step). + # setOperator(verifier, operator, allowed) -- verifier first! retry_cast cast send --rpc-url="$$RPC" --confirmations=1 --mnemonic="$$MNEMONIC" \\ - "$$TOKEN" 'transfer(address,uint256)' "$$ADDR" '100000000000000000000000' - - # Stake GRT - retry_cast cast send --rpc-url="$$RPC" --confirmations=1 --private-key="$$KEY" \\ - "$$TOKEN" 'approve(address,uint256)' "$$STAKING" '100000000000000000000000' + --value=1ether "$$OPERATOR" retry_cast cast send --rpc-url="$$RPC" --confirmations=1 --private-key="$$KEY" \\ - "$$STAKING" 'stake(uint256)' '100000000000000000000000' - - # Authorize as own operator for SubgraphService + "$$STAKING" 'setOperator(address,address,bool)' "$$SSA" "$$OPERATOR" "true" retry_cast cast send --rpc-url="$$RPC" --confirmations=1 --private-key="$$KEY" \\ - "$$STAKING" 'setOperator(address,address,bool)' "$$ADDR" "$$SSA" "true" - - echo "Extra indexer {n} registered" - fi + "$$STAKING" 'setOperator(address,address,bool)' "$$STAKING" "$$OPERATOR" "true" + echo " operator authorized" """ @@ -313,6 +345,14 @@ def init_indexers_service(registrations: str) -> str: def generate(count: int) -> str: + if count > len(OPERATOR_MNEMONICS): + print( + f"Only {len(OPERATOR_MNEMONICS)} valid operator mnemonics available, " + f"requested {count}", + file=sys.stderr, + ) + sys.exit(1) + parts = [] reg_blocks = [] volume_names = [] @@ -320,14 +360,15 @@ def generate(count: int) -> str: for i in range(count): n = i + 2 # service suffix: postgres-2, graph-node-2, etc. address, secret = JUNK_ACCOUNTS[i] + op_mnemonic, op_address = OPERATOR_MNEMONICS[i] volume_names.append(f"postgres-{n}-data") parts.append(postgres_service(n)) parts.append(graph_node_service(n)) - parts.append(agent_service(n, address, secret)) - parts.append(service_service(n, address, secret)) - parts.append(tap_service(n, address, secret)) - reg_blocks.append(registration_block(n, address, secret)) + parts.append(agent_service(n, address, secret, op_mnemonic)) + parts.append(service_service(n, address, secret, op_mnemonic)) + parts.append(tap_service(n, address, secret, op_mnemonic)) + reg_blocks.append(registration_block(n, address, secret, op_mnemonic)) parts.append(init_indexers_service("\n".join(reg_blocks))) From dbc1f14b81efb3aa8a13988d47e74492da380886 Mon Sep 17 00:00:00 2001 From: MoonBoi9001 Date: Wed, 11 Mar 2026 12:33:53 -0500 Subject: [PATCH 09/10] fix: register SubgraphService as rewards issuer, add IISA score reload, network-status contract health Register SubgraphService on RewardsManager in the deploy scripts so allocation operations (reallocate, DIPs agreement acceptance) don't revert with "Not a rewards issuer". Add contract health check to network-status.py to surface this misconfiguration immediately. Configure IISA cronjob to refresh the API's score cache after writing, and set the local reload interval to 120s. 
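A quick manual spot-check for the RewardsManager wiring, assuming the local chain is exposed on port 8545 and `$REWARDS_MANAGER` is the address from the config JSONs (placeholder here):

```bash
# Should print the SubgraphService address once the deploy script has registered it.
cast call --rpc-url http://localhost:8545 \
  "$REWARDS_MANAGER" 'subgraphService()(address)'
```
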
---
 BUGS.md                                | 20 ++++
 containers/core/graph-contracts/run.sh | 16 +++++
 docker-compose.yaml                    |  2 +
 scripts/network-status.py              | 84 ++++++++++++++++++++++++++
 4 files changed, 122 insertions(+)

diff --git a/BUGS.md b/BUGS.md
index ea43f96..1e5c2a5 100644
--- a/BUGS.md
+++ b/BUGS.md
@@ -68,4 +68,24 @@
 **Repo**: `contracts`
 **Fix**: Add `{ after: [GraphPeripheryModule, HorizonProxiesModule] }` to the `deployImplementation` call in `HorizonStaking.ts`. Applied locally on `indexing-payments-management-audit`.
+**PR**: not submitted
+
+## BUG-008: SubgraphService not registered as rewards issuer in RewardsManager
+
+**Symptom**: indexer-agent fails all allocation operations (reallocate, new allocations for DIPs) with `execution reverted: "Not a rewards issuer"`. The agent enters a perpetual retry loop, blocking both protocol subgraph reallocations and DIPs agreement acceptance.
+
+**Root cause**: `AllocationManager.stakeUsageSummary()` calls `RewardsManager.getRewards(SubgraphService, allocationId)` before executing allocation transactions. The RewardsManager checks whether the caller (SubgraphService at `0x09635F...`) is a registered rewards issuer. On a fresh local-network deploy, SubgraphService is never whitelisted in the RewardsManager, so all `getRewards` calls revert.
+
+**Repo**: `local-network` (deploy scripts)
+**Fix**: Applied in this commit: `containers/core/graph-contracts/run.sh` now checks `RewardsManager.subgraphService()` after contract deployment and calls `setSubgraphService(SubgraphService)` when it is unset.
+**PR**: not submitted
+
+## BUG-009: IISA API does not reload scores after cronjob updates them
+
+**Symptom**: IISA selection endpoint returns stale data (e.g. 1 indexer when 10 exist). The cronjob correctly computes and writes updated scores to the shared volume, but the API serves its startup cache indefinitely. This caused dipper to only select 1 of 10 available indexers for a DIPs agreement.
+
+**Root cause**: The IISA HTTP API (`iisa` service) loads scores into an in-memory DataFrame at startup and never reloads them. The `POST /refresh` endpoint exists but nothing calls it. The cronjob writes to `/app/scores/indexer_scores.json` on a shared volume, but the API reads from memory, not disk, on each request.
+
+**Repo**: `subgraph-dips-indexer-selection`
+**Fix**: Two-layer approach applied locally: (1) The cronjob now calls `POST /refresh` on the IISA API after writing scores (`IISA_API_URL` env var, warns at startup if unset). (2) The API now runs a background task that checks the scores file mtime every `IISA_SCORES_RELOAD_INTERVAL` seconds (default 120) and reloads when it changes. The cronjob provides immediate freshness; the periodic reload is a fallback if the refresh call fails.
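+
+A minimal sketch of the reload task (illustrative only -- hypothetical names, not the actual IISA code): poll the scores file's mtime and swap the in-memory cache when the cronjob rewrites it.
+
+```python
+# Sketch only: assumes some reload_scores() callable exists that replaces
+# the API's in-memory DataFrame; the real IISA implementation may differ.
+import asyncio
+import os
+
+SCORES_FILE = os.environ.get("SCORES_FILE_PATH", "/app/scores/indexer_scores.json")
+RELOAD_INTERVAL = int(os.environ.get("IISA_SCORES_RELOAD_INTERVAL", "120"))
+
+
+async def watch_scores_file(reload_scores) -> None:
+    last_mtime = 0.0
+    while True:
+        try:
+            mtime = os.path.getmtime(SCORES_FILE)
+            if mtime > last_mtime:
+                reload_scores()
+                last_mtime = mtime
+        except FileNotFoundError:
+            pass  # cronjob has not written scores yet
+        await asyncio.sleep(RELOAD_INTERVAL)
+```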
**PR**: not submitted \ No newline at end of file diff --git a/containers/core/graph-contracts/run.sh b/containers/core/graph-contracts/run.sh index bb77bff..9e5be4e 100644 --- a/containers/core/graph-contracts/run.sh +++ b/containers/core/graph-contracts/run.sh @@ -100,6 +100,22 @@ if [ -n "$rewards_manager" ]; then fi fi +# -- Ensure SubgraphService is registered as rewards issuer on RewardsManager -- +subgraph_service=$(jq -r '.["1337"].SubgraphService.address // empty' /opt/config/subgraph-service.json) +if [ -n "$rewards_manager" ] && [ -n "$subgraph_service" ]; then + current_service=$(cast call --rpc-url="http://chain:${CHAIN_RPC_PORT}" \ + "${rewards_manager}" "subgraphService()(address)" 2>/dev/null | tr '[:upper:]' '[:lower:]') + expected_lower=$(echo "$subgraph_service" | tr '[:upper:]' '[:lower:]') + if [ "$current_service" = "$expected_lower" ]; then + echo " SubgraphService already set on RewardsManager: ${subgraph_service}" + else + echo " Setting SubgraphService on RewardsManager to ${subgraph_service} (was ${current_service})" + cast send --rpc-url="http://chain:${CHAIN_RPC_PORT}" --confirmations=0 \ + --private-key="${ACCOUNT1_SECRET}" \ + "${rewards_manager}" "setSubgraphService(address)" "${subgraph_service}" + fi +fi + echo "==== Phase 1 complete ====" # ============================================================ diff --git a/docker-compose.yaml b/docker-compose.yaml index 8c23f4a..19fdcb7 100644 --- a/docker-compose.yaml +++ b/docker-compose.yaml @@ -338,6 +338,7 @@ services: GRAPH_NETWORK_SUBGRAPH_URL: "http://graph-node:8000/subgraphs/name/graph-network" SCORING_HTTP_PORT: "9090" DEGRADED_ALERT_THRESHOLD: "999" + IISA_API_URL: "http://iisa:8080" volumes: - iisa-scores:/app/scores healthcheck: @@ -359,6 +360,7 @@ services: IISA_HOST: "0.0.0.0" IISA_PORT: "8080" IISA_LOG_LEVEL: DEBUG + IISA_SCORES_RELOAD_INTERVAL: "120" SCORES_FILE_PATH: /app/scores/indexer_scores.json volumes: - iisa-scores:/app/scores diff --git a/scripts/network-status.py b/scripts/network-status.py index 80cf1ee..c3a62c5 100755 --- a/scripts/network-status.py +++ b/scripts/network-status.py @@ -7,8 +7,13 @@ GRAPH_NODE_STATUS = "http://localhost:8030/graphql" GRAPH_NODE_QUERY = "http://localhost:8000" +HARDHAT_RPC = "http://localhost:8545" NAMED_SUBGRAPHS = ["graph-network", "semiotic/tap", "block-oracle"] +# Solidity function selectors (first 4 bytes of keccak256 of the signature) +# Source: contracts build-info methodIdentifiers +SELECTOR_SUBGRAPH_SERVICE = "26058249" # subgraphService() + def gql(url: str, query: str) -> dict: req = Request(url, json.dumps({"query": query}).encode(), {"Content-Type": "application/json"}) @@ -19,6 +24,72 @@ def gql(url: str, query: str) -> dict: return data["data"] +def eth_call(to: str, data: str) -> str: + """Make a raw eth_call to the Hardhat RPC. 
Returns the hex result.""" + payload = json.dumps({ + "jsonrpc": "2.0", + "method": "eth_call", + "params": [{"to": to, "data": "0x" + data}, "latest"], + "id": 1, + }) + req = Request(HARDHAT_RPC, payload.encode(), {"Content-Type": "application/json"}) + with urlopen(req, timeout=5) as resp: + result = json.loads(resp.read()) + if "error" in result: + raise RuntimeError(f"eth_call error: {result['error']}") + return result["result"] + + +def decode_address(hex_result: str) -> str: + """Decode a 32-byte ABI-encoded address from an eth_call result.""" + raw = hex_result.replace("0x", "") + if len(raw) < 40: + return "0x" + "0" * 40 + # Address is the last 40 hex chars of the 64-char word + return "0x" + raw[-40:] + + +ZERO_ADDRESS = "0x" + "0" * 40 + + +def fetch_contract_health(ns_id: str) -> list[dict]: + """Check contract configuration health. Returns a list of check results.""" + checks = [] + + # Get RewardsManager address from the network subgraph + try: + data = gql(f"{GRAPH_NODE_QUERY}/subgraphs/id/{ns_id}", """ + { graphNetwork(id: "1") { rewardsManager } } + """) + rewards_manager = data["graphNetwork"]["rewardsManager"] + except Exception as e: + checks.append({ + "name": "RewardsManager address", + "ok": False, + "detail": f"could not query network subgraph: {e}", + }) + return checks + + # Call subgraphService() on the RewardsManager + try: + result = eth_call(rewards_manager, SELECTOR_SUBGRAPH_SERVICE) + registered_addr = decode_address(result) + is_registered = registered_addr.lower() != ZERO_ADDRESS.lower() + checks.append({ + "name": "RewardsManager \u2192 SubgraphService rewards issuer", + "ok": is_registered, + "detail": registered_addr if is_registered else "not set (zero address)", + }) + except Exception as e: + checks.append({ + "name": "RewardsManager \u2192 SubgraphService rewards issuer", + "ok": False, + "detail": f"eth_call failed: {e}", + }) + + return checks + + def fetch_indexing_statuses() -> dict: """deployment_id -> {network, health, latest_block, chain_head}""" data = gql(GRAPH_NODE_STATUS, """{ @@ -236,6 +307,19 @@ def main(): branch = "\u2514\u2500" if is_last else "\u251c\u2500" print(f" {branch} {dep}") + # Contract health checks + health_checks = fetch_contract_health(ns_id) + if health_checks: + print(f"\ncontract health") + for i, check in enumerate(health_checks): + is_last = i == len(health_checks) - 1 + branch = "\u2514\u2500" if is_last else "\u251c\u2500" + if check["ok"]: + status_str = f"YES {check['detail']}" + else: + status_str = f"NO \u26a0 {check['detail']}" + print(f" {branch} {check['name']}: {status_str}") + return 0 From 4578b4d575483b2e91c07074bfbdcf5a7eb95fa7 Mon Sep 17 00:00:00 2001 From: MoonBoi9001 Date: Wed, 11 Mar 2026 16:43:48 -0500 Subject: [PATCH 10/10] update skills, document bugs, update script --- .claude/skills/add-indexers/SKILL.md | 60 +++++++++++++++++++++++----- .claude/skills/fresh-deploy/SKILL.md | 23 ++++++----- BUGS.md | 20 ++++++++++ scripts/gen-extra-indexers.py | 35 +++++++++++++++- 4 files changed, 115 insertions(+), 23 deletions(-) diff --git a/.claude/skills/add-indexers/SKILL.md b/.claude/skills/add-indexers/SKILL.md index f7dd8e4..1ec6de5 100644 --- a/.claude/skills/add-indexers/SKILL.md +++ b/.claude/skills/add-indexers/SKILL.md @@ -18,7 +18,7 @@ The argument is the number of NEW indexers to add (defaults to 1). Extra indexers use hardhat "junk" mnemonic accounts starting at index 2. Maximum 18 extra (indices 2-19). 
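+
+For reference, these accounts can be re-derived from the standard hardhat "junk" mnemonic at the listed indices. A hypothetical snippet (not part of the repo) using `eth_account`:
+
+```python
+# Hypothetical helper: derive the hardhat "junk" accounts at indices 2-19.
+from eth_account import Account
+
+Account.enable_unaudited_hdwallet_features()
+JUNK_MNEMONIC = "test test test test test test test test test test test junk"
+
+for index in range(2, 20):
+    acct = Account.from_mnemonic(JUNK_MNEMONIC, account_path=f"m/44'/60'/0'/0/{index}")
+    print(index, acct.address)
+```
+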
-Each indexer gets a unique operator derived from a mnemonic of the form `test test test ... test {bip39_word}` (11 "test" + 1 valid checksum word). The generator handles mnemonic validation, operator address derivation, ETH funding, and on-chain `setOperator` authorization for both SubgraphService and HorizonStaking. +Each indexer gets a unique operator derived from a mnemonic of the form `test test test ... test {bip39_word}` (11 "test" + 1 valid checksum word). The generator handles mnemonic validation, operator address derivation, ETH funding, on-chain `setOperator` authorization for both SubgraphService and HorizonStaking, and PaymentsEscrow deposits for DIPs signer validation. | Suffix | Mnemonic Index | Address | |--------|---------------|---------| @@ -32,7 +32,7 @@ Each indexer gets a unique operator derived from a mnemonic of the form `test te ### 1. Determine current extra indexer count ```bash -docker ps --format '{{.Names}}' | grep -oP 'indexer-agent-\K\d+' | sort -n | tail -1 +docker ps --format '{{.Names}}' | grep 'indexer-agent-' | sed 's/indexer-agent-//' | sort -n | tail -1 ``` If no matches, current extra count is 0. Otherwise the highest suffix minus 1 gives the count (suffix 2 = 1 extra, suffix 3 = 2 extras, etc.). @@ -55,7 +55,7 @@ This regenerates the full compose file for ALL extras (existing + new). It's ide Two-step process to avoid bouncing shared services. -First, run `start-indexing-extra` to register new indexers on-chain: +First, run `start-indexing-extra` to register new indexers on-chain (stake, operator auth, escrow deposits): ```bash DOCKER_DEFAULT_PLATFORM= docker compose \ @@ -65,35 +65,72 @@ DOCKER_DEFAULT_PLATFORM= docker compose \ run --rm start-indexing-extra ``` -Then start the actual containers with `--no-deps --no-recreate`. For each new suffix N: +Then start all new containers in a single command with `--no-deps --no-recreate`. List all new service names space-separated: ```bash DOCKER_DEFAULT_PLATFORM= docker compose \ -f docker-compose.yaml \ -f compose/dev/dips.yaml \ -f compose/extra-indexers.yaml \ - up -d --no-deps --no-recreate postgres-N graph-node-N indexer-agent-N indexer-service-N tap-agent-N + up -d --no-deps --no-recreate postgres-2 graph-node-2 indexer-agent-2 indexer-service-2 tap-agent-2 [... all suffixes ...] ``` `--no-deps` prevents compose from walking the dependency tree and bouncing shared services. `--no-recreate` prevents touching already-running containers. -### 5. Verify health +### 5. Verify container health Indexer-services share a `flock`-serialized cargo build, so they come up sequentially. The first service to start builds the binary (~2-3 minutes if not cached); subsequent services acquire the lock, find the binary already built, and start immediately. Wait 30 seconds after `up -d` completes, then check status: ```bash -docker ps --format '{{.Names}}\t{{.Status}}' | grep -E '(indexer-agent|indexer-service|tap-agent)-[0-9]' | sort +docker ps --format '{{.Names}}\t{{.Status}}' | grep -E '(indexer-agent|indexer-service)-[0-9]' | sort ``` All agents and services should show `(healthy)`. If a service is still `(health: starting)`, it may be waiting for the cargo build lock -- wait another 60 seconds and recheck. -If an agent is stuck retrying (check `docker logs indexer-agent-N 2>&1 | tail -5`), the retry loop will show attempt counts. Common causes: `start-indexing-extra` hasn't completed yet (check `docker logs start-indexing-extra`), or a wrong address in JUNK_ACCOUNTS. +### 6. 
Wait for network subgraph to index URL registrations -### 6. Report +After agents start, they call `subgraphService.register(url, geo)` on-chain. The network subgraph must index these events before IISA or dipper can see the new indexers. Poll until all indexers have URLs: -Show a summary of all running indexers (primary + extras) with their container names, addresses, and health status. +```bash +curl -s -X POST -H "Content-Type: application/json" \ + -d '{"query":"{ indexers(where: { url_not: \"\" }) { id } }"}' \ + http://localhost:8000/subgraphs/name/graph-network \ + | python3 -c "import json,sys; print(len(json.load(sys.stdin)['data']['indexers']))" +``` + +This should return `TOTAL_EXPECTED` (1 primary + N extras). If it's lower, the subgraph is still catching up -- wait 10 seconds and recheck. Typically takes 30-90 seconds after agents register. + +### 7. Trigger IISA score refresh + +The IISA cronjob exposes `POST /run` on port 9090 for manual scoring runs. Without triggering it, IISA won't see the new indexers until the next scheduled cycle (default 120s). + +```bash +DOCKER_DEFAULT_PLATFORM= docker compose \ + -f docker-compose.yaml \ + -f compose/dev/dips.yaml \ + -f compose/extra-indexers.yaml \ + exec iisa-cronjob curl -s -X POST http://localhost:9090/run +``` + +Then verify scores were written for the expected number of indexers: + +```bash +DOCKER_DEFAULT_PLATFORM= docker compose \ + -f docker-compose.yaml \ + -f compose/dev/dips.yaml \ + -f compose/extra-indexers.yaml \ + logs iisa-cronjob --since 30s 2>&1 | grep -E "Wrote|indexers" +``` + +### 8. Report + +Show a summary including: +- All running indexers (primary + extras) with container names, addresses, and health status +- Number of indexers visible in the network subgraph (with URLs) +- Number of indexers scored by IISA +- Confirmation that the pipeline is ready for `/send-indexing-request` ## Constraints @@ -101,7 +138,8 @@ Show a summary of all running indexers (primary + extras) with their container n - Always use all three compose files: `-f docker-compose.yaml -f compose/dev/dips.yaml -f compose/extra-indexers.yaml` - Never use `--force-recreate` when adding indexers to a running stack - The generator script is at `scripts/gen-extra-indexers.py` -- The `start-indexing-extra` container handles on-chain GRT staking and operator authorization +- The `start-indexing-extra` container handles on-chain GRT staking, operator authorization, and PaymentsEscrow deposits - Agents poll for on-chain staking automatically (up to 450s), so `start-indexing-extra` can run in parallel with container startup - Agents retry automatically (30 attempts, 10s delay) -- don't manually restart unless the error is persistent and non-transient - If COMPOSE_FILE in .environment doesn't include `compose/extra-indexers.yaml`, warn the user to add it +- The `/fresh-deploy` skill must include `compose/extra-indexers.yaml` in its `down -v` command, otherwise extra indexer postgres volumes survive and agents have stale state on the next deploy diff --git a/.claude/skills/fresh-deploy/SKILL.md b/.claude/skills/fresh-deploy/SKILL.md index 85d4ad8..ae13109 100644 --- a/.claude/skills/fresh-deploy/SKILL.md +++ b/.claude/skills/fresh-deploy/SKILL.md @@ -9,25 +9,25 @@ Reset the local-network Docker Compose environment to a clean state and bring al ## Prerequisites -The contracts repo at `$CONTRACTS_SOURCE_ROOT` (typically `/Users/samuel/Documents/github/contracts`) must be on `indexing-payments-management-audit` (PR #1301) with three local commits 
applied on top: +The contracts repo at `$CONTRACTS_SOURCE_ROOT` (typically `/Users/samuel/Documents/github/contracts`) must be on `fix/horizon-staking-ignition-dependency` (or `mde/dips-ignition-deployment` + BUG-007 fix). This branch has `IndexingAgreementManager`, RecurringCollector in toolshed/ignition natively, and the HorizonStaking deployment ordering fix. -1. Cherry-pick `02b6996e` from `escrow-management` -- adds RecurringCollector Ignition module, wires it into SubgraphService deployment, and links external libraries -2. Cherry-pick `d2a0d30e` from `escrow-management` -- adds `RecurringCollector` to `GraphHorizonContractNameList` in toolshed so it gets written to horizon.json -3. Local fix for BUG-007 -- adds `{ after: [GraphPeripheryModule, HorizonProxiesModule] }` to the `deployImplementation` call in `packages/horizon/ignition/modules/core/HorizonStaking.ts` +After checking out the branch, the toolshed package must be compiled: `cd packages/toolshed && pnpm build:self`. -After applying these, the toolshed package must be compiled: `cd packages/toolshed && pnpm build:self`. - -To verify the local commits are present, check: `cd $CONTRACTS_SOURCE_ROOT && git log --oneline -5`. The top 3 commits should be the fix and two cherry-picks. +To verify: `cd $CONTRACTS_SOURCE_ROOT && git log --oneline -3` should show the HorizonStaking fix on top of the mde branch. ## Steps ### 1. Tear down everything including volumes +Build the compose file list dynamically to include extra-indexers if present. This is critical -- omitting `compose/extra-indexers.yaml` leaves extra indexer containers and their postgres volumes alive, causing stale state on the next deploy (agents think they're registered on the old chain). + ```bash -DOCKER_DEFAULT_PLATFORM= docker compose -f docker-compose.yaml -f compose/dev/dips.yaml down -v +COMPOSE_FILES="-f docker-compose.yaml -f compose/dev/dips.yaml" +[ -f compose/extra-indexers.yaml ] && COMPOSE_FILES="$COMPOSE_FILES -f compose/extra-indexers.yaml" +DOCKER_DEFAULT_PLATFORM= docker compose $COMPOSE_FILES down -v ``` -This destroys all data: chain state, postgres, subgraph deployments, config volume with contract addresses. +This destroys all data: chain state, postgres (including extra indexer postgres volumes), subgraph deployments, config volume with contract addresses. ### 2. Clear stale Ignition journals @@ -41,6 +41,8 @@ This is safe after a `down -v` since the chain state it references no longer exi ### 3. Bring everything up +Use only the base compose files for the initial deploy. Extra indexers are added separately via the `/add-indexers` skill after the core stack is healthy. + ```bash DOCKER_DEFAULT_PLATFORM= docker compose -f docker-compose.yaml -f compose/dev/dips.yaml up -d --build ``` @@ -58,7 +60,7 @@ DOCKER_DEFAULT_PLATFORM= docker compose -f docker-compose.yaml -f compose/dev/di jq '.["1337"].RecurringCollector' /opt/config/horizon.json ``` -If this returns null, the contracts toolshed wasn't rebuilt after cherry-picking the whitelist fix. Run `cd $CONTRACTS_SOURCE_ROOT/packages/toolshed && pnpm build:self` and repeat from step 1. +If this returns null, the contracts toolshed wasn't rebuilt. Run `cd $CONTRACTS_SOURCE_ROOT/packages/toolshed && pnpm build:self` and repeat from step 1. ### 5. Fix nonce race failures @@ -136,6 +138,7 @@ The authorization chain that makes gateway queries work: - **ACCOUNT0 nonce race**: `start-indexing` and `tap-escrow-manager` both use ACCOUNT0 concurrently after `graph-contracts` finishes. 
Either can fail with "nonce too low". If `start-indexing` fails, `dipper` and `ready` never start (cascade). The fix is to restart the failed container and run `up -d` again. - **Stale Ignition journals**: After a failed `graph-contracts` deployment, the journal at `packages/subgraph-service/ignition/deployments/chain-1337/` contains partial state. A fresh `down -v` destroys the chain but not the journal (it's in the mounted source). Always delete it before retrying (step 2). - The contracts toolshed must be compiled (JS, not just TS) for the RecurringCollector whitelist to take effect. Use `pnpm build:self` in `packages/toolshed` (not `pnpm build` which fails on the `interfaces` package). +- **Extra indexer stale state**: If `compose/extra-indexers.yaml` is not included in the `down -v` command, extra indexer containers and their postgres volumes survive the teardown. On the next deploy, agents have stale state from the old chain -- they believe they're already registered and never re-register URLs on the new chain. The network subgraph then shows `url: null` for these indexers and IISA can't select them. ## Key contract addresses (change each deploy) diff --git a/BUGS.md b/BUGS.md index 1e5c2a5..91677e6 100644 --- a/BUGS.md +++ b/BUGS.md @@ -88,4 +88,24 @@ **Repo**: `subgraph-dips-indexer-selection` **Fix**: Two-layer approach applied locally: (1) The cronjob now calls `POST /refresh` on the IISA API after writing scores (`IISA_API_URL` env var, warns at startup if unset). (2) The API now runs a background task that checks the scores file mtime every `IISA_SCORES_RELOAD_INTERVAL` seconds (default 120) and reloads when it changes. The cronjob provides immediate freshness; the periodic reload is a fallback if the refresh call fails. +**PR**: https://github.com/edgeandnode/subgraph-dips-indexer-selection/pull/75 + +## BUG-010: Dipper topology excludes indexers without allocations + +**Symptom**: Dipper logs `"IISA selected indexer not found in network topology, skipping"` for every idle indexer. IISA selects 3 candidates from 10, all 10 pass the price filter, but dipper skips all 3 because they have no active allocations. + +**Root cause**: Dipper's network topology is built exclusively from subgraph allocation data (`indexerAllocations`). An indexer only enters the topology map when it appears in allocation data. Idle indexers (registered with stake, URL, and operators but no allocations) are invisible. This is a chicken-and-egg problem: DIPs is supposed to create allocations, but dipper can't propose to indexers without existing allocations. + +**Repo**: `dipper` +**Fix**: Extended the `indexer_operators` fetcher to also return the URL field, and changed its `Extend` impl to create indexer entries (`.or_insert_with()`) instead of only modifying existing ones (`.and_modify()`). Now all registered indexers with a valid URL appear in the topology regardless of allocation status. +**PR**: not submitted + +## BUG-011: Extra indexers rejected with SIGNER_NOT_AUTHORISED due to missing escrow accounts + +**Symptom**: After fixing BUG-010, dipper sends proposals to idle indexers but all are rejected with `SIGNER_NOT_AUTHORISED`. + +**Root cause**: The indexer-service's DIPs signer validator reuses the TAP `EscrowSignerValidator`, which queries the network subgraph for `paymentsEscrowAccounts` filtered by receiver (indexer address). The `tap-escrow-manager` only deposits GRT into PaymentsEscrow for the primary indexer. 
Extra indexers have no escrow accounts, so the query returns empty and all signers are rejected -- even though the signer authorization (on GraphTallyCollector) exists at the payer level. + +**Repo**: `local-network` +**Fix**: Added escrow deposits (GRT approve + `PaymentsEscrow.deposit(collector, receiver, amount)`) for each extra indexer in the `start-indexing-extra` init container generated by `scripts/gen-extra-indexers.py`. In production, the `IndexingAgreementManager` contract (on the `mde/dips-ignition-deployment` branch) handles this automatically when `offerAgreement()` is called. **PR**: not submitted \ No newline at end of file diff --git a/scripts/gen-extra-indexers.py b/scripts/gen-extra-indexers.py index ee4cf75..a8864b5 100755 --- a/scripts/gen-extra-indexers.py +++ b/scripts/gen-extra-indexers.py @@ -310,7 +310,24 @@ def registration_block(n: int, address: str, secret: str, operator_mnemonic: str """ -def init_indexers_service(registrations: str) -> str: +def escrow_deposit_block(n: int, address: str) -> str: + return f"""\ + # Escrow deposit for extra indexer {n} + BALANCE=$$(cast call --rpc-url="$$RPC" "$$ESCROW" \\ + 'getBalance(address,address,address)(uint256)' \\ + "$$PAYER" "$$COLLECTOR" "{address}") + if [ "$$BALANCE" != "0" ]; then + echo " Escrow for {address}: already funded ($$BALANCE)" + else + echo " Depositing escrow for {address}" + retry_cast cast send --rpc-url="$$RPC" --confirmations=1 --mnemonic="$$MNEMONIC" \\ + "$$TOKEN" 'approve(address,uint256)' "$$ESCROW" "$$DEPOSIT_AMOUNT" + retry_cast cast send --rpc-url="$$RPC" --confirmations=1 --mnemonic="$$MNEMONIC" \\ + "$$ESCROW" 'deposit(address,address,uint256)' "$$COLLECTOR" "{address}" "$$DEPOSIT_AMOUNT" + fi""" + + +def init_indexers_service(registrations: str, escrow_deposits: str) -> str: return f"""\ start-indexing-extra: container_name: start-indexing-extra @@ -341,6 +358,18 @@ def init_indexers_service(registrations: str) -> str: {registrations} echo "All extra indexers registered" + + # Deposit GRT into PaymentsEscrow for each extra indexer. + # The indexer-service validates DIPs proposal signers via the network + # subgraph's paymentsEscrowAccounts (filtered by receiver). Without a + # deposit, the query returns empty and all signers are rejected. + ESCROW=$$(contract_addr PaymentsEscrow.address horizon) + COLLECTOR=$$(contract_addr GraphTallyCollector.address horizon) + PAYER="$${{ACCOUNT0_ADDRESS}}" + DEPOSIT_AMOUNT="2000000000000000000" # 2 GRT per indexer + +{escrow_deposits} + echo "All escrow deposits complete" """ @@ -355,6 +384,7 @@ def generate(count: int) -> str: parts = [] reg_blocks = [] + deposit_blocks = [] volume_names = [] for i in range(count): @@ -369,8 +399,9 @@ def generate(count: int) -> str: parts.append(service_service(n, address, secret, op_mnemonic)) parts.append(tap_service(n, address, secret, op_mnemonic)) reg_blocks.append(registration_block(n, address, secret, op_mnemonic)) + deposit_blocks.append(escrow_deposit_block(n, address)) - parts.append(init_indexers_service("\n".join(reg_blocks))) + parts.append(init_indexers_service("\n".join(reg_blocks), "\n".join(deposit_blocks))) header = """\ # Auto-generated by scripts/gen-extra-indexers.py -- do not edit manually