From 3879c1a66f8c235b3eb251aa1f2581fd4b6c20b2 Mon Sep 17 00:00:00 2001 From: Korel Date: Thu, 11 Jun 2026 10:10:50 +0000 Subject: [PATCH] fix(collector): add value_type: double to ccp_stat_checkpointer_sync_time Both pg_stat_checkpointer.write_time and .sync_time are double precision (milliseconds). write_time already had value_type: double; sync_time was missing it. The OTel sqlquery receiver defaults value_type to int, so once cumulative sync_time grows large enough for the driver to render it in scientific notation (e.g. 2.774625e+06), strconv.Atoi fails and the collector logs an "Error scraping metrics" error on every collection interval (every 5s by default). This affects both PG-version variants (gte_pg17 and lt_pg17). The generated JSON artifacts are regenerated to match. Issue: CrunchyData/postgres-operator#4514 --- internal/collector/generated/gte_pg17_fast_metrics.json | 2 +- internal/collector/generated/lt_pg17_fast_metrics.json | 2 +- internal/collector/gte_pg17_fast_metrics.yaml | 1 + internal/collector/lt_pg17_fast_metrics.yaml | 1 + 4 files changed, 4 insertions(+), 2 deletions(-) diff --git a/internal/collector/generated/gte_pg17_fast_metrics.json b/internal/collector/generated/gte_pg17_fast_metrics.json index 9553e8c756..0bf18ca7e7 100644 --- a/internal/collector/generated/gte_pg17_fast_metrics.json +++ b/internal/collector/generated/gte_pg17_fast_metrics.json @@ -1 +1 @@ -[{"metrics":[{"data_type":"sum","description":"Number of write operations by background writers","metric_name":"ccp_stat_io_bgwriter_writes","static_attributes":{"server":"localhost:5432"},"value_column":"writes"},{"data_type":"sum","description":"Number of fsync calls by background writers","metric_name":"ccp_stat_io_bgwriter_fsyncs","static_attributes":{"server":"localhost:5432"},"value_column":"fsyncs"}],"sql":"SELECT\n s.writes\n , s.fsyncs\nFROM pg_catalog.pg_stat_io s WHERE backend_type = 'background writer';\n"},{"metrics":[{"description":"Number of scheduled checkpoints that have been performed","metric_name":"ccp_stat_checkpointer_num_timed","static_attributes":{"server":"localhost:5432"},"value_column":"num_timed"},{"description":"Number of requested checkpoints that have been performed","metric_name":"ccp_stat_checkpointer_num_requested","static_attributes":{"server":"localhost:5432"},"value_column":"num_requested"},{"description":"Total amount of time that has been spent in the portion of checkpoint processing where files are written to disk, in milliseconds","metric_name":"ccp_stat_checkpointer_write_time","static_attributes":{"server":"localhost:5432"},"value_column":"write_time","value_type":"double"},{"description":"Total amount of time that has been spent in the portion of checkpoint processing where files are synchronized to disk, in milliseconds","metric_name":"ccp_stat_checkpointer_sync_time","static_attributes":{"server":"localhost:5432"},"value_column":"sync_time"},{"description":"Number of buffers written during checkpoints and restartpoints","metric_name":"ccp_stat_checkpointer_buffers_written","static_attributes":{"server":"localhost:5432"},"value_column":"buffers_written"}],"sql":"SELECT\n c.num_timed\n , c.num_requested\n , c.write_time\n , c.sync_time\n , c.buffers_written\nFROM pg_catalog.pg_stat_checkpointer c;\n"},{"metrics":[{"attribute_columns":["database","slot_name","slot_type"],"description":"Active state of slot. 1 = true. 0 = false.","metric_name":"ccp_replication_slots_active","static_attributes":{"server":"localhost:5432"},"value_column":"active"},{"attribute_columns":["database","slot_name","slot_type"],"description":"The amount of WAL (in bytes) being retained for this slot","metric_name":"ccp_replication_slots_retained_bytes","static_attributes":{"server":"localhost:5432"},"value_column":"retained_bytes"},{"attribute_columns":["database","slot_name","slot_type"],"description":"True if this logical slot conflicted with recovery (and so is now invalidated). When this column is true, check invalidation_reason column for the conflict reason. Always NULL for physical slots.","metric_name":"ccp_replication_slots_conflicting","static_attributes":{"server":"localhost:5432"},"value_column":"conflicting"},{"attribute_columns":["database","slot_name","slot_type"],"description":"True if this is a logical slot enabled to be synced to the standbys so that logical replication can be resumed from the new primary after failover. Always false for physical slots.","metric_name":"ccp_replication_slots_failover","static_attributes":{"server":"localhost:5432"},"value_column":"failover"},{"attribute_columns":["database","slot_name","slot_type"],"description":"True if this is a logical slot that was synced from a primary server. On a hot standby, the slots with the synced column marked as true can neither be used for logical decoding nor dropped manually. The value of this column has no meaning on the primary server; the column value on the primary is default false for all slots but may (if leftover from a promoted standby) also be true.","metric_name":"ccp_replication_slots_synced","static_attributes":{"server":"localhost:5432"},"value_column":"synced"}],"sql":"SELECT\n s.slot_name\n , s.active::int\n , COALESCE(pg_wal_lsn_diff(CASE WHEN pg_is_in_recovery() THEN pg_last_wal_replay_lsn() ELSE pg_current_wal_insert_lsn() END, s.restart_lsn), 0) AS retained_bytes\n , COALESCE(s.database, '')\n , s.slot_type\n , COALESCE(s.conflicting::int, 0)\n , COALESCE(s.failover::int, 0)\n , COALESCE(s.synced::int, 0)\nFROM pg_catalog.pg_replication_slots s;\n"}] +[{"metrics":[{"data_type":"sum","description":"Number of write operations by background writers","metric_name":"ccp_stat_io_bgwriter_writes","static_attributes":{"server":"localhost:5432"},"value_column":"writes"},{"data_type":"sum","description":"Number of fsync calls by background writers","metric_name":"ccp_stat_io_bgwriter_fsyncs","static_attributes":{"server":"localhost:5432"},"value_column":"fsyncs"}],"sql":"SELECT\n s.writes\n , s.fsyncs\nFROM pg_catalog.pg_stat_io s WHERE backend_type = 'background writer';\n"},{"metrics":[{"description":"Number of scheduled checkpoints that have been performed","metric_name":"ccp_stat_checkpointer_num_timed","static_attributes":{"server":"localhost:5432"},"value_column":"num_timed"},{"description":"Number of requested checkpoints that have been performed","metric_name":"ccp_stat_checkpointer_num_requested","static_attributes":{"server":"localhost:5432"},"value_column":"num_requested"},{"description":"Total amount of time that has been spent in the portion of checkpoint processing where files are written to disk, in milliseconds","metric_name":"ccp_stat_checkpointer_write_time","static_attributes":{"server":"localhost:5432"},"value_column":"write_time","value_type":"double"},{"description":"Total amount of time that has been spent in the portion of checkpoint processing where files are synchronized to disk, in milliseconds","metric_name":"ccp_stat_checkpointer_sync_time","static_attributes":{"server":"localhost:5432"},"value_column":"sync_time","value_type":"double"},{"description":"Number of buffers written during checkpoints and restartpoints","metric_name":"ccp_stat_checkpointer_buffers_written","static_attributes":{"server":"localhost:5432"},"value_column":"buffers_written"}],"sql":"SELECT\n c.num_timed\n , c.num_requested\n , c.write_time\n , c.sync_time\n , c.buffers_written\nFROM pg_catalog.pg_stat_checkpointer c;\n"},{"metrics":[{"attribute_columns":["database","slot_name","slot_type"],"description":"Active state of slot. 1 = true. 0 = false.","metric_name":"ccp_replication_slots_active","static_attributes":{"server":"localhost:5432"},"value_column":"active"},{"attribute_columns":["database","slot_name","slot_type"],"description":"The amount of WAL (in bytes) being retained for this slot","metric_name":"ccp_replication_slots_retained_bytes","static_attributes":{"server":"localhost:5432"},"value_column":"retained_bytes"},{"attribute_columns":["database","slot_name","slot_type"],"description":"True if this logical slot conflicted with recovery (and so is now invalidated). When this column is true, check invalidation_reason column for the conflict reason. Always NULL for physical slots.","metric_name":"ccp_replication_slots_conflicting","static_attributes":{"server":"localhost:5432"},"value_column":"conflicting"},{"attribute_columns":["database","slot_name","slot_type"],"description":"True if this is a logical slot enabled to be synced to the standbys so that logical replication can be resumed from the new primary after failover. Always false for physical slots.","metric_name":"ccp_replication_slots_failover","static_attributes":{"server":"localhost:5432"},"value_column":"failover"},{"attribute_columns":["database","slot_name","slot_type"],"description":"True if this is a logical slot that was synced from a primary server. On a hot standby, the slots with the synced column marked as true can neither be used for logical decoding nor dropped manually. The value of this column has no meaning on the primary server; the column value on the primary is default false for all slots but may (if leftover from a promoted standby) also be true.","metric_name":"ccp_replication_slots_synced","static_attributes":{"server":"localhost:5432"},"value_column":"synced"}],"sql":"SELECT\n s.slot_name\n , s.active::int\n , COALESCE(pg_wal_lsn_diff(CASE WHEN pg_is_in_recovery() THEN pg_last_wal_replay_lsn() ELSE pg_current_wal_insert_lsn() END, s.restart_lsn), 0) AS retained_bytes\n , COALESCE(s.database, '')\n , s.slot_type\n , COALESCE(s.conflicting::int, 0)\n , COALESCE(s.failover::int, 0)\n , COALESCE(s.synced::int, 0)\nFROM pg_catalog.pg_replication_slots s;\n"}] diff --git a/internal/collector/generated/lt_pg17_fast_metrics.json b/internal/collector/generated/lt_pg17_fast_metrics.json index 55b6ca78fc..ecc41e1cd3 100644 --- a/internal/collector/generated/lt_pg17_fast_metrics.json +++ b/internal/collector/generated/lt_pg17_fast_metrics.json @@ -1 +1 @@ -[{"metrics":[{"data_type":"sum","description":"Number of write operations by background writers","metric_name":"ccp_stat_io_bgwriter_writes","static_attributes":{"server":"localhost:5432"},"value_column":"writes"},{"data_type":"sum","description":"Number of fsync calls by background writers","metric_name":"ccp_stat_io_bgwriter_fsyncs","static_attributes":{"server":"localhost:5432"},"value_column":"fsyncs"}],"sql":"SELECT\n s.buffers_backend AS writes\n , s.buffers_backend_fsync AS fsyncs\nFROM pg_catalog.pg_stat_bgwriter s;\n"},{"metrics":[{"description":"Number of scheduled checkpoints that have been performed","metric_name":"ccp_stat_checkpointer_num_timed","static_attributes":{"server":"localhost:5432"},"value_column":"num_timed"},{"description":"Number of requested checkpoints that have been performed","metric_name":"ccp_stat_checkpointer_num_requested","static_attributes":{"server":"localhost:5432"},"value_column":"num_requested"},{"description":"Total amount of time that has been spent in the portion of checkpoint processing where files are written to disk, in milliseconds","metric_name":"ccp_stat_checkpointer_write_time","static_attributes":{"server":"localhost:5432"},"value_column":"write_time","value_type":"double"},{"description":"Total amount of time that has been spent in the portion of checkpoint processing where files are synchronized to disk, in milliseconds","metric_name":"ccp_stat_checkpointer_sync_time","static_attributes":{"server":"localhost:5432"},"value_column":"sync_time"},{"description":"Number of buffers written during checkpoints and restartpoints","metric_name":"ccp_stat_checkpointer_buffers_written","static_attributes":{"server":"localhost:5432"},"value_column":"buffers_written"}],"sql":"SELECT\n c.checkpoints_timed AS num_timed\n , c.checkpoints_req AS num_requested\n , c.checkpoint_write_time AS write_time\n , c.checkpoint_sync_time AS sync_time\n , c.buffers_checkpoint AS buffers_written\nFROM pg_catalog.pg_stat_bgwriter c;\n"}] +[{"metrics":[{"data_type":"sum","description":"Number of write operations by background writers","metric_name":"ccp_stat_io_bgwriter_writes","static_attributes":{"server":"localhost:5432"},"value_column":"writes"},{"data_type":"sum","description":"Number of fsync calls by background writers","metric_name":"ccp_stat_io_bgwriter_fsyncs","static_attributes":{"server":"localhost:5432"},"value_column":"fsyncs"}],"sql":"SELECT\n s.buffers_backend AS writes\n , s.buffers_backend_fsync AS fsyncs\nFROM pg_catalog.pg_stat_bgwriter s;\n"},{"metrics":[{"description":"Number of scheduled checkpoints that have been performed","metric_name":"ccp_stat_checkpointer_num_timed","static_attributes":{"server":"localhost:5432"},"value_column":"num_timed"},{"description":"Number of requested checkpoints that have been performed","metric_name":"ccp_stat_checkpointer_num_requested","static_attributes":{"server":"localhost:5432"},"value_column":"num_requested"},{"description":"Total amount of time that has been spent in the portion of checkpoint processing where files are written to disk, in milliseconds","metric_name":"ccp_stat_checkpointer_write_time","static_attributes":{"server":"localhost:5432"},"value_column":"write_time","value_type":"double"},{"description":"Total amount of time that has been spent in the portion of checkpoint processing where files are synchronized to disk, in milliseconds","metric_name":"ccp_stat_checkpointer_sync_time","static_attributes":{"server":"localhost:5432"},"value_column":"sync_time","value_type":"double"},{"description":"Number of buffers written during checkpoints and restartpoints","metric_name":"ccp_stat_checkpointer_buffers_written","static_attributes":{"server":"localhost:5432"},"value_column":"buffers_written"}],"sql":"SELECT\n c.checkpoints_timed AS num_timed\n , c.checkpoints_req AS num_requested\n , c.checkpoint_write_time AS write_time\n , c.checkpoint_sync_time AS sync_time\n , c.buffers_checkpoint AS buffers_written\nFROM pg_catalog.pg_stat_bgwriter c;\n"}] diff --git a/internal/collector/gte_pg17_fast_metrics.yaml b/internal/collector/gte_pg17_fast_metrics.yaml index a590b48272..5d9257aa09 100644 --- a/internal/collector/gte_pg17_fast_metrics.yaml +++ b/internal/collector/gte_pg17_fast_metrics.yaml @@ -51,6 +51,7 @@ server: "localhost:5432" - metric_name: ccp_stat_checkpointer_sync_time value_column: sync_time + value_type: double description: Total amount of time that has been spent in the portion of checkpoint processing where files are synchronized to disk, in milliseconds static_attributes: server: "localhost:5432" diff --git a/internal/collector/lt_pg17_fast_metrics.yaml b/internal/collector/lt_pg17_fast_metrics.yaml index 576ea8e4a6..cb8be435db 100644 --- a/internal/collector/lt_pg17_fast_metrics.yaml +++ b/internal/collector/lt_pg17_fast_metrics.yaml @@ -50,6 +50,7 @@ server: "localhost:5432" - metric_name: ccp_stat_checkpointer_sync_time value_column: sync_time + value_type: double description: Total amount of time that has been spent in the portion of checkpoint processing where files are synchronized to disk, in milliseconds static_attributes: server: "localhost:5432"