Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
67 changes: 55 additions & 12 deletions src/bench/logging.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -6,9 +6,17 @@
#include <logging.h>
#include <test/util/setup_common.h>

// All but 2 of the benchmarks should have roughly similar performance:
//
// LogPrintWithoutCategory should be ~3 orders of magnitude faster, as nothing is logged.
//
// LogWithoutWriteToFile should be ~2 orders of magnitude faster, as it avoids disk writes.

static void Logging(benchmark::Bench& bench, const std::vector<const char*>& extra_args, const std::function<void()>& log)
{
// Reset any enabled logging categories from a previous benchmark run.
LogInstance().DisableCategory(BCLog::LogFlags::ALL);

TestingSetup test_setup{
CBaseChainParams::REGTEST,
extra_args,
Expand All @@ -17,32 +25,67 @@ static void Logging(benchmark::Bench& bench, const std::vector<const char*>& ext
bench.run([&] { log(); });
}

static void LoggingYoThreadNames(benchmark::Bench& bench)
static void LogPrintLevelWithThreadNames(benchmark::Bench& bench)
{
Logging(bench, {"-logthreadnames=1"}, [] { LogPrintf("%s\n", "test"); });
Logging(bench, {"-logthreadnames=1", "-debug=net"}, [] {
LogPrintLevel(BCLog::NET, BCLog::Level::Error, "%s\n", "test"); });
}
static void LoggingNoThreadNames(benchmark::Bench& bench)

static void LogPrintLevelWithoutThreadNames(benchmark::Bench& bench)
{
Logging(bench, {"-logthreadnames=0"}, [] { LogPrintf("%s\n", "test"); });
Logging(bench, {"-logthreadnames=0", "-debug=net"}, [] {
LogPrintLevel(BCLog::NET, BCLog::Level::Error, "%s\n", "test"); });
}
static void LoggingYoCategory(benchmark::Bench& bench)

static void LogPrintWithCategory(benchmark::Bench& bench)
{
Logging(bench, {"-logthreadnames=0", "-debug=net"}, [] { LogPrint(BCLog::NET, "%s\n", "test"); });
}
static void LoggingNoCategory(benchmark::Bench& bench)

static void LogPrintWithoutCategory(benchmark::Bench& bench)
{
Logging(bench, {"-logthreadnames=0", "-debug=0"}, [] { LogPrint(BCLog::NET, "%s\n", "test"); });
}
static void LoggingNoFile(benchmark::Bench& bench)

static void LogPrintfCategoryWithThreadNames(benchmark::Bench& bench)
{
Logging(bench, {"-logthreadnames=1", "-debug=net"}, [] {
LogPrintfCategory(BCLog::NET, "%s\n", "test");
});
}

static void LogPrintfCategoryWithoutThreadNames(benchmark::Bench& bench)
{
Logging(bench, {"-logthreadnames=0", "-debug=net"}, [] {
LogPrintfCategory(BCLog::NET, "%s\n", "test");
});
}

static void LogPrintfWithThreadNames(benchmark::Bench& bench)
{
Logging(bench, {"-logthreadnames=1"}, [] { LogPrintf("%s\n", "test"); });
}

static void LogPrintfWithoutThreadNames(benchmark::Bench& bench)
{
Logging(bench, {"-logthreadnames=0"}, [] { LogPrintf("%s\n", "test"); });
}

static void LogWithoutWriteToFile(benchmark::Bench& bench)
{
// Disable writing the log to a file, as used for unit tests and fuzzing in `MakeNoLogFileContext`.
Logging(bench, {"-nodebuglogfile", "-debug=1"}, [] {
LogPrintf("%s\n", "test");
LogPrint(BCLog::NET, "%s\n", "test");
});
}

BENCHMARK(LoggingYoThreadNames, benchmark::PriorityLevel::HIGH);
BENCHMARK(LoggingNoThreadNames, benchmark::PriorityLevel::HIGH);
BENCHMARK(LoggingYoCategory, benchmark::PriorityLevel::HIGH);
BENCHMARK(LoggingNoCategory, benchmark::PriorityLevel::HIGH);
BENCHMARK(LoggingNoFile, benchmark::PriorityLevel::HIGH);
BENCHMARK(LogPrintLevelWithThreadNames, benchmark::PriorityLevel::HIGH);
BENCHMARK(LogPrintLevelWithoutThreadNames, benchmark::PriorityLevel::HIGH);
BENCHMARK(LogPrintWithCategory, benchmark::PriorityLevel::HIGH);
BENCHMARK(LogPrintWithoutCategory, benchmark::PriorityLevel::HIGH);
BENCHMARK(LogPrintfCategoryWithThreadNames, benchmark::PriorityLevel::HIGH);
BENCHMARK(LogPrintfCategoryWithoutThreadNames, benchmark::PriorityLevel::HIGH);
BENCHMARK(LogPrintfWithThreadNames, benchmark::PriorityLevel::HIGH);
BENCHMARK(LogPrintfWithoutThreadNames, benchmark::PriorityLevel::HIGH);
BENCHMARK(LogWithoutWriteToFile, benchmark::PriorityLevel::HIGH);
2 changes: 1 addition & 1 deletion src/init/common.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,7 @@ bool SanityChecks()

void AddLoggingArgs(ArgsManager& argsman)
{
argsman.AddArg("-debuglogfile=<file>", strprintf("Specify location of debug log file. Relative paths will be prefixed by a net-specific datadir location. (-nodebuglogfile to disable; default: %s)", DEFAULT_DEBUGLOGFILE), ArgsManager::ALLOW_ANY, OptionsCategory::OPTIONS);
argsman.AddArg("-debuglogfile=<file>", strprintf("Specify location of debug log file (default: %s). Relative paths will be prefixed by a net-specific datadir location. Pass -nodebuglogfile to disable writing the log to a file.", DEFAULT_DEBUGLOGFILE), ArgsManager::ALLOW_ANY, OptionsCategory::OPTIONS);
argsman.AddArg("-debug=<category>", "Output debug and trace logging (default: -nodebug, supplying <category> is optional). "
"If <category> is not supplied or if <category> = 1, output all debug and trace logging. <category> can be: " + LogInstance().LogCategoriesString() + ". This option can be specified multiple times to output multiple categories.",
ArgsManager::ALLOW_ANY, OptionsCategory::DEBUG_TEST);
Expand Down
2 changes: 1 addition & 1 deletion test/functional/feature_governance_cl.py
Original file line number Diff line number Diff line change
Expand Up @@ -151,7 +151,7 @@ def run_test(self):

def governance_tip_updated(node):
with open(node.debug_log_path, encoding='utf-8') as dl:
seek_pos = node.debug_log_bytes() - 100 * 1024 # read the last 100 KiB only
seek_pos = node.debug_log_size(mode="rb") - 100 * 1024 # read the last 100 KiB only
dl.seek(seek_pos if seek_pos > 0 else 0)
debug_log_part = dl.read()
Comment on lines 153 to 156
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🟡 Minor

🧩 Analysis chain

🏁 Script executed:

#!/bin/bash
# Verify the byte-offset/text-seek mismatch in the changed segment.
rg -n -C2 'open\(node\.debug_log_path, encoding=.utf-8.|debug_log_size\(mode="rb"\)|dl\.seek\(' test/functional/feature_governance_cl.py

Repository: dashpay/dash

Length of output: 1772


🌐 Web query:

Dash dash feature_governance_cl.py debug_log_path UTF-8 text mode seek

💡 Result:

In Dash Core’s functional test framework, debug_log_path is the per-node logfile at:

  • chain_path / "debug.log" (i.e., <datadir>/<chain>/debug.log). In code it’s exposed as the debug_log_path property on TestNode. [1]

The problem with UTF-8 + text mode + seek is in how the test framework tracks “where it last read up to” in debug.log:

  • debug_log_bytes() opens the log in text mode (open(..., encoding="utf-8")), does seek(0, 2) and returns tell(). [1]
  • assert_debug_log() later re-opens the file in text mode and does dl.seek(prev_size) where prev_size came from that earlier tell(). [1]

On some platforms (notably Windows), text-mode seeks are not byte-offset seeks (they use an opaque cookie tied to newline/decoder state), so using these offsets as if they were stable byte positions can lead to “text mode seek” errors or incorrect positioning.

Fix (robust approach): do offset tracking in binary mode, decode after reading, e.g.:

  • compute size with Path.stat().st_size or open(..., "rb")/tell()
  • read new data with open(..., "rb"), seek(prev_size), read(), then .decode("utf-8", errors="replace")

This avoids text-mode seek() semantics entirely while still letting you search log text for expected messages. [1]

Source(s)
[1] Dash Core test_node.py implementation of debug_log_path, debug_log_bytes(), and assert_debug_log(). (github.com)

Citations:


Avoid mixing binary byte offsets with text-mode seek().

Line 154 computes a byte offset using debug_log_size(mode="rb"), but lines 153–155 open and seek the file in text mode with encoding='utf-8'. This mismatch causes invalid seek positions for UTF-8 and can produce flaky reads, especially on Windows where text-mode seeks use opaque decoder state.

Open the file in binary mode and decode explicitly after reading:

Proposed fix
-        def governance_tip_updated(node):
-            with open(node.debug_log_path, encoding='utf-8') as dl:
-                seek_pos = node.debug_log_size(mode="rb") - 100 * 1024  # read the last 100 KiB only
-                dl.seek(seek_pos if seek_pos > 0 else 0)
-                debug_log_part = dl.read()
-            return expected_msg in debug_log_part
+        def governance_tip_updated(node):
+            with open(node.debug_log_path, "rb") as dl:
+                seek_pos = node.debug_log_size(mode="rb") - 100 * 1024  # read the last 100 KiB only
+                dl.seek(seek_pos if seek_pos > 0 else 0)
+                debug_log_part = dl.read().decode("utf-8", errors="replace")
+            return expected_msg in debug_log_part
📝 Committable suggestion

‼️ IMPORTANT
Carefully review the code before committing. Ensure that it accurately replaces the highlighted code, contains no missing lines, and has no issues with indentation. Thoroughly test & benchmark the code to ensure it meets the requirements.

Suggested change
with open(node.debug_log_path, encoding='utf-8') as dl:
seek_pos = node.debug_log_bytes() - 100 * 1024 # read the last 100 KiB only
seek_pos = node.debug_log_size(mode="rb") - 100 * 1024 # read the last 100 KiB only
dl.seek(seek_pos if seek_pos > 0 else 0)
debug_log_part = dl.read()
with open(node.debug_log_path, "rb") as dl:
seek_pos = node.debug_log_size(mode="rb") - 100 * 1024 # read the last 100 KiB only
dl.seek(seek_pos if seek_pos > 0 else 0)
debug_log_part = dl.read().decode("utf-8", errors="replace")
🤖 Prompt for AI Agents
Verify each finding against the current code and only fix it if needed.

In `@test/functional/feature_governance_cl.py` around lines 153 - 156, The code
opens the debug log in text mode but computes a byte offset via
node.debug_log_size(mode="rb"), causing invalid seeks; change to open the file
in binary mode (use 'rb') when getting dl and seeking by the byte offset, read
the bytes into a variable (e.g., debug_log_bytes) and then decode to UTF-8
(handling errors='replace' or similar) into debug_log_part; keep the same seek
logic (seek_pos if > 0 else 0) and use node.debug_log_path and
node.debug_log_size as before.

Comment on lines 153 to 156
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

🔴 Blocking: Governance log polling seeks to a byte offset on a UTF-8 text stream

governance_tip_updated() opens debug.log in text mode with UTF-8 decoding, but computes seek_pos with node.debug_log_size(mode="rb"), which returns a byte offset. In Python text I/O, arbitrary byte offsets are not valid seek positions unless they come from tell() on the same text stream. This can misalign the read or raise UnicodeDecodeError when the offset lands inside a multibyte UTF-8 sequence. The surrounding test framework already documents this constraint in TestNode.assert_debug_log() and TestNode.wait_for_debug_log() by using matching open modes for debug_log_size() and the later open(...)/seek().

💡 Suggested change
Suggested change
with open(node.debug_log_path, encoding='utf-8') as dl:
seek_pos = node.debug_log_bytes() - 100 * 1024 # read the last 100 KiB only
seek_pos = node.debug_log_size(mode="rb") - 100 * 1024 # read the last 100 KiB only
dl.seek(seek_pos if seek_pos > 0 else 0)
debug_log_part = dl.read()
with open(node.debug_log_path, "rb") as dl:
seek_pos = node.debug_log_size(mode="rb") - 100 * 1024
dl.seek(seek_pos if seek_pos > 0 else 0)
debug_log_part = dl.read().decode("utf-8", errors="replace")

source: ['codex-general']

🤖 Fix this with AI agents
These findings are from an automated code review. Verify each finding against the current code and only fix it if needed.

In `test/functional/feature_governance_cl.py`:
- [BLOCKING] lines 153-156: Governance log polling seeks to a byte offset on a UTF-8 text stream
  `governance_tip_updated()` opens `debug.log` in text mode with UTF-8 decoding, but computes `seek_pos` with `node.debug_log_size(mode="rb")`, which returns a byte offset. In Python text I/O, arbitrary byte offsets are not valid seek positions unless they come from `tell()` on the same text stream. This can misalign the read or raise `UnicodeDecodeError` when the offset lands inside a multibyte UTF-8 sequence. The surrounding test framework already documents this constraint in `TestNode.assert_debug_log()` and `TestNode.wait_for_debug_log()` by using matching open modes for `debug_log_size()` and the later `open(...)/seek()`.

return expected_msg in debug_log_part
Expand Down
10 changes: 5 additions & 5 deletions test/functional/test_framework/test_node.py
Original file line number Diff line number Diff line change
Expand Up @@ -453,8 +453,8 @@ def chain_path(self) -> Path:
def debug_log_path(self) -> Path:
return self.chain_path / 'debug.log'

def debug_log_bytes(self) -> int:
with open(self.debug_log_path, encoding='utf-8') as dl:
def debug_log_size(self, **kwargs) -> int:
with open(self.debug_log_path, **kwargs) as dl:
dl.seek(0, 2)
return dl.tell()

Expand All @@ -466,13 +466,13 @@ def assert_debug_log(self, expected_msgs, unexpected_msgs=None, timeout=2):
assert_equal(type(unexpected_msgs), list)

time_end = time.time() + timeout * self.timeout_factor
prev_size = self.debug_log_bytes()
prev_size = self.debug_log_size(encoding="utf-8") # Must use same encoding that is used to read() below

yield

while True:
found = True
with open(self.debug_log_path, encoding='utf-8') as dl:
with open(self.debug_log_path, encoding="utf-8", errors="replace") as dl:
dl.seek(prev_size)
log = dl.read()
print_log = " - " + "\n - ".join(log.splitlines())
Expand All @@ -497,7 +497,7 @@ def wait_for_debug_log(self, expected_msgs, timeout=60):
the number of log lines we encountered when matching
"""
time_end = time.time() + timeout * self.timeout_factor
prev_size = self.debug_log_bytes()
prev_size = self.debug_log_size(mode="rb") # Must use same mode that is used to read() below

yield

Expand Down
2 changes: 1 addition & 1 deletion test/lint/lint-python-utf8-encoding.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ def check_fileopens():
if e.returncode > 1:
raise e

filtered_fileopens = [fileopen for fileopen in fileopens if not re.search(r"encoding=.(ascii|utf8|utf-8).|open\([^,]*, ['\"][^'\"]*b[^'\"]*['\"]", fileopen)]
filtered_fileopens = [fileopen for fileopen in fileopens if not re.search(r"encoding=.(ascii|utf8|utf-8).|open\([^,]*, (\*\*kwargs|['\"][^'\"]*b[^'\"]*['\"])", fileopen)]

return filtered_fileopens

Expand Down
Loading