From 0ae8c1afd6ac9643cfeeb1cb60502fdcdb836ab3 Mon Sep 17 00:00:00 2001 From: Joyee Cheung Date: Tue, 3 Feb 2026 21:10:23 +0100 Subject: [PATCH] src: consolidate C++ ReadFileSync/WriteFileSync utilities This patch moves `ReadFileSync` and `WriteFileSync` from `src/util.cc` to `src/node_file_utils.cc`, consolidates the implementation to reuse code, and adds a few more enhancements: For `ReadFileSync`: - Use fstat-based pre-allocation to minimize buffer resizing and repeated reads for bigger files. - Handle various potential overflows in size conversions. - Handle fallback for 0-byte special files. For `WriteFileSync`: - Handle potential partial writes for big enough files and support non-seekable files (with -1 as offset). - Handle 0-byte writes correctly. In both cases, this now avoids hard aborts and return error code for the caller to handle as much as possible, except `std::vector ReadFileSync(FILE* fp)` which is part of the embedder API. This patch uses the new `ReadFileSync` to address a TODO in node_sea.bin.cc. --- node.gyp | 1 + src/node_file_utils.cc | 255 +++++++++++++++++++++++++++++++++++++++++ src/node_file_utils.h | 39 +++++++ src/node_internals.h | 6 +- src/node_sea_bin.cc | 7 +- src/util.cc | 90 --------------- src/util.h | 6 - 7 files changed, 298 insertions(+), 106 deletions(-) create mode 100644 src/node_file_utils.cc create mode 100644 src/node_file_utils.h diff --git a/node.gyp b/node.gyp index 83351d1d82627e..232d46727ae2ca 100644 --- a/node.gyp +++ b/node.gyp @@ -124,6 +124,7 @@ 'src/node_errors.cc', 'src/node_external_reference.cc', 'src/node_file.cc', + 'src/node_file_utils.cc', 'src/node_http_parser.cc', 'src/node_http2.cc', 'src/node_i18n.cc', diff --git a/src/node_file_utils.cc b/src/node_file_utils.cc new file mode 100644 index 00000000000000..d9bdb413aa44c0 --- /dev/null +++ b/src/node_file_utils.cc @@ -0,0 +1,255 @@ +#include "node_file_utils.h" + +#include +#include +#include +#include +#include +#include + +#include "util-inl.h" + +#ifdef _WIN32 +#include // _S_IREAD _S_IWRITE +#ifndef S_IRUSR +#define S_IRUSR _S_IREAD +#endif // S_IRUSR +#ifndef S_IWUSR +#define S_IWUSR _S_IWRITE +#endif // S_IWUSR +#endif + +namespace node { + +int WriteFileSync(const char* path, uv_buf_t buf) { + return WriteFileSync(path, &buf, 1); +} + +constexpr int64_t kCurrentFileOffset = -1; +int WriteFileSync(const char* path, uv_buf_t* bufs, size_t buf_count) { + uv_fs_t req; + int fd = uv_fs_open(nullptr, + &req, + path, + O_WRONLY | O_CREAT | O_TRUNC, + S_IWUSR | S_IRUSR, + nullptr); + uv_fs_req_cleanup(&req); + if (fd < 0) { + return fd; + } + + // Handle potential partial writes by looping until all data is written. + std::vector iovs(bufs, bufs + buf_count); + size_t idx = 0; + + while (idx < iovs.size()) { + // Skip empty buffers. + while (idx < iovs.size() && iovs[idx].len == 0) { + idx++; + } + if (idx >= iovs.size()) { // No non-empty buffers left. + break; + } + + uv_fs_write(nullptr, + &req, + fd, + iovs.data() + idx, + iovs.size() - idx, + kCurrentFileOffset, + nullptr); + if (req.result < 0) { // Error during write. + int err = req.result; + uv_fs_req_cleanup(&req); + uv_fs_close(nullptr, &req, fd, nullptr); + uv_fs_req_cleanup(&req); + return err; + } + if (req.result == 0) { // Should not happen unless the file system is full. + uv_fs_req_cleanup(&req); + uv_fs_close(nullptr, &req, fd, nullptr); + uv_fs_req_cleanup(&req); + return UV_EIO; + } + size_t written = req.result; + uv_fs_req_cleanup(&req); + + // Consume written bytes from buffers. + while (written > 0 && idx < iovs.size()) { + if (written >= iovs[idx].len) { + written -= iovs[idx].len; + idx++; + } else { + iovs[idx].base += written; + iovs[idx].len -= written; + written = 0; + } + } + } + + int err = uv_fs_close(nullptr, &req, fd, nullptr); + uv_fs_req_cleanup(&req); + return err; +} + +int WriteFileSync(v8::Isolate* isolate, + const char* path, + v8::Local string) { + node::Utf8Value utf8(isolate, string); + uv_buf_t buf = uv_buf_init(utf8.out(), utf8.length()); + return WriteFileSync(path, buf); +} + +// Default size used if fstat reports a file size of 0 for special files. +static constexpr size_t kDefaultReadSize = 8192; + +// The resize_buffer callback is called with the file size after fstat, and must +// return a pointer to a buffer of at least that size. If the file grows during +// reading, resize_buffer may be called again with a larger size; the callback +// must preserve existing content and release old storage if needed. +// After reading completes, resize_buffer may be called with the actual bytes +// read. +template +int ReadFileSyncImpl(const char* path, ResizeBuffer resize_buffer) { + uv_fs_t req; + + uv_file file = uv_fs_open(nullptr, &req, path, O_RDONLY, 0, nullptr); + if (req.result < 0) { + int err = req.result; + uv_fs_req_cleanup(&req); + return err; + } + uv_fs_req_cleanup(&req); + + // Get the file size first, which should be cheap enough on an already opened + // files, and saves us from repeated reallocations/reads. + int err = uv_fs_fstat(nullptr, &req, file, nullptr); + if (err < 0) { + uv_fs_req_cleanup(&req); + uv_fs_close(nullptr, &req, file, nullptr); + uv_fs_req_cleanup(&req); + return err; + } + // SIZE_MAX is ~18 exabytes on 64-bit and ~4 GB on 32-bit systems. + // In both cases, the process is unlikely to have that much memory + // to hold the file content, so we just error with UV_EFBIG. + if (req.statbuf.st_size > static_cast(SIZE_MAX)) { + uv_fs_req_cleanup(&req); + uv_fs_close(nullptr, &req, file, nullptr); + uv_fs_req_cleanup(&req); + return UV_EFBIG; + } + size_t size = static_cast(req.statbuf.st_size); + uv_fs_req_cleanup(&req); + + // If the file is reported as 0 bytes for special files, use a default + // size to start reading. + if (size == 0) { + size = kDefaultReadSize; + } + + char* buffer = resize_buffer(size); + if (buffer == nullptr) { + uv_fs_close(nullptr, &req, file, nullptr); + uv_fs_req_cleanup(&req); + return UV_ENOMEM; + } + size_t total_read = 0; + while (true) { + size_t remaining = size - total_read; + // On Windows, uv_buf_t uses ULONG which may truncate the + // length for large buffers. Limit the individual read request size to + // INT_MAX to be safe. The loop will issue multiple reads for larger files. + if (remaining > INT_MAX) { + remaining = INT_MAX; + } + uv_buf_t buf = uv_buf_init(buffer + total_read, remaining); + uv_fs_read( + nullptr, &req, file, &buf, 1 /* nbufs */, kCurrentFileOffset, nullptr); + ssize_t bytes_read = req.result; + uv_fs_req_cleanup(&req); + if (bytes_read < 0) { + uv_fs_close(nullptr, &req, file, nullptr); + uv_fs_req_cleanup(&req); + return bytes_read; + } + if (bytes_read == 0) { + // EOF, stop reading. + break; + } + total_read += bytes_read; + if (total_read == size) { + // Buffer is full, the file may have grown during reading. + // Try increasing the buffer size and reading more. + if (size == SIZE_MAX) { + uv_fs_close(nullptr, &req, file, nullptr); + uv_fs_req_cleanup(&req); + return UV_EFBIG; + } + if (size > SIZE_MAX / 2) { + size = SIZE_MAX; + } else { + size *= 2; + } + buffer = resize_buffer(size); + if (buffer == nullptr) { + uv_fs_close(nullptr, &req, file, nullptr); + uv_fs_req_cleanup(&req); + return UV_ENOMEM; + } + } + } + + int close_err = uv_fs_close(nullptr, &req, file, nullptr); + uv_fs_req_cleanup(&req); + if (close_err < 0) { + return close_err; + } + + // Truncate the actual size read if necessary. + if (total_read != size) { + buffer = resize_buffer(total_read); + if (buffer == nullptr && total_read != 0) { + return UV_ENOMEM; + } + } + return 0; +} + +int ReadFileSync(const char* path, std::string* result) { + return ReadFileSyncImpl(path, [result](size_t size) { + result->resize(size); + return result->data(); + }); +} + +// Legacy interface. TODO(joyeecheung): update the callers to pass path first, +// output parameters second. +int ReadFileSync(std::string* result, const char* path) { + return ReadFileSync(path, result); +} + +int ReadFileSync(const char* path, std::vector* result) { + return ReadFileSyncImpl(path, [result](size_t size) { + result->resize(size); + return reinterpret_cast(result->data()); + }); +} + +std::vector ReadFileSync(FILE* fp) { + CHECK_EQ(ftell(fp), 0); + int err = fseek(fp, 0, SEEK_END); + CHECK_EQ(err, 0); + size_t size = ftell(fp); + CHECK_NE(size, static_cast(-1L)); + err = fseek(fp, 0, SEEK_SET); + CHECK_EQ(err, 0); + + std::vector contents(size); + size_t num_read = fread(contents.data(), size, 1, fp); + CHECK_EQ(num_read, 1); + return contents; +} + +} // namespace node diff --git a/src/node_file_utils.h b/src/node_file_utils.h new file mode 100644 index 00000000000000..d998c59a7015cf --- /dev/null +++ b/src/node_file_utils.h @@ -0,0 +1,39 @@ +#ifndef SRC_NODE_FILE_UTILS_H_ +#define SRC_NODE_FILE_UTILS_H_ + +#if defined(NODE_WANT_INTERNALS) && NODE_WANT_INTERNALS + +#include +#include +#include +#include + +#include "uv.h" +#include "v8.h" + +namespace node { + +// Synchronously writes to a file. If the file exists, it is replaced +// (truncated). +int WriteFileSync(const char* path, uv_buf_t buf); +int WriteFileSync(const char* path, uv_buf_t* bufs, size_t buf_count); +int WriteFileSync(v8::Isolate* isolate, + const char* path, + v8::Local string); + +// Synchronously reads the entire contents of a file. +int ReadFileSync(const char* path, std::string* result); +int ReadFileSync(const char* path, std::vector* result); + +// Legacy interface. TODO(joyeecheung): update the callers to pass path first, +// output parameters second. +int ReadFileSync(std::string* result, const char* path); + +// This is currently only used by embedder APIs that take a FILE*. +std::vector ReadFileSync(FILE* fp); + +} // namespace node + +#endif // defined(NODE_WANT_INTERNALS) && NODE_WANT_INTERNALS + +#endif // SRC_NODE_FILE_UTILS_H_ diff --git a/src/node_internals.h b/src/node_internals.h index 19c153018c6e6d..dbe36868600682 100644 --- a/src/node_internals.h +++ b/src/node_internals.h @@ -27,6 +27,7 @@ #include "env.h" #include "node.h" #include "node_binding.h" +#include "node_file_utils.h" #include "node_mutex.h" #include "tracing/trace_event.h" #include "util.h" @@ -411,11 +412,6 @@ typedef struct tm TIME_TYPE; #endif double GetCurrentTimeInMicroseconds(); -int WriteFileSync(const char* path, uv_buf_t* bufs, size_t buf_count); -int WriteFileSync(const char* path, uv_buf_t buf); -int WriteFileSync(v8::Isolate* isolate, - const char* path, - v8::Local string); class DiagnosticFilename { public: diff --git a/src/node_sea_bin.cc b/src/node_sea_bin.cc index dc9c9e614f8fca..3e209990a40b86 100644 --- a/src/node_sea_bin.cc +++ b/src/node_sea_bin.cc @@ -405,8 +405,8 @@ ExitCode BuildSingleExecutable(const std::string& sea_config_path, int src_mode = static_cast(req.statbuf.st_mode); uv_fs_req_cleanup(&req); - std::string exe; - r = ReadFileSync(&exe, config.executable_path.c_str()); + std::vector exe_data; + r = ReadFileSync(config.executable_path.c_str(), &exe_data); if (r != 0) { FPrintF(stderr, "Error: Couldn't read executable %s: %s\n", @@ -415,9 +415,6 @@ ExitCode BuildSingleExecutable(const std::string& sea_config_path, return ExitCode::kGenericUserError; } - // TODO(joyeecheung): add a variant of ReadFileSync that reads into - // vector directly and avoid this copy. - std::vector exe_data(exe.begin(), exe.end()); std::vector sea_blob; ExitCode code = GenerateSingleExecutableBlob(&sea_blob, config, args, exec_args); diff --git a/src/util.cc b/src/util.cc index c4b39450c5b7f9..1ea51cf7012963 100644 --- a/src/util.cc +++ b/src/util.cc @@ -225,96 +225,6 @@ double GetCurrentTimeInMicroseconds() { return kMicrosecondsPerSecond * tv.tv_sec + tv.tv_usec; } -int WriteFileSync(const char* path, uv_buf_t buf) { - return WriteFileSync(path, &buf, 1); -} - -int WriteFileSync(const char* path, uv_buf_t* bufs, size_t buf_count) { - uv_fs_t req; - int fd = uv_fs_open(nullptr, - &req, - path, - O_WRONLY | O_CREAT | O_TRUNC, - S_IWUSR | S_IRUSR, - nullptr); - uv_fs_req_cleanup(&req); - if (fd < 0) { - return fd; - } - - int err = uv_fs_write(nullptr, &req, fd, bufs, buf_count, 0, nullptr); - uv_fs_req_cleanup(&req); - if (err < 0) { - return err; - } - - err = uv_fs_close(nullptr, &req, fd, nullptr); - uv_fs_req_cleanup(&req); - return err; -} - -int WriteFileSync(v8::Isolate* isolate, - const char* path, - v8::Local string) { - node::Utf8Value utf8(isolate, string); - uv_buf_t buf = uv_buf_init(utf8.out(), utf8.length()); - return WriteFileSync(path, buf); -} - -int ReadFileSync(std::string* result, const char* path) { - uv_fs_t req; - auto defer_req_cleanup = OnScopeLeave([&req]() { - uv_fs_req_cleanup(&req); - }); - - uv_file file = uv_fs_open(nullptr, &req, path, O_RDONLY, 0, nullptr); - if (req.result < 0) { - // req will be cleaned up by scope leave. - return req.result; - } - uv_fs_req_cleanup(&req); - - auto defer_close = OnScopeLeave([file]() { - uv_fs_t close_req; - CHECK_EQ(0, uv_fs_close(nullptr, &close_req, file, nullptr)); - uv_fs_req_cleanup(&close_req); - }); - - *result = std::string(""); - char buffer[4096]; - uv_buf_t buf = uv_buf_init(buffer, sizeof(buffer)); - - while (true) { - const int r = - uv_fs_read(nullptr, &req, file, &buf, 1, result->length(), nullptr); - if (req.result < 0) { - // req will be cleaned up by scope leave. - return req.result; - } - uv_fs_req_cleanup(&req); - if (r <= 0) { - break; - } - result->append(buf.base, r); - } - return 0; -} - -std::vector ReadFileSync(FILE* fp) { - CHECK_EQ(ftell(fp), 0); - int err = fseek(fp, 0, SEEK_END); - CHECK_EQ(err, 0); - size_t size = ftell(fp); - CHECK_NE(size, static_cast(-1L)); - err = fseek(fp, 0, SEEK_SET); - CHECK_EQ(err, 0); - - std::vector contents(size); - size_t num_read = fread(contents.data(), size, 1, fp); - CHECK_EQ(num_read, 1); - return contents; -} - void DiagnosticFilename::LocalTime(TIME_TYPE* tm_struct) { #ifdef _WIN32 GetLocalTime(tm_struct); diff --git a/src/util.h b/src/util.h index 2482f82fe21a9e..873a204f9cd794 100644 --- a/src/util.h +++ b/src/util.h @@ -859,12 +859,6 @@ std::unique_ptr static_unique_pointer_cast(std::unique_ptr&& ptr) { #define MAYBE_FIELD_PTR(ptr, field) ptr == nullptr ? nullptr : &(ptr->field) -// Returns a non-zero code if it fails to open or read the file, -// aborts if it fails to close the file. -int ReadFileSync(std::string* result, const char* path); -// Reads all contents of a FILE*, aborts if it fails. -std::vector ReadFileSync(FILE* fp); - v8::Local NewFunctionTemplate( v8::Isolate* isolate, v8::FunctionCallback callback,