diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 02b3318c9..4f59377ee 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -41,7 +41,8 @@ jobs: cxx: g++ type: static shell: sh - options: -DSOURCEMETA_CORE_CRYPTO_USE_SYSTEM_OPENSSL:BOOL=ON + options: -DSOURCEMETA_CORE_CRYPTO_USE_SYSTEM_OPENSSL:BOOL=ON -DSOURCEMETA_CORE_HTTP_USE_SYSTEM_CURL:BOOL=ON + apt: libcurl4-openssl-dev benchmark: linux/gcc - os: ubuntu-latest cc: clang @@ -85,6 +86,9 @@ jobs: env: HOMEBREW_NO_ANALYTICS: 1 HOMEBREW_NO_AUTO_UPDATE: 1 + - name: Install dependencies (Linux) + if: runner.os == 'Linux' && matrix.platform.apt + run: sudo apt-get update && sudo apt-get install --yes ${{ matrix.platform.apt }} - run: cmake --version - name: Configure (static) @@ -93,6 +97,7 @@ jobs: cmake -S . -B ./build -DCMAKE_BUILD_TYPE:STRING=Release -DSOURCEMETA_CORE_TESTS:BOOL=ON + -DSOURCEMETA_CORE_TESTS_CI:BOOL=ON -DSOURCEMETA_CORE_BENCHMARK:BOOL=ON -DSOURCEMETA_CORE_DOCS:BOOL=OFF -DBUILD_SHARED_LIBS:BOOL=OFF @@ -104,6 +109,7 @@ jobs: cmake -S . -B ./build -DCMAKE_BUILD_TYPE:STRING=Release -DSOURCEMETA_CORE_TESTS:BOOL=ON + -DSOURCEMETA_CORE_TESTS_CI:BOOL=ON -DSOURCEMETA_CORE_BENCHMARK:BOOL=ON -DSOURCEMETA_CORE_DOCS:BOOL=OFF -DBUILD_SHARED_LIBS:BOOL=ON diff --git a/CMakeLists.txt b/CMakeLists.txt index 512e0d3e1..158c5bcfe 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -31,6 +31,7 @@ option(SOURCEMETA_CORE_YAML "Build the Sourcemeta Core YAML library" ON) option(SOURCEMETA_CORE_JSONRPC "Build the Sourcemeta Core JSON-RPC library" ON) option(SOURCEMETA_CORE_MCP "Build the Sourcemeta Core MCP library" ON) option(SOURCEMETA_CORE_HTTP "Build the Sourcemeta Core HTTP library" ON) +option(SOURCEMETA_CORE_HTTP_USE_SYSTEM_CURL "Use system cURL for the Sourcemeta Core HTTP library" OFF) option(SOURCEMETA_CORE_JOSE "Build the Sourcemeta Core JOSE library" ON) option(SOURCEMETA_CORE_SEMVER "Build the Sourcemeta Core SemVer library" ON) option(SOURCEMETA_CORE_GZIP "Build the Sourcemeta Core GZIP library" ON) @@ -38,6 +39,7 @@ option(SOURCEMETA_CORE_HTML "Build the Sourcemeta Core HTML library" ON) option(SOURCEMETA_CORE_CSS "Build the Sourcemeta Core CSS library" ON) option(SOURCEMETA_CORE_MARKDOWN "Build the Sourcemeta Core Markdown library" ON) option(SOURCEMETA_CORE_TESTS "Build the Sourcemeta Core tests" OFF) +option(SOURCEMETA_CORE_TESTS_CI "Build the Sourcemeta Core CI tests" OFF) option(SOURCEMETA_CORE_BENCHMARK "Build the Sourcemeta Core benchmarks" OFF) option(SOURCEMETA_CORE_DOCS "Build the Sourcemeta Core docs" OFF) option(SOURCEMETA_CORE_INSTALL "Install the Sourcemeta Core library" ON) @@ -230,7 +232,7 @@ endif() # Testing -if(SOURCEMETA_CORE_CONTRIB_GOOGLETEST OR SOURCEMETA_CORE_TESTS) +if(SOURCEMETA_CORE_CONTRIB_GOOGLETEST OR SOURCEMETA_CORE_TESTS OR SOURCEMETA_CORE_TESTS_CI) find_package(GoogleTest REQUIRED) endif() @@ -380,6 +382,14 @@ if(SOURCEMETA_CORE_TESTS) endif() endif() +if(SOURCEMETA_CORE_TESTS_CI) + enable_testing() + + if(SOURCEMETA_CORE_HTTP) + add_subdirectory(test/http/ci) + endif() +endif() + if(SOURCEMETA_CORE_BENCHMARK) add_subdirectory(benchmark) endif() diff --git a/cmake/common/compiler/options.cmake b/cmake/common/compiler/options.cmake index 150799252..20c05eaa9 100644 --- a/cmake/common/compiler/options.cmake +++ b/cmake/common/compiler/options.cmake @@ -116,6 +116,10 @@ function(sourcemeta_add_default_options visibility target) -Wno-exit-time-destructors -Wrange-loop-analysis + # Manage Objective-C and Objective-C++ object lifetimes with Automatic + # Reference Counting + $<$,$>:-fobjc-arc> + # Enable loop vectorization for performance reasons $<$>:-fvectorize> # Enable vectorization of straight-line code for performance diff --git a/cmake/common/variables.cmake b/cmake/common/variables.cmake index ee6359cb0..c7d6adbb0 100644 --- a/cmake/common/variables.cmake +++ b/cmake/common/variables.cmake @@ -1,3 +1,10 @@ +# Objective-C++ powers the Apple-specific backends and must be enabled before +# we capture the project languages below, so its standard and visibility +# defaults get applied +if(APPLE) + enable_language(OBJCXX) +endif() + # Get the list of languages defined in the project get_property(SOURCEMETA_LANGUAGES GLOBAL PROPERTY ENABLED_LANGUAGES) diff --git a/config.cmake.in b/config.cmake.in index 408a1e233..16d87fed7 100644 --- a/config.cmake.in +++ b/config.cmake.in @@ -158,6 +158,9 @@ foreach(component ${SOURCEMETA_CORE_COMPONENTS}) include("${CMAKE_CURRENT_LIST_DIR}/sourcemeta_core_jsonrpc.cmake") include("${CMAKE_CURRENT_LIST_DIR}/sourcemeta_core_mcp.cmake") elseif(component STREQUAL "http") + if(@SOURCEMETA_CORE_HTTP_USE_SYSTEM_CURL@) + find_dependency(CURL) + endif() include("${CMAKE_CURRENT_LIST_DIR}/sourcemeta_core_preprocessor.cmake") include("${CMAKE_CURRENT_LIST_DIR}/sourcemeta_core_numeric.cmake") include("${CMAKE_CURRENT_LIST_DIR}/sourcemeta_core_io.cmake") diff --git a/src/core/http/CMakeLists.txt b/src/core/http/CMakeLists.txt index ddc5d7d11..e81a3b9c8 100644 --- a/src/core/http/CMakeLists.txt +++ b/src/core/http/CMakeLists.txt @@ -1,8 +1,19 @@ +if(SOURCEMETA_CORE_HTTP_USE_SYSTEM_CURL) + set(SOURCEMETA_CORE_HTTP_CLIENT_SOURCE client_curl.cc) +elseif(APPLE) + set(SOURCEMETA_CORE_HTTP_CLIENT_SOURCE client_darwin.mm) +elseif(WIN32 AND NOT CMAKE_SYSTEM_NAME STREQUAL "MSYS") + set(SOURCEMETA_CORE_HTTP_CLIENT_SOURCE client_windows.cc) +else() + set(SOURCEMETA_CORE_HTTP_CLIENT_SOURCE client_curl.cc) +endif() + sourcemeta_library(NAMESPACE sourcemeta PROJECT core NAME http - PRIVATE_HEADERS problem.h status.h method.h message.h error.h + PRIVATE_HEADERS problem.h status.h method.h message.h error.h system.h SOURCES helpers.h problem.cc match_accept.cc match_accept_language.cc negotiate_encoding.cc from_date.cc format_link.cc field_list.cc - accept_includes_all.cc content_type_matches.cc parse_bearer.cc) + accept_includes_all.cc content_type_matches.cc parse_bearer.cc + ${SOURCEMETA_CORE_HTTP_CLIENT_SOURCE}) if(SOURCEMETA_CORE_INSTALL) sourcemeta_library_install(NAMESPACE sourcemeta PROJECT core NAME http) @@ -11,3 +22,17 @@ endif() target_link_libraries(sourcemeta_core_http PUBLIC sourcemeta::core::json) target_link_libraries(sourcemeta_core_http PUBLIC sourcemeta::core::text) target_link_libraries(sourcemeta_core_http PRIVATE sourcemeta::core::time) + +if(SOURCEMETA_CORE_HTTP_USE_SYSTEM_CURL) + find_package(CURL REQUIRED) + target_compile_definitions(sourcemeta_core_http + PRIVATE SOURCEMETA_CORE_HTTP_USE_SYSTEM_CURL) + target_link_libraries(sourcemeta_core_http PRIVATE CURL::libcurl) +elseif(APPLE) + target_link_libraries(sourcemeta_core_http PRIVATE "-framework Foundation") +elseif(WIN32 AND NOT CMAKE_SYSTEM_NAME STREQUAL "MSYS") + target_link_libraries(sourcemeta_core_http PRIVATE winhttp) + target_link_libraries(sourcemeta_core_http PRIVATE sourcemeta::core::unicode) +else() + target_link_libraries(sourcemeta_core_http PRIVATE ${CMAKE_DL_LIBS}) +endif() diff --git a/src/core/http/client_curl.cc b/src/core/http/client_curl.cc new file mode 100644 index 000000000..0b7c89d94 --- /dev/null +++ b/src/core/http/client_curl.cc @@ -0,0 +1,410 @@ +#include + +#ifdef SOURCEMETA_CORE_HTTP_USE_SYSTEM_CURL +#include // curl_easy_*, curl_slist_*, curl_global_init, CURLOPT_* +#endif + +#include // std::size_t +#include // std::uint16_t +#include // std::optional +#include // std::string +#include // std::string_view + +#ifndef SOURCEMETA_CORE_HTTP_USE_SYSTEM_CURL +#include // dlopen, dlsym, dlerror, RTLD_NOW + +#include // std::array +#include // std::getenv +#include // std::memcpy +#include // std::vector + +// When cURL is not linked at build time, we load it at runtime via dlopen +// and therefore need no curl headers. The following reproduces the small +// subset of libcurl's C API this backend uses. Every type, prototype, and +// option id is part of libcurl's frozen ABI (SONAME libcurl.so.4, stable +// since 2006), so these values never change. The prototypes are never +// called directly (we invoke them through dlsym'd pointers) nor linked; +// they exist only so the shared CurlApi table can derive their types +extern "C" { + +using CURL = void; +using CURLcode = int; +using CURLoption = int; +using CURLINFO = int; +using curl_off_t = long long; + +struct curl_slist; + +auto curl_global_init(long flags) -> CURLcode; +auto curl_easy_init() -> CURL *; +auto curl_easy_cleanup(CURL *handle) -> void; +auto curl_easy_setopt(CURL *handle, CURLoption option, ...) -> CURLcode; +auto curl_easy_perform(CURL *handle) -> CURLcode; +auto curl_easy_getinfo(CURL *handle, CURLINFO info, ...) -> CURLcode; +auto curl_easy_strerror(CURLcode code) -> const char *; +auto curl_slist_append(curl_slist *list, const char *value) -> curl_slist *; +auto curl_slist_free_all(curl_slist *list) -> void; + +} // extern "C" + +// Option ids are a type-class base plus an index in libcurl's headers; the +// resolved values are reproduced here (see the trailing comments) +constexpr CURLcode CURLE_OK{0}; +constexpr long CURL_GLOBAL_ALL{3}; // SSL(1<<0) | WIN32(1<<1) +constexpr CURLoption CURLOPT_URL{10002}; // STRINGPOINT + 2 +constexpr CURLoption CURLOPT_FOLLOWLOCATION{52}; // LONG + 52 +constexpr CURLoption CURLOPT_MAXREDIRS{68}; // LONG + 68 +constexpr CURLoption CURLOPT_NOSIGNAL{99}; // LONG + 99 +constexpr CURLoption CURLOPT_ACCEPT_ENCODING{10102}; // STRINGPOINT + 102 +constexpr CURLoption CURLOPT_TIMEOUT_MS{155}; // LONG + 155 +constexpr CURLoption CURLOPT_CONNECTTIMEOUT_MS{156}; // LONG + 156 +constexpr CURLoption CURLOPT_WRITEFUNCTION{20011}; // FUNCTIONPOINT + 11 +constexpr CURLoption CURLOPT_WRITEDATA{10001}; // CBPOINT + 1 +constexpr CURLoption CURLOPT_HEADERFUNCTION{20079}; // FUNCTIONPOINT + 79 +constexpr CURLoption CURLOPT_HEADERDATA{10029}; // CBPOINT + 29 +constexpr CURLoption CURLOPT_POSTFIELDSIZE_LARGE{30120}; // OFF_T + 120 +constexpr CURLoption CURLOPT_POSTFIELDS{10015}; // OBJECTPOINT + 15 +constexpr CURLoption CURLOPT_HTTPHEADER{10023}; // SLISTPOINT + 23 +constexpr CURLoption CURLOPT_NOBODY{44}; // LONG + 44 +constexpr CURLoption CURLOPT_CUSTOMREQUEST{10036}; // STRINGPOINT + 36 +constexpr CURLINFO CURLINFO_RESPONSE_CODE{2097154}; // CURLINFO_LONG(0x200000)+2 +constexpr CURLINFO CURLINFO_EFFECTIVE_URL{ + 1048577}; // CURLINFO_STRING(0x100000)+1 +#endif + +namespace { + +constexpr std::string_view HTTP_RESPONSE_TOO_LARGE_MESSAGE{ + "The response exceeds the maximum allowed size"}; + +// The subset of the libcurl C API this backend relies on, captured as +// function pointers so the request logic is shared between the link-time +// backend (SOURCEMETA_CORE_HTTP_USE_SYSTEM_CURL) and the default +// runtime-loaded (dlopen) backend. Member types are derived from the curl +// headers, so they stay in sync with the real prototypes +struct CurlApi { + decltype(&curl_global_init) global_init; + decltype(&curl_easy_init) easy_init; + decltype(&curl_easy_cleanup) easy_cleanup; + decltype(&curl_easy_setopt) easy_setopt; + decltype(&curl_easy_perform) easy_perform; + decltype(&curl_easy_getinfo) easy_getinfo; + decltype(&curl_easy_strerror) easy_strerror; + decltype(&curl_slist_append) slist_append; + decltype(&curl_slist_free_all) slist_free_all; +}; + +class CurlHandle { +public: + explicit CurlHandle(const CurlApi &api) + : api_{api}, handle_{api.easy_init()} {} + ~CurlHandle() { + if (this->handle_) { + this->api_.easy_cleanup(this->handle_); + } + } + + CurlHandle(const CurlHandle &) = delete; + auto operator=(const CurlHandle &) -> CurlHandle & = delete; + CurlHandle(CurlHandle &&) = delete; + auto operator=(CurlHandle &&) -> CurlHandle & = delete; + + [[nodiscard]] auto get() const -> CURL * { return this->handle_; } + explicit operator bool() const { return this->handle_ != nullptr; } + +private: + const CurlApi &api_; + CURL *handle_; +}; + +class CurlHeaderList { +public: + explicit CurlHeaderList(const CurlApi &api) : api_{api} {} + ~CurlHeaderList() { + if (this->list_) { + this->api_.slist_free_all(this->list_); + } + } + + CurlHeaderList(const CurlHeaderList &) = delete; + auto operator=(const CurlHeaderList &) -> CurlHeaderList & = delete; + CurlHeaderList(CurlHeaderList &&) = delete; + auto operator=(CurlHeaderList &&) -> CurlHeaderList & = delete; + + auto append(const std::string &line) -> void { + auto *result{this->api_.slist_append(this->list_, line.c_str())}; + if (result) { + this->list_ = result; + } + } + + [[nodiscard]] auto get() const -> curl_slist * { return this->list_; } + +private: + const CurlApi &api_; + curl_slist *list_{nullptr}; +}; + +struct BodyContext { + std::string *output; + std::optional maximum_size; + bool maximum_size_exceeded{false}; +}; + +auto body_callback(char *data, std::size_t size, std::size_t count, + void *user_data) -> std::size_t { + auto *context{static_cast(user_data)}; + const std::size_t chunk{size * count}; + if (context->maximum_size.has_value() && + (context->output->size() > context->maximum_size.value() || + chunk > context->maximum_size.value() - context->output->size())) { + context->maximum_size_exceeded = true; + // Returning a smaller count than given aborts the transfer + return 0; + } + + context->output->append(data, chunk); + return chunk; +} + +auto header_callback(char *data, std::size_t size, std::size_t count, + void *output) -> std::size_t { + sourcemeta::core::http_accumulate_header_line( + *static_cast(output), + std::string_view{data, size * count}); + return size * count; +} + +#ifndef SOURCEMETA_CORE_HTTP_USE_SYSTEM_CURL + +using sourcemeta::core::HTTPSystemBackendError; + +constexpr std::string_view CURL_LIBRARY_ENV{"SOURCEMETA_CORE_CURL_SO"}; + +// Tried in order. Every entry carries the `.so.4` SONAME so we only ever +// bind an ABI-compatible cURL (never the unversioned `libcurl.so` dev +// symlink, which could point at a different major version). The bare +// soname is first because it resolves through the dynamic linker (ld.so +// cache on glibc, default /lib:/usr/lib on musl) and is present on every +// mainstream distribution. The absolute entries are fallbacks for +// environments where the cache is absent (custom prefixes, ldconfig not +// run). The trailing GnuTLS entry is an ABI-compatible last resort for +// minimal Debian and Ubuntu systems that ship only that build +constexpr std::array CURL_CANDIDATE_PATHS{ + {"libcurl.so.4", "/usr/lib/x86_64-linux-gnu/libcurl.so.4", + "/usr/lib/aarch64-linux-gnu/libcurl.so.4", + "/usr/lib/arm-linux-gnueabihf/libcurl.so.4", + "/usr/lib/i386-linux-gnu/libcurl.so.4", "/usr/lib64/libcurl.so.4", + "/lib64/libcurl.so.4", "/usr/lib/libcurl.so.4", + "/usr/local/lib/libcurl.so.4", "libcurl-gnutls.so.4"}}; + +struct ResolvedLibrary { + void *handle; + std::string path; +}; + +template +auto resolve_symbol(const ResolvedLibrary &library, const char *name) + -> Signature { + // Clear any stale error, then distinguish a null symbol from a null value + dlerror(); + void *symbol{dlsym(library.handle, name)}; + if (dlerror() != nullptr) { + throw HTTPSystemBackendError{ + "The cURL library was loaded but does not provide the expected API", + std::string{CURL_LIBRARY_ENV}, + {library.path}}; + } + + // Copy the pointer representation instead of reinterpret_cast, which the + // standard only conditionally supports for object-to-function conversions. + // POSIX guarantees dlsym results are convertible to function pointers + Signature function{}; + std::memcpy(&function, &symbol, sizeof(function)); + return function; +} + +auto open_library() -> ResolvedLibrary { + if (const auto *configured_path{std::getenv(CURL_LIBRARY_ENV.data())}; + configured_path != nullptr && configured_path[0] != '\0') { + if (auto *handle{dlopen(configured_path, RTLD_NOW)}; handle != nullptr) { + return {handle, configured_path}; + } + + throw HTTPSystemBackendError{ + "Could not load the cURL library from the configured path", + std::string{CURL_LIBRARY_ENV}, + {std::string{configured_path}}}; + } + + for (const auto candidate : CURL_CANDIDATE_PATHS) { + if (auto *handle{dlopen(candidate.data(), RTLD_NOW)}; handle != nullptr) { + return {handle, std::string{candidate}}; + } + } + + std::vector searched; + searched.reserve(CURL_CANDIDATE_PATHS.size()); + for (const auto candidate : CURL_CANDIDATE_PATHS) { + searched.emplace_back(candidate); + } + + throw HTTPSystemBackendError{ + "Could not find the system cURL library (libcurl)", + std::string{CURL_LIBRARY_ENV}, std::move(searched)}; +} + +auto load_curl() -> const CurlApi & { + // The handle is intentionally never dlclose()d: the function pointers + // must remain valid for the lifetime of the process + static const ResolvedLibrary library{open_library()}; + static const CurlApi api{ + .global_init = resolve_symbol( + library, "curl_global_init"), + .easy_init = resolve_symbol( + library, "curl_easy_init"), + .easy_cleanup = resolve_symbol( + library, "curl_easy_cleanup"), + .easy_setopt = resolve_symbol( + library, "curl_easy_setopt"), + .easy_perform = resolve_symbol( + library, "curl_easy_perform"), + .easy_getinfo = resolve_symbol( + library, "curl_easy_getinfo"), + .easy_strerror = resolve_symbol( + library, "curl_easy_strerror"), + .slist_append = resolve_symbol( + library, "curl_slist_append"), + .slist_free_all = resolve_symbol( + library, "curl_slist_free_all")}; + return api; +} + +#endif + +auto acquire_api() -> const CurlApi & { +#ifdef SOURCEMETA_CORE_HTTP_USE_SYSTEM_CURL + static const CurlApi api{.global_init = &curl_global_init, + .easy_init = &curl_easy_init, + .easy_cleanup = &curl_easy_cleanup, + .easy_setopt = &curl_easy_setopt, + .easy_perform = &curl_easy_perform, + .easy_getinfo = &curl_easy_getinfo, + .easy_strerror = &curl_easy_strerror, + .slist_append = &curl_slist_append, + .slist_free_all = &curl_slist_free_all}; + return api; +#else + return load_curl(); +#endif +} + +} // namespace + +namespace sourcemeta::core { + +auto HTTPSystemRequest::send() const -> HTTPResponse { + const CurlApi &api{acquire_api()}; + + static const CURLcode global_initialization{api.global_init(CURL_GLOBAL_ALL)}; + if (global_initialization != CURLE_OK) { + throw HTTPError{this->method_, this->url_, + api.easy_strerror(global_initialization)}; + } + + const CurlHandle handle{api}; + if (!handle) { + throw HTTPError{this->method_, this->url_, + "Failed to initialise the HTTP client"}; + } + + HTTPResponse response; + api.easy_setopt(handle.get(), CURLOPT_URL, this->url_.c_str()); + api.easy_setopt(handle.get(), CURLOPT_FOLLOWLOCATION, + this->follow_redirects_ ? 1L : 0L); + if (this->follow_redirects_) { + api.easy_setopt(handle.get(), CURLOPT_MAXREDIRS, + static_cast(this->maximum_redirects_)); + } + + api.easy_setopt(handle.get(), CURLOPT_NOSIGNAL, 1L); + api.easy_setopt(handle.get(), CURLOPT_TIMEOUT_MS, + static_cast(this->timeout_.count())); + if (this->connect_timeout_.has_value()) { + api.easy_setopt(handle.get(), CURLOPT_CONNECTTIMEOUT_MS, + static_cast(this->connect_timeout_.value().count())); + } + + // Advertise and transparently decode all supported content encodings, + // matching what the NSURLSession and WinHTTP backends do + api.easy_setopt(handle.get(), CURLOPT_ACCEPT_ENCODING, ""); + + std::string raw_headers; + BodyContext body_context{.output = &response.body, + .maximum_size = this->maximum_response_size_}; + api.easy_setopt(handle.get(), CURLOPT_WRITEFUNCTION, body_callback); + api.easy_setopt(handle.get(), CURLOPT_WRITEDATA, &body_context); + api.easy_setopt(handle.get(), CURLOPT_HEADERFUNCTION, header_callback); + api.easy_setopt(handle.get(), CURLOPT_HEADERDATA, &raw_headers); + + CurlHeaderList header_list{api}; + for (const auto &[name, value] : this->headers_) { + std::string line{name}; + // The semicolon form is how cURL distinguishes a header with an + // empty value from a header to suppress + if (value.empty()) { + line += ";"; + } else { + line += ": "; + line += value; + } + + header_list.append(line); + } + + if (this->body_.has_value()) { + std::string content_type_line{"Content-Type: "}; + content_type_line += this->body_.value().content_type; + header_list.append(content_type_line); + api.easy_setopt(handle.get(), CURLOPT_POSTFIELDSIZE_LARGE, + static_cast(this->body_.value().data.size())); + api.easy_setopt(handle.get(), CURLOPT_POSTFIELDS, + this->body_.value().data.data()); + } + + if (header_list.get()) { + api.easy_setopt(handle.get(), CURLOPT_HTTPHEADER, header_list.get()); + } + + const std::string method{http_method_string(this->method_)}; + if (this->method_ == HTTPMethod::HEAD) { + api.easy_setopt(handle.get(), CURLOPT_NOBODY, 1L); + } else if (this->method_ != HTTPMethod::GET || this->body_.has_value()) { + api.easy_setopt(handle.get(), CURLOPT_CUSTOMREQUEST, method.c_str()); + } + + const auto code{api.easy_perform(handle.get())}; + if (code != CURLE_OK) { + if (body_context.maximum_size_exceeded) { + throw HTTPError{this->method_, this->url_, + std::string{HTTP_RESPONSE_TOO_LARGE_MESSAGE}}; + } + + throw HTTPError{this->method_, this->url_, api.easy_strerror(code)}; + } + + long status_code{0}; + api.easy_getinfo(handle.get(), CURLINFO_RESPONSE_CODE, &status_code); + char *effective_url{nullptr}; + api.easy_getinfo(handle.get(), CURLINFO_EFFECTIVE_URL, &effective_url); + if (effective_url != nullptr) { + response.url.assign(effective_url); + } + + http_parse_headers(raw_headers, response.headers); + response.status = + http_status_from_code(static_cast(status_code)); + return response; +} + +} // namespace sourcemeta::core diff --git a/src/core/http/client_darwin.mm b/src/core/http/client_darwin.mm new file mode 100644 index 000000000..84d4b731c --- /dev/null +++ b/src/core/http/client_darwin.mm @@ -0,0 +1,194 @@ +#include +#include + +// NSURL, NSMutableURLRequest, NSURLSession, NSHTTPURLResponse, dispatch_* +#import + +#include // std::size_t +#include // std::uint16_t +#include // std::string +#include // std::string_view +#include // std::move + +namespace { + +constexpr std::string_view HTTP_RESPONSE_TOO_LARGE_MESSAGE{ + "The response exceeds the maximum allowed size"}; + +auto to_nsstring(const std::string_view input) -> NSString * { + return [[NSString alloc] initWithBytes:input.data() + length:input.size() + encoding:NSUTF8StringEncoding]; +} + +} // namespace + +// The delegate-based API streams the response body in chunks, allowing +// the maximum response size to be enforced without first buffering the +// entire response in memory +@interface SourcemetaCoreHTTPDelegate : NSObject +@property(nonatomic, assign) sourcemeta::core::HTTPResponse *response; +@property(nonatomic, assign) std::string *failure; +@property(nonatomic, strong) dispatch_semaphore_t semaphore; +@property(nonatomic, assign) BOOL hasMaximumResponseSize; +@property(nonatomic, assign) std::size_t maximumResponseSize; +@property(nonatomic, assign) BOOL followRedirects; +@property(nonatomic, assign) std::size_t maximumRedirects; +@property(nonatomic, assign) std::size_t redirectCount; +@end + +@implementation SourcemetaCoreHTTPDelegate + +- (void)URLSession:(NSURLSession *)session + task:(NSURLSessionTask *)task + willPerformHTTPRedirection:(NSHTTPURLResponse *)response + newRequest:(NSURLRequest *)request + completionHandler: + (void (^)(NSURLRequest *))completionHandler { + // Passing a nil request stops the redirection and delivers the redirect + // response itself as the final response + if (!self.followRedirects) { + completionHandler(nil); + return; + } + + self.redirectCount += 1; + if (self.redirectCount > self.maximumRedirects) { + self.failure->assign("The maximum number of redirects was exceeded"); + [task cancel]; + completionHandler(nil); + return; + } + + completionHandler(request); +} + +- (void)URLSession:(NSURLSession *)session + dataTask:(NSURLSessionDataTask *)dataTask + didReceiveData:(NSData *)data { + auto *body{&self.response->body}; + if (self.hasMaximumResponseSize && + (body->size() > self.maximumResponseSize || + static_cast(data.length) > + self.maximumResponseSize - body->size())) { + self.failure->assign(HTTP_RESPONSE_TOO_LARGE_MESSAGE); + [dataTask cancel]; + return; + } + + [data enumerateByteRangesUsingBlock:^(const void *bytes, NSRange range, + BOOL *) { + body->append(static_cast(bytes), range.length); + }]; +} + +- (void)URLSession:(NSURLSession *)session + task:(NSURLSessionTask *)task + didCompleteWithError:(NSError *)error { + // A failure recorded while streaming, such as exceeding the maximum + // response size, takes precedence over the resulting cancellation error + if (self.failure->empty()) { + if (error != nil) { + self.failure->assign([error.localizedDescription UTF8String]); + } else if (![task.response isKindOfClass:[NSHTTPURLResponse class]]) { + self.failure->assign("The response is not an HTTP response"); + } else { + const auto *http_response{(NSHTTPURLResponse *)task.response}; + self.response->status = sourcemeta::core::http_status_from_code( + static_cast(http_response.statusCode)); + if (http_response.URL != nil) { + self.response->url.assign([http_response.URL.absoluteString UTF8String]); + } + + auto *headers{&self.response->headers}; + [http_response.allHeaderFields + enumerateKeysAndObjectsUsingBlock:^(NSString *name, NSString *value, + BOOL *) { + std::string header_name{[name UTF8String]}; + sourcemeta::core::to_lowercase(header_name); + headers->emplace_back(std::move(header_name), [value UTF8String]); + }]; + } + } + + dispatch_semaphore_signal(self.semaphore); +} + +@end + +namespace sourcemeta::core { + +auto HTTPSystemRequest::send() const -> HTTPResponse { + HTTPResponse response; + // The delegate runs on a background queue, where throwing would + // terminate the process, so failures are recorded here and thrown + // from the calling thread once the request settles + std::string failure; + + @autoreleasepool { + NSURL *target{[NSURL URLWithString:to_nsstring(this->url_)]}; + if (target == nil) { + failure = "Invalid URL"; + } else { + NSMutableURLRequest *url_request{ + [NSMutableURLRequest requestWithURL:target]}; + url_request.HTTPMethod = to_nsstring(http_method_string(this->method_)); + for (const auto &[name, value] : this->headers_) { + // Repeated headers are folded into a single comma-separated field + // line, which is semantically equivalent per RFC 9110 + [url_request addValue:to_nsstring(value) + forHTTPHeaderField:to_nsstring(name)]; + } + + if (this->body_.has_value()) { + [url_request setValue:to_nsstring(this->body_.value().content_type) + forHTTPHeaderField:@"Content-Type"]; + url_request.HTTPBody = + [NSData dataWithBytes:this->body_.value().data.data() + length:this->body_.value().data.size()]; + } + + NSURLSessionConfiguration *configuration{ + [NSURLSessionConfiguration ephemeralSessionConfiguration]}; + configuration.timeoutIntervalForResource = + static_cast(this->timeout_.count()) / 1000.0; + if (this->connect_timeout_.has_value()) { + configuration.timeoutIntervalForRequest = + static_cast(this->connect_timeout_.value().count()) / + 1000.0; + } + + // The delegate completes before the semaphore is signalled, so + // pointing to the stack-allocated locals from it is safe + SourcemetaCoreHTTPDelegate *delegate{ + [[SourcemetaCoreHTTPDelegate alloc] init]}; + delegate.response = &response; + delegate.failure = &failure; + delegate.semaphore = dispatch_semaphore_create(0); + delegate.hasMaximumResponseSize = + this->maximum_response_size_.has_value() ? YES : NO; + delegate.maximumResponseSize = + this->maximum_response_size_.value_or(0); + delegate.followRedirects = this->follow_redirects_ ? YES : NO; + delegate.maximumRedirects = this->maximum_redirects_; + delegate.redirectCount = 0; + + NSURLSession *session{ + [NSURLSession sessionWithConfiguration:configuration + delegate:delegate + delegateQueue:nil]}; + NSURLSessionDataTask *task{[session dataTaskWithRequest:url_request]}; + [task resume]; + dispatch_semaphore_wait(delegate.semaphore, DISPATCH_TIME_FOREVER); + [session finishTasksAndInvalidate]; + } + } + + if (!failure.empty()) { + throw HTTPError{this->method_, this->url_, failure}; + } + + return response; +} + +} // namespace sourcemeta::core diff --git a/src/core/http/client_windows.cc b/src/core/http/client_windows.cc new file mode 100644 index 000000000..609115db2 --- /dev/null +++ b/src/core/http/client_windows.cc @@ -0,0 +1,277 @@ +#include + +#ifndef NOMINMAX +#define NOMINMAX +#endif +#ifndef WIN32_LEAN_AND_MEAN +#define WIN32_LEAN_AND_MEAN +#endif + +#include // DWORD, GetLastError, LPVOID +#include // WinHttp* + +// `windows.h` defines a `DELETE` macro that conflicts with +// `sourcemeta::core::HTTPMethod::DELETE` +#ifdef DELETE +#undef DELETE +#endif + +#include + +#include // std::chrono::milliseconds +#include // std::size_t +#include // std::uint16_t +#include // std::numeric_limits +#include // std::string, std::wstring +#include // std::wstring_view +#include // std::pair +#include // std::vector + +namespace { + +constexpr std::string_view HTTP_RESPONSE_TOO_LARGE_MESSAGE{ + "The response exceeds the maximum allowed size"}; + +// WinHttpSetTimeouts takes signed millisecond counts where zero requests no +// timeout. Floor non-positive durations to the smallest bound so a misused +// value cannot become an unbounded wait, and saturate large ones to avoid a +// narrowing wrap +auto to_winhttp_timeout(const std::chrono::milliseconds value) -> int { + if (value.count() <= 0) { + return 1; + } + + if (value.count() > std::numeric_limits::max()) { + return std::numeric_limits::max(); + } + + return static_cast(value.count()); +} + +class WinHTTPHandle { +public: + WinHTTPHandle(const HINTERNET handle) : handle_{handle} {} + ~WinHTTPHandle() { + if (this->handle_) { + WinHttpCloseHandle(this->handle_); + } + } + + WinHTTPHandle(const WinHTTPHandle &) = delete; + auto operator=(const WinHTTPHandle &) -> WinHTTPHandle & = delete; + WinHTTPHandle(WinHTTPHandle &&) = delete; + auto operator=(WinHTTPHandle &&) -> WinHTTPHandle & = delete; + + auto get() const -> HINTERNET { return this->handle_; } + explicit operator bool() const { return this->handle_ != nullptr; } + +private: + HINTERNET handle_; +}; + +auto parse_response_headers( + const HINTERNET request, + std::vector> &headers) -> void { + DWORD size{0}; + WinHttpQueryHeaders(request, WINHTTP_QUERY_RAW_HEADERS_CRLF, + WINHTTP_HEADER_NAME_BY_INDEX, WINHTTP_NO_OUTPUT_BUFFER, + &size, WINHTTP_NO_HEADER_INDEX); + if (GetLastError() != ERROR_INSUFFICIENT_BUFFER) { + return; + } + + std::wstring buffer(size / sizeof(wchar_t), L'\0'); + if (!WinHttpQueryHeaders(request, WINHTTP_QUERY_RAW_HEADERS_CRLF, + WINHTTP_HEADER_NAME_BY_INDEX, buffer.data(), &size, + WINHTTP_NO_HEADER_INDEX)) { + return; + } + + sourcemeta::core::http_parse_headers(sourcemeta::core::wide_to_utf8(buffer), + headers); +} + +auto query_effective_url(const HINTERNET request) -> std::string { + DWORD size{0}; + WinHttpQueryOption(request, WINHTTP_OPTION_URL, nullptr, &size); + if (GetLastError() != ERROR_INSUFFICIENT_BUFFER || size == 0) { + return {}; + } + + std::wstring buffer(size / sizeof(wchar_t), L'\0'); + if (!WinHttpQueryOption(request, WINHTTP_OPTION_URL, buffer.data(), &size)) { + return {}; + } + + buffer.resize(size / sizeof(wchar_t)); + if (!buffer.empty() && buffer.back() == L'\0') { + buffer.pop_back(); + } + + return sourcemeta::core::wide_to_utf8(buffer); +} + +} // namespace + +namespace sourcemeta::core { + +auto HTTPSystemRequest::send() const -> HTTPResponse { + HTTPResponse response; + + const auto wide_url{sourcemeta::core::utf8_to_wide(this->url_)}; + URL_COMPONENTS components{}; + components.dwStructSize = sizeof(components); + components.dwHostNameLength = static_cast(-1); + components.dwUrlPathLength = static_cast(-1); + components.dwExtraInfoLength = static_cast(-1); + if (!WinHttpCrackUrl(wide_url.c_str(), 0, 0, &components)) { + throw HTTPError{this->method_, this->url_, "Invalid URL"}; + } + + const std::wstring host{components.lpszHostName, components.dwHostNameLength}; + std::wstring path{components.lpszUrlPath, components.dwUrlPathLength}; + if (components.lpszExtraInfo) { + // The fragment, if any, must never be sent to the server + const std::wstring_view extra_information{components.lpszExtraInfo, + components.dwExtraInfoLength}; + path.append(extra_information.substr(0, extra_information.find(L'#'))); + } + + const WinHTTPHandle session{ + WinHttpOpen(nullptr, WINHTTP_ACCESS_TYPE_AUTOMATIC_PROXY, + WINHTTP_NO_PROXY_NAME, WINHTTP_NO_PROXY_BYPASS, 0)}; + if (!session) { + throw HTTPError{this->method_, this->url_, + "Failed to initialise the HTTP client"}; + } + + const WinHTTPHandle connection{ + WinHttpConnect(session.get(), host.c_str(), components.nPort, 0)}; + if (!connection) { + throw HTTPError{this->method_, this->url_, "Failed to connect to the host"}; + } + + const auto secure{components.nScheme == INTERNET_SCHEME_HTTPS}; + const auto method{ + sourcemeta::core::utf8_to_wide(http_method_string(this->method_))}; + const WinHTTPHandle request_handle{WinHttpOpenRequest( + connection.get(), method.c_str(), path.c_str(), nullptr, + WINHTTP_NO_REFERER, WINHTTP_DEFAULT_ACCEPT_TYPES, + secure ? WINHTTP_FLAG_SECURE : 0)}; + if (!request_handle) { + throw HTTPError{this->method_, this->url_, + "Failed to create the HTTP request"}; + } + + if (this->follow_redirects_) { + DWORD maximum_redirects{static_cast(this->maximum_redirects_)}; + WinHttpSetOption(request_handle.get(), + WINHTTP_OPTION_MAX_HTTP_AUTOMATIC_REDIRECTS, + &maximum_redirects, sizeof(maximum_redirects)); + } else { + DWORD policy{WINHTTP_OPTION_REDIRECT_POLICY_NEVER}; + WinHttpSetOption(request_handle.get(), WINHTTP_OPTION_REDIRECT_POLICY, + &policy, sizeof(policy)); + } + + // The total timeout bounds sending the request and receiving the response, + // and also caps the resolution and connection phases unless a narrower + // connect timeout is given, so it acts as an overall ceiling + const auto total_timeout{to_winhttp_timeout(this->timeout_)}; + const auto connect_timeout{ + this->connect_timeout_.has_value() + ? to_winhttp_timeout(this->connect_timeout_.value()) + : total_timeout}; + WinHttpSetTimeouts(request_handle.get(), connect_timeout, connect_timeout, + total_timeout, total_timeout); + + DWORD decompression{WINHTTP_DECOMPRESSION_FLAG_ALL}; + WinHttpSetOption(request_handle.get(), WINHTTP_OPTION_DECOMPRESSION, + &decompression, sizeof(decompression)); + + auto serialized_headers{http_serialize_headers(this->headers_)}; + LPVOID body_data{WINHTTP_NO_REQUEST_DATA}; + DWORD body_size{0}; + if (this->body_.has_value()) { + if (this->body_.value().data.size() > std::numeric_limits::max()) { + throw HTTPError{this->method_, this->url_, + "The request body is too large"}; + } + + serialized_headers += "Content-Type: "; + serialized_headers += this->body_.value().content_type; + serialized_headers += "\r\n"; + body_data = const_cast(this->body_.value().data.data()); + body_size = static_cast(this->body_.value().data.size()); + } + + const auto request_headers{ + sourcemeta::core::utf8_to_wide(serialized_headers)}; + + if (!WinHttpSendRequest( + request_handle.get(), + request_headers.empty() ? WINHTTP_NO_ADDITIONAL_HEADERS + : request_headers.c_str(), + request_headers.empty() ? 0 + : static_cast(request_headers.size()), + body_data, body_size, body_size, 0)) { + throw HTTPError{this->method_, this->url_, + "Failed to send the HTTP request"}; + } + + if (!WinHttpReceiveResponse(request_handle.get(), nullptr)) { + throw HTTPError{this->method_, this->url_, + "Failed to receive the HTTP response"}; + } + + DWORD status_code{0}; + DWORD status_code_size{sizeof(status_code)}; + if (!WinHttpQueryHeaders(request_handle.get(), + WINHTTP_QUERY_STATUS_CODE | + WINHTTP_QUERY_FLAG_NUMBER, + WINHTTP_HEADER_NAME_BY_INDEX, &status_code, + &status_code_size, WINHTTP_NO_HEADER_INDEX)) { + throw HTTPError{this->method_, this->url_, + "Failed to read the HTTP response status"}; + } + + parse_response_headers(request_handle.get(), response.headers); + response.url = query_effective_url(request_handle.get()); + + while (true) { + DWORD available{0}; + if (!WinHttpQueryDataAvailable(request_handle.get(), &available)) { + throw HTTPError{this->method_, this->url_, + "Failed to read the HTTP response body"}; + } + + if (available == 0) { + break; + } + + if (this->maximum_response_size_.has_value() && + (response.body.size() > this->maximum_response_size_.value() || + available > + this->maximum_response_size_.value() - response.body.size())) { + throw HTTPError{this->method_, this->url_, + std::string{HTTP_RESPONSE_TOO_LARGE_MESSAGE}}; + } + + const auto offset{response.body.size()}; + response.body.resize(offset + available); + DWORD read{0}; + if (!WinHttpReadData(request_handle.get(), response.body.data() + offset, + available, &read)) { + throw HTTPError{this->method_, this->url_, + "Failed to read the HTTP response body"}; + } + + response.body.resize(offset + read); + } + + response.status = + http_status_from_code(static_cast(status_code)); + return response; +} + +} // namespace sourcemeta::core diff --git a/src/core/http/include/sourcemeta/core/http.h b/src/core/http/include/sourcemeta/core/http.h index 59028c611..f572a6e32 100644 --- a/src/core/http/include/sourcemeta/core/http.h +++ b/src/core/http/include/sourcemeta/core/http.h @@ -11,6 +11,7 @@ #include #include #include +#include // NOLINTEND(misc-include-cleaner) #include // std::chrono::system_clock diff --git a/src/core/http/include/sourcemeta/core/http_system.h b/src/core/http/include/sourcemeta/core/http_system.h new file mode 100644 index 000000000..2d85dcbff --- /dev/null +++ b/src/core/http/include/sourcemeta/core/http_system.h @@ -0,0 +1,192 @@ +#ifndef SOURCEMETA_CORE_HTTP_SYSTEM_H_ +#define SOURCEMETA_CORE_HTTP_SYSTEM_H_ + +#ifndef SOURCEMETA_CORE_HTTP_EXPORT +#include +#endif + +#include +#include + +#include // std::chrono::milliseconds, std::chrono::seconds +#include // std::size_t +#include // std::optional +#include // std::runtime_error +#include // std::string +#include // std::move, std::pair +#include // std::vector + +namespace sourcemeta::core { + +// Exporting symbols that depends on the standard C++ library is considered +// safe. +// https://learn.microsoft.com/en-us/cpp/error-messages/compiler-warnings/compiler-warning-level-2-c4275?view=msvc-170&redirectedfrom=MSDN +#if defined(_MSC_VER) +#pragma warning(disable : 4251 4275) +#endif + +/// @ingroup http +/// The result of performing a request against a system HTTP backend. For +/// example: +/// +/// ```cpp +/// #include +/// #include +/// +/// sourcemeta::core::HTTPSystemRequest request{"https://example.com"}; +/// const auto response{request.send()}; +/// assert(response.status == sourcemeta::core::HTTP_STATUS_OK); +/// ``` +struct HTTPResponse { + /// The response status code + HTTPStatus status{}; + /// The response headers, with names normalised to lowercase. Repeated + /// headers are preserved as separate entries, except on backends that fold + /// them into a single comma-separated entry, which is semantically + /// equivalent per RFC 9110 + std::vector> headers; + /// The response body, owned by this result + std::string body; + /// The effective URL after any followed redirects + std::string url; +}; + +/// @ingroup http +/// An error that prevented loading the underlying system HTTP backend, such +/// as a missing dynamically loaded library. For example: +/// +/// ```cpp +/// #include +/// #include +/// +/// const sourcemeta::core::HTTPSystemBackendError error{ +/// "Could not find the system cURL library", "SOURCEMETA_CORE_CURL_SO", +/// {"libcurl.so.4"}}; +/// assert(error.variable() == "SOURCEMETA_CORE_CURL_SO"); +/// ``` +class SOURCEMETA_CORE_HTTP_EXPORT HTTPSystemBackendError + : public std::runtime_error { +public: + HTTPSystemBackendError(const std::string &message, std::string variable, + std::vector paths) + : std::runtime_error{message}, variable_{std::move(variable)}, + paths_{std::move(paths)} {} + + /// Get the name of the environment variable that overrides the backend path + [[nodiscard]] auto variable() const noexcept -> const std::string & { + return this->variable_; + } + + /// Get the paths that were searched while looking for the backend + [[nodiscard]] auto paths() const noexcept + -> const std::vector & { + return this->paths_; + } + +private: + std::string variable_; + std::vector paths_; +}; + +/// @ingroup http +/// A simple cross-platform HTTP request that delegates to the system HTTP +/// stack, NSURLSession on Apple platforms, WinHTTP on Windows, and cURL +/// everywhere else. The request owns its data, configure it with the builder +/// methods and perform it with `send`. For example: +/// +/// ```cpp +/// #include +/// #include +/// +/// sourcemeta::core::HTTPSystemRequest request{ +/// "https://example.com", sourcemeta::core::HTTPMethod::POST}; +/// request.header("Accept", "application/json"); +/// request.body("{}", "application/json"); +/// const auto response{request.send()}; +/// assert(response.status == sourcemeta::core::HTTP_STATUS_OK); +/// ``` +class SOURCEMETA_CORE_HTTP_EXPORT HTTPSystemRequest { +public: + explicit HTTPSystemRequest(std::string url, + const HTTPMethod method = HTTPMethod::GET) + : url_{std::move(url)}, method_{method} {} + + /// Set the request method + auto method(const HTTPMethod method) -> HTTPSystemRequest & { + this->method_ = method; + return *this; + } + + /// Add a request header. Repeated names are permitted + auto header(std::string name, std::string value) -> HTTPSystemRequest & { + this->headers_.emplace_back(std::move(name), std::move(value)); + return *this; + } + + /// Set the request body, sent along with the given `Content-Type` header + auto body(std::string data, std::string content_type) -> HTTPSystemRequest & { + this->body_ = + Body{.data = std::move(data), .content_type = std::move(content_type)}; + return *this; + } + + /// Set whether to follow redirects, on by default + auto follow_redirects(const bool value) -> HTTPSystemRequest & { + this->follow_redirects_ = value; + return *this; + } + + /// Set the maximum number of redirects to follow, 20 by default + auto maximum_redirects(const std::size_t value) -> HTTPSystemRequest & { + this->maximum_redirects_ = value; + return *this; + } + + /// Set the total request timeout, 30 seconds by default + auto timeout(const std::chrono::milliseconds value) -> HTTPSystemRequest & { + this->timeout_ = value; + return *this; + } + + /// Set a best-effort timeout for establishing the connection, applied as + /// each backend allows and falling back to the backend default when unset + auto connect_timeout(const std::chrono::milliseconds value) + -> HTTPSystemRequest & { + this->connect_timeout_ = value; + return *this; + } + + /// Abort with an error if the response body exceeds this number of bytes + auto maximum_response_size(const std::size_t value) -> HTTPSystemRequest & { + this->maximum_response_size_ = value; + return *this; + } + + /// Perform the request. A failure to obtain a response is reported as an + /// error, while unsuccessful status codes are returned on the result + [[nodiscard]] auto send() const -> HTTPResponse; + +private: + struct Body { + std::string data; + std::string content_type; + }; + + std::string url_; + HTTPMethod method_; + std::vector> headers_; + std::optional body_; + bool follow_redirects_{true}; + std::size_t maximum_redirects_{20}; + std::chrono::milliseconds timeout_{std::chrono::seconds{30}}; + std::optional connect_timeout_; + std::optional maximum_response_size_; +}; + +#if defined(_MSC_VER) +#pragma warning(default : 4251 4275) +#endif + +} // namespace sourcemeta::core + +#endif diff --git a/test/http/ci/CMakeLists.txt b/test/http/ci/CMakeLists.txt new file mode 100644 index 000000000..bfbf6abf0 --- /dev/null +++ b/test/http/ci/CMakeLists.txt @@ -0,0 +1,7 @@ +sourcemeta_googletest(NAMESPACE sourcemeta PROJECT core NAME http_ci + SOURCES http_system_request_test.cc) + +target_link_libraries(sourcemeta_core_http_ci_unit + PRIVATE sourcemeta::core::http) +target_link_libraries(sourcemeta_core_http_ci_unit + PRIVATE sourcemeta::core::json) diff --git a/test/http/ci/http_system_request_test.cc b/test/http/ci/http_system_request_test.cc new file mode 100644 index 000000000..af8e5d105 --- /dev/null +++ b/test/http/ci/http_system_request_test.cc @@ -0,0 +1,135 @@ +#include + +#include +#include + +#include // std::chrono::milliseconds + +TEST(HTTP_SystemRequest, get_health_ok) { + sourcemeta::core::HTTPSystemRequest request{ + "https://schemas.sourcemeta.com/self/v1/health"}; + const auto response{request.send()}; + EXPECT_EQ(response.status, sourcemeta::core::HTTP_STATUS_OK); + // Backends may normalise the effective URL differently, so match its + // meaningful prefix rather than an exact string + EXPECT_TRUE(response.url.starts_with( + "https://schemas.sourcemeta.com/self/v1/health")); +} + +TEST(HTTP_SystemRequest, head_health_ok) { + sourcemeta::core::HTTPSystemRequest request{ + "https://schemas.sourcemeta.com/self/v1/health", + sourcemeta::core::HTTPMethod::HEAD}; + const auto response{request.send()}; + EXPECT_EQ(response.status, sourcemeta::core::HTTP_STATUS_OK); + EXPECT_TRUE(response.body.empty()); +} + +TEST(HTTP_SystemRequest, get_list_json_body) { + sourcemeta::core::HTTPSystemRequest request{ + "https://schemas.sourcemeta.com/self/v1/api/list"}; + const auto response{request.send()}; + EXPECT_EQ(response.status, sourcemeta::core::HTTP_STATUS_OK); + const auto content_type{ + sourcemeta::core::http_header_find(response.headers, "content-type")}; + ASSERT_TRUE(content_type.has_value()); + EXPECT_NE(content_type.value().find("application/json"), + std::string_view::npos); + const auto body{sourcemeta::core::parse_json(response.body)}; + EXPECT_TRUE(body.is_object()); + EXPECT_TRUE(body.defines("entries")); +} + +TEST(HTTP_SystemRequest, get_missing_path_not_found) { + sourcemeta::core::HTTPSystemRequest request{ + "https://schemas.sourcemeta.com/self/v1/api/list/" + "this-directory-does-not-exist-xyz"}; + const auto response{request.send()}; + EXPECT_EQ(response.status, sourcemeta::core::HTTP_STATUS_NOT_FOUND); + const auto content_type{ + sourcemeta::core::http_header_find(response.headers, "content-type")}; + ASSERT_TRUE(content_type.has_value()); + EXPECT_NE(content_type.value().find("application/problem+json"), + std::string_view::npos); +} + +TEST(HTTP_SystemRequest, post_health_method_not_allowed) { + sourcemeta::core::HTTPSystemRequest request{ + "https://schemas.sourcemeta.com/self/v1/health", + sourcemeta::core::HTTPMethod::POST}; + const auto response{request.send()}; + EXPECT_EQ(response.status, sourcemeta::core::HTTP_STATUS_METHOD_NOT_ALLOWED); +} + +TEST(HTTP_SystemRequest, post_evaluate_with_body) { + sourcemeta::core::HTTPSystemRequest request{ + "https://schemas.sourcemeta.com/self/v1/api/schemas/evaluate/" + "cloudevents/v1.0.2/cloudevents", + sourcemeta::core::HTTPMethod::POST}; + request.body("{}", "application/json"); + const auto response{request.send()}; + EXPECT_EQ(response.status, sourcemeta::core::HTTP_STATUS_OK); + const auto body{sourcemeta::core::parse_json(response.body)}; + EXPECT_TRUE(body.is_object()); + ASSERT_TRUE(body.defines("valid")); + EXPECT_TRUE(body.at("valid").is_boolean()); + EXPECT_FALSE(body.at("valid").to_boolean()); +} + +TEST(HTTP_SystemRequest, post_evaluate_missing_instance_bad_request) { + sourcemeta::core::HTTPSystemRequest request{ + "https://schemas.sourcemeta.com/self/v1/api/schemas/evaluate/" + "cloudevents/v1.0.2/cloudevents", + sourcemeta::core::HTTPMethod::POST}; + const auto response{request.send()}; + EXPECT_EQ(response.status, sourcemeta::core::HTTP_STATUS_BAD_REQUEST); +} + +TEST(HTTP_SystemRequest, follow_redirect_to_https) { + sourcemeta::core::HTTPSystemRequest request{ + "http://schemas.sourcemeta.com/self/v1/health"}; + request.follow_redirects(true); + const auto response{request.send()}; + EXPECT_EQ(response.status, sourcemeta::core::HTTP_STATUS_OK); + // The redirect upgrades the scheme to HTTPS, which the effective URL + // reflects regardless of backend-specific normalisation + EXPECT_TRUE(response.url.starts_with( + "https://schemas.sourcemeta.com/self/v1/health")); +} + +TEST(HTTP_SystemRequest, no_follow_redirect_returns_redirect) { + sourcemeta::core::HTTPSystemRequest request{ + "http://schemas.sourcemeta.com/self/v1/health"}; + request.follow_redirects(false); + const auto response{request.send()}; + EXPECT_GE(response.status.code, 300); + EXPECT_LT(response.status.code, 400); +} + +TEST(HTTP_SystemRequest, timeout_against_unreachable_host_throws) { + // A non-routable RFC 5737 TEST-NET-1 address never completes the request, + // so a bounded timeout reliably surfaces an error rather than racing the + // sub-millisecond timer granularity of some backends + sourcemeta::core::HTTPSystemRequest request{"https://192.0.2.1/"}; + request.timeout(std::chrono::milliseconds{1000}); + EXPECT_THROW( + { [[maybe_unused]] const auto response{request.send()}; }, + sourcemeta::core::HTTPError); +} + +TEST(HTTP_SystemRequest, unresolvable_host_throws) { + sourcemeta::core::HTTPSystemRequest request{ + "https://this-host-does-not-exist.sourcemeta.invalid/"}; + EXPECT_THROW( + { [[maybe_unused]] const auto response{request.send()}; }, + sourcemeta::core::HTTPError); +} + +TEST(HTTP_SystemRequest, maximum_response_size_exceeded_throws) { + sourcemeta::core::HTTPSystemRequest request{ + "https://schemas.sourcemeta.com/self/v1/api/list"}; + request.maximum_response_size(1); + EXPECT_THROW( + { [[maybe_unused]] const auto response{request.send()}; }, + sourcemeta::core::HTTPError); +}