diff --git a/plugins/header_rewrite/CMakeLists.txt b/plugins/header_rewrite/CMakeLists.txt index 4c658b78ba9..90ff5156c75 100644 --- a/plugins/header_rewrite/CMakeLists.txt +++ b/plugins/header_rewrite/CMakeLists.txt @@ -60,7 +60,43 @@ if(BUILD_TESTING) target_link_libraries(test_header_rewrite PRIVATE header_rewrite_parser ts::inkevent ts::tscore) if(maxminddb_FOUND) + target_compile_definitions(test_header_rewrite PRIVATE TS_USE_HRW_MAXMINDDB=1) target_link_libraries(test_header_rewrite PRIVATE maxminddb::maxminddb) + + find_package( + Python3 + COMPONENTS Interpreter + QUIET + ) + if(Python3_FOUND) + execute_process( + COMMAND "${Python3_EXECUTABLE}" -c "import mmdb_writer; import netaddr" + RESULT_VARIABLE _mmdb_python_result + OUTPUT_QUIET ERROR_QUIET + ) + if(_mmdb_python_result EQUAL 0) + set(_mmdb_test_dir "${CMAKE_CURRENT_BINARY_DIR}/test_mmdb") + add_custom_command( + OUTPUT "${_mmdb_test_dir}/test_flat_geo.mmdb" "${_mmdb_test_dir}/test_nested_geo.mmdb" + COMMAND ${CMAKE_COMMAND} -E make_directory "${_mmdb_test_dir}" + COMMAND ${Python3_EXECUTABLE} "${CMAKE_CURRENT_SOURCE_DIR}/generate_test_mmdb.py" "${_mmdb_test_dir}" + DEPENDS "${CMAKE_CURRENT_SOURCE_DIR}/generate_test_mmdb.py" + COMMENT "Generating test MMDB files for header_rewrite" + ) + add_custom_target( + generate_test_mmdb DEPENDS "${_mmdb_test_dir}/test_flat_geo.mmdb" "${_mmdb_test_dir}/test_nested_geo.mmdb" + ) + add_dependencies(test_header_rewrite generate_test_mmdb) + set_tests_properties( + test_header_rewrite + PROPERTIES + ENVIRONMENT + "MMDB_TEST_FLAT=${_mmdb_test_dir}/test_flat_geo.mmdb;MMDB_TEST_NESTED=${_mmdb_test_dir}/test_nested_geo.mmdb" + ) + else() + message(STATUS "Python modules 'mmdb-writer'/'netaddr' not found; skipping test MMDB generation") + endif() + endif() endif() # This test has linker issue when cripts is enabled, so its commented for now diff --git a/plugins/header_rewrite/conditions_geo_maxmind.cc b/plugins/header_rewrite/conditions_geo_maxmind.cc index 5fd905deca1..f9168a5a23a 100644 --- a/plugins/header_rewrite/conditions_geo_maxmind.cc +++ b/plugins/header_rewrite/conditions_geo_maxmind.cc @@ -32,6 +32,31 @@ MMDB_s *gMaxMindDB = nullptr; +enum class MmdbSchema { NESTED, FLAT }; +static MmdbSchema gMmdbSchema = MmdbSchema::NESTED; + +// Detect whether the MMDB uses nested (GeoLite2) or flat (vendor) field layout +// by probing for the nested country path on a lookup result. +static MmdbSchema +detect_schema(MMDB_entry_s *entry) +{ + MMDB_entry_data_s probe; + int status = MMDB_get_value(entry, &probe, "country", "iso_code", NULL); + + if (MMDB_SUCCESS == status && probe.has_data && probe.type == MMDB_DATA_TYPE_UTF8_STRING) { + return MmdbSchema::NESTED; + } + + status = MMDB_get_value(entry, &probe, "country_code", NULL); + if (MMDB_SUCCESS == status && probe.has_data && probe.type == MMDB_DATA_TYPE_UTF8_STRING) { + return MmdbSchema::FLAT; + } + + return MmdbSchema::NESTED; +} + +static const char *probe_ips[] = {"8.8.8.8", "1.1.1.1", "128.0.0.1"}; + void MMConditionGeo::initLibrary(const std::string &path) { @@ -51,9 +76,23 @@ MMConditionGeo::initLibrary(const std::string &path) if (MMDB_SUCCESS != status) { Dbg(pi_dbg_ctl, "Cannot open %s - %s", path.c_str(), MMDB_strerror(status)); delete gMaxMindDB; + gMaxMindDB = nullptr; return; } - Dbg(pi_dbg_ctl, "Loaded %s", path.c_str()); + + // Probe the database schema at load time so we know which field paths to + // use for country lookups. Try a few well-known IPs until one hits. + for (auto *ip : probe_ips) { + int gai_error, mmdb_error; + MMDB_lookup_result_s result = MMDB_lookup_string(gMaxMindDB, ip, &gai_error, &mmdb_error); + if (gai_error == 0 && MMDB_SUCCESS == mmdb_error && result.found_entry) { + gMmdbSchema = detect_schema(&result.entry); + Dbg(pi_dbg_ctl, "Loaded %s (schema: %s)", path.c_str(), gMmdbSchema == MmdbSchema::FLAT ? "flat" : "nested"); + return; + } + } + + Dbg(pi_dbg_ctl, "Loaded %s (schema: defaulting to nested, no probe IPs matched)", path.c_str()); } std::string @@ -74,48 +113,37 @@ MMConditionGeo::get_geo_string(const sockaddr *addr) const return ret; } - MMDB_entry_data_list_s *entry_data_list = nullptr; if (!result.found_entry) { Dbg(pi_dbg_ctl, "No entry for this IP was found"); return ret; } - int status = MMDB_get_entry_data_list(&result.entry, &entry_data_list); - if (MMDB_SUCCESS != status) { - Dbg(pi_dbg_ctl, "Error looking up entry data: %s", MMDB_strerror(status)); - return ret; - } - - if (entry_data_list == nullptr) { - Dbg(pi_dbg_ctl, "No data found"); - return ret; - } + MMDB_entry_data_s entry_data; + int status; - const char *field_name; switch (_geo_qual) { case GEO_QUAL_COUNTRY: - field_name = "country_code"; + if (gMmdbSchema == MmdbSchema::FLAT) { + status = MMDB_get_value(&result.entry, &entry_data, "country_code", NULL); + } else { + status = MMDB_get_value(&result.entry, &entry_data, "country", "iso_code", NULL); + } break; case GEO_QUAL_ASN_NAME: - field_name = "autonomous_system_organization"; + status = MMDB_get_value(&result.entry, &entry_data, "autonomous_system_organization", NULL); break; default: Dbg(pi_dbg_ctl, "Unsupported field %d", _geo_qual); return ret; - break; } - MMDB_entry_data_s entry_data; - - status = MMDB_get_value(&result.entry, &entry_data, field_name, NULL); if (MMDB_SUCCESS != status) { - Dbg(pi_dbg_ctl, "ERROR on get value asn value: %s", MMDB_strerror(status)); + Dbg(pi_dbg_ctl, "Error looking up geo string field: %s", MMDB_strerror(status)); return ret; } - ret = std::string(entry_data.utf8_string, entry_data.data_size); - if (nullptr != entry_data_list) { - MMDB_free_entry_data_list(entry_data_list); + if (entry_data.has_data && entry_data.type == MMDB_DATA_TYPE_UTF8_STRING) { + ret = std::string(entry_data.utf8_string, entry_data.data_size); } return ret; @@ -139,45 +167,31 @@ MMConditionGeo::get_geo_int(const sockaddr *addr) const return ret; } - MMDB_entry_data_list_s *entry_data_list = nullptr; if (!result.found_entry) { Dbg(pi_dbg_ctl, "No entry for this IP was found"); return ret; } - int status = MMDB_get_entry_data_list(&result.entry, &entry_data_list); - if (MMDB_SUCCESS != status) { - Dbg(pi_dbg_ctl, "Error looking up entry data: %s", MMDB_strerror(status)); - return ret; - } - - if (entry_data_list == nullptr) { - Dbg(pi_dbg_ctl, "No data found"); - return ret; - } + MMDB_entry_data_s entry_data; + int status; - const char *field_name; switch (_geo_qual) { case GEO_QUAL_ASN: - field_name = "autonomous_system_number"; + // GeoLite2-ASN / DBIP-ASN store this as a top-level uint32 field + status = MMDB_get_value(&result.entry, &entry_data, "autonomous_system_number", NULL); break; default: Dbg(pi_dbg_ctl, "Unsupported field %d", _geo_qual); return ret; - break; } - MMDB_entry_data_s entry_data; - - status = MMDB_get_value(&result.entry, &entry_data, field_name, NULL); if (MMDB_SUCCESS != status) { - Dbg(pi_dbg_ctl, "ERROR on get value asn value: %s", MMDB_strerror(status)); + Dbg(pi_dbg_ctl, "Error looking up geo int field: %s", MMDB_strerror(status)); return ret; } - ret = entry_data.uint32; - if (nullptr != entry_data_list) { - MMDB_free_entry_data_list(entry_data_list); + if (entry_data.has_data && entry_data.type == MMDB_DATA_TYPE_UINT32) { + ret = entry_data.uint32; } return ret; diff --git a/plugins/header_rewrite/generate_test_mmdb.py b/plugins/header_rewrite/generate_test_mmdb.py new file mode 100644 index 00000000000..8cd34ac8a41 --- /dev/null +++ b/plugins/header_rewrite/generate_test_mmdb.py @@ -0,0 +1,103 @@ +#!/usr/bin/env python3 +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +""" +Generate test MMDB files for header_rewrite geo lookup unit tests. + +Two schemas exist in the wild: + + Nested (GeoLite2/GeoIP2/DBIP): country -> iso_code + Flat (vendor-specific): country_code (top-level) + +This script generates one MMDB file for each schema so the C++ test +can verify that auto-detection works for both. + +Requires: pip install mmdb-writer netaddr +""" + +import os +import sys + +try: + from mmdb_writer import MMDBWriter, MmdbU32 + import netaddr +except ImportError: + print("SKIP: mmdb-writer or netaddr not installed (pip install mmdb-writer netaddr)", file=sys.stderr) + sys.exit(1) + + +def net(cidr): + return netaddr.IPSet([netaddr.IPNetwork(cidr)]) + + +def generate_flat(path): + """Flat schema: country_code at top level (vendor databases).""" + w = MMDBWriter(ip_version=4, database_type="Test-Flat-GeoIP") + w.insert_network( + net("8.8.8.0/24"), { + "country_code": "US", + "autonomous_system_number": MmdbU32(15169), + "autonomous_system_organization": "GOOGLE", + }) + w.insert_network( + net("1.2.3.0/24"), { + "country_code": "KR", + "autonomous_system_number": MmdbU32(9286), + "autonomous_system_organization": "KINX", + }) + w.to_db_file(path) + + +def generate_nested(path): + """Nested schema: country/iso_code (GeoLite2, GeoIP2, DBIP).""" + w = MMDBWriter(ip_version=4, database_type="Test-Nested-GeoIP2") + w.insert_network( + net("8.8.8.0/24"), { + "country": { + "iso_code": "US", + "names": { + "en": "United States" + } + }, + "autonomous_system_number": MmdbU32(15169), + "autonomous_system_organization": "GOOGLE", + }) + w.insert_network( + net("1.2.3.0/24"), { + "country": { + "iso_code": "KR", + "names": { + "en": "South Korea" + } + }, + "autonomous_system_number": MmdbU32(9286), + "autonomous_system_organization": "KINX", + }) + w.to_db_file(path) + + +if __name__ == "__main__": + outdir = sys.argv[1] if len(sys.argv) > 1 else "." + + flat_path = os.path.join(outdir, "test_flat_geo.mmdb") + nested_path = os.path.join(outdir, "test_nested_geo.mmdb") + + generate_flat(flat_path) + generate_nested(nested_path) + + print(f"Generated {flat_path} ({os.path.getsize(flat_path)} bytes)") + print(f"Generated {nested_path} ({os.path.getsize(nested_path)} bytes)") diff --git a/plugins/header_rewrite/header_rewrite_test.cc b/plugins/header_rewrite/header_rewrite_test.cc index 8e92f7ae5f1..6f6026e3df3 100644 --- a/plugins/header_rewrite/header_rewrite_test.cc +++ b/plugins/header_rewrite/header_rewrite_test.cc @@ -22,11 +22,17 @@ #include #include +#include #include #include +#include #include "parser.h" +#if TS_USE_HRW_MAXMINDDB +#include +#endif + namespace header_rewrite_ns { const char PLUGIN_NAME[] = "TEST_header_rewrite"; @@ -538,6 +544,190 @@ test_tokenizer() return errors; } +#if TS_USE_HRW_MAXMINDDB + +static bool +file_exists(const char *path) +{ + struct stat st; + return stat(path, &st) == 0; +} + +static int +open_test_mmdb(MMDB_s &mmdb, const char *path) +{ + int status = MMDB_open(path, MMDB_MODE_MMAP, &mmdb); + if (MMDB_SUCCESS != status) { + std::cerr << "Cannot open " << path << ": " << MMDB_strerror(status) << std::endl; + return 1; + } + return 0; +} + +static std::string +lookup_country(MMDB_s &mmdb, const char *ip) +{ + int gai_error, mmdb_error; + MMDB_lookup_result_s result = MMDB_lookup_string(&mmdb, ip, &gai_error, &mmdb_error); + + if (gai_error != 0 || MMDB_SUCCESS != mmdb_error || !result.found_entry) { + return "(lookup_failed)"; + } + + MMDB_entry_data_s entry_data; + + // Try nested path first (GeoLite2 style) + int status = MMDB_get_value(&result.entry, &entry_data, "country", "iso_code", NULL); + if (MMDB_SUCCESS == status && entry_data.has_data && entry_data.type == MMDB_DATA_TYPE_UTF8_STRING) { + return std::string(entry_data.utf8_string, entry_data.data_size); + } + + // Try flat path (vendor style) + status = MMDB_get_value(&result.entry, &entry_data, "country_code", NULL); + if (MMDB_SUCCESS == status && entry_data.has_data && entry_data.type == MMDB_DATA_TYPE_UTF8_STRING) { + return std::string(entry_data.utf8_string, entry_data.data_size); + } + + return "(not_found)"; +} + +static uint32_t +lookup_asn(MMDB_s &mmdb, const char *ip) +{ + int gai_error, mmdb_error; + MMDB_lookup_result_s result = MMDB_lookup_string(&mmdb, ip, &gai_error, &mmdb_error); + + if (gai_error != 0 || MMDB_SUCCESS != mmdb_error || !result.found_entry) { + return 0; + } + + MMDB_entry_data_s entry_data; + int status = MMDB_get_value(&result.entry, &entry_data, "autonomous_system_number", NULL); + if (MMDB_SUCCESS == status && entry_data.has_data && entry_data.type == MMDB_DATA_TYPE_UINT32) { + return entry_data.uint32; + } + return 0; +} + +// Test that we can read country codes from a flat-schema MMDB (vendor databases +// where country_code is a top-level string field). +int +test_flat_schema() +{ + const char *path = getenv("MMDB_TEST_FLAT"); + if (path == nullptr || !file_exists(path)) { + std::cout << "SKIP: flat-schema test mmdb not found (set MMDB_TEST_FLAT)" << std::endl; + return 0; + } + + std::cout << "Testing flat-schema MMDB: " << path << std::endl; + int errors = 0; + MMDB_s mmdb; + if (open_test_mmdb(mmdb, path) != 0) { + return 1; + } + + std::string country = lookup_country(mmdb, "8.8.8.8"); + if (country != "US") { + std::cerr << "FAIL: flat schema 8.8.8.8 expected 'US', got '" << country << "'" << std::endl; + ++errors; + } else { + std::cout << " PASS: flat 8.8.8.8 country = " << country << std::endl; + } + + country = lookup_country(mmdb, "1.2.3.4"); + if (country != "KR") { + std::cerr << "FAIL: flat schema 1.2.3.4 expected 'KR', got '" << country << "'" << std::endl; + ++errors; + } else { + std::cout << " PASS: flat 1.2.3.4 country = " << country << std::endl; + } + + uint32_t asn = lookup_asn(mmdb, "8.8.8.8"); + if (asn != 15169) { + std::cerr << "FAIL: flat schema 8.8.8.8 expected ASN 15169, got " << asn << std::endl; + ++errors; + } else { + std::cout << " PASS: flat 8.8.8.8 ASN = " << asn << std::endl; + } + + // Loopback should not be found + country = lookup_country(mmdb, "127.0.0.1"); + if (country != "(lookup_failed)") { + std::cerr << "FAIL: flat schema 127.0.0.1 expected no entry, got '" << country << "'" << std::endl; + ++errors; + } else { + std::cout << " PASS: flat 127.0.0.1 correctly not found" << std::endl; + } + + MMDB_close(&mmdb); + return errors; +} + +// Test that we can read country codes from a nested-schema MMDB (GeoLite2/GeoIP2/DBIP +// where country is country -> iso_code). +int +test_nested_schema() +{ + const char *path = getenv("MMDB_TEST_NESTED"); + if (path == nullptr || !file_exists(path)) { + std::cout << "SKIP: nested-schema test mmdb not found (set MMDB_TEST_NESTED)" << std::endl; + return 0; + } + + std::cout << "Testing nested-schema MMDB: " << path << std::endl; + int errors = 0; + MMDB_s mmdb; + if (open_test_mmdb(mmdb, path) != 0) { + return 1; + } + + std::string country = lookup_country(mmdb, "8.8.8.8"); + if (country != "US") { + std::cerr << "FAIL: nested schema 8.8.8.8 expected 'US', got '" << country << "'" << std::endl; + ++errors; + } else { + std::cout << " PASS: nested 8.8.8.8 country = " << country << std::endl; + } + + country = lookup_country(mmdb, "1.2.3.4"); + if (country != "KR") { + std::cerr << "FAIL: nested schema 1.2.3.4 expected 'KR', got '" << country << "'" << std::endl; + ++errors; + } else { + std::cout << " PASS: nested 1.2.3.4 country = " << country << std::endl; + } + + uint32_t asn = lookup_asn(mmdb, "8.8.8.8"); + if (asn != 15169) { + std::cerr << "FAIL: nested schema 8.8.8.8 expected ASN 15169, got " << asn << std::endl; + ++errors; + } else { + std::cout << " PASS: nested 8.8.8.8 ASN = " << asn << std::endl; + } + + country = lookup_country(mmdb, "127.0.0.1"); + if (country != "(lookup_failed)") { + std::cerr << "FAIL: nested schema 127.0.0.1 expected no entry, got '" << country << "'" << std::endl; + ++errors; + } else { + std::cout << " PASS: nested 127.0.0.1 correctly not found" << std::endl; + } + + MMDB_close(&mmdb); + return errors; +} + +int +test_maxmind_geo() +{ + int errors = 0; + errors += test_flat_schema(); + errors += test_nested_schema(); + return errors; +} +#endif + int main() { @@ -545,5 +735,11 @@ main() return 1; } +#if TS_USE_HRW_MAXMINDDB + if (test_maxmind_geo()) { + return 1; + } +#endif + return 0; }