From 243e7cd44fd6f21145dee90012f0674f825ff36e Mon Sep 17 00:00:00 2001 From: Sergei Petrunia Date: Wed, 27 May 2026 14:24:43 +0300 Subject: [PATCH] MDEV-39720: Optimizer Trace doesn't quote string values, may have invalid JSON - Make Json_writer::add_str() do the quoting with json_escape_to_string(). = All other string writing methods are handled as they call this one. - Move json_escape_to_string() from opt_histogram_json.cc to my_json_writer.cc. This is needed because JSON writer unit tests do not link with opt_histogram_json. - Histogram code does its own escaping (with handling for special cases). Add Json_writer::add_escaped_str() to accept escaped strings. This is not intended to be commonly used. - opt_trace_print_expanded_query() did its own escaping for "expanded_query" string. It was done in MDEV-30354. Remove escaping there to avoid double escaping. - dump_record_to_trace() also did own escaping when writing table DDLs into the optimizer trace. Remove double escaping. - Adjust my_json_writer unit test. --- mysql-test/main/explain_json.result | 2 +- mysql-test/main/opt_trace.result | 32 ++++++--- mysql-test/main/opt_trace.test | 16 +++++ sql/my_json_writer.cc | 107 +++++++++++++++++++++++++++- sql/my_json_writer.h | 9 ++- sql/opt_histogram_json.cc | 45 +----------- sql/opt_trace.cc | 13 +--- sql/opt_trace_ddl_info.cc | 23 +----- sql/sql_explain.h | 6 +- unittest/sql/my_json_writer-t.cc | 30 ++++---- 10 files changed, 177 insertions(+), 106 deletions(-) diff --git a/mysql-test/main/explain_json.result b/mysql-test/main/explain_json.result index 50c8cfcbef7b5..ea33469a68e5d 100644 --- a/mysql-test/main/explain_json.result +++ b/mysql-test/main/explain_json.result @@ -1477,7 +1477,7 @@ EXPLAIN "rows": 2, "cost": "COST_REPLACED", "filtered": 100, - "attached_condition": "t1.a = _latin1'\xDF'" + "attached_condition": "t1.a = _latin1'\\xDF'" } } ] diff --git a/mysql-test/main/opt_trace.result b/mysql-test/main/opt_trace.result index 4f58dcfb3aebc..6b791248008da 100644 --- a/mysql-test/main/opt_trace.result +++ b/mysql-test/main/opt_trace.result @@ -10526,7 +10526,7 @@ JS { "index": "i_b", "ranges": - ["(\xD9[\x943j\x99F\xA3\x9C\xF5\xB5\x8C\xFEw-\x8C) <= (b) <= (\xD9[\x943j\x99F\xA3\x9C\xF5\xB5\x8C\xFEw-\x8C)"], + ["(\\xD9[\\x943j\\x99F\\xA3\\x9C\\xF5\\xB5\\x8C\\xFEw-\\x8C) <= (b) <= (\\xD9[\\x943j\\x99F\\xA3\\x9C\\xF5\\xB5\\x8C\\xFEw-\\x8C)"], "rowid_ordered": true, "using_mrr": false, "index_only": false, @@ -10588,7 +10588,7 @@ JS { "index": "i_b", "ranges": - ["(ab\x0A) <= (b) <= (ab\x0A)"], + ["(ab\\x0A) <= (b) <= (ab\\x0A)"], "rowid_ordered": true, "using_mrr": false, "index_only": false, @@ -10616,7 +10616,7 @@ JS { "index": "i_b", "ranges": - ["(ab\x0A\x00\x00\x00\x00\x00\x00\x00) <= (b) <= (ab\x0A\x00\x00\x00\x00\x00\x00\x00)"], + ["(ab\\x0A\\x00\\x00\\x00\\x00\\x00\\x00\\x00) <= (b) <= (ab\\x0A\\x00\\x00\\x00\\x00\\x00\\x00\\x00)"], "rowid_ordered": true, "using_mrr": false, "index_only": false, @@ -10644,7 +10644,7 @@ JS { "index": "i_b", "ranges": - ["(ab\x0A) <= (b) <= (ab\x0A)"], + ["(ab\\x0A) <= (b) <= (ab\\x0A)"], "rowid_ordered": true, "using_mrr": false, "index_only": false, @@ -10675,7 +10675,7 @@ JS { "index": "i_b", "ranges": - ["(ab\n) <= (b) <= (ab\n)"], + ["(ab\\n) <= (b) <= (ab\\n)"], "rowid_ordered": true, "using_mrr": false, "index_only": false, @@ -10709,7 +10709,7 @@ JS { "index": "i_b", "ranges": - ["(ab\x0A) <= (b) <= (ab\x0A)"], + ["(ab\\x0A) <= (b) <= (ab\\x0A)"], "rowid_ordered": false, "using_mrr": false, "index_only": false, @@ -10741,7 +10741,7 @@ JS { "index": "i_b", "ranges": - ["(ab\n) <= (b) <= (ab\n)"], + ["(ab\\n) <= (b) <= (ab\\n)"], "rowid_ordered": true, "using_mrr": false, "index_only": false, @@ -12968,7 +12968,7 @@ x 𝄞 SELECT json_valid(trace) AS valid_json, json_detailed(json_extract(trace,'$**.expanded_query')) FROM information_schema.optimizer_trace; valid_json json_detailed(json_extract(trace,'$**.expanded_query')) -1 ["Error: failed to escape query string for JSON output"] +1 ["select '𝄞' AS x"] SET NAMES DEFAULT; set optimizer_trace=@saved_optimizer_trace; # End of 10.11 tests @@ -13888,6 +13888,22 @@ OPTIMIZER_TRACE CREATE TEMPORARY TABLE `OPTIMIZER_TRACE` ( `MISSING_BYTES_BEYOND_MAX_MEM_SIZE` int(20) NOT NULL, `INSUFFICIENT_PRIVILEGES` tinyint(1) NOT NULL ) ENGINE=Aria DEFAULT CHARSET=utf8mb3 COLLATE=utf8mb3_general_ci PAGE_CHECKSUM=0 +# +# MDEV-39720: Optimizer Trace may have invalid JSON due to quoting. +# +create table t1 ( +a varchar(100) +); +set optimizer_trace=1; +explain +select * from t1 where a < '"'; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE NULL NULL NULL NULL NULL NULL NULL Impossible WHERE noticed after reading const tables +set @trace=(select trace from information_schema.optimizer_trace); +select json_valid(@trace); +json_valid(@trace) +1 +drop table t1; set @@optimizer_switch= @save_optimizer_switch; set @@use_stat_tables= @save_use_stat_tables; set @@histogram_size= @save_histogram_size; diff --git a/mysql-test/main/opt_trace.test b/mysql-test/main/opt_trace.test index f9e40d2c0d019..181ef4aa6374d 100644 --- a/mysql-test/main/opt_trace.test +++ b/mysql-test/main/opt_trace.test @@ -1389,6 +1389,22 @@ drop table t1; SHOW CREATE TABLE information_schema.optimizer_trace; +--echo # +--echo # MDEV-39720: Optimizer Trace may have invalid JSON due to quoting. +--echo # +create table t1 ( + a varchar(100) +); + +set optimizer_trace=1; +explain +select * from t1 where a < '"'; + +set @trace=(select trace from information_schema.optimizer_trace); +select json_valid(@trace); + +drop table t1; + set @@optimizer_switch= @save_optimizer_switch; set @@use_stat_tables= @save_use_stat_tables; set @@histogram_size= @save_histogram_size; diff --git a/sql/my_json_writer.cc b/sql/my_json_writer.cc index eb95b46b277a9..4050177c9a879 100644 --- a/sql/my_json_writer.cc +++ b/sql/my_json_writer.cc @@ -14,6 +14,7 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA */ #include "my_global.h" +#include #include "my_json_writer.h" #if !defined(NDEBUG) || defined(JSON_WRITER_UNIT_TEST) @@ -284,10 +285,45 @@ void Json_writer::add_str(const char *str) } /* - This function is used to add only num_bytes of str to the output string + @brief + Add a string pointed by str of num_bytes length, escaping it if needed. + + @param + str String in utf8mb4. + num_bytes Length of string in bytes (not characters). */ void Json_writer::add_str(const char* str, size_t num_bytes) +{ + int rc; + /* Initial buffer, json_escape_to_string will alloc larger if needed */ + StringBuffer<128> buf; + if ((rc= json_escape_to_string(str, num_bytes, &my_charset_utf8mb4_bin, &buf))) + { + if (rc == JSON_ERROR_ILLEGAL_SYMBOL) + str= "String with illegal unicode symbol"; + else + { + DBUG_ASSERT(rc == JSON_ERROR_OUT_OF_SPACE); + str= "Out of memory writing JSON"; + } + num_bytes= strlen(str); + } + else + { + str= buf.ptr(); + num_bytes= buf.length(); + } + add_escaped_str(str, num_bytes); +} + + +/* + @brief + Add a string value. The caller guarantees that it doesn't need escaping. +*/ + +void Json_writer::add_escaped_str(const char* str, size_t num_bytes) { VALIDITY_ASSERT(fmt_helper.is_making_writer_calls() || got_name == named_item_expected()); @@ -305,7 +341,22 @@ void Json_writer::add_str(const char* str, size_t num_bytes) void Json_writer::add_str(const String &str) { - add_str(str.ptr(), str.length()); + uint32 offs; + /* str may be in any charset. Convert to utf8mb4 if necessary */ + if (String::needs_conversion(str.length(), str.charset(), + &my_charset_utf8mb4_bin, &offs)) + { + uint err; + StringBuffer<128> converted_str; + if (converted_str.copy(&str, &my_charset_utf8mb4_bin, &err)) + { + const char *errstr= "Conversion error writing JSON"; + converted_str.set(errstr, strlen(errstr), &my_charset_utf8mb4_bin); + } + add_str(converted_str.ptr(), converted_str.length()); + } + else + add_str(str.ptr(), str.length()); } #ifdef ENABLED_JSON_WRITER_CONSISTENCY_CHECKS @@ -493,3 +544,55 @@ void Single_line_formatting_helper::disable_and_flush() buf_ptr= buffer; state= INACTIVE; } + +/* + @brief + Escape a JSON string and save it into *out. + + @detail + There's no way to tell how much space is needed for the output. + Start with a small string and increase its size until json_escape() + succeeds. +*/ + +int json_escape_to_string(const char *str, size_t len, CHARSET_INFO *cs, + String *out) +{ + /* Safety: assert that str doesn't point into *out. */ + DBUG_ASSERT(!out->alloced_length() || + str < out->ptr() || str >= out->ptr() + out->alloced_length()); + + // Make sure 'out' has some memory allocated. + if (!out->alloced_length() && out->alloc(128)) + return JSON_ERROR_OUT_OF_SPACE; + + while (1) + { + uchar *buf= (uchar*)out->ptr(); + out->length(out->alloced_length()); + const uchar *str_ptr= (const uchar*)str; + + int res= json_escape(cs, + str_ptr, + str_ptr + len, + &my_charset_utf8mb4_bin, + buf, buf + out->length()); + if (res >= 0) + { + out->length(res); + return 0; // Ok + } + + if (res != JSON_ERROR_OUT_OF_SPACE) + return res; // Some conversion error + + // Out of space error. Try with a bigger buffer + if (out->alloc(out->alloced_length()*2)) + return JSON_ERROR_OUT_OF_SPACE; + } +} + +int json_escape_to_string(const String *str, String *out) +{ + return json_escape_to_string(str->ptr(), str->length(), str->charset(), out); +} diff --git a/sql/my_json_writer.h b/sql/my_json_writer.h index 989679a767c8f..0e8d8115258dc 100644 --- a/sql/my_json_writer.h +++ b/sql/my_json_writer.h @@ -243,11 +243,14 @@ class Json_writer /* Add atomic values */ - /* Note: the add_str methods do not do escapes. Should this change? */ + /* All const char* arguments are strings in utf8mb4 */ void add_str(const char* val); void add_str(const char* val, size_t num_bytes); void add_str(const String &str); void add_str(Item *item); + + /* Add a string for which the caller has done escaping needed in JSON */ + void add_escaped_str(const char* val, size_t len); void add_table_name(const JOIN_TAB *tab); void add_table_name(const TABLE* table); @@ -798,4 +801,8 @@ class Json_writer_nesting_guard #endif }; +int json_escape_to_string(const String *str, String *out); +int json_escape_to_string(const char *str, size_t len, CHARSET_INFO *cs, + String *out); + #endif diff --git a/sql/opt_histogram_json.cc b/sql/opt_histogram_json.cc index 982cbe4642753..cac7b1d9c50e2 100644 --- a/sql/opt_histogram_json.cc +++ b/sql/opt_histogram_json.cc @@ -67,49 +67,6 @@ bool json_unescape_to_string(const char *val, int val_len, String* out) } -/* - @brief - Escape a JSON string and save it into *out. - - @detail - There's no way to tell how much space is needed for the output. - Start with a small string and increase its size until json_escape() - succeeds. -*/ - -static int json_escape_to_string(const String *str, String* out) -{ - // Make sure 'out' has some memory allocated. - if (!out->alloced_length() && out->alloc(128)) - return JSON_ERROR_OUT_OF_SPACE; - - while (1) - { - uchar *buf= (uchar*)out->ptr(); - out->length(out->alloced_length()); - const uchar *str_ptr= (const uchar*)str->ptr(); - - int res= json_escape(str->charset(), - str_ptr, - str_ptr + str->length(), - &my_charset_utf8mb4_bin, - buf, buf + out->length()); - if (res >= 0) - { - out->length(res); - return 0; // Ok - } - - if (res != JSON_ERROR_OUT_OF_SPACE) - return res; // Some conversion error - - // Out of space error. Try with a bigger buffer - if (out->alloc(out->alloced_length()*2)) - return JSON_ERROR_OUT_OF_SPACE; - } -} - - class Histogram_json_builder : public Histogram_builder { Histogram_json_hb *histogram; @@ -256,7 +213,7 @@ class Histogram_json_builder : public Histogram_builder if (!rc) { writer.add_member(is_start? "start": "end"); - writer.add_str(escaped_val.c_ptr_safe()); + writer.add_escaped_str(escaped_val.ptr(), escaped_val.length()); return false; } } diff --git a/sql/opt_trace.cc b/sql/opt_trace.cc index ab5dd5dd9cf86..15ee905eb8e6b 100644 --- a/sql/opt_trace.cc +++ b/sql/opt_trace.cc @@ -128,18 +128,7 @@ void opt_trace_print_expanded_query(THD *thd, SELECT_LEX *select_lex, The output is not very pretty lots of back-ticks, the output is as the one in explain extended , lets try to improved it here. */ - - StringBuffer<1024> escaped_str(system_charset_info); - if (st_append_escaped(&escaped_str, &str) == 0) - { - writer->add("expanded_query", escaped_str.c_ptr_safe(), - escaped_str.length()); - } - else - { - writer->add("expanded_query", - "Error: failed to escape query string for JSON output"); - } + writer->add("expanded_query", str.c_ptr_safe(), str.length()); } void opt_trace_disable_if_no_security_context_access(THD *thd) diff --git a/sql/opt_trace_ddl_info.cc b/sql/opt_trace_ddl_info.cc index 0151a9929805e..27e044fa2d4dd 100644 --- a/sql/opt_trace_ddl_info.cc +++ b/sql/opt_trace_ddl_info.cc @@ -80,28 +80,7 @@ static bool dump_record_to_trace(THD *thd, DDL_Key *ddl_key, String *stmt) { Json_writer_object ddl_wrapper(thd); ddl_wrapper.add("name", ddl_key->name); - size_t non_esc_stmt_len= stmt->length(); - /* - making escape_stmt size to be 4 times the non_esc_stmt - 4 is chosen as a worst case although 3 should suffice. - "'" would be escaped to \"\'\" - */ - size_t len_multiplier= sizeof(uint32_t); - size_t escape_stmt_len= len_multiplier * non_esc_stmt_len; - char *escaped_stmt= (char *) thd->alloc(escape_stmt_len + 1); - - if (!escaped_stmt) - return true; - - int act_escape_stmt_len= - json_escape_string(stmt->c_ptr(), stmt->c_ptr() + non_esc_stmt_len, - escaped_stmt, escaped_stmt + escape_stmt_len); - - if (act_escape_stmt_len < 0) - return true; - - escaped_stmt[act_escape_stmt_len]= 0; - ddl_wrapper.add("ddl", escaped_stmt); + ddl_wrapper.add("ddl", stmt->c_ptr_safe()); return false; } diff --git a/sql/sql_explain.h b/sql/sql_explain.h index 7a1ee3911611e..fd442e3d8e8e0 100644 --- a/sql/sql_explain.h +++ b/sql/sql_explain.h @@ -791,12 +791,12 @@ class Explain_table_access : public Sql_alloc /* Internals */ /* id and 'select_type' are cared-of by the parent Explain_select */ - StringBuffer<32> table_name; - StringBuffer<32> used_partitions; + StringBuffer<32> table_name{system_charset_info}; + StringBuffer<32> used_partitions{system_charset_info}; String_list used_partitions_list; // valid with ET_USING_MRR StringBuffer<32> mrr_type; - StringBuffer<32> firstmatch_table_name; + StringBuffer<32> firstmatch_table_name{system_charset_info}; /* Non-zero number means this is a derived table. The number can be used to diff --git a/unittest/sql/my_json_writer-t.cc b/unittest/sql/my_json_writer-t.cc index 10796f041df75..4fec0f6659dae 100644 --- a/unittest/sql/my_json_writer-t.cc +++ b/unittest/sql/my_json_writer-t.cc @@ -223,7 +223,7 @@ int main(int args, char **argv) ok(!w.invalid_json, "Empty string value is valid"); } - /* add_str does not escape: special chars are passed through verbatim */ + /* add_str does escaping */ { Json_writer w; w.start_object(); @@ -231,26 +231,33 @@ int main(int args, char **argv) w.end_object(); ok(json_output_eq(w, "{\n" - " \"s\": \"with\"quote\"\n" + " \"s\": \"with\\\"quote\"\n" "}"), - "String with embedded quote (no escaping)"); + "String with embedded quote properly escaped"); ok(!w.invalid_json, "String with embedded quote remains valid for this writer"); } { Json_writer w; + /* + Char latin1 UTF-8 + é E9 C3 A9 + ö F6 C3 B6 + ü FC C3 BC + */ String latin1_str("\xe9\xf6\xfc", 3, &my_charset_latin1); + w.start_object(); w.add_member("s").add_str(latin1_str); w.end_object(); ok(json_output_eq(w, "{\n" - " \"s\": \"\xe9\xf6\xfc\"\n" + " \"s\": \"\xc3\xa9\xc3\xb6\xc3\xbc\"\n" "}"), - "Latin-1 encoded string bytes pass through unchanged"); + "Characters written in latin1 are transcoded into utf-8"); ok(!w.invalid_json, - "Latin-1 encoded string bytes pass through unchanged and stay valid"); + "Latin-1 is converted to utf-8 and remains valid"); } { @@ -595,9 +602,7 @@ int main(int args, char **argv) const char value_with_nul[]= { 'a', '\0', 'b' }; const char expected[]= "{\n" - " \"arr\": [\n" - " \"a\0b\"\n" - " ]\n" + " \"arr\": [\"a\\u0000b\"]\n" "}"; w.start_object(); w.add_member("arr"); @@ -606,8 +611,8 @@ int main(int args, char **argv) w.end_array(); w.end_object(); ok(json_output_eq_len(w, expected, sizeof(expected) - 1), - "Named array element with embedded NUL is preserved"); - ok(!w.invalid_json, "Embedded-NUL array element keeps writer valid"); + "NUL character is escaped as \\u0000"); + ok(!w.invalid_json, "Writing a string with NUL character produces valid JSON"); } { @@ -1053,8 +1058,7 @@ int main(int args, char **argv) { Json_writer w; - String s; - s.append("hello", 5); + String s("hello", 5, &my_charset_utf8mb4_general_ci); w.start_object(); w.add_member("msg").add_str(s); w.end_object();