Skip to content
/ server Public
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
104 changes: 102 additions & 2 deletions client/import_util.cc
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@
*/

#include <string>
#include <cstring>
#include <vector>
#include <pcre2posix.h>

Expand Down Expand Up @@ -58,10 +59,18 @@ std::string extract_first_create_table(const std::string &script)
TableDDLInfo::TableDDLInfo(const std::string &create_table_stmt)
{
regex_t primary_key_regex, constraint_regex, index_regex, engine_regex,
table_name_regex;
table_name_regex, column_regex;
constexpr size_t MAX_MATCHES= 10;
regmatch_t match[10];

// Extract just the CREATE TABLE statement if the input contains other SQL
std::string actual_create_table = extract_first_create_table(create_table_stmt);
if (actual_create_table.empty())
{
// Input might already be just the CREATE TABLE statement
actual_create_table = create_table_stmt;
}

regcomp(&primary_key_regex, "\\n\\s*(PRIMARY\\s+KEY\\s+(.*?)),?\\n",
REG_EXTENDED);
regcomp(&constraint_regex,
Expand All @@ -73,8 +82,13 @@ TableDDLInfo::TableDDLInfo(const std::string &create_table_stmt)
regcomp(&engine_regex, "\\bENGINE\\s*=\\s*(\\w+)", REG_EXTENDED);
regcomp(&table_name_regex, "CREATE\\s+TABLE\\s+(`?(?:[^`]|``)+`?)\\s*\\(",
REG_EXTENDED);
// Column regex: matches lines starting with column name followed by type
// Must be inside parentheses, not starting with constraint/key keywords
regcomp(&column_regex,
"\\n\\s*(`[^`]+`|[a-zA-Z_][a-zA-Z0-9_]*)\\s+([a-zA-Z]+[a-zA-Z0-9()]*)",
REG_EXTENDED);

const char *stmt= create_table_stmt.c_str();
const char *stmt= actual_create_table.c_str();
const char *search_start= stmt;

// Extract primary key
Expand Down Expand Up @@ -129,11 +143,97 @@ TableDDLInfo::TableDDLInfo(const std::string &create_table_stmt)
}
}
}

// Extract column definitions - find the column definition block
const char *col_start = strchr(stmt, '(');
if (col_start)
{
col_start++; // Move past the opening '('

// Find the matching closing parenthesis
int depth = 1;
const char *col_end = col_start;
bool in_string = false;
char string_delim = 0;

while (*col_end && depth > 0)
{
// Handle strings
if (!in_string && (*col_end == '\'' || *col_end == '"' || *col_end == '`'))
{
in_string = true;
string_delim = *col_end;
}
else if (in_string && *col_end == string_delim)
{
// Check for escaped delimiter
if (*(col_end + 1) == string_delim)
col_end++; // Skip escaped
else
in_string = false;
}
else if (!in_string)
{
if (*col_end == '(')
depth++;
else if (*col_end == ')')
{
depth--;
if (depth == 0)
break; // Found the matching closing paren
}
}
col_end++;
}

if (depth == 0 && col_end > col_start)
{
// col_end now points to the closing ')'
std::string columns_block(col_start, col_end - col_start);

// Now parse column definitions from this block only
// Pattern matches lines that start with whitespace + identifier + whitespace + type
regcomp(&column_regex,
"\\n[ \\t]*(`[^`]+`|[a-zA-Z_][a-zA-Z0-9_]*)[ \\t]+([a-zA-Z]+)",
REG_EXTENDED);

const char *block = columns_block.c_str();
search_start = block;

while (regexec(&column_regex, search_start, MAX_MATCHES, match, 0) == 0)
{
std::string col_name(search_start + match[1].rm_so,
match[1].rm_eo - match[1].rm_so);
std::string col_type(search_start + match[2].rm_so,
match[2].rm_eo - match[2].rm_so);

// Remove backticks
if (!col_name.empty() && col_name.front() == '`' && col_name.back() == '`')
col_name = col_name.substr(1, col_name.length() - 2);

// Convert to uppercase for comparison
std::string col_name_upper = col_name;
for (char &c : col_name_upper) c = toupper(c);

// Skip constraint/key lines
if (col_name_upper != "PRIMARY" && col_name_upper != "CONSTRAINT" &&
col_name_upper != "KEY" && col_name_upper != "INDEX" &&
col_name_upper != "UNIQUE" && col_name_upper != "FULLTEXT" &&
col_name_upper != "SPATIAL" && col_name_upper != "VECTOR")
{
columns.push_back({col_name, col_type});
}

search_start += match[0].rm_eo - 1;
}
}
}
regfree(&primary_key_regex);
regfree(&constraint_regex);
regfree(&index_regex);
regfree(&engine_regex);
regfree(&table_name_regex);
regfree(&column_regex);
}

/**
Expand Down
7 changes: 7 additions & 0 deletions client/import_util.h
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,12 @@ struct KeyDefinition
std::string name;
};

/**
  Name and type of a single column, as extracted from the column
  definition block of a CREATE TABLE statement.

  Member order matters: instances are brace-initialized as {name, type}
  by the TableDDLInfo constructor.
*/
struct ColumnInfo
{
/* Column name; enclosing backticks are removed by the parser. */
std::string name;
/*
  Leading alphabetic type keyword only (the column regex captures
  [a-zA-Z]+), so length/precision such as "(11)" is not included.
*/
std::string type;
};

/**
Information about keys, constraints and columns, extracted from
a CREATE TABLE statement
Expand All @@ -50,6 +56,7 @@ struct TableDDLInfo
KeyDefinition primary_key;
std::vector<KeyDefinition> constraints;
std::vector<KeyDefinition> secondary_indexes;
std::vector<ColumnInfo> columns;
std::string storage_engine;
std::string table_name;
/* Innodb is using first UNIQUE key for clustering, if no PK is set*/
Expand Down
29 changes: 28 additions & 1 deletion client/mysqldump.cc
Original file line number Diff line number Diff line change
Expand Up @@ -3473,7 +3473,7 @@ static uint get_table_structure(const char *table, const char *db, char *table_t
mysql_free_result(result);
}
my_snprintf(query_buff, sizeof(query_buff),
"select column_name, extra, generation_expression, data_type "
"select column_name, extra, generation_expression, data_type, character_set_name "
"from information_schema.columns where table_schema=database() "
"and table_name=%s order by ordinal_position",
quote_for_equal(table, temp_buff));
Expand Down Expand Up @@ -3506,6 +3506,26 @@ static uint get_table_structure(const char *table, const char *db, char *table_t
dynstr_append_checked(&select_field_names_for_header, ", ");
}
init=1;
my_bool is_blob_field= 0;
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is not really is_blob_field, is it? I'd call it is_print_as_hex.

/*
Check if this is a binary/blob field that should be hex-encoded.
For multi_file_output (--tab/--dir), we need to wrap with HEX() in SELECT.
Binary fields have character_set_name = NULL or 'binary'.
*/
if (opt_hex_blob && multi_file_output && row[3])
{
/* Check for blob/binary types with binary charset */
if ((row[4] == NULL || strcmp(row[4], "binary") == 0) &&
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Do you really need to check both the character set and the data type?

(strcmp(row[3], "binary") == 0 ||
strcmp(row[3], "varbinary") == 0 ||
strcmp(row[3], "tinyblob") == 0 ||
strcmp(row[3], "blob") == 0 ||
strcmp(row[3], "mediumblob") == 0 ||
strcmp(row[3], "longblob") == 0))
{
is_blob_field= 1;
}
}

last_name= quote_name(row[0], name_buff, 0);
if (opt_dump_history && *versioned && opt_update_history &&
Expand All @@ -3520,6 +3540,13 @@ static uint get_table_structure(const char *table, const char *db, char *table_t
dynstr_append_checked(&select_field_names, ") as ");
dynstr_append_checked(&select_field_names, last_name);
}
else if (is_blob_field)
{
dynstr_append_checked(&select_field_names, "HEX(");
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

You have a spacing issue here. Please fix it, and check the rest of the diff for the same problem.

dynstr_append_checked(&select_field_names, last_name);
dynstr_append_checked(&select_field_names, ") AS ");
dynstr_append_checked(&select_field_names, last_name);
}
else
dynstr_append_checked(&select_field_names, last_name);
dynstr_append_checked(&insert_field_names, last_name);
Expand Down
Loading
Loading