From 53c7f0f22e2e4da530a5da3bf27b27faab503741 Mon Sep 17 00:00:00 2001 From: Thirunarayanan Balathandayuthapani Date: Thu, 5 Feb 2026 13:57:27 +0530 Subject: [PATCH] MDEV-36436 Avoid undo log on insertion of intermediate table during Ignore DDL Problem: ========= An assertion failure occurs in InnoDB during consecutive ALTER TABLE operations using the COPY algorithm. The crash happens during the table rename phase because the source table (dict_table_t::n_ref_count) is non-zero, despite the thread holding an exclusive Metadata Lock (MDL). Reason: ======== When ALTER IGNORE TABLE is executed via the COPY algorithm, it generates undo logs for every row inserted into the intermediate table (e.g., #sql-alter-...). The background Purge Thread, responsible for cleaning up these undo logs, attempts to take an MDL on the table to prevent the table from being dropped while in use. Race condition: ================== First ALTER: Creates #sql-alter-, copies data, and renames it to t1. Purge Activation: The Purge thread picks up the undo logs from step 1. It takes an MDL on the temporary name (#sql-alter-) and increments the table's n_ref_count. Identity Shift: InnoDB renames the physical table object to t1, but the Purge thread still holds a reference to this object. Second ALTER: Starts a new copy process. When it attempts to rename the "new" t1 to a backup name, it checks if n_ref_count == 0. Because the Purge thread is still "pinning" the object to clean up logs from the first ALTER, the count is > 0, triggering the assertion failure. Solution: ======== ALTER IGNORE TABLE needs row-level undo logging to easily roll back the last inserted row in case of duplicate key errors. By discarding the last undo log record after inserting each row, purge will not process any log records generated by ALTER IGNORE TABLE, preventing unexpected access from the purge subsystem during subsequent DDL operations. Make skip_alter_undo (1-bit) to (2-bit enum) to support different ALTER operation modes: - NO_UNDO (0): Normal mode with standard undo logging - SKIP_UNDO (1): ALTER mode that skips undo logging - IGNORE_UNDO (2): ALTER IGNORE mode that rewrites undo blocks trx_undo_report_row_operation(): Add ALTER IGNORE undo rewriting logic. Store old undo record info before writing new records for IGNORE_UNDO mode. Reset undo top_offset to maintain only latest insert undo --- include/my_base.h | 5 ++++- sql/sql_insert.cc | 6 ++++-- sql/sql_table.cc | 8 +++++--- storage/innobase/btr/btr0cur.cc | 2 +- storage/innobase/handler/ha_innodb.cc | 23 ++++++++++++++++++++--- storage/innobase/include/dict0mem.h | 23 ++++++++++++++++++----- storage/innobase/include/trx0undo.h | 4 ++++ storage/innobase/row/row0ins.cc | 2 +- storage/innobase/row/row0uins.cc | 2 +- storage/innobase/row/row0undo.cc | 2 +- storage/innobase/trx/trx0rec.cc | 25 +++++++++++++++++++++++++ storage/mroonga/ha_mroonga.cpp | 3 +++ 12 files changed, 87 insertions(+), 18 deletions(-) diff --git a/include/my_base.h b/include/my_base.h index 8fdd1397ff09d..f92718f3d89ca 100644 --- a/include/my_base.h +++ b/include/my_base.h @@ -233,7 +233,10 @@ enum ha_extra_function { HA_EXTRA_END_ALTER_COPY, /** Abort of writing rows during ALTER TABLE..ALGORITHM=COPY or CREATE..SELCT */ - HA_EXTRA_ABORT_ALTER_COPY + HA_EXTRA_ABORT_ALTER_COPY, + /** Overwrite the old undo log for each insert for + ALTER IGNORE TABLE .. ALGORITHM= COPY */ + HA_EXTRA_BEGIN_ALTER_IGNORE_COPY }; /* Compatible option, to be deleted in 6.0 */ diff --git a/sql/sql_insert.cc b/sql/sql_insert.cc index 64a57f2f111fc..c6eb5cb1787ef 100644 --- a/sql/sql_insert.cc +++ b/sql/sql_insert.cc @@ -5253,8 +5253,10 @@ select_create::prepare(List &_values, SELECT_LEX_UNIT *u) !table->s->long_unique_table && !table->s->hlindexes()) { table->file->ha_start_bulk_insert((ha_rows) 0); - if (thd->lex->duplicates == DUP_ERROR && !thd->lex->ignore) - table->file->extra(HA_EXTRA_BEGIN_ALTER_COPY); + if (thd->lex->duplicates == DUP_ERROR) + table->file->extra(thd->lex->ignore + ? HA_EXTRA_BEGIN_ALTER_IGNORE_COPY + : HA_EXTRA_BEGIN_ALTER_COPY); table->file->extra(HA_EXTRA_WRITE_CACHE); } thd->abort_on_warning= !info.ignore && thd->is_strict_mode(); diff --git a/sql/sql_table.cc b/sql/sql_table.cc index 1907dbbc06496..6521b5bc0a8e4 100644 --- a/sql/sql_table.cc +++ b/sql/sql_table.cc @@ -12812,8 +12812,10 @@ copy_data_between_tables(THD *thd, TABLE *from, TABLE *to, thd->progress.max_counter= from->file->records(); time_to_report_progress= MY_HOW_OFTEN_TO_WRITE/10; /* for now, InnoDB needs the undo log for ALTER IGNORE */ - if (!ignore && !to->s->hlindexes()) - to->file->extra(HA_EXTRA_BEGIN_ALTER_COPY); + if (!to->s->hlindexes()) + to->file->extra(ignore + ? HA_EXTRA_BEGIN_ALTER_IGNORE_COPY + : HA_EXTRA_BEGIN_ALTER_COPY); if (!(error= info.read_record())) { @@ -13000,7 +13002,7 @@ copy_data_between_tables(THD *thd, TABLE *from, TABLE *to, } bulk_insert_started= 0; - if (!ignore && !to->s->hlindexes() && error <= 0) + if (!to->s->hlindexes() && error <= 0) { int alt_error= to->file->extra(HA_EXTRA_END_ALTER_COPY); if (alt_error > 0) diff --git a/storage/innobase/btr/btr0cur.cc b/storage/innobase/btr/btr0cur.cc index b57f5ec5d2e69..e0dbe083fdcbd 100644 --- a/storage/innobase/btr/btr0cur.cc +++ b/storage/innobase/btr/btr0cur.cc @@ -2167,7 +2167,7 @@ btr_cur_ins_lock_and_undo( || dict_index_is_clust(index) || (flags & BTR_CREATE_FLAG)); ut_ad((flags & BTR_NO_UNDO_LOG_FLAG) - || !index->table->skip_alter_undo); + || index->table->skip_alter_undo != dict_table_t::NO_UNDO); ut_ad(mtr->is_named_space(index->table->space)); diff --git a/storage/innobase/handler/ha_innodb.cc b/storage/innobase/handler/ha_innodb.cc index 46f084108153d..d7dde6796ea71 100644 --- a/storage/innobase/handler/ha_innodb.cc +++ b/storage/innobase/handler/ha_innodb.cc @@ -15931,9 +15931,13 @@ ha_innobase::extra( break; } goto stmt_boundary; + case HA_EXTRA_BEGIN_ALTER_IGNORE_COPY: case HA_EXTRA_BEGIN_ALTER_COPY: trx = check_trx_exists(thd); - m_prebuilt->table->skip_alter_undo = 1; + m_prebuilt->table->skip_alter_undo = + (operation == HA_EXTRA_BEGIN_ALTER_COPY) + ? dict_table_t::NO_UNDO + : dict_table_t::IGNORE_UNDO; if (m_prebuilt->table->is_temporary() || !m_prebuilt->table->versioned_by_id()) { break; @@ -15952,7 +15956,19 @@ ha_innobase::extra( they could cause a severe performance regression. */ break; } - m_prebuilt->table->skip_alter_undo = 0; + + if (m_prebuilt->table->skip_alter_undo == + dict_table_t::IGNORE_UNDO) { + /* Remove the last undo log from + transaction and does call commit_empty() + during commit process */ + trx->undo_no = 0; + trx_undo_try_truncate(trx); + } + + m_prebuilt->table->skip_alter_undo = + dict_table_t::NORMAL_UNDO; + if (dberr_t err= trx->bulk_insert_apply()) { trx->rollback(); return convert_error_code_to_mysql( @@ -15985,7 +16001,8 @@ ha_innobase::extra( if (m_prebuilt->table->skip_alter_undo && !m_prebuilt->table->is_temporary()) { trx = check_trx_exists(ha_thd()); - m_prebuilt->table->skip_alter_undo = 0; + m_prebuilt->table->skip_alter_undo = + dict_table_t::NORMAL_UNDO; trx->rollback(); return(HA_ERR_ROLLBACK); } diff --git a/storage/innobase/include/dict0mem.h b/storage/innobase/include/dict0mem.h index 87f034f3178ea..2b4c856b50352 100644 --- a/storage/innobase/include/dict0mem.h +++ b/storage/innobase/include/dict0mem.h @@ -2223,12 +2223,25 @@ struct dict_table_t { Use DICT_TF2_FLAG_IS_SET() to parse this flag. */ unsigned flags2:DICT_TF2_BITS; - /** TRUE if the table is an intermediate table during copy alter - operation or a partition/subpartition which is required for copying - data and skip the undo log for insertion of row in the table. - This variable will be set and unset during extra(), or during the + /** Undo log handling modes for ALTER TABLE operations */ + enum skip_alter_undo_t { + /** Normal mode - standard undo logging */ + NORMAL_UNDO = 0, + /** ALTER mode - skip undo logging */ + NO_UNDO = 1, + /** ALTER IGNORE mode - rewrite undo blocks to + maintain only latest insert undo log */ + IGNORE_UNDO = 2 + }; + + /** Mode for handling undo logs during ALTER TABLE operations. + Set during copy alter operations or partition/subpartition operations. + When set, controls undo log behavior for row operations in the table. + For ALTER IGNORE, this enables rewriting old insert undo blocks + to maintain only the latest insert undo log. + This variable is set and unset during extra(), or during the process of altering partitions */ - unsigned skip_alter_undo:1; + unsigned skip_alter_undo:2; /** whether this is in a single-table tablespace and the .ibd file is believed to be missing or page decryption failed and page is diff --git a/storage/innobase/include/trx0undo.h b/storage/innobase/include/trx0undo.h index 5d998c8da66c7..03f2d89cd09fc 100644 --- a/storage/innobase/include/trx0undo.h +++ b/storage/innobase/include/trx0undo.h @@ -265,6 +265,10 @@ struct trx_undo_t { uint16_t top_offset; /*!< offset of the latest undo record, i.e., the topmost element in the undo log if we think of it as a stack */ + /** For ALTER IGNORE: store previous undo record info to + rewrite undo chain */ + uint16_t old_offset; /*!< previous undo record offset + for ALTER IGNORE */ undo_no_t top_undo_no; /*!< undo number of the latest record (IB_ID_MAX if the undo log is empty) */ buf_block_t* guess_block; /*!< guess for the buffer block where diff --git a/storage/innobase/row/row0ins.cc b/storage/innobase/row/row0ins.cc index 87d02da38e5a7..deb3676c8803b 100644 --- a/storage/innobase/row/row0ins.cc +++ b/storage/innobase/row/row0ins.cc @@ -3316,7 +3316,7 @@ row_ins_clust_index_entry( skip the undo log and record lock checking for insertion operation. */ - if (index->table->skip_alter_undo) { + if (index->table->skip_alter_undo == dict_table_t::NO_UNDO) { flags |= BTR_NO_UNDO_LOG_FLAG | BTR_NO_LOCKING_FLAG; } diff --git a/storage/innobase/row/row0uins.cc b/storage/innobase/row/row0uins.cc index e3bb2b412ca43..c2e6f7a0b788b 100644 --- a/storage/innobase/row/row0uins.cc +++ b/storage/innobase/row/row0uins.cc @@ -435,7 +435,7 @@ static bool row_undo_ins_parse_undo_rec(undo_node_t* node, bool dict_locked) node->table = NULL; return false; } else { - ut_ad(!node->table->skip_alter_undo); + ut_ad(node->table->skip_alter_undo != dict_table_t::NO_UNDO); clust_index = dict_table_get_first_index(node->table); if (clust_index != NULL) { diff --git a/storage/innobase/row/row0undo.cc b/storage/innobase/row/row0undo.cc index 2de3e473b2143..57fa45a3df329 100644 --- a/storage/innobase/row/row0undo.cc +++ b/storage/innobase/row/row0undo.cc @@ -171,7 +171,7 @@ row_undo_search_clust_to_pcur( rec_offs* offsets = offsets_; rec_offs_init(offsets_); - ut_ad(!node->table->skip_alter_undo); + ut_ad(node->table->skip_alter_undo != dict_table_t::NO_UNDO); mtr_start(&mtr); diff --git a/storage/innobase/trx/trx0rec.cc b/storage/innobase/trx/trx0rec.cc index fbde1ae465d22..1f60cf5eb73b1 100644 --- a/storage/innobase/trx/trx0rec.cc +++ b/storage/innobase/trx/trx0rec.cc @@ -1906,6 +1906,8 @@ trx_undo_report_row_operation( } trx_undo_t* undo = *pundo; + bool ignore_ddl= + (index->table->skip_alter_undo == dict_table_t::IGNORE_UNDO); ut_ad((err == DB_SUCCESS) == (undo_block != NULL)); if (UNIV_UNLIKELY(undo_block == NULL)) { err_exit: @@ -1915,6 +1917,22 @@ trx_undo_report_row_operation( ut_ad(undo != NULL); + /* For ALTER IGNORE, implement undo log rewriting to maintain only + the latest insert undo record. This allows easy rollback of the + last inserted row on duplicate key errors. Before writing a new + undo record, rewind the undo log to the previous record position, + effectively discarding all intermediate + undo records and keeping only the most recent one. */ + if (ignore_ddl && !undo->empty() && + undo->old_offset <= undo->top_offset) { + + undo->top_offset = undo->old_offset; + undo->top_undo_no = 0; + mtr.write<2>(*undo_block, + undo_block->page.frame + TRX_UNDO_PAGE_HDR + + TRX_UNDO_PAGE_FREE, undo->old_offset); + } + do { uint16_t offset = !rec ? trx_undo_page_report_insert( @@ -1989,6 +2007,13 @@ trx_undo_report_row_operation( /* Success */ undo->top_page_no = undo_block->page.id().page_no(); mtr.commit(); + + /* For ALTER IGNORE, store the current record position + as the rollback point for the next operation */ + if (ignore_ddl && undo->empty()) { + undo->old_offset = offset; + } + undo->top_offset = offset; undo->top_undo_no = trx->undo_no++; undo->guess_block = undo_block; diff --git a/storage/mroonga/ha_mroonga.cpp b/storage/mroonga/ha_mroonga.cpp index 49ad7ade11526..43ba1d6231e04 100644 --- a/storage/mroonga/ha_mroonga.cpp +++ b/storage/mroonga/ha_mroonga.cpp @@ -557,6 +557,9 @@ static const char *mrn_inspect_extra_function(enum ha_extra_function operation) case HA_EXTRA_ABORT_ALTER_COPY: inspected = "HA_EXTRA_ABORT_ALTER_COPY"; break; + case HA_EXTRA_BEGIN_ALTER_IGNORE_COPY: + inspected = "HA_EXTRA_BEGIN_ALTER_IGNORE_COPY"; + break; #ifdef MRN_HAVE_HA_EXTRA_EXPORT case HA_EXTRA_EXPORT: inspected = "HA_EXTRA_EXPORT";