diff --git a/src/prism.c b/src/prism.c
index d196c5d7c4..9d58bdb43d 100644
--- a/src/prism.c
+++ b/src/prism.c
@@ -8594,6 +8594,7 @@ escape_write_escape_encoded(pm_parser_t *parser, pm_buffer_t *buffer, pm_buffer_
     }
 
     if (width == 1) {
+        if (*parser->current.end == '\n') pm_line_offset_list_append(&parser->line_offsets, PM_TOKEN_END(parser, &parser->current) + 1);
         escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte(*parser->current.end++, flags));
     } else if (width > 1) {
         // Valid multibyte character. Just ignore escape.
@@ -8910,6 +8911,7 @@ escape_read(pm_parser_t *parser, pm_buffer_t *buffer, pm_buffer_t *regular_expre
                         return;
                     }
 
+                    if (peeked == '\n') pm_line_offset_list_append(&parser->line_offsets, PM_TOKEN_END(parser, &parser->current) + 1);
                     parser->current.end++;
                     escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte(peeked, flags | PM_ESCAPE_FLAG_CONTROL));
                     return;
@@ -8968,6 +8970,7 @@ escape_read(pm_parser_t *parser, pm_buffer_t *buffer, pm_buffer_t *regular_expre
                         return;
                     }
 
+                    if (peeked == '\n') pm_line_offset_list_append(&parser->line_offsets, PM_TOKEN_END(parser, &parser->current) + 1);
                     parser->current.end++;
                     escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte(peeked, flags | PM_ESCAPE_FLAG_CONTROL));
                     return;
@@ -9021,6 +9024,7 @@ escape_read(pm_parser_t *parser, pm_buffer_t *buffer, pm_buffer_t *regular_expre
                         return;
                     }
 
+                    if (peeked == '\n') pm_line_offset_list_append(&parser->line_offsets, PM_TOKEN_END(parser, &parser->current) + 1);
                     parser->current.end++;
                     escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte(peeked, flags | PM_ESCAPE_FLAG_META));
                     return;
@@ -9028,6 +9032,7 @@ escape_read(pm_parser_t *parser, pm_buffer_t *buffer, pm_buffer_t *regular_expre
         }
         case '\r': {
             if (peek_offset(parser, 1) == '\n') {
+                pm_line_offset_list_append(&parser->line_offsets, PM_TOKEN_END(parser, &parser->current) + 2);
                 parser->current.end += 2;
                 escape_write_byte_encoded(parser, buffer, flags, escape_byte('\n', flags));
                 return;
diff --git a/test/prism/newline_offsets_test.rb b/test/prism/newline_offsets_test.rb
index 99b808b1df..bb06876a96 100644
--- a/test/prism/newline_offsets_test.rb
+++ b/test/prism/newline_offsets_test.rb
@@ -8,15 +8,38 @@ class NewlineOffsetsTest < TestCase
       define_method(fixture.test_name) { assert_newline_offsets(fixture) }
     end
 
+    def test_escape_control_newline
+      # Newlines consumed inside escape sequences like \C-, \c, and \M-
+      # must be tracked in line offsets across all literal types.
+      %w[\\C- \\c \\M-].each do |escape|
+        assert_newline_offsets_for("\"#{escape}\n\"", "#{escape} in string")
+        assert_newline_offsets_for("`#{escape}\n`", "#{escape} in xstring")
+        assert_newline_offsets_for("/#{escape}\n/", "#{escape} in regexp")
+        assert_newline_offsets_for("%Q{#{escape}\n}", "#{escape} in %Q")
+        assert_newline_offsets_for("%W[#{escape}\n]", "#{escape} in %W")
+        assert_newline_offsets_for("<<~H\n#{escape}\n\nH\n", "#{escape} in heredoc")
+        assert_newline_offsets_for("?#{escape}\n", "#{escape} in char literal")
+      end
+
+      # Combined meta + control escapes
+      assert_newline_offsets_for("\"\\M-\\C-\n\"", "\\M-\\C- in string")
+      assert_newline_offsets_for("\"\\M-\\c\n\"", "\\M-\\c in string")
+
+      # \r\n consumed inside escape context
+      assert_newline_offsets_for("\"\\C-\r\n\"", "\\C- with \\r\\n")
+    end
+
     private
 
     def assert_newline_offsets(fixture)
-      source = fixture.read
+      assert_newline_offsets_for(fixture.read)
+    end
 
+    def assert_newline_offsets_for(source, message = nil)
       expected = [0]
       source.b.scan("\n") { expected << $~.offset(0)[0] + 1 }
 
-      assert_equal expected, Prism.parse(source).source.offsets
+      assert_equal expected, Prism.parse(source).source.offsets, message
     end
   end
 end