Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions AUTHORS
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ The following submitted code, packages or analysis, and deserve special thanks:
Tobias Predel
Andrew Poelstra
thaafox
Johan Sarge

Thanks to the following, who submitted detailed bug reports and excellent
suggestions:
Expand Down
2 changes: 2 additions & 0 deletions ChangeLog
Original file line number Diff line number Diff line change
@@ -1,4 +1,6 @@
master/HEAD
- #117 Extended Unicode support in Composite, character display width now taken into account
(thanks to Johan Sarge)
- #111 Duration: support negative durations by prefixing a '-' before the P in ISO format
(thanks to Andrew Poelstra)
- #113 Set CMAKE_CURRENT_SOURCE_DIR instead of CMAKE_SOURCE_DIR
Expand Down
226 changes: 188 additions & 38 deletions src/Composite.cpp
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
////////////////////////////////////////////////////////////////////////////////
//
// Copyright 2016 - 2021, 2023, Gothenburg Bit Factory.
// Copyright 2016 - 2021, 2023, 2026 Gothenburg Bit Factory.
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to deal
Expand All @@ -25,10 +25,111 @@
////////////////////////////////////////////////////////////////////////////////

#include <Composite.h>
#include <format.h>
#include <limits>
#include <sstream>
#include <stack>
#include <utf8.h>


////////////////////////////////////////////////////////////////////////////////

namespace
{

// Stores the value (x) at index (i) of the vector (v).
//
// * If (i) is a valid index, the existing element is overwritten.
// * Otherwise the vector grows to exactly (i+1) elements: every new slot below
//   (i) is filled with the padding value (pad), and (x) lands at index (i).
//
// (x) may refer to an element of (v) itself; it is copied before the vector is
// resized so that a reallocation cannot leave the reference dangling.
template <typename T>
void put_or_extend (
  std::vector<T>& v, typename std::vector<T>::size_type i, const T& x, const T& pad = T {})
{
  if (i < v.size ())
  {
    v[i] = x;
    return;
  }

  // Take the copy first: resize() may reallocate and invalidate (x) if it
  // aliases an element of (v).
  T value (x);
  v.resize (i, pad);
  v.push_back (value);
}

// Bookkeeping record for one display column of a Composite.
// A record either refers to a slice of a layer's UTF-8 text, or marks the
// column as padding (encoded as text_end_i < text_begin_i).
struct ColumnData
{
  // Number of the topmost layer overlapping this column. Layer numbers start
  // at 1; the value 0 denotes background that no layer covers.
  unsigned int layer_num;

  // Byte offset, within the text of layer (layer_num), of the first byte of
  // the first character shown in this column.
  std::string::size_type text_begin_i;

  // Byte offset, within the text of layer (layer_num), one past the last byte
  // of the last character shown in this column.
  std::string::size_type text_end_i;

  // Display width of the first character shown in this column: 1 or 2 for
  // real content, 0 for a padding column.
  unsigned char char_0_width;

  // With no arguments this produces a padding column on "layer 0".
  ColumnData (
    unsigned int layer = 0, std::string::size_type begin_i = 1, std::string::size_type end_i = 0,
    unsigned char c_0_w = 0)
  : layer_num {layer}
  , text_begin_i {begin_i}
  , text_end_i {end_i}
  , char_0_width {c_0_w}
  {}

  ColumnData (const ColumnData& orig) = default;

  ColumnData& operator= (const ColumnData& orig) = default;

  // Signed length in bytes of the referenced text slice; negative for padding.
  std::string::difference_type byte_count () const
  {
    return static_cast<std::string::difference_type> (text_end_i - text_begin_i);
  }

  // Turns this record into a padding column (byte_count becomes negative).
  // The layer number is left untouched.
  void make_padding ()
  {
    char_0_width = 0;
    text_end_i = 0;
    text_begin_i = 1;
  }

  // True when this record represents padding rather than layer text.
  bool is_padding () const
  {
    return 0 > byte_count ();
  }
};

// Default-constructed ColumnData: layer number 0 with begin/end offsets 1 and 0,
// which is the padding state. Used as the fill value when the column list has
// to be extended past its current end.
const ColumnData LAYER_0_PAD; // ColumnData representing a padding column on "layer 0".

// Special column index value, distinct from any valid column index.
// It is the largest value that std::string::size_type can represent.
const std::string::size_type INVALID_COLUMN_I = std::numeric_limits<std::string::size_type>::max ();

// Helper function that turns the uncovered half of half-covered wide characters into padding.
inline void do_halfcovered_wide_char_check (
std::vector<ColumnData>& columns, std::vector<ColumnData>::size_type column_i)
{
// If there is a wide character (on a lower layer) in the preceding column, replace
// that character (and any nonspacing characters associated with it) with padding.
// (Because the second half of that character will be covered, and we couldn't display
// half a character if we wanted to.)
if (column_i >= 1 && column_i - 1 < columns.size ())
{
ColumnData& prev_col_data = columns[column_i - 1];
if (prev_col_data.char_0_width == 2)
prev_col_data.make_padding ();
}
}

};

////////////////////////////////////////////////////////////////////////////////
// Initially assume no text, but infinite virtual space.
//
Expand Down Expand Up @@ -74,65 +175,114 @@ void Composite::add (
// bbbbb // Layer 2
// c // Layer 3
//
// Walk all strings left to right, selecting the character and color from the
// Walk all layers left to right, selecting the character and color from the
// highest numbered layer. Emit color codes only on edge detection.
//
// Builds the composited output string from _layers and returns it, emitting
// the code ()/end () sequences of a layer whenever the layer supplying the
// current column changes.
// NOTE(review): this span looks like a rendered diff in which removed lines
// (the `characters`/`colors` vectors) and added lines (the `columns` vector)
// are interleaved; confirm which lines are live before editing.
std::string Composite::str () const
{
// The strings are broken into a vector of int, for UTF8 support.
std::vector <int> characters;
std::vector <int> colors;
for (unsigned int layer = 0; layer < _layers.size (); ++layer)
std::vector <ColumnData> columns;

for (unsigned int layer_i = 0; layer_i < _layers.size (); ++layer_i)
{
const auto& text = std::get <0> (_layers[layer]);
auto offset = std::get <1> (_layers[layer]);
auto len = utf8_text_length (text);
const auto& text = std::get <0> (_layers[layer_i]);
auto offset = std::get <1> (_layers[layer_i]);
auto len = utf8_text_length (text);

// Make sure the vectors are large enough to support a write operator[].
if (characters.size () < offset + len)
{
characters.resize (offset + len, 32);
colors.resize (offset + len, 0);
}
// Make sure the capacity of the column vector is large enough to support push_back()
// without reallocation.
// NOTE(review): presumably len counts characters, while each wide character
// below consumes two columns, so this may under-reserve — confirm.
if (columns.capacity () < offset + len)
columns.reserve (offset + len);

// Copy in the layer characters and color indexes.
// Inspect and decide how to handle each character (i.e. Unicode code point)
// in the current layer's text string.
std::string::size_type prev_cursor = 0;
std::string::size_type cursor = 0;
int character;
int count = 0;
unsigned int column_count = 0;
std::string::size_type prev_spacer_column_i = INVALID_COLUMN_I;
unsigned int character;
while ((character = utf8_next_char (text, cursor)))
{
characters[offset + count] = character;
colors [offset + count] = layer + 1;
++count;
std::string::size_type column_i = offset + column_count;
int ch_width = mk_wcwidth ((wchar_t)character);

switch (ch_width)
{
case 0: // zero-width / nonspacing character
if (prev_spacer_column_i == INVALID_COLUMN_I) // No preceding spacing character on this layer.
; // Skip this character.
else // There is a preceding spacing character on this layer.
{
// Append the nonspacing character to the column of the previous spacing character.
columns[prev_spacer_column_i].text_end_i = cursor;
}
break;
case 1: // ordinary narrow spacing character
// Only the first spacing character of a layer can land next to a wide
// character from another layer; later ones follow this layer's own columns.
if (prev_spacer_column_i == INVALID_COLUMN_I)
do_halfcovered_wide_char_check (columns, column_i);

// Put the character in the appropriate column. Pad out the column list as necessary.
put_or_extend (columns, column_i, ColumnData (layer_i + 1, prev_cursor, cursor, 1), LAYER_0_PAD);

prev_spacer_column_i = column_i;
column_count += 1;
break;
case 2: // graphically wide spacing character
if (prev_spacer_column_i == INVALID_COLUMN_I)
do_halfcovered_wide_char_check (columns, column_i);

// Put the character in the appropriate column. Pad out the column list as necessary.
// Make the column after the current one (which is also covered by the wide character)
// a padding column on the current layer.
put_or_extend (columns, column_i, ColumnData (layer_i + 1, prev_cursor, cursor, 2), LAYER_0_PAD);
put_or_extend (columns, column_i + 1, ColumnData (layer_i + 1), LAYER_0_PAD);

prev_spacer_column_i = column_i;
column_count += 2;
break;
default: // Should not happen.
throw format ("Unexpected character width {1} of code point 0x{2}.", ch_width, formatHex (character));
}

// Remember byte offset of first UTF-8 byte of next character in the layer text.
prev_cursor = cursor;
}
}

// Now walk the character and color vector, emitting every character and
// every detected color change.
// Now walk the column vector, emitting every character and every detected layer change.
std::stringstream out;
int prev_color = 0;
for (unsigned int i = 0; i < characters.size (); ++i)
unsigned int prev_layer = 0;
for (unsigned int column_i = 0; column_i < columns.size (); ++column_i)
{
// A change in color triggers a code emit.
if (prev_color != colors[i])
auto column_data = columns[column_i];
auto curr_layer = column_data.layer_num;
// NOTE(review): padding columns created from LAYER_0_PAD have layer_num 0, so
// curr_layer - 1 wraps around (unsigned) and the subscript below appears to be
// out of range. `text` is not read for padding columns, but a guard on
// curr_layer looks advisable — confirm.
const auto& text = std::get <0> (_layers[curr_layer - 1]);

// A change in layer triggers an ANSI escape code emit.
if (prev_layer != curr_layer)
{
if (prev_color)
out << std::get <2> (_layers[prev_color - 1]).end ();
if (prev_layer) // Reset attributes (if any) of previous layer.
out << std::get <2> (_layers[prev_layer - 1]).end ();

if (colors[i])
out << std::get <2> (_layers[colors[i] - 1]).code ();
else
out << std::get <2> (_layers[prev_color - 1]).end ();
if (curr_layer) // Set attributes (if any) of current layer.
out << std::get <2> (_layers[curr_layer - 1]).code ();

prev_color = colors[i];
prev_layer = curr_layer;
}

out << utf8_character (characters[i]);
// The layer text string is already UTF-8, so we can output its bytes verbatim,
// provided that we're keeping track of character (i.e. code point) boundaries.
if (column_data.is_padding ())
out << ' '; // Display padding columns as spaces.
else // Display a slice of the layer text (Spacer [Nonspacer ...]).
out.write(text.data () + column_data.text_begin_i, column_data.byte_count ());

// The second column of a wide character was filled with a same-layer padding
// entry when the character was stored; skip it so no extra space is emitted.
if (column_data.char_0_width == 2)
++column_i; // Wide characters cover two columns.
}

// Terminate the color codes, if necessary.
if (prev_color)
out << std::get <2> (_layers[prev_color - 1]).end ();
if (prev_layer)
out << std::get <2> (_layers[prev_layer - 1]).end ();

return out.str ();
}
Expand Down
48 changes: 46 additions & 2 deletions test/composite.t.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@
////////////////////////////////////////////////////////////////////////////////
int main (int, char**)
{
UnitTest t (3);
UnitTest t (4);

Composite c1;
c1.add ("left", 2, Color ());
Expand Down Expand Up @@ -130,8 +130,52 @@ int main (int, char**)
c8.add ( "foo", 7, Color ("white on red"));
t.diag (c8.str ());

// Add layers containing characters with non-standard Unicode width.
// Verify that they are composited correctly.
// * Each zero-width character should be included in the column of the
// preceding non-zero-width character on the same layer. (If there is
// no such character, the zero-width character should be skipped.)
// * Each wide character should be treated as occupying two columns of the
// layer, the one corresponding to the array index at which the character
// code is stored, and the next one.
// * If exactly one of the columns occupied by a wide character is also
// occupied by a character in a higher layer (obscuring half of the wide
// character), then the wide character should not be displayed at all.
// The unobscured column should be treated as containing blank space
// (but still be covered by the current layer).
Composite c9;
c9.add ("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa", 0, Color ()); // BG
c9.add ("a", 50, Color ()); // more BG
c9.add ("😃😃😃", 1, Color ()); // some wide chars
c9.add ("bb", 1, Color ()); // obscure the first of the two wide chars
c9.add ("😖😖😖", 8, Color ()); // a few more wide chars
c9.add ("cc", 9, Color ()); // obscure half of each of the first two
c9.add ("😬😬😬", 15, Color ()); // even more
c9.add ("会会会", 18, Color ()); // obscure the last one-and-half
c9.add ("[èé][ñn̄][öô]", 25, Color ()); // layer with zero-width chars (combining diacritics)
c9.add ("}{", 32, Color ()); // obscure two of the non-zero-width chars
c9.add ("è🐋é🐋", 38, Color ()); // 1-col, 0-col and 2-col chars on same layer
c9.add ("\a\aff", 45, Color ()); // zero-width characters at beginning of layer
t.is (c9.str (), "abb😃😃a cc 😖a😬 会会会a[èé][ñn̄}{öô]aè🐋é🐋affa a", "Composite ... --> 'abb😃😃a cc 😖a😬 会会会a[èé][ñn̄}{öô]aè🐋é🐋affa a'");

// Add colored layers containing characters with non-standard Unicode width.
// Display the result.
Composite c10;
c10.add ("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa", 0, Color ("black on bright blue")); // BG
c10.add ("a", 50, Color ("black on bright blue")); // more BG
c10.add ("😃😃😃", 1, Color ("yellow on grey10")); // some wide chars
c10.add ("bb", 1, Color ("red on black")); // obscure the first of the two wide chars
c10.add ("😖😖😖", 8, Color ("green on blue")); // a few more wide chars
c10.add ("cc", 9, Color ("grey18 on green")); // obscure half of each of the first two
c10.add ("😬😬😬", 15, Color ("white on red")); // even more
c10.add ("会会会", 18, Color ("magenta on grey6")); // obscure the last one-and-half
c10.add ("[èé][ñn̄][öô]", 25, Color ("blue on white")); // layer with zero-width chars (combining diacritics)
c10.add ("}{", 32, Color ("red on white")); // obscure two of the non-zero-width chars
c10.add ("è🐋é🐋", 38, Color ("yellow on cyan")); // 1-col, 0-col and 2-col chars on same layer
c10.add ("\a\aff", 45, Color ("black on bright yellow")); // zero-width characters at beginning of layer
t.diag (c10.str ());
Comment thread
lauft marked this conversation as resolved.

return 0;
}

////////////////////////////////////////////////////////////////////////////////

Loading