Skip to content

Commit

Permalink
Handle ANSI escape sequences when performing column wrapping (#2849)
Browse files Browse the repository at this point in the history
This PR adds functionality to skip around ANSI escape sequences in catch_textflow so they do not contribute to line length and line wrapping code does not split escape sequences in the middle. I've implemented this by creating a AnsiSkippingString abstraction that has a bidirectional iterator that can skip around escape sequences while iterating. Additionally I refactored Column::const_iterator to be iterator-based rather than index-based so this abstraction is a simple drop-in for std::string.

Currently only color sequences are handled, other escape sequences are left unaffected.

Motivation: Text with ANSI color sequences gets messed up when being output by Catch2 #2833.
  • Loading branch information
jeremy-rifkin authored May 4, 2024
1 parent fa5a53d commit 8ce2426
Show file tree
Hide file tree
Showing 3 changed files with 489 additions and 69 deletions.
227 changes: 169 additions & 58 deletions src/catch2/internal/catch_textflow.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -26,117 +26,228 @@ namespace {
return std::memchr( chars, c, sizeof( chars ) - 1 ) != nullptr;
}

bool isBoundary( std::string const& line, size_t at ) {
assert( at > 0 );
assert( at <= line.size() );

return at == line.size() ||
( isWhitespace( line[at] ) && !isWhitespace( line[at - 1] ) ) ||
isBreakableBefore( line[at] ) ||
isBreakableAfter( line[at - 1] );
}

} // namespace

namespace Catch {
namespace TextFlow {
void AnsiSkippingString::preprocessString() {
for ( auto it = m_string.begin(); it != m_string.end(); ) {
// try to read through an ansi sequence
while ( it != m_string.end() && *it == '\033' &&
it + 1 != m_string.end() && *( it + 1 ) == '[' ) {
auto cursor = it + 2;
while ( cursor != m_string.end() &&
( isdigit( *cursor ) || *cursor == ';' ) ) {
++cursor;
}
if ( cursor == m_string.end() || *cursor != 'm' ) {
break;
}
// 'm' -> 0xff
*cursor = AnsiSkippingString::sentinel;
// if we've read an ansi sequence, set the iterator and
// return to the top of the loop
it = cursor + 1;
}
if ( it != m_string.end() ) {
++m_size;
++it;
}
}
}

AnsiSkippingString::AnsiSkippingString( std::string const& text ):
m_string( text ) {
preprocessString();
}

AnsiSkippingString::AnsiSkippingString( std::string&& text ):
m_string( CATCH_MOVE( text ) ) {
preprocessString();
}

AnsiSkippingString::const_iterator AnsiSkippingString::begin() const {
return const_iterator( m_string );
}

AnsiSkippingString::const_iterator AnsiSkippingString::end() const {
return const_iterator( m_string, const_iterator::EndTag{} );
}

std::string AnsiSkippingString::substring( const_iterator begin,
const_iterator end ) const {
// There's one caveat here to an otherwise simple substring: when
// making a begin iterator we might have skipped ansi sequences at
// the start. If `begin` here is a begin iterator, skipped over
// initial ansi sequences, we'll use the true beginning of the
// string. Lastly: We need to transform any chars we replaced with
// 0xff back to 'm'
auto str = std::string( begin == this->begin() ? m_string.begin()
: begin.m_it,
end.m_it );
std::transform( str.begin(), str.end(), str.begin(), []( char c ) {
return c == AnsiSkippingString::sentinel ? 'm' : c;
} );
return str;
}

void AnsiSkippingString::const_iterator::tryParseAnsiEscapes() {
// check if we've landed on an ansi sequence, and if so read through
// it
while ( m_it != m_string->end() && *m_it == '\033' &&
m_it + 1 != m_string->end() && *( m_it + 1 ) == '[' ) {
auto cursor = m_it + 2;
while ( cursor != m_string->end() &&
( isdigit( *cursor ) || *cursor == ';' ) ) {
++cursor;
}
if ( cursor == m_string->end() ||
*cursor != AnsiSkippingString::sentinel ) {
break;
}
// if we've read an ansi sequence, set the iterator and
// return to the top of the loop
m_it = cursor + 1;
}
}

void AnsiSkippingString::const_iterator::advance() {
assert( m_it != m_string->end() );
m_it++;
tryParseAnsiEscapes();
}

void AnsiSkippingString::const_iterator::unadvance() {
assert( m_it != m_string->begin() );
m_it--;
// if *m_it is 0xff, scan back to the \033 and then m_it-- once more
// (and repeat check)
while ( *m_it == AnsiSkippingString::sentinel ) {
while ( *m_it != '\033' ) {
assert( m_it != m_string->begin() );
m_it--;
}
// if this happens, we must have been a begin iterator that had
// skipped over ansi sequences at the start of a string
assert( m_it != m_string->begin() );
assert( *m_it == '\033' );
m_it--;
}
}

static bool isBoundary( AnsiSkippingString const& line,
AnsiSkippingString::const_iterator it ) {
return it == line.end() ||
( isWhitespace( *it ) &&
!isWhitespace( *it.oneBefore() ) ) ||
isBreakableBefore( *it ) ||
isBreakableAfter( *it.oneBefore() );
}

void Column::const_iterator::calcLength() {
m_addHyphen = false;
m_parsedTo = m_lineStart;
AnsiSkippingString const& current_line = m_column.m_string;

std::string const& current_line = m_column.m_string;
if ( current_line[m_lineStart] == '\n' ) {
++m_parsedTo;
if ( m_parsedTo == current_line.end() ) {
m_lineEnd = m_parsedTo;
return;
}

assert( m_lineStart != current_line.end() );
if ( *m_lineStart == '\n' ) { ++m_parsedTo; }

const auto maxLineLength = m_column.m_width - indentSize();
const auto maxParseTo = std::min(current_line.size(), m_lineStart + maxLineLength);
while ( m_parsedTo < maxParseTo &&
current_line[m_parsedTo] != '\n' ) {
std::size_t lineLength = 0;
while ( m_parsedTo != current_line.end() &&
lineLength < maxLineLength && *m_parsedTo != '\n' ) {
++m_parsedTo;
++lineLength;
}

// If we encountered a newline before the column is filled,
// then we linebreak at the newline and consider this line
// finished.
if ( m_parsedTo < m_lineStart + maxLineLength ) {
m_lineLength = m_parsedTo - m_lineStart;
if ( lineLength < maxLineLength ) {
m_lineEnd = m_parsedTo;
} else {
// Look for a natural linebreak boundary in the column
// (We look from the end, so that the first found boundary is
// the right one)
size_t newLineLength = maxLineLength;
while ( newLineLength > 0 && !isBoundary( current_line, m_lineStart + newLineLength ) ) {
--newLineLength;
m_lineEnd = m_parsedTo;
while ( lineLength > 0 &&
!isBoundary( current_line, m_lineEnd ) ) {
--lineLength;
--m_lineEnd;
}
while ( newLineLength > 0 &&
isWhitespace( current_line[m_lineStart + newLineLength - 1] ) ) {
--newLineLength;
while ( lineLength > 0 &&
isWhitespace( *m_lineEnd.oneBefore() ) ) {
--lineLength;
--m_lineEnd;
}

// If we found one, then that is where we linebreak
if ( newLineLength > 0 ) {
m_lineLength = newLineLength;
} else {
// Otherwise we have to split text with a hyphen
// If we found one, then that is where we linebreak, otherwise
// we have to split text with a hyphen
if ( lineLength == 0 ) {
m_addHyphen = true;
m_lineLength = maxLineLength - 1;
m_lineEnd = m_parsedTo.oneBefore();
}
}
}

size_t Column::const_iterator::indentSize() const {
auto initial =
m_lineStart == 0 ? m_column.m_initialIndent : std::string::npos;
auto initial = m_lineStart == m_column.m_string.begin()
? m_column.m_initialIndent
: std::string::npos;
return initial == std::string::npos ? m_column.m_indent : initial;
}

std::string
Column::const_iterator::addIndentAndSuffix( size_t position,
size_t length ) const {
std::string Column::const_iterator::addIndentAndSuffix(
AnsiSkippingString::const_iterator start,
AnsiSkippingString::const_iterator end ) const {
std::string ret;
const auto desired_indent = indentSize();
ret.reserve( desired_indent + length + m_addHyphen );
// ret.reserve( desired_indent + (end - start) + m_addHyphen );
ret.append( desired_indent, ' ' );
ret.append( m_column.m_string, position, length );
if ( m_addHyphen ) {
ret.push_back( '-' );
}
// ret.append( start, end );
ret += m_column.m_string.substring( start, end );
if ( m_addHyphen ) { ret.push_back( '-' ); }

return ret;
}

Column::const_iterator::const_iterator( Column const& column ): m_column( column ) {
Column::const_iterator::const_iterator( Column const& column ):
m_column( column ),
m_lineStart( column.m_string.begin() ),
m_lineEnd( column.m_string.begin() ),
m_parsedTo( column.m_string.begin() ) {
assert( m_column.m_width > m_column.m_indent );
assert( m_column.m_initialIndent == std::string::npos ||
m_column.m_width > m_column.m_initialIndent );
calcLength();
if ( m_lineLength == 0 ) {
m_lineStart = m_column.m_string.size();
if ( m_lineStart == m_lineEnd ) {
m_lineStart = m_column.m_string.end();
}
}

std::string Column::const_iterator::operator*() const {
assert( m_lineStart <= m_parsedTo );
return addIndentAndSuffix( m_lineStart, m_lineLength );
return addIndentAndSuffix( m_lineStart, m_lineEnd );
}

Column::const_iterator& Column::const_iterator::operator++() {
m_lineStart += m_lineLength;
std::string const& current_line = m_column.m_string;
if ( m_lineStart < current_line.size() && current_line[m_lineStart] == '\n' ) {
m_lineStart += 1;
m_lineStart = m_lineEnd;
AnsiSkippingString const& current_line = m_column.m_string;
if ( m_lineStart != current_line.end() && *m_lineStart == '\n' ) {
m_lineStart++;
} else {
while ( m_lineStart < current_line.size() &&
isWhitespace( current_line[m_lineStart] ) ) {
while ( m_lineStart != current_line.end() &&
isWhitespace( *m_lineStart ) ) {
++m_lineStart;
}
}

if ( m_lineStart != current_line.size() ) {
calcLength();
}
if ( m_lineStart != current_line.end() ) { calcLength(); }
return *this;
}

Expand Down Expand Up @@ -233,25 +344,25 @@ namespace Catch {
return os;
}

Columns operator+(Column const& lhs, Column const& rhs) {
Columns operator+( Column const& lhs, Column const& rhs ) {
Columns cols;
cols += lhs;
cols += rhs;
return cols;
}
Columns operator+(Column&& lhs, Column&& rhs) {
Columns operator+( Column&& lhs, Column&& rhs ) {
Columns cols;
cols += CATCH_MOVE( lhs );
cols += CATCH_MOVE( rhs );
return cols;
}

Columns& operator+=(Columns& lhs, Column const& rhs) {
Columns& operator+=( Columns& lhs, Column const& rhs ) {
lhs.m_columns.push_back( rhs );
return lhs;
}
Columns& operator+=(Columns& lhs, Column&& rhs) {
lhs.m_columns.push_back( CATCH_MOVE(rhs) );
Columns& operator+=( Columns& lhs, Column&& rhs ) {
lhs.m_columns.push_back( CATCH_MOVE( rhs ) );
return lhs;
}
Columns operator+( Columns const& lhs, Column const& rhs ) {
Expand Down
Loading

0 comments on commit 8ce2426

Please sign in to comment.