From 58ccbaf9445382ae848df2d83afb2183189b3c9c Mon Sep 17 00:00:00 2001 From: Moritz Bunkus Date: Fri, 22 Dec 2023 16:55:00 +0100 Subject: [PATCH] UTFstring: store data in std::wstring instead of doing manual string management --- ebml/EbmlUnicodeString.h | 12 +++---- src/EbmlUnicodeString.cpp | 76 +++++++++++---------------------------- 2 files changed, 25 insertions(+), 63 deletions(-) diff --git a/ebml/EbmlUnicodeString.h b/ebml/EbmlUnicodeString.h index 8e601c2d..c37bd03f 100644 --- a/ebml/EbmlUnicodeString.h +++ b/ebml/EbmlUnicodeString.h @@ -42,19 +42,17 @@ class EBML_DLL_API UTFstring { UTFstring & operator=(wchar_t); /// Return length of string - std::size_t length() const {return _Length;} + std::size_t length() const {return WString.size();} - explicit operator const wchar_t*() const; - const wchar_t* c_str() const {return _Data;} + explicit operator const wchar_t*() const {return WString.c_str();}; + const wchar_t* c_str() const {return WString.c_str();} const std::string & GetUTF8() const {return UTF8string;} void SetUTF8(const std::string &); - private: - std::size_t _Length{0}; ///< length of the UCS string excluding the \0 - wchar_t* _Data{nullptr}; ///< internal UCS representation +private: + std::wstring WString; ///< internal UCS representation std::string UTF8string; - static bool wcscmp_internal(const wchar_t *str1, const wchar_t *str2); void UpdateFromUTF8(); void UpdateFromUCS2(); }; diff --git a/src/EbmlUnicodeString.cpp b/src/EbmlUnicodeString.cpp index 0be95f89..759ff30c 100644 --- a/src/EbmlUnicodeString.cpp +++ b/src/EbmlUnicodeString.cpp @@ -30,7 +30,6 @@ UTFstring::UTFstring(std::wstring const &_aBuf) UTFstring::~UTFstring() { - delete [] _Data; } UTFstring::UTFstring(const UTFstring & _aBuf) @@ -44,49 +43,34 @@ UTFstring & UTFstring::operator=(const UTFstring & _aBuf) return *this; } -UTFstring::operator const wchar_t*() const {return _Data;} - - UTFstring & UTFstring::operator=(const wchar_t * _aBuf) { - delete [] _Data; - if (_aBuf == nullptr) { - _Data = new wchar_t[1]; - _Data[0] = 0; - UpdateFromUCS2(); - return *this; - } + if (_aBuf != nullptr) + WString = _aBuf; + else + WString.clear(); - std::size_t aLen; - for (aLen=0; _aBuf[aLen] != 0; aLen++); - _Length = aLen; - _Data = new wchar_t[_Length+1]; - for (aLen=0; _aBuf[aLen] != 0; aLen++) { - _Data[aLen] = _aBuf[aLen]; - } - _Data[aLen] = 0; UpdateFromUCS2(); return *this; } UTFstring & UTFstring::operator=(wchar_t _aChar) { - delete [] _Data; - _Data = new wchar_t[2]; - _Length = 1; - _Data[0] = _aChar; - _Data[1] = 0; + WString = _aChar; UpdateFromUCS2(); return *this; } bool UTFstring::operator==(const UTFstring& _aStr) const { - if ((_Data == nullptr) && (_aStr._Data == nullptr)) - return true; - if ((_Data == nullptr) || (_aStr._Data == nullptr)) + // Only compare up to the first 0 char in both strings. + auto LengthThis = std::distance(WString.begin(), std::find(WString.begin(), WString.end(), L'\0')); + auto LengthOther = std::distance(_aStr.WString.begin(), std::find(_aStr.WString.begin(), _aStr.WString.end(), L'\0')); + + if (LengthThis != LengthOther) return false; - return wcscmp_internal(_Data, _aStr._Data); + + return std::memcmp(WString.c_str(), _aStr.WString.c_str(), LengthThis * sizeof(wchar_t)) == 0; } void UTFstring::SetUTF8(const std::string & _aStr) @@ -103,38 +87,27 @@ void UTFstring::UpdateFromUTF8() // Only convert up to the first \0 character if present. auto Current = std::find(UTF8string.begin(), UTF8string.end(), '\0'); - std::wstring Temp; + WString.clear(); try { // Even though the function names hint at UCS2, the internal // representation must actually be compatible with the C++ // library's implementation. Implementations with sizeof(wchar_t) // == 4 are using UCS4. if (sizeof(wchar_t) == 2) - ::utf8::utf8to16(UTF8string.begin(), Current, std::back_inserter(Temp)); + ::utf8::utf8to16(UTF8string.begin(), Current, std::back_inserter(WString)); else - ::utf8::utf8to32(UTF8string.begin(), Current, std::back_inserter(Temp)); + ::utf8::utf8to32(UTF8string.begin(), Current, std::back_inserter(WString)); } catch (::utf8::invalid_code_point &) { } catch (::utf8::invalid_utf8 &) { } - - delete [] _Data; - _Length = Temp.length(); - _Data = new wchar_t[_Length + 1]; - - std::memcpy(_Data, Temp.c_str(), sizeof(wchar_t) * (_Length + 1)); } void UTFstring::UpdateFromUCS2() { - UTF8string.clear(); - - if (!_Data) - return; - // Only convert up to the first \0 character if present. - std::size_t Current = 0; - while ((Current < _Length) && _Data[Current]) - ++Current; + auto Current = std::find(WString.begin(), WString.end(), L'\0'); + + UTF8string.clear(); try { // Even though the function is called UCS2, the internal @@ -142,23 +115,14 @@ void UTFstring::UpdateFromUCS2() // library's implementation. Implementations with sizeof(wchar_t) // == 4 are using UCS4. if (sizeof(wchar_t) == 2) - ::utf8::utf16to8(_Data, _Data + Current, std::back_inserter(UTF8string)); + ::utf8::utf16to8(WString.begin(), Current, std::back_inserter(UTF8string)); else - ::utf8::utf32to8(_Data, _Data + Current, std::back_inserter(UTF8string)); + ::utf8::utf32to8(WString.begin(), Current, std::back_inserter(UTF8string)); } catch (::utf8::invalid_code_point &) { } catch (::utf8::invalid_utf16 &) { } } -bool UTFstring::wcscmp_internal(const wchar_t *str1, const wchar_t *str2) -{ - std::size_t Index=0; - while (str1[Index] == str2[Index] && str1[Index] != 0) { - Index++; - } - return (str1[Index] == str2[Index]); -} - // ===================== EbmlUnicodeString class =================== EbmlUnicodeString::EbmlUnicodeString()