Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account-related emails.

Already on GitHub? Sign in to your account

EbmlString/UTFstring/EbmlUnicodeString: automatic memory management #178

Merged
merged 7 commits into from
Dec 26, 2023
12 changes: 5 additions & 7 deletions ebml/EbmlUnicodeString.h
Original file line number Diff line number Diff line change
Expand Up @@ -42,19 +42,17 @@ class EBML_DLL_API UTFstring {
UTFstring & operator=(wchar_t);

/// Return length of string
std::size_t length() const {return _Length;}
std::size_t length() const {return WString.size();}

explicit operator const wchar_t*() const;
const wchar_t* c_str() const {return _Data;}
explicit operator const wchar_t*() const {return WString.c_str();};
const wchar_t* c_str() const {return WString.c_str();}
robUx4 marked this conversation as resolved.
Show resolved Hide resolved

const std::string & GetUTF8() const {return UTF8string;}
void SetUTF8(const std::string &);

private:
std::size_t _Length{0}; ///< length of the UCS string excluding the \0
wchar_t* _Data{nullptr}; ///< internal UCS representation
private:
std::wstring WString; ///< internal UCS representation
std::string UTF8string;
static bool wcscmp_internal(const wchar_t *str1, const wchar_t *str2);
void UpdateFromUTF8();
void UpdateFromUCS2();
};
Expand Down
76 changes: 20 additions & 56 deletions src/EbmlUnicodeString.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,6 @@ UTFstring::UTFstring(std::wstring const &_aBuf)

UTFstring::~UTFstring()
robUx4 marked this conversation as resolved.
Show resolved Hide resolved
{
delete [] _Data;
}

UTFstring::UTFstring(const UTFstring & _aBuf)
Expand All @@ -44,49 +43,34 @@ UTFstring & UTFstring::operator=(const UTFstring & _aBuf)
return *this;
}

UTFstring::operator const wchar_t*() const {return _Data;}


UTFstring & UTFstring::operator=(const wchar_t * _aBuf)
{
delete [] _Data;
if (_aBuf == nullptr) {
_Data = new wchar_t[1];
_Data[0] = 0;
UpdateFromUCS2();
return *this;
}
if (_aBuf != nullptr)
WString = _aBuf;
else
WString.clear();

std::size_t aLen;
for (aLen=0; _aBuf[aLen] != 0; aLen++);
_Length = aLen;
_Data = new wchar_t[_Length+1];
for (aLen=0; _aBuf[aLen] != 0; aLen++) {
_Data[aLen] = _aBuf[aLen];
}
_Data[aLen] = 0;
UpdateFromUCS2();
return *this;
}

UTFstring & UTFstring::operator=(wchar_t _aChar)
{
delete [] _Data;
_Data = new wchar_t[2];
_Length = 1;
_Data[0] = _aChar;
_Data[1] = 0;
WString = _aChar;
UpdateFromUCS2();
return *this;
}

bool UTFstring::operator==(const UTFstring& _aStr) const
{
if ((_Data == nullptr) && (_aStr._Data == nullptr))
return true;
if ((_Data == nullptr) || (_aStr._Data == nullptr))
// Only compare up to the first 0 char in both strings.
auto LengthThis = std::distance(WString.begin(), std::find(WString.begin(), WString.end(), L'\0'));
robUx4 marked this conversation as resolved.
Show resolved Hide resolved
auto LengthOther = std::distance(_aStr.WString.begin(), std::find(_aStr.WString.begin(), _aStr.WString.end(), L'\0'));

if (LengthThis != LengthOther)
return false;
return wcscmp_internal(_Data, _aStr._Data);

return std::memcmp(WString.c_str(), _aStr.WString.c_str(), LengthThis * sizeof(wchar_t)) == 0;
}

void UTFstring::SetUTF8(const std::string & _aStr)
Expand All @@ -103,62 +87,42 @@ void UTFstring::UpdateFromUTF8()
// Only convert up to the first \0 character if present.
auto Current = std::find(UTF8string.begin(), UTF8string.end(), '\0');

std::wstring Temp;
WString.clear();
try {
// Even though the function names hint at UCS2, the internal
// representation must actually be compatible with the C++
// library's implementation. Implementations with sizeof(wchar_t)
// == 4 are using UCS4.
if (sizeof(wchar_t) == 2)
::utf8::utf8to16(UTF8string.begin(), Current, std::back_inserter(Temp));
::utf8::utf8to16(UTF8string.begin(), Current, std::back_inserter(WString));
else
::utf8::utf8to32(UTF8string.begin(), Current, std::back_inserter(Temp));
::utf8::utf8to32(UTF8string.begin(), Current, std::back_inserter(WString));
} catch (::utf8::invalid_code_point &) {
} catch (::utf8::invalid_utf8 &) {
}

delete [] _Data;
_Length = Temp.length();
_Data = new wchar_t[_Length + 1];

std::memcpy(_Data, Temp.c_str(), sizeof(wchar_t) * (_Length + 1));
}

void UTFstring::UpdateFromUCS2()
{
UTF8string.clear();

if (!_Data)
return;

// Only convert up to the first \0 character if present.
std::size_t Current = 0;
while ((Current < _Length) && _Data[Current])
++Current;
auto Current = std::find(WString.begin(), WString.end(), L'\0');
robUx4 marked this conversation as resolved.
Show resolved Hide resolved

UTF8string.clear();

try {
// Even though the function is called UCS2, the internal
// representation must actually be compatible with the C++
// library's implementation. Implementations with sizeof(wchar_t)
// == 4 are using UCS4.
if (sizeof(wchar_t) == 2)
::utf8::utf16to8(_Data, _Data + Current, std::back_inserter(UTF8string));
::utf8::utf16to8(WString.begin(), Current, std::back_inserter(UTF8string));
else
::utf8::utf32to8(_Data, _Data + Current, std::back_inserter(UTF8string));
::utf8::utf32to8(WString.begin(), Current, std::back_inserter(UTF8string));
} catch (::utf8::invalid_code_point &) {
} catch (::utf8::invalid_utf16 &) {
}
}

bool UTFstring::wcscmp_internal(const wchar_t *str1, const wchar_t *str2)
{
std::size_t Index=0;
while (str1[Index] == str2[Index] && str1[Index] != 0) {
Index++;
}
return (str1[Index] == str2[Index]);
}

// ===================== EbmlUnicodeString class ===================

EbmlUnicodeString::EbmlUnicodeString()
Expand Down