summaryrefslogtreecommitdiff
path: root/Source/modules/asura-core/Font/Utf.inc
diff options
context:
space:
mode:
Diffstat (limited to 'Source/modules/asura-core/Font/Utf.inc')
-rw-r--r--Source/modules/asura-core/Font/Utf.inc752
1 files changed, 752 insertions, 0 deletions
diff --git a/Source/modules/asura-core/Font/Utf.inc b/Source/modules/asura-core/Font/Utf.inc
new file mode 100644
index 0000000..69a523b
--- /dev/null
+++ b/Source/modules/asura-core/Font/Utf.inc
@@ -0,0 +1,752 @@
+////////////////////////////////////////////////////////////
+//
+// SFML - Simple and Fast Multimedia Library
+// Copyright (C) 2007-2019 Laurent Gomila (laurent@sfml-dev.org)
+//
+// This software is provided 'as-is', without any express or implied warranty.
+// In no event will the authors be held liable for any damages arising from the use of this software.
+//
+// Permission is granted to anyone to use this software for any purpose,
+// including commercial applications, and to alter it and redistribute it freely,
+// subject to the following restrictions:
+//
+// 1. The origin of this software must not be misrepresented;
+// you must not claim that you wrote the original software.
+// If you use this software in a product, an acknowledgment
+// in the product documentation would be appreciated but is not required.
+//
+// 2. Altered source versions must be plainly marked as such,
+// and must not be misrepresented as being the original software.
+//
+// 3. This notice may not be removed or altered from any source distribution.
+//
+////////////////////////////////////////////////////////////
+
+
+////////////////////////////////////////////////////////////
+// References:
+//
+// https://www.unicode.org/
+// https://www.unicode.org/Public/PROGRAMS/CVTUTF/ConvertUTF.c
+// https://www.unicode.org/Public/PROGRAMS/CVTUTF/ConvertUTF.h
+// https://people.w3.org/rishida/scripts/uniview/conversion
+//
+////////////////////////////////////////////////////////////
+
+
+////////////////////////////////////////////////////////////
+template <typename In>
+In Utf<8>::Decode(In begin, In end, Uint32& output, Uint32 replacement)
+{
+ // Some useful precomputed data
+ static const int trailing[256] =
+ {
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 5, 5, 5, 5
+ };
+ static const Uint32 offsets[6] =
+ {
+ 0x00000000, 0x00003080, 0x000E2080, 0x03C82080, 0xFA082080, 0x82082080
+ };
+
+ // decode the character
+ int trailingBytes = trailing[static_cast<Uint8>(*begin)];
+ if (begin + trailingBytes < end)
+ {
+ output = 0;
+ switch (trailingBytes)
+ {
+ case 5: output += static_cast<Uint8>(*begin++); output <<= 6;
+ case 4: output += static_cast<Uint8>(*begin++); output <<= 6;
+ case 3: output += static_cast<Uint8>(*begin++); output <<= 6;
+ case 2: output += static_cast<Uint8>(*begin++); output <<= 6;
+ case 1: output += static_cast<Uint8>(*begin++); output <<= 6;
+ case 0: output += static_cast<Uint8>(*begin++);
+ }
+ output -= offsets[trailingBytes];
+ }
+ else
+ {
+ // Incomplete character
+ begin = end;
+ output = replacement;
+ }
+
+ return begin;
+}
+
+
+////////////////////////////////////////////////////////////
+template <typename Out>
+Out Utf<8>::Encode(Uint32 input, Out output, Uint8 replacement)
+{
+ // Some useful precomputed data
+ static const Uint8 firstBytes[7] =
+ {
+ 0x00, 0x00, 0xC0, 0xE0, 0xF0, 0xF8, 0xFC
+ };
+
+ // encode the character
+ if ((input > 0x0010FFFF) || ((input >= 0xD800) && (input <= 0xDBFF)))
+ {
+ // Invalid character
+ if (replacement)
+ *output++ = replacement;
+ }
+ else
+ {
+ // Valid character
+
+ // Get the number of bytes to write
+ std::size_t bytestoWrite = 1;
+ if (input < 0x80) bytestoWrite = 1;
+ else if (input < 0x800) bytestoWrite = 2;
+ else if (input < 0x10000) bytestoWrite = 3;
+ else if (input <= 0x0010FFFF) bytestoWrite = 4;
+
+ // Extract the bytes to write
+ Uint8 bytes[4];
+ switch (bytestoWrite)
+ {
+ case 4: bytes[3] = static_cast<Uint8>((input | 0x80) & 0xBF); input >>= 6;
+ case 3: bytes[2] = static_cast<Uint8>((input | 0x80) & 0xBF); input >>= 6;
+ case 2: bytes[1] = static_cast<Uint8>((input | 0x80) & 0xBF); input >>= 6;
+ case 1: bytes[0] = static_cast<Uint8> (input | firstBytes[bytestoWrite]);
+ }
+
+ // Add them to the output
+ output = std::copy(bytes, bytes + bytestoWrite, output);
+ }
+
+ return output;
+}
+
+
+////////////////////////////////////////////////////////////
+template <typename In>
+In Utf<8>::Next(In begin, In end)
+{
+ Uint32 codepoint;
+ return Decode(begin, end, codepoint);
+}
+
+
+////////////////////////////////////////////////////////////
+template <typename In>
+std::size_t Utf<8>::Count(In begin, In end)
+{
+ std::size_t length = 0;
+ while (begin < end)
+ {
+ begin = Next(begin, end);
+ ++length;
+ }
+
+ return length;
+}
+
+
+////////////////////////////////////////////////////////////
+template <typename In, typename Out>
+Out Utf<8>::FromAnsi(In begin, In end, Out output, const std::locale& locale)
+{
+ while (begin < end)
+ {
+ Uint32 codepoint = Utf<32>::DecodeAnsi(*begin++, locale);
+ output = Encode(codepoint, output);
+ }
+
+ return output;
+}
+
+
+////////////////////////////////////////////////////////////
+template <typename In, typename Out>
+Out Utf<8>::FromWide(In begin, In end, Out output)
+{
+ while (begin < end)
+ {
+ Uint32 codepoint = Utf<32>::DecodeWide(*begin++);
+ output = Encode(codepoint, output);
+ }
+
+ return output;
+}
+
+
+////////////////////////////////////////////////////////////
+template <typename In, typename Out>
+Out Utf<8>::FromLatin1(In begin, In end, Out output)
+{
+ // Latin-1 is directly compatible with Unicode encodings,
+ // and can thus be treated as (a sub-range of) UTF-32
+ while (begin < end)
+ output = Encode(*begin++, output);
+
+ return output;
+}
+
+
+////////////////////////////////////////////////////////////
+template <typename In, typename Out>
+Out Utf<8>::ToAnsi(In begin, In end, Out output, char replacement, const std::locale& locale)
+{
+ while (begin < end)
+ {
+ Uint32 codepoint;
+ begin = Decode(begin, end, codepoint);
+ output = Utf<32>::EncodeAnsi(codepoint, output, replacement, locale);
+ }
+
+ return output;
+}
+
+
+////////////////////////////////////////////////////////////
+template <typename In, typename Out>
+Out Utf<8>::ToWide(In begin, In end, Out output, wchar_t replacement)
+{
+ while (begin < end)
+ {
+ Uint32 codepoint;
+ begin = Decode(begin, end, codepoint);
+ output = Utf<32>::EncodeWide(codepoint, output, replacement);
+ }
+
+ return output;
+}
+
+
+////////////////////////////////////////////////////////////
+template <typename In, typename Out>
+Out Utf<8>::ToLatin1(In begin, In end, Out output, char replacement)
+{
+ // Latin-1 is directly compatible with Unicode encodings,
+ // and can thus be treated as (a sub-range of) UTF-32
+ while (begin < end)
+ {
+ Uint32 codepoint;
+ begin = Decode(begin, end, codepoint);
+ *output++ = codepoint < 256 ? static_cast<char>(codepoint) : replacement;
+ }
+
+ return output;
+}
+
+
+////////////////////////////////////////////////////////////
+template <typename In, typename Out>
+Out Utf<8>::ToUtf8(In begin, In end, Out output)
+{
+ return std::copy(begin, end, output);
+}
+
+
+////////////////////////////////////////////////////////////
+template <typename In, typename Out>
+Out Utf<8>::ToUtf16(In begin, In end, Out output)
+{
+ while (begin < end)
+ {
+ Uint32 codepoint;
+ begin = Decode(begin, end, codepoint);
+ output = Utf<16>::Encode(codepoint, output);
+ }
+
+ return output;
+}
+
+
+////////////////////////////////////////////////////////////
+template <typename In, typename Out>
+Out Utf<8>::ToUtf32(In begin, In end, Out output)
+{
+ while (begin < end)
+ {
+ Uint32 codepoint;
+ begin = Decode(begin, end, codepoint);
+ *output++ = codepoint;
+ }
+
+ return output;
+}
+
+
+////////////////////////////////////////////////////////////
+template <typename In>
+In Utf<16>::Decode(In begin, In end, Uint32& output, Uint32 replacement)
+{
+ Uint16 first = *begin++;
+
+ // If it's a surrogate pair, first convert to a single UTF-32 character
+ if ((first >= 0xD800) && (first <= 0xDBFF))
+ {
+ if (begin < end)
+ {
+ Uint32 second = *begin++;
+ if ((second >= 0xDC00) && (second <= 0xDFFF))
+ {
+ // The second element is valid: convert the two elements to a UTF-32 character
+ output = ((first - 0xD800) << 10) + (second - 0xDC00) + 0x0010000;
+ }
+ else
+ {
+ // Invalid character
+ output = replacement;
+ }
+ }
+ else
+ {
+ // Invalid character
+ begin = end;
+ output = replacement;
+ }
+ }
+ else
+ {
+ // We can make a direct copy
+ output = first;
+ }
+
+ return begin;
+}
+
+
+////////////////////////////////////////////////////////////
+template <typename Out>
+Out Utf<16>::Encode(Uint32 input, Out output, Uint16 replacement)
+{
+ if (input <= 0xFFFF)
+ {
+ // The character can be copied directly, we just need to check if it's in the valid range
+ if ((input >= 0xD800) && (input <= 0xDFFF))
+ {
+ // Invalid character (this range is reserved)
+ if (replacement)
+ *output++ = replacement;
+ }
+ else
+ {
+ // Valid character directly convertible to a single UTF-16 character
+ *output++ = static_cast<Uint16>(input);
+ }
+ }
+ else if (input > 0x0010FFFF)
+ {
+ // Invalid character (greater than the maximum Unicode value)
+ if (replacement)
+ *output++ = replacement;
+ }
+ else
+ {
+ // The input character will be converted to two UTF-16 elements
+ input -= 0x0010000;
+ *output++ = static_cast<Uint16>((input >> 10) + 0xD800);
+ *output++ = static_cast<Uint16>((input & 0x3FFUL) + 0xDC00);
+ }
+
+ return output;
+}
+
+
+////////////////////////////////////////////////////////////
+template <typename In>
+In Utf<16>::Next(In begin, In end)
+{
+ Uint32 codepoint;
+ return Decode(begin, end, codepoint);
+}
+
+
+////////////////////////////////////////////////////////////
+template <typename In>
+std::size_t Utf<16>::Count(In begin, In end)
+{
+ std::size_t length = 0;
+ while (begin < end)
+ {
+ begin = Next(begin, end);
+ ++length;
+ }
+
+ return length;
+}
+
+
+////////////////////////////////////////////////////////////
+template <typename In, typename Out>
+Out Utf<16>::FromAnsi(In begin, In end, Out output, const std::locale& locale)
+{
+ while (begin < end)
+ {
+ Uint32 codepoint = Utf<32>::DecodeAnsi(*begin++, locale);
+ output = Encode(codepoint, output);
+ }
+
+ return output;
+}
+
+
+////////////////////////////////////////////////////////////
+template <typename In, typename Out>
+Out Utf<16>::FromWide(In begin, In end, Out output)
+{
+ while (begin < end)
+ {
+ Uint32 codepoint = Utf<32>::DecodeWide(*begin++);
+ output = Encode(codepoint, output);
+ }
+
+ return output;
+}
+
+
+////////////////////////////////////////////////////////////
+template <typename In, typename Out>
+Out Utf<16>::FromLatin1(In begin, In end, Out output)
+{
+ // Latin-1 is directly compatible with Unicode encodings,
+ // and can thus be treated as (a sub-range of) UTF-32
+ return std::copy(begin, end, output);
+}
+
+
+////////////////////////////////////////////////////////////
+template <typename In, typename Out>
+Out Utf<16>::ToAnsi(In begin, In end, Out output, char replacement, const std::locale& locale)
+{
+ while (begin < end)
+ {
+ Uint32 codepoint;
+ begin = Decode(begin, end, codepoint);
+ output = Utf<32>::EncodeAnsi(codepoint, output, replacement, locale);
+ }
+
+ return output;
+}
+
+
+////////////////////////////////////////////////////////////
+template <typename In, typename Out>
+Out Utf<16>::ToWide(In begin, In end, Out output, wchar_t replacement)
+{
+ while (begin < end)
+ {
+ Uint32 codepoint;
+ begin = Decode(begin, end, codepoint);
+ output = Utf<32>::EncodeWide(codepoint, output, replacement);
+ }
+
+ return output;
+}
+
+
+////////////////////////////////////////////////////////////
+template <typename In, typename Out>
+Out Utf<16>::ToLatin1(In begin, In end, Out output, char replacement)
+{
+ // Latin-1 is directly compatible with Unicode encodings,
+ // and can thus be treated as (a sub-range of) UTF-32
+ while (begin < end)
+ {
+ *output++ = *begin < 256 ? static_cast<char>(*begin) : replacement;
+ begin++;
+ }
+
+ return output;
+}
+
+
+////////////////////////////////////////////////////////////
+template <typename In, typename Out>
+Out Utf<16>::ToUtf8(In begin, In end, Out output)
+{
+ while (begin < end)
+ {
+ Uint32 codepoint;
+ begin = Decode(begin, end, codepoint);
+ output = Utf<8>::Encode(codepoint, output);
+ }
+
+ return output;
+}
+
+
+////////////////////////////////////////////////////////////
+template <typename In, typename Out>
+Out Utf<16>::ToUtf16(In begin, In end, Out output)
+{
+ return std::copy(begin, end, output);
+}
+
+
+////////////////////////////////////////////////////////////
+template <typename In, typename Out>
+Out Utf<16>::ToUtf32(In begin, In end, Out output)
+{
+ while (begin < end)
+ {
+ Uint32 codepoint;
+ begin = Decode(begin, end, codepoint);
+ *output++ = codepoint;
+ }
+
+ return output;
+}
+
+
+////////////////////////////////////////////////////////////
+template <typename In>
+In Utf<32>::Decode(In begin, In /*end*/, Uint32& output, Uint32 /*replacement*/)
+{
+ output = *begin++;
+ return begin;
+}
+
+
+////////////////////////////////////////////////////////////
+template <typename Out>
+Out Utf<32>::Encode(Uint32 input, Out output, Uint32 /*replacement*/)
+{
+ *output++ = input;
+ return output;
+}
+
+
+////////////////////////////////////////////////////////////
+template <typename In>
+In Utf<32>::Next(In begin, In /*end*/)
+{
+ return ++begin;
+}
+
+
+////////////////////////////////////////////////////////////
+template <typename In>
+std::size_t Utf<32>::Count(In begin, In end)
+{
+ return begin - end;
+}
+
+
+////////////////////////////////////////////////////////////
+template <typename In, typename Out>
+Out Utf<32>::FromAnsi(In begin, In end, Out output, const std::locale& locale)
+{
+ while (begin < end)
+ *output++ = DecodeAnsi(*begin++, locale);
+
+ return output;
+}
+
+
+////////////////////////////////////////////////////////////
+template <typename In, typename Out>
+Out Utf<32>::FromWide(In begin, In end, Out output)
+{
+ while (begin < end)
+ *output++ = DecodeWide(*begin++);
+
+ return output;
+}
+
+
+////////////////////////////////////////////////////////////
+template <typename In, typename Out>
+Out Utf<32>::FromLatin1(In begin, In end, Out output)
+{
+ // Latin-1 is directly compatible with Unicode encodings,
+ // and can thus be treated as (a sub-range of) UTF-32
+ return std::copy(begin, end, output);
+}
+
+
+////////////////////////////////////////////////////////////
+template <typename In, typename Out>
+Out Utf<32>::ToAnsi(In begin, In end, Out output, char replacement, const std::locale& locale)
+{
+ while (begin < end)
+ output = EncodeAnsi(*begin++, output, replacement, locale);
+
+ return output;
+}
+
+
+////////////////////////////////////////////////////////////
+template <typename In, typename Out>
+Out Utf<32>::ToWide(In begin, In end, Out output, wchar_t replacement)
+{
+ while (begin < end)
+ output = EncodeWide(*begin++, output, replacement);
+
+ return output;
+}
+
+
+////////////////////////////////////////////////////////////
+template <typename In, typename Out>
+Out Utf<32>::ToLatin1(In begin, In end, Out output, char replacement)
+{
+ // Latin-1 is directly compatible with Unicode encodings,
+ // and can thus be treated as (a sub-range of) UTF-32
+ while (begin < end)
+ {
+ *output++ = *begin < 256 ? static_cast<char>(*begin) : replacement;
+ begin++;
+ }
+
+ return output;
+}
+
+
+////////////////////////////////////////////////////////////
+template <typename In, typename Out>
+Out Utf<32>::ToUtf8(In begin, In end, Out output)
+{
+ while (begin < end)
+ output = Utf<8>::Encode(*begin++, output);
+
+ return output;
+}
+
+////////////////////////////////////////////////////////////
+template <typename In, typename Out>
+Out Utf<32>::ToUtf16(In begin, In end, Out output)
+{
+ while (begin < end)
+ output = Utf<16>::Encode(*begin++, output);
+
+ return output;
+}
+
+
+////////////////////////////////////////////////////////////
+template <typename In, typename Out>
+Out Utf<32>::ToUtf32(In begin, In end, Out output)
+{
+ return std::copy(begin, end, output);
+}
+
+
+////////////////////////////////////////////////////////////
+template <typename In>
+Uint32 Utf<32>::DecodeAnsi(In input, const std::locale& locale)
+{
+ // On Windows, GCC's standard library (glibc++) has almost
+ // no support for Unicode stuff. As a consequence, in this
+ // context we can only use the default locale and ignore
+ // the one passed as parameter.
+
+ #if defined(SFML_SYSTEM_WINDOWS) && /* if Windows ... */ \
+ (defined(__GLIBCPP__) || defined (__GLIBCXX__)) && /* ... and standard library is glibc++ ... */ \
+ !(defined(__SGI_STL_PORT) || defined(_STLPORT_VERSION)) /* ... and STLPort is not used on top of it */
+
+ (void)locale; // to avoid warnings
+
+ wchar_t character = 0;
+ mbtowc(&character, &input, 1);
+ return static_cast<Uint32>(character);
+
+ #else
+
+ // Get the facet of the locale which deals with character conversion
+ const std::ctype<wchar_t>& facet = std::use_facet< std::ctype<wchar_t> >(locale);
+
+ // Use the facet to convert each character of the input string
+ return static_cast<Uint32>(facet.widen(input));
+
+ #endif
+}
+
+
+////////////////////////////////////////////////////////////
+template <typename In>
+Uint32 Utf<32>::DecodeWide(In input)
+{
+ // The encoding of wide characters is not well defined and is left to the system;
+ // however we can safely assume that it is UCS-2 on Windows and
+ // UCS-4 on Unix systems.
+ // In both cases, a simple copy is enough (UCS-2 is a subset of UCS-4,
+ // and UCS-4 *is* UTF-32).
+
+ return input;
+}
+
+
+////////////////////////////////////////////////////////////
+template <typename Out>
+Out Utf<32>::EncodeAnsi(Uint32 codepoint, Out output, char replacement, const std::locale& locale)
+{
+ // On Windows, gcc's standard library (glibc++) has almost
+ // no support for Unicode stuff. As a consequence, in this
+ // context we can only use the default locale and ignore
+ // the one passed as parameter.
+
+ #if defined(SFML_SYSTEM_WINDOWS) && /* if Windows ... */ \
+ (defined(__GLIBCPP__) || defined (__GLIBCXX__)) && /* ... and standard library is glibc++ ... */ \
+ !(defined(__SGI_STL_PORT) || defined(_STLPORT_VERSION)) /* ... and STLPort is not used on top of it */
+
+ (void)locale; // to avoid warnings
+
+ char character = 0;
+ if (wctomb(&character, static_cast<wchar_t>(codepoint)) >= 0)
+ *output++ = character;
+ else if (replacement)
+ *output++ = replacement;
+
+ return output;
+
+ #else
+
+ // Get the facet of the locale which deals with character conversion
+ const std::ctype<wchar_t>& facet = std::use_facet< std::ctype<wchar_t> >(locale);
+
+ // Use the facet to convert each character of the input string
+ *output++ = facet.narrow(static_cast<wchar_t>(codepoint), replacement);
+
+ return output;
+
+ #endif
+}
+
+
+////////////////////////////////////////////////////////////
+template <typename Out>
+Out Utf<32>::EncodeWide(Uint32 codepoint, Out output, wchar_t replacement)
+{
+ // The encoding of wide characters is not well defined and is left to the system;
+ // however we can safely assume that it is UCS-2 on Windows and
+ // UCS-4 on Unix systems.
+ // For UCS-2 we need to check if the source characters fits in (UCS-2 is a subset of UCS-4).
+ // For UCS-4 we can do a direct copy (UCS-4 *is* UTF-32).
+
+ switch (sizeof(wchar_t))
+ {
+ case 4:
+ {
+ *output++ = static_cast<wchar_t>(codepoint);
+ break;
+ }
+
+ default:
+ {
+ if ((codepoint <= 0xFFFF) && ((codepoint < 0xD800) || (codepoint > 0xDFFF)))
+ {
+ *output++ = static_cast<wchar_t>(codepoint);
+ }
+ else if (replacement)
+ {
+ *output++ = replacement;
+ }
+ break;
+ }
+ }
+
+ return output;
+}