+init

author: chai <chaifix@163.com> 2021-12-13 00:07:19 +0800
committer: chai <chaifix@163.com> 2021-12-13 00:07:19 +0800
commit: 60cbbdec07ab7a5636eac5b3c024ae44e937f4d4 (patch)
tree: b2c7b0a868f18159dbc43d8954e1bd7668549a88 /Client/Source/GUI/utf8.cpp
1 files changed, 415 insertions, 0 deletions
diff --git a/Client/Source/GUI/utf8.cpp b/Client/Source/GUI/utf8.cpp
new file mode 100644
index 0000000..8a3a086
--- /dev/null
+++ b/Client/Source/GUI/utf8.cpp
@@ -0,0 +1,415 @@
+/*
+ * Description: UTF-8 字符串的解码和编码函数
+ *              unicode 字符处理函数
+ *     History: yang@haipo.me, 2013/05/29, create
+ *
+ * This code is in the public domain.
+ * You may use this code any way you wish, private, educational,
+ * or commercial. It's free.
+ */
+
+# include <stdint.h>
+# include <stddef.h>
+
+# include "utf8.h"
+
+namespace utf8
+{
+
+    ucs4_t getu8c(char **src, int *illegal)
+    {
+        static char umap[256] = { 0 };
+        static int  umap_init_flag = 0;
+
+        if (umap_init_flag == 0)
+        {
+            int i;
+
+            for (i = 0; i < 0x100; ++i)
+            {
+                if (i < 0x80)
+                {
+                    umap[i] = 1;
+                }
+                else if (i >= 0xc0 && i < 0xe0)
+                {
+                    umap[i] = 2;
+                }
+                else if (i >= 0xe0 && i < 0xf0)
+                {
+                    umap[i] = 3;
+                }
+                else if (i >= 0xf0 && i < 0xf8)
+                {
+                    umap[i] = 4;
+                }
+                else if (i >= 0xf8 && i < 0xfc)
+                {
+                    umap[i] = 5;
+                }
+                else if (i >= 0xfc && i < 0xfe)
+                {
+                    umap[i] = 6;
+                }
+                else
+                {
+                    umap[i] = 0;
+                }
+            }
+
+            umap_init_flag = 1;
+        }
+
+        uint8_t *s = (uint8_t *)(*src);
+        int r_illegal = 0;
+
+        while (umap[*s] == 0)
+        {
+            ++s;
+            ++r_illegal;
+        }
+
+        uint8_t *t;
+        int byte_num;
+        uint32_t uc;
+        int i;
+
+    repeat_label:
+        t = s;
+        byte_num = umap[*s];
+        uc = *s++ & (0xff >> byte_num);
+
+        for (i = 1; i < byte_num; ++i)
+        {
+            if (umap[*s])
+            {
+                r_illegal += s - t;
+                goto repeat_label;
+            }
+            else
+            {
+                uc = (uc << 6) + (*s & 0x3f);
+                s += 1;
+            }
+        }
+
+        *src = (char *)s;
+        if (illegal)
+        {
+            *illegal = r_illegal;
+        }
+
+        return uc;
+    }
+
+    size_t u8decode(char const *str, ucs4_t *des, size_t n, int *illegal)
+    {
+        if (n == 0)
+            return 0;
+
+        char *s = (char *)str;
+        size_t i = 0;
+        ucs4_t uc = 0;
+        int r_illegal_all = 0, r_illegal;
+
+        while ((uc = getu8c(&s, &r_illegal)))
+        {
+            if (i < (n - 1))
+            {
+                des[i++] = uc;
+                r_illegal_all += r_illegal;
+            }
+            else
+            {
+                break;
+            }
+        }
+
+        des[i] = 0;
+        if (illegal)
+        {
+            *illegal = r_illegal_all + r_illegal;
+        }
+
+        return i;
+    }
+
+# define IF_CAN_HOLD(left, n) do { \
+    size_t m = (size_t)(n); \
+    if ((size_t)(left) < (m + 1)) return -2; \
+    (left) -= m; \
+} while (0)
+
+    int putu8c(ucs4_t uc, char **des, size_t *left)
+    {
+        if (uc < 0)
+            return -1;
+
+        if (uc < (0x1 << 7))
+        {
+            IF_CAN_HOLD(*left, 1);
+
+            **des = (char)uc;
+            *des += 1;
+            **des = 0;
+
+            return 1;
+        }
+
+        int byte_num;
+
+        if (uc < (0x1 << 11))
+        {
+            byte_num = 2;
+        }
+        else if (uc < (0x1 << 16))
+        {
+            byte_num = 3;
+        }
+        else if (uc < (0x1 << 21))
+        {
+            byte_num = 4;
+        }
+        else if (uc < (0x1 << 26))
+        {
+            byte_num = 5;
+        }
+        else
+        {
+            byte_num = 6;
+        }
+
+        IF_CAN_HOLD(*left, byte_num);
+
+        int i;
+        for (i = byte_num - 1; i > 0; --i)
+        {
+            *(uint8_t *)(*des + i) = (uc & 0x3f) | 0x80;
+            uc >>= 6;
+        }
+
+        *(uint8_t *)(*des) = uc | (0xff << (8 - byte_num));
+
+        *des += byte_num;
+        **des = 0;
+
+        return byte_num;
+    }
+
+    size_t u8encode(ucs4_t *us, char *des, size_t n, int *illegal)
+    {
+        if (n == 0)
+            return 0;
+
+        char *s = des;
+        size_t left = n;
+        size_t len = 0;
+        int r_illegal = 0;
+
+        *s = 0;
+        while (*us)
+        {
+            int ret = putu8c(*us, &s, &left);
+            if (ret > 0)
+            {
+                len += ret;
+            }
+            else if (ret == -1)
+            {
+                r_illegal += 1;
+            }
+            else
+            {
+                break;
+            }
+
+            ++us;
+        }
+
+        if (illegal)
+        {
+            *illegal = r_illegal;
+        }
+
+        return len;
+    }
+
+    /* 全角字符 */
+    int isufullwidth(ucs4_t uc)
+    {
+        if (uc == 0x3000)
+            return 1;
+
+        if (uc >= 0xff01 && uc <= 0xff5e)
+            return 1;
+
+        return 0;
+    }
+
+    /* 全角字母 */
+    int isufullwidthalpha(ucs4_t uc)
+    {
+        if (uc >= 0xff21 && uc <= 0xff3a)
+            return 1;
+
+        if (uc >= 0xff41 && uc <= 0xff5a)
+            return 2;
+
+        return 0;
+    }
+
+    /* 全角数字  */
+    int isufullwidthdigit(ucs4_t uc)
+    {
+        if (uc >= 0xff10 && uc <= 0xff19)
+            return 1;
+
+        return 0;
+    }
+
+    /* 全角转半角 */
+    ucs4_t ufull2half(ucs4_t uc)
+    {
+        if (uc == 0x3000)
+            return ' ';
+
+        if (uc >= 0xff01 && uc <= 0xff5e)
+            return uc - 0xfee0;
+
+        return uc;
+    }
+
+    /* 半角转全角 */
+    ucs4_t uhalf2full(ucs4_t uc)
+    {
+        if (uc == ' ')
+            return 0x3000;
+
+        if (uc >= 0x21 && uc <= 0x7e)
+            return uc + 0xfee0;
+
+        return uc;
+    }
+
+    /* 中日韩越统一表意文字 */
+    int isuchiness(ucs4_t uc)
+    {
+        /* 最初期统一汉字 */
+        if (uc >= 0x4e00 && uc <= 0x9fcc)
+            return 1;
+
+        /* 扩展 A 区 */
+        if (uc >= 0x3400 && uc <= 0x4db5)
+            return 2;
+
+        /* 扩展 B 区 */
+        if (uc >= 0x20000 && uc <= 0x2a6d6)
+            return 3;
+
+        /* 扩展 C 区 */
+        if (uc >= 0x2a700 && uc <= 0x2b734)
+            return 4;
+
+        /* 扩展 D 区 */
+        if (uc >= 0x2b740 && uc <= 0x2b81f)
+            return 5;
+
+        /* 扩展 E 区 */
+        if (uc >= 0x2b820 && uc <= 0x2f7ff)
+            return 6;
+
+        /* 台湾兼容汉字 */
+        if (uc >= 0x2f800 && uc <= 0x2fa1d)
+            return 7;
+
+        /* 北朝鲜兼容汉字 */
+        if (uc >= 0xfa70 && uc <= 0xfad9)
+            return 8;
+
+        /* 兼容汉字 */
+        if (uc >= 0xf900 && uc <= 0xfa2d)
+            return 9;
+
+        /* 兼容汉字 */
+        if (uc >= 0xfa30 && uc <= 0xfa6d)
+            return 10;
+
+        return 0;
+    }
+
+    /* 中文标点 */
+    int isuzhpunct(ucs4_t uc)
+    {
+        if (uc >= 0x3001 && uc <= 0x3002)
+            return 1;
+
+        if (uc >= 0x3008 && uc <= 0x300f)
+            return 1;
+
+        if (uc >= 0xff01 && uc <= 0xff0f)
+            return 1;
+
+        if (uc >= 0xff1a && uc <= 0xff20)
+            return 1;
+
+        if (uc >= 0xff3b && uc <= 0xff40)
+            return 1;
+
+        if (uc >= 0xff5b && uc <= 0xff5e)
+            return 1;
+
+        if (uc >= 0x2012 && uc <= 0x201f)
+            return 1;
+
+        if (uc >= 0xfe41 && uc <= 0xfe44)
+            return 1;
+
+        if (uc >= 0xfe49 && uc <= 0xfe4f)
+            return 1;
+
+        if (uc >= 0x3010 && uc <= 0x3017)
+            return 1;
+
+        return 0;
+    }
+
+    /* 日文平假名 */
+    int isuhiragana(ucs4_t uc)
+    {
+        if (uc >= 0x3040 && uc <= 0x309f)
+            return 1;
+
+        return 0;
+    }
+
+    /* 日文片假名 */
+    int isukatakana(ucs4_t uc)
+    {
+        if (uc >= 0x30a0 && uc <= 0x30ff)
+            return 1;
+
+        if (uc >= 0x31f0 && uc <= 0x31ff)
+            return 2;
+
+        return 0;
+    }
+
+    /* 韩文 */
+    int isukorean(ucs4_t uc)
+    {
+        /* 韩文拼音 */
+        if (uc >= 0xac00 && uc <= 0xd7af)
+            return 1;
+
+        /* 韩文字母 */
+        if (uc >= 0x1100 && uc <= 0x11ff)
+            return 2;
+
+        /* 韩文兼容字母 */
+        if (uc >= 0x3130 && uc <= 0x318f)
+            return 3;
+
+        return 0;
+    }
+
+}
author	chai <chaifix@163.com>	2021-12-13 00:07:19 +0800
committer	chai <chaifix@163.com>	2021-12-13 00:07:19 +0800
commit	60cbbdec07ab7a5636eac5b3c024ae44e937f4d4 (patch)
tree	b2c7b0a868f18159dbc43d8954e1bd7668549a88 /Client/Source/GUI/utf8.cpp