summaryrefslogtreecommitdiff
path: root/Client/Source/GUI/utf8.cpp
diff options
context:
space:
mode:
authorchai <chaifix@163.com>2021-12-13 00:07:19 +0800
committerchai <chaifix@163.com>2021-12-13 00:07:19 +0800
commit60cbbdec07ab7a5636eac5b3c024ae44e937f4d4 (patch)
treeb2c7b0a868f18159dbc43d8954e1bd7668549a88 /Client/Source/GUI/utf8.cpp
+init
Diffstat (limited to 'Client/Source/GUI/utf8.cpp')
-rw-r--r--Client/Source/GUI/utf8.cpp415
1 files changed, 415 insertions, 0 deletions
diff --git a/Client/Source/GUI/utf8.cpp b/Client/Source/GUI/utf8.cpp
new file mode 100644
index 0000000..8a3a086
--- /dev/null
+++ b/Client/Source/GUI/utf8.cpp
@@ -0,0 +1,415 @@
+/*
+ * Description: UTF-8 字符串的解码和编码函数
+ * unicode 字符处理函数
+ * History: yang@haipo.me, 2013/05/29, create
+ *
+ * This code is in the public domain.
+ * You may use this code any way you wish, private, educational,
+ * or commercial. It's free.
+ */
+
+# include <stdint.h>
+# include <stddef.h>
+
+# include "utf8.h"
+
+namespace utf8
+{
+
+ ucs4_t getu8c(char **src, int *illegal)
+ {
+ static char umap[256] = { 0 };
+ static int umap_init_flag = 0;
+
+ if (umap_init_flag == 0)
+ {
+ int i;
+
+ for (i = 0; i < 0x100; ++i)
+ {
+ if (i < 0x80)
+ {
+ umap[i] = 1;
+ }
+ else if (i >= 0xc0 && i < 0xe0)
+ {
+ umap[i] = 2;
+ }
+ else if (i >= 0xe0 && i < 0xf0)
+ {
+ umap[i] = 3;
+ }
+ else if (i >= 0xf0 && i < 0xf8)
+ {
+ umap[i] = 4;
+ }
+ else if (i >= 0xf8 && i < 0xfc)
+ {
+ umap[i] = 5;
+ }
+ else if (i >= 0xfc && i < 0xfe)
+ {
+ umap[i] = 6;
+ }
+ else
+ {
+ umap[i] = 0;
+ }
+ }
+
+ umap_init_flag = 1;
+ }
+
+ uint8_t *s = (uint8_t *)(*src);
+ int r_illegal = 0;
+
+ while (umap[*s] == 0)
+ {
+ ++s;
+ ++r_illegal;
+ }
+
+ uint8_t *t;
+ int byte_num;
+ uint32_t uc;
+ int i;
+
+ repeat_label:
+ t = s;
+ byte_num = umap[*s];
+ uc = *s++ & (0xff >> byte_num);
+
+ for (i = 1; i < byte_num; ++i)
+ {
+ if (umap[*s])
+ {
+ r_illegal += s - t;
+ goto repeat_label;
+ }
+ else
+ {
+ uc = (uc << 6) + (*s & 0x3f);
+ s += 1;
+ }
+ }
+
+ *src = (char *)s;
+ if (illegal)
+ {
+ *illegal = r_illegal;
+ }
+
+ return uc;
+ }
+
+ size_t u8decode(char const *str, ucs4_t *des, size_t n, int *illegal)
+ {
+ if (n == 0)
+ return 0;
+
+ char *s = (char *)str;
+ size_t i = 0;
+ ucs4_t uc = 0;
+ int r_illegal_all = 0, r_illegal;
+
+ while ((uc = getu8c(&s, &r_illegal)))
+ {
+ if (i < (n - 1))
+ {
+ des[i++] = uc;
+ r_illegal_all += r_illegal;
+ }
+ else
+ {
+ break;
+ }
+ }
+
+ des[i] = 0;
+ if (illegal)
+ {
+ *illegal = r_illegal_all + r_illegal;
+ }
+
+ return i;
+ }
+
+# define IF_CAN_HOLD(left, n) do { \
+ size_t m = (size_t)(n); \
+ if ((size_t)(left) < (m + 1)) return -2; \
+ (left) -= m; \
+} while (0)
+
+ int putu8c(ucs4_t uc, char **des, size_t *left)
+ {
+ if (uc < 0)
+ return -1;
+
+ if (uc < (0x1 << 7))
+ {
+ IF_CAN_HOLD(*left, 1);
+
+ **des = (char)uc;
+ *des += 1;
+ **des = 0;
+
+ return 1;
+ }
+
+ int byte_num;
+
+ if (uc < (0x1 << 11))
+ {
+ byte_num = 2;
+ }
+ else if (uc < (0x1 << 16))
+ {
+ byte_num = 3;
+ }
+ else if (uc < (0x1 << 21))
+ {
+ byte_num = 4;
+ }
+ else if (uc < (0x1 << 26))
+ {
+ byte_num = 5;
+ }
+ else
+ {
+ byte_num = 6;
+ }
+
+ IF_CAN_HOLD(*left, byte_num);
+
+ int i;
+ for (i = byte_num - 1; i > 0; --i)
+ {
+ *(uint8_t *)(*des + i) = (uc & 0x3f) | 0x80;
+ uc >>= 6;
+ }
+
+ *(uint8_t *)(*des) = uc | (0xff << (8 - byte_num));
+
+ *des += byte_num;
+ **des = 0;
+
+ return byte_num;
+ }
+
+ size_t u8encode(ucs4_t *us, char *des, size_t n, int *illegal)
+ {
+ if (n == 0)
+ return 0;
+
+ char *s = des;
+ size_t left = n;
+ size_t len = 0;
+ int r_illegal = 0;
+
+ *s = 0;
+ while (*us)
+ {
+ int ret = putu8c(*us, &s, &left);
+ if (ret > 0)
+ {
+ len += ret;
+ }
+ else if (ret == -1)
+ {
+ r_illegal += 1;
+ }
+ else
+ {
+ break;
+ }
+
+ ++us;
+ }
+
+ if (illegal)
+ {
+ *illegal = r_illegal;
+ }
+
+ return len;
+ }
+
+ /* 全角字符 */
+ int isufullwidth(ucs4_t uc)
+ {
+ if (uc == 0x3000)
+ return 1;
+
+ if (uc >= 0xff01 && uc <= 0xff5e)
+ return 1;
+
+ return 0;
+ }
+
+ /* 全角字母 */
+ int isufullwidthalpha(ucs4_t uc)
+ {
+ if (uc >= 0xff21 && uc <= 0xff3a)
+ return 1;
+
+ if (uc >= 0xff41 && uc <= 0xff5a)
+ return 2;
+
+ return 0;
+ }
+
+ /* 全角数字 */
+ int isufullwidthdigit(ucs4_t uc)
+ {
+ if (uc >= 0xff10 && uc <= 0xff19)
+ return 1;
+
+ return 0;
+ }
+
+ /* 全角转半角 */
+ ucs4_t ufull2half(ucs4_t uc)
+ {
+ if (uc == 0x3000)
+ return ' ';
+
+ if (uc >= 0xff01 && uc <= 0xff5e)
+ return uc - 0xfee0;
+
+ return uc;
+ }
+
+ /* 半角转全角 */
+ ucs4_t uhalf2full(ucs4_t uc)
+ {
+ if (uc == ' ')
+ return 0x3000;
+
+ if (uc >= 0x21 && uc <= 0x7e)
+ return uc + 0xfee0;
+
+ return uc;
+ }
+
+ /* 中日韩越统一表意文字 */
+ int isuchiness(ucs4_t uc)
+ {
+ /* 最初期统一汉字 */
+ if (uc >= 0x4e00 && uc <= 0x9fcc)
+ return 1;
+
+ /* 扩展 A 区 */
+ if (uc >= 0x3400 && uc <= 0x4db5)
+ return 2;
+
+ /* 扩展 B 区 */
+ if (uc >= 0x20000 && uc <= 0x2a6d6)
+ return 3;
+
+ /* 扩展 C 区 */
+ if (uc >= 0x2a700 && uc <= 0x2b734)
+ return 4;
+
+ /* 扩展 D 区 */
+ if (uc >= 0x2b740 && uc <= 0x2b81f)
+ return 5;
+
+ /* 扩展 E 区 */
+ if (uc >= 0x2b820 && uc <= 0x2f7ff)
+ return 6;
+
+ /* 台湾兼容汉字 */
+ if (uc >= 0x2f800 && uc <= 0x2fa1d)
+ return 7;
+
+ /* 北朝鲜兼容汉字 */
+ if (uc >= 0xfa70 && uc <= 0xfad9)
+ return 8;
+
+ /* 兼容汉字 */
+ if (uc >= 0xf900 && uc <= 0xfa2d)
+ return 9;
+
+ /* 兼容汉字 */
+ if (uc >= 0xfa30 && uc <= 0xfa6d)
+ return 10;
+
+ return 0;
+ }
+
+ /* 中文标点 */
+ int isuzhpunct(ucs4_t uc)
+ {
+ if (uc >= 0x3001 && uc <= 0x3002)
+ return 1;
+
+ if (uc >= 0x3008 && uc <= 0x300f)
+ return 1;
+
+ if (uc >= 0xff01 && uc <= 0xff0f)
+ return 1;
+
+ if (uc >= 0xff1a && uc <= 0xff20)
+ return 1;
+
+ if (uc >= 0xff3b && uc <= 0xff40)
+ return 1;
+
+ if (uc >= 0xff5b && uc <= 0xff5e)
+ return 1;
+
+ if (uc >= 0x2012 && uc <= 0x201f)
+ return 1;
+
+ if (uc >= 0xfe41 && uc <= 0xfe44)
+ return 1;
+
+ if (uc >= 0xfe49 && uc <= 0xfe4f)
+ return 1;
+
+ if (uc >= 0x3010 && uc <= 0x3017)
+ return 1;
+
+ return 0;
+ }
+
+ /* 日文平假名 */
+ int isuhiragana(ucs4_t uc)
+ {
+ if (uc >= 0x3040 && uc <= 0x309f)
+ return 1;
+
+ return 0;
+ }
+
+ /* 日文片假名 */
+ int isukatakana(ucs4_t uc)
+ {
+ if (uc >= 0x30a0 && uc <= 0x30ff)
+ return 1;
+
+ if (uc >= 0x31f0 && uc <= 0x31ff)
+ return 2;
+
+ return 0;
+ }
+
+ /* 韩文 */
+ int isukorean(ucs4_t uc)
+ {
+ /* 韩文拼音 */
+ if (uc >= 0xac00 && uc <= 0xd7af)
+ return 1;
+
+ /* 韩文字母 */
+ if (uc >= 0x1100 && uc <= 0x11ff)
+ return 2;
+
+ /* 韩文兼容字母 */
+ if (uc >= 0x3130 && uc <= 0x318f)
+ return 3;
+
+ return 0;
+ }
+
+}