22#define NFLAGS(c) (0x1F1A5+((unsigned int)(c)))
24static const char nflags_s[10][2] = {
"CN",
"DE",
"ES",
"FR",
"GB",
"IT",
"JP",
"KR",
"RU",
"US"};
25static const int nflags_code_kddi[10] = { 0x2549, 0x2546, 0x24C0, 0x2545, 0x2548, 0x2547, 0x2750, 0x254A, 0x24C1, 0x27F7 };
26static const int nflags_code_sb[10] = { 0x2B0A, 0x2B05, 0x2B08, 0x2B04, 0x2B07, 0x2B06, 0x2B02, 0x2B0B, 0x2B09, 0x2B03 };
28#define EMIT_KEYPAD_EMOJI(c) do { *snd = (c); return 0x20E3; } while(0)
29#define EMIT_FLAG_EMOJI(country) do { *snd = NFLAGS((country)[0]); return NFLAGS((country)[1]); } while(0)
31static const char nflags_kddi[6][2] = {
"FR",
"DE",
"IT",
"GB",
"CN",
"KR"};
32static const char nflags_sb[10][2] = {
"JP",
"US",
"FR",
"DE",
"IT",
"GB",
"ES",
"RU",
"CN",
"KR"};
35#define DOCOMO_KEYPAD(n) ((n) == 0 ? 0x296F : (0x2965 + (n)))
36#define DOCOMO_KEYPAD_HASH 0x2964
39static int mbfl_bisec_srch(
int w,
const unsigned short *tbl,
int n)
43 int probe = (l + r) >> 1;
44 unsigned short lo = tbl[2 * probe], hi = tbl[(2 * probe) + 1];
61 int probe = (l + r) >> 1;
62 unsigned short val = tbl[probe];
74static const unsigned short *mbfl_binary_search_paired_sorted_table(uint32_t w,
const unsigned short tbl[][2],
int n)
79 int probe = (l + r) >> 1;
80 if (w < tbl[probe][0]) {
82 }
else if (w > tbl[probe][0]) {
85 return &tbl[probe][1];
91#define SJIS_ENCODE(c1,c2,s1,s2) \
93 s1 = ((c1 - 1) >> 1) + ((c1) < 0x5F ? 0x71 : 0xB1); \
105#define SJIS_DECODE(c1,c2,s1,s2) \
108 s1 = ((c1 - 0x81) << 1) + 0x21; \
110 s1 = ((c1 - 0xc1) << 1) + 0x21; \
124#define CK(statement) do { if ((statement) < 0) return (-1); } while (0)
131#define JISX0201_KANA 0x20
132#define JISX0208_KANJI 0x80
139 switch (filter->
status & 0xf) {
148 }
else if (c == 0x0e) {
150 }
else if (c == 0x0f) {
152 }
else if (filter->
status == 0x10 && c == 0x5c) {
154 }
else if (filter->
status == 0x10 && c == 0x7e) {
156 }
else if (filter->
status == 0x20 && c > 0x20 && c < 0x60) {
158 }
else if ((filter->
status == 0x80 || filter->
status == 0x90) && c > 0x20 && c < 0x7f) {
161 }
else if (c >= 0 && c < 0x80) {
163 }
else if (c > 0xa0 && c < 0xe0) {
175 if (c > 0x20 && c < 0x7f) {
176 s = (c1 - 0x21)*94 + c - 0x21;
177 if (filter->
status == 0x80) {
213 }
else if (c == 0x28) {
229 if (c == 0x40 || c == 0x42) {
231 }
else if (c == 0x28) {
248 if (c == 0x40 || c == 0x42) {
250 }
else if (c == 0x44) {
268 if (c == 0x42 || c == 0x48) {
270 }
else if (c == 0x4a) {
272 }
else if (c == 0x49) {
290 if (filter->
status & 0xF) {
310 }
else if (c == 0x203E) {
322 }
else if (c == 0xff3c) {
324 }
else if (c == 0x2225) {
326 }
else if (c == 0xff0d) {
328 }
else if (c == 0xffe0) {
330 }
else if (c == 0xffe1) {
332 }
else if (c == 0xffe2) {
343 if ((filter->
status & 0xff00) != 0) {
350 }
else if (
s < 0x8080) {
351 if ((filter->
status & 0xff00) != 0x200) {
359 }
else if (
s < 0x10000) {
360 if ((filter->
status & 0xff00) != 0x300) {
370 if ((filter->
status & 0xff00) != 0x400) {
403 }
else if (c == 0xff3c) {
405 }
else if (c == 0x2225) {
407 }
else if (c == 0xff0d) {
409 }
else if (c == 0xffe0) {
411 }
else if (c == 0xffe1) {
413 }
else if (c == 0xffe2) {
421 }
else if ((
s >= 0x80 &&
s < 0x2121) || (
s > 0x8080)) {
426 if ((filter->
status & 0xff00) != 0) {
433 }
else if (
s < 0x10000) {
434 if ((filter->
status & 0xff00) != 0x200) {
443 if ((filter->
status & 0xff00) != 0x400) {
457#define JISX_0201_LATIN 1
458#define JISX_0201_KANA 2
462static size_t mb_iso2022jp_to_wchar(
unsigned char **in,
size_t *in_len, uint32_t *
buf,
size_t bufsize,
unsigned int *
state)
466 unsigned char *
p = *in, *e =
p + *in_len;
467 uint32_t *
out =
buf, *limit =
buf + bufsize;
469 while (
p < e &&
out < limit) {
470 unsigned char c = *
p++;
476 if (
p != e && (*
p ==
'$' || *
p ==
'('))
481 unsigned char c2 = *
p++;
483 unsigned char c3 = *
p++;
484 if (c3 ==
'@' || c3 ==
'B') {
486 }
else if (c3 ==
'(') {
491 unsigned char c4 = *
p++;
492 if (c4 ==
'@' || c4 ==
'B') {
494 }
else if (c4 ==
'D') {
497 if ((limit -
out) < 3) {
507 if ((limit -
out) < 2) {
515 }
else if (c2 ==
'(') {
516 unsigned char c3 = *
p++;
517 if (c3 ==
'B' || c3 ==
'H') {
519 }
else if (c3 ==
'J') {
521 }
else if (c3 ==
'I') {
524 if ((limit -
out) < 2) {
536 }
else if (c == 0xE) {
539 }
else if (c == 0xF) {
553 unsigned char c2 = *
p++;
554 if (c2 > 0x20 && c2 < 0x7F) {
555 unsigned int s = (c - 0x21)*94 + c2 - 0x21;
576 }
else if (c < 0x80) {
578 }
else if (c >= 0xA1 && c <= 0xDF) {
603 unsigned char *
out, *limit;
624 }
else if (w == 0xFF3C) {
626 }
else if (w == 0x2225) {
628 }
else if (w == 0xFF0D) {
630 }
else if (w == 0xFFE0) {
632 }
else if (w == 0xFFE1) {
634 }
else if (w == 0xFFE2) {
641 }
else if ((
s >= 0x80 &&
s < 0x2121) || (
s > 0x8080)) {
650 out = mb_convert_buf_add3(
out, 0x1B,
'(',
'B');
653 out = mb_convert_buf_add(
out,
s);
654 }
else if (
s < 0x8080) {
657 out = mb_convert_buf_add3(
out, 0x1B,
'$',
'B');
660 out = mb_convert_buf_add2(
out, (
s >> 8) & 0x7F,
s & 0x7F);
661 }
else if (
s < 0x10000) {
664 out = mb_convert_buf_add4(
out, 0x1B,
'$',
'(',
'D');
667 out = mb_convert_buf_add2(
out, (
s >> 8) & 0x7F,
s & 0x7F);
671 out = mb_convert_buf_add3(
out, 0x1B,
'(',
'J');
674 out = mb_convert_buf_add(
out,
s & 0x7F);
680 out = mb_convert_buf_add3(
out, 0x1B,
'(',
'B');
688 unsigned char *
out, *limit;
698 }
else if (w == 0x203E) {
711 }
else if (w == 0xFF3C) {
713 }
else if (w == 0x2225) {
715 }
else if (w == 0xFF0D) {
717 }
else if (w == 0xFFE0) {
719 }
else if (w == 0xFFE1) {
721 }
else if (w == 0xFFE2) {
733 out = mb_convert_buf_add3(
out, 0x1B,
'(',
'B');
736 out = mb_convert_buf_add(
out,
s);
737 }
else if (
s >= 0xA1 &&
s <= 0xDF) {
740 out = mb_convert_buf_add3(
out, 0x1B,
'(',
'I');
743 out = mb_convert_buf_add(
out,
s & 0x7F);
744 }
else if (
s < 0x8080) {
747 out = mb_convert_buf_add3(
out, 0x1B,
'$',
'B');
750 out = mb_convert_buf_add2(
out, (
s >> 8) & 0x7F,
s & 0x7F);
751 }
else if (
s < 0x10000) {
754 out = mb_convert_buf_add4(
out, 0x1B,
'$',
'(',
'D');
757 out = mb_convert_buf_add2(
out, (
s >> 8) & 0x7F,
s & 0x7F);
761 out = mb_convert_buf_add3(
out, 0x1B,
'(',
'J');
764 out = mb_convert_buf_add(
out,
s & 0x7F);
770 out = mb_convert_buf_add3(
out, 0x1B,
'(',
'B');
776#define JISX_0201_KANA_SO 5
778static bool mb_check_jis(
unsigned char *in,
size_t in_len)
780 unsigned char *
p = in, *e =
p + in_len;
784 unsigned char c = *
p++;
793 unsigned char c2 = *
p++;
795 unsigned char c3 = *
p++;
796 if (c3 ==
'@' || c3 ==
'B') {
798 }
else if (c3 ==
'(') {
802 unsigned char c4 = *
p++;
803 if (c4 ==
'@' || c4 ==
'B') {
805 }
else if (c4 ==
'D') {
813 }
else if (c2 ==
'(') {
814 unsigned char c3 = *
p++;
817 if (c3 ==
'B' || c3 ==
'H') {
819 }
else if (c3 ==
'J') {
821 }
else if (c3 ==
'I') {
829 }
else if (c == 0xE) {
835 }
else if (c == 0xF) {
845 unsigned char c2 = *
p++;
846 if (c2 > 0x20 && c2 < 0x7F) {
847 unsigned int s = (c - 0x21)*94 + c2 - 0x21;
861 }
else if (c < 0x80) {
863 }
else if (c >= 0xA1 && c <= 0xDF) {
874static bool mb_check_iso2022jp(
unsigned char *in,
size_t in_len)
876 unsigned char *
p = in, *e =
p + in_len;
880 unsigned char c = *
p++;
886 unsigned char c2 = *
p++;
888 unsigned char c3 = *
p++;
889 if (c3 ==
'@' || c3 ==
'B') {
894 }
else if (c2 ==
'(') {
895 unsigned char c3 = *
p++;
898 }
else if (c3 ==
'J') {
906 }
else if (c == 0xE || c == 0xF) {
913 unsigned char c2 = *
p++;
914 if (c2 > 0x20 && c2 < 0x7F) {
915 unsigned int s = (c - 0x21)*94 + c2 - 0x21;
923 }
else if (c < 0x80) {
939static inline int convert_emoji_cp(
int cp)
943 else if (
cp > 0xE000)
950 if (
s >= mb_tbl_code2uni_kddi1_min &&
s <= mb_tbl_code2uni_kddi1_max) {
953 }
else if (
s == 0x24C1) {
955 }
else if (
s >= 0x2545 &&
s <= 0x254A) {
957 }
else if (
s == 0x25BC) {
961 return convert_emoji_cp(mb_tbl_code2uni_kddi1[
s - mb_tbl_code2uni_kddi1_min]);
963 }
else if (
s >= mb_tbl_code2uni_kddi2_min &&
s <= mb_tbl_code2uni_kddi2_max) {
966 }
else if (
s >= 0x27A6 &&
s <= 0x27AE) {
968 }
else if (
s == 0x27F7) {
970 }
else if (
s == 0x2830) {
974 return convert_emoji_cp(mb_tbl_code2uni_kddi2[
s - mb_tbl_code2uni_kddi2_min]);
982 int c1,
s, w, snd = 0;
984 switch (filter->
status & 0xF) {
993 }
else if (c >= 0 && c < 0x80) {
995 }
else if (c > 0xA0 && c < 0xE0) {
1007 if (c > 0x20 && c < 0x7F) {
1008 s = ((c1 - 0x21) * 94) + c - 0x21;
1013 }
else if (
s == 32) {
1015 }
else if (
s == 33) {
1017 }
else if (
s == 60) {
1019 }
else if (
s == 80) {
1021 }
else if (
s == 81) {
1023 }
else if (
s == 137) {
1028 if (
s >= (84 * 94) &&
s < (91 * 94)) {
1031 if (w > 0 && snd > 0) {
1057 }
else if (c ==
'(') {
1067 if (c ==
'@' || c ==
'B') {
1069 }
else if (c ==
'(') {
1079 if (c ==
'@' || c ==
'B') {
1089 if (c ==
'B' || c ==
'J') {
1091 }
else if (c ==
'I') {
1104 if (filter->
status & 0xF) {
1118 if ((filter->
status & 0xF) == 1) {
1119 int c1 = filter->
cache;
1125 }
else if (c1 ==
'0') {
1128 *s1 = 0x27A6 + (c1 -
'1');
1132 if (filter->
status & 0xFF00) {
1142 if (c ==
'#' || (c >=
'0' && c <=
'9')) {
1151 }
else if (c == 0xAE) {
1154 }
else if (c >= mb_tbl_uni_kddi2code2_min && c <= mb_tbl_uni_kddi2code2_max) {
1155 int i =
mbfl_bisec_srch2(c, mb_tbl_uni_kddi2code2_key, mb_tbl_uni_kddi2code2_len);
1157 *s1 = mb_tbl_uni_kddi2code2_value[i];
1160 }
else if (c >= mb_tbl_uni_kddi2code3_min && c <= mb_tbl_uni_kddi2code3_max) {
1161 int i =
mbfl_bisec_srch2(c - 0x10000, mb_tbl_uni_kddi2code3_key, mb_tbl_uni_kddi2code3_len);
1163 *s1 = mb_tbl_uni_kddi2code3_value[i];
1166 }
else if (c >= mb_tbl_uni_kddi2code5_min && c <= mb_tbl_uni_kddi2code5_max) {
1167 int i =
mbfl_bisec_srch2(c - 0xF0000, mb_tbl_uni_kddi2code5_key, mb_tbl_uni_kddi2code5_len);
1169 *s1 = mb_tbl_uni_kddi2code5_val[i];
1177#define CODE2JIS(c1,c2,s1,s2) \
1178 c1 = (s1)/94+0x21; \
1179 c2 = (s1)-94*((c1)-0x21)+0x21; \
1180 s1 = ((c1) << 8) | (c2); \
1185 int c1, c2, s1 = 0, s2 = 0;
1200 }
else if (c == 0xFF3C) {
1202 }
else if (c == 0x2225) {
1204 }
else if (c == 0xFF0D) {
1206 }
else if (c == 0xFFE0) {
1208 }
else if (c == 0xFFE1) {
1210 }
else if (c == 0xFFE2) {
1215 if (mbfilter_unicode2sjis_emoji_kddi(c, &s1, filter) > 0) {
1219 }
else if ((filter->
status & 0xFF) == 1 && filter->
cache) {
1224 if ((s1 <= 0) || (s1 >= 0xa1a1 && s2 == 0)) {
1228 s1 = (((c1 / 94) + 0x2D) << 8) + (c1 % 94) + 0x21;
1240 if (filter->
status & 0xFF00) {
1247 }
else if (s1 > 0xA0 && s1 < 0xE0) {
1248 if ((filter->
status & 0xFF00) != 0x100) {
1255 }
else if (s1 < 0x7E7F) {
1256 if ((filter->
status & 0xFF00) != 0x200) {
1275 if (filter->
status & 0xFF00) {
1281 int c1 = filter->
cache;
1282 if ((filter->
status & 0xFF) == 1 && (c1 ==
'#' || (c1 >=
'0' && c1 <=
'9'))) {
1294static size_t mb_iso2022jp_kddi_to_wchar(
unsigned char **in,
size_t *in_len, uint32_t *
buf,
size_t bufsize,
unsigned int *
state)
1296 unsigned char *
p = *in, *e =
p + *in_len;
1297 uint32_t *
out =
buf, *limit =
buf + bufsize - 1;
1299 while (
p < e &&
out < limit) {
1300 unsigned char c = *
p++;
1308 unsigned char c2 = *
p++;
1309 unsigned char c3 = *
p++;
1312 if (c3 ==
'@' || c3 ==
'B') {
1314 }
else if (c3 ==
'(') {
1319 unsigned char c4 = *
p++;
1321 if (c4 ==
'@' || c4 ==
'B') {
1329 }
else if (c2 ==
'(') {
1330 if (c3 ==
'B' || c3 ==
'J') {
1332 }
else if (c3 ==
'I') {
1342 *
out++ = 0xFF40 + c;
1348 unsigned char c2 = *
p++;
1350 if (c2 >= 0x21 && c2 <= 0x7E) {
1351 unsigned int s = ((c - 0x21) * 94) + c2 - 0x21;
1357 }
else if (
s == 32) {
1359 }
else if (
s == 33) {
1361 }
else if (
s == 60) {
1363 }
else if (
s == 80) {
1365 }
else if (
s == 81) {
1367 }
else if (
s == 137) {
1372 if (
s >= (84 * 94) &&
s < (91 * 94)) {
1393 }
else if (c <= 0x7F) {
1395 }
else if (c >= 0xA1 && c <= 0xDF) {
1396 *
out++ = 0xFEC0 + c;
1409 unsigned char *
out, *limit;
1430 }
else if (w == 0xFF3C) {
1432 }
else if (w == 0x2225) {
1434 }
else if (w == 0xFF0D) {
1436 }
else if (w == 0xFFE0) {
1438 }
else if (w == 0xFFE1) {
1440 }
else if (w == 0xFFE2) {
1445 if ((w ==
'#' || (w >=
'0' && w <=
'9')) &&
len) {
1446 uint32_t w2 = *in++;
len--;
1449 unsigned int s1 = 0;
1452 }
else if (w ==
'0') {
1455 s1 = 0x27A6 + (w -
'1');
1457 s = (((s1 / 94) + 0x21) << 8) + ((s1 % 94) + 0x21) - 0x1600;
1462 uint32_t w2 = *in++;
len--;
1465 for (
int i = 0; i < 10; i++) {
1466 if (w ==
NFLAGS(nflags_s[i][0]) && w2 ==
NFLAGS(nflags_s[i][1])) {
1467 unsigned int s1 = nflags_code_kddi[i];
1468 s = (((s1 / 94) + 0x21) << 8) + ((s1 % 94) + 0x21) - 0x1600;
1469 goto found_flag_emoji;
1479 unsigned int s1 = 0x27DC;
1480 s = (((s1 / 94) + 0x21) << 8) + ((s1 % 94) + 0x21) - 0x1600;
1481 }
else if (w == 0xAE) {
1482 unsigned int s1 = 0x27DD;
1483 s = (((s1 / 94) + 0x21) << 8) + ((s1 % 94) + 0x21) - 0x1600;
1484 }
else if (w >= mb_tbl_uni_kddi2code2_min && w <= mb_tbl_uni_kddi2code2_max) {
1485 int i =
mbfl_bisec_srch2(w, mb_tbl_uni_kddi2code2_key, mb_tbl_uni_kddi2code2_len);
1487 unsigned int s1 = mb_tbl_uni_kddi2code2_value[i];
1488 s = (((s1 / 94) + 0x21) << 8) + ((s1 % 94) + 0x21) - 0x1600;
1490 }
else if (w >= mb_tbl_uni_kddi2code3_min && w <= mb_tbl_uni_kddi2code3_max) {
1491 int i =
mbfl_bisec_srch2(w - 0x10000, mb_tbl_uni_kddi2code3_key, mb_tbl_uni_kddi2code3_len);
1493 unsigned int s1 = mb_tbl_uni_kddi2code3_value[i];
1494 s = (((s1 / 94) + 0x21) << 8) + ((s1 % 94) + 0x21) - 0x1600;
1496 }
else if (w >= mb_tbl_uni_kddi2code5_min && w <= mb_tbl_uni_kddi2code5_max) {
1497 int i =
mbfl_bisec_srch2(w - 0xF0000, mb_tbl_uni_kddi2code5_key, mb_tbl_uni_kddi2code5_len);
1499 unsigned int s1 = mb_tbl_uni_kddi2code5_val[i];
1500 s = (((s1 / 94) + 0x21) << 8) + ((s1 % 94) + 0x21) - 0x1600;
1504 if (!
s ||
s >= 0xA1A1) {
1508 s = (((i / 94) + 0x2D) << 8) + (i % 94) + 0x21;
1519 }
else if (
s <= 0x7F) {
1522 out = mb_convert_buf_add3(
out, 0x1B,
'(',
'B');
1525 out = mb_convert_buf_add(
out,
s);
1526 }
else if (
s >= 0xA1 &&
s <= 0xDF) {
1529 out = mb_convert_buf_add3(
out, 0x1B,
'(',
'I');
1532 out = mb_convert_buf_add(
out,
s & 0x7F);
1533 }
else if (
s <= 0x7E7E) {
1536 out = mb_convert_buf_add3(
out, 0x1B,
'$',
'B');
1541 out = mb_convert_buf_add2(
out, (
s >> 8) & 0xFF,
s & 0xFF);
1550 out = mb_convert_buf_add3(
out, 0x1B,
'(',
'B');
1559 int c1, c2,
s, s1 = 0, s2 = 0, w = 0, w1;
1561 switch (filter->
status & 0xf) {
1563 if (c >= 0 && c < 0x80) {
1569 }
else if (c == 0x7e) {
1577 }
else if ((filter->
status == 0x80 || filter->
status == 0x90 || filter->
status == 0xa0)
1578 && c > 0x20 && c < 0x7f) {
1580 if (filter->
status == 0x90) {
1582 }
else if (filter->
status == 0xa0) {
1593 if (c > 0xa0 && c < 0xff) {
1596 }
else if (c == 0x8e) {
1597 filter->
cache = 0x8E;
1599 }
else if (c == 0x8f) {
1605 if (c > 0xa0 && c < 0xe0) {
1607 }
else if (c > 0x80 && c < 0xfd && c != 0xa0) {
1624 if (c > 0xa0 && c < 0xff) {
1632 if (c >= 0x40 && c <= 0xfc && c != 0x7f) {
1639 if (c >= 0x21 && c <= 0x7E) {
1647 w1 = (s1 << 8) | s2;
1650 if ((w1 >= 0x2477 && w1 <= 0x2479) || (w1 >= 0x2479 && w1 <= 0x247B) ||
1651 (w1 >= 0x2577 && w1 <= 0x257E) || w1 == 0x2678 || w1 == 0x2B44 ||
1652 (w1 >= 0x2B48 && w1 <= 0x2B4F) || (w1 >= 0x2B65 && w1 <= 0x2B66)) {
1655 w = jisx0213_u2_tbl[2*k];
1657 w = jisx0213_u2_tbl[2*k+1];
1663 w1 = (s1 - 0x21)*94 + s2 - 0x21;
1664 if (w1 >= 0 && w1 < jisx0213_ucs_table_size) {
1665 w = jisx0213_ucs_table[w1];
1673 w = jisx0213_jis_u5_tbl[k] + 0x20000;
1685 if (c > 0xa0 && c < 0xe0) {
1694 if (c == 0xA1 || (c >= 0xA3 && c <= 0xA5) || c == 0xA8 || (c >= 0xAC && c <= 0xAF) || (c >= 0xEE && c <= 0xFE)) {
1695 filter->
cache = c - 0x80;
1712 if (c2 < 0x21 || c2 > 0x7E) {
1720 if (((s1 >= 0 && s1 <= 4 && s1 != 1) || s1 == 7 || (s1 >= 11 && s1 <= 14) ||
1721 (s1 >= 77 && s1 < 94)) && s2 >= 0 && s2 < 94) {
1723 for (k = 0; k < jisx0213_p2_ofst_len; k++) {
1724 if (s1 == jisx0213_p2_ofst[k]) {
1728 k -= jisx0213_p2_ofst[k];
1731 s = (s1 + 94 + k)*94 + s2;
1733 w = jisx0213_ucs_table[
s];
1739 w = jisx0213_jis_u5_tbl[k] + 0x20000;
1756 if (c > 0x20 && c < 0x7f) {
1757 s = (c1 - 0x21)*94 + c - 0x21;
1780 }
else if (c ==
'(') {
1798 }
else if (c ==
'(') {
1816 }
else if (c ==
'P') {
1847 if (filter->
status & 0xF) {
1867 if ((filter->
status & 0xf) == 0 && (
1869 (c >= 0x0254 && c <= 0x02E9) ||
1870 (c >= 0x304B && c <= 0x3053) ||
1871 (c >= 0x30AB && c <= 0x30C8) ||
1873 for (k = 0; k < jisx0213_u2_tbl_len; k++) {
1874 if (c == jisx0213_u2_tbl[2*k]) {
1883 if ((filter->
status & 0xf) == 1 && filter->
cache >= 0 && filter->
cache < jisx0213_u2_tbl_len) {
1888 c1 = jisx0213_u2_tbl[2*k];
1889 if ((c1 == 0x0254 || c1 == 0x028C || c1 == 0x0259 || c1 == 0x025A) && c == 0x0301) {
1892 if (c == jisx0213_u2_tbl[2*k+1]) {
1893 s1 = jisx0213_u2_key[k];
1895 s1 = jisx0213_u2_fb_tbl[k];
1898 c1 = (s1 >> 8) & 0xff;
1902 s2 = (s1 & 0xff) + 0x80;
1903 s1 = ((s1 >> 8) & 0xff) + 0x80;
1905 if (filter->
status != 0x200) {
1914 s1 = (s1 >> 8) & 0x7f;
1927 if (c >= uni2jis_tbl_range[
offset][0] && c <= uni2jis_tbl_range[
offset][1]) {
1928 s1 = uni2jis_tbl[
offset][c-uni2jis_tbl_range[
offset][0]];
1935 if (s1 <= 0 && c >= ucs_c1_jisx0213_min && c <= ucs_c1_jisx0213_max) {
1936 k = mbfl_bisec_srch(c, ucs_c1_jisx0213_tbl, ucs_c1_jisx0213_tbl_len);
1938 s1 = ucs_c1_jisx0213_ofst[k] + c - ucs_c1_jisx0213_tbl[2*k];
1943 if (s1 <= 0 && c >= jisx0213_u5_tbl_min && c <= jisx0213_u5_tbl_max) {
1944 k =
mbfl_bisec_srch2(c - 0x20000, jisx0213_u5_jis_key, jisx0213_u5_tbl_len);
1946 s1 = jisx0213_u5_jis_tbl[k];
1954 }
else if (c == 0xfe46) {
1956 }
else if (c >= 0xf91d && c <= 0xf9dc) {
1958 k =
mbfl_bisec_srch2(c, ucs_r2b_jisx0213_cmap_key, ucs_r2b_jisx0213_cmap_len);
1960 s1 = ucs_r2b_jisx0213_cmap_val[k];
1982 }
else if (s1 < 0x100) {
1991 }
else if (s1 < 0x7f00) {
1993 c1 = (s1 >> 8) & 0xff;
1997 s2 = (s1 & 0xff) + 0x80;
1998 s1 = ((s1 >> 8) & 0xff) + 0x80;
2000 if ((filter->
status & 0xff00) != 0x200) {
2008 s1 = (s1 >> 8) & 0xff;
2014 c1 = (s1 >> 8) & 0xff;
2019 k = ((s1 >> 8) & 0xff) - 0x7f;
2020 if (k >= 0 && k < jisx0213_p2_ofst_len) {
2021 s1 = jisx0213_p2_ofst[k] + 0x21;
2028 if ((filter->
status & 0xff00) != 0x200) {
2050 int k, c1, c2, s1, s2;
2055 if (filter->
status == 1 && k >= 0 && k <= jisx0213_u2_tbl_len) {
2056 s1 = jisx0213_u2_fb_tbl[k];
2059 c1 = (s1 >> 8) & 0xff;
2063 s2 = (s1 & 0xff) | 0x80;
2064 s1 = ((s1 >> 8) & 0xff) | 0x80;
2067 s1 = (s1 >> 8) & 0x7f;
2068 if ((filter->
status & 0xff00) != 0x200) {
2084 if (filter->
status & 0xff00) {
2101#define JISX0213_PLANE1 2
2102#define JISX0213_PLANE2 3
2104static size_t mb_iso2022jp2004_to_wchar(
unsigned char **in,
size_t *in_len, uint32_t *
buf,
size_t bufsize,
unsigned int *
state)
2106 unsigned char *
p = *in, *e =
p + *in_len;
2107 uint32_t *
out =
buf, *limit =
buf + bufsize - 1;
2109 while (
p < e &&
out < limit) {
2110 unsigned char c = *
p++;
2119 unsigned char c2 = *
p++;
2120 unsigned char c3 = *
p++;
2124 }
else if (c3 ==
'(') {
2129 unsigned char c4 = *
p++;
2132 }
else if (c4 ==
'P') {
2140 }
else if (c2 ==
'(') {
2155 unsigned char c2 = *
p++;
2156 if (c2 < 0x21 || c2 > 0x7E) {
2162 unsigned int w1 = (c << 8) | c2;
2165 if ((w1 >= 0x2477 && w1 <= 0x2479) || (w1 >= 0x2479 && w1 <= 0x247B) || (w1 >= 0x2577 && w1 <= 0x257E) || w1 == 0x2678 || w1 == 0x2B44 || (w1 >= 0x2B48 && w1 <= 0x2B4F) || (w1 >= 0x2B65 && w1 <= 0x2B66)) {
2168 *
out++ = jisx0213_u2_tbl[2*k];
2169 *
out++ = jisx0213_u2_tbl[2*k+1];
2176 w1 = (c - 0x21)*94 + c2 - 0x21;
2177 if (w1 < jisx0213_ucs_table_size) {
2178 w = jisx0213_ucs_table[w1];
2185 w = jisx0213_jis_u5_tbl[k] + 0x20000;
2192 unsigned int s1 = c - 0x21, s2 = c2 - 0x21;
2194 if (((s1 <= 4 && s1 != 1) || s1 == 7 || (s1 >= 11 && s1 <= 14) || (s1 >= 77 && s1 < 94)) && s2 < 94) {
2196 for (k = 0; k < jisx0213_p2_ofst_len; k++) {
2197 if (s1 == jisx0213_p2_ofst[k]) {
2201 k -= jisx0213_p2_ofst[k];
2204 unsigned int s = (s1 + 94 + k)*94 + s2;
2206 uint32_t w = jisx0213_ucs_table[
s];
2212 w = jisx0213_jis_u5_tbl[k] + 0x20000;
2221 unsigned int s = (c - 0x21)*94 + c2 - 0x21;
2243 unsigned char *
out, *limit;
2248 if (
buf->state & 0xFF00) {
2249 int k = (
buf->state >> 8) - 1;
2250 w = jisx0213_u2_tbl[2*k];
2252 goto process_codepoint;
2260 if (w == 0xE6 || (w >= 0x254 && w <= 0x2E9) || (w >= 0x304B && w <= 0x3053) || (w >= 0x30AB && w <= 0x30C8) || w == 0x31F7) {
2261 for (
int k = 0; k < jisx0213_u2_tbl_len; k++) {
2262 if (w == jisx0213_u2_tbl[2*k]) {
2265 buf->state |= (k+1) << 8;
2270 uint32_t w2 = *in++;
len--;
2271 if ((w == 0x254 || w == 0x28C || w == 0x259 || w == 0x25A) && w2 == 0x301) {
2274 if (w2 == jisx0213_u2_tbl[2*k+1]) {
2275 s = jisx0213_u2_key[k];
2281 s = jisx0213_u2_fb_tbl[k];
2289 for (
int k = 0; k < uni2jis_tbl_len; k++) {
2290 if (w >= uni2jis_tbl_range[k][0] && w <= uni2jis_tbl_range[k][1]) {
2291 s = uni2jis_tbl[k][w - uni2jis_tbl_range[k][0]];
2298 if (!
s && w >= ucs_c1_jisx0213_min && w <= ucs_c1_jisx0213_max) {
2299 int k = mbfl_bisec_srch(w, ucs_c1_jisx0213_tbl, ucs_c1_jisx0213_tbl_len);
2301 s = ucs_c1_jisx0213_ofst[k] + w - ucs_c1_jisx0213_tbl[2*k];
2306 if (!
s && w >= jisx0213_u5_tbl_min && w <= jisx0213_u5_tbl_max) {
2307 int k =
mbfl_bisec_srch2(w - 0x20000, jisx0213_u5_jis_key, jisx0213_u5_tbl_len);
2309 s = jisx0213_u5_jis_tbl[k];
2317 }
else if (w == 0xFE46) {
2319 }
else if (w >= 0xF91D && w <= 0xF9DC) {
2321 int k =
mbfl_bisec_srch2(w, ucs_r2b_jisx0213_cmap_key, ucs_r2b_jisx0213_cmap_len);
2323 s = ucs_r2b_jisx0213_cmap_val[k];
2331 }
else if (
s <= 0x7F) {
2334 out = mb_convert_buf_add3(
out, 0x1B,
'(',
'B');
2337 out = mb_convert_buf_add(
out,
s);
2338 }
else if (
s <= 0xFF) {
2341 }
else if (
s <= 0x7EFF) {
2344 out = mb_convert_buf_add4(
out, 0x1B,
'$',
'(',
'Q');
2349 out = mb_convert_buf_add2(
out, (
s >> 8) & 0xFF,
s & 0xFF);
2353 out = mb_convert_buf_add4(
out, 0x1B,
'$',
'(',
'P');
2358 unsigned int s2 =
s & 0xFF;
2359 int k = ((
s >> 8) & 0xFF) - 0x7F;
2361 s = jisx0213_p2_ofst[k] + 0x21;
2362 out = mb_convert_buf_add2(
out,
s, s2);
2368 out = mb_convert_buf_add3(
out, 0x1B,
'(',
'B');
2379 switch (filter->
status & 0xf) {
2388 }
else if (c == 0x0e) {
2390 }
else if (c == 0x0f) {
2392 }
else if (filter->
status == 0x10 && c == 0x5c) {
2394 }
else if (filter->
status == 0x10 && c == 0x7e) {
2396 }
else if (filter->
status == 0x20 && c > 0x20 && c < 0x60) {
2398 }
else if ((filter->
status == 0x80 || filter->
status == 0x90) && c > 0x20 && c <= 0x97) {
2401 }
else if (c >= 0 && c < 0x80) {
2403 }
else if (c > 0xa0 && c < 0xe0) {
2415 if (c > 0x20 && c < 0x7f) {
2416 s = (c1 - 0x21)*94 + c - 0x21;
2417 if (filter->
status == 0x80) {
2426 }
else if (
s >= 94 * 94 &&
s < 114 * 94) {
2428 w =
s - 94*94 + 0xe000;
2462 }
else if (c == 0x28) {
2478 if (c == 0x40 || c == 0x42) {
2480 }
else if (c == 0x28) {
2497 if (c == 0x40 || c == 0x42) {
2499 }
else if (c == 0x44) {
2517 if (c == 0x42 || c == 0x48) {
2519 }
else if (c == 0x4a) {
2521 }
else if (c == 0x49) {
2539 if (filter->
status & 0xF) {
2553static const unsigned char hankana2zenkana_table[64] = {
2554 0x00,0x02,0x0C,0x0D,0x01,0xFB,0xF2,0xA1,0xA3,0xA5,
2555 0xA7,0xA9,0xE3,0xE5,0xE7,0xC3,0xFC,0xA2,0xA4,0xA6,
2556 0xA8,0xAA,0xAB,0xAD,0xAF,0xB1,0xB3,0xB5,0xB7,0xB9,
2557 0xBB,0xBD,0xBF,0xC1,0xC4,0xC6,0xC8,0xCA,0xCB,0xCC,
2558 0xCD,0xCE,0xCF,0xD2,0xD5,0xD8,0xDB,0xDE,0xDF,0xE0,
2559 0xE1,0xE2,0xE4,0xE6,0xE8,0xE9,0xEA,0xEB,0xEC,0xED,
2563static const unsigned char hankana2zenhira_table[64] = {
2564 0x00,0x02,0x0C,0x0D,0x01,0xFB,0x92,0x41,0x43,0x45,
2565 0x47,0x49,0x83,0x85,0x87,0x63,0xFC,0x42,0x44,0x46,
2566 0x48,0x4A,0x4B,0x4D,0x4F,0x51,0x53,0x55,0x57,0x59,
2567 0x5B,0x5D,0x5F,0x61,0x64,0x66,0x68,0x6A,0x6B,0x6C,
2568 0x6D,0x6E,0x6F,0x72,0x75,0x78,0x7B,0x7E,0x7F,0x80,
2569 0x81,0x82,0x84,0x86,0x88,0x89,0x8A,0x8B,0x8C,0x8D,
2573static const unsigned char zenkana2hankana_table[84][2] = {
2574 {0x67,0x00},{0x71,0x00},{0x68,0x00},{0x72,0x00},{0x69,0x00},
2575 {0x73,0x00},{0x6A,0x00},{0x74,0x00},{0x6B,0x00},{0x75,0x00},
2576 {0x76,0x00},{0x76,0x9E},{0x77,0x00},{0x77,0x9E},{0x78,0x00},
2577 {0x78,0x9E},{0x79,0x00},{0x79,0x9E},{0x7A,0x00},{0x7A,0x9E},
2578 {0x7B,0x00},{0x7B,0x9E},{0x7C,0x00},{0x7C,0x9E},{0x7D,0x00},
2579 {0x7D,0x9E},{0x7E,0x00},{0x7E,0x9E},{0x7F,0x00},{0x7F,0x9E},
2580 {0x80,0x00},{0x80,0x9E},{0x81,0x00},{0x81,0x9E},{0x6F,0x00},
2581 {0x82,0x00},{0x82,0x9E},{0x83,0x00},{0x83,0x9E},{0x84,0x00},
2582 {0x84,0x9E},{0x85,0x00},{0x86,0x00},{0x87,0x00},{0x88,0x00},
2583 {0x89,0x00},{0x8A,0x00},{0x8A,0x9E},{0x8A,0x9F},{0x8B,0x00},
2584 {0x8B,0x9E},{0x8B,0x9F},{0x8C,0x00},{0x8C,0x9E},{0x8C,0x9F},
2585 {0x8D,0x00},{0x8D,0x9E},{0x8D,0x9F},{0x8E,0x00},{0x8E,0x9E},
2586 {0x8E,0x9F},{0x8F,0x00},{0x90,0x00},{0x91,0x00},{0x92,0x00},
2587 {0x93,0x00},{0x6C,0x00},{0x94,0x00},{0x6D,0x00},{0x95,0x00},
2588 {0x6E,0x00},{0x96,0x00},{0x97,0x00},{0x98,0x00},{0x99,0x00},
2589 {0x9A,0x00},{0x9B,0x00},{0x9C,0x00},{0x9C,0x00},{0x72,0x00},
2590 {0x74,0x00},{0x66,0x00},{0x9D,0x00},{0x73,0x9E}
2612 if ((
mode &
MBFL_HAN2ZEN_ALL) && c >= 0x21 && c <= 0x7D && c !=
'"' && c !=
'\'' && c !=
'\\') {
2630 if (c >= 0xFF61 && c <= 0xFF9F) {
2633 if (
next >= 0xFF61 &&
next <= 0xFF9F) {
2634 if (
next == 0xFF9E && ((
n >= 22 &&
n <= 36) || (
n >= 42 &&
n <= 46))) {
2636 return 0x3001 + hankana2zenkana_table[
n];
2638 if (
next == 0xFF9E &&
n == 19) {
2642 if (
next == 0xFF9F &&
n >= 42 &&
n <= 46) {
2644 return 0x3002 + hankana2zenkana_table[
n];
2648 return 0x3000 + hankana2zenkana_table[
n];
2652 if (c >= 0xFF61 && c <= 0xFF9F) {
2655 if (
next >= 0xFF61 &&
next <= 0xFF9F) {
2656 if (
next == 0xFF9E && ((
n >= 22 &&
n <= 36) || (
n >= 42 &&
n <= 46))) {
2658 return 0x3001 + hankana2zenhira_table[
n];
2660 if (
next == 0xFF9F &&
n >= 42 &&
n <= 46) {
2662 return 0x3002 + hankana2zenhira_table[
n];
2666 return 0x3000 + hankana2zenhira_table[
n];
2670 return 0x3000 + hankana2zenkana_table[c - 0xFF60];
2673 return 0x3000 + hankana2zenhira_table[c - 0xFF60];
2678 if (c ==
'\\' || c == 0xA5) {
2681 if (c == 0x7E || c == 0x203E) {
2694 if ((
mode &
MBFL_ZEN2HAN_ALL) && c >= 0xFF01 && c <= 0xFF5D && c != 0xFF02 && c != 0xFF07 && c != 0xFF3C) {
2717 if (zenkana2hankana_table[
n][1]) {
2718 *second = 0xFF00 + zenkana2hankana_table[
n][1];
2720 return 0xFF00 + zenkana2hankana_table[
n][0];
2725 if (zenkana2hankana_table[
n][1]) {
2726 *second = 0xFF00 + zenkana2hankana_table[
n][1];
2728 return 0xFF00 + zenkana2hankana_table[
n][0];
2768 if (c == 0xFFE5 || c == 0xFF3C) {
2771 if (c == 0xFFE3 || c == 0x203E) {
2774 if (c == 0x2018 || c == 0x2019) {
2777 if (c == 0x201C || c == 0x201D) {
2790 bool consumed =
false;
2792 if (filter->
cache) {
2794 filter->
cache = consumed ? 0 : c;
2798 mbfl_filt_conv_wchar_cp50221(
s, filter);
2800 if (c == 0 && !consumed) {
2803 }
else if (c == 0) {
2817 if ((filter->
status & 0xff00) != 0) {
2835 if (filter->
cache) {
2838 mbfl_filt_conv_wchar_cp50221(
s, filter);
2843 return mbfl_filt_conv_any_jis_flush(filter);
2852 }
else if (c == 0x203E) {
2860 }
else if (c >= 0xE000 && c <= 0xE757) {
2863 s = ((
s / 94) + 0x7F) << 8 | ((
s % 94) + 0x21);
2869 }
else if (c == 0xff3c) {
2871 }
else if (c == 0x2225) {
2873 }
else if (c == 0xff0d) {
2875 }
else if (c == 0xffe0) {
2877 }
else if (c == 0xffe1) {
2879 }
else if (c == 0xffe2) {
2892 if (
s == 0 || ((
s & 0x8000) && (
s & 0x80))) {
2902 s = ((i / 94 + oh + 0x21) << 8) + (i % 94 + 0x21);
2909 const int cp932ext2_ucs_table_size =
2911 for (i = 0; i < cp932ext2_ucs_table_size; i++) {
2913 s = ((i / 94 + oh + 0x21) << 8) + (i % 94 + 0x21);
2921 }
else if (
s <= 0) {
2928 if ((filter->
status & 0xff00) != 0) {
2935 }
else if (
s >= 0xa0 &&
s < 0xe0) {
2936 if ((filter->
status & 0xff00) != 0x500) {
2943 }
else if (
s <= 0x927E) {
2944 if ((filter->
status & 0xff00) != 0x200) {
2952 }
else if (
s < 0x10000) {
2955 if ((filter->
status & 0xff00) != 0x400) {
2976 }
else if (c == 0x203E) {
2984 }
else if (c >= 0xE000 && c <= 0xE757) {
2987 s = ((
s / 94) + 0x7F) << 8 | ((
s % 94) + 0x21);
2993 }
else if (c == 0xff3c) {
2995 }
else if (c == 0x2225) {
2997 }
else if (c == 0xff0d) {
2999 }
else if (c == 0xffe0) {
3001 }
else if (c == 0xffe1) {
3003 }
else if (c == 0xffe2) {
3007 if (
s == 0 || ((
s & 0x8000) && (
s & 0x80))) {
3016 s = ((i / 94 + oh + 0x21) << 8) + (i % 94 + 0x21);
3023 const int cp932ext2_ucs_table_size =
3025 for (i = 0; i < cp932ext2_ucs_table_size; i++) {
3027 s = ((i / 94 + oh + 0x21) << 8) + (i % 94 + 0x21);
3035 }
else if (
s <= 0) {
3042 if ((filter->
status & 0xff00) == 0x500) {
3045 }
else if ((filter->
status & 0xff00) != 0) {
3052 }
else if (
s >= 0xa0 &&
s < 0xe0) {
3053 if ((filter->
status & 0xff00) != 0x500) {
3058 }
else if (
s <= 0x927E) {
3059 if ((filter->
status & 0xff00) == 0x500) {
3063 if ((filter->
status & 0xff00) != 0x200) {
3071 }
else if (
s < 0x10000) {
3074 if ((filter->
status & 0xff00) == 0x500) {
3078 if ((filter->
status & 0xff00) != 0x400) {
3096 if ((filter->
status & 0xff00) == 0x500) {
3098 }
else if ((filter->
status & 0xff00) != 0) {
3113#define JISX_0201_LATIN 1
3114#define JISX_0201_KANA 2
3118static size_t mb_cp5022x_to_wchar(
unsigned char **in,
size_t *in_len, uint32_t *
buf,
size_t bufsize,
unsigned int *
state)
3122 unsigned char *
p = *in, *e =
p + *in_len;
3123 uint32_t *
out =
buf, *limit =
buf + bufsize;
3125 while (
p < e &&
out < limit) {
3126 unsigned char c = *
p++;
3133 if (
p < e && (*
p ==
'(' || *
p ==
'$'))
3137 unsigned char c2 = *
p++;
3139 unsigned char c3 = *
p++;
3140 if (c3 ==
'@' || c3 ==
'B') {
3142 }
else if (c3 ==
'(') {
3147 unsigned char c4 = *
p++;
3148 if (c4 ==
'@' || c4 ==
'B') {
3150 }
else if (c4 ==
'D') {
3153 if ((limit -
out) < 3) {
3163 if ((limit -
out) < 2) {
3171 }
else if (c2 ==
'(') {
3172 unsigned char c3 = *
p++;
3173 if (c3 ==
'B' || c3 ==
'H') {
3175 }
else if (c3 ==
'J') {
3177 }
else if (c3 ==
'I') {
3180 if ((limit -
out) < 2) {
3192 }
else if (c == 0xE) {
3194 }
else if (c == 0xF) {
3201 *
out++ = 0xFF40 + c;
3207 unsigned char c2 = *
p++;
3208 if (c2 > 0x20 && c2 < 0x7F) {
3209 unsigned int s = (c - 0x21)*94 + c2 - 0x21;
3220 }
else if (
s >= 94*94 &&
s < 114*94) {
3222 w =
s - 94*94 + 0xE000;
3237 }
else if (c < 0x80) {
3239 }
else if (c >= 0xA1 && c <= 0xDF) {
3240 *
out++ = 0xFEC0 + c;
3251static unsigned int lookup_wchar(uint32_t w)
3257 }
else if (w == 0x203E) {
3265 }
else if (w >= 0xE000 && w <= 0xE757) {
3268 s = ((
s / 94) + 0x7F) << 8 | ((
s % 94) + 0x21);
3274 }
else if (w == 0xFF3C) {
3276 }
else if (w == 0x2225) {
3278 }
else if (w == 0xFF0D) {
3280 }
else if (w == 0xFFE0) {
3282 }
else if (w == 0xFFE1) {
3284 }
else if (w == 0xFFE2) {
3286 }
else if (w == 0) {
3299 if (!
s ||
s >= 0x8080) {
3320 unsigned char *
out, *limit;
3326 if (
buf->state & 0xFFFF00) {
3328 w =
buf->state >> 8;
3330 goto reprocess_codepoint;
3337 if (w >= 0xFF61 && w <= 0xFF9F && !
len && !
end) {
3340 buf->state |= w << 8;
3344 bool consumed =
false;
3348 in++;
len--; consumed =
false;
3351 unsigned int s = lookup_wchar(w);
3355 }
else if (
s < 0x80) {
3359 out = mb_convert_buf_add3(
out, 0x1B,
'(',
'B');
3362 out = mb_convert_buf_add(
out,
s);
3363 }
else if (
s >= 0xA0 &&
s < 0xE0) {
3367 out = mb_convert_buf_add3(
out, 0x1B,
'(',
'I');
3370 out = mb_convert_buf_add(
out,
s - 0x80);
3371 }
else if (
s <= 0x927E) {
3375 out = mb_convert_buf_add3(
out, 0x1B,
'$',
'B');
3378 out = mb_convert_buf_add2(
out, (
s >> 8) & 0xFF,
s & 0xFF);
3379 }
else if (
s >= 0x10000) {
3383 out = mb_convert_buf_add3(
out, 0x1B,
'(',
'J');
3386 out = mb_convert_buf_add(
out,
s & 0x7F);
3394 out = mb_convert_buf_add3(
out, 0x1B,
'(',
'B');
3402 unsigned char *
out, *limit;
3408 unsigned int s = lookup_wchar(w);
3412 }
else if (
s < 0x80) {
3416 out = mb_convert_buf_add3(
out, 0x1B,
'(',
'B');
3419 out = mb_convert_buf_add(
out,
s);
3420 }
else if (
s >= 0xA0 &&
s < 0xE0) {
3424 out = mb_convert_buf_add3(
out, 0x1B,
'(',
'I');
3427 out = mb_convert_buf_add(
out,
s - 0x80);
3428 }
else if (
s <= 0x927E) {
3432 out = mb_convert_buf_add3(
out, 0x1B,
'$',
'B');
3435 out = mb_convert_buf_add2(
out, (
s >> 8) & 0xFF,
s & 0xFF);
3436 }
else if (
s >= 0x10000) {
3440 out = mb_convert_buf_add3(
out, 0x1B,
'(',
'J');
3443 out = mb_convert_buf_add(
out,
s & 0x7F);
3451 out = mb_convert_buf_add3(
out, 0x1B,
'(',
'B');
3459 unsigned char *
out, *limit;
3465 unsigned int s = lookup_wchar(w);
3469 }
else if (
s < 0x80) {
3473 out = mb_convert_buf_add(
out, 0xF);
3476 out = mb_convert_buf_add3(
out, 0x1B,
'(',
'B');
3479 out = mb_convert_buf_add(
out,
s);
3480 }
else if (
s >= 0xA0 &&
s < 0xE0) {
3484 out = mb_convert_buf_add(
out, 0xE);
3487 out = mb_convert_buf_add(
out,
s - 0x80);
3488 }
else if (
s <= 0x927E) {
3492 out = mb_convert_buf_add(
out, 0xF);
3495 out = mb_convert_buf_add3(
out, 0x1B,
'$',
'B');
3498 out = mb_convert_buf_add2(
out, (
s >> 8) & 0xFF,
s & 0xFF);
3499 }
else if (
s >= 0x10000) {
3503 out = mb_convert_buf_add(
out, 0xF);
3506 out = mb_convert_buf_add3(
out, 0x1B,
'(',
'J');
3509 out = mb_convert_buf_add(
out,
s & 0x7F);
3518 out = mb_convert_buf_add(
out, 0xF);
3521 out = mb_convert_buf_add3(
out, 0x1B,
'(',
'B');
3529#define JISX0201_KANA 0x20
3530#define JISX0208_KANJI 0x80
3537 switch (filter->
status & 0xF) {
3546 }
else if (c >= 0 && c < 0x80) {
3548 }
else if (c > 0xA0 && c < 0xE0) {
3560 if (c > 0x20 && c < 0x7F) {
3561 s = ((c1 - 0x21) * 94) + c - 0x21;
3566 }
else if (
s == 32) {
3568 }
else if (
s == 33) {
3570 }
else if (
s == 60) {
3572 }
else if (
s == 80) {
3574 }
else if (
s == 81) {
3576 }
else if (
s == 137) {
3595 if (c1 > 0x20 && c1 < 0x35) {
3596 w = 0xE000 + ((c1 - 0x21) * 94) + c - 0x21;
3611 }
else if (c ==
'(') {
3621 if (c ==
'@' || c ==
'B') {
3623 }
else if (c ==
'(') {
3633 if (c ==
'@' || c ==
'B') {
3635 }
else if (c ==
'?') {
3645 if (c ==
'B' || c ==
'J') {
3647 }
else if (c ==
'I') {
3660 if (filter->
status & 0xF) {
3672#define sjistoidx(c1, c2) \
3673 (((c1) > 0x9f) ? (((c1) - 0xc1) * 188 + (c2) - (((c2) > 0x7e) ? 0x41 : 0x40)) : (((c1) - 0x81) * 188 + (c2) - (((c2) > 0x7e) ? 0x41 : 0x40)))
3674#define idxtojis1(c) (((c) / 94) + 0x21)
3675#define idxtojis2(c) (((c) % 94) + 0x21)
3677static int cp932ext3_cp932ext2_jis(
int c)
3693 int c1, c2, s1 = 0, s2 = 0;
3703 }
else if (c >= 0xE000 && c < (0xE000 + 20*94)) {
3706 c1 = (s1 / 94) + 0x7f;
3707 c2 = (s1 % 94) + 0x21;
3708 s1 = (c1 << 8) | c2;
3714 }
else if (c == 0xFF3C) {
3716 }
else if (c == 0x2225) {
3718 }
else if (c == 0xFF0D) {
3720 }
else if (c == 0xFFE0) {
3722 }
else if (c == 0xFFE1) {
3724 }
else if (c == 0xFFE2) {
3729 if ((s1 <= 0) || (s1 >= 0xa1a1 && s2 == 0)) {
3733 s1 = (((c1 / 94) + 0x2D) << 8) + (c1 % 94) + 0x21;
3741 s1 = cp932ext3_cp932ext2_jis(c1);
3754 if (filter->
status & 0xFF00) {
3761 }
else if (s1 > 0xA0 && s1 < 0xE0) {
3762 if ((filter->
status & 0xFF00) != 0x100) {
3769 }
else if (s1 < 0x7E7F) {
3770 if ((filter->
status & 0xFF00) != 0x200) {
3778 }
else if (s1 < 0x927F) {
3779 if ((filter->
status & 0xFF00) != 0x800) {
3799 if ((filter->
status & 0xFF00) != 0) {
3813static size_t mb_iso2022jpms_to_wchar(
unsigned char **in,
size_t *in_len, uint32_t *
buf,
size_t bufsize,
unsigned int *
state)
3815 unsigned char *
p = *in, *e =
p + *in_len;
3816 uint32_t *
out =
buf, *limit =
buf + bufsize;
3818 while (
p < e &&
out < limit) {
3819 unsigned char c = *
p++;
3827 unsigned char c2 = *
p++;
3828 unsigned char c3 = *
p++;
3831 if (c3 ==
'@' || c3 ==
'B') {
3833 }
else if (c3 ==
'(' &&
p < e) {
3834 unsigned char c4 = *
p++;
3836 if (c4 ==
'@' || c4 ==
'B') {
3838 }
else if (c4 ==
'?') {
3846 }
else if (c2 ==
'(') {
3847 if (c3 ==
'B' || c3 ==
'J') {
3849 }
else if (c3 ==
'I') {
3859 *
out++ = 0xFF40 + c;
3865 unsigned char c2 = *
p++;
3868 if (c2 >= 0x21 && c2 <= 0x7E) {
3869 unsigned int s = ((c - 0x21) * 94) + c2 - 0x21;
3874 }
else if (
s == 32) {
3876 }
else if (
s == 33) {
3878 }
else if (
s == 60) {
3880 }
else if (
s == 80) {
3882 }
else if (
s == 81) {
3884 }
else if (
s == 137) {
3898 }
else if (c >= 0x21 && c <= 0x34) {
3899 w = 0xE000 + ((c - 0x21) * 94) + c2 - 0x21;
3906 }
else if (c <= 0x7F) {
3908 }
else if (c >= 0xA1 && c <= 0xDF) {
3909 *
out++ = 0xFEC0 + c;
3922 unsigned char *
out, *limit;
3938 }
else if (w >= 0xE000 && w < (0xE000 + 20*94)) {
3940 s = ((((w - 0xE000) / 94) + 0x7F) << 8) | (((w - 0xE000) % 94) + 0x21);
3946 }
else if (w == 0xFF3C) {
3948 }
else if (w == 0x2225) {
3950 }
else if (w == 0xFF0D) {
3952 }
else if (w == 0xFFE0) {
3954 }
else if (w == 0xFFE1) {
3956 }
else if (w == 0xFFE2) {
3967 s = (((i / 94) + 0x2D) << 8) + (i % 94) + 0x21;
3975 s = cp932ext3_cp932ext2_jis(i);
3985 }
else if (
s <= 0x7F) {
3988 out = mb_convert_buf_add3(
out, 0x1B,
'(',
'B');
3991 out = mb_convert_buf_add(
out,
s);
3992 }
else if (
s >= 0xA1 &&
s <= 0xDF) {
3995 out = mb_convert_buf_add3(
out, 0x1B,
'(',
'I');
3998 out = mb_convert_buf_add(
out,
s & 0x7F);
3999 }
else if (
s <= 0x7E7E) {
4002 out = mb_convert_buf_add3(
out, 0x1B,
'$',
'B');
4007 out = mb_convert_buf_add2(
out, (
s >> 8) & 0xFF,
s & 0x7F);
4008 }
else if (
s < 0x927F) {
4011 out = mb_convert_buf_add4(
out, 0x1B,
'$',
'(',
'?');
4016 out = mb_convert_buf_add2(
out, ((
s >> 8) - 0x5E) & 0x7F,
s & 0x7F);
4025 out = mb_convert_buf_add3(
out, 0x1B,
'(',
'B');
4035 switch (filter->
status & 0xf) {
4041 }
else if (c == 0x0f) {
4043 }
else if (c == 0x0e) {
4045 }
else if ((filter->
status & 0x10) && c > 0x20 && c < 0x7f) {
4049 }
else if ((filter->
status & 0x10) == 0 && c >= 0 && c < 0x80) {
4059 int c1 = filter->
cache;
4062 if (c1 > 0x20 && c1 < 0x47) {
4064 }
else if (c1 >= 0x47 && c1 <= 0x7e && c1 != 0x49) {
4068 if (flag > 0 && c > 0x20 && c < 0x7f) {
4070 if (c1 != 0x22 || c <= 0x65) {
4071 w = (c1 - 1)*190 + (c - 0x41) + 0x80;
4076 w = (c1 - 0x47)*94 + c - 0x21;
4126 if (filter->
status & 0xF) {
4143 if ((filter->
status & 0x100) == 0) {
4167 c1 = (
s >> 8) & 0xff;
4170 if (c1 < 0xa1 || c2 < 0xa1) {
4172 }
else if (
s & 0x8000) {
4182 }
else if ((
s >= 0x80 &&
s < 0x2121) || (
s > 0x8080)) {
4188 if (filter->
status & 0x10) {
4194 if ((filter->
status & 0x10) == 0) {
4210 if (filter->
status & 0xF) {
4215 if (filter->
status & 0x10) {
4231static size_t mb_iso2022kr_to_wchar(
unsigned char **in,
size_t *in_len, uint32_t *
buf,
size_t bufsize,
unsigned int *
state)
4233 unsigned char *
p = *in, *e =
p + *in_len;
4234 uint32_t *
out =
buf, *limit =
buf + bufsize;
4236 while (
p < e &&
out < limit) {
4237 unsigned char c = *
p++;
4242 if (
p < e && *
p++ ==
'$') {
4249 unsigned char c2 = *
p++;
4250 unsigned char c3 = *
p++;
4251 unsigned char c4 = *
p++;
4252 if (c2 ==
'$' && c3 ==
')' && c4 ==
'C') {
4262 }
else if (c == 0xF) {
4264 }
else if (c == 0xE) {
4266 }
else if (c >= 0x21 && c <= 0x7E && *
state ==
KSC5601) {
4271 unsigned char c2 = *
p++;
4274 if (c2 < 0x21 || c2 > 0x7E) {
4280 if (c != 0x22 || c2 <= 0x65) {
4281 w = (c - 1)*190 + c2 - 0x41 + 0x80;
4285 }
else if (c != 0x49 && c <= 0x7D) {
4286 w = (c - 0x47)*94 + c2 - 0x21;
/*
 * Flag value 0x10, named for "escape sequence already emitted".
 * NOTE(review): presumably stored in the conversion state once the
 * ESC $ ) C sequence (written a few lines below) has been output, so that it
 * is only sent once - confirm against the full function.
 */
4306#define EMITTED_ESC_SEQUENCE 0x10
4310 unsigned char *
out, *limit;
4319 out = mb_convert_buf_add4(
out, 0x1B,
'$',
')',
'C');
4345 if (((
s >> 8) & 0xFF) < 0xA1 || (
s & 0xFF) < 0xA1) {
4351 if ((
s >= 0x80 &&
s < 0x2121) || (
s > 0x8080)) {
4354 }
else if (
s < 0x80) {
4357 out = mb_convert_buf_add(
out, 0xF);
4360 out = mb_convert_buf_add(
out,
s);
4364 out = mb_convert_buf_add(
out, 0xE);
4369 out = mb_convert_buf_add2(
out, (
s >> 8) & 0xFF,
s & 0xFF);
4375 out = mb_convert_buf_add(
out, 0xF);
4386 mbfl_filt_conv_jis_wchar,
4387 mbfl_filt_conv_jis_wchar_flush,
4396 mbfl_filt_conv_wchar_jis,
4397 mbfl_filt_conv_any_jis_flush,
4410 mb_iso2022jp_to_wchar,
4421 mbfl_filt_conv_jis_wchar,
4422 mbfl_filt_conv_jis_wchar_flush,
4431 mbfl_filt_conv_wchar_2022jp,
4432 mbfl_filt_conv_any_jis_flush,
4445 mb_iso2022jp_to_wchar,
4446 mb_wchar_to_iso2022jp,
/*
 * Alternative names for the encoding; NULL marks the end of the list.
 * The array appears in the same initializer as the name
 * "ISO-2022-JP-MOBILE#KDDI" further down.
 */
4451static const char *mbfl_encoding_2022jp_kddi_aliases[] = {
"ISO-2022-JP-KDDI",
NULL};
4458 mbfl_filt_conv_2022jp_mobile_wchar,
4459 mbfl_filt_conv_2022jp_mobile_wchar_flush,
4468 mbfl_filt_conv_wchar_2022jp_mobile,
4469 mbfl_filt_conv_wchar_2022jp_mobile_flush,
4475 "ISO-2022-JP-MOBILE#KDDI",
4477 mbfl_encoding_2022jp_kddi_aliases,
4480 &vtbl_2022jp_kddi_wchar,
4481 &vtbl_wchar_2022jp_kddi,
4482 mb_iso2022jp_kddi_to_wchar,
4483 mb_wchar_to_iso2022jp_kddi,
4493 mbfl_filt_conv_jis2004_wchar,
4494 mbfl_filt_conv_jis2004_wchar_flush,
4503 mbfl_filt_conv_wchar_jis2004,
4504 mbfl_filt_conv_wchar_jis2004_flush,
4515 &vtbl_2022jp_2004_wchar,
4516 &vtbl_wchar_2022jp_2004,
4517 mb_iso2022jp2004_to_wchar,
4518 mb_wchar_to_iso2022jp2004,
/*
 * NULL-terminated list of alias strings.
 * NOTE(review): no use of this array is visible in this excerpt - presumably
 * it is referenced by the CP50220 encoding descriptor; confirm.
 */
4532static const char *cp50220_aliases[] = {
"cp50220raw",
"cp50220-raw",
"JIS-ms",
NULL};
4539 mbfl_filt_conv_cp5022x_wchar,
4540 mbfl_filt_conv_cp5022x_wchar_flush,
4549 mbfl_filt_conv_wchar_cp50220,
4550 mbfl_filt_conv_wchar_cp50220_flush,
4559 mbfl_filt_conv_cp5022x_wchar,
4560 mbfl_filt_conv_cp5022x_wchar_flush,
4569 mbfl_filt_conv_wchar_cp50221,
4570 mbfl_filt_conv_any_jis_flush,
4579 mbfl_filt_conv_cp5022x_wchar,
4580 mbfl_filt_conv_cp5022x_wchar_flush,
4589 mbfl_filt_conv_wchar_cp50222,
4590 mbfl_filt_conv_wchar_cp50222_flush,
4601 &vtbl_cp50220_wchar,
4602 &vtbl_wchar_cp50220,
4603 mb_cp5022x_to_wchar,
4604 mb_wchar_to_cp50220,
4616 &vtbl_cp50221_wchar,
4617 &vtbl_wchar_cp50221,
4618 mb_cp5022x_to_wchar,
4619 mb_wchar_to_cp50221,
4631 &vtbl_cp50222_wchar,
4632 &vtbl_wchar_cp50222,
4633 mb_cp5022x_to_wchar,
4634 mb_wchar_to_cp50222,
/*
 * Alternative names for the encoding; NULL marks the end of the list.
 * The array is listed in the initializer that also holds
 * &vtbl_2022jpms_wchar and &vtbl_wchar_2022jpms further down.
 */
4639static const char *mbfl_encoding_2022jpms_aliases[] = {
"ISO2022JPMS",
NULL};
4646 mbfl_filt_conv_2022jpms_wchar,
4647 mbfl_filt_conv_2022jpms_wchar_flush,
4656 mbfl_filt_conv_wchar_2022jpms,
4657 mbfl_filt_conv_any_2022jpms_flush,
4665 mbfl_encoding_2022jpms_aliases,
4668 &vtbl_2022jpms_wchar,
4669 &vtbl_wchar_2022jpms,
4670 mb_iso2022jpms_to_wchar,
4671 mb_wchar_to_iso2022jpms,
4689 mbfl_filt_conv_wchar_2022kr,
4690 mbfl_filt_conv_any_2022kr_flush,
4699 mbfl_filt_conv_2022kr_wchar,
4700 mbfl_filt_conv_2022kr_wchar_flush,
4713 mb_iso2022kr_to_wchar,
4714 mb_wchar_to_iso2022kr,
4727 switch (filter->
status) {
4729 if (c >= 0 && c < 0x80) {
4731 }
else if (c > 0xA0 && c < 0xE0) {
4733 }
else if (c > 0x80 && c < 0xF0 && c != 0xA0) {
4743 int c1 = filter->
cache;
4744 if (c >= 0x40 && c <= 0xFC && c != 0x7F) {
4746 w = (s1 - 0x21)*94 + s2 - 0x21;
4779 int c1, c2, s1 = 0, s2;
4793 }
else if (c == 0xAF || c == 0x203E) {
4795 }
else if (c == 0xFF3C) {
4797 }
else if (c == 0x2225) {
4799 }
else if (c == 0xFF0D) {
4801 }
else if (c == 0xFFE0) {
4803 }
else if (c == 0xFFE1) {
4805 }
else if (c == 0xFFE2) {
4807 }
else if (c == 0) {
4812 }
else if (s1 >= 0x8080) {
4820 c1 = (s1 >> 8) & 0xFF;
4833static const unsigned short sjis_decode_tbl1[] = {
4834 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0xFFFF, 0, 188, 376, 564, 752, 940, 1128, 1316, 1504, 1692, 1880, 2068, 2256, 2444, 2632, 2820, 3008, 3196, 3384, 3572, 3760, 3948, 4136, 4324, 4512, 4700, 4888, 5076, 5264, 5452, 5640, 0xFFFF, -6016, -5828, -5640, -5452, -5264, -5076, -4888, -4700, -4512, -4324, -4136, -3948, -3760, -3572, -3384, -3196, -3008, -2820, -2632, -2444, -2256, -2068, -1880, -1692, -1504, -1316, -1128, -940, -752, -564, -376, -188, 0, 188, 376, 564, 752, 940, 1128, 1316, 1504, 1692, 1880, 2068, 2256, 2444, 2632, 2820, 3008, 3196, 3384, 3572, 3760, 3948, 4136, 4324, 4512, 4700, 4888, 5076, 5264, 5452, 5640, 5828, 6016, 6204, 6392, 6580, 6768, 6956, 7144, 7332, 7520, 7708, 7896, 8084, 8272, 8460, 8648, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF
4837static const unsigned short sjis_decode_tbl2[] = {
4838 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 0xFFFF, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 145, 146, 147, 148, 149, 150, 151, 152, 153, 154, 155, 156, 157, 158, 159, 160, 161, 162, 163, 164, 165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175, 176, 177, 178, 179, 180, 181, 182, 183, 184, 185, 186, 187, 0xFFFF, 0xFFFF, 0xFFFF
4841static size_t mb_sjis_to_wchar(
unsigned char **in,
size_t *in_len, uint32_t *
buf,
size_t bufsize,
unsigned int *
state)
4843 unsigned char *
p = *in, *e =
p + *in_len;
4844 uint32_t *
out =
buf, *limit =
buf + bufsize;
4848 while (
p < e &&
out < limit) {
4849 unsigned char c = *
p++;
4853 }
else if (c >= 0xA1 && c <= 0xDF) {
4854 *
out++ = 0xFEC0 + c;
4857 unsigned char c2 = *
p++;
4864 uint32_t w = sjis_decode_tbl1[c] + sjis_decode_tbl2[c2];
4871 if (c == 0x80 || c == 0xA0 || c > 0xEF) {
4880 if (
p == e &&
out < limit) {
4881 unsigned char c = *
p++;
4884 }
else if (c >= 0xA1 && c <= 0xDF) {
4885 *
out++ = 0xFEC0 + c;
4891 *in_len = e -
p + 1;
4898 unsigned char *
out, *limit;
4919 }
else if (w == 0xAF || w == 0x203E) {
4921 }
else if (w == 0xFF3C) {
4923 }
else if (w == 0x2225) {
4925 }
else if (w == 0xFF0D) {
4927 }
else if (w == 0xFFE0) {
4929 }
else if (w == 0xFFE1) {
4931 }
else if (w == 0xFFE2) {
4933 }
else if (w != 0) {
4938 }
else if (
s >= 0x8080) {
4946 out = mb_convert_buf_add(
out,
s);
4949 unsigned int c1 = (
s >> 8) & 0xFF, c2 =
s & 0xFF, s2;
4951 out = mb_convert_buf_add2(
out,
s, s2);
4961 int c1,
s, s1, s2, w;
4963 switch (filter->
status) {
4965 if (c >= 0 && c < 0x80 && c != 0x5c) {
4967 }
else if (c > 0xa0 && c < 0xe0) {
4969 }
else if (c > 0x80 && c <= 0xed && c != 0xa0) {
4972 }
else if (c == 0x5c) {
4974 }
else if (c == 0x80) {
4976 }
else if (c == 0xa0) {
4978 }
else if (c == 0xfd) {
4980 }
else if (c == 0xfe) {
4982 }
else if (c == 0xff) {
4993 if (c >= 0x40 && c <= 0xfc && c != 0x7f) {
4996 s = (s1 - 0x21)*94 + s2 - 0x21;
5000 }
else if (
s == 0x1f) {
5002 }
else if (
s == 0x20) {
5004 }
else if (
s == 0x21) {
5006 }
else if (
s == 0x3c) {
5008 }
else if (
s == 0x50) {
5010 }
else if (
s == 0x51) {
5012 }
else if (
s == 0x89) {
5019 for (i=0; i<7; i++) {
5020 if (
s >= code_tbl[i][0] &&
s <= code_tbl[i][1]) {
5021 w =
s - code_tbl[i][0] + code_tbl[i][2];
5029 for (i=0; i<code_tbl_m_len; i++) {
5030 if (
s == code_tbl_m[i][0]) {
5031 if (code_tbl_m[i][1] == 0xf860) {
5033 }
else if (code_tbl_m[i][1] == 0xf861) {
5038 for (
j=1;
j<
n-1;
j++) {
5041 w = code_tbl_m[i][
n-1];
5048 for (i=0; i<8; i++) {
5049 if (
s >= code_ofst_tbl[i][0] &&
s <= code_ofst_tbl[i][1]) {
5050 w = code_map[i][
s - code_ofst_tbl[i][0]];
5056 if (
s >= 0x043e &&
s <= 0x0441) {
5058 }
else if (
s == 0x03b1 ||
s == 0x03b7) {
5060 }
else if (
s == 0x04b8 ||
s == 0x04b9 ||
s == 0x04c4) {
5062 }
else if (
s == 0x1ed9 ||
s == 0x1eda ||
s == 0x1ee8 ||
s == 0x1ef3 ||
5063 (
s >= 0x1ef5 &&
s <= 0x1efb) ||
s == 0x1f05 ||
s == 0x1f06 ||
5064 s == 0x1f18 || (
s >= 0x1ff2 &&
s <= 0x20a5)) {
5097 int i, c1, c2, s1 = 0, s2 = 0,
mode;
5104 switch (filter->
status) {
5110 for (i = 0; i < 4; i++) {
5111 if (c1 == s_form_tbl[i+34+3+3]) {
5112 s1 = s_form_sjis_tbl[i+34+3+3];
5119 }
else if (c == 0x20dd) {
5120 for (i = 0; i < 3; i++) {
5121 if (c1 == s_form_tbl[i+34+3]) {
5122 s1 = s_form_sjis_tbl[i+34+3];
5129 }
else if (c == 0xf87f) {
5130 for (i = 0; i < 3; i++) {
5131 if (c1 == s_form_tbl[i+34]) {
5132 s1 = s_form_sjis_tbl[i+34];
5140 }
else if (c == 0xf87e) {
5141 for (i = 0; i < 34; i++) {
5142 if (c1 == s_form_tbl[i]) {
5143 s1 = s_form_sjis_tbl[i];
5157 for (i = 0; i < s_form_tbl_len; i++) {
5158 if (c1 == s_form_tbl[i]) {
5159 s1 = s_form_sjis_fallback_tbl[i];
5176 if (s2 <= 0 || s1 == -1) {
5187 }
else if (c == 0xa9) {
5194 }
else if (c == 0x2014) {
5196 }
else if (c == 0x2116) {
5206 for (i = 0; i < s_form_tbl_len; i++) {
5207 if (c == s_form_tbl[i]) {
5214 if (c == 0xf860 || c == 0xf861 || c == 0xf862) {
5225 }
else if (c == 0xa5) {
5229 }
else if (c == 0xff3c) {
5235 for (i=0; i<wchar2sjis_mac_r_tbl_len; i++) {
5236 if (c >= wchar2sjis_mac_r_tbl[i][0] && c <= wchar2sjis_mac_r_tbl[i][1]) {
5237 s1 = c - wchar2sjis_mac_r_tbl[i][0] + wchar2sjis_mac_r_tbl[i][2];
5243 for (i=0; i<wchar2sjis_mac_r_map_len; i++) {
5244 if (c >= wchar2sjis_mac_r_map[i][0] && c <= wchar2sjis_mac_r_map[i][1]) {
5245 s1 = wchar2sjis_mac_code_map[i][c-wchar2sjis_mac_r_map[i][0]];
5252 for (i=0; i<wchar2sjis_mac_wchar_tbl_len ; i++) {
5253 if ( c == wchar2sjis_mac_wchar_tbl[i][0]) {
5254 s1 = wchar2sjis_mac_wchar_tbl[i][1] & 0xffff;
5262 c2 = s1-94*(c1-0x21)+0x21;
5263 s1 = (c1 << 8) | c2;
5268 if ((s1 <= 0) || (s1 >= 0x8080 && s2 == 0)) {
5274 }
else if (s1 <= 0) {
5283 c1 = (s1 >> 8) & 0xff;
5299 for (i = 0; i < 5; i++) {
5300 if (c == code_tbl_m[i][2]) {
5301 filter->
cache = c | 0x10000;
5306 }
else if (c1 == 0xf861) {
5307 for (i = 0; i < 3; i++) {
5308 if (c == code_tbl_m[i+5][2]) {
5309 filter->
cache = c | 0x20000;
5314 }
else if (c1 == 0xf862) {
5315 for (i = 0; i < 4; i++) {
5316 if (c == code_tbl_m[i+5+3][2]) {
5317 filter->
cache = c | 0x40000;
5324 if (filter->
status == 0) {
5327 return mbfl_filt_conv_wchar_sjis_mac(c, filter);
5333 c1 = filter->
cache & 0xffff;
5339 for (i = 0; i < 5; i++) {
5340 if (c1 == code_tbl_m[i][2] && c == code_tbl_m[i][3]) {
5341 s1 = code_tbl_m[i][0];
5348 c2 = s1-94*(c1-0x21)+0x21;
5357 }
else if (
mode == 0x2) {
5358 for (i = 0; i < 3; i++) {
5359 if (c1 == code_tbl_m[i+5][2] && c == code_tbl_m[i+5][3]) {
5360 filter->
cache = c | 0x20000;
5365 }
else if (
mode == 0x4) {
5366 for (i = 0; i < 4; i++) {
5367 if (c1 == code_tbl_m[i+8][2] && c == code_tbl_m[i+8][3]) {
5368 filter->
cache = c | 0x40000;
5378 c1 = filter->
cache & 0xffff;
5385 for (i = 0; i < 3; i++) {
5386 if (c1 == code_tbl_m[i+5][3] && c == code_tbl_m[i+5][4]) {
5387 s1 = code_tbl_m[i+5][0];
5394 c2 = s1-94*(c1-0x21)+0x21;
5400 for (i = 0; i < 3; i++) {
5401 if (c1 == code_tbl_m[i+5][3]) {
5409 }
else if (
mode == 0x4) {
5410 for (i = 0; i < 4; i++) {
5411 if (c1 == code_tbl_m[i+8][3] && c == code_tbl_m[i+8][4]) {
5412 filter->
cache = c | 0x40000;
5422 c1 = filter->
cache & 0xffff;
5428 for (i = 0; i < 4; i++) {
5429 if (c1 == code_tbl_m[i+8][4] && c == code_tbl_m[i+8][5]) {
5430 s1 = code_tbl_m[i+8][0];
5437 c2 = s1-94*(c1-0x21)+0x21;
5443 for (i = 0; i < 4; i++) {
5444 if (c1 == code_tbl_m[i+8][4]) {
5467 for (i=0;i<s_form_tbl_len;i++) {
5468 if (c1 == s_form_tbl[i]) {
5469 s1 = s_form_sjis_fallback_tbl[i];
5488static size_t mb_sjismac_to_wchar(
unsigned char **in,
size_t *in_len, uint32_t *
buf,
size_t bufsize,
unsigned int *
state)
5493 unsigned char *
p = *in, *e =
p + *in_len;
5494 uint32_t *
out =
buf, *limit =
buf + bufsize;
5496 while (
p < e &&
out < limit) {
5497 unsigned char c = *
p++;
5499 if (c <= 0x80 || c == 0xA0) {
5502 }
else if (c == 0x80) {
5507 }
else if (c >= 0xA1 && c <= 0xDF) {
5508 *
out++ = 0xFEC0 + c;
5509 }
else if (c <= 0xED) {
5514 unsigned char c2 = *
p++;
5515 uint32_t w = sjis_decode_tbl1[c] + sjis_decode_tbl2[c2];
5521 }
else if (w == 0x1F) {
5524 }
else if (w == 0x20) {
5527 }
else if (w == 0x21) {
5530 }
else if (w == 0x3C) {
5533 }
else if (w == 0x50) {
5536 }
else if (w == 0x51) {
5539 }
else if (w == 0x89) {
5544 if (w >= 0x2F0 && w <= 0x3A3) {
5545 for (
int i = 0; i < 7; i++) {
5546 if (w >= code_tbl[i][0] && w <= code_tbl[i][1]) {
5547 *
out++ = w - code_tbl[i][0] + code_tbl[i][2];
5548 goto next_iteration;
5553 if (w >= 0x340 && w <= 0x523) {
5554 for (
int i = 0; i < code_tbl_m_len; i++) {
5555 if (w == code_tbl_m[i][0]) {
5557 if (code_tbl_m[i][1] == 0xF860) {
5559 }
else if (code_tbl_m[i][1] == 0xF861) {
5562 if ((limit -
out) <
n) {
5566 for (
int j = 1;
j <=
n;
j++) {
5567 *
out++ = code_tbl_m[i][
j];
5569 goto next_iteration;
5574 if (w >= 0x3AC && w <= 0x20A5) {
5575 for (
int i = 0; i < 8; i++) {
5576 if (w >= code_ofst_tbl[i][0] && w <= code_ofst_tbl[i][1]) {
5577 uint32_t w2 = code_map[i][w - code_ofst_tbl[i][0]];
5580 goto next_iteration;
5582 if ((limit -
out) < 2) {
5587 if (w >= 0x43E && w <= 0x441) {
5589 }
else if (w == 0x3B1 || w == 0x3B7) {
5591 }
else if (w == 0x4B8 || w == 0x4B9 || w == 0x4C4) {
5593 }
else if (w == 0x1ED9 || w == 0x1EDA || w == 0x1EE8 || w == 0x1EF3 || (w >= 0x1EF5 && w <= 0x1EFB) || w == 0x1F05 || w == 0x1F06 || w == 0x1F18 || (w >= 0x1FF2 && w <= 0x20A5)) {
5596 goto next_iteration;
5610 }
else if (c == 0xFD) {
5612 }
else if (c == 0xFE) {
5614 }
else if (c == 0xFF) {
5615 if ((limit -
out) < 2) {
5633static bool process_s_form(uint32_t w, uint32_t w2,
unsigned int *
s)
5636 for (
int i = 0; i < 4; i++) {
5637 if (w == s_form_tbl[i+34+3+3]) {
5638 *
s = s_form_sjis_tbl[i+34+3+3];
5642 }
else if (w2 == 0x20DD) {
5643 for (
int i = 0; i < 3; i++) {
5644 if (w == s_form_tbl[i+34+3]) {
5645 *
s = s_form_sjis_tbl[i+34+3];
5649 }
else if (w2 == 0xF87F) {
5650 for (
int i = 0; i < 3; i++) {
5651 if (w == s_form_tbl[i+34]) {
5652 *
s = s_form_sjis_tbl[i+34];
5656 }
else if (w2 == 0xF87E) {
5657 for (
int i = 0; i < 34; i++) {
5658 if (w == s_form_tbl[i]) {
5659 *
s = s_form_sjis_tbl[i];
/*
 * Looked up as transcoding_hint_cp_width[w - 0xF860], i.e. one entry for each
 * of the codepoints 0xF860, 0xF861 and 0xF862. The value is used as the upper
 * bound (`index <= expected`) of the loop that compares input against
 * code_tbl_m[i][index].
 * NOTE(review): nothing visible in this excerpt writes to this array - it
 * could probably be declared const; confirm no writer exists elsewhere.
 */
5669static int transcoding_hint_cp_width[3] = { 3, 4, 5 };
5673 unsigned char *
out, *limit;
5680 w =
buf->state & 0xFFFF;
5681 if (
buf->state & 0xFF000000L) {
5682 goto resume_transcoding_hint;
5685 goto process_codepoint;
5697 }
else if (w == 0xA9) {
5705 }
else if (w == 0x2014) {
5707 }
else if (w == 0x2116) {
5719 for (
int i = 0; i < s_form_tbl_len; i++) {
5720 if (w == s_form_tbl[i]) {
5723 s = s_form_sjis_fallback_tbl[i];
5726 out = mb_convert_buf_add2(
out, (
s >> 8) & 0xFF,
s & 0xFF);
5736 uint32_t w2 = *in++;
5739 if (!process_s_form(w, w2, &
s)) {
5742 for (
int i = 0; i < s_form_tbl_len; i++) {
5743 if (w == s_form_tbl[i]) {
5744 s = s_form_sjis_fallback_tbl[i];
5751 out = mb_convert_buf_add(
out,
s);
5754 out = mb_convert_buf_add2(
out, (
s >> 8) & 0xFF,
s & 0xFF);
5757 goto next_iteration;
5761 if (w == 0xF860 || w == 0xF861 || w == 0xF862) {
5773 uint32_t w2 = *in++;
5776 for (
int i = 0; i < code_tbl_m_len; i++) {
5777 if (w == code_tbl_m[i][1] && w2 == code_tbl_m[i][2]) {
5782resume_transcoding_hint:
5783 i =
buf->state >> 24;
5784 index = (
buf->state >> 16) & 0xFF;
5788 int expected = transcoding_hint_cp_width[w - 0xF860];
5790 while (index <= expected) {
5793 for (
int j = 1;
j < index;
j++) {
5797 buf->state = (i << 24) | (index << 16) | (w & 0xFFFF);
5806 if (w2 != code_tbl_m[i][index]) {
5808 for (
int j = 1;
j < index;
j++) {
5813 goto next_iteration;
5820 s = code_tbl_m[i][0];
5821 unsigned int c1 = (
s / 94) + 0x21, c2 = (
s % 94) + 0x21, s1, s2;
5824 out = mb_convert_buf_add2(
out, s1, s2);
5825 goto next_iteration;
5840 }
else if (w == 0xA5) {
5844 }
else if (w == 0xFF3C) {
5847 for (
int i = 0; i < wchar2sjis_mac_r_tbl_len; i++) {
5848 if (w >= wchar2sjis_mac_r_tbl[i][0] && w <= wchar2sjis_mac_r_tbl[i][1]) {
5849 s = w - wchar2sjis_mac_r_tbl[i][0] + wchar2sjis_mac_r_tbl[i][2];
5850 s = (((
s / 94) + 0x21) << 8) | ((
s % 94) + 0x21);
5851 goto found_kuten_code;
5855 for (
int i = 0; i < wchar2sjis_mac_r_map_len; i++) {
5856 if (w >= wchar2sjis_mac_r_map[i][0] && w <= wchar2sjis_mac_r_map[i][1]) {
5857 s = wchar2sjis_mac_code_map[i][w - wchar2sjis_mac_r_map[i][0]];
5859 s = (((
s / 94) + 0x21) << 8) | ((
s % 94) + 0x21);
5860 goto found_kuten_code;
5865 for (
int i = 0; i < wchar2sjis_mac_wchar_tbl_len; i++) {
5866 if (w == wchar2sjis_mac_wchar_tbl[i][0]) {
5867 s = wchar2sjis_mac_wchar_tbl[i][1];
5868 s = (((
s / 94) + 0x21) << 8) | ((
s % 94) + 0x21);
5869 goto found_kuten_code;
5876 if ((!
s && w) ||
s >= 0x8080) {
5879 }
else if (
s <= 0xFF) {
5880 out = mb_convert_buf_add(
out,
s);
5882 unsigned int c1 = (
s >> 8) & 0xFF, c2 =
s & 0xFF, s1, s2;
5885 out = mb_convert_buf_add2(
out, s1, s2);
5900 if (
s >= mb_tbl_code2uni_docomo1_min &&
s <= mb_tbl_code2uni_docomo1_max) {
5902 EMIT_KEYPAD_EMOJI(convert_emoji_cp(mb_tbl_code2uni_docomo1[
s - mb_tbl_code2uni_docomo1_min]));
5905 return convert_emoji_cp(mb_tbl_code2uni_docomo1[
s - mb_tbl_code2uni_docomo1_min]);
5913 if (
s >= mb_tbl_code2uni_sb1_min &&
s <= mb_tbl_code2uni_sb1_max) {
5914 if (
s == 0x2817 || (
s >= 0x2823 &&
s <= 0x282C)) {
5918 return convert_emoji_cp(mb_tbl_code2uni_sb1[
s - mb_tbl_code2uni_sb1_min]);
5920 }
else if (
s >= mb_tbl_code2uni_sb2_min &&
s <= mb_tbl_code2uni_sb2_max) {
5922 return convert_emoji_cp(mb_tbl_code2uni_sb2[
s - mb_tbl_code2uni_sb2_min]);
5923 }
else if (
s >= mb_tbl_code2uni_sb3_min &&
s <= mb_tbl_code2uni_sb3_max) {
5924 if (
s >= 0x2B02 &&
s <= 0x2B0B) {
5928 return convert_emoji_cp(mb_tbl_code2uni_sb3[
s - mb_tbl_code2uni_sb3_min]);
5942 if (filter->
status == 1) {
5943 int c1 = filter->
cache;
5948 }
else if (c1 ==
'0') {
5951 *s1 = 0x2966 + (c1 -
'1');
5963 if (c ==
'#' || (c >=
'0' && c <=
'9')) {
5972 }
else if (c == 0x00AE) {
5975 }
else if (c >= mb_tbl_uni_docomo2code2_min && c <= mb_tbl_uni_docomo2code2_max) {
5976 int i =
mbfl_bisec_srch2(c, mb_tbl_uni_docomo2code2_key, mb_tbl_uni_docomo2code2_len);
5978 *s1 = mb_tbl_uni_docomo2code2_value[i];
5981 }
else if (c >= mb_tbl_uni_docomo2code3_min && c <= mb_tbl_uni_docomo2code3_max) {
5982 int i =
mbfl_bisec_srch2(c - 0x10000, mb_tbl_uni_docomo2code3_key, mb_tbl_uni_docomo2code3_len);
5984 *s1 = mb_tbl_uni_docomo2code3_value[i];
5987 }
else if (c >= mb_tbl_uni_docomo2code5_min && c <= mb_tbl_uni_docomo2code5_max) {
5988 int i =
mbfl_bisec_srch2(c - 0xF0000, mb_tbl_uni_docomo2code5_key, mb_tbl_uni_docomo2code5_len);
5990 *s1 = mb_tbl_uni_docomo2code5_val[i];
5999 if (filter->
status == 1) {
6000 int c1 = filter->
cache;
6005 }
else if (c1 ==
'0') {
6008 *s1 = 0x27a6 + (c1 -
'1');
6014 }
else if (filter->
status == 2) {
6015 int c1 = filter->
cache;
6018 for (
int i = 0; i < 10; i++) {
6019 if (c1 ==
NFLAGS(nflags_s[i][0]) && c ==
NFLAGS(nflags_s[i][1])) {
6020 *s1 = nflags_code_kddi[i];
6031 if (c ==
'#' || (c >=
'0' && c <=
'9')) {
6044 }
else if (c == 0xAE) {
6047 }
else if (c >= mb_tbl_uni_kddi2code2_min && c <= mb_tbl_uni_kddi2code2_max) {
6048 int i =
mbfl_bisec_srch2(c, mb_tbl_uni_kddi2code2_key, mb_tbl_uni_kddi2code2_len);
6050 *s1 = mb_tbl_uni_kddi2code2_value[i];
6053 }
else if (c >= mb_tbl_uni_kddi2code3_min && c <= mb_tbl_uni_kddi2code3_max) {
6054 int i =
mbfl_bisec_srch2(c - 0x10000, mb_tbl_uni_kddi2code3_key, mb_tbl_uni_kddi2code3_len);
6056 *s1 = mb_tbl_uni_kddi2code3_value[i];
6059 }
else if (c >= mb_tbl_uni_kddi2code5_min && c <= mb_tbl_uni_kddi2code5_max) {
6060 int i =
mbfl_bisec_srch2(c - 0xF0000, mb_tbl_uni_kddi2code5_key, mb_tbl_uni_kddi2code5_len);
6062 *s1 = mb_tbl_uni_kddi2code5_val[i];
6071 if (filter->
status == 1) {
6072 int c1 = filter->
cache;
6077 }
else if (c1 ==
'0') {
6080 *s1 = 0x2823 + (c1 -
'1');
6086 }
else if (filter->
status == 2) {
6087 int c1 = filter->
cache;
6090 for (
int i = 0; i < 10; i++) {
6091 if (c1 ==
NFLAGS(nflags_s[i][0]) && c ==
NFLAGS(nflags_s[i][1])) {
6092 *s1 = nflags_code_sb[i];
6103 if (c ==
'#' || (c >=
'0' && c <=
'9')) {
6116 }
else if (c == 0xAE) {
6119 }
else if (c >= mb_tbl_uni_sb2code2_min && c <= mb_tbl_uni_sb2code2_max) {
6120 int i =
mbfl_bisec_srch2(c, mb_tbl_uni_sb2code2_key, mb_tbl_uni_sb2code2_len);
6122 *s1 = mb_tbl_uni_sb2code2_value[i];
6125 }
else if (c >= mb_tbl_uni_sb2code3_min && c <= mb_tbl_uni_sb2code3_max) {
6126 int i =
mbfl_bisec_srch2(c - 0x10000, mb_tbl_uni_sb2code3_key, mb_tbl_uni_sb2code3_len);
6128 *s1 = mb_tbl_uni_sb2code3_value[i];
6131 }
else if (c >= mb_tbl_uni_sb2code5_min && c <= mb_tbl_uni_sb2code5_max) {
6132 int i =
mbfl_bisec_srch2(c - 0xF0000, mb_tbl_uni_sb2code5_key, mb_tbl_uni_sb2code5_len);
6134 *s1 = mb_tbl_uni_sb2code5_val[i];
6143 int c1,
s, s1, s2, w, snd = 0;
6145 switch (filter->
status) {
6147 if (c >= 0 && c < 0x80) {
6155 }
else if (c > 0xA0 && c < 0xE0) {
6157 }
else if (c > 0x80 && c < 0xFD && c != 0xA0) {
6168 if (c >= 0x40 && c <= 0xFC && c != 0x7F) {
6171 s = ((s1 - 0x21) * 94) + s2 - 0x21;
6175 }
else if (
s == 32) {
6177 }
else if (
s == 33) {
6179 }
else if (
s == 60) {
6181 }
else if (
s == 80) {
6183 }
else if (
s == 81) {
6185 }
else if (
s == 137) {
6219 }
else if (
s >= (94*94) &&
s < (114*94)) {
6220 w =
s - (94*94) + 0xe000;
6246 if ((c >=
'E' && c <=
'G') || (c >=
'O' && c <=
'Q')) {
6262 if (c1 ==
'G' && c >= 0x21 && c <= 0x7a) {
6263 s1 = (0x91 - 0x21) * 94;
6264 }
else if (c1 ==
'E' && c >= 0x21 && c <= 0x7A) {
6265 s1 = (0x8D - 0x21) * 94;
6266 }
else if (c1 ==
'F' && c >= 0x21 && c <= 0x7A) {
6267 s1 = (0x8E - 0x21) * 94;
6268 }
else if (c1 ==
'O' && c >= 0x21 && c <= 0x6D) {
6269 s1 = (0x92 - 0x21) * 94;
6270 }
else if (c1 ==
'P' && c >= 0x21 && c <= 0x6C) {
6271 s1 = (0x95 - 0x21) * 94;
6272 }
else if (c1 ==
'Q' && c >= 0x21 && c <= 0x5E) {
6273 s1 = (0x96 - 0x21) * 94;
6298 int c1, c2, s1 = 0, s2 = 0;
6308 }
else if (c >= 0xE000 && c < (0xE000 + 20*94)) {
6311 c1 = (s1 / 94) + 0x7F;
6312 c2 = (s1 % 94) + 0x21;
6313 s1 = (c1 << 8) | c2;
6320 }
else if (c == 0xFF3c) {
6322 }
else if (c == 0x2225) {
6324 }
else if (c == 0xFF0D) {
6326 }
else if (c == 0xFFE0) {
6328 }
else if (c == 0xFFE1) {
6330 }
else if (c == 0xFFE2) {
6335 if ((s1 <= 0) || (s1 >= 0x8080 && s2 == 0)) {
6341 s1 = (((c1 / 94) + 0x2D) << 8) + (c1 % 94) + 0x21;
6350 s1 = (((c1 / 94) + 0x79) << 8) + (c1 % 94) + 0x21;
6364 s1 = (((s1 / 94) + 0x21) << 8) | ((s1 % 94) + 0x21);
6375 c1 = (s1 >> 8) & 0xff;
6390 int c1 = filter->
cache;
6391 if (filter->
status == 1 && (c1 ==
'#' || (c1 >=
'0' && c1 <=
'9'))) {
6394 }
else if (filter->
status == 2) {
6407static const unsigned short sjis_mobile_decode_tbl1[] = {
6408 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0xFFFF, 0, 188, 376, 564, 752, 940, 1128, 1316, 1504, 1692, 1880, 2068, 2256, 2444, 2632, 2820, 3008, 3196, 3384, 3572, 3760, 3948, 4136, 4324, 4512, 4700, 4888, 5076, 5264, 5452, 5640, 0xFFFF, -6016, -5828, -5640, -5452, -5264, -5076, -4888, -4700, -4512, -4324, -4136, -3948, -3760, -3572, -3384, -3196, -3008, -2820, -2632, -2444, -2256, -2068, -1880, -1692, -1504, -1316, -1128, -940, -752, -564, -376, -188, 0, 188, 376, 564, 752, 940, 1128, 1316, 1504, 1692, 1880, 2068, 2256, 2444, 2632, 2820, 3008, 3196, 3384, 3572, 3760, 3948, 4136, 4324, 4512, 4700, 4888, 5076, 5264, 5452, 5640, 5828, 6016, 6204, 6392, 6580, 6768, 6956, 7144, 7332, 7520, 7708, 7896, 8084, 8272, 8460, 8648, 8836, 9024, 9212, 9400, 9588, 9776, 9964, 10152, 10340, 10528, 10716, 10904, 11092, 0xFFFF, 0xFFFF, 0xFFFF
6411static size_t mb_sjis_docomo_to_wchar(
unsigned char **in,
size_t *in_len, uint32_t *
buf,
size_t bufsize,
unsigned int *
state)
6413 unsigned char *
p = *in, *e =
p + *in_len;
6416 uint32_t *
out =
buf, *limit =
buf + bufsize - 1;
6418 while (
p < e &&
out < limit) {
6419 unsigned char c = *
p++;
6423 }
else if (c >= 0xA1 && c <= 0xDF) {
6425 *
out++ = 0xFEC0 + c;
6432 unsigned char c2 = *
p++;
6433 uint32_t w = sjis_mobile_decode_tbl1[c] + sjis_decode_tbl2[c2];
6439 }
else if (w == 32) {
6442 }
else if (w == 33) {
6445 }
else if (w == 60) {
6448 }
else if (w == 80) {
6451 }
else if (w == 81) {
6454 }
else if (w == 137) {
6460 if (w >= mb_tbl_code2uni_docomo1_min && w <= mb_tbl_code2uni_docomo1_max) {
6474 }
else if (w >= (94*94) && w < (114*94)) {
6475 w = w - (94*94) + 0xE000;
6477 if (c == 0x80 || c == 0xA0 || c >= 0xFD) {
6495 unsigned char *
out, *limit;
6506 goto reprocess_wchar;
6522 }
else if (w >= 0xE000 && w < (0xE000 + 20*94)) {
6525 s = (((
s / 94) + 0x7F) << 8) | ((
s % 94) + 0x21);
6532 }
else if (w == 0xFF3C) {
6534 }
else if (w == 0x2225) {
6536 }
else if (w == 0xFF0D) {
6538 }
else if (w == 0xFFE0) {
6540 }
else if (w == 0xFFE1) {
6542 }
else if (w == 0xFFE2) {
6547 if (w && (!
s ||
s >= 0x8080)) {
6552 s = (((i / 94) + 0x2D) << 8) + (i % 94) + 0x21;
6559 s = (((i / 94) + 0x79) << 8) + (i % 94) + 0x21;
6572 if (w ==
'#' || (w >=
'0' && w <=
'9')) {
6583 uint32_t w2 = *in++;
len--;
6587 }
else if (w ==
'0') {
6590 s = 0x2966 + (w -
'1');
6592 s = (((
s / 94) + 0x21) << 8) | ((
s % 94) + 0x21);
6596 }
else if (w == 0xA9) {
6597 s = (((0x29B5 / 94) + 0x21) << 8) | ((0x29B5 % 94) + 0x21);
6598 }
else if (w == 0xAE) {
6599 s = (((0x29BA / 94) + 0x21) << 8) | ((0x29BA % 94) + 0x21);
6600 }
else if (w >= mb_tbl_uni_docomo2code2_min && w <= mb_tbl_uni_docomo2code2_max) {
6601 int i =
mbfl_bisec_srch2(w, mb_tbl_uni_docomo2code2_key, mb_tbl_uni_docomo2code2_len);
6603 s = mb_tbl_uni_docomo2code2_value[i];
6604 s = (((
s / 94) + 0x21) << 8) | ((
s % 94) + 0x21);
6606 }
else if (w >= mb_tbl_uni_docomo2code3_min && w <= mb_tbl_uni_docomo2code3_max) {
6607 int i =
mbfl_bisec_srch2(w - 0x10000, mb_tbl_uni_docomo2code3_key, mb_tbl_uni_docomo2code3_len);
6609 s = mb_tbl_uni_docomo2code3_value[i];
6610 s = (((
s / 94) + 0x21) << 8) | ((
s % 94) + 0x21);
6612 }
else if (w >= mb_tbl_uni_docomo2code5_min && w <= mb_tbl_uni_docomo2code5_max) {
6613 int i =
mbfl_bisec_srch2(w - 0xF0000, mb_tbl_uni_docomo2code5_key, mb_tbl_uni_docomo2code5_len);
6615 s = mb_tbl_uni_docomo2code5_val[i];
6616 s = (((
s / 94) + 0x21) << 8) | ((
s % 94) + 0x21);
6624 }
else if (
s <= 0xFF) {
6625 out = mb_convert_buf_add(
out,
s);
6627 unsigned int c1 = (
s >> 8) & 0xFF, c2 =
s & 0xFF, s1, s2;
6630 out = mb_convert_buf_add2(
out, s1, s2);
6637static size_t mb_sjis_kddi_to_wchar(
unsigned char **in,
size_t *in_len, uint32_t *
buf,
size_t bufsize,
unsigned int *
state)
6639 unsigned char *
p = *in, *e =
p + *in_len;
6640 uint32_t *
out =
buf, *limit =
buf + bufsize - 1;
6642 while (
p < e &&
out < limit) {
6643 unsigned char c = *
p++;
6647 }
else if (c >= 0xA1 && c <= 0xDF) {
6649 *
out++ = 0xFEC0 + c;
6656 unsigned char c2 = *
p++;
6657 uint32_t w = sjis_mobile_decode_tbl1[c] + sjis_decode_tbl2[c2];
6663 }
else if (w == 32) {
6666 }
else if (w == 33) {
6669 }
else if (w == 60) {
6672 }
else if (w == 80) {
6675 }
else if (w == 81) {
6678 }
else if (w == 137) {
6684 if (w >= mb_tbl_code2uni_kddi1_min && w <= mb_tbl_code2uni_kddi2_max) {
6688 w = sjis_mobile_decode_tbl1[c] + sjis_decode_tbl2[c2];
6689 if (w >= (94*94) && w < (114*94)) {
6690 w = w - (94*94) + 0xE000;
6703 }
else if (w >= (94*94) && w < (114*94)) {
6704 w = w - (94*94) + 0xE000;
6706 if (c == 0x80 || c == 0xA0 || c >= 0xFD) {
6724 unsigned char *
out, *limit;
6734 goto reprocess_wchar;
6750 }
else if (w >= 0xE000 && w < (0xE000 + 20*94)) {
6753 s = (((
s / 94) + 0x7F) << 8) | ((
s % 94) + 0x21);
6760 }
else if (w == 0xFF3c) {
6762 }
else if (w == 0x2225) {
6764 }
else if (w == 0xFF0D) {
6766 }
else if (w == 0xFFE0) {
6768 }
else if (w == 0xFFE1) {
6770 }
else if (w == 0xFFE2) {
6775 if (w && (!
s ||
s >= 0x8080)) {
6780 s = (((i / 94) + 0x2D) << 8) + (i % 94) + 0x21;
6787 s = (((i / 94) + 0x79) << 8) + (i % 94) + 0x21;
6794 if (w ==
'#' || (w >=
'0' && w <=
'9')) {
6805 uint32_t w2 = *in++;
len--;
6809 }
else if (w ==
'0') {
6812 s = 0x27A6 + (w -
'1');
6814 s = (((
s / 94) + 0x21) << 8) | ((
s % 94) + 0x21);
6829 uint32_t w2 = *in++;
len--;
6831 for (
int i = 0; i < 10; i++) {
6832 if (w ==
NFLAGS(nflags_s[i][0]) && w2 ==
NFLAGS(nflags_s[i][1])) {
6833 s = nflags_code_kddi[i];
6834 s = (((
s / 94) + 0x21) << 8) | ((
s % 94) + 0x21);
6843 }
else if (w == 0xA9) {
6844 s = (((0x27DC / 94) + 0x21) << 8) | ((0x27DC % 94) + 0x21);
6845 }
else if (w == 0xAE) {
6846 s = (((0x27DD / 94) + 0x21) << 8) | ((0x27DD % 94) + 0x21);
6847 }
else if (w >= mb_tbl_uni_kddi2code2_min && w <= mb_tbl_uni_kddi2code2_max) {
6848 int i =
mbfl_bisec_srch2(w, mb_tbl_uni_kddi2code2_key, mb_tbl_uni_kddi2code2_len);
6850 s = mb_tbl_uni_kddi2code2_value[i];
6851 s = (((
s / 94) + 0x21) << 8) | ((
s % 94) + 0x21);
6853 }
else if (w >= mb_tbl_uni_kddi2code3_min && w <= mb_tbl_uni_kddi2code3_max) {
6854 int i =
mbfl_bisec_srch2(w - 0x10000, mb_tbl_uni_kddi2code3_key, mb_tbl_uni_kddi2code3_len);
6856 s = mb_tbl_uni_kddi2code3_value[i];
6857 s = (((
s / 94) + 0x21) << 8) | ((
s % 94) + 0x21);
6859 }
else if (w >= mb_tbl_uni_kddi2code5_min && w <= mb_tbl_uni_kddi2code5_max) {
6860 int i =
mbfl_bisec_srch2(w - 0xF0000, mb_tbl_uni_kddi2code5_key, mb_tbl_uni_kddi2code5_len);
6862 s = mb_tbl_uni_kddi2code5_val[i];
6863 s = (((
s / 94) + 0x21) << 8) | ((
s % 94) + 0x21);
6871 }
else if (
s <= 0xFF) {
6872 out = mb_convert_buf_add(
out,
s);
6874 unsigned int c1 = (
s >> 8) & 0xFF, c2 =
s & 0xFF, s1, s2;
6877 out = mb_convert_buf_add2(
out, s1, s2);
6884static size_t mb_sjis_sb_to_wchar(
unsigned char **in,
size_t *in_len, uint32_t *
buf,
size_t bufsize,
unsigned int *
state)
6886 unsigned char *
p = *in, *e =
p + *in_len;
6887 uint32_t *
out =
buf, *limit =
buf + bufsize - 1;
6890 goto softbank_emoji_escapes;
6893 while (
p < e &&
out < limit) {
6894 unsigned char c = *
p++;
6898 if (
p == e || *
p++ !=
'$' ||
p == e) {
6902 unsigned char c2 = *
p++;
6903 if ((c2 <
'E' || c2 >
'G') && (c2 <
'O' || c2 >
'Q')) {
6911softbank_emoji_escapes:
6912 while (
p < e &&
out < limit) {
6919 if (*
state ==
'G' && c >= 0x21 && c <= 0x7A) {
6920 s = (0x91 - 0x21) * 94;
6921 }
else if (*
state ==
'E' && c >= 0x21 && c <= 0x7A) {
6922 s = (0x8D - 0x21) * 94;
6923 }
else if (*
state ==
'F' && c >= 0x21 && c <= 0x7A) {
6924 s = (0x8E - 0x21) * 94;
6925 }
else if (*
state ==
'O' && c >= 0x21 && c <= 0x6D) {
6926 s = (0x92 - 0x21) * 94;
6927 }
else if (*
state ==
'P' && c >= 0x21 && c <= 0x6C) {
6928 s = (0x95 - 0x21) * 94;
6929 }
else if (*
state ==
'Q' && c >= 0x21 && c <= 0x5E) {
6930 s = (0x96 - 0x21) * 94;
6950 }
else if (c <= 0x7F) {
6952 }
else if (c >= 0xA1 && c <= 0xDF) {
6954 *
out++ = 0xFEC0 + c;
6961 unsigned char c2 = *
p++;
6962 uint32_t w = sjis_mobile_decode_tbl1[c] + sjis_decode_tbl2[c2];
6968 }
else if (w == 32) {
6971 }
else if (w == 33) {
6974 }
else if (w == 60) {
6977 }
else if (w == 80) {
6980 }
else if (w == 81) {
6983 }
else if (w == 137) {
6989 if (w >= mb_tbl_code2uni_sb1_min && w <= mb_tbl_code2uni_sb3_max) {
6993 w = sjis_mobile_decode_tbl1[c] + sjis_decode_tbl2[c2];
6996 }
else if (w >= (94*94) && w < (114*94)) {
6997 w = w - (94*94) + 0xE000;
7010 }
else if (w >= (94*94) && w < (114*94)) {
7011 w = w - (94*94) + 0xE000;
7013 if (c == 0x80 || c == 0xA0 || c >= 0xFD) {
7031 unsigned char *
out, *limit;
7041 goto reprocess_wchar;
7057 }
else if (w >= 0xE000 && w < (0xE000 + 20*94)) {
7060 s = (((
s / 94) + 0x7F) << 8) | ((
s % 94) + 0x21);
7067 }
else if (w == 0xFF3C) {
7069 }
else if (w == 0x2225) {
7071 }
else if (w == 0xFF0D) {
7073 }
else if (w == 0xFFE0) {
7075 }
else if (w == 0xFFE1) {
7077 }
else if (w == 0xFFE2) {
7082 if (w && (!
s ||
s >= 0x8080)) {
7087 s = (((i / 94) + 0x2D) << 8) + (i % 94) + 0x21;
7094 s = (((i / 94) + 0x79) << 8) + (i % 94) + 0x21;
7101 if (w ==
'#' || (w >=
'0' && w <=
'9')) {
7112 uint32_t w2 = *in++;
len--;
7116 }
else if (w ==
'0') {
7119 s = 0x2823 + (w -
'1');
7121 s = (((
s / 94) + 0x21) << 8) | ((
s % 94) + 0x21);
7136 uint32_t w2 = *in++;
len--;
7138 for (
int i = 0; i < 10; i++) {
7139 if (w ==
NFLAGS(nflags_s[i][0]) && w2 ==
NFLAGS(nflags_s[i][1])) {
7140 s = nflags_code_sb[i];
7141 s = (((
s / 94) + 0x21) << 8) | ((
s % 94) + 0x21);
7150 }
else if (w == 0xA9) {
7151 s = (((0x2855 / 94) + 0x21) << 8) | ((0x2855 % 94) + 0x21);
7152 }
else if (w == 0xAE) {
7153 s = (((0x2856 / 94) + 0x21) << 8) | ((0x2856 % 94) + 0x21);
7154 }
else if (w >= mb_tbl_uni_sb2code2_min && w <= mb_tbl_uni_sb2code2_max) {
7155 int i =
mbfl_bisec_srch2(w, mb_tbl_uni_sb2code2_key, mb_tbl_uni_sb2code2_len);
7157 s = mb_tbl_uni_sb2code2_value[i];
7158 s = (((
s / 94) + 0x21) << 8) | ((
s % 94) + 0x21);
7160 }
else if (w >= mb_tbl_uni_sb2code3_min && w <= mb_tbl_uni_sb2code3_max) {
7161 int i =
mbfl_bisec_srch2(w - 0x10000, mb_tbl_uni_sb2code3_key, mb_tbl_uni_sb2code3_len);
7163 s = mb_tbl_uni_sb2code3_value[i];
7164 s = (((
s / 94) + 0x21) << 8) | ((
s % 94) + 0x21);
7166 }
else if (w >= mb_tbl_uni_sb2code5_min && w <= mb_tbl_uni_sb2code5_max) {
7167 int i =
mbfl_bisec_srch2(w - 0xF0000, mb_tbl_uni_sb2code5_key, mb_tbl_uni_sb2code5_len);
7169 s = mb_tbl_uni_sb2code5_val[i];
7170 s = (((
s / 94) + 0x21) << 8) | ((
s % 94) + 0x21);
7178 }
else if (
s <= 0xFF) {
7179 out = mb_convert_buf_add(
out,
s);
7181 unsigned int c1 = (
s >> 8) & 0xFF, c2 =
s & 0xFF, s1, s2;
7184 out = mb_convert_buf_add2(
out, s1, s2);
7191static size_t mb_sjis2004_to_wchar(
unsigned char **in,
size_t *in_len, uint32_t *
buf,
size_t bufsize,
unsigned int *
state)
7193 unsigned char *
p = *in, *e =
p + *in_len;
7194 uint32_t *
out =
buf, *limit =
buf + bufsize - 1;
7196 while (
p < e &&
out < limit) {
7197 unsigned char c = *
p++;
7202 }
else if (c == 0x7E) {
7207 }
else if (c >= 0xA1 && c <= 0xDF) {
7208 *
out++ = 0xFEC0 + c;
7214 unsigned char c2 = *
p++;
7215 uint32_t w1 = sjis_mobile_decode_tbl1[c] + sjis_decode_tbl2[c2];
7218 if (w1 >= 0x0170 && w1 <= 0x03F1) {
7221 *
out++ = jisx0213_u2_tbl[2*k];
7222 *
out++ = jisx0213_u2_tbl[2*k+1];
7228 if (w1 < jisx0213_ucs_table_size) {
7229 uint32_t w = jisx0213_ucs_table[w1];
7239 *
out++ = jisx0213_jis_u5_tbl[k] + 0x20000;
7241 if (c == 0x80 || c == 0xA0 || c >= 0xFD) {
7256 unsigned char *
out, *limit;
7264 goto process_codepoint;
7272 if (w == 0xE6 || (w >= 0x254 && w <= 0x2E9) || (w >= 0x304B && w <= 0x3053) || (w >= 0x30AB && w <= 0x30C8) || w == 0x31F7) {
7273 for (
int k = 0; k < jisx0213_u2_tbl_len; k++) {
7274 if (w == jisx0213_u2_tbl[2*k]) {
7282 uint32_t w2 = *in++;
len--;
7283 if ((w == 0x254 || w == 0x28C || w == 0x259 || w == 0x25A) && w2 == 0x301) {
7286 if (w2 == jisx0213_u2_tbl[2*k+1]) {
7287 s = jisx0213_u2_key[k];
7294 s = jisx0213_u2_fb_tbl[k];
7302 for (
int k = 0; k < uni2jis_tbl_len; k++) {
7303 if (w >= uni2jis_tbl_range[k][0] && w <= uni2jis_tbl_range[k][1]) {
7304 s = uni2jis_tbl[k][w - uni2jis_tbl_range[k][0]];
7311 if (!
s && w >= ucs_c1_jisx0213_min && w <= ucs_c1_jisx0213_max) {
7312 int k = mbfl_bisec_srch(w, ucs_c1_jisx0213_tbl, ucs_c1_jisx0213_tbl_len);
7314 s = ucs_c1_jisx0213_ofst[k] + w - ucs_c1_jisx0213_tbl[2*k];
7319 if (!
s && w >= jisx0213_u5_tbl_min && w <= jisx0213_u5_tbl_max) {
7320 int k =
mbfl_bisec_srch2(w - 0x20000, jisx0213_u5_jis_key, jisx0213_u5_tbl_len);
7322 s = jisx0213_u5_jis_tbl[k];
7330 }
else if (w == 0xFE46) {
7332 }
else if (w >= 0xF91D && w <= 0xF9DC) {
7334 int k =
mbfl_bisec_srch2(w, ucs_r2b_jisx0213_cmap_key, ucs_r2b_jisx0213_cmap_len);
7336 s = ucs_r2b_jisx0213_cmap_val[k];
7344 }
else if (
s <= 0xFF) {
7345 out = mb_convert_buf_add(
out,
s);
7347 unsigned int c1 = (
s >> 8) & 0xFF, c2 =
s & 0xFF, s1, s2;
7350 out = mb_convert_buf_add2(
out, s1, s2);
7359 int c1,
s, s1, s2, w;
7361 switch (filter->
status) {
7363 if (c >= 0 && c < 0x80) {
7365 }
else if (c > 0xa0 && c < 0xe0) {
7367 }
else if (c > 0x80 && c < 0xfd && c != 0xa0) {
7378 if (c >= 0x40 && c <= 0xfc && c != 0x7f) {
7381 s = (s1 - 0x21)*94 + s2 - 0x21;
7385 }
else if (
s == 32) {
7387 }
else if (
s == 33) {
7389 }
else if (
s == 60) {
7391 }
else if (
s == 80) {
7393 }
else if (
s == 81) {
7395 }
else if (
s == 137) {
7408 }
else if (
s >= (94*94) &&
s < (114*94)) {
7409 w =
s - (94*94) + 0xe000;
7451 }
else if (c == 0x203E) {
7459 }
else if (c >= 0xe000 && c < (0xe000 + 20*94)) {
7463 s1 = (c1 << 8) | c2;
7469 }
else if (c == 0xff3c) {
7471 }
else if (c == 0x2225) {
7473 }
else if (c == 0xff0d) {
7475 }
else if (c == 0xffe0) {
7477 }
else if (c == 0xffe1) {
7479 }
else if (c == 0xffe2) {
7483 if ((s1 <= 0) || (s1 >= 0x8080 && s2 == 0)) {
7489 s1 = ((c1/94 + 0x2d) << 8) + (c1%94 + 0x21);
7499 s1 = ((c1/94 + 0x93) << 8) + (c1%94 + 0x21);
7507 }
else if (s1 <= 0) {
7515 c1 = (s1 >> 8) & 0xff;
7533 }
else if (c == 0x203E) {
7537 return mbfl_filt_conv_wchar_cp932(c, filter);
7542static size_t mb_cp932_to_wchar(
unsigned char **in,
size_t *in_len, uint32_t *
buf,
size_t bufsize,
unsigned int *
state)
7544 unsigned char *
p = *in, *e =
p + *in_len;
7545 uint32_t *
out =
buf, *limit =
buf + bufsize;
7547 while (
p < e &&
out < limit) {
7548 unsigned char c = *
p++;
7552 }
else if (c > 0xA0 && c < 0xE0) {
7554 *
out++ = 0xFEC0 + c;
7560 unsigned char c2 = *
p++;
7562 unsigned int s = sjis_mobile_decode_tbl1[c] + sjis_decode_tbl2[c2];
7567 }
else if (
s == 32) {
7569 }
else if (
s == 33) {
7571 }
else if (
s == 60) {
7573 }
else if (
s == 80) {
7575 }
else if (
s == 81) {
7577 }
else if (
s == 137) {
7591 }
else if (
s >= (94*94) &&
s < (114*94)) {
7592 w =
s - (94*94) + 0xE000;
7597 if (c == 0x80 || c == 0xA0 || c >= 0xFD) {
7613 unsigned char *
out, *limit;
7619 unsigned int s1 = 0, s2 = 0, c1, c2;
7623 }
else if (w == 0x203E) {
7631 }
else if (w >= 0xE000 && w < (0xE000 + 20*94)) {
7635 s1 = (c1 << 8) | c2;
7641 }
else if (w == 0xFF3C) {
7643 }
else if (w == 0x2225) {
7645 }
else if (w == 0xFF0D) {
7647 }
else if (w == 0xFFE0) {
7649 }
else if (w == 0xFFE1) {
7651 }
else if (w == 0xFFE2) {
7653 }
else if (w == 0) {
7654 out = mb_convert_buf_add(
out, 0);
7658 if (!s1 || (s1 >= 0x8080 && !s2)) {
7678 out = mb_convert_buf_add(
out, s1);
7680 c1 = (s1 >> 8) & 0xFF;
7683 out = mb_convert_buf_add2(
out, s1, s2);
7692 unsigned char *
out, *limit;
7698 unsigned int s1 = 0, s2 = 0, c1, c2;
7708 }
else if (w >= 0xE000 && w < (0xE000 + 20*94)) {
7712 s1 = (c1 << 8) | c2;
7718 }
else if (w == 0xFF3C) {
7720 }
else if (w == 0x2225) {
7722 }
else if (w == 0xFF0D) {
7724 }
else if (w == 0xFFE0) {
7726 }
else if (w == 0xFFE1) {
7728 }
else if (w == 0xFFE2) {
7730 }
else if (w == 0) {
7731 out = mb_convert_buf_add(
out, 0);
7735 if (!s1 || (s1 >= 0x8080 && !s2)) {
7755 out = mb_convert_buf_add(
out, s1);
7757 c1 = (s1 >> 8) & 0xFF;
7760 out = mb_convert_buf_add2(
out, s1, s2);
/*
 * Byte-length table for the plain SJIS encoding: 256 entries, laid out as
 * 16 rows of 16, one row per high nibble of the index.
 * Entries 0x81-0x9F and 0xE0-0xEF hold 2; every other entry holds 1.
 * NOTE(review): presumably indexed by the first byte of a character to get
 * the character's total length in bytes -- confirm against the consumer.
 */
7767static const unsigned char mblen_table_sjis[] = {
7768 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
7769 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
7770 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
7771 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
7772 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
7773 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
7774 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
7775 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
7776 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, /* 0x80-0x8F: only 0x80 is 1 */
7777 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, /* 0x90-0x9F */
7778 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
7779 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
7780 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
7781 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
7782 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, /* 0xE0-0xEF */
7783 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 /* 0xF0-0xFF */
/*
 * Byte-length table listed alongside mbfl_encoding_sjis_mac_aliases further
 * down in this file: 256 entries, 16 rows of 16, one row per high nibble.
 * Entries 0x81-0x9F and 0xE0-0xED hold 2; every other entry holds 1, so
 * unlike mblen_table_sjis the entries 0xEE and 0xEF are 1 here.
 * NOTE(review): presumably indexed by the first byte of a character to get
 * the character's total length in bytes -- confirm against the consumer.
 */
7786static const unsigned char mblen_table_sjismac[] = {
7787 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
7788 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
7789 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
7790 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
7791 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
7792 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
7793 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
7794 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
7795 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, /* 0x80-0x8F: only 0x80 is 1 */
7796 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, /* 0x90-0x9F */
7797 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
7798 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
7799 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
7800 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
7801 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, /* 0xE0-0xEF: 0xEE and 0xEF are 1 */
7802 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 /* 0xF0-0xFF */
/*
 * Byte-length table listed further down in this file next to the docomo,
 * kddi, softbank and sjis2004 alias lists: 256 entries, 16 rows of 16, one
 * row per high nibble.
 * Entries 0x81-0x9F and 0xE0-0xFC hold 2; every other entry holds 1, so the
 * 2-valued range extends past 0xEF here, unlike mblen_table_sjis.
 * NOTE(review): presumably indexed by the first byte of a character to get
 * the character's total length in bytes -- confirm against the consumer.
 */
7805static const unsigned char mblen_table_sjis_mobile[] = {
7806 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
7807 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
7808 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
7809 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
7810 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
7811 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
7812 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
7813 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
7814 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, /* 0x80-0x8F: only 0x80 is 1 */
7815 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, /* 0x90-0x9F */
7816 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
7817 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
7818 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
7819 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
7820 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, /* 0xE0-0xEF */
7821 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1 /* 0xF0-0xFF: 0xFD-0xFF are 1 */
/* NULL-terminated list of alternative name strings for the SJIS encoding. */
7824static const char *mbfl_encoding_sjis_aliases[] = {
"x-sjis",
"SHIFT-JIS",
NULL};
7831 mbfl_filt_conv_sjis_wchar,
7832 mbfl_filt_conv_sjis_wchar_flush,
7841 mbfl_filt_conv_wchar_sjis,
7850 mbfl_encoding_sjis_aliases,
/* NULL-terminated list of alternative name strings for the SJIS-mac encoding. */
7861static const char *mbfl_encoding_sjis_mac_aliases[] = {
"MacJapanese",
"x-Mac-Japanese",
NULL};
7868 mbfl_filt_conv_sjis_mac_wchar,
7869 mbfl_filt_conv_sjis_wchar_flush,
7878 mbfl_filt_conv_wchar_sjis_mac,
7879 mbfl_filt_conv_wchar_sjis_mac_flush,
7887 mbfl_encoding_sjis_mac_aliases,
7888 mblen_table_sjismac,
7890 &vtbl_sjis_mac_wchar,
7891 &vtbl_wchar_sjis_mac,
7892 mb_sjismac_to_wchar,
7893 mb_wchar_to_sjismac,
/* NULL-terminated list of alternative name strings for the DoCoMo mobile SJIS variant. */
7898static const char *mbfl_encoding_sjis_docomo_aliases[] = {
"SJIS-DOCOMO",
"shift_jis-imode",
"x-sjis-emoji-docomo",
NULL};
/* NULL-terminated list of alternative name strings for the KDDI mobile SJIS variant. */
7899static const char *mbfl_encoding_sjis_kddi_aliases[] = {
"SJIS-KDDI",
"shift_jis-kddi",
"x-sjis-emoji-kddi",
NULL};
/* NULL-terminated list of alternative name strings for the SoftBank mobile SJIS variant. */
7900static const char *mbfl_encoding_sjis_sb_aliases[] = {
"SJIS-SOFTBANK",
"shift_jis-softbank",
"x-sjis-emoji-softbank",
NULL};
7907 mbfl_filt_conv_sjis_mobile_wchar,
7908 mbfl_filt_conv_sjis_wchar_flush,
7917 mbfl_filt_conv_wchar_sjis_mobile,
7924 "SJIS-Mobile#DOCOMO",
7926 mbfl_encoding_sjis_docomo_aliases,
7927 mblen_table_sjis_mobile,
7929 &vtbl_sjis_docomo_wchar,
7930 &vtbl_wchar_sjis_docomo,
7931 mb_sjis_docomo_to_wchar,
7932 mb_wchar_to_sjis_docomo,
7942 mbfl_filt_conv_sjis_mobile_wchar,
7943 mbfl_filt_conv_sjis_wchar_flush,
7952 mbfl_filt_conv_wchar_sjis_mobile,
7961 mbfl_encoding_sjis_kddi_aliases,
7962 mblen_table_sjis_mobile,
7964 &vtbl_sjis_kddi_wchar,
7965 &vtbl_wchar_sjis_kddi,
7966 mb_sjis_kddi_to_wchar,
7967 mb_wchar_to_sjis_kddi,
7977 mbfl_filt_conv_sjis_mobile_wchar,
7978 mbfl_filt_conv_sjis_wchar_flush,
7987 mbfl_filt_conv_wchar_sjis_mobile,
7994 "SJIS-Mobile#SOFTBANK",
7996 mbfl_encoding_sjis_sb_aliases,
7997 mblen_table_sjis_mobile,
7999 &vtbl_sjis_sb_wchar,
8000 &vtbl_wchar_sjis_sb,
8001 mb_sjis_sb_to_wchar,
8002 mb_wchar_to_sjis_sb,
/* NULL-terminated list of alternative name strings for the SJIS-2004 encoding. */
8014static const char *mbfl_encoding_sjis2004_aliases[] = {
"SJIS2004",
"Shift_JIS-2004",
NULL};
8021 mbfl_filt_conv_jis2004_wchar,
8022 mbfl_filt_conv_jis2004_wchar_flush,
8031 mbfl_filt_conv_wchar_jis2004,
8032 mbfl_filt_conv_wchar_jis2004_flush,
8040 mbfl_encoding_sjis2004_aliases,
8041 mblen_table_sjis_mobile,
8043 &vtbl_sjis2004_wchar,
8044 &vtbl_wchar_sjis2004,
8045 mb_sjis2004_to_wchar,
8046 mb_wchar_to_sjis2004,
/*
 * Byte-length table listed further down in this file next to both the cp932
 * and the sjiswin alias lists: 256 entries, 16 rows of 16, one row per high
 * nibble.
 * Entries 0x81-0x9F and 0xE0-0xFF hold 2; every other entry holds 1, so the
 * whole 0xF0 row is 2 here, unlike mblen_table_sjis.
 * NOTE(review): presumably indexed by the first byte of a character to get
 * the character's total length in bytes -- confirm against the consumer.
 */
8081static const unsigned char mblen_table_sjiswin[] = {
8082 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
8083 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
8084 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
8085 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
8086 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
8087 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
8088 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
8089 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
8090 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, /* 0x80-0x8F: only 0x80 is 1 */
8091 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, /* 0x90-0x9F */
8092 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
8093 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
8094 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
8095 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
8096 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, /* 0xE0-0xEF */
8097 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2 /* 0xF0-0xFF */
/* NULL-terminated list of alternative name strings for the CP932 encoding. */
8100static const char *mbfl_encoding_cp932_aliases[] = {
"MS932",
"Windows-31J",
"MS_Kanji",
NULL};
/* NULL-terminated list of alternative name strings for the SJIS-win encoding. */
8101static const char *mbfl_encoding_sjiswin_aliases[] = {
"SJIS-ms",
"SJIS-open",
NULL};
8109 mbfl_filt_conv_cp932_wchar_flush,
8118 mbfl_filt_conv_wchar_cp932,
8127 mbfl_encoding_cp932_aliases,
8128 mblen_table_sjiswin,
8143 mbfl_filt_conv_cp932_wchar,
8144 mbfl_filt_conv_cp932_wchar_flush,
8153 mbfl_filt_conv_wchar_sjiswin,
8162 mbfl_encoding_sjiswin_aliases,
8163 mblen_table_sjiswin,
8165 &vtbl_sjiswin_wchar,
8166 &vtbl_wchar_sjiswin,
8168 mb_wchar_to_sjiswin,
8181 switch (filter->
status) {
8183 if (c >= 0 && c < 0x80) {
8185 }
else if (c > 0xa0 && c < 0xff) {
8188 }
else if (c == 0x8e) {
8190 }
else if (c == 0x8f) {
8200 if (c > 0xa0 && c < 0xff) {
8201 s = (c1 - 0xa1)*94 + c - 0xa1;
8218 if (c > 0xa0 && c < 0xe0) {
8234 if (c > 0xA0 && c < 0xFF && c1 > 0xA0 && c1 < 0xFF) {
8235 s = (c1 - 0xa1)*94 + c - 0xa1;
8288 }
else if (c == 0x2225) {
8290 }
else if (c == 0xff0d) {
8292 }
else if (c == 0xffe0) {
8294 }
else if (c == 0xffe1) {
8296 }
else if (c == 0xffe2) {
8298 }
else if (c == 0) {
8307 }
else if (
s < 0x100) {
8310 }
else if (
s < 0x8080) {
8325static size_t mb_eucjp_to_wchar(
unsigned char **in,
size_t *in_len, uint32_t *
buf,
size_t bufsize,
unsigned int *
state)
8327 unsigned char *
p = *in, *e =
p + *in_len;
8328 uint32_t *
out =
buf, *limit =
buf + bufsize;
8330 while (
p < e &&
out < limit) {
8331 unsigned char c = *
p++;
8335 }
else if (c >= 0xA1 && c <= 0xFE &&
p < e) {
8337 unsigned char c2 = *
p++;
8338 if (c2 >= 0xA1 && c2 <= 0xFE) {
8339 unsigned int s = (c - 0xA1)*94 + c2 - 0xA1;
8351 }
else if (c == 0x8E &&
p < e) {
8353 unsigned char c2 = *
p++;
8355 }
else if (c == 0x8F) {
8358 unsigned char c2 = *
p++;
8359 unsigned char c3 = *
p++;
8360 if (c3 >= 0xA1 && c3 <= 0xFE && c2 >= 0xA1 && c2 <= 0xFE) {
8361 unsigned int s = (c2 - 0xA1)*94 + c3 - 0xA1;
8389 unsigned char *
out, *limit;
8412 }
else if (w == 0x2225) {
8414 }
else if (w == 0xFF0D) {
8416 }
else if (w == 0xFFE0) {
8418 }
else if (w == 0xFFE1) {
8420 }
else if (w == 0xFFE2) {
8422 }
else if (w == 0) {
8423 out = mb_convert_buf_add(
out, 0);
8433 out = mb_convert_buf_add(
out,
s);
8434 }
else if (
s < 0x100) {
8435 out = mb_convert_buf_add2(
out, 0x8E,
s);
8436 }
else if (
s < 0x8080) {
8437 out = mb_convert_buf_add2(
out, ((
s >> 8) & 0xFF) | 0x80, (
s & 0xFF) | 0x80);
8440 out = mb_convert_buf_add3(
out, 0x8F, ((
s >> 8) & 0xFF) | 0x80, (
s & 0xFF) | 0x80);
8451 switch (filter->
status) {
8453 if (c >= 0 && c < 0x80) {
8455 }
else if (c >= 0xa1 && c <= 0xfe) {
8458 }
else if (c == 0x8e) {
8460 }
else if (c == 0x8f) {
8470 if (c > 0xa0 && c < 0xff) {
8472 s = (c1 - 0xa1)*94 + c - 0xa1;
8476 }
else if (
s == 32) {
8478 }
else if (
s == 33) {
8480 }
else if (
s == 60) {
8482 }
else if (
s == 80) {
8484 }
else if (
s == 81) {
8486 }
else if (
s == 137) {
8496 }
else if (
s >= (84 * 94)) {
8497 w =
s - (84 * 94) + 0xe000;
8512 if (c > 0xa0 && c < 0xe0) {
8528 if (c1 > 0xa0 && c1 < 0xff && c > 0xa0 && c < 0xff) {
8529 s = (c1 - 0xa1)*94 + c - 0xa1;
8537 }
else if (
s >= (82*94) &&
s < (84*94)) {
8541 while (
n < cp932ext3_eucjp_table_size) {
8542 if (
s == cp932ext3_eucjp_table[
n]) {
8550 }
else if (
s >= (84*94)) {
8551 w =
s - (84*94) + (0xe000 + (94*10));
8595 }
else if (c == 0x203E) {
8605 }
else if (c >= 0xe000 && c < (0xe000 + 10*94)) {
8609 s1 = (c1 << 8) | c2;
8610 }
else if (c >= (0xe000 + 10*94) && c < (0xe000 + 20*94)) {
8611 s1 = c - (0xe000 + 10*94);
8614 s1 = (c1 << 8) | c2;
8624 }
else if (c == 0x2014) {
8626 }
else if (c == 0xff3c) {
8628 }
else if (c == 0x2225) {
8630 }
else if (c == 0xff0d) {
8632 }
else if (c == 0xffe0) {
8634 }
else if (c == 0xffe1) {
8636 }
else if (c == 0xffe2) {
8646 s1 = ((c1 / 94 + oh + 0x21) << 8) + (c1 % 94 + 0x21);
8656 if (c1 < cp932ext3_eucjp_table_size) {
8657 s1 = cp932ext3_eucjp_table[c1];
8668 }
else if (s1 <= 0) {
8676 }
else if (s1 < 0x100) {
8679 }
else if (s1 < 0x8080) {
8694static size_t mb_eucjpwin_to_wchar(
unsigned char **in,
size_t *in_len, uint32_t *
buf,
size_t bufsize,
unsigned int *
state)
8696 unsigned char *
p = *in, *e =
p + *in_len;
8697 uint32_t *
out =
buf, *limit =
buf + bufsize;
8699 while (
p < e &&
out < limit) {
8700 unsigned char c = *
p++;
8704 }
else if (c >= 0xA1 && c <= 0xFE &&
p < e) {
8705 unsigned char c2 = *
p++;
8707 if (c2 >= 0xA1 && c2 <= 0xFE) {
8708 unsigned int s = (c - 0xA1)*94 + c2 - 0xA1, w = 0;
8713 }
else if (
s == 32) {
8715 }
else if (
s == 33) {
8717 }
else if (
s == 60) {
8719 }
else if (
s == 80) {
8721 }
else if (
s == 81) {
8723 }
else if (
s == 137) {
8733 }
else if (
s >= (84 * 94)) {
8734 w =
s - (84 * 94) + 0xE000;
8744 }
else if (c == 0x8E &&
p < e) {
8745 unsigned char c2 = *
p++;
8746 if (c2 >= 0xA1 && c2 <= 0xDF) {
8747 *
out++ = 0xFEC0 + c2;
8751 }
else if (c == 0x8F &&
p < e) {
8752 unsigned char c2 = *
p++;
8757 unsigned char c3 = *
p++;
8759 if (c2 >= 0xA1 && c2 <= 0xFE && c3 >= 0xA1 && c3 <= 0xFE) {
8760 unsigned int s = (c2 - 0xA1)*94 + c3 - 0xA1, w = 0;
8766 }
else if (
s >= (82*94) &&
s < (84*94)) {
8768 for (
int i = 0; i < cp932ext3_eucjp_table_size; i++) {
8769 if (cp932ext3_eucjp_table[i] ==
s) {
8774 }
else if (
s >= (84*94)) {
8775 w =
s - (84*94) + 0xE000 + (94*10);
8799 unsigned char *
out, *limit;
8808 out = mb_convert_buf_add(
out, 0);
8810 }
else if (w == 0xAF) {
8812 }
else if (w == 0x203E) {
8822 }
else if (w >= 0xE000 && w < (0xE000 + 10*94)) {
8824 s = ((
s/94 + 0x75) << 8) + (
s%94) + 0x21;
8825 }
else if (w >= (0xE000 + 10*94) && w < (0xE000 + 20*94)) {
8826 s = w - (0xE000 + 10*94);
8827 s = ((
s/94 + 0xF5) << 8) + (
s%94) + 0xA1;
8836 }
else if (w == 0x2014) {
8838 }
else if (w == 0xFF3C) {
8840 }
else if (w == 0x2225) {
8842 }
else if (w == 0xFF0D) {
8844 }
else if (w == 0xFFE0) {
8846 }
else if (w == 0xFFE1) {
8848 }
else if (w == 0xFFE2) {
8861 s = cp932ext3_eucjp_table[i];
8872 }
else if (
s < 0x80) {
8873 out = mb_convert_buf_add(
out,
s);
8874 }
else if (
s < 0x100) {
8875 out = mb_convert_buf_add2(
out, 0x8E,
s);
8876 }
else if (
s < 0x8080) {
8877 out = mb_convert_buf_add2(
out, ((
s >> 8) & 0xFF) | 0x80, (
s & 0xFF) | 0x80);
8880 out = mb_convert_buf_add3(
out, 0x8F, ((
s >> 8) & 0xFF) | 0x80, (
s & 0xFF) | 0x80);
8891 switch (filter->
status) {
8893 if (c >= 0 && c < 0x80) {
8895 }
else if (c >= 0xA1 && c <= 0xFE) {
8898 }
else if (c == 0x8e) {
8908 if (c > 0xa0 && c < 0xff) {
8910 s = (c1 - 0xa1)*94 + c - 0xa1;
8914 }
else if (
s == 32) {
8916 }
else if (
s == 33) {
8918 }
else if (
s == 60) {
8920 }
else if (
s == 80) {
8922 }
else if (
s == 81) {
8924 }
else if (
s == 137) {
8948 if (c > 0xa0 && c < 0xe0) {
8991 if (s1 >= 0x8080) s1 = -1;
8995 }
else if (c == 0xff3c) {
8997 }
else if (c == 0x2225) {
8999 }
else if (c == 0xff0d) {
9001 }
else if (c == 0xffe0) {
9003 }
else if (c == 0xffe1) {
9005 }
else if (c == 0xffe2) {
9013 s1 = ((c1/94 + 0x2d) << 8) + (c1%94 + 0x21);
9023 s1 = ((c1/94 + 0x79) << 8) +(c1%94 + 0x21);
9032 }
else if (s1 <= 0) {
9040 }
else if (s1 < 0x100) {
9043 }
else if (s1 < 0x8080) {
9056static size_t mb_cp51932_to_wchar(
unsigned char **in,
size_t *in_len, uint32_t *
buf,
size_t bufsize,
unsigned int *
state)
9058 unsigned char *
p = *in, *e =
p + *in_len;
9059 uint32_t *
out =
buf, *limit =
buf + bufsize;
9061 while (
p < e &&
out < limit) {
9062 unsigned char c = *
p++;
9066 }
else if (c >= 0xA1 && c <= 0xFE &&
p < e) {
9067 unsigned char c2 = *
p++;
9068 if (c2 >= 0xA1 && c2 <= 0xFE) {
9069 unsigned int s = (c - 0xA1)*94 + c2 - 0xA1, w = 0;
9074 }
else if (
s == 32) {
9076 }
else if (
s == 33) {
9078 }
else if (
s == 60) {
9080 }
else if (
s == 80) {
9082 }
else if (
s == 81) {
9084 }
else if (
s == 137) {
9105 }
else if (c == 0x8E &&
p < e) {
9106 unsigned char c2 = *
p++;
9107 if (c2 >= 0xA1 && c2 <= 0xDF) {
9108 *
out++ = 0xFEC0 + c2;
9124 unsigned char *
out, *limit;
9133 out = mb_convert_buf_add(
out, 0);
9145 if (
s >= 0x8080)
s = 0;
9150 }
else if (w == 0xFF3C) {
9152 }
else if (w == 0x2225) {
9154 }
else if (w == 0xFF0D) {
9156 }
else if (w == 0xFFE0) {
9158 }
else if (w == 0xFFE1) {
9160 }
else if (w == 0xFFE2) {
9165 s = ((i/94 + 0x2D) << 8) + (i%94) + 0x21;
9172 s = ((i/94 + 0x79) << 8) + (i%94) + 0x21;
9180 if (!
s ||
s >= 0x8080) {
9183 }
else if (
s < 0x80) {
9184 out = mb_convert_buf_add(
out,
s);
9185 }
else if (
s < 0x100) {
9186 out = mb_convert_buf_add2(
out, 0x8E,
s);
9188 out = mb_convert_buf_add2(
out, ((
s >> 8) & 0xFF) | 0x80, (
s & 0xFF) | 0x80);
9195static size_t mb_eucjp2004_to_wchar(
unsigned char **in,
size_t *in_len, uint32_t *
buf,
size_t bufsize,
unsigned int *
state)
9197 unsigned char *
p = *in, *e =
p + *in_len;
9198 uint32_t *
out =
buf, *limit =
buf + bufsize - 1;
9200 while (
p < e &&
out < limit) {
9201 unsigned char c = *
p++;
9205 }
else if (c >= 0xA1 && c <= 0xFE) {
9211 unsigned char c2 = *
p++;
9212 if (c2 <= 0xA0 || c2 == 0xFF) {
9217 unsigned int s1 = c - 0x80, s2 = c2 - 0x80;
9218 unsigned int w1 = (s1 << 8) | s2, w = 0;
9221 if ((w1 >= 0x2477 && w1 <= 0x2479) || (w1 >= 0x2479 && w1 <= 0x247B) || (w1 >= 0x2577 && w1 <= 0x257E) || w1 == 0x2678 || w1 == 0x2B44 || (w1 >= 0x2B48 && w1 <= 0x2B4F) || (w1 >= 0x2B65 && w1 <= 0x2B66)) {
9224 *
out++ = jisx0213_u2_tbl[2*k];
9225 *
out++ = jisx0213_u2_tbl[2*k+1];
9231 w1 = (s1 - 0x21)*94 + s2 - 0x21;
9232 if (w1 < jisx0213_ucs_table_size) {
9233 w = jisx0213_ucs_table[w1];
9240 w = jisx0213_jis_u5_tbl[k] + 0x20000;
9245 }
else if (c == 0x8E &&
p < e) {
9247 unsigned char c2 = *
p++;
9248 if (c2 >= 0xA1 && c2 <= 0xDF) {
9249 *
out++ = 0xFEC0 + c2;
9253 }
else if (c == 0x8F &&
p < e) {
9254 unsigned char c2 = *
p++;
9255 if ((c2 == 0xA1 || (c2 >= 0xA3 && c2 <= 0xA5) || c2 == 0xA8 || (c2 >= 0xAC && c2 <= 0xAF) || (c2 >= 0xEE && c2 <= 0xFE)) &&
p < e) {
9256 unsigned char c3 = *
p++;
9258 if (c3 < 0xA1 || c3 == 0xFF) {
9263 unsigned int s1 = c2 - 0xA1, s2 = c3 - 0xA1;
9265 if (((s1 <= 4 && s1 != 1) || s1 == 7 || (s1 >= 11 && s1 <= 14) || (s1 >= 77 && s1 < 94)) && s2 < 94) {
9267 for (k = 0; k < jisx0213_p2_ofst_len; k++) {
9268 if (s1 == jisx0213_p2_ofst[k]) {
9272 k -= jisx0213_p2_ofst[k];
9275 unsigned int s = (s1 + 94 + k)*94 + s2;
9277 unsigned int w = jisx0213_ucs_table[
s];
9283 w = jisx0213_jis_u5_tbl[k] + 0x20000;
9306 unsigned char *
out, *limit;
9314 goto process_codepoint;
9323 if (w == 0xE6 || (w >= 0x254 && w <= 0x2E9) || (w >= 0x304B && w <= 0x3053) || (w >= 0x30AB && w <= 0x30C8) || w == 0x31F7) {
9324 for (
int k = 0; k < jisx0213_u2_tbl_len; k++) {
9325 if (w == jisx0213_u2_tbl[2*k]) {
9333 uint32_t w2 = *in++;
len--;
9334 if ((w == 0x254 || w == 0x28C || w == 0x259 || w == 0x25A) && w2 == 0x301) {
9337 if (w2 == jisx0213_u2_tbl[2*k+1]) {
9338 s = jisx0213_u2_key[k];
9345 s = jisx0213_u2_fb_tbl[k];
9353 for (
int k = 0; k < uni2jis_tbl_len; k++) {
9354 if (w >= uni2jis_tbl_range[k][0] && w <= uni2jis_tbl_range[k][1]) {
9355 s = uni2jis_tbl[k][w - uni2jis_tbl_range[k][0]];
9362 if (!
s && w >= ucs_c1_jisx0213_min && w <= ucs_c1_jisx0213_max) {
9363 int k = mbfl_bisec_srch(w, ucs_c1_jisx0213_tbl, ucs_c1_jisx0213_tbl_len);
9365 s = ucs_c1_jisx0213_ofst[k] + w - ucs_c1_jisx0213_tbl[2*k];
9370 if (!
s && w >= jisx0213_u5_tbl_min && w <= jisx0213_u5_tbl_max) {
9371 int k =
mbfl_bisec_srch2(w - 0x20000, jisx0213_u5_jis_key, jisx0213_u5_tbl_len);
9373 s = jisx0213_u5_jis_tbl[k];
9381 }
else if (w == 0xFE46) {
9383 }
else if (w >= 0xF91D && w <= 0xF9DC) {
9385 int k =
mbfl_bisec_srch2(w, ucs_r2b_jisx0213_cmap_key, ucs_r2b_jisx0213_cmap_len);
9387 s = ucs_r2b_jisx0213_cmap_val[k];
9395 }
else if (
s <= 0x7F) {
9396 out = mb_convert_buf_add(
out,
s);
9397 }
else if (
s <= 0xFF) {
9399 out = mb_convert_buf_add2(
out, 0x8E,
s);
9400 }
else if (
s <= 0x7EFF) {
9402 out = mb_convert_buf_add2(
out, ((
s >> 8) & 0xFF) + 0x80, (
s & 0xFF) + 0x80);
9404 unsigned int s2 =
s & 0xFF;
9405 int k = ((
s >> 8) & 0xFF) - 0x7F;
9407 s = jisx0213_p2_ofst[k] + 0x21;
9409 out = mb_convert_buf_add3(
out, 0x8F,
s | 0x80, s2 | 0x80);
9420 switch (filter->
status) {
9422 if (c >= 0 && c < 0x80) {
9424 }
else if ((c >= 0xA1 && c <= 0xA9) || (c >= 0xB0 && c <= 0xF7)) {
9435 if (c > 0xA0 && c < 0xFF) {
9436 w = (c1 - 0x81)*192 + c - 0x40;
9440 }
else if (w == 0x186A) {
9442 }
else if ((w >= 0x1921 && w <= 0x192A) || w == 0x1963 || (w >= 0x1C59 && w <= 0x1C7E) || (w >= 0x1DBB && w <= 0x1DC4)) {
9469 if (c == 0xB7 || c == 0x144 || c == 0x148 || c == 0x251 || c == 0x261) {
9477 }
else if (c == 0x2014 || (c >= 0x2170 && c <= 0x2179)) {
9493 }
else if (c == 0xFF5E) {
9495 }
else if (c >= 0xFF01 && c <= 0xFF5D) {
9496 s = c - 0xFF01 + 0xA3A1;
9497 }
else if (c >= 0xFFE0 && c <= 0xFFE5) {
9503 if (((
s >> 8) & 0xFF) < 0xA1 || (
s & 0xFF) < 0xA1) {
9510 }
else if (
s <= 0) {
9531 if (filter->
status == 1) {
9544static size_t mb_euccn_to_wchar(
unsigned char **in,
size_t *in_len, uint32_t *
buf,
size_t bufsize,
unsigned int *
state)
9546 unsigned char *
p = *in, *e =
p + *in_len;
9547 uint32_t *
out =
buf, *limit =
buf + bufsize;
9549 while (
p < e &&
out < limit) {
9550 unsigned char c = *
p++;
9554 }
else if (((c >= 0xA1 && c <= 0xA9) || (c >= 0xB0 && c <= 0xF7)) &&
p < e) {
9555 unsigned char c2 = *
p++;
9557 if (c2 >= 0xA1 && c2 <= 0xFE) {
9558 unsigned int w = (c - 0x81)*192 + c2 - 0x40;
9562 }
else if (w == 0x186A) {
9564 }
else if ((w >= 0x1921 && w <= 0x192A) || w == 0x1963 || (w >= 0x1C59 && w <= 0x1C7E) || (w >= 0x1DBB && w <= 0x1DC4)) {
9588 unsigned char *
out, *limit;
9597 if (w != 0xB7 && w != 0x144 && w != 0x148 && w != 0x251 && w != 0x261) {
9603 }
else if (w != 0x2014 && (w < 0x2170 || w > 0x2179)) {
9617 }
else if (w == 0xFF5E) {
9619 }
else if (w >= 0xFF01 && w <= 0xFF5D) {
9620 s = w - 0xFF01 + 0xA3A1;
9621 }
else if (w >= 0xFFE0 && w <= 0xFFE5) {
9627 if (((
s >> 8) & 0xFF) < 0xA1 || (
s & 0xFF) < 0xA1) {
9633 out = mb_convert_buf_add(
out, w);
9638 }
else if (
s < 0x80) {
9639 out = mb_convert_buf_add(
out,
s);
9641 out = mb_convert_buf_add2(
out, (
s >> 8) & 0xFF,
s & 0xFF);
9652 switch (filter->
status) {
9654 if (c >= 0 && c < 0x80) {
9656 }
else if (((c >= 0xA1 && c <= 0xA6) || (c >= 0xC2 && c <= 0xFD)) && c != 0xC3) {
9659 }
else if (c == 0x8E) {
9669 if (c > 0xA0 && c < 0xFF) {
9670 w = (c1 - 0xA1)*94 + (c - 0xA1);
9671 if (w >= 0 && w < cns11643_1_ucs_table_size) {
9672 w = cns11643_1_ucs_table[w];
9689 if (c == 0xA1 || c == 0xA2 || c == 0xAE) {
9691 filter->
cache = c - 0xA1;
9701 if (c >= 0xA1 && ((c1 == 0 && ((c >= 0xA1 && c <= 0xA6) || (c >= 0xC2 && c <= 0xFD)) && c != 0xC3) ||
9702 (c1 == 1 && c <= 0xF2) || (c1 == 13 && c <= 0xE7))) {
9704 filter->
cache = (c1 << 8) + c - 0xA1;
9714 if (c1 <= 0xDFF && c > 0xA0 && c < 0xFF) {
9715 int plane = (c1 & 0xF00) >> 8;
9716 s = (c1 & 0xFF)*94 + c - 0xA1;
9723 if (plane == 0 &&
s < cns11643_1_ucs_table_size) {
9724 w = cns11643_1_ucs_table[
s];
9725 }
else if (plane == 1 &&
s < cns11643_2_ucs_table_size) {
9726 w = cns11643_2_ucs_table[
s];
9727 }
else if (plane == 13 &&
s < cns11643_14_ucs_table_size) {
9728 w = cns11643_14_ucs_table[
s];
9753 if (c >= ucs_a1_cns11643_table_min && c < ucs_a1_cns11643_table_max) {
9754 s = ucs_a1_cns11643_table[c - ucs_a1_cns11643_table_min];
9755 }
else if (c >= ucs_a2_cns11643_table_min && c < ucs_a2_cns11643_table_max) {
9756 s = ucs_a2_cns11643_table[c - ucs_a2_cns11643_table_min];
9757 }
else if (c >= ucs_a3_cns11643_table_min && c < ucs_a3_cns11643_table_max) {
9758 s = ucs_a3_cns11643_table[c - ucs_a3_cns11643_table_min];
9759 }
else if (c >= ucs_i_cns11643_table_min && c < ucs_i_cns11643_table_max) {
9760 s = ucs_i_cns11643_table[c - ucs_i_cns11643_table_min];
9761 }
else if (c >= ucs_r_cns11643_table_min && c < ucs_r_cns11643_table_max) {
9762 s = ucs_r_cns11643_table[c - ucs_r_cns11643_table_min];
9768 }
else if (
s <= 0) {
9774 int plane = (
s & 0x1F0000) >> 16;
9779 s = (
s & 0xFFFF) | 0x8080;
9784 s = (0x8EA00000 + (plane << 16)) | ((
s & 0xFFFF) | 0x8080);
9811static size_t mb_euctw_to_wchar(
unsigned char **in,
size_t *in_len, uint32_t *
buf,
size_t bufsize,
unsigned int *
state)
9813 unsigned char *
p = *in, *e =
p + *in_len;
9814 uint32_t *
out =
buf, *limit =
buf + bufsize;
9816 while (
p < e &&
out < limit) {
9817 unsigned char c = *
p++;
9821 }
else if (((c >= 0xA1 && c <= 0xA6) || (c >= 0xC2 && c <= 0xFD)) && c != 0xC3 &&
p < e) {
9822 unsigned char c2 = *
p++;
9824 if (c2 >= 0xA1 && c2 <= 0xFE) {
9825 unsigned int w = (c - 0xA1)*94 + (c2 - 0xA1);
9826 if (w < cns11643_1_ucs_table_size) {
9827 w = cns11643_1_ucs_table[w];
9837 }
else if (c == 0x8E &&
p < e) {
9838 unsigned char c2 = *
p++;
9840 if ((c2 == 0xA1 || c2 == 0xA2 || c2 == 0xAE) &&
p < e) {
9841 unsigned int plane = c2 - 0xA1;
9842 unsigned char c3 = *
p++;
9844 if (c3 >= 0xA1 && ((plane == 0 && ((c3 >= 0xA1 && c3 <= 0xA6) || (c3 >= 0xC2 && c3 <= 0xFD)) && c3 != 0xC3) || (plane == 1 && c3 <= 0xF2) || (plane == 13 && c3 <= 0xE7)) &&
p < e) {
9845 unsigned char c4 = *
p++;
9847 if (c2 <= 0xAE && c4 > 0xA0 && c4 < 0xFF) {
9848 unsigned int s = (c3 - 0xA1)*94 + c4 - 0xA1, w = 0;
9854 if (plane == 0 &&
s < cns11643_1_ucs_table_size) {
9855 w = cns11643_1_ucs_table[
s];
9856 }
else if (plane == 1 &&
s < cns11643_2_ucs_table_size) {
9857 w = cns11643_2_ucs_table[
s];
9858 }
else if (plane == 13 &&
s < cns11643_14_ucs_table_size) {
9859 w = cns11643_14_ucs_table[
s];
9883 unsigned char *
out, *limit;
9891 if (w >= ucs_a1_cns11643_table_min && w < ucs_a1_cns11643_table_max) {
9892 s = ucs_a1_cns11643_table[w - ucs_a1_cns11643_table_min];
9893 }
else if (w >= ucs_a2_cns11643_table_min && w < ucs_a2_cns11643_table_max) {
9894 s = ucs_a2_cns11643_table[w - ucs_a2_cns11643_table_min];
9895 }
else if (w >= ucs_a3_cns11643_table_min && w < ucs_a3_cns11643_table_max) {
9896 s = ucs_a3_cns11643_table[w - ucs_a3_cns11643_table_min];
9897 }
else if (w >= ucs_i_cns11643_table_min && w < ucs_i_cns11643_table_max) {
9898 s = ucs_i_cns11643_table[w - ucs_i_cns11643_table_min];
9899 }
else if (w >= ucs_r_cns11643_table_min && w < ucs_r_cns11643_table_max) {
9900 s = ucs_r_cns11643_table[w - ucs_r_cns11643_table_min];
9905 out = mb_convert_buf_add(
out, 0);
9911 unsigned int plane =
s >> 16;
9914 out = mb_convert_buf_add(
out,
s);
9916 out = mb_convert_buf_add2(
out, ((
s >> 8) & 0xFF) | 0x80, (
s & 0xFF) | 0x80);
9920 out = mb_convert_buf_add4(
out, 0x8E, 0xA0 + plane, ((
s >> 8) & 0xFF) | 0x80, (
s & 0xFF) | 0x80);
9932 switch (filter->
status) {
9934 if (c >= 0 && c < 0x80) {
9936 }
else if (((c >= 0xA1 && c <= 0xAC) || (c >= 0xB0 && c <= 0xFD)) && c != 0xC9) {
9948 if (c1 >= 0xa1 && c1 <= 0xc6) {
9950 }
else if (c1 >= 0xc7 && c1 <= 0xfe && c1 != 0xc9) {
9953 if (flag > 0 && c >= 0xa1 && c <= 0xfe) {
9955 w = (c1 - 0x81)*190 + c - 0x41;
9959 w = (c1 - 0xc7)*94 + c - 0xa1;
10000 if (((
s >> 8) & 0xFF) < 0xA1 || (
s & 0xFF) < 0xA1) {
10028 if (filter->
status == 1) {
10041static size_t mb_euckr_to_wchar(
unsigned char **in,
size_t *in_len, uint32_t *
buf,
size_t bufsize,
unsigned int *
state)
10043 unsigned char *
p = *in, *e =
p + *in_len;
10044 uint32_t *
out =
buf, *limit =
buf + bufsize;
10046 while (
p < e &&
out < limit) {
10047 unsigned char c = *
p++;
10051 }
else if (((c >= 0xA1 && c <= 0xAC) || (c >= 0xB0 && c <= 0xFD)) && c != 0xC9 &&
p < e) {
10052 unsigned char c2 = *
p++;
10053 if (c2 < 0xA1 || c2 == 0xFF) {
10059 unsigned int w = (c - 0x81)*190 + c2 - 0x41;
10066 unsigned int w = (c - 0xC7)*94 + c2 - 0xA1;
10085 unsigned char *
out, *limit;
10090 uint32_t w = *in++;
10091 unsigned int s = 0;
10110 if (((
s >> 8) & 0xFF) < 0xA1 || (
s & 0xFF) < 0xA1) {
10116 out = mb_convert_buf_add(
out, w);
10121 }
else if (
s < 0x80) {
10122 out = mb_convert_buf_add(
out,
s);
10125 out = mb_convert_buf_add2(
out, (
s >> 8) & 0xFF,
s & 0xFF);
10134 switch (filter->
status) {
10136 if (c >= 0 && c < 0x80) {
10138 }
else if (c > 0x80 && c < 0xfe && c != 0xc9) {
10148 int c1 = filter->
cache, w = 0;
10150 if (c1 >= 0x81 && c1 <= 0xc6 && c >= 0x41 && c <= 0xfe) {
10151 w = (c1 - 0x81)*190 + (c - 0x41);
10155 }
else if (c1 >= 0xc7 && c1 < 0xfe && c >= 0xa1 && c <= 0xfe) {
10156 w = (c1 - 0xc7)*94 + (c - 0xa1);
10176 if (filter->
status == 1) {
10209 if (
s == 0 && c != 0) {
10227static size_t mb_uhc_to_wchar(
unsigned char **in,
size_t *in_len, uint32_t *
buf,
size_t bufsize,
unsigned int *
state)
10229 unsigned char *
p = *in, *e =
p + *in_len;
10230 uint32_t *
out =
buf, *limit =
buf + bufsize;
10234 while (
p < e &&
out < limit) {
10235 unsigned char c = *
p++;
10239 }
else if (c > 0x80 && c < 0xFE) {
10242 unsigned char c2 = *
p++;
10243 if (c2 < 0x41 || c2 == 0xFF) {
10247 unsigned int w = 0;
10250 w = (c - 0x81)*190 + c2 - 0x41;
10253 }
else if (c2 >= 0xA1) {
10254 w = (c - 0xC7)*94 + c2 - 0xA1;
10273 if (
p == e &&
out < limit) {
10274 unsigned char c = *
p++;
10278 *in_len = e -
p + 1;
10285 unsigned char *
out, *limit;
10290 uint32_t w = *in++;
10291 unsigned int s = 0;
10311 out = mb_convert_buf_add(
out, 0);
10316 }
else if (
s < 0x80) {
10317 out = mb_convert_buf_add(
out,
s);
10320 out = mb_convert_buf_add2(
out, (
s >> 8) & 0xFF,
s & 0xFF);
10327static const unsigned char mblen_table_eucjp[] = {
10328 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
10329 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
10330 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
10331 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
10332 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
10333 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
10334 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
10335 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
10336 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 3,
10337 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
10338 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
10339 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
10340 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
10341 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
10342 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
10343 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1
/* Alternate name strings for one encoding (EUC-JP, going by the identifier).
 * The array is terminated by a NULL sentinel rather than carrying a length.
 * NOTE(review): the code that matches names against this list is not visible
 * in this part of the file, so case-sensitivity of the comparison is unknown. */
10346static const char *mbfl_encoding_euc_jp_aliases[] = {
"EUC",
"EUC_JP",
"eucJP",
"x-euc-jp",
NULL};
10353 mbfl_filt_conv_eucjp_wchar,
10354 mbfl_filt_conv_eucjp_wchar_flush,
10363 mbfl_filt_conv_wchar_eucjp,
10372 mbfl_encoding_euc_jp_aliases,
/* Alternate name strings for one encoding (EUC-JP-2004, going by the
 * identifier). Holds a single alias followed by a NULL sentinel. */
10383static const char *mbfl_encoding_eucjp2004_aliases[] = {
"EUC_JP-2004",
NULL};
10390 mbfl_filt_conv_jis2004_wchar,
10391 mbfl_filt_conv_jis2004_wchar_flush,
10400 mbfl_filt_conv_wchar_jis2004,
10401 mbfl_filt_conv_wchar_jis2004_flush,
10409 mbfl_encoding_eucjp2004_aliases,
10412 &vtbl_eucjp2004_wchar,
10413 &vtbl_wchar_eucjp2004,
10414 mb_eucjp2004_to_wchar,
10415 mb_wchar_to_eucjp2004,
/* Alternate name strings for one encoding (the "eucjp_win" variant, going by
 * the identifier). Two aliases followed by a NULL sentinel. */
10420static const char *mbfl_encoding_eucjp_win_aliases[] = {
"eucJP-open",
"eucJP-ms",
NULL};
10427 mbfl_filt_conv_eucjpwin_wchar,
10428 mbfl_filt_conv_eucjpwin_wchar_flush,
10437 mbfl_filt_conv_wchar_eucjpwin,
10446 mbfl_encoding_eucjp_win_aliases,
10449 &vtbl_eucjpwin_wchar,
10450 &vtbl_wchar_eucjpwin,
10451 mb_eucjpwin_to_wchar,
10452 mb_wchar_to_eucjpwin,
/* Alternate name strings for one encoding (CP51932, going by the identifier).
 * Holds a single alias followed by a NULL sentinel. */
10457static const char *mbfl_encoding_cp51932_aliases[] = {
"cp51932",
NULL};
10464 mbfl_filt_conv_cp51932_wchar,
10465 mbfl_filt_conv_cp51932_wchar_flush,
10474 mbfl_filt_conv_wchar_cp51932,
10483 mbfl_encoding_cp51932_aliases,
10486 &vtbl_cp51932_wchar,
10487 &vtbl_wchar_cp51932,
10488 mb_cp51932_to_wchar,
10489 mb_wchar_to_cp51932,
10494static const unsigned char mblen_table_euccn[] = {
10495 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
10496 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
10497 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
10498 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
10499 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
10500 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
10501 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
10502 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
10503 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
10504 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
10505 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
10506 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
10507 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
10508 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
10509 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
10510 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1
/* Alternate name strings for one encoding (EUC-CN, going by the identifier).
 * Note that "gb2312" is listed here as an alias. The array is terminated by a
 * NULL sentinel rather than carrying a length. */
10513static const char *mbfl_encoding_euc_cn_aliases[] = {
"CN-GB",
"EUC_CN",
"eucCN",
"x-euc-cn",
"gb2312",
NULL};
10520 mbfl_filt_conv_euccn_wchar,
10521 mbfl_filt_conv_euccn_wchar_flush,
10530 mbfl_filt_conv_wchar_euccn,
10539 mbfl_encoding_euc_cn_aliases,
/* Alternate name strings for one encoding (EUC-TW, going by the identifier).
 * Three aliases followed by a NULL sentinel. */
10550static const char *mbfl_encoding_euc_tw_aliases[] = {
"EUC_TW",
"eucTW",
"x-euc-tw",
NULL};
10557 mbfl_filt_conv_euctw_wchar,
10558 mbfl_filt_conv_euctw_wchar_flush,
10567 mbfl_filt_conv_wchar_euctw,
10576 mbfl_encoding_euc_tw_aliases,
/* Alternate name strings for one encoding (EUC-KR, going by the identifier).
 * Three aliases followed by a NULL sentinel. */
10587static const char *mbfl_encoding_euc_kr_aliases[] = {
"EUC_KR",
"eucKR",
"x-euc-kr",
NULL};
10594 mbfl_filt_conv_euckr_wchar,
10595 mbfl_filt_conv_euckr_wchar_flush,
10604 mbfl_filt_conv_wchar_euckr,
10613 mbfl_encoding_euc_kr_aliases,
10628static const unsigned char mblen_table_81_to_fe[] = {
10629 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
10630 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
10631 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
10632 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
10633 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
10634 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
10635 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
10636 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
10637 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
10638 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
10639 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
10640 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
10641 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
10642 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
10643 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
10644 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1
/* Alternate name strings for one encoding (UHC, going by the identifier).
 * "CP949" is the only alias; the array ends with a NULL sentinel. */
10647static const char *mbfl_encoding_uhc_aliases[] = {
"CP949",
NULL};
10654 mbfl_filt_conv_uhc_wchar,
10655 mbfl_filt_conv_uhc_wchar_flush,
10664 mbfl_filt_conv_wchar_uhc,
10673 mbfl_encoding_uhc_aliases,
10674 mblen_table_81_to_fe,
10690 int c1, c2, c3, w = -1;
10692 switch (filter->
status) {
10694 if (c >= 0 && c < 0x80) {
10696 }
else if (c > 0x80 && c < 0xff) {
10705 c1 = filter->
cache;
10708 if (c1 >= 0x81 && c1 <= 0x84 && c >= 0x30 && c <= 0x39) {
10711 filter->
cache = (c1 << 8) | c;
10713 }
else if (c1 >= 0x90 && c1 <= 0xe3 && c >= 0x30 && c <= 0x39) {
10716 filter->
cache = (c1 << 8) | c;
10718 }
else if (((c1 >= 0xaa && c1 <= 0xaf) || (c1 >= 0xf8 && c1 <= 0xfe)) && (c >= 0xa1 && c <= 0xfe)) {
10720 w = 94*(c1 >= 0xf8 ? c1 - 0xf2 : c1 - 0xaa) + (c - 0xa1) + 0xe000;
10722 }
else if (c1 >= 0xa1 && c1 <= 0xa7 && c >= 0x40 && c < 0xa1 && c != 0x7f) {
10724 w = 96*(c1 - 0xa1) + c - (c >= 0x80 ? 0x41 : 0x40) + 0xe4c6;
10728 c2 = (c1 << 8) | c;
10731 (c2 >= 0xa2ab && c2 <= 0xa9f0 + (0xe80f-0xe801)) ||
10732 (c2 >= 0xd7fa && c2 <= 0xd7fa + (0xe814-0xe810)) ||
10733 (c2 >= 0xfe50 && c2 <= 0xfe80 + (0xe864-0xe844))
10736 if (c2 >= mbfl_gb18030_pua_tbl[
offset][2] && c2 <= mbfl_gb18030_pua_tbl[
offset][2] + mbfl_gb18030_pua_tbl[
offset][1] - mbfl_gb18030_pua_tbl[
offset][0]) {
10737 w = c2 - mbfl_gb18030_pua_tbl[
offset][2] + mbfl_gb18030_pua_tbl[
offset][0];
10745 if ((c1 >= 0xa1 && c1 <= 0xa9 && c >= 0xa1 && c <= 0xfe) ||
10746 (c1 >= 0xb0 && c1 <= 0xf7 && c >= 0xa1 && c <= 0xfe) ||
10747 (c1 >= 0x81 && c1 <= 0xa0 && c >= 0x40 && c <= 0xfe && c != 0x7f) ||
10748 (c1 >= 0xaa && c1 <= 0xfe && c >= 0x40 && c <= 0xa0 && c != 0x7f) ||
10749 (c1 >= 0xa8 && c1 <= 0xa9 && c >= 0x40 && c <= 0xa0 && c != 0x7f)) {
10750 w = (c1 - 0x81)*192 + c - 0x40;
10760 c1 = (filter->
cache >> 8) & 0xff;
10761 c2 = filter->
cache & 0xff;
10763 if (((c1 >= 0x81 && c1 <= 0x84) || (c1 >= 0x90 && c1 <= 0xe3)) && c2 >= 0x30 && c2 <= 0x39 && c >= 0x81 && c <= 0xfe) {
10764 filter->
cache = (c1 << 16) | (c2 << 8) | c;
10772 c1 = (filter->
cache >> 16) & 0xff;
10773 c2 = (filter->
cache >> 8) & 0xff;
10774 c3 = filter->
cache & 0xff;
10776 if (((c1 >= 0x81 && c1 <= 0x84) || (c1 >= 0x90 && c1 <= 0xe3)) && c2 >= 0x30 && c2 <= 0x39 && c3 >= 0x81 && c3 <= 0xfe && c >= 0x30 && c <= 0x39) {
10777 if (c1 >= 0x90 && c1 <= 0xe3) {
10778 w = ((((c1 - 0x90)*10 + (c2 - 0x30))*126 + (c3 - 0x81)))*10 + (c - 0x30) + 0x10000;
10779 if (w > 0x10FFFF) {
10784 w = (((c1 - 0x81)*10 + (c2 - 0x30))*126 + (c3 - 0x81))*10 + (c - 0x30);
10785 if (w >= 0 && w <= 39419) {
10786 int k = mbfl_bisec_srch(w, mbfl_gb2uni_tbl, mbfl_gb_uni_max);
10787 w += mbfl_gb_uni_ofst[k];
10823 int c1,
s = 0, s1 = 0;
10845 }
else if (c == 0xf979) {
10847 }
else if (c == 0xf995) {
10849 }
else if (c == 0xf9e7) {
10851 }
else if (c == 0xf9f1) {
10853 }
else if (c >= 0xfa0c && c <= 0xfa29) {
10866 }
else if (c == 0xff5e) {
10868 }
else if (c >= 0xff01 && c <= 0xff5d) {
10869 s = c - 0xff01 + 0xa3a1;
10870 }
else if (c >= 0xffe0 && c <= 0xffe5) {
10877 if (
s <= 0 && c >= mbfl_gb18030_c_tbl_key[0] && c <= mbfl_gb18030_c_tbl_key[mbfl_gb18030_c_tbl_max-1]) {
10880 s = mbfl_gb18030_c_tbl_val[k1];
10884 if (c >= 0xe000 && c <= 0xe864) {
10888 s = (c1 % 94) + 0xa1;
10890 s |= (c1 < 0x06 ? c1 + 0xaa : c1 + 0xf2) << 8;
10893 s = ((c1 / 96) + 0xa1) << 8;
10895 s |= c1 + (c1 >= 0x3f ? 0x41 : 0x40);
10900 k2 = mbfl_gb18030_pua_tbl_max;
10902 k = (k1 + k2) >> 1;
10903 if (c < mbfl_gb18030_pua_tbl[k][0]) {
10905 }
else if (c > mbfl_gb18030_pua_tbl[k][1]) {
10908 s = c - mbfl_gb18030_pua_tbl[k][0] + mbfl_gb18030_pua_tbl[k][2];
10918 s = mbfl_bisec_srch(c, mbfl_uni2gb_tbl, mbfl_gb_uni_max);
10920 c1 = c - mbfl_gb_uni_ofst[
s];
10921 s = (c1 % 10) + 0x30;
10923 s |= ((c1 % 126) + 0x81) << 8;
10925 s |= ((c1 % 10) + 0x30) << 16;
10929 }
else if (c >= 0x10000 && c <= 0x10ffff) {
10932 s = (c1 % 10) + 0x30;
10934 s |= ((c1 % 126) + 0x81) << 8;
10936 s |= ((c1 % 10) + 0x30) << 16;
10943 }
else if (
s == 0) {
10950 }
else if (s1 > 0) {
10966static const unsigned short gb18030_pua_tbl3[] = {
10968 0x0000,0xE816,0xE817,0xE818,0x0000,0x0000,0x0000,0x0000,
10969 0x0000,0xE81E,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,
10970 0x0000,0xE826,0x0000,0x0000,0x0000,0x0000,0xE82B,0xE82C,
10971 0x0000,0x0000,0x0000,0x0000,0xE831,0xE832,0x0000,0x0000,
10972 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0xE83B,0x0000,
10973 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0xE843,0x0000,
10974 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,
10975 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,
10976 0xE854,0xE855,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,
10977 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,
10982static size_t mb_gb18030_to_wchar(
unsigned char **in,
size_t *in_len, uint32_t *
buf,
size_t bufsize,
unsigned int *
state)
10984 unsigned char *
p = *in, *e =
p + *in_len;
10985 uint32_t *
out =
buf, *limit =
buf + bufsize;
10987 while (
p < e &&
out < limit) {
10988 unsigned char c = *
p++;
10992 }
else if (c == 0x80 || c == 0xFF) {
10999 unsigned char c2 = *
p++;
11001 if (((c >= 0x81 && c <= 0x84) || (c >= 0x90 && c <= 0xE3)) && c2 >= 0x30 && c2 <= 0x39) {
11006 unsigned char c3 = *
p++;
11008 if (c3 >= 0x81 && c3 <= 0xFE &&
p < e) {
11009 unsigned char c4 = *
p++;
11011 if (c4 >= 0x30 && c4 <= 0x39) {
11012 if (c >= 0x90 && c <= 0xE3) {
11013 unsigned int w = ((((c - 0x90)*10 + (c2 - 0x30))*126 + (c3 - 0x81)))*10 + (c4 - 0x30) + 0x10000;
11017 unsigned int w = (((c - 0x81)*10 + (c2 - 0x30))*126 + (c3 - 0x81))*10 + (c4 - 0x30);
11019 *
out++ = w + mbfl_gb_uni_ofst[mbfl_bisec_srch(w, mbfl_gb2uni_tbl, mbfl_gb_uni_max)];
11030 }
else if (((c >= 0xAA && c <= 0xAF) || (c >= 0xF8 && c <= 0xFE)) && (c2 >= 0xA1 && c2 <= 0xFE)) {
11032 *
out++ = 94*(c >= 0xF8 ? c - 0xF2 : c - 0xAA) + (c2 - 0xA1) + 0xE000;
11033 }
else if (c >= 0xA1 && c <= 0xA7 && c2 >= 0x40 && c2 < 0xA1 && c2 != 0x7F) {
11035 *
out++ = 96*(c - 0xA1) + c2 - (c2 >= 0x80 ? 0x41 : 0x40) + 0xE4C6;
11036 }
else if (c2 >= 0x40 && c2 != 0x7F && c2 != 0xFF) {
11037 unsigned int w = (c - 0x81)*192 + c2 - 0x40;
11041 if (w != 0x1963 && w != 0x1DBF && (w < 0x1E49 || w > 0x1E55) && w != 0x1E7F) {
11045 }
else if (w >= 0x413A) {
11049 }
else if (w >= 0x5DD0 && w <= 0x5E20) {
11050 unsigned int c = gb18030_pua_tbl3[w - 0x5DD0];
11059 if ((c >= 0x81 && c <= 0xA9) || (c >= 0xB0 && c <= 0xF7 && c2 >= 0xA1) || (c >= 0xAA && c <= 0xFE && c2 <= 0xA0)) {
11078 unsigned char *
out, *limit;
11083 uint32_t w = *in++;
11084 unsigned int s = 0;
11087 out = mb_convert_buf_add(
out, 0);
11109 }
else if (w == 0xF979) {
11111 }
else if (w == 0xF995) {
11113 }
else if (w == 0xF9E7) {
11115 }
else if (w == 0xF9F1) {
11117 }
else if (w >= 0xFA0C && w <= 0xFA29) {
11130 }
else if (w == 0xFF5E) {
11132 }
else if (w >= 0xFF01 && w <= 0xFF5D) {
11133 s = w - 0xFF01 + 0xA3A1;
11134 }
else if (w >= 0xFFE0 && w <= 0xFFE5) {
11137 }
else if (w >= 0xE000 && w <= 0xE864) {
11141 unsigned int c1 = w - 0xE000;
11142 s = (c1 % 94) + 0xA1;
11144 s |= (c1 + (c1 < 0x06 ? 0xAA : 0xF2)) << 8;
11146 unsigned int c1 = w - 0xE4C6;
11147 s = ((c1 / 96) + 0xA1) << 8;
11149 s |= c1 + (c1 >= 0x3F ? 0x41 : 0x40);
11153 unsigned int k1 = 0, k2 = mbfl_gb18030_pua_tbl_max;
11155 unsigned int k = (k1 + k2) >> 1;
11156 if (w < mbfl_gb18030_pua_tbl[k][0]) {
11158 }
else if (w > mbfl_gb18030_pua_tbl[k][1]) {
11161 s = w - mbfl_gb18030_pua_tbl[k][0] + mbfl_gb18030_pua_tbl[k][2];
11170 if (!
s && w >= mbfl_gb18030_c_tbl_key[0] && w <= mbfl_gb18030_c_tbl_key[mbfl_gb18030_c_tbl_max-1]) {
11171 int i =
mbfl_bisec_srch2(w, mbfl_gb18030_c_tbl_key, mbfl_gb18030_c_tbl_max);
11173 s = mbfl_gb18030_c_tbl_val[i];
11178 if (!
s && w >= 0x80 && w <= 0xFFFF) {
11180 int i = mbfl_bisec_srch(w, mbfl_uni2gb_tbl, mbfl_gb_uni_max);
11182 unsigned int c1 = w - mbfl_gb_uni_ofst[i];
11183 s = (c1 % 10) + 0x30;
11185 s |= ((c1 % 126) + 0x81) << 8;
11187 s |= ((c1 % 10) + 0x30) << 16;
11189 s |= (c1 + 0x81) << 24;
11191 }
else if (w >= 0x10000 && w <= 0x10FFFF) {
11193 unsigned int c1 = w - 0x10000;
11194 s = (c1 % 10) + 0x30;
11196 s |= ((c1 % 126) + 0x81) << 8;
11198 s |= ((c1 % 10) + 0x30) << 16;
11200 s |= (c1 + 0x90) << 24;
11206 }
else if (
s < 0x80) {
11207 out = mb_convert_buf_add(
out,
s);
11208 }
else if (
s > 0xFFFFFF) {
11210 out = mb_convert_buf_add4(
out, (
s >> 24) & 0xFF, (
s >> 16) & 0xFF, (
s >> 8) & 0xFF,
s & 0xFF);
11213 out = mb_convert_buf_add2(
out, (
s >> 8) & 0xFF,
s & 0xFF);
11222 int c1, c2, w = -1;
11224 switch (filter->
status) {
11226 if (c >= 0 && c < 0x80) {
11228 }
else if (c == 0x80) {
11230 }
else if (c < 0xff) {
11240 c1 = filter->
cache;
11242 if (((c1 >= 0xaa && c1 <= 0xaf) || (c1 >= 0xf8 && c1 <= 0xfe)) &&
11243 (c >= 0xa1 && c <= 0xfe)) {
11245 w = 94*(c1 >= 0xf8 ? c1 - 0xf2 : c1 - 0xaa) + (c - 0xa1) + 0xe000;
11247 }
else if (c1 >= 0xa1 && c1 <= 0xa7 && c >= 0x40 && c < 0xa1 && c != 0x7f) {
11249 w = 96*(c1 - 0xa1) + c - (c >= 0x80 ? 0x41 : 0x40) + 0xe4c6;
11253 c2 = (c1 << 8) | c;
11256 (c2 >= 0xa2ab && c2 <= 0xa9f0 + (0xe80f-0xe801)) ||
11257 (c2 >= 0xd7fa && c2 <= 0xd7fa + (0xe814-0xe810)) ||
11258 (c2 >= 0xfe50 && c2 <= 0xfe80 + (0xe864-0xe844))
11261 for (k = 0; k < mbfl_cp936_pua_tbl_max; k++) {
11262 if (c2 >= mbfl_cp936_pua_tbl[k][2] &&
11263 c2 <= mbfl_cp936_pua_tbl[k][2] +
11264 mbfl_cp936_pua_tbl[k][1] - mbfl_cp936_pua_tbl[k][0]) {
11265 w = c2 - mbfl_cp936_pua_tbl[k][2] + mbfl_cp936_pua_tbl[k][0];
11273 if (c1 < 0xff && c1 > 0x80 && c >= 0x40 && c < 0xff && c != 0x7f) {
11274 w = (c1 - 0x81)*192 + c - 0x40;
11316 }
else if (c == 0x2218) {
11318 }
else if (c == 0x223c) {
11329 }
else if (c >= 0xe000 && c <= 0xe864) {
11333 s = (c1 % 94) + 0xa1; c1 /= 94;
11334 s |= (c1 < 0x06 ? c1 + 0xaa : c1 + 0xf2) << 8;
11337 s = ((c1 / 96) + 0xa1) << 8; c1 %= 96;
11338 s |= c1 + (c1 >= 0x3f ? 0x41 : 0x40);
11342 k1 = 0; k2 = mbfl_cp936_pua_tbl_max;
11344 k = (k1 + k2) >> 1;
11345 if (c < mbfl_cp936_pua_tbl[k][0]) {
11347 }
else if (c > mbfl_cp936_pua_tbl[k][1]) {
11350 s = c - mbfl_cp936_pua_tbl[k][0] + mbfl_cp936_pua_tbl[k][2];
11355 }
else if (c == 0xf8f5) {
11368 }
else if (c == 0xff5e) {
11370 }
else if (c >= 0xff01 && c <= 0xff5d) {
11371 s = c - 0xff01 + 0xa3a1;
11372 }
else if (c >= 0xffe0 && c <= 0xffe5) {
11380 }
else if (
s <= 0) {
11386 if (
s <= 0x80 ||
s == 0xff) {
11399static size_t mb_cp936_to_wchar(
unsigned char **in,
size_t *in_len, uint32_t *
buf,
size_t bufsize,
unsigned int *
state)
11401 unsigned char *
p = *in, *e =
p + *in_len;
11402 uint32_t *
out =
buf, *limit =
buf + bufsize;
11404 while (
p < e &&
out < limit) {
11405 unsigned char c = *
p++;
11409 }
else if (c == 0x80) {
11411 }
else if (c < 0xFF) {
11417 unsigned char c2 = *
p++;
11418 if (c2 < 0x40 || c2 == 0x7F || c2 == 0xFF) {
11423 if (((c >= 0xAA && c <= 0xAF) || (c >= 0xF8 && c <= 0xFE)) && c2 >= 0xA1) {
11425 *
out++ = 94*(c >= 0xF8 ? c - 0xF2 : c - 0xAA) + (c2 - 0xA1) + 0xE000;
11426 }
else if (c >= 0xA1 && c <= 0xA7 && c2 < 0xA1) {
11428 *
out++ = 96*(c - 0xA1) + c2 - (c2 >= 0x80 ? 0x41 : 0x40) + 0xE4C6;
11430 unsigned int w = (c - 0x81)*192 + c2 - 0x40;
11440 }
else if (w >= 0x413A) {
11444 }
else if (w >= 0x5DD0 && w <= 0x5E20) {
11466 unsigned char *
out, *limit;
11471 uint32_t w = *in++;
11472 unsigned int s = 0;
11481 }
else if (w == 0x2218) {
11483 }
else if (w == 0x223C) {
11494 }
else if (w >= 0xE000 && w <= 0xE864) {
11498 unsigned int c1 = w - 0xE000;
11499 s = (c1 % 94) + 0xA1;
11501 s |= (c1 < 0x6 ? c1 + 0xAA : c1 + 0xF2) << 8;
11503 unsigned int c1 = w - 0xE4C6;
11504 s = ((c1 / 96) + 0xA1) << 8;
11506 s |= c1 + (c1 >= 0x3F ? 0x41 : 0x40);
11510 unsigned int k1 = 0;
11511 unsigned int k2 = mbfl_cp936_pua_tbl_max;
11513 int k = (k1 + k2) >> 1;
11514 if (w < mbfl_cp936_pua_tbl[k][0]) {
11516 }
else if (w > mbfl_cp936_pua_tbl[k][1]) {
11519 s = w - mbfl_cp936_pua_tbl[k][0] + mbfl_cp936_pua_tbl[k][2];
11524 }
else if (w == 0xF8F5) {
11538 }
else if (w == 0xFF5E) {
11540 }
else if (w >= 0xFF01 && w <= 0xFF5D) {
11541 s = w - 0xFF01 + 0xA3A1;
11542 }
else if (w >= 0xFFE0 && w <= 0xFFE5) {
11549 out = mb_convert_buf_add(
out, 0);
11554 }
else if (
s <= 0x80 ||
s == 0xFF) {
11555 out = mb_convert_buf_add(
out,
s);
11557 out = mb_convert_buf_add2(
out, (
s >> 8) & 0xFF,
s & 0xFF);
11564static const unsigned short gb18030_2022_pua_tbl3[] = {
11566 0x0000,0xE816,0xE817,0xE818,0x0000,0x0000,0x0000,0x0000,
11567 0x0000,0x9FB4,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,
11568 0x0000,0x9FB5,0x0000,0x0000,0x0000,0x0000,0x9FB6,0x9FB7,
11569 0x0000,0x0000,0x0000,0x0000,0xE831,0x9FB8,0x0000,0x0000,
11570 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0xE83B,0x0000,
11571 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x9FB9,0x0000,
11572 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,
11573 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,
11574 0x9FBA,0xE855,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,
11575 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,
11580static size_t mb_gb18030_2022_to_wchar(
unsigned char **in,
size_t *in_len, uint32_t *
buf,
size_t bufsize,
unsigned int *
state)
11582 unsigned char *
p = *in, *e =
p + *in_len;
11583 uint32_t *
out =
buf, *limit =
buf + bufsize;
11585 while (
p < e &&
out < limit) {
11586 unsigned char c = *
p++;
11590 }
else if (c == 0x80 || c == 0xFF) {
11597 unsigned char c2 = *
p++;
11599 if (((c >= 0x81 && c <= 0x84) || (c >= 0x90 && c <= 0xE3)) && c2 >= 0x30 && c2 <= 0x39) {
11604 unsigned char c3 = *
p++;
11606 if (c3 >= 0x81 && c3 <= 0xFE &&
p < e) {
11607 unsigned char c4 = *
p++;
11609 if (c4 >= 0x30 && c4 <= 0x39) {
11610 if (c >= 0x90 && c <= 0xE3) {
11611 unsigned int w = ((((c - 0x90)*10 + (c2 - 0x30))*126 + (c3 - 0x81)))*10 + (c4 - 0x30) + 0x10000;
11615 unsigned int w = (((c - 0x81)*10 + (c2 - 0x30))*126 + (c3 - 0x81))*10 + (c4 - 0x30);
11618 }
else if (w == 0x98A6) {
11620 }
else if (w == 0x98A5) {
11622 }
else if (w >= 0x98A7 && w <= 0x98AD) {
11623 *
out++ = w + (0xE790 - 0x98A7);
11624 }
else if (w == 0x1D21) {
11626 }
else if (w == 0x4A71) {
11628 }
else if (w == 0x4A72) {
11630 }
else if (w >= 0x4A73 && w <= 0x4A74) {
11631 *
out++ = w + (0xE82B - 0x4A73);
11632 }
else if (w == 0x4A75) {
11634 }
else if (w == 0x4A76) {
11636 }
else if (w == 0x4A77) {
11638 }
else if (w == 0x4A78) {
11640 }
else if (w <= 0x99FB) {
11641 *
out++ = w + mbfl_gb_uni_ofst[mbfl_bisec_srch(w, mbfl_gb2uni_tbl, mbfl_gb_uni_max)];
11652 }
else if (((c >= 0xAA && c <= 0xAF) || (c >= 0xF8 && c <= 0xFE)) && (c2 >= 0xA1 && c2 <= 0xFE)) {
11654 *
out++ = 94*(c >= 0xF8 ? c - 0xF2 : c - 0xAA) + (c2 - 0xA1) + 0xE000;
11655 }
else if (c >= 0xA1 && c <= 0xA7 && c2 >= 0x40 && c2 < 0xA1 && c2 != 0x7F) {
11657 *
out++ = 96*(c - 0xA1) + c2 - (c2 >= 0x80 ? 0x41 : 0x40) + 0xE4C6;
11658 }
else if (c2 >= 0x40 && c2 != 0x7F && c2 != 0xFF) {
11659 unsigned int w = (c - 0x81)*192 + c2 - 0x40;
11663 if (w != 0x1963 && w != 0x1DBF && (w < 0x1E49 || w > 0x1E55) && w != 0x1E7F) {
11667 }
else if (w >= 0x413A) {
11671 }
else if (w >= 0x5DD0 && w <= 0x5E20) {
11672 unsigned int c = gb18030_2022_pua_tbl3[w - 0x5DD0];
11681 if ((c >= 0x81 && c <= 0xA9) || (c >= 0xB0 && c <= 0xF7 && c2 >= 0xA1) || (c >= 0xAA && c <= 0xFE && c2 <= 0xA0)) {
11700 unsigned char *
out, *limit;
11705 uint32_t w = *in++;
11706 unsigned int s = 0;
11709 out = mb_convert_buf_add(
out, 0);
11725 }
else if (w >= 0x9FB4 && w <= 0x9FBB) {
11729 }
else if (w == 0x9FB5) {
11731 }
else if (w == 0x9FB6) {
11733 }
else if (w == 0x9FB7) {
11735 }
else if (w == 0x9FB8) {
11737 }
else if (w == 0x9FB9) {
11739 }
else if (w == 0x9FBA) {
11750 }
else if (w == 0xF979) {
11752 }
else if (w == 0xF995) {
11754 }
else if (w == 0xF9E7) {
11756 }
else if (w == 0xF9F1) {
11758 }
else if (w >= 0xFA0C && w <= 0xFA29) {
11771 }
else if (w == 0xFF5E) {
11773 }
else if (w >= 0xFF01 && w <= 0xFF5D) {
11774 s = w - 0xFF01 + 0xA3A1;
11775 }
else if (w >= 0xFFE0 && w <= 0xFFE5) {
11778 }
else if (w >= 0xE000 && w <= 0xE864) {
11782 unsigned int c1 = w - 0xE000;
11783 s = (c1 % 94) + 0xA1;
11785 s |= (c1 + (c1 < 0x06 ? 0xAA : 0xF2)) << 8;
11787 unsigned int c1 = w - 0xE4C6;
11788 s = ((c1 / 96) + 0xA1) << 8;
11790 s |= c1 + (c1 >= 0x3F ? 0x41 : 0x40);
11794 unsigned int k1 = 0, k2 = mbfl_gb18030_2022_pua_tbl_max;
11796 unsigned int k = (k1 + k2) >> 1;
11797 if (w < mbfl_gb18030_2022_pua_tbl[k][0]) {
11799 }
else if (w > mbfl_gb18030_2022_pua_tbl[k][1]) {
11802 s = w - mbfl_gb18030_2022_pua_tbl[k][0] + mbfl_gb18030_2022_pua_tbl[k][2];
11807 }
else if (w >= 0xFE10 && w <= 0xFE19) {
11811 }
else if (w == 0xFE12) {
11813 }
else if (w <= 0xFE16) {
11814 s = w - (0xFE10 - 0xA6D9);
11815 }
else if (w <= 0xFE18) {
11816 s = w - (0xFE17 - 0xA6EC);
11820 }
else if (w == 0x1E3F) {
11827 if (!
s && w >= mbfl_gb18030_c_tbl_key[0] && w <= mbfl_gb18030_c_tbl_key[mbfl_gb18030_c_tbl_max-1]) {
11828 int i =
mbfl_bisec_srch2(w, mbfl_gb18030_c_tbl_key, mbfl_gb18030_c_tbl_max);
11830 s = mbfl_gb18030_c_tbl_val[i];
11835 if (!
s && w >= 0x80 && w <= 0xFFFF) {
11837 int i = mbfl_bisec_srch(w, mbfl_uni2gb2022_tbl, mbfl_gb2022_uni_max);
11839 unsigned int c1 = w - mbfl_gb2022_uni_ofst[i];
11840 s = (c1 % 10) + 0x30;
11842 s |= ((c1 % 126) + 0x81) << 8;
11844 s |= ((c1 % 10) + 0x30) << 16;
11846 s |= (c1 + 0x81) << 24;
11848 }
else if (w >= 0x10000 && w <= 0x10FFFF) {
11850 unsigned int c1 = w - 0x10000;
11851 s = (c1 % 10) + 0x30;
11853 s |= ((c1 % 126) + 0x81) << 8;
11855 s |= ((c1 % 10) + 0x30) << 16;
11857 s |= (c1 + 0x90) << 24;
11863 }
else if (
s < 0x80) {
11864 out = mb_convert_buf_add(
out,
s);
11865 }
else if (
s > 0xFFFFFF) {
11867 out = mb_convert_buf_add4(
out, (
s >> 24) & 0xFF, (
s >> 16) & 0xFF, (
s >> 8) & 0xFF,
s & 0xFF);
11870 out = mb_convert_buf_add2(
out, (
s >> 8) & 0xFF,
s & 0xFF);
11879static zend_always_inline unsigned char* step_through_gb18030_str(
unsigned char *
p,
unsigned char *limit)
11881 while (
p < limit) {
11882 unsigned char c = *
p;
11883 if (c < 0x81 || c == 0xFF) {
11886 if (limit -
p == 1) {
11889 unsigned char c2 =
p[1];
11891 unsigned int w = (c2 >= 0x30 && c2 <= 0x39) ? 4 : 2;
11892 if (limit -
p < w) {
11904 unsigned char *
start = step_through_gb18030_str(str, str +
from);
11909 return zend_string_init_fast((
const char*)
start,
end -
start);
11911 unsigned char *_end = step_through_gb18030_str(
start,
start +
len);
11912 return zend_string_init_fast((
const char*)
start, _end -
start);
/* Alternate name strings for one encoding (GB18030, going by the identifier).
 * Two aliases followed by a NULL sentinel. */
11916static const char *mbfl_encoding_gb18030_aliases[] = {
"gb-18030",
"gb-18030-2000",
NULL};
11923 mbfl_filt_conv_gb18030_wchar,
11924 mbfl_filt_conv_gb18030_wchar_flush,
11933 mbfl_filt_conv_wchar_gb18030,
11942 mbfl_encoding_gb18030_aliases,
11945 &vtbl_gb18030_wchar,
11946 &vtbl_wchar_gb18030,
11947 mb_gb18030_to_wchar,
11948 mb_wchar_to_gb18030,
/* Alternate name strings for one encoding (CP936, going by the identifier).
 * Note that "GBK" is listed here as an alias. The array ends with a NULL
 * sentinel. */
11953static const char *mbfl_encoding_cp936_aliases[] = {
"CP-936",
"GBK",
NULL};
11960 mbfl_filt_conv_cp936_wchar,
11961 mbfl_filt_conv_cp936_wchar_flush,
11970 mbfl_filt_conv_wchar_cp936,
11979 mbfl_encoding_cp936_aliases,
11980 mblen_table_81_to_fe,
11999 mb_gb18030_2022_to_wchar,
12000 mb_wchar_to_gb18030_2022,
12010static unsigned short cp950_pua_tbl[][4] = {
12011 {0xe000, 0xe310, 0xfa40, 0xfefe},
12012 {0xe311, 0xeeb7, 0x8e40, 0xa0fe},
12013 {0xeeb8, 0xf6b0, 0x8140, 0x8dfe},
12014 {0xf6b1, 0xf70e, 0xc6a1, 0xc6fe},
12015 {0xf70f, 0xf848, 0xc740, 0xc8fe},
12018static inline int is_in_cp950_pua(
int c1,
int c)
12020 if ((c1 >= 0xfa && c1 <= 0xfe) || (c1 >= 0x8e && c1 <= 0xa0) || (c1 >= 0x81 && c1 <= 0x8d) || (c1 >= 0xc7 && c1 <= 0xc8)) {
12021 return (c >= 0x40 && c <= 0x7e) || (c >= 0xa1 && c <= 0xfe);
12022 }
else if (c1 == 0xc6) {
12023 return c >= 0xa1 && c <= 0xfe;
12032 switch (filter->
status) {
12034 if (c >= 0 && c < 0x80) {
12049 c1 = filter->
cache;
12050 if ((c > 0x3f && c < 0x7f) || (c > 0xa0 && c < 0xff)) {
12052 w = (c1 - 0xa1)*157 + (c - 0x40);
12054 w = (c1 - 0xa1)*157 + (c - 0xa1) + 0x3f;
12056 if (w >= 0 && w < big5_ucs_table_size) {
12057 w = big5_ucs_table[w];
12064 if (is_in_cp950_pua(c1, c)) {
12065 int c2 = (c1 << 8) | c;
12068 for (k = 0; k <
sizeof(cp950_pua_tbl) / (
sizeof(
unsigned short)*4); k++) {
12069 if (c2 >= cp950_pua_tbl[k][2] && c2 <= cp950_pua_tbl[k][3]) {
12074 if ((cp950_pua_tbl[k][2] & 0xff) == 0x40) {
12075 w = 157*(c1 - (cp950_pua_tbl[k][2]>>8)) + c - (c >= 0xa1 ? 0x62 : 0x40) + cp950_pua_tbl[k][0];
12077 w = c2 - cp950_pua_tbl[k][2] + cp950_pua_tbl[k][0];
12079 }
else if (c1 == 0xA1) {
12082 }
else if (c == 0x4E) {
12084 }
else if (c == 0x5A) {
12086 }
else if (c == 0xC2) {
12088 }
else if (c == 0xC3) {
12090 }
else if (c == 0xC5) {
12092 }
else if (c == 0xE3) {
12094 }
else if (c == 0xF2) {
12096 }
else if (c == 0xF3) {
12098 }
else if (c == 0xFE) {
12101 }
else if (c1 == 0xA2) {
12104 }
else if (c == 0x41) {
12106 }
else if (c == 0x42) {
12108 }
else if (c == 0x46) {
12110 }
else if (c == 0x47) {
12112 }
else if (c == 0xCC) {
12114 }
else if (c == 0xCE) {
12137 if (filter->
status == 1) {
12154 if (c >= ucs_a1_big5_table_min && c < ucs_a1_big5_table_max) {
12155 s = ucs_a1_big5_table[c - ucs_a1_big5_table_min];
12156 }
else if (c >= ucs_a2_big5_table_min && c < ucs_a2_big5_table_max) {
12157 s = ucs_a2_big5_table[c - ucs_a2_big5_table_min];
12158 }
else if (c >= ucs_a3_big5_table_min && c < ucs_a3_big5_table_max) {
12159 s = ucs_a3_big5_table[c - ucs_a3_big5_table_min];
12160 }
else if (c >= ucs_i_big5_table_min && c < ucs_i_big5_table_max) {
12161 s = ucs_i_big5_table[c - ucs_i_big5_table_min];
12162 }
else if (c >= ucs_r1_big5_table_min && c < ucs_r1_big5_table_max) {
12163 s = ucs_r1_big5_table[c - ucs_r1_big5_table_min];
12164 }
else if (c >= ucs_r2_big5_table_min && c < ucs_r2_big5_table_max) {
12165 s = ucs_r2_big5_table[c - ucs_r2_big5_table_min];
12169 if (c >= 0xe000 && c <= 0xf848) {
12171 for (k = 0; k <
sizeof(cp950_pua_tbl) / (
sizeof(
unsigned short)*4); k++) {
12172 if (c <= cp950_pua_tbl[k][1]) {
12177 int c1 = c - cp950_pua_tbl[k][0];
12178 if ((cp950_pua_tbl[k][2] & 0xff) == 0x40) {
12179 int c2 = cp950_pua_tbl[k][2] >> 8;
12180 s = ((c1 / 157) + c2) << 8;
12182 s |= c1 + (c1 >= 0x3f ? 0x62 : 0x40);
12184 s = c1 + cp950_pua_tbl[k][2];
12186 }
else if (c == 0x00A2) {
12188 }
else if (c == 0x00A3) {
12190 }
else if (c == 0x00AF) {
12192 }
else if (c == 0x02CD) {
12194 }
else if (c == 0x0401) {
12196 }
else if (c >= 0x0414 && c <= 0x041C) {
12198 }
else if (c >= 0x0423 && c <= 0x044F) {
12200 }
else if (c == 0x0451) {
12202 }
else if (c == 0x2022) {
12204 }
else if (c == 0x2027) {
12206 }
else if (c == 0x203E) {
12208 }
else if (c == 0x2215) {
12210 }
else if (c == 0x223C) {
12212 }
else if (c == 0x2295) {
12214 }
else if (c == 0x2299) {
12216 }
else if (c >= 0x2460 && c <= 0x247D) {
12218 }
else if (c == 0x2574) {
12220 }
else if (c == 0x2609) {
12222 }
else if (c == 0x2641) {
12224 }
else if (c == 0x3005 || (c >= 0x302A && c <= 0x30FF)) {
12226 }
else if (c == 0xFE51) {
12228 }
else if (c == 0xFE68) {
12230 }
else if (c == 0xFF3C) {
12232 }
else if (c == 0xFF5E) {
12234 }
else if (c == 0xFF64) {
12236 }
else if (c == 0xFFE0) {
12238 }
else if (c == 0xFFE1) {
12240 }
else if (c == 0xFFE3) {
12242 }
else if (c == 0xFF0F) {
12269static size_t mb_big5_to_wchar(
unsigned char **in,
size_t *in_len, uint32_t *
buf,
size_t bufsize,
unsigned int *
state)
12271 unsigned char *
p = *in, *e =
p + *in_len;
12272 uint32_t *
out =
buf, *limit =
buf + bufsize;
12276 while (
p < e &&
out < limit) {
12277 unsigned char c = *
p++;
12281 }
else if (c > 0xA0 && c <= 0xF9) {
12284 unsigned char c2 = *
p++;
12286 if ((c2 >= 0x40 && c2 <= 0x7E) || (c2 >= 0xA1 && c2 <= 0xFE)) {
12287 unsigned int w = (c - 0xA1)*157 + c2 - ((c2 <= 0x7E) ? 0x40 : 0xA1 - 0x3F);
12289 w = big5_ucs_table[w];
12306 if (
p == e &&
out < limit) {
12307 unsigned char c = *
p++;
12311 *in_len = e -
p + 1;
12318 unsigned char *
out, *limit;
12323 uint32_t w = *in++;
12324 unsigned int s = 0;
12326 if (w >= ucs_a1_big5_table_min && w < ucs_a1_big5_table_max) {
12327 s = ucs_a1_big5_table[w - ucs_a1_big5_table_min];
12328 }
else if (w >= ucs_a2_big5_table_min && w < ucs_a2_big5_table_max) {
12329 s = ucs_a2_big5_table[w - ucs_a2_big5_table_min];
12330 }
else if (w >= ucs_a3_big5_table_min && w < ucs_a3_big5_table_max) {
12331 s = ucs_a3_big5_table[w - ucs_a3_big5_table_min];
12332 }
else if (w >= ucs_i_big5_table_min && w < ucs_i_big5_table_max) {
12333 s = ucs_i_big5_table[w - ucs_i_big5_table_min];
12334 }
else if (w >= ucs_r1_big5_table_min && w < ucs_r1_big5_table_max) {
12335 s = ucs_r1_big5_table[w - ucs_r1_big5_table_min];
12336 }
else if (w >= ucs_r2_big5_table_min && w < ucs_r2_big5_table_max) {
12337 s = ucs_r2_big5_table[w - ucs_r2_big5_table_min];
12342 out = mb_convert_buf_add(
out, 0);
12347 }
else if (
s <= 0x80) {
12348 out = mb_convert_buf_add(
out,
s);
12351 out = mb_convert_buf_add2(
out, (
s >> 8) & 0xFF,
s & 0xFF);
12358static size_t mb_cp950_to_wchar(
unsigned char **in,
size_t *in_len, uint32_t *
buf,
size_t bufsize,
unsigned int *
state)
12360 unsigned char *
p = *in, *e =
p + *in_len;
12361 uint32_t *
out =
buf, *limit =
buf + bufsize;
12363 while (
p < e &&
out < limit) {
12364 unsigned char c = *
p++;
12368 }
else if (c > 0x80 && c <= 0xFE &&
p < e) {
12369 unsigned char c2 = *
p++;
12371 if ((c2 >= 0x40 && c2 <= 0x7E) || (c2 >= 0xA1 && c2 <= 0xFE)) {
12372 unsigned int w = ((c - 0xA1)*157) + c2 - ((c2 <= 0x7E) ? 0x40 : 0xA1 - 0x3F);
12373 w = (w < big5_ucs_table_size) ? big5_ucs_table[w] : 0;
12376 if (is_in_cp950_pua(c, c2)) {
12377 unsigned int s = (c << 8) | c2;
12380 for (k = 0; k <
sizeof(cp950_pua_tbl) / (
sizeof(
unsigned short)*4); k++) {
12381 if (
s >= cp950_pua_tbl[k][2] &&
s <= cp950_pua_tbl[k][3]) {
12386 if ((cp950_pua_tbl[k][2] & 0xFF) == 0x40) {
12387 w = 157*(c - (cp950_pua_tbl[k][2] >> 8)) + c2 - (c2 >= 0xA1 ? 0x62 : 0x40) + cp950_pua_tbl[k][0];
12389 w =
s - cp950_pua_tbl[k][2] + cp950_pua_tbl[k][0];
12391 }
else if (c == 0xA1) {
12394 }
else if (c2 == 0x4E) {
12396 }
else if (c2 == 0x5A) {
12398 }
else if (c2 == 0xC2) {
12400 }
else if (c2 == 0xC3) {
12402 }
else if (c2 == 0xC5) {
12404 }
else if (c2 == 0xE3) {
12406 }
else if (c2 == 0xF2) {
12408 }
else if (c2 == 0xF3) {
12410 }
else if (c2 == 0xFE) {
12413 }
else if (c == 0xA2) {
12416 }
else if (c2 == 0x41) {
12418 }
else if (c2 == 0x42) {
12420 }
else if (c2 == 0x46) {
12422 }
else if (c2 == 0x47) {
12424 }
else if (c2 == 0xCC) {
12426 }
else if (c2 == 0xCE) {
12449 unsigned char *
out, *limit;
12454 uint32_t w = *in++;
12455 unsigned int s = 0;
12457 if (w >= ucs_a1_big5_table_min && w < ucs_a1_big5_table_max) {
12458 s = ucs_a1_big5_table[w - ucs_a1_big5_table_min];
12459 }
else if (w >= ucs_a2_big5_table_min && w < ucs_a2_big5_table_max) {
12460 s = ucs_a2_big5_table[w - ucs_a2_big5_table_min];
12461 }
else if (w >= ucs_a3_big5_table_min && w < ucs_a3_big5_table_max) {
12462 s = ucs_a3_big5_table[w - ucs_a3_big5_table_min];
12463 }
else if (w >= ucs_i_big5_table_min && w < ucs_i_big5_table_max) {
12464 s = ucs_i_big5_table[w - ucs_i_big5_table_min];
12465 }
else if (w >= ucs_r1_big5_table_min && w < ucs_r1_big5_table_max) {
12466 s = ucs_r1_big5_table[w - ucs_r1_big5_table_min];
12467 }
else if (w >= ucs_r2_big5_table_min && w < ucs_r2_big5_table_max) {
12468 s = ucs_r2_big5_table[w - ucs_r2_big5_table_min];
12471 if (w >= 0xE000 && w <= 0xF848) {
12473 for (k = 0; k <
sizeof(cp950_pua_tbl) / (
sizeof(
unsigned short)*4); k++) {
12474 if (w <= cp950_pua_tbl[k][1]) {
12479 int c1 = w - cp950_pua_tbl[k][0];
12480 if ((cp950_pua_tbl[k][2] & 0xFF) == 0x40) {
12481 int c2 = cp950_pua_tbl[k][2] >> 8;
12482 s = ((c1 / 157) + c2) << 8;
12484 s |= c1 + (c1 >= 0x3F ? 0x62 : 0x40);
12486 s = c1 + cp950_pua_tbl[k][2];
12488 }
else if (w == 0xA2 || w == 0xA3 || w == 0x401 || (w >= 0x414 && w <= 0x41C) || (w >= 0x423 && w <= 0x44F) || w == 0x451 || w == 0x2022 || w == 0x203E || w == 0x223C || (w >= 0x2460 && w <= 0x247D) || w == 0x2609 || w == 0x2641 || w == 0x3005 || (w >= 0x302A && w <= 0x30FF) || w == 0xFF64) {
12490 }
else if (w == 0xAF) {
12492 }
else if (w == 0x2CD) {
12494 }
else if (w == 0x2027) {
12496 }
else if (w == 0x2215) {
12498 }
else if (w == 0x2295) {
12500 }
else if (w == 0x2299) {
12502 }
else if (w == 0x2574) {
12504 }
else if (w == 0xFE51) {
12506 }
else if (w == 0xFE68) {
12508 }
else if (w == 0xFF3C) {
12510 }
else if (w == 0xFF5E) {
12512 }
else if (w == 0xFFE0) {
12514 }
else if (w == 0xFFE1) {
12516 }
else if (w == 0xFFE3) {
12518 }
else if (w == 0xFF0F) {
12524 out = mb_convert_buf_add(
out, 0);
12529 }
else if (
s <= 0x80) {
12530 out = mb_convert_buf_add(
out,
s);
12533 out = mb_convert_buf_add2(
out, (
s >> 8) & 0xFF,
s & 0xFF);
12540static const char *mbfl_encoding_big5_aliases[] = {
"CN-BIG5",
"BIG-FIVE",
"BIGFIVE",
NULL};
12547 mbfl_filt_conv_big5_wchar,
12548 mbfl_filt_conv_big5_wchar_flush,
12557 mbfl_filt_conv_wchar_big5,
12566 mbfl_encoding_big5_aliases,
12567 mblen_table_81_to_fe,
12582 mbfl_filt_conv_big5_wchar,
12583 mbfl_filt_conv_big5_wchar_flush,
12592 mbfl_filt_conv_wchar_big5,
12602 mblen_table_81_to_fe,
12620 switch (filter->
status & 0xf) {
12626 }
else if (filter->
status == 0x10 && ((c > 0x20 && c <= 0x29) || (c >= 0x30 && c <= 0x77))) {
12630 }
else if (filter->
status == 0 && c >= 0 && c < 0x80) {
12640 c1 = filter->
cache;
12641 if (c1 > 0x20 && c1 < 0x7F && c > 0x20 && c < 0x7F) {
12642 s = (c1 - 1)*192 + c + 0x40;
12646 }
else if (
s == 0x186A) {
12648 }
else if (
s == 0x186C) {
12650 }
else if ((
s >= 0x1920 &&
s <= 0x192A) ||
s == 0x1963 || (
s >= 0x1C60 &&
s <= 0x1C7F) || (
s >= 0x1DBB &&
s <= 0x1DC4)) {
12668 if (c ==
'}' && filter->
status == 0x12) {
12670 }
else if (c ==
'{' && filter->
status == 2) {
12672 }
else if (c ==
'~' && filter->
status == 2) {
12675 }
else if (c ==
'\n') {
12693 if (filter->
status == 0x11) {
12712 if (c == 0xB7 || c == 0x144 || c == 0x148 || c == 0x251 || c == 0x261 || c == 0x2CA || c == 0x2CB || c == 0x2D9) {
12720 }
else if (c == 0x2010 || c == 0x2013 || c == 0x2014 || c == 0x2016 || c == 0x2025 || c == 0x2035 ||
12721 c == 0x2105 || c == 0x2109 || c == 0x2121 || (c >= 0x2170 && c <= 0x2179) || (c >= 0x2196 && c <= 0x2199) ||
12722 c == 0x2215 || c == 0x221F || c == 0x2223 || c == 0x2252 || c == 0x2266 || c == 0x2267 || c == 0x2295 ||
12723 (c >= 0x2550 && c <= 0x2573) || c == 0x22BF || c == 0x2609 || (c >= 0x2581 && c <= 0x258F) ||
12724 (c >= 0x2593 && c <= 0x2595) || c == 0x25BC || c == 0x25BD || (c >= 0x25E2 && c <= 0x25E5)) {
12732 }
else if (c == 0x3006 || c == 0x3007 || c == 0x3012 || c == 0x3231 || c == 0x32A3 || c >= 0x3300 ||
12733 (c >= 0x3018 && c <= 0x3040) || (c >= 0x309B && c <= 0x309E) || (c >= 0x30FC && c <= 0x30FE)) {
12743 }
else if (c == 0xFF5E) {
12745 }
else if (c >= 0xFF01 && c <= 0xFF5D) {
12746 s = c - 0xFF01 + 0xA3A1;
12747 }
else if (c == 0xFFE0 || c == 0xFFE1 || c == 0xFFE3 || c == 0xFFE5) {
12757 s = (c == 0) ? 0 : -1;
12758 }
else if ((
s >= 0x80 &&
s < 0x2121) ||
s > 0x8080) {
12764 if ((filter->
status & 0xff00) != 0) {
12774 if ((filter->
status & 0xFF00) != 0x200) {
12792 if (filter->
status & 0xFF00) {
12803static size_t mb_hz_to_wchar(
unsigned char **in,
size_t *in_len, uint32_t *
buf,
size_t bufsize,
unsigned int *
state)
12805 unsigned char *
p = *in, *e =
p + *in_len;
12806 uint32_t *
out =
buf, *limit =
buf + bufsize;
12808 while (
p < e &&
out < limit) {
12809 unsigned char c = *
p++;
12815 unsigned char c2 = *
p++;
12823 }
else if (c2 ==
'\n') {
12829 }
else if (((c > 0x20 && c <= 0x29) || (c >= 0x30 && c <= 0x77)) &&
p < e && *
state ==
GB2312) {
12830 unsigned char c2 = *
p++;
12832 if (c > 0x20 && c < 0x7F && c2 > 0x20 && c2 < 0x7F) {
12833 unsigned int s = (c - 1)*192 + c2 + 0x40;
12838 }
else if (
s == 0x186A) {
12840 }
else if (
s == 0x186C) {
12842 }
else if ((
s >= 0x1920 &&
s <= 0x192A) ||
s == 0x1963 || (
s >= 0x1C60 &&
s <= 0x1C7F) || (
s >= 0x1DBB &&
s <= 0x1DC4)) {
12867 unsigned char *
out, *limit;
12872 uint32_t w = *in++;
12873 unsigned int s = 0;
12876 if (w == 0xB7 || w == 0x144 || w == 0x148 || w == 0x251 || w == 0x261 || w == 0x2CA || w == 0x2CB || w == 0x2D9) {
12884 }
else if (w == 0x2010 || w == 0x2013 || w == 0x2014 || w == 0x2016 || w == 0x2025 || w == 0x2035 || w == 0x2105 || w == 0x2109 || w == 0x2121 || (w >= 0x2170 && w <= 0x2179) || (w >= 0x2196 && w <= 0x2199) || w == 0x2215 || w == 0x221F || w == 0x2223 || w == 0x2252 || w == 0x2266 || w == 0x2267 || w == 0x2295 || (w >= 0x2550 && w <= 0x2573) || w == 0x22BF || w == 0x2609 || (w >= 0x2581 && w <= 0x258F) || (w >= 0x2593 && w <= 0x2595) || w == 0x25BC || w == 0x25BD || (w >= 0x25E2 && w <= 0x25E5)) {
12892 }
else if (w == 0x3006 || w == 0x3007 || w == 0x3012 || w == 0x3231 || w == 0x32A3 || w >= 0x3300 || (w >= 0x3018 && w <= 0x3040) || (w >= 0x309B && w <= 0x309E) || (w >= 0x30FC && w <= 0x30FE)) {
12902 }
else if (w == 0xFF5E) {
12904 }
else if (w >= 0xFF01 && w <= 0xFF5D) {
12905 s = w - 0xFF01 + 0xA3A1;
12906 }
else if (w == 0xFFE0 || w == 0xFFE1 || w == 0xFFE3 || w == 0xFFE5) {
12913 if ((!
s && w) || (
s >= 0x80 &&
s < 0x2121)) {
12916 }
else if (
s < 0x80) {
12920 out = mb_convert_buf_add2(
out,
'~',
'}');
12925 out = mb_convert_buf_add2(
out,
'~',
'~');
12927 out = mb_convert_buf_add(
out,
s);
12933 out = mb_convert_buf_add2(
out,
'~',
'{');
12938 out = mb_convert_buf_add2(
out, (
s >> 8) & 0x7F,
s & 0x7F);
12945 out = mb_convert_buf_add2(
out,
'~',
'}');
12956 mbfl_filt_conv_hz_wchar,
12957 mbfl_filt_conv_hz_wchar_flush,
12966 mbfl_filt_conv_wchar_hz,
12967 mbfl_filt_conv_any_hz_flush,
zend_ffi_ctype_name_buf buf
uint32_t mb_convert_kana_codepoint(uint32_t c, uint32_t next, bool *consumed, uint32_t *second, unsigned int mode)
const mbfl_encoding mbfl_encoding_hz
int mbfilter_sjis_emoji_docomo2unicode(int s, int *snd)
#define EMIT_KEYPAD_EMOJI(c)
const mbfl_encoding mbfl_encoding_sjis
#define JISX_0201_KANA_SO
const mbfl_encoding mbfl_encoding_sjiswin
int mbfilter_sjis_emoji_kddi2unicode(int s, int *snd)
#define CODE2JIS(c1, c2, s1, s2)
const mbfl_encoding mbfl_encoding_cp932
const mbfl_encoding mbfl_encoding_uhc
const mbfl_encoding mbfl_encoding_euc_kr
const mbfl_encoding mbfl_encoding_cp50221
const mbfl_encoding mbfl_encoding_gb18030_2022
const mbfl_encoding mbfl_encoding_sjis2004
const mbfl_encoding mbfl_encoding_sjis_docomo
int mbfilter_unicode2sjis_emoji_kddi_sjis(int c, int *s1, mbfl_convert_filter *filter)
const mbfl_encoding mbfl_encoding_euc_cn
const mbfl_encoding mbfl_encoding_cp50220
const mbfl_encoding mbfl_encoding_big5
const mbfl_encoding mbfl_encoding_cp50222
const mbfl_encoding mbfl_encoding_2022jpms
const mbfl_encoding mbfl_encoding_cp51932
const mbfl_encoding mbfl_encoding_2022jp_2004
#define EMIT_FLAG_EMOJI(country)
int mbfl_bisec_srch2(int w, const unsigned short tbl[], int n)
const mbfl_encoding mbfl_encoding_sjis_sb
#define sjistoidx(c1, c2)
const mbfl_encoding mbfl_encoding_cp936
const mbfl_encoding mbfl_encoding_gb18030
const mbfl_encoding mbfl_encoding_eucjp2004
const mbfl_encoding mbfl_encoding_jis
const mbfl_encoding mbfl_encoding_2022jp
const mbfl_encoding mbfl_encoding_2022jp_kddi
#define EMITTED_ESC_SEQUENCE
const mbfl_encoding mbfl_encoding_eucjp_win
int mbfilter_unicode2sjis_emoji_sb(int c, int *s1, mbfl_convert_filter *filter)
#define DOCOMO_KEYPAD_HASH
const mbfl_encoding mbfl_encoding_euc_tw
const mbfl_encoding mbfl_encoding_cp950
int mbfilter_unicode2sjis_emoji_docomo(int c, int *s1, mbfl_convert_filter *filter)
int mbfilter_sjis_emoji_sb2unicode(int s, int *snd)
int mbfl_filt_conv_sjis_mobile_flush(mbfl_convert_filter *filter)
const mbfl_encoding mbfl_encoding_euc_jp
const mbfl_encoding mbfl_encoding_sjis_kddi
#define SJIS_ENCODE(c1, c2, s1, s2)
const mbfl_encoding mbfl_encoding_2022kr
#define SJIS_DECODE(c1, c2, s1, s2)
const mbfl_encoding mbfl_encoding_sjis_mac
#define MBFL_ENCTYPE_GL_UNSAFE
int mbfl_filt_conv_common_flush(mbfl_convert_filter *filter)
int mbfl_filt_conv_illegal_output(int c, mbfl_convert_filter *filter)
void mbfl_filt_conv_common_ctor(mbfl_convert_filter *filter)
struct _mbfl_convert_filter mbfl_convert_filter
@ mbfl_no_encoding_2022jp_kddi
@ mbfl_no_encoding_2022jp
@ mbfl_no_encoding_cp50220
@ mbfl_no_encoding_eucjp2004
@ mbfl_no_encoding_gb18030
@ mbfl_no_encoding_cp50222
@ mbfl_no_encoding_cp51932
@ mbfl_no_encoding_sjis_docomo
@ mbfl_no_encoding_euc_kr
@ mbfl_no_encoding_euc_cn
@ mbfl_no_encoding_2022kr
@ mbfl_no_encoding_euc_tw
@ mbfl_no_encoding_2022jpms
@ mbfl_no_encoding_cp50221
@ mbfl_no_encoding_euc_jp
@ mbfl_no_encoding_sjiswin
@ mbfl_no_encoding_sjis2004
@ mbfl_no_encoding_gb18030_2022
@ mbfl_no_encoding_sjis_kddi
@ mbfl_no_encoding_sjis_mac
@ mbfl_no_encoding_2022jp_2004
@ mbfl_no_encoding_eucjp_win
@ mbfl_no_encoding_sjis_sb
#define MB_CONVERT_BUF_STORE(buf, _out, _limit)
#define MB_CONVERT_BUF_ENSURE(buf, out, limit, needed)
#define MB_CONVERT_ERROR(buf, out, limit, bad_cp, conv_fn)
#define MB_CONVERT_BUF_LOAD(buf, _out, _limit)
unsigned const char * end
const mbfl_encoding * from
output_function_t output_function
int(* filter_function)(int c, mbfl_convert_filter *filter)
flush_function_t flush_function
enum mbfl_no_encoding from
enum mbfl_no_encoding no_encoding
#define MBFL_ZEN2HAN_HIRAGANA
#define MBFL_HAN2ZEN_ALPHA
#define MBFL_HAN2ZEN_GLUE
#define MBFL_ZEN2HAN_SPACE
#define MBFL_ZENKAKU_KATA2HIRA
#define MBFL_ZEN2HAN_ALPHA
#define MBFL_HAN2ZEN_SPECIAL
#define MBFL_HAN2ZEN_NUMERIC
#define MBFL_HAN2ZEN_KATAKANA
#define MBFL_ZEN2HAN_NUMERIC
#define MBFL_ZEN2HAN_SPECIAL
#define MBFL_HAN2ZEN_HIRAGANA
#define MBFL_HAN2ZEN_SPACE
#define MBFL_ZENKAKU_HIRA2KATA
#define MBFL_ZEN2HAN_KATAKANA
const unsigned short cp932ext2_ucs_table[]
const int cp932ext2_ucs_table_min
const unsigned short cp932ext3_ucs_table_paired_sorted[][2]
const int cp932ext1_ucs_table_max
const unsigned short cp932ext1_ucs_table_paired_sorted[][2]
const unsigned short cp932ext3_ucs_table[]
const unsigned short cp932ext1_ucs_table[]
const int cp932ext3_ucs_table_max
const int cp932ext3_ucs_table_min
const int cp932ext2_ucs_table_max
const int cp932ext1_ucs_table_min
const unsigned short ucs_hff_s_cp936_table[]
const unsigned short ucs_ci_s_cp936_table[]
const int ucs_a3_cp936_table_max
const unsigned short cp936_ucs_table[]
const unsigned short cp936_pua_tbl2[]
const int ucs_ci_cp936_table_min
const unsigned short cp936_pua_tbl3[]
const int ucs_hff_cp936_table_max
const int ucs_i_cp936_table_min
const int ucs_a1_cp936_table_min
const int ucs_a2_cp936_table_min
const unsigned short ucs_a1_cp936_table[]
const int ucs_a1_cp936_table_max
const unsigned short ucs_a2_cp936_table[]
const unsigned short cp936_pua_tbl1[]
const int ucs_cf_cp936_table_max
const unsigned short ucs_cf_cp936_table[]
const int ucs_hff_cp936_table_min
const int ucs_sfv_cp936_table_min
const int ucs_cf_cp936_table_min
const unsigned short ucs_i_cp936_table[]
const int cp936_ucs_table_size
const unsigned short ucs_a3_cp936_table[]
const unsigned short ucs_sfv_cp936_table[]
const int ucs_a2_cp936_table_max
const int ucs_a3_cp936_table_min
const int ucs_ci_cp936_table_max
const int ucs_sfv_cp936_table_max
const int ucs_i_cp936_table_max
const unsigned short gb18030_2022_pua_tbl1[]
const unsigned short ucs_i_gb2312_table[]
const int ucs_i_gb2312_table_min
const int ucs_i_gb2312_table_max
const int ucs_i_jis_table_min
const int ucs_a1_jis_table_min
const unsigned short ucs_i_jis_table[]
const unsigned short ucs_r_jis_table[]
const int ucs_a1_jis_table_max
const unsigned short ucs_a2_jis_table[]
const unsigned short ucs_a1_jis_table[]
const int ucs_a2_jis_table_min
const unsigned short jisx0208_ucs_table[]
const int ucs_a2_jis_table_max
const unsigned short jisx0212_ucs_table[]
const int jisx0212_ucs_table_size
const int ucs_i_jis_table_max
const int jisx0208_ucs_table_size
const unsigned short ucs_i_uhc_table[]
const int ucs_r1_uhc_table_min
const int ucs_a2_uhc_table_max
const int ucs_a1_uhc_table_max
const int ucs_a3_uhc_table_min
const unsigned short uhc1_ucs_table[]
const unsigned short ucs_r2_uhc_table[]
const int ucs_r2_uhc_table_min
const int ucs_a3_uhc_table_max
const int ucs_i_uhc_table_min
const unsigned short ucs_s_uhc_table[]
const int ucs_s_uhc_table_min
const unsigned short ucs_r1_uhc_table[]
const int ucs_a2_uhc_table_min
const unsigned short uhc3_ucs_table[]
const int ucs_s_uhc_table_max
const unsigned short ucs_a2_uhc_table[]
const int ucs_i_uhc_table_max
const unsigned short ucs_a3_uhc_table[]
const int ucs_r2_uhc_table_max
const int ucs_a1_uhc_table_min
const int ucs_r1_uhc_table_max
const unsigned short ucs_a1_uhc_table[]
const int uhc1_ucs_table_size
const int uhc3_ucs_table_size
struct _zend_string zend_string
#define zend_always_inline
#define EMPTY_SWITCH_DEFAULT_CASE()