php-internal-docs 8.4.8
Unofficial docs for php/php-src
Loading...
Searching...
No Matches
mbfilter_utf8.c
Go to the documentation of this file.
1/*
2 * "streamable kanji code filter and converter"
3 * Copyright (c) 1998-2002 HappySize, Inc. All rights reserved.
4 *
5 * LICENSE NOTICES
6 *
7 * This file is part of "streamable kanji code filter and converter",
8 * which is distributed under the terms of GNU Lesser General Public
9 * License (version 2) as published by the Free Software Foundation.
10 *
11 * This software is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU Lesser General Public License for more details.
15 *
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with "streamable kanji code filter and converter";
18 * if not, write to the Free Software Foundation, Inc., 59 Temple Place,
19 * Suite 330, Boston, MA 02111-1307 USA
20 *
21 * The author of this file:
22 *
23 */
24/*
25 * The source code included in this file was separated from mbfilter.c
26 * by moriyoshi koizumi <moriyoshi@php.net> on 4 dec 2002.
27 *
28 */
29
30#include "mbfilter.h"
31#include "mbfilter_utf8.h"
32#include "mbfilter_cjk.h"
33#include "emoji2uni.h"
34
/* Length in bytes of a UTF-8 sequence, indexed by the value of its first byte
 * (16 rows of 16 entries, one per possible byte value).
 * 0xC2-0xDF map to 2, 0xE0-0xEF map to 3 and 0xF0-0xF4 map to 4. Every other
 * value (ASCII, 0x80-0xBF, 0xC0-0xC1 and 0xF5-0xFF) maps to 1. */
35const unsigned char mblen_table_utf8[] = {
36 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
37 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
38 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
39 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
40 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
41 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
42 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
43 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
44 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
45 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
46 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
47 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
48 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
49 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
50 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
51 4, 4, 4, 4, 4, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1
52};
53
54extern int mbfl_bisec_srch2(int w, const unsigned short tbl[], int n);
56
57static int mbfl_filt_conv_utf8_mobile_wchar(int c, mbfl_convert_filter *filter);
58static int mbfl_filt_conv_wchar_utf8_mobile(int c, mbfl_convert_filter *filter);
59
60static int mbfl_filt_conv_utf8_wchar(int c, mbfl_convert_filter *filter);
61static int mbfl_filt_conv_wchar_utf8(int c, mbfl_convert_filter *filter);
62static int mbfl_filt_conv_utf8_wchar_flush(mbfl_convert_filter *filter);
63
64static size_t mb_utf8_to_wchar(unsigned char **in, size_t *in_len, uint32_t *buf, size_t bufsize, unsigned int *state);
65static void mb_wchar_to_utf8(uint32_t *in, size_t len, mb_convert_buf *buf, bool end);
66static zend_string* mb_cut_utf8(unsigned char *str, size_t from, size_t len, unsigned char *end);
67
68static size_t mb_utf8_docomo_to_wchar(unsigned char **in, size_t *in_len, uint32_t *buf, size_t bufsize, unsigned int *state);
69static void mb_wchar_to_utf8_docomo(uint32_t *in, size_t len, mb_convert_buf *buf, bool end);
70static size_t mb_utf8_kddi_a_to_wchar(unsigned char **in, size_t *in_len, uint32_t *buf, size_t bufsize, unsigned int *state);
71static void mb_wchar_to_utf8_kddi_a(uint32_t *in, size_t len, mb_convert_buf *buf, bool end);
72static size_t mb_utf8_kddi_b_to_wchar(unsigned char **in, size_t *in_len, uint32_t *buf, size_t bufsize, unsigned int *state);
73static void mb_wchar_to_utf8_kddi_b(uint32_t *in, size_t len, mb_convert_buf *buf, bool end);
74static size_t mb_utf8_sb_to_wchar(unsigned char **in, size_t *in_len, uint32_t *buf, size_t bufsize, unsigned int *state);
75static void mb_wchar_to_utf8_sb(uint32_t *in, size_t len, mb_convert_buf *buf, bool end);
76
/*
 * Forward range mapping.
 *
 * Each row of `map` is {first, last, base}. If `c` lies in [first, last] of
 * some row, the first such row wins: `*w` is set to base + (c - first) and
 * true is returned. If no row matches, `*w` is left untouched and false is
 * returned.
 */
static bool mbfilter_conv_map_tbl(int c, int *w, unsigned int n, const unsigned short map[/* n */][3])
{
	unsigned int idx = 0;

	while (idx < n) {
		const unsigned short *range = map[idx++];

		if (c < range[0] || c > range[1]) {
			continue;
		}
		*w = c - range[0] + range[2];
		return true;
	}

	return false;
}
87
/*
 * Reverse range mapping: the inverse of mbfilter_conv_map_tbl().
 *
 * Each row of `map` is {first, last, base}. If `c` lies in
 * [base, base + (last - first)] of some row, the first such row wins: `*w`
 * is set to first + (c - base) and true is returned. If no row matches,
 * `*w` is left untouched and false is returned.
 */
static bool mbfilter_conv_r_map_tbl(int c, int *w, unsigned int n, const unsigned short map[/* n */][3])
{
	for (unsigned int idx = 0; idx != n; idx++) {
		const unsigned short *range = map[idx];
		int dst_first = range[2];
		int dst_last = range[2] - range[0] + range[1];

		if (c < dst_first || c > dst_last) {
			continue;
		}
		*w = c + range[0] - range[2];
		return true;
	}

	return false;
}
99
/* Range tables consumed by mbfilter_conv_map_tbl() and
 * mbfilter_conv_r_map_tbl(). Each row is {first, last, base}: the forward
 * lookup maps a value in [first, last] to base + (value - first), and the
 * reverse lookup maps [base, base + (last - first)] back to [first, last].
 * The row count given in each declaration is the `n` that callers pass
 * alongside the table. */

/* Ranges used for UTF-8-Mobile#DOCOMO */
100static const unsigned short mbfl_docomo2uni_pua[4][3] = {
101 {0x28c2, 0x292f, 0xe63e},
102 {0x2930, 0x2934, 0xe6ac},
103 {0x2935, 0x2951, 0xe6b1},
104 {0x2952, 0x29db, 0xe6ce},
105};
106
/* Ranges used for UTF-8-Mobile#KDDI-A */
107static const unsigned short mbfl_kddi2uni_pua[7][3] = {
108 {0x26ec, 0x2838, 0xe468},
109 {0x284c, 0x2863, 0xe5b5},
110 {0x24b8, 0x24ca, 0xe5cd},
111 {0x24cb, 0x2545, 0xea80},
112 {0x2839, 0x284b, 0xeafb},
113 {0x2546, 0x25c0, 0xeb0e},
114 {0x25c1, 0x25c6, 0xeb89},
115};
116
/* Ranges used for UTF-8-Mobile#KDDI-B */
117static const unsigned short mbfl_kddi2uni_pua_b[8][3] = {
118 {0x24b8, 0x24f6, 0xec40},
119 {0x24f7, 0x2573, 0xec80},
120 {0x2574, 0x25b2, 0xed40},
121 {0x25b3, 0x25c6, 0xed80},
122 {0x26ec, 0x272a, 0xef40},
123 {0x272b, 0x27a7, 0xef80},
124 {0x27a8, 0x27e6, 0xf040},
125 {0x27e7, 0x2863, 0xf080},
126};
127
/* Ranges used for UTF-8-Mobile#SOFTBANK */
128static const unsigned short mbfl_sb2uni_pua[6][3] = {
129 {0x27a9, 0x2802, 0xe101},
130 {0x2808, 0x2861, 0xe201},
131 {0x2921, 0x297a, 0xe001},
132 {0x2980, 0x29cc, 0xe301},
133 {0x2a99, 0x2ae4, 0xe401},
134 {0x2af8, 0x2b35, 0xe501},
135};
136
137static const char *mbfl_encoding_utf8_aliases[] = {"utf8", NULL};
138
141 "UTF-8",
142 "UTF-8",
143 mbfl_encoding_utf8_aliases,
145 0,
148 mb_utf8_to_wchar,
149 mb_wchar_to_utf8,
150 NULL,
151 mb_cut_utf8
152};
153
158 NULL,
159 mbfl_filt_conv_utf8_wchar,
160 mbfl_filt_conv_utf8_wchar_flush,
161 NULL,
162};
163
173
174static const char *mbfl_encoding_utf8_docomo_aliases[] = {"UTF-8-DOCOMO", "UTF8-DOCOMO", NULL};
175static const char *mbfl_encoding_utf8_kddi_b_aliases[] = {"UTF-8-Mobile#KDDI", "UTF-8-KDDI", "UTF8-KDDI", NULL};
176static const char *mbfl_encoding_utf8_sb_aliases[] = {"UTF-8-SOFTBANK", "UTF8-SOFTBANK", NULL};
177
180 "UTF-8-Mobile#DOCOMO",
181 "UTF-8",
182 mbfl_encoding_utf8_docomo_aliases,
184 0,
187 mb_utf8_docomo_to_wchar,
188 mb_wchar_to_utf8_docomo,
189 NULL,
190 mb_cut_utf8,
191};
192
195 "UTF-8-Mobile#KDDI-A",
196 "UTF-8",
197 NULL,
199 0,
202 mb_utf8_kddi_a_to_wchar,
203 mb_wchar_to_utf8_kddi_a,
204 NULL,
205 mb_cut_utf8,
206};
207
210 "UTF-8-Mobile#KDDI-B",
211 "UTF-8",
212 mbfl_encoding_utf8_kddi_b_aliases,
214 0,
217 mb_utf8_kddi_b_to_wchar,
218 mb_wchar_to_utf8_kddi_b,
219 NULL,
220 mb_cut_utf8,
221};
222
225 "UTF-8-Mobile#SOFTBANK",
226 "UTF-8",
227 mbfl_encoding_utf8_sb_aliases,
229 0,
232 mb_utf8_sb_to_wchar,
233 mb_wchar_to_utf8_sb,
234 NULL,
235 mb_cut_utf8,
236};
237
242 NULL,
243 mbfl_filt_conv_utf8_mobile_wchar,
244 mbfl_filt_conv_utf8_wchar_flush,
245 NULL,
246};
247
257
262 NULL,
263 mbfl_filt_conv_utf8_mobile_wchar,
264 mbfl_filt_conv_utf8_wchar_flush,
265 NULL,
266};
267
277
282 NULL,
283 mbfl_filt_conv_utf8_mobile_wchar,
284 mbfl_filt_conv_utf8_wchar_flush,
285 NULL,
286};
287
297
302 NULL,
303 mbfl_filt_conv_utf8_mobile_wchar,
304 mbfl_filt_conv_utf8_wchar_flush,
305 NULL,
306};
307
317
318#define CK(statement) do { if ((statement) < 0) return (-1); } while (0)
319
320static int mbfl_filt_put_invalid_char(mbfl_convert_filter *filter)
321{
322 filter->status = filter->cache = 0;
323 CK((*filter->output_function)(MBFL_BAD_INPUT, filter->data));
324 return 0;
325}
326
/*
 * Byte-at-a-time UTF-8 decoder: consumes one input byte `c` per call and
 * passes decoded codepoints to filter->output_function.
 *
 * filter->status tracks the position inside a multi-byte sequence:
 *   0x00              expecting the first byte of a sequence
 *   0x10              expecting the 2nd (last) byte of a 2-byte sequence
 *   0x20, 0x21        expecting the 2nd / 3rd byte of a 3-byte sequence
 *   0x30, 0x31, 0x32  expecting the 2nd / 3rd / 4th byte of a 4-byte sequence
 * filter->cache accumulates the payload bits decoded so far.
 *
 * When a byte is not valid in the current state, MBFL_BAD_INPUT is emitted,
 * the state is reset by mbfl_filt_put_invalid_char(), and the same byte is
 * processed again from state 0x00 (the `goto retry`).
 *
 * Returns 0, or -1 (via CK) if an output call returns a negative value.
 */
327static int mbfl_filt_conv_utf8_wchar(int c, mbfl_convert_filter *filter)
328{
329 int s, c1;
330
331retry:
332 switch (filter->status) {
333 case 0x00:
334 if (c < 0x80) {
335 CK((*filter->output_function)(c, filter->data));
336 } else if (c >= 0xc2 && c <= 0xdf) { /* 2byte code first char: 0xc2-0xdf */
337 filter->status = 0x10;
338 filter->cache = c & 0x1f;
339 } else if (c >= 0xe0 && c <= 0xef) { /* 3byte code first char: 0xe0-0xef */
340 filter->status = 0x20;
341 filter->cache = c & 0xf;
342 } else if (c >= 0xf0 && c <= 0xf4) { /* 4byte code first char: 0xf0-0xf4 */
343 filter->status = 0x30;
344 filter->cache = c & 0x7;
345 } else {
346 CK(mbfl_filt_put_invalid_char(filter));
347 }
348 break;
349 case 0x10: /* 2byte code 2nd char: 0x80-0xbf */
350 case 0x21: /* 3byte code 3rd char: 0x80-0xbf */
351 case 0x32: /* 4byte code 4th char: 0x80-0xbf */
352 if (c >= 0x80 && c <= 0xbf) {
353 s = (filter->cache<<6) | (c & 0x3f);
354 filter->status = filter->cache = 0;
355 CK((*filter->output_function)(s, filter->data));
356 } else {
357 CK(mbfl_filt_put_invalid_char(filter));
358 goto retry;
359 }
360 break;
361 case 0x20: /* 3byte code 2nd char: after 0xe0: 0xa0-0xbf, after 0xed: 0x80-0x9f, after any other lead: 0x80-0xbf */
362 s = (filter->cache<<6) | (c & 0x3f);
363 c1 = filter->cache & 0xf;
364
365 if ((c >= 0x80 && c <= 0xbf) &&
366 ((c1 == 0x0 && c >= 0xa0) ||
367 (c1 == 0xd && c < 0xa0) ||
368 (c1 > 0x0 && c1 != 0xd))) {
369 filter->cache = s;
370 filter->status++;
371 } else {
372 CK(mbfl_filt_put_invalid_char(filter));
373 goto retry;
374 }
375 break;
376 case 0x30: /* 4byte code 2nd char: 0:0x90-0xbf,1-3:0x80-0xbf,4:0x80-0x8f */
377 s = (filter->cache<<6) | (c & 0x3f);
378 c1 = filter->cache & 0x7;
379
380 if ((c >= 0x80 && c <= 0xbf) &&
381 ((c1 == 0x0 && c >= 0x90) ||
382 (c1 == 0x4 && c < 0x90) ||
383 (c1 > 0x0 && c1 != 0x4))) {
384 filter->cache = s;
385 filter->status++;
386 } else {
387 CK(mbfl_filt_put_invalid_char(filter));
388 goto retry;
389 }
390 break;
391 case 0x31: /* 4byte code 3rd char: 0x80-0xbf */
392 if (c >= 0x80 && c <= 0xbf) {
393 filter->cache = (filter->cache<<6) | (c & 0x3f);
394 filter->status++;
395 } else {
396 CK(mbfl_filt_put_invalid_char(filter));
397 goto retry;
398 }
399 break;
400
402 }
403
404 return 0;
405}
406
407static int mbfl_filt_conv_utf8_wchar_flush(mbfl_convert_filter *filter)
408{
409 if (filter->status) {
410 (*filter->output_function)(MBFL_BAD_INPUT, filter->data);
411 filter->status = 0;
412 }
413
414 if (filter->flush_function) {
415 (*filter->flush_function)(filter->data);
416 }
417
418 return 0;
419}
420
421static int mbfl_filt_conv_wchar_utf8(int c, mbfl_convert_filter *filter)
422{
423 if (c >= 0 && c < 0x110000) {
424 if (c < 0x80) {
425 CK((*filter->output_function)(c, filter->data));
426 } else if (c < 0x800) {
427 CK((*filter->output_function)(((c >> 6) & 0x1f) | 0xc0, filter->data));
428 CK((*filter->output_function)((c & 0x3f) | 0x80, filter->data));
429 } else if (c < 0x10000) {
430 CK((*filter->output_function)(((c >> 12) & 0x0f) | 0xe0, filter->data));
431 CK((*filter->output_function)(((c >> 6) & 0x3f) | 0x80, filter->data));
432 CK((*filter->output_function)((c & 0x3f) | 0x80, filter->data));
433 } else {
434 CK((*filter->output_function)(((c >> 18) & 0x07) | 0xf0, filter->data));
435 CK((*filter->output_function)(((c >> 12) & 0x3f) | 0x80, filter->data));
436 CK((*filter->output_function)(((c >> 6) & 0x3f) | 0x80, filter->data));
437 CK((*filter->output_function)((c & 0x3f) | 0x80, filter->data));
438 }
439 } else {
441 }
442
443 return 0;
444}
445
/*
 * Decode UTF-8 bytes into codepoints.
 *
 * Reads from *in (at most *in_len bytes) and writes at most `bufsize`
 * entries into `buf`, stopping when either the input is exhausted or the
 * output is full. On return *in and *in_len describe the unconsumed input,
 * and the return value is the number of entries written. `state` is not
 * used by this function.
 *
 * Every invalid sequence produces exactly one MBFL_BAD_INPUT entry:
 *  - bytes that cannot start a sequence (0x80-0xC1, 0xF5-0xFF);
 *  - a lead byte followed by a byte that is not acceptable at that position.
 *    The read pointer is rewound to the offending byte so that it is
 *    decoded again as the start of a new sequence;
 *  - a sequence cut short by the end of input. The continuation bytes that
 *    are present and acceptable are consumed together with the lead byte.
 */
446static size_t mb_utf8_to_wchar(unsigned char **in, size_t *in_len, uint32_t *buf, size_t bufsize, unsigned int *state)
447{
448 unsigned char *p = *in, *e = p + *in_len;
449 uint32_t *out = buf, *limit = buf + bufsize;
450
451 while (p < e && out < limit) {
452 unsigned char c = *p++;
453
454 if (c < 0x80) {
455 *out++ = c;
456 } else if (c < 0xC2) {
/* Continuation byte without a lead byte, or 0xC0/0xC1 */
457 *out++ = MBFL_BAD_INPUT;
458 } else if (c <= 0xDF) { /* 2 byte character */
459 if (p < e) {
460 unsigned char c2 = *p++;
461 if ((c2 & 0xC0) != 0x80) {
462 *out++ = MBFL_BAD_INPUT;
463 p--;
464 } else {
465 *out++ = ((c & 0x1F) << 6) | (c2 & 0x3F);
466 }
467 } else {
468 *out++ = MBFL_BAD_INPUT;
469 }
470 } else if (c <= 0xEF) { /* 3 byte character */
471 if ((e - p) >= 2) {
472 unsigned char c2 = *p++;
473 unsigned char c3 = *p++;
/* 0xE0 followed by a byte below 0xA0 would decode to less than 0x800;
 * 0xED followed by 0xA0 or above would decode into 0xD800-0xDFFF */
474 if ((c2 & 0xC0) != 0x80 || (c == 0xE0 && c2 < 0xA0) || (c == 0xED && c2 >= 0xA0)) {
475 *out++ = MBFL_BAD_INPUT;
476 p -= 2;
477 } else if ((c3 & 0xC0) != 0x80) {
478 *out++ = MBFL_BAD_INPUT;
479 p--;
480 } else {
481 uint32_t decoded = ((c & 0xF) << 12) | ((c2 & 0x3F) << 6) | (c3 & 0x3F);
482 ZEND_ASSERT(decoded >= 0x800); /* Not an overlong code unit */
483 ZEND_ASSERT(decoded < 0xD800 || decoded > 0xDFFF); /* U+D800-DFFF are reserved, illegal code points */
484 *out++ = decoded;
485 }
486 } else {
/* Fewer than 2 bytes remain: emit one error marker and skip whatever
 * acceptable continuation bytes are present */
487 *out++ = MBFL_BAD_INPUT;
488 if (p < e && (c != 0xE0 || *p >= 0xA0) && (c != 0xED || *p < 0xA0) && (*p & 0xC0) == 0x80) {
489 p++;
490 if (p < e && (*p & 0xC0) == 0x80) {
491 p++;
492 }
493 }
494 }
495 } else if (c <= 0xF4) { /* 4 byte character */
496 if ((e - p) >= 3) {
497 unsigned char c2 = *p++;
498 unsigned char c3 = *p++;
499 unsigned char c4 = *p++;
500 /* If c == 0xF0 and c2 < 0x90, then this is an over-long code unit; it could have
501 * fit in 3 bytes only. If c == 0xF4 and c2 >= 0x90, then this codepoint is
502 * greater than U+10FFFF, which is the highest legal codepoint */
503 if ((c2 & 0xC0) != 0x80 || (c == 0xF0 && c2 < 0x90) || (c == 0xF4 && c2 >= 0x90)) {
504 *out++ = MBFL_BAD_INPUT;
505 p -= 3;
506 } else if ((c3 & 0xC0) != 0x80) {
507 *out++ = MBFL_BAD_INPUT;
508 p -= 2;
509 } else if ((c4 & 0xC0) != 0x80) {
510 *out++ = MBFL_BAD_INPUT;
511 p--;
512 } else {
513 uint32_t decoded = ((c & 0x7) << 18) | ((c2 & 0x3F) << 12) | ((c3 & 0x3F) << 6) | (c4 & 0x3F);
514 ZEND_ASSERT(decoded >= 0x10000); /* Not an overlong code unit */
515 *out++ = decoded;
516 }
517 } else {
/* Fewer than 3 bytes remain: emit one error marker; if the next byte is in
 * the range allowed after this lead byte, skip the continuation bytes that follow */
518 *out++ = MBFL_BAD_INPUT;
519 if (p < e) {
520 unsigned char c2 = *p;
521 if ((c == 0xF0 && c2 >= 0x90) || (c == 0xF4 && c2 < 0x90) || (c >= 0xF1 && c <= 0xF3)) {
522 while (p < e && (*p & 0xC0) == 0x80) {
523 p++;
524 }
525 }
526 }
527 }
528 } else {
/* 0xF5-0xFF never appear in UTF-8 */
529 *out++ = MBFL_BAD_INPUT;
530 }
531 }
532
533 *in_len = e - p;
534 *in = p;
535 return out - buf;
536}
537
538static void mb_wchar_to_utf8(uint32_t *in, size_t len, mb_convert_buf *buf, bool end)
539{
540 unsigned char *out, *limit;
541 MB_CONVERT_BUF_LOAD(buf, out, limit);
543
544 while (len--) {
545 uint32_t w = *in++;
546 if (w < 0x80) {
547 out = mb_convert_buf_add(out, w & 0xFF);
548 } else if (w < 0x800) {
549 MB_CONVERT_BUF_ENSURE(buf, out, limit, len + 2);
550 out = mb_convert_buf_add2(out, ((w >> 6) & 0x1F) | 0xC0, (w & 0x3F) | 0x80);
551 } else if (w < 0x10000) {
552 MB_CONVERT_BUF_ENSURE(buf, out, limit, len + 3);
553 out = mb_convert_buf_add3(out, ((w >> 12) & 0xF) | 0xE0, ((w >> 6) & 0x3F) | 0x80, (w & 0x3F) | 0x80);
554 } else if (w < 0x110000) {
555 MB_CONVERT_BUF_ENSURE(buf, out, limit, len + 4);
556 out = mb_convert_buf_add4(out, ((w >> 18) & 0x7) | 0xF0, ((w >> 12) & 0x3F) | 0x80, ((w >> 6) & 0x3F) | 0x80, (w & 0x3F) | 0x80);
557 } else {
558 MB_CONVERT_ERROR(buf, out, limit, w, mb_wchar_to_utf8);
560 }
561 }
562
564}
565
566static zend_string* mb_cut_utf8(unsigned char *str, size_t from, size_t len, unsigned char *end)
567{
568 unsigned char *start = str + from;
569 /* Byte values less than -64 are UTF-8 continuation bytes, that is,
570 * the 2nd, 3rd, or 4th byte of a multi-byte character */
571 while (start > str && ((signed char)*start) < -64) {
572 start--;
573 }
574 unsigned char *_end = start + len;
575 if (_end >= end) {
576 return zend_string_init_fast((char*)start, end - start);
577 }
578 while (_end > start && ((signed char)*_end) < -64) {
579 _end--;
580 }
581 return zend_string_init_fast((char*)start, _end - start);
582}
583
584static int mbfl_filt_conv_utf8_mobile_wchar(int c, mbfl_convert_filter *filter)
585{
586 int s, s1 = 0, c1 = 0, snd = 0;
587
588retry:
589 switch (filter->status & 0xff) {
590 case 0x00:
591 if (c < 0x80) {
592 CK((*filter->output_function)(c, filter->data));
593 } else if (c >= 0xc2 && c <= 0xdf) { /* 2byte code first char: 0xc2-0xdf */
594 filter->status = 0x10;
595 filter->cache = c & 0x1f;
596 } else if (c >= 0xe0 && c <= 0xef) { /* 3byte code first char: 0xe0-0xef */
597 filter->status = 0x20;
598 filter->cache = c & 0xf;
599 } else if (c >= 0xf0 && c <= 0xf4) { /* 3byte code first char: 0xf0-0xf4 */
600 filter->status = 0x30;
601 filter->cache = c & 0x7;
602 } else {
603 CK(mbfl_filt_put_invalid_char(filter));
604 }
605 break;
606
607 case 0x10: /* 2byte code 2nd char: 0x80-0xbf */
608 case 0x21: /* 3byte code 3rd char: 0x80-0xbf */
609 case 0x32: /* 4byte code 4th char: 0x80-0xbf */
610 filter->status = 0;
611 if (c >= 0x80 && c <= 0xbf) {
612 s = (filter->cache << 6) | (c & 0x3f);
613 filter->cache = 0;
614
615 if (filter->from->no_encoding == mbfl_no_encoding_utf8_docomo && mbfilter_conv_r_map_tbl(s, &s1, 4, mbfl_docomo2uni_pua)) {
617 } else if (filter->from->no_encoding == mbfl_no_encoding_utf8_kddi_a && mbfilter_conv_r_map_tbl(s, &s1, 7, mbfl_kddi2uni_pua)) {
619 } else if (filter->from->no_encoding == mbfl_no_encoding_utf8_kddi_b && mbfilter_conv_r_map_tbl(s, &s1, 8, mbfl_kddi2uni_pua_b)) {
621 } else if (filter->from->no_encoding == mbfl_no_encoding_utf8_sb && mbfilter_conv_r_map_tbl(s, &s1, 6, mbfl_sb2uni_pua)) {
623 }
624
625 if (snd > 0) {
626 CK((*filter->output_function)(snd, filter->data));
627 }
628 CK((*filter->output_function)(s, filter->data));
629 } else {
630 CK(mbfl_filt_put_invalid_char(filter));
631 goto retry;
632 }
633 break;
634
635 case 0x20: /* 3byte code 2nd char: 0:0xa0-0xbf,D:0x80-9F,1-C,E-F:0x80-0x9f */
636 s = (filter->cache << 6) | (c & 0x3f);
637 c1 = filter->cache & 0xf;
638
639 if ((c >= 0x80 && c <= 0xbf) &&
640 ((c1 == 0x0 && c >= 0xa0) ||
641 (c1 == 0xd && c < 0xa0) ||
642 (c1 > 0x0 && c1 != 0xd))) {
643 filter->cache = s;
644 filter->status++;
645 } else {
646 CK(mbfl_filt_put_invalid_char(filter));
647 goto retry;
648 }
649 break;
650
651 case 0x30: /* 4byte code 2nd char: 0:0x90-0xbf,1-3:0x80-0xbf,4:0x80-0x8f */
652 s = (filter->cache << 6) | (c & 0x3f);
653 c1 = filter->cache & 0x7;
654
655 if ((c >= 0x80 && c <= 0xbf) &&
656 ((c1 == 0x0 && c >= 0x90) ||
657 (c1 == 0x4 && c < 0x90) ||
658 (c1 > 0x0 && c1 != 0x4))) {
659 filter->cache = s;
660 filter->status++;
661 } else {
662 CK(mbfl_filt_put_invalid_char(filter));
663 goto retry;
664 }
665 break;
666
667 case 0x31: /* 4byte code 3rd char: 0x80-0xbf */
668 if (c >= 0x80 && c <= 0xbf) {
669 filter->cache = (filter->cache << 6) | (c & 0x3f);
670 filter->status++;
671 } else {
672 CK(mbfl_filt_put_invalid_char(filter));
673 goto retry;
674 }
675 break;
676
678 }
679
680 return 0;
681}
682
683static int mbfl_filt_conv_wchar_utf8_mobile(int c, mbfl_convert_filter *filter)
684{
685 if (c >= 0 && c < 0x110000) {
686 int s1, c1;
687
688 if ((filter->to->no_encoding == mbfl_no_encoding_utf8_docomo && mbfilter_unicode2sjis_emoji_docomo(c, &s1, filter) > 0 && mbfilter_conv_map_tbl(s1, &c1, 4, mbfl_docomo2uni_pua)) ||
689 (filter->to->no_encoding == mbfl_no_encoding_utf8_kddi_a && mbfilter_unicode2sjis_emoji_kddi_sjis(c, &s1, filter) > 0 && mbfilter_conv_map_tbl(s1, &c1, 7, mbfl_kddi2uni_pua)) ||
690 (filter->to->no_encoding == mbfl_no_encoding_utf8_kddi_b && mbfilter_unicode2sjis_emoji_kddi_sjis(c, &s1, filter) > 0 && mbfilter_conv_map_tbl(s1, &c1, 8, mbfl_kddi2uni_pua_b)) ||
691 (filter->to->no_encoding == mbfl_no_encoding_utf8_sb && mbfilter_unicode2sjis_emoji_sb(c, &s1, filter) > 0 && mbfilter_conv_map_tbl(s1, &c1, 6, mbfl_sb2uni_pua))) {
692 c = c1;
693 }
694
695 if (filter->status) {
696 return 0;
697 }
698
699 if (c < 0x80) {
700 CK((*filter->output_function)(c, filter->data));
701 } else if (c < 0x800) {
702 CK((*filter->output_function)(((c >> 6) & 0x1f) | 0xc0, filter->data));
703 CK((*filter->output_function)((c & 0x3f) | 0x80, filter->data));
704 } else if (c < 0x10000) {
705 CK((*filter->output_function)(((c >> 12) & 0x0f) | 0xe0, filter->data));
706 CK((*filter->output_function)(((c >> 6) & 0x3f) | 0x80, filter->data));
707 CK((*filter->output_function)((c & 0x3f) | 0x80, filter->data));
708 } else {
709 CK((*filter->output_function)(((c >> 18) & 0x07) | 0xf0, filter->data));
710 CK((*filter->output_function)(((c >> 12) & 0x3f) | 0x80, filter->data));
711 CK((*filter->output_function)(((c >> 6) & 0x3f) | 0x80, filter->data));
712 CK((*filter->output_function)((c & 0x3f) | 0x80, filter->data));
713 }
714 } else {
716 }
717
718 return 0;
719}
720
721/* Regional Indicator Unicode codepoints are from 0x1F1E6-0x1F1FF
722 * These correspond to the letters A-Z
723 * To display the flag emoji for a country, two unicode codepoints are combined,
724 * which correspond to the two-letter code for that country
725 * This macro converts uppercase ASCII values to Regional Indicator codepoints */
/* NFLAGS('A') == 0x1F1A5 + 0x41 == 0x1F1E6, the first Regional Indicator */
726#define NFLAGS(c) (0x1F1A5+(int)(c))
727
/* The ten two-letter codes for which a flag emoji is recognised. Each entry
 * is exactly two chars with no terminating NUL, so entries must be read
 * per character and never as C strings. */
728static const char nflags_s[10][2] = {"CN","DE","ES","FR","GB","IT","JP","KR","RU","US"};
/* Carrier emoji codes for those flags; index i corresponds to nflags_s[i] */
729static const int nflags_code_kddi[10] = { 0x2549, 0x2546, 0x24C0, 0x2545, 0x2548, 0x2547, 0x2750, 0x254A, 0x24C1, 0x27F7 };
730static const int nflags_code_sb[10] = { 0x2B0A, 0x2B05, 0x2B08, 0x2B04, 0x2B07, 0x2B06, 0x2B02, 0x2B0B, 0x2B09, 0x2B03 };
731
/*
 * Shared decoder behind the mb_utf8_{docomo,kddi_a,kddi_b,sb}_to_wchar()
 * functions.
 *
 * Decodes UTF-8 from *in (at most *in_len bytes) into `buf`. Every decoded
 * non-ASCII value is looked up in `emoji_map` (`n` rows) with
 * mbfilter_conv_r_map_tbl(). On a hit it is converted with `convert_emoji`,
 * which returns the codepoint to emit and may also set a second codepoint
 * (`snd`) that is emitted before it.
 *
 * Invalid input is handled as in mb_utf8_to_wchar(): one MBFL_BAD_INPUT per
 * bad sequence, with the read pointer rewound to the offending byte.
 *
 * On return *in and *in_len describe the unconsumed input; the return value
 * is the number of entries written. `state` is not used by this function.
 */
732static size_t mb_mobile_utf8_to_wchar(unsigned char **in, size_t *in_len, uint32_t *buf, size_t bufsize, unsigned int *state, const unsigned short emoji_map[][3], int (*convert_emoji)(int s, int *snd), int n)
733{
734 unsigned char *p = *in, *e = p + *in_len;
/* One slot is held back because a single iteration may write two entries
 * (snd followed by s) */
735 uint32_t *out = buf, *limit = buf + bufsize - 1;
736
737 while (p < e && out < limit) {
738 unsigned char c = *p++;
739 unsigned int s = 0;
740
741 if (c <= 0x7F) {
742 *out++ = c;
743 continue;
/* A 2-byte lead that is the last input byte fails the `p < e` test and
 * falls through to the final else branch, which emits MBFL_BAD_INPUT */
744 } else if (c >= 0xC2 && c <= 0xDF && p < e) {
745 unsigned char c2 = *p++;
746
747 if ((c2 & 0xC0) == 0x80) {
748 s = ((c & 0x1F) << 6) | (c2 & 0x3F);
749 } else {
750 *out++ = MBFL_BAD_INPUT;
751 p--;
752 continue;
753 }
754 } else if (c >= 0xE0 && c <= 0xEF) {
755 if ((e - p) < 2) {
/* Truncated 3-byte sequence: one error marker, then skip the acceptable
 * continuation bytes that are present */
756 *out++ = MBFL_BAD_INPUT;
757 if (p < e && (c != 0xE0 || *p >= 0xA0) && (c != 0xED || *p < 0xA0) && (*p & 0xC0) == 0x80) {
758 p++;
759 if (p < e && (*p & 0xC0) == 0x80) {
760 p++;
761 }
762 }
763 continue;
764 }
765 unsigned char c2 = *p++;
766 unsigned char c3 = *p++;
767
768 if ((c2 & 0xC0) != 0x80 || (c == 0xE0 && c2 < 0xA0) || (c == 0xED && c2 >= 0xA0)) {
769 *out++ = MBFL_BAD_INPUT;
770 p -= 2;
771 continue;
772 } else if ((c3 & 0xC0) != 0x80) {
773 *out++ = MBFL_BAD_INPUT;
774 p--;
775 continue;
776 } else {
777 s = ((c & 0xF) << 12) | ((c2 & 0x3F) << 6) | (c3 & 0x3F);
778 }
779 } else if (c >= 0xF0 && c <= 0xF4) {
780 if ((e - p) < 3) {
/* Truncated 4-byte sequence: one error marker, then skip the continuation
 * bytes if the first of them is in the range allowed after this lead byte */
781 *out++ = MBFL_BAD_INPUT;
782 if (p < e) {
783 unsigned char c2 = *p;
784 if ((c == 0xF0 && c2 >= 0x90) || (c == 0xF4 && c2 < 0x90) || (c >= 0xF1 && c <= 0xF3)) {
785 while (p < e && (*p & 0xC0) == 0x80) {
786 p++;
787 }
788 }
789 }
790 continue;
791 }
792 unsigned char c2 = *p++;
793 unsigned char c3 = *p++;
794 unsigned char c4 = *p++;
795
796 if ((c2 & 0xC0) != 0x80 || (c == 0xF0 && c2 < 0x90) || (c == 0xF4 && c2 >= 0x90)) {
797 *out++ = MBFL_BAD_INPUT;
798 p -= 3;
799 continue;
800 } else if ((c3 & 0xC0) != 0x80) {
801 *out++ = MBFL_BAD_INPUT;
802 p -= 2;
803 continue;
804 } else if ((c4 & 0xC0) != 0x80) {
805 *out++ = MBFL_BAD_INPUT;
806 p--;
807 continue;
808 } else {
809 s = ((c & 0x7) << 18) | ((c2 & 0x3F) << 12) | ((c3 & 0x3F) << 6) | (c4 & 0x3F);
810 }
811 } else {
812 *out++ = MBFL_BAD_INPUT;
813 continue;
814 }
815
/* Only successfully decoded multi-byte values reach this point */
816 int s1 = 0, snd = 0;
817 if (mbfilter_conv_r_map_tbl(s, &s1, n, emoji_map)) {
818 s = convert_emoji(s1, &snd);
819 if (snd) {
820 *out++ = snd;
821 }
822 }
823 *out++ = s;
824 }
825
826 *in_len = e - p;
827 *in = p;
828 return out - buf;
829}
830
831static size_t mb_utf8_docomo_to_wchar(unsigned char **in, size_t *in_len, uint32_t *buf, size_t bufsize, unsigned int *state)
832{
833 return mb_mobile_utf8_to_wchar(in, in_len, buf, bufsize, state, mbfl_docomo2uni_pua, mbfilter_sjis_emoji_docomo2unicode, 4);
834}
835
836static void mb_wchar_to_utf8_docomo(uint32_t *in, size_t len, mb_convert_buf *buf, bool end)
837{
838 unsigned char *out, *limit;
839 MB_CONVERT_BUF_LOAD(buf, out, limit);
841
842 while (len--) {
843 uint32_t w = *in++;
844 unsigned int s = 0;
845 int c1 = 0;
846
847 if (w < 0x110000) {
848 if ((w == '#' || (w >= '0' && w <= '9')) && len) {
849 uint32_t w2 = *in++; len--;
850
851 if (w2 == 0x20E3) {
852 if (w == '#') {
853 s = 0x2964;
854 } else if (w == '0') {
855 s = 0x296F;
856 } else {
857 s = 0x2966 + (w - '1');
858 }
859 } else {
860 in--; len++;
861 }
862 } else if (w == 0xA9) { /* Copyright sign */
863 s = 0x29B5;
864 } else if (w == 0xAE) { /* Registered sign */
865 s = 0x29BA;
866 } else if (w >= mb_tbl_uni_docomo2code2_min && w <= mb_tbl_uni_docomo2code2_max) {
867 int i = mbfl_bisec_srch2(w, mb_tbl_uni_docomo2code2_key, mb_tbl_uni_docomo2code2_len);
868 if (i >= 0) {
869 s = mb_tbl_uni_docomo2code2_value[i];
870 }
871 } else if (w >= mb_tbl_uni_docomo2code3_min && w <= mb_tbl_uni_docomo2code3_max) {
872 int i = mbfl_bisec_srch2(w - 0x10000, mb_tbl_uni_docomo2code3_key, mb_tbl_uni_docomo2code3_len);
873 if (i >= 0) {
874 s = mb_tbl_uni_docomo2code3_value[i];
875 }
876 } else if (w >= mb_tbl_uni_docomo2code5_min && w <= mb_tbl_uni_docomo2code5_max) {
877 int i = mbfl_bisec_srch2(w - 0xF0000, mb_tbl_uni_docomo2code5_key, mb_tbl_uni_docomo2code5_len);
878 if (i >= 0) {
879 s = mb_tbl_uni_docomo2code5_val[i];
880 }
881 }
882
883 if (s && mbfilter_conv_map_tbl(s, &c1, 4, mbfl_docomo2uni_pua)) {
884 w = c1;
885 }
886
887 if (w <= 0x7F) {
888 out = mb_convert_buf_add(out, w);
889 } else if (w <= 0x7FF) {
890 MB_CONVERT_BUF_ENSURE(buf, out, limit, len + 2);
891 out = mb_convert_buf_add2(out, ((w >> 6) & 0x1F) | 0xC0, (w & 0x3F) | 0x80);
892 } else if (w <= 0xFFFF) {
893 MB_CONVERT_BUF_ENSURE(buf, out, limit, len + 3);
894 out = mb_convert_buf_add3(out, ((w >> 12) & 0xF) | 0xE0, ((w >> 6) & 0x3F) | 0x80, (w & 0x3F) | 0x80);
895 } else {
896 MB_CONVERT_BUF_ENSURE(buf, out, limit, len + 4);
897 out = mb_convert_buf_add4(out, ((w >> 18) & 0x7) | 0xF0, ((w >> 12) & 0x3F) | 0x80, ((w >> 6) & 0x3F) | 0x80, (w & 0x3F) | 0x80);
898 }
899 } else {
900 MB_CONVERT_ERROR(buf, out, limit, w, mb_wchar_to_utf8_docomo);
902 }
903 }
904
906}
907
908static size_t mb_utf8_kddi_a_to_wchar(unsigned char **in, size_t *in_len, uint32_t *buf, size_t bufsize, unsigned int *state)
909{
910 return mb_mobile_utf8_to_wchar(in, in_len, buf, bufsize, state, mbfl_kddi2uni_pua, mbfilter_sjis_emoji_kddi2unicode, 7);
911}
912
913static void mb_wchar_to_utf8_kddi(uint32_t *in, size_t len, mb_convert_buf *buf, bool end, const unsigned short emoji_map[][3], int n, mb_from_wchar_fn error_handler)
914{
915 unsigned char *out, *limit;
916 MB_CONVERT_BUF_LOAD(buf, out, limit);
918
919 while (len--) {
920 uint32_t w = *in++;
921 unsigned int s = 0;
922 int c1 = 0;
923
924 if (w < 0x110000) {
925 if ((w == '#' || (w >= '0' && w <= '9')) && len) {
926 uint32_t w2 = *in++; len--;
927
928 if (w2 == 0x20E3) {
929 if (w == '#') {
930 s = 0x25BC;
931 } else if (w == '0') {
932 s = 0x2830;
933 } else {
934 s = 0x27A6 + (w - '1');
935 }
936 } else {
937 in--; len++;
938 }
939 } else if (w >= NFLAGS('C') && w <= NFLAGS('U')) { /* C for CN, U for US */
940 if (len) {
941 uint32_t w2 = *in++; len--;
942
943 if (w2 >= NFLAGS('B') && w2 <= NFLAGS('U')) { /* B for GB, U for RU */
944 for (int i = 0; i < 10; i++) {
945 if (w == NFLAGS(nflags_s[i][0]) && w2 == NFLAGS(nflags_s[i][1])) {
946 s = nflags_code_kddi[i];
947 goto process_kuten;
948 }
949 }
950 }
951
952 in--; len++;
953 }
954
955 MB_CONVERT_ERROR(buf, out, limit, w, error_handler);
957 continue;
958 } else if (w == 0xA9) { /* Copyright sign */
959 s = 0x27DC;
960 } else if (w == 0xAE) { /* Registered sign */
961 s = 0x27DD;
962 } else if (w >= mb_tbl_uni_kddi2code2_min && w <= mb_tbl_uni_kddi2code2_max) {
963 int i = mbfl_bisec_srch2(w, mb_tbl_uni_kddi2code2_key, mb_tbl_uni_kddi2code2_len);
964 if (i >= 0) {
965 s = mb_tbl_uni_kddi2code2_value[i];
966 }
967 } else if (w >= mb_tbl_uni_kddi2code3_min && w <= mb_tbl_uni_kddi2code3_max) {
968 int i = mbfl_bisec_srch2(w - 0x10000, mb_tbl_uni_kddi2code3_key, mb_tbl_uni_kddi2code3_len);
969 if (i >= 0) {
970 s = mb_tbl_uni_kddi2code3_value[i];
971 }
972 } else if (w >= mb_tbl_uni_kddi2code5_min && w <= mb_tbl_uni_kddi2code5_max) {
973 int i = mbfl_bisec_srch2(w - 0xF0000, mb_tbl_uni_kddi2code5_key, mb_tbl_uni_kddi2code5_len);
974 if (i >= 0) {
975 s = mb_tbl_uni_kddi2code5_val[i];
976 }
977 }
978
979process_kuten:
980 if (s && mbfilter_conv_map_tbl(s, &c1, n, emoji_map)) {
981 w = c1;
982 }
983
984 if (w <= 0x7F) {
985 out = mb_convert_buf_add(out, w);
986 } else if (w <= 0x7FF) {
987 MB_CONVERT_BUF_ENSURE(buf, out, limit, len + 2);
988 out = mb_convert_buf_add2(out, ((w >> 6) & 0x1F) | 0xC0, (w & 0x3F) | 0x80);
989 } else if (w <= 0xFFFF) {
990 MB_CONVERT_BUF_ENSURE(buf, out, limit, len + 3);
991 out = mb_convert_buf_add3(out, ((w >> 12) & 0xF) | 0xE0, ((w >> 6) & 0x3F) | 0x80, (w & 0x3F) | 0x80);
992 } else {
993 MB_CONVERT_BUF_ENSURE(buf, out, limit, len + 4);
994 out = mb_convert_buf_add4(out, ((w >> 18) & 0x7) | 0xF0, ((w >> 12) & 0x3F) | 0x80, ((w >> 6) & 0x3F) | 0x80, (w & 0x3F) | 0x80);
995 }
996 } else {
997 MB_CONVERT_ERROR(buf, out, limit, w, error_handler);
999 }
1000 }
1001
1002 MB_CONVERT_BUF_STORE(buf, out, limit);
1003}
1004
1005static void mb_wchar_to_utf8_kddi_a(uint32_t *in, size_t len, mb_convert_buf *buf, bool end)
1006{
1007 mb_wchar_to_utf8_kddi(in, len, buf, end, mbfl_kddi2uni_pua, 7, mb_wchar_to_utf8_kddi_a);
1008}
1009
1010static size_t mb_utf8_kddi_b_to_wchar(unsigned char **in, size_t *in_len, uint32_t *buf, size_t bufsize, unsigned int *state)
1011{
1012 return mb_mobile_utf8_to_wchar(in, in_len, buf, bufsize, state, mbfl_kddi2uni_pua_b, mbfilter_sjis_emoji_kddi2unicode, 8);
1013}
1014
1015static void mb_wchar_to_utf8_kddi_b(uint32_t *in, size_t len, mb_convert_buf *buf, bool end)
1016{
1017 mb_wchar_to_utf8_kddi(in, len, buf, end, mbfl_kddi2uni_pua_b, 8, mb_wchar_to_utf8_kddi_b);
1018}
1019
1020static size_t mb_utf8_sb_to_wchar(unsigned char **in, size_t *in_len, uint32_t *buf, size_t bufsize, unsigned int *state)
1021{
1022 return mb_mobile_utf8_to_wchar(in, in_len, buf, bufsize, state, mbfl_sb2uni_pua, mbfilter_sjis_emoji_sb2unicode, 6);
1023}
1024
/*
 * Encode `len` codepoints from `in` as UTF-8-Mobile#SOFTBANK and append
 * them to `buf`.
 *
 * Before encoding, each codepoint below 0x110000 is checked for a SoftBank
 * emoji equivalent:
 *  - '#' or a digit followed by U+20E3 (two input codepoints) becomes a
 *    single carrier code;
 *  - a pair of Regional Indicators spelling one of the ten codes in
 *    nflags_s becomes the matching nflags_code_sb entry. A Regional
 *    Indicator in the NFLAGS('C')..NFLAGS('U') range that does not form
 *    such a pair is reported as an error;
 *  - U+00A9 and U+00AE have fixed carrier codes;
 *  - otherwise the mb_tbl_uni_sb2code{2,3,5} tables are searched.
 * If a carrier code was found and it maps through mbfl_sb2uni_pua, the
 * mapped value is encoded instead of the original codepoint. Codepoints at
 * or above 0x110000 are reported through MB_CONVERT_ERROR. `end` is not
 * used by this function.
 */
1025static void mb_wchar_to_utf8_sb(uint32_t *in, size_t len, mb_convert_buf *buf, bool end)
1026{
1027 unsigned char *out, *limit;
1028 MB_CONVERT_BUF_LOAD(buf, out, limit);
/* Up-front capacity request of `len`; the single-byte branch below appends
 * without making a request of its own */
1029 MB_CONVERT_BUF_ENSURE(buf, out, limit, len);
1030
1031 while (len--) {
1032 uint32_t w = *in++;
1033 unsigned int s = 0;
1034 int c1 = 0;
1035
1036 if (w < 0x110000) {
1037 if ((w == '#' || (w >= '0' && w <= '9')) && len) {
/* Peek at the next codepoint: U+20E3 makes this a keycap emoji */
1038 uint32_t w2 = *in++; len--;
1039
1040 if (w2 == 0x20E3) {
1041 if (w == '#') {
1042 s = 0x2817;
1043 } else if (w == '0') {
1044 s = 0x282C;
1045 } else {
1046 s = 0x2823 + (w - '1');
1047 }
1048 } else {
/* Not a keycap: un-read the peeked codepoint */
1049 in--; len++;
1050 }
1051 } else if (w >= NFLAGS('C') && w <= NFLAGS('U')) { /* C for CN, U for US */
1052 if (len) {
1053 uint32_t w2 = *in++; len--;
1054
1055 if (w2 >= NFLAGS('B') && w2 <= NFLAGS('U')) { /* B for GB, U for RU */
1056 for (int i = 0; i < 10; i++) {
1057 if (w == NFLAGS(nflags_s[i][0]) && w2 == NFLAGS(nflags_s[i][1])) {
1058 s = nflags_code_sb[i];
1059 goto process_kuten;
1060 }
1061 }
1062 }
1063
/* No known flag: un-read the peeked codepoint */
1064 in--; len++;
1065 }
1066
/* Reached when no second codepoint is available or the pair is not a known flag */
1067 MB_CONVERT_ERROR(buf, out, limit, w, mb_wchar_to_utf8_sb);
1068 MB_CONVERT_BUF_ENSURE(buf, out, limit, len);
1069 continue;
1070 } else if (w == 0xA9) { /* Copyright sign */
1071 s = 0x2855;
1072 } else if (w == 0xAE) { /* Registered sign */
1073 s = 0x2856;
1074 } else if (w >= mb_tbl_uni_sb2code2_min && w <= mb_tbl_uni_sb2code2_max) {
1075 int i = mbfl_bisec_srch2(w, mb_tbl_uni_sb2code2_key, mb_tbl_uni_sb2code2_len);
1076 if (i >= 0) {
1077 s = mb_tbl_uni_sb2code2_value[i];
1078 }
1079 } else if (w >= mb_tbl_uni_sb2code3_min && w <= mb_tbl_uni_sb2code3_max) {
1080 int i = mbfl_bisec_srch2(w - 0x10000, mb_tbl_uni_sb2code3_key, mb_tbl_uni_sb2code3_len);
1081 if (i >= 0) {
1082 s = mb_tbl_uni_sb2code3_value[i];
1083 }
1084 } else if (w >= mb_tbl_uni_sb2code5_min && w <= mb_tbl_uni_sb2code5_max) {
1085 int i = mbfl_bisec_srch2(w - 0xF0000, mb_tbl_uni_sb2code5_key, mb_tbl_uni_sb2code5_len);
1086 if (i >= 0) {
1087 s = mb_tbl_uni_sb2code5_val[i];
1088 }
1089 }
1090
1091process_kuten:
/* s != 0 means a carrier code was found above; replace w if it maps */
1092 if (s && mbfilter_conv_map_tbl(s, &c1, 6, mbfl_sb2uni_pua)) {
1093 w = c1;
1094 }
1095
1096 if (w <= 0x7F) {
1097 out = mb_convert_buf_add(out, w);
1098 } else if (w <= 0x7FF) {
1099 MB_CONVERT_BUF_ENSURE(buf, out, limit, len + 2);
1100 out = mb_convert_buf_add2(out, ((w >> 6) & 0x1F) | 0xC0, (w & 0x3F) | 0x80);
1101 } else if (w <= 0xFFFF) {
1102 MB_CONVERT_BUF_ENSURE(buf, out, limit, len + 3);
1103 out = mb_convert_buf_add3(out, ((w >> 12) & 0xF) | 0xE0, ((w >> 6) & 0x3F) | 0x80, (w & 0x3F) | 0x80);
1104 } else {
1105 MB_CONVERT_BUF_ENSURE(buf, out, limit, len + 4);
1106 out = mb_convert_buf_add4(out, ((w >> 18) & 0x7) | 0xF0, ((w >> 12) & 0x3F) | 0x80, ((w >> 6) & 0x3F) | 0x80, (w & 0x3F) | 0x80);
1107 }
1108 } else {
1109 MB_CONVERT_ERROR(buf, out, limit, w, mb_wchar_to_utf8_sb);
1110 MB_CONVERT_BUF_ENSURE(buf, out, limit, len);
1111 }
1112 }
1113
1114 MB_CONVERT_BUF_STORE(buf, out, limit);
1115}
size_t len
Definition apprentice.c:174
char s[4]
Definition cdf.c:77
zend_long n
Definition ffi.c:4979
buf start
Definition ffi.c:4687
zend_ffi_ctype_name_buf buf
Definition ffi.c:4685
#define NULL
Definition gdcache.h:45
int mbfilter_sjis_emoji_docomo2unicode(int s, int *snd)
int mbfilter_sjis_emoji_kddi2unicode(int s, int *snd)
int mbfilter_unicode2sjis_emoji_kddi_sjis(int c, int *s1, mbfl_convert_filter *filter)
int mbfilter_unicode2sjis_emoji_sb(int c, int *s1, mbfl_convert_filter *filter)
int mbfilter_unicode2sjis_emoji_docomo(int c, int *s1, mbfl_convert_filter *filter)
int mbfilter_sjis_emoji_sb2unicode(int s, int *snd)
int mbfl_filt_conv_sjis_mobile_flush(mbfl_convert_filter *filter)
const struct mbfl_convert_vtbl vtbl_utf8_docomo_wchar
#define NFLAGS(c)
const struct mbfl_convert_vtbl vtbl_utf8_sb_wchar
#define CK(statement)
const mbfl_encoding mbfl_encoding_utf8_kddi_b
const struct mbfl_convert_vtbl vtbl_utf8_kddi_a_wchar
const mbfl_encoding mbfl_encoding_utf8_sb
const struct mbfl_convert_vtbl vtbl_utf8_kddi_b_wchar
const struct mbfl_convert_vtbl vtbl_wchar_utf8
const mbfl_encoding mbfl_encoding_utf8_kddi_a
const struct mbfl_convert_vtbl vtbl_wchar_utf8_kddi_b
int mbfl_bisec_srch2(int w, const unsigned short tbl[], int n)
const unsigned char mblen_table_utf8[]
const struct mbfl_convert_vtbl vtbl_wchar_utf8_docomo
const struct mbfl_convert_vtbl vtbl_wchar_utf8_sb
const struct mbfl_convert_vtbl vtbl_utf8_wchar
int mbfl_filt_conv_sjis_mobile_flush(mbfl_convert_filter *filter)
const struct mbfl_convert_vtbl vtbl_wchar_utf8_kddi_a
const mbfl_encoding mbfl_encoding_utf8
const mbfl_encoding mbfl_encoding_utf8_docomo
#define MBFL_BAD_INPUT
Definition mbfl_consts.h:45
int mbfl_filt_conv_common_flush(mbfl_convert_filter *filter)
int mbfl_filt_conv_illegal_output(int c, mbfl_convert_filter *filter)
void mbfl_filt_conv_common_ctor(mbfl_convert_filter *filter)
struct _mbfl_convert_filter mbfl_convert_filter
@ mbfl_no_encoding_utf8
@ mbfl_no_encoding_utf8_kddi_a
@ mbfl_no_encoding_utf8_docomo
@ mbfl_no_encoding_utf8_kddi_b
@ mbfl_no_encoding_wchar
@ mbfl_no_encoding_utf8_sb
#define MB_CONVERT_BUF_STORE(buf, _out, _limit)
#define MB_CONVERT_BUF_ENSURE(buf, out, limit, needed)
#define MB_CONVERT_ERROR(buf, out, limit, bad_cp, conv_fn)
void(* mb_from_wchar_fn)(uint32_t *in, size_t in_len, mb_convert_buf *out, bool end)
#define MB_CONVERT_BUF_LOAD(buf, _out, _limit)
unsigned const char * end
Definition php_ffi.h:51
p
Definition session.c:1105
const mbfl_encoding * from
output_function_t output_function
const mbfl_encoding * to
flush_function_t flush_function
enum mbfl_no_encoding from
enum mbfl_no_encoding no_encoding
struct _zend_string zend_string
#define ZEND_ASSERT(c)
#define EMPTY_SWITCH_DEFAULT_CASE()
out($f, $s)