35static size_t mb_utf7_to_wchar(
unsigned char **in,
size_t *in_len, uint32_t *
buf,
size_t bufsize,
unsigned int *
state);
37static bool mb_check_utf7(
unsigned char *in,
size_t in_len);
39static const unsigned char mbfl_base64_table[] = {
41 0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4a,0x4b,0x4c,0x4d,
43 0x4e,0x4f,0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5a,
45 0x61,0x62,0x63,0x64,0x65,0x66,0x67,0x68,0x69,0x6a,0x6b,0x6c,0x6d,
47 0x6e,0x6f,0x70,0x71,0x72,0x73,0x74,0x75,0x76,0x77,0x78,0x79,0x7a,
49 0x30,0x31,0x32,0x33,0x34,0x35,0x36,0x37,0x38,0x39,0x2b,0x2f,0x00
52static const char *mbfl_encoding_utf7_aliases[] = {
"utf7",
NULL};
58 mbfl_encoding_utf7_aliases,
75 mbfl_filt_conv_utf7_wchar_flush,
90#define CK(statement) do { if ((statement) < 0) return (-1); } while (0)
92static unsigned int decode_base64_char(
unsigned char c)
94 if (c >=
'A' && c <=
'Z') {
96 }
else if (c >=
'a' && c <=
'z') {
98 }
else if (c >=
'0' && c <=
'9') {
100 }
else if (c ==
'+') {
102 }
else if (c ==
'/') {
113 n = decode_base64_char(c);
121 if (filter->
status == 1) {
124 }
else if (c >= 0 && c < 0x80) {
139 }
else if (c >= 0 && c < 0x80) {
157 s = ((
n >> 2) & 0xf) | (filter->
cache & 0xffff);
160 if (
s >= 0xd800 &&
s < 0xdc00) {
162 if (filter->
cache & 0xfff0000) {
166 s = (((
s & 0x3ff) << 16) + 0x400000) |
n;
168 }
else if (
s >= 0xdc00 &&
s < 0xe000) {
170 if (filter->
cache & 0xfff0000) {
172 s |= (filter->
cache & 0xfff0000) >> 6;
180 if (filter->
cache & 0xfff0000) {
198 s = ((
n >> 4) & 0x3) | (filter->
cache & 0xffff);
201 if (
s >= 0xd800 &&
s < 0xdc00) {
202 if (filter->
cache & 0xfff0000) {
206 s = (((
s & 0x3ff) << 16) + 0x400000) |
n;
208 }
else if (
s >= 0xdc00 &&
s < 0xe000) {
210 if (filter->
cache & 0xfff0000) {
212 s |= (filter->
cache & 0xfff0000) >> 6;
220 if (filter->
cache & 0xfff0000) {
234 s =
n | (filter->
cache & 0xffff);
236 if (
s >= 0xd800 &&
s < 0xdc00) {
237 if (filter->
cache & 0xfff0000) {
241 s = (((
s & 0x3ff) << 16) + 0x400000);
243 }
else if (
s >= 0xdc00 &&
s < 0xe000) {
244 if (filter->
cache & 0xfff0000) {
246 s |= (filter->
cache & 0xfff0000) >> 6;
254 if (filter->
cache & 0xfff0000) {
290 if (c >= 0 && c < 0x80) {
291 if ((c >=
'A' && c <=
'Z') || (c >=
'a' && c <=
'z') || (c >=
'0' && c <=
'9') || c ==
'\0' || c ==
'/' || c ==
'-') {
293 }
else if (c ==
' ' || c ==
'\t' || c ==
'\r' || c ==
'\n' || c ==
'\'' || c ==
'(' || c ==
')' || c ==
',' || c ==
'.' || c ==
':' || c ==
'?') {
332 filter->
cache = ((
s & 0xf) << 16) | c;
350 filter->
cache = ((
s & 0x3) << 16) | c;
380 int cache = filter->
cache;
415static inline bool is_base64_end(
unsigned char c)
420static bool is_optional_direct(
unsigned char c)
423 return c ==
'!' || c ==
'"' || c ==
'#' || c ==
'$' || c ==
'%' || c ==
'&' || c ==
'*' || c ==
';' || c ==
'<' ||
424 c ==
'=' || c ==
'>' || c ==
'@' || c ==
'[' || c ==
']' || c ==
'^' || c ==
'_' || c ==
'`' || c ==
'{' ||
425 c ==
'|' || c ==
'}';
428static bool can_end_base64(uint32_t c)
430 return c ==
' ' || c ==
'\t' || c ==
'\r' || c ==
'\n' || c ==
'\'' || c ==
'(' || c ==
')' || c ==
',' || c ==
'.' || c ==
':' || c ==
'?';
433static unsigned char decode_base64(
unsigned char c)
435 if (c >=
'A' && c <=
'Z') {
437 }
else if (c >=
'a' && c <=
'z') {
439 }
else if (c >=
'0' && c <=
'9') {
441 }
else if (c ==
'+') {
443 }
else if (c ==
'/') {
445 }
else if (c ==
'-') {
447 }
else if (can_end_base64(c) || is_optional_direct(c) || c ==
'\0') {
449 }
else if (c <= 0x7F) {
455static uint32_t* handle_utf16_cp(uint16_t
cp, uint32_t *
out, uint16_t *surrogate1)
459 if (
cp >= 0xDC00 &&
cp <= 0xDFFF) {
460 *
out++ = ((*surrogate1 & 0x3FF) << 10) + (
cp & 0x3FF) + 0x10000;
467 }
else if (
cp >= 0xD800 &&
cp <= 0xDBFF) {
469 }
else if (
cp >= 0xDC00 &&
cp <= 0xDFFF) {
478static uint32_t* handle_base64_end(
unsigned char n,
unsigned char **
p, uint32_t *
out,
bool *base64,
bool abrupt, uint16_t *surrogate1)
480 if (abrupt || *surrogate1) {
495static size_t mb_utf7_to_wchar(
unsigned char **in,
size_t *in_len, uint32_t *
buf,
size_t bufsize,
unsigned int *
state)
506 unsigned char *
p = *in, *e =
p + *in_len;
507 uint32_t *
out =
buf, *limit =
buf + bufsize;
509 bool base64 = *
state & 1;
510 uint16_t surrogate1 = (*
state >> 1);
512 while (
p < e &&
out < limit) {
515 if ((limit -
out) < 5) {
519 unsigned char n1 = decode_base64(*
p++);
520 if (is_base64_end(n1)) {
521 out = handle_base64_end(n1, &
p,
out, &base64,
false, &surrogate1);
524 out = handle_base64_end(n1, &
p,
out, &base64,
true, &surrogate1);
527 unsigned char n2 = decode_base64(*
p++);
528 if (is_base64_end(n2) ||
p == e) {
529 out = handle_base64_end(n2, &
p,
out, &base64,
true, &surrogate1);
532 unsigned char n3 = decode_base64(*
p++);
533 if (is_base64_end(n3)) {
534 out = handle_base64_end(n3, &
p,
out, &base64,
true, &surrogate1);
537 out = handle_utf16_cp((n1 << 10) | (n2 << 4) | ((n3 & 0x3C) >> 2),
out, &surrogate1);
541 if ((n3 & 0x3) || surrogate1) {
548 unsigned char n4 = decode_base64(*
p++);
549 if (is_base64_end(n4)) {
550 out = handle_base64_end(n4, &
p,
out, &base64, n3 & 0x3, &surrogate1);
553 out = handle_base64_end(n4, &
p,
out, &base64,
true, &surrogate1);
556 unsigned char n5 = decode_base64(*
p++);
557 if (is_base64_end(n5) ||
p == e) {
558 out = handle_base64_end(n5, &
p,
out, &base64,
true, &surrogate1);
561 unsigned char n6 = decode_base64(*
p++);
562 if (is_base64_end(n6)) {
563 out = handle_base64_end(n6, &
p,
out, &base64,
true, &surrogate1);
566 out = handle_utf16_cp((n3 << 14) | (n4 << 8) | (n5 << 2) | ((n6 & 0x30) >> 4),
out, &surrogate1);
568 if ((n6 & 0xF) || surrogate1) {
575 unsigned char n7 = decode_base64(*
p++);
576 if (is_base64_end(n7)) {
577 out = handle_base64_end(n7, &
p,
out, &base64, n6 & 0xF, &surrogate1);
580 out = handle_base64_end(n7, &
p,
out, &base64,
true, &surrogate1);
583 unsigned char n8 = decode_base64(*
p++);
584 if (is_base64_end(n8)) {
585 out = handle_base64_end(n8, &
p,
out, &base64,
true, &surrogate1);
588 out = handle_utf16_cp((n6 << 12) | (n7 << 6) | n8,
out, &surrogate1);
591 unsigned char c = *
p++;
603 }
else if (c <= 0x7F) {
611 if (
p == e && surrogate1) {
616 *
state = (surrogate1 << 1) | base64;
622static bool should_direct_encode(uint32_t c)
624 return (c >=
'A' && c <=
'Z') || (c >=
'a' && c <=
'z') || (c >=
'0' && c <=
'9') || c ==
'\0' || c ==
'/' || c ==
'-' || can_end_base64(c);
627#define SAVE_CONVERSION_STATE() buf->state = (cache << 4) | (nbits << 1) | base64
628#define RESTORE_CONVERSION_STATE() base64 = (buf->state & 1); nbits = (buf->state >> 1) & 0x7; cache = (buf->state >> 4)
632 unsigned char *
out, *limit;
642 unsigned char nbits, cache;
648 if (should_direct_encode(w)) {
654 out = mb_convert_buf_add(
out, mbfl_base64_table[(cache << (6 - nbits)) & 0x3F]);
657 if (!can_end_base64(w)) {
658 out = mb_convert_buf_add(
out,
'-');
676 bits = ((uint64_t)cache << 32) | 0xD800DC00L | ((w & 0xFFC00) << 6) | (w & 0x3FF);
680 bits = (cache << 16) | w;
685 out = mb_convert_buf_add(
out, mbfl_base64_table[(bits >> (nbits - 6)) & 0x3F]);
692 if (should_direct_encode(w)) {
693 out = mb_convert_buf_add(
out, w);
700 out = mb_convert_buf_add(
out,
'+');
709 out = mb_convert_buf_add(
out, mbfl_base64_table[(cache << (6 - nbits)) & 0x3F]);
713 out = mb_convert_buf_add(
out,
'-');
722static bool is_utf16_cp_valid(uint16_t
cp,
bool is_surrogate)
725 return cp >= 0xDC00 &&
cp <= 0xDFFF;
728 return !(
cp >= 0xDC00 &&
cp <= 0xDFFF);
732static bool can_encode_directly(
unsigned char c)
734 return should_direct_encode(c) || is_optional_direct(c) || c ==
'\0';
737static bool mb_check_utf7(
unsigned char *in,
size_t in_len)
739 unsigned char *
p = in, *e =
p + in_len;
741 bool is_surrogate =
false;
745 unsigned char n1 = decode_base64(*
p++);
746 if (is_base64_end(n1)) {
747 if (!is_base64_end_valid(n1,
false, is_surrogate)) {
755 unsigned char n2 = decode_base64(*
p++);
756 if (is_base64_end(n2) ||
p == e) {
759 unsigned char n3 = decode_base64(*
p++);
760 if (is_base64_end(n3)) {
763 uint16_t cp1 = (n1 << 10) | (n2 << 4) | ((n3 & 0x3C) >> 2);
764 if (!is_utf16_cp_valid(cp1, is_surrogate)) {
767 is_surrogate = has_surrogate(cp1, is_surrogate);
771 return !((n3 & 0x3) || is_surrogate);
774 unsigned char n4 = decode_base64(*
p++);
775 if (is_base64_end(n4)) {
776 if (!is_base64_end_valid(n4, n3 & 0x3, is_surrogate)) {
784 unsigned char n5 = decode_base64(*
p++);
785 if (is_base64_end(n5) ||
p == e) {
788 unsigned char n6 = decode_base64(*
p++);
789 if (is_base64_end(n6)) {
792 uint16_t cp2 = (n3 << 14) | (n4 << 8) | (n5 << 2) | ((n6 & 0x30) >> 4);
793 if (!is_utf16_cp_valid(cp2, is_surrogate)) {
796 is_surrogate = has_surrogate(cp2, is_surrogate);
798 return !((n6 & 0xF) || is_surrogate);
801 unsigned char n7 = decode_base64(*
p++);
802 if (is_base64_end(n7)) {
803 if (!is_base64_end_valid(n7, n6 & 0xF, is_surrogate)) {
811 unsigned char n8 = decode_base64(*
p++);
812 if (is_base64_end(n8)) {
815 uint16_t cp3 = (n6 << 12) | (n7 << 6) | n8;
816 if (!is_utf16_cp_valid(cp3, is_surrogate)) {
819 is_surrogate = has_surrogate(cp3, is_surrogate);
822 unsigned char c = *
p++;
827 return !is_surrogate;
829 unsigned char n = decode_base64(*
p);
832 }
else if (
n >
DASH) {
838 }
else if (can_encode_directly(c)) {
845 return !is_surrogate;
zend_ffi_ctype_name_buf buf
const struct mbfl_convert_vtbl vtbl_wchar_utf7
const struct mbfl_convert_vtbl vtbl_utf7_wchar
int mbfl_filt_conv_wchar_utf7_flush(mbfl_convert_filter *filter)
int mbfl_filt_conv_utf7_wchar(int c, mbfl_convert_filter *filter)
#define SAVE_CONVERSION_STATE()
int mbfl_filt_conv_wchar_utf7(int c, mbfl_convert_filter *filter)
const mbfl_encoding mbfl_encoding_utf7
#define RESTORE_CONVERSION_STATE()
#define MBFL_ENCTYPE_GL_UNSAFE
#define MBFL_WCSPLANE_UCS2MAX
#define MBFL_WCSPLANE_UTF32MAX
#define MBFL_WCSPLANE_SUPMIN
int mbfl_filt_conv_illegal_output(int c, mbfl_convert_filter *filter)
void mbfl_filt_conv_common_ctor(mbfl_convert_filter *filter)
struct _mbfl_convert_filter mbfl_convert_filter
#define MB_CONVERT_BUF_STORE(buf, _out, _limit)
#define MB_CONVERT_BUF_ENSURE(buf, out, limit, needed)
#define MB_CONVERT_ERROR(buf, out, limit, bad_cp, conv_fn)
#define MB_CONVERT_BUF_LOAD(buf, _out, _limit)
unsigned const char * end
output_function_t output_function
int(* filter_function)(int c, mbfl_convert_filter *filter)
flush_function_t flush_function
#define EMPTY_SWITCH_DEFAULT_CASE()