/* php-doxygen/mbfilter__utf16_8c_source.html */

/*

 * "streamable kanji code filter and converter"

 * Copyright (c) 1998-2002 HappySize, Inc. All rights reserved.

 *

 * LICENSE NOTICES

 *

 * This file is part of "streamable kanji code filter and converter",

 * which is distributed under the terms of GNU Lesser General Public

 * License (version 2) as published by the Free Software Foundation.

 *

 * This software is distributed in the hope that it will be useful,

 * but WITHOUT ANY WARRANTY; without even the implied warranty of

 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the

 * GNU Lesser General Public License for more details.

 *

 * You should have received a copy of the GNU Lesser General Public

 * License along with "streamable kanji code filter and converter";

 * if not, write to the Free Software Foundation, Inc., 59 Temple Place,

 * Suite 330, Boston, MA  02111-1307  USA

 *

 * The author of this file:

 *

 */

/*

 * The source code included in this file was separated from mbfilter.c

 * by moriyoshi koizumi <moriyoshi@php.net> on 4 dec 2002.

 *

 */


#include "zend_bitset.h"

#include "mbfilter.h"

#include "mbfilter_utf16.h"


#ifdef ZEND_INTRIN_AVX2_NATIVE


/* We are building AVX2-only binary */

# include <immintrin.h>

# define mb_utf16be_to_wchar mb_utf16be_to_wchar_avx2

# define mb_utf16le_to_wchar mb_utf16le_to_wchar_avx2

# define mb_wchar_to_utf16be mb_wchar_to_utf16be_avx2

# define mb_wchar_to_utf16le mb_wchar_to_utf16le_avx2


static size_t mb_utf16be_to_wchar_avx2(unsigned char **in, size_t *in_len, uint32_t *buf, size_t bufsize, unsigned int *state);

static void mb_wchar_to_utf16be_avx2(uint32_t *in, size_t len, mb_convert_buf *buf, bool end);

static size_t mb_utf16le_to_wchar_avx2(unsigned char **in, size_t *in_len, uint32_t *buf, size_t bufsize, unsigned int *state);

static void mb_wchar_to_utf16le_avx2(uint32_t *in, size_t len, mb_convert_buf *buf, bool end);


#elif defined(ZEND_INTRIN_AVX2_RESOLVER)


/* We are building binary which works with or without AVX2; whether or not to use

 * AVX2-accelerated functions will be determined at runtime */

# include <immintrin.h>

# include "Zend/zend_cpuinfo.h"


static size_t mb_utf16be_to_wchar_default(unsigned char **in, size_t *in_len, uint32_t *buf, size_t bufsize, unsigned int *state);

static void mb_wchar_to_utf16be_default(uint32_t *in, size_t len, mb_convert_buf *buf, bool end);

static size_t mb_utf16le_to_wchar_default(unsigned char **in, size_t *in_len, uint32_t *buf, size_t bufsize, unsigned int *state);

static void mb_wchar_to_utf16le_default(uint32_t *in, size_t len, mb_convert_buf *buf, bool end);


# ifdef ZEND_INTRIN_AVX2_FUNC_PROTO

/* Dynamic linker will decide whether or not to use AVX2-based functions and

 * resolve symbols accordingly */


ZEND_INTRIN_AVX2_FUNC_DECL(size_t mb_utf16be_to_wchar_avx2(unsigned char **in, size_t *in_len, uint32_t *buf, size_t bufsize, unsigned int *state));

ZEND_INTRIN_AVX2_FUNC_DECL(void mb_wchar_to_utf16be_avx2(uint32_t *in, size_t len, mb_convert_buf *buf, bool end));

ZEND_INTRIN_AVX2_FUNC_DECL(size_t mb_utf16le_to_wchar_avx2(unsigned char **in, size_t *in_len, uint32_t *buf, size_t bufsize, unsigned int *state));

ZEND_INTRIN_AVX2_FUNC_DECL(void mb_wchar_to_utf16le_avx2(uint32_t *in, size_t len, mb_convert_buf *buf, bool end));


size_t mb_utf16be_to_wchar(unsigned char **in, size_t *in_len, uint32_t *buf, size_t bufsize, unsigned int *state) __attribute__((ifunc("resolve_utf16be_wchar")));

void mb_wchar_to_utf16be(uint32_t *in, size_t len, mb_convert_buf *buf, bool end) __attribute__((ifunc("resolve_wchar_utf16be")));

size_t mb_utf16le_to_wchar(unsigned char **in, size_t *in_len, uint32_t *buf, size_t bufsize, unsigned int *state) __attribute__((ifunc("resolve_utf16le_wchar")));

void mb_wchar_to_utf16le(uint32_t *in, size_t len, mb_convert_buf *buf, bool end) __attribute__((ifunc("resolve_wchar_utf16le")));


/* ifunc resolver named by the mb_utf16be_to_wchar declaration: hands back the
 * AVX2 decoder when zend_cpu_supports_avx2() says so, else the portable one. */
ZEND_NO_SANITIZE_ADDRESS
ZEND_ATTRIBUTE_UNUSED
static mb_to_wchar_fn resolve_utf16be_wchar(void)
{
    if (zend_cpu_supports_avx2()) {
        return mb_utf16be_to_wchar_avx2;
    }
    return mb_utf16be_to_wchar_default;
}


/* ifunc resolver named by the mb_wchar_to_utf16be declaration: hands back the
 * AVX2 encoder when zend_cpu_supports_avx2() says so, else the portable one. */
ZEND_NO_SANITIZE_ADDRESS
ZEND_ATTRIBUTE_UNUSED
static mb_from_wchar_fn resolve_wchar_utf16be(void)
{
    if (zend_cpu_supports_avx2()) {
        return mb_wchar_to_utf16be_avx2;
    }
    return mb_wchar_to_utf16be_default;
}


/* ifunc resolver named by the mb_utf16le_to_wchar declaration: hands back the
 * AVX2 decoder when zend_cpu_supports_avx2() says so, else the portable one. */
ZEND_NO_SANITIZE_ADDRESS
ZEND_ATTRIBUTE_UNUSED
static mb_to_wchar_fn resolve_utf16le_wchar(void)
{
    if (zend_cpu_supports_avx2()) {
        return mb_utf16le_to_wchar_avx2;
    }
    return mb_utf16le_to_wchar_default;
}


/* ifunc resolver named by the mb_wchar_to_utf16le declaration: hands back the
 * AVX2 encoder when zend_cpu_supports_avx2() says so, else the portable one. */
ZEND_NO_SANITIZE_ADDRESS
ZEND_ATTRIBUTE_UNUSED
static mb_from_wchar_fn resolve_wchar_utf16le(void)
{
    if (zend_cpu_supports_avx2()) {
        return mb_wchar_to_utf16le_avx2;
    }
    return mb_wchar_to_utf16le_default;
}


# else /* ZEND_INTRIN_AVX2_FUNC_PTR */

/* We are compiling for a target where the dynamic linker will not be able to

 * resolve symbols according to whether the host supports AVX2 or not; so instead,

 * we can make calls go through a function pointer and set the function pointer

 * on module load */


#ifdef HAVE_FUNC_ATTRIBUTE_TARGET

static size_t mb_utf16be_to_wchar_avx2(unsigned char **in, size_t *in_len, uint32_t *buf, size_t bufsize, unsigned int *state) __attribute__((target("avx2")));

static void mb_wchar_to_utf16be_avx2(uint32_t *in, size_t len, mb_convert_buf *buf, bool end) __attribute__((target("avx2")));

static size_t mb_utf16le_to_wchar_avx2(unsigned char **in, size_t *in_len, uint32_t *buf, size_t bufsize, unsigned int *state) __attribute__((target("avx2")));

static void mb_wchar_to_utf16le_avx2(uint32_t *in, size_t len, mb_convert_buf *buf, bool end) __attribute__((target("avx2")));

#else

static size_t mb_utf16be_to_wchar_avx2(unsigned char **in, size_t *in_len, uint32_t *buf, size_t bufsize, unsigned int *state);

static void mb_wchar_to_utf16be_avx2(uint32_t *in, size_t len, mb_convert_buf *buf, bool end);

static size_t mb_utf16le_to_wchar_avx2(unsigned char **in, size_t *in_len, uint32_t *buf, size_t bufsize, unsigned int *state);

static void mb_wchar_to_utf16le_avx2(uint32_t *in, size_t len, mb_convert_buf *buf, bool end);

#endif


/* Dispatch targets for the four UTF-16 converters in the function-pointer build.
 * All four start out NULL and are assigned by init_convert_utf16(); the wrapper
 * functions below call through them without a NULL check. */
static mb_to_wchar_fn utf16be_to_wchar_ptr = NULL;

static mb_from_wchar_fn wchar_to_utf16be_ptr = NULL;

static mb_to_wchar_fn utf16le_to_wchar_ptr = NULL;

static mb_from_wchar_fn wchar_to_utf16le_ptr = NULL;


static size_t mb_utf16be_to_wchar(unsigned char **in, size_t *in_len, uint32_t *buf, size_t bufsize, unsigned int *state)

{

    return utf16be_to_wchar_ptr(in, in_len, buf, bufsize, NULL);

}


/* wchar => UTF-16BE entry point for the function-pointer build.
 * Forwards all arguments to the encoder stored in wchar_to_utf16be_ptr. */
static void mb_wchar_to_utf16be(uint32_t *in, size_t len, mb_convert_buf *buf, bool end)
{
    mb_from_wchar_fn encode = wchar_to_utf16be_ptr;

    (*encode)(in, len, buf, end);
}


static size_t mb_utf16le_to_wchar(unsigned char **in, size_t *in_len, uint32_t *buf, size_t bufsize, unsigned int *state)

{

    return utf16le_to_wchar_ptr(in, in_len, buf, bufsize, NULL);

}


/* wchar => UTF-16LE entry point for the function-pointer build.
 * Forwards all arguments to the encoder stored in wchar_to_utf16le_ptr. */
static void mb_wchar_to_utf16le(uint32_t *in, size_t len, mb_convert_buf *buf, bool end)
{
    mb_from_wchar_fn encode = wchar_to_utf16le_ptr;

    (*encode)(in, len, buf, end);
}


/* Point the four UTF-16 dispatch pointers at either the AVX2 or the portable
 * implementations, depending on what zend_cpu_supports_avx2() reports.
 * The wrapper functions dereference these pointers unconditionally, so this
 * has to run before any of them is called. */
void init_convert_utf16(void)
{
    if (!zend_cpu_supports_avx2()) {
        /* No AVX2 on this host: use the portable converters */
        utf16be_to_wchar_ptr = mb_utf16be_to_wchar_default;
        wchar_to_utf16be_ptr = mb_wchar_to_utf16be_default;
        utf16le_to_wchar_ptr = mb_utf16le_to_wchar_default;
        wchar_to_utf16le_ptr = mb_wchar_to_utf16le_default;
        return;
    }

    utf16be_to_wchar_ptr = mb_utf16be_to_wchar_avx2;
    wchar_to_utf16be_ptr = mb_wchar_to_utf16be_avx2;
    utf16le_to_wchar_ptr = mb_utf16le_to_wchar_avx2;
    wchar_to_utf16le_ptr = mb_wchar_to_utf16le_avx2;
}

# endif


#else


/* No AVX2 support */

# define mb_utf16be_to_wchar mb_utf16be_to_wchar_default

# define mb_utf16le_to_wchar mb_utf16le_to_wchar_default

# define mb_wchar_to_utf16be mb_wchar_to_utf16be_default

# define mb_wchar_to_utf16le mb_wchar_to_utf16le_default


static size_t mb_utf16be_to_wchar_default(unsigned char **in, size_t *in_len, uint32_t *buf, size_t bufsize, unsigned int *state);

static void mb_wchar_to_utf16be_default(uint32_t *in, size_t len, mb_convert_buf *buf, bool end);

static size_t mb_utf16le_to_wchar_default(unsigned char **in, size_t *in_len, uint32_t *buf, size_t bufsize, unsigned int *state);

static void mb_wchar_to_utf16le_default(uint32_t *in, size_t len, mb_convert_buf *buf, bool end);


#endif


static int mbfl_filt_conv_utf16_wchar_flush(mbfl_convert_filter *filter);

static size_t mb_utf16_to_wchar(unsigned char **in, size_t *in_len, uint32_t *buf, size_t bufsize, unsigned int *state);

static zend_string* mb_cut_utf16(unsigned char *str, size_t from, size_t len, unsigned char *end);

static zend_string* mb_cut_utf16be(unsigned char *str, size_t from, size_t len, unsigned char *end);

static zend_string* mb_cut_utf16le(unsigned char *str, size_t from, size_t len, unsigned char *end);


/* NULL-terminated list of alternative names accepted for "UTF-16" */
static const char *mbfl_encoding_utf16_aliases[] = {"utf16", NULL};


/* Encoding descriptor for "UTF-16" (byte order not fixed by the name).
 * Decoding uses mb_utf16_to_wchar, which looks for a BOM and otherwise treats
 * the input as big-endian; encoding uses the big-endian writer
 * mb_wchar_to_utf16be.
 * NOTE(review): this is a positional initializer; the meaning of each slot
 * (including the NULL and 0 entries) is fixed by the mbfl_encoding struct,
 * which is declared outside this file -- confirm against that declaration. */
const mbfl_encoding mbfl_encoding_utf16 = {

    mbfl_no_encoding_utf16,

    "UTF-16",

    "UTF-16",

    mbfl_encoding_utf16_aliases,

    NULL,

    0,

    &vtbl_utf16_wchar,

    &vtbl_wchar_utf16,

    mb_utf16_to_wchar,

    mb_wchar_to_utf16be,

    NULL,

    mb_cut_utf16

};


/* Encoding descriptor for "UTF-16BE": no alias table, big-endian decoder
 * (mb_utf16be_to_wchar) and encoder (mb_wchar_to_utf16be), and mb_cut_utf16be.
 * NOTE(review): positional initializer; slot meanings come from the
 * mbfl_encoding struct declared outside this file -- confirm there. */
const mbfl_encoding mbfl_encoding_utf16be = {

    mbfl_no_encoding_utf16be,

    "UTF-16BE",

    "UTF-16BE",

    NULL,

    NULL,

    0,

    &vtbl_utf16be_wchar,

    &vtbl_wchar_utf16be,

    mb_utf16be_to_wchar,

    mb_wchar_to_utf16be,

    NULL,

    mb_cut_utf16be

};


/* Encoding descriptor for "UTF-16LE": no alias table, little-endian decoder
 * (mb_utf16le_to_wchar) and encoder (mb_wchar_to_utf16le), and mb_cut_utf16le.
 * NOTE(review): positional initializer; slot meanings come from the
 * mbfl_encoding struct declared outside this file -- confirm there. */
const mbfl_encoding mbfl_encoding_utf16le = {

    mbfl_no_encoding_utf16le,

    "UTF-16LE",

    "UTF-16LE",

    NULL,

    NULL,

    0,

    &vtbl_utf16le_wchar,

    &vtbl_wchar_utf16le,

    mb_utf16le_to_wchar,

    mb_wchar_to_utf16le,

    NULL,

    mb_cut_utf16le

};


/* Filter table for UTF-16 => wchar. The per-byte handler is
 * mbfl_filt_conv_utf16_wchar (which picks a byte order from the first code
 * unit) and the flush handler is mbfl_filt_conv_utf16_wchar_flush.
 * NOTE(review): positional initializer; the role of the two NULL slots is
 * defined by struct mbfl_convert_vtbl, declared outside this file. */
const struct mbfl_convert_vtbl vtbl_utf16_wchar = {

    mbfl_no_encoding_utf16,

    mbfl_no_encoding_wchar,

    mbfl_filt_conv_common_ctor,

    NULL,

    mbfl_filt_conv_utf16_wchar,

    mbfl_filt_conv_utf16_wchar_flush,

    NULL,

};


/* Filter table for wchar => UTF-16. It uses the same handler as
 * vtbl_wchar_utf16be (mbfl_filt_conv_wchar_utf16be), so output for plain
 * "UTF-16" is written big-endian.
 * NOTE(review): positional initializer; the role of the two NULL slots is
 * defined by struct mbfl_convert_vtbl, declared outside this file. */
const struct mbfl_convert_vtbl vtbl_wchar_utf16 = {

    mbfl_no_encoding_wchar,

    mbfl_no_encoding_utf16,

    mbfl_filt_conv_common_ctor,

    NULL,

    mbfl_filt_conv_wchar_utf16be,

    mbfl_filt_conv_common_flush,

    NULL,

};


/* Filter table for UTF-16BE => wchar: per-byte handler
 * mbfl_filt_conv_utf16be_wchar, flush handler mbfl_filt_conv_utf16_wchar_flush.
 * NOTE(review): positional initializer; the role of the two NULL slots is
 * defined by struct mbfl_convert_vtbl, declared outside this file. */
const struct mbfl_convert_vtbl vtbl_utf16be_wchar = {

    mbfl_no_encoding_utf16be,

    mbfl_no_encoding_wchar,

    mbfl_filt_conv_common_ctor,

    NULL,

    mbfl_filt_conv_utf16be_wchar,

    mbfl_filt_conv_utf16_wchar_flush,

    NULL,

};


/* Filter table for wchar => UTF-16BE: per-code-point handler
 * mbfl_filt_conv_wchar_utf16be, flush handler mbfl_filt_conv_common_flush.
 * NOTE(review): positional initializer; the role of the two NULL slots is
 * defined by struct mbfl_convert_vtbl, declared outside this file. */
const struct mbfl_convert_vtbl vtbl_wchar_utf16be = {

    mbfl_no_encoding_wchar,

    mbfl_no_encoding_utf16be,

    mbfl_filt_conv_common_ctor,

    NULL,

    mbfl_filt_conv_wchar_utf16be,

    mbfl_filt_conv_common_flush,

    NULL,

};


/* Filter table for UTF-16LE => wchar: per-byte handler
 * mbfl_filt_conv_utf16le_wchar, flush handler mbfl_filt_conv_utf16_wchar_flush.
 * NOTE(review): positional initializer; the role of the two NULL slots is
 * defined by struct mbfl_convert_vtbl, declared outside this file. */
const struct mbfl_convert_vtbl vtbl_utf16le_wchar = {

    mbfl_no_encoding_utf16le,

    mbfl_no_encoding_wchar,

    mbfl_filt_conv_common_ctor,

    NULL,

    mbfl_filt_conv_utf16le_wchar,

    mbfl_filt_conv_utf16_wchar_flush,

    NULL,

};


/* Filter table for wchar => UTF-16LE: per-code-point handler
 * mbfl_filt_conv_wchar_utf16le, flush handler mbfl_filt_conv_common_flush.
 * NOTE(review): positional initializer; the role of the two NULL slots is
 * defined by struct mbfl_convert_vtbl, declared outside this file. */
const struct mbfl_convert_vtbl vtbl_wchar_utf16le = {

    mbfl_no_encoding_wchar,

    mbfl_no_encoding_utf16le,

    mbfl_filt_conv_common_ctor,

    NULL,

    mbfl_filt_conv_wchar_utf16le,

    mbfl_filt_conv_common_flush,

    NULL,

};


/* Evaluate `statement` once; if its value is negative, make the enclosing
 * function return -1 immediately. Only meaningful inside functions that
 * return an int. */
#define CK(statement)   do { if ((statement) < 0) return (-1); } while (0)


/*
 * UTF-16 => wchar, fed one input byte per call, used until the byte order
 * has been decided.
 *
 * The first two bytes are assembled big-endian. If they form 0xFFFE the
 * filter hands itself over to the little-endian handler; anything else hands
 * it over to the big-endian handler, and that first unit is then processed
 * here the same way the big-endian handler would (a 0xFEFF BOM is dropped).
 *
 * Returns 0, or -1 if the output function reports a negative result.
 */
int mbfl_filt_conv_utf16_wchar(int c, mbfl_convert_filter *filter)
{
    if (filter->status == 0) {
        /* First byte of the first code unit: just remember it */
        filter->cache = c & 0xFF;
        filter->status = 1;
        return 0;
    }

    int unit = (filter->cache << 8) | (c & 0xFF);
    filter->status = 0;
    filter->cache = 0;

    if (unit == 0xFFFE) {
        /* Byte-swapped BOM: the rest of the stream is little-endian */
        filter->filter_function = mbfl_filt_conv_utf16le_wchar;
        return 0;
    }

    filter->filter_function = mbfl_filt_conv_utf16be_wchar;

    if (unit >= 0xD800 && unit <= 0xDBFF) {
        /* Leading surrogate: keep its 10 payload bits and wait for the trailing one */
        filter->cache = unit & 0x3FF;
        filter->status = 2;
    } else if (unit >= 0xDC00 && unit <= 0xDFFF) {
        /* Trailing surrogate with nothing before it */
        CK((*filter->output_function)(MBFL_BAD_INPUT, filter->data));
    } else if (unit != 0xFEFF) {
        /* Ordinary code unit; a big-endian BOM (0xFEFF) is silently skipped */
        CK((*filter->output_function)(unit, filter->data));
    }

    return 0;
}


/*
 * UTF-16BE => wchar, fed one input byte per call.
 *
 * filter->status tracks the position inside the current character:
 *   0  expecting the first byte of a code unit
 *   1  expecting the second byte of a code unit
 *   2  leading surrogate seen; expecting first byte of the trailing one
 *   3  expecting second byte of the trailing surrogate
 * filter->cache carries the bytes/bits collected so far.
 *
 * Returns 0, or -1 if the output function reports a negative result.
 */
int mbfl_filt_conv_utf16be_wchar(int c, mbfl_convert_filter *filter)
{
    const int byte = c & 0xFF;
    int unit;

    if (filter->status == 0) {
        filter->cache = byte;
        filter->status = 1;
    } else if (filter->status == 1) {
        unit = (filter->cache << 8) | byte;
        if (unit >= 0xD800 && unit <= 0xDBFF) {
            /* Leading surrogate: keep only its 10 payload bits */
            filter->cache = unit & 0x3FF;
            filter->status = 2;
        } else {
            filter->status = 0;
            if (unit >= 0xDC00 && unit <= 0xDFFF) {
                /* Trailing surrogate without a leading one */
                CK((*filter->output_function)(MBFL_BAD_INPUT, filter->data));
            } else {
                CK((*filter->output_function)(unit, filter->data));
            }
        }
    } else if (filter->status == 2) {
        /* Stack the new byte underneath the saved payload bits */
        filter->cache = (filter->cache << 8) | byte;
        filter->status = 3;
    } else if (filter->status == 3) {
        unit = ((filter->cache & 0xFF) << 8) | byte;
        if (unit >= 0xDC00 && unit <= 0xDFFF) {
            /* Proper pair: payload of the leading half sits in bits 8..17 of cache */
            int codepoint = ((filter->cache & 0x3FF00) << 2) + (unit & 0x3FF) + 0x10000;
            filter->status = 0;
            CK((*filter->output_function)(codepoint, filter->data));
        } else if (unit >= 0xD800 && unit <= 0xDBFF) {
            /* Another leading surrogate: report the orphaned one, start over with this one */
            filter->cache = unit & 0x3FF;
            filter->status = 2;
            CK((*filter->output_function)(MBFL_BAD_INPUT, filter->data));
        } else {
            /* Leading surrogate followed by an ordinary code unit */
            filter->status = 0;
            CK((*filter->output_function)(MBFL_BAD_INPUT, filter->data));
            CK((*filter->output_function)(unit, filter->data));
        }
    }

    return 0;
}


/*
 * wchar => UTF-16BE, one code point per call.
 *
 * Values in [0, MBFL_WCSPLANE_UCS2MAX) are written as one 16-bit unit, values
 * in [MBFL_WCSPLANE_SUPMIN, MBFL_WCSPLANE_SUPMAX) as a surrogate pair; each
 * unit is emitted most significant byte first. Everything else is passed to
 * mbfl_filt_conv_illegal_output().
 *
 * Returns 0, or -1 if any downstream call reports a negative result.
 */
int mbfl_filt_conv_wchar_utf16be(int c, mbfl_convert_filter *filter)
{
    if (c >= 0 && c < MBFL_WCSPLANE_UCS2MAX) {
        CK((*filter->output_function)((c >> 8) & 0xff, filter->data));
        CK((*filter->output_function)(c & 0xff, filter->data));
        return 0;
    }

    if (!(c >= MBFL_WCSPLANE_SUPMIN && c < MBFL_WCSPLANE_SUPMAX)) {
        CK(mbfl_filt_conv_illegal_output(c, filter));
        return 0;
    }

    /* Split into leading/trailing surrogate */
    const int lead = ((c >> 10) - 0x40) | 0xd800;
    const int trail = (c & 0x3ff) | 0xdc00;

    CK((*filter->output_function)((lead >> 8) & 0xff, filter->data));
    CK((*filter->output_function)(lead & 0xff, filter->data));
    CK((*filter->output_function)((trail >> 8) & 0xff, filter->data));
    CK((*filter->output_function)(trail & 0xff, filter->data));

    return 0;
}


/*
 * UTF-16LE => wchar, fed one input byte per call.
 *
 * filter->status tracks the position inside the current character:
 *   0  expecting the low byte of a code unit
 *   1  expecting the high byte of a code unit
 *   2  leading surrogate seen; expecting low byte of the trailing one
 *   3  expecting high byte of the trailing surrogate
 * filter->cache carries the bytes/bits collected so far.
 *
 * Returns 0, or -1 if the output function reports a negative result.
 */
int mbfl_filt_conv_utf16le_wchar(int c, mbfl_convert_filter *filter)
{
    int unit;

    switch (filter->status) {
    case 0:
        filter->cache = c & 0xff;
        filter->status = 1;
        return 0;

    case 1:
        if ((c & 0xfc) == 0xd8) {
            /* High byte marks a leading surrogate; add its two payload bits above the low byte */
            filter->cache = filter->cache + ((c & 0x3) << 8);
            filter->status = 2;
            return 0;
        }
        filter->status = 0;
        if ((c & 0xfc) == 0xdc) {
            /* Trailing surrogate without a leading one */
            CK((*filter->output_function)(MBFL_BAD_INPUT, filter->data));
        } else {
            CK((*filter->output_function)(filter->cache + ((c & 0xff) << 8), filter->data));
        }
        return 0;

    case 2:
        /* Move the 10 leading-surrogate bits up and drop the new low byte underneath */
        filter->cache = (filter->cache << 10) + (c & 0xff);
        filter->status = 3;
        return 0;

    case 3:
        unit = (filter->cache & 0xFF) | ((c & 0xFF) << 8);
        if (unit >= 0xDC00 && unit <= 0xDFFF) {
            /* Proper pair: complete the low 10 bits and add the supplementary-plane offset */
            int codepoint = filter->cache + ((c & 0x3) << 8) + 0x10000;
            filter->status = 0;
            CK((*filter->output_function)(codepoint, filter->data));
        } else if (unit >= 0xD800 && unit <= 0xDBFF) {
            /* A second leading surrogate where the trailing one belonged:
             * report the orphan and restart from this one */
            filter->cache = unit & 0x3FF;
            filter->status = 2;
            CK((*filter->output_function)(MBFL_BAD_INPUT, filter->data));
        } else {
            /* Leading surrogate followed by an ordinary code unit */
            filter->status = 0;
            CK((*filter->output_function)(MBFL_BAD_INPUT, filter->data));
            CK((*filter->output_function)(unit, filter->data));
        }
        return 0;

    default:
        break;
    }

    return 0;
}


/*
 * wchar => UTF-16LE, one code point per call.
 *
 * Values in [0, MBFL_WCSPLANE_UCS2MAX) are written as one 16-bit unit, values
 * in [MBFL_WCSPLANE_SUPMIN, MBFL_WCSPLANE_SUPMAX) as a surrogate pair; each
 * unit is emitted least significant byte first. Everything else is passed to
 * mbfl_filt_conv_illegal_output().
 *
 * Returns 0, or -1 if any downstream call reports a negative result.
 */
int mbfl_filt_conv_wchar_utf16le(int c, mbfl_convert_filter *filter)
{
    if (c >= 0 && c < MBFL_WCSPLANE_UCS2MAX) {
        CK((*filter->output_function)(c & 0xff, filter->data));
        CK((*filter->output_function)((c >> 8) & 0xff, filter->data));
        return 0;
    }

    if (!(c >= MBFL_WCSPLANE_SUPMIN && c < MBFL_WCSPLANE_SUPMAX)) {
        CK(mbfl_filt_conv_illegal_output(c, filter));
        return 0;
    }

    /* Split into leading/trailing surrogate */
    const int lead = ((c >> 10) - 0x40) | 0xd800;
    const int trail = (c & 0x3ff) | 0xdc00;

    CK((*filter->output_function)(lead & 0xff, filter->data));
    CK((*filter->output_function)((lead >> 8) & 0xff, filter->data));
    CK((*filter->output_function)(trail & 0xff, filter->data));
    CK((*filter->output_function)((trail >> 8) & 0xff, filter->data));

    return 0;
}


/*
 * Flush handler shared by the three UTF-16 => wchar filter tables.
 *
 * A non-zero status means a character was left unfinished: the status is
 * reset and MBFL_BAD_INPUT is emitted for it. Afterwards the filter's own
 * flush_function, if set, is invoked.
 *
 * Returns 0, or -1 if emitting the error marker reports a negative result
 * (in which case flush_function is not called).
 */
static int mbfl_filt_conv_utf16_wchar_flush(mbfl_convert_filter *filter)
{
    if (filter->status != 0) {
        filter->status = 0;
        if ((*filter->output_function)(MBFL_BAD_INPUT, filter->data) < 0) {
            return -1;
        }
    }

    if (filter->flush_function != NULL) {
        (*filter->flush_function)(filter->data);
    }

    return 0;
}


/* Values mb_utf16_to_wchar stores in *state once it has settled on a byte
 * order; any other value makes it run BOM detection. */
#define DETECTED_BE 1

#define DETECTED_LE 2


/*
 * UTF-16 (byte order not given) => wchar.
 *
 * If *state already records a byte order, the matching decoder is called
 * directly. Otherwise the first two input bytes (when present) are checked
 * for a BOM: 0xFFFE selects little-endian, 0xFEFF confirms big-endian, and in
 * both cases the BOM is consumed. With no BOM the input is treated as
 * big-endian. The decision is stored in *state.
 *
 * Returns whatever the selected decoder returns.
 */
static size_t mb_utf16_to_wchar(unsigned char **in, size_t *in_len, uint32_t *buf, size_t bufsize, unsigned int *state)
{
    switch (*state) {
    case DETECTED_BE:
        return mb_utf16be_to_wchar(in, in_len, buf, bufsize, NULL);
    case DETECTED_LE:
        return mb_utf16le_to_wchar(in, in_len, buf, bufsize, NULL);
    default:
        break;
    }

    if (*in_len >= 2) {
        const unsigned char *head = *in;
        uint16_t first_unit = (head[0] << 8) | head[1];

        if (first_unit == 0xFFFE || first_unit == 0xFEFF) {
            /* Either kind of BOM is swallowed, never sent to the output */
            *in = *in + 2;
            *in_len -= 2;

            if (first_unit == 0xFFFE) {
                *state = DETECTED_LE;
                return mb_utf16le_to_wchar(in, in_len, buf, bufsize, NULL);
            }
        }
    }

    *state = DETECTED_BE;
    return mb_utf16be_to_wchar(in, in_len, buf, bufsize, NULL);
}


/*
 * Portable UTF-16BE => wchar decoder.
 *
 * Reads from *in (at most *in_len bytes) and writes up to `bufsize` wchars to
 * `buf`. Lone or mis-ordered surrogates and a dangling odd byte at the very
 * end each produce MBFL_BAD_INPUT. On return *in and *in_len reflect the
 * bytes consumed. `state` is not used.
 *
 * Returns the number of wchars written.
 */
static size_t mb_utf16be_to_wchar_default(unsigned char **in, size_t *in_len, uint32_t *buf, size_t bufsize, unsigned int *state)
{
    unsigned char *src = *in;
    /* Only whole 16-bit units are read in the main loop */
    unsigned char *src_end = src + (*in_len & ~1);
    uint32_t *dst = buf;
    /* One iteration may write two wchars, so stop one slot early */
    uint32_t *dst_stop = buf + bufsize - 1;

    while (src < src_end && dst < dst_stop) {
        uint16_t unit = (src[0] << 8) | src[1];
        src += 2;

        if (unit < 0xD800 || unit > 0xDFFF) {
            /* Not a surrogate: passes straight through */
            *dst++ = unit;
            continue;
        }

        if (unit >= 0xDC00) {
            /* Trailing surrogate with no leading one in front of it */
            *dst++ = MBFL_BAD_INPUT;
            continue;
        }

        /* `unit` is a leading surrogate from here on */
        if (src >= src_end) {
            /* Input stops before the trailing surrogate */
            *dst++ = MBFL_BAD_INPUT;
            continue;
        }

        uint16_t next = (src[0] << 8) | src[1];

        if (next >= 0xDC00 && next <= 0xDFFF) {
            *dst++ = (((unit & 0x3FF) << 10) | (next & 0x3FF)) + 0x10000;
            src += 2;
        } else if (next >= 0xD800 && next <= 0xDBFF) {
            /* Another leading surrogate: flag the first one and leave the
             * second unread so the next iteration starts from it */
            *dst++ = MBFL_BAD_INPUT;
        } else {
            /* Leading surrogate followed by an ordinary code unit */
            *dst++ = MBFL_BAD_INPUT;
            *dst++ = next;
            src += 2;
        }
    }

    if (src == src_end && (*in_len & 0x1) && dst < dst_stop) {
        /* Odd-length input: the stray last byte cannot form a code unit */
        *dst++ = MBFL_BAD_INPUT;
        src++;
    }

    *in_len -= (src - *in);
    *in = src;
    return dst - buf;
}


/*
 * Portable wchar => UTF-16BE encoder.
 *
 * Encodes `len` wchars from `in` into `buf`. Values below
 * MBFL_WCSPLANE_UCS2MAX become one big-endian 16-bit unit; other values below
 * MBFL_WCSPLANE_UTF32MAX become a big-endian surrogate pair; anything else is
 * handed to MB_CONVERT_ERROR. `end` is not used.
 */
static void mb_wchar_to_utf16be_default(uint32_t *in, size_t len, mb_convert_buf *buf, bool end)
{
    unsigned char *out, *limit;
    MB_CONVERT_BUF_LOAD(buf, out, limit);
    /* Initial reservation assumes two bytes per input wchar */
    MB_CONVERT_BUF_ENSURE(buf, out, limit, len * 2);

    while (len != 0) {
        uint32_t codepoint = *in++;
        len--; /* `len` now counts the wchars still to come after this one */

        if (codepoint < MBFL_WCSPLANE_UCS2MAX) {
            out = mb_convert_buf_add2(out, (codepoint >> 8) & 0xFF, codepoint & 0xFF);
            continue;
        }

        if (!(codepoint < MBFL_WCSPLANE_UTF32MAX)) {
            MB_CONVERT_ERROR(buf, out, limit, codepoint, mb_wchar_to_utf16be_default);
            MB_CONVERT_BUF_ENSURE(buf, out, limit, len * 2);
            continue;
        }

        /* Surrogate pair needs 4 bytes instead of the 2 that were budgeted */
        MB_CONVERT_BUF_ENSURE(buf, out, limit, (len * 2) + 4);
        uint16_t lead = ((codepoint >> 10) - 0x40) | 0xD800;
        uint16_t trail = (codepoint & 0x3FF) | 0xDC00;
        out = mb_convert_buf_add4(out, (lead >> 8) & 0xFF, lead & 0xFF, (trail >> 8) & 0xFF, trail & 0xFF);
    }

    MB_CONVERT_BUF_STORE(buf, out, limit);
}


/*
 * Portable UTF-16LE => wchar decoder.
 *
 * Reads from *in (at most *in_len bytes) and writes up to `bufsize` wchars to
 * `buf`. Lone or mis-ordered surrogates and a dangling odd byte at the very
 * end each produce MBFL_BAD_INPUT. On return *in and *in_len reflect the
 * bytes consumed. `state` is not used.
 *
 * Returns the number of wchars written.
 */
static size_t mb_utf16le_to_wchar_default(unsigned char **in, size_t *in_len, uint32_t *buf, size_t bufsize, unsigned int *state)
{
    unsigned char *src = *in;
    /* Only whole 16-bit units are read in the main loop */
    unsigned char *src_end = src + (*in_len & ~1);
    uint32_t *dst = buf;
    /* One iteration may write two wchars, so stop one slot early */
    uint32_t *dst_stop = buf + bufsize - 1;

    while (src < src_end && dst < dst_stop) {
        uint16_t unit = (src[1] << 8) | src[0];
        src += 2;

        if (unit < 0xD800 || unit > 0xDFFF) {
            /* Not a surrogate: passes straight through */
            *dst++ = unit;
            continue;
        }

        if (unit >= 0xDC00) {
            /* Trailing surrogate with no leading one in front of it */
            *dst++ = MBFL_BAD_INPUT;
            continue;
        }

        /* `unit` is a leading surrogate from here on */
        if (src >= src_end) {
            /* Input stops before the trailing surrogate */
            *dst++ = MBFL_BAD_INPUT;
            continue;
        }

        uint16_t next = (src[1] << 8) | src[0];

        if (next >= 0xDC00 && next <= 0xDFFF) {
            *dst++ = (((unit & 0x3FF) << 10) | (next & 0x3FF)) + 0x10000;
            src += 2;
        } else if (next >= 0xD800 && next <= 0xDBFF) {
            /* Another leading surrogate: flag the first one and leave the
             * second unread so the next iteration starts from it */
            *dst++ = MBFL_BAD_INPUT;
        } else {
            /* Leading surrogate followed by an ordinary code unit */
            *dst++ = MBFL_BAD_INPUT;
            *dst++ = next;
            src += 2;
        }
    }

    if (src == src_end && (*in_len & 0x1) && dst < dst_stop) {
        /* Odd-length input: the stray last byte cannot form a code unit */
        *dst++ = MBFL_BAD_INPUT;
        src++;
    }

    *in_len -= (src - *in);
    *in = src;
    return dst - buf;
}


/*
 * Portable wchar => UTF-16LE encoder.
 *
 * Encodes `len` wchars from `in` into `buf`. Values below
 * MBFL_WCSPLANE_UCS2MAX become one little-endian 16-bit unit; other values
 * below MBFL_WCSPLANE_UTF32MAX become a little-endian surrogate pair; anything
 * else is handed to MB_CONVERT_ERROR. `end` is not used.
 */
static void mb_wchar_to_utf16le_default(uint32_t *in, size_t len, mb_convert_buf *buf, bool end)
{
    unsigned char *out, *limit;
    MB_CONVERT_BUF_LOAD(buf, out, limit);
    /* Initial reservation assumes two bytes per input wchar */
    MB_CONVERT_BUF_ENSURE(buf, out, limit, len * 2);

    while (len != 0) {
        uint32_t codepoint = *in++;
        len--; /* `len` now counts the wchars still to come after this one */

        if (codepoint < MBFL_WCSPLANE_UCS2MAX) {
            out = mb_convert_buf_add2(out, codepoint & 0xFF, (codepoint >> 8) & 0xFF);
            continue;
        }

        if (!(codepoint < MBFL_WCSPLANE_UTF32MAX)) {
            MB_CONVERT_ERROR(buf, out, limit, codepoint, mb_wchar_to_utf16le_default);
            MB_CONVERT_BUF_ENSURE(buf, out, limit, len * 2);
            continue;
        }

        /* Surrogate pair needs 4 bytes instead of the 2 that were budgeted */
        MB_CONVERT_BUF_ENSURE(buf, out, limit, (len * 2) + 4);
        uint16_t lead = ((codepoint >> 10) - 0x40) | 0xD800;
        uint16_t trail = (codepoint & 0x3FF) | 0xDC00;
        out = mb_convert_buf_add4(out, lead & 0xFF, (lead >> 8) & 0xFF, trail & 0xFF, (trail >> 8) & 0xFF);
    }

    MB_CONVERT_BUF_STORE(buf, out, limit);
}


#if defined(ZEND_INTRIN_AVX2_NATIVE) || defined(ZEND_INTRIN_AVX2_RESOLVER)


/*
 * AVX2 UTF-16BE => wchar decoder.
 *
 * Takes the vector path only when at least 32 input bytes and 16 output slots
 * are available; otherwise it defers to mb_utf16be_to_wchar_default() (or
 * returns 0 for empty input). Each pass over a 32-byte block does one of:
 *   - no surrogates in the block: convert all 16 code units at once;
 *   - block starts with non-surrogates: convert that prefix with vector code;
 *   - block starts with surrogates: decode that prefix with scalar code.
 * Leftover input shorter than a block is passed to the portable decoder.
 *
 * On return *in / *in_len reflect the bytes consumed; the return value is the
 * number of wchars written to `buf`. `state` is not used.
 */
#ifdef ZEND_INTRIN_AVX2_FUNC_PROTO

size_t mb_utf16be_to_wchar_avx2(unsigned char **in, size_t *in_len, uint32_t *buf, size_t bufsize, unsigned int *state)

#else

static size_t mb_utf16be_to_wchar_avx2(unsigned char **in, size_t *in_len, uint32_t *buf, size_t bufsize, unsigned int *state)

#endif

{

    size_t len = *in_len;


    if (len >= 32 && bufsize >= 16) {

        unsigned char *p = *in;

        uint32_t *out = buf;


        /* Used to determine if a block of input bytes contains any surrogates */

        /* The first byte in memory of each big-endian code unit lands in the low byte
         * of its 16-bit lane; masking with 0xF8 and comparing with 0xD8 therefore
         * matches first bytes 0xD8..0xDF, i.e. code units 0xD800..0xDFFF */
        const __m256i _f8 = _mm256_set1_epi16(0xF8);

        const __m256i _d8 = _mm256_set1_epi16(0xD8);

        /* wchars must be in host byte order, which is little-endian on x86;

         * Since we are reading in (big-endian) UTF-16BE, use this vector to swap byte order for output */

        const __m256i swap_bytes = _mm256_set_epi8(14, 15, 12, 13, 10, 11, 8, 9, 6, 7, 4, 5, 2, 3, 0, 1, 14, 15, 12, 13, 10, 11, 8, 9, 6, 7, 4, 5, 2, 3, 0, 1);


        do {

            __m256i operand = _mm256_loadu_si256((__m256i*)p); /* Load 32 bytes */


            /* The byte-wise movemask gives two adjacent bits per 16-bit lane, which is
             * why bit positions are halved below to obtain code-unit counts */
            uint32_t surrogate_bitvec = _mm256_movemask_epi8(_mm256_cmpeq_epi16(_mm256_and_si256(operand, _f8), _d8));

            if (surrogate_bitvec == 0) {

                /* There are no surrogates among these 16 characters

                 * So converting the UTF-16 input to wchars is very simple; just extend each 16-bit value

                 * to a 32-bit value, filling in zero bits in the high end */

                operand = _mm256_shuffle_epi8(operand, swap_bytes);

                _mm256_storeu_si256((__m256i*)out, _mm256_cvtepu16_epi32(_mm256_castsi256_si128(operand)));

                _mm256_storeu_si256((__m256i*)(out + 8), _mm256_cvtepu16_epi32(_mm256_extracti128_si256(operand, 1)));

                out += 16;

                bufsize -= 16;

                p += sizeof(__m256i);

                len -= sizeof(__m256i);

            } else if ((surrogate_bitvec & 1) == 0) {

                /* Some prefix of the current block is non-surrogates; output those */

                /* Position of the first set bit, halved: number of leading non-surrogate code units */
                uint8_t n_chars = zend_ulong_ntz(surrogate_bitvec) >> 1;

                operand = _mm256_shuffle_epi8(operand, swap_bytes);

                /* We know that the output buffer has at least 64 bytes of space available

                 * So don't bother trimming the output down to only include the non-surrogate prefix;

                 * rather, write out an entire block of 64 (or 32) bytes, then bump our output pointer

                 * forward just past the 'good part', so the 'bad part' will be overwritten on the next

                 * iteration of this loop */

                _mm256_storeu_si256((__m256i*)out, _mm256_cvtepu16_epi32(_mm256_castsi256_si128(operand)));

                if (n_chars > 8) {

                    _mm256_storeu_si256((__m256i*)(out + 8), _mm256_cvtepu16_epi32(_mm256_extracti128_si256(operand, 1)));

                }

                out += n_chars;

                bufsize -= n_chars;

                p += n_chars * 2;

                len -= n_chars * 2;

            } else {

                /* Some prefix of the current block is (valid or invalid) surrogates

                 * Handle those using non-vectorized code */

                /* After inversion the first set bit marks the first non-surrogate code unit;
                 * an all-zero result means all 16 code units in the block are surrogates */
                surrogate_bitvec = ~surrogate_bitvec;

                unsigned int n_chars = surrogate_bitvec ? zend_ulong_ntz(surrogate_bitvec) >> 1 : 16;

                do {

                    unsigned char c1 = *p++;

                    unsigned char c2 = *p++;


                    if (c1 & 0x4 || len < 4) {

                        /* 2nd part of surrogate pair has come first OR string ended abruptly

                         * after 1st part of surrogate pair */

                        *out++ = MBFL_BAD_INPUT;

                        bufsize--;

                        n_chars--;

                        len -= 2;

                        continue;

                    }


                    uint16_t n = (c1 << 8) | c2;

                    unsigned char c3 = *p++;

                    unsigned char c4 = *p++;


                    if ((c3 & 0xFC) == 0xDC) {

                        /* Valid surrogate pair */

                        uint16_t n2 = (c3 << 8) | c4;

                        *out++ = (((n & 0x3FF) << 10) | (n2 & 0x3FF)) + 0x10000;

                        bufsize--;

                        len -= 4;

#ifdef PHP_HAVE_BUILTIN_USUB_OVERFLOW

                        /* Subtracting 2 from `n_chars` will automatically set the CPU's flags;

                         * branch directly off the appropriate flag (CF on x86) rather than using

                         * another instruction (CMP on x86) to check for underflow */

                        if (__builtin_usub_overflow(n_chars, 2, &n_chars)) {

                            /* The last 2 bytes of this block and the first 2 bytes of the following

                             * block form a valid surrogate pair; now just make sure we don't get

                             * stuck in this loop due to underflow of the loop index */

                            break;

                        }

#else

                        /* Same underflow check without the builtin: 1 - 2 wraps to UINT_MAX */
                        n_chars -= 2;

                        if (n_chars == UINT_MAX) {

                            break;

                        }

#endif

                    } else {

                        /* First half of surrogate pair was followed by another first half

                         * OR by a non-surrogate character */

                        *out++ = MBFL_BAD_INPUT;

                        bufsize--;

                        n_chars--;

                        len -= 2;

                        p -= 2; /* Back up so the last 2 bytes will be processed again */

                    }

                } while (n_chars);

            }

        } while (len >= 32 && bufsize >= 16);


        /* The portable decoder is only called while at least 4 output slots remain;
         * otherwise the remaining input stays unconsumed and is reported back via
         * *in / *in_len below */
        if (len && bufsize >= 4) {

            /* Finish up trailing bytes which don't fill a 32-byte block */

            out += mb_utf16be_to_wchar_default(&p, &len, out, bufsize, NULL);

        }


        *in = p;

        *in_len = len;

        return out - buf;

    } else if (len) {

        return mb_utf16be_to_wchar_default(in, in_len, buf, bufsize, NULL);

    } else {

        return 0;

    }

}


/* AVX2-accelerated conversion of wchars (one 32-bit value per codepoint) to UTF-16BE.
 *
 * in   - wchars to convert
 * len  - number of wchars available at `in`
 * buf  - conversion output buffer; the write cursor and limit are obtained from it
 *        with MB_CONVERT_BUF_LOAD and written back with MB_CONVERT_BUF_STORE
 * end  - only forwarded to `mb_wchar_to_utf16be_default`; not inspected here
 *
 * While at least 8 wchars remain, they are examined 8 at a time. Whatever is left
 * over (fewer than 8 wchars) is handed to `mb_wchar_to_utf16be_default`. */
#ifdef ZEND_INTRIN_AVX2_FUNC_PROTO

void mb_wchar_to_utf16be_avx2(uint32_t *in, size_t len, mb_convert_buf *buf, bool end)

#else

static void mb_wchar_to_utf16be_avx2(uint32_t *in, size_t len, mb_convert_buf *buf, bool end)

#endif

{

    if (len >= 8) {

        unsigned char *out, *limit;

        MB_CONVERT_BUF_LOAD(buf, out, limit);

        /* 2 output bytes per wchar is requested up front; presumably this macro grows
         * the buffer so that at least that many bytes can be written at `out` */
        MB_CONVERT_BUF_ENSURE(buf, out, limit, len * 2);


        /* Used to find wchars which are outside the Unicode BMP (Basic Multilingual Plane) */

        const __m256i bmp_mask = _mm256_set1_epi32(0xFFFF);

        /* Used to extract 16 bits which we want from each of eight 32-bit values.
         * Within each 128-bit lane, the two low bytes of every 32-bit element are
         * selected in swapped order (high byte first, i.e. big-endian). The low lane
         * places its 8 selected bytes in its upper half, the high lane in its lower
         * half; indexes of -1 produce zero bytes. */

        const __m256i pack_8x16 = _mm256_set_epi8(-1, -1, -1, -1, -1, -1, -1, -1, 12, 13, 8, 9, 4, 5, 0, 1, 12, 13, 8, 9, 4, 5, 0, 1, -1, -1, -1, -1, -1, -1, -1, -1);


        do {

            __m256i operand = _mm256_loadu_si256((__m256i*)in); /* Load 32 bytes */


            /* A 32-bit element equals itself masked with 0xFFFF only if it is <= 0xFFFF.
             * The byte movemask therefore yields 4 set bits for every wchar in the BMP. */
            uint32_t bmp_bitvec = _mm256_movemask_epi8(_mm256_cmpeq_epi32(_mm256_and_si256(operand, bmp_mask), operand));

            if (bmp_bitvec == 0xFFFFFFFF) {

                /* All eight wchars are in the BMP
                 * Shuffle bytes around to get the 16 bytes we want into the low 16 bytes of YMM register
                 * (which is equivalent to an XMM register) */

                operand = _mm256_shuffle_epi8(operand, pack_8x16);

                /* Swap the two 128-bit lanes, then use a per-lane byte alignment to join
                 * the upper 8 bytes of the original low lane with the lower 8 bytes of
                 * the original high lane */
                __m256i operand2 = _mm256_permute2x128_si256(operand, operand, 1);

                operand = _mm256_alignr_epi8(operand2, operand, 8);

                _mm_storeu_si128((__m128i*)out, _mm256_castsi256_si128(operand)); /* Store 16 bytes */

                out += 16;

                len -= 8;

                in += 8;

            } else if (bmp_bitvec & 1) {

                /* Some prefix of this block are codepoints in the BMP */

                /* Index of the lowest clear bit of `bmp_bitvec` (assuming `zend_ulong_ntz`
                 * counts trailing zero bits): 4 per leading BMP wchar */
                unsigned int n_bytes = zend_ulong_ntz(~bmp_bitvec);

                operand = _mm256_shuffle_epi8(operand, pack_8x16);

                __m256i operand2 = _mm256_permute2x128_si256(operand, operand, 1);

                operand = _mm256_alignr_epi8(operand2, operand, 8);

                /* Store 16 bytes, but bump output pointer forward just past the 'good part',
                 * so the 'bad part' will be overwritten on the next iteration of this loop */

                _mm_storeu_si128((__m128i*)out, _mm256_castsi256_si128(operand));

                out += n_bytes >> 1; /* 2 output bytes per consumed wchar */

                len -= n_bytes >> 2; /* 4 mask bits per consumed wchar */

                in += n_bytes >> 2;

            } else {

                /* Some prefix of this block is codepoints outside the BMP OR error markers
                 * Handle them using non-vectorized code */

                /* Number of leading wchars above 0xFFFF; all 8 if the mask is empty */
                unsigned int n_words = bmp_bitvec ? zend_ulong_ntz(bmp_bitvec) >> 2 : 8;

                do {

                    uint32_t w = *in++;

                    n_words--;

                    len--;


                    if (w < MBFL_WCSPLANE_UTF32MAX) {

                        /* Encode as a surrogate pair: (w >> 10) - 0x40 is the same as
                         * (w - 0x10000) >> 10, giving the upper 10 bits for the first
                         * unit; the lower 10 bits of `w` go into the second unit */
                        uint16_t n1 = ((w >> 10) - 0x40) | 0xD800;

                        uint16_t n2 = (w & 0x3FF) | 0xDC00;

                        /* 4 bytes for this pair on top of 2 per wchar still to come */
                        MB_CONVERT_BUF_ENSURE(buf, out, limit, (len * 2) + 4);

                        out = mb_convert_buf_add4(out, (n1 >> 8) & 0xFF, n1 & 0xFF, (n2 >> 8) & 0xFF, n2 & 0xFF);

                    } else {

                        /* Value is not below MBFL_WCSPLANE_UTF32MAX: delegate to the error macro */
                        MB_CONVERT_ERROR(buf, out, limit, w, mb_wchar_to_utf16be_default);

                        /* Re-request room for the remaining wchars after the error macro
                         * has run (NOTE(review): presumably it may emit output of its own) */
                        MB_CONVERT_BUF_ENSURE(buf, out, limit, len * 2);

                    }

                } while (n_words);

            }

        } while (len >= 8);


        MB_CONVERT_BUF_STORE(buf, out, limit);

    }


    /* Fewer than 8 wchars left (or fewer than 8 to begin with) */
    if (len) {

        mb_wchar_to_utf16be_default(in, len, buf, end);

    }

}


/* AVX2-accelerated decoding of UTF-16LE bytes into wchars (one 32-bit value per codepoint).
 *
 * in      - in/out: pointer to the input bytes; advanced past the consumed input
 * in_len  - in/out: number of input bytes; reduced by the number consumed
 * buf     - output array of wchars
 * bufsize - capacity of `buf`, in wchars
 * state   - not referenced in this function; NULL is passed on to the default decoder
 *
 * Returns the number of wchars written to `buf`.
 *
 * The vector path is only entered when at least 32 input bytes and 16 output slots
 * are available; otherwise the work is done by `mb_utf16le_to_wchar_default`. */
#ifdef ZEND_INTRIN_AVX2_FUNC_PROTO

size_t mb_utf16le_to_wchar_avx2(unsigned char **in, size_t *in_len, uint32_t *buf, size_t bufsize, unsigned int *state)

#else

static size_t mb_utf16le_to_wchar_avx2(unsigned char **in, size_t *in_len, uint32_t *buf, size_t bufsize, unsigned int *state)

#endif

{

    /* Most of this function is the same as `mb_utf16be_to_wchar_avx2`, above;
     * See it for more detailed code comments */


    size_t len = *in_len;


    if (len >= 32 && bufsize >= 16) {

        unsigned char *p = *in;

        uint32_t *out = buf;


        /* A 16-bit unit is a surrogate (0xD800..0xDFFF) iff (unit & 0xF800) == 0xD800 */
        const __m256i _f8 = _mm256_set1_epi16(0xF800);

        const __m256i _d8 = _mm256_set1_epi16(0xD800);


        do {

            __m256i operand = _mm256_loadu_si256((__m256i*)p);


            /* Byte movemask of a 16-bit compare: 2 set bits for every surrogate unit */
            uint32_t surrogate_bitvec = _mm256_movemask_epi8(_mm256_cmpeq_epi16(_mm256_and_si256(operand, _f8), _d8));

            if (surrogate_bitvec == 0) {

                /* There are no surrogates among these 16 characters */

                /* Zero-extend each 16-bit unit to 32 bits: low 8 units, then high 8 units */
                _mm256_storeu_si256((__m256i*)out, _mm256_cvtepu16_epi32(_mm256_castsi256_si128(operand)));

                _mm256_storeu_si256((__m256i*)(out + 8), _mm256_cvtepu16_epi32(_mm256_extracti128_si256(operand, 1)));

                out += 16;

                bufsize -= 16;

                p += sizeof(__m256i);

                len -= sizeof(__m256i);

            } else if ((surrogate_bitvec & 1) == 0) {

                /* Some prefix of the current block is non-surrogates */

                /* 2 mask bits per unit, so halve the bit index of the first surrogate
                 * (assuming `zend_ulong_ntz` counts trailing zero bits) */
                uint8_t n_chars = zend_ulong_ntz(surrogate_bitvec) >> 1;

                /* Full-width stores are used; `out` is then advanced by `n_chars` only,
                 * so anything stored beyond that point gets overwritten later */
                _mm256_storeu_si256((__m256i*)out, _mm256_cvtepu16_epi32(_mm256_castsi256_si128(operand)));

                if (n_chars > 8) {

                    _mm256_storeu_si256((__m256i*)(out + 8), _mm256_cvtepu16_epi32(_mm256_extracti128_si256(operand, 1)));

                }

                out += n_chars;

                bufsize -= n_chars;

                p += n_chars * 2;

                len -= n_chars * 2;

            } else {

                /* Some prefix of the current block is (valid or invalid) surrogates */

                surrogate_bitvec = ~surrogate_bitvec;

                /* Length of the leading run of surrogate units; 16 if the whole block is surrogates */
                unsigned int n_chars = surrogate_bitvec ? zend_ulong_ntz(surrogate_bitvec) >> 1 : 16;

                do {

                    /* Little-endian: c1 is the low-order byte, c2 the high-order byte */
                    unsigned char c1 = *p++;

                    unsigned char c2 = *p++;


                    /* Bit 0x4 of the high-order byte separates 0xDC..0xDF from 0xD8..0xDB */
                    if (c2 & 0x4 || len < 4) {

                        /* 2nd part of surrogate pair has come first OR string ended abruptly
                         * after 1st part of surrogate pair */

                        *out++ = MBFL_BAD_INPUT;

                        bufsize--;

                        n_chars--;

                        len -= 2;

                        /* In a do-while, this jumps to the `while (n_chars)` test below */
                        continue;

                    }


                    uint16_t n = (c2 << 8) | c1;

                    unsigned char c3 = *p++;

                    unsigned char c4 = *p++;


                    if ((c4 & 0xFC) == 0xDC) {

                        /* Valid surrogate pair */

                        uint16_t n2 = (c4 << 8) | c3;

                        /* 10 payload bits from each unit, offset by 0x10000 */
                        *out++ = (((n & 0x3FF) << 10) | (n2 & 0x3FF)) + 0x10000;

                        bufsize--;

                        len -= 4;

                        /* Two units were consumed. If only one was left in the run, the
                         * subtraction wraps around; leave the loop in that case rather
                         * than spinning on a wrapped counter */
#ifdef PHP_HAVE_BUILTIN_USUB_OVERFLOW

                        if (__builtin_usub_overflow(n_chars, 2, &n_chars)) {

                            break;

                        }

#else

                        n_chars -= 2;

                        if (n_chars == UINT_MAX) {

                            break;

                        }

#endif

                    } else {

                        /* First half of surrogate pair was followed by another first half
                         * OR by a non-surrogate character */

                        *out++ = MBFL_BAD_INPUT;

                        bufsize--;

                        n_chars--;

                        len -= 2;

                        p -= 2; /* Back up so the last 2 bytes will be processed again */

                    }

                } while (n_chars);

            }

        } while (len >= 32 && bufsize >= 16);


        /* Leftover input that does not fill a 32-byte block goes through the default
         * decoder, provided at least 4 output slots remain */
        if (len && bufsize >= 4) {

            out += mb_utf16le_to_wchar_default(&p, &len, out, bufsize, NULL);

        }


        *in = p;

        *in_len = len;

        return out - buf;

    } else if (len) {

        /* Not enough input or output space for even one vector block */
        return mb_utf16le_to_wchar_default(in, in_len, buf, bufsize, NULL);

    } else {

        return 0;

    }

}


/* AVX2-accelerated conversion of wchars (one 32-bit value per codepoint) to UTF-16LE.
 *
 * in   - wchars to convert
 * len  - number of wchars available at `in`
 * buf  - conversion output buffer; the write cursor and limit are obtained from it
 *        with MB_CONVERT_BUF_LOAD and written back with MB_CONVERT_BUF_STORE
 * end  - only forwarded to `mb_wchar_to_utf16le_default`; not inspected here
 *
 * While at least 8 wchars remain, they are examined 8 at a time. Whatever is left
 * over (fewer than 8 wchars) is handed to `mb_wchar_to_utf16le_default`. */
#ifdef ZEND_INTRIN_AVX2_FUNC_PROTO

void mb_wchar_to_utf16le_avx2(uint32_t *in, size_t len, mb_convert_buf *buf, bool end)

#else

static void mb_wchar_to_utf16le_avx2(uint32_t *in, size_t len, mb_convert_buf *buf, bool end)

#endif

{

    if (len >= 8) {

        unsigned char *out, *limit;

        MB_CONVERT_BUF_LOAD(buf, out, limit);

        /* 2 output bytes per wchar is requested up front; presumably this macro grows
         * the buffer so that at least that many bytes can be written at `out` */
        MB_CONVERT_BUF_ENSURE(buf, out, limit, len * 2);


        /* Used to find wchars which are outside the Unicode BMP (Basic Multilingual Plane) */

        const __m256i bmp_mask = _mm256_set1_epi32(0xFFFF);

        /* Used to extract 16 bits which we want from each of eight 32-bit values.
         * Within each 128-bit lane, the two low bytes of every 32-bit element are
         * selected in their existing order (low byte first, i.e. little-endian). The
         * low lane places its 8 selected bytes in its upper half, the high lane in its
         * lower half; indexes of -1 produce zero bytes. */

        const __m256i pack_8x16 = _mm256_set_epi8(-1, -1, -1, -1, -1, -1, -1, -1, 13, 12, 9, 8, 5, 4, 1, 0, 13, 12, 9, 8, 5, 4, 1, 0, -1, -1, -1, -1, -1, -1, -1, -1);


        do {

            __m256i operand = _mm256_loadu_si256((__m256i*)in);


            /* A 32-bit element equals itself masked with 0xFFFF only if it is <= 0xFFFF.
             * The byte movemask therefore yields 4 set bits for every wchar in the BMP. */
            uint32_t bmp_bitvec = _mm256_movemask_epi8(_mm256_cmpeq_epi32(_mm256_and_si256(operand, bmp_mask), operand));

            if (bmp_bitvec == 0xFFFFFFFF) {

                /* All eight wchars are in the BMP
                 * Shuffle bytes around to get the 16 bytes we want into the low 16 bytes of YMM register
                 * (which is equivalent to an XMM register) */

                operand = _mm256_shuffle_epi8(operand, pack_8x16);

                /* Swap the two 128-bit lanes, then use a per-lane byte alignment to join
                 * the upper 8 bytes of the original low lane with the lower 8 bytes of
                 * the original high lane */
                __m256i operand2 = _mm256_permute2x128_si256(operand, operand, 1);

                operand = _mm256_alignr_epi8(operand2, operand, 8);

                _mm_storeu_si128((__m128i*)out, _mm256_castsi256_si128(operand));

                out += 16;

                len -= 8;

                in += 8;

            } else if (bmp_bitvec & 1) {

                /* Some prefix of this block are codepoints in the BMP */

                /* Index of the lowest clear bit of `bmp_bitvec` (assuming `zend_ulong_ntz`
                 * counts trailing zero bits): 4 per leading BMP wchar */
                unsigned int n_bytes = zend_ulong_ntz(~bmp_bitvec);

                operand = _mm256_shuffle_epi8(operand, pack_8x16);

                __m256i operand2 = _mm256_permute2x128_si256(operand, operand, 1);

                operand = _mm256_alignr_epi8(operand2, operand, 8);

                /* All 16 bytes are stored, but `out` only moves past the bytes that came
                 * from BMP wchars; the rest is overwritten by later output */
                _mm_storeu_si128((__m128i*)out, _mm256_castsi256_si128(operand));

                out += n_bytes >> 1; /* 2 output bytes per consumed wchar */

                len -= n_bytes >> 2; /* 4 mask bits per consumed wchar */

                in += n_bytes >> 2;

            } else {

                /* Some prefix of this block is codepoints outside the BMP OR error markers */

                /* Number of leading wchars above 0xFFFF; all 8 if the mask is empty */
                unsigned int n_words = bmp_bitvec ? zend_ulong_ntz(bmp_bitvec) >> 2 : 8;

                do {

                    uint32_t w = *in++;

                    n_words--;

                    len--;


                    if (w < MBFL_WCSPLANE_UTF32MAX) {

                        /* Encode as a surrogate pair: (w >> 10) - 0x40 is the same as
                         * (w - 0x10000) >> 10, giving the upper 10 bits for the first
                         * unit; the lower 10 bits of `w` go into the second unit */
                        uint16_t n1 = ((w >> 10) - 0x40) | 0xD800;

                        uint16_t n2 = (w & 0x3FF) | 0xDC00;

                        /* 4 bytes for this pair on top of 2 per wchar still to come */
                        MB_CONVERT_BUF_ENSURE(buf, out, limit, (len * 2) + 4);

                        /* Each unit is written low byte first */
                        out = mb_convert_buf_add4(out, n1 & 0xFF, (n1 >> 8) & 0xFF, n2 & 0xFF, (n2 >> 8) & 0xFF);

                    } else {

                        /* Value is not below MBFL_WCSPLANE_UTF32MAX: delegate to the error macro */
                        MB_CONVERT_ERROR(buf, out, limit, w, mb_wchar_to_utf16le_default);

                        /* Re-request room for the remaining wchars after the error macro
                         * has run (NOTE(review): presumably it may emit output of its own) */
                        MB_CONVERT_BUF_ENSURE(buf, out, limit, len * 2);

                    }

                } while (n_words);

            }

        } while (len >= 8);


        MB_CONVERT_BUF_STORE(buf, out, limit);

    }


    /* Fewer than 8 wchars left (or fewer than 8 to begin with) */
    if (len) {

        mb_wchar_to_utf16le_default(in, len, buf, end);

    }

}


#endif /* defined(ZEND_INTRIN_AVX2_NATIVE) || defined(ZEND_INTRIN_AVX2_RESOLVER) */


/* Cut a byte range out of a UTF-16BE string without splitting a 2-byte code unit
 * or a surrogate pair.
 *
 * str  - first byte of the string
 * from - requested starting byte offset; rounded down to an even offset
 * len  - requested length in bytes; clamped to the available bytes and rounded
 *        down to an even count
 * end  - one past the last byte of the string
 *
 * Returns the cut as a string, or `zend_empty_string` when fewer than 2 bytes are
 * requested or available.
 *
 * If the cut would start on the second half of a surrogate pair, the start is moved
 * back by one code unit so the pair stays together. If the cut would end right
 * after the first half of a surrogate pair, that half is dropped. */
static zend_string* mb_cut_utf16be(unsigned char *str, size_t from, size_t len, unsigned char *end)
{
    if (len > end - (str + from)) {
        len = end - (str + from);
    }
    /* Align both offset and length to whole 16-bit code units */
    from &= ~1;
    len &= ~1;
    unsigned char *start = str + from;
    if (len < 2 || (end - start) < 2) {
        return zend_empty_string;
    }
    /* Check if 1st codepoint is 2nd part of surrogate pair */
    if (from > 0) {
        uint32_t start_cp = (*start << 8) + *(start + 1);
        if (start_cp >= 0xDC00 && start_cp <= 0xDFFF) {
            uint32_t preceding_cp = (*(start - 2) << 8) + *(start - 1);
            if (preceding_cp >= 0xD800 && preceding_cp <= 0xDBFF) {
                /* Move the actual start pointer back. Only adjusting `from` would
                 * have no effect, since `start` has already been derived from it. */
                start -= 2;
            }
        }
    }
    /* Same for ending cut point */
    unsigned char *_end = start + len;
    if (_end > end) {
        _end = end;
    }
    uint32_t ending_cp = (*(_end - 2) << 8) + *(_end - 1);
    if (ending_cp >= 0xD800 && ending_cp <= 0xDBFF) {
        /* Last unit is an unpaired first half; leave it out */
        _end -= 2;
    }
    return zend_string_init_fast((char*)start, _end - start);
}


/* Cut a byte range out of a UTF-16LE string without splitting a 2-byte code unit
 * or a surrogate pair.
 *
 * str  - first byte of the string
 * from - requested starting byte offset; rounded down to an even offset
 * len  - requested length in bytes; clamped to the available bytes and rounded
 *        down to an even count
 * end  - one past the last byte of the string
 *
 * Returns the cut as a string, or `zend_empty_string` when fewer than 2 bytes are
 * requested or available.
 *
 * If the cut would start on the second half of a surrogate pair, the start is moved
 * back by one code unit so the pair stays together. If the cut would end right
 * after the first half of a surrogate pair, that half is dropped. */
static zend_string* mb_cut_utf16le(unsigned char *str, size_t from, size_t len, unsigned char *end)
{
    if (len > end - (str + from)) {
        len = end - (str + from);
    }
    /* Align both offset and length to whole 16-bit code units */
    from &= ~1;
    len &= ~1;
    unsigned char *start = str + from;
    if (len < 2 || (end - start) < 2) {
        return zend_empty_string;
    }
    /* Check if 1st codepoint is 2nd part of surrogate pair
     * (little-endian: the second byte of each unit is the high-order byte) */
    if (from > 0) {
        uint32_t start_cp = (*(start + 1) << 8) + *start;
        if (start_cp >= 0xDC00 && start_cp <= 0xDFFF) {
            uint32_t preceding_cp = (*(start - 1) << 8) + *(start - 2);
            if (preceding_cp >= 0xD800 && preceding_cp <= 0xDBFF) {
                /* Move the actual start pointer back. Only adjusting `from` would
                 * have no effect, since `start` has already been derived from it. */
                start -= 2;
            }
        }
    }
    /* Same for ending cut point */
    unsigned char *_end = start + len;
    if (_end > end) {
        _end = end;
    }
    uint32_t ending_cp = (*(_end - 1) << 8) + *(_end - 2);
    if (ending_cp >= 0xD800 && ending_cp <= 0xDBFF) {
        /* Last unit is an unpaired first half; leave it out */
        _end -= 2;
    }
    return zend_string_init_fast((char*)start, _end - start);
}


/* Cut a byte range out of a UTF-16 string whose byte order is determined by an
 * optional leading byte order mark.
 *
 * The first two bytes are read as a big-endian 16-bit value:
 *   0xFFFE - little-endian BOM; the cut is delegated to `mb_cut_utf16le`
 *   0xFEFF - big-endian BOM; the cut is delegated to `mb_cut_utf16be`
 *   other  - no BOM; the string is handled as big-endian
 * When a BOM is present, a `from` offset below 2 is raised to 2 before delegating.
 *
 * Returns `zend_empty_string` if fewer than 2 bytes are requested or available. */
static zend_string* mb_cut_utf16(unsigned char *str, size_t from, size_t len, unsigned char *end)
{
    if ((end - str) < 2 || len < 2) {
        return zend_empty_string;
    }

    const uint32_t first_unit = ((uint32_t)str[0] << 8) | str[1];

    switch (first_unit) {
    case 0xFFFE:
        /* Little-endian BOM */
        return mb_cut_utf16le(str, from < 2 ? 2 : from, len, end);
    case 0xFEFF:
        /* Big-endian BOM */
        return mb_cut_utf16be(str, from < 2 ? 2 : from, len, end);
    default:
        /* No BOM: big-endian is assumed, offset is used as given */
        return mb_cut_utf16be(str, from, len, end);
    }
}

len
size_t len
Definition apprentice.c:174

n
zend_long n
Definition ffi.c:4979

start
buf start
Definition ffi.c:4687

buf
zend_ffi_ctype_name_buf buf
Definition ffi.c:4685

__attribute__
#define __attribute__(a)
Definition file.h:131

NULL
#define NULL
Definition gdcache.h:45

mbfilter.h

DETECTED_BE
#define DETECTED_BE
Definition mbfilter_ucs2.c:238

DETECTED_LE
#define DETECTED_LE
Definition mbfilter_ucs2.c:239

mbfl_filt_conv_utf16_wchar
int mbfl_filt_conv_utf16_wchar(int c, mbfl_convert_filter *filter)
Definition mbfilter_utf16.c:291

mb_utf16be_to_wchar
#define mb_utf16be_to_wchar
Definition mbfilter_utf16.c:164

mbfl_filt_conv_wchar_utf16le
int mbfl_filt_conv_wchar_utf16le(int c, mbfl_convert_filter *filter)
Definition mbfilter_utf16.c:449

mbfl_encoding_utf16be
const mbfl_encoding mbfl_encoding_utf16be
Definition mbfilter_utf16.c:199

CK
#define CK(statement)
Definition mbfilter_utf16.c:289

mbfl_filt_conv_utf16be_wchar
int mbfl_filt_conv_utf16be_wchar(int c, mbfl_convert_filter *filter)
Definition mbfilter_utf16.c:322

vtbl_utf16le_wchar
const struct mbfl_convert_vtbl vtbl_utf16le_wchar
Definition mbfilter_utf16.c:269

mbfl_filt_conv_wchar_utf16be
int mbfl_filt_conv_wchar_utf16be(int c, mbfl_convert_filter *filter)
Definition mbfilter_utf16.c:373

mbfl_encoding_utf16le
const mbfl_encoding mbfl_encoding_utf16le
Definition mbfilter_utf16.c:214

vtbl_utf16be_wchar
const struct mbfl_convert_vtbl vtbl_utf16be_wchar
Definition mbfilter_utf16.c:249

mbfl_filt_conv_utf16le_wchar
int mbfl_filt_conv_utf16le_wchar(int c, mbfl_convert_filter *filter)
Definition mbfilter_utf16.c:394

mb_wchar_to_utf16le
#define mb_wchar_to_utf16le
Definition mbfilter_utf16.c:167

mbfl_encoding_utf16
const mbfl_encoding mbfl_encoding_utf16
Definition mbfilter_utf16.c:184

mb_utf16le_to_wchar
#define mb_utf16le_to_wchar
Definition mbfilter_utf16.c:165

vtbl_wchar_utf16be
const struct mbfl_convert_vtbl vtbl_wchar_utf16be
Definition mbfilter_utf16.c:259

vtbl_wchar_utf16le
const struct mbfl_convert_vtbl vtbl_wchar_utf16le
Definition mbfilter_utf16.c:279

vtbl_utf16_wchar
const struct mbfl_convert_vtbl vtbl_utf16_wchar
Definition mbfilter_utf16.c:229

vtbl_wchar_utf16
const struct mbfl_convert_vtbl vtbl_wchar_utf16
Definition mbfilter_utf16.c:239

mb_wchar_to_utf16be
#define mb_wchar_to_utf16be
Definition mbfilter_utf16.c:166

mbfilter_utf16.h

MBFL_WCSPLANE_UCS2MAX
#define MBFL_WCSPLANE_UCS2MAX
Definition mbfl_consts.h:39

MBFL_WCSPLANE_SUPMAX
#define MBFL_WCSPLANE_SUPMAX
Definition mbfl_consts.h:42

MBFL_WCSPLANE_UTF32MAX
#define MBFL_WCSPLANE_UTF32MAX
Definition mbfl_consts.h:40

MBFL_BAD_INPUT
#define MBFL_BAD_INPUT
Definition mbfl_consts.h:45

MBFL_WCSPLANE_SUPMIN
#define MBFL_WCSPLANE_SUPMIN
Definition mbfl_consts.h:41

mbfl_filt_conv_common_flush
int mbfl_filt_conv_common_flush(mbfl_convert_filter *filter)
Definition mbfl_convert.c:316

mbfl_filt_conv_illegal_output
int mbfl_filt_conv_illegal_output(int c, mbfl_convert_filter *filter)
Definition mbfl_convert.c:218

mbfl_filt_conv_common_ctor
void mbfl_filt_conv_common_ctor(mbfl_convert_filter *filter)
Definition mbfl_convert.c:311

mbfl_convert_filter
struct _mbfl_convert_filter mbfl_convert_filter
Definition mbfl_convert.h:38

mbfl_no_encoding_utf16be
@ mbfl_no_encoding_utf16be
Definition mbfl_encoding.h:59

mbfl_no_encoding_utf16
@ mbfl_no_encoding_utf16
Definition mbfl_encoding.h:58

mbfl_no_encoding_wchar
@ mbfl_no_encoding_wchar
Definition mbfl_encoding.h:41

mbfl_no_encoding_utf16le
@ mbfl_no_encoding_utf16le
Definition mbfl_encoding.h:60

mb_to_wchar_fn
size_t(* mb_to_wchar_fn)(unsigned char **in, size_t *in_len, uint32_t *out, size_t out_len, unsigned int *state)
Definition mbfl_encoding.h:146

MB_CONVERT_BUF_STORE
#define MB_CONVERT_BUF_STORE(buf, _out, _limit)
Definition mbfl_encoding.h:176

MB_CONVERT_BUF_ENSURE
#define MB_CONVERT_BUF_ENSURE(buf, out, limit, needed)
Definition mbfl_encoding.h:165

MB_CONVERT_ERROR
#define MB_CONVERT_ERROR(buf, out, limit, bad_cp, conv_fn)
Definition mbfl_encoding.h:180

mb_from_wchar_fn
void(* mb_from_wchar_fn)(uint32_t *in, size_t in_len, mb_convert_buf *out, bool end)
Definition mbfl_encoding.h:147

MB_CONVERT_BUF_LOAD
#define MB_CONVERT_BUF_LOAD(buf, _out, _limit)
Definition mbfl_encoding.h:178

end
unsigned const char * end
Definition php_ffi.h:51

state
state
Definition php_http_parser.h:114

p
p
Definition session.c:1105

_mbfl_convert_filter::output_function
output_function_t output_function
Definition mbfl_convert.h:52

_mbfl_convert_filter::data
void * data
Definition mbfl_convert.h:54

_mbfl_convert_filter::filter_function
int(* filter_function)(int c, mbfl_convert_filter *filter)
Definition mbfl_convert.h:50

_mbfl_convert_filter::status
int status
Definition mbfl_convert.h:55

_mbfl_convert_filter::cache
int cache
Definition mbfl_convert.h:56

_mbfl_convert_filter::flush_function
flush_function_t flush_function
Definition mbfl_convert.h:53

cp
Definition cp_enc_map_gen.c:5

mb_convert_buf
Definition mbfl_encoding.h:136

mbfl_convert_vtbl
Definition mbfl_encoding.h:126

mbfl_convert_vtbl::from
enum mbfl_no_encoding from
Definition mbfl_encoding.h:127

mbfl_encoding
Definition mbfl_encoding.h:244

zend_bitset.h

zend_cpuinfo.h

ZEND_NO_SANITIZE_ADDRESS
#define ZEND_NO_SANITIZE_ADDRESS
Definition zend_cpuinfo.h:119

zend_string
struct _zend_string zend_string
Definition zend_map_ptr.h:24

ZEND_INTRIN_AVX2_FUNC_DECL
#define ZEND_INTRIN_AVX2_FUNC_DECL(func)
Definition zend_portability.h:675

ZEND_ATTRIBUTE_UNUSED
#define ZEND_ATTRIBUTE_UNUSED
Definition zend_portability.h:269

zend_empty_string
ZEND_API zend_string * zend_empty_string
Definition zend_string.c:51

out
out($f, $s)
Definition zend_vm_gen.php:551