php-internal-docs 8.4.8
Unofficial docs for php/php-src
Loading...
Searching...
No Matches
mbfl_encoding.h
Go to the documentation of this file.
1/*
2 * "streamable kanji code filter and converter"
3 * Copyright (c) 1998-2002 HappySize, Inc. All rights reserved.
4 *
5 * LICENSE NOTICES
6 *
7 * This file is part of "streamable kanji code filter and converter",
8 * which is distributed under the terms of GNU Lesser General Public
9 * License (version 2) as published by the Free Software Foundation.
10 *
11 * This software is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU Lesser General Public License for more details.
15 *
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with "streamable kanji code filter and converter";
18 * if not, write to the Free Software Foundation, Inc., 59 Temple Place,
19 * Suite 330, Boston, MA 02111-1307 USA
20 *
21 * The author of this file:
22 *
23 */
24/*
25 * The source code included in this file was separated from mbfilter.h
26 * by Moriyoshi Koizumi <moriyoshi@php.net> on 20 Dec 2002. The file
27 * mbfilter.h is included in this package.
28 *
29 */
30
31#ifndef MBFL_ENCODING_H
32#define MBFL_ENCODING_H
33
34#include "zend.h"
35#include "mbfl_defs.h"
36#include "mbfl_consts.h"
37
123};
124
135
136typedef struct {
137 unsigned char *out;
138 unsigned char *limit;
139 uint32_t state;
140 uint32_t errors;
142 unsigned int error_mode;
145
146typedef size_t (*mb_to_wchar_fn)(unsigned char **in, size_t *in_len, uint32_t *out, size_t out_len, unsigned int *state);
147typedef void (*mb_from_wchar_fn)(uint32_t *in, size_t in_len, mb_convert_buf *out, bool end);
148typedef bool (*mb_check_fn)(unsigned char *in, size_t in_len);
149typedef zend_string* (*mb_cut_fn)(unsigned char *str, size_t from, size_t len, unsigned char *end);
150
/* When converting encoded text to a buffer of wchars (Unicode codepoints) using `mb_to_wchar_fn`,
 * the buffer must be at least this size (to work with all supported text encodings) */
#define MBSTRING_MIN_WCHAR_BUFSIZE 5
154
155static inline void mb_convert_buf_init(mb_convert_buf *buf, size_t initsize, uint32_t repl_char, unsigned int err_mode)
156{
157 buf->state = buf->errors = 0;
158 buf->str = emalloc(_ZSTR_STRUCT_SIZE(initsize));
159 buf->out = (unsigned char*)ZSTR_VAL(buf->str);
160 buf->limit = buf->out + initsize;
161 buf->replacement_char = repl_char;
162 buf->error_mode = err_mode;
163}
164
/* Make sure at least `needed` bytes are available between `out` and `limit`,
 * growing the zend_string held by `buf` if they are not.
 *
 * `out` and `limit` must be modifiable lvalues (the caller's working copies of
 * the buffer pointers); both are rewritten to point into the new allocation
 * when the string is reallocated. The capacity grows by the larger of `needed`
 * and half the old capacity. `needed` is evaluated more than once, so it must
 * be free of side effects.
 *
 * Wrapped in do/while(0) so the whole expansion behaves as one statement; the
 * invocation must be followed by `;`. */
#define MB_CONVERT_BUF_ENSURE(buf, out, limit, needed) \
	do { \
		ZEND_ASSERT((out) <= (limit)); \
		if ((size_t)((limit) - (out)) < (needed)) { \
			size_t oldsize = (limit) - (unsigned char*)ZSTR_VAL((buf)->str); \
			size_t newsize = oldsize + MAX(oldsize >> 1, (needed)); \
			/* Record the write offset before reallocating, while the old pointer is still valid */ \
			size_t mb_ensure_offset_ = (out) - (unsigned char*)ZSTR_VAL((buf)->str); \
			zend_string *newstr = erealloc((buf)->str, _ZSTR_STRUCT_SIZE(newsize)); \
			(out) = (unsigned char*)ZSTR_VAL(newstr) + mb_ensure_offset_; \
			(limit) = (unsigned char*)ZSTR_VAL(newstr) + newsize; \
			(buf)->str = newstr; \
		} \
	} while (0)
175
/* Write the caller's working pointers `_out` / `_limit` back into `buf`.
 * Wrapped in do/while(0) so both assignments stay together when the macro is
 * used as the body of an unbraced `if`/`else`/loop. */
#define MB_CONVERT_BUF_STORE(buf, _out, _limit) \
	do { \
		(buf)->out = (_out); \
		(buf)->limit = (_limit); \
	} while (0)
177
/* Copy the buffer's `out` / `limit` pointers into the caller's lvalues `_out`
 * and `_limit`. Wrapped in do/while(0) so both assignments stay together when
 * the macro is used as the body of an unbraced `if`/`else`/loop. */
#define MB_CONVERT_BUF_LOAD(buf, _out, _limit) \
	do { \
		(_out) = (buf)->out; \
		(_limit) = (buf)->limit; \
	} while (0)
179
/* Report codepoint `bad_cp` as unconvertible.
 *
 * The caller's working pointers are first stored into `buf`, then
 * `mb_illegal_output(bad_cp, conv_fn, buf)` is called, and finally the
 * pointers are reloaded from `buf`.
 * NOTE(review): the reload is presumably needed because mb_illegal_output may
 * write to or reallocate the buffer - confirm against its definition.
 *
 * Wrapped in do/while(0) so the three steps form a single statement. */
#define MB_CONVERT_ERROR(buf, out, limit, bad_cp, conv_fn) \
	do { \
		MB_CONVERT_BUF_STORE(buf, out, limit); \
		mb_illegal_output(bad_cp, conv_fn, buf); \
		MB_CONVERT_BUF_LOAD(buf, out, limit); \
	} while (0)
184
/* Write one byte at `out` and return the position just past it.
 * No capacity check is made; the caller must have reserved the space. */
static inline unsigned char* mb_convert_buf_add(unsigned char *out, char c)
{
	out[0] = (unsigned char)c;
	return out + 1;
}
190
/* Write two bytes at `out`, in argument order, and return the position just
 * past them. No capacity check is made; the caller must have reserved the space. */
static inline unsigned char* mb_convert_buf_add2(unsigned char *out, char c1, char c2)
{
	out[0] = (unsigned char)c1;
	out[1] = (unsigned char)c2;
	return out + 2;
}
197
/* Write three bytes at `out`, in argument order, and return the position just
 * past them. No capacity check is made; the caller must have reserved the space. */
static inline unsigned char* mb_convert_buf_add3(unsigned char *out, char c1, char c2, char c3)
{
	out[0] = (unsigned char)c1;
	out[1] = (unsigned char)c2;
	out[2] = (unsigned char)c3;
	return out + 3;
}
205
/* Write four bytes at `out`, in argument order, and return the position just
 * past them. No capacity check is made; the caller must have reserved the space. */
static inline unsigned char* mb_convert_buf_add4(unsigned char *out, char c1, char c2, char c3, char c4)
{
	out[0] = (unsigned char)c1;
	out[1] = (unsigned char)c2;
	out[2] = (unsigned char)c3;
	out[3] = (unsigned char)c4;
	return out + 4;
}
214
/* Copy the NUL-terminated string `s` to `out`, excluding the terminator, and
 * return the position just past the last byte written.
 * No capacity check is made; the caller must have reserved the space. */
static inline unsigned char* mb_convert_buf_appends(unsigned char *out, const char *s)
{
	for (; *s != '\0'; s++) {
		*out++ = (unsigned char)*s;
	}
	return out;
}
222
/* Copy exactly `n` bytes from `s` to `out` (embedded NUL bytes included) and
 * return the position just past the last byte written. With `n == 0` nothing
 * is read or written. No capacity check is made; the caller must have
 * reserved the space. */
static inline unsigned char* mb_convert_buf_appendn(unsigned char *out, const char *s, size_t n)
{
	for (; n > 0; n--) {
		*out++ = (unsigned char)*s++;
	}
	return out;
}
230
231static inline zend_string* mb_convert_buf_result_raw(mb_convert_buf *buf)
232{
233 ZEND_ASSERT(buf->out <= buf->limit);
234 zend_string *ret = buf->str;
235 /* See `zend_string_alloc` in zend_string.h */
238 ZSTR_H(ret) = 0;
239 ZSTR_LEN(ret) = buf->out - (unsigned char*)ZSTR_VAL(ret);
240 *(buf->out) = '\0';
241 return ret;
242}
243
258
260
261static inline zend_string* mb_convert_buf_result(mb_convert_buf *buf, const mbfl_encoding *enc)
262{
263 zend_string *ret = mb_convert_buf_result_raw(buf);
264 if (enc == &mbfl_encoding_utf8 && buf->error_mode != MBFL_OUTPUTFILTER_ILLEGAL_MODE_BADUTF8) {
266 }
267 return ret;
268}
269
270/* Used if we initialize an `mb_convert_buf` but then discover we don't actually
271 * want to return `zend_string` */
272static inline void mb_convert_buf_free(mb_convert_buf *buf)
273{
274 efree(buf->str);
275}
276
277static inline size_t mb_convert_buf_len(mb_convert_buf *buf)
278{
279 return buf->out - (unsigned char*)ZSTR_VAL(buf->str);
280}
281
282static inline void mb_convert_buf_reset(mb_convert_buf *buf, size_t len)
283{
284 buf->out = (unsigned char*)ZSTR_VAL(buf->str) + len;
285 ZEND_ASSERT(buf->out <= buf->limit);
286}
287
288MBFLAPI extern const mbfl_encoding *mbfl_name2encoding(const char *name);
289MBFLAPI extern const mbfl_encoding *mbfl_name2encoding_ex(const char *name, size_t name_len);
290MBFLAPI extern const mbfl_encoding *mbfl_no2encoding(enum mbfl_no_encoding no_encoding);
292MBFLAPI extern const char *mbfl_no_encoding2name(enum mbfl_no_encoding no_encoding);
294
295#endif /* MBFL_ENCODING_H */
size_t len
Definition apprentice.c:174
char s[4]
Definition cdf.c:77
zend_long n
Definition ffi.c:4979
zend_ffi_ctype_name_buf buf
Definition ffi.c:4685
#define MBFL_OUTPUTFILTER_ILLEGAL_MODE_BADUTF8
Definition mbfilter.h:119
const mbfl_encoding mbfl_encoding_utf8
#define MBFLAPI
Definition mbfl_defs.h:56
MBFLAPI const char * mbfl_no_encoding2name(enum mbfl_no_encoding no_encoding)
mbfl_no_encoding
@ mbfl_no_encoding_8859_9
@ mbfl_no_encoding_2022jp_kddi
@ mbfl_no_encoding_utf16be
@ mbfl_no_encoding_hz
@ mbfl_no_encoding_ucs2le
@ mbfl_no_encoding_2022jp
@ mbfl_no_encoding_ucs2
@ mbfl_no_encoding_cp936
@ mbfl_no_encoding_7bit
@ mbfl_no_encoding_cp50220
@ mbfl_no_encoding_utf8
@ mbfl_no_encoding_8859_2
@ mbfl_no_encoding_8859_15
@ mbfl_no_encoding_eucjp2004
@ mbfl_no_encoding_gb18030
@ mbfl_no_encoding_utf8_kddi_a
@ mbfl_no_encoding_charset_min
@ mbfl_no_encoding_pass
@ mbfl_no_encoding_cp50222
@ mbfl_no_encoding_invalid
@ mbfl_no_encoding_cp51932
@ mbfl_no_encoding_sjis_docomo
@ mbfl_no_encoding_euc_kr
@ mbfl_no_encoding_8859_13
@ mbfl_no_encoding_euc_cn
@ mbfl_no_encoding_utf8_docomo
@ mbfl_no_encoding_cp850
@ mbfl_no_encoding_utf7imap
@ mbfl_no_encoding_utf32be
@ mbfl_no_encoding_ucs4le
@ mbfl_no_encoding_8859_8
@ mbfl_no_encoding_2022kr
@ mbfl_no_encoding_utf8_kddi_b
@ mbfl_no_encoding_jis
@ mbfl_no_encoding_html_ent
@ mbfl_no_encoding_euc_tw
@ mbfl_no_encoding_2022jpms
@ mbfl_no_encoding_8859_4
@ mbfl_no_encoding_cp50221
@ mbfl_no_encoding_utf16
@ mbfl_no_encoding_uhc
@ mbfl_no_encoding_8859_16
@ mbfl_no_encoding_cp950
@ mbfl_no_encoding_euc_jp
@ mbfl_no_encoding_sjiswin
@ mbfl_no_encoding_base64
@ mbfl_no_encoding_8859_5
@ mbfl_no_encoding_wchar
@ mbfl_no_encoding_sjis2004
@ mbfl_no_encoding_gb18030_2022
@ mbfl_no_encoding_koi8u
@ mbfl_no_encoding_cp1252
@ mbfl_no_encoding_cp866
@ mbfl_no_encoding_ucs4
@ mbfl_no_encoding_utf32
@ mbfl_no_encoding_8859_1
@ mbfl_no_encoding_8859_7
@ mbfl_no_encoding_ucs4be
@ mbfl_no_encoding_utf32le
@ mbfl_no_encoding_armscii8
@ mbfl_no_encoding_sjis_kddi
@ mbfl_no_encoding_ascii
@ mbfl_no_encoding_cp1251
@ mbfl_no_encoding_charset_max
@ mbfl_no_encoding_qprint
@ mbfl_no_encoding_sjis_mac
@ mbfl_no_encoding_uuencode
@ mbfl_no_encoding_cp932
@ mbfl_no_encoding_utf16le
@ mbfl_no_encoding_8859_3
@ mbfl_no_encoding_8859_6
@ mbfl_no_encoding_8859_14
@ mbfl_no_encoding_ucs2be
@ mbfl_no_encoding_sjis
@ mbfl_no_encoding_8bit
@ mbfl_no_encoding_2022jp_2004
@ mbfl_no_encoding_utf7
@ mbfl_no_encoding_big5
@ mbfl_no_encoding_utf8_sb
@ mbfl_no_encoding_8859_10
@ mbfl_no_encoding_eucjp_win
@ mbfl_no_encoding_sjis_sb
@ mbfl_no_encoding_koi8r
@ mbfl_no_encoding_cp1254
size_t(* mb_to_wchar_fn)(unsigned char **in, size_t *in_len, uint32_t *out, size_t out_len, unsigned int *state)
MBFLAPI const mbfl_encoding * mbfl_name2encoding_ex(const char *name, size_t name_len)
zend_string *(* mb_cut_fn)(unsigned char *str, size_t from, size_t len, unsigned char *end)
MBFLAPI const char * mbfl_encoding_preferred_mime_name(const mbfl_encoding *encoding)
MBFLAPI const mbfl_encoding ** mbfl_get_supported_encodings(void)
MBFLAPI const mbfl_encoding * mbfl_name2encoding(const char *name)
void(* mb_from_wchar_fn)(uint32_t *in, size_t in_len, mb_convert_buf *out, bool end)
bool(* mb_check_fn)(unsigned char *in, size_t in_len)
MBFLAPI const mbfl_encoding * mbfl_no2encoding(enum mbfl_no_encoding no_encoding)
unsigned const char * end
Definition php_ffi.h:51
xmlCharEncodingHandlerPtr encoding
Definition php_soap.h:170
unsigned int error_mode
uint32_t replacement_char
unsigned char * out
zend_string * str
unsigned char * limit
enum mbfl_no_encoding from
int(* filter_flush)(struct _mbfl_convert_filter *filter)
int(* filter_function)(int c, struct _mbfl_convert_filter *filter)
void(* filter_ctor)(struct _mbfl_convert_filter *filter)
void(* filter_copy)(struct _mbfl_convert_filter *src, struct _mbfl_convert_filter *dest)
enum mbfl_no_encoding to
void(* filter_dtor)(struct _mbfl_convert_filter *filter)
const char ** aliases
mb_to_wchar_fn to_wchar
const unsigned char * mblen_table
mb_check_fn check
const char * name
unsigned int flag
const char * mime_name
const struct mbfl_convert_vtbl * output_filter
enum mbfl_no_encoding no_encoding
mb_from_wchar_fn from_wchar
const struct mbfl_convert_vtbl * input_filter
#define efree(ptr)
Definition zend_alloc.h:155
#define emalloc(size)
Definition zend_alloc.h:151
ZEND_API void(ZEND_FASTCALL *zend_touch_vm_stack_data)(void *vm_stack_data)
struct _zend_string zend_string
#define ZEND_ASSERT(c)
#define ZSTR_H(zstr)
Definition zend_string.h:70
#define _ZSTR_STRUCT_SIZE(len)
#define ZSTR_VAL(zstr)
Definition zend_string.h:68
#define ZSTR_LEN(zstr)
Definition zend_string.h:69
#define GC_STRING
Definition zend_types.h:785
#define GC_SET_REFCOUNT(p, rc)
Definition zend_types.h:708
#define IS_STR_VALID_UTF8
Definition zend_types.h:820
#define GC_ADD_FLAGS(p, flags)
Definition zend_types.h:759
#define GC_TYPE_INFO(p)
Definition zend_types.h:754
zend_string * name
zval * ret
out($f, $s)