php-internal-docs 8.4.8
Unofficial docs for php/php-src
Loading...
Searching...
No Matches
base.h
Go to the documentation of this file.
1/*
2 * Copyright (C) 2019 Alexander Borisov
3 *
4 * Author: Alexander Borisov <borisov@lexbor.com>
5 */
6
7#ifndef LEXBOR_ENCODING_BASE_H
8#define LEXBOR_ENCODING_BASE_H
9
10#ifdef __cplusplus
11extern "C" {
12#endif
13
14#include "lexbor/core/base.h"
16
17
18#define LXB_ENCODING_VERSION_MAJOR 2
19#define LXB_ENCODING_VERSION_MINOR 0
20#define LXB_ENCODING_VERSION_PATCH 1
21
22#define LXB_ENCODING_VERSION_STRING \
23 LEXBOR_STRINGIZE(LXB_ENCODING_VERSION_MAJOR) "." \
24 LEXBOR_STRINGIZE(LXB_ENCODING_VERSION_MINOR) "." \
25 LEXBOR_STRINGIZE(LXB_ENCODING_VERSION_PATCH)
26
27
28#define LXB_ENCODING_REPLACEMENT_BYTES ((lxb_char_t *) "\xEF\xBF\xBD")
29
30#define LXB_ENCODING_REPLACEMENT_BUFFER_LEN 1
31#define LXB_ENCODING_REPLACEMENT_BUFFER \
32 (&(const lxb_codepoint_t) {LXB_ENCODING_REPLACEMENT_CODEPOINT})
33
34
35/*
36 * In UTF-8, 0x10FFFF is the maximum code point value (inclusive)
37 */
38enum {
43};
44
45enum {
49};
50
51enum {
55};
56
57enum {
66};
67
68enum {
72};
73
74typedef struct {
75 unsigned need;
78}
80
87
88typedef struct {
91}
93
94typedef struct {
97 unsigned state;
98 unsigned out_state;
100}
102
104
105typedef struct {
107
108 /* Out buffer */
112
113 /*
114 * Bad code points will be replaced with the user-supplied code point.
115 * If replace_to == 0, parsing stops and an error is returned to the user.
116 */
119
120 /* Not for users */
125
127
128 union {
131 unsigned lead;
134 } u;
135}
137
138typedef struct {
140
141 /* Out buffer */
145
146 /*
147 * Bad code points will be replaced with the user-supplied bytes.
148 * If replace_to == NULL, parsing stops and an error is returned to the user.
149 */
152
153 unsigned state;
154}
156
157/*
158* Why can't I pass a char ** to a function which expects a const char **?
159* http://c-faq.com/ansi/constmismatch.html
160*
161* Short answer: use an explicit cast to (const char **).
162*
163* For example:
164* lxb_encoding_ctx_t ctx = {0};
165* const lxb_encoding_data_t *enc;
166*
167* lxb_char_t *data = (lxb_char_t *) "\x81\x30\x84\x36";
168*
169* enc = lxb_encoding_data(LXB_ENCODING_GB18030);
170*
171* enc->decode(&ctx, (const lxb_char_t **) &data, data + 4);
172*/
175 const lxb_codepoint_t *end);
176
179 const lxb_char_t **data, const lxb_char_t *end);
180
181typedef int8_t
184
187 const lxb_char_t **data, const lxb_char_t *end);
188
197
198typedef struct {
200 unsigned size;
202}
204
206
207typedef struct {
208 unsigned index;
210}
212
213
214#ifdef __cplusplus
215} /* extern "C" */
216#endif
217
218#endif /* LEXBOR_ENCODING_BASE_H */
uint32_t u
Definition cdf.c:78
#define LXB_ENCODING_DECODE_ERROR(ctx)
Definition decode.c:106
#define LXB_ENCODING_ENCODE_ERROR(ctx)
Definition encode.c:34
@ LXB_ENCODING_DECODE_MAX_CODEPOINT
Definition base.h:52
@ LXB_ENCODING_DECODE_CONTINUE
Definition base.h:54
lxb_status_t(* lxb_encoding_encode_f)(lxb_encoding_encode_t *ctx, const lxb_codepoint_t **cp, const lxb_codepoint_t *end)
Definition base.h:174
lxb_codepoint_t(* lxb_encoding_decode_single_f)(lxb_encoding_decode_t *ctx, const lxb_char_t **data, const lxb_char_t *end)
Definition base.h:186
@ LXB_ENCODING_REPLACEMENT_SIZE
Definition base.h:39
@ LXB_ENCODING_MAX_CODEPOINT
Definition base.h:41
@ LXB_ENCODING_REPLACEMENT_CODEPOINT
Definition base.h:40
@ LXB_ENCODING_ERROR_CODEPOINT
Definition base.h:42
struct lxb_encoding_data lxb_encoding_data_t
Definition base.h:103
lxb_status_t(* lxb_encoding_decode_f)(lxb_encoding_decode_t *ctx, const lxb_char_t **data, const lxb_char_t *end)
Definition base.h:178
lxb_encoding_single_index_t lxb_encoding_multi_index_t
Definition base.h:205
@ LXB_ENCODING_ENCODE_OK
Definition base.h:46
@ LXB_ENCODING_ENCODE_SMALL_BUFFER
Definition base.h:48
@ LXB_ENCODING_DECODE_2022_JP_TRAIL
Definition base.h:62
@ LXB_ENCODING_DECODE_2022_JP_ASCII
Definition base.h:58
@ LXB_ENCODING_DECODE_2022_JP_ESCAPE
Definition base.h:64
@ LXB_ENCODING_DECODE_2022_JP_UNSET
Definition base.h:65
@ LXB_ENCODING_DECODE_2022_JP_ROMAN
Definition base.h:59
@ LXB_ENCODING_DECODE_2022_JP_LEAD
Definition base.h:61
@ LXB_ENCODING_DECODE_2022_JP_KATAKANA
Definition base.h:60
@ LXB_ENCODING_DECODE_2022_JP_ESCAPE_START
Definition base.h:63
int8_t(* lxb_encoding_encode_single_f)(lxb_encoding_encode_t *ctx, lxb_char_t **data, const lxb_char_t *end, lxb_codepoint_t cp)
Definition base.h:182
@ LXB_ENCODING_ENCODE_2022_JP_ROMAN
Definition base.h:70
@ LXB_ENCODING_ENCODE_2022_JP_JIS0208
Definition base.h:71
@ LXB_ENCODING_ENCODE_2022_JP_ASCII
Definition base.h:69
lxb_encoding_t
Definition const.h:17
unsigned const char * end
Definition php_ffi.h:51
zend_constant * data
lxb_char_t upper
Definition base.h:77
lxb_char_t lower
Definition base.h:76
lxb_char_t * name
Definition base.h:195
lxb_encoding_decode_single_f decode_single
Definition base.h:194
lxb_encoding_encode_f encode
Definition base.h:191
lxb_encoding_encode_single_f encode_single
Definition base.h:193
lxb_encoding_t encoding
Definition base.h:190
lxb_encoding_decode_f decode
Definition base.h:192
size_t buffer_length
Definition base.h:110
lxb_status_t status
Definition base.h:126
const lxb_codepoint_t * replace_to
Definition base.h:117
lxb_codepoint_t codepoint
Definition base.h:121
const lxb_encoding_data_t * encoding_data
Definition base.h:106
lxb_encoding_ctx_utf_8_t utf_8
Definition base.h:129
lxb_encoding_ctx_euc_jp_t euc_jp
Definition base.h:132
lxb_codepoint_t * buffer_out
Definition base.h:109
lxb_encoding_ctx_2022_jp_t iso_2022_jp
Definition base.h:133
lxb_codepoint_t second_codepoint
Definition base.h:122
lxb_encoding_ctx_gb18030_t gb18030
Definition base.h:130
lxb_char_t * buffer_out
Definition base.h:142
size_t buffer_length
Definition base.h:143
const lxb_encoding_data_t * encoding_data
Definition base.h:139
const lxb_char_t * replace_to
Definition base.h:150
lxb_codepoint_t codepoint
Definition base.h:209
lxb_char_t name[4]
Definition base.h:199
lxb_codepoint_t codepoint
Definition base.h:201
unsigned int lxb_status_t
Definition types.h:28
unsigned char lxb_char_t
Definition types.h:27
uint32_t lxb_codepoint_t
Definition types.h:26