php-internal-docs 8.4.8
Unofficial docs for php/php-src
Loading...
Searching...
No Matches
mbfl_convert.c
Go to the documentation of this file.
1/*
2 * "streamable kanji code filter and converter"
3 * Copyright (c) 1998-2002 HappySize, Inc. All rights reserved.
4 *
5 * LICENSE NOTICES
6 *
7 * This file is part of "streamable kanji code filter and converter",
8 * which is distributed under the terms of GNU Lesser General Public
9 * License (version 2) as published by the Free Software Foundation.
10 *
11 * This software is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU Lesser General Public License for more details.
15 *
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with "streamable kanji code filter and converter";
18 * if not, write to the Free Software Foundation, Inc., 59 Temple Place,
19 * Suite 330, Boston, MA 02111-1307 USA
20 *
21 * The author of this file:
22 *
23 */
24/*
25 * The source code included in this file was separated from mbfilter.c
26 * by Moriyoshi Koizumi <moriyoshi@php.net> on 20 Dec 2002. The file
27 * mbfilter.c is included in this package.
28 *
29 */
30
31#include <stddef.h>
32
33#include "mbfl_encoding.h"
34#include "mbfl_filter_output.h"
35#include "mbfilter_pass.h"
36#include "mbfilter_8bit.h"
37#include "mbfilter_wchar.h"
38
53
/* Hex digit lookup table: index 0..15 maps to the ASCII codes of "0123456789ABCDEF".
 * The table is only ever read, so it is declared const. */
static const char mbfl_hexchar_table[] = {
	0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37,
	0x38, 0x39, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46
};
58
59static const struct mbfl_convert_vtbl *mbfl_special_filter_list[] = {
65 &vtbl_pass,
66 NULL
67};
68
69static void mbfl_convert_filter_init(mbfl_convert_filter *filter, const mbfl_encoding *from, const mbfl_encoding *to,
70 const struct mbfl_convert_vtbl *vtbl, output_function_t output_function, flush_function_t flush_function, void* data)
71{
72 /* encoding structure */
73 filter->from = from;
74 filter->to = to;
75
76 if (output_function != NULL) {
77 filter->output_function = output_function;
78 } else {
80 }
81
82 filter->flush_function = flush_function;
83 filter->data = data;
85 filter->illegal_substchar = '?';
86 filter->num_illegalchar = 0;
87 filter->filter_dtor = vtbl->filter_dtor;
88 filter->filter_function = vtbl->filter_function;
90 filter->filter_copy = vtbl->filter_copy;
91
92 (*vtbl->filter_ctor)(filter);
93}
94
96 flush_function_t flush_function, void* data)
97{
99 if (vtbl == NULL) {
100 return NULL;
101 }
102
104 mbfl_convert_filter_init(filter, from, to, vtbl, output_function, flush_function, data);
105 return filter;
106}
107
109 flush_function_t flush_function, void* data)
110{
111 const mbfl_encoding *from_encoding = mbfl_no2encoding(vtbl->from);
112 const mbfl_encoding *to_encoding = mbfl_no2encoding(vtbl->to);
113
115 mbfl_convert_filter_init(filter, from_encoding, to_encoding, vtbl, output_function, flush_function, data);
116 return filter;
117}
118
120{
121 if (filter->filter_dtor) {
122 (*filter->filter_dtor)(filter);
123 }
124 efree(filter);
125}
126
127/* Feed a char, return 0 if ok - used by mailparse ext */
129{
130 return (*filter->filter_function)(c, filter);
131}
132
133/* Feed string into `filter` byte by byte; return pointer to first byte not processed */
134unsigned char* mbfl_convert_filter_feed_string(mbfl_convert_filter *filter, unsigned char *p, size_t len)
135{
136 while (len--) {
137 if ((*filter->filter_function)(*p++, filter) < 0) {
138 break;
139 }
140 }
141 return p;
142}
143
145{
146 (*filter->filter_flush)(filter);
147 return 0;
148}
149
151{
152 if (filter->filter_dtor) {
153 (*filter->filter_dtor)(filter);
154 }
155
157
158 if (vtbl == NULL) {
159 vtbl = &vtbl_pass;
160 }
161
162 mbfl_convert_filter_init(filter, from, to, vtbl, filter->output_function, filter->flush_function, filter->data);
163}
164
166{
167 if (src->filter_copy != NULL) {
168 src->filter_copy(src, dest);
169 return;
170 }
171
172 *dest = *src;
173}
174
179
180int mbfl_convert_filter_strcat(mbfl_convert_filter *filter, const unsigned char *p)
181{
182 int c;
183 while ((c = *p++)) {
184 if ((*filter->filter_function)(c, filter) < 0) {
185 return -1;
186 }
187 }
188
189 return 0;
190}
191
192static int mbfl_filt_conv_output_hex(unsigned int w, mbfl_convert_filter *filter)
193{
194 bool nonzero = false;
195 int shift = 28, ret = 0;
196
197 while (shift >= 0) {
198 int n = (w >> shift) & 0xF;
199 if (n || nonzero) {
200 nonzero = true;
201 ret = (*filter->filter_function)(mbfl_hexchar_table[n], filter);
202 if (ret < 0) {
203 return ret;
204 }
205 }
206 shift -= 4;
207 }
208
209 if (!nonzero) {
210 /* No hex digits were output by above loop */
211 ret = (*filter->filter_function)('0', filter);
212 }
213
214 return ret;
215}
216
217/* illegal character output function for conv-filter */
219{
220 unsigned int w = c;
221 int ret = 0;
222 int mode_backup = filter->illegal_mode;
223 uint32_t substchar_backup = filter->illegal_substchar;
224
225 /* The used substitution character may not be supported by the target character encoding.
226 * If that happens, first try to use "?" instead and if that also fails, silently drop the
227 * character. */
229 && filter->illegal_substchar != '?') {
230 filter->illegal_substchar = '?';
231 } else {
233 }
234
235 switch (mode_backup) {
237 ret = (*filter->filter_function)(substchar_backup, filter);
238 break;
239
241 if (w != MBFL_BAD_INPUT) {
242 ret = mbfl_convert_filter_strcat(filter, (const unsigned char *)"U+");
243 if (ret < 0)
244 break;
245 ret = mbfl_filt_conv_output_hex(w, filter);
246 } else {
247 ret = (*filter->filter_function)(substchar_backup, filter);
248 }
249 break;
250
252 if (w != MBFL_BAD_INPUT) {
253 ret = mbfl_convert_filter_strcat(filter, (const unsigned char *)"&#x");
254 if (ret < 0)
255 break;
256 ret = mbfl_filt_conv_output_hex(w, filter);
257 if (ret < 0)
258 break;
259 ret = mbfl_convert_filter_strcat(filter, (const unsigned char *)";");
260 } else {
261 ret = (*filter->filter_function)(substchar_backup, filter);
262 }
263 break;
264
266 default:
267 break;
268 }
269
270 filter->illegal_mode = mode_backup;
271 filter->illegal_substchar = substchar_backup;
272 filter->num_illegalchar++;
273
274 return ret;
275}
276
278{
279 if (to->no_encoding == mbfl_no_encoding_base64 ||
280 to->no_encoding == mbfl_no_encoding_qprint) {
282 } else if (from->no_encoding == mbfl_no_encoding_base64 ||
283 from->no_encoding == mbfl_no_encoding_qprint ||
284 from->no_encoding == mbfl_no_encoding_uuencode) {
286 }
287
288 if (to == from && (to == &mbfl_encoding_wchar || to == &mbfl_encoding_8bit)) {
289 return &vtbl_pass;
290 }
291
292 if (to->no_encoding == mbfl_no_encoding_wchar) {
293 return from->input_filter;
294 } else if (from->no_encoding == mbfl_no_encoding_wchar) {
295 return to->output_filter;
296 } else {
297 int i = 0;
298 const struct mbfl_convert_vtbl *vtbl;
299 while ((vtbl = mbfl_special_filter_list[i++])) {
300 if (vtbl->from == from->no_encoding && vtbl->to == to->no_encoding) {
301 return vtbl;
302 }
303 }
304 return NULL;
305 }
306}
307
308/*
309 * commonly used constructor
310 */
312{
313 filter->status = filter->cache = 0;
314}
315
317{
318 if (filter->flush_function) {
319 (*filter->flush_function)(filter->data);
320 }
321 return 0;
322}
323
324zend_string* mb_fast_convert(unsigned char *in, size_t in_len, const mbfl_encoding *from, const mbfl_encoding *to, uint32_t replacement_char, unsigned int error_mode, unsigned int *num_errors)
325{
326 uint32_t wchar_buf[128];
327 unsigned int state = 0;
328
333 }
334
336 mb_convert_buf_init(&buf, in_len, replacement_char, error_mode);
337
338 while (in_len) {
339 size_t out_len = from->to_wchar(&in, &in_len, wchar_buf, 128, &state);
340 ZEND_ASSERT(out_len <= 128);
341 to->from_wchar(wchar_buf, out_len, &buf, !in_len);
342 }
343
344 *num_errors = buf.errors;
345 return mb_convert_buf_result(&buf, to);
346}
347
348static uint32_t* convert_cp_to_hex(uint32_t cp, uint32_t *out)
349{
350 bool nonzero = false;
351 int shift = 28;
352
353 while (shift >= 0) {
354 int n = (cp >> shift) & 0xF;
355 if (n || nonzero) {
356 nonzero = true;
357 *out++ = mbfl_hexchar_table[n];
358 }
359 shift -= 4;
360 }
361
362 if (!nonzero) {
363 /* No hex digits were output by above loop */
364 *out++ = '0';
365 }
366
367 return out;
368}
369
370static size_t mb_illegal_marker(uint32_t bad_cp, uint32_t *out, unsigned int err_mode, uint32_t replacement_char)
371{
372 uint32_t *start = out;
373
374 if (bad_cp == MBFL_BAD_INPUT) {
375 /* Input string contained a byte sequence which was invalid in the 'from' encoding
376 * Unless the error handling mode is set to NONE, insert the replacement character */
377 if (err_mode != MBFL_OUTPUTFILTER_ILLEGAL_MODE_NONE) {
378 *out++ = replacement_char;
379 }
380 } else {
381 /* Input string contained a byte sequence which was valid in the 'from' encoding,
382 * but decoded to a Unicode codepoint which cannot be represented in the 'to' encoding */
383 switch (err_mode) {
385 *out++ = replacement_char;
386 break;
387
389 out[0] = 'U';
390 out[1] = '+';
391 out = convert_cp_to_hex(bad_cp, &out[2]);
392 break;
393
395 out[0] = '&'; out[1] = '#'; out[2] = 'x';
396 out = convert_cp_to_hex(bad_cp, &out[3]);
397 *out++ = ';';
398 break;
399 }
400 }
401
402 return out - start;
403}
404
406{
407 buf->errors++;
408
409 uint32_t temp[12];
410 uint32_t repl_char = buf->replacement_char;
411 unsigned int err_mode = buf->error_mode;
412
414 /* This mode is for internal use only, when converting a string to
415 * UTF-8 before searching it; it uses a byte which is illegal in
416 * UTF-8 as an error marker. This ensures that error markers will
417 * never 'accidentally' match valid text, as could happen when a
418 * character like '?' is used as an error marker. */
419 MB_CONVERT_BUF_ENSURE(buf, buf->out, buf->limit, 1);
420 buf->out = mb_convert_buf_add(buf->out, 0xFF);
421 return;
422 }
423
424 size_t len = mb_illegal_marker(bad_cp, temp, err_mode, repl_char);
425
426 /* Avoid infinite loop if `fn` is not able to handle `repl_char` */
427 if (err_mode == MBFL_OUTPUTFILTER_ILLEGAL_MODE_CHAR && repl_char != '?') {
428 buf->replacement_char = '?';
429 } else {
431 }
432
433 fn(temp, len, buf, false);
434
435 buf->replacement_char = repl_char;
436 buf->error_mode = err_mode;
437}
size_t len
Definition apprentice.c:174
zend_long n
Definition ffi.c:4979
buf start
Definition ffi.c:4687
zend_ffi_ctype_name_buf buf
Definition ffi.c:4685
#define NULL
Definition gdcache.h:45
#define MBFL_OUTPUTFILTER_ILLEGAL_MODE_LONG
Definition mbfilter.h:117
#define MBFL_OUTPUTFILTER_ILLEGAL_MODE_BADUTF8
Definition mbfilter.h:119
#define MBFL_OUTPUTFILTER_ILLEGAL_MODE_NONE
Definition mbfilter.h:115
#define MBFL_OUTPUTFILTER_ILLEGAL_MODE_ENTITY
Definition mbfilter.h:118
#define MBFL_OUTPUTFILTER_ILLEGAL_MODE_CHAR
Definition mbfilter.h:116
const mbfl_encoding mbfl_encoding_8bit
const struct mbfl_convert_vtbl vtbl_8bit_b64
const mbfl_encoding mbfl_encoding_base64
const struct mbfl_convert_vtbl vtbl_b64_8bit
const struct mbfl_convert_vtbl vtbl_pass
const struct mbfl_convert_vtbl vtbl_qprint_8bit
const mbfl_encoding mbfl_encoding_qprint
const struct mbfl_convert_vtbl vtbl_8bit_qprint
const mbfl_encoding mbfl_encoding_uuencode
const struct mbfl_convert_vtbl vtbl_uuencode_8bit
const mbfl_encoding mbfl_encoding_wchar
#define MBFL_BAD_INPUT
Definition mbfl_consts.h:45
void mb_illegal_output(uint32_t bad_cp, mb_from_wchar_fn fn, mb_convert_buf *buf)
void mbfl_convert_filter_copy(mbfl_convert_filter *src, mbfl_convert_filter *dest)
int mbfl_convert_filter_flush(mbfl_convert_filter *filter)
int mbfl_convert_filter_feed(int c, mbfl_convert_filter *filter)
int mbfl_filt_conv_common_flush(mbfl_convert_filter *filter)
int mbfl_convert_filter_strcat(mbfl_convert_filter *filter, const unsigned char *p)
unsigned char * mbfl_convert_filter_feed_string(mbfl_convert_filter *filter, unsigned char *p, size_t len)
int mbfl_filt_conv_illegal_output(int c, mbfl_convert_filter *filter)
zend_string * mb_fast_convert(unsigned char *in, size_t in_len, const mbfl_encoding *from, const mbfl_encoding *to, uint32_t replacement_char, unsigned int error_mode, unsigned int *num_errors)
const struct mbfl_convert_vtbl * mbfl_convert_filter_get_vtbl(const mbfl_encoding *from, const mbfl_encoding *to)
void mbfl_filt_conv_common_ctor(mbfl_convert_filter *filter)
mbfl_convert_filter * mbfl_convert_filter_new2(const struct mbfl_convert_vtbl *vtbl, output_function_t output_function, flush_function_t flush_function, void *data)
mbfl_convert_filter * mbfl_convert_filter_new(const mbfl_encoding *from, const mbfl_encoding *to, output_function_t output_function, flush_function_t flush_function, void *data)
void mbfl_convert_filter_devcat(mbfl_convert_filter *filter, mbfl_memory_device *src)
void mbfl_convert_filter_delete(mbfl_convert_filter *filter)
void mbfl_convert_filter_reset(mbfl_convert_filter *filter, const mbfl_encoding *from, const mbfl_encoding *to)
struct _mbfl_convert_filter mbfl_convert_filter
int(* output_function_t)(int, void *)
int(* filter_flush_t)(mbfl_convert_filter *)
int(* flush_function_t)(void *)
const mbfl_encoding * mbfl_no2encoding(enum mbfl_no_encoding no_encoding)
@ mbfl_no_encoding_base64
@ mbfl_no_encoding_wchar
@ mbfl_no_encoding_qprint
@ mbfl_no_encoding_uuencode
#define MB_CONVERT_BUF_ENSURE(buf, out, limit, needed)
void(* mb_from_wchar_fn)(uint32_t *in, size_t in_len, mb_convert_buf *out, bool end)
int mbfl_filter_output_null(int c, void *data)
struct _mbfl_memory_device mbfl_memory_device
zend_constant * data
p
Definition session.c:1105
const mbfl_encoding * from
output_function_t output_function
const mbfl_encoding * to
int(* filter_function)(int c, mbfl_convert_filter *filter)
void(* filter_dtor)(mbfl_convert_filter *filter)
flush_function_t flush_function
filter_flush_t filter_flush
void(* filter_copy)(mbfl_convert_filter *src, mbfl_convert_filter *dest)
enum mbfl_no_encoding from
int(* filter_flush)(struct _mbfl_convert_filter *filter)
int(* filter_function)(int c, struct _mbfl_convert_filter *filter)
void(* filter_ctor)(struct _mbfl_convert_filter *filter)
void(* filter_copy)(struct _mbfl_convert_filter *src, struct _mbfl_convert_filter *dest)
enum mbfl_no_encoding to
void(* filter_dtor)(struct _mbfl_convert_filter *filter)
#define efree(ptr)
Definition zend_alloc.h:155
#define emalloc(size)
Definition zend_alloc.h:151
struct _zend_string zend_string
#define ZEND_ASSERT(c)
zval * ret
out($f, $s)