php-internal-docs 8.4.8
Unofficial docs for php/php-src
Loading...
Searching...
No Matches
fuzzer-mbstring.c
Go to the documentation of this file.
1/*
2 +----------------------------------------------------------------------+
3 | Copyright (c) The PHP Group |
4 +----------------------------------------------------------------------+
5 | This source file is subject to version 3.01 of the PHP license, |
6 | that is bundled with this package in the file LICENSE, and is |
7 | available through the world-wide-web at the following url: |
8 | https://www.php.net/license/3_01.txt |
9 | If you did not receive a copy of the PHP license and are unable to |
10 | obtain it through the world-wide-web, please send a note to |
11 | license@php.net so we can mail you a copy immediately. |
12 +----------------------------------------------------------------------+
13 | Authors: Stanislav Malyshev <stas@php.net> |
14 +----------------------------------------------------------------------+
15 */
16
17
#include "zend.h"
#include "fuzzer.h"
#include "fuzzer-sapi.h"
#include "ext/mbstring/mbstring.h"
22
23zend_string* convert_encoding(const uint8_t *Data, size_t Size, const mbfl_encoding *FromEncoding, const mbfl_encoding *ToEncoding, size_t BufSize, unsigned int *NumErrors)
24{
25 uint32_t *wchar_buf = ecalloc(BufSize, sizeof(uint32_t));
26 unsigned int state = 0;
27
29 mb_convert_buf_init(&buf, Size, '?', MBFL_OUTPUTFILTER_ILLEGAL_MODE_CHAR);
30
31 while (Size) {
32 size_t out_len = FromEncoding->to_wchar((unsigned char**)&Data, &Size, wchar_buf, BufSize, &state);
33 ZEND_ASSERT(out_len <= BufSize);
34 ToEncoding->from_wchar(wchar_buf, out_len, &buf, !Size);
35 }
36
37 *NumErrors = buf.errors;
38 zend_string *result = mb_convert_buf_result(&buf, ToEncoding);
39 efree(wchar_buf);
40 return result;
41}
42
44{
45 ZEND_ASSERT(ZSTR_LEN(str1) == ZSTR_LEN(str2));
46 for (int i = 0; i < ZSTR_LEN(str1); i++) {
47 ZEND_ASSERT(ZSTR_VAL(str1)[i] == ZSTR_VAL(str2)[i]);
48 }
49}
50
51int LLVMFuzzerTestOneInput(const uint8_t *Data, size_t Size) {
52 const uint8_t *Comma1 = memchr(Data, ',', Size);
53 if (!Comma1) {
54 return 0;
55 }
56
57 size_t ToEncodingNameLen = Comma1 - Data;
58 char *ToEncodingName = estrndup((char *) Data, ToEncodingNameLen);
59 Data = Comma1 + 1;
60 Size -= ToEncodingNameLen + 1;
61
62 const uint8_t *Comma2 = memchr(Data, ',', Size);
63 if (!Comma2) {
64 efree(ToEncodingName);
65 return 0;
66 }
67
68 size_t FromEncodingNameLen = Comma2 - Data;
69 char *FromEncodingName = estrndup((char *) Data, FromEncodingNameLen);
70 Data = Comma2 + 1;
71 Size -= FromEncodingNameLen + 1;
72
73 const mbfl_encoding *ToEncoding = mbfl_name2encoding(ToEncodingName);
74 const mbfl_encoding *FromEncoding = mbfl_name2encoding(FromEncodingName);
75
76 if (!ToEncoding || !FromEncoding || Size < 2 || fuzzer_request_startup() == FAILURE) {
77 efree(ToEncodingName);
78 efree(FromEncodingName);
79 return 0;
80 }
81
82 /* Rather than converting an entire (possibly very long) string at once, mbstring converts
83 * strings 'chunk by chunk'; the decoder will run until it fills up its output buffer with
84 * wchars, then the encoder will process those wchars, then the decoder runs again until it
85 * again fills up its output buffer, and so on
86 *
87 * The most error-prone part of the decoder/encoder code is where we exit a decoder/encoder
88 * function and save its state to allow later resumption
89 * To stress-test that aspect of the decoders/encoders, try performing an encoding conversion
90 * operation with different, random buffer sizes
91 * If the code is correct, the result should always be the same either way */
92 size_t bufsize1 = *Data++;
93 size_t bufsize2 = *Data++;
94 bufsize1 = MAX(bufsize1, MBSTRING_MIN_WCHAR_BUFSIZE);
95 bufsize2 = MAX(bufsize2, MBSTRING_MIN_WCHAR_BUFSIZE);
96 Size -= 2;
97
98 unsigned int errors1 = 0, errors2 = 0;
99
100 zend_string *Result1 = convert_encoding(Data, Size, FromEncoding, ToEncoding, bufsize1, &errors1);
101 zend_string *Result2 = convert_encoding(Data, Size, FromEncoding, ToEncoding, bufsize2, &errors2);
102
103 assert_zend_string_eql(Result1, Result2);
104 ZEND_ASSERT(errors1 == errors2);
105
106 /* For some text encodings, we have specialized validation functions. These should always be
107 * stricter than the conversion functions; if the conversion function receives invalid input
108 * and emits an error marker (MBFL_BAD_INPUT), then the validation function should always
109 * return false. However, if the conversion function does not emit any error marker, it may
110 * still happen in some cases that the validation function returns false. */
111 if (FromEncoding->check != NULL) {
112 bool good = FromEncoding->check((unsigned char*)Data, Size);
113 if (errors1 > 0) {
114 /* If the conversion function emits an error marker, that may or may not mean the input
115 * was invalid; it could also be that the input was valid, but it contains codepoints
116 * which cannot be represented in the output encoding.
117 * To confirm if that is the case, try converting to UTF-8, which can represent any
118 * Unicode codepoint. */
119 unsigned int errors3 = 0;
120 zend_string *Temp = convert_encoding(Data, Size, FromEncoding, &mbfl_encoding_utf8, 128, &errors3);
121 if (errors3 > 0) {
122 ZEND_ASSERT(!good);
123 }
124 zend_string_release(Temp);
125 }
126 }
127
128 zend_string_release(Result1);
129 zend_string_release(Result2);
130 efree(ToEncodingName);
131 efree(FromEncodingName);
132
134 return 0;
135}
136
/* libFuzzer one-time initialization hook.
 * Calls fuzzer_init_php() with no extra INI settings; argc/argv are not used.
 * Always returns 0. */
int LLVMFuzzerInitialize(int *argc, char ***argv) {
	fuzzer_init_php(NULL);

	return 0;
}
zend_ffi_ctype_name_buf buf
Definition ffi.c:4685
int LLVMFuzzerTestOneInput(const uint8_t *Data, size_t Size)
int LLVMFuzzerInitialize(int *argc, char ***argv)
zend_string * convert_encoding(const uint8_t *Data, size_t Size, const mbfl_encoding *FromEncoding, const mbfl_encoding *ToEncoding, size_t BufSize, unsigned int *NumErrors)
void assert_zend_string_eql(zend_string *str1, zend_string *str2)
int fuzzer_request_startup(void)
void fuzzer_request_shutdown(void)
int fuzzer_init_php(const char *extra_ini)
#define NULL
Definition gdcache.h:45
#define MBFL_OUTPUTFILTER_ILLEGAL_MODE_CHAR
Definition mbfilter.h:116
const mbfl_encoding mbfl_encoding_utf8
const mbfl_encoding * mbfl_name2encoding(const char *name)
#define MBSTRING_MIN_WCHAR_BUFSIZE
mb_to_wchar_fn to_wchar
mb_check_fn check
mb_from_wchar_fn from_wchar
#define estrndup(s, length)
Definition zend_alloc.h:165
#define ecalloc(nmemb, size)
Definition zend_alloc.h:158
#define efree(ptr)
Definition zend_alloc.h:155
struct _zend_string zend_string
#define ZEND_ASSERT(c)
#define MAX(a, b)
#define ZSTR_VAL(zstr)
Definition zend_string.h:68
#define ZSTR_LEN(zstr)
Definition zend_string.h:69
@ FAILURE
Definition zend_types.h:61
bool result