php-internal-docs 8.4.8
Unofficial docs for php/php-src
Loading...
Searching...
No Matches
is_json.c
Go to the documentation of this file.
1/*-
2 * Copyright (c) 2018 Christos Zoulas
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 * notice, this list of conditions and the following disclaimer in the
12 * documentation and/or other materials provided with the distribution.
13 *
14 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
15 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
16 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
17 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
18 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
19 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
20 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
21 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
22 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
23 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
24 * POSSIBILITY OF SUCH DAMAGE.
25 */
26
27/*
28 * Parse JSON object serialization format (RFC-7159)
29 */
30
31#ifndef TEST
32#include "file.h"
33
34#ifndef lint
35FILE_RCSID("@(#)$File: is_json.c,v 1.30 2022/09/27 19:12:40 christos Exp $")
36#endif
37
38#include "magic.h"
39#else
40#include <stdio.h>
41#include <stddef.h>
42#endif
43#include <string.h>
44
#ifdef DEBUG
#include <stdio.h>
/*
 * Debug trace helper.  Prints, indented by the caller's `lvl' variable
 * (which must be in scope at the expansion site): the label `a', the byte
 * at `b' as hex and as a character, and the (b - c) bytes starting at `c'.
 * Macro arguments are parenthesized so that arbitrary pointer expressions
 * can be passed safely.
 */
#define DPRINTF(a, b, c) \
	printf("%*s%s [%.2x/%c] %.*s\n", (int)lvl, "", (a), *(b), *(b), \
	    (int)((b) - (c)), (const char *)(c))
/* `lvl' is really used when tracing is enabled, so no attribute is needed. */
#define __file_debugused
#else
/* Tracing disabled: expands to an empty statement, arguments are dropped. */
#define DPRINTF(a, b, c) do { } while (/*CONSTCOND*/0)
/* Marks parameters that are only referenced by DPRINTF as unused. */
#define __file_debugused __attribute__((__unused__))
#endif
55
/*
 * Indices into the per-type counter array (size_t st[JSON_MAX]) that is
 * threaded through the parser.
 */
#define JSON_ARRAY	0	/* bumped by json_parse() for a good '[' value */
#define JSON_CONSTANT	1	/* bumped by json_parse() for true/false/null */
#define JSON_NUMBER	2	/* bumped by json_parse() for a good number */
#define JSON_OBJECT	3	/* bumped by json_parse() for a good '{' value */
#define JSON_STRING	4	/* bumped by json_parse() for a good string value */
#define JSON_ARRAYN	5	/* bumped by json_parse_array() at the closing ']' */
#define JSON_MAX	6	/* number of counter slots */

/*
 * if JSON_COUNT != 0:
 *	count all the objects, require that we have the whole data file
 * otherwise:
 *	stop if we find an object or an array
 */
#ifndef JSON_COUNT
#define JSON_COUNT 0	/* default when the build does not define it */
#endif
73
74static int json_parse(const unsigned char **, const unsigned char *, size_t *,
75 size_t);
76
/*
 * Return 1 if `uc' is one of the four whitespace bytes recognised between
 * JSON tokens (space, newline, carriage return, tab), otherwise 0.
 */
static int
json_isspace(const unsigned char uc)
{
	return uc == ' ' || uc == '\n' || uc == '\r' || uc == '\t';
}
90
/*
 * Return 1 if `uc' is a decimal digit '0'..'9', otherwise 0.
 * The C standard guarantees the digit characters are contiguous, so a
 * range comparison is equivalent to enumerating them.
 */
static int
json_isdigit(unsigned char uc)
{
	return uc >= '0' && uc <= '9';
}
102
/*
 * Return 1 if `uc' is a hexadecimal digit (0-9, a-f, A-F), otherwise 0.
 */
static int
json_isxdigit(unsigned char uc)
{
	if (json_isdigit(uc))
		return 1;
	if (uc >= 'a' && uc <= 'f')
		return 1;
	if (uc >= 'A' && uc <= 'F')
		return 1;
	return 0;
}
116
/*
 * Advance past any run of JSON whitespace starting at `uc', never going
 * beyond `ue'.  Returns the first non-whitespace position, or `ue' when
 * only whitespace remains.
 */
static const unsigned char *
json_skip_space(const unsigned char *uc, const unsigned char *ue)
{
	const unsigned char *p;

	for (p = uc; p < ue; p++) {
		if (!json_isspace(*p))
			break;
	}
	return p;
}
124
125/*ARGSUSED*/
126static int
127json_parse_string(const unsigned char **ucp, const unsigned char *ue,
128 size_t lvl __file_debugused)
129{
130 const unsigned char *uc = *ucp;
131 size_t i;
132
133 DPRINTF("Parse string: ", uc, *ucp);
134 while (uc < ue) {
135 switch (*uc++) {
136 case '\0':
137 goto out;
138 case '\\':
139 if (uc == ue)
140 goto out;
141 switch (*uc++) {
142 case '\0':
143 goto out;
144 case '"':
145 case '\\':
146 case '/':
147 case 'b':
148 case 'f':
149 case 'n':
150 case 'r':
151 case 't':
152 continue;
153 case 'u':
154 if (ue - uc < 4) {
155 uc = ue;
156 goto out;
157 }
158 for (i = 0; i < 4; i++)
159 if (!json_isxdigit(*uc++))
160 goto out;
161 continue;
162 default:
163 goto out;
164 }
165 case '"':
166 DPRINTF("Good string: ", uc, *ucp);
167 *ucp = uc;
168 return 1;
169 default:
170 continue;
171 }
172 }
173out:
174 DPRINTF("Bad string: ", uc, *ucp);
175 *ucp = uc;
176 return 0;
177}
178
179static int
180json_parse_array(const unsigned char **ucp, const unsigned char *ue,
181 size_t *st, size_t lvl)
182{
183 const unsigned char *uc = *ucp;
184
185 DPRINTF("Parse array: ", uc, *ucp);
186 while (uc < ue) {
187 uc = json_skip_space(uc, ue);
188 if (uc == ue)
189 goto out;
190 if (*uc == ']')
191 goto done;
192 if (!json_parse(&uc, ue, st, lvl + 1))
193 goto out;
194 if (uc == ue)
195 goto out;
196 switch (*uc) {
197 case ',':
198 uc++;
199 continue;
200 case ']':
201 done:
202 st[JSON_ARRAYN]++;
203 DPRINTF("Good array: ", uc, *ucp);
204 *ucp = uc + 1;
205 return 1;
206 default:
207 goto out;
208 }
209 }
210out:
211 DPRINTF("Bad array: ", uc, *ucp);
212 *ucp = uc;
213 return 0;
214}
215
/*
 * Parse the remainder of an object.  On entry *ucp points just past the
 * opening '{' (json_parse() consumes it).
 *
 * Each member must be a quoted string, a ':' and a value parsed with
 * json_parse() at lvl + 1; members are separated by ','.  A '}' is
 * accepted wherever a member could start, so both an empty object and a
 * trailing comma are tolerated.
 *
 * Returns 1 and leaves *ucp just past the '}' on success; returns 0 and
 * leaves *ucp where parsing stopped otherwise.
 */
static int
json_parse_object(const unsigned char **ucp, const unsigned char *ue,
    size_t *st, size_t lvl)
{
	const unsigned char *cur = *ucp;
	unsigned char sep;

	DPRINTF("Parse object: ", cur, *ucp);
	while (cur < ue) {
		cur = json_skip_space(cur, ue);
		if (cur == ue)
			break;
		if (*cur == '}') {
			cur++;
			goto good;
		}

		/* Member name. */
		if (*cur++ != '"') {
			DPRINTF("not string", cur, *ucp);
			break;
		}
		DPRINTF("next field", cur, *ucp);
		if (!json_parse_string(&cur, ue, lvl)) {
			DPRINTF("not string", cur, *ucp);
			break;
		}

		/* Name/value separator. */
		cur = json_skip_space(cur, ue);
		if (cur == ue)
			break;
		if (*cur++ != ':') {
			DPRINTF("not colon", cur, *ucp);
			break;
		}

		/* Member value. */
		if (!json_parse(&cur, ue, st, lvl + 1)) {
			DPRINTF("not json", cur, *ucp);
			break;
		}
		if (cur == ue)
			break;

		sep = *cur++;
		if (sep == ',')
			continue;
		if (sep == '}')	/* { */
			goto good;

		DPRINTF("not more", cur, *ucp);
		/* Only visible to the trace below; overwritten right after. */
		*ucp = cur - 1;
		break;
	}
	DPRINTF("Bad object: ", cur, *ucp);
	*ucp = cur;
	return 0;
good:
	DPRINTF("Good object: ", cur, *ucp);
	*ucp = cur;
	return 1;
}
271
272/*ARGSUSED*/
273static int
274json_parse_number(const unsigned char **ucp, const unsigned char *ue,
275 size_t lvl __file_debugused)
276{
277 const unsigned char *uc = *ucp;
278 int got = 0;
279
280 DPRINTF("Parse number: ", uc, *ucp);
281 if (uc == ue)
282 return 0;
283 if (*uc == '-')
284 uc++;
285
286 for (; uc < ue; uc++) {
287 if (!json_isdigit(*uc))
288 break;
289 got = 1;
290 }
291 if (uc == ue)
292 goto out;
293 if (*uc == '.')
294 uc++;
295 for (; uc < ue; uc++) {
296 if (!json_isdigit(*uc))
297 break;
298 got = 1;
299 }
300 if (uc == ue)
301 goto out;
302 if (got && (*uc == 'e' || *uc == 'E')) {
303 uc++;
304 got = 0;
305 if (uc == ue)
306 goto out;
307 if (*uc == '+' || *uc == '-')
308 uc++;
309 for (; uc < ue; uc++) {
310 if (!json_isdigit(*uc))
311 break;
312 got = 1;
313 }
314 }
315out:
316 if (!got)
317 DPRINTF("Bad number: ", uc, *ucp);
318 else
319 DPRINTF("Good number: ", uc, *ucp);
320 *ucp = uc;
321 return got;
322}
323
324/*ARGSUSED*/
325static int
326json_parse_const(const unsigned char **ucp, const unsigned char *ue,
327 const char *str, size_t len, size_t lvl __file_debugused)
328{
329 const unsigned char *uc = *ucp;
330
331 DPRINTF("Parse const: ", uc, *ucp);
332 *ucp += --len - 1;
333 if (*ucp > ue)
334 *ucp = ue;
335 for (; uc < ue && --len;) {
336 if (*uc++ != *++str) {
337 DPRINTF("Bad const: ", uc, *ucp);
338 return 0;
339 }
340 }
341 DPRINTF("Good const: ", uc, *ucp);
342 return 1;
343}
344
345static int
346json_parse(const unsigned char **ucp, const unsigned char *ue,
347 size_t *st, size_t lvl)
348{
349 const unsigned char *uc, *ouc;
350 int rv = 0;
351 int t;
352
353 ouc = uc = json_skip_space(*ucp, ue);
354 if (uc == ue)
355 goto out;
356
357 // Avoid recursion
358 if (lvl > 500) {
359 DPRINTF("Too many levels", uc, *ucp);
360 return 0;
361 }
362#if JSON_COUNT
363 /* bail quickly if not counting */
364 if (lvl > 1 && (st[JSON_OBJECT] || st[JSON_ARRAYN]))
365 return 1;
366#endif
367
368 DPRINTF("Parse general: ", uc, *ucp);
369 switch (*uc++) {
370 case '"':
371 rv = json_parse_string(&uc, ue, lvl + 1);
372 t = JSON_STRING;
373 break;
374 case '[':
375 rv = json_parse_array(&uc, ue, st, lvl + 1);
376 t = JSON_ARRAY;
377 break;
378 case '{': /* '}' */
379 rv = json_parse_object(&uc, ue, st, lvl + 1);
380 t = JSON_OBJECT;
381 break;
382 case 't':
383 rv = json_parse_const(&uc, ue, "true", sizeof("true"), lvl + 1);
384 t = JSON_CONSTANT;
385 break;
386 case 'f':
387 rv = json_parse_const(&uc, ue, "false", sizeof("false"),
388 lvl + 1);
389 t = JSON_CONSTANT;
390 break;
391 case 'n':
392 rv = json_parse_const(&uc, ue, "null", sizeof("null"), lvl + 1);
393 t = JSON_CONSTANT;
394 break;
395 default:
396 --uc;
397 rv = json_parse_number(&uc, ue, lvl + 1);
398 t = JSON_NUMBER;
399 break;
400 }
401 if (rv)
402 st[t]++;
403 uc = json_skip_space(uc, ue);
404out:
405 DPRINTF("End general: ", uc, *ucp);
406 *ucp = uc;
407 if (lvl == 0) {
408 if (!rv)
409 return 0;
410 if (uc == ue)
411 return (st[JSON_ARRAYN] || st[JSON_OBJECT]) ? 1 : 0;
412 if (*ouc == *uc && json_parse(&uc, ue, st, 1))
413 return (st[JSON_ARRAYN] || st[JSON_OBJECT]) ? 2 : 0;
414 else
415 return 0;
416 }
417 return rv;
418}
419
420#ifndef TEST
421int
422file_is_json(struct magic_set *ms, const struct buffer *b)
423{
424 const unsigned char *uc = CAST(const unsigned char *, b->fbuf);
425 const unsigned char *ue = uc + b->flen;
426 size_t st[JSON_MAX];
427 int mime = ms->flags & MAGIC_MIME;
428 int jt;
429
430
431 if ((ms->flags & (MAGIC_APPLE|MAGIC_EXTENSION)) != 0)
432 return 0;
433
434 memset(st, 0, sizeof(st));
435
436 if ((jt = json_parse(&uc, ue, st, 0)) == 0)
437 return 0;
438
439 if (mime == MAGIC_MIME_ENCODING)
440 return 1;
441 if (mime) {
442 if (file_printf(ms, "application/%s",
443 jt == 1 ? "json" : "x-ndjson") == -1)
444 return -1;
445 return 1;
446 }
447 if (file_printf(ms, "%sJSON text data",
448 jt == 1 ? "" : "New Line Delimited ") == -1)
449 return -1;
450#if JSON_COUNT
451#define P(n) st[n], st[n] > 1 ? "s" : ""
452 if (file_printf(ms, " (%" SIZE_T_FORMAT "u object%s, %" SIZE_T_FORMAT
453 "u array%s, %" SIZE_T_FORMAT "u string%s, %" SIZE_T_FORMAT
454 "u constant%s, %" SIZE_T_FORMAT "u number%s, %" SIZE_T_FORMAT
455 "u >1array%s)",
458 == -1)
459 return -1;
460#endif
461 return 1;
462}
463
464#else
465
466#include <sys/types.h>
467#include <sys/stat.h>
468#include <stdio.h>
469#include <fcntl.h>
470#include <unistd.h>
471#include <stdlib.h>
472#include <stdint.h>
473#include <err.h>
474
475int
476main(int argc, char *argv[])
477{
478 int fd;
479 struct stat st;
480 unsigned char *p;
481 size_t stats[JSON_MAX];
482
483 if ((fd = open(argv[1], O_RDONLY)) == -1)
484 err(EXIT_FAILURE, "Can't open `%s'", argv[1]);
485
486 if (fstat(fd, &st) == -1)
487 err(EXIT_FAILURE, "Can't stat `%s'", argv[1]);
488
489 if ((p = CAST(char *, malloc(st.st_size))) == NULL)
490 err(EXIT_FAILURE, "Can't allocate %jd bytes",
491 (intmax_t)st.st_size);
492 if (read(fd, p, st.st_size) != st.st_size)
493 err(EXIT_FAILURE, "Can't read %jd bytes",
494 (intmax_t)st.st_size);
495 memset(stats, 0, sizeof(stats));
496 printf("is json %d\n", json_parse((const unsigned char **)&p,
497 p + st.st_size, stats, 0));
498 return 0;
499}
500#endif
size_t len
Definition apprentice.c:174
printf(string $format, mixed ... $values)
fstat($stream)
stat(string $filename)
char * err
Definition ffi.c:3029
memset(ptr, 0, type->size)
file_protected int file_printf(struct magic_set *, const char *,...) __attribute__((__format__(__printf__
#define SIZE_T_FORMAT
Definition file.h:72
#define FILE_RCSID(id)
Definition file.h:654
#define CAST(T, b)
Definition file.h:425
#define NULL
Definition gdcache.h:45
int main(void)
Definition gddemo.c:7
#define JSON_NUMBER
Definition is_json.c:58
#define __file_debugused
Definition is_json.c:53
#define JSON_CONSTANT
Definition is_json.c:57
#define JSON_ARRAY
Definition is_json.c:56
#define JSON_ARRAYN
Definition is_json.c:61
#define JSON_MAX
Definition is_json.c:62
#define JSON_OBJECT
Definition is_json.c:59
int file_is_json(struct magic_set *ms, const struct buffer *b)
Definition is_json.c:422
#define JSON_STRING
Definition is_json.c:60
#define DPRINTF(a, b, c)
Definition is_json.c:52
#define MAGIC_MIME
Definition magic.h:44
#define MAGIC_MIME_ENCODING
Definition magic.h:43
#define MAGIC_EXTENSION
Definition magic.h:46
#define MAGIC_APPLE
Definition magic.h:45
int fd
Definition phpdbg.h:282
zval rv
Definition session.c:1024
p
Definition session.c:1105
Definition file.h:177
const void * fbuf
Definition file.h:180
size_t flen
Definition file.h:181
int flags
Definition file.h:458
#define P
out($f, $s)