php-internal-docs 8.4.8
Unofficial docs for php/php-src
Loading...
Searching...
No Matches
is_csv.c
Go to the documentation of this file.
1/*-
2 * Copyright (c) 2019 Christos Zoulas
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 * notice, this list of conditions and the following disclaimer in the
12 * documentation and/or other materials provided with the distribution.
13 *
14 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
15 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
16 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
17 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
18 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
19 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
20 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
21 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
22 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
23 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
24 * POSSIBILITY OF SUCH DAMAGE.
25 */
26
27/*
28 * Parse CSV object serialization format (RFC-4180, RFC-7111)
29 */
30
31#ifndef TEST
32#include "file.h"
33
34#ifndef lint
35FILE_RCSID("@(#)$File: is_csv.c,v 1.13 2023/07/17 16:08:17 christos Exp $")
36#endif
37
38#include <string.h>
39#include "magic.h"
40#else
41#include <sys/types.h>
42#endif
43
44
45#ifdef DEBUG
46#include <stdio.h>
47#define DPRINTF(fmt, ...) printf(fmt, __VA_ARGS__)
48#else
49#define DPRINTF(fmt, ...)
50#endif
51
52/*
53 * if CSV_LINES == 0:
54 * check all the lines in the buffer
55 * otherwise:
56 * check only up-to the number of lines specified
57 *
58 * the field count of the last line is ignored if that line does not end in a newline (LF, which also covers CRLF)
59 */
60#ifndef CSV_LINES
61#define CSV_LINES 10
62#endif
63
64static int csv_parse(const unsigned char *, const unsigned char *);
65
/*
 * Skip the remainder of a quoted field.
 *
 * uc points just past the opening quote and ue is one past the end of the
 * buffer.  Returns a pointer to the first non-quote byte that follows an
 * unpaired '"' (i.e. the byte right after the closing quote), or ue when
 * the buffer is exhausted first.
 */
static const unsigned char *
eatquote(const unsigned char *uc, const unsigned char *ue)
{
	const unsigned char *p;
	int pending = 0;	// previous quote has not been paired up yet

	for (p = uc; p < ue; p++) {
		if (*p == '"') {
			// A lone quote may close the field; a second one right
			// after it makes the pair an escaped quote instead.
			pending = !pending;
		} else if (pending) {
			// The unpaired quote was the closing one, stop here.
			return p;
		}
	}
	return ue;
}
90
/*
 * Decide whether the bytes in [uc, ue) look like CSV.
 *
 * Commas outside quoted fields are counted per line; the count of the first
 * complete line becomes the reference and every later complete line must
 * match it.  Returns 1 if the data looks like CSV, 0 otherwise.
 */
static int
csv_parse(const unsigned char *uc, const unsigned char *ue)
{
	size_t commas = 0;	// separators seen on the current line
	size_t expected = 0;	// separators per line, set by the first line
	size_t lines = 0;	// newline-terminated lines seen so far

	while (uc < ue) {
		const unsigned char ch = *uc++;

		if (ch == '"') {
			// Quoted field: jump past its closing quote.
			uc = eatquote(uc, ue);
			continue;
		}
		if (ch == ',') {
			commas++;
			continue;
		}
		if (ch != '\n')
			continue;

		// End of a line: compare its separator count.
		DPRINTF("%zu %zu %zu\n", lines, commas, expected);
		lines++;
#if CSV_LINES
		// Line limit reached, decide on what has been seen so far.
		if (lines == CSV_LINES)
			return expected != 0 && expected == commas;
#endif
		if (expected != 0) {
			// Separator count differs from the first line.
			if (expected != commas)
				return 0;
		} else {
			// First line without any separator, give up.
			if (commas == 0)
				return 0;
			// First line fixes the expected separator count.
			expected = commas;
		}
		commas = 0;
	}
	// Need a reference count and at least two complete lines.
	return expected != 0 && lines >= 2;
}
130
131#ifndef TEST
132int
133file_is_csv(struct magic_set *ms, const struct buffer *b, int looks_text,
134 const char *code)
135{
136 const unsigned char *uc = CAST(const unsigned char *, b->fbuf);
137 const unsigned char *ue = uc + b->flen;
138 int mime = ms->flags & MAGIC_MIME;
139
140 if (!looks_text)
141 return 0;
142
143 if ((ms->flags & (MAGIC_APPLE|MAGIC_EXTENSION)) != 0)
144 return 0;
145
146 if (!csv_parse(uc, ue))
147 return 0;
148
149 if (mime == MAGIC_MIME_ENCODING)
150 return 1;
151
152 if (mime) {
153 if (file_printf(ms, "text/csv") == -1)
154 return -1;
155 return 1;
156 }
157
158 if (file_printf(ms, "CSV %s%stext", code ? code : "",
159 code ? " " : "") == -1)
160 return -1;
161
162 return 1;
163}
164
165#else
166
167#include <sys/types.h>
168#include <sys/stat.h>
169#include <stdio.h>
170#include <fcntl.h>
171#include <unistd.h>
172#include <stdlib.h>
173#include <stdint.h>
174#include <err.h>
175
/*
 * Stand-alone test driver, compiled only when TEST is defined: reads the
 * file named by argv[1] into memory and prints the csv_parse() verdict.
 * Exits with EXIT_FAILURE if the argument is missing or the file cannot be
 * opened, stat'ed, allocated for, or read completely.
 */
int
main(int argc, char *argv[])
{
	int fd;
	struct stat st;
	unsigned char *p;
	size_t len, off;
	ssize_t nr;

	// argv[1] is only valid when an argument was actually supplied.
	if (argc < 2)
		errx(EXIT_FAILURE, "Usage: %s file",
		    argc > 0 ? argv[0] : "is_csv");

	if ((fd = open(argv[1], O_RDONLY)) == -1)
		err(EXIT_FAILURE, "Can't open `%s'", argv[1]);

	if (fstat(fd, &st) == -1)
		err(EXIT_FAILURE, "Can't stat `%s'", argv[1]);

	len = (size_t)st.st_size;

	// malloc(0) may legitimately return NULL, so never ask for 0 bytes.
	// No CAST() here: file.h is not included in the TEST build.
	if ((p = malloc(len != 0 ? len : 1)) == NULL)
		err(EXIT_FAILURE, "Can't allocate %jd bytes",
		    (intmax_t)st.st_size);

	// read(2) may return fewer bytes than requested; keep going until
	// the whole file is in memory.
	for (off = 0; off < len; off += (size_t)nr) {
		nr = read(fd, p + off, len - off);
		if (nr == -1)
			err(EXIT_FAILURE, "Can't read %jd bytes",
			    (intmax_t)st.st_size);
		// EOF before st_size bytes: errno is not meaningful here.
		if (nr == 0)
			errx(EXIT_FAILURE, "Short read: %zu of %zu bytes",
			    off, len);
	}

	printf("is csv %d\n", csv_parse(p, p + len));

	free(p);
	(void)close(fd);
	return 0;
}
198#endif
printf(string $format, mixed ... $values)
fstat($stream)
stat(string $filename)
char * err
Definition ffi.c:3029
file_protected int file_printf(struct magic_set *, const char *,...) __attribute__((__format__(__printf__
#define FILE_RCSID(id)
Definition file.h:654
#define CAST(T, b)
Definition file.h:425
#define NULL
Definition gdcache.h:45
int main(void)
Definition gddemo.c:7
#define CSV_LINES
Definition is_csv.c:61
int file_is_csv(struct magic_set *ms, const struct buffer *b, int looks_text, const char *code)
Definition is_csv.c:133
#define DPRINTF(fmt,...)
Definition is_csv.c:49
#define MAGIC_MIME
Definition magic.h:44
#define MAGIC_MIME_ENCODING
Definition magic.h:43
#define MAGIC_EXTENSION
Definition magic.h:46
#define MAGIC_APPLE
Definition magic.h:45
int fd
Definition phpdbg.h:282
p
Definition session.c:1105
Definition file.h:177
const void * fbuf
Definition file.h:180
size_t flen
Definition file.h:181
int flags
Definition file.h:458