php-internal-docs 8.4.8
Unofficial docs for php/php-src
Loading...
Searching...
No Matches
pcre2_substring.c
Go to the documentation of this file.
/*************************************************
*      Perl-Compatible Regular Expressions       *
*************************************************/

/* PCRE is a library of functions to support regular expressions whose syntax
and semantics are as close as possible to those of the Perl 5 language.

                       Written by Philip Hazel
     Original API code Copyright (c) 1997-2012 University of Cambridge
          New API code Copyright (c) 2016-2023 University of Cambridge

-----------------------------------------------------------------------------
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:

    * Redistributions of source code must retain the above copyright notice,
      this list of conditions and the following disclaimer.

    * Redistributions in binary form must reproduce the above copyright
      notice, this list of conditions and the following disclaimer in the
      documentation and/or other materials provided with the distribution.

    * Neither the name of the University of Cambridge nor the names of its
      contributors may be used to endorse or promote products derived from
      this software without specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
-----------------------------------------------------------------------------
*/
40
41
42#ifdef HAVE_CONFIG_H
43#include "config.h"
44#endif
45
46#include "pcre2_internal.h"
47
48
49
50/*************************************************
51* Copy named captured string to given buffer *
52*************************************************/
53
54/* This function copies a single captured substring into a given buffer,
55identifying it by name. If the regex permits duplicate names, the first
56substring that is set is chosen.
57
58Arguments:
59 match_data points to the match data
60 stringname the name of the required substring
61 buffer where to put the substring
62 sizeptr the size of the buffer, updated to the size of the substring
63
64Returns: if successful: zero
65 if not successful, a negative error code:
66 (1) an error from nametable_scan()
67 (2) an error from copy_bynumber()
68 (3) PCRE2_ERROR_UNAVAILABLE: no group is in ovector
69 (4) PCRE2_ERROR_UNSET: all named groups in ovector are unset
70*/
71
74 PCRE2_UCHAR *buffer, PCRE2_SIZE *sizeptr)
75{
76PCRE2_SPTR first, last, entry;
77int failrc, entrysize;
78if (match_data->matchedby == PCRE2_MATCHEDBY_DFA_INTERPRETER)
80entrysize = pcre2_substring_nametable_scan(match_data->code, stringname,
81 &first, &last);
82if (entrysize < 0) return entrysize;
84for (entry = first; entry <= last; entry += entrysize)
85 {
86 uint32_t n = GET2(entry, 0);
87 if (n < match_data->oveccount)
88 {
89 if (match_data->ovector[n*2] != PCRE2_UNSET)
90 return pcre2_substring_copy_bynumber(match_data, n, buffer, sizeptr);
91 failrc = PCRE2_ERROR_UNSET;
92 }
93 }
94return failrc;
95}
96
97
98
99/*************************************************
100* Copy numbered captured string to given buffer *
101*************************************************/
102
103/* This function copies a single captured substring into a given buffer,
104identifying it by number.
105
106Arguments:
107 match_data points to the match data
108 stringnumber the number of the required substring
109 buffer where to put the substring
110 sizeptr the size of the buffer, updated to the size of the substring
111
112Returns: if successful: 0
113 if not successful, a negative error code:
114 PCRE2_ERROR_NOMEMORY: buffer too small
115 PCRE2_ERROR_NOSUBSTRING: no such substring
116 PCRE2_ERROR_UNAVAILABLE: ovector too small
117 PCRE2_ERROR_UNSET: substring is not set
118*/
119
122 uint32_t stringnumber, PCRE2_UCHAR *buffer, PCRE2_SIZE *sizeptr)
123{
124int rc;
126rc = pcre2_substring_length_bynumber(match_data, stringnumber, &size);
127if (rc < 0) return rc;
128if (size + 1 > *sizeptr) return PCRE2_ERROR_NOMEMORY;
129memcpy(buffer, match_data->subject + match_data->ovector[stringnumber*2],
130 CU2BYTES(size));
131buffer[size] = 0;
132*sizeptr = size;
133return 0;
134}
135
136
137
138/*************************************************
139* Extract named captured string *
140*************************************************/
141
142/* This function copies a single captured substring, identified by name, into
143new memory. If the regex permits duplicate names, the first substring that is
144set is chosen.
145
146Arguments:
147 match_data pointer to match_data
148 stringname the name of the required substring
149 stringptr where to put the pointer to the new memory
150 sizeptr where to put the length of the substring
151
152Returns: if successful: zero
153 if not successful, a negative value:
154 (1) an error from nametable_scan()
155 (2) an error from get_bynumber()
156 (3) PCRE2_ERROR_UNAVAILABLE: no group is in ovector
157 (4) PCRE2_ERROR_UNSET: all named groups in ovector are unset
158*/
159
162 PCRE2_SPTR stringname, PCRE2_UCHAR **stringptr, PCRE2_SIZE *sizeptr)
163{
164PCRE2_SPTR first, last, entry;
165int failrc, entrysize;
166if (match_data->matchedby == PCRE2_MATCHEDBY_DFA_INTERPRETER)
168entrysize = pcre2_substring_nametable_scan(match_data->code, stringname,
169 &first, &last);
170if (entrysize < 0) return entrysize;
172for (entry = first; entry <= last; entry += entrysize)
173 {
174 uint32_t n = GET2(entry, 0);
175 if (n < match_data->oveccount)
176 {
177 if (match_data->ovector[n*2] != PCRE2_UNSET)
178 return pcre2_substring_get_bynumber(match_data, n, stringptr, sizeptr);
179 failrc = PCRE2_ERROR_UNSET;
180 }
181 }
182return failrc;
183}
184
185
186
187/*************************************************
188* Extract captured string to new memory *
189*************************************************/
190
191/* This function copies a single captured substring into a piece of new
192memory.
193
194Arguments:
195 match_data points to match data
196 stringnumber the number of the required substring
197 stringptr where to put a pointer to the new memory
198 sizeptr where to put the size of the substring
199
200Returns: if successful: 0
201 if not successful, a negative error code:
202 PCRE2_ERROR_NOMEMORY: failed to get memory
203 PCRE2_ERROR_NOSUBSTRING: no such substring
204 PCRE2_ERROR_UNAVAILABLE: ovector too small
205 PCRE2_ERROR_UNSET: substring is not set
206*/
207
210 uint32_t stringnumber, PCRE2_UCHAR **stringptr, PCRE2_SIZE *sizeptr)
211{
212int rc;
214PCRE2_UCHAR *yield;
215rc = pcre2_substring_length_bynumber(match_data, stringnumber, &size);
216if (rc < 0) return rc;
217yield = PRIV(memctl_malloc)(sizeof(pcre2_memctl) +
218 (size + 1)*PCRE2_CODE_UNIT_WIDTH, (pcre2_memctl *)match_data);
219if (yield == NULL) return PCRE2_ERROR_NOMEMORY;
220yield = (PCRE2_UCHAR *)(((char *)yield) + sizeof(pcre2_memctl));
221memcpy(yield, match_data->subject + match_data->ovector[stringnumber*2],
222 CU2BYTES(size));
223yield[size] = 0;
224*stringptr = yield;
225*sizeptr = size;
226return 0;
227}
228
229
230
231/*************************************************
232* Free memory obtained by get_substring *
233*************************************************/
234
235/*
236Argument: the result of a previous pcre2_substring_get_byxxx()
237Returns: nothing
238*/
239
242{
243if (string != NULL)
244 {
245 pcre2_memctl *memctl = (pcre2_memctl *)((char *)string - sizeof(pcre2_memctl));
246 memctl->free(memctl, memctl->memory_data);
247 }
248}
249
250
251
252/*************************************************
253* Get length of a named substring *
254*************************************************/
255
256/* This function returns the length of a named captured substring. If the regex
257permits duplicate names, the first substring that is set is chosen.
258
259Arguments:
260 match_data pointer to match data
261 stringname the name of the required substring
262 sizeptr where to put the length
263
264Returns: 0 if successful, else a negative error number
265*/
266
269 PCRE2_SPTR stringname, PCRE2_SIZE *sizeptr)
270{
271PCRE2_SPTR first, last, entry;
272int failrc, entrysize;
273if (match_data->matchedby == PCRE2_MATCHEDBY_DFA_INTERPRETER)
275entrysize = pcre2_substring_nametable_scan(match_data->code, stringname,
276 &first, &last);
277if (entrysize < 0) return entrysize;
279for (entry = first; entry <= last; entry += entrysize)
280 {
281 uint32_t n = GET2(entry, 0);
282 if (n < match_data->oveccount)
283 {
284 if (match_data->ovector[n*2] != PCRE2_UNSET)
285 return pcre2_substring_length_bynumber(match_data, n, sizeptr);
286 failrc = PCRE2_ERROR_UNSET;
287 }
288 }
289return failrc;
290}
291
292
293
294/*************************************************
295* Get length of a numbered substring *
296*************************************************/
297
298/* This function returns the length of a captured substring. If the start is
299beyond the end (which can happen when \K is used in an assertion), it sets the
300length to zero.
301
302Arguments:
303 match_data pointer to match data
304 stringnumber the number of the required substring
305 sizeptr where to put the length, if not NULL
306
307Returns: if successful: 0
308 if not successful, a negative error code:
309 PCRE2_ERROR_NOSUBSTRING: no such substring
310 PCRE2_ERROR_UNAVAILABLE: ovector is too small
311 PCRE2_ERROR_UNSET: substring is not set
312 PCRE2_ERROR_INVALIDOFFSET: internal error, should not occur
313*/
314
317 uint32_t stringnumber, PCRE2_SIZE *sizeptr)
318{
320int count = match_data->rc;
322 {
323 if (stringnumber > 0) return PCRE2_ERROR_PARTIAL;
324 count = 0;
325 }
326else if (count < 0) return count; /* Match failed */
327
328if (match_data->matchedby != PCRE2_MATCHEDBY_DFA_INTERPRETER)
329 {
330 if (stringnumber > match_data->code->top_bracket)
332 if (stringnumber >= match_data->oveccount)
334 if (match_data->ovector[stringnumber*2] == PCRE2_UNSET)
335 return PCRE2_ERROR_UNSET;
336 }
337else /* Matched using pcre2_dfa_match() */
338 {
339 if (stringnumber >= match_data->oveccount) return PCRE2_ERROR_UNAVAILABLE;
340 if (count != 0 && stringnumber >= (uint32_t)count) return PCRE2_ERROR_UNSET;
341 }
342
343left = match_data->ovector[stringnumber*2];
344right = match_data->ovector[stringnumber*2+1];
345if (left > match_data->subject_length || right > match_data->subject_length)
347if (sizeptr != NULL) *sizeptr = (left > right)? 0 : right - left;
348return 0;
349}
350
351
352
353/*************************************************
354* Extract all captured strings to new memory *
355*************************************************/
356
357/* This function gets one chunk of memory and builds a list of pointers and all
358the captured substrings in it. A NULL pointer is put on the end of the list.
359The substrings are zero-terminated, but also, if the final argument is
360non-NULL, a list of lengths is also returned. This allows binary data to be
361handled.
362
363Arguments:
364 match_data points to the match data
365 listptr set to point to the list of pointers
366 lengthsptr set to point to the list of lengths (may be NULL)
367
368Returns: if successful: 0
369 if not successful, a negative error code:
370 PCRE2_ERROR_NOMEMORY: failed to get memory,
371 or a match failure code
372*/
373
376 PCRE2_SIZE **lengthsptr)
377{
378int i, count, count2;
380PCRE2_SIZE *lensp;
381pcre2_memctl *memp;
382PCRE2_UCHAR **listp;
383PCRE2_UCHAR *sp;
384PCRE2_SIZE *ovector;
385
386if ((count = match_data->rc) < 0) return count; /* Match failed */
387if (count == 0) count = match_data->oveccount; /* Ovector too small */
388
389count2 = 2*count;
390ovector = match_data->ovector;
391size = sizeof(pcre2_memctl) + sizeof(PCRE2_UCHAR *); /* For final NULL */
392if (lengthsptr != NULL) size += sizeof(PCRE2_SIZE) * count; /* For lengths */
393
394for (i = 0; i < count2; i += 2)
395 {
396 size += sizeof(PCRE2_UCHAR *) + CU2BYTES(1);
397 if (ovector[i+1] > ovector[i]) size += CU2BYTES(ovector[i+1] - ovector[i]);
398 }
399
400memp = PRIV(memctl_malloc)(size, (pcre2_memctl *)match_data);
401if (memp == NULL) return PCRE2_ERROR_NOMEMORY;
402
403*listptr = listp = (PCRE2_UCHAR **)((char *)memp + sizeof(pcre2_memctl));
404lensp = (PCRE2_SIZE *)((char *)listp + sizeof(PCRE2_UCHAR *) * (count + 1));
405
406if (lengthsptr == NULL)
407 {
408 sp = (PCRE2_UCHAR *)lensp;
409 lensp = NULL;
410 }
411else
412 {
413 *lengthsptr = lensp;
414 sp = (PCRE2_UCHAR *)((char *)lensp + sizeof(PCRE2_SIZE) * count);
415 }
416
417for (i = 0; i < count2; i += 2)
418 {
419 size = (ovector[i+1] > ovector[i])? (ovector[i+1] - ovector[i]) : 0;
420
421 /* Size == 0 includes the case when the capture is unset. Avoid adding
422 PCRE2_UNSET to match_data->subject because it overflows, even though with
423 zero size calling memcpy() is harmless. */
424
425 if (size != 0) memcpy(sp, match_data->subject + ovector[i], CU2BYTES(size));
426 *listp++ = sp;
427 if (lensp != NULL) *lensp++ = size;
428 sp += size;
429 *sp++ = 0;
430 }
431
432*listp = NULL;
433return 0;
434}
435
436
437
438/*************************************************
439* Free memory obtained by substring_list_get *
440*************************************************/
441
442/*
443Argument: the result of a previous pcre2_substring_list_get()
444Returns: nothing
445*/
446
449{
450if (list != NULL)
451 {
452 pcre2_memctl *memctl = (pcre2_memctl *)((char *)list - sizeof(pcre2_memctl));
453 memctl->free(memctl, memctl->memory_data);
454 }
455}
456
457
458
459/*************************************************
460* Find (multiple) entries for named string *
461*************************************************/
462
463/* This function scans the nametable for a given name, using binary chop. It
464returns either two pointers to the entries in the table, or, if no pointers are
465given, the number of a unique group with the given name. If duplicate names are
466permitted, and the name is not unique, an error is generated.
467
468Arguments:
469 code the compiled regex
470 stringname the name whose entries required
471 firstptr where to put the pointer to the first entry
472 lastptr where to put the pointer to the last entry
473
474Returns: PCRE2_ERROR_NOSUBSTRING if the name is not found
475 otherwise, if firstptr and lastptr are NULL:
476 a group number for a unique substring
477 else PCRE2_ERROR_NOUNIQUESUBSTRING
478 otherwise:
479 the length of each entry, having set firstptr and lastptr
480*/
481
484 PCRE2_SPTR *firstptr, PCRE2_SPTR *lastptr)
485{
486uint16_t bot = 0;
487uint16_t top = code->name_count;
488uint16_t entrysize = code->name_entry_size;
489PCRE2_SPTR nametable = (PCRE2_SPTR)((char *)code + sizeof(pcre2_real_code));
490
491while (top > bot)
492 {
493 uint16_t mid = (top + bot) / 2;
494 PCRE2_SPTR entry = nametable + entrysize*mid;
495 int c = PRIV(strcmp)(stringname, entry + IMM2_SIZE);
496 if (c == 0)
497 {
498 PCRE2_SPTR first;
500 PCRE2_SPTR lastentry;
501 lastentry = nametable + entrysize * (code->name_count - 1);
502 first = last = entry;
503 while (first > nametable)
504 {
505 if (PRIV(strcmp)(stringname, (first - entrysize + IMM2_SIZE)) != 0) break;
506 first -= entrysize;
507 }
508 while (last < lastentry)
509 {
510 if (PRIV(strcmp)(stringname, (last + entrysize + IMM2_SIZE)) != 0) break;
511 last += entrysize;
512 }
513 if (firstptr == NULL) return (first == last)?
514 (int)GET2(entry, 0) : PCRE2_ERROR_NOUNIQUESUBSTRING;
515 *firstptr = first;
516 *lastptr = last;
517 return entrysize;
518 }
519 if (c > 0) bot = mid + 1; else top = mid;
520 }
521
523}
524
525
526/*************************************************
527* Find number for named string *
528*************************************************/
529
530/* This function is a convenience wrapper for pcre2_substring_nametable_scan()
531when it is known that names are unique. If there are duplicate names, it is not
532defined which number is returned.
533
534Arguments:
535 code the compiled regex
536 stringname the name whose number is required
537
538Returns: the number of the named parenthesis, or a negative number
539 PCRE2_ERROR_NOSUBSTRING if not found
540 PCRE2_ERROR_NOUNIQUESUBSTRING if not unique
541*/
542
545 PCRE2_SPTR stringname)
546{
547return pcre2_substring_nametable_scan(code, stringname, NULL, NULL);
548}
549
550/* End of pcre2_substring.c */
count(Countable|array $value, int $mode=COUNT_NORMAL)
zend_long n
Definition ffi.c:4979
new_type size
Definition ffi.c:4365
memcpy(ptr1, ptr2, size)
#define NULL
Definition gdcache.h:45
lu_byte right
Definition minilua.c:4267
lu_byte left
Definition minilua.c:4266
#define pcre2_real_code
Definition pcre2.h:826
#define PCRE2_ERROR_INVALIDOFFSET
Definition pcre2.h:409
#define PCRE2_ERROR_UNAVAILABLE
Definition pcre2.h:396
#define PCRE2_UNSET
Definition pcre2.h:482
#define pcre2_substring_get_bynumber
Definition pcre2.h:916
#define PCRE2_UCHAR
Definition pcre2.h:819
#define pcre2_substring_number_from_name
Definition pcre2.h:922
#define PCRE2_ERROR_UNSET
Definition pcre2.h:397
#define pcre2_code
Definition pcre2.h:822
#define pcre2_substring_free
Definition pcre2.h:914
#define pcre2_substring_length_bynumber
Definition pcre2.h:918
#define PCRE2_ERROR_NOSUBSTRING
Definition pcre2.h:390
#define pcre2_substring_length_byname
Definition pcre2.h:917
#define pcre2_substring_get_byname
Definition pcre2.h:915
#define PCRE2_SIZE
Definition pcre2.h:479
#define pcre2_substring_copy_byname
Definition pcre2.h:912
#define pcre2_substring_list_get
Definition pcre2.h:919
#define PCRE2_SPTR
Definition pcre2.h:820
#define pcre2_match_data
Definition pcre2.h:844
#define PCRE2_ERROR_DFA_UFUNC
Definition pcre2.h:382
#define PCRE2_ERROR_NOUNIQUESUBSTRING
Definition pcre2.h:391
#define pcre2_substring_list_free
Definition pcre2.h:920
#define PCRE2_CALL_CONVENTION
Definition pcre2.h:81
#define pcre2_substring_copy_bynumber
Definition pcre2.h:913
#define PCRE2_ERROR_PARTIAL
Definition pcre2.h:328
#define pcre2_substring_nametable_scan
Definition pcre2.h:921
#define PCRE2_ERROR_NOMEMORY
Definition pcre2.h:389
void *PRIV memctl_malloc(size_t size, pcre2_memctl *memctl)
@ PCRE2_MATCHEDBY_DFA_INTERPRETER
#define PCRE2_EXP_DEFN
#define PRIV(name)
#define CU2BYTES(x)
#define IMM2_SIZE
#define GET2(a, n)
original_stack top
Definition file.h:177
void(* free)(void *, void *)
strcmp(string $string1, string $string2)
int last