GCC Code Coverage Report


Directory: src/gate/
File: src/gate/regexpressions.c
Date: 2025-09-14 13:10:38
Exec Total Coverage
Lines: 154 220 70.0%
Functions: 12 18 66.7%
Branches: 94 211 44.5%

Line Branch Exec Source
1 /* GATE PROJECT LICENSE:
2 +----------------------------------------------------------------------------+
3 | Copyright(c) 2018-2025, Stefan Meislinger <sm@opengate.at> |
4 | All rights reserved. |
5 | |
6 | Redistribution and use in source and binary forms, with or without |
7 | modification, are permitted provided that the following conditions are met:|
8 | |
9 | 1. Redistributions of source code must retain the above copyright notice, |
10 | this list of conditions and the following disclaimer. |
11 | 2. Redistributions in binary form must reproduce the above copyright |
12 | notice, this list of conditions and the following disclaimer in the |
13 | documentation and/or other materials provided with the distribution. |
14 | |
15 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"|
16 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
17 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE |
18 | ARE DISCLAIMED.IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE |
19 | LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR |
20 | CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF |
21 | SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS |
22 | INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN |
23 | CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) |
24 | ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF |
25 | THE POSSIBILITY OF SUCH DAMAGE. |
26 +----------------------------------------------------------------------------+
27 */
28
29 #include "gate/regexpressions.h"
30 #include "gate/results.h"
31
32 /*
33 This is a GATE framework port of
34 kokke's public domain code 'tiny-regex-c'
35 which is based on Rob-Pike's regex-code.
36 */
37
38 /* Private functions: */
39 static gate_bool_t match_digit(char c)
40 {
41 return ((c >= '0') && (c <= '9'));
42 }
43 static gate_bool_t match_alpha(char c)
44 {
45 return ((c >= 'a') && (c <= 'z')) || ((c >= 'A') && (c <= 'Z'));
46 }
47 static gate_bool_t match_whitespace(char c)
48 {
49 return ((c == ' ') || (c == '\t') || (c == '\n') || (c == '\r') || (c == '\f') || (c == '\v'));
50 }
51 static gate_bool_t match_alphanum(char c)
52 {
53 return ((c == '_') || match_alpha(c) || match_digit(c));
54 }
55 4 static gate_bool_t match_range(char c, const char* str)
56 {
57
4/4
✓ Branch 0 taken 3 times.
✓ Branch 1 taken 1 times.
✓ Branch 2 taken 2 times.
✓ Branch 3 taken 1 times.
4 return ((c != '-') && (str[0] != '\0') && (str[0] != '-') &&
58
3/4
✓ Branch 0 taken 1 times.
✓ Branch 1 taken 1 times.
✓ Branch 2 taken 1 times.
✗ Branch 3 not taken.
2 (str[1] == '-') && (str[1] != '\0') &&
59
4/8
✓ Branch 0 taken 4 times.
✗ Branch 1 not taken.
✓ Branch 2 taken 1 times.
✗ Branch 3 not taken.
✓ Branch 4 taken 1 times.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
✓ Branch 7 taken 1 times.
8 (str[2] != '\0') && ((c >= str[0]) && (c <= str[2])));
60 }
61 static gate_bool_t is_meta_char(char c)
62 {
63 return ((c == 's') || (c == 'S') || (c == 'w') || (c == 'W') || (c == 'd') || (c == 'D'));
64 }
65
66 static gate_bool_t match_meta_char(char c, const char* str)
67 {
68 switch (str[0])
69 {
70 case 'd': return match_digit(c);
71 case 'D': return !match_digit(c);
72 case 'w': return match_alphanum(c);
73 case 'W': return !match_alphanum(c);
74 case 's': return match_whitespace(c);
75 case 'S': return !match_whitespace(c);
76 default: return (c == str[0]);
77 }
78 }
79
80 4 static gate_bool_t match_char_class(char c, const char* str)
81 {
82 do
83 {
84
1/2
✗ Branch 1 not taken.
✓ Branch 2 taken 4 times.
4 if (match_range(c, str))
85 {
86 return true;
87 }
88
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 4 times.
4 else if (str[0] == '\\')
89 {
90 /* Escape-char: increment str-ptr and match on next char */
91 str += 1;
92 if (match_meta_char(c, str))
93 {
94 return true;
95 }
96 else if ((c == str[0]) && !is_meta_char(c))
97 {
98 return true;
99 }
100 }
101
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 4 times.
4 else if (c == str[0])
102 {
103 if (c == '-')
104 {
105 return ((str[-1] == '\0') || (str[1] == '\0'));
106 }
107 else
108 {
109 return true;
110 }
111 }
112
2/2
✓ Branch 0 taken 3 times.
✓ Branch 1 taken 1 times.
4 } while (*str++ != '\0');
113
114 1 return false;
115 }
116
117 89 static gate_bool_t match_one(gate_regex_token_t token, char c)
118 {
119
3/10
✓ Branch 0 taken 6 times.
✓ Branch 1 taken 1 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
✗ Branch 7 not taken.
✗ Branch 8 not taken.
✓ Branch 9 taken 82 times.
89 switch (token.type)
120 {
121 6 case GATE_REGEX_DOT: return true;
122 1 case GATE_REGEX_CHAR_CLASS: return match_char_class(c, (const char*)token.ptr);
123 case GATE_REGEX_INV_CHAR_CLASS: return !match_char_class(c, (const char*)token.ptr);
124 case GATE_REGEX_DIGIT: return match_digit(c);
125 case GATE_REGEX_NOT_DIGIT: return !match_digit(c);
126 case GATE_REGEX_ALPHA: return match_alphanum(c);
127 case GATE_REGEX_NOT_ALPHA: return !match_alphanum(c);
128 case GATE_REGEX_WHITESPACE: return match_whitespace(c);
129 case GATE_REGEX_NOT_WHITESPACE: return !match_whitespace(c);
130 82 default: return (token.chr == c);
131 }
132 }
133
134 static gate_bool_t match_pattern(gate_regex_token_t* pattern, const char* text, gate_size_t textlen, int* match_length);
135
136 2 static gate_bool_t match_star(gate_regex_token_t token, gate_regex_token_t* pattern, const char* text, gate_size_t textlen, int* match_length)
137 {
138 2 int prelen = *match_length;
139 2 int textpos = 0;
140
4/4
✓ Branch 0 taken 8 times.
✓ Branch 1 taken 1 times.
✓ Branch 3 taken 7 times.
✓ Branch 4 taken 1 times.
9 while ((textpos < (int)textlen) && match_one(token, text[textpos]))
141 {
142 7 ++textpos;
143 7 ++(*match_length);
144 }
145
1/2
✓ Branch 0 taken 3 times.
✗ Branch 1 not taken.
3 while (textpos >= 0)
146 {
147
2/2
✓ Branch 1 taken 2 times.
✓ Branch 2 taken 1 times.
3 if (match_pattern(pattern, &text[textpos], textlen - textpos, match_length))
148 {
149 2 return true;
150 }
151 1 --textpos;
152 1 --(*match_length);
153 }
154
155 *match_length = prelen;
156 return false;
157 }
158
159 2 static gate_bool_t match_plus(gate_regex_token_t token, gate_regex_token_t* pattern, const char* text, gate_size_t textlen, int* match_length)
160 {
161 2 gate_size_t textpos = 0;
162
163
3/4
✓ Branch 0 taken 3 times.
✗ Branch 1 not taken.
✓ Branch 3 taken 1 times.
✓ Branch 4 taken 2 times.
3 while ((textpos < textlen) && match_one(token, text[textpos]))
164 {
165 1 ++textpos;
166 1 ++(*match_length);
167 }
168
2/2
✓ Branch 0 taken 1 times.
✓ Branch 1 taken 1 times.
2 while (textpos > 0)
169 {
170
1/2
✓ Branch 1 taken 1 times.
✗ Branch 2 not taken.
1 if (match_pattern(pattern, &text[textpos], textlen - textpos, match_length))
171 {
172 1 return true;
173 }
174 --textpos;
175 --(*match_length);
176 }
177 1 return false;
178 }
179
180 9 static gate_bool_t match_question(gate_regex_token_t token, gate_regex_token_t* pattern, const char* text, gate_size_t textlen, int* match_length)
181 {
182
2/2
✓ Branch 0 taken 8 times.
✓ Branch 1 taken 1 times.
9 if (token.type == GATE_REGEX_UNUSED)
183 {
184 8 return true;
185 }
186
1/2
✓ Branch 1 taken 1 times.
✗ Branch 2 not taken.
1 if (match_pattern(pattern, text, textlen, match_length))
187 {
188 1 return true;
189 }
190 if ((textlen != 0) && match_one(token, text[0]))
191 {
192 if (match_pattern(pattern, &text[1], textlen - 1, match_length))
193 {
194 ++(*match_length);
195 return true;
196 }
197 }
198 return false;
199 }
200
201
202 66 static gate_bool_t match_pattern(gate_regex_token_t* pattern, const char* text, gate_size_t textlen, int* match_length)
203 {
204 66 int pre = *match_length;
205
206 66 gate_size_t pattern_index = 0;
207 66 gate_size_t textpos = 0;
208 do
209 {
210
4/4
✓ Branch 0 taken 90 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 1 times.
✓ Branch 3 taken 89 times.
98 if ((pattern[pattern_index].type == GATE_REGEX_UNUSED) || (pattern[pattern_index].type == GATE_REGEX_QUESTIONMARK))
211 {
212 9 return match_question(pattern[pattern_index], &pattern[pattern_index + 2], &text[textpos], textlen - textpos, match_length);
213 }
214
2/2
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 87 times.
89 else if (pattern[pattern_index + 1].type == GATE_REGEX_STAR)
215 {
216 2 return match_star(pattern[pattern_index + 0], &pattern[pattern_index + 2], &text[textpos], textlen - textpos, match_length);
217 }
218
2/2
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 85 times.
87 else if (pattern[pattern_index + 1].type == GATE_REGEX_PLUS)
219 {
220 2 return match_plus(pattern[pattern_index + 0], &pattern[pattern_index + 2], &text[textpos], textlen - textpos, match_length);
221 }
222
3/4
✓ Branch 0 taken 4 times.
✓ Branch 1 taken 81 times.
✓ Branch 2 taken 4 times.
✗ Branch 3 not taken.
85 else if ((pattern[pattern_index + 0].type == GATE_REGEX_END) && pattern[pattern_index + 1].type == GATE_REGEX_UNUSED)
223 {
224 4 return (textpos >= textlen);
225 }
226 81 ++(*match_length);
227
4/4
✓ Branch 0 taken 78 times.
✓ Branch 1 taken 3 times.
✓ Branch 3 taken 32 times.
✓ Branch 4 taken 46 times.
81 } while ((textpos < textlen) && match_one(pattern[pattern_index++], text[textpos++]));
228
229 49 *match_length = pre;
230 49 return false;
231 }
232
233
234 16 gate_result_t gate_regex_init(gate_regex_t regex, gate_string_t const* pattern)
235 {
236 16 gate_result_t result = GATE_RESULT_OK;
237
238 char current_char;
239 16 gate_index_t buffer_index = 1; //int ccl_bufidx = 1;
240 16 gate_index_t pattern_index = 0; //int i = 0; /* index into pattern */
241 16 gate_index_t compiled_index = 0; // int j = 0; /* index into re_compiled */
242 gate_index_t buffer_begin;
243
244 char const* ptr_pattern;
245 gate_size_t pattern_length;
246 gate_regex_token_t* ptr_compiled;
247 unsigned char* ptr_buffer;
248
249 do
250 {
251
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 16 times.
16 if (regex == NULL)
252 {
253 result = GATE_RESULT_INVALIDARG;
254 break;
255 }
256 16 gate_mem_clear(regex, sizeof(struct gate_regex_class));
257
258 16 ptr_pattern = &regex->pattern[0];
259 16 pattern_length = gate_string_to_buffer(pattern, regex->pattern, sizeof(regex->pattern));
260
261 16 ptr_compiled = &regex->compiled[0];
262 16 ptr_buffer = &regex->buffer[0];
263
264
1/2
✓ Branch 0 taken 87 times.
✗ Branch 1 not taken.
87 while ((result == GATE_RESULT_OK)
265
2/2
✓ Branch 0 taken 71 times.
✓ Branch 1 taken 16 times.
87 && (pattern_index < (gate_index_t)pattern_length)
266
1/2
✓ Branch 0 taken 71 times.
✗ Branch 1 not taken.
71 && (compiled_index + 1 < GATE_REGEX_MAX_COMPILED))
267 {
268 71 current_char = ptr_pattern[pattern_index];
269
270
9/9
✓ Branch 0 taken 6 times.
✓ Branch 1 taken 6 times.
✓ Branch 2 taken 1 times.
✓ Branch 3 taken 2 times.
✓ Branch 4 taken 5 times.
✓ Branch 5 taken 1 times.
✓ Branch 6 taken 3 times.
✓ Branch 7 taken 4 times.
✓ Branch 8 taken 43 times.
71 switch (current_char)
271 {
272 /* Meta-characters: */
273 6 case '^': { ptr_compiled[compiled_index].type = GATE_REGEX_BEGIN; break; }
274 6 case '$': { ptr_compiled[compiled_index].type = GATE_REGEX_END; break; }
275 1 case '.': { ptr_compiled[compiled_index].type = GATE_REGEX_DOT; break; }
276 2 case '*': { ptr_compiled[compiled_index].type = GATE_REGEX_STAR; break; }
277 5 case '+': { ptr_compiled[compiled_index].type = GATE_REGEX_PLUS; break; }
278 1 case '?': { ptr_compiled[compiled_index].type = GATE_REGEX_QUESTIONMARK; break; }
279 /*case '|': { ptr_compiled[compiled_index].type = GATE_REGEX_BRANCH; break; } <-- not working properly */
280
281 /* Escaped character-classes (\s \w ...): */
282 3 case '\\':
283 {
284
1/2
✓ Branch 0 taken 3 times.
✗ Branch 1 not taken.
3 if (pattern_index + 1 < (gate_index_t)pattern_length)
285 {
286 /* Skip the escape-char '\\' */
287 3 ++pattern_index;
288 /* ... and check the next */
289 3 current_char = ptr_pattern[pattern_index];
290
1/7
✗ Branch 0 not taken.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✗ Branch 5 not taken.
✓ Branch 6 taken 3 times.
3 switch (current_char)
291 {
292 /* Meta-character: */
293 case 'd': { ptr_compiled[compiled_index].type = GATE_REGEX_DIGIT; break; }
294 case 'D': { ptr_compiled[compiled_index].type = GATE_REGEX_NOT_DIGIT; break; }
295 case 'w': { ptr_compiled[compiled_index].type = GATE_REGEX_ALPHA; break; }
296 case 'W': { ptr_compiled[compiled_index].type = GATE_REGEX_NOT_ALPHA; break; }
297 case 's': { ptr_compiled[compiled_index].type = GATE_REGEX_WHITESPACE; break; }
298 case 'S': { ptr_compiled[compiled_index].type = GATE_REGEX_NOT_WHITESPACE; break; }
299 /* Escaped character, e.g. '.' or '$' */
300 3 default:
301 {
302 3 ptr_compiled[compiled_index].type = GATE_REGEX_CHAR;
303 3 ptr_compiled[compiled_index].chr = current_char;
304 3 break;
305 }
306 }
307 }
308 else
309 {
310 result = GATE_RESULT_INVALIDINPUT;
311 }
312 3 break;
313 }
314 /* Character class: */
315 4 case '[':
316 {
317
1/2
✓ Branch 0 taken 4 times.
✗ Branch 1 not taken.
4 if (pattern_index + 1 < (gate_index_t)pattern_length)
318 {
319 /* Remember where the char-buffer starts. */
320 4 buffer_begin = buffer_index;
321
322 /* Look-ahead to determine if negated */
323
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 4 times.
4 if (ptr_pattern[pattern_index + 1] == '^')
324 {
325 ptr_compiled[compiled_index].type = GATE_REGEX_INV_CHAR_CLASS;
326 ++pattern_index; /* Increment i to avoid including '^' in the char-buffer */
327 }
328 else
329 {
330 4 ptr_compiled[compiled_index].type = GATE_REGEX_CHAR_CLASS;
331 }
332
333 /* Copy characters inside [..] to buffer */
334
1/2
✓ Branch 0 taken 16 times.
✗ Branch 1 not taken.
16 while (pattern_index + 1 < (gate_index_t)pattern_length)
335 {
336 16 ++pattern_index;
337 16 current_char = ptr_pattern[pattern_index];
338
2/2
✓ Branch 0 taken 4 times.
✓ Branch 1 taken 12 times.
16 if (current_char == ']')
339 {
340 /* end of character class reached*/
341 4 break;
342 }
343
344
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 12 times.
12 if (current_char == '\\')
345 {
346 if (buffer_index >= GATE_REGEX_MAX_CHAR_CLASS_LEN - 1)
347 {
348 result = GATE_RESULT_OUTOFBOUNDS;
349 break;
350 }
351
352 ptr_buffer[buffer_index++] = ptr_pattern[pattern_index++];
353 }
354
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 12 times.
12 else if (buffer_index >= GATE_REGEX_MAX_CHAR_CLASS_LEN)
355 {
356 result = GATE_RESULT_OUTOFBOUNDS;
357 break;
358 }
359 12 ptr_buffer[buffer_index++] = ptr_pattern[pattern_index];
360 }
361
362
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 4 times.
4 if (buffer_index >= GATE_REGEX_MAX_CHAR_CLASS_LEN)
363 {
364 /* Catches cases such as [00000000000000000000000000000000000000][ */
365 //fputs("exceeded internal buffer!\n", stderr);
366 result = GATE_RESULT_OUTOFBOUNDS;
367 break;
368 }
369 /* Null-terminate string end */
370 4 ptr_buffer[buffer_index++] = 0;
371 4 ptr_compiled[compiled_index].ptr = &ptr_buffer[buffer_begin];
372 }
373 else
374 {
375 result = GATE_RESULT_INVALIDINPUT;
376 }
377 4 break;
378 }
379 /* Other characters: */
380 43 default:
381 {
382 43 ptr_compiled[compiled_index].type = GATE_REGEX_CHAR;
383 43 ptr_compiled[compiled_index].chr = current_char;
384 43 break;
385 }
386 }
387
388 71 ++pattern_index;
389 71 ++compiled_index;
390 }
391
392 // finally completed
393 } while (0);
394
395 16 return result;
396 }
397
398
399 15 gate_result_t gate_regex_create(gate_regex_t* new_regex, gate_string_t const* pattern)
400 {
401 gate_result_t result;
402 15 gate_regex_t regex = (gate_regex_t)gate_mem_alloc(sizeof(struct gate_regex_class));
403
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 15 times.
15 if (NULL == regex)
404 {
405 return GATE_RESULT_OUTOFMEMORY;
406 }
407 15 result = gate_regex_init(regex, pattern);
408
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 15 times.
15 if (GATE_FAILED(result))
409 {
410 gate_mem_dealloc(regex);
411 }
412 else
413 {
414 15 *new_regex = regex;
415 }
416
417 15 return result;
418 }
419
420 2 gate_result_t gate_regex_clone(gate_regex_t* new_regex, gate_regex_t source)
421 {
422 2 gate_result_t result = GATE_RESULT_INVALIDARG;
423
2/4
✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
2 if ((source != NULL) && (new_regex != NULL))
424 {
425 gate_string_t pattern;
426 2 gate_string_create_static(&pattern, source->pattern);
427 2 result = gate_regex_create(new_regex, &pattern);
428 }
429 2 return result;
430 }
431
432 16 gate_result_t gate_regex_match(gate_regex_t regex, gate_string_t const* text, gate_size_t* ptr_match_pos, gate_size_t* ptr_match_length)
433 {
434 16 gate_result_t ret = GATE_RESULT_NOMATCH;
435 int index;
436 16 int match_length = 0;
437 gate_size_t text_pos;
438 gate_size_t text_length;
439
2/4
✓ Branch 0 taken 16 times.
✗ Branch 1 not taken.
✓ Branch 2 taken 16 times.
✗ Branch 3 not taken.
16 if ((regex != NULL) && (text != NULL))
440 {
441 16 text_length = gate_string_length(text);
442
2/2
✓ Branch 0 taken 6 times.
✓ Branch 1 taken 10 times.
16 if (regex->compiled[0].type == GATE_REGEX_BEGIN)
443 {
444
2/2
✓ Branch 2 taken 2 times.
✓ Branch 3 taken 4 times.
6 if (match_pattern(&regex->compiled[1], gate_string_ptr(text, 0), text_length, &match_length))
445 {
446
1/2
✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
2 if (ptr_match_pos)
447 {
448 2 *ptr_match_pos = 0;
449 }
450
1/2
✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
2 if (ptr_match_length)
451 {
452 2 *ptr_match_length = (gate_size_t)match_length;
453 }
454 2 ret = GATE_RESULT_OK;
455 }
456 }
457 else
458 {
459 10 index = -1;
460 10 text_pos = 0;
461 do
462 {
463 55 ++index;
464
2/2
✓ Branch 2 taken 8 times.
✓ Branch 3 taken 47 times.
55 if (match_pattern(&regex->compiled[0], gate_string_ptr(text, text_pos), text_length - text_pos, &match_length))
465 {
466
1/2
✓ Branch 0 taken 8 times.
✗ Branch 1 not taken.
8 if (text_pos < text_length)
467 {
468
1/2
✓ Branch 0 taken 8 times.
✗ Branch 1 not taken.
8 if (ptr_match_pos)
469 {
470 8 *ptr_match_pos = (gate_size_t)index;
471 }
472
1/2
✓ Branch 0 taken 8 times.
✗ Branch 1 not taken.
8 if (ptr_match_length)
473 {
474 8 *ptr_match_length = (gate_size_t)match_length;
475 }
476 8 ret = GATE_RESULT_OK;
477 }
478 8 break;
479 }
480
2/2
✓ Branch 0 taken 45 times.
✓ Branch 1 taken 2 times.
47 } while (text_pos++ < text->length);
481 }
482 }
483 16 return ret;
484 }
485
486 15 void gate_regex_release(gate_regex_t regex)
487 {
488
1/2
✓ Branch 0 taken 15 times.
✗ Branch 1 not taken.
15 if (regex != NULL)
489 {
490 15 gate_mem_dealloc(regex);
491 }
492 15 }
493