// Source: v / vlib / regex
// Raw file | 940 loc (846 sloc) | 24.99 KB | Latest commit hash 017ace6ea
1import regex
2import rand
3import strings
4
// debug enables the verbose progress output printed by the tests in this file;
// set it to false to silence that output.
const debug = true
6
7/******************************************************************************
8*
9* Test section
10*
11******************************************************************************/
// TestItem is one match/find case used by test_regex: a source text, a query
// and the start/end indexes the engine is expected to return.
struct TestItem {
	src string // text to search
	q   string // regex query
	s   int    // expected start index; -1 when the query must not match
	e   int    // expected end index returned together with `s`
}
18
19// vfmt off
// match_test_suite lists the match/find cases executed by test_regex.
// Each item is TestItem{source text, query, expected start, expected end};
// a start of -1 means the query is expected not to match.
const(
match_test_suite = [
	// minus in CC
	TestItem{"d.def",r"abc.\.[\w\-]{,100}",-1,0},
	TestItem{"abc12345.asd",r"abc.\.[\w\-]{,100}",-1,4},
	TestItem{"abca.exe",r"abc.\.[\w\-]{,100}",0,8},
	TestItem{"abc2.exe-test_12",r"abc.\.[\w\-]{,100}",0,16},
	TestItem{"abcdefGHK",r"[a-f]+\A+",0,9},
	TestItem{"ab-cd-efGHK",r"[a-f\-g]+\A+",0,11},

	// base OR
	TestItem{"a",r"a|b",0,1},
	TestItem{"a",r"b|a",0,1},
	TestItem{"b",r"a|b",0,1},
	TestItem{"b",r"b|a",0,1},
	TestItem{"c",r"b|a",-1,0},

	// test base
	TestItem{"[ciao]",r"(.)ciao(.)",0,6},
	TestItem{"[ciao] da me",r"(.)ciao(.)",0,6},

	// positive
	TestItem{"this is a good.",r"this",0,4},
	TestItem{"this is a good.",r"good",10,14},
	TestItem{"this is a good.",r"go+d",10,14},
	TestItem{"this is a good.",r"g[oae]+d",10,14},
	TestItem{"this is a goed.",r"g[oae]+d",10,14},
	TestItem{"this is a good.",r"g[oae]*d",10,14},
	TestItem{"this is a goaezd.",r"g[ea-cm-z]*d",10,16},
	TestItem{"this is a good.",r"this (\w+) a",0,9},
	TestItem{"this is a good.",r"this( \w+){2} g",0,11},
	TestItem{"this is a good.",r"( ?\w+){,1}",0,4},
	TestItem{"this is a good.",r"( ?\w+)+",0,14},
	TestItem{"this is a good.",r"this( \w+)+",0,14},
	TestItem{"this is a good sample.",r"( ?\w+){,2}",0,7},
	TestItem{"this is a good sample.",r"( ?\w+){,3}",0,9},
	TestItem{"this is a good sample.",r"( ?\w+){,4}",0,14},
	TestItem{"this is a good sample.",r"( ?\w+){,5}",0,21},
	TestItem{"this is a good sample.",r"( ?\w+){2,3}",0,9},
	TestItem{"this is a good sample.",r"(\s?\w+){2,3}",0,9},
	TestItem{"this these those.",r"(th[ei]se?\s|\.)+",0,11},
	TestItem{"this these those ",r"(th[eio]se? ?)+",0,17},
	TestItem{"this these those ",r"(th[eio]se? )+",0,17},
	TestItem{"this,these,those. over",r"(th[eio]se?[,. ])+",0,17},
	TestItem{"soday,this,these,those. over",r".+(th[eio]se?[,. ])+",0,23},

	TestItem{"cpapaz",r"(c(pa)+z)",0,6},
	TestItem{"this is a cpapaz over",r"(c(pa)+z)",10,16},
	TestItem{"this is a cpapapez over",r"(c(p[ae])+z)",10,18},
	// NOTE(review): the next three sources had been replaced by a scraper's
	// e-mail obfuscation placeholder; they are restored to addresses whose
	// lengths agree with the expected end indexes (17, 18, 14) - confirm against upstream.
	TestItem{"test@post.pip.com",r"[a-z0-9_]+@([a-z0-9_]+\.?)+",0,17},
	TestItem{"test1@post.pip.com, pera",r"[\w]+@([\w]+\.)+\w+",0,18},
	TestItem{"pippo@pera.com ",r"[a-z0-9_]+@([a-z0-9_]+\.?)+",0,14},
	TestItem{"adce aabe",r"(a(ab)+)|(a(dc)+)e",0,4},
	TestItem{"zadce aabe",r"(a(ab)+)|(a(dc)+)e",1,5},
	TestItem{"abbz accz addz.",r"c|(d)|e|(ab+)",0,3},
	TestItem{"this those these ciao",r"((t[hieo]+se?)\s*)+",0,17},
	TestItem{"this ciao",r"((t[hieo]+se?)\s*)+",0,5},
	TestItem{"this cpapaz adce aabe",r"(c(pa)+z)(\s[\a]+){2}",5,21},
	TestItem{"1234this cpapaz adce aabe",r"(c(pa)+z)(\s[\a]+){2}$",9,25},
	TestItem{"this cpapaz adce aabe third",r"(c(pa)+z)(\s[\a]+){2}",5,21},
	TestItem{"123cpapaz ole. pippo",r"(c(pa)+z)(\s+\a+[\.,]?)+",3,20},

	TestItem{"this is a good sample.",r".*i(\w)+",0,4},
	TestItem{"soday,this,these,those. over",r".*,(th[eio]se?[,. ])+",0,23},
	TestItem{"soday,this,these,thesa.thesi over",r".*,(th[ei]se?[,. ])+(thes[ai][,. ])+",0,29},
	TestItem{"cpapaz ole. pippo,",r".*(c(pa)+z)(\s+\a+[\.,]?)+",0,18},
	TestItem{"cpapaz ole. pippo",r"(c(pa)+z)(\s+\a+[\.,]?)+",0,17},
	TestItem{"cpapaz ole. pippo, 852",r".*(c(pa)+z)(\s+\a+[\.,]?)+",0,18},
	TestItem{"123cpapaz ole. pippo",r".*(c(pa)+z)(\s+\a+[\.,]?)+",0,20},
	TestItem{"...cpapaz ole. pippo",r".*(c(pa)+z)(\s+\a+[\.,]?)+",0,20},

	TestItem{"cpapaz ole. pippo,",r".*c.+ole.*pi",0,14},
	TestItem{"cpapaz ole. pipipo,",r".*c.+ole.*p([ip])+o",0,18},
	TestItem{"cpapaz ole. pipipo",r"^.*c.+ol?e.*p([ip])+o$",0,18},
	TestItem{"abbb",r"ab{2,3}?",0,3},
	TestItem{" pippo pera",r"\s(.*)pe(.*)",0,11},
	TestItem{" abb",r"\s(.*)",0,4},

	TestItem{"/home/us_er/pippo/info-01.txt", r"(/?[-\w_]+)*\.txt$",0,29},

	// negative
	TestItem{"zthis ciao",r"((t[hieo]+se?)\s*)+",-1,0},
	TestItem{"this is a good.",r"thes",-1,2},
	TestItem{"test1post.pip.com, pera",r"[\w]+@([\w]+\.)+\w+",-1,9},
	TestItem{"this cpapaz adce",r"(c(pa)+z)(\s[\a]+){2}",-1,0},
	TestItem{"this cpapaz adce aabe third",r"(c(pa)+z)(\s[\a]+){2}$",-1,0},
	TestItem{"1234this cpapaz adce aabe ter",r"(c(pa)+z)(\s[\a]+){2}$",-1,0},
	TestItem{"cpapaz ole. pipipo,",r"^.*c.+ol?e.*p([ip])+o$",-1,0},

	// check unicode
	TestItem{"this is a Ⅰ Ⅱ Ⅲ Ⅳ Ⅴ Ⅵ test",r".*a [Ⅰ-Ⅵ ]+",0,34},
	TestItem{"123Ⅰ Ⅱ Ⅲ Ⅳ Ⅴ Ⅵ test",r"[Ⅰ-Ⅴ\s]+",3,23},

	// new edge cases
	TestItem{"12345678", r"[0-9][0-9][0-9][0-9][0-9][0-9][0-9][0-9][0-9]",-1,8},
	TestItem{"12345678", r"[0-9][0-9][0-9][0-9][0-9][0-9][0-9][0-9]",0,8},
	TestItem{"123456789", r"^[0-9][0-9][0-9][0-9][0-9][0-9][0-9][0-9][0-9]$",0,9},
	TestItem{"12345678", r"^\d{8}$",0,8},
	TestItem{"12345678", r"^\d{7}$",-1,0},
	TestItem{"12345678", r"^\d{9}$",-1,8},

	TestItem{"eth", r"(oth)|(eth)",0,3},
	TestItem{"et", r"(oth)|(eth)",-1,2},
	TestItem{"et", r".*(oth)|(eth)",-1,2},
	TestItem{"peoth", r".*(ith)|(eth)",-1,5},

	TestItem{"poth", r"(eth)|(oth)",1,4},
	TestItem{"poth", r"(oth)|(eth)",1,4},
	TestItem{"poth", r".(oth)|(eth)$",0,4},
	TestItem{"poth", r"^.(oth)|(eth)$",0,4},
	TestItem{"poth", r"^\w+$",0,4},

	// test dot_char
	TestItem{"8-11 l: qllllqllklhlvtl", r"^(\d+)-(\d+) ([a-z]): (.*)$",0,23},
	TestItem{"accccb deer", r"^a(.*)b d(.+)r",0,11},
	TestItem{"accccb deer", r"^a(.*)b d(.+)",0,11},
	TestItem{"accccb deer", r"^(.*)$",0,11},
	TestItem{"accccb deer", r"^a(.*)b d(.+)p",-1,11},
	TestItem{"##.#....#.##.####...#.##", r".{18}[.#]",0,19},
	TestItem{"#.#......##.#..#..##........##....###...##...######.......#.....#..#......#...#........###.#..#.", r'.*#[.#]{4}##[.#]{4}##[.#]{4}###',0,49},

	// test backslash chars
	TestItem{"[ an s. s! ]( wi4ki:something )", r"\[.*\]\( *(\w*:*\w+) *\)",0,31},
	TestItem{"[ an s. s! ](wiki:something)", r"\[.*\]\( *(\w*:*\w+) *\)",0,28},
	TestItem{"p_p", r"\w+",0,3},
	TestItem{"p_é", r"\w+",0,2},

	// Crazywulf tests (?:^|[()])(\d+)(*)(\d+)(?:$|[()])
	TestItem{"1*1", r"(\d+)([*])(\d+)",0,3},
	TestItem{"+1*1", r"^(\d+)([*])(\d+)",-1,0},
	TestItem{"*1*1", r"(?:^|[*])(\d+)([*])(\d+)",0,4},
	TestItem{"*1*1", r"(?:^|[*()])(\d+)([*])(\d+)",0,4},
	TestItem{")1*1", r"(?:^|[*()])(\d+)([*])(\d+)",0,4},
	TestItem{"(1*1", r"(?:^|[*()])(\d+)([*])(\d+)",0,4},
	TestItem{"*1*1(", r"(?:^|[*()])(\d+)([*])(\d+)(?:$|[*()])",0,5},
	TestItem{" 1*1(", r"(?:^|[*()])(\d+)([*])(\d+)(?:$|[*()])",-1,0},
	TestItem{"1*1 ", r"(?:^|[*()])(\d+)([*])(\d+)(?:$|[*()])",-1,0},

	// particular groups
	TestItem{"ababababac", r"ab(.*)(ac)",0,10},

	// backslash on finish string
	TestItem{"a", r"\S+",0,1},
	TestItem{"aaaa", r"\S+",0,4},
	TestItem{"aaaa ", r"\S+",0,4},

	// multiple dot char
	TestItem{"aba", r"a*(b*)*a",0,3},
	TestItem{"/*x*/", r"/\**(.*)\**/",0,5},
	TestItem{"/*x*/", r"/*(.*)*/",0,5},

	// test last IST check
	TestItem{"refs/remotes/origin/mastep", r"refs/remotes/origin/(.*)",0,26},
	TestItem{"refs/remotes/origin/master", r"refs/remotes/origin/(.*)",0,26},
	TestItem{"refs/remotes/origin/mastep", r"refs/remotes/origin/(\w*)",0,26},
	TestItem{"refs/remotes/origin/master", r"refs/remotes/origin/(\w*)",0,26},

	// test \S+ vs [^\s]+
	TestItem{"ab.c", r"\S+\.",0,3},
	TestItem{"ab.c", r"[^\s]+\.",0,3},
	TestItem{"ab.c", r"\S*\.",0,3},
	TestItem{"ab.c", r"[^\s]*\.",0,3},
	TestItem{"ab c", r"[\S]+\s",0,3},
	TestItem{"ab c", r"[^\s]+\s",0,3},
]
)
186
// TestItemRe is one replace case used by test_regex: the query `q` is applied
// to `src`, matches are replaced with `rep`, and the outcome must equal `r`.
struct TestItemRe {
	src string // text to transform
	q   string // regex query
	rep string // replacement text passed to replace / replace_simple
	r   string // expected result
}
193
// match_test_suite_replace holds the cases test_regex runs through `replace`;
// match_test_suite_replace_simple holds the cases it runs through `replace_simple`.
// Each item is TestItemRe{source, query, replacement, expected result}.
const (
match_test_suite_replace = [
	// replace tests
	TestItemRe{
		"oggi pibao è andato a casa di pbababao ed ha trovato pibabababao",
		r"(pi?(ba)+o)",
		"CIAO",
		"oggi CIAO è andato a casa di CIAO ed ha trovato CIAO"
	},
	TestItemRe{
		"Today is a good day and tomorrow will be for sure.",
		r"[Tt]o\w+",
		"CIAO",
		"CIAO is a good day and CIAO will be for sure."
	},
	// the next three replacements reference captured groups via \0 and \1
	TestItemRe{
		"Today is a good day and tomorrow will be for sure.",
		r"(a\w) ",
		r"[\0] ",
		"Tod[ay] is a good d[ay] and tomorrow will be for sure."
	},
	TestItemRe{
		"Today is a good day and tomorrow will be for sure.",
		r"(a\w) ",
		r"[\0_\0] ",
		"Tod[ay_ay] is a good d[ay_ay] and tomorrow will be for sure."
	},
	TestItemRe{
		"Today is a good day and tomorrow will be for sure.",
		r"(a\w) ",
		r"[\0\1] ",
		"Tod[ay] is a good d[ay] and tomorrow will be for sure."
	},
]

match_test_suite_replace_simple = [
	// replace tests
	TestItemRe{
		"oggi pibao è andato a casa di pbababao ed ha trovato pibabababao",
		r"(pi?(ba)+o)",
		"CIAO",
		"oggi CIAO è andato a casa di CIAO ed ha trovato CIAO"
	},
	TestItemRe{
		"Today is a good day and tomorrow will be for sure.",
		r"[Tt]o\w+",
		"CIAO",
		"CIAO is a good day and CIAO will be for sure."
	},
]
)
245
// TestItemCGroup is one capture-group case used by test_regex.
// When `cgn` is non-empty the case is run with continuous group saving and
// `cg` is compared with `re.group_csave`; otherwise `cg` is compared element
// by element with `re.groups`.
struct TestItemCGroup {
	src string // text to match
	q   string // regex query
	s   int    // expected start index of the match
	e   int    // expected end index of the match
	cg  []int  // [number of items (3*# item), id_group_0, start_0, end_0, id_group_1, start1, start2,... ]
	cgn map[string]int // expected group id for each named group
}
254
// cgroups_test_suite lists the capture-group cases executed first by test_regex.
// The first three items use named groups (non-empty name map) and therefore
// exercise the continuous-save path; the last three check the plain group list.
const (
cgroups_test_suite = [
	TestItemCGroup{
		"http://www.ciao.mondo/hello/pippo12_/pera.html",
		r"(?P<format>https?)|(?:ftps?)://(?P<token>[\w_]+[\.|/])+",0,42,
		[7, 0, 0, 4, 1, 7, 11, 1, 11, 16, 1, 16, 22, 1, 22, 28, 1, 28, 37, 1, 37, 42],
		{'format':int(0),'token':1}
	},
	TestItemCGroup{
		"http://www.ciao.mondo/hello/pippo12_/pera.html",
		r"(?P<format>https?)|(?P<format>ftps?)://(?P<token>[\w_]+.)+",0,46,
		[8, 0, 0, 4, 1, 7, 11, 1, 11, 16, 1, 16, 22, 1, 22, 28, 1, 28, 37, 1, 37, 42, 1, 42, 46]
		//[8, 0, 0, 4, 1, 7, 10, 1, 11, 15, 1, 16, 21, 1, 22, 27, 1, 28, 36, 1, 37, 41, 1, 42, 46],
		{'format':int(0),'token':1}
	},
	TestItemCGroup{
		"http://www.ciao.mondo/hello/pippo12_/pera.html",
		r"(?P<format>https?)|(?P<format>ftps?)://([\w_]+\.)+",0,16,
		[3, 0, 0, 4, 1, 7, 11, 1, 11, 16],
		{'format':int(0)}
	},
	TestItemCGroup{
		"acc +13 pippo",
		r"(\w+)\s(.)([0-9]+) \w+",0,13,
		[0, 3, 4, 5, 5, 7],
		map[string]int{}
	},
	TestItemCGroup{
		"acc +13",
		r"(\w+)\s(.)([0-9]+)",0,7,
		[0, 3, 4, 5, 5, 7],
		map[string]int{}
	},
	TestItemCGroup{
		"ababababac",
		r"ab(.*)(ac)",0,10,
		[2, 8, 8, 10],
		map[string]int{}
	},
]
)
296
// Test_find_all is one case for find_all / find_all_str used by test_regex.
struct Test_find_all {
	src     string   // text to search
	q       string   // regex query
	res     []int    // expected find_all result: [0,4,5,6...]
	res_str []string // expected find_all_str result: ['find0','find1'...]
}
303
// find_all_test_suite lists the cases test_regex checks with find_all and
// find_all_str. Each item is Test_find_all{source, query, expected index list,
// expected matched strings}.
const (
find_all_test_suite = [
	Test_find_all{
		"abcd 1234 efgh 1234 ghkl1234 ab34546df",
		r"\d+",
		[5, 9, 15, 19, 24, 28, 31, 36],
		['1234', '1234', '1234', '34546']
	},
	Test_find_all{
		"abcd 1234 efgh 1234 ghkl1234 ab34546df",
		r"\a+",
		[0, 4, 10, 14, 20, 24, 29, 31, 36, 38],
		['abcd', 'efgh', 'ghkl', 'ab', 'df']
	},
	Test_find_all{
		"oggi pippo è andato a casa di pluto ed ha trovato pippo",
		r"p[iplut]+o",
		[5, 10, 31, 36, 51, 56],
		['pippo', 'pluto', 'pippo']
	},
	Test_find_all{
		"oggi pibao è andato a casa di pbababao ed ha trovato pibabababao",
		r"(pi?(ba)+o)",
		[5, 10, 31, 39, 54, 65],
		['pibao', 'pbababao', 'pibabababao']
	},
	Test_find_all{
		"Today is a good day and tomorrow will be for sure.",
		r"[Tt]o\w+",
		[0, 5, 24, 32],
		['Today', 'tomorrow']
	},
	Test_find_all{
		"pera\nurl = https://github.com/dario/pig.html\npippo",
		r"url *= *https?://[\w./]+",
		[5, 44],
		['url = https://github.com/dario/pig.html']
	},
	Test_find_all{
		"pera\nurl = https://github.com/dario/pig.html\npippo",
		r"url *= *https?://.*"+'\n',
		[5, 45],
		['url = https://github.com/dario/pig.html\n']
	},
	Test_find_all{
		"#.#......##.#..#..##........##....###...##...######.......#.....#..#......#...#........###.#..#.",
		r"#[.#]{4}##[.#]{4}##[.#]{4}###",
		[29, 49],
		['#....###...##...####']
	},
	Test_find_all{
		"#.#......##.#..#..##........##....###...##...######.......#.....#..#......#...#........###.#..#.",
		r".*#[.#]{4}##[.#]{4}##[.#]{4}###",
		[0, 49],
		['#.#......##.#..#..##........##....###...##...####']
	},
	Test_find_all{
		"1234 Aa dddd Aaf 12334 Aa opopo Aaf",
		r"Aa.+Aaf",
		[5, 16, 23, 35],
		['Aa dddd Aaf', 'Aa opopo Aaf']
	},
	Test_find_all{
		"@for something @endfor @for something else @endfor altro testo @for body @endfor uno due @for senza dire più @endfor pippo",
		r"@for.+@endfor",
		[0, 22, 23, 50, 63, 80, 89, 117],
		['@for something @endfor', '@for something else @endfor', '@for body @endfor', '@for senza dire più @endfor']
	},
	Test_find_all{
		"+++pippo+++\n elvo +++ pippo2 +++ +++ oggi+++",
		r"\+{3}.*\+{3}",
		[0, 11, 18, 32, 33, 44],
		['+++pippo+++', '+++ pippo2 +++', '+++ oggi+++']
	},
	Test_find_all{
		"ab",
		r"[^\n]*",
		[0, 2],
		['ab']
	},
	Test_find_all{
		"ab",
		r"([^\n]*)",
		[0, 2],
		['ab']
	},
	Test_find_all{
		"ab",
		r"([^\n]|a)*",
		[0, 2],
		['ab']
	}

]
)
399
// Test_split is one case for `split` used by test_regex.
struct Test_split {
	src string   // text to split
	q   string   // regex query used as the separator
	res []string // expected split result: ['abc','def',...]
}
405
// split_test_suite lists the cases test_regex checks with `split`.
// Each item is Test_split{source, query, expected pieces}.
const (
	split_test_suite = [
		Test_split{'abcd 1234 efgh 1234 ghkl1234 ab34546df', r'\d+', ['abcd ', ' efgh ', ' ghkl',
			' ab', 'df']},
		Test_split{'abcd 1234 efgh 1234 ghkl1234 ab34546df', r'\a+', [' 1234 ', ' 1234 ', '1234 ',
			'34546']},
		Test_split{'oggi pippo è andato a casa di pluto ed ha trovato pippo', r'p[iplut]+o', [
			'oggi ', ' è andato a casa di ', ' ed ha trovato ']},
		Test_split{'oggi pibao è andato a casa di pbababao ed ha trovato pibabababao', r'(pi?(ba)+o)', [
			'oggi ', ' è andato a casa di ', ' ed ha trovato ']},
		Test_split{'Today is a good day and tomorrow will be for sure.', r'[Tt]o\w+', [
			' is a good day and ', ' will be for sure.']},
		Test_split{'pera\nurl = https://github.com/dario/pig.html\npippo', r'url *= *https?://[\w./]+', [
			'pera\n', '\npippo']},
		Test_split{'pera\nurl = https://github.com/dario/pig.html\npippo', r'url *= *https?://.*' +
			'\n', ['pera\n', 'pippo']},
		Test_split{'#.#......##.#..#..##........##....###...##...######.......#.....#..#......#...#........###.#..#.', r'#[.#]{4}##[.#]{4}##[.#]{4}###', [
			'#.#......##.#..#..##........#', '##.......#.....#..#......#...#........###.#..#.']},
		Test_split{'#.#......##.#..#..##........##....###...##...######.......#.....#..#......#...#........###.#..#.', r'.*#[.#]{4}##[.#]{4}##[.#]{4}###', [
			'##.......#.....#..#......#...#........###.#..#.']},
		Test_split{'1234 Aa dddd Aaf 12334 Aa opopo Aaf', r'Aa.+Aaf', ['1234 ', ' 12334 ']},
		Test_split{'@for something @endfor @for something else @endfor altro testo @for body @endfor uno due @for senza dire più @endfor pippo', r'@for.+@endfor', [
			' ', ' altro testo ', ' uno due ', ' pippo']},
		Test_split{'+++pippo+++\n elvo +++ pippo2 +++ +++ oggi+++', r'\+{3}.*\+{3}', [
			'\n elvo ', ' ']},
		// no match: the whole source comes back as a single piece
		Test_split{'foobar', r'\d', ['foobar']},
		// the whole source is one match: nothing is left
		Test_split{'1234', r'\d+', []},
	]
)
435// vfmt on
436
// test_regex runs every table-driven suite of this file in order:
// capture groups, find_all, split, replace, replace_simple and match/find.
// A mismatch is reported on the console and fails the test through `assert false`.
fn test_regex() {
	// check capturing groups
	for c, to in cgroups_test_suite {
		// debug print
		if debug {
			println('${c} [${to.src}] [q${to.q}] (${to.s}, ${to.e})')
		}

		mut re := regex.regex_opt(to.q) or {
			eprintln('err: ${err}')
			assert false
			continue
		}

		// cases with named groups are run with continuous group saving enabled
		if to.cgn.len > 0 {
			re.group_csave_flag = true
			// re.group_csave = [-1].repeat(3*20+1)
			if debug {
				println('continuous save')
			}
		} else {
			if debug {
				println('NO continuous save')
			}
		}

		start, end := re.match_string(to.src)

		// matched text, only used in the error report below
		mut tmp_str := ''
		if start >= 0 && end > start {
			tmp_str = to.src[start..end]
		}

		if start != to.s || end != to.e {
			println('#${c} [${to.src}] q[${to.q}] res[${tmp_str}] base:[${to.s},${to.e}] ${start}, ${end}')
			eprintln('ERROR!')
			assert false
			continue
		}

		// check cgroups
		if to.cgn.len > 0 {
			// report the empty case separately: indexing group_csave[0] for the
			// message would itself go out of range when nothing was saved
			if re.group_csave.len == 0 {
				eprintln('Capturing group len error! found: no saved groups true ground: ${to.cg[0]}')
				assert false
				continue
			}
			if re.group_csave[0] != to.cg[0] {
				eprintln('Capturing group len error! found: ${re.group_csave[0]} true ground: ${to.cg[0]}')
				assert false
				continue
			}

			// check captured groups: element 0 is the record count, each record is 3 ints
			mut ln := re.group_csave[0] * 3
			for ln > 0 {
				if re.group_csave[ln] != to.cg[ln] {
					eprintln('Capturing group failed on ${ln} item!')
					assert false
				}
				ln--
			}

			// check named captured groups
			for k in to.cgn.keys() {
				if to.cgn[k] != (re.group_map[k] - 1) { // we have -1 because the map not found is 0, in groups we start from 0 and we store using +1
					eprintln('Named capturing group error! [${k}]')
					assert false
					continue
				}
			}
		} else {
			// check normal captured groups
			if re.groups.len != to.cg.len {
				eprintln('Capture group count mismatch:')
				eprintln('true ground: ${to.cg}')
				eprintln('elaborated : ${re.groups}')
				assert false
				// do not index to.cg with indexes taken from a list of a different length
				continue
			}
			for ln := 0; ln < re.groups.len; ln++ {
				if re.groups[ln] != to.cg[ln] {
					eprintln("Capture group doesn't match:")
					eprintln('true ground: ${to.cg}')
					eprintln('elaborated : ${re.groups}')
					assert false
				}
			}
		}
	}

	// check find_all
	for c, to in find_all_test_suite {
		// debug print
		if debug {
			println('#${c} [${to.src}] q[${to.q}] (${to.res}, ${to.res_str})')
		}

		mut re := regex.regex_opt(to.q) or {
			eprintln('err: ${err}')
			assert false
			continue
		}

		re.reset()
		res := re.find_all(to.src)
		if res != to.res {
			eprintln('err: find_all !!')
			if debug {
				println('#${c} exp: ${to.res} calculated: ${res}')
			}
			assert false
		}

		res_str := re.find_all_str(to.src)
		if res_str != to.res_str {
			eprintln('err: find_all_str !!')
			if debug {
				println('#${c} exp: ${to.res_str} calculated: ${res_str}')
			}
			assert false
		}
	}

	// check split
	for c, to in split_test_suite {
		// debug print
		if debug {
			println('#${c} [${to.src}] q[${to.q}] (${to.res})')
		}

		mut re := regex.regex_opt(to.q) or {
			eprintln('err: ${err}')
			assert false
			continue
		}

		re.reset()
		res := re.split(to.src)
		if res != to.res {
			eprintln('err: split !!')
			if debug {
				println('#${c} exp: ${to.res} calculated: ${res}')
			}
			assert false
		}
	}

	// check replace
	for c, to in match_test_suite_replace {
		// debug print
		if debug {
			println('#${c} [${to.src}] q[${to.q}] ${to.r}')
		}

		mut re := regex.regex_opt(to.q) or {
			eprintln('err: ${err}')
			assert false
			continue
		}

		res := re.replace(to.src, to.rep)
		if res != to.r {
			eprintln('ERROR: replace.')
			assert false
			continue
		}
	}

	// check replace simple
	for c, to in match_test_suite_replace_simple {
		// debug print
		if debug {
			println('#${c} [${to.src}] q[${to.q}] ${to.r}')
		}

		mut re := regex.regex_opt(to.q) or {
			eprintln('err: ${err}')
			assert false
			continue
		}

		res := re.replace_simple(to.src, to.rep)
		if res != to.r {
			eprintln('ERROR: replace.')
			assert false
			continue
		}
	}

	// check match and find
	for c, to in match_test_suite {
		// debug print
		if debug {
			println('#${c} [${to.src}] q[${to.q}] ${to.s} ${to.e}')
		}

		// test the find: used for the cases whose expected start is past index 0
		if to.s > 0 {
			mut re := regex.regex_opt(to.q) or {
				eprintln('err: ${err}')
				assert false
				continue
			}
			// q_str := re.get_query()
			// eprintln("Query: $q_str")
			start, end := re.find(to.src)

			if start != to.s || end != to.e {
				err_str := re.get_parse_error_string(start)
				eprintln('ERROR : ${err_str} start: ${start} end: ${end}')
				assert false
			} else {
				// tmp_str := text[start..end]
				// println("found in [$start, $end] => [$tmp_str]")
				assert true
			}
			continue
		}

		// test the match
		mut re := regex.new()
		// re.debug = true

		re.compile_opt(to.q) or {
			eprintln('err: ${err}')
			assert false
			continue
		}
		// println("#$c [$to.src] q[$to.q]")
		start, end := re.match_string(to.src)

		// matched text, only used in the error report below
		mut tmp_str := ''
		if start >= 0 && end > start {
			tmp_str = to.src[start..end]
		}

		if start != to.s || end != to.e {
			eprintln('#${c} [${to.src}] q[${to.q}] res[${tmp_str}] ${start}, ${end}')
			eprintln('ERROR!')
			assert false
			continue
		}

		// test the match predicate
		if to.s >= 0 {
			assert re.matches_string(to.src)
		} else {
			assert !re.matches_string(to.src)
		}

		// rerun to test consistency
		tmp_str1 := to.src.clone()
		start1, end1 := re.match_string(tmp_str1)
		if start1 != start || end1 != end {
			eprintln('two run ERROR!!')
			assert false
			continue
		}
	}

	if debug {
		println('DONE!')
	}
}
693
// test_regex_func exercises regex.regex_base: the query must compile and
// match the whole six-byte test string.
fn test_regex_func() {
	mut re, re_err, err_pos := regex.regex_base(r'\d\dabcd')
	if re_err != regex.compile_ok {
		// compilation failed: report where and why, then fail the test
		eprintln('Error in query string in pos ${err_pos}')
		eprintln('Error: ${re.get_parse_error_string(re_err)}')
		assert false
		return
	}
	start, end := re.match_string('78abcd')
	assert start == 0 && end == 6
}
708
// my_repl_1 is a replace_by_fn callback: it logs the match range and returns
// 'a' followed by the upper-cased text of capture group 0.
fn my_repl_1(re regex.RE, in_txt string, start int, end int) string {
	captured := re.get_group_by_id(in_txt, 0)
	println('[${start}, ${end}] => ${captured}')
	upper := captured.to_upper()
	return 'a' + upper
}
714
// test_regex_func_replace1 checks replace_by_fn with my_repl_1: every run of
// b's that follows an 'a' must come back upper-cased.
fn test_regex_func_replace1() {
	mut re := regex.regex_opt(r'a(b+)') or { panic(err) }
	replaced := re.replace_by_fn('abbabbbabbbbaabba', my_repl_1)
	assert replaced == 'aBBaBBBaBBBBaaBBa'
}
723
// my_repl is a replace_by_fn callback: for capture groups 0, 1 and 2 it takes
// the first byte of the captured text and appends 'X', concatenating the three
// pieces in group order.
fn my_repl(re regex.RE, in_txt string, start int, end int) string {
	mut out := ''
	for group_id in 0 .. 3 {
		out += re.get_group_by_id(in_txt, group_id)[0..1] + 'X'
	}
	return out
}
730
// test_regex_func_replace checks replace_by_fn with the three-group callback
// my_repl on an input made of three records, each followed by a random-length
// slice of filler text.
fn test_regex_func_replace() {
	filler := "E il primo dei tre regni dell'Oltretomba cristiano visitato da Dante nel corso del viaggio, con la guida di Virgilio."
	txt := r'"content": "They dont necessarily flag "you will be buying these shares on margin!"", "channel_id"'
	mut re := regex.regex_opt(r'"(content":\s+")(.*)(, "channel_id")') or { panic(err) }

	mut source := ''
	mut expected := ''
	for _ in 0 .. 3 {
		// 10..29 bytes of filler plus a newline close each record
		tail := filler[0..int(10 + rand.u32() % 20)] + '\n'
		source += txt + tail
		// every record is expected to collapse to the callback output
		expected += 'cXTX,X' + tail
	}

	result := re.replace_by_fn(source, my_repl)
	if debug {
		eprintln(result)
		eprintln(expected)
	}
	assert result == expected
}
754
// rest_regex_replace_n checks replace_n for several replacement counts, both
// positive and negative, on a text containing two numeric runs.
// NOTE(review): the name starts with `rest_` rather than `test_`, so the V test
// runner presumably never calls it - confirm whether it is disabled on purpose.
fn rest_regex_replace_n() {
	s := 'dario 1234 pepep 23454 pera'
	mut re := regex.regex_opt(r'\d+') or { panic(err) }

	// counts[i] is the replacement count passed in; expected[i] is the required result
	counts := [0, -1, 1, 2, -2, 3, -3]
	expected := [
		'dario 1234 pepep 23454 pera',
		'dario 1234 pepep [repl] pera',
		'dario [repl] pepep 23454 pera',
		'dario [repl] pepep [repl] pera',
		'dario [repl] pepep [repl] pera',
		'dario [repl] pepep [repl] pera',
		'dario [repl] pepep [repl] pera',
	]
	for i, n in counts {
		assert re.replace_n(s, '[repl]', n) == expected[i]
	}

	// mut res := re.replace_n(s, "[repl]", -1)
	// println("source: ${s}")
	// println("res : ${res}")
}
773
// test_quantifier_sequences_list holds queries with invalid quantifier
// sequences; test_quantifier_sequences expects each of them to be rejected
// with regex.err_syntax_error.
const (
	test_quantifier_sequences_list = [
		r'+{3}.*+{3}',
		r'+{3}.*?{3}',
		r'+{3}.**{3}',
		r'+{3}.*\+{3}*',
		r'+{3}.*\+{3}+',
		r'+{3}.*\+{3}??',
		r'+{3}.*\+{3}{4}',
	]
)
786
// test_quantifier_sequences verifies that every query in
// test_quantifier_sequences_list fails to compile with regex.err_syntax_error.
fn test_quantifier_sequences() {
	for pattern in test_quantifier_sequences_list {
		// only the error code matters here; the compiled object and the error
		// position are discarded instead of being bound to unused locals
		_, re_err, _ := regex.regex_base(pattern)
		if re_err != regex.err_syntax_error {
			eprintln('pattern: ${pattern} => ${re_err}')
		}
		assert re_err == regex.err_syntax_error
	}
}
796
// Test_find_groups is one case for test_groups_in_find: it checks the match
// range returned by `find` and the group indexes left in `re.groups`.
struct Test_find_groups {
	src string // text to search
	q   string // regex query
	s   int    // start index
	e   int    // end index
	res []int  // groups indexes
}
805
806// vfmt off
// find_groups_test_suite lists the cases executed by test_groups_in_find.
// Each item is Test_find_groups{source, query, start, end, expected re.groups}.
const (
find_groups_test_suite = [
	Test_find_groups{
		"aabbbccccdd",
		r"(b+)(c+)",
		2,
		9,
		[2, 5, 5, 9],
	},
	Test_find_groups{
		"aabbbccccdd",
		r"(a+).*(c+)",
		0,
		9,
		[0, 2, 5, 9],
	},
	Test_find_groups{
		"aabbbccccdd",
		r"((b+).*)(d+)",
		2,
		11,
		[2, 9, 2, 5, 9, 11],
	},
]
)
832// vfmt on
833
// test_groups_in_find runs `find` for every case of find_groups_test_suite and
// checks the returned range as well as the group indexes stored in re.groups.
fn test_groups_in_find() {
	for tc in find_groups_test_suite {
		mut re := regex.regex_opt(tc.q) or { panic(err) }
		start, end := re.find(tc.src)
		// Debug print do not remove!!
		/*
		println("---------")
		println("src_text:[${tc.src}]")
		println("query :[${tc.q}]")
		println("[${start}, ${end}]")
		println(re.groups)
		mut gi := 0
		for gi < re.groups.len {
			if re.groups[gi] >= 0 {
				println('${gi / 2} :[${tc.src[re.groups[gi]..re.groups[gi + 1]]}]')
			}
			gi += 2
		}
		*/
		// check
		assert start == tc.s
		assert end == tc.e
		assert re.groups == tc.res
	}
}
861
// err_query_list holds queries that test_errors expects regex.regex_base to
// reject, i.e. to return something other than regex.compile_ok.
const (
	err_query_list = [
		r'([a]|[b])*',
	]
)
867
// test_errors verifies that every query in err_query_list fails to compile.
fn test_errors() {
	mut rejected := 0
	for query in err_query_list {
		_, code, _ := regex.regex_base(query)
		if code == regex.compile_ok {
			// compiled fine: not counted, so the final assert will fail
			continue
		}
		rejected++
	}
	assert rejected == err_query_list.len
}
878
// test_long_query builds two very large queries out of a random string - one
// with a separate group per character, one with all groups nested - and checks
// that each of them matches the whole string it was built from.
fn test_long_query() {
	test_len := 32768
	base_string := rand.string(test_len)
	mut buf := strings.new_builder(test_len * 3)

	// test 1: "(c0)(c1)(c2)..." - every character in its own group
	for ch in base_string {
		buf.write_u8(`(`)
		buf.write_u8(ch)
		buf.write_u8(`)`)
	}
	mut query := buf.str()
	// println(base_string)
	// println(buf.str())
	mut re := regex.regex_opt(query) or { panic(err) }
	mut start, mut end := re.match_string(base_string)
	// println("$start, $end")
	assert start >= 0 && end == base_string.len

	// test 2: "(c0(c1(c2...)))" - every group opened first, all closed at the end
	buf.clear()
	for ch in base_string {
		buf.write_u8(`(`)
		buf.write_u8(ch)
	}
	buf.write_string(')'.repeat(base_string.len))
	query = buf.str()
	re = regex.regex_opt(query) or { panic(err) }
	start, end = re.match_string(base_string)
	// println("$start, $end")
	assert start >= 0 && end == base_string.len
}
916
// Test_negation_group is one case for test_negation_groups.
struct Test_negation_group {
	src string // text to match
	res bool   // true when match_string is expected to report a start index >= 0
}
921
// negation_groups lists the inputs checked by test_negation_groups together
// with whether the negation-group query is expected to match each of them.
const (
	negation_groups = [
		Test_negation_group{'automobile', false},
		Test_negation_group{'botomobile', true},
		Test_negation_group{'auto_caravan', false},
		Test_negation_group{'moto_mobile', true},
		Test_negation_group{'pippole', true},
		Test_negation_group{'boring test', false},
	]
)
932
// test_negation_groups compiles a query that starts with the negation group
// `(?!auto)` and checks, for every entry of negation_groups, whether
// match_string reports a match (start index >= 0) as expected.
fn test_negation_groups() {
	// the query is never reassigned, so it does not need to be mutable
	query := r'(?!auto)\w+le'
	mut re := regex.regex_opt(query) or { panic(err) }
	for test in negation_groups {
		// only the start index is checked; the end index is discarded
		start, _ := re.match_string(test.src)
		assert (start >= 0) == test.res
	}
}