1 | import regex |
2 | import rand |
3 | import strings |
4 | |
// debug enables the verbose println/eprintln traces emitted by the tests in this file.
const debug = true
6 | |
7 | /****************************************************************************** |
8 | * |
9 | * Test section |
10 | * |
11 | ******************************************************************************/ |
// TestItem is one row of `match_test_suite`: a subject string, a query and
// the start/end pair the test expects back from the regex engine.
struct TestItem {
	src string // text the query is run against
	q   string // regex query
	s   int    // expected start index (negative when no match is expected)
	e   int    // expected end index
}
18 | |
19 | // vfmt off |
// match_test_suite lists (source, query, expected start, expected end) rows.
// `test_regex` checks rows with a positive start through `find` and all the
// other rows through `match_string`/`matches_string`.
const match_test_suite = [
	// minus in CC
	TestItem{"d.def",r"abc.\.[\w\-]{,100}",-1,0},
	TestItem{"abc12345.asd",r"abc.\.[\w\-]{,100}",-1,4},
	TestItem{"abca.exe",r"abc.\.[\w\-]{,100}",0,8},
	TestItem{"abc2.exe-test_12",r"abc.\.[\w\-]{,100}",0,16},
	TestItem{"abcdefGHK",r"[a-f]+\A+",0,9},
	TestItem{"ab-cd-efGHK",r"[a-f\-g]+\A+",0,11},

	// base OR
	TestItem{"a",r"a|b",0,1},
	TestItem{"a",r"b|a",0,1},
	TestItem{"b",r"a|b",0,1},
	TestItem{"b",r"b|a",0,1},
	TestItem{"c",r"b|a",-1,0},

	// test base
	TestItem{"[ciao]",r"(.)ciao(.)",0,6},
	TestItem{"[ciao] da me",r"(.)ciao(.)",0,6},

	// positive
	TestItem{"this is a good.",r"this",0,4},
	TestItem{"this is a good.",r"good",10,14},
	TestItem{"this is a good.",r"go+d",10,14},
	TestItem{"this is a good.",r"g[oae]+d",10,14},
	TestItem{"this is a goed.",r"g[oae]+d",10,14},
	TestItem{"this is a good.",r"g[oae]*d",10,14},
	TestItem{"this is a goaezd.",r"g[ea-cm-z]*d",10,16},
	TestItem{"this is a good.",r"this (\w+) a",0,9},
	TestItem{"this is a good.",r"this( \w+){2} g",0,11},
	TestItem{"this is a good.",r"( ?\w+){,1}",0,4},
	TestItem{"this is a good.",r"( ?\w+)+",0,14},
	TestItem{"this is a good.",r"this( \w+)+",0,14},
	TestItem{"this is a good sample.",r"( ?\w+){,2}",0,7},
	TestItem{"this is a good sample.",r"( ?\w+){,3}",0,9},
	TestItem{"this is a good sample.",r"( ?\w+){,4}",0,14},
	TestItem{"this is a good sample.",r"( ?\w+){,5}",0,21},
	TestItem{"this is a good sample.",r"( ?\w+){2,3}",0,9},
	TestItem{"this is a good sample.",r"(\s?\w+){2,3}",0,9},
	TestItem{"this these those.",r"(th[ei]se?\s|\.)+",0,11},
	TestItem{"this these those ",r"(th[eio]se? ?)+",0,17},
	TestItem{"this these those ",r"(th[eio]se? )+",0,17},
	TestItem{"this,these,those. over",r"(th[eio]se?[,. ])+",0,17},
	TestItem{"soday,this,these,those. over",r".+(th[eio]se?[,. ])+",0,23},

	TestItem{"cpapaz",r"(c(pa)+z)",0,6},
	TestItem{"this is a cpapaz over",r"(c(pa)+z)",10,16},
	TestItem{"this is a cpapapez over",r"(c(p[ae])+z)",10,18},
	// NOTE(review): the three e-mail subjects below were reconstructed from obfuscated
	// "[email protected]" placeholders. They are chosen so that the expected end offsets
	// (17, 18, 14) hold and so that they mirror the "test1post.pip.com" negative case
	// further down - confirm against the upstream test data.
	TestItem{"test@post.pip.com",r"[a-z0-9_]+@([a-z0-9_]+\.?)+",0,17},
	TestItem{"test1@post.pip.com, pera",r"[\w]+@([\w]+\.)+\w+",0,18},
	TestItem{"pippo@pera.com ",r"[a-z0-9_]+@([a-z0-9_]+\.?)+",0,14},
	TestItem{"adce aabe",r"(a(ab)+)|(a(dc)+)e",0,4},
	TestItem{"zadce aabe",r"(a(ab)+)|(a(dc)+)e",1,5},
	TestItem{"abbz accz addz.",r"c|(d)|e|(ab+)",0,3},
	TestItem{"this those these ciao",r"((t[hieo]+se?)\s*)+",0,17},
	TestItem{"this ciao",r"((t[hieo]+se?)\s*)+",0,5},
	TestItem{"this cpapaz adce aabe",r"(c(pa)+z)(\s[\a]+){2}",5,21},
	TestItem{"1234this cpapaz adce aabe",r"(c(pa)+z)(\s[\a]+){2}$",9,25},
	TestItem{"this cpapaz adce aabe third",r"(c(pa)+z)(\s[\a]+){2}",5,21},
	TestItem{"123cpapaz ole. pippo",r"(c(pa)+z)(\s+\a+[\.,]?)+",3,20},

	TestItem{"this is a good sample.",r".*i(\w)+",0,4},
	TestItem{"soday,this,these,those. over",r".*,(th[eio]se?[,. ])+",0,23},
	TestItem{"soday,this,these,thesa.thesi over",r".*,(th[ei]se?[,. ])+(thes[ai][,. ])+",0,29},
	TestItem{"cpapaz ole. pippo,",r".*(c(pa)+z)(\s+\a+[\.,]?)+",0,18},
	TestItem{"cpapaz ole. pippo",r"(c(pa)+z)(\s+\a+[\.,]?)+",0,17},
	TestItem{"cpapaz ole. pippo, 852",r".*(c(pa)+z)(\s+\a+[\.,]?)+",0,18},
	TestItem{"123cpapaz ole. pippo",r".*(c(pa)+z)(\s+\a+[\.,]?)+",0,20},
	TestItem{"...cpapaz ole. pippo",r".*(c(pa)+z)(\s+\a+[\.,]?)+",0,20},

	TestItem{"cpapaz ole. pippo,",r".*c.+ole.*pi",0,14},
	TestItem{"cpapaz ole. pipipo,",r".*c.+ole.*p([ip])+o",0,18},
	TestItem{"cpapaz ole. pipipo",r"^.*c.+ol?e.*p([ip])+o$",0,18},
	TestItem{"abbb",r"ab{2,3}?",0,3},
	TestItem{" pippo pera",r"\s(.*)pe(.*)",0,11},
	TestItem{" abb",r"\s(.*)",0,4},

	TestItem{"/home/us_er/pippo/info-01.txt", r"(/?[-\w_]+)*\.txt$",0,29},

	// negative
	TestItem{"zthis ciao",r"((t[hieo]+se?)\s*)+",-1,0},
	TestItem{"this is a good.",r"thes",-1,2},
	TestItem{"test1post.pip.com, pera",r"[\w]+@([\w]+\.)+\w+",-1,9},
	TestItem{"this cpapaz adce",r"(c(pa)+z)(\s[\a]+){2}",-1,0},
	TestItem{"this cpapaz adce aabe third",r"(c(pa)+z)(\s[\a]+){2}$",-1,0},
	TestItem{"1234this cpapaz adce aabe ter",r"(c(pa)+z)(\s[\a]+){2}$",-1,0},
	TestItem{"cpapaz ole. pipipo,",r"^.*c.+ol?e.*p([ip])+o$",-1,0},

	// check unicode
	TestItem{"this is a Ⅰ Ⅱ Ⅲ Ⅳ Ⅴ Ⅵ test",r".*a [Ⅰ-Ⅵ ]+",0,34},
	TestItem{"123Ⅰ Ⅱ Ⅲ Ⅳ Ⅴ Ⅵ test",r"[Ⅰ-Ⅴ\s]+",3,23},

	// new edge cases
	TestItem{"12345678", r"[0-9][0-9][0-9][0-9][0-9][0-9][0-9][0-9][0-9]",-1,8},
	TestItem{"12345678", r"[0-9][0-9][0-9][0-9][0-9][0-9][0-9][0-9]",0,8},
	TestItem{"123456789", r"^[0-9][0-9][0-9][0-9][0-9][0-9][0-9][0-9][0-9]$",0,9},
	TestItem{"12345678", r"^\d{8}$",0,8},
	TestItem{"12345678", r"^\d{7}$",-1,0},
	TestItem{"12345678", r"^\d{9}$",-1,8},

	TestItem{"eth", r"(oth)|(eth)",0,3},
	TestItem{"et", r"(oth)|(eth)",-1,2},
	TestItem{"et", r".*(oth)|(eth)",-1,2},
	TestItem{"peoth", r".*(ith)|(eth)",-1,5},

	TestItem{"poth", r"(eth)|(oth)",1,4},
	TestItem{"poth", r"(oth)|(eth)",1,4},
	TestItem{"poth", r".(oth)|(eth)$",0,4},
	TestItem{"poth", r"^.(oth)|(eth)$",0,4},
	TestItem{"poth", r"^\w+$",0,4},

	// test dot_char
	TestItem{"8-11 l: qllllqllklhlvtl", r"^(\d+)-(\d+) ([a-z]): (.*)$",0,23},
	TestItem{"accccb deer", r"^a(.*)b d(.+)r",0,11},
	TestItem{"accccb deer", r"^a(.*)b d(.+)",0,11},
	TestItem{"accccb deer", r"^(.*)$",0,11},
	TestItem{"accccb deer", r"^a(.*)b d(.+)p",-1,11},
	TestItem{"##.#....#.##.####...#.##", r".{18}[.#]",0,19},
	TestItem{"#.#......##.#..#..##........##....###...##...######.......#.....#..#......#...#........###.#..#.", r'.*#[.#]{4}##[.#]{4}##[.#]{4}###',0,49},

	// test bcksls chars
	TestItem{"[ an s. s! ]( wi4ki:something )", r"\[.*\]\( *(\w*:*\w+) *\)",0,31},
	TestItem{"[ an s. s! ](wiki:something)", r"\[.*\]\( *(\w*:*\w+) *\)",0,28},
	TestItem{"p_p", r"\w+",0,3},
	TestItem{"p_é", r"\w+",0,2},

	// Crazywulf tests (?:^|[()])(\d+)(*)(\d+)(?:$|[()])
	TestItem{"1*1", r"(\d+)([*])(\d+)",0,3},
	TestItem{"+1*1", r"^(\d+)([*])(\d+)",-1,0},
	TestItem{"*1*1", r"(?:^|[*])(\d+)([*])(\d+)",0,4},
	TestItem{"*1*1", r"(?:^|[*()])(\d+)([*])(\d+)",0,4},
	TestItem{")1*1", r"(?:^|[*()])(\d+)([*])(\d+)",0,4},
	TestItem{"(1*1", r"(?:^|[*()])(\d+)([*])(\d+)",0,4},
	TestItem{"*1*1(", r"(?:^|[*()])(\d+)([*])(\d+)(?:$|[*()])",0,5},
	TestItem{" 1*1(", r"(?:^|[*()])(\d+)([*])(\d+)(?:$|[*()])",-1,0},
	TestItem{"1*1 ", r"(?:^|[*()])(\d+)([*])(\d+)(?:$|[*()])",-1,0},

	// particular groups
	TestItem{"ababababac", r"ab(.*)(ac)",0,10},

	// backslash on finish string
	TestItem{"a", r"\S+",0,1},
	TestItem{"aaaa", r"\S+",0,4},
	TestItem{"aaaa ", r"\S+",0,4},

	// multiple dot char
	TestItem{"aba", r"a*(b*)*a",0,3},
	TestItem{"/*x*/", r"/\**(.*)\**/",0,5},
	TestItem{"/*x*/", r"/*(.*)*/",0,5},

	// test last IST check
	TestItem{"refs/remotes/origin/mastep", r"refs/remotes/origin/(.*)",0,26},
	TestItem{"refs/remotes/origin/master", r"refs/remotes/origin/(.*)",0,26},
	TestItem{"refs/remotes/origin/mastep", r"refs/remotes/origin/(\w*)",0,26},
	TestItem{"refs/remotes/origin/master", r"refs/remotes/origin/(\w*)",0,26},

	// test \S+ vs [^\s]+
	TestItem{"ab.c", r"\S+\.",0,3},
	TestItem{"ab.c", r"[^\s]+\.",0,3},
	TestItem{"ab.c", r"\S*\.",0,3},
	TestItem{"ab.c", r"[^\s]*\.",0,3},
	TestItem{"ab c", r"[\S]+\s",0,3},
	TestItem{"ab c", r"[^\s]+\s",0,3},
]
186 | |
// TestItemRe is one row of the replace suites: the input text, the query,
// the replacement string and the text expected after the replacement.
struct TestItemRe {
	src string // input text
	q   string // regex query
	rep string // replacement string handed to replace/replace_simple
	r   string // expected result
}
193 | |
// match_test_suite_replace feeds the `re.replace` section of `test_regex`;
// some replacement strings use the \0 and \1 group placeholders.
const match_test_suite_replace = [
	// replace tests
	TestItemRe{
		"oggi pibao è andato a casa di pbababao ed ha trovato pibabababao",
		r"(pi?(ba)+o)",
		"CIAO",
		"oggi CIAO è andato a casa di CIAO ed ha trovato CIAO"
	},
	TestItemRe{
		"Today is a good day and tomorrow will be for sure.",
		r"[Tt]o\w+",
		"CIAO",
		"CIAO is a good day and CIAO will be for sure."
	},
	TestItemRe{
		"Today is a good day and tomorrow will be for sure.",
		r"(a\w) ",
		r"[\0] ",
		"Tod[ay] is a good d[ay] and tomorrow will be for sure."
	},
	TestItemRe{
		"Today is a good day and tomorrow will be for sure.",
		r"(a\w) ",
		r"[\0_\0] ",
		"Tod[ay_ay] is a good d[ay_ay] and tomorrow will be for sure."
	},
	TestItemRe{
		"Today is a good day and tomorrow will be for sure.",
		r"(a\w) ",
		r"[\0\1] ",
		"Tod[ay] is a good d[ay] and tomorrow will be for sure."
	},
]

// match_test_suite_replace_simple feeds the `re.replace_simple` section of `test_regex`.
const match_test_suite_replace_simple = [
	// replace tests
	TestItemRe{
		"oggi pibao è andato a casa di pbababao ed ha trovato pibabababao",
		r"(pi?(ba)+o)",
		"CIAO",
		"oggi CIAO è andato a casa di CIAO ed ha trovato CIAO"
	},
	TestItemRe{
		"Today is a good day and tomorrow will be for sure.",
		r"[Tt]o\w+",
		"CIAO",
		"CIAO is a good day and CIAO will be for sure."
	},
]
245 | |
// TestItemCGroup is one row of `cgroups_test_suite`.
// `cg` is read in two ways by `test_regex`:
//  - when `cgn` is not empty it is compared with `re.group_csave`:
//    [number of records, id_group_0, start_0, end_0, id_group_1, start_1, end_1, ...]
//  - when `cgn` is empty it is compared element by element with `re.groups`.
struct TestItemCGroup {
	src string // text the query is run against
	q   string // regex query
	s   int    // expected start index of the match
	e   int    // expected end index of the match
	cg  []int  // expected capture data, see the layouts above
	cgn map[string]int // expected named group -> group index
}
254 | |
// cgroups_test_suite holds the capture group cases consumed by the first loop of
// `test_regex`. The rows with a non empty name map are checked against the
// continuous save buffer, the remaining ones against `re.groups`.
const cgroups_test_suite = [
	TestItemCGroup{
		"http://www.ciao.mondo/hello/pippo12_/pera.html",
		r"(?P<format>https?)|(?:ftps?)://(?P<token>[\w_]+[\.|/])+",0,42,
		[7, 0, 0, 4, 1, 7, 11, 1, 11, 16, 1, 16, 22, 1, 22, 28, 1, 28, 37, 1, 37, 42],
		{'format':int(0),'token':1}
	},
	TestItemCGroup{
		"http://www.ciao.mondo/hello/pippo12_/pera.html",
		r"(?P<format>https?)|(?P<format>ftps?)://(?P<token>[\w_]+.)+",0,46,
		[8, 0, 0, 4, 1, 7, 11, 1, 11, 16, 1, 16, 22, 1, 22, 28, 1, 28, 37, 1, 37, 42, 1, 42, 46]
		//[8, 0, 0, 4, 1, 7, 10, 1, 11, 15, 1, 16, 21, 1, 22, 27, 1, 28, 36, 1, 37, 41, 1, 42, 46],
		{'format':int(0),'token':1}
	},
	TestItemCGroup{
		"http://www.ciao.mondo/hello/pippo12_/pera.html",
		r"(?P<format>https?)|(?P<format>ftps?)://([\w_]+\.)+",0,16,
		[3, 0, 0, 4, 1, 7, 11, 1, 11, 16],
		{'format':int(0)}
	},
	TestItemCGroup{
		"acc +13 pippo",
		r"(\w+)\s(.)([0-9]+) \w+",0,13,
		[0, 3, 4, 5, 5, 7],
		map[string]int{}
	},
	TestItemCGroup{
		"acc +13",
		r"(\w+)\s(.)([0-9]+)",0,7,
		[0, 3, 4, 5, 5, 7],
		map[string]int{}
	},
	TestItemCGroup{
		"ababababac",
		r"ab(.*)(ac)",0,10,
		[2, 8, 8, 10],
		map[string]int{}
	},
]
296 | |
// Test_find_all is one row of `find_all_test_suite`: the expected output of
// `find_all` (indexes) and of `find_all_str` (matched substrings) for a query.
struct Test_find_all {
	src     string   // text the query is run against
	q       string   // regex query
	res     []int    // expected find_all result, e.g. [0,4,5,6...]
	res_str []string // expected find_all_str result, e.g. ['find0','find1'...]
}
303 | |
// find_all_test_suite is consumed by the `find_all` section of `test_regex`:
// each row gives the flat index list and the matched substrings expected for a query.
const find_all_test_suite = [
	Test_find_all{
		"abcd 1234 efgh 1234 ghkl1234 ab34546df",
		r"\d+",
		[5, 9, 15, 19, 24, 28, 31, 36],
		['1234', '1234', '1234', '34546']
	},
	Test_find_all{
		"abcd 1234 efgh 1234 ghkl1234 ab34546df",
		r"\a+",
		[0, 4, 10, 14, 20, 24, 29, 31, 36, 38],
		['abcd', 'efgh', 'ghkl', 'ab', 'df']
	},
	Test_find_all{
		"oggi pippo è andato a casa di pluto ed ha trovato pippo",
		r"p[iplut]+o",
		[5, 10, 31, 36, 51, 56],
		['pippo', 'pluto', 'pippo']
	},
	Test_find_all{
		"oggi pibao è andato a casa di pbababao ed ha trovato pibabababao",
		r"(pi?(ba)+o)",
		[5, 10, 31, 39, 54, 65],
		['pibao', 'pbababao', 'pibabababao']
	},
	Test_find_all{
		"Today is a good day and tomorrow will be for sure.",
		r"[Tt]o\w+",
		[0, 5, 24, 32],
		['Today', 'tomorrow']
	},
	Test_find_all{
		"pera\nurl = https://github.com/dario/pig.html\npippo",
		r"url *= *https?://[\w./]+",
		[5, 44],
		['url = https://github.com/dario/pig.html']
	},
	Test_find_all{
		"pera\nurl = https://github.com/dario/pig.html\npippo",
		r"url *= *https?://.*"+'\n',
		[5, 45],
		['url = https://github.com/dario/pig.html\n']
	},
	Test_find_all{
		"#.#......##.#..#..##........##....###...##...######.......#.....#..#......#...#........###.#..#.",
		r"#[.#]{4}##[.#]{4}##[.#]{4}###",
		[29, 49],
		['#....###...##...####']
	},
	Test_find_all{
		"#.#......##.#..#..##........##....###...##...######.......#.....#..#......#...#........###.#..#.",
		r".*#[.#]{4}##[.#]{4}##[.#]{4}###",
		[0, 49],
		['#.#......##.#..#..##........##....###...##...####']
	},
	Test_find_all{
		"1234 Aa dddd Aaf 12334 Aa opopo Aaf",
		r"Aa.+Aaf",
		[5, 16, 23, 35],
		['Aa dddd Aaf', 'Aa opopo Aaf']
	},
	Test_find_all{
		"@for something @endfor @for something else @endfor altro testo @for body @endfor uno due @for senza dire più @endfor pippo",
		r"@for.+@endfor",
		[0, 22, 23, 50, 63, 80, 89, 117],
		['@for something @endfor', '@for something else @endfor', '@for body @endfor', '@for senza dire più @endfor']
	},
	Test_find_all{
		"+++pippo+++\n elvo +++ pippo2 +++ +++ oggi+++",
		r"\+{3}.*\+{3}",
		[0, 11, 18, 32, 33, 44],
		['+++pippo+++', '+++ pippo2 +++', '+++ oggi+++']
	},
	Test_find_all{
		"ab",
		r"[^\n]*",
		[0, 2],
		['ab']
	},
	Test_find_all{
		"ab",
		r"([^\n]*)",
		[0, 2],
		['ab']
	},
	Test_find_all{
		"ab",
		r"([^\n]|a)*",
		[0, 2],
		['ab']
	}

]
399 | |
// Test_split is one row of `split_test_suite`: the pieces `re.split` is
// expected to return for a source text and a query.
struct Test_split {
	src string   // text to split
	q   string   // regex query used as separator
	res []string // expected pieces, e.g. ['abc','def',...]
}
405 | |
// split_test_suite is consumed by the `split` section of `test_regex`.
// The last two rows cover a query that never matches and a query that
// matches the whole source.
const split_test_suite = [
	Test_split{
		'abcd 1234 efgh 1234 ghkl1234 ab34546df',
		r'\d+',
		['abcd ', ' efgh ', ' ghkl', ' ab', 'df']
	},
	Test_split{
		'abcd 1234 efgh 1234 ghkl1234 ab34546df',
		r'\a+',
		[' 1234 ', ' 1234 ', '1234 ', '34546']
	},
	Test_split{
		'oggi pippo è andato a casa di pluto ed ha trovato pippo',
		r'p[iplut]+o',
		['oggi ', ' è andato a casa di ', ' ed ha trovato ']
	},
	Test_split{
		'oggi pibao è andato a casa di pbababao ed ha trovato pibabababao',
		r'(pi?(ba)+o)',
		['oggi ', ' è andato a casa di ', ' ed ha trovato ']
	},
	Test_split{
		'Today is a good day and tomorrow will be for sure.',
		r'[Tt]o\w+',
		[' is a good day and ', ' will be for sure.']
	},
	Test_split{
		'pera\nurl = https://github.com/dario/pig.html\npippo',
		r'url *= *https?://[\w./]+',
		['pera\n', '\npippo']
	},
	Test_split{
		'pera\nurl = https://github.com/dario/pig.html\npippo',
		r'url *= *https?://.*' + '\n',
		['pera\n', 'pippo']
	},
	Test_split{
		'#.#......##.#..#..##........##....###...##...######.......#.....#..#......#...#........###.#..#.',
		r'#[.#]{4}##[.#]{4}##[.#]{4}###',
		['#.#......##.#..#..##........#', '##.......#.....#..#......#...#........###.#..#.']
	},
	Test_split{
		'#.#......##.#..#..##........##....###...##...######.......#.....#..#......#...#........###.#..#.',
		r'.*#[.#]{4}##[.#]{4}##[.#]{4}###',
		['##.......#.....#..#......#...#........###.#..#.']
	},
	Test_split{
		'1234 Aa dddd Aaf 12334 Aa opopo Aaf',
		r'Aa.+Aaf',
		['1234 ', ' 12334 ']
	},
	Test_split{
		'@for something @endfor @for something else @endfor altro testo @for body @endfor uno due @for senza dire più @endfor pippo',
		r'@for.+@endfor',
		[' ', ' altro testo ', ' uno due ', ' pippo']
	},
	Test_split{
		'+++pippo+++\n elvo +++ pippo2 +++ +++ oggi+++',
		r'\+{3}.*\+{3}',
		['\n elvo ', ' ']
	},
	Test_split{'foobar', r'\d', ['foobar']},
	Test_split{'1234', r'\d+', []},
]
435 | // vfmt on |
436 | |
// test_regex drives every table based suite declared above through the regex API.
// Sections, in order: capture groups (`cgroups_test_suite`), `find_all`/`find_all_str`,
// `split`, `replace`, `replace_simple` and finally `find`/`match_string`
// (`match_test_suite`). Each failure prints some context before `assert false`.
fn test_regex() {
	// check capturing groups
	for c, to in cgroups_test_suite {
		// debug print
		if debug {
			println('${c} [${to.src}] [q${to.q}] (${to.s}, ${to.e})')
		}

		mut re := regex.regex_opt(to.q) or {
			eprintln('err: ${err}')
			assert false
			continue
		}

		// rows with named groups are verified through the continuous save buffer
		if to.cgn.len > 0 {
			re.group_csave_flag = true
			// re.group_csave = [-1].repeat(3*20+1)
			if debug {
				println('continuous save')
			}
		} else {
			if debug {
				println('NO continuous save')
			}
		}

		start, end := re.match_string(to.src)

		mut tmp_str := ''
		if start >= 0 && end > start {
			tmp_str = to.src[start..end]
		}

		if start != to.s || end != to.e {
			println('#${c} [${to.src}] q[${to.q}] res[${tmp_str}] base:[${to.s},${to.e}] ${start}, ${end}')
			eprintln('ERROR!')
			assert false
			continue
		}

		// check cgroups
		if to.cgn.len > 0 {
			if re.group_csave.len == 0 || re.group_csave[0] != to.cg[0] {
				// group_csave may be empty here: never index it blindly while building the message
				found := if re.group_csave.len > 0 { re.group_csave[0].str() } else { '<empty>' }
				eprintln('Capturing group len error! found: ${found} true ground: ${to.cg[0]}')
				assert false
				continue
			}

			// check captured groups
			mut ln := re.group_csave[0] * 3
			for ln > 0 {
				if re.group_csave[ln] != to.cg[ln] {
					eprintln('Capturing group failed on ${ln} item!')
					assert false
				}
				ln--
			}

			// check named captured groups
			for k in to.cgn.keys() {
				if to.cgn[k] != (re.group_map[k] - 1) { // we have -1 because the map not found is 0, in groups we start from 0 and we store using +1
					eprintln('Named capturing group error! [${k}]')
					assert false
					continue
				}
			}
		} else {
			// check normal captured groups
			if re.groups.len != to.cg.len {
				eprintln("Capture group doesn't match:")
				eprintln('true ground: ${to.cg}')
				eprintln('elaborated : ${re.groups}')
				assert false
				// the lengths differ: skip the element check, it could index past `to.cg`
				continue
			}
			for ln := 0; ln < re.groups.len; ln++ {
				if re.groups[ln] != to.cg[ln] {
					eprintln("Capture group doesn't match:")
					eprintln('true ground: ${to.cg}')
					eprintln('elaborated : ${re.groups}')
					assert false
				}
			}
		}
	}

	// check find_all
	for c, to in find_all_test_suite {
		// debug print
		if debug {
			println('#${c} [${to.src}] q[${to.q}] (${to.res}, ${to.res_str})')
		}

		mut re := regex.regex_opt(to.q) or {
			eprintln('err: ${err}')
			assert false
			continue
		}

		re.reset()
		res := re.find_all(to.src)
		if res != to.res {
			eprintln('err: find_all !!')
			if debug {
				println('#${c} exp: ${to.res} calculated: ${res}')
			}
			assert false
		}

		res_str := re.find_all_str(to.src)
		if res_str != to.res_str {
			eprintln('err: find_all_str !!')
			if debug {
				println('#${c} exp: ${to.res_str} calculated: ${res_str}')
			}
			assert false
		}
	}

	// check split
	for c, to in split_test_suite {
		// debug print
		if debug {
			println('#${c} [${to.src}] q[${to.q}] (${to.res})')
		}

		mut re := regex.regex_opt(to.q) or {
			eprintln('err: ${err}')
			assert false
			continue
		}

		re.reset()
		res := re.split(to.src)
		if res != to.res {
			eprintln('err: split !!')
			if debug {
				println('#${c} exp: ${to.res} calculated: ${res}')
			}
			assert false
		}
	}

	// check replace
	for c, to in match_test_suite_replace {
		// debug print
		if debug {
			println('#${c} [${to.src}] q[${to.q}] ${to.r}')
		}

		mut re := regex.regex_opt(to.q) or {
			eprintln('err: ${err}')
			assert false
			continue
		}

		res := re.replace(to.src, to.rep)
		if res != to.r {
			eprintln('ERROR: replace.')
			assert false
			continue
		}
	}

	// check replace simple
	for c, to in match_test_suite_replace_simple {
		// debug print
		if debug {
			println('#${c} [${to.src}] q[${to.q}] ${to.r}')
		}

		mut re := regex.regex_opt(to.q) or {
			eprintln('err: ${err}')
			assert false
			continue
		}

		res := re.replace_simple(to.src, to.rep)
		if res != to.r {
			eprintln('ERROR: replace.')
			assert false
			continue
		}
	}

	// check match and find
	for c, to in match_test_suite {
		// debug print
		if debug {
			println('#${c} [${to.src}] q[${to.q}] ${to.s} ${to.e}')
		}

		// test the find: only the rows whose expected start is positive go through `find`
		if to.s > 0 {
			mut re := regex.regex_opt(to.q) or {
				eprintln('err: ${err}')
				assert false
				continue
			}
			// q_str := re.get_query()
			// eprintln("Query: $q_str")
			start, end := re.find(to.src)

			if start != to.s || end != to.e {
				err_str := re.get_parse_error_string(start)
				eprintln('ERROR : ${err_str} start: ${start} end: ${end}')
				assert false
			} else {
				// tmp_str := text[start..end]
				// println("found in [$start, $end] => [$tmp_str]")
				assert true
			}
			continue
		}

		// test the match
		mut re := regex.new()
		// re.debug = true

		re.compile_opt(to.q) or {
			eprintln('err: ${err}')
			assert false
			continue
		}
		// println("#$c [$to.src] q[$to.q]")
		start, end := re.match_string(to.src)

		mut tmp_str := ''
		if start >= 0 && end > start {
			tmp_str = to.src[start..end]
		}

		if start != to.s || end != to.e {
			eprintln('#${c} [${to.src}] q[${to.q}] res[${tmp_str}] ${start}, ${end}')
			eprintln('ERROR!')
			assert false
			continue
		}

		// test the match predicate
		if to.s >= 0 {
			assert re.matches_string(to.src)
		} else {
			assert !re.matches_string(to.src)
		}

		// rerun to test consistency: a second run on a copy must give the same indexes
		tmp_str1 := to.src.clone()
		start1, end1 := re.match_string(tmp_str1)
		if start1 != start || end1 != end {
			eprintln('two run ERROR!!')
			assert false
			continue
		}
	}

	if debug {
		println('DONE!')
	}
}
693 | |
// test_regex_func compiles a query with `regex.regex_base`, which reports
// the outcome through an error code and an error position, then checks
// that the compiled regex matches the whole sample text.
fn test_regex_func() {
	pattern := r'\d\dabcd'
	sample := '78abcd'
	mut re, compile_res, fail_pos := regex.regex_base(pattern)
	if compile_res != regex.compile_ok {
		// compilation failed: report where and why, then fail the test
		eprintln('Error in query string in pos ${fail_pos}')
		eprintln('Error: ${re.get_parse_error_string(compile_res)}')
		assert false
		return
	}
	first, last := re.match_string(sample)
	assert first == 0 && last == 6
}
708 | |
// my_repl_1 is a `replace_by_fn` callback: it prints the match bounds together
// with group 0 and returns 'a' followed by the upper-cased content of group 0.
fn my_repl_1(re regex.RE, in_txt string, start int, end int) string {
	captured := re.get_group_by_id(in_txt, 0)
	println('[${start}, ${end}] => ${captured}')
	upper := captured.to_upper()
	return 'a' + upper
}
714 | |
// test_regex_func_replace1 checks `replace_by_fn` with the `my_repl_1` callback:
// every 'a(b+)' match must come back as 'a' plus the upper-cased run of b.
fn test_regex_func_replace1() {
	mut re := regex.regex_opt(r'a(b+)') or { panic(err) }
	replaced := re.replace_by_fn('abbabbbabbbbaabba', my_repl_1)
	assert replaced == 'aBBaBBBaBBBBaaBBa'
}
723 | |
// my_repl is a `replace_by_fn` callback: for each of the groups 0, 1 and 2 it
// keeps the first byte of the captured text and appends an 'X' to it.
fn my_repl(re regex.RE, in_txt string, start int, end int) string {
	mut out := ''
	for group_id in 0 .. 3 {
		out += re.get_group_by_id(in_txt, group_id)[0..1] + 'X'
	}
	return out
}
730 | |
// test_regex_func_replace checks `replace_by_fn` on a text made of three lines.
// Every line is the same JSON-like fragment followed by a random length prefix
// of `filler`; the fragment must be rewritten by `my_repl`, the filler kept.
fn test_regex_func_replace() {
	filler := "E il primo dei tre regni dell'Oltretomba cristiano visitato da Dante nel corso del viaggio, con la guida di Virgilio."
	txt := r'"content": "They dont necessarily flag "you will be buying these shares on margin!"", "channel_id"'
	query := r'"(content":\s+")(.*)(, "channel_id")'
	mut re := regex.regex_opt(query) or { panic(err) }

	mut input := ''
	mut expected := ''
	for _ in 0 .. 3 {
		// between 10 and 29 bytes of filler, shared by the input and the expected line
		cut := int(10 + rand.u32() % 20)
		tail := filler[0..cut] + '\n'
		input += txt + tail
		expected += 'cXTX,X' + tail
	}

	result := re.replace_by_fn(input, my_repl)
	if debug {
		eprintln(result)
		eprintln(expected)
	}
	assert result == expected
}
754 | |
// rest_regex_replace_n checks `replace_n` for several replacement counts.
// The expectations below encode: 0 leaves the text untouched, a positive count
// replaces that many matches from the left, a negative one from the right.
// NOTE(review): the name starts with `rest_` and not `test_`, so the test runner
// presumably does not pick this function up - confirm whether that is intended.
fn rest_regex_replace_n() {
	s := 'dario 1234 pepep 23454 pera'
	mut re := regex.regex_opt(r'\d+') or { panic(err) }

	counts := [0, -1, 1, 2, -2, 3, -3]
	expected := [
		'dario 1234 pepep 23454 pera',
		'dario 1234 pepep [repl] pera',
		'dario [repl] pepep 23454 pera',
		'dario [repl] pepep [repl] pera',
		'dario [repl] pepep [repl] pera',
		'dario [repl] pepep [repl] pera',
		'dario [repl] pepep [repl] pera',
	]
	for i, count in counts {
		assert re.replace_n(s, '[repl]', count) == expected[i]
	}

	// mut res := re.replace_n(s, "[repl]", -1)
	// println("source: ${s}")
	// println("res : ${res}")
}
773 | |
// test_quantifier_sequences_list contains queries with wrong quantifier sequences;
// `test_quantifier_sequences` expects each of them to be rejected as a syntax error.
const test_quantifier_sequences_list = [
	r'+{3}.*+{3}',
	r'+{3}.*?{3}',
	r'+{3}.**{3}',
	r'+{3}.*\+{3}*',
	r'+{3}.*\+{3}+',
	r'+{3}.*\+{3}??',
	r'+{3}.*\+{3}{4}',
]
786 | |
// test_quantifier_sequences checks that every query in
// `test_quantifier_sequences_list` is rejected by `regex.regex_base`
// with `regex.err_syntax_error`.
fn test_quantifier_sequences() {
	for pattern in test_quantifier_sequences_list {
		// only the error code matters here: the regex object and the error position are discarded
		_, re_err, _ := regex.regex_base(pattern)
		if re_err != regex.err_syntax_error {
			eprintln('pattern: ${pattern} => ${re_err}')
		}
		assert re_err == regex.err_syntax_error
	}
}
796 | |
// Test_find_groups is one row of `find_groups_test_suite`: the bounds expected
// from `re.find` plus the content expected in `re.groups` after the call.
struct Test_find_groups {
	src string // text the query is run against
	q   string // regex query
	s   int    // start index
	e   int    // end index
	res []int  // groups indexes
}
805 | |
806 | // vfmt off |
// find_groups_test_suite is consumed by `test_groups_in_find`: for each query
// it gives the expected match bounds and the expected group indexes.
const find_groups_test_suite = [
	Test_find_groups{
		"aabbbccccdd",
		r"(b+)(c+)",
		2,
		9,
		[2, 5, 5, 9],
	},
	Test_find_groups{
		"aabbbccccdd",
		r"(a+).*(c+)",
		0,
		9,
		[0, 2, 5, 9],
	},
	Test_find_groups{
		"aabbbccccdd",
		r"((b+).*)(d+)",
		2,
		11,
		[2, 9, 2, 5, 9, 11],
	},
]
832 | // vfmt on |
833 | |
// test_groups_in_find runs `re.find` for every row of `find_groups_test_suite`
// and compares the returned bounds and `re.groups` with the expected values.
fn test_groups_in_find() {
	for tc in find_groups_test_suite {
		mut re := regex.regex_opt(tc.q) or { panic(err) }
		first, last := re.find(tc.src)
		// Debug print do not remove!!
		/*
		println("---------")
		println("src_text:[${tc.src}]")
		println("query :[${tc.q}]")
		println("[${first}, ${last}]")
		println(re.groups)
		mut gi := 0
		for gi < re.groups.len {
			if re.groups[gi] >= 0 {
				println('${gi / 2} :[${tc.src[re.groups[gi]..re.groups[gi + 1]]}]')
			}
			gi += 2
		}
		*/
		// check
		assert first == tc.s
		assert last == tc.e
		assert re.groups == tc.res
	}
}
861 | |
// err_query_list contains the queries that `test_errors` expects `regex.regex_base`
// to refuse, i.e. to report with a code different from `regex.compile_ok`.
const err_query_list = [
	r'([a]|[b])*',
]
867 | |
// test_errors counts how many queries of `err_query_list` fail to compile
// and requires that all of them do.
fn test_errors() {
	mut rejected := 0
	for q in err_query_list {
		_, status, _ := regex.regex_base(q)
		if status == regex.compile_ok {
			// compiled fine: not counted as rejected
			continue
		}
		rejected++
	}
	assert rejected == err_query_list.len
}
878 | |
// test_long_query builds two very long queries from a random string of 32768
// bytes and checks that each one matches that string up to its end:
//  1. every byte wrapped in its own group: (a)(b)(c)...
//  2. every byte opening a nested group:   (a(b(c...)))
fn test_long_query() {
	test_len := 32768
	mut sb := strings.new_builder(test_len * 3)
	text := rand.string(test_len)

	// test 1: flat sequence of single byte groups
	for ch in text {
		sb.write_u8(`(`)
		sb.write_u8(ch)
		sb.write_u8(`)`)
	}
	mut query := sb.str()

	// println(text)
	// println(query)

	mut re := regex.regex_opt(query) or { panic(err) }
	mut first, mut last := re.match_string(text)
	// println("$first, $last")
	assert first >= 0 && last == text.len

	// test 2: all the groups are opened first and closed together at the end
	sb.clear()
	for ch in text {
		sb.write_u8(`(`)
		sb.write_u8(ch)
	}
	for _ in 0 .. text.len {
		sb.write_u8(`)`)
	}
	query = sb.str()
	re = regex.regex_opt(query) or { panic(err) }
	first, last = re.match_string(text)
	// println("$first, $last")
	assert first >= 0 && last == text.len
}
916 | |
// Test_negation_group is one row of `negation_groups`: a source text and
// whether the negation group query is expected to match it.
struct Test_negation_group {
	src string // text the query is run against
	res bool   // true when a match is expected
}
921 | |
// negation_groups is consumed by `test_negation_groups`: each row says whether
// the query used there is expected to match the given text.
const negation_groups = [
	Test_negation_group{'automobile', false},
	Test_negation_group{'botomobile', true},
	Test_negation_group{'auto_caravan', false},
	Test_negation_group{'moto_mobile', true},
	Test_negation_group{'pippole', true},
	Test_negation_group{'boring test', false},
]
932 | |
// test_negation_groups compiles a query that starts with the `(?!auto)` group and
// checks, for every row of `negation_groups`, that `match_string` reports a match
// (start >= 0) exactly when the row expects one.
fn test_negation_groups() {
	query := r'(?!auto)\w+le'
	mut re := regex.regex_opt(query) or { panic(err) }
	for test in negation_groups {
		// only the start index is needed to know if there was a match
		start, _ := re.match_string(test.src)
		assert (start >= 0) == test.res
	}
}