Gitly

v / vlib / regex

Raw file | 549 loc (493 sloc) | 13.13 KB | Latest commit hash 42a9eaac0


1 /*
2 regex 1.0 alpha
3 
4 Copyright (c) 2019-2023 Dario Deledda. All rights reserved.
5 Use of this source code is governed by an MIT license
6 that can be found in the LICENSE file.
7 */
8 module regex
9 
10 import strings
11 
12 /******************************************************************************
13 *
14 * Inits
15 *
16 ******************************************************************************/
17 // regex_base returns a regex object (`RE`) generated from `pattern` string and
18 // detailed information in re_err, err_pos, if an error occurred.
19 pub fn regex_base(pattern string) (RE, int, int) {
20     // init regex
21     mut re := RE{}
22     re.prog = []Token{len: pattern.len + 1} // max program length, can not be longer then the pattern
23     re.cc = []CharClass{len: pattern.len} // can not be more char class the the length of the pattern
24     re.group_csave_flag = false // enable continuos group saving
25     re.group_max_nested = pattern.len >> 1 // set max 128 group nested
26     re.group_max = pattern.len >> 1 // we can't have more groups than the half of the pattern legth
27 
28     re.group_stack = []int{len: re.group_max, init: -1}
29     re.group_data = []int{len: re.group_max, init: -1}
30 
31     re_err, err_pos := re.impl_compile(pattern)
32     return re, re_err, err_pos
33 }
34 
35 /******************************************************************************
36 *
37 * Utilities
38 *
39 ******************************************************************************/
40 // get_group_bounds_by_name get a group boundaries by its name
41 pub fn (re RE) get_group_bounds_by_name(group_name string) (int, int) {
42     if group_name in re.group_map {
43         tmp_index := re.group_map[group_name] - 1
44         start := re.groups[tmp_index * 2]
45         end := re.groups[tmp_index * 2 + 1]
46         return start, end
47     }
48     return -1, -1
49 }
50 
51 // get_group_by_name get a group boundaries by its name
52 pub fn (re RE) get_group_by_name(in_txt string, group_name string) string {
53     if group_name in re.group_map {
54         tmp_index := re.group_map[group_name] - 1
55         start := re.groups[tmp_index * 2]
56         end := re.groups[tmp_index * 2 + 1]
57         if start >= 0 && end > start {
58             return in_txt[start..end]
59         }
60     }
61     return ''
62 }
63 
64 // get_group_by_id get a group string by its id
65 pub fn (re RE) get_group_by_id(in_txt string, group_id int) string {
66     if group_id < (re.groups.len >> 1) {
67         index := group_id * 2
68         start := re.groups[index]
69         end := re.groups[index + 1]
70         if start >= 0 && end > start {
71             return in_txt[start..end]
72         }
73     }
74     return ''
75 }
76 
77 // get_group_by_id get a group boundaries by its id
78 pub fn (re RE) get_group_bounds_by_id(group_id int) (int, int) {
79     if group_id < re.group_count {
80         index := group_id * 2
81         return re.groups[index], re.groups[index + 1]
82     }
83     return -1, -1
84 }
85 
86 pub struct Re_group {
87 pub:
88     start int = -1
89     end   int = -1
90 }
91 
92 // get_group_list return a list of Re_group for the found groups
93 pub fn (re RE) get_group_list() []Re_group {
94     mut res := []Re_group{len: re.groups.len >> 1}
95     mut gi := 0
96     // println("len: ${re.groups.len} groups: ${re.groups}")
97 
98     for gi < re.groups.len {
99         if re.groups[gi] >= 0 {
100             txt_st := re.groups[gi]
101             txt_en := re.groups[gi + 1]
102 
103             // println("#${gi/2} start: ${re.groups[gi]} end: ${re.groups[gi + 1]} ")
104             if txt_st >= 0 && txt_en > txt_st {
105                 tmp := Re_group{
106                     start: re.groups[gi]
107                     end: re.groups[gi + 1]
108                 }
109                 // println(tmp)
110                 res[gi >> 1] = tmp
111             } else {
112                 res[gi >> 1] = Re_group{}
113             }
114         }
115         gi += 2
116     }
117     return res
118 }
119 
120 /******************************************************************************
121 *
122 * Matchers
123 *
124 ******************************************************************************/
125 // match_string Match the pattern with the in_txt string
126 [direct_array_access]
127 pub fn (re &RE) match_string(in_txt string) (int, int) {
128     unsafe {
129         start, mut end := re.match_base(in_txt.str, in_txt.len + 1)
130         if end > in_txt.len {
131             end = in_txt.len
132         }
133 
134         if start >= 0 && end > start {
135             if (re.flag & f_ms) != 0 && start > 0 {
136                 return no_match_found, 0
137             }
138             if (re.flag & f_me) != 0 && end < in_txt.len {
139                 if in_txt[end] in new_line_list {
140                     return start, end
141                 }
142                 return no_match_found, 0
143             }
144             return start, end
145         }
146         return start, end
147     }
148 }
149 
150 // matches_string Checks if the pattern matches the in_txt string
151 pub fn (re &RE) matches_string(in_txt string) bool {
152     start, _ := re.match_string(in_txt)
153     return start != no_match_found
154 }
155 
156 /******************************************************************************
157 *
158 * Finders
159 *
160 ******************************************************************************/
161 /*
162 // find internal implementation HERE for reference do not remove!!
163 [direct_array_access]
164 fn (mut re RE) find_imp(in_txt string) (int,int) {
165     old_flag := re.flag
166     re.flag |= f_src  // enable search mode
167 
168     start, mut end := re.match_base(in_txt.str, in_txt.len + 1)
169     //print("Find [$start,$end] '${in_txt[start..end]}'")
170     if end > in_txt.len {
171         end = in_txt.len
172     }
173     re.flag = old_flag
174 
175     if start >= 0 && end > start {
176         return start, end
177     }
178     return no_match_found, 0
179 }
180 */
181 
182 // find try to find the first match in the input string
183 [direct_array_access]
184 pub fn (mut re RE) find(in_txt string) (int, int) {
185     // old_flag := re.flag
186     // re.flag |= f_src  // enable search mode
187 
188     mut i := 0
189     for i < in_txt.len {
190         mut s := -1
191         mut e := -1
192         unsafe {
193             // tmp_str := tos(in_txt.str + i, in_txt.len - i)
194             // println("Check: [$tmp_str]")
195             s, e = re.match_base(in_txt.str + i, in_txt.len - i + 1)
196 
197             if s >= 0 && e > s {
198                 // println("find match in: ${i+s},${i+e} [${in_txt[i+s..i+e]}]")
199                 // re.flag = old_flag
200                 mut gi := 0
201                 for gi < re.groups.len {
202                     re.groups[gi] += i
203                     gi++
204                 }
205                 // when ^ (f_ms) is used, it must match on beginning of string
206                 if (re.flag & f_ms) != 0 && s > 0 {
207                     break
208                 }
209                 // when $ (f_me) is used, it must match on ending of string
210                 if (re.flag & f_me) != 0 && i + e < in_txt.len {
211                     break
212                 }
213                 return i + s, i + e
214             }
215             i++
216         }
217     }
218     // re.flag = old_flag
219     return -1, -1
220 }
221 
222 // find try to find the first match in the input string strarting from start index
223 [direct_array_access]
224 pub fn (mut re RE) find_from(in_txt string, start int) (int, int) {
225     old_flag := re.flag
226     // re.flag |= f_src // enable search mode
227 
228     mut i := start
229     if i < 0 {
230         return -1, -1
231     }
232     for i < in_txt.len {
233         //--- speed references ---
234 
235         mut s := -1
236         mut e := -1
237 
238         unsafe {
239             tmp_str := tos(in_txt.str + i, in_txt.len - i)
240             s, e = re.match_string(tmp_str)
241         }
242         //------------------------
243         // s,e = re.find_imp(in_txt[i..])
244         //------------------------
245         if s >= 0 && e > s {
246             // println("find match in: ${i+s},${i+e} [${in_txt[i+s..i+e]}]")
247             re.flag = old_flag
248             mut gi := 0
249             for gi < re.groups.len {
250                 re.groups[gi] += i
251                 gi++
252             }
253             return i + s, i + e
254         } else {
255             i++
256         }
257     }
258     re.flag = old_flag
259     return -1, -1
260 }
261 
262 // find_all find all the non overlapping occurrences of the match pattern and return the start and end index of the match
263 //
264 // Usage:
265 // ```v
266 // blurb := 'foobar boo steelbar toolbox foot tooooot'
267 // mut re := regex.regex_opt('f|t[eo]+')?
268 // res := re.find_all(blurb) // [0, 3, 12, 15, 20, 23, 28, 31, 33, 39]
269 // ```
270 [direct_array_access]
271 pub fn (mut re RE) find_all(in_txt string) []int {
272     // old_flag := re.flag
273     // re.flag |= f_src // enable search mode
274 
275     mut i := 0
276     mut res := []int{}
277 
278     for i < in_txt.len {
279         mut s := -1
280         mut e := -1
281         unsafe {
282             // tmp_str := in_txt[i..]
283             // tmp_str := tos(in_txt.str + i, in_txt.len - i)
284             // println("Check: [$tmp_str]")
285             s, e = re.match_base(in_txt.str + i, in_txt.len + 1 - i)
286 
287             if s >= 0 && e > s {
288                 res << i + s
289                 res << i + e
290                 i += e
291                 continue
292             }
293             /*
294             if e > 0 {
295                 i += e
296                 continue
297             }
298             */
299             i++
300         }
301     }
302     // re.flag = old_flag
303     return res
304 }
305 
306 // split returns the sections of string around the regex
307 //
308 // Usage:
309 // ```v
310 // blurb := 'foobar boo steelbar toolbox foot tooooot'
311 // mut re := regex.regex_opt('f|t[eo]+')?
312 // res := re.split(blurb) // ['bar boo s', 'lbar ', 'lbox ', 't ', 't']
313 // ```
314 pub fn (mut re RE) split(in_txt string) []string {
315     pos := re.find_all(in_txt)
316 
317     mut sections := []string{cap: pos.len / 2 + 1}
318 
319     if pos.len == 0 {
320         return [in_txt]
321     }
322     for i := 0; i < pos.len; i += 2 {
323         if pos[i] == 0 {
324             continue
325         }
326         if i == 0 {
327             sections << in_txt[..pos[i]]
328         } else {
329             sections << in_txt[pos[i - 1]..pos[i]]
330         }
331     }
332     if pos[pos.len - 1] != in_txt.len {
333         sections << in_txt[pos[pos.len - 1]..]
334     }
335     return sections
336 }
337 
338 // find_all_str find all the non overlapping occurrences of the match pattern, return a string list
339 [direct_array_access]
340 pub fn (mut re RE) find_all_str(in_txt string) []string {
341     // old_flag := re.flag
342     // re.flag |= f_src // enable search mode
343 
344     mut i := 0
345     mut res := []string{}
346 
347     for i < in_txt.len {
348         mut s := -1
349         mut e := -1
350         unsafe {
351             // tmp_str := in_txt[i..]
352             // tmp_str := tos(in_txt.str + i, in_txt.len - i)
353             // println("Check: [$tmp_str]")
354             s, e = re.match_base(in_txt.str + i, in_txt.len + 1 - i)
355 
356             if s >= 0 && e > s {
357                 tmp_str := tos(in_txt.str + i, in_txt.len - i)
358                 mut tmp_e := if e > tmp_str.len { tmp_str.len } else { e }
359                 // println("Found: $s:$e [${tmp_str[s..e]}]")
360                 res << tmp_str[..tmp_e]
361                 i += e
362                 continue
363             }
364         }
365         /*
366         if e > 0 {
367             i += e
368             continue
369         }
370         */
371         i++
372     }
373     // re.flag = old_flag
374     return res
375 }
376 
377 /******************************************************************************
378 *
379 * Replacers
380 *
381 ******************************************************************************/
382 // replace_simple return a string where the matches are replaced with the replace string
383 pub fn (mut re RE) replace_simple(in_txt string, repl string) string {
384     pos := re.find_all(in_txt)
385 
386     if pos.len > 0 {
387         mut res := ''
388         mut i := 0
389 
390         mut s1 := 0
391         mut e1 := in_txt.len
392 
393         for i < pos.len {
394             e1 = pos[i]
395             res += in_txt[s1..e1] + repl
396             s1 = pos[i + 1]
397             i += 2
398         }
399 
400         res += in_txt[s1..]
401         return res
402     }
403     return in_txt
404 }
405 
406 // type of function used for custom replace
407 // in_txt  source text
408 // start   index of the start of the match in in_txt
409 // end     index of the end   of the match in in_txt
410 // the match is in in_txt[start..end]
411 pub type FnReplace = fn (re RE, in_txt string, start int, end int) string
412 
413 // replace_by_fn return a string where the matches are replaced with the string from the repl_fn callback function
414 pub fn (mut re RE) replace_by_fn(in_txt string, repl_fn FnReplace) string {
415     mut i := 0
416     mut res := strings.new_builder(in_txt.len)
417     mut last_end := 0
418 
419     for i < in_txt.len {
420         // println("Find Start. $i [${in_txt[i..]}]")
421         s, e := re.find_from(in_txt, i)
422         // println("Find End.")
423         if s >= 0 && e > s {
424             // println("find match in: ${s},${e} [${in_txt[s..e]}]")
425 
426             if last_end < s {
427                 res.write_string(in_txt[last_end..s])
428             }
429             /*
430             for g_i in 0 .. re.group_count {
431                 re.groups[g_i * 2] += i
432                 re.groups[(g_i * 2) + 1] += i
433             }
434             */
435             repl := repl_fn(re, in_txt, s, e)
436             // println("repl res: $repl")
437             res.write_string(repl)
438             // res.write_string("[[${in_txt[s..e]}]]")
439 
440             last_end = e
441             i = e
442         } else {
443             break
444             // i++
445         }
446         // println(i)
447     }
448     if last_end >= 0 && last_end < in_txt.len {
449         res.write_string(in_txt[last_end..])
450     }
451     return res.str()
452 }
453 
454 fn (re RE) parsed_replace_string(in_txt string, repl string) string {
455     str_lst := repl.split('\\')
456     mut res := str_lst[0]
457     mut i := 1
458     for i < str_lst.len {
459         tmp := str_lst[i]
460         // println("tmp: ${tmp}")
461         if tmp.len > 0 && tmp[0] >= `0` && tmp[0] <= `9` {
462             group_id := int(tmp[0] - `0`)
463             group := re.get_group_by_id(in_txt, group_id)
464             // println("group: $group_id [$group]")
465             res += '${group}${tmp[1..]}'
466         } else {
467             res += '\\' + tmp
468         }
469         i++
470     }
471     return res
472 }
473 
474 // replace return a string where the matches are replaced with the repl_str string,
475 // this function support use groups in the replace string
476 pub fn (mut re RE) replace(in_txt string, repl_str string) string {
477     mut i := 0
478     mut res := strings.new_builder(in_txt.len)
479     mut last_end := 0
480 
481     for i < in_txt.len {
482         // println("Find Start. $i [${in_txt[i..]}]")
483         s, e := re.find_from(in_txt, i)
484         // println("Find End.")
485         if s >= 0 && e > s {
486             // println("find match in: ${s},${e} [${in_txt[s..e]}]")
487 
488             if last_end < s {
489                 res.write_string(in_txt[last_end..s])
490             }
491             /*
492             for g_i in 0 .. re.group_count {
493                 re.groups[g_i * 2] += i
494                 re.groups[(g_i * 2) + 1] += i
495             }
496             */
497             // repl := repl_fn(re, in_txt, s, e)
498             repl := re.parsed_replace_string(in_txt, repl_str)
499             // println("repl res: $repl")
500             res.write_string(repl)
501             // res.write_string("[[${in_txt[s..e]}]]")
502 
503             last_end = e
504             i = e
505         } else {
506             break
507             // i++
508         }
509         // println(i)
510     }
511     if last_end >= 0 && last_end < in_txt.len {
512         res.write_string(in_txt[last_end..])
513     }
514     return res.str()
515 }
516 
517 // replace_n return a string where the firts count matches are replaced with the repl_str string,
518 // if count is > 0 the replace began from the start of the string toward the end
519 // if count is < 0 the replace began from the end of the string toward the start
520 // if count is 0 do nothing
521 pub fn (mut re RE) replace_n(in_txt string, repl_str string, count int) string {
522     mut i := 0
523     mut index := 0
524     mut i_p := 0
525     mut res := strings.new_builder(in_txt.len)
526     mut lst := re.find_all(in_txt)
527 
528     if count < 0 { // start from the right of the string
529         lst = unsafe { lst#[count * 2..] } // limitate the number of substitions
530     } else if count > 0 { // start from the left of the string
531         lst = unsafe { lst#[..count * 2] } // limitate the number of substitions
532     } else if count == 0 { // no replace
533         return in_txt
534     }
535 
536     // println("found: ${lst}")
537     for index < lst.len {
538         i = lst[index]
539         res.write_string(in_txt[i_p..i])
540         res.write_string(repl_str)
541         index++
542         i_p = lst[index]
543         index++
544     }
545     i = i_p
546     res.write_string(in_txt[i..])
547 
548     return res.str()
549 }

1	/*
2	regex 1.0 alpha
3
4	Copyright (c) 2019-2023 Dario Deledda. All rights reserved.
5	Use of this source code is governed by an MIT license
6	that can be found in the LICENSE file.
7	*/
8	module regex
9
10	import strings
11
12	/******************************************************************************
13	*
14	* Inits
15	*
16	******************************************************************************/
17	// regex_base returns a regex object (`RE`) generated from `pattern` string and
18	// detailed information in re_err, err_pos, if an error occurred.
19	pub fn regex_base(pattern string) (RE, int, int) {
20	// init regex
21	mut re := RE{}
22	re.prog = []Token{len: pattern.len + 1} // max program length, can not be longer then the pattern
23	re.cc = []CharClass{len: pattern.len} // can not be more char class the the length of the pattern
24	re.group_csave_flag = false // enable continuos group saving
25	re.group_max_nested = pattern.len >> 1 // set max 128 group nested
26	re.group_max = pattern.len >> 1 // we can't have more groups than the half of the pattern legth
27
28	re.group_stack = []int{len: re.group_max, init: -1}
29	re.group_data = []int{len: re.group_max, init: -1}
30
31	re_err, err_pos := re.impl_compile(pattern)
32	return re, re_err, err_pos
33	}
34
35	/******************************************************************************
36	*
37	* Utilities
38	*
39	******************************************************************************/
40	// get_group_bounds_by_name get a group boundaries by its name
41	pub fn (re RE) get_group_bounds_by_name(group_name string) (int, int) {
42	if group_name in re.group_map {
43	tmp_index := re.group_map[group_name] - 1
44	start := re.groups[tmp_index * 2]
45	end := re.groups[tmp_index * 2 + 1]
46	return start, end
47	}
48	return -1, -1
49	}
50
51	// get_group_by_name get a group boundaries by its name
52	pub fn (re RE) get_group_by_name(in_txt string, group_name string) string {
53	if group_name in re.group_map {
54	tmp_index := re.group_map[group_name] - 1
55	start := re.groups[tmp_index * 2]
56	end := re.groups[tmp_index * 2 + 1]
57	if start >= 0 && end > start {
58	return in_txt[start..end]
59	}
60	}
61	return ''
62	}
63
64	// get_group_by_id get a group string by its id
65	pub fn (re RE) get_group_by_id(in_txt string, group_id int) string {
66	if group_id < (re.groups.len >> 1) {
67	index := group_id * 2
68	start := re.groups[index]
69	end := re.groups[index + 1]
70	if start >= 0 && end > start {
71	return in_txt[start..end]
72	}
73	}
74	return ''
75	}
76
77	// get_group_by_id get a group boundaries by its id
78	pub fn (re RE) get_group_bounds_by_id(group_id int) (int, int) {
79	if group_id < re.group_count {
80	index := group_id * 2
81	return re.groups[index], re.groups[index + 1]
82	}
83	return -1, -1
84	}
85
86	pub struct Re_group {
87	pub:
88	start int = -1
89	end int = -1
90	}
91
92	// get_group_list return a list of Re_group for the found groups
93	pub fn (re RE) get_group_list() []Re_group {
94	mut res := []Re_group{len: re.groups.len >> 1}
95	mut gi := 0
96	// println("len: ${re.groups.len} groups: ${re.groups}")
97
98	for gi < re.groups.len {
99	if re.groups[gi] >= 0 {
100	txt_st := re.groups[gi]
101	txt_en := re.groups[gi + 1]
102
103	// println("#${gi/2} start: ${re.groups[gi]} end: ${re.groups[gi + 1]} ")
104	if txt_st >= 0 && txt_en > txt_st {
105	tmp := Re_group{
106	start: re.groups[gi]
107	end: re.groups[gi + 1]
108	}
109	// println(tmp)
110	res[gi >> 1] = tmp
111	} else {
112	res[gi >> 1] = Re_group{}
113	}
114	}
115	gi += 2
116	}
117	return res
118	}
119
120	/******************************************************************************
121	*
122	* Matchers
123	*
124	******************************************************************************/
125	// match_string Match the pattern with the in_txt string
126	[direct_array_access]
127	pub fn (re &RE) match_string(in_txt string) (int, int) {
128	unsafe {
129	start, mut end := re.match_base(in_txt.str, in_txt.len + 1)
130	if end > in_txt.len {
131	end = in_txt.len
132	}
133
134	if start >= 0 && end > start {
135	if (re.flag & f_ms) != 0 && start > 0 {
136	return no_match_found, 0
137	}
138	if (re.flag & f_me) != 0 && end < in_txt.len {
139	if in_txt[end] in new_line_list {
140	return start, end
141	}
142	return no_match_found, 0
143	}
144	return start, end
145	}
146	return start, end
147	}
148	}
149
150	// matches_string Checks if the pattern matches the in_txt string
151	pub fn (re &RE) matches_string(in_txt string) bool {
152	start, _ := re.match_string(in_txt)
153	return start != no_match_found
154	}
155
156	/******************************************************************************
157	*
158	* Finders
159	*
160	******************************************************************************/
161	/*
162	// find internal implementation HERE for reference do not remove!!
163	[direct_array_access]
164	fn (mut re RE) find_imp(in_txt string) (int,int) {
165	old_flag := re.flag
166	re.flag \|= f_src // enable search mode
167
168	start, mut end := re.match_base(in_txt.str, in_txt.len + 1)
169	//print("Find [$start,$end] '${in_txt[start..end]}'")
170	if end > in_txt.len {
171	end = in_txt.len
172	}
173	re.flag = old_flag
174
175	if start >= 0 && end > start {
176	return start, end
177	}
178	return no_match_found, 0
179	}
180	*/
181
182	// find try to find the first match in the input string
183	[direct_array_access]
184	pub fn (mut re RE) find(in_txt string) (int, int) {
185	// old_flag := re.flag
186	// re.flag \|= f_src // enable search mode
187
188	mut i := 0
189	for i < in_txt.len {
190	mut s := -1
191	mut e := -1
192	unsafe {
193	// tmp_str := tos(in_txt.str + i, in_txt.len - i)
194	// println("Check: [$tmp_str]")
195	s, e = re.match_base(in_txt.str + i, in_txt.len - i + 1)
196
197	if s >= 0 && e > s {
198	// println("find match in: ${i+s},${i+e} [${in_txt[i+s..i+e]}]")
199	// re.flag = old_flag
200	mut gi := 0
201	for gi < re.groups.len {
202	re.groups[gi] += i
203	gi++
204	}
205	// when ^ (f_ms) is used, it must match on beginning of string
206	if (re.flag & f_ms) != 0 && s > 0 {
207	break
208	}
209	// when $ (f_me) is used, it must match on ending of string
210	if (re.flag & f_me) != 0 && i + e < in_txt.len {
211	break
212	}
213	return i + s, i + e
214	}
215	i++
216	}
217	}
218	// re.flag = old_flag
219	return -1, -1
220	}
221
222	// find try to find the first match in the input string strarting from start index
223	[direct_array_access]
224	pub fn (mut re RE) find_from(in_txt string, start int) (int, int) {
225	old_flag := re.flag
226	// re.flag \|= f_src // enable search mode
227
228	mut i := start
229	if i < 0 {
230	return -1, -1
231	}
232	for i < in_txt.len {
233	//--- speed references ---
234
235	mut s := -1
236	mut e := -1
237
238	unsafe {
239	tmp_str := tos(in_txt.str + i, in_txt.len - i)
240	s, e = re.match_string(tmp_str)
241	}
242	//------------------------
243	// s,e = re.find_imp(in_txt[i..])
244	//------------------------
245	if s >= 0 && e > s {
246	// println("find match in: ${i+s},${i+e} [${in_txt[i+s..i+e]}]")
247	re.flag = old_flag
248	mut gi := 0
249	for gi < re.groups.len {
250	re.groups[gi] += i
251	gi++
252	}
253	return i + s, i + e
254	} else {
255	i++
256	}
257	}
258	re.flag = old_flag
259	return -1, -1
260	}
261
262	// find_all find all the non overlapping occurrences of the match pattern and return the start and end index of the match
263	//
264	// Usage:
265	// ```v
266	// blurb := 'foobar boo steelbar toolbox foot tooooot'
267	// mut re := regex.regex_opt('f\|t[eo]+')?
268	// res := re.find_all(blurb) // [0, 3, 12, 15, 20, 23, 28, 31, 33, 39]
269	// ```
270	[direct_array_access]
271	pub fn (mut re RE) find_all(in_txt string) []int {
272	// old_flag := re.flag
273	// re.flag \|= f_src // enable search mode
274
275	mut i := 0
276	mut res := []int{}
277
278	for i < in_txt.len {
279	mut s := -1
280	mut e := -1
281	unsafe {
282	// tmp_str := in_txt[i..]
283	// tmp_str := tos(in_txt.str + i, in_txt.len - i)
284	// println("Check: [$tmp_str]")
285	s, e = re.match_base(in_txt.str + i, in_txt.len + 1 - i)
286
287	if s >= 0 && e > s {
288	res << i + s
289	res << i + e
290	i += e
291	continue
292	}
293	/*
294	if e > 0 {
295	i += e
296	continue
297	}
298	*/
299	i++
300	}
301	}
302	// re.flag = old_flag
303	return res
304	}
305
306	// split returns the sections of string around the regex
307	//
308	// Usage:
309	// ```v
310	// blurb := 'foobar boo steelbar toolbox foot tooooot'
311	// mut re := regex.regex_opt('f\|t[eo]+')?
312	// res := re.split(blurb) // ['bar boo s', 'lbar ', 'lbox ', 't ', 't']
313	// ```
314	pub fn (mut re RE) split(in_txt string) []string {
315	pos := re.find_all(in_txt)
316
317	mut sections := []string{cap: pos.len / 2 + 1}
318
319	if pos.len == 0 {
320	return [in_txt]
321	}
322	for i := 0; i < pos.len; i += 2 {
323	if pos[i] == 0 {
324	continue
325	}
326	if i == 0 {
327	sections << in_txt[..pos[i]]
328	} else {
329	sections << in_txt[pos[i - 1]..pos[i]]
330	}
331	}
332	if pos[pos.len - 1] != in_txt.len {
333	sections << in_txt[pos[pos.len - 1]..]
334	}
335	return sections
336	}
337
338	// find_all_str find all the non overlapping occurrences of the match pattern, return a string list
339	[direct_array_access]
340	pub fn (mut re RE) find_all_str(in_txt string) []string {
341	// old_flag := re.flag
342	// re.flag \|= f_src // enable search mode
343
344	mut i := 0
345	mut res := []string{}
346
347	for i < in_txt.len {
348	mut s := -1
349	mut e := -1
350	unsafe {
351	// tmp_str := in_txt[i..]
352	// tmp_str := tos(in_txt.str + i, in_txt.len - i)
353	// println("Check: [$tmp_str]")
354	s, e = re.match_base(in_txt.str + i, in_txt.len + 1 - i)
355
356	if s >= 0 && e > s {
357	tmp_str := tos(in_txt.str + i, in_txt.len - i)
358	mut tmp_e := if e > tmp_str.len { tmp_str.len } else { e }
359	// println("Found: $s:$e [${tmp_str[s..e]}]")
360	res << tmp_str[..tmp_e]
361	i += e
362	continue
363	}
364	}
365	/*
366	if e > 0 {
367	i += e
368	continue
369	}
370	*/
371	i++
372	}
373	// re.flag = old_flag
374	return res
375	}
376
377	/******************************************************************************
378	*
379	* Replacers
380	*
381	******************************************************************************/
382	// replace_simple return a string where the matches are replaced with the replace string
383	pub fn (mut re RE) replace_simple(in_txt string, repl string) string {
384	pos := re.find_all(in_txt)
385
386	if pos.len > 0 {
387	mut res := ''
388	mut i := 0
389
390	mut s1 := 0
391	mut e1 := in_txt.len
392
393	for i < pos.len {
394	e1 = pos[i]
395	res += in_txt[s1..e1] + repl
396	s1 = pos[i + 1]
397	i += 2
398	}
399
400	res += in_txt[s1..]
401	return res
402	}
403	return in_txt
404	}
405
406	// type of function used for custom replace
407	// in_txt source text
408	// start index of the start of the match in in_txt
409	// end index of the end of the match in in_txt
410	// the match is in in_txt[start..end]
411	pub type FnReplace = fn (re RE, in_txt string, start int, end int) string
412
413	// replace_by_fn return a string where the matches are replaced with the string from the repl_fn callback function
414	pub fn (mut re RE) replace_by_fn(in_txt string, repl_fn FnReplace) string {
415	mut i := 0
416	mut res := strings.new_builder(in_txt.len)
417	mut last_end := 0
418
419	for i < in_txt.len {
420	// println("Find Start. $i [${in_txt[i..]}]")
421	s, e := re.find_from(in_txt, i)
422	// println("Find End.")
423	if s >= 0 && e > s {
424	// println("find match in: ${s},${e} [${in_txt[s..e]}]")
425
426	if last_end < s {
427	res.write_string(in_txt[last_end..s])
428	}
429	/*
430	for g_i in 0 .. re.group_count {
431	re.groups[g_i 2] += i*
432	re.groups[(g_i 2) + 1] += i*
433	}
434	*/
435	repl := repl_fn(re, in_txt, s, e)
436	// println("repl res: $repl")
437	res.write_string(repl)
438	// res.write_string("[[${in_txt[s..e]}]]")
439
440	last_end = e
441	i = e
442	} else {
443	break
444	// i++
445	}
446	// println(i)
447	}
448	if last_end >= 0 && last_end < in_txt.len {
449	res.write_string(in_txt[last_end..])
450	}
451	return res.str()
452	}
453
454	fn (re RE) parsed_replace_string(in_txt string, repl string) string {
455	str_lst := repl.split('\\')
456	mut res := str_lst[0]
457	mut i := 1
458	for i < str_lst.len {
459	tmp := str_lst[i]
460	// println("tmp: ${tmp}")
461	if tmp.len > 0 && tmp[0] >= `0` && tmp[0] <= `9` {
462	group_id := int(tmp[0] - `0`)
463	group := re.get_group_by_id(in_txt, group_id)
464	// println("group: $group_id [$group]")
465	res += '${group}${tmp[1..]}'
466	} else {
467	res += '\\' + tmp
468	}
469	i++
470	}
471	return res
472	}
473
474	// replace return a string where the matches are replaced with the repl_str string,
475	// this function support use groups in the replace string
476	pub fn (mut re RE) replace(in_txt string, repl_str string) string {
477	mut i := 0
478	mut res := strings.new_builder(in_txt.len)
479	mut last_end := 0
480
481	for i < in_txt.len {
482	// println("Find Start. $i [${in_txt[i..]}]")
483	s, e := re.find_from(in_txt, i)
484	// println("Find End.")
485	if s >= 0 && e > s {
486	// println("find match in: ${s},${e} [${in_txt[s..e]}]")
487
488	if last_end < s {
489	res.write_string(in_txt[last_end..s])
490	}
491	/*
492	for g_i in 0 .. re.group_count {
493	re.groups[g_i 2] += i*
494	re.groups[(g_i 2) + 1] += i*
495	}
496	*/
497	// repl := repl_fn(re, in_txt, s, e)
498	repl := re.parsed_replace_string(in_txt, repl_str)
499	// println("repl res: $repl")
500	res.write_string(repl)
501	// res.write_string("[[${in_txt[s..e]}]]")
502
503	last_end = e
504	i = e
505	} else {
506	break
507	// i++
508	}
509	// println(i)
510	}
511	if last_end >= 0 && last_end < in_txt.len {
512	res.write_string(in_txt[last_end..])
513	}
514	return res.str()
515	}
516
517	// replace_n return a string where the firts count matches are replaced with the repl_str string,
518	// if count is > 0 the replace began from the start of the string toward the end
519	// if count is < 0 the replace began from the end of the string toward the start
520	// if count is 0 do nothing
521	pub fn (mut re RE) replace_n(in_txt string, repl_str string, count int) string {
522	mut i := 0
523	mut index := 0
524	mut i_p := 0
525	mut res := strings.new_builder(in_txt.len)
526	mut lst := re.find_all(in_txt)
527
528	if count < 0 { // start from the right of the string
529	lst = unsafe { lst#[count * 2..] } // limitate the number of substitions
530	} else if count > 0 { // start from the left of the string
531	lst = unsafe { lst#[..count * 2] } // limitate the number of substitions
532	} else if count == 0 { // no replace
533	return in_txt
534	}
535
536	// println("found: ${lst}")
537	for index < lst.len {
538	i = lst[index]
539	res.write_string(in_txt[i_p..i])
540	res.write_string(repl_str)
541	index++
542	i_p = lst[index]
543	index++
544	}
545	i = i_p
546	res.write_string(in_txt[i..])
547
548	return res.str()
549	}