v / vlib / builtin
Raw file | 2359 loc (2185 sloc) | 55.54 KB | Latest commit hash 17d65db82
1// Copyright (c) 2019-2023 Alexander Medvednikov. All rights reserved.
2// Use of this source code is governed by an MIT license
3// that can be found in the LICENSE file.
4module builtin
5
6import strconv
7
8/*
9Note: A V string should be/is immutable from the point of view of
10 V user programs after it is first created. A V string is
11 also slightly larger than the equivalent C string because
12 the V string also has an integer length attached.
13
14 This tradeoff is made, since V strings are created just *once*,
15 but potentially used *many times* over their lifetime.
16
17 The V string implementation uses a struct, that has a .str field,
18 which points to a C style 0 terminated memory block. Although not
19 strictly necessary from the V point of view, that additional 0
20 is *very useful for C interoperability*.
21
22 The V string implementation also has an integer .len field,
23 containing the length of the .str field, excluding the
24 terminating 0 (just like the C's strlen(s) would do).
25
26 The 0 ending of .str, and the .len field, mean that in practice:
27 a) a V string s can be used very easily, wherever a
28 C string is needed, just by passing s.str,
29 without a need for further conversion/copying.
30
31 b) where strlen(s) is needed, you can just pass s.len,
32 without having to constantly recompute the length of s
33 *over and over again* like some C programs do. This is because
34 V strings are immutable and so their length does not change.
35
36 Ordinary V code *does not need* to be concerned with the
37 additional 0 in the .str field. The 0 *must* be put there by the
38 low level string creating functions inside this module.
39
40 Failing to do this will lead to programs that work most of the
41 time, when used with pure V functions, but fail in strange ways,
42 when used with modules using C functions (for example os and so on).
43*/
// string is the builtin V string type: a pointer to a 0 terminated block of
// bytes, the length of that block, and a flag recording how the block is owned.
pub struct string {
pub:
	str &u8 = 0 // points to a C style 0 terminated string of bytes.
	len int // the length of the .str field, excluding the ending 0 byte. It is always equal to strlen(.str).
	// NB string.is_lit is an enumeration of the following:
	// .is_lit == 0 => a fresh string, should be freed by autofree
	// .is_lit == 1 => a literal string from .rodata, should NOT be freed
	// .is_lit == -98761234 => already freed string, protects against double frees.
	// ---------> ^^^^^^^^^ calling free on these is a bug.
	// Any other value means that the string has been corrupted.
mut:
	is_lit int
}
57
// runes decodes the string `s` into an array of its utf runes.
// Use it when you need random access to the individual runes.
[direct_array_access]
pub fn (s string) runes() []rune {
	mut decoded := []rune{cap: s.len}
	mut pos := 0
	for pos < s.len {
		width := utf8_char_len(unsafe { s.str[pos] })
		if width <= 1 {
			// single byte character: the byte value is the rune
			decoded << unsafe { s.str[pos] }
			pos++
			continue
		}
		// multi byte character: take `width` bytes, clamped to the end of the string
		mut stop := s.len
		if pos + width <= s.len - 1 {
			stop = pos + width
		}
		chunk := unsafe { s[pos..stop] }
		decoded << chunk.utf32_code()
		pos += width
	}
	return decoded
}
76
// cstring_to_vstring makes a new V string, that is a copy of the C style
// string pointed by `s` (i.e. a `char*` to a 0 terminated block).
// It is recommended to use it, unless you *do* understand the implications of
// tos/tos2/tos3/tos4/tos5 in terms of memory management and interactions with
// -autofree and `[manualfree]`.
// It will panic, if the pointer `s` is 0.
[unsafe]
pub fn cstring_to_vstring(s &char) string {
	// wrap the C memory first (no copy), then clone it into V owned memory
	borrowed := unsafe { tos2(&u8(s)) }
	return borrowed.clone()
}
88
// tos_clone makes a new V string, that is a copy of the C style string pointed by `s`.
// It does the same as cstring_to_vstring; the only difference is that
// tos_clone takes a `&u8`, while cstring_to_vstring takes a `&char`.
// It will panic, if the pointer `s` is 0.
[unsafe]
pub fn tos_clone(s &u8) string {
	// wrap the C memory first (no copy), then clone it into V owned memory
	borrowed := unsafe { tos2(s) }
	return borrowed.clone()
}
97
// tos wraps the memory block pointed by `s`, of length `len`, in a V string.
// Note: the memory block pointed by s is *reused, not copied*!
// It will panic, when the pointer `s` is 0.
// See also `tos_clone`.
[unsafe]
pub fn tos(s &u8, len int) string {
	if s == 0 {
		panic('tos(): nil string')
	}
	wrapped := string{
		str: unsafe { s }
		len: len
	}
	return wrapped
}
112
// tos2 wraps the 0 terminated memory block pointed by `s` in a V string.
// Note: the memory block pointed by s is *reused, not copied*!
// The length is measured with vstrlen first, so it is more costly than `tos`.
// It will panic, when the pointer `s` is 0.
// It is the same as `tos3`, but for &byte pointers, avoiding callsite casts.
// See also `tos_clone`.
[unsafe]
pub fn tos2(s &u8) string {
	if s == 0 {
		panic('tos2: nil string')
	}
	byte_count := unsafe { vstrlen(s) }
	return string{
		str: unsafe { s }
		len: byte_count
	}
}
129
// tos3 wraps the 0 terminated memory block pointed by `s` in a V string.
// Note: the memory block pointed by s is *reused, not copied*!
// The length is measured with vstrlen_char first, so it is more costly than tos.
// It will panic, when the pointer `s` is 0.
// It is the same as `tos2`, but for &char pointers, avoiding callsite casts.
// See also `tos_clone`.
[unsafe]
pub fn tos3(s &char) string {
	if s == 0 {
		panic('tos3: nil string')
	}
	byte_count := unsafe { vstrlen_char(s) }
	return string{
		str: unsafe { &u8(s) }
		len: byte_count
	}
}
146
// tos4 wraps the 0 terminated memory block pointed by `s` in a V string.
// Note: the memory block pointed by s is *reused, not copied*!
// The length is measured with vstrlen first, so it is more costly than tos.
// It returns '', when given a 0 pointer `s`, it does NOT panic.
// It is the same as `tos5`, but for &byte pointers, avoiding callsite casts.
// See also `tos_clone`.
[unsafe]
pub fn tos4(s &u8) string {
	if s == 0 {
		return ''
	}
	byte_count := unsafe { vstrlen(s) }
	return string{
		str: unsafe { s }
		len: byte_count
	}
}
163
// tos5 wraps the 0 terminated memory block pointed by `s` in a V string.
// Note: the memory block pointed by s is *reused, not copied*!
// The length is measured with vstrlen_char first, so it is more costly than tos.
// It returns '', when given a 0 pointer `s`, it does NOT panic.
// It is the same as `tos4`, but for &char pointers, avoiding callsite casts.
// See also `tos_clone`.
[unsafe]
pub fn tos5(s &char) string {
	if s == 0 {
		return ''
	}
	byte_count := unsafe { vstrlen_char(s) }
	return string{
		str: unsafe { &u8(s) }
		len: byte_count
	}
}
180
// vstring wraps the C style 0 terminated string pointed by `bp` in a V string.
// Note: the memory block pointed by `bp` is *reused, not copied*!
// Note: instead of `&u8(arr.data).vstring()`, do use `tos_clone(&u8(arr.data))`.
// Apart from that, the returned strings are normal V strings
// (i.e. they would be freed by V's -autofree mechanism, when they are no longer used).
// See also `tos_clone`.
[unsafe]
pub fn (bp &u8) vstring() string {
	byte_count := unsafe { vstrlen(bp) }
	return string{
		str: unsafe { bp }
		len: byte_count
	}
}
194
// vstring_with_len wraps the C style string pointed by `bp` in a V string
// of the given length `len`.
// Note: the memory block pointed by `bp` is *reused, not copied*!
// This method has lower overhead compared to .vstring(), since it
// does not need to calculate the length of the 0 terminated string.
// See also `tos_clone`.
[unsafe]
pub fn (bp &u8) vstring_with_len(len int) string {
	wrapped := string{
		str: unsafe { bp }
		len: len
		is_lit: 0
	}
	return wrapped
}
208
// vstring wraps the C style 0 terminated string pointed by `cp` in a V string.
// Note: the memory block pointed by `cp` is *reused, not copied*!
// Apart from that, the returned strings are normal V strings
// (i.e. they would be freed by V's -autofree mechanism, when they are
// no longer used).
// Note: instead of `&u8(a.data).vstring()`, use `tos_clone(&u8(a.data))`.
// See also `tos_clone`.
[unsafe]
pub fn (cp &char) vstring() string {
	byte_count := unsafe { vstrlen_char(cp) }
	return string{
		str: &u8(cp)
		len: byte_count
		is_lit: 0
	}
}
224
// vstring_with_len wraps the C style string pointed by `cp` in a V string
// of the given length `len`.
// Note: the memory block pointed by `cp` is *reused, not copied*!
// This method has lower overhead compared to .vstring(), since it
// does not calculate the length of the 0 terminated string.
// See also `tos_clone`.
[unsafe]
pub fn (cp &char) vstring_with_len(len int) string {
	wrapped := string{
		str: &u8(cp)
		len: len
		is_lit: 0
	}
	return wrapped
}
238
// vstring_literal wraps the C style 0 terminated string pointed by `bp` in a V string.
// Note: the memory block pointed by `bp` is *reused, not copied*!
// NB2: unlike vstring, vstring_literal will mark the string
// as a literal (is_lit: 1), so it will not be freed by -autofree.
// This is suitable for readonly strings, C string literals etc,
// that can be read by the V program, but that should not be
// managed/freed by it, for example `os.args` is implemented using it.
// See also `tos_clone`.
[unsafe]
pub fn (bp &u8) vstring_literal() string {
	byte_count := unsafe { vstrlen(bp) }
	return string{
		str: unsafe { bp }
		len: byte_count
		is_lit: 1
	}
}
255
// vstring_literal_with_len wraps the C style string pointed by `bp` in a
// V string of the given length `len`, marked as a literal (is_lit: 1).
// Note: the memory block pointed by `bp` is *reused, not copied*!
// This method has lower overhead compared to .vstring_literal(), since it
// does not need to calculate the length of the 0 terminated string.
// See also `tos_clone`.
[unsafe]
pub fn (bp &u8) vstring_literal_with_len(len int) string {
	wrapped := string{
		str: unsafe { bp }
		len: len
		is_lit: 1
	}
	return wrapped
}
269
// vstring_literal wraps the C style 0 terminated `char*` string pointed
// by `cp` in a V string, marked as a literal (is_lit: 1).
// Note: the memory block pointed by `cp` is *reused, not copied*!
// See also `byteptr.vstring_literal` for more details.
// See also `tos_clone`.
[unsafe]
pub fn (cp &char) vstring_literal() string {
	byte_count := unsafe { vstrlen_char(cp) }
	return string{
		str: &u8(cp)
		len: byte_count
		is_lit: 1
	}
}
282
// vstring_literal_with_len wraps the C style `char*` string pointed by `cp`
// in a V string of the given length `len`, marked as a literal (is_lit: 1).
// Note: the memory block pointed by `cp` is *reused, not copied*!
// This method has lower overhead compared to .vstring_literal(), since it
// does not need to calculate the length of the 0 terminated string.
// See also `tos_clone`.
[unsafe]
pub fn (cp &char) vstring_literal_with_len(len int) string {
	wrapped := string{
		str: &u8(cp)
		len: len
		is_lit: 1
	}
	return wrapped
}
297
// len_utf8 returns the number of runes contained in the string `s`.
pub fn (s string) len_utf8() int {
	mut rune_count := 0
	mut pos := 0
	for pos < s.len {
		lead := unsafe { s.str[pos] }
		// 0xe5000000 is a packed table of 2 bit entries, selected by the high
		// nibble of `lead`: 0xC_ and 0xD_ => 1 extra byte, 0xE_ => 2, 0xF_ => 3,
		// everything else => 0. Adding 1 gives the byte length of the rune.
		pos += ((0xe5000000 >> ((lead >> 3) & 0x1e)) & 3) + 1
		rune_count++
	}
	return rune_count
}
308
// clone_static returns an independent copy of the given string `a`.
// It should be used only in -autofree generated code.
[inline]
fn (a string) clone_static() string {
	duplicate := a.clone()
	return duplicate
}
315
// clone returns a copy of the V string `a`.
// The copy owns a freshly allocated, 0 terminated buffer;
// for an empty `a`, the literal '' is returned instead.
pub fn (a string) clone() string {
	if a.len == 0 {
		return ''
	}
	mut buf := unsafe { malloc_noscan(a.len + 1) } // +1 for the ending 0 byte
	unsafe {
		vmemcpy(buf, a.str, a.len)
		buf[a.len] = 0
	}
	return string{
		str: unsafe { buf }
		len: a.len
	}
}
331
// replace_once replaces the first occurrence of `rep` with the string passed in `with`.
// When `rep` is not found, a copy of `s` is returned.
pub fn (s string) replace_once(rep string, with string) string {
	pos := s.index_(rep)
	if pos != -1 {
		head := s.substr(0, pos)
		tail := s.substr(pos + rep.len, s.len)
		return head + with + tail
	}
	return s.clone()
}
340
// replace replaces all occurrences of `rep` with the string passed in `with`.
// A copy of `s` is returned unchanged, when `s` or `rep` is empty, or when
// `rep` does not occur in `s`.
[direct_array_access]
pub fn (s string) replace(rep string, with string) string {
	if s.len == 0 || rep.len == 0 || rep.len > s.len || !s.contains(rep) {
		return s.clone()
	}
	// TODO PERF Allocating ints is expensive. Should be a stack array
	// Pass 1: collect the start positions of all non overlapping matches
	mut hits := []int{cap: s.len / rep.len}
	defer {
		unsafe { hits.free() }
	}
	mut from := 0
	for {
		from = s.index_after(rep, from)
		if from == -1 {
			break
		}
		hits << from
		from += rep.len
	}
	// Dont change the string if there's nothing to replace
	if hits.len == 0 {
		return s.clone()
	}
	// The number of replacements is known now, so the result can be allocated once
	out_len := s.len + hits.len * (with.len - rep.len)
	mut out := unsafe { malloc_noscan(out_len + 1) } // add space for the null byte at the end
	// Pass 2: fill the new string
	mut w := 0 // write position in `out`
	mut r := 0 // read position in `s`
	for hit in hits {
		// copy everything up to the piece being replaced
		for r < hit {
			unsafe {
				out[w] = s[r]
			}
			w++
			r++
		}
		// move the read position past the replaced piece
		r = hit + rep.len
		// copy the replacement piece
		for k in 0 .. with.len {
			unsafe {
				out[w] = with[k]
			}
			w++
		}
	}
	// copy whatever remains of the original after the last replacement
	for r < s.len {
		unsafe {
			out[w] = s[r]
		}
		w++
		r++
	}
	unsafe {
		out[out_len] = 0
		return tos(out, out_len)
	}
}
403
// RepIndex records one match found by `string.replace_each`.
struct RepIndex {
	idx     int // byte position of the match in the searched string
	val_idx int // index in `vals` of the matched `rep` string; its `with` is at val_idx + 1
}
408
// replace_each replaces all occurrences of the string pairs given in `vals`.
// `vals` is a flat list of pairs: ['rep1', 'with1', 'rep2', 'with2', ...].
// Pairs are searched in order, and bytes that were already matched by an
// earlier pair are not matched again by a later one.
// Pairs with an empty `rep` are ignored (like `replace` does for an empty `rep`).
// When `vals` has an odd number of elements, a message is printed to stderr,
// and a copy of `s` is returned unchanged.
// Example: assert 'ABCD'.replace_each(['B','C/','C','D','D','C']) == 'AC/DC'
[direct_array_access]
pub fn (s string) replace_each(vals []string) string {
	if s.len == 0 || vals.len == 0 {
		return s.clone()
	}
	if vals.len % 2 != 0 {
		eprintln('string.replace_each(): odd number of strings')
		return s.clone()
	}
	// `rep` - string to replace
	// `with` - string to replace with
	// Remember positions of all rep strings, and calculate the length
	// of the new string to do just one allocation.
	mut new_len := s.len
	mut idxs := []RepIndex{cap: 6}
	// scratch copy of `s`, in which the matched bytes are overwritten
	s_ := s.clone()
	for rep_i := 0; rep_i < vals.len; rep_i += 2 {
		// vals: ['rep1, 'with1', 'rep2', 'with2']
		rep := vals[rep_i]
		with := vals[rep_i + 1]
		if rep.len == 0 {
			// An empty pattern can never advance the search position below
			// (`idx += rep.len`), so it is skipped instead of looping forever.
			continue
		}
		// every pair is searched from the start of the string
		mut idx := 0
		for {
			idx = s_.index_after(rep, idx)
			if idx == -1 {
				break
			}
			// The string already found is set to `/del`, to avoid duplicate searches.
			for i in 0 .. rep.len {
				unsafe {
					s_.str[idx + i] = 127
				}
			}
			// We need to remember both the position in the string,
			// and which rep/with pair it refers to.
			idxs << RepIndex{
				idx: idx
				val_idx: rep_i
			}

			idx += rep.len
			new_len += with.len - rep.len
		}
	}

	// Dont change the string if there's nothing to replace
	if idxs.len == 0 {
		return s.clone()
	}
	// matches were collected pair by pair; the fill loop needs them by position
	idxs.sort(a.idx < b.idx)
	mut b := unsafe { malloc_noscan(new_len + 1) } // add space for 0 terminator
	// Fill the new string
	mut idx_pos := 0
	mut cur_idx := idxs[idx_pos]
	mut b_i := 0
	for i := 0; i < s.len; i++ {
		if i == cur_idx.idx {
			// Reached the location of rep, replace it with "with"
			rep := vals[cur_idx.val_idx]
			with := vals[cur_idx.val_idx + 1]
			for j in 0 .. with.len {
				unsafe {
					b[b_i] = with[j]
				}
				b_i++
			}
			// Skip the length of rep, since we just replaced it with "with"
			i += rep.len - 1
			// Go to the next index
			idx_pos++
			if idx_pos < idxs.len {
				cur_idx = idxs[idx_pos]
			}
		} else {
			// Rep doesnt start here, just copy
			unsafe {
				b[b_i] = s.str[i]
			}
			b_i++
		}
	}
	unsafe {
		b[new_len] = 0
		return tos(b, new_len)
	}
}
498
// replace_char replaces every occurrence of the character `rep`, with `repeat`
// copies of the character passed in `with`.
// It panics when `repeat` is <= 0 (unless compiled with `no_bounds_checking`).
// Example: assert '\tHello!'.replace_char(`\t`,` `,8) == '        Hello!'
[direct_array_access]
pub fn (s string) replace_char(rep u8, with u8, repeat int) string {
	$if !no_bounds_checking {
		if repeat <= 0 {
			panic('string.replace_char(): tab length too short')
		}
	}
	if s.len == 0 {
		return s.clone()
	}
	// TODO Allocating ints is expensive. Should be a stack array
	// - string.replace()
	mut hits := []int{cap: s.len}
	defer {
		unsafe { hits.free() }
	}
	// No need to do a contains(), it already traverses the entire string
	for pos, ch in s {
		if ch == rep {
			// remember where the character was found
			hits << pos
		}
	}
	if hits.len == 0 {
		return s.clone()
	}
	// The number of replacements is known now, so the result can be allocated once
	out_len := s.len + hits.len * (repeat - 1)
	mut out := unsafe { malloc_noscan(out_len + 1) } // add space for the null byte at the end
	mut w := 0 // write position in `out`
	mut r := 0 // read position in `s`
	for hit in hits {
		// copy everything up to the character being replaced
		for r < hit {
			unsafe {
				out[w] = s[r]
			}
			w++
			r++
		}
		// move the read position past the replaced character
		r = hit + 1
		// write the replacement characters
		for _ in 0 .. repeat {
			unsafe {
				out[w] = with
			}
			w++
		}
	}
	// copy whatever remains of the original after the last replacement
	for r < s.len {
		unsafe {
			out[w] = s[r]
		}
		w++
		r++
	}
	unsafe {
		out[out_len] = 0
		return tos(out, out_len)
	}
}
560
// normalize_tabs replaces every tab character with `tab_len` spaces.
// Example: assert '\t\tpop rax\t; pop rax'.normalize_tabs(2) == '    pop rax  ; pop rax'
[inline]
pub fn (s string) normalize_tabs(tab_len int) string {
	expanded := s.replace_char(`\t`, ` `, tab_len)
	return expanded
}
567
// bool returns `true` if the string equals the word "true" (or "t"),
// it will return `false` otherwise.
[inline]
pub fn (s string) bool() bool {
	if s == 'true' {
		return true
	}
	return s == 't' // TODO t for pg, remove
}
573
// int returns the value of the string as an integer `'1'.int() == 1`.
// It returns 0, when the parser reports an error.
[inline]
pub fn (s string) int() int {
	parsed := strconv.common_parse_int(s, 0, 32, false, false) or { 0 }
	return int(parsed)
}
579
// i64 returns the value of the string as i64 `'1'.i64() == i64(1)`.
// It returns 0, when the parser reports an error.
[inline]
pub fn (s string) i64() i64 {
	parsed := strconv.common_parse_int(s, 0, 64, false, false) or { 0 }
	return parsed
}
585
// i8 returns the value of the string as i8 `'1'.i8() == i8(1)`.
// It returns 0, when the parser reports an error.
[inline]
pub fn (s string) i8() i8 {
	parsed := strconv.common_parse_int(s, 0, 8, false, false) or { 0 }
	return i8(parsed)
}
591
// i16 returns the value of the string as i16 `'1'.i16() == i16(1)`.
// It returns 0, when the parser reports an error.
[inline]
pub fn (s string) i16() i16 {
	parsed := strconv.common_parse_int(s, 0, 16, false, false) or { 0 }
	return i16(parsed)
}
597
// f32 returns the value of the string as f32 `'1.0'.f32() == f32(1)`.
// It returns 0, when the parser reports an error.
[inline]
pub fn (s string) f32() f32 {
	parsed := strconv.atof64(s) or { 0 }
	return f32(parsed)
}
603
// f64 returns the value of the string as f64 `'1.0'.f64() == f64(1)`.
// It returns 0, when the parser reports an error.
[inline]
pub fn (s string) f64() f64 {
	parsed := strconv.atof64(s) or { 0 }
	return parsed
}
609
// u8 returns the value of the string as u8 `'1'.u8() == u8(1)`.
// It returns 0, when the parser reports an error.
[inline]
pub fn (s string) u8() u8 {
	parsed := strconv.common_parse_uint(s, 0, 8, false, false) or { 0 }
	return u8(parsed)
}
615
// u16 returns the value of the string as u16 `'1'.u16() == u16(1)`.
// It returns 0, when the parser reports an error.
[inline]
pub fn (s string) u16() u16 {
	parsed := strconv.common_parse_uint(s, 0, 16, false, false) or { 0 }
	return u16(parsed)
}
621
// u32 returns the value of the string as u32 `'1'.u32() == u32(1)`.
// It returns 0, when the parser reports an error.
[inline]
pub fn (s string) u32() u32 {
	parsed := strconv.common_parse_uint(s, 0, 32, false, false) or { 0 }
	return u32(parsed)
}
627
// u64 returns the value of the string as u64 `'1'.u64() == u64(1)`.
// It returns 0, when the parser reports an error.
[inline]
pub fn (s string) u64() u64 {
	parsed := strconv.common_parse_uint(s, 0, 64, false, false) or { 0 }
	return parsed
}
633
// parse_uint is like `parse_int` but for unsigned numbers
//
// This method directly exposes the `parse_uint` function from `strconv`
// as a method on `string`; its errors are passed on to the caller unchanged.
// For more advanced features, consider calling
// `strconv.common_parse_uint` directly.
[inline]
pub fn (s string) parse_uint(_base int, _bit_size int) !u64 {
	value := strconv.parse_uint(s, _base, _bit_size)!
	return value
}
643
// parse_int interprets a string s in the given base (0, 2 to 36) and
// bit size (0 to 64) and returns the corresponding value i.
//
// If the base argument is 0, the true base is implied by the string's
// prefix: 2 for "0b", 8 for "0" or "0o", 16 for "0x", and 10 otherwise.
// Also, for argument base 0 only, underscore characters are permitted
// as defined by the Go syntax for integer literals.
//
// The bitSize argument specifies the integer type
// that the result must fit into. Bit sizes 0, 8, 16, 32, and 64
// correspond to int, int8, int16, int32, and int64.
// If bitSize is below 0 or above 64, an error is returned.
//
// This method directly exposes the `parse_int` function from `strconv`
// as a method on `string`; its errors are passed on to the caller unchanged.
// For more advanced features, consider calling
// `strconv.common_parse_int` directly.
[inline]
pub fn (s string) parse_int(_base int, _bit_size int) !i64 {
	value := strconv.parse_int(s, _base, _bit_size)!
	return value
}
664
// == returns true when `s` and `a` have the same length and the same bytes.
// It panics, when `s.str` is 0.
[direct_array_access]
fn (s string) == (a string) bool {
	if s.str == 0 {
		// should never happen
		panic('string.eq(): nil string')
	}
	if s.len != a.len {
		return false
	}
	// cheap early out: compare the last bytes, before comparing the whole blocks
	if s.len > 0 && s[s.len - 1] != a[s.len - 1] {
		return false
	}
	return unsafe { vmemcmp(s.str, a.str, a.len) == 0 }
}
684
// compare returns -1 if `s` < `a`, 0 if `s` == `a`, and 1 if `s` > `a`
// The strings are compared byte by byte; when one of them is a prefix of
// the other, the shorter one is the smaller.
[direct_array_access]
pub fn (s string) compare(a string) int {
	shared := if s.len < a.len { s.len } else { a.len }
	for k in 0 .. shared {
		if s[k] != a[k] {
			// the first differing byte decides the order
			return if s[k] < a[k] { -1 } else { 1 }
		}
	}
	if s.len == a.len {
		return 0
	}
	return if s.len < a.len { -1 } else { 1 }
}
705
// < returns true when `s` is ordered before `a` in a byte by byte comparison.
// A string that is a proper prefix of another is ordered before it.
[direct_array_access]
fn (s string) < (a string) bool {
	for k in 0 .. s.len {
		if k >= a.len {
			// `a` is a proper prefix of `s`
			return false
		}
		if s[k] != a[k] {
			// the first differing byte decides the order
			return s[k] < a[k]
		}
	}
	return s.len < a.len
}
720
// + returns a new string, made of the bytes of `s` followed by the bytes of `a`.
[direct_array_access]
fn (s string) + (a string) string {
	total := a.len + s.len
	mut buf := unsafe { malloc_noscan(total + 1) } // +1 for the ending 0 byte
	unsafe {
		vmemcpy(buf, s.str, s.len)
		vmemcpy(buf + s.len, a.str, a.len)
		// keep the result usable as a C string too
		buf[total] = 0
	}
	return string{
		str: unsafe { buf }
		len: total
	}
}
737
// split_any splits the string to an array, using each of the chars in `delim` as a delimiter.
// Example: "first row\nsecond row".split_any(" \n") == ['first', 'row', 'second', 'row']
// If the delimiter string is empty, then `.split('')` is used.
// An empty string gives an empty array, and no empty element is added after
// a delimiter that ends the string.
[direct_array_access]
pub fn (s string) split_any(delim string) []string {
	mut parts := []string{}
	if s.len <= 0 {
		return parts
	}
	// with no delimiter chars, fall back to the default split
	if delim.len <= 0 {
		return s.split('')
	}
	mut start := 0 // start of the part that is being collected
	for pos, ch in s {
		for d in delim {
			if ch == d {
				parts << s[start..pos]
				start = pos + 1
				break
			}
		}
	}
	if start < s.len {
		parts << s[start..]
	}
	return parts
}
767
// rsplit_any splits the string to an array, using each of the chars in `delim`
// as a delimiter; the parts are returned in reverse order.
// Example: "first row\nsecond row".rsplit_any(" \n") == ['row', 'second', 'row', 'first']
// If the delimiter string is empty, then `.rsplit('')` is used.
// An empty string gives an empty array.
[direct_array_access]
pub fn (s string) rsplit_any(delim string) []string {
	mut parts := []string{}
	if s.len <= 0 {
		return parts
	}
	if delim.len <= 0 {
		return s.rsplit('')
	}
	mut stop := s.len // exclusive end of the part that is being collected
	for pos := s.len - 1; pos >= 0; pos-- {
		for d in delim {
			if s[pos] == d {
				parts << s[pos + 1..stop]
				stop = pos
				break
			}
		}
	}
	if stop > 0 {
		parts << s[..stop]
	}
	return parts
}
797
// split splits the string to an array by `delim`.
// Example: assert 'A B C'.split(' ') == ['A','B','C']
// If `delim` is empty the string is split by its characters.
// Example: assert 'DEF'.split('') == ['D','E','F']
[inline]
pub fn (s string) split(delim string) []string {
	// nth == 0 means: return all the parts
	parts := s.split_nth(delim, 0)
	return parts
}
806
// rsplit splits the string to an array by `delim` in reverse order.
// Example: assert 'A B C'.rsplit(' ') == ['C','B','A']
// If `delim` is empty the string is split by its characters.
// Example: assert 'DEF'.rsplit('') == ['F','E','D']
[inline]
pub fn (s string) rsplit(delim string) []string {
	// nth == 0 means: return all the parts
	parts := s.rsplit_nth(delim, 0)
	return parts
}
815
// split_once divides the string into a pair of strings, at the first `delim`.
// It returns `none`, when the split does not give exactly two parts.
// Example:
// ```v
// path, ext := 'file.ts.dts'.split_once('.')?
// assert path == 'file'
// assert ext == 'ts.dts'
// ```
// Note that split_once returns the split off string as the first part of the pair,
// and returns the remaining string as the second part of the pair.
pub fn (s string) split_once(delim string) ?(string, string) {
	parts := s.split_nth(delim, 2)
	if parts.len == 2 {
		return parts[0], parts[1]
	}
	return none
}
834
// rsplit_once divides the string into a pair of strings, at the last `delim`.
// It returns `none`, when the split does not give exactly two parts.
// Example:
// ```v
// path, ext := 'file.ts.dts'.rsplit_once('.')?
// assert path == 'file.ts'
// assert ext == 'dts'
// ```
// Note that rsplit_once returns the remaining string as the first part of the pair,
// and returns the split off string as the second part of the pair.
pub fn (s string) rsplit_once(delim string) ?(string, string) {
	parts := s.rsplit_nth(delim, 2)
	if parts.len == 2 {
		// rsplit_nth returns the parts in reverse order, so swap them back
		return parts[1], parts[0]
	}
	return none
}
853
// split_nth splits the string based on the passed `delim` substring.
// It returns the first Nth parts. When N=0, return all the splits.
// The last returned element has the remainder of the string, even if
// the remainder contains more `delim` substrings.
// There are 3 cases, selected by the length of `delim`:
// an empty `delim` splits the string into its single bytes,
// a 1 byte `delim` is compared byte by byte,
// and a longer `delim` is compared against substrings of `s`.
[direct_array_access]
pub fn (s string) split_nth(delim string, nth int) []string {
	mut res := []string{}
	mut i := 0

	match delim.len {
		0 {
			// Empty delimiter: every byte becomes its own element.
			// Here `i` is the 1 based number of the element about to be added.
			i = 1
			for ch in s {
				if nth > 0 && i >= nth {
					// the Nth element takes the whole remainder of the string
					res << s[i - 1..]
					break
				}
				res << ch.ascii_str()
				i++
			}
			return res
		}
		1 {
			// Single byte delimiter: compare the bytes directly.
			mut start := 0
			delim_byte := delim[0]

			for i < s.len {
				if s[i] == delim_byte {
					// stop splitting, once only the Nth (last) element is missing
					was_last := nth > 0 && res.len == nth - 1
					if was_last {
						break
					}
					val := s.substr(start, i)
					res << val
					start = i + delim.len
					i = start
				} else {
					i++
				}
			}

			// Then the remaining right part of the string
			if nth < 1 || res.len < nth {
				res << s[start..]
			}
			return res
		}
		else {
			mut start := 0
			// Take the left part for each delimiter occurence
			for i <= s.len {
				// the length check keeps the substr below inside of the string
				is_delim := i + delim.len <= s.len && s.substr(i, i + delim.len) == delim
				if is_delim {
					// stop splitting, once only the Nth (last) element is missing
					was_last := nth > 0 && res.len == nth - 1
					if was_last {
						break
					}
					val := s.substr(start, i)
					res << val
					start = i + delim.len
					i = start
				} else {
					i++
				}
			}
			// Then the remaining right part of the string
			if nth < 1 || res.len < nth {
				res << s[start..]
			}
			return res
		}
	}
}
927
// rsplit_nth splits the string based on the passed `delim` substring in reverse order.
// It returns the first Nth parts. When N=0, return all the splits.
// The last returned element has the remainder of the string, even if
// the remainder contains more `delim` substrings.
// There are 3 cases, selected by the length of `delim`:
// an empty `delim` splits the string into its single bytes,
// a 1 byte `delim` is compared byte by byte,
// and a longer `delim` is compared against substrings of `s`.
// A `delim` that ends the string gives an empty first element, no matter
// how long `delim` is.
[direct_array_access]
pub fn (s string) rsplit_nth(delim string, nth int) []string {
	mut res := []string{}
	mut i := s.len - 1

	match delim.len {
		0 {
			// Empty delimiter: every byte becomes its own element, last byte first.
			for i >= 0 {
				if nth > 0 && res.len == nth - 1 {
					// the Nth element takes the whole remaining left part
					res << s[..i + 1]
					break
				}
				res << s[i].ascii_str()
				i--
			}
			return res
		}
		1 {
			// Single byte delimiter: `i` is the index of the byte being checked.
			mut rbound := s.len
			delim_byte := delim[0]

			for i >= 0 {
				if s[i] == delim_byte {
					// stop splitting, once only the Nth (last) element is missing
					if nth > 0 && res.len == nth - 1 {
						break
					}
					res << s[i + 1..rbound]
					rbound = i
					i--
				} else {
					i--
				}
			}

			// Then the remaining left part of the string
			if nth < 1 || res.len < nth {
				res << s[..rbound]
			}
			return res
		}
		else {
			mut rbound := s.len
			// In this branch `i` is the *exclusive end* of the candidate
			// delimiter `s[i - delim.len..i]`, so the scan has to start at
			// `s.len`; starting at `s.len - 1` would never detect a delimiter
			// that ends the string.
			i = s.len

			for i >= 0 {
				is_delim := i - delim.len >= 0 && s[i - delim.len..i] == delim
				if is_delim {
					// stop splitting, once only the Nth (last) element is missing
					if nth > 0 && res.len == nth - 1 {
						break
					}
					res << s[i..rbound]
					rbound = i - delim.len
					i -= delim.len
				} else {
					i--
				}
			}

			// Then the remaining left part of the string
			if nth < 1 || res.len < nth {
				res << s[..rbound]
			}
			return res
		}
	}
}
995
// split_into_lines splits the string by newline characters.
// newlines are stripped.
// `\r` (MacOS), `\n` (POSIX), and `\r\n` (WinOS) line endings are all supported (including mixed line endings).
// NOTE: algorithm is "greedy", consuming '\r\n' as a single line ending with higher priority than '\r' and '\n' as multiple endings
[direct_array_access]
pub fn (s string) split_into_lines() []string {
	mut lines := []string{}
	if s.len == 0 {
		return lines
	}
	cr := `\r`
	lf := `\n`
	mut begin := 0 // start of the line that is being collected
	for pos := 0; pos < s.len; pos++ {
		if pos < begin {
			// this is the `\n` of a `\r\n` pair, that was already consumed
			continue
		}
		ch := s[pos]
		if ch != lf && ch != cr {
			continue
		}
		lines << if begin == pos { '' } else { s[begin..pos] }
		begin = pos + 1
		if ch == cr && (pos + 1) < s.len && s[pos + 1] == lf {
			// consume the `\r\n` pair as a single line ending
			begin = pos + 2
		}
	}
	// the last line may have no line ending
	if begin < s.len {
		lines << s[begin..]
	}
	return lines
}
1029
// substr2 is used internally for [2..4]
// When `end_max` is true, `_end` is ignored, and the end of the string is used instead.
[inline]
fn (s string) substr2(start int, _end int, end_max bool) string {
	if end_max {
		return s.substr(start, s.len)
	}
	return s.substr(start, _end)
}
1036
// substr returns a copy of the part of the string, between the index
// positions `start` (inclusive) and `end` (exclusive).
// It panics on out of range positions (unless compiled with `no_bounds_checking`).
// Example: assert 'ABCD'.substr(1,3) == 'BC'
[direct_array_access]
pub fn (s string) substr(start int, end int) string {
	$if !no_bounds_checking {
		if start > end || start > s.len || end > s.len || start < 0 || end < 0 {
			panic('substr(${start}, ${end}) out of bounds (len=${s.len})')
		}
	}
	n := end - start
	if n == s.len {
		// the whole string was requested
		return s.clone()
	}
	mut buf := unsafe { malloc_noscan(n + 1) } // +1 for the ending 0 byte
	unsafe {
		vmemcpy(buf, s.str + start, n)
		buf[n] = 0
	}
	return string{
		str: unsafe { buf }
		len: n
	}
}
1060
// substr_with_check is the version of `substr()`, that is used in `a[start..end] or {`
// It returns an error, instead of panicking, when the index is out of range.
[direct_array_access]
pub fn (s string) substr_with_check(start int, end int) !string {
	if start > end || start > s.len || end > s.len || start < 0 || end < 0 {
		return error('substr(${start}, ${end}) out of bounds (len=${s.len})')
	}
	n := end - start
	if n == s.len {
		// the whole string was requested
		return s.clone()
	}
	mut buf := unsafe { malloc_noscan(n + 1) } // +1 for the ending 0 byte
	unsafe {
		vmemcpy(buf, s.str + start, n)
		buf[n] = 0
	}
	return string{
		str: unsafe { buf }
		len: n
	}
}
1082
// substr_ni returns the string between index positions `start` and `end` allowing negative indexes
// A negative index counts from the end of the string; indexes that are out
// of range are clamped, so this function always returns a valid string.
[direct_array_access]
pub fn (s string) substr_ni(_start int, _end int) string {
	mut from := _start
	mut to := _end

	// borders math: translate the negative indexes, then clamp
	if from < 0 {
		from += s.len
		if from < 0 {
			from = 0
		}
	}
	if to < 0 {
		to += s.len
		if to < 0 {
			to = 0
		}
	}
	if to >= s.len {
		to = s.len
	}

	if from > s.len || to < from {
		return ''
	}

	// string copy
	n := to - from
	mut buf := unsafe { malloc_noscan(n + 1) } // +1 for the ending 0 byte
	unsafe {
		vmemcpy(buf, s.str + from, n)
		buf[n] = 0
	}
	return string{
		str: unsafe { buf }
		len: n
	}
}
1125
// index_ returns the position of the first byte of the first occurrence of `p` in `s`.
// It returns `-1` if `p` is empty, longer than `s`, or can not be found.
// Patterns longer than 2 bytes are delegated to `index_kmp`.
[direct_array_access]
fn (s string) index_(p string) int {
	if p.len > s.len || p.len == 0 {
		return -1
	}
	if p.len > 2 {
		return s.index_kmp(p)
	}
	// A match can not start after `s.len - p.len`. Stopping there keeps every
	// read inside the first `s.len` bytes, so the terminating 0 byte (or memory
	// past it) is never compared against the pattern.
	last_start := s.len - p.len
	for i := 0; i <= last_start; i++ {
		mut j := 0
		for j < p.len && unsafe { s.str[i + j] == p.str[j] } {
			j++
		}
		if j == p.len {
			return i
		}
	}
	return -1
}
1149
// index returns the position of the first byte of the first occurrence of `p` in `s`.
// It returns `none` if `p` can not be found.
pub fn (s string) index(p string) ?int {
	pos := s.index_(p)
	if pos != -1 {
		return pos
	}
	return none
}
1159
// index_kmp searches for `p` in `s` using the Knuth-Morris-Pratt algorithm.
// It returns the index of the first match, or `-1` when there is none.
[direct_array_access; manualfree]
fn (s string) index_kmp(p string) int {
	if p.len > s.len {
		return -1
	}
	// fallback[k] is the length of the longest proper prefix of p[..k + 1],
	// that is also a suffix of it
	mut fallback := []int{len: p.len}
	defer {
		unsafe { fallback.free() }
	}
	mut matched := 0
	for k := 1; k < p.len; k++ {
		for matched > 0 && unsafe { p.str[matched] != p.str[k] } {
			matched = fallback[matched - 1]
		}
		if unsafe { p.str[matched] == p.str[k] } {
			matched++
		}
		fallback[k] = matched
	}
	// scan the text, reusing the table to avoid re-reading already matched bytes
	matched = 0
	for k := 0; k < s.len; k++ {
		for matched > 0 && unsafe { p.str[matched] != s.str[k] } {
			matched = fallback[matched - 1]
		}
		if unsafe { p.str[matched] == s.str[k] } {
			matched++
		}
		if matched == p.len {
			return k - p.len + 1
		}
	}
	return -1
}
1194
// index_any returns the position of the first byte of `s`, that is also present in `chars`.
// It returns `-1` when no byte of `s` is in `chars`.
pub fn (s string) index_any(chars string) int {
	for pos, b in s {
		if chars.contains_u8(b) {
			return pos
		}
	}
	return -1
}
1206
// last_index_ returns the position of the last occurrence of `p` in `s`.
// It returns `-1` if `p` is empty, longer than `s`, or can not be found.
[direct_array_access]
fn (s string) last_index_(p string) int {
	if p.len == 0 || p.len > s.len {
		return -1
	}
	// try every candidate start offset, from the rightmost one down to 0
	for at := s.len - p.len; at >= 0; at-- {
		mut k := 0
		for k < p.len && unsafe { s.str[at + k] == p.str[k] } {
			k++
		}
		if k == p.len {
			return at
		}
	}
	return -1
}
1226
// last_index returns the position of the last occurrence of `p` in `s`.
// It returns `none` if `p` can not be found.
pub fn (s string) last_index(p string) ?int {
	pos := s.last_index_(p)
	if pos != -1 {
		return pos
	}
	return none
}
1235
// index_after returns the position of the first occurrence of `p` in `s`,
// that starts at or after the `start` position. A negative `start` is treated as 0.
// It returns `-1` if `p` is longer than `s`, if `start` is past the end of `s`,
// or if `p` can not be found.
[direct_array_access]
pub fn (s string) index_after(p string, start int) int {
	if p.len > s.len {
		return -1
	}
	first := if start < 0 { 0 } else { start }
	if first >= s.len {
		return -1
	}
	// A match can not start after `s.len - p.len`. Stopping there keeps every
	// read inside the first `s.len` bytes, so the terminating 0 byte (or memory
	// past it) is never compared against the pattern.
	last := s.len - p.len
	for i := first; i <= last; i++ {
		mut j := 0
		for j < p.len && unsafe { s.str[i + j] == p.str[j] } {
			j++
		}
		if j == p.len {
			return i
		}
	}
	return -1
}
1264
// index_u8 returns the index of the first occurrence of the byte `c` in the string.
// index_u8 returns -1 if the byte can not be found.
[direct_array_access]
pub fn (s string) index_u8(c u8) int {
	for pos := 0; pos < s.len; pos++ {
		if unsafe { s.str[pos] == c } {
			return pos
		}
	}
	return -1
}
1276
// last_index_u8 returns the index of the last occurrence of byte `c` if found in the string.
// last_index_u8 returns -1 if the byte is not found.
[direct_array_access]
pub fn (s string) last_index_u8(c u8) int {
	mut pos := s.len - 1
	for pos >= 0 {
		if unsafe { s.str[pos] == c } {
			return pos
		}
		pos--
	}
	return -1
}
1288
// count returns the number of non-overlapping occurrences of `substr` in the string.
// count returns 0 if `s` or `substr` is empty, or if no `substr` could be found.
[direct_array_access]
pub fn (s string) count(substr string) int {
	if s.len == 0 || substr.len == 0 || substr.len > s.len {
		return 0
	}

	mut total := 0

	// single byte needle: a plain byte scan is enough
	if substr.len == 1 {
		needle := substr[0]
		for b in s {
			if b == needle {
				total++
			}
		}
		return total
	}

	mut pos := 0
	for {
		pos = s.index_after(substr, pos)
		if pos == -1 {
			return total
		}
		// continue the search right after the match that was just found
		pos += substr.len
		total++
	}
	return 0 // TODO can never get here - v doesn't know that
}
1325
// contains_u8 returns `true` if the string contains the byte value `x`.
// See also: [`string.index_u8`](#string.index_u8) , to get the index of the byte as well.
pub fn (s string) contains_u8(x u8) bool {
	return s.index_u8(x) != -1
}
1336
// contains returns `true` if the string contains `substr`.
// An empty `substr` is always considered to be contained.
// See also: [`string.index`](#string.index)
pub fn (s string) contains(substr string) bool {
	return match substr.len {
		0 { true }
		1 { s.contains_u8(unsafe { substr.str[0] }) }
		else { s.index_(substr) != -1 }
	}
}
1348
// contains_any returns `true` if the string contains at least one of the bytes in `chars`.
pub fn (s string) contains_any(chars string) bool {
	for k := 0; k < chars.len; k++ {
		if s.contains_u8(unsafe { chars.str[k] }) {
			return true
		}
	}
	return false
}
1358
// contains_only returns `true`, if every byte of the string is also present in `chars`.
// It returns `false` when `chars` is empty.
pub fn (s string) contains_only(chars string) bool {
	if chars.len == 0 {
		return false
	}
	for b in s {
		if !chars.contains_u8(b) {
			return false
		}
	}
	return true
}
1375
// contains_any_substr returns `true` if the string contains any of the strings in `substrs`.
// Note: it also returns `true` when `substrs` is empty.
pub fn (s string) contains_any_substr(substrs []string) bool {
	if substrs.len == 0 {
		return true
	}
	for k := 0; k < substrs.len; k++ {
		if s.contains(substrs[k]) {
			return true
		}
	}
	return false
}
1388
// starts_with returns `true` if the string starts with `p`.
// An empty `p` always matches.
[direct_array_access]
pub fn (s string) starts_with(p string) bool {
	if p.len > s.len {
		return false
	}
	for k := 0; k < p.len; k++ {
		if unsafe { s.str[k] != p.str[k] } {
			return false
		}
	}
	return true
}
1402
// ends_with returns `true` if the string ends with `p`.
// An empty `p` always matches.
[direct_array_access]
pub fn (s string) ends_with(p string) bool {
	if p.len > s.len {
		return false
	}
	// offset in `s`, where the candidate suffix begins
	tail := s.len - p.len
	for k := 0; k < p.len; k++ {
		if unsafe { p.str[k] != s.str[tail + k] } {
			return false
		}
	}
	return true
}
1416
// to_lower returns a copy of the string, with the bytes `A`..`Z` replaced by `a`..`z`.
// TODO only works with ASCII
[direct_array_access]
pub fn (s string) to_lower() string {
	unsafe {
		mut out := malloc_noscan(s.len + 1)
		for k := 0; k < s.len; k++ {
			c := s.str[k]
			// in ASCII, a lowercase letter is its uppercase one + 32
			out[k] = if c >= `A` && c <= `Z` { c + 32 } else { c }
		}
		out[s.len] = 0
		return tos(out, s.len)
	}
}
1434
// is_lower returns `true` if the string contains no ASCII uppercase letters (`A`..`Z`).
// Example: assert 'hello developer'.is_lower() == true
[direct_array_access]
pub fn (s string) is_lower() bool {
	for c in s {
		if c >= `A` && c <= `Z` {
			return false
		}
	}
	return true
}
1446
// to_upper returns a copy of the string, with the bytes `a`..`z` replaced by `A`..`Z`.
// Example: assert 'Hello V'.to_upper() == 'HELLO V'
[direct_array_access]
pub fn (s string) to_upper() string {
	unsafe {
		mut out := malloc_noscan(s.len + 1)
		for k := 0; k < s.len; k++ {
			c := s.str[k]
			// in ASCII, an uppercase letter is its lowercase one - 32
			out[k] = if c >= `a` && c <= `z` { c - 32 } else { c }
		}
		out[s.len] = 0
		return tos(out, s.len)
	}
}
1464
// is_upper returns `true` if the string contains no ASCII lowercase letters (`a`..`z`).
// See also: [`byte.is_capital`](#byte.is_capital)
// Example: assert 'HELLO V'.is_upper() == true
[direct_array_access]
pub fn (s string) is_upper() bool {
	for c in s {
		if c >= `a` && c <= `z` {
			return false
		}
	}
	return true
}
1477
// capitalize returns the string with its first byte converted to uppercase.
// The rest of the string is left unchanged.
// Example: assert 'hello'.capitalize() == 'Hello'
[direct_array_access]
pub fn (s string) capitalize() string {
	if s.len == 0 {
		return ''
	}
	head := s[0].ascii_str().to_upper()
	if s.len == 1 {
		return head
	}
	return head + s[1..]
}
1495
// is_capital returns `true`, if the first byte of the string `s` is an
// ASCII capital letter, and none of the following bytes is one.
// Example: assert 'Hello'.is_capital() == true
// Example: assert 'HelloWorld'.is_capital() == false
[direct_array_access]
pub fn (s string) is_capital() bool {
	if s.len == 0 {
		return false
	}
	if s[0] < `A` || s[0] > `Z` {
		return false
	}
	for k := 1; k < s.len; k++ {
		if s[k] >= `A` && s[k] <= `Z` {
			return false
		}
	}
	return true
}
1512
// starts_with_capital returns `true`, if the first byte of the string `s`
// is an ASCII capital letter, no matter what follows it.
// Example: assert 'Hello'.starts_with_capital() == true
// Example: assert 'Hello. World.'.starts_with_capital() == true
[direct_array_access]
pub fn (s string) starts_with_capital() bool {
	return s.len > 0 && s[0] >= `A` && s[0] <= `Z`
}
1524
// title returns the string with each word capitalized.
// Words are the parts of the string, separated by a single space.
// Example: assert 'hello v developer'.title() == 'Hello V Developer'
pub fn (s string) title() string {
	mut capitalized := []string{}
	for word in s.split(' ') {
		capitalized << word.capitalize()
	}
	return capitalized.join(' ')
}
1536
// is_title returns true if every space separated word of the string
// satisfies `is_capital()`.
// Example: assert 'Hello V Developer'.is_title() == true
pub fn (s string) is_title() bool {
	parts := s.split(' ')
	for k := 0; k < parts.len; k++ {
		if !parts[k].is_capital() {
			return false
		}
	}
	return true
}
1548
// find_between returns the string found between the first `start` string,
// and the first `end` string after it.
// It returns '' when `start` is not found, and everything after `start`,
// when `end` is not found.
// Example: assert 'hey [man] how you doin'.find_between('[', ']') == 'man'
pub fn (s string) find_between(start string, end string) string {
	opening := s.index_(start)
	if opening == -1 {
		return ''
	}
	// everything to the right of `start`
	tail := s[opening + start.len..]
	closing := tail.index_(end)
	return if closing == -1 { tail } else { tail[..closing] }
}
1564
// trim_space strips any of ` `, `\n`, `\t`, `\v`, `\f`, `\r` from the start and end of the string.
// Example: assert ' Hello V '.trim_space() == 'Hello V'
[inline]
pub fn (s string) trim_space() string {
	whitespace := ' \n\t\v\f\r'
	return s.trim(whitespace)
}
1571
// trim strips any of the characters given in `cutset` from the start and end of the string.
// A copy of the string is returned, when the string or the `cutset` is empty.
// Example: assert ' ffHello V ffff'.trim(' f') == 'Hello V'
pub fn (s string) trim(cutset string) string {
	if s.len >= 1 && cutset.len >= 1 {
		from, to := s.trim_indexes(cutset)
		return s.substr(from, to)
	}
	return s.clone()
}
1581
// trim_indexes gets the new start and end indices of a string when any of the
// characters given in `cutset` were stripped from the start and end of the string.
// Should be used as an input to `substr()`.
// If the string is empty, or contains only the characters in `cutset`,
// both values returned are zero.
// Example: left, right := '-hi-'.trim_indexes('-')
[direct_array_access]
pub fn (s string) trim_indexes(cutset string) (int, int) {
	// skip the leading bytes that are in the cutset
	mut pos_left := 0
	for pos_left < s.len && cutset.contains_u8(s[pos_left]) {
		pos_left++
	}
	// Nothing is left: the string is empty, or it is made only of cutset bytes.
	// Returning here also guarantees that no index below 0 is ever read.
	if pos_left == s.len {
		return 0, 0
	}
	// s[pos_left] is not in the cutset, so this scan always stops at or before it
	mut pos_right := s.len - 1
	for pos_right > pos_left && cutset.contains_u8(s[pos_right]) {
		pos_right--
	}
	return pos_left, pos_right + 1
}
1611
// trim_left strips any of the characters given in `cutset` from the left of the string.
// A copy of the string is returned, when the string or the `cutset` is empty.
// Example: assert 'd Hello V developer'.trim_left(' d') == 'Hello V developer'
[direct_array_access]
pub fn (s string) trim_left(cutset string) string {
	if s.len < 1 || cutset.len < 1 {
		return s.clone()
	}
	// advance past every leading byte, that is present in the cutset
	mut keep_from := 0
	for keep_from < s.len && cutset.contains_u8(s[keep_from]) {
		keep_from++
	}
	return s[keep_from..]
}
1635
// trim_right strips any of the characters given in `cutset` from the right of the string.
// A copy of the string is returned, when the string or the `cutset` is empty.
// Example: assert ' Hello V d'.trim_right(' d') == ' Hello V'
[direct_array_access]
pub fn (s string) trim_right(cutset string) string {
	if s.len < 1 || cutset.len < 1 {
		return s.clone()
	}
	// Walk back over every trailing byte that is present in the cutset.
	// `contains_u8` stops at the first matching cutset byte, the same way
	// the scan in `trim_left` does.
	mut pos := s.len - 1
	for pos >= 0 && cutset.contains_u8(s[pos]) {
		pos--
	}
	// the whole string was made of cutset bytes
	if pos < 0 {
		return ''
	}
	return s[..pos + 1]
}
1661
// trim_string_left strips `str` from the start of the string, when the string starts with it.
// Otherwise a copy of the string is returned.
// Example: assert 'WorldHello V'.trim_string_left('World') == 'Hello V'
pub fn (s string) trim_string_left(str string) string {
	if !s.starts_with(str) {
		return s.clone()
	}
	return s[str.len..]
}
1670
// trim_string_right strips `str` from the end of the string, when the string ends with it.
// Otherwise a copy of the string is returned.
// Example: assert 'Hello VWorld'.trim_string_right('World') == 'Hello V'
pub fn (s string) trim_string_right(str string) string {
	if !s.ends_with(str) {
		return s.clone()
	}
	return s[..s.len - str.len]
}
1679
// compare_strings returns `-1` if `a < b`, `1` if `a > b` else `0`.
pub fn compare_strings(a &string, b &string) int {
	return if a < b {
		-1
	} else if a > b {
		1
	} else {
		0
	}
}
1690
// compare_strings_by_len returns `-1` if `a.len < b.len`, `1` if `a.len > b.len` else `0`.
fn compare_strings_by_len(a &string, b &string) int {
	return if a.len < b.len {
		-1
	} else if a.len > b.len {
		1
	} else {
		0
	}
}
1701
// compare_lower_strings returns the same as compare_strings, but it compares
// the results of calling `to_lower()` on both `a` and `b`.
fn compare_lower_strings(a &string, b &string) int {
	lowered_a := a.to_lower()
	lowered_b := b.to_lower()
	return compare_strings(&lowered_a, &lowered_b)
}
1708
// sort_ignore_case sorts the string array in place, using case insensitive comparing.
// The comparison is done by `compare_lower_strings`.
[inline]
pub fn (mut s []string) sort_ignore_case() {
	s.sort_with_compare(compare_lower_strings)
}
1714
// sort_by_len sorts the string array in place, by each string's `.len` length.
// The comparison is done by `compare_strings_by_len`.
[inline]
pub fn (mut s []string) sort_by_len() {
	s.sort_with_compare(compare_strings_by_len)
}
1720
// str returns a copy of the string, made with `clone()`.
[inline]
pub fn (s string) str() string {
	return s.clone()
}
1726
// at returns the byte at index `idx`.
// Unless compiled with `no_bounds_checking`, it panics when `idx` is negative,
// or not smaller than `s.len`.
// Example: assert 'ABC'.at(1) == u8(`B`)
fn (s string) at(idx int) u8 {
	$if !no_bounds_checking {
		if idx < 0 || idx >= s.len {
			panic('string index out of range: ${idx} / ${s.len}')
		}
	}
	unsafe {
		return s.str[idx]
	}
}
1739
// at_with_check is the version of `at()` that is used in `a[i] or {`.
// It returns `none` when the index is out of range, instead of panicking.
fn (s string) at_with_check(idx int) ?u8 {
	if idx >= 0 && idx < s.len {
		unsafe {
			return s.str[idx]
		}
	}
	return none
}
1750
// is_space returns `true` if the byte is a white space character.
// The following list is considered white space characters: ` `, `\t`, `\n`, `\v`, `\f`, `\r`, 0x85, 0xa0
// Example: assert u8(` `).is_space() == true
[inline]
pub fn (c u8) is_space() bool {
	// 9..13 covers `\t`, `\n`, `\v`, `\f` and `\r`
	if c >= 9 && c <= 13 {
		return true
	}
	// 32 is the space, 0x85 is NEXT LINE (NEL), 0xa0 is NO-BREAK SPACE
	return c == 32 || c == 0x85 || c == 0xa0
}
1760
// is_digit returns `true` if the byte is in range 0-9 and `false` otherwise.
// Example: assert u8(`9`).is_digit() == true
[inline]
pub fn (c u8) is_digit() bool {
	return match c {
		`0`...`9` { true }
		else { false }
	}
}
1767
// is_hex_digit returns `true` if the byte is either in range 0-9, a-f or A-F and `false` otherwise.
// Example: assert u8(`F`).is_hex_digit() == true
[inline]
pub fn (c u8) is_hex_digit() bool {
	if c >= `0` && c <= `9` {
		return true
	}
	if c >= `a` && c <= `f` {
		return true
	}
	return c >= `A` && c <= `F`
}
1774
// is_oct_digit returns `true` if the byte is in range 0-7 and `false` otherwise.
// Example: assert u8(`7`).is_oct_digit() == true
[inline]
pub fn (c u8) is_oct_digit() bool {
	return match c {
		`0`...`7` { true }
		else { false }
	}
}
1781
// is_bin_digit returns `true` if the byte is a binary digit (0 or 1) and `false` otherwise.
// Example: assert u8(`0`).is_bin_digit() == true
[inline]
pub fn (c u8) is_bin_digit() bool {
	return match c {
		`0`, `1` { true }
		else { false }
	}
}
1788
// is_letter returns `true` if the byte is in range a-z or A-Z and `false` otherwise.
// Example: assert u8(`V`).is_letter() == true
[inline]
pub fn (c u8) is_letter() bool {
	return match c {
		`a`...`z`, `A`...`Z` { true }
		else { false }
	}
}
1795
// is_alnum returns `true` if the byte is in range a-z, A-Z, 0-9 and `false` otherwise.
// Example: assert u8(`V`).is_alnum() == true
[inline]
pub fn (c u8) is_alnum() bool {
	if c >= `0` && c <= `9` {
		return true
	}
	if c >= `A` && c <= `Z` {
		return true
	}
	return c >= `a` && c <= `z`
}
1802
// free allows for manually freeing the memory occupied by the string.
// It does nothing when compiled with `prealloc`, when `s.is_lit` is 1,
// or when `s.str` is 0.
// After the memory is released, `s.str` is set to nil, and `s.is_lit` is set to
// a marker value, so that a second call on the same string is detected and
// reported, instead of freeing the same memory again.
[manualfree; unsafe]
pub fn (s &string) free() {
	$if prealloc {
		return
	}
	// -98761234 is the marker, stored at the end of this function after a successful free
	if s.is_lit == -98761234 {
		double_free_msg := unsafe { &u8(c'double string.free() detected\n') }
		double_free_msg_len := unsafe { vstrlen(double_free_msg) }
		$if freestanding {
			bare_eprint(double_free_msg, u64(double_free_msg_len))
		} $else {
			// NOTE(review): fd 1 is conventionally stdout, while the freestanding
			// branch uses bare_eprint - confirm whether fd 2 was intended here
			_write_buf_to_fd(1, double_free_msg, double_free_msg_len)
		}
		return
	}
	// presumably is_lit == 1 marks string literals, whose memory must not be freed - TODO confirm
	if s.is_lit == 1 || s.str == 0 {
		return
	}
	unsafe {
		// C.printf(c's: %x %s\n', s.str, s.str)
		free(s.str)
		s.str = nil
	}
	// remember that this string was already freed
	s.is_lit = -98761234
}
1829
// before returns the contents before the first occurrence of `sub` in the string.
// If the substring is not found, it returns the full input string.
// Example: assert '23:34:45.234'.before('.') == '23:34:45'
// Example: assert 'abcd'.before('.') == 'abcd'
// TODO: deprecate and remove either .before or .all_before
pub fn (s string) before(sub string) string {
	cut := s.index_(sub)
	if cut != -1 {
		return s[..cut]
	}
	return s.clone()
}
1842
// all_before returns the contents before the first occurrence of `sub` in the string.
// If the substring is not found, it returns the full input string.
// Example: assert '23:34:45.234'.all_before('.') == '23:34:45'
// Example: assert 'abcd'.all_before('.') == 'abcd'
pub fn (s string) all_before(sub string) string {
	// TODO remove dup method
	cut := s.index_(sub)
	if cut != -1 {
		return s[..cut]
	}
	return s.clone()
}
1855
// all_before_last returns the contents before the last occurrence of `sub` in the string.
// If the substring is not found, it returns the full input string.
// Example: assert '23:34:45.234'.all_before_last(':') == '23:34'
// Example: assert 'abcd'.all_before_last('.') == 'abcd'
pub fn (s string) all_before_last(sub string) string {
	cut := s.last_index_(sub)
	if cut != -1 {
		return s[..cut]
	}
	return s.clone()
}
1867
// all_after returns the contents after the first occurrence of `sub` in the string.
// If the substring is not found, it returns the full input string.
// Example: assert '23:34:45.234'.all_after('.') == '234'
// Example: assert 'abcd'.all_after('z') == 'abcd'
pub fn (s string) all_after(sub string) string {
	found := s.index_(sub)
	if found != -1 {
		return s[found + sub.len..]
	}
	return s.clone()
}
1879
// all_after_last returns the contents after the last occurrence of `sub` in the string.
// If the substring is not found, it returns the full input string.
// Example: assert '23:34:45.234'.all_after_last(':') == '45.234'
// Example: assert 'abcd'.all_after_last('z') == 'abcd'
pub fn (s string) all_after_last(sub string) string {
	found := s.last_index_(sub)
	if found != -1 {
		return s[found + sub.len..]
	}
	return s.clone()
}
1891
// all_after_first returns the contents after the first occurrence of `sub` in the string.
// If the substring is not found, it returns the full input string.
// Example: assert '23:34:45.234'.all_after_first(':') == '34:45.234'
// Example: assert 'abcd'.all_after_first('z') == 'abcd'
pub fn (s string) all_after_first(sub string) string {
	found := s.index_(sub)
	if found != -1 {
		return s[found + sub.len..]
	}
	return s.clone()
}
1903
// after returns the contents after the last occurrence of `sub` in the string.
// If the substring is not found, it returns the full input string.
// It is a thin wrapper, that just calls `all_after_last(sub)`.
// Example: assert '23:34:45.234'.after(':') == '45.234'
// Example: assert 'abcd'.after('z') == 'abcd'
// TODO: deprecate either .all_after_last or .after
[inline]
pub fn (s string) after(sub string) string {
	return s.all_after_last(sub)
}
1913
// after_char returns the contents after the first occurrence of the `sub` byte in the string.
// If the byte is not found, it returns the full input string.
// Example: assert '23:34:45.234'.after_char(`:`) == '34:45.234'
// Example: assert 'abcd'.after_char(`:`) == 'abcd'
pub fn (s string) after_char(sub u8) string {
	found := s.index_u8(sub)
	if found == -1 {
		return s.clone()
	}
	return s[found + 1..]
}
1931
// join joins a string array into a string using `sep` separator.
// An empty array produces an empty string.
// Example: assert ['Hello','V'].join(' ') == 'Hello V'
pub fn (a []string) join(sep string) string {
	if a.len == 0 {
		return ''
	}
	// total size: every element, plus one separator between each pair of neighbours
	mut total := sep.len * (a.len - 1)
	for item in a {
		total += item.len
	}
	mut res := string{
		str: unsafe { malloc_noscan(total + 1) }
		len: total
	}
	mut offset := 0
	for k, item in a {
		// every element except the first one is preceded by the separator
		if k > 0 {
			unsafe {
				vmemcpy(voidptr(res.str + offset), sep.str, sep.len)
				offset += sep.len
			}
		}
		unsafe {
			vmemcpy(voidptr(res.str + offset), item.str, item.len)
			offset += item.len
		}
	}
	unsafe {
		res.str[res.len] = 0
	}
	return res
}
1967
// join_lines joins a string array into a string using a `\n` newline delimiter.
[inline]
pub fn (s []string) join_lines() string {
	newline := '\n'
	return s.join(newline)
}
1973
// reverse returns a new string, with the bytes of `s` in reversed order.
// Example: assert 'Hello V'.reverse() == 'V olleH'
[direct_array_access]
pub fn (s string) reverse() string {
	if s.len <= 1 {
		return s.clone()
	}
	mut res := string{
		str: unsafe { malloc_noscan(s.len + 1) }
		len: s.len
	}
	// fill the result front to back, while reading `s` back to front
	for dst := 0; dst < s.len; dst++ {
		unsafe {
			res.str[dst] = s[s.len - 1 - dst]
		}
	}
	unsafe {
		res.str[res.len] = 0
	}
	return res
}
1995
// limit returns a portion of the string, starting at `0` and extending for at most `max` runes.
// A copy of the whole string is returned, when it has `max` runes or less.
// 'hello'.limit(2) => 'he'
// 'hi'.limit(10) => 'hi'
pub fn (s string) limit(max int) string {
	runes := s.runes()
	if max >= runes.len {
		return s.clone()
	}
	return runes[0..max].string()
}
2006
// hash returns an integer hash of the string.
// It is computed as `h = h * 31 + byte` over all the bytes of the string,
// starting from 0, so the hash of an empty string is 0.
pub fn (s string) hash() int {
	mut acc := u32(0)
	for b in s {
		acc = acc * 31 + u32(b)
	}
	return int(acc)
}
2017
// bytes returns a new byte array, with a copy of the bytes of the string.
// The terminating 0 byte is not included.
pub fn (s string) bytes() []u8 {
	if s.len == 0 {
		return []
	}
	mut copied := []u8{len: s.len}
	unsafe {
		vmemcpy(copied.data, s.str, s.len)
	}
	return copied
}
2027
// repeat returns a new string with `count` number of copies of the string it was called on.
// It panics when `count` is negative, and returns '' when `count` is 0.
[direct_array_access]
pub fn (s string) repeat(count int) string {
	if count < 0 {
		panic('string.repeat: count is negative: ${count}')
	}
	if count == 0 {
		return ''
	}
	if count == 1 {
		return s.clone()
	}
	total := s.len * count
	mut buf := unsafe { malloc_noscan(total + 1) }
	// lay the copies one after the other
	mut offset := 0
	for _ in 0 .. count {
		unsafe {
			vmemcpy(buf + offset, s.str, s.len)
		}
		offset += s.len
	}
	unsafe {
		buf[total] = 0
	}
	return unsafe { buf.vstring_with_len(total) }
}
2050
// fields returns a string array of the words in the string, where the words
// are separated by one or more of ` ` (32), `\t` (9) or `\n` (10).
// Empty words are never returned.
// Example: assert '\t\tv = v'.fields() == ['v', '=', 'v']
// Example: assert ' sss ssss'.fields() == ['sss', 'ssss']
pub fn (s string) fields() []string {
	mut res := []string{}
	// index of the first byte of the current word, or -1 while between words
	mut word_start := -1
	for i, c in s {
		if c in [32, 9, 10] {
			// a separator closes the current word, if there is one
			if word_start >= 0 {
				res << s[word_start..i]
				word_start = -1
			}
		} else if word_start < 0 {
			word_start = i
		}
	}
	if word_start >= 0 {
		// collect the remainder word at the end
		res << s[word_start..s.len]
	}
	return res
}
2084
// strip_margin allows multi-line strings to be formatted in a way that removes white-space
// before a delimiter. By default `|` is used.
// It is a thin wrapper, that calls `strip_margin_custom` with the `|` byte.
// Note: the delimiter has to be a byte at this time. That means surrounding
// the value in ``.
//
// See also: string.trim_indent()
//
// Example:
// ```v
// st := 'Hello there,
//       | this is a string,
//       | Everything before the first | is removed'.strip_margin()
//
// assert st == 'Hello there,
//  this is a string,
//  Everything before the first | is removed'
// ```
[inline]
pub fn (s string) strip_margin() string {
	return s.strip_margin_custom(`|`)
}
2106
// strip_margin_custom does the same as `strip_margin` but will use `del` as delimiter instead of `|`.
// When `del` is a white-space byte, a warning is printed, and `|` is used instead.
// After every line ending (`\n`, `\r` or `\r\n`), all the bytes up to and including
// the next delimiter are dropped. The scan for the delimiter is not limited to
// the current line.
[direct_array_access]
pub fn (s string) strip_margin_custom(del u8) string {
	mut sep := del
	if sep.is_space() {
		println('Warning: `strip_margin` cannot use white-space as a delimiter')
		println(' Defaulting to `|`')
		sep = `|`
	}
	// don't know how much space the resulting string will be, but the max it
	// can be is this big
	mut ret := unsafe { malloc_noscan(s.len + 1) }
	// count is the number of bytes written to ret so far
	mut count := 0
	for i := 0; i < s.len; i++ {
		// 10 is `\n`, 13 is `\r`
		if s[i] in [10, 13] {
			// the line ending itself is kept in the output
			unsafe {
				ret[count] = s[i]
			}
			count++
			// CRLF
			if s[i] == 13 && i < s.len - 1 && s[i + 1] == 10 {
				unsafe {
					ret[count] = s[i + 1]
				}
				count++
				i++
			}
			// skip forward to the next delimiter (or to the end of the string);
			// the delimiter itself is then skipped by the `i++` of the outer loop
			for s[i] != sep {
				i++
				if i >= s.len {
					break
				}
			}
		} else {
			// an ordinary byte is copied as is
			unsafe {
				ret[count] = s[i]
			}
			count++
		}
	}
	unsafe {
		ret[count] = 0
		return ret.vstring_with_len(count)
	}
}
2152
// trim_indent detects a common minimal indent of all the input lines,
// removes it from every line and also removes the first and the last
// lines if they are blank (notice difference blank vs empty).
//
// Note that blank lines do not affect the detected indent level.
//
// In case if there are non-blank lines with no leading whitespace characters
// (no indent at all) then the common indent is 0, and therefore this function
// doesn't change the indentation.
//
// Example:
// ```v
// st := '
//      Hello there,
//      this is a string,
//      all the leading indents are removed
//      and also the first and the last lines if they are blank
// '.trim_indent()
//
// assert st == 'Hello there,
// this is a string,
// all the leading indents are removed
// and also the first and the last lines if they are blank'
// ```
pub fn (s string) trim_indent() string {
	mut lines := s.split_into_lines()

	// the smallest indent among the non-blank lines
	mut min_common_indent := int(2147483647) // max int
	for candidate in lines {
		if candidate.is_blank() {
			continue
		}
		width := candidate.indent_width()
		if width < min_common_indent {
			min_common_indent = width
		}
	}

	// trim first line if it's blank
	if lines.len > 0 && lines.first().is_blank() {
		lines = unsafe { lines[1..] }
	}

	// trim last line if it's blank
	if lines.len > 0 && lines.last().is_blank() {
		lines = unsafe { lines[..lines.len - 1] }
	}

	mut trimmed_lines := []string{cap: lines.len}
	for line in lines {
		// blank lines are kept as they are, only the other lines lose the common indent
		if line.is_blank() {
			trimmed_lines << line
		} else {
			trimmed_lines << line[min_common_indent..]
		}
	}

	return trimmed_lines.join('\n')
}
2214
// indent_width returns the number of white-space bytes (see `u8.is_space`)
// at the beginning of the string.
// Note: it returns 0, when the string has no non white-space bytes at all.
// Example: assert ' v'.indent_width() == 1
// Example: assert '\t\tv'.indent_width() == 2
pub fn (s string) indent_width() int {
	for pos := 0; pos < s.len; pos++ {
		if !s[pos].is_space() {
			return pos
		}
	}

	return 0
}
2227
// is_blank returns true if the string is empty or contains only white-space.
// Example: assert ' '.is_blank()
// Example: assert '\t'.is_blank()
// Example: assert 'v'.is_blank() == false
pub fn (s string) is_blank() bool {
	// an empty string has no bytes to check, so the loop below is skipped
	for pos := 0; pos < s.len; pos++ {
		if !s[pos].is_space() {
			return false
		}
	}

	return true
}
2245
// match_glob matches the string, with a Unix shell-style wildcard pattern.
// Note: wildcard patterns are NOT the same as regular expressions.
// They are much simpler, and do not allow backtracking, captures, etc.
// The special characters used in shell-style wildcards are:
//   `*` - matches everything
//   `?` - matches any single character
//   `[seq]` - matches any of the characters in the sequence
//   `[^seq]` - matches any character that is NOT in the sequence
// Any other character in `pattern`, is matched 1:1 to the corresponding
// character in `name`, including / and \.
// You can wrap the meta-characters in brackets too, i.e. `[?]` matches `?`
// in the string, and `[*]` matches `*` in the string.
// A bracket sequence ends at the first `]` after the opening `[`.
// A `[` without a closing `]` is matched as an ordinary `[` character.
// The whole of `name` has to be matched by the whole of `pattern`.
// Example: assert 'ABCD'.match_glob('AB*')
// Example: assert 'ABCD'.match_glob('*D')
// Example: assert 'ABCD'.match_glob('*B*')
// Example: assert !'ABCD'.match_glob('AB')
// Example: assert 'ABCD'.match_glob('A[^X]C[DE]')
[direct_array_access]
pub fn (name string) match_glob(pattern string) bool {
	// Initial port based on https://research.swtch.com/glob.go
	// See also https://research.swtch.com/glob
	mut px := 0
	mut nx := 0
	// The restart point, recorded by the most recently seen `*`.
	// next_nx == 0 means that no `*` was seen yet, so there is nothing to retry.
	mut next_px := 0
	mut next_nx := 0
	plen := pattern.len
	nlen := name.len
	for px < plen || nx < nlen {
		if px < plen {
			c := pattern[px]
			match c {
				`?` {
					// single-character wildcard
					if nx < nlen {
						px++
						nx++
						continue
					}
				}
				`*` {
					// zero-or-more-character wildcard
					// Try to match at nx.
					// If that doesn't work out, restart at nx+1 next.
					next_px = px
					next_nx = nx + 1
					px++
					continue
				}
				`[` {
					// A bracket sequence consumes exactly one character of `name`,
					// so it can never match, when `name` is already exhausted.
					if nx < nlen {
						wanted_c := name[nx]
						mut seq_idx := px + 1
						mut is_inverted := false
						if seq_idx < plen && pattern[seq_idx] == `^` {
							is_inverted = true
							seq_idx++
						}
						// Scan the whole sequence up to its closing `]`,
						// remembering whether wanted_c is a member of it.
						mut is_member := false
						for seq_idx < plen && pattern[seq_idx] != `]` {
							if pattern[seq_idx] == wanted_c {
								is_member = true
							}
							seq_idx++
						}
						if seq_idx < plen {
							// pattern[seq_idx] is the closing `]` here.
							// `[seq]` needs a member, `[^seq]` needs a non member:
							if is_member != is_inverted {
								px = seq_idx + 1
								nx++
								continue
							}
						} else if wanted_c == `[` {
							// There is no closing `]` => treat `[` as an ordinary character.
							px++
							nx++
							continue
						}
					}
					// No match here; fall through to the restart logic below,
					// so that an earlier `*` can still be retried.
				}
				else {
					// an ordinary character
					if nx < nlen && name[nx] == c {
						px++
						nx++
						continue
					}
				}
			}
		}
		if 0 < next_nx && next_nx <= nlen {
			// A mismatch, try restarting:
			px = next_px
			nx = next_nx
			continue
		}
		return false
	}
	// Matched all of `pattern` to all of `name`
	return true
}
2354
// is_ascii returns true if all characters belong to the US-ASCII set ([` `..`~`])
// Note: only the bytes from ` ` (space) to `~` inclusive are accepted, so strings
// containing control characters like `\n` or `\t` are NOT considered ASCII here.
// An empty string is considered ASCII.
// Example: assert 'Hello, V!'.is_ascii()
// Example: assert !'a\tb'.is_ascii()
[inline]
pub fn (s string) is_ascii() bool {
	// Scan the bytes in place, and stop at the first one that is out of range,
	// instead of building a temporary byte array just to inspect it.
	for c in s {
		if c < u8(` `) || c > u8(`~`) {
			return false
		}
	}
	return true
}