1 | // Copyright (c) 2019-2023 Alexander Medvednikov. All rights reserved. |
2 | // Use of this source code is governed by an MIT license |
3 | // that can be found in the LICENSE file. |
4 | module builtin |
5 | |
6 | import strconv |
7 | |
8 | /* |
9 | Note: A V string should be/is immutable from the point of view of |
10 | V user programs after it is first created. A V string is |
11 | also slightly larger than the equivalent C string because |
12 | the V string also has an integer length attached. |
13 | |
14 | This tradeoff is made, since V strings are created just *once*, |
15 | but potentially used *many times* over their lifetime. |
16 | |
17 | The V string implementation uses a struct, that has a .str field, |
18 | which points to a C style 0 terminated memory block. Although not |
19 | strictly necessary from the V point of view, that additional 0 |
20 | is *very useful for C interoperability*. |
21 | |
22 | The V string implementation also has an integer .len field, |
23 | containing the length of the .str field, excluding the |
24 | terminating 0 (just like the C's strlen(s) would do). |
25 | |
26 | The 0 ending of .str, and the .len field, mean that in practice: |
27 | a) a V string s can be used very easily, wherever a |
28 | C string is needed, just by passing s.str, |
29 | without a need for further conversion/copying. |
30 | |
31 | b) where strlen(s) is needed, you can just pass s.len, |
32 | without having to constantly recompute the length of s |
33 | *over and over again* like some C programs do. This is because |
34 | V strings are immutable and so their length does not change. |
35 | |
36 | Ordinary V code *does not need* to be concerned with the |
37 | additional 0 in the .str field. The 0 *must* be put there by the |
38 | low level string creating functions inside this module. |
39 | |
40 | Failing to do this will lead to programs that work most of the |
41 | time, when used with pure V functions, but fail in strange ways, |
42 | when used with modules using C functions (for example os and so on). |
43 | */ |
// string is the builtin V string type: a pointer to the bytes (`str`),
// their count (`len`), and an ownership marker (`is_lit`).
pub struct string {
pub:
	str &u8 = 0 // points to a C style 0 terminated string of bytes.
	len int // the length of the .str field, excluding the ending 0 byte. It is always equal to strlen(.str).
	// NB string.is_lit is an enumeration of the following:
	// .is_lit == 0 => a fresh string, should be freed by autofree
	// .is_lit == 1 => a literal string from .rodata, should NOT be freed
	// .is_lit == -98761234 => already freed string, protects against double frees.
	// ---------> ^^^^^^^^^ calling free on these is a bug.
	// Any other value means that the string has been corrupted.
mut:
	is_lit int
}
57 | |
// runes decodes the string `s` into an array holding all of its utf runes.
// Use it when you need random access to the individual runes.
[direct_array_access]
pub fn (s string) runes() []rune {
	mut result := []rune{cap: s.len}
	mut pos := 0
	for pos < s.len {
		width := utf8_char_len(unsafe { s.str[pos] })
		if width <= 1 {
			// single byte: the byte value itself is the rune
			result << unsafe { s.str[pos] }
			pos++
			continue
		}
		// multi byte sequence: clamp the slice to the end of the string,
		// in case the sequence is truncated
		mut stop := s.len
		if pos + width <= s.len - 1 {
			stop = pos + width
		}
		chunk := unsafe { s[pos..stop] }
		result << chunk.utf32_code()
		pos += width
	}
	return result
}
76 | |
// cstring_to_vstring returns a new V string, that is a *copy* of the
// C style 0 terminated string pointed to by `s` (i.e. a `char*`).
// It is recommended to use it, unless you *do* understand the implications of
// tos/tos2/tos3/tos4/tos5 in terms of memory management and interactions with
// -autofree and `[manualfree]`.
// It will panic, if the pointer `s` is 0.
[unsafe]
pub fn cstring_to_vstring(s &char) string {
	// first wrap the C memory without copying, then copy it
	borrowed := unsafe { tos2(&u8(s)) }
	return borrowed.clone()
}
88 | |
// tos_clone returns a new V string, that is a *copy* of the C style string
// pointed to by `s`. It does the same as cstring_to_vstring; the only
// difference is that tos_clone expects `&u8`, while cstring_to_vstring
// expects `&char`.
// It will panic, if the pointer `s` is 0.
[unsafe]
pub fn tos_clone(s &u8) string {
	// first wrap the C memory without copying, then copy it
	borrowed := unsafe { tos2(s) }
	return borrowed.clone()
}
97 | |
// tos wraps the C style 0 terminated memory block `s`, that has `len` bytes,
// in a V string.
// Note: the memory block pointed by s is *reused, not copied*!
// It will panic, when the pointer `s` is 0.
// See also `tos_clone`.
[unsafe]
pub fn tos(s &u8, len int) string {
	if s == 0 {
		panic('tos(): nil string')
	}
	wrapped := string{
		str: unsafe { s }
		len: len
	}
	return wrapped
}
112 | |
// tos2 wraps the C style 0 terminated memory block `s` in a V string.
// Note: the memory block pointed by s is *reused, not copied*!
// The length is measured with vstrlen first, so tos2 is more costly than `tos`.
// It will panic, when the pointer `s` is 0.
// It is the same as `tos3`, but for `&u8` pointers, avoiding callsite casts.
// See also `tos_clone`.
[unsafe]
pub fn tos2(s &u8) string {
	if s == 0 {
		panic('tos2: nil string')
	}
	measured := unsafe { vstrlen(s) }
	return string{
		str: unsafe { s }
		len: measured
	}
}
129 | |
// tos3 wraps the C style 0 terminated memory block `s` in a V string.
// Note: the memory block pointed by s is *reused, not copied*!
// The length is measured with vstrlen_char first, so tos3 is more costly than `tos`.
// It will panic, when the pointer `s` is 0.
// It is the same as `tos2`, but for `&char` pointers, avoiding callsite casts.
// See also `tos_clone`.
[unsafe]
pub fn tos3(s &char) string {
	if s == 0 {
		panic('tos3: nil string')
	}
	measured := unsafe { vstrlen_char(s) }
	return string{
		str: unsafe { &u8(s) }
		len: measured
	}
}
146 | |
// tos4 wraps the C style 0 terminated memory block `s` in a V string.
// Note: the memory block pointed by s is *reused, not copied*!
// The length is measured with vstrlen first, so tos4 is more costly than `tos`.
// It returns '', when given a 0 pointer `s`, it does NOT panic.
// It is the same as `tos5`, but for `&u8` pointers, avoiding callsite casts.
// See also `tos_clone`.
[unsafe]
pub fn tos4(s &u8) string {
	if s != 0 {
		return string{
			str: unsafe { s }
			len: unsafe { vstrlen(s) }
		}
	}
	// a nil pointer is treated as the empty string
	return ''
}
163 | |
// tos5 wraps the C style 0 terminated memory block `s` in a V string.
// Note: the memory block pointed by s is *reused, not copied*!
// The length is measured with vstrlen_char first, so tos5 is more costly than `tos`.
// It returns '', when given a 0 pointer `s`, it does NOT panic.
// It is the same as `tos4`, but for `&char` pointers, avoiding callsite casts.
// See also `tos_clone`.
[unsafe]
pub fn tos5(s &char) string {
	if s != 0 {
		return string{
			str: unsafe { &u8(s) }
			len: unsafe { vstrlen_char(s) }
		}
	}
	// a nil pointer is treated as the empty string
	return ''
}
180 | |
// vstring wraps the C style string pointed by `bp` in a V string.
// Note: the memory block pointed by `bp` is *reused, not copied*!
// Note: instead of `&u8(arr.data).vstring()`, do use `tos_clone(&u8(arr.data))`.
// Apart from that, the returned strings are normal V strings,
// (i.e. they would be freed by V's -autofree mechanism, when they are no longer used).
// See also `tos_clone`.
[unsafe]
pub fn (bp &u8) vstring() string {
	measured := unsafe { vstrlen(bp) }
	return string{
		str: unsafe { bp }
		len: measured
	}
}
194 | |
// vstring_with_len wraps the C style 0 terminated string pointed by `bp`,
// in a V string of length `len`.
// Note: the memory block pointed by `bp` is *reused, not copied*!
// It has lower overhead than .vstring(), since the length is given by
// the caller, instead of being calculated.
// See also `tos_clone`.
[unsafe]
pub fn (bp &u8) vstring_with_len(len int) string {
	wrapped := string{
		str: unsafe { bp }
		len: len
		is_lit: 0
	}
	return wrapped
}
208 | |
// vstring wraps the C style string pointed by `cp` in a V string.
// Note: the memory block pointed by `cp` is *reused, not copied*!
// Apart from that, the returned strings are normal V strings,
// (i.e. they would be freed by V's -autofree mechanism, when they are
// no longer used).
// Note: instead of `&u8(a.data).vstring()`, use `tos_clone(&u8(a.data))`.
// See also `tos_clone`.
[unsafe]
pub fn (cp &char) vstring() string {
	measured := unsafe { vstrlen_char(cp) }
	return string{
		str: &u8(cp)
		len: measured
		is_lit: 0
	}
}
224 | |
// vstring_with_len wraps the C style 0 terminated string pointed by `cp`,
// in a V string of length `len`.
// Note: the memory block pointed by `cp` is *reused, not copied*!
// It has lower overhead than .vstring(), since the length is given by
// the caller, instead of being calculated.
// See also `tos_clone`.
[unsafe]
pub fn (cp &char) vstring_with_len(len int) string {
	wrapped := string{
		str: &u8(cp)
		len: len
		is_lit: 0
	}
	return wrapped
}
238 | |
// vstring_literal wraps the C style string pointed by `bp` in a V string.
// Note: the memory block pointed by `bp` is *reused, not copied*!
// NB2: unlike vstring, vstring_literal marks the string as a literal
// (is_lit == 1), so it will not be freed by -autofree.
// This is suitable for readonly strings, C string literals etc,
// that can be read by the V program, but that should not be
// managed/freed by it, for example `os.args` is implemented using it.
// See also `tos_clone`.
[unsafe]
pub fn (bp &u8) vstring_literal() string {
	measured := unsafe { vstrlen(bp) }
	return string{
		str: unsafe { bp }
		len: measured
		is_lit: 1
	}
}
255 | |
// vstring_literal_with_len wraps the C style string pointed by `bp`,
// in a V string of length `len`, that is marked as a literal (is_lit == 1).
// Note: the memory block pointed by `bp` is *reused, not copied*!
// It has lower overhead than .vstring_literal(), since the length is given
// by the caller, instead of being calculated.
// See also `tos_clone`.
[unsafe]
pub fn (bp &u8) vstring_literal_with_len(len int) string {
	wrapped := string{
		str: unsafe { bp }
		len: len
		is_lit: 1
	}
	return wrapped
}
269 | |
// vstring_literal wraps the C style `char*` string pointed by `cp`,
// in a V string, that is marked as a literal (is_lit == 1).
// Note: the memory block pointed by `cp` is *reused, not copied*!
// See also the `&u8` variant of `vstring_literal` for more details.
// See also `tos_clone`.
[unsafe]
pub fn (cp &char) vstring_literal() string {
	measured := unsafe { vstrlen_char(cp) }
	return string{
		str: &u8(cp)
		len: measured
		is_lit: 1
	}
}
282 | |
// vstring_literal_with_len wraps the C style `char*` string pointed by `cp`,
// in a V string of length `len`, that is marked as a literal (is_lit == 1).
// Note: the memory block pointed by `cp` is *reused, not copied*!
// It has lower overhead than .vstring_literal(), since the length is given
// by the caller, instead of being calculated.
// See also `tos_clone`.
[unsafe]
pub fn (cp &char) vstring_literal_with_len(len int) string {
	wrapped := string{
		str: &u8(cp)
		len: len
		is_lit: 1
	}
	return wrapped
}
297 | |
// len_utf8 counts the runes contained in the string `s`.
pub fn (s string) len_utf8() int {
	mut count := 0
	mut pos := 0
	for pos < s.len {
		lead := unsafe { s.str[pos] }
		// the bit trick maps the top bits of the leading byte to the number of
		// extra bytes (0..3), that belong to the same rune
		pos += ((0xe5000000 >> ((lead >> 3) & 0x1e)) & 3) + 1
		count++
	}
	return count
}
308 | |
// clone_static returns an independent copy of the given string.
// It should be used only in -autofree generated code.
[inline]
fn (a string) clone_static() string {
	dup := a.clone()
	return dup
}
315 | |
// clone returns a copy of the V string `a`, stored in a freshly
// allocated, 0 terminated buffer. An empty string is returned as ''.
pub fn (a string) clone() string {
	if a.len == 0 {
		return ''
	}
	n := a.len
	mut buf := unsafe { malloc_noscan(n + 1) } // +1 for the ending 0 byte
	unsafe {
		vmemcpy(buf, a.str, n)
		buf[n] = 0
	}
	return string{
		str: buf
		len: n
	}
}
331 | |
// replace_once replaces the first occurrence of `rep` with the string passed in `with`.
// When `rep` is not found, a copy of `s` is returned.
pub fn (s string) replace_once(rep string, with string) string {
	pos := s.index_(rep)
	if pos != -1 {
		head := s.substr(0, pos)
		tail := s.substr(pos + rep.len, s.len)
		return head + with + tail
	}
	return s.clone()
}
340 | |
// replace replaces all occurrences of `rep` with the string passed in `with`.
// When there is nothing to replace, a copy of `s` is returned.
[direct_array_access]
pub fn (s string) replace(rep string, with string) string {
	nothing_to_do := s.len == 0 || rep.len == 0 || rep.len > s.len
	if nothing_to_do || !s.contains(rep) {
		return s.clone()
	}
	// TODO PERF Allocating ints is expensive. Should be a stack array
	// Collect the start positions of all the matches first:
	mut hits := []int{cap: s.len / rep.len}
	defer {
		unsafe { hits.free() }
	}
	mut cursor := 0
	for {
		cursor = s.index_after(rep, cursor)
		if cursor == -1 {
			break
		}
		hits << cursor
		cursor += rep.len
	}
	// Dont change the string if there's nothing to replace
	if hits.len == 0 {
		return s.clone()
	}
	// The number of matches is known now, so the result can be allocated just once
	out_len := s.len + hits.len * (with.len - rep.len)
	mut out := unsafe { malloc_noscan(out_len + 1) } // +1 for the ending 0 byte
	mut w := 0 // write position in `out`
	mut r := 0 // read position in `s`
	for hit in hits {
		// copy the untouched bytes before the match
		for r < hit {
			unsafe {
				out[w] = s[r]
			}
			w++
			r++
		}
		// skip the matched piece in the source
		r = hit + rep.len
		// write the replacement instead of it
		for k in 0 .. with.len {
			unsafe {
				out[w] = with[k]
			}
			w++
		}
	}
	// copy whatever is left after the last match
	for r < s.len {
		unsafe {
			out[w] = s[r]
		}
		w++
		r++
	}
	unsafe {
		out[out_len] = 0
		return tos(out, out_len)
	}
}
403 | |
// RepIndex records a single match found by string.replace_each.
struct RepIndex {
	idx     int // the position of the match in the source string
	val_idx int // the index in `vals` of the matched `rep`; its `with` is at val_idx + 1
}
408 | |
// replace_each replaces all occurrences of the string pairs given in `vals`.
// `vals` is a flat list of pairs: ['rep1', 'with1', 'rep2', 'with2', ...].
// Pairs are searched in the given order; bytes already matched by an earlier
// pair are not matched again by a later one.
// When `vals` has an odd number of elements, a message is printed to stderr,
// and a copy of `s` is returned. Pairs with an empty `rep` are skipped.
// Example: assert 'ABCD'.replace_each(['B','C/','C','D','D','C']) == 'AC/DC'
[direct_array_access]
pub fn (s string) replace_each(vals []string) string {
	if s.len == 0 || vals.len == 0 {
		return s.clone()
	}
	if vals.len % 2 != 0 {
		eprintln('string.replace_each(): odd number of strings')
		return s.clone()
	}
	// `rep` - string to replace
	// `with` - string to replace with
	// Remember positions of all rep strings, and calculate the length
	// of the new string to do just one allocation.
	mut new_len := s.len
	mut idxs := []RepIndex{cap: 6}
	// Searching is done in a private copy, because matched bytes are overwritten below.
	s_ := s.clone()
	for rep_i := 0; rep_i < vals.len; rep_i += 2 {
		// vals: ['rep1, 'with1', 'rep2', 'with2']
		rep := vals[rep_i]
		with := vals[rep_i + 1]
		// An empty `rep` can never advance the search position (idx += 0), and the
		// copy loop below would step backwards for it (i += rep.len - 1), so such
		// a pair is skipped, the same way `replace` ignores an empty `rep`.
		if rep.len == 0 {
			continue
		}
		// Each pair is searched from the start of the string.
		mut idx := 0
		for {
			idx = s_.index_after(rep, idx)
			if idx == -1 {
				break
			}
			// The string already found is set to `/del`, to avoid duplicate searches.
			for i in 0 .. rep.len {
				unsafe {
					s_.str[idx + i] = 127
				}
			}
			// We need to remember both the position in the string,
			// and which rep/with pair it refers to.
			idxs << RepIndex{
				idx: idx
				val_idx: rep_i
			}
			idx += rep.len
			new_len += with.len - rep.len
		}
	}

	// Dont change the string if there's nothing to replace
	if idxs.len == 0 {
		return s.clone()
	}
	// The matches were collected pair by pair; the copy below needs them in string order.
	idxs.sort(a.idx < b.idx)
	mut b := unsafe { malloc_noscan(new_len + 1) } // add space for 0 terminator
	// Fill the new string
	mut idx_pos := 0
	mut cur_idx := idxs[idx_pos]
	mut b_i := 0
	for i := 0; i < s.len; i++ {
		if i == cur_idx.idx {
			// Reached the location of rep, replace it with "with"
			rep := vals[cur_idx.val_idx]
			with := vals[cur_idx.val_idx + 1]
			for j in 0 .. with.len {
				unsafe {
					b[b_i] = with[j]
				}
				b_i++
			}
			// Skip the length of rep, since we just replaced it with "with"
			i += rep.len - 1
			// Go to the next index
			idx_pos++
			if idx_pos < idxs.len {
				cur_idx = idxs[idx_pos]
			}
		} else {
			// Rep doesnt start here, just copy
			unsafe {
				b[b_i] = s.str[i]
			}
			b_i++
		}
	}
	unsafe {
		b[new_len] = 0
		return tos(b, new_len)
	}
}
498 | |
// replace_char replaces every occurrence of the character `rep`, with
// `repeat` copies of the character `with`.
// It panics when `repeat` <= 0, unless compiled with no_bounds_checking.
// Example: assert '\tHello!'.replace_char(`\t`,` `,8) == '        Hello!'
[direct_array_access]
pub fn (s string) replace_char(rep u8, with u8, repeat int) string {
	$if !no_bounds_checking {
		if repeat <= 0 {
			panic('string.replace_char(): tab length too short')
		}
	}
	if s.len == 0 {
		return s.clone()
	}
	// TODO Allocating ints is expensive. Should be a stack array
	// - string.replace()
	mut hits := []int{cap: s.len}
	defer {
		unsafe { hits.free() }
	}
	// No need to do a contains(), this pass already traverses the entire string
	for pos := 0; pos < s.len; pos++ {
		if s[pos] == rep {
			hits << pos
		}
	}
	if hits.len == 0 {
		return s.clone()
	}
	// The number of matches is known now, so the result can be allocated just once
	out_len := s.len + hits.len * (repeat - 1)
	mut out := unsafe { malloc_noscan(out_len + 1) } // +1 for the ending 0 byte
	mut w := 0 // write position in `out`
	mut r := 0 // read position in `s`
	for hit in hits {
		// copy the untouched bytes before the match
		for r < hit {
			unsafe {
				out[w] = s[r]
			}
			w++
			r++
		}
		// skip the matched character in the source
		r = hit + 1
		// write `repeat` copies of the replacement character
		for _ in 0 .. repeat {
			unsafe {
				out[w] = with
			}
			w++
		}
	}
	// copy whatever is left after the last match
	for r < s.len {
		unsafe {
			out[w] = s[r]
		}
		w++
		r++
	}
	unsafe {
		out[out_len] = 0
		return tos(out, out_len)
	}
}
560 | |
// normalize_tabs replaces each tab character with `tab_len` spaces.
// Example: assert '\t\tpop rax\t; pop rax'.normalize_tabs(2) == '    pop rax  ; pop rax'
[inline]
pub fn (s string) normalize_tabs(tab_len int) string {
	expanded := s.replace_char(`\t`, ` `, tab_len)
	return expanded
}
567 | |
// bool returns `true` if the string equals the word "true" (or the single
// letter "t"); it returns `false` otherwise.
[inline]
pub fn (s string) bool() bool {
	if s == 'true' {
		return true
	}
	// TODO t for pg, remove
	return s == 't'
}
573 | |
// int returns the value of the string as an integer `'1'.int() == 1`.
// 0 is returned, when the parsing reports an error.
[inline]
pub fn (s string) int() int {
	parsed := strconv.common_parse_int(s, 0, 32, false, false) or { 0 }
	return int(parsed)
}
579 | |
// i64 returns the value of the string as i64 `'1'.i64() == i64(1)`.
// 0 is returned, when the parsing reports an error.
[inline]
pub fn (s string) i64() i64 {
	parsed := strconv.common_parse_int(s, 0, 64, false, false) or { 0 }
	return parsed
}
585 | |
// i8 returns the value of the string as i8 `'1'.i8() == i8(1)`.
// 0 is returned, when the parsing reports an error.
[inline]
pub fn (s string) i8() i8 {
	parsed := strconv.common_parse_int(s, 0, 8, false, false) or { 0 }
	return i8(parsed)
}
591 | |
// i16 returns the value of the string as i16 `'1'.i16() == i16(1)`.
// 0 is returned, when the parsing reports an error.
[inline]
pub fn (s string) i16() i16 {
	parsed := strconv.common_parse_int(s, 0, 16, false, false) or { 0 }
	return i16(parsed)
}
597 | |
// f32 returns the value of the string as f32 `'1.0'.f32() == f32(1)`.
// 0 is returned, when the parsing reports an error.
[inline]
pub fn (s string) f32() f32 {
	parsed := strconv.atof64(s) or { 0 }
	return f32(parsed)
}
603 | |
// f64 returns the value of the string as f64 `'1.0'.f64() == f64(1)`.
// 0 is returned, when the parsing reports an error.
[inline]
pub fn (s string) f64() f64 {
	parsed := strconv.atof64(s) or { 0 }
	return parsed
}
609 | |
// u8 returns the value of the string as u8 `'1'.u8() == u8(1)`.
// 0 is returned, when the parsing reports an error.
[inline]
pub fn (s string) u8() u8 {
	parsed := strconv.common_parse_uint(s, 0, 8, false, false) or { 0 }
	return u8(parsed)
}
615 | |
// u16 returns the value of the string as u16 `'1'.u16() == u16(1)`.
// 0 is returned, when the parsing reports an error.
[inline]
pub fn (s string) u16() u16 {
	parsed := strconv.common_parse_uint(s, 0, 16, false, false) or { 0 }
	return u16(parsed)
}
621 | |
// u32 returns the value of the string as u32 `'1'.u32() == u32(1)`.
// 0 is returned, when the parsing reports an error.
[inline]
pub fn (s string) u32() u32 {
	parsed := strconv.common_parse_uint(s, 0, 32, false, false) or { 0 }
	return u32(parsed)
}
627 | |
// u64 returns the value of the string as u64 `'1'.u64() == u64(1)`.
// 0 is returned, when the parsing reports an error.
[inline]
pub fn (s string) u64() u64 {
	parsed := strconv.common_parse_uint(s, 0, 64, false, false) or { 0 }
	return parsed
}
633 | |
// parse_uint is like `parse_int`, but for unsigned numbers.
//
// This method directly exposes the `parse_uint` function from `strconv`
// as a method on `string`; errors reported by it are passed to the caller.
// For more advanced features, consider calling
// `strconv.common_parse_uint` directly.
[inline]
pub fn (s string) parse_uint(_base int, _bit_size int) !u64 {
	value := strconv.parse_uint(s, _base, _bit_size)!
	return value
}
643 | |
// parse_int interprets the string `s` in the given base (0, 2 to 36) and
// bit size (0 to 64), and returns the corresponding value.
//
// If the base argument is 0, the true base is implied by the string's
// prefix: 2 for "0b", 8 for "0" or "0o", 16 for "0x", and 10 otherwise.
// Also, for argument base 0 only, underscore characters are permitted
// as defined by the Go syntax for integer literals.
//
// The `_bit_size` argument specifies the integer type, that the result
// must fit into. Bit sizes 0, 8, 16, 32, and 64 correspond to
// int, i8, i16, i32 and i64.
// If `_bit_size` is below 0 or above 64, an error is returned.
//
// This method directly exposes the `parse_int` function from `strconv`
// as a method on `string`; errors reported by it are passed to the caller.
// For more advanced features, consider calling
// `strconv.common_parse_int` directly.
[inline]
pub fn (s string) parse_int(_base int, _bit_size int) !i64 {
	value := strconv.parse_int(s, _base, _bit_size)!
	return value
}
664 | |
// == returns true, when both strings have the same length and the same bytes.
// It panics, when the .str pointer of the left string is 0.
[direct_array_access]
fn (s string) == (a string) bool {
	if s.str == 0 {
		// should never happen
		panic('string.eq(): nil string')
	}
	n := s.len
	if n != a.len {
		return false
	}
	// cheap early exit: compare the last bytes, before comparing everything
	if n > 0 && s[n - 1] != a[n - 1] {
		return false
	}
	unsafe {
		return vmemcmp(s.str, a.str, a.len) == 0
	}
}
684 | |
// compare returns -1 if `s` < `a`, 0 if `s` == `a`, and 1 if `s` > `a`.
// The strings are compared byte by byte; when one of them is a prefix
// of the other, the shorter one is the smaller one.
[direct_array_access]
pub fn (s string) compare(a string) int {
	common := if a.len < s.len { a.len } else { s.len }
	for i := 0; i < common; i++ {
		if s[i] != a[i] {
			return if s[i] < a[i] { -1 } else { 1 }
		}
	}
	// all the common bytes are equal, so the lengths decide
	if s.len == a.len {
		return 0
	}
	return if s.len < a.len { -1 } else { 1 }
}
705 | |
// < returns true, when `s` is ordered before `a` in a byte by byte comparison.
// A string that is a proper prefix of the other one, is the smaller one.
[direct_array_access]
fn (s string) < (a string) bool {
	for i := 0; i < s.len; i++ {
		if i >= a.len {
			// `a` is a proper prefix of `s`, so `s` is the bigger one
			return false
		}
		if s[i] != a[i] {
			return s[i] < a[i]
		}
	}
	// all the bytes of `s` match the start of `a`
	return s.len < a.len
}
720 | |
// + returns a newly allocated string, made of the bytes of `s`,
// followed by the bytes of `a`.
[direct_array_access]
fn (s string) + (a string) string {
	total := a.len + s.len
	mut buf := unsafe { malloc_noscan(total + 1) } // +1 for the ending 0 byte
	unsafe {
		vmemcpy(buf, s.str, s.len)
		vmemcpy(buf + s.len, a.str, a.len)
		// keep the result 0 terminated, for C interoperability
		buf[total] = 0
	}
	return string{
		str: buf
		len: total
	}
}
737 | |
// split_any splits the string to an array by any of the `delim` chars.
// Example: "first row\nsecond row".split_any(" \n") == ['first', 'row', 'second', 'row']
// Each byte of `delim` is treated as a separate delimiter.
// If the delimiter string is empty then `.split()` is used.
// An empty source string gives an empty array. A trailing delimiter
// does not add an empty string at the end of the result.
[direct_array_access]
pub fn (s string) split_any(delim string) []string {
	mut parts := []string{}
	if s.len == 0 {
		return parts
	}
	// an empty delimiter string falls back to the default split
	if delim.len <= 0 {
		return s.split('')
	}
	mut begin := 0 // start of the part that is currently collected
	for pos := 0; pos < s.len; pos++ {
		ch := s[pos]
		for k := 0; k < delim.len; k++ {
			if delim[k] == ch {
				parts << s[begin..pos]
				begin = pos + 1
				break
			}
		}
	}
	if begin < s.len {
		parts << s[begin..]
	}
	return parts
}
767 | |
// rsplit_any splits the string to an array by any of the `delim` chars in reverse order.
// Example: "first row\nsecond row".rsplit_any(" \n") == ['row', 'second', 'row', 'first']
// Each byte of `delim` is treated as a separate delimiter.
// If the delimiter string is empty then `.rsplit()` is used.
// An empty source string gives an empty array. A leading delimiter
// does not add an empty string at the end of the result.
[direct_array_access]
pub fn (s string) rsplit_any(delim string) []string {
	mut parts := []string{}
	if s.len == 0 {
		return parts
	}
	if delim.len <= 0 {
		return s.rsplit('')
	}
	mut right := s.len // exclusive end of the part that is currently collected
	for pos := s.len - 1; pos >= 0; pos-- {
		for k := 0; k < delim.len; k++ {
			if s[pos] == delim[k] {
				parts << s[pos + 1..right]
				right = pos
				break
			}
		}
	}
	if right > 0 {
		parts << s[..right]
	}
	return parts
}
797 | |
// split splits the string to an array by `delim`.
// Example: assert 'A B C'.split(' ') == ['A','B','C']
// If `delim` is empty the string is split by its characters.
// Example: assert 'DEF'.split('') == ['D','E','F']
[inline]
pub fn (s string) split(delim string) []string {
	unlimited := 0 // nth == 0 asks split_nth for all the parts
	return s.split_nth(delim, unlimited)
}
806 | |
// rsplit splits the string to an array by `delim` in reverse order.
// Example: assert 'A B C'.rsplit(' ') == ['C','B','A']
// If `delim` is empty the string is split by its characters.
// Example: assert 'DEF'.rsplit('') == ['F','E','D']
[inline]
pub fn (s string) rsplit(delim string) []string {
	unlimited := 0 // nth == 0 asks rsplit_nth for all the parts
	return s.rsplit_nth(delim, unlimited)
}
815 | |
// split_once divides the string into a pair of strings, at the first `delim`.
// It returns `none`, when the string can not be split in exactly 2 parts.
// Example:
// ```v
// path, ext := 'file.ts.dts'.split_once('.')?
// assert path == 'file'
// assert ext == 'ts.dts'
// ```
// Note that split_once returns the split off string as the first part
// of the pair, and the remaining string as the second part of the pair.
pub fn (s string) split_once(delim string) ?(string, string) {
	parts := s.split_nth(delim, 2)
	if parts.len == 2 {
		return parts[0], parts[1]
	}
	return none
}
834 | |
// rsplit_once divides the string into a pair of strings, at the last `delim`.
// It returns `none`, when the string can not be split in exactly 2 parts.
// Example:
// ```v
// path, ext := 'file.ts.dts'.rsplit_once('.')?
// assert path == 'file.ts'
// assert ext == 'dts'
// ```
// Note that rsplit_once returns the remaining string as the first part
// of the pair, and the split off string as the second part of the pair.
pub fn (s string) rsplit_once(delim string) ?(string, string) {
	parts := s.rsplit_nth(delim, 2)
	if parts.len == 2 {
		// rsplit_nth collects from the right, so the pair is swapped back
		return parts[1], parts[0]
	}
	return none
}
853 | |
// split_nth splits the string based on the passed `delim` substring.
// It returns the first Nth parts. When N=0, return all the splits.
// The last returned element has the remainder of the string, even if
// the remainder contains more `delim` substrings.
// An empty `delim` splits the string into its single bytes.
[direct_array_access]
pub fn (s string) split_nth(delim string, nth int) []string {
	mut parts := []string{}

	match delim.len {
		0 {
			// no delimiter: one part per byte; the nth part takes all the rest
			for pos := 0; pos < s.len; pos++ {
				if nth > 0 && pos + 1 >= nth {
					parts << s[pos..]
					break
				}
				parts << s[pos].ascii_str()
			}
			return parts
		}
		1 {
			// single byte delimiter: compare bytes directly
			sep := delim[0]
			mut begin := 0
			for pos := 0; pos < s.len; pos++ {
				if s[pos] != sep {
					continue
				}
				// stop cutting, when only the last wanted part is missing
				if nth > 0 && parts.len == nth - 1 {
					break
				}
				parts << s.substr(begin, pos)
				begin = pos + 1
			}
			// Then the remaining right part of the string
			if nth < 1 || parts.len < nth {
				parts << s[begin..]
			}
			return parts
		}
		else {
			mut begin := 0
			mut pos := 0
			// Take the left part for each delimiter occurrence
			for pos <= s.len {
				fits := pos + delim.len <= s.len
				if fits && s.substr(pos, pos + delim.len) == delim {
					// stop cutting, when only the last wanted part is missing
					if nth > 0 && parts.len == nth - 1 {
						break
					}
					parts << s.substr(begin, pos)
					begin = pos + delim.len
					pos = begin
				} else {
					pos++
				}
			}
			// Then the remaining right part of the string
			if nth < 1 || parts.len < nth {
				parts << s[begin..]
			}
			return parts
		}
	}
}
927 | |
// rsplit_nth splits the string based on the passed `delim` substring in reverse order,
// i.e. the parts are collected starting from the right end of the string.
// It returns the first Nth parts (counted from the right). When N=0, return all the splits.
// The last returned element has the remainder (the left part) of the string,
// even if the remainder contains more `delim` substrings.
// An empty `delim` splits the string into its single bytes.
[direct_array_access]
pub fn (s string) rsplit_nth(delim string, nth int) []string {
	mut res := []string{}
	mut i := s.len - 1

	match delim.len {
		0 {
			// no delimiter: one part per byte, starting from the last one;
			// the nth part takes everything that is left at the front
			for i >= 0 {
				if nth > 0 && res.len == nth - 1 {
					res << s[..i + 1]
					break
				}
				res << s[i].ascii_str()
				i--
			}
			return res
		}
		1 {
			// single byte delimiter: compare bytes directly
			mut rbound := s.len // exclusive right bound of the current part
			delim_byte := delim[0]

			for i >= 0 {
				if s[i] == delim_byte {
					// stop cutting, when only the last wanted part is missing
					if nth > 0 && res.len == nth - 1 {
						break
					}
					res << s[i + 1..rbound]
					rbound = i
					i--
				} else {
					i--
				}
			}

			// Then the remaining left part of the string
			if nth < 1 || res.len < nth {
				res << s[..rbound]
			}
			return res
		}
		else {
			mut rbound := s.len // exclusive right bound of the current part
			// In this branch `i` is the exclusive END of the candidate delimiter
			// s[i - delim.len..i], so the scan has to start at s.len. Starting at
			// s.len - 1 would never see a delimiter that ends the string.
			i = s.len

			for i >= 0 {
				is_delim := i - delim.len >= 0 && s[i - delim.len..i] == delim
				if is_delim {
					// stop cutting, when only the last wanted part is missing
					if nth > 0 && res.len == nth - 1 {
						break
					}
					res << s[i..rbound]
					rbound = i - delim.len
					i -= delim.len
				} else {
					i--
				}
			}

			// Then the remaining left part of the string
			if nth < 1 || res.len < nth {
				res << s[..rbound]
			}
			return res
		}
	}
}
995 | |
// split_into_lines returns the lines of the string, without their line endings.
// `\r` (MacOS), `\n` (POSIX), and `\r\n` (WinOS) endings are all recognised, also when mixed.
// NOTE: the scan is "greedy": a `\r` directly followed by `\n` counts as ONE line ending,
// not as two.
[direct_array_access]
pub fn (s string) split_into_lines() []string {
	mut lines := []string{}
	if s.len == 0 {
		return lines
	}
	// `begin` is the index of the first byte of the line currently being read.
	mut begin := 0
	mut pos := 0
	for pos < s.len {
		ch := s[pos]
		if ch != `\n` && ch != `\r` {
			pos++
			continue
		}
		lines << if begin == pos { '' } else { s[begin..pos] }
		// Swallow the `\n` of a `\r\n` pair together with its `\r`.
		if ch == `\r` && (pos + 1) < s.len && s[pos + 1] == `\n` {
			pos++
		}
		pos++
		begin = pos
	}
	// A last line that is not terminated by a line ending.
	if begin < s.len {
		lines << s[begin..]
	}
	return lines
}
1029 | |
// substr2 is used internally for range expressions like `s[2..4]`.
// When `end_max` is true, `_end` is ignored and the slice runs to the end of the string.
[inline]
fn (s string) substr2(start int, _end int, end_max bool) string {
	if end_max {
		return s.substr(start, s.len)
	}
	return s.substr(start, _end)
}
1036 | |
// substr returns a copy of the bytes from index `start` up to, but not including, `end`.
// Unless compiled with `no_bounds_checking`, it panics when the range is not inside the string.
// Example: assert 'ABCD'.substr(1,3) == 'BC'
[direct_array_access]
pub fn (s string) substr(start int, end int) string {
	$if !no_bounds_checking {
		if start > end || start > s.len || end > s.len || start < 0 || end < 0 {
			panic('substr(${start}, ${end}) out of bounds (len=${s.len})')
		}
	}
	n := end - start
	// The whole string was requested: a plain clone is enough.
	if n == s.len {
		return s.clone()
	}
	mut piece := string{
		str: unsafe { malloc_noscan(n + 1) }
		len: n
	}
	unsafe {
		vmemcpy(piece.str, s.str + start, n)
		// Keep the result 0 terminated for C interoperability.
		piece.str[n] = 0
	}
	return piece
}
1060 | |
// substr_with_check is the version of `substr()` that is used in `a[start..end] or {`.
// Instead of panicking, it returns an error when the range is not inside the string.
[direct_array_access]
pub fn (s string) substr_with_check(start int, end int) !string {
	if start > end || start > s.len || end > s.len || start < 0 || end < 0 {
		return error('substr(${start}, ${end}) out of bounds (len=${s.len})')
	}
	n := end - start
	// The whole string was requested: a plain clone is enough.
	if n == s.len {
		return s.clone()
	}
	mut piece := string{
		str: unsafe { malloc_noscan(n + 1) }
		len: n
	}
	unsafe {
		vmemcpy(piece.str, s.str + start, n)
		// Keep the result 0 terminated for C interoperability.
		piece.str[n] = 0
	}
	return piece
}
1082 | |
// substr_ni returns the string between index positions `start` and `end`.
// Negative indexes are allowed and count from the end of the string.
// Indexes that fall outside the string are clamped, so this function never panics;
// an empty string is returned when the resulting range is empty or inverted.
[direct_array_access]
pub fn (s string) substr_ni(_start int, _end int) string {
	// Translate negative indexes, then clamp what is still below 0.
	mut from := if _start < 0 { s.len + _start } else { _start }
	if from < 0 {
		from = 0
	}
	mut to := if _end < 0 { s.len + _end } else { _end }
	if to < 0 {
		to = 0
	}
	if to > s.len {
		to = s.len
	}

	if from > s.len || to < from {
		return ''
	}

	n := to - from
	mut piece := string{
		str: unsafe { malloc_noscan(n + 1) }
		len: n
	}
	unsafe {
		vmemcpy(piece.str, s.str + from, n)
		// Keep the result 0 terminated for C interoperability.
		piece.str[n] = 0
	}
	return piece
}
1125 | |
// index_ returns the position of the first occurrence of `p` in the string.
// It returns `-1` if `p` is empty, is longer than the string, or can't be found.
// Patterns longer than 2 bytes are delegated to `index_kmp`.
[direct_array_access]
fn (s string) index_(p string) int {
	if p.len > s.len || p.len == 0 {
		return -1
	}
	if p.len > 2 {
		return s.index_kmp(p)
	}
	// Only start positions where `p` still fits are candidates. Scanning further
	// would compare `p` against the terminating 0 byte of `s`, which reports a
	// bogus match for a pattern such as 'b\x00' in 'ab'.
	last_start := s.len - p.len
	for i := 0; i <= last_start; i++ {
		mut j := 0
		for j < p.len && unsafe { s.str[i + j] == p.str[j] } {
			j++
		}
		if j == p.len {
			return i
		}
	}
	return -1
}
1149 | |
// index returns the position of the first occurrence of `p` in the string.
// It returns `none` if `p` can't be found.
pub fn (s string) index(p string) ?int {
	pos := s.index_(p)
	if pos != -1 {
		return pos
	}
	return none
}
1159 | |
// index_kmp returns the position of the first occurrence of `p` in the string,
// or `-1` if there is none. It does a KMP (Knuth-Morris-Pratt) search.
// In this file it is only reached from `index_`, for patterns longer than 2 bytes.
[direct_array_access; manualfree]
fn (s string) index_kmp(p string) int {
	if p.len > s.len {
		return -1
	}
	// prefix[i] is the number of bytes of `p` that are still matched after a
	// mismatch that follows a match of p[..i + 1] (the KMP fallback table).
	mut prefix := []int{len: p.len}
	// The function is [manualfree], so the table is released explicitly when it returns.
	defer {
		unsafe { prefix.free() }
	}
	// Phase 1: fill the table by matching `p` against itself.
	mut j := 0
	for i := 1; i < p.len; i++ {
		// Fall back until the bytes agree or there is nothing left to fall back to.
		for unsafe { p.str[j] != p.str[i] } && j > 0 {
			j = prefix[j - 1]
		}
		if unsafe { p.str[j] == p.str[i] } {
			j++
		}
		prefix[i] = j
	}
	// Phase 2: scan `s`; `j` is the number of bytes of `p` matched so far.
	j = 0
	for i in 0 .. s.len {
		for unsafe { p.str[j] != s.str[i] } && j > 0 {
			j = prefix[j - 1]
		}
		if unsafe { p.str[j] == s.str[i] } {
			j++
		}
		if j == p.len {
			// The match ends at `i`; convert that to its start index.
			return i - p.len + 1
		}
	}
	return -1
}
1194 | |
// index_any returns the position of the first byte of the string that is also in `chars`.
// It returns `-1` if the string contains none of them.
pub fn (s string) index_any(chars string) int {
	for pos, b in s {
		if chars.contains_u8(b) {
			return pos
		}
	}
	return -1
}
1206 | |
// last_index_ returns the position of the last occurrence of `p` in the string.
// It returns `-1` if `p` is empty, is longer than the string, or can't be found.
[direct_array_access]
fn (s string) last_index_(p string) int {
	if p.len > s.len || p.len == 0 {
		return -1
	}
	// Try every start position where `p` fits, rightmost first.
	for start := s.len - p.len; start >= 0; start-- {
		mut matched := 0
		for matched < p.len && unsafe { s.str[start + matched] == p.str[matched] } {
			matched++
		}
		if matched == p.len {
			return start
		}
	}
	return -1
}
1226 | |
// last_index returns the position of the last occurrence of `p` in the string.
// It returns `none` if `p` can't be found.
pub fn (s string) last_index(p string) ?int {
	pos := s.last_index_(p)
	if pos != -1 {
		return pos
	}
	return none
}
1235 | |
// index_after returns the position of the first occurrence of `p` that starts
// at or after the `start` position. A negative `start` is treated as 0.
// It returns `-1` if `p` is longer than the string, if `start` is past the
// last byte of the string, or if `p` can't be found.
[direct_array_access]
pub fn (s string) index_after(p string, start int) int {
	if p.len > s.len {
		return -1
	}
	if start >= s.len {
		return -1
	}
	strt := if start < 0 { 0 } else { start }
	// Only start positions where `p` still fits are candidates. Without this
	// bound the comparison below reads the terminating 0 byte (and memory after
	// it), which can report a bogus match for patterns containing 0 bytes.
	last_start := s.len - p.len
	for i := strt; i < s.len && i <= last_start; i++ {
		mut j := 0
		for j < p.len && unsafe { s.str[i + j] == p.str[j] } {
			j++
		}
		if j == p.len {
			return i
		}
	}
	return -1
}
1264 | |
// index_u8 returns the index of the first occurrence of byte `c` in the string.
// index_u8 returns -1 if the byte can not be found.
[direct_array_access]
pub fn (s string) index_u8(c u8) int {
	for i := 0; i < s.len; i++ {
		if unsafe { s.str[i] == c } {
			return i
		}
	}
	return -1
}
1276 | |
// last_index_u8 returns the index of the last occurrence of byte `c` in the string.
// last_index_u8 returns -1 if the byte is not found.
[direct_array_access]
pub fn (s string) last_index_u8(c u8) int {
	mut i := s.len
	for i > 0 {
		i--
		if unsafe { s.str[i] == c } {
			return i
		}
	}
	return -1
}
1288 | |
// count returns the number of non-overlapping occurrences of `substr` in the string.
// count returns 0 if `substr` is empty, is longer than the string, or is not found.
[direct_array_access]
pub fn (s string) count(substr string) int {
	if s.len == 0 || substr.len == 0 || substr.len > s.len {
		return 0
	}

	mut total := 0

	// A single byte needs no substring search.
	if substr.len == 1 {
		needle := substr[0]
		for b in s {
			if b == needle {
				total++
			}
		}
		return total
	}

	// Jump from match to match; the next search starts right after the previous match.
	mut pos := s.index_after(substr, 0)
	for pos != -1 {
		total++
		pos = s.index_after(substr, pos + substr.len)
	}
	return total
}
1325 | |
// contains_u8 returns `true` if the string contains the byte value `x`.
// See also: [`string.index_u8`](#string.index_u8) , to get the index of the byte as well.
pub fn (s string) contains_u8(x u8) bool {
	return s.index_u8(x) != -1
}
1336 | |
// contains returns `true` if the string contains `substr`.
// An empty `substr` is contained in every string.
// See also: [`string.index`](#string.index)
pub fn (s string) contains(substr string) bool {
	return match substr.len {
		0 { true }
		// A single byte does not need a substring search.
		1 { s.contains_u8(unsafe { substr.str[0] }) }
		else { s.index_(substr) != -1 }
	}
}
1348 | |
// contains_any returns `true` if the string contains any chars in `chars`.
// It returns `false` when `chars` is empty.
pub fn (s string) contains_any(chars string) bool {
	return s.index_any(chars) != -1
}
1358 | |
// contains_only returns `true`, if the string contains only the characters in `chars`.
// It returns `false` when `chars` is empty.
pub fn (s string) contains_only(chars string) bool {
	if chars.len == 0 {
		return false
	}
	for ch in s {
		if !chars.contains_u8(ch) {
			return false
		}
	}
	return true
}
1375 | |
// contains_any_substr returns `true` if the string contains any of the strings in `substrs`.
// It also returns `true` when `substrs` is empty.
pub fn (s string) contains_any_substr(substrs []string) bool {
	for i := 0; i < substrs.len; i++ {
		if s.contains(substrs[i]) {
			return true
		}
	}
	// Nothing matched: that is only a `true` result when there was nothing to look for.
	return substrs.len == 0
}
1388 | |
// starts_with returns `true` if the string starts with `p`.
// An empty `p` is a prefix of every string.
[direct_array_access]
pub fn (s string) starts_with(p string) bool {
	if p.len > s.len {
		return false
	}
	// Count how many leading bytes agree; all of `p` must.
	mut same := 0
	for same < p.len && unsafe { s.str[same] == p.str[same] } {
		same++
	}
	return same == p.len
}
1402 | |
// ends_with returns `true` if the string ends with `p`.
// An empty `p` is a suffix of every string.
[direct_array_access]
pub fn (s string) ends_with(p string) bool {
	if p.len > s.len {
		return false
	}
	// `offset` is where `p` would have to start inside `s`.
	offset := s.len - p.len
	mut same := 0
	for same < p.len && unsafe { p.str[same] == s.str[offset + same] } {
		same++
	}
	return same == p.len
}
1416 | |
// to_lower returns a copy of the string with `A`-`Z` replaced by `a`-`z`.
// All other bytes are copied unchanged.
// TODO only works with ASCII
[direct_array_access]
pub fn (s string) to_lower() string {
	unsafe {
		mut out := malloc_noscan(s.len + 1)
		for i in 0 .. s.len {
			c := s.str[i]
			// Upper and lower case ASCII letters are 32 apart.
			out[i] = if c >= `A` && c <= `Z` { c + 32 } else { c }
		}
		out[s.len] = 0
		return tos(out, s.len)
	}
}
1434 | |
// is_lower returns `true` if the string contains no uppercase ASCII letter (`A`-`Z`).
// Example: assert 'hello developer'.is_lower() == true
[direct_array_access]
pub fn (s string) is_lower() bool {
	for b in s {
		if b >= `A` && b <= `Z` {
			return false
		}
	}
	return true
}
1446 | |
// to_upper returns a copy of the string with `a`-`z` replaced by `A`-`Z`.
// All other bytes are copied unchanged.
// Example: assert 'Hello V'.to_upper() == 'HELLO V'
[direct_array_access]
pub fn (s string) to_upper() string {
	unsafe {
		mut out := malloc_noscan(s.len + 1)
		for i in 0 .. s.len {
			c := s.str[i]
			// Upper and lower case ASCII letters are 32 apart.
			out[i] = if c >= `a` && c <= `z` { c - 32 } else { c }
		}
		out[s.len] = 0
		return tos(out, s.len)
	}
}
1464 | |
// is_upper returns `true` if the string contains no lowercase ASCII letter (`a`-`z`).
// See also: [`byte.is_capital`](#byte.is_capital)
// Example: assert 'HELLO V'.is_upper() == true
[direct_array_access]
pub fn (s string) is_upper() bool {
	for b in s {
		if b >= `a` && b <= `z` {
			return false
		}
	}
	return true
}
1477 | |
// capitalize returns the string with its first byte converted to upper case.
// The rest of the string is left as it is.
// Example: assert 'hello'.capitalize() == 'Hello'
[direct_array_access]
pub fn (s string) capitalize() string {
	if s.len == 0 {
		return ''
	}
	head := s[0].ascii_str().to_upper()
	if s.len == 1 {
		return head
	}
	return head + s[1..]
}
1495 | |
// is_capital returns `true`, if the first character in the string `s`,
// is a capital letter (`A`-`Z`), and none of the others is.
// Example: assert 'Hello'.is_capital() == true
// Example: assert 'HelloWorld'.is_capital() == false
[direct_array_access]
pub fn (s string) is_capital() bool {
	if s.len == 0 {
		return false
	}
	first := s[0]
	if first < `A` || first > `Z` {
		return false
	}
	for i := 1; i < s.len; i++ {
		if s[i] >= `A` && s[i] <= `Z` {
			return false
		}
	}
	return true
}
1512 | |
// starts_with_capital returns `true`, if the first character in the string `s`,
// is a capital letter (`A`-`Z`), no matter what follows it.
// Example: assert 'Hello'.starts_with_capital() == true
// Example: assert 'Hello. World.'.starts_with_capital() == true
[direct_array_access]
pub fn (s string) starts_with_capital() bool {
	return s.len != 0 && s[0] >= `A` && s[0] <= `Z`
}
1524 | |
// title returns the string with each space separated word capitalized.
// Example: assert 'hello v developer'.title() == 'Hello V Developer'
pub fn (s string) title() string {
	mut words := s.split(' ')
	for i in 0 .. words.len {
		words[i] = words[i].capitalize()
	}
	return words.join(' ')
}
1536 | |
// is_title returns true if every space separated word of the string passes `is_capital`.
// Example: assert 'Hello V Developer'.is_title() == true
pub fn (s string) is_title() bool {
	words := s.split(' ')
	for i := 0; i < words.len; i++ {
		if !words[i].is_capital() {
			return false
		}
	}
	return true
}
1548 | |
// find_between returns the string found between `start` string and `end` string.
// It returns an empty string when `start` is not found, and everything after
// `start` when `end` is not found.
// Example: assert 'hey [man] how you doin'.find_between('[', ']') == 'man'
pub fn (s string) find_between(start string, end string) string {
	begin := s.index_(start)
	if begin == -1 {
		return ''
	}
	// Everything to the right of `start`; `end` is only searched in here.
	tail := s[begin + start.len..]
	stop := tail.index_(end)
	return if stop == -1 { tail } else { tail[..stop] }
}
1564 | |
// trim_space strips any of ` `, `\n`, `\t`, `\v`, `\f`, `\r` from the start and end of the string.
// It delegates to `trim` with exactly that set of characters as the cutset.
// Example: assert ' Hello V '.trim_space() == 'Hello V'
[inline]
pub fn (s string) trim_space() string {
	return s.trim(' \n\t\v\f\r')
}
1571 | |
// trim strips any of the characters given in `cutset` from the start and end of the string.
// A copy of the string is returned when the string or `cutset` is empty.
// Example: assert ' ffHello V ffff'.trim(' f') == 'Hello V'
pub fn (s string) trim(cutset string) string {
	if s.len > 0 && cutset.len > 0 {
		from, to := s.trim_indexes(cutset)
		return s.substr(from, to)
	}
	return s.clone()
}
1581 | |
// trim_indexes gets the new start and end indices of a string when any of the characters
// given in `cutset` were stripped from the start and end of the string.
// Should be used as an input to `substr()`.
// If the string is empty, or contains only the characters in `cutset`, both values returned are zero.
// Example: left, right := '-hi-'.trim_indexes('-')
[direct_array_access]
pub fn (s string) trim_indexes(cutset string) (int, int) {
	// Skip the leading bytes that are part of `cutset`.
	mut pos_left := 0
	for pos_left < s.len && cutset.contains_u8(s[pos_left]) {
		pos_left++
	}
	if pos_left >= s.len {
		// Empty string, or nothing but `cutset` bytes. Returning here also keeps
		// the unchecked indexing below away from an empty string.
		return 0, 0
	}
	// s[pos_left] is not in `cutset`, so the scan from the right stops there at the latest.
	mut pos_right := s.len - 1
	for pos_right > pos_left && cutset.contains_u8(s[pos_right]) {
		pos_right--
	}
	return pos_left, pos_right + 1
}
1611 | |
// trim_left strips any of the characters given in `cutset` from the left of the string.
// A copy of the string is returned when the string or `cutset` is empty.
// Example: assert 'd Hello V developer'.trim_left(' d') == 'Hello V developer'
[direct_array_access]
pub fn (s string) trim_left(cutset string) string {
	if s.len < 1 || cutset.len < 1 {
		return s.clone()
	}
	// `skip` ends up as the index of the first byte that is not in `cutset`.
	mut skip := 0
	for skip < s.len && cutset.contains_u8(s[skip]) {
		skip++
	}
	return s[skip..]
}
1635 | |
// trim_right strips any of the characters given in `cutset` from the right of the string.
// A copy of the string is returned when the string or `cutset` is empty.
// Example: assert ' Hello V d'.trim_right(' d') == ' Hello V'
[direct_array_access]
pub fn (s string) trim_right(cutset string) string {
	if s.len < 1 || cutset.len < 1 {
		return s.clone()
	}
	// `end` is the exclusive right bound of what is kept. The cutset lookup
	// stops at the first matching byte, like in `trim_left`.
	mut end := s.len
	for end > 0 && cutset.contains_u8(s[end - 1]) {
		end--
	}
	if end == 0 {
		return ''
	}
	return s[..end]
}
1661 | |
// trim_string_left strips `str` from the start of the string, at most once.
// A copy of the string is returned when it does not start with `str`.
// Example: assert 'WorldHello V'.trim_string_left('World') == 'Hello V'
pub fn (s string) trim_string_left(str string) string {
	if !s.starts_with(str) {
		return s.clone()
	}
	return s[str.len..]
}
1670 | |
// trim_string_right strips `str` from the end of the string, at most once.
// A copy of the string is returned when it does not end with `str`.
// Example: assert 'Hello VWorld'.trim_string_right('World') == 'Hello V'
pub fn (s string) trim_string_right(str string) string {
	if !s.ends_with(str) {
		return s.clone()
	}
	return s[..s.len - str.len]
}
1679 | |
// compare_strings returns `-1` if `a < b`, `1` if `a > b` else `0`.
pub fn compare_strings(a &string, b &string) int {
	return if a < b {
		-1
	} else if a > b {
		1
	} else {
		0
	}
}
1690 | |
// compare_strings_by_len returns `-1` if `a.len < b.len`, `1` if `a.len > b.len` else `0`.
fn compare_strings_by_len(a &string, b &string) int {
	return if a.len < b.len {
		-1
	} else if a.len > b.len {
		1
	} else {
		0
	}
}
1701 | |
// compare_lower_strings compares lower cased copies of `a` and `b` with `compare_strings`,
// and returns its result (`-1`, `0` or `1`).
fn compare_lower_strings(a &string, b &string) int {
	lower_a := a.to_lower()
	lower_b := b.to_lower()
	return compare_strings(&lower_a, &lower_b)
}
1708 | |
// sort_ignore_case sorts the string array in place, using case insensitive comparing.
// The elements are ordered with `compare_lower_strings`.
[inline]
pub fn (mut s []string) sort_ignore_case() {
	s.sort_with_compare(compare_lower_strings)
}
1714 | |
// sort_by_len sorts the string array in place by each string's `.len` length.
// The elements are ordered with `compare_strings_by_len`.
[inline]
pub fn (mut s []string) sort_by_len() {
	s.sort_with_compare(compare_strings_by_len)
}
1720 | |
// str returns a copy of the string (the result of `s.clone()`).
[inline]
pub fn (s string) str() string {
	return s.clone()
}
1726 | |
// at returns the byte at index `idx`.
// Unless compiled with `no_bounds_checking`, it panics when `idx` is outside the string.
// Example: assert 'ABC'.at(1) == u8(`B`)
fn (s string) at(idx int) u8 {
	$if !no_bounds_checking {
		if idx < 0 || idx >= s.len {
			panic('string index out of range: ${idx} / ${s.len}')
		}
	}
	unsafe {
		return s.str[idx]
	}
}
1739 | |
// at_with_check is the version of `at()` that is used in `a[i] or {`.
// It returns `none` instead of panicking, when the index is out of range.
fn (s string) at_with_check(idx int) ?u8 {
	if idx >= 0 && idx < s.len {
		unsafe {
			return s.str[idx]
		}
	}
	return none
}
1750 | |
// is_space returns `true` if the byte is a white space character.
// The following list is considered white space characters: ` `, `\t`, `\n`, `\v`, `\f`, `\r`, 0x85, 0xa0
// Example: assert u8(` `).is_space() == true
[inline]
pub fn (c u8) is_space() bool {
	// 9..13 are `\t`, `\n`, `\v`, `\f`, `\r`
	// 32 is the space character
	// 0x85 is NEXT LINE (NEL)
	// 0xa0 is NO-BREAK SPACE
	return (c >= 9 && c <= 13) || c == 32 || c == 0x85 || c == 0xa0
}
1760 | |
// is_digit returns `true` if the byte is one of the characters `0`-`9` and `false` otherwise.
// Example: assert u8(`9`).is_digit() == true
[inline]
pub fn (c u8) is_digit() bool {
	return !(c < `0` || c > `9`)
}
1767 | |
// is_hex_digit returns `true` if the byte is either in range 0-9, a-f or A-F and `false` otherwise.
// Example: assert u8(`F`).is_hex_digit() == true
[inline]
pub fn (c u8) is_hex_digit() bool {
	return c.is_digit() || (c >= `a` && c <= `f`) || (c >= `A` && c <= `F`)
}
1774 | |
// is_oct_digit returns `true` if the byte is one of the characters `0`-`7` and `false` otherwise.
// Example: assert u8(`7`).is_oct_digit() == true
[inline]
pub fn (c u8) is_oct_digit() bool {
	return !(c < `0` || c > `7`)
}
1781 | |
// is_bin_digit returns `true` if the byte is a binary digit (`0` or `1`) and `false` otherwise.
// Example: assert u8(`0`).is_bin_digit() == true
[inline]
pub fn (c u8) is_bin_digit() bool {
	return match c {
		`0`, `1` { true }
		else { false }
	}
}
1788 | |
// is_letter returns `true` if the byte is an ASCII letter (A-Z or a-z) and `false` otherwise.
// Example: assert u8(`V`).is_letter() == true
[inline]
pub fn (c u8) is_letter() bool {
	is_upper_case := c >= `A` && c <= `Z`
	is_lower_case := c >= `a` && c <= `z`
	return is_lower_case || is_upper_case
}
1795 | |
// is_alnum returns `true` if the byte is an ASCII letter or digit (a-z, A-Z, 0-9) and `false` otherwise.
// Example: assert u8(`V`).is_alnum() == true
[inline]
pub fn (c u8) is_alnum() bool {
	return c.is_letter() || c.is_digit()
}
1802 | |
// free allows for manually freeing the memory occupied by the string.
// It does nothing when compiled with `prealloc`, when `s.is_lit == 1`,
// or when `s.str` is 0.
// Calling it a second time on the same string only prints a diagnostic message.
[manualfree; unsafe]
pub fn (s &string) free() {
	$if prealloc {
		return
	}
	// -98761234 is the marker that this function stores in `is_lit`, after it
	// has released the buffer (see the last line), so finding it here means
	// that the string was already freed once.
	if s.is_lit == -98761234 {
		double_free_msg := unsafe { &u8(c'double string.free() detected\n') }
		double_free_msg_len := unsafe { vstrlen(double_free_msg) }
		$if freestanding {
			bare_eprint(double_free_msg, u64(double_free_msg_len))
		} $else {
			// NOTE(review): this writes to fd 1, while the freestanding branch
			// uses bare_eprint - confirm that fd 1 (and not 2) is intended.
			_write_buf_to_fd(1, double_free_msg, double_free_msg_len)
		}
		return
	}
	// presumably `is_lit == 1` marks string literals, whose memory must not
	// be passed to free() - TODO confirm. A 0 `str` has nothing to release.
	if s.is_lit == 1 || s.str == 0 {
		return
	}
	unsafe {
		// C.printf(c's: %x %s\n', s.str, s.str)
		free(s.str)
		s.str = nil
	}
	// Leave the marker behind, so that a later free() call can be detected.
	s.is_lit = -98761234
}
1829 | |
// before returns the contents before the first occurrence of `sub` in the string.
// If the substring is not found, it returns the full input string.
// Example: assert '23:34:45.234'.before('.') == '23:34:45'
// Example: assert 'abcd'.before('.') == 'abcd'
// TODO: deprecate and remove either .before or .all_before
pub fn (s string) before(sub string) string {
	cut := s.index_(sub)
	return if cut == -1 { s.clone() } else { s[..cut] }
}
1842 | |
// all_before returns the contents before the first occurrence of `sub` in the string.
// If the substring is not found, it returns the full input string.
// Example: assert '23:34:45.234'.all_before('.') == '23:34:45'
// Example: assert 'abcd'.all_before('.') == 'abcd'
pub fn (s string) all_before(sub string) string {
	// TODO remove dup method
	cut := s.index_(sub)
	if cut != -1 {
		return s[..cut]
	}
	return s.clone()
}
1855 | |
// all_before_last returns the contents before the last occurrence of `sub` in the string.
// If the substring is not found, it returns the full input string.
// Example: assert '23:34:45.234'.all_before_last(':') == '23:34'
// Example: assert 'abcd'.all_before_last('.') == 'abcd'
pub fn (s string) all_before_last(sub string) string {
	cut := s.last_index_(sub)
	return if cut == -1 { s.clone() } else { s[..cut] }
}
1867 | |
// all_after returns the contents after the first occurrence of `sub` in the string.
// If the substring is not found, it returns the full input string.
// Example: assert '23:34:45.234'.all_after('.') == '234'
// Example: assert 'abcd'.all_after('z') == 'abcd'
pub fn (s string) all_after(sub string) string {
	found := s.index_(sub)
	if found != -1 {
		return s[found + sub.len..]
	}
	return s.clone()
}
1879 | |
// all_after_last returns the contents after the last occurrence of `sub` in the string.
// If the substring is not found, it returns the full input string.
// Example: assert '23:34:45.234'.all_after_last(':') == '45.234'
// Example: assert 'abcd'.all_after_last('z') == 'abcd'
pub fn (s string) all_after_last(sub string) string {
	found := s.last_index_(sub)
	return if found == -1 { s.clone() } else { s[found + sub.len..] }
}
1891 | |
// all_after_first returns the contents after the first occurrence of `sub` in the string.
// If the substring is not found, it returns the full input string.
// Example: assert '23:34:45.234'.all_after_first(':') == '34:45.234'
// Example: assert 'abcd'.all_after_first('z') == 'abcd'
pub fn (s string) all_after_first(sub string) string {
	found := s.index_(sub)
	if found != -1 {
		return s[found + sub.len..]
	}
	return s.clone()
}
1903 | |
// after returns the contents after the last occurrence of `sub` in the string.
// If the substring is not found, it returns the full input string.
// It is a thin wrapper, that returns the result of `all_after_last`.
// Example: assert '23:34:45.234'.after(':') == '45.234'
// Example: assert 'abcd'.after('z') == 'abcd'
// TODO: deprecate either .all_after_last or .after
[inline]
pub fn (s string) after(sub string) string {
	return s.all_after_last(sub)
}
1913 | |
// after_char returns the contents after the first occurrence of the `sub` byte in the string.
// If the byte is not found, it returns the full input string.
// Example: assert '23:34:45.234'.after_char(`:`) == '34:45.234'
// Example: assert 'abcd'.after_char(`:`) == 'abcd'
pub fn (s string) after_char(sub u8) string {
	found := s.index_u8(sub)
	if found == -1 {
		return s.clone()
	}
	return s[found + 1..]
}
1931 | |
// join joins a string array into a string using `sep` separator.
// An empty array gives an empty string.
// Example: assert ['Hello','V'].join(' ') == 'Hello V'
pub fn (a []string) join(sep string) string {
	if a.len == 0 {
		return ''
	}
	// The result holds every element, plus one separator between each two neighbours.
	mut total := sep.len * (a.len - 1)
	for part in a {
		total += part.len
	}
	mut res := string{
		str: unsafe { malloc_noscan(total + 1) }
		len: total
	}
	// `offset` is the position in res.str where the next bytes are written.
	mut offset := 0
	for i, part in a {
		unsafe {
			// Every element but the first one is preceded by the separator.
			if i > 0 {
				vmemcpy(voidptr(res.str + offset), sep.str, sep.len)
				offset += sep.len
			}
			vmemcpy(voidptr(res.str + offset), part.str, part.len)
			offset += part.len
		}
	}
	unsafe {
		// Keep the result 0 terminated for C interoperability.
		res.str[total] = 0
	}
	return res
}
1967 | |
// join_lines joins a string array into one string, using a `\n` newline as the delimiter.
// Example: assert ['a', 'b'].join_lines() == 'a\nb'
[inline]
pub fn (s []string) join_lines() string {
	newline := '\n'
	return s.join(newline)
}
1973 | |
// reverse returns a new string with the bytes of the original in the opposite order.
// The reversal works on bytes, not on runes.
// Example: assert 'Hello V'.reverse() == 'V olleH'
[direct_array_access]
pub fn (s string) reverse() string {
	if s.len in [0, 1] {
		// nothing to swap
		return s.clone()
	}
	last := s.len - 1
	mut flipped := string{
		str: unsafe { malloc_noscan(s.len + 1) }
		len: s.len
	}
	for dst in 0 .. s.len {
		unsafe {
			flipped.str[dst] = s[last - dst]
		}
	}
	unsafe {
		flipped.str[flipped.len] = 0
	}
	return flipped
}
1995 | |
// limit returns at most the first `max` runes of the string.
// If the string has `max` runes or fewer, a clone of it is returned.
// Example: assert 'hello'.limit(2) == 'he'
// Example: assert 'hi'.limit(10) == 'hi'
pub fn (s string) limit(max int) string {
	runes := s.runes()
	if max < runes.len {
		return runes[..max].string()
	}
	return s.clone()
}
2006 | |
// hash returns an integer hash of the string.
// Starting from 0, every byte `c` of the string updates the hash as `h = h * 31 + c`,
// using u32 arithmetic. The final u32 value is converted to `int`.
// The empty string hashes to 0.
pub fn (s string) hash() int {
	mut h := u32(0)
	// No guard needed: `h` always starts at 0, and the loop does nothing for an empty string.
	for c in s {
		h = h * 31 + u32(c)
	}
	return int(h)
}
2017 | |
// bytes returns a new byte array holding a copy of the string's bytes.
// An empty string yields an empty array.
pub fn (s string) bytes() []u8 {
	if s.len != 0 {
		mut copied := []u8{len: s.len}
		unsafe { vmemcpy(copied.data, s.str, s.len) }
		return copied
	}
	return []
}
2027 | |
// repeat returns a new string with `count` number of copies of the string it was called on.
// A `count` of 0 yields an empty string, a `count` of 1 yields a clone of the string.
// It panics when `count` is negative, and when the resulting string would be too long
// for its length to be stored in an `int`.
// Example: assert 'ab'.repeat(3) == 'ababab'
[direct_array_access]
pub fn (s string) repeat(count int) string {
	if count < 0 {
		panic('string.repeat: count is negative: ${count}')
	} else if count == 0 {
		return ''
	} else if count == 1 {
		return s.clone()
	}
	// Do the size computation in 64 bits. In `int`, `s.len * count` can wrap around to a
	// small value, and the copy loop below would then write past the end of the allocation.
	total := i64(s.len) * i64(count)
	// `>=`, because the allocation needs one more byte for the terminating 0.
	if total >= i64(2147483647) {
		panic('string.repeat: resulting length does not fit in an int: ${total}')
	}
	new_len := int(total)
	mut ret := unsafe { malloc_noscan(new_len + 1) }
	for i in 0 .. count {
		unsafe {
			vmemcpy(ret + i * s.len, s.str, s.len)
		}
	}
	unsafe {
		ret[new_len] = 0
	}
	return unsafe { ret.vstring_with_len(new_len) }
}
2050 | |
// fields splits the string on runs of whitespace and returns the pieces in between.
// The bytes treated as whitespace here are space (32), tab (9) and newline (10).
// Leading, trailing and repeated whitespace does not produce empty entries.
// Example: assert '\t\tv = v'.fields() == ['v', '=', 'v']
// Example: assert '  sss   ssss'.fields() == ['sss', 'ssss']
pub fn (s string) fields() []string {
	mut words := []string{}
	// index of the first byte of the word being scanned, or -1 while between words
	mut start := -1
	for pos, ch in s {
		if ch in [32, 9, 10] {
			if start != -1 {
				// whitespace right after a word: the word is complete
				words << s[start..pos]
				start = -1
			}
		} else if start == -1 {
			start = pos
		}
	}
	if start != -1 {
		// the string ended in the middle of a word
		words << s[start..s.len]
	}
	return words
}
2084 | |
// strip_margin lets a multi-line string be written with an indented margin: after every
// line break, everything up to and including the delimiter `|` is removed.
// It calls `strip_margin_custom` with `|` as the delimiter.
// Note: the delimiter has to be a byte at this time. That means surrounding
// the value in ``.
//
// See also: string.trim_indent()
//
// Example:
// ```v
// st := 'Hello there,
//        |  this is a string,
//        |  Everything before the first | is removed'.strip_margin()
//
// assert st == 'Hello there,
//   this is a string,
//   Everything before the first | is removed'
// ```
[inline]
pub fn (s string) strip_margin() string {
	default_delimiter := u8(`|`)
	return s.strip_margin_custom(default_delimiter)
}
2106 | |
// strip_margin_custom does the same as `strip_margin`, but uses `del` as the delimiter instead of `|`.
// After each line break (LF, CR or CRLF, which are kept), all bytes up to and including the
// next `del` are dropped. The search for `del` does not stop at further line breaks, so a
// line without the delimiter is skipped together with the text before the next delimiter.
// A white-space `del` is rejected: a warning is printed and `|` is used instead.
[direct_array_access]
pub fn (s string) strip_margin_custom(del u8) string {
	mut sep := del
	if sep.is_space() {
		println('Warning: `strip_margin` cannot use white-space as a delimiter')
		println(' Defaulting to `|`')
		sep = `|`
	}
	// The result is never longer than the input, so s.len bytes (+1 for the
	// terminating 0) are always enough.
	mut out := unsafe { malloc_noscan(s.len + 1) }
	mut written := 0
	mut pos := 0
	for pos < s.len {
		ch := s[pos]
		if ch != 10 && ch != 13 {
			// ordinary byte: copy it through
			unsafe {
				out[written] = ch
			}
			written++
			pos++
			continue
		}
		// keep the line break itself
		unsafe {
			out[written] = ch
		}
		written++
		// CRLF: keep the LF as well
		if ch == 13 && pos < s.len - 1 && s[pos + 1] == 10 {
			unsafe {
				out[written] = s[pos + 1]
			}
			written++
			pos++
		}
		// drop everything up to the next delimiter (or the end of the string)
		for s[pos] != sep {
			pos++
			if pos >= s.len {
				break
			}
		}
		// step past the delimiter
		pos++
	}
	unsafe {
		out[written] = 0
		return out.vstring_with_len(written)
	}
}
2152 | |
// trim_indent finds the smallest indent shared by all non-blank lines of the input,
// removes that many leading bytes from every non-blank line, and also drops the first
// and the last line when they are blank (blank means empty or white-space only).
// The resulting lines are joined with `\n`.
//
// Blank lines do not take part in detecting the common indent, and are passed through as they are.
//
// If some non-blank line has no leading white-space at all, the common indent is 0,
// and the indentation is left unchanged.
//
// Example:
// ```v
// st := '
//      Hello there,
//      this is a string,
//      all the leading indents are removed
//      and also the first and the last lines if they are blank
// '.trim_indent()
//
// assert st == 'Hello there,
// this is a string,
// all the leading indents are removed
// and also the first and the last lines if they are blank'
// ```
pub fn (s string) trim_indent() string {
	mut lines := s.split_into_lines()

	// smallest indent among the non-blank lines; stays at max int when there are none
	mut common_indent := int(2147483647)
	for candidate in lines {
		if candidate.is_blank() {
			continue
		}
		width := candidate.indent_width()
		if width < common_indent {
			common_indent = width
		}
	}

	// drop the first line if it is blank
	if lines.len > 0 && lines[0].is_blank() {
		lines = unsafe { lines[1..] }
	}

	// drop the last line if it is blank
	if lines.len > 0 && lines[lines.len - 1].is_blank() {
		lines = unsafe { lines[..lines.len - 1] }
	}

	mut result := []string{cap: lines.len}
	for line in lines {
		if line.is_blank() {
			// blank lines are never sliced
			result << line
		} else {
			result << line[common_indent..]
		}
	}

	return result.join('\n')
}
2214 | |
// indent_width returns the number of white-space bytes (as judged by `u8.is_space()`)
// at the beginning of the string, i.e. the index of its first non-white-space byte.
// It returns 0 when the string is empty, or when it contains nothing but white-space.
// Example: assert '  v'.indent_width() == 2
// Example: assert '\t\tv'.indent_width() == 2
pub fn (s string) indent_width() int {
	mut width := 0
	for width < s.len {
		if !s[width].is_space() {
			return width
		}
		width++
	}
	// no non-white-space byte was found
	return 0
}
2227 | |
// is_blank returns true if the string is empty, or if every byte in it is white-space
// (as judged by `u8.is_space()`).
// Example: assert ' '.is_blank()
// Example: assert '\t'.is_blank()
// Example: assert 'v'.is_blank() == false
pub fn (s string) is_blank() bool {
	for idx := 0; idx < s.len; idx++ {
		if !s[idx].is_space() {
			return false
		}
	}
	// also reached for the empty string, where the loop body never runs
	return true
}
2245 | |
// match_glob matches the string, with a Unix shell-style wildcard pattern.
// Note: wildcard patterns are NOT the same as regular expressions.
//   They are much simpler, and do not allow backtracking, captures, etc.
// The special characters used in shell-style wildcards are:
//   `*` - matches everything
//   `?` - matches any single character
//   `[seq]` - matches any of the characters in the sequence
//   `[^seq]` - matches any character that is NOT in the sequence
// Any other character in `pattern`, is matched 1:1 to the corresponding
// character in `name`, including / and \.
// You can wrap the meta-characters in brackets too, i.e. `[?]` matches `?`
// in the string, and `[*]` matches `*` in the string.
// The comparison works on bytes, and it returns true only when all of `pattern`
// has been matched against all of `name`.
// Example: assert 'ABCD'.match_glob('AB*')
// Example: assert 'ABCD'.match_glob('*D')
// Example: assert 'ABCD'.match_glob('*B*')
// Example: assert !'ABCD'.match_glob('AB')
[direct_array_access]
pub fn (name string) match_glob(pattern string) bool {
	// Initial port based on https://research.swtch.com/glob.go
	// See also https://research.swtch.com/glob
	// px and nx are the current byte positions in `pattern` and in `name`.
	mut px := 0
	mut nx := 0
	// next_px and next_nx are the positions to resume from after a mismatch.
	// They are set each time a `*` is seen; next_nx == 0 means there is nothing to retry.
	mut next_px := 0
	mut next_nx := 0
	plen := pattern.len
	nlen := name.len
	for px < plen || nx < nlen {
		if px < plen {
			c := pattern[px]
			match c {
				`?` {
					// single-character wildcard
					// (with `name` exhausted, control falls through to the restart logic below)
					if nx < nlen {
						px++
						nx++
						continue
					}
				}
				`*` {
					// zero-or-more-character wildcard
					// Try to match at nx.
					// If that doesn't work out, restart at nx+1 next.
					next_px = px
					next_nx = nx + 1
					px++
					continue
				}
				`[` {
					if nx < nlen {
						// the byte of `name` that has to be found (or not found) in the class
						wanted_c := name[nx]
						mut bstart := px
						mut is_inverted := false
						mut inner_match := false
						mut inner_idx := bstart + 1
						mut inner_c := 0
						// a `^` directly after the `[` inverts the class
						if inner_idx < plen {
							inner_c = pattern[inner_idx]
							if inner_c == `^` {
								is_inverted = true
								inner_idx++
							}
						}
						// scan the class members up to the closing `]` (or the end of the pattern)
						for ; inner_idx < plen; inner_idx++ {
							inner_c = pattern[inner_idx]
							if inner_c == `]` {
								break
							}
							if inner_c == wanted_c {
								inner_match = true
								// move px forward to the closing `]` (or to plen if there is none)
								for px < plen && pattern[px] != `]` {
									px++
								}
								break
							}
						}
						if is_inverted {
							if inner_match {
								// the byte is in an inverted class: fail at once, without retrying an earlier `*`
								return false
							} else {
								// the byte is not in the inverted class: continue after the class
								px = inner_idx
							}
						}
						// NOTE(review): for a non-inverted class with no member matching, px still
						// points at the `[` here, so the px++/nx++ below continues with the class
						// members as ordinary characters instead of reporting a mismatch — confirm
						// whether this is intended.
					}
					// NOTE(review): this also runs when `name` is already exhausted (nx >= nlen) —
					// confirm whether this is intended.
					px++
					nx++
					continue
				}
				else {
					// an ordinary character
					if nx < nlen && name[nx] == c {
						px++
						nx++
						continue
					}
				}
			}
		}
		// Reached when the pattern is exhausted while `name` is not, or when the current
		// pattern character did not match.
		if 0 < next_nx && next_nx <= nlen {
			// A mismatch, try restarting:
			px = next_px
			nx = next_nx
			continue
		}
		return false
	}
	// Matched all of `pattern` to all of `name`
	return true
}
2354 | |
// is_ascii returns true if all characters belong to the US-ASCII set ([` `..`~`]),
// i.e. every byte of the string lies between ` ` and `~` inclusive.
// Bytes below ` ` (such as `\n` or `\t`) and bytes above `~` make it return false.
// The empty string yields true.
[inline]
pub fn (s string) is_ascii() bool {
	// Scan the bytes in place; there is no need to copy them into a temporary []u8 first.
	for i := 0; i < s.len; i++ {
		if s[i] < u8(` `) || s[i] > u8(`~`) {
			return false
		}
	}
	return true
}