1 | // Copyright (c) 2019-2023 Alexander Medvednikov. All rights reserved. |
2 | // Use of this source code is governed by an MIT license |
3 | // that can be found in the LICENSE file. |
4 | |
5 | module builtin |
6 | |
7 | // import strings |
8 | |
9 | // B-trees are balanced search trees with all leaves at |
10 | // the same level. B-trees are generally faster than |
11 | // binary search trees due to the better locality of |
12 | // reference, since multiple keys are stored in one node. |
13 | |
14 | // The number for `degree` has been picked through vigor- |
15 | // ous benchmarking but can be changed to any number > 1. |
16 | // `degree` determines the maximum length of each node. |
17 | const ( |
18 | degree = 6 |
19 | mid_index = degree - 1 |
20 | max_len = 2 * degree - 1 |
21 | children_bytes = sizeof(voidptr) * (max_len + 1) |
22 | ) |
23 | |
24 | pub struct SortedMap { |
25 | value_bytes int |
26 | mut: |
27 | root &mapnode |
28 | pub mut: |
29 | len int |
30 | } |
31 | |
32 | struct mapnode { |
33 | mut: |
34 | children &voidptr |
35 | len int |
36 | keys [11]string // TODO: Should use `max_len` |
37 | values [11]voidptr // TODO: Should use `max_len` |
38 | } |
39 | |
40 | fn new_sorted_map(n int, value_bytes int) SortedMap { // TODO: Remove `n` |
41 | return SortedMap{ |
42 | value_bytes: value_bytes |
43 | root: new_node() |
44 | len: 0 |
45 | } |
46 | } |
47 | |
48 | fn new_sorted_map_init(n int, value_bytes int, keys &string, values voidptr) SortedMap { |
49 | mut out := new_sorted_map(n, value_bytes) |
50 | for i in 0 .. n { |
51 | unsafe { |
52 | out.set(keys[i], &u8(values) + i * value_bytes) |
53 | } |
54 | } |
55 | return out |
56 | } |
57 | |
58 | // The tree is initialized with an empty node as root to |
59 | // avoid having to check whether the root is null for |
60 | // each insertion. |
61 | fn new_node() &mapnode { |
62 | return &mapnode{ |
63 | children: unsafe { nil } |
64 | len: 0 |
65 | } |
66 | } |
67 | |
68 | // This implementation does proactive insertion, meaning |
69 | // that splits are done top-down and not bottom-up. |
70 | fn (mut m SortedMap) set(key string, value voidptr) { |
71 | mut node := m.root |
72 | mut child_index := 0 |
73 | mut parent := &mapnode(unsafe { nil }) |
74 | for { |
75 | if node.len == max_len { |
76 | if parent == unsafe { nil } { |
77 | parent = new_node() |
78 | m.root = parent |
79 | } |
80 | parent.split_child(child_index, mut node) |
81 | if key == parent.keys[child_index] { |
82 | unsafe { |
83 | vmemcpy(parent.values[child_index], value, m.value_bytes) |
84 | } |
85 | return |
86 | } |
87 | if key < parent.keys[child_index] { |
88 | node = unsafe { &mapnode(parent.children[child_index]) } |
89 | } else { |
90 | node = unsafe { &mapnode(parent.children[child_index + 1]) } |
91 | } |
92 | } |
93 | mut i := 0 |
94 | for i < node.len && key > node.keys[i] { |
95 | i++ |
96 | } |
97 | if i != node.len && key == node.keys[i] { |
98 | unsafe { |
99 | vmemcpy(node.values[i], value, m.value_bytes) |
100 | } |
101 | return |
102 | } |
103 | if node.children == unsafe { nil } { |
104 | mut j := node.len - 1 |
105 | for j >= 0 && key < node.keys[j] { |
106 | node.keys[j + 1] = node.keys[j] |
107 | node.values[j + 1] = node.values[j] |
108 | j-- |
109 | } |
110 | node.keys[j + 1] = key |
111 | unsafe { |
112 | node.values[j + 1] = malloc(m.value_bytes) |
113 | vmemcpy(node.values[j + 1], value, m.value_bytes) |
114 | } |
115 | node.len++ |
116 | m.len++ |
117 | return |
118 | } |
119 | parent = node |
120 | child_index = i |
121 | node = unsafe { &mapnode(node.children[child_index]) } |
122 | } |
123 | } |
124 | |
125 | fn (mut n mapnode) split_child(child_index int, mut y mapnode) { |
126 | mut z := new_node() |
127 | z.len = mid_index |
128 | y.len = mid_index |
129 | for j := mid_index - 1; j >= 0; j-- { |
130 | z.keys[j] = y.keys[j + degree] |
131 | z.values[j] = y.values[j + degree] |
132 | } |
133 | if y.children != unsafe { nil } { |
134 | z.children = unsafe { &voidptr(malloc(int(children_bytes))) } |
135 | for jj := degree - 1; jj >= 0; jj-- { |
136 | unsafe { |
137 | z.children[jj] = y.children[jj + degree] |
138 | } |
139 | } |
140 | } |
141 | unsafe { |
142 | if n.children == nil { |
143 | n.children = &voidptr(malloc(int(children_bytes))) |
144 | } |
145 | n.children[n.len + 1] = n.children[n.len] |
146 | } |
147 | for j := n.len; j > child_index; j-- { |
148 | n.keys[j] = n.keys[j - 1] |
149 | n.values[j] = n.values[j - 1] |
150 | unsafe { |
151 | n.children[j] = n.children[j - 1] |
152 | } |
153 | } |
154 | n.keys[child_index] = y.keys[mid_index] |
155 | n.values[child_index] = y.values[mid_index] |
156 | unsafe { |
157 | n.children[child_index] = voidptr(y) |
158 | n.children[child_index + 1] = voidptr(z) |
159 | } |
160 | n.len++ |
161 | } |
162 | |
163 | fn (m SortedMap) get(key string, out voidptr) bool { |
164 | mut node := m.root |
165 | for { |
166 | mut i := node.len - 1 |
167 | for i >= 0 && key < node.keys[i] { |
168 | i-- |
169 | } |
170 | if i != -1 && key == node.keys[i] { |
171 | unsafe { |
172 | vmemcpy(out, node.values[i], m.value_bytes) |
173 | } |
174 | return true |
175 | } |
176 | if node.children == unsafe { nil } { |
177 | break |
178 | } |
179 | node = unsafe { &mapnode(node.children[i + 1]) } |
180 | } |
181 | return false |
182 | } |
183 | |
184 | fn (m SortedMap) exists(key string) bool { |
185 | if m.root == unsafe { nil } { // TODO: find out why root can be nil |
186 | return false |
187 | } |
188 | mut node := m.root |
189 | for { |
190 | mut i := node.len - 1 |
191 | for i >= 0 && key < node.keys[i] { |
192 | i-- |
193 | } |
194 | if i != -1 && key == node.keys[i] { |
195 | return true |
196 | } |
197 | if node.children == unsafe { nil } { |
198 | break |
199 | } |
200 | node = unsafe { &mapnode(node.children[i + 1]) } |
201 | } |
202 | return false |
203 | } |
204 | |
205 | fn (n &mapnode) find_key(k string) int { |
206 | mut idx := 0 |
207 | for idx < n.len && n.keys[idx] < k { |
208 | idx++ |
209 | } |
210 | return idx |
211 | } |
212 | |
213 | fn (mut n mapnode) remove_key(k string) bool { |
214 | idx := n.find_key(k) |
215 | if idx < n.len && n.keys[idx] == k { |
216 | if n.children == unsafe { nil } { |
217 | n.remove_from_leaf(idx) |
218 | } else { |
219 | n.remove_from_non_leaf(idx) |
220 | } |
221 | return true |
222 | } else { |
223 | if n.children == unsafe { nil } { |
224 | return false |
225 | } |
226 | flag := if idx == n.len { true } else { false } |
227 | if unsafe { &mapnode(n.children[idx]) }.len < degree { |
228 | n.fill(idx) |
229 | } |
230 | |
231 | mut node := &mapnode(unsafe { nil }) |
232 | if flag && idx > n.len { |
233 | node = unsafe { &mapnode(n.children[idx - 1]) } |
234 | } else { |
235 | node = unsafe { &mapnode(n.children[idx]) } |
236 | } |
237 | return node.remove_key(k) |
238 | } |
239 | } |
240 | |
241 | fn (mut n mapnode) remove_from_leaf(idx int) { |
242 | for i := idx + 1; i < n.len; i++ { |
243 | n.keys[i - 1] = n.keys[i] |
244 | n.values[i - 1] = n.values[i] |
245 | } |
246 | n.len-- |
247 | } |
248 | |
249 | fn (mut n mapnode) remove_from_non_leaf(idx int) { |
250 | k := n.keys[idx] |
251 | if unsafe { &mapnode(n.children[idx]) }.len >= degree { |
252 | mut current := unsafe { &mapnode(n.children[idx]) } |
253 | for current.children != unsafe { nil } { |
254 | current = unsafe { &mapnode(current.children[current.len]) } |
255 | } |
256 | predecessor := current.keys[current.len - 1] |
257 | n.keys[idx] = predecessor |
258 | n.values[idx] = current.values[current.len - 1] |
259 | mut node := unsafe { &mapnode(n.children[idx]) } |
260 | node.remove_key(predecessor) |
261 | } else if unsafe { &mapnode(n.children[idx + 1]) }.len >= degree { |
262 | mut current := unsafe { &mapnode(n.children[idx + 1]) } |
263 | for current.children != unsafe { nil } { |
264 | current = unsafe { &mapnode(current.children[0]) } |
265 | } |
266 | successor := current.keys[0] |
267 | n.keys[idx] = successor |
268 | n.values[idx] = current.values[0] |
269 | mut node := unsafe { &mapnode(n.children[idx + 1]) } |
270 | node.remove_key(successor) |
271 | } else { |
272 | n.merge(idx) |
273 | mut node := unsafe { &mapnode(n.children[idx]) } |
274 | node.remove_key(k) |
275 | } |
276 | } |
277 | |
278 | fn (mut n mapnode) fill(idx int) { |
279 | if idx != 0 && unsafe { &mapnode(n.children[idx - 1]) }.len >= degree { |
280 | n.borrow_from_prev(idx) |
281 | } else if idx != n.len && unsafe { &mapnode(n.children[idx + 1]) }.len >= degree { |
282 | n.borrow_from_next(idx) |
283 | } else if idx != n.len { |
284 | n.merge(idx) |
285 | } else { |
286 | n.merge(idx - 1) |
287 | } |
288 | } |
289 | |
290 | fn (mut n mapnode) borrow_from_prev(idx int) { |
291 | mut child := unsafe { &mapnode(n.children[idx]) } |
292 | mut sibling := unsafe { &mapnode(n.children[idx - 1]) } |
293 | for i := child.len - 1; i >= 0; i-- { |
294 | child.keys[i + 1] = child.keys[i] |
295 | child.values[i + 1] = child.values[i] |
296 | } |
297 | if child.children != unsafe { nil } { |
298 | for i := child.len; i >= 0; i-- { |
299 | unsafe { |
300 | child.children[i + 1] = child.children[i] |
301 | } |
302 | } |
303 | } |
304 | child.keys[0] = n.keys[idx - 1] |
305 | child.values[0] = n.values[idx - 1] |
306 | if child.children != unsafe { nil } { |
307 | unsafe { |
308 | child.children[0] = sibling.children[sibling.len] |
309 | } |
310 | } |
311 | n.keys[idx - 1] = sibling.keys[sibling.len - 1] |
312 | n.values[idx - 1] = sibling.values[sibling.len - 1] |
313 | child.len++ |
314 | sibling.len-- |
315 | } |
316 | |
317 | fn (mut n mapnode) borrow_from_next(idx int) { |
318 | mut child := unsafe { &mapnode(n.children[idx]) } |
319 | mut sibling := unsafe { &mapnode(n.children[idx + 1]) } |
320 | child.keys[child.len] = n.keys[idx] |
321 | child.values[child.len] = n.values[idx] |
322 | if child.children != unsafe { nil } { |
323 | unsafe { |
324 | child.children[child.len + 1] = sibling.children[0] |
325 | } |
326 | } |
327 | n.keys[idx] = sibling.keys[0] |
328 | n.values[idx] = sibling.values[0] |
329 | for i := 1; i < sibling.len; i++ { |
330 | sibling.keys[i - 1] = sibling.keys[i] |
331 | sibling.values[i - 1] = sibling.values[i] |
332 | } |
333 | if sibling.children != unsafe { nil } { |
334 | for i := 1; i <= sibling.len; i++ { |
335 | unsafe { |
336 | sibling.children[i - 1] = sibling.children[i] |
337 | } |
338 | } |
339 | } |
340 | child.len++ |
341 | sibling.len-- |
342 | } |
343 | |
344 | fn (mut n mapnode) merge(idx int) { |
345 | mut child := unsafe { &mapnode(n.children[idx]) } |
346 | sibling := unsafe { &mapnode(n.children[idx + 1]) } |
347 | child.keys[mid_index] = n.keys[idx] |
348 | child.values[mid_index] = n.values[idx] |
349 | for i in 0 .. sibling.len { |
350 | child.keys[i + degree] = sibling.keys[i] |
351 | child.values[i + degree] = sibling.values[i] |
352 | } |
353 | if child.children != unsafe { nil } { |
354 | for i := 0; i <= sibling.len; i++ { |
355 | unsafe { |
356 | child.children[i + degree] = sibling.children[i] |
357 | } |
358 | } |
359 | } |
360 | for i := idx + 1; i < n.len; i++ { |
361 | n.keys[i - 1] = n.keys[i] |
362 | n.values[i - 1] = n.values[i] |
363 | } |
364 | for i := idx + 2; i <= n.len; i++ { |
365 | unsafe { |
366 | n.children[i - 1] = n.children[i] |
367 | } |
368 | } |
369 | child.len += sibling.len + 1 |
370 | n.len-- |
371 | // free(sibling) |
372 | } |
373 | |
374 | pub fn (mut m SortedMap) delete(key string) { |
375 | if m.root.len == 0 { |
376 | return |
377 | } |
378 | |
379 | removed := m.root.remove_key(key) |
380 | if removed { |
381 | m.len-- |
382 | } |
383 | |
384 | if m.root.len == 0 { |
385 | // tmp := t.root |
386 | if m.root.children == unsafe { nil } { |
387 | return |
388 | } else { |
389 | m.root = unsafe { &mapnode(m.root.children[0]) } |
390 | } |
391 | // free(tmp) |
392 | } |
393 | } |
394 | |
395 | // Insert all keys of the subtree into array `keys` |
396 | // starting at `at`. Keys are inserted in order. |
397 | fn (n &mapnode) subkeys(mut keys []string, at int) int { |
398 | mut position := at |
399 | if n.children != unsafe { nil } { |
400 | // Traverse children and insert |
401 | // keys inbetween children |
402 | for i in 0 .. n.len { |
403 | child := unsafe { &mapnode(n.children[i]) } |
404 | position += child.subkeys(mut keys, position) |
405 | keys[position] = n.keys[i] |
406 | position++ |
407 | } |
408 | // Insert the keys of the last child |
409 | child := unsafe { &mapnode(n.children[n.len]) } |
410 | position += child.subkeys(mut keys, position) |
411 | } else { |
412 | // If leaf, insert keys |
413 | for i in 0 .. n.len { |
414 | keys[position + i] = n.keys[i] |
415 | } |
416 | position += n.len |
417 | } |
418 | // Return # of added keys |
419 | return position - at |
420 | } |
421 | |
422 | pub fn (m &SortedMap) keys() []string { |
423 | mut keys := []string{len: m.len} |
424 | if m.root == unsafe { nil } || m.root.len == 0 { |
425 | return keys |
426 | } |
427 | m.root.subkeys(mut keys, 0) |
428 | return keys |
429 | } |
430 | |
431 | fn (mut n mapnode) free() { |
432 | println('TODO') |
433 | } |
434 | |
435 | pub fn (mut m SortedMap) free() { |
436 | if m.root == unsafe { nil } { |
437 | return |
438 | } |
439 | m.root.free() |
440 | } |
441 | |
442 | pub fn (m SortedMap) print() { |
443 | println('TODO') |
444 | } |
445 | |
446 | // pub fn (m map_string) str() string { |
447 | // if m.len == 0 { |
448 | // return '{}' |
449 | // } |
450 | // mut sb := strings.new_builder(50) |
451 | // sb.writeln('{') |
452 | // for key, val in m { |
453 | // sb.writeln(' "$key" => "$val"') |
454 | // } |
455 | // sb.writeln('}') |
456 | // return sb.str() |
457 | // } |