Lite³
A JSON-Compatible Zero-Copy Serialization Format
Loading...
Searching...
No Matches
lite3.c
1/*
2 Lite³: A JSON-Compatible Zero-Copy Serialization Format
3
4 Copyright © 2025 Elias de Jong <elias@fastserial.com>
5
6 Permission is hereby granted, free of charge, to any person obtaining a copy
7 of this software and associated documentation files (the "Software"), to deal
8 in the Software without restriction, including without limitation the rights
9 to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10 copies of the Software, and to permit persons to whom the Software is
11 furnished to do so, subject to the following conditions:
12
13 The above copyright notice and this permission notice shall be included in all
14 copies or substantial portions of the Software.
15
16 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19 AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22 SOFTWARE.
23
24 __ __________________ ____
25 _ ___ ___/ /___(_)_/ /_______|_ /
26 _ _____/ / __/ /_ __/ _ \_/_ <
27 ___ __/ /___/ / / /_ / __/____/
28 /_____/_/ \__/ \___/
29*/
30#include "lite3.h"
31
32#include <stddef.h>
33#include <stdlib.h>
34#include <string.h>
35#include <stdint.h>
36#include <errno.h>
37#include <assert.h>
38
39
40
41// Typedef for primitive types
42typedef float f32;
43typedef double f64;
44typedef int8_t i8;
45typedef uint8_t u8;
46typedef int16_t i16;
47typedef uint16_t u16;
48typedef int32_t i32;
49typedef uint32_t u32;
50typedef int64_t i64;
51typedef uint64_t u64;
52
53
54
55/*
56 B-tree node struct
57 Advanced users can adjust the B-tree node size to experiment with different performance characteristics.
58 Larger node sizes will bloat message size, but also reduce the tree height, thus also reducing node walks.
59 Actual effects are dependent on the architecture and workload. To be sure, experiment with different settings and profile.
60
61 IMPORTANT RULES TO OBEY:
62 - hashes[] and kv_ofs[] should always have an element count exactly equal to LITE3_NODE_KEY_COUNT_MASK
63 - hashes[] and kv_ofs[] should always have an uneven element count
64 - hashes[] and kv_ofs[] should always have equal element count
65 - child_ofs[] should always have element count of exactly 1 greater than hashes[] and kv_ofs[]
66
67 How to change:
68 1) uncomment `LITE3_NODE_KEY_COUNT_MASK` to the preferred setting
69 2) adjust the member array sizes inside the `struct node` definition
70 3) uncomment `LITE3_NODE_SIZE` to the correct size inside the header
71 4) uncomment LITE3_NODE_SIZE_KC_OFFSET` to the correct size inside the header
72 5) uncomment `LITE3_TREE_HEIGHT_MAX` to the correct value inside the header
73
74 [ WARNING ] If you change this setting, everyone you communicate with must also change it.
75 Unless you control all communicating parties, you probably should not touch this.
76*/
77struct node {
78 u32 gen_type; // upper 24 bits: gen lower 8 bits: lite3_type
79 u32 hashes[7];
80 u32 size_kc; // upper 26 bits: size lower 6 bits: key_count
81 u32 kv_ofs[7];
82 u32 child_ofs[8];
83};
84static_assert(sizeof(struct node) == LITE3_NODE_SIZE, "sizeof(struct node) must equal LITE3_NODE_SIZE");
85static_assert(offsetof(struct node, gen_type) == 0, "Runtime type checks and LITE3_BYTES() & LITE3_STR() macros expect to read (struct node).gen_type field at offset 0");
86static_assert(sizeof(((struct node *)0)->gen_type) == sizeof(uint32_t), "LITE3_BYTES() & LITE3_STR() macros expect to read (struct node).gen_type as uint32_t");
87static_assert(offsetof(struct node, size_kc) == LITE3_NODE_SIZE_KC_OFFSET, "Offset of (struct node).size_kc must equal LITE3_NODE_SIZE_KC_OFFSET");
88static_assert(sizeof(((struct node *)0)->size_kc) == sizeof(uint32_t), "Node size checks expect to read (struct node).size_kc as uint32_t");
89static_assert(sizeof(((struct node *)0)->gen_type) == sizeof(((lite3_iter *)0)->gen), "Iterator expects to read (struct node).gen_type as uint32_t");
90
91#define LITE3_NODE_TYPE_SHIFT 0
92#define LITE3_NODE_TYPE_MASK ((u32)((1 << 8) - 1)) // 8 LSB
93
94#define LITE3_NODE_GEN_SHIFT 8
95#define LITE3_NODE_GEN_MASK ((u32)~((1 << 8) - 1)) // 24 MSB
96
97#define LITE3_NODE_KEY_COUNT_MAX ((int)(sizeof(((struct node *)0)->hashes) / sizeof(u32)))
98#define LITE3_NODE_KEY_COUNT_MIN ((int)(LITE3_NODE_KEY_COUNT_MAX / 2))
99
100#define LITE3_NODE_KEY_COUNT_SHIFT 0
101// #define LITE3_NODE_KEY_COUNT_MASK ((u32)((1 << 2) - 1)) // 2 LSB key_count: 0-3 hashes[3] kv_ofs[3] child_ofs[4] LITE3_NODE_SIZE: 48 (0.75 cache lines)
102#define LITE3_NODE_KEY_COUNT_MASK ((u32)((1 << 3) - 1)) // 3 LSB key_count: 0-7 hashes[7] kv_ofs[7] child_ofs[8] LITE3_NODE_SIZE: 96 (1.5 cache lines)
103// #define LITE3_NODE_KEY_COUNT_MASK ((u32)((1 << 4) - 1)) // 4 LSB key_count: 0-15 hashes[15] kv_ofs[15] child_ofs[16] LITE3_NODE_SIZE: 192 (3 cache lines)
104// #define LITE3_NODE_KEY_COUNT_MASK ((u32)((1 << 5) - 1)) // 5 LSB key_count: 0-31 hashes[31] kv_ofs[31] child_ofs[32] LITE3_NODE_SIZE: 384 (6 cache lines)
105// #define LITE3_NODE_KEY_COUNT_MASK ((u32)((1 << 6) - 1)) // 6 LSB key_count: 0-63 hashes[63] kv_ofs[63] child_ofs[64] LITE3_NODE_SIZE: 768 (12 cache lines)
106
107
108
109#define LITE3_KEY_TAG_SIZE_MIN 1
110#define LITE3_KEY_TAG_SIZE_MAX 4
111#define LITE3_KEY_TAG_SIZE_MASK ((1 << 2) - 1)
112#define LITE3_KEY_TAG_SIZE_SHIFT 0
113
114#define LITE3_KEY_TAG_KEY_SIZE_MASK (~((1 << 2) - 1))
115#define LITE3_KEY_TAG_KEY_SIZE_SHIFT 2
116/*
117 Verify a key inside the buffer to ensure readers don't go out of bounds.
118 Optionally compare the existing key to an input key; a mismatch implies a hash collision.
119 - Returns 0 on success
120 - Returns < 0 on failure
121
122 [ NOTE ] For internal use only.
123*/
124static inline int _verify_key(
125 const u8 *buf, // buffer pointer
126 size_t buflen, // buffer length (bytes)
127 const char *restrict key, // key string (string, optionally call with NULL)
128 size_t key_size, // key size (bytes including null-terminator, optionally call with 0)
129 size_t key_tag_size, // key tag size (bytes, optionally call with 0)
130 size_t *restrict inout_ofs, // key entry offset (relative to *buf)
131 size_t *restrict out_key_tag_size) // key tag size (optionally call with NULL)
132{
133 if (LITE3_UNLIKELY(LITE3_KEY_TAG_SIZE_MAX > buflen || *inout_ofs > buflen - LITE3_KEY_TAG_SIZE_MAX)) {
134 LITE3_PRINT_ERROR("KEY ENTRY OUT OF BOUNDS\n");
135 errno = EFAULT;
136 return -1;
137 }
138 size_t _key_tag_size = (size_t)((*((u8 *)(buf + *inout_ofs)) & LITE3_KEY_TAG_SIZE_MASK) + 1);
139 if (key_tag_size) {
140 if (key_tag_size != _key_tag_size) {
141 LITE3_PRINT_ERROR("KEY TAG SIZE DOES NOT MATCH\n");
142 errno = EINVAL;
143 return -1;
144 }
145 }
146 size_t _key_size = 0;
147 memcpy(&_key_size, buf + *inout_ofs, _key_tag_size);
148 _key_size >>= LITE3_KEY_TAG_KEY_SIZE_SHIFT;
149 *inout_ofs += _key_tag_size;
150
151 if (LITE3_UNLIKELY(_key_size > buflen || *inout_ofs > buflen - _key_size)) {
152 LITE3_PRINT_ERROR("KEY ENTRY OUT OF BOUNDS\n");
153 errno = EFAULT;
154 return -1;
155 }
156 if (key_size) {
157 int cmp = memcmp(
158 (const char *)(buf + *inout_ofs),
159 key,
160 (key_size < _key_size) ? key_size : _key_size
161 );
162 if (LITE3_UNLIKELY(cmp != 0)) {
163 LITE3_PRINT_ERROR("HASH COLLISION\n");
164 errno = EINVAL;
165 return -1;
166 }
167 }
168 *inout_ofs += _key_size;
169 if (out_key_tag_size)
170 *out_key_tag_size = _key_tag_size;
171 return 0;
172}
173
174/*
175 Verify a value inside the buffer to ensure readers don't go out of bounds.
176 - Returns 0 on success
177 - Returns < 0 on failure
178
179 [ NOTE ] For internal use only.
180*/
181static inline int _verify_val(
182 const u8 *buf, // buffer pointer
183 size_t buflen, // buffer length (bytes)
184 size_t *restrict inout_ofs) // val entry offset (relative to *buf)
185{
186 if (LITE3_UNLIKELY(LITE3_VAL_SIZE > buflen || *inout_ofs > buflen - LITE3_VAL_SIZE)) {
187 LITE3_PRINT_ERROR("VALUE OUT OF BOUNDS\n");
188 errno = EFAULT;
189 return -1;
190 }
191 enum lite3_type type = (enum lite3_type)(*(buf + *inout_ofs));
192
193 if (LITE3_UNLIKELY(type >= LITE3_TYPE_INVALID)) {
194 LITE3_PRINT_ERROR("VALUE TYPE INVALID\n");
195 errno = EINVAL;
196 return -1;
197 }
198 size_t _val_entry_size = LITE3_VAL_SIZE + lite3_type_sizes[type];
199
200 if (LITE3_UNLIKELY(_val_entry_size > buflen || *inout_ofs > buflen - _val_entry_size)) {
201 LITE3_PRINT_ERROR("VALUE OUT OF BOUNDS\n");
202 errno = EFAULT;
203 return -1;
204 }
205 if (type == LITE3_TYPE_STRING || type == LITE3_TYPE_BYTES) { // extra check required for str/bytes
206 size_t byte_count = 0;
207 memcpy(&byte_count, buf + *inout_ofs + LITE3_VAL_SIZE, lite3_type_sizes[LITE3_TYPE_BYTES]);
208 _val_entry_size += byte_count;
209 if (LITE3_UNLIKELY(_val_entry_size > buflen || *inout_ofs > buflen - _val_entry_size)) {
210 LITE3_PRINT_ERROR("VALUE OUT OF BOUNDS\n");
211 errno = EFAULT;
212 return -1;
213 }
214 }
215 *inout_ofs += _val_entry_size;
216 return 0;
217}
218
219int lite3_get_impl(
220 const unsigned char *buf, // buffer pointer
221 size_t buflen, // buffer length (bytes)
222 size_t ofs, // start offset (0 == root)
223 const char *restrict key, // key pointer (string)
224 lite3_key_data key_data, // key data struct
225 lite3_val **out) // value entry pointer (out pointer)
226{
227 #ifdef LITE3_DEBUG
228 if (*(buf + ofs) == LITE3_TYPE_OBJECT) {
229 LITE3_PRINT_DEBUG("GET\tkey: %s\n", key);
230 } else if (*(buf + ofs) == LITE3_TYPE_ARRAY) {
231 LITE3_PRINT_DEBUG("GET\tindex: %u\n", key_data.hash);
232 } else {
233 LITE3_PRINT_DEBUG("GET INVALID: EXEPCTING ARRAY OR OBJECT TYPE\n");
234 }
235 #endif
236
237 size_t key_tag_size = (size_t)((!!(key_data.size >> (16 - LITE3_KEY_TAG_KEY_SIZE_SHIFT)) << 1)
238 + !!(key_data.size >> (8 - LITE3_KEY_TAG_KEY_SIZE_SHIFT))
239 + !!key_data.size);
240
241 struct node *restrict node = __builtin_assume_aligned((struct node *)(buf + ofs), LITE3_NODE_ALIGNMENT);
242 assert(((uintptr_t)node & LITE3_NODE_ALIGNMENT_MASK) == 0); // *buf + ofs must be aligned to LITE3_NODE_ALIGNMENT
243
244 int key_count;
245 int i;
246 int node_walks = 0;
247 while (1) {
248 key_count = node->size_kc & LITE3_NODE_KEY_COUNT_MASK;
249 i = 0;
250 while (i < key_count && node->hashes[i] < key_data.hash)
251 i++;
252 if (i < key_count && node->hashes[i] == key_data.hash) { // target key found
253 size_t target_ofs = node->kv_ofs[i];
254 if (key && _verify_key(buf, buflen, key, (size_t)key_data.size, key_tag_size, &target_ofs, NULL) < 0)
255 return -1;
256 size_t val_start_ofs = target_ofs;
257 if (_verify_val(buf, buflen, &target_ofs) < 0)
258 return -1;
259 *out = (lite3_val *)(buf + val_start_ofs);
260 return 0;
261 }
262 if (node->child_ofs[0]) { // if children, walk to next node
263 size_t next_node_ofs = (size_t)node->child_ofs[i];
264 node = __builtin_assume_aligned((struct node *)(buf + next_node_ofs), LITE3_NODE_ALIGNMENT);
265 assert(((uintptr_t)node & LITE3_NODE_ALIGNMENT_MASK) == 0);
266
267 if (LITE3_UNLIKELY(next_node_ofs > buflen - LITE3_NODE_SIZE)) {
268 LITE3_PRINT_ERROR("NODE WALK OFFSET OUT OF BOUNDS\n");
269 errno = EFAULT;
270 return -1;
271 }
272 if (LITE3_UNLIKELY(++node_walks > LITE3_TREE_HEIGHT_MAX)) {
273 LITE3_PRINT_ERROR("NODE WALKS EXCEEDED LITE3_TREE_HEIGHT_MAX\n");
274 errno = EBADMSG;
275 return -1;
276 }
277 } else {
278 LITE3_PRINT_ERROR("KEY NOT FOUND\n");
279 errno = ENOENT;
280 return -1;
281 }
282 }
283}
284
285int lite3_iter_create_impl(const unsigned char *buf, size_t buflen, size_t ofs, lite3_iter *out)
286{
287 LITE3_PRINT_DEBUG("CREATE ITER\n");
288
289 struct node *restrict node = __builtin_assume_aligned((struct node *)(buf + ofs), LITE3_NODE_ALIGNMENT);
290 assert(((uintptr_t)node & LITE3_NODE_ALIGNMENT_MASK) == 0); // node must be aligned to LITE3_NODE_ALIGNMENT
291
292 enum lite3_type type = node->gen_type & LITE3_NODE_TYPE_MASK;
293 if (LITE3_UNLIKELY(!(type == LITE3_TYPE_OBJECT || type == LITE3_TYPE_ARRAY))) {
294 LITE3_PRINT_ERROR("INVALID ARGUMENT: EXPECTING ARRAY OR OBJECT TYPE\n");
295 errno = EINVAL;
296 return -1;
297 }
298 out->gen = ((struct node *)buf)->gen_type;
299 out->depth = 0;
300 out->node_ofs[0] = (u32)ofs;
301 out->node_i[0] = 0;
302
303 while (node->child_ofs[0]) { // has children, travel down
304 u32 next_node_ofs = node->child_ofs[0];
305
306 node = __builtin_assume_aligned((struct node *)(buf + next_node_ofs), LITE3_NODE_ALIGNMENT);
307 assert(((uintptr_t)node & LITE3_NODE_ALIGNMENT_MASK) == 0);
308
309 if (LITE3_UNLIKELY(++out->depth > LITE3_TREE_HEIGHT_MAX)) {
310 LITE3_PRINT_ERROR("NODE WALKS EXCEEDED LITE3_TREE_HEIGHT_MAX\n");
311 errno = EBADMSG;
312 return -1;
313 }
314 if (LITE3_UNLIKELY((size_t)next_node_ofs > buflen - LITE3_NODE_SIZE)) {
315 LITE3_PRINT_ERROR("NODE WALK OFFSET OUT OF BOUNDS\n");
316 errno = EFAULT;
317 return -1;
318 }
319 out->node_ofs[out->depth] = next_node_ofs;
320 out->node_i[out->depth] = 0;
321 }
322 #ifdef LITE3_PREFETCHING
323 __builtin_prefetch(buf + node->kv_ofs[0], 0, 0); // prefetch first few items
324 __builtin_prefetch(buf + node->kv_ofs[0] + 64, 0, 0);
325 __builtin_prefetch(buf + node->kv_ofs[1], 0, 0);
326 __builtin_prefetch(buf + node->kv_ofs[1] + 64, 0, 0);
327 __builtin_prefetch(buf + node->kv_ofs[2], 0, 0);
328 __builtin_prefetch(buf + node->kv_ofs[2] + 64, 0, 0);
329 #endif
330 return 0;
331}
332
333int lite3_iter_next(const unsigned char *buf, size_t buflen, lite3_iter *iter, lite3_str *out_key, size_t *out_val_ofs)
334{
335 if (LITE3_UNLIKELY(iter->gen != ((struct node *)buf)->gen_type)) {
336 LITE3_PRINT_ERROR("ITERATOR INVALID: iter->gen != node->gen_type (BUFFER MUTATION INVALIDATES ITERATORS)\n");
337 errno = EINVAL;
338 return -1;
339 }
340
341 struct node *restrict node = __builtin_assume_aligned((struct node *)(buf + iter->node_ofs[iter->depth]), LITE3_NODE_ALIGNMENT);
342 assert(((uintptr_t)node & LITE3_NODE_ALIGNMENT_MASK) == 0); // node must be aligned to LITE3_NODE_ALIGNMENT
343
344 enum lite3_type type = node->gen_type & LITE3_NODE_TYPE_MASK;
345 if (LITE3_UNLIKELY(!(type == LITE3_TYPE_OBJECT || type == LITE3_TYPE_ARRAY))) {
346 LITE3_PRINT_ERROR("INVALID ARGUMENT: EXPECTING ARRAY ORlite3_iter_nex OBJECT TYPE\n");
347 errno = EINVAL;
348 return -1;
349 }
350 if (iter->depth == 0 && (iter->node_i[iter->depth] == (node->size_kc & LITE3_NODE_KEY_COUNT_MASK))) { // key_count reached, done
351 return LITE3_ITER_DONE;
352 }
353 size_t target_ofs = node->kv_ofs[iter->node_i[iter->depth]];
354
355 int ret;
356 if (type == LITE3_TYPE_OBJECT && out_key) { // write back key if not NULL
357 size_t key_tag_size;
358 size_t key_start_ofs = target_ofs;
359 if ((ret = _verify_key(buf, buflen, NULL, 0, 0, &target_ofs, &key_tag_size)) < 0)
360 return ret;
361 out_key->gen = iter->gen;
362 out_key->len = 0;
363 memcpy(&out_key->len, buf + key_start_ofs, key_tag_size);
364 --out_key->len; // Lite³ stores string size including NULL-terminator. Correction required for public API.
365 out_key->ptr = (const char *)(buf + key_start_ofs + key_tag_size);
366 }
367 if (out_val_ofs) { // write back val if not NULL
368 size_t val_start_ofs = target_ofs;
369 if ((ret = _verify_val(buf, buflen, &target_ofs)) < 0)
370 return ret;
371 *out_val_ofs = val_start_ofs;
372 }
373
374 ++iter->node_i[iter->depth];
375
376 while (node->child_ofs[iter->node_i[iter->depth]]) { // has children, travel down
377 u32 next_node_ofs = node->child_ofs[iter->node_i[iter->depth]];
378
379 node = __builtin_assume_aligned((struct node *)(buf + next_node_ofs), LITE3_NODE_ALIGNMENT);
380 assert(((uintptr_t)node & LITE3_NODE_ALIGNMENT_MASK) == 0);
381
382 if (LITE3_UNLIKELY(++iter->depth > LITE3_TREE_HEIGHT_MAX)) {
383 LITE3_PRINT_ERROR("NODE WALKS EXCEEDED LITE3_TREE_HEIGHT_MAX\n");
384 errno = EBADMSG;
385 return -1;
386 }
387 if (LITE3_UNLIKELY((size_t)next_node_ofs > buflen - LITE3_NODE_SIZE)) {
388 LITE3_PRINT_ERROR("NODE WALK OFFSET OUT OF BOUNDS\n");
389 errno = EFAULT;
390 return -1;
391 }
392 iter->node_ofs[iter->depth] = next_node_ofs;
393 iter->node_i[iter->depth] = 0;
394 }
395 while (iter->depth > 0 && (iter->node_i[iter->depth] == (node->size_kc & LITE3_NODE_KEY_COUNT_MASK))) { // key_count reached, go up
396 --iter->depth;
397 node = __builtin_assume_aligned((struct node *)(buf + iter->node_ofs[iter->depth]), LITE3_NODE_ALIGNMENT);
398 assert(((uintptr_t)node & LITE3_NODE_ALIGNMENT_MASK) == 0);
399 #ifdef LITE3_PREFETCHING
400 __builtin_prefetch(buf + node->child_ofs[(iter->node_i[iter->depth] + 1) & LITE3_NODE_KEY_COUNT_MASK], 0, 2); // prefetch next nodes
401 __builtin_prefetch(buf + node->child_ofs[(iter->node_i[iter->depth] + 1) & LITE3_NODE_KEY_COUNT_MASK] + 64, 0, 2);
402 __builtin_prefetch(buf + node->child_ofs[(iter->node_i[iter->depth] + 2) & LITE3_NODE_KEY_COUNT_MASK], 0, 2);
403 __builtin_prefetch(buf + node->child_ofs[(iter->node_i[iter->depth] + 2) & LITE3_NODE_KEY_COUNT_MASK] + 64, 0, 2);
404 #endif
405 }
406 #ifdef LITE3_PREFETCHING
407 __builtin_prefetch(buf + node->kv_ofs[iter->node_i[iter->depth]], 0, 0); // prefetch next items
408 __builtin_prefetch(buf + node->kv_ofs[iter->node_i[iter->depth]] + 64, 0, 0);
409 __builtin_prefetch(buf + node->kv_ofs[iter->node_i[iter->depth] + 1], 0, 0);
410 __builtin_prefetch(buf + node->kv_ofs[iter->node_i[iter->depth] + 1] + 64, 0, 0);
411 __builtin_prefetch(buf + node->kv_ofs[iter->node_i[iter->depth] + 2], 0, 0);
412 __builtin_prefetch(buf + node->kv_ofs[iter->node_i[iter->depth] + 2] + 64, 0, 0);
413 #endif
414 return LITE3_ITER_ITEM;
415}
416
417
418static inline void _lite3_init_impl(unsigned char *buf, size_t ofs, enum lite3_type type)
419{
420 LITE3_PRINT_DEBUG("INITIALIZE %s\n", type == LITE3_TYPE_OBJECT ? "OBJECT" : "ARRAY");
421
422 struct node *node = (struct node *)(buf + ofs);
423 assert(((uintptr_t)node & LITE3_NODE_ALIGNMENT_MASK) == 0); // node must be aligned to LITE3_NODE_ALIGNMENT
424 node->gen_type = type & LITE3_NODE_TYPE_MASK;
425 node->size_kc = 0x00;
426 #ifdef LITE3_ZERO_MEM_EXTRA
427 memset(node->hashes, LITE3_ZERO_MEM_8, sizeof(((struct node *)0)->hashes));
428 memset(node->kv_ofs, LITE3_ZERO_MEM_8, sizeof(((struct node *)0)->kv_ofs));
429 #endif
430 memset(node->child_ofs, 0x00, sizeof(((struct node *)0)->child_ofs));
431}
432
433int lite3_init_obj(unsigned char *buf, size_t *restrict out_buflen, size_t bufsz)
434{
435 if (LITE3_UNLIKELY(bufsz < LITE3_NODE_SIZE)) {
436 LITE3_PRINT_ERROR("INVALID ARGUMENT: bufsz < LITE3_NODE_SIZE\n");
437 errno = EINVAL;
438 return -1;
439 }
440 _lite3_init_impl(buf, 0, LITE3_TYPE_OBJECT);
441 *out_buflen = LITE3_NODE_SIZE;
442 return 0;
443}
444
445int lite3_init_arr(unsigned char *buf, size_t *restrict out_buflen, size_t bufsz)
446{
447 if (LITE3_UNLIKELY(bufsz < LITE3_NODE_SIZE)) {
448 LITE3_PRINT_ERROR("INVALID ARGUMENT: bufsz < LITE3_NODE_SIZE\n");
449 errno = EINVAL;
450 return -1;
451 }
452 _lite3_init_impl(buf, 0, LITE3_TYPE_ARRAY);
453 *out_buflen = LITE3_NODE_SIZE;
454 return 0;
455}
456
457/*
458 Inserts entry into the Lite³ structure to prepare for writing of the actual value.
459 - Returns 0 on success
460 - Returns < 0 on failure
461
462 [ NOTE ] This function expects the caller to write to:
463 1) `val->type`: the value type (bytes written should equal to `LITE3_VAL_SIZE`)
464 2) `val->val`: the actual value (bytes written should equal `val_len`)
465 This has the advantage that the responsibility of type-specific logic is also moved to the caller.
466 Otherwise, this function would have to contain branches to account for all types.
467*/
468int lite3_set_impl(
469 unsigned char *buf, // buffer pointer
470 size_t *restrict inout_buflen, // buffer used length (bytes, inout value)
471 size_t ofs, // start offset (0 == root)
472 size_t bufsz, // buffer max size (bytes)
473 const char *restrict key, // key string (string, pass NULL when inserting in array)
474 lite3_key_data key_data, // key data struct
475 size_t val_len, // value length (bytes)
476 lite3_val **out) // value entry pointer (out pointer)
477{
478 #ifdef LITE3_DEBUG
479 if (*(buf + ofs) == LITE3_TYPE_OBJECT) {
480 LITE3_PRINT_DEBUG("SET\tkey: %s\n", key);
481 } else if (*(buf + ofs) == LITE3_TYPE_ARRAY) {
482 LITE3_PRINT_DEBUG("SET\tindex: %u\n", key_data.hash);
483 } else {
484 LITE3_PRINT_DEBUG("SET INVALID: EXEPCTING ARRAY OR OBJECT TYPE\n");
485 }
486 #endif
487 size_t key_tag_size = (size_t)((!!(key_data.size >> (16 - LITE3_KEY_TAG_KEY_SIZE_SHIFT)) << 1)
488 + !!(key_data.size >> (8 - LITE3_KEY_TAG_KEY_SIZE_SHIFT))
489 + !!key_data.size);
490 size_t entry_size = key_tag_size + (size_t)key_data.size + LITE3_VAL_SIZE + val_len;
491
492 struct node *restrict parent = NULL;
493 struct node *restrict node = __builtin_assume_aligned((struct node *)(buf + ofs), LITE3_NODE_ALIGNMENT);
494 assert(((uintptr_t)node & LITE3_NODE_ALIGNMENT_MASK) == 0); // *buf + ofs must be aligned to LITE3_NODE_ALIGNMENT
495
496 u32 gen = node->gen_type >> LITE3_NODE_GEN_SHIFT;
497 ++gen;
498 node->gen_type = (node->gen_type & ~LITE3_NODE_GEN_MASK) | (gen << LITE3_NODE_GEN_SHIFT);
499
500 int key_count;
501 int i;
502 int node_walks = 0;
503 while (1) {
504 if ((node->size_kc & LITE3_NODE_KEY_COUNT_MASK) == LITE3_NODE_KEY_COUNT_MAX) { // node full, need to split
505
506 size_t buflen_aligned = (*inout_buflen + LITE3_NODE_ALIGNMENT_MASK) & ~(size_t)LITE3_NODE_ALIGNMENT_MASK; // next multiple of LITE3_NODE_ALIGNMENT
507 size_t new_node_size = parent ? LITE3_NODE_SIZE : 2 * LITE3_NODE_SIZE;
508
509 if (LITE3_UNLIKELY(new_node_size > bufsz || buflen_aligned > bufsz - new_node_size)) {
510 LITE3_PRINT_ERROR("NO BUFFER SPACE FOR NODE SPLIT\n");
511 errno = ENOBUFS;
512 return -1;
513 }
514 *inout_buflen = buflen_aligned;
515 // TODO: add lost bytes from alignment to GC index
516 if (!parent) { // if root split, create new root
517 LITE3_PRINT_DEBUG("NEW ROOT\n");
518 memcpy(buf + *inout_buflen, node, LITE3_NODE_SIZE);
519 node = __builtin_assume_aligned((struct node *)(buf + *inout_buflen), LITE3_NODE_ALIGNMENT);
520 assert(((uintptr_t)node & LITE3_NODE_ALIGNMENT_MASK) == 0);
521 parent = __builtin_assume_aligned((struct node *)(buf + ofs), LITE3_NODE_ALIGNMENT);
522 assert(((uintptr_t)parent & LITE3_NODE_ALIGNMENT_MASK) == 0);
523 #ifdef LITE3_ZERO_MEM_EXTRA
524 memset(parent->hashes, LITE3_ZERO_MEM_8, sizeof(((struct node *)0)->hashes));
525 memset(parent->kv_ofs, LITE3_ZERO_MEM_8, sizeof(((struct node *)0)->kv_ofs));
526 memset(parent->child_ofs, 0x00, sizeof(((struct node *)0)->child_ofs));
527 #endif
528 parent->size_kc &= ~LITE3_NODE_KEY_COUNT_MASK; // set key_count to 0
529 parent->child_ofs[0] = (u32)*inout_buflen; // insert node as child of new root
530 *inout_buflen += LITE3_NODE_SIZE;
531 key_count = 0;
532 i = 0;
533 }
534 LITE3_PRINT_DEBUG("SPLIT NODE\n");
535 for (int j = key_count; j > i; j--) { // shift parent array before separator insert
536 parent->hashes[j] = parent->hashes[j - 1];
537 parent->kv_ofs[j] = parent->kv_ofs[j - 1];
538 parent->child_ofs[j + 1] = parent->child_ofs[j];
539 }
540 parent->hashes[i] = node->hashes[LITE3_NODE_KEY_COUNT_MIN]; // insert new separator key in parent
541 parent->kv_ofs[i] = node->kv_ofs[LITE3_NODE_KEY_COUNT_MIN];
542 parent->child_ofs[i + 1] = (u32)*inout_buflen; // insert sibling as child in parent
543 parent->size_kc = (parent->size_kc & ~LITE3_NODE_KEY_COUNT_MASK)
544 | ((parent->size_kc + 1) & LITE3_NODE_KEY_COUNT_MASK); // key_count++
545 #ifdef LITE3_ZERO_MEM_EXTRA
546 node->hashes[LITE3_NODE_KEY_COUNT_MIN] = LITE3_ZERO_MEM_32;
547 node->kv_ofs[LITE3_NODE_KEY_COUNT_MIN] = LITE3_ZERO_MEM_32;
548 #endif
549 struct node *restrict sibling = __builtin_assume_aligned((struct node *)(buf + *inout_buflen), LITE3_NODE_ALIGNMENT);
550 assert(((uintptr_t)sibling & LITE3_NODE_ALIGNMENT_MASK) == 0);
551 #ifdef LITE3_ZERO_MEM_EXTRA
552 memset(sibling->hashes, LITE3_ZERO_MEM_8, sizeof(((struct node *)0)->hashes));
553 memset(sibling->kv_ofs, LITE3_ZERO_MEM_8, sizeof(((struct node *)0)->kv_ofs));
554 #endif
555 sibling->gen_type = ((struct node *)(buf + ofs))->gen_type & LITE3_NODE_TYPE_MASK;
556 sibling->size_kc = LITE3_NODE_KEY_COUNT_MIN & LITE3_NODE_KEY_COUNT_MASK;
557 node->size_kc = LITE3_NODE_KEY_COUNT_MIN & LITE3_NODE_KEY_COUNT_MASK;
558 memset(sibling->child_ofs, 0x00, sizeof(((struct node *)0)->child_ofs));
559 sibling->child_ofs[0] = node->child_ofs[LITE3_NODE_KEY_COUNT_MIN + 1]; // take child from node
560 node->child_ofs[LITE3_NODE_KEY_COUNT_MIN + 1] = 0x00;
561 for (int j = 0; j < LITE3_NODE_KEY_COUNT_MIN; j++) { // copy half of node's keys to sibling
562 sibling->hashes[j] = node->hashes[j + LITE3_NODE_KEY_COUNT_MIN + 1];
563 sibling->kv_ofs[j] = node->kv_ofs[j + LITE3_NODE_KEY_COUNT_MIN + 1];
564 sibling->child_ofs[j + 1] = node->child_ofs[j + LITE3_NODE_KEY_COUNT_MIN + 2];
565 #ifdef LITE3_ZERO_MEM_EXTRA
566 node->hashes[j + LITE3_NODE_KEY_COUNT_MIN + 1] = LITE3_ZERO_MEM_32;
567 node->kv_ofs[j + LITE3_NODE_KEY_COUNT_MIN + 1] = LITE3_ZERO_MEM_32;
568 node->child_ofs[j + LITE3_NODE_KEY_COUNT_MIN + 2] = 0x00000000;
569 #endif
570 }
571 if (key_data.hash > parent->hashes[i]) { // sibling has target key? then we follow
572 node = __builtin_assume_aligned(sibling, LITE3_NODE_ALIGNMENT);
573 assert(((uintptr_t)node & LITE3_NODE_ALIGNMENT_MASK) == 0);
574 }
575 *inout_buflen += LITE3_NODE_SIZE;
576 }
577
578 key_count = node->size_kc & LITE3_NODE_KEY_COUNT_MASK;
579 i = 0;
580 while (i < key_count && node->hashes[i] < key_data.hash)
581 i++;
582 if (i < key_count && node->hashes[i] == key_data.hash) { // matching key found, already exists?
583 size_t target_ofs = node->kv_ofs[i];
584 size_t key_start_ofs = target_ofs;
585 if (key && _verify_key(buf, *inout_buflen, key, (size_t)key_data.size, key_tag_size, &target_ofs, NULL) < 0)
586 return -1;
587 size_t val_start_ofs = target_ofs;
588 if (_verify_val(buf, *inout_buflen, &target_ofs) < 0)
589 return -1;
590 if (val_len >= target_ofs - val_start_ofs) { // value is too large, we must append
591 size_t alignment_mask = val_len == lite3_type_sizes[LITE3_TYPE_OBJECT] ? (size_t)LITE3_NODE_ALIGNMENT_MASK : 0;
592 size_t unaligned_val_ofs = *inout_buflen + key_tag_size + (size_t)key_data.size;
593 size_t alignment_padding = ((unaligned_val_ofs + alignment_mask) & ~alignment_mask) - unaligned_val_ofs;
594 entry_size += alignment_padding;
595 if (LITE3_UNLIKELY(entry_size > bufsz || *inout_buflen > bufsz - entry_size)) {
596 LITE3_PRINT_ERROR("NO BUFFER SPACE FOR ENTRY INSERTION\n");
597 errno = ENOBUFS;
598 return -1;
599 }
600 #ifdef LITE3_ZERO_MEM_DELETED
601 memset(buf + node->kv_ofs[i], LITE3_ZERO_MEM_8, target_ofs - key_start_ofs); // zero out key + value
602 #endif
603 (void)key_start_ofs; // silence unused variable warning
604 *inout_buflen += alignment_padding;
605 node->kv_ofs[i] = (u32)*inout_buflen;
606 goto insert_append;
607 // TODO: add lost bytes to GC index
608 }
609 #ifdef LITE3_ZERO_MEM_DELETED
610 memset(buf + val_start_ofs, LITE3_ZERO_MEM_8, target_ofs - val_start_ofs); // zero out value
611 #endif
612 *out = (lite3_val *)(buf + val_start_ofs); // caller overwrites value in place
613 // TODO: add lost bytes to GC index
614 return 0;
615 }
616 if (node->child_ofs[0]) { // if children, walk to next node
617 size_t next_node_ofs = (size_t)node->child_ofs[i];
618
619 parent = __builtin_assume_aligned(node, LITE3_NODE_ALIGNMENT);
620 assert(((uintptr_t)parent & LITE3_NODE_ALIGNMENT_MASK) == 0);
621 node = __builtin_assume_aligned((struct node *)(buf + next_node_ofs), LITE3_NODE_ALIGNMENT);
622 assert(((uintptr_t)node & LITE3_NODE_ALIGNMENT_MASK) == 0);
623
624 if (LITE3_UNLIKELY(next_node_ofs > *inout_buflen - LITE3_NODE_SIZE)) {
625 LITE3_PRINT_ERROR("NODE WALK OFFSET OUT OF BOUNDS\n");
626 errno = EFAULT;
627 return -1;
628 }
629 if (LITE3_UNLIKELY(++node_walks > LITE3_TREE_HEIGHT_MAX)) {
630 LITE3_PRINT_ERROR("NODE WALKS EXCEEDED LITE3_TREE_HEIGHT_MAX\n");
631 errno = EBADMSG;
632 return -1;
633 }
634 } else { // insert the kv-pair
635 size_t alignment_mask = val_len == lite3_type_sizes[LITE3_TYPE_OBJECT] ? (size_t)LITE3_NODE_ALIGNMENT_MASK : 0;
636 size_t unaligned_val_ofs = *inout_buflen + key_tag_size + (size_t)key_data.size;
637 size_t alignment_padding = ((unaligned_val_ofs + alignment_mask) & ~alignment_mask) - unaligned_val_ofs;
638 entry_size += alignment_padding;
639 if (LITE3_UNLIKELY(entry_size > bufsz || *inout_buflen > bufsz - entry_size)) {
640 LITE3_PRINT_ERROR("NO BUFFER SPACE FOR ENTRY INSERTION\n");
641 errno = ENOBUFS;
642 return -1;
643 }
644 for (int j = key_count; j > i; j--) {
645 node->hashes[j] = node->hashes[j - 1];
646 node->kv_ofs[j] = node->kv_ofs[j - 1];
647 }
648 node->hashes[i] = key_data.hash;
649 node->size_kc = (node->size_kc & ~LITE3_NODE_KEY_COUNT_MASK)
650 | ((node->size_kc + 1) & LITE3_NODE_KEY_COUNT_MASK); // key_count++
651 *inout_buflen += alignment_padding;
652 node->kv_ofs[i] = (u32)*inout_buflen;
653
654 node = __builtin_assume_aligned((struct node *)(buf + ofs), LITE3_NODE_ALIGNMENT); // set node to root
655 u32 size = node->size_kc >> LITE3_NODE_SIZE_SHIFT;
656 ++size;
657 node->size_kc = (node->size_kc & ~LITE3_NODE_SIZE_MASK) | (size << LITE3_NODE_SIZE_SHIFT); // node size++
658 goto insert_append;
659 }
660 }
661insert_append:
662 size_t key_size_tmp = (key_data.size << LITE3_KEY_TAG_KEY_SIZE_SHIFT) | (key_tag_size - 1);
663 memcpy(buf + *inout_buflen, &key_size_tmp, key_tag_size);
664 *inout_buflen += key_tag_size;
665 memcpy(buf + *inout_buflen, key, (size_t)key_data.size);
666 *inout_buflen += (size_t)key_data.size;
667 *out = (lite3_val *)(buf + *inout_buflen);
668 *inout_buflen += LITE3_VAL_SIZE + val_len;
669 return 0;
670}
671
672int lite3_set_obj_impl(unsigned char *buf, size_t *restrict inout_buflen, size_t ofs, size_t bufsz, const char *restrict key, lite3_key_data key_data, size_t *restrict out_ofs)
673{
674 lite3_val *val;
675 int ret;
676 if ((ret = lite3_set_impl(buf, inout_buflen, ofs, bufsz, key, key_data, lite3_type_sizes[LITE3_TYPE_OBJECT], &val)) < 0)
677 return ret;
678 size_t init_ofs = (size_t)((u8 *)val - buf);
679 if (out_ofs)
680 *out_ofs = init_ofs;
681 _lite3_init_impl(buf, init_ofs, LITE3_TYPE_OBJECT);
682 return ret;
683}
684
685int lite3_set_arr_impl(unsigned char *buf, size_t *restrict inout_buflen, size_t ofs, size_t bufsz, const char *restrict key, lite3_key_data key_data, size_t *restrict out_ofs)
686{
687 lite3_val *val;
688 int ret;
689 if ((ret = lite3_set_impl(buf, inout_buflen, ofs, bufsz, key, key_data, lite3_type_sizes[LITE3_TYPE_ARRAY], &val)) < 0)
690 return ret;
691 size_t init_ofs = (size_t)((u8 *)val - buf);
692 if (out_ofs)
693 *out_ofs = init_ofs;
694 _lite3_init_impl(buf, init_ofs, LITE3_TYPE_ARRAY);
695 return ret;
696}
697
698int lite3_arr_append_obj_impl(unsigned char *buf, size_t *restrict inout_buflen, size_t ofs, size_t bufsz, size_t *restrict out_ofs)
699{
700 u32 size = ((struct node *)(buf + ofs))->size_kc >> LITE3_NODE_SIZE_SHIFT;
701 lite3_key_data key_data = {
702 .hash = size,
703 .size = 0,
704 };
705 lite3_val *val;
706 int ret;
707 if ((ret = lite3_set_impl(buf, inout_buflen, ofs, bufsz, NULL, key_data, lite3_type_sizes[LITE3_TYPE_OBJECT], &val)) < 0)
708 return ret;
709 size_t init_ofs = (size_t)((u8 *)val - buf);
710 if (out_ofs)
711 *out_ofs = init_ofs;
712 _lite3_init_impl(buf, init_ofs, LITE3_TYPE_OBJECT);
713 return ret;
714}
715
716int lite3_arr_append_arr_impl(unsigned char *buf, size_t *restrict inout_buflen, size_t ofs, size_t bufsz, size_t *restrict out_ofs)
717{
718 u32 size = ((struct node *)(buf + ofs))->size_kc >> LITE3_NODE_SIZE_SHIFT;
719 lite3_key_data key_data = {
720 .hash = size,
721 .size = 0,
722 };
723 lite3_val *val;
724 int ret;
725 if ((ret = lite3_set_impl(buf, inout_buflen, ofs, bufsz, NULL, key_data, lite3_type_sizes[LITE3_TYPE_ARRAY], &val)) < 0)
726 return ret;
727 size_t init_ofs = (size_t)((u8 *)val - buf);
728 if (out_ofs)
729 *out_ofs = init_ofs;
730 _lite3_init_impl(buf, init_ofs, LITE3_TYPE_ARRAY);
731 return ret;
732}
733
734int lite3_arr_set_obj_impl(unsigned char *buf, size_t *restrict inout_buflen, size_t ofs, size_t bufsz, uint32_t index, size_t *restrict out_ofs)
735{
736 u32 size = ((struct node *)(buf + ofs))->size_kc >> LITE3_NODE_SIZE_SHIFT;
737 if (LITE3_UNLIKELY(index > size)) {
738 LITE3_PRINT_ERROR("INVALID ARGUMENT: ARRAY INDEX %u OUT OF BOUNDS (size == %u)\n", index, size);
739 errno = EINVAL;
740 return -1;
741 }
742 lite3_key_data key_data = {
743 .hash = index,
744 .size = 0,
745 };
746 lite3_val *val;
747 int ret;
748 if ((ret = lite3_set_impl(buf, inout_buflen, ofs, bufsz, NULL, key_data, lite3_type_sizes[LITE3_TYPE_OBJECT], &val)) < 0)
749 return ret;
750 size_t init_ofs = (size_t)((u8 *)val - buf);
751 if (out_ofs)
752 *out_ofs = init_ofs;
753 _lite3_init_impl(buf, init_ofs, LITE3_TYPE_OBJECT);
754 return ret;
755}
756
757int lite3_arr_set_arr_impl(unsigned char *buf, size_t *restrict inout_buflen, size_t ofs, size_t bufsz, uint32_t index, size_t *restrict out_ofs)
758{
759 u32 size = ((struct node *)(buf + ofs))->size_kc >> LITE3_NODE_SIZE_SHIFT;
760 if (LITE3_UNLIKELY(index > size)) {
761 LITE3_PRINT_ERROR("INVALID ARGUMENT: ARRAY INDEX %u OUT OF BOUNDS (size == %u)\n", index, size);
762 errno = EINVAL;
763 return -1;
764 }
765 lite3_key_data key_data = {
766 .hash = index,
767 .size = 0,
768 };
769 lite3_val *val;
770 int ret;
771 if ((ret = lite3_set_impl(buf, inout_buflen, ofs, bufsz, NULL, key_data, lite3_type_sizes[LITE3_TYPE_ARRAY], &val)) < 0)
772 return ret;
773 size_t init_ofs = (size_t)((u8 *)val - buf);
774 if (out_ofs)
775 *out_ofs = init_ofs;
776 _lite3_init_impl(buf, init_ofs, LITE3_TYPE_ARRAY);
777 return ret;
778}
#define LITE3_TREE_HEIGHT_MAX
Maximum B-tree height.
Definition lite3.h:336
#define LITE3_NODE_ALIGNMENT
B-tree node alignment configuration.
Definition lite3.h:299
#define LITE3_NODE_SIZE
B-tree node size setting.
Definition lite3.h:321
#define LITE3_NODE_SIZE_KC_OFFSET
Offset of the size_kc field inside struct node.
Definition lite3.h:350
#define LITE3_ITER_ITEM
Return value of lite3_iter_next(); iterator produced an item, continue;.
Definition lite3.h:2616
#define LITE3_ITER_DONE
Return value of lite3_iter_next(); iterator finished; stop.
Definition lite3.h:2618
int lite3_iter_next(const unsigned char *buf, size_t buflen, lite3_iter *iter, lite3_str *out_key, size_t *out_val_ofs)
Get the next item from a lite3 iterator.
Definition lite3.c:333
lite3_type
enum containing all Lite³ types
Definition lite3.h:510
@ LITE3_TYPE_ARRAY
maps to 'array' type in JSON
Definition lite3.h:518
@ LITE3_TYPE_INVALID
any type value equal or greater than this is considered invalid
Definition lite3.h:519
@ LITE3_TYPE_STRING
maps to 'string' type in JSON
Definition lite3.h:516
@ LITE3_TYPE_BYTES
coverted to base64 string in JSON
Definition lite3.h:515
@ LITE3_TYPE_OBJECT
maps to 'object' type in JSON
Definition lite3.h:517
Lite³ Buffer API Header.
Struct containing iterator state.
Definition lite3.h:2626
Struct holding a reference to a string inside a Lite³ buffer.
Definition lite3.h:601
uint32_t len
char array length (characters, exclusive of NULL-terminator)
Definition lite3.h:603
const char * ptr
char array pointer to string inside Lite³ buffer
Definition lite3.h:604
uint32_t gen
generation of the Lite³ buffer when this struct was returned
Definition lite3.h:602
Struct representing a value inside a Lite³ buffer.
Definition lite3.h:531
Definition lite3.c:77