am_string.h (10268B)
1 #ifndef AM_STRING_H 2 #define AM_STRING_H 3 4 #ifndef AM_MEMORY_H 5 #error "am_memory.h is required" 6 #endif 7 #ifndef AM_LIST_H 8 #error "am_list.h is required" 9 #endif 10 11 typedef struct { 12 usz size; 13 u8 *str; 14 } Str; 15 16 typedef struct StrListNode StrListNode; 17 struct StrListNode { 18 StrListNode *next; 19 Str s; 20 }; 21 22 typedef struct { 23 StrListNode *first; 24 StrListNode *last; 25 u64 node_count; 26 u64 total_size; 27 } StrList; 28 29 #define AM_STR_LIT(s) (Str) { .str = (u8 *)(s), .size = sizeof(s) - 1, } 30 #define AM_STR_EXPAND(s) (s32)((s).size), ((s).str) 31 32 internal usz am_cstr_len(char *cstr) { 33 usz len = 0; 34 while (cstr && *cstr != '\0') { 35 len++; 36 cstr++; 37 } 38 return len; 39 } 40 41 internal Str am_str(u8 *str, usz size) { 42 return (Str) { .size = size, .str = str, }; 43 } 44 45 internal Str am_str_from_range(u8 *start, u8 *opl) { 46 assert(start < opl); 47 return am_str(start, (usz)(opl - start)); 48 } 49 50 internal Str am_str_from_cstr(char *cstr, usz len) { 51 return am_str((u8 *)cstr, len); 52 } 53 54 // TODO: Use string interning 55 internal bool am_str_eq(Str s1, Str s2) { 56 if (s1.size != s2.size) { 57 return false; 58 } 59 60 for (usz i = 0; i < s1.size; i++) { 61 if (s1.str[i] != s2.str[i]) { 62 return false; 63 } 64 } 65 66 return true; 67 } 68 69 internal bool am_cstr_eq(char *cstr1, char *cstr2) { 70 usz len1 = am_cstr_len(cstr1); 71 usz len2 = am_cstr_len(cstr2); 72 return am_str_eq(am_str_from_cstr(cstr1, len1), am_str_from_cstr(cstr2, len2)); 73 } 74 75 internal Str am_str_line(Str s, u8 *cursor) { 76 u8 *opl = s.str + s.size; 77 78 assert(cursor >= s.str && cursor < opl); 79 80 u8 *end = cursor; 81 while (end < opl) { 82 if (*end == '\n') { 83 break; 84 } 85 end++; 86 } 87 88 u8 *beginning = cursor - (*cursor == '\n'); 89 while (beginning > s.str) { 90 if (*beginning == '\n') { 91 beginning++; 92 break; 93 } 94 beginning--; 95 } 96 97 return am_str_from_range(beginning, end); 98 } 99 100 internal bool am_str_contains(Str s, char c) { 101 for (usz i = 0; i < s.size; i++) { 102 if (s.str[i] == c) { 103 return true; 104 } 105 } 106 return false; 107 } 108 109 internal bool am_cstr_contains(char *s, usz len, char c) { 110 return am_str_contains(am_str_from_cstr(s, len), c); 111 } 112 113 internal bool am_str_cstr_eq(Str s, char *cstr) { 114 return am_str_eq(s, am_str_from_cstr(cstr, am_cstr_len(cstr))); 115 } 116 117 internal void am_str_list_append(MemArena *arena, StrList *list, Str s) { 118 StrListNode *node = am_mem_arena_push(arena, sizeof(StrListNode)); 119 *node = (StrListNode) { 120 .s = s, 121 }; 122 SLL_QUEUE_PUSH(list->first, list->last, node); 123 list->node_count += 1; 124 list->total_size += s.size; 125 } 126 127 internal void am_str_list_discard_empty(StrList *list) { 128 assert(list); 129 if (list->node_count) { 130 StrListNode *previous = NULL; 131 132 for (StrListNode *n = list->first; n; n = n->next) { 133 if (n->s.size == 0) { 134 assert(list->node_count > 0); 135 list->node_count--; 136 if (previous) { 137 previous->next = n->next; 138 } else { 139 list->first = n->next; 140 } 141 } else { 142 previous = n; 143 } 144 } 145 } 146 } 147 148 typedef struct { 149 Str pre; 150 Str mid; 151 Str post; 152 } StringJoinOptions; 153 154 internal Str am_str_join(MemArena *arena, StrList *list, StringJoinOptions *options) { 155 StringJoinOptions join; 156 if (options) { 157 join = *options; 158 } else { 159 join = (StringJoinOptions){0}; 160 } 161 162 Str s = {0}; 163 164 if (list->node_count) { 165 assert(list->node_count > 0); 166 167 u64 total_size = list->total_size 168 + join.pre.size 169 + (join.mid.size * (list->node_count - 1)) 170 + join.post.size; 171 172 s = (Str) { 173 .str = AM_MEM_ARENA_PUSH_ARRAY(arena, u8, total_size), 174 .size = total_size, 175 }; 176 177 u8 *p = s.str; 178 am_mem_copy(p, join.pre.str, join.pre.size); 179 p += join.pre.size; 180 181 bool is_mid = false; 182 for (StrListNode *n = list->first; n; n = n->next) { 183 if (is_mid) { 184 am_mem_copy(p, join.mid.str, join.mid.size); 185 p += join.mid.size; 186 } 187 188 am_mem_copy(p, n->s.str, n->s.size); 189 p += n->s.size; 190 191 is_mid = true; 192 } 193 194 am_mem_copy(p, join.post.str, join.post.size); 195 p += join.post.size; 196 } 197 198 return s; 199 } 200 201 internal StrList am_str_split(MemArena *arena, Str s, u8 *split_chars, u64 split_char_count) { 202 StrList list = {0}; 203 204 u8 *cursor = s.str; 205 u8 *split_beginning = cursor; 206 u8 *opl = s.str + s.size; 207 while (cursor <= opl) { 208 bool split_byte = false; 209 for (u64 i = 0; i < split_char_count; i++) { 210 if (split_chars[i] == *cursor) { 211 split_byte = true; 212 break; 213 } 214 } 215 216 if (split_byte) { 217 if (cursor == split_beginning) { 218 am_str_list_append(arena, &list, AM_STR_LIT("")); 219 } else { 220 am_str_list_append(arena, &list, am_str_from_range(split_beginning, cursor)); 221 } 222 split_beginning = cursor + 1; 223 } 224 cursor++; 225 } 226 227 return list; 228 } 229 230 internal void am_string_test(void) { 231 assert(am_cstr_len("abcdefg") == 7); 232 assert(am_cstr_len("") == 0); 233 assert(am_cstr_len("\0") == 0); 234 assert(am_cstr_eq("", "")); 235 assert(am_cstr_eq("\0", "\0")); 236 assert(am_cstr_eq("abc", "abc")); 237 assert(!am_cstr_eq("ABC", "abc")); 238 assert(!am_cstr_eq("", " ")); 239 assert(!am_cstr_eq("abc", "abcde")); 240 assert(!am_str_cstr_eq(AM_STR_LIT("abcd"), "abc")); 241 assert(am_str_cstr_eq(AM_STR_LIT("abc"), "abc")); 242 assert(am_cstr_contains("abc", 3, 'c')); 243 assert(am_cstr_contains("a c", 3, ' ')); 244 assert(am_cstr_contains("", 1, '\0')); 245 assert(!am_cstr_contains("abc", 3, 'z')); 246 assert(!am_cstr_contains("", 0, 'z')); 247 assert(!am_cstr_contains("", 0, '\0')); 248 assert(am_cstr_contains("https://www.example.com", 23, '/')); 249 250 { 251 StrList l = {0}; 252 MemArena a = am_mem_arena_create(am_mem_base_allocator_malloc()); 253 Str strings[] = { 254 AM_STR_LIT("one"), 255 AM_STR_LIT("two"), 256 AM_STR_LIT("three"), 257 }; 258 259 for (u64 i = 0; i < ARRAY_COUNT(strings); i++) { 260 am_str_list_append(&a, &l, strings[i]); 261 } 262 263 Str joined = am_str_join(&a, &l, &(StringJoinOptions){ 264 .pre = AM_STR_LIT("Joined: '"), 265 .mid = AM_STR_LIT(", "), 266 .post = AM_STR_LIT("'."), 267 }); 268 269 printf("%.*s\n", AM_STR_EXPAND(joined)); 270 271 StrList split = am_str_split(&a, AM_STR_LIT(", one, two, three, four, "), (u8 *)", ", 2); 272 273 for (StrListNode *n = split.first; n; n = n->next) { 274 printf("split: '%.*s'\n", AM_STR_EXPAND(n->s)); 275 } 276 277 am_mem_arena_release(&a); 278 } 279 280 { 281 MemArena a = am_mem_arena_create(am_mem_base_allocator_malloc()); 282 StrList lines = am_str_split(&a, AM_STR_LIT("one|two|three|four"), (u8 *)"|", 1); 283 284 Str expected[] = { 285 AM_STR_LIT("one"), 286 AM_STR_LIT("two"), 287 AM_STR_LIT("three"), 288 AM_STR_LIT("four"), 289 }; 290 291 size_t i = 0; 292 for (StrListNode *line = lines.first; line; line = line->next) { 293 printf("Expected: '%.*s', Got: '%.*s'\n", AM_STR_EXPAND(expected[i]), AM_STR_EXPAND(line->s)); 294 assert(i < ARRAY_COUNT(expected)); 295 assert(am_str_eq(line->s, expected[i])); 296 i++; 297 } 298 299 assert(i + 1 == ARRAY_COUNT(expected)); 300 am_mem_arena_release(&a); 301 } 302 303 { 304 MemArena a = am_mem_arena_create(am_mem_base_allocator_malloc()); 305 StrList lines = am_str_split(&a, AM_STR_LIT("|one|two||three|four|"), (u8 *)"|", 1); 306 307 Str expected[] = { 308 AM_STR_LIT(""), 309 AM_STR_LIT("one"), 310 AM_STR_LIT("two"), 311 AM_STR_LIT(""), 312 AM_STR_LIT("three"), 313 AM_STR_LIT("four"), 314 AM_STR_LIT(""), 315 }; 316 317 size_t i = 0; 318 for (StrListNode *line = lines.first; line; line = line->next) { 319 printf("Expected: '%.*s', Got: '%.*s'\n", AM_STR_EXPAND(expected[i]), AM_STR_EXPAND(line->s)); 320 assert(i < ARRAY_COUNT(expected)); 321 assert(am_str_eq(line->s, expected[i])); 322 i++; 323 } 324 325 assert(i + 1 == ARRAY_COUNT(expected)); 326 am_mem_arena_release(&a); 327 } 328 329 { 330 MemArena a = am_mem_arena_create(am_mem_base_allocator_malloc()); 331 StrList lines = am_str_split(&a, AM_STR_LIT("|||"), (u8 *)"|", 1); 332 333 Str expected[] = { 334 AM_STR_LIT(""), 335 AM_STR_LIT(""), 336 AM_STR_LIT(""), 337 AM_STR_LIT(""), 338 }; 339 340 size_t i = 0; 341 for (StrListNode *line = lines.first; line; line = line->next) { 342 printf("Expected: '%.*s', Got: '%.*s'\n", AM_STR_EXPAND(expected[i]), AM_STR_EXPAND(line->s)); 343 assert(i < ARRAY_COUNT(expected)); 344 assert(am_str_eq(line->s, expected[i])); 345 i++; 346 } 347 348 assert(i + 1 == ARRAY_COUNT(expected)); 349 am_mem_arena_release(&a); 350 } 351 352 { 353 MemArena a = am_mem_arena_create(am_mem_base_allocator_malloc()); 354 StrList lines = am_str_split(&a, AM_STR_LIT(""), (u8 *)"|", 1); 355 356 assert(lines.node_count == 0); 357 am_mem_arena_release(&a); 358 } 359 360 { 361 MemArena a = am_mem_arena_create(am_mem_base_allocator_malloc()); 362 StrList lines = am_str_split(&a, AM_STR_LIT("one two three four"), (u8 *)"|", 1); 363 364 Str expected[] = { 365 AM_STR_LIT("one two three four"), 366 }; 367 368 size_t i = 0; 369 for (StrListNode *line = lines.first; line; line = line->next) { 370 printf("Expected: '%.*s', Got: '%.*s'\n", AM_STR_EXPAND(expected[i]), AM_STR_EXPAND(line->s)); 371 assert(i < ARRAY_COUNT(expected)); 372 assert(am_str_eq(line->s, expected[i])); 373 i++; 374 } 375 376 assert(i + 1 == ARRAY_COUNT(expected)); 377 am_mem_arena_release(&a); 378 } 379 } 380 381 #endif // AM_STRING_H