From d37d1ebc6c586b38d432f459060dc0c15df40d21 Mon Sep 17 00:00:00 2001 From: Dennis Eichhorn Date: Sat, 22 Mar 2025 17:48:44 +0000 Subject: [PATCH] bug fixes --- html/template/HtmlTemplateCache.h | 39 ++++++++++++++----- html/template/HtmlTemplateInterpreter.h | 31 ++++++++++----- html/template/HtmlTemplateParser.h | 51 ++++++++++++++++--------- platform/linux/FileUtils.cpp | 40 +++++++++++++++---- platform/win32/FileUtils.cpp | 11 ++++-- stdlib/HashMap.h | 4 ++ stdlib/PerfectHashMap.h | 43 +++++++++++++++++++++ utils/StringUtils.h | 14 +++++++ 8 files changed, 185 insertions(+), 48 deletions(-) diff --git a/html/template/HtmlTemplateCache.h b/html/template/HtmlTemplateCache.h index 3c1164f..6681e22 100755 --- a/html/template/HtmlTemplateCache.h +++ b/html/template/HtmlTemplateCache.h @@ -38,6 +38,7 @@ void html_template_find(const char* path, va_list args) { char** paths = va_arg(args, char**); uint32* path_count = va_arg(args, uint32*); uint32* max_path_count = va_arg(args, uint32*); + uint32* total_file_size = va_arg(args, uint32*); RingMemory* ring = va_arg(args, RingMemory*); if (path_count == max_path_count) { @@ -49,6 +50,7 @@ void html_template_find(const char* path, va_list args) { *paths = new_paths; } + *total_file_size += file_size(path); str_copy_short(paths[*path_count], path, 256); ++(*path_count); } @@ -57,11 +59,16 @@ void html_template_cache_init(HtmlTemplateCache* cache, const char* basedir, Buf uint32 max_path_count = 1000; uint32 path_count = 0; char* paths = (char *) ring_get_memory(ring, max_path_count * 256 * sizeof(char), 8, true); + uint32 total_file_size = 0; - iterate_directory(basedir, ".tpl.html", html_template_find, &paths, &path_count, &max_path_count, ring); + iterate_directory(basedir, ".tpl.html", html_template_find, &paths, &path_count, &max_path_count, &total_file_size, ring); + cache->cache_size = (uint64) (total_file_size * 1.2); + cache->cache = (byte *) buffer_get_memory(buf, cache->cache_size, 64, true); - perfect_hashmap_create(&cache->hm, path_count, sizeof(uint32), buf); - perfect_hashmap_prepare(&cache->hm, (const char**) paths, path_count, 10000, ring); + perfect_hashmap_create(&cache->hm, path_count, sizeof(PerfectHashEntryInt32), buf); + perfect_hashmap_prepare(&cache->hm, (const char*) paths, path_count, 256, 10000, ring); + + LOG_1("Created HtmlTemplateCache with %n B for %n templates with %n B in uncompressed file size", {{LOG_DATA_INT64, &cache->cache_size}, {LOG_DATA_INT32, &path_count}, {LOG_DATA_INT32, &total_file_size}}); } bool html_template_in_control_structure(const char* str, const char** controls, int32 control_length) { @@ -92,9 +99,12 @@ void html_template_cache_load(HtmlTemplateCache* cache, const char* key, const c // All-in-all let's consider this a pre-pass that we might want to move to the lexer in the future but I don't think so int32 in_control_structure = 0; while (*str) { - if (!in_control_structure && str_is_empty(*str)) { - ++str; + if (!in_control_structure && str_is_eol(*str)) { + str_skip_eol(&str); + } else if (!in_control_structure && str_is_empty(*str)) { + // @performance This keeps whitespaces, which we don't want and could optimize away str_skip_empty(&str); + --str; } if (!in_control_structure @@ -136,9 +146,6 @@ void html_template_cache_load(HtmlTemplateCache* cache, const char* key, const c ASSERT_SIMPLE(((uintptr_t) ast) % 64 == 0); perfect_hashmap_insert(&cache->hm, key, (int32) ((uintptr_t) ast - (uintptr_t) cache->cache)); - - // @todo This belongs to wherever we want to output the user specified template - //interpret(ast, &context_stack); } static @@ -146,14 +153,28 @@ void html_template_cache_iter(const char* path, va_list args) { HtmlTemplateCache* cache = va_arg(args, HtmlTemplateCache*); RingMemory* ring = va_arg(args, RingMemory*); + char full_path[MAX_PATH]; + relative_to_absolute(path, full_path); + FileBody file = {}; - file_read(path, &file, ring); + file_read(full_path, &file, ring); html_template_cache_load(cache, path, (const char *) file.content); } void html_template_cache_load_all(HtmlTemplateCache* cache, const char* basedir, RingMemory* ring) { iterate_directory(basedir, ".tpl.html", html_template_cache_iter, cache, ring); + LOG_1("Loaded all html templates with %n in cache size", {{LOG_DATA_INT32, &cache->cache_pos}}); +} + +HtmlTemplateASTNode* html_template_cache_get(HtmlTemplateCache* cache, const char* key) +{ + PerfectHashEntryInt32* entry = (PerfectHashEntryInt32 *) perfect_hashmap_get_entry(&cache->hm, key); + if (!entry) { + return NULL; + } + + return (HtmlTemplateASTNode *) (cache->cache + entry->value); } #endif \ No newline at end of file diff --git a/html/template/HtmlTemplateInterpreter.h b/html/template/HtmlTemplateInterpreter.h index db872eb..d5f7996 100755 --- a/html/template/HtmlTemplateInterpreter.h +++ b/html/template/HtmlTemplateInterpreter.h @@ -29,7 +29,7 @@ struct HtmlTemplateValue { bool boolValue; int64 int64Value; f64 f64Value; - char* ptrValue; + const char* ptrValue; }; }; @@ -48,7 +48,7 @@ struct HtmlTemplateVariable { bool boolValue; int64 int64Value; f64 f64Value; - char* ptrValue; + const char* ptrValue; }; }; @@ -269,29 +269,40 @@ bool html_template_condition_eval(HtmlTemplateASTNode *node, HtmlTemplateContext } // @todo should take in a buffer for template output -// @performance, what if there is no template data, just html? -> a simple pointer to the resource data should be created?! -// This would maybe allow us to also return files in the future -void html_template_interpret(HtmlTemplateASTNode *node, HtmlTemplateContextStack *context_stack) { +int32 html_template_interpret(HtmlTemplateASTNode *node, char* buffer, int32 buffer_size, HtmlTemplateContextStack *context_stack) { + int32 out_length = 0; + switch (node->type) { + case NODE_RAW: + // @performance If the entire file is raw we shouldn't have to copy the text over + memcpy(buffer, node->ptrValue, node->value_length); + out_length += node->value_length; + + if (node->right) { + out_length += html_template_interpret(node->right, buffer + node->value_length, buffer_size - node->value_length, context_stack); + } + break; case NODE_ASSIGN: // Handle assignment break; case NODE_IF: // Handle if statement - html_template_interpret(node->left, context_stack); // Condition - html_template_interpret(node->right, context_stack); // Body + html_template_interpret(node->left, buffer, buffer_size, context_stack); // Condition + html_template_interpret(node->right, buffer, buffer_size, context_stack); // Body break; case NODE_FOR: // Handle for loop - html_template_interpret(node->left, context_stack); // Init + html_template_interpret(node->left, buffer, buffer_size, context_stack); // Init while (html_template_condition_eval(node->right->left, context_stack)) { // Condition - html_template_interpret(node->right->right, context_stack); // Body - html_template_interpret(node->right->left->right, context_stack); // Update + html_template_interpret(node->right->right, buffer, buffer_size, context_stack); // Body + html_template_interpret(node->right->left->right, buffer, buffer_size, context_stack); // Update } break; default: break; } + + return out_length; } #endif \ No newline at end of file diff --git a/html/template/HtmlTemplateParser.h b/html/template/HtmlTemplateParser.h index ea20617..3dd6fb3 100755 --- a/html/template/HtmlTemplateParser.h +++ b/html/template/HtmlTemplateParser.h @@ -16,6 +16,7 @@ enum HtmlTemplateNodeType : byte { NODE_BINOP, NODE_PTR, NODE_IDENTIFIER, + NODE_RAW, NODE_BOOL, NODE_INTEGER64, NODE_FLOAT64, @@ -71,14 +72,14 @@ struct HtmlTemplateASTNode { bool boolValue; int64 int64Value; f64 f64Value; - char* ptrValue; + const char* ptrValue; }; }; HtmlTemplateASTNode* html_template_node_create(HtmlTemplateNodeType type, HtmlTemplateToken* token, byte** memory) { - *memory = (byte *) ROUND_TO_NEAREST((uintptr_t) memory, 32); + *memory = (byte *) ROUND_TO_NEAREST((uintptr_t) *memory, 32); HtmlTemplateASTNode* node = (HtmlTemplateASTNode *) *memory; - *memory = (byte *) ROUND_TO_NEAREST((uintptr_t) (memory + sizeof(HtmlTemplateASTNode)), 32); + *memory = (byte *) ROUND_TO_NEAREST((uintptr_t) (*memory + sizeof(HtmlTemplateASTNode)), 32); node->type = type; node->left = NULL; @@ -86,7 +87,7 @@ HtmlTemplateASTNode* html_template_node_create(HtmlTemplateNodeType type, HtmlTe node->value_length = token->length; // @question instead of handling the parsing below, why not handle it here for known types such as int, float, bool, string - memcpy(&node->int64Value, token->value, sizeof(uintptr_t)); + node->ptrValue = token->value; return node; } @@ -180,20 +181,20 @@ HtmlTemplateASTNode* html_template_assignment_parse(const char** input, HtmlTemp return html_template_node_create(NODE_ASSIGN, {}, memory); } -HtmlTemplateASTNode* html_template_parse_if(const char** input, HtmlTemplateToken* token_current, HtmlTemplateContextStack* contextStack, HtmlTemplateContextFlag context_flag, byte** memory) { - HtmlTemplateContext newContext = peekContext(contextStack); +HtmlTemplateASTNode* html_template_parse_if(const char** input, HtmlTemplateToken* token_current, HtmlTemplateContextStack* context_stack, HtmlTemplateContextFlag context_flag, byte** memory) { + HtmlTemplateContext newContext = peekContext(context_stack); ++newContext.scope_level; - pushContext(contextStack, newContext); + pushContext(context_stack, newContext); *token_current = html_template_token_next(input, context_flag); // Consume 'if' *token_current = html_template_token_next(input, context_flag); // Consume '(' HtmlTemplateASTNode* condition = html_template_expression_parse(input, token_current, context_flag, memory); *token_current = html_template_token_next(input, context_flag); // Consume ')' *token_current = html_template_token_next(input, context_flag); // Consume '{' - HtmlTemplateASTNode* body = html_template_statement_parse(input, token_current, contextStack, context_flag, memory); + HtmlTemplateASTNode* body = html_template_statement_parse(input, token_current, context_stack, context_flag, memory); *token_current = html_template_token_next(input, context_flag); // Consume '}' - popContext(contextStack); + popContext(context_stack); HtmlTemplateASTNode* ifNode = html_template_node_create(NODE_IF, {}, memory); ifNode->left = condition; // Condition @@ -202,11 +203,11 @@ HtmlTemplateASTNode* html_template_parse_if(const char** input, HtmlTemplateToke return ifNode; } -HtmlTemplateASTNode* html_template_parse_for(const char** input, HtmlTemplateToken* token_current, HtmlTemplateContextStack* contextStack, HtmlTemplateContextFlag context_flag, byte** memory) { - HtmlTemplateContext newContext = peekContext(contextStack); +HtmlTemplateASTNode* html_template_parse_for(const char** input, HtmlTemplateToken* token_current, HtmlTemplateContextStack* context_stack, HtmlTemplateContextFlag context_flag, byte** memory) { + HtmlTemplateContext newContext = peekContext(context_stack); ++newContext.scope_level; ++newContext.loop_nesting_level; - pushContext(contextStack, newContext); + pushContext(context_stack, newContext); *token_current = html_template_token_next(input, context_flag); // Consume 'for' *token_current = html_template_token_next(input, context_flag); // Consume '(' @@ -216,10 +217,10 @@ HtmlTemplateASTNode* html_template_parse_for(const char** input, HtmlTemplateTok HtmlTemplateASTNode* update = html_template_assignment_parse(input, token_current, context_flag, memory); *token_current = html_template_token_next(input, context_flag); // Consume ')' *token_current = html_template_token_next(input, context_flag); // Consume '{' - HtmlTemplateASTNode* body = html_template_statement_parse(input, token_current, contextStack, context_flag, memory); + HtmlTemplateASTNode* body = html_template_statement_parse(input, token_current, context_stack, context_flag, memory); *token_current = html_template_token_next(input, context_flag); // Consume '}' - popContext(contextStack); + popContext(context_stack); HtmlTemplateASTNode* forNode = html_template_node_create(NODE_FOR, {}, memory); forNode->left = init; // Initialization @@ -231,14 +232,28 @@ HtmlTemplateASTNode* html_template_parse_for(const char** input, HtmlTemplateTok return forNode; } -HtmlTemplateASTNode* html_template_statement_parse(const char** input, HtmlTemplateToken* token_current, HtmlTemplateContextStack* contextStack, HtmlTemplateContextFlag context_flag, byte** memory) { - if (token_current->type == TOKEN_ASSIGN) { +HtmlTemplateASTNode* html_template_html_parse(const char** input, HtmlTemplateToken* token_current, HtmlTemplateContextStack* context_stack, HtmlTemplateContextFlag context_flag, byte** memory) { + HtmlTemplateASTNode* html = html_template_node_create(NODE_RAW, token_current, memory); + + *token_current = html_template_token_next(input, context_flag); // Consume html + if (token_current->type != TOKEN_EOF) { + html->right = html_template_statement_parse(input, token_current, context_stack, context_flag, memory); + } + + return html; +} + +HtmlTemplateASTNode* html_template_statement_parse(const char** input, HtmlTemplateToken* token_current, HtmlTemplateContextStack* context_stack, HtmlTemplateContextFlag context_flag, byte** memory) { + if (token_current->type == TOKEN_HTML) { + return html_template_html_parse(input, token_current, context_stack, context_flag, memory); + } else if (token_current->type == TOKEN_ASSIGN) { return html_template_assignment_parse(input, token_current, context_flag, memory); } else if (token_current->type == TOKEN_IF) { - return html_template_parse_if(input, token_current, contextStack, context_flag, memory); + return html_template_parse_if(input, token_current, context_stack, context_flag, memory); } else if (token_current->type == TOKEN_FOR) { - return html_template_parse_for(input, token_current, contextStack, context_flag, memory); + return html_template_parse_for(input, token_current, context_stack, context_flag, memory); } else { + ASSERT_SIMPLE(false); exit(1); } } diff --git a/platform/linux/FileUtils.cpp b/platform/linux/FileUtils.cpp index 42034bf..5f630a2 100755 --- a/platform/linux/FileUtils.cpp +++ b/platform/linux/FileUtils.cpp @@ -112,8 +112,17 @@ void relative_to_absolute(const char* __restrict rel, char* __restrict path) inline uint64 file_size(const char* filename) { struct stat buffer; - if (stat(filename, &buffer) != 0) { - return 0; + + if (*filename == '.') { + char full_path[MAX_PATH]; + relative_to_absolute(filename, full_path); + if (stat(full_path, &buffer) != 0) { + return 0; + } + } else { + if (stat(filename, &buffer) != 0) { + return 0; + } } return buffer.st_size; @@ -123,7 +132,14 @@ inline uint64 file_last_modified(const char* filename) { struct stat buffer; - stat(filename, &buffer); + + if (*filename == '.') { + char full_path[MAX_PATH]; + relative_to_absolute(filename, full_path); + stat(full_path, &buffer); + } else { + stat(filename, &buffer); + } return (uint64) buffer.st_mtime; } @@ -378,11 +394,14 @@ void self_path(char* path) { } } -void iterate_directory(const char *base_path, const char* file_ending, void (*handler)(const char *, va_list), ...) { +void iterate_directory(const char* base_path, const char* file_ending, void (*handler)(const char *, va_list), ...) { va_list args; va_start(args, handler); - DIR *dir = opendir(base_path); + char full_base_path[MAX_PATH]; + relative_to_absolute(base_path, full_base_path); + + DIR* dir = opendir(full_base_path); if (!dir) { return; } @@ -397,15 +416,20 @@ void iterate_directory(const char *base_path, const char* file_ending, void (*ha continue; } - char full_path[1024]; + char full_path[MAX_PATH]; // @performance This is bad, we are internally moving two times too often to the end of full_path // Maybe make str_copy_short return the length, same as append? str_copy_short(full_path, base_path); - str_concat_append(full_path, "/"); + if (!str_ends_with(base_path, "/")) { + str_concat_append(full_path, "/"); + } str_concat_append(full_path, entry->d_name); + char full_file_path[MAX_PATH]; + relative_to_absolute(full_path, full_file_path); + struct stat statbuf; - if (stat(full_path, &statbuf) == -1) { + if (stat(full_file_path, &statbuf) == -1) { continue; } diff --git a/platform/win32/FileUtils.cpp b/platform/win32/FileUtils.cpp index b7dcfe8..b433103 100755 --- a/platform/win32/FileUtils.cpp +++ b/platform/win32/FileUtils.cpp @@ -829,13 +829,16 @@ inline void self_path(char* path) GetModuleFileNameA(NULL, (LPSTR) path, MAX_PATH); } -void iterate_directory(const char *base_path, const char* file_ending, void (*handler)(const char *, void *), ...) { +void iterate_directory(const char* base_path, const char* file_ending, void (*handler)(const char *, void *), ...) { va_list args; va_start(args, handler); + char full_base_path[MAX_PATH]; + relative_to_absolute(base_path, full_base_path); + WIN32_FIND_DATA find_file_data; char search_path[MAX_PATH]; - snprintf(search_path, MAX_PATH, "%s\\*", base_path); + snprintf(search_path, MAX_PATH, "%s\\*", full_base_path); HANDLE hFind = FindFirstFile(search_path, &find_file_data); if (hFind == INVALID_HANDLE_VALUE) { @@ -855,7 +858,9 @@ void iterate_directory(const char *base_path, const char* file_ending, void (*ha // @performance This is bad, we are internally moving two times too often to the end of full_path // Maybe make str_copy_short return the length, same as append? str_copy_short(full_path, base_path); - str_concat_append(full_path, "/"); + if (!str_ends_with(base_path, "/")) { + str_concat_append(full_path, "/"); + } str_concat_append(full_path, entry->d_name); if (find_file_data.dwFileAttributes & FILE_ATTRIBUTE_DIRECTORY) { diff --git a/stdlib/HashMap.h b/stdlib/HashMap.h index 37a5fa7..8a519ed 100755 --- a/stdlib/HashMap.h +++ b/stdlib/HashMap.h @@ -210,6 +210,10 @@ int64 hashmap_size(const HashMap* hm) noexcept ///////////////////////////// // string key ///////////////////////////// +// @performance We could greatly improve the hashmap performance by ensuring that a uniquely hashed entry always starts at a cacheline +// If another hash results in the same index that should be at the cacheline + 32 position (if 32 bit size) +// This would ensure for those elements that if there is one collision we at least don't have to read another cache line +// Of course for more than 1 collision we would still need to load multiple cachelines, but ideally that shouldn't happen too often void hashmap_insert(HashMap* hm, const char* key, int32 value) noexcept { uint64 index = hash_djb2(key) % hm->buf.count; diff --git a/stdlib/PerfectHashMap.h b/stdlib/PerfectHashMap.h index 7af5124..1417757 100755 --- a/stdlib/PerfectHashMap.h +++ b/stdlib/PerfectHashMap.h @@ -108,6 +108,49 @@ PerfectHashMap* perfect_hashmap_prepare(PerfectHashMap* hm, const char** keys, i } ASSERT_SIMPLE(false); + LOG_1("Couldn't create perfect hashmap"); + + return NULL; +} + +// Same code as above with the difference that we are using a fixed length key array instead of an array of pointers +PerfectHashMap* perfect_hashmap_prepare(PerfectHashMap* hm, const char* keys, int32 key_count, int32 key_length, int32 seed_tries, RingMemory* ring) +{ + int32* indices = (int32 *) ring_get_memory(ring, hm->map_count * sizeof(int32), 4); + bool is_unique = false; + + for (uint32 i = 0; i < ARRAY_COUNT(PERFECT_HASH_FUNCTIONS); ++i) { + int32 seed; + int32 c = 0; + + while (!is_unique && c < seed_tries) { + is_unique = true; + seed = rand(); + memset(indices, 0, hm->map_count * sizeof(int32)); + + for (int32 j = 0; j < key_count; ++j) { + int32 index = (PERFECT_HASH_FUNCTIONS[i])(&keys[j * key_length], seed) % hm->map_count; + if (indices[index]) { + is_unique = false; + break; + } else { + indices[index] = 1; + } + } + + ++c; + } + + if (is_unique) { + hm->hash_seed = seed; + hm->hash_function = PERFECT_HASH_FUNCTIONS[i]; + + return hm; + } + } + + ASSERT_SIMPLE(false); + LOG_1("Couldn't create perfect hashmap"); return NULL; } diff --git a/utils/StringUtils.h b/utils/StringUtils.h index 59acd69..5de0068 100755 --- a/utils/StringUtils.h +++ b/utils/StringUtils.h @@ -1285,6 +1285,12 @@ bool str_is_empty(const char str) noexcept return str == ' ' || str == '\t' || str == '\n' || str == '\r'; } +inline +bool str_is_eol(const char str) noexcept +{ + return str == '\n' || str == '\r'; +} + inline void str_skip_empty(const char** str) noexcept { @@ -1293,6 +1299,14 @@ void str_skip_empty(const char** str) noexcept } } +inline +void str_skip_eol(const char** str) noexcept +{ + while (**str == '\n' || **str == '\r') { + ++(*str); + } +} + inline void str_skip_non_empty(const char** str) noexcept {