/** * Jingga * * @copyright Jingga * @license OMS License 2.0 * @version 1.0.0 * @link https://jingga.app */ #ifndef TOS_MEMORY_CHUNK_MEMORY_H #define TOS_MEMORY_CHUNK_MEMORY_H #include #include "../stdlib/Types.h" #include "../utils/MathUtils.h" #include "../utils/TestUtils.h" #include "../utils/EndianUtils.h" #include "../log/DebugMemory.h" #include "BufferMemory.h" #if _WIN32 #include "../platform/win32/Allocator.h" #elif __linux__ #include "../platform/linux/Allocator.h" #endif #if _WIN32 #include "../platform/win32/threading/Thread.h" #elif __linux__ #include "../platform/linux/threading/Thread.h" #endif struct ChunkMemory { byte* memory; uint64 count; uint64 size; uint64 last_pos; uint32 chunk_size; uint32 alignment; // length = count // free describes which locations are used and which are free uint64* free; }; inline void chunk_alloc(ChunkMemory* buf, uint64 count, uint32 chunk_size, int32 alignment = 64) { ASSERT_SIMPLE(chunk_size); ASSERT_SIMPLE(count); chunk_size = ROUND_TO_NEAREST(chunk_size, alignment); buf->memory = alignment < 2 ? (byte *) platform_alloc(count * chunk_size + sizeof(uint64) * CEIL_DIV(count, 64)) : (byte *) platform_alloc_aligned(count * chunk_size + sizeof(uint64) * CEIL_DIV(count, 64), alignment); buf->count = count; buf->size = count * chunk_size + sizeof(uint64) * CEIL_DIV(count, 64); buf->chunk_size = chunk_size; buf->last_pos = 0; buf->alignment = alignment; // @question Could it be beneficial to have this before the element data? buf->free = (uint64 *) (buf->memory + count * chunk_size); memset(buf->memory, 0, buf->size); DEBUG_MEMORY_INIT((uint64) buf->memory, buf->size); } inline void chunk_init(ChunkMemory* buf, BufferMemory* data, uint64 count, uint32 chunk_size, int32 alignment = 64) { ASSERT_SIMPLE(chunk_size); ASSERT_SIMPLE(count); chunk_size = ROUND_TO_NEAREST(chunk_size, alignment); buf->memory = buffer_get_memory(data, count * chunk_size + sizeof(uint64) * CEIL_DIV(count, 64)); buf->count = count; buf->size = count * chunk_size + sizeof(uint64) * CEIL_DIV(count, 64); buf->chunk_size = chunk_size; buf->last_pos = 0; buf->alignment = alignment; // @question Could it be beneficial to have this before the element data? // On the other hand the way we do it right now we never have to move past the free array since it is at the end // On another hand we could by accident overwrite the values in free if we are not careful buf->free = (uint64 *) (buf->memory + count * chunk_size); DEBUG_MEMORY_INIT((uint64) buf->memory, buf->size); DEBUG_MEMORY_RESERVE((uint64) buf->memory, buf->size, 187); } inline void chunk_init(ChunkMemory* buf, byte* data, uint64 count, uint32 chunk_size, int32 alignment = 64) { ASSERT_SIMPLE(chunk_size); ASSERT_SIMPLE(count); chunk_size = ROUND_TO_NEAREST(chunk_size, alignment); // @bug what if an alignment is defined? buf->memory = data; buf->count = count; buf->size = count * chunk_size + sizeof(uint64) * CEIL_DIV(count, 64); buf->chunk_size = chunk_size; buf->last_pos = 0; buf->alignment = alignment; // @question Could it be beneficial to have this before the element data? // On the other hand the way we do it right now we never have to move past the free array since it is at the end // On another hand we could by accident overwrite the values in free if we are not careful buf->free = (uint64 *) (buf->memory + count * chunk_size); DEBUG_MEMORY_INIT((uint64) buf->memory, buf->size); DEBUG_MEMORY_RESERVE((uint64) buf->memory, buf->size, 187); } inline void chunk_free(ChunkMemory* buf) { DEBUG_MEMORY_DELETE((uint64) buf->memory, buf->size); if (buf->alignment < 2) { platform_free((void **) &buf->memory); } else { platform_aligned_free((void **) &buf->memory); } } inline byte* chunk_get_element(ChunkMemory* buf, uint64 element, bool zeroed = false) { byte* offset = buf->memory + element * buf->chunk_size; ASSERT_SIMPLE(offset); if (zeroed) { memset((void *) offset, 0, buf->chunk_size); } DEBUG_MEMORY_READ((uint64) offset, buf->chunk_size); return offset; } /** * In some cases we know exactly which index is free */ void chunk_reserve_index(ChunkMemory* buf, int64 index, int64 elements = 1, bool zeroed = false) { int64 byte_index = index / 64; int32 bit_index = index % 64; // Mark the bits as reserved for (int32 j = 0; j < elements; ++j) { int64 current_byte_index = byte_index + (bit_index + j) / 64; int32 current_bit_index = (bit_index + j) % 64; buf->free[current_byte_index] |= (1LL << current_bit_index); } if (zeroed) { memset(buf->memory + index * buf->chunk_size, 0, elements * buf->chunk_size); } DEBUG_MEMORY_WRITE((uint64) (buf->memory + index * buf->chunk_size), elements * buf->chunk_size); buf->last_pos = index; } int64 chunk_reserve(ChunkMemory* buf, uint64 elements = 1, bool zeroed = false) { int64 free_index = (buf->last_pos + 1) / 64; int32 bit_index; int64 free_element = -1; int64 mask; int32 i = 0; int64 max_bytes = (buf->count + 7) / 64; while (free_element < 0 && i < buf->count) { ++i; if (free_index >= max_bytes) { free_index = 0; } if (buf->free[free_index] == 0xFF) { ++free_index; continue; } // @performance There is some redundancy happening down below, we should ++free_index in certain conditions? for (bit_index = 0; bit_index < 64; ++bit_index) { int32 consecutive_free_bits = 0; // Check if there are 'elements' consecutive free bits for (int32 j = 0; j < elements; ++j) { // Check if there is enough space until the end of the buffer. // Remember, the last free index may only allow only 1 bit if the size is 65 if (free_index * 64 + (bit_index + j) >= buf->count) { break; } uint64 current_free_index = free_index + (bit_index + j) / 64; int32 current_bit_index = (bit_index + j) % 64; mask = 1LL << current_bit_index; if ((buf->free[current_free_index] & mask) == 0) { ++consecutive_free_bits; } else { break; } } if (consecutive_free_bits == elements) { free_element = free_index * 64 + bit_index; // Mark the bits as reserved for (int32 j = 0; j < elements; ++j) { int64 current_free_index = free_index + (bit_index + j) / 64; int32 current_bit_index = (bit_index + j) % 64; buf->free[current_free_index] |= (1LL << current_bit_index); } break; } } ++i; ++free_index; } if (free_element < 0) { ASSERT_SIMPLE(false); return -1; } if (zeroed) { memset(buf->memory + free_element * buf->chunk_size, 0, elements * buf->chunk_size); } DEBUG_MEMORY_WRITE((uint64) (buf->memory + free_element * buf->chunk_size), elements * buf->chunk_size); buf->last_pos = free_element; return free_element; } byte* chunk_find_free(ChunkMemory* buf) { int64 free_index = (buf->last_pos + 1) / 64; int32 bit_index; int64 free_element = -1; int64 mask; int32 i = 0; int64 max_bytes = (buf->count + 7) / 64; while (free_element < 0 && i < buf->count) { if (free_index >= max_bytes) { free_index = 0; } if (buf->free[free_index] == 0xFF) { ++i; ++free_index; continue; } // This always breaks! // @performance on the first iteration through the buffer we could optimize this by starting at a different bit_index // because we know that the bit_index is based on last_pos for (bit_index = 0; bit_index < 64; ++bit_index) { mask = 1LL << bit_index; if ((buf->free[free_index] & mask) == 0) { free_element = free_index * 64 + bit_index; buf->free[free_index] |= (1LL << bit_index); break; } } } if (free_element < 0) { return NULL; } return buf->memory + free_element * buf->chunk_size; } inline void chunk_free_element(ChunkMemory* buf, uint64 element) { DEBUG_MEMORY_DELETE((uint64) (buf->memory + element * buf->chunk_size), buf->chunk_size); int64 free_index = element / 64; int32 bit_index = element % 64; buf->free[free_index] &= ~(1LL << bit_index); } inline int64 chunk_dump(const ChunkMemory* buf, byte* data) { byte* start = data; // Count *((uint64 *) data) = SWAP_ENDIAN_LITTLE(buf->count); data += sizeof(buf->count); // Size *((uint64 *) data) = SWAP_ENDIAN_LITTLE(buf->size); data += sizeof(buf->size); // Chunk Size *((uint32 *) data) = SWAP_ENDIAN_LITTLE(buf->chunk_size); data += sizeof(buf->chunk_size); // Last pos *((uint64 *) data) = SWAP_ENDIAN_LITTLE(buf->last_pos); data += sizeof(buf->last_pos); // Alignment *((uint32 *) data) = SWAP_ENDIAN_LITTLE(buf->alignment); data += sizeof(buf->alignment); // All memory is handled in the buffer -> simply copy the buffer // This also includes the free array memcpy(data, buf->memory, buf->size); data += buf->size; return data - start; } inline int64 chunk_load(ChunkMemory* buf, const byte* data) { // Count buf->count = SWAP_ENDIAN_LITTLE(*((uint64 *) data)); data += sizeof(buf->count); // Size buf->size = SWAP_ENDIAN_LITTLE(*((uint64 *) data)); data += sizeof(buf->size); // Chunk Size buf->chunk_size = SWAP_ENDIAN_LITTLE(*((uint32 *) data)); data += sizeof(buf->chunk_size); // Last pos buf->last_pos = SWAP_ENDIAN_LITTLE(*((uint64 *) data)); data += sizeof(buf->last_pos); // Alignment buf->alignment = SWAP_ENDIAN_LITTLE(*((uint32 *) data)); data += sizeof(buf->alignment); memcpy(buf->memory, data, buf->size); data += buf->size; buf->free = (uint64 *) (buf->memory + buf->count * buf->chunk_size); return buf->size; } #endif