// cOMS/memory/ChunkMemory.h
/**
* Jingga
*
* @copyright Jingga
* @license OMS License 2.0
* @version 1.0.0
* @link https://jingga.app
*/
#ifndef TOS_MEMORY_CHUNK_MEMORY_H
#define TOS_MEMORY_CHUNK_MEMORY_H
#include <string.h>
#include "../stdlib/Types.h"
#include "../utils/TestUtils.h"
#include "../utils/EndianUtils.h"
#include "../utils/BitUtils.h"
#include "../log/DebugMemory.h"
#include "BufferMemory.h"
#include "../system/Allocator.h"
#include "../thread/Thread.h"
struct ChunkMemory {
    byte* memory;
    // @question Why are we making the count 64 bit? is this really realistically possible?
    uint64 size;
    int32 last_pos;
    uint32 count;
    uint32 chunk_size;
    uint32 alignment;
    // Bitmap with one bit per chunk (CEIL_DIV(count, 64) uint64 words)
    // A set bit marks a used chunk, a cleared bit marks a free chunk
    uint64* free;
};
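// Memory layout as implied by the pointer math in the init functions below
// (the free bitmap lives directly behind the element data):
//
//     [chunk 0][chunk 1]...[chunk count-1][free bitmap: CEIL_DIV(count, 64) * 8 B]
//     ^ memory                            ^ free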
inline
void chunk_alloc(ChunkMemory* buf, uint32 count, uint32 chunk_size, int32 alignment = 64)
{
    ASSERT_SIMPLE(chunk_size);
    ASSERT_SIMPLE(count);

    chunk_size = ROUND_TO_NEAREST(chunk_size, alignment);

    // Element data is followed by the free bitmap
    // Widen to 64 bit before multiplying so large pools don't overflow 32-bit arithmetic
    uint64 size = (uint64) count * chunk_size + sizeof(uint64) * CEIL_DIV(count, 64);

    buf->memory = alignment < 2
        ? (byte *) platform_alloc(size)
        : (byte *) platform_alloc_aligned(size, alignment);

    buf->count = count;
    buf->size = size;
    buf->chunk_size = chunk_size;
    buf->last_pos = -1;
    buf->alignment = alignment;

    // @question Could it be beneficial to have this before the element data?
    buf->free = (uint64 *) (buf->memory + (uint64) count * chunk_size);

    memset(buf->memory, 0, buf->size);

    DEBUG_MEMORY_INIT((uintptr_t) buf->memory, buf->size);
    LOG_INCREMENT_BY(DEBUG_COUNTER_MEM_ALLOC, buf->size);
    LOG_LEVEL_2("Allocated ChunkMemory: %n B", {{LOG_DATA_UINT64, &buf->size}});
}
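// Usage sketch (`Particle` is a hypothetical element type; chunk_reserve,
// chunk_get_element and chunk_free_elements are defined further below):
//
//     ChunkMemory pool = {};
//     chunk_alloc(&pool, 1024, sizeof(Particle));
//     int32 id = chunk_reserve(&pool);
//     Particle* p = (Particle *) chunk_get_element(&pool, id, true);
//     ...
//     chunk_free_elements(&pool, id);
//     chunk_free(&pool);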
inline
void chunk_init(ChunkMemory* buf, BufferMemory* data, uint32 count, uint32 chunk_size, int32 alignment = 64)
{
    ASSERT_SIMPLE(chunk_size);
    ASSERT_SIMPLE(count);

    chunk_size = ROUND_TO_NEAREST(chunk_size, alignment);

    // Widen to 64 bit before multiplying so large pools don't overflow 32-bit arithmetic
    uint64 size = (uint64) count * chunk_size + sizeof(uint64) * CEIL_DIV(count, 64);
    buf->memory = buffer_get_memory(data, size);

    buf->count = count;
    buf->size = size;
    buf->chunk_size = chunk_size;
    buf->last_pos = -1;
    buf->alignment = alignment;

    // @question Could it be beneficial to have this before the element data?
    // On the other hand, the way we do it right now we never have to move past the free array since it is at the end
    // Then again, we could accidentally overwrite the values in free if we are not careful
    buf->free = (uint64 *) (buf->memory + (uint64) count * chunk_size);

    DEBUG_MEMORY_INIT((uintptr_t) buf->memory, buf->size);
    DEBUG_MEMORY_RESERVE((uintptr_t) buf->memory, buf->size, 187);
}
inline
void chunk_init(ChunkMemory* buf, byte* data, uint32 count, uint32 chunk_size, int32 alignment = 64)
{
    ASSERT_SIMPLE(chunk_size);
    ASSERT_SIMPLE(count);

    chunk_size = ROUND_TO_NEAREST(chunk_size, alignment);

    // @bug what if an alignment is defined?
    buf->memory = data;

    buf->count = count;
    buf->size = (uint64) count * chunk_size + sizeof(uint64) * CEIL_DIV(count, 64);
    buf->chunk_size = chunk_size;
    buf->last_pos = -1;
    buf->alignment = alignment;

    // @question Could it be beneficial to have this before the element data?
    // On the other hand, the way we do it right now we never have to move past the free array since it is at the end
    // Then again, we could accidentally overwrite the values in free if we are not careful
    buf->free = (uint64 *) (buf->memory + (uint64) count * chunk_size);

    DEBUG_MEMORY_INIT((uintptr_t) buf->memory, buf->size);
    DEBUG_MEMORY_RESERVE((uintptr_t) buf->memory, buf->size, 187);
}
inline
void chunk_free(ChunkMemory* buf)
{
    DEBUG_MEMORY_DELETE((uintptr_t) buf->memory, buf->size);

    if (buf->alignment < 2) {
        platform_free((void **) &buf->memory);
    } else {
        platform_aligned_free((void **) &buf->memory);
    }
}
inline
uint32 chunk_id_from_memory(ChunkMemory* buf, byte* pos)
{
    // Divide the full 64-bit offset first, then narrow; truncating the offset
    // before the division would be wrong for pools larger than 4 GiB
    return (uint32) (((uintptr_t) pos - (uintptr_t) buf->memory) / buf->chunk_size);
}
inline
byte* chunk_get_element(ChunkMemory* buf, uint64 element, bool zeroed = false)
{
    ASSERT_SIMPLE(element < buf->count);
    byte* offset = buf->memory + element * buf->chunk_size;

    if (zeroed) {
        memset((void *) offset, 0, buf->chunk_size);
    }

    DEBUG_MEMORY_READ((uintptr_t) offset, buf->chunk_size);

    return offset;
}
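// Round-trip sketch (`pool` is a hypothetical ChunkMemory instance;
// chunk_reserve is defined right below): an element pointer maps back to its id
//
//     int32 id = chunk_reserve(&pool);
//     byte* elem = chunk_get_element(&pool, id);
//     ASSERT_SIMPLE(chunk_id_from_memory(&pool, elem) == (uint32) id);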
// @performance This is a very important function, revisit in the future for optimization (e.g. ABM)
int32 chunk_reserve(ChunkMemory* buf, uint32 elements = 1)
{
    int32 free_index = (buf->last_pos + 1) / 64;
    int32 bit_index = (buf->last_pos + 1) & 63;

    int32 free_element = -1;
    int32 i = -1;
    int32 consecutive_free_bits = 0;

    while (free_element < 0 && ++i < buf->count) {
        if (free_index * 64 + bit_index + elements - consecutive_free_bits >= buf->count) {
            // Not enough room left before the end of the bitmap -> wrap around
            // Count the skipped tail BEFORE resetting the indices
            i += buf->count - (free_index * 64 + bit_index);
            free_index = 0;
            bit_index = 0;
            consecutive_free_bits = 0;
        } else if (buf->free[free_index] == 0xFFFFFFFFFFFFFFFFULL) {
            // Skip fully used 64-bit ranges
            ++free_index;
            bit_index = 0;
            i += 63;
            consecutive_free_bits = 0;

            continue;
        }

        // Find first free element
        while (IS_BIT_SET_64_R2L(buf->free[free_index], bit_index)) {
            consecutive_free_bits = 0;
            ++bit_index;
            ++i;

            // We still need to check for overflow since our initial bit_index is based on buf->last_pos
            if (bit_index > 63) {
                bit_index = 0;
                ++free_index;

                break;
            }
        }

        // The previous while may exit with an "overflow", that's why this check is required
        if (IS_BIT_SET_64_R2L(buf->free[free_index], bit_index)) {
            consecutive_free_bits = 0;
            continue;
        }

        // We found our first free element, let's check if we have enough free space
        while (!IS_BIT_SET_64_R2L(buf->free[free_index], bit_index)
            && consecutive_free_bits != elements
            && free_index * 64 + bit_index + elements - consecutive_free_bits < buf->count
        ) {
            ++i;
            ++consecutive_free_bits;
            ++bit_index;

            if (bit_index > 63) {
                bit_index = 0;
                ++free_index;

                break;
            }
        }

        // Do we have enough free bits?
        if (consecutive_free_bits == elements) {
            free_element = free_index * 64 + bit_index - elements;

            int32 possible_free_index = free_element / 64;
            int32 possible_bit_index = free_element & 63;

            // Mark as used
            if (elements == 1) {
                buf->free[possible_free_index] |= (1ULL << possible_bit_index);
            } else {
                // Consume a copy so `elements` stays intact for the debug write below
                uint32 elements_temp = elements;
                int64 current_free_index = possible_free_index;
                int32 current_bit_index = possible_bit_index;

                while (elements_temp > 0) {
                    // Calculate the number of bits we can set in the current 64-bit block
                    int32 bits_in_current_block = OMS_MIN(64 - current_bit_index, (int32) elements_temp);

                    // Create a mask to set the bits (shifting 1ULL by 64 would be undefined)
                    uint64 mask = bits_in_current_block == 64
                        ? 0xFFFFFFFFFFFFFFFFULL
                        : ((1ULL << bits_in_current_block) - 1) << current_bit_index;
                    buf->free[current_free_index] |= mask;

                    // Update the counters and indices
                    elements_temp -= bits_in_current_block;
                    ++current_free_index;
                    current_bit_index = 0;
                }
            }

            break;
        }
    }

    if (free_element < 0) {
        ASSERT_SIMPLE(false);
        return -1;
    }

    DEBUG_MEMORY_WRITE((uintptr_t) (buf->memory + (uint64) free_element * buf->chunk_size), (uint64) elements * buf->chunk_size);
    buf->last_pos = free_element;

    return free_element;
}
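// Multi-chunk reservation sketch (`pool` as in the examples above):
//
//     // Reserve 3 consecutive chunks; the returned id is the first one
//     int32 first = chunk_reserve(&pool, 3);
//     byte* block = chunk_get_element(&pool, first);
//     // block spans 3 * pool.chunk_size contiguous bytes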
inline
void chunk_free_element(ChunkMemory* buf, uint64 free_index, int32 bit_index)
{
    DEBUG_MEMORY_DELETE((uintptr_t) (buf->memory + (free_index * 64 + bit_index) * buf->chunk_size), buf->chunk_size);

    // 1ULL, not 1LL: shifting a signed 1 into the sign bit is undefined
    buf->free[free_index] &= ~(1ULL << bit_index);
}
inline
void chunk_free_elements(ChunkMemory* buf, uint64 element, uint32 element_count = 1)
{
    DEBUG_MEMORY_DELETE((uintptr_t) (buf->memory + element * buf->chunk_size), (uint64) element_count * buf->chunk_size);

    int64 free_index = element / 64;
    int32 bit_index = element & 63;

    if (element_count == 1) {
        chunk_free_element(buf, free_index, bit_index);

        return;
    }

    while (element_count > 0) {
        // Calculate the number of bits we can clear in the current 64-bit block
        uint32 bits_in_current_block = OMS_MIN((uint32) (64 - bit_index), element_count);

        // Create a mask to clear the bits (shifting 1ULL by 64 would be undefined)
        uint64 mask = bits_in_current_block == 64
            ? 0xFFFFFFFFFFFFFFFFULL
            : ((1ULL << bits_in_current_block) - 1) << bit_index;
        buf->free[free_index] &= ~mask;

        // Update the counters and indices
        element_count -= bits_in_current_block;
        ++free_index;
        bit_index = 0;
    }
}
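// Freeing mirrors the reservation (sketch, continuing the reserve example above):
//
//     chunk_free_elements(&pool, (uint64) first, 3); // clears the same 3 bitmap bits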
inline
int64 chunk_dump(const ChunkMemory* buf, byte* data)
{
    byte* start = data;

    // Count
    *((uint32 *) data) = SWAP_ENDIAN_LITTLE(buf->count);
    data += sizeof(buf->count);

    // Size
    *((uint64 *) data) = SWAP_ENDIAN_LITTLE(buf->size);
    data += sizeof(buf->size);

    // Chunk Size
    *((uint32 *) data) = SWAP_ENDIAN_LITTLE(buf->chunk_size);
    data += sizeof(buf->chunk_size);

    // Last pos
    *((int32 *) data) = SWAP_ENDIAN_LITTLE(buf->last_pos);
    data += sizeof(buf->last_pos);

    // Alignment
    *((uint32 *) data) = SWAP_ENDIAN_LITTLE(buf->alignment);
    data += sizeof(buf->alignment);

    // All memory is handled in the buffer -> simply copy the buffer
    // This also includes the free array
    memcpy(data, buf->memory, buf->size);
    data += buf->size;

    return data - start;
}
inline
int64 chunk_load(ChunkMemory* buf, const byte* data)
{
    const byte* start = data;

    // Count
    buf->count = SWAP_ENDIAN_LITTLE(*((const uint32 *) data));
    data += sizeof(buf->count);

    // Size
    buf->size = SWAP_ENDIAN_LITTLE(*((const uint64 *) data));
    data += sizeof(buf->size);

    // Chunk Size
    buf->chunk_size = SWAP_ENDIAN_LITTLE(*((const uint32 *) data));
    data += sizeof(buf->chunk_size);

    // Last pos
    buf->last_pos = SWAP_ENDIAN_LITTLE(*((const int32 *) data));
    data += sizeof(buf->last_pos);

    // Alignment
    buf->alignment = SWAP_ENDIAN_LITTLE(*((const uint32 *) data));
    data += sizeof(buf->alignment);

    // buf->memory must already point at a region of at least buf->size bytes
    memcpy(buf->memory, data, buf->size);
    data += buf->size;

    // The free bitmap sits directly behind the element data
    buf->free = (uint64 *) (buf->memory + (uint64) buf->count * buf->chunk_size);

    LOG_LEVEL_2("Loaded ChunkMemory: %n B", {{LOG_DATA_UINT64, &buf->size}});

    // Return the number of consumed bytes, matching what chunk_dump produced
    return data - start;
}
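// Serialization round trip (sketch; `out` is a hypothetical byte buffer large
// enough for the header fields plus pool.size bytes, and the target's memory
// must be allocated before loading):
//
//     int64 written = chunk_dump(&pool, out);
//     ChunkMemory restored = {};
//     chunk_alloc(&restored, pool.count, pool.chunk_size, pool.alignment);
//     int64 read = chunk_load(&restored, out); // read == written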
#define chunk_iterate_start(buf, chunk_id) \
    /* Derive the bitmap position from the starting chunk_id */ \
    int32 free_index = (chunk_id) / 64; \
    int32 bit_index = (chunk_id) & 63; \
    \
    /* Iterate the chunk memory */ \
    for (; chunk_id < (buf)->count; ++chunk_id) { \
        /* Check if the current 64-bit block contains any used chunks */ \
        if (!(buf)->free[free_index]) { \
            /* Skip the rest of the empty block; chunk_id must advance in */ \
            /* lockstep with bit_index, the loop adds the final +1 */ \
            /* @performance Consider to only check 1 byte instead of 8 */ \
            /* There are probably even better ways by using compiler intrinsics if available */ \
            chunk_id += 63 - bit_index; \
            bit_index = 63; \
        } else if ((buf)->free[free_index] & (1ULL << bit_index)) {

#define chunk_iterate_end \
        } \
        \
        ++bit_index; \
        if (bit_index > 63) { \
            bit_index = 0; \
            ++free_index; \
        } \
    }
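// Iteration sketch: visits only used chunks; the macros expand in place and
// declare `free_index`/`bit_index`, so use at most one iteration per scope:
//
//     uint32 chunk_id = 0;
//     chunk_iterate_start(&pool, chunk_id)
//         byte* element = chunk_get_element(&pool, chunk_id);
//         // ... use element ...
//     chunk_iterate_end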
#endif