cOMS/compression/Huffman.h
Dennis Eichhorn 39fbcf4300
Some checks are pending
CodeQL / Analyze (${{ matrix.language }}) (autobuild, c-cpp) (push) Waiting to run
Microsoft C++ Code Analysis / Analyze (push) Waiting to run
linux bug fixes
2025-03-22 01:10:19 +00:00

235 lines
6.1 KiB
C
Executable File

/**
* Jingga
*
* @copyright Jingga
* @license OMS License 2.0
* @version 1.0.0
* @link https://jingga.app
*/
#ifndef COMS_COMPRESSION_HUFFMAN_H
#define COMS_COMPRESSION_HUFFMAN_H
#include <stdio.h>
#include <string.h>
#include "../stdlib/Types.h"
#include "../utils/BitUtils.h"
#include "../utils/EndianUtils.h"
struct HuffmanNode {
HuffmanNode* left;
HuffmanNode* right;
int32 frequency;
byte character;
};
struct Huffman {
HuffmanNode pool[512];
HuffmanNode priority_queue[511];
HuffmanNode** pq; // 1 indexed array (see huffman_init)
int32 node_count;
int32 pq_end;
char buffer[1024]; // Contains the actual table data
char* code[256]; // Contains a pointer per ASCII character to the huffman code sequence
};
// We could combine this function with the one below but this would introduce a if != 0 check for the frequency
// I would assume the current version is faster since we avoid a branch
static inline
HuffmanNode* huffman_node_create(Huffman* hf, int32 frequency, byte character, HuffmanNode* left, HuffmanNode* right) noexcept
{
HuffmanNode* node = hf->pool + hf->node_count++;
node->character = character;
node->frequency = frequency;
return node;
}
// Same as other function but frequency = 0
static inline
HuffmanNode* huffman_node_create(Huffman* hf, byte character, HuffmanNode* left, HuffmanNode* right) noexcept
{
HuffmanNode* node = hf->pool + hf->node_count++;
node->left = left;
node->right = right;
node->frequency = left->frequency + right->frequency;
return node;
}
static inline
void huffman_node_insert(Huffman* hf, HuffmanNode* node) noexcept
{
int32 child_id;
int32 parent_id = hf->pq_end++;
while ((child_id = parent_id / 2) && hf->pq[child_id]->frequency <= node->frequency) {
hf->pq[parent_id] = hf->pq[child_id];
parent_id = child_id;
}
hf->pq[parent_id] = node;
}
static
HuffmanNode* huffman_node_remove(Huffman* hf) noexcept
{
int32 parent_id = 1;
int32 left_child_id;
HuffmanNode* min_node = hf->pq[parent_id];
if (hf->pq_end < 2) {
return 0;
}
--hf->pq_end;
while ((left_child_id = parent_id * 2) < hf->pq_end) {
// Branchless increment
left_child_id += (int32) (left_child_id + 1 < hf->pq_end
&& hf->pq[left_child_id + 1]->frequency < hf->pq[left_child_id]->frequency
);
hf->pq[parent_id] = hf->pq[left_child_id];
parent_id = left_child_id;
}
hf->pq[parent_id] = hf->pq[hf->pq_end];
return min_node;
}
static
int64 huffman_code_build(Huffman* hf, HuffmanNode* root, char* code, int32 length, char* code_buffer, int32* buffer_position) noexcept
{
if (root->character) {
code[length] = 0;
memcpy(&code_buffer[*buffer_position], code, length + 1);
hf->code[root->character] = &code_buffer[*buffer_position];
*buffer_position += length + 1;
return;
}
code[length] = '0'; huffman_code_build(hf, root->left, code, length + 1, code_buffer, buffer_position);
code[length] = '1'; huffman_code_build(hf, root->right, code, length + 1, code_buffer, buffer_position);
}
void huffman_init(Huffman* hf, const byte* in) noexcept
{
int32 frequency[256] = {0};
int32 buffer_position = 0;
char temp_code[16];
// We artificially force the root element (usually the 0 element) to have the index 1.
hf->pq = (HuffmanNode **) (hf->priority_queue - 1);
while (*in) {
++frequency[(byte) *in++];
}
for (int32 i = 0; i < 256; ++i) {
if (frequency[i]) {
huffman_node_insert(hf, huffman_node_create(hf, frequency[i], i, NULL, NULL));
}
}
while (hf->pq_end > 2) {
huffman_node_insert(hf, huffman_node_create(hf, 0, huffman_node_remove(hf), huffman_node_remove(hf)));
}
huffman_code_build(hf, hf->pq[1], temp_code, 0, hf->buffer, &buffer_position);
}
inline
void huffman_dump(const Huffman* hf, byte* out)
{
// dump the char -> code relations as relative indices
for (uint32 i = 0; i < ARRAY_COUNT(hf->code); ++i) {
*((int64 *) out) = hf->code[i]
? SWAP_ENDIAN_LITTLE(hf->code[i] - hf->buffer)
: SWAP_ENDIAN_LITTLE(-1);
out += sizeof(int64);
}
// dump the table codes
memcpy(out, hf->buffer, sizeof(char) * ARRAY_COUNT(hf->buffer));
}
inline
void huffman_load(Huffman* hf, const byte* in)
{
// load the char -> code relations and convert relative indices to pointers
for (uint32 i = 0; i < ARRAY_COUNT(hf->code); ++i) {
int64 value = SWAP_ENDIAN_LITTLE(*((int64 *) in));
in += sizeof(value);
if (value > -1) {
hf->code[i] = hf->buffer + value;
}
}
// load the table codes
memcpy(hf->buffer, in, sizeof(char) * ARRAY_COUNT(hf->buffer));
}
inline
int64 huffman_encode(Huffman* hf, const byte* in, byte* out) noexcept
{
uint64 bit_length = 0;
int32 pos_bit = 0;
while (*in) {
const char* code = hf->code[*in++];
while (*code) {
if (*code == '1') {
BIT_SET_L2R(*out, pos_bit, 1);
}
++code;
++bit_length;
// Make sure it wraps around to 0 for pos_bit > 7
pos_bit = MODULO_2(++pos_bit, 8);
if (pos_bit == 0) {
++out;
}
}
}
return bit_length;
}
inline
int64 huffman_decode(Huffman* hf, const byte* in, byte* out, uint64 bit_length) noexcept
{
HuffmanNode* current = hf->pq[1];
int32 pos_bit = 0;
byte* start = out;
while (pos_bit < bit_length) {
// Branchless version of checking if bit is set and then updating current
int32 bit = BITS_GET_8_L2R(*in, pos_bit, 1);
current = (HuffmanNode *) (((uintptr_t) current->left & ~bit) | ((uintptr_t) current->right & bit));
if (current->character) {
*out++ = current->character;
current = hf->pq[1];
}
pos_bit = MODULO_2(++pos_bit, 8);
if (pos_bit == 0) {
++in;
}
}
*out = '\0';
// -1 for the \0 character which is not part of the length
return out - start - 1;
}
#endif