implement png BUT not working since not debugged yet

This commit is contained in:
Dennis Eichhorn 2024-09-23 04:34:31 +02:00
parent 44ebefd06a
commit e88840f0fa
11 changed files with 715 additions and 305 deletions

View File

@ -277,41 +277,65 @@ void image_bmp_generate(const FileBody* src_data, Image* image)
image->width = src.dib_header.width; image->width = src.dib_header.width;
image->height = src.dib_header.height; image->height = src.dib_header.height;
image->length = image->width * image->height; image->pixel_count = image->width * image->height;
// rows are 4 bytes multiples in length // rows are 4 bytes multiples in length
uint32 width = ROUND_TO_NEAREST(src.dib_header.width, 4); uint32 width = ROUND_TO_NEAREST(src.dib_header.width, 4);
uint32 pixel_bytes = src.dib_header.bits_per_pixel / 8; uint32 pixel_bytes = src.dib_header.bits_per_pixel / 8;
if (image->order_pixels == IMAGE_PIXEL_ORDER_BGRA) { byte alpha_offset = pixel_bytes > 3;
memcpy((void *) image->pixels, src.pixels, image->length * pixel_bytes);
image->has_alpha |= (bool) alpha_offset;
if (image->order_pixels == IMAGE_PIXEL_ORDER_BGRA
&& image->order_rows == IMAGE_ROW_ORDER_BOTTOM_TO_TOP
) {
// @bug This doesn't consider the situation where we want alpha as a setting but the img doesn't have it
// @bug This also copies possible padding which will corrupt the image
memcpy((void *) image->pixels, src.pixels, image->pixel_count * pixel_bytes);
return; return;
} }
byte alpha_offset = pixel_bytes == 3 ? 0 : 1; uint32 pixel_rgb_bytes = pixel_bytes - alpha_offset;
uint32 row_pos1 = 0; uint32 row_pos1;
uint32 row_pos2 = 0; uint32 row_pos2;
uint32 width_pixel_bytes = width * pixel_bytes;
for (uint32 y = 0; y < src.dib_header.height; ++y) { for (uint32 y = 0; y < src.dib_header.height; ++y) {
row_pos1 = y * width_pixel_bytes;
if (image->order_rows == IMAGE_ROW_ORDER_TOP_TO_BOTTOM) {
row_pos2 = (src.dib_header.height - y - 1) * width_pixel_bytes;
} else {
row_pos2 = y * width_pixel_bytes;
}
for (uint32 x = 0; x < width; ++x) { for (uint32 x = 0; x < width; ++x) {
if (x >= image->width) { if (x >= image->width) {
// we don't care about the padding // Bitmaps may have padding at the end of the row
// We don't care about that
continue; continue;
} }
row_pos1 = y * width * pixel_bytes;
row_pos2 = (src.dib_header.height - y - 1) * width * pixel_bytes;
// Invert byte order // Invert byte order
for (uint32 i = 0; i < pixel_bytes - alpha_offset; ++i) { if (image->order_pixels == IMAGE_PIXEL_ORDER_RGBA) {
image->pixels[row_pos1 + x * pixel_bytes + i] = src.pixels[row_pos2 + x * pixel_bytes + pixel_bytes - alpha_offset - i]; for (uint32 i = 0; i < pixel_rgb_bytes; ++i) {
image->pixels[row_pos1 + x * pixel_bytes + i] = src.pixels[row_pos2 + x * pixel_bytes + pixel_rgb_bytes - i];
}
} else {
for (uint32 i = 0; i < pixel_rgb_bytes; ++i) {
image->pixels[row_pos1 + x * pixel_bytes + i] = src.pixels[row_pos2 + x * pixel_bytes + i];
}
} }
// Add alpha channel at end // Add alpha channel at end of every RGB value
if (alpha_offset > 0) { if (alpha_offset > 0) {
image->pixels[row_pos1 + x * pixel_bytes + 3] = src.pixels[row_pos2 + x * pixel_bytes + pixel_bytes + 3]; image->pixels[row_pos1 + x * pixel_bytes + 3] = src.pixels[row_pos2 + x * pixel_bytes + pixel_bytes + 3];
} else if (image->has_alpha) {
image->pixels[row_pos1 + x * pixel_bytes + 3] = 0xFF;
} }
} }
} }

View File

@ -29,7 +29,7 @@ void image_from_file(RingMemory* ring, const char* path, Image* image)
file_read(path, &file, ring); file_read(path, &file, ring);
if (str_ends_with(path, ".png")) { if (str_ends_with(path, ".png")) {
image_png_generate(&file, image); image_png_generate(&file, image, ring);
} else if (str_ends_with(path, ".tga")) { } else if (str_ends_with(path, ".tga")) {
image_tga_generate(&file, image); image_tga_generate(&file, image);
} else if (str_ends_with(path, ".bmp")) { } else if (str_ends_with(path, ".bmp")) {

View File

@ -17,11 +17,16 @@
#define IMAGE_ROW_ORDER_TOP_TO_BOTTOM 0 #define IMAGE_ROW_ORDER_TOP_TO_BOTTOM 0
#define IMAGE_ROW_ORDER_BOTTOM_TO_TOP 1 #define IMAGE_ROW_ORDER_BOTTOM_TO_TOP 1
// This struct also functions as a setting on how to load the image data
// If has_alpha is set, it forces an alpha channel even for bitmaps
// order_pixels defines how the pixels should be ordered
// order_rows defines how the rows should be ordered
struct Image { struct Image {
uint32 width; uint32 width;
uint32 height; uint32 height;
uint32 length; uint32 pixel_count;
// Image settings
bool has_alpha; bool has_alpha;
byte order_pixels; // RGBA vs BGRA byte order_pixels; // RGBA vs BGRA
byte order_rows; // top-to-bottom vs bottom-to-top byte order_rows; // top-to-bottom vs bottom-to-top

View File

@ -7,6 +7,7 @@
* @link https://jingga.app * @link https://jingga.app
* *
* png: https://www.w3.org/TR/2003/REC-PNG-20031110/ * png: https://www.w3.org/TR/2003/REC-PNG-20031110/
* png: https://www.w3.org/TR/PNG-Chunks.html
* zlib: https://www.ietf.org/rfc/rfc1950.txt * zlib: https://www.ietf.org/rfc/rfc1950.txt
* deflate: https://www.ietf.org/rfc/rfc1951.txt * deflate: https://www.ietf.org/rfc/rfc1951.txt
*/ */
@ -15,7 +16,7 @@
#include <string.h> #include <string.h>
#include "../stdlib/Types.h" #include "../stdlib/Types.h"
#include "../utils/Utils.h" #include "../utils/BitUtils.h"
#include "../utils/EndianUtils.h" #include "../utils/EndianUtils.h"
#include "Image.h" #include "Image.h"
@ -23,31 +24,66 @@
#define PNG_HEADER_SIZE 8 #define PNG_HEADER_SIZE 8
struct PngHeader { struct PngHeader {
byte signature[8]; uint8 signature[8];
}; };
/*
The following table describes the chunk layout.
Please note that we do NOT support most of this
Critical chunks (order is defined):
Name Multiple Ordering constraints
IHDR No Must be first
PLTE No Before IDAT (optional)
IDAT Yes Multiple IDATs must be consecutive
IEND No Must be last
Ancillary chunks (order is not defined):
Name Multiple Ordering constraints
cHRM No Before PLTE and IDAT
gAMA No Before PLTE and IDAT
iCCP No Before PLTE and IDAT
sBIT No Before PLTE and IDAT
sRGB No Before PLTE and IDAT
bKGD No After PLTE, before IDAT
hIST No After PLTE, before IDAT
tRNS No After PLTE, before IDAT
pHYs No Before IDAT
sPLT Yes Before IDAT
tIME No None
iTXt Yes None
tEXt Yes None
zTXt Yes None
*/
#define PNG_CHUNK_SIZE_MIN 12
struct PngChunk { struct PngChunk {
uint32 length; uint32 length;
uint32 type; uint32 type;
// +data here, can be 0
uint32 crc; uint32 crc;
}; };
// Special chunk
#define PNG_IHDR_SIZE 25
struct PngIHDR { struct PngIHDR {
uint32 length; uint32 length;
uint32 type; uint32 type;
uint32 width; uint32 width;
uint32 height; uint32 height;
byte bit_depth; uint8 bit_depth;
byte colory_type; uint8 colory_type;
byte compression; uint8 compression;
byte filter; uint8 filter;
byte interlace; uint8 interlace;
uint32 crc; uint32 crc;
}; };
struct PngIDATHeader { struct PngIDATHeader {
byte zlib_method_flag; uint8 zlib_method_flag;
byte add_flag; uint8 add_flag;
}; };
struct Png { struct Png {
@ -55,10 +91,10 @@ struct Png {
PngIHDR ihdr; PngIHDR ihdr;
// Encoded pixel data // Encoded pixel data
byte* pixels; // WARNING: This is not the owner of the data. The owner is the FileBody uint8* pixels; // WARNING: This is not the owner of the data. The owner is the FileBody
uint32 size; uint32 size;
byte* data; // WARNING: This is not the owner of the data. The owner is the FileBody uint8* data; // WARNING: This is not the owner of the data. The owner is the FileBody
}; };
struct PngHuffmanEntry { struct PngHuffmanEntry {
@ -72,7 +108,7 @@ struct PngHuffman {
PngHuffmanEntry entries[32768]; // 2^15 PngHuffmanEntry entries[32768]; // 2^15
}; };
static const byte PNG_SIGNATURE[] = {0x89, 0x50, 0x4E, 0x47, 0x0D, 0x0A, 0x1A, 0x0A}; static const uint8 PNG_SIGNATURE[] = {0x89, 0x50, 0x4E, 0x47, 0x0D, 0x0A, 0x1A, 0x0A};
static const uint32 HUFFMAN_BIT_COUNTS[][2] = {{143, 8}, {255, 9}, {279, 7}, {287, 8}, {319, 5}}; static const uint32 HUFFMAN_BIT_COUNTS[][2] = {{143, 8}, {255, 9}, {279, 7}, {287, 8}, {319, 5}};
static const uint32 HUFFMAN_CODE_LENGTH_ALPHA[] = { static const uint32 HUFFMAN_CODE_LENGTH_ALPHA[] = {
16, 17, 18, 0, 8, 7, 9, 6, 10, 5, 11, 4, 12, 3, 13, 2, 14, 1, 15 16, 17, 18, 0, 8, 7, 9, 6, 10, 5, 11, 4, 12, 3, 13, 2, 14, 1, 15
@ -91,7 +127,7 @@ static const PngHuffmanEntry PNG_DIST_EXTRA[] = {
{4097, 11}, {6145, 11}, {8193, 12}, {12289, 12}, {16385, 13}, {24577, 13} {4097, 11}, {6145, 11}, {8193, 12}, {12289, 12}, {16385, 13}, {24577, 13}
}; };
void huffman_png_compute(uint32 symbol_count, uint32* symbol_code_length, PngHuffman* huff) void huffman_png_compute(uint32 symbol_count, const uint32* __restrict symbol_code_length, PngHuffman* huff)
{ {
uint32 code_length_hist[16] = {}; uint32 code_length_hist[16] = {};
for (uint32 i = 0; i < symbol_count; ++i) { for (uint32 i = 0; i < symbol_count; ++i) {
@ -118,7 +154,7 @@ void huffman_png_compute(uint32 symbol_count, uint32* symbol_code_length, PngHuf
for (uint32 j = 0; j < entries; ++j) { for (uint32 j = 0; j < entries; ++j) {
uint32 base_index = (code << bits) | j; uint32 base_index = (code << bits) | j;
uint32 index = reverse_bits(base_index, huff->max_code_length); uint32 index = bits_reverse(base_index, huff->max_code_length);
PngHuffmanEntry* entry = huff->entries + index; PngHuffmanEntry* entry = huff->entries + index;
@ -129,41 +165,140 @@ void huffman_png_compute(uint32 symbol_count, uint32* symbol_code_length, PngHuf
} }
inline inline
PngHuffmanEntry huffman_png_decode(PngHuffman* huff, const byte* data, int pos) uint16 huffman_png_decode(PngHuffman* __restrict huff, BitWalk* __restrict stream)
{ {
uint32 index = (uint32) get_bits(data, huff->max_code_length, pos); // huff->max_code_length has a length of a maximum of 15 -> span a maximum of 3 bytes
return huff->entries[index]; uint32 index = SWAP_ENDIAN_BIG(BITS_GET_32(BYTES_MERGE_4(stream->pos), stream->bit_pos, huff->max_code_length));
bits_walk(stream, huff->entries[index].bits_used);
return huff->entries[index].symbol;
} }
void png_filter_reconstruct(uint32 width, uint32 height, const byte* decompressed, byte* finalized, int steps) inline
uint8 png_filter_1_and_2(const uint8* __restrict x, const uint8* __restrict a, uint32 channel)
{ {
uint32 zero = 0; return x[channel] + a[channel];
byte* prev_row = NULL; }
byte prev_row_advance = 0;
inline
uint8 png_filter_3(const uint8* x, const uint8* a, const uint8* b, uint32 channel)
{
    // Reconstructs one channel of a sample filtered with the "average" scheme:
    // the stored value x[channel] plus the floored mean of a[channel] and b[channel].
    // NOTE(review): a/b are presumably the left and upper neighbours - confirm against the caller.

    // Widen before adding so the sum of two samples cannot wrap
    const uint32 first = (uint32) a[channel];
    const uint32 second = (uint32) b[channel];
    const uint8 mean = (uint8) ((first + second) / 2);

    // The return type narrows the sum back to a single sample
    return x[channel] + mean;
}
inline
uint8 png_filter_4(const uint8* x, const uint8* a_full, const uint8* b_full, const uint8* c_full, uint32 channel)
{
    // Reconstructs one channel of a sample using a Paeth-style predictor:
    // from the three reference samples, the one closest to (a + b - c) is
    // added to the stored value x[channel].
    // NOTE(review): a/b/c are presumably the left, upper and upper-left neighbours - confirm against the caller.
    const int32 ref_a = (int32) a_full[channel];
    const int32 ref_b = (int32) b_full[channel];
    const int32 ref_c = (int32) c_full[channel];

    // Linear estimate and its absolute distance to every reference sample
    const int32 estimate = ref_a + ref_b - ref_c;
    const int32 dist_a = estimate < ref_a ? ref_a - estimate : estimate - ref_a;
    const int32 dist_b = estimate < ref_b ? ref_b - estimate : estimate - ref_b;
    const int32 dist_c = estimate < ref_c ? ref_c - estimate : estimate - ref_c;

    // Pick the nearest reference; c is the fallback when neither a nor b wins
    int32 predictor = ref_c;
    if (dist_a < dist_b && dist_a <= dist_c) {
        predictor = ref_a;
    } else if (dist_b <= dist_c) {
        predictor = ref_b;
    }

    return x[channel] + (uint8) predictor;
}
void png_filter_reconstruct(uint32 width, uint32 height, const uint8* decompressed, uint8* finalized, int steps = 8)
{
uint64 zero = 0;
uint8* prev_row = (uint8 *) &zero;
uint8 prev_row_advance = 0;
const uint8* src = decompressed;
uint8* dest = finalized;
for (uint32 y = 0; y < height; ++y) { for (uint32 y = 0; y < height; ++y) {
byte filter = *decompressed; uint8 filter = *decompressed;
byte* current_row = 0; // @todo need actual value uint8* current_row = dest;
switch (filter) { switch (filter) {
case 0: { case 0: {
memcpy(finalized + y * width, decompressed + y * width, width); memcpy(dest, src, width * sizeof(uint32));
dest += 4 * width;
src += 4 * width;
} break; } break;
case 1: { case 1: {
// no simd possible, well 4 + 4 probably not worth it uint32 a_pixel = 0;
for (uint32 x = 0; x < width; ++x) {
// png_filter_1_and_2
dest[0] = src[0] + ((uint8 *) &a_pixel)[0];
dest[1] = src[1] + ((uint8 *) &a_pixel)[1];
dest[2] = src[2] + ((uint8 *) &a_pixel)[2];
dest[3] = src[3] + ((uint8 *) &a_pixel)[3];
a_pixel = *(uint32 *) dest;
dest += 4;
src += 4;
}
} break; } break;
case 2: { case 2: {
// @performance this is simd optimizable
// requires manual simd impl. since prev_row_advance can be 0 or 4 // requires manual simd impl. since prev_row_advance can be 0 or 4
uint8* b_pixel = prev_row;
for (uint32 x = 0; x < width; ++x) {
// png_filter_1_and_2
dest[0] = src[0] + b_pixel[0];
dest[1] = src[1] + b_pixel[1];
dest[2] = src[2] + b_pixel[2];
dest[3] = src[3] + b_pixel[3];
b_pixel += prev_row_advance;
dest += 4;
src += 4;
}
} break; } break;
case 3: { case 3: {
// no simd possible, well 4 + 4 probably not worth it uint32 a_pixel = 0;
uint8* b_pixel = prev_row;
for (uint32 x = 0; x < width; ++x) {
// png_filter_3
dest[0] = src[0] + (uint8) (((uint32) ((uint8 *) &a_pixel)[0] + (uint32) b_pixel[0]) / 2);
dest[1] = src[1] + (uint8) (((uint32) ((uint8 *) &a_pixel)[1] + (uint32) b_pixel[1]) / 2);
dest[2] = src[2] + (uint8) (((uint32) ((uint8 *) &a_pixel)[2] + (uint32) b_pixel[2]) / 2);
dest[3] = src[3] + (uint8) (((uint32) ((uint8 *) &a_pixel)[3] + (uint32) b_pixel[3]) / 2);
a_pixel = *(uint32 *) dest;
b_pixel += prev_row_advance;
dest += 4;
src += 4;
}
} break; } break;
case 4: { case 4: {
// no simd possible, well 4 + 4 probably not worth it uint32 a_pixel = 0;
uint32 c_pixel = 0;
uint8* b_pixel = prev_row;
for (uint32 x = 0; x < width; ++x) {
// png_filter_4
dest[0] = png_filter_4(src, (uint8 *) &a_pixel, b_pixel, (uint8 *) &c_pixel, 0);
dest[1] = png_filter_4(src, (uint8 *) &a_pixel, b_pixel, (uint8 *) &c_pixel, 1);
dest[2] = png_filter_4(src, (uint8 *) &a_pixel, b_pixel, (uint8 *) &c_pixel, 2);
dest[3] = png_filter_4(src, (uint8 *) &a_pixel, b_pixel, (uint8 *) &c_pixel, 3);
a_pixel = *(uint32 *) dest;
c_pixel = *(uint32 *) b_pixel;
b_pixel += prev_row_advance;
dest += 4;
src += 4;
}
} break; } break;
default: { default: {
ASSERT_SIMPLE(false);
} }
} }
@ -177,22 +312,30 @@ void generate_default_png_references(const FileBody* file, Png* png)
png->size = (uint32) file->size; png->size = (uint32) file->size;
png->data = file->content; png->data = file->content;
if (png->size < 33) { if (png->size < PNG_IHDR_SIZE + PNG_HEADER_SIZE) {
// This shouldn't happen // This shouldn't happen
ASSERT_SIMPLE(false);
return; return;
} }
// The first chunk MUST be IHDR -> we handle it here // The first chunk MUST be IHDR -> we handle it here
memcpy(png, file->content, 29); ASSERT_SIMPLE_CONST(PNG_HEADER_SIZE + PNG_IHDR_SIZE == 33);
png->ihdr.crc = SWAP_ENDIAN_BIG((uint32 *) (file->content + 30)); memcpy(png, file->content, PNG_HEADER_SIZE + PNG_IHDR_SIZE);
png->ihdr.length = SWAP_ENDIAN_BIG(&png->ihdr.length); png->ihdr.length = SWAP_ENDIAN_BIG(png->ihdr.length);
png->ihdr.type = SWAP_ENDIAN_BIG(&png->ihdr.type); png->ihdr.type = SWAP_ENDIAN_BIG(png->ihdr.type);
png->ihdr.width = SWAP_ENDIAN_BIG(&png->ihdr.width); png->ihdr.width = SWAP_ENDIAN_BIG(png->ihdr.width);
png->ihdr.height = SWAP_ENDIAN_BIG(&png->ihdr.height); png->ihdr.height = SWAP_ENDIAN_BIG(png->ihdr.height);
png->ihdr.crc = SWAP_ENDIAN_BIG(png->ihdr.crc);
} }
bool image_png_generate(const FileBody* src_data, Image* image, int steps = 8) // Below you will often see code like SWAP_ENDIAN_BIG(BITS_GET_16(BYTES_MERGE_2()))
// 1. Merge two bytes together creating a "new" data structure from which we can easily read bits
// 1.1. This is required to read bits that cross multiple bytes
// 1.2. Only if you read more than 8 bits will you need to merge 4 bytes
// 2. Now we can retrieve the bits from this data structure at a position with a length
// 3. Whenever we use the result as an integer (16 or 32 bits) we need to consider the endianness
bool image_png_generate(const FileBody* src_data, Image* image, RingMemory* ring, int32 steps = 8)
{ {
// @performance We are generating the struct and then filling the data. // @performance We are generating the struct and then filling the data.
// There is some assignment/copy overhead // There is some assignment/copy overhead
@ -205,154 +348,169 @@ bool image_png_generate(const FileBody* src_data, Image* image, int steps = 8)
// 3. temp pixel buffer (larger) // 3. temp pixel buffer (larger)
// 4. final pixel buffer (already here) // 4. final pixel buffer (already here)
// @todo Consider to support (0, 2, 3, 4, and 6)
// A simple black and white image or a image without alpha should be supported
if (src.ihdr.bit_depth != 8 if (src.ihdr.bit_depth != 8
|| src.ihdr.colory_type != 6 || src.ihdr.colory_type != 6
|| src.ihdr.compression != 0 || src.ihdr.compression != 0
|| src.ihdr.filter != 0 || src.ihdr.filter != 0
|| src.ihdr.interlace != 0 || src.ihdr.interlace != 0
) { ) {
// We don't support this type of png // We don't support this type of png (see comment below)
ASSERT_SIMPLE(false);
/*
Color Allowed Interpretation
Type Bit Depths
0 1,2,4,8,16 Each pixel is a grayscale sample.
2 8,16 Each pixel is an R,G,B triple.
3 1,2,4,8 Each pixel is a palette index, a PLTE chunk must appear.
4 8,16 Each pixel is a grayscale sample, followed by an alpha sample.
6 8,16 Each pixel is an R,G,B triple, followed by an alpha sample.
*/
return false; return false;
} }
PngChunk chunk; // @performance Could we probably avoid this? There is some overhead using this.
PngIDATHeader idat_header; // We are only using it because there might be situations where there is a bit overhang to another chunk
BitWalk stream;
// Note: If we would support more png formats this offset would be wrong
stream.pos = src_data->content + PNG_IHDR_SIZE + PNG_HEADER_SIZE;
stream.bit_pos = 0;
bool is_first_idat = true;
uint32 out_pos = 0;
// @question the following is a lot of data, should this be moved to heap?
uint32 literal_length_dist_table[512]; uint32 literal_length_dist_table[512];
PngHuffman literal_length_huffman; PngHuffman* literal_length_huffman = (PngHuffman *) ring_get_memory(ring, sizeof(PngHuffman));
literal_length_huffman.max_code_length = 15; literal_length_huffman->max_code_length = 15;
literal_length_huffman.count = 1 << literal_length_huffman.max_code_length; literal_length_huffman->count = 1 << literal_length_huffman->max_code_length;
PngHuffman distance_huffman; PngHuffman* distance_huffman = (PngHuffman *) ring_get_memory(ring, sizeof(PngHuffman));
distance_huffman.max_code_length = 15; distance_huffman->max_code_length = 15;
distance_huffman.count = 1 << distance_huffman.max_code_length; distance_huffman->count = 1 << distance_huffman->max_code_length;
PngHuffman dictionary_huffman; PngHuffman* dictionary_huffman = (PngHuffman *) ring_get_memory(ring, sizeof(PngHuffman));
dictionary_huffman.max_code_length = 7; dictionary_huffman->max_code_length = 7;
dictionary_huffman.count = 1 << dictionary_huffman.max_code_length; dictionary_huffman->count = 1 << dictionary_huffman->max_code_length;
// i is the current byte to read // We need full width * height, since we don't know how much data this IDAT actually holds
int i = 33; uint8* finalized = ring_get_memory(ring, src.ihdr.width * src.ihdr.height * 4);
// r is the re-shift value in case we need to go back // Needs some extra space
// @todo r unused? uint8* decompressed = ring_get_memory(ring, src.ihdr.width * src.ihdr.height * 4 + src.ihdr.height);
int r = 0;
// b is the current bit to read uint8* dest = decompressed;
int b = 0;
while(i < src.size) { // @bug We might not be able/allowed to simply iterate this loop below since data might be split across chunks
chunk.length = SWAP_ENDIAN_BIG((uint32 *) (src_data->content + i)); // If that is the case we have to first create a linked list of all the actual data and then we perform the actions below on this linked list
chunk.type = SWAP_ENDIAN_BIG((uint32 *) (src_data->content + i + 4)); // This of course poses the challenge of handling the border between two list elements
// Copying data would be slow so we ideally would like to iterate through that list and just handle the border
// since the border only becomes relevant at the beginning of every loop we should be fine, no?
uint8 BFINAL = 0;
while(stream.pos - src_data->content < src.size && BFINAL == 0) {
PngChunk chunk;
PngIDATHeader idat_header;
// @bug the code below doesn't need bit walk on the first loop, what about the second loop?
// For our png reader, we only care about IDAT // For our png reader, we only care about IDAT
// @question consider PLTE, tRNS, gAMA, iCCP // @question consider PLTE, tRNS, gAMA, iCCP
chunk.length = SWAP_ENDIAN_BIG(*((uint32 *) stream.pos));
stream.pos += sizeof(chunk.length);
chunk.type = SWAP_ENDIAN_BIG(*((uint32 *) stream.pos));
stream.pos += sizeof(chunk.type);
if (chunk.type == 'IEND') { if (chunk.type == 'IEND') {
// we arrived at the end of the file
break; break;
} else if (chunk.type != 'IDAT') { } else if (chunk.type != 'IDAT') {
// IDAT chunks are continuous and we don't care for anything else // some other data?!
if (!is_first_idat) {
break;
}
i += chunk.length + 12;
continue; continue;
} }
if (is_first_idat) { // @question Not sure if this below is actually the case
idat_header.zlib_method_flag = *(src_data->content + i + 8); // @bug Is this even correct, we might have an overhang from the previous chunk
idat_header.add_flag = *(src_data->content + i + 9); // Then we need to:
// read n bits from the previous chunk
// move across the chunk header data
// read another x bits from the new chunk
//
// This means we cannot jump here (or better we need to check if the bit position is != 0)
// BUT WE MIGHT NOT CARE ABOUT MULTIPLE IDAT CHUNKS?
idat_header.zlib_method_flag = *stream.pos;
++stream.pos;
byte CM = idat_header.zlib_method_flag & 0xF; idat_header.add_flag = *stream.pos;
byte FDICT = (idat_header.add_flag >> 5) & 0x1; ++stream.pos;
is_first_idat = false; uint8 CM = idat_header.zlib_method_flag & 0xF;
uint8 FDICT = (idat_header.add_flag >> 5) & 0x1;
if (CM != 8 || FDICT != 0) { if (CM != 8 || FDICT != 0) {
return false; // Not supported
} return false;
i += 10;
} }
// @bug The algorithm below works on "blocks". // This data might be stored in the prvious IDAT chunk?!
// Could it be possible that a block is spread accross 2 IDAT chunks? BFINAL = (uint8) SWAP_ENDIAN_BIG(BITS_GET_8(*stream.pos, stream.bit_pos, 1));
// If so this would be bad and break the code below bits_walk(&stream, 1);
// We could solve this by just having another counting variable and jump to the next block
// start: src_data->content + i + 8 uint32 BTYPE = SWAP_ENDIAN_BIG(BITS_GET_8(BYTES_MERGE_2(stream.pos), stream.bit_pos, 2));
// end: src_data->content + i + 8 + length - 1 bits_walk(&stream, 2);
// DEFLATE Algorithm
// @bug the following 3 lines are wrong, they don't have to start at a bit 0/1
// A block doesn't have to start at an byte boundary
byte BFINAL = (byte) get_bits(src_data->content + i, 1, b);
i += (b > 7 - 1);
b = (b + 1) & 7;
byte BTYPE = (byte) get_bits(src_data->content + i, 2, b);
i += (b > 7 - 2);
b = (b + 2) & 7;
if (BTYPE == 0) { if (BTYPE == 0) {
// starts at byte boundary -> position = +1 of previous byte // starts at uint8 boundary -> position = +1 of previous uint8
if (b == 0) { bits_flush(&stream);
i -= 1;
}
uint16 len = *((uint16 *) (src_data->content + i + 1)); uint16 len = *((uint16 *) stream.pos);
stream.pos += 2;
// @todo nlen unused? uint16 nlen = *((uint16 *) stream.pos);
uint16 nlen = *((uint16 *) (src_data->content + i + 3)); stream.pos += 2;
memcpy(image->pixels + out_pos, src_data->content + i + 5, len); ASSERT_SIMPLE(len == ~nlen);
out_pos += len;
i += 5 + len; memcpy(dest, &stream.pos, len);
b = 0; stream.pos += len;
} else if (BTYPE == 3) {
// Invalid BTYPE
ASSERT_SIMPLE(false);
} else { } else {
// @question is this even required or are we overwriting anyways? // @question is this even required or are we overwriting anyways?
memset(&literal_length_dist_table, 0, 512 * 4); memset(&literal_length_dist_table, 0, sizeof(literal_length_dist_table));
memset(&literal_length_huffman.entries, 0, sizeof(PngHuffmanEntry) * 15); memset(literal_length_huffman->entries, 0, sizeof(PngHuffmanEntry) * literal_length_huffman->max_code_length);
memset(&distance_huffman.entries, 0, sizeof(PngHuffmanEntry) * 15); memset(distance_huffman->entries, 0, sizeof(PngHuffmanEntry) * distance_huffman->max_code_length);
memset(&dictionary_huffman.entries, 0, sizeof(PngHuffmanEntry) * 7); memset(dictionary_huffman->entries, 0, sizeof(PngHuffmanEntry) * dictionary_huffman->max_code_length);
uint32 huffman_literal = 0; uint32 huffman_literal = 0;
uint32 huffman_dist = 0; uint32 huffman_dist = 0;
if (BTYPE == 2) { if (BTYPE == 2) {
// Compressed with dynamic Huffman code // Compressed with dynamic Huffman code
huffman_literal = (uint32) get_bits(src_data->content + i, 5, b); huffman_literal = SWAP_ENDIAN_BIG(BITS_GET_16(BYTES_MERGE_2(stream.pos), stream.bit_pos, 5));
i += (b > 7 - 5); bits_walk(&stream, 5);
b = (b + 5) & 7;
huffman_dist = (uint32) get_bits(src_data->content + i, 5, b); huffman_dist = SWAP_ENDIAN_BIG(BITS_GET_16(BYTES_MERGE_2(stream.pos), stream.bit_pos, 5));
i += (b > 7 - 5); bits_walk(&stream, 5);
b = (b + 5) & 7;
uint32 huffman_code_length = (uint32) get_bits(src_data->content + i, 4, b); uint32 huffman_code_length = SWAP_ENDIAN_BIG(BITS_GET_16(BYTES_MERGE_2(stream.pos), stream.bit_pos, 4));
i += (b > 7 - 4); bits_walk(&stream, 5);
b = (b + 4) & 7;
huffman_literal += 257; huffman_literal += 257;
huffman_dist += 1; huffman_dist += 1;
huffman_code_length += 4; huffman_code_length += 4;
uint32 huffman_code_length_table[19] = {}; uint32 huffman_code_length_table[ARRAY_COUNT(HUFFMAN_CODE_LENGTH_ALPHA)] = {};
for (uint32 j = 0; j < huffman_code_length; ++j) { for (uint32 j = 0; j < huffman_code_length; ++j) {
huffman_code_length_table[HUFFMAN_CODE_LENGTH_ALPHA[j]] = (uint32) get_bits(src_data->content + i, 3, b); huffman_code_length_table[HUFFMAN_CODE_LENGTH_ALPHA[j]] = SWAP_ENDIAN_BIG(BITS_GET_16(BYTES_MERGE_2(stream.pos), stream.bit_pos, 3));
i += (b > 7 - 3); bits_walk(&stream, 3);
b = (b + 3) & 7;
} }
huffman_png_compute(19, huffman_code_length_table, &dictionary_huffman); huffman_png_compute(ARRAY_COUNT(HUFFMAN_CODE_LENGTH_ALPHA), huffman_code_length_table, dictionary_huffman);
uint32 literal_length_count = 0; uint32 literal_length_count = 0;
uint32 length_count = huffman_literal + huffman_dist; uint32 length_count = huffman_literal + huffman_dist;
@ -362,31 +520,26 @@ bool image_png_generate(const FileBody* src_data, Image* image, int steps = 8)
uint32 rep_count = 1; uint32 rep_count = 1;
uint32 rep_val = 0; uint32 rep_val = 0;
PngHuffmanEntry dict = huffman_png_decode(&dictionary_huffman, src_data->content + i, b); uint32 encoded_length = huffman_png_decode(dictionary_huffman, &stream);
i += (b + dict.bits_used) / 8;
b = (b + dict.bits_used) & 7;
uint32 encoded_length = dict.bits_used;
if (encoded_length <= 15) { if (encoded_length <= 15) {
rep_val = encoded_length; rep_val = encoded_length;
} else if (encoded_length == 16) { } else if (encoded_length == 16) {
rep_count = 3 + (uint32) get_bits(src_data->content + i, 2, b); rep_count = 3 + SWAP_ENDIAN_BIG(BITS_GET_8(BYTES_MERGE_2(stream.pos), stream.bit_pos, 2));
i += (b > 7 - 2); bits_walk(&stream, 2);
b = (b + 2) & 7;
rep_val = literal_length_dist_table[literal_length_count - 1]; rep_val = literal_length_dist_table[literal_length_count - 1];
} else if (encoded_length == 17) { } else if (encoded_length == 17) {
rep_count = 3 + (uint32) get_bits(src_data->content + i, 3, b); rep_count = 3 + SWAP_ENDIAN_BIG(BITS_GET_8(BYTES_MERGE_2(stream.pos), stream.bit_pos, 3));
i += (b > 7 - 3); bits_walk(&stream, 3);
b = (b + 3) & 7;
} else if (encoded_length == 18) { } else if (encoded_length == 18) {
rep_count = 11 + (uint32) get_bits(src_data->content + i, 7, b); rep_count = 11 + SWAP_ENDIAN_BIG(BITS_GET_8(BYTES_MERGE_2(stream.pos), stream.bit_pos, 7));
i += (b > 7 - 7); bits_walk(&stream, 7);
b = (b + 7) & 7;
} }
memset(literal_length_dist_table + literal_length_count, rep_val, rep_count); while (rep_count--) {
literal_length_dist_table[literal_length_count++] = rep_val;
}
} }
} else if (BTYPE == 1) { } else if (BTYPE == 1) {
// Compressed with fixed Huffman code // Compressed with fixed Huffman code
@ -394,7 +547,7 @@ bool image_png_generate(const FileBody* src_data, Image* image, int steps = 8)
huffman_dist = 32; huffman_dist = 32;
uint32 bit_index = 0; uint32 bit_index = 0;
for(uint32 range_index = 0; range_index < 5; ++range_index) { for(uint32 range_index = 0; range_index < ARRAY_COUNT(HUFFMAN_BIT_COUNTS); ++range_index) {
uint32 bit_count = HUFFMAN_BIT_COUNTS[range_index][1]; uint32 bit_count = HUFFMAN_BIT_COUNTS[range_index][1];
uint32 last = HUFFMAN_BIT_COUNTS[range_index][0]; uint32 last = HUFFMAN_BIT_COUNTS[range_index][0];
@ -404,68 +557,65 @@ bool image_png_generate(const FileBody* src_data, Image* image, int steps = 8)
} }
} }
huffman_png_compute(huffman_literal, literal_length_dist_table, &literal_length_huffman); huffman_png_compute(huffman_literal, literal_length_dist_table, literal_length_huffman);
huffman_png_compute(huffman_dist, literal_length_dist_table + huffman_literal, &distance_huffman); huffman_png_compute(huffman_dist, literal_length_dist_table + huffman_literal, distance_huffman);
while (true) { while (true) {
PngHuffmanEntry literal = huffman_png_decode(&literal_length_huffman, src_data->content + i, b); uint32 literal_length = huffman_png_decode(literal_length_huffman, &stream);
i += (b + literal.bits_used) / 8;
b = (b + literal.bits_used) & 7;
uint32 literal_length = literal.bits_used;
if (literal_length == 256) { if (literal_length == 256) {
break; break;
} }
if (literal_length <= 255) { if (literal_length <= 255) {
*(image->pixels + out_pos) = (byte) (literal_length & 0xFF); *dest++ = (literal_length & 0xFF);
++out_pos;
} else { } else {
uint32 length_tab_index = literal_length - 257; uint32 length_tab_index = literal_length - 257;
PngHuffmanEntry length_tab = PNG_LENGTH_EXTRA[length_tab_index]; PngHuffmanEntry length_tab = PNG_LENGTH_EXTRA[length_tab_index];
uint32 length = length_tab.symbol; uint32 length = length_tab.symbol;
if (length_tab.bits_used) { if (length_tab.bits_used) {
uint32 extra_bits = (uint32) get_bits(src_data->content + i, length_tab.bits_used, b); // @performance If we knew that bits_used is always <= 15 we could use more efficient MERGE/GET
i += (b + length_tab.bits_used) / 8; uint32 extra_bits = SWAP_ENDIAN_BIG(BITS_GET_32(BYTES_MERGE_4(stream.pos), stream.bit_pos, length_tab.bits_used));
b = (b + length_tab.bits_used) & 7; bits_walk(&stream, length_tab.bits_used);
length += extra_bits; length += extra_bits;
} }
PngHuffmanEntry tab = huffman_png_decode(&distance_huffman, src_data->content + i, b); uint32 dist_tab_index = huffman_png_decode(distance_huffman, &stream);
i += (b + tab.bits_used) / 8;
b = (b + tab.bits_used) & 7;
uint32 dist_tab_index = tab.bits_used;
PngHuffmanEntry dist_tab = PNG_DIST_EXTRA[dist_tab_index]; PngHuffmanEntry dist_tab = PNG_DIST_EXTRA[dist_tab_index];
uint32 dist = dist_tab.symbol; uint32 dist = dist_tab.symbol;
if (dist_tab.bits_used) { if (dist_tab.bits_used) {
uint32 extra_bits = (uint32) get_bits(src_data->content + i, dist_tab.bits_used, b); // @performance If we knew that bits_used is always <= 15 we could use more efficient MERGE/GET
i += (b + dist_tab.bits_used) / 8; uint32 extra_bits = SWAP_ENDIAN_BIG(BITS_GET_32(BYTES_MERGE_4(stream.pos), stream.bit_pos, dist_tab.bits_used));
b = (b + dist_tab.bits_used) & 7; bits_walk(&stream, dist_tab.bits_used);
dist += extra_bits; dist += extra_bits;
} }
memcpy(image->pixels + out_pos, image->pixels + out_pos - dist, length); // @performance Maybe we could use memcopy depending on length and dist
uint8* source = dest - dist;
while (length--) {
*dest++ = *source++;
}
} }
} }
} }
if (BFINAL == 0) { // Skip the CRC
break; stream.pos += sizeof(chunk.crc);
} stream.bit_pos = 0;
} }
image->width = src.ihdr.width; image->width = src.ihdr.width;
image->height = src.ihdr.height; image->height = src.ihdr.height;
image->pixel_count = image->width * image->height;
image->has_alpha = true;
image->order_pixels = IMAGE_PIXEL_ORDER_RGBA;
image->order_rows = IMAGE_ROW_ORDER_TOP_TO_BOTTOM;
// @todo fix pixels parameter png_filter_reconstruct(src.ihdr.width, src.ihdr.height, decompressed, finalized, steps);
png_filter_reconstruct(image->width, image->height, (byte *) image->pixels, (byte *) image->pixels, steps);
return true; return true;
} }

View File

@ -85,36 +85,57 @@ void image_tga_generate(const FileBody* src_data, Image* image)
image->width = src.header.width; image->width = src.header.width;
image->height = src.header.height; image->height = src.header.height;
image->length = image->width * image->height; image->pixel_count = image->width * image->height;
// @todo also handle bottom-top/top-bottom order here
uint32 pixel_bytes = src.header.bits_per_pixel / 8; uint32 pixel_bytes = src.header.bits_per_pixel / 8;
if (image->order_pixels == IMAGE_PIXEL_ORDER_BGRA) { byte alpha_offset = pixel_bytes > 3;
memcpy((void *) image->pixels, src.pixels, image->length * pixel_bytes);
image->has_alpha |= (bool) alpha_offset;
// We can check same settings through equality since we use the same values
if (image->order_rows == src.header.vertical_ordering
&& image->order_pixels == src.header.horizonal_ordering
) {
// @bug This doesn't consider the situation where we want alpha as a setting but the img doesn't have it
memcpy((void *) image->pixels, src.pixels, image->pixel_count * pixel_bytes);
return; return;
} }
byte alpha_offset = pixel_bytes == 3 ? 0 : 1;
uint32 pixel_rgb_bytes = pixel_bytes - alpha_offset; uint32 pixel_rgb_bytes = pixel_bytes - alpha_offset;
uint32 row_pos1; uint32 row_pos1;
uint32 row_pos2; uint32 row_pos2;
for (uint32 y = 0; y < src.header.height; ++y) { uint32 width_pixel_bytes = src.header.width * pixel_bytes;
for (uint32 x = 0; x < src.header.width; ++x) {
row_pos1 = y * image->width * pixel_bytes;
row_pos2 = src.header.vertical_ordering == 0
? y * image->width * pixel_bytes
: (image->height - y - 1) * image->width * pixel_bytes;
for (uint32 i = 0; i < pixel_rgb_bytes; ++i) { for (uint32 y = 0; y < src.header.height; ++y) {
image->pixels[row_pos1 + x * pixel_bytes + i] = src.pixels[row_pos2 + x * pixel_bytes + pixel_rgb_bytes - i]; row_pos1 = y * image->width * pixel_bytes;
if ((image->order_rows == IMAGE_ROW_ORDER_TOP_TO_BOTTOM && src.header.vertical_ordering == 1)
|| (image->order_rows == IMAGE_ROW_ORDER_BOTTOM_TO_TOP && src.header.vertical_ordering == 0)
) {
row_pos2 = (src.header.height - y - 1) * image->width * pixel_bytes;
} else {
row_pos2 = y * width_pixel_bytes;
}
for (uint32 x = 0; x < src.header.width; ++x) {
if (image->order_pixels == src.header.horizonal_ordering) {
for (uint32 i = 0; i < pixel_rgb_bytes; ++i) {
image->pixels[row_pos1 + x * pixel_bytes + i] = src.pixels[row_pos2 + x * pixel_bytes + i];
}
} else {
for (uint32 i = 0; i < pixel_rgb_bytes; ++i) {
image->pixels[row_pos1 + x * pixel_bytes + i] = src.pixels[row_pos2 + x * pixel_bytes + pixel_rgb_bytes - i];
}
} }
// Add alpha channel at end // Add alpha channel at end of every RGB value
if (alpha_offset > 0) { if (alpha_offset > 0) {
image->pixels[row_pos1 + x * pixel_bytes + 3] = src.pixels[row_pos2 + x * pixel_bytes + pixel_bytes + 3]; image->pixels[row_pos1 + x * pixel_bytes + 3] = src.pixels[row_pos2 + x * pixel_bytes + pixel_bytes + 3];
} else if (image->has_alpha) {
image->pixels[row_pos1 + x * pixel_bytes + 3] = 0xFF;
} }
} }
} }

View File

@ -43,7 +43,6 @@ struct Texture {
// If yes remember to update prepare_texture() // If yes remember to update prepare_texture()
byte texture_data_type; byte texture_data_type;
byte texture_wrap_type_s; byte texture_wrap_type_s;
byte texture_wrap_type_t; byte texture_wrap_type_t;
byte texture_wrap_type_r; byte texture_wrap_type_r;
@ -51,8 +50,6 @@ struct Texture {
byte texture_minification; byte texture_minification;
Image image; Image image;
int32 texture_ref;
}; };
#endif #endif

View File

@ -108,6 +108,16 @@ void audio_play(AudioSetting* setting, DirectSoundSetting* api_setting)
setting->is_playing = true; setting->is_playing = true;
} }
// Stops playback of the DirectSound secondary buffer and flags the setting as not playing.
// When no secondary buffer exists nothing happens and is_playing is left untouched.
inline
void audio_stop(AudioSetting* setting, DirectSoundSetting* api_setting) {
    if (api_setting->secondary_buffer) {
        api_setting->secondary_buffer->Stop();
        setting->is_playing = false;
    }
}
inline inline
void audio_free(AudioSetting*, DirectSoundSetting* api_setting) void audio_free(AudioSetting*, DirectSoundSetting* api_setting)
{ {

View File

@ -124,21 +124,19 @@ void audio_play(AudioSetting* setting, XAudio2Setting* api_setting) {
} }
} }
// Stops the XAudio2 source voice immediately (XAUDIO2_COMMIT_NOW) and flags the setting as not playing.
// When no source voice exists nothing happens and is_playing is left untouched.
inline
void audio_stop(AudioSetting* setting, XAudio2Setting* api_setting) {
    if (api_setting->source_voice) {
        api_setting->source_voice->Stop(0, XAUDIO2_COMMIT_NOW);
        setting->is_playing = false;
    }
}
inline inline
void audio_free(AudioSetting* setting, XAudio2Setting* api_setting) void audio_free(AudioSetting* setting, XAudio2Setting* api_setting)
{ {
if (api_setting->internal_buffer[0].pAudioData) {
free((void *) api_setting->internal_buffer[0].pAudioData);
}
if (api_setting->internal_buffer[1].pAudioData) {
free((void *) api_setting->internal_buffer[1].pAudioData);
}
if (setting->buffer) {
free((void *) setting->buffer);
}
if (api_setting->source_voice) { if (api_setting->source_voice) {
api_setting->source_voice->DestroyVoice(); api_setting->source_voice->DestroyVoice();
} }
@ -150,6 +148,18 @@ void audio_free(AudioSetting* setting, XAudio2Setting* api_setting)
if (api_setting->audio_handle) { if (api_setting->audio_handle) {
api_setting->audio_handle->Release(); api_setting->audio_handle->Release();
} }
if (api_setting->internal_buffer[0].pAudioData) {
free((void *) api_setting->internal_buffer[0].pAudioData);
}
if (api_setting->internal_buffer[1].pAudioData) {
free((void *) api_setting->internal_buffer[1].pAudioData);
}
if (setting->buffer) {
free((void *) setting->buffer);
}
} }
/** /**

View File

@ -17,6 +17,244 @@
#define BIT_UNSET(num, pos) ((num) & ~((uint32) 1 << (pos))) #define BIT_UNSET(num, pos) ((num) & ~((uint32) 1 << (pos)))
#define BIT_FLIP(num, pos) ((num) ^ ((uint32) 1 << (pos))) #define BIT_FLIP(num, pos) ((num) ^ ((uint32) 1 << (pos)))
#define BIT_SET_TO(num, pos, x) ((num) & ~((uint32) 1 << (pos)) | ((uint32) (x) << (pos))) #define BIT_SET_TO(num, pos, x) ((num) & ~((uint32) 1 << (pos)) | ((uint32) (x) << (pos)))
// BITS_GET_<N>(num, pos, to_read)
//      Reads to_read bits out of the N bit wide value num.
//      pos is counted from the most significant bit (pos 0 = highest bit of the N bit value).
//      Precondition: to_read >= 1 and pos + to_read <= N (otherwise the right shift is out of range)
//      NOTE: The arguments are evaluated more than once, don't pass expressions with side effects
//
//      The 32 and 64 bit versions convert num to an unsigned type before shifting and build the mask
//      in a way that is still valid when to_read equals the full width (1U << 32 and 1ULL << 64 are undefined)
#define BITS_GET_8(num, pos, to_read) (((num) >> (8 - (pos) - (to_read))) & ((1U << (to_read)) - 1))
#define BITS_GET_16(num, pos, to_read) (((num) >> (16 - (pos) - (to_read))) & ((1U << (to_read)) - 1))
#define BITS_GET_32(num, pos, to_read) ((((uint32_t) (num)) >> (32 - (pos) - (to_read))) & ((uint32_t) ((1ULL << (to_read)) - 1)))
#define BITS_GET_64(num, pos, to_read) ((((uint64_t) (num)) >> (64 - (pos) - (to_read))) & ((to_read) >= 64 ? ~0ULL : ((1ULL << (to_read)) - 1)))

// BYTES_MERGE_<N>(num)
//      Combines the first N bytes at num into one integer, num[0] becomes the most significant byte.
//      Assumes num points to unsigned bytes - TODO confirm for all callers
//      The 4 byte version casts every byte to uint32_t before shifting. Without the cast the byte is promoted to int
//      and a first byte >= 0x80 is shifted into the sign bit (negative result, sign extension when widened)
#define BYTES_MERGE_2(num) (((num)[0] << 8) | (num)[1])
#define BYTES_MERGE_4(num) (((uint32_t)(num)[0] << 24) | ((uint32_t)(num)[1] << 16) | ((uint32_t)(num)[2] << 8) | (uint32_t)(num)[3])
#define BYTES_MERGE_8(num) (((uint64_t)(num)[0] << 56) | ((uint64_t)(num)[1] << 48) | ((uint64_t)(num)[2] << 40) | ((uint64_t)(num)[3] << 32) | ((uint64_t)(num)[4] << 24) | ((uint64_t)(num)[5] << 16) | ((uint64_t)(num)[6] << 8) | ((uint64_t)(num)[7]))
// Cursor for reading a byte buffer at bit granularity
// bits_walk() and bits_flush() advance both members together
struct BitWalk {
// Byte the cursor currently points into
byte* pos;
// Bit offset inside *pos, bits_walk() keeps this in the range 0-7
uint32 bit_pos;
};
// Moves the cursor forward by bits_to_walk bits.
// Whole bytes are added to pos, the remainder (0-7) stays in bit_pos.
inline
void bits_walk(BitWalk* stream, uint32 bits_to_walk)
{
    uint32 total_bits = stream->bit_pos + bits_to_walk;

    stream->pos += total_bits >> 3;
    stream->bit_pos = total_bits & 7;
}
// Moves the cursor to the start of the next byte if it is in the middle of a byte.
// A cursor that already sits on a byte boundary is not changed.
inline
void bits_flush(BitWalk* stream)
{
    if (stream->bit_pos == 0) {
        return;
    }

    ++stream->pos;
    stream->bit_pos = 0;
}
// inline
// uint8 bits_consume_8(BitWalk* stream, uint32 bits_to_consume)
// {
// uint8 result;
// uint32 remaining = 8 - stream->bit_pos;
// uint32 range_1 = bits_to_consume >= remaining
// ? remaining
// : bits_to_consume;
// result = (*stream->pos >> (remaining - range_1)) & ((1 << range_1) - 1);
// stream->bit_pos += range_1;
// if (bits_to_consume < remaining) {
// return result;
// }
// ++stream->pos;
// stream->bit_pos = 0;
// bits_to_consume -= range_1;
// /*
// uint32 full_bytes = bits_to_consume / 8;
// if (full_bytes > 0) {
// for (int i = 0; i < full_bytes; ++i) {
// result = (result << 8) | *stream->pos;
// ++stream->pos;
// }
// }
// */
// if (bits_to_consume == 0) {
// return result;
// }
// stream->bit_pos += bits_to_consume;
// return (result << bits_to_consume) | ((*stream->pos >> (8 - bits_to_consume)) & ((1 << bits_to_consume) - 1));
// }
// inline
// uint16 bits_consume_16(BitWalk* stream, uint32 bits_to_consume)
// {
// uint16 result;
// uint32 remaining = 8 - stream->bit_pos;
// uint32 range_1 = bits_to_consume >= remaining
// ? remaining
// : bits_to_consume;
// result = (*stream->pos >> (remaining - range_1)) & ((1 << range_1) - 1);
// stream->bit_pos += range_1;
// if (bits_to_consume < remaining) {
// return result;
// }
// ++stream->pos;
// stream->bit_pos = 0;
// bits_to_consume -= range_1;
// uint32 full_bytes = bits_to_consume / 8;
// if (full_bytes > 0) {
// for (int i = 0; i < full_bytes; ++i) {
// result = (result << 8) | *stream->pos;
// ++stream->pos;
// }
// }
// uint32 range_2 = bits_to_consume - full_bytes * 8;
// if (range_2 == 0) {
// return result;
// }
// stream->bit_pos += range_2;
// return (result << range_2) | ((*stream->pos >> (8 - range_2)) & ((1 << range_2) - 1));
// }
// inline
// uint32 bits_consume_32(BitWalk* stream, uint32 bits_to_consume)
// {
// uint32 result;
// uint32 remaining = 8 - stream->bit_pos;
// uint32 range_1 = bits_to_consume >= remaining
// ? remaining
// : bits_to_consume;
// result = (*stream->pos >> (remaining - range_1)) & ((1 << range_1) - 1);
// stream->bit_pos += range_1;
// if (bits_to_consume < remaining) {
// return result;
// }
// ++stream->pos;
// stream->bit_pos = 0;
// bits_to_consume -= range_1;
// uint32 full_bytes = bits_to_consume / 8;
// if (full_bytes > 0) {
// for (int i = 0; i < full_bytes; ++i) {
// result = (result << 8) | *stream->pos;
// ++stream->pos;
// }
// }
// uint32 range_2 = bits_to_consume - full_bytes * 8;
// if (range_2 == 0) {
// return result;
// }
// stream->bit_pos += range_2;
// return (result << range_2) | ((*stream->pos >> (8 - range_2)) & ((1 << range_2) - 1));
// }
// inline
// uint64 bits_consume_64(BitWalk* stream, uint32 bits_to_consume)
// {
// uint64 result;
// uint32 remaining = 8 - stream->bit_pos;
// uint32 range_1 = bits_to_consume >= remaining
// ? remaining
// : bits_to_consume;
// result = (*stream->pos >> (remaining - range_1)) & ((1 << range_1) - 1);
// stream->bit_pos += range_1;
// if (bits_to_consume < remaining) {
// return result;
// }
// ++stream->pos;
// stream->bit_pos = 0;
// bits_to_consume -= range_1;
// uint32 full_bytes = bits_to_consume / 8;
// if (full_bytes > 0) {
// for (int i = 0; i < full_bytes; ++i) {
// result = (result << 8) | *stream->pos;
// ++stream->pos;
// }
// }
// uint32 range_2 = bits_to_consume - full_bytes * 8;
// if (range_2 == 0) {
// return result;
// }
// stream->bit_pos += range_2;
// return (result << range_2) | ((*stream->pos >> (8 - range_2)) & ((1 << range_2) - 1));
// }
// uint8 bits_peek_8(BitWalk* stream, uint32 bits_to_consume) {
// byte* pos = stream->pos;
// byte bit_pos = stream->bit_pos;
// uint8 bits = bits_consume_8(stream, bits_to_consume);
// stream->pos = pos;
// stream->bit_pos = bit_pos;
// return bits;
// }
// uint16 bits_peek_16(BitWalk* stream, uint32 bits_to_consume) {
// byte* pos = stream->pos;
// byte bit_pos = stream->bit_pos;
// uint16 bits = bits_consume_16(stream, bits_to_consume);
// stream->pos = pos;
// stream->bit_pos = bit_pos;
// return bits;
// }
// uint32 bits_peek_32(BitWalk* stream, uint32 bits_to_consume) {
// byte* pos = stream->pos;
// byte bit_pos = stream->bit_pos;
// uint32 bits = bits_consume_32(stream, bits_to_consume);
// stream->pos = pos;
// stream->bit_pos = bit_pos;
// return bits;
// }
// uint64 bits_peek_64(BitWalk* stream, uint32 bits_to_consume) {
// byte* pos = stream->pos;
// byte bit_pos = stream->bit_pos;
// uint64 bits = bits_consume_64(stream, bits_to_consume);
// stream->pos = pos;
// stream->bit_pos = bit_pos;
// return bits;
// }
inline inline
uint32 bytes_merge(byte b0, byte b1, byte b2, byte b3) { uint32 bytes_merge(byte b0, byte b1, byte b2, byte b3) {
@ -77,55 +315,8 @@ inline int find_first_set_bit(int value) {
#endif #endif
} }
inline inline
byte get_bits(byte data, int bits_to_read, int start_pos) uint32 bits_reverse(uint32 data, uint32 count)
{
byte mask = (1 << bits_to_read) - 1;
return (data >> (8 - start_pos - bits_to_read)) & mask;
}
inline
uint64 get_bits(const byte* data, int bits_to_read, int start_pos)
{
if (bits_to_read <= 0 || bits_to_read > sizeof(uint64)) {
return 0;
}
int byte_index = start_pos / 8;
int bit_offset = start_pos % 8;
uint64_t mask = (1ULL << bits_to_read) - 1;
uint64_t result = 0;
int bits_read = 0;
while (bits_read < bits_to_read) {
int bits_in_current_byte = 8 - bit_offset;
int bits_to_take = bits_to_read - bits_read;
if (bits_to_take > bits_in_current_byte) {
bits_to_take = bits_in_current_byte;
}
uint8_t current_byte = data[byte_index];
current_byte >>= bit_offset;
current_byte &= (1 << bits_to_take) - 1;
result |= ((uint64_t)current_byte << bits_read);
bits_read += bits_to_take;
bit_offset = 0;
byte_index++;
}
result &= mask;
return result;
}
inline
uint32 reverse_bits(uint32 data, uint32 count)
{ {
uint32 reversed = 0; uint32 reversed = 0;
for (uint32 i = 0; i <= (count / 2); ++i) { for (uint32 i = 0; i <= (count / 2); ++i) {

View File

@ -11,6 +11,10 @@
#include "../stdlib/Types.h" #include "../stdlib/Types.h"
// SWAP_ENDIAN_<N>(val)
//      Reverses the byte order of the lowest N bits of val.
//      Every byte is masked with an unsigned constant, this guarantees that:
//          - bits above the target width can't leak into the result (a promoted 16 bit value << 8 has 24 bits)
//          - the sign bit of a negative operand can't be smeared into the result by the right shifts
//          - the left shifts happen on an unsigned type which is wide enough for the shift count
//      NOTE: val is evaluated more than once, don't pass expressions with side effects
#define SWAP_ENDIAN_16(val) ((((val) & 0xFFU) << 8) | (((val) >> 8) & 0xFFU))
#define SWAP_ENDIAN_32(val) ((((val) & 0xFFU) << 24) | (((val) & 0xFF00U) << 8) | (((val) >> 8) & 0xFF00U) | (((val) >> 24) & 0xFFU))
#define SWAP_ENDIAN_64(val) ((((val) & 0x00000000000000FFULL) << 56) | (((val) & 0x000000000000FF00ULL) << 40) | (((val) & 0x0000000000FF0000ULL) << 24) | (((val) & 0x00000000FF000000ULL) << 8) | (((val) & 0x000000FF00000000ULL) >> 8) | (((val) & 0x0000FF0000000000ULL) >> 24) | (((val) & 0x00FF000000000000ULL) >> 40) | (((val) >> 56) & 0x00000000000000FFULL))
// Automatically perform endian swap if necessary // Automatically perform endian swap if necessary
// If we are on little endian (e.g. Win32) we swap big endian data but not little endian // If we are on little endian (e.g. Win32) we swap big endian data but not little endian
#if _WIN32 || __LITTLE_ENDIAN #if _WIN32 || __LITTLE_ENDIAN
@ -29,79 +33,71 @@ bool is_little_endian()
} }
inline inline
uint16 endian_swap(const uint16* val) uint16 endian_swap(uint16 val)
{ {
uint16 v = *val; return ((val << 8) | (val >> 8));
return ((v << 8) | (v >> 8));
} }
inline inline
int16 endian_swap(const int16* val) int16 endian_swap(int16 val)
{ {
uint16 v = (uint16) (*val); return (int16) ((val << 8) | (val >> 8));
return (int16) ((v << 8) | (v >> 8));
} }
inline inline
uint32 endian_swap(const uint32* val) uint32 endian_swap(uint32 val)
{ {
uint32 v = *val; return ((val << 24)
return ((v << 24) | ((val & 0xFF00) << 8)
| ((v & 0xFF00) << 8) | ((val >> 8) & 0xFF00)
| ((v >> 8) & 0xFF00) | (val >> 24));
| (v >> 24));
} }
inline inline
int32 endian_swap(const int32* val) int32 endian_swap(int32 val)
{ {
uint32 v = (uint32) (*val); return (int32) ((val << 24)
return (int32) ((v << 24) | ((val & 0xFF00) << 8)
| ((v & 0xFF00) << 8) | ((val >> 8) & 0xFF00)
| ((v >> 8) & 0xFF00) | (val >> 24));
| (v >> 24));
} }
inline inline
uint64 endian_swap(const uint64* val) uint64 endian_swap(uint64 val)
{ {
uint64 v = *val; return ((val << 56)
return ((v << 56) | ((val & 0x000000000000FF00ULL) << 40)
| ((v & 0x000000000000FF00ULL) << 40) | ((val & 0x0000000000FF0000ULL) << 24)
| ((v & 0x0000000000FF0000ULL) << 24) | ((val & 0x00000000FF000000ULL) << 8)
| ((v & 0x00000000FF000000ULL) << 8) | ((val & 0x000000FF00000000ULL) >> 8)
| ((v & 0x000000FF00000000ULL) >> 8) | ((val & 0x0000FF0000000000ULL) >> 24)
| ((v & 0x0000FF0000000000ULL) >> 24) | ((val & 0x00FF000000000000ULL) >> 40)
| ((v & 0x00FF000000000000ULL) >> 40) | (val >> 56));
| (v >> 56));
} }
inline inline
int64 endian_swap(const int64* val) int64 endian_swap(int64 val)
{ {
uint64 v = (uint64) (*val); return (int64) ((val << 56)
return (int64) ((v << 56) | ((val & 0x000000000000FF00ULL) << 40)
| ((v & 0x000000000000FF00ULL) << 40) | ((val & 0x0000000000FF0000ULL) << 24)
| ((v & 0x0000000000FF0000ULL) << 24) | ((val & 0x00000000FF000000ULL) << 8)
| ((v & 0x00000000FF000000ULL) << 8) | ((val & 0x000000FF00000000ULL) >> 8)
| ((v & 0x000000FF00000000ULL) >> 8) | ((val & 0x0000FF0000000000ULL) >> 24)
| ((v & 0x0000FF0000000000ULL) >> 24) | ((val & 0x00FF000000000000ULL) >> 40)
| ((v & 0x00FF000000000000ULL) >> 40) | (val >> 56));
| (v >> 56));
} }
inline inline
float endian_swap(const float* val) float endian_swap(float val)
{ {
uint32* ival = (uint32 *) val; return (float) endian_swap(val);
return (float) endian_swap(ival);
} }
inline inline
double endian_swap(const double* val) double endian_swap(double val)
{ {
uint64* ival = (uint64 *) val; return (double) endian_swap(val);
return (double) endian_swap(ival);
} }
#endif #endif

View File

@ -114,10 +114,16 @@ void update_timing_stat(TimingStat *stat)
#if DEBUG #if DEBUG
#define ASSERT_SIMPLE(a) \ #define ASSERT_SIMPLE(a) \
if (!(a)) { \ if (!(a)) { \
*(volatile int *)0 = 0; \ *(volatile int *) 0 = 0; \
}
#define ASSERT_SIMPLE_CONST(a) \
if constexpr (!(a)) { \
*(volatile int *) 0 = 0; \
} }
#else #else
#define ASSERT_SIMPLE(a) ((void) 0) #define ASSERT_SIMPLE(a) ((void) 0)
#define ASSERT_SIMPLE_CONST(a) ((void) 0)
#endif #endif
#define ASSERT_TRUE(a) \ #define ASSERT_TRUE(a) \