Moving data reader to arrays instead of slices

This commit is contained in:
Caleb Gardner
2025-07-25 06:42:22 -05:00
parent 8c44c77456
commit 61d194e80a
4 changed files with 36 additions and 30 deletions
+1 -1
View File
@@ -4,4 +4,4 @@ Messing around with zig via making a squashfs library. May amount to something.
## Current state ## Current state
Performance is pretty terrible, but overall the library should fully work for decompression. Lzo & Lz4 decompression are not supported as they are not a part of zig's stdlib (support may be added later with external libraries). Performance is relatively bad (when compared to the official [squashfs-tools](https://github.com/plougher/squashfs-tools)), but the basics should fully work.
+32 -27
View File
@@ -11,6 +11,11 @@ const DataReaderError = error{
InvalidIndex, InvalidIndex,
}; };
const DataBlock = struct {
data: [1024 * 1024]u8, // Blocks can be up to 1MB in size.
len: usize,
};
pub fn DataReader(comptime T: type) type { pub fn DataReader(comptime T: type) type {
return struct { return struct {
const Self = @This(); const Self = @This();
@@ -26,9 +31,9 @@ pub fn DataReader(comptime T: type) type {
block_size: u32, block_size: u32,
sizes: []BlockSize, sizes: []BlockSize,
frag: []u8 = &[0]u8{}, frag: DataBlock = DataBlock{ .data = &[0]u8, .len = 0 },
read_block: []u8 = &[0]u8{}, read_block: DataBlock = DataBlock{ .data = &[0]u8, .len = 0 },
read_offset: u64 = 0, read_offset: u64 = 0,
read_idx: u32 = 0, read_idx: u32 = 0,
@@ -64,52 +69,53 @@ pub fn DataReader(comptime T: type) type {
} }
pub fn addFragment(self: *Self, entry: FragEntry, offset: u32) !void { pub fn addFragment(self: *Self, entry: FragEntry, offset: u32) !void {
self.frag = try self.alloc.alloc(u8, self.file_size % self.block_size); self.frag.len = self.file_size % self.block_size;
errdefer self.frag.len = 0;
if (entry.size.size == 0) { if (entry.size.size == 0) {
@memset(self.frag, 0); @memset(self.frag.data, 0);
return; return;
} else if (entry.size.uncompressed) { } else if (entry.size.uncompressed) {
_ = try self.rdr.pread(self.frag, entry.block + offset); _ = try self.rdr.pread(self.frag.data, entry.block + offset);
return; return;
} }
const block = try self.alloc.alloc(u8, offset + self.frag.len); const block: [1024 * 1024]u8 = undefined;
defer self.alloc.free(block);
_ = try self.comp.decompress( _ = try self.comp.decompress(
1024 * 1024,
self.alloc, self.alloc,
self.rdr.readerAt(entry.block).reader(), self.rdr.readerAt(entry.block).reader(),
block, block,
); );
@memcpy(self.frag, block[offset..]); @memcpy(self.frag.data, block[offset..]);
} }
pub fn setPool(self: *Self, pool: *std.Thread.Pool) void { pub fn setPool(self: *Self, pool: *std.Thread.Pool) void {
self.pool = pool; self.pool = pool;
} }
fn blockAt(self: Self, idx: usize) ![]u8 { fn blockAt(self: Self, idx: usize) !DataBlock {
if (self.frag.len > 0 and idx == self.sizes.len) return self.frag; if (self.frag.len > 0 and idx == self.sizes.len) return self.frag;
if (idx >= self.sizes.len) return DataReaderError.InvalidIndex; if (idx >= self.sizes.len) return DataReaderError.InvalidIndex;
const size = blk: { const out: DataBlock = undefined;
out.len = blk: {
if (idx == self.sizes.len - 1 and self.frag.len == 0) { if (idx == self.sizes.len - 1 and self.frag.len == 0) {
break :blk self.file_size % self.block_size; break :blk self.file_size % self.block_size;
} }
break :blk self.block_size; break :blk self.block_size;
}; };
const block = try self.alloc.alloc(u8, size);
errdefer self.alloc.free(block);
if (self.sizes[idx].size == 0) { if (self.sizes[idx].size == 0) {
@memset(block, 0); @memset(out.data[0..out.len], 0);
return block; return out;
} else if (self.sizes[idx].uncompressed) { } else if (self.sizes[idx].uncompressed) {
_ = try self.rdr.pread(block, self.offsets[idx]); _ = try self.rdr.pread(out.data[0..out.len], self.offsets[idx]);
return block; return out;
} }
_ = try self.comp.decompress( _ = try self.comp.decompress(
1024 * 1024,
self.alloc, self.alloc,
self.rdr.readerAt(self.offsets[idx]).reader(), self.rdr.readerAt(self.offsets[idx]).reader(),
block, out.data[0..out.len],
); );
return block; return out;
} }
fn numBlocks(self: Self) usize { fn numBlocks(self: Self) usize {
@@ -135,7 +141,7 @@ pub fn DataReader(comptime T: type) type {
self.read_idx += 1; self.read_idx += 1;
} }
to_read = @min(buf.len - cur_red, self.block_size - self.read_offset); to_read = @min(buf.len - cur_red, self.block_size - self.read_offset);
@memcpy(buf[cur_red .. cur_red + to_read], self.read_block[self.read_offset .. self.read_offset + to_read]); @memcpy(buf[cur_red .. cur_red + to_read], self.read_block.data[self.read_offset .. self.read_offset + to_read]);
cur_red += to_read; cur_red += to_read;
self.read_offset += to_read; self.read_offset += to_read;
} }
@@ -152,7 +158,7 @@ pub fn DataReader(comptime T: type) type {
var mut: std.Thread.Mutex = .{}; var mut: std.Thread.Mutex = .{};
var cur_idx: usize = 0; var cur_idx: usize = 0;
var wg: std.Thread.WaitGroup = .{}; var wg: std.Thread.WaitGroup = .{};
var completed: std.AutoHashMap(usize, []u8) = .init(self.alloc); var completed: std.AutoHashMap(usize, DataBlock) = .init(self.alloc);
defer completed.deinit(); defer completed.deinit();
var errs: std.ArrayList(anyerror) = .init(self.alloc); var errs: std.ArrayList(anyerror) = .init(self.alloc);
defer errs.deinit(); defer errs.deinit();
@@ -178,8 +184,7 @@ pub fn DataReader(comptime T: type) type {
return self.file_size; return self.file_size;
} }
/// Similar to writeTo, but does not block until finished. /// Similar to writeTo, but does not block until finished.
/// When all blocks have been written, on_finish and wg.finish() (in that order) will be called. /// Calls on_finish when all blocks have been written.
/// NOTE: wg.start() is not called;
pub fn writeToNoBlock( pub fn writeToNoBlock(
self: Self, self: Self,
errs: *std.ArrayList(anyerror), errs: *std.ArrayList(anyerror),
@@ -198,9 +203,9 @@ pub fn DataReader(comptime T: type) type {
block_wg.* = .{}; block_wg.* = .{};
const finish_mut = try self.alloc.create(std.Thread.Mutex); const finish_mut = try self.alloc.create(std.Thread.Mutex);
finish_mut.* = .{}; finish_mut.* = .{};
var completed: ?std.AutoHashMap(usize, []u8) = null; var completed: ?std.AutoHashMap(usize, DataBlock) = null;
if (!comptime std.meta.hasFn(@TypeOf(writer), "pwrite")) { if (!comptime std.meta.hasFn(@TypeOf(writer), "pwrite")) {
completed = std.AutoHashMap(usize, []u8).init(self.alloc); completed = std.AutoHashMap(usize, DataBlock).init(self.alloc);
} }
block_wg.startMany(self.numBlocks()); block_wg.startMany(self.numBlocks());
for (0..self.numBlocks()) |i| { for (0..self.numBlocks()) |i| {
@@ -229,7 +234,7 @@ pub fn DataReader(comptime T: type) type {
mut: *std.Thread.Mutex, mut: *std.Thread.Mutex,
cur_idx: *usize, cur_idx: *usize,
errs: *std.ArrayList(anyerror), errs: *std.ArrayList(anyerror),
completed: *std.AutoHashMap(usize, []u8), completed: *std.AutoHashMap(usize, DataBlock),
idx: usize, idx: usize,
writer: anytype, writer: anytype,
) void { ) void {
@@ -306,7 +311,7 @@ pub fn DataReader(comptime T: type) type {
mut: *std.Thread.Mutex, mut: *std.Thread.Mutex,
cur_idx: *usize, cur_idx: *usize,
errs: *std.ArrayList(anyerror), errs: *std.ArrayList(anyerror),
completed: *std.AutoHashMap(usize, []u8), completed: *std.AutoHashMap(usize, DataBlock),
idx: usize, idx: usize,
writer: anytype, writer: anytype,
) void { ) void {
@@ -330,7 +335,7 @@ pub fn DataReader(comptime T: type) type {
mut: *std.Thread.Mutex, mut: *std.Thread.Mutex,
cur_idx: *usize, cur_idx: *usize,
errs: *std.ArrayList(anyerror), errs: *std.ArrayList(anyerror),
completed: *std.AutoHashMap(usize, []u8), completed: *std.AutoHashMap(usize, DataBlock),
idx: usize, idx: usize,
writer: anytype, writer: anytype,
finish_mut: *std.Thread.Mutex, finish_mut: *std.Thread.Mutex,
+1
View File
@@ -39,6 +39,7 @@ pub fn MetadataReader(comptime T: type) type {
self.block_size = try self.rdr.pread(self.block[0..hdr.size], self.offset); self.block_size = try self.rdr.pread(self.block[0..hdr.size], self.offset);
} else { } else {
self.block_size = try self.comp.decompress( self.block_size = try self.comp.decompress(
8192,
self.alloc, self.alloc,
self.rdr.readerAt(self.offset).reader(), self.rdr.readerAt(self.offset).reader(),
&self.block, &self.block,
+2 -2
View File
@@ -51,7 +51,7 @@ pub const Compression = enum(u16) {
lz4, lz4,
zstd, zstd,
pub fn decompress(self: Compression, alloc: std.mem.Allocator, source: anytype, dest: []u8) !usize { pub fn decompress(self: Compression, comptime max_size: u32, alloc: std.mem.Allocator, source: anytype, dest: []u8) !usize {
switch (self) { switch (self) {
.gzip => { .gzip => {
var decomp = std.compress.zlib.decompressor(source); var decomp = std.compress.zlib.decompressor(source);
@@ -70,7 +70,7 @@ pub const Compression = enum(u16) {
}, },
.lz4 => return DecompressError.Lz4Unavailable, .lz4 => return DecompressError.Lz4Unavailable,
.zstd => { .zstd => {
var window: [std.compress.zstd.DecompressorOptions.default_window_buffer_len]u8 = undefined; var window: [max_size]u8 = undefined;
var decomp = std.compress.zstd.decompressor(source, .{ .window_buffer = &window }); var decomp = std.compress.zstd.decompressor(source, .{ .window_buffer = &window });
return decomp.read(dest); return decomp.read(dest);
}, },