From 2b49395ab2f55903d3080cd3ffd3842fba13d8bc Mon Sep 17 00:00:00 2001 From: Caleb Gardner Date: Fri, 22 May 2026 06:09:06 -0500 Subject: [PATCH] Fixes and optimizations Added FragManager so each frag block only gets decompressed once Returned to C for decompression (only zstd stateless ATM) --- build.zig | 10 +++++ src/archive.zig | 25 +++--------- src/bin/unsquashfs.zig | 14 ++++--- src/c.h | 1 + src/decomp.zig | 20 ++++++++++ src/decomp/zstd.zig | 16 ++++++-- src/file.zig | 4 +- src/frag.zig | 77 +++++++++++++++++++++++++++++++++++++ src/util/data_extractor.zig | 61 ++++++++--------------------- src/util/data_reader.zig | 35 ++++++----------- 10 files changed, 163 insertions(+), 100 deletions(-) create mode 100644 src/c.h diff --git a/build.zig b/build.zig index 9437a6b..9fda14b 100644 --- a/build.zig +++ b/build.zig @@ -24,6 +24,16 @@ pub fn build(b: *std.Build) !void { .use_llvm = debug, }); + const zstd = b.dependency("zstd", .{ .optimize = optimize, .target = target }); + lib.root_module.linkLibrary(zstd.artifact("zstd")); + + const c = b.addTranslateC(.{ + .optimize = optimize, + .target = target, + .root_source_file = b.path("src/c.h"), + }); + lib.root_module.addImport("c", c.createModule()); + var version = version_string_option orelse "0.0.0-testing"; if (version[0] == 'v') version = version[1..]; const unsquashfs_options = b.addOptions(); diff --git a/src/archive.zig b/src/archive.zig index 2038ce0..d91878c 100644 --- a/src/archive.zig +++ b/src/archive.zig @@ -1,6 +1,7 @@ const std = @import("std"); const Io = std.Io; +const Decomp = @import("decomp.zig"); const ExtractionOptions = @import("options.zig"); const File = @import("file.zig"); const Inode = @import("inode.zig"); @@ -15,7 +16,7 @@ const Archive = @This(); file: OffsetFile, super: Superblock, -stateless_decomp: Decompressor, +stateless_decomp: *const Decompressor, pub fn init(io: Io, file: std.Io.File, offset: u64) !Archive { var rdr = file.reader(io, &[0]u8{}); @@ -26,14 +27,7 @@ pub fn 
init(io: Io, file: std.Io.File, offset: u64) !Archive { .file = .init(file, offset), .super = super, - .stateless_decomp = switch (super.compression) { - .gzip => @import("decomp/zlib.zig").stateless_decompressor, - .lzma => @import("decomp/lzma.zig").stateless_decompressor, - .lzo => return error.LzoUnsupported, - .xz => @import("decomp/xz.zig").stateless_decompressor, - .lz4 => return error.Lz4Unsupported, - .zstd => @import("decomp/zstd.zig").stateless_decompressor, - }, + .stateless_decomp = try Decomp.StatelessDecomp(super.compression), }; } @@ -43,7 +37,7 @@ pub fn root(self: Archive, alloc: std.mem.Allocator, io: Io) !File { alloc, io, self.file, - &self.stateless_decomp, + self.stateless_decomp, self.super.inode_start, self.super.block_size, self.super.root_ref, @@ -65,7 +59,7 @@ pub fn extract(self: Archive, alloc: std.mem.Allocator, io: Io, extract_dir: []c alloc, io, self.file, - &self.stateless_decomp, + self.stateless_decomp, self.super.inode_start, self.super.block_size, self.super.root_ref, @@ -128,14 +122,7 @@ pub const Superblock = extern struct { mod_time: u32, block_size: u32, frag_count: u32, - compression: enum(u16) { - gzip = 1, - lzma, - lzo, - xz, - lz4, - zstd, - }, + compression: Decomp.Enum, block_log: u16, flags: packed struct(u16) { inode_uncompressed: bool, diff --git a/src/bin/unsquashfs.zig b/src/bin/unsquashfs.zig index 69f4df3..53c3f08 100644 --- a/src/bin/unsquashfs.zig +++ b/src/bin/unsquashfs.zig @@ -48,12 +48,14 @@ pub fn main(init: std.process.Init) !void { var out = stdout.writer(io, &[0]u8{}); defer out.interface.flush() catch {}; - try handleArgs(init.minimal.args, &out.interface); - if (archive.len == 0) { - try out.interface.print("You must provide a squashfs archive\n", .{}); - try out.interface.print(help_mgs, .{}); - return; - } + // try handleArgs(init.minimal.args, &out.interface); + // if (archive.len == 0) { + // try out.interface.print("You must provide a squashfs archive\n", .{}); + // try 
out.interface.print(help_mgs, .{}); + // return; + // } + archive = "testing/LinuxPATest.sfs"; + extLoc = "testing/LinuxPABinTest"; var fil = try Io.Dir.cwd().openFile(io, archive, .{}); //TODO: Handle error gracefully. defer fil.close(io); diff --git a/src/c.h b/src/c.h new file mode 100644 index 0000000..b1152a8 --- /dev/null +++ b/src/c.h @@ -0,0 +1 @@ +#include <zstd.h> diff --git a/src/decomp.zig b/src/decomp.zig index cb218bf..7441044 100644 --- a/src/decomp.zig +++ b/src/decomp.zig @@ -2,6 +2,26 @@ const std = @import("std"); const Decompressor = @import("util/decompressor.zig"); +pub const Enum = enum(u16) { + gzip = 1, + lzma, + lzo, + xz, + lz4, + zstd, +}; + +pub fn StatelessDecomp(val: Enum) !*const Decompressor { + return switch (val) { + .gzip => &@import("decomp/zlib.zig").stateless_decompressor, + .lzma => &@import("decomp/lzma.zig").stateless_decompressor, + .lzo => error.LzoUnsupported, + .xz => &@import("decomp/xz.zig").stateless_decompressor, + .lz4 => error.Lz4Unsupported, + .zstd => &@import("decomp/zstd.zig").stateless_decompressor, + }; +} + pub const Decomp = union(enum) { gzip: @import("decomp/zlib.zig"), lzma: @import("decomp/lzma.zig"), diff --git a/src/decomp/zstd.zig b/src/decomp/zstd.zig index 4f41d83..5c61746 100644 --- a/src/decomp/zstd.zig +++ b/src/decomp/zstd.zig @@ -4,6 +4,8 @@ const Reader = std.Io.Reader; const zstd = std.compress.zstd; const Node = std.SinglyLinkedList.Node; +const c = @import("c"); + const Decompressor = @import("../util/decompressor.zig"); const Error = Decompressor.Error; @@ -67,8 +69,14 @@ inline fn zstdDecomp(buffer: []u8, in: []u8, out: []u8) !usize { pub const stateless_decompressor: Decompressor = .{ .decomp_fn = statelessDecomp }; -fn statelessDecomp(_: ?*const Decompressor, alloc: std.mem.Allocator, in: []u8, out: []u8) Error!usize { - const buf = try alloc.alloc(u8, out.len + zstd.block_size_max); - defer alloc.free(buf); - return zstdDecomp(buf, in, out); +fn statelessDecomp(d: ?*const Decompressor, alloc: 
std.mem.Allocator, in: []u8, out: []u8) Error!usize { + _ = d; + _ = alloc; + const res = c.ZSTD_decompress(out.ptr, out.len, in.ptr, in.len); + if (c.ZSTD_isError(res) == 1) + return Error.ReadFailed; + return res; + // const buf = try alloc.alloc(u8, out.len + zstd.block_size_max); + // defer alloc.free(buf); + // return zstdDecomp(buf, in, out); } diff --git a/src/file.zig b/src/file.zig index 27d93ba..7933e70 100644 --- a/src/file.zig +++ b/src/file.zig @@ -37,7 +37,7 @@ pub fn init(alloc: std.mem.Allocator, archive: Archive, in: Inode, name: []const } pub fn fromDirEntry(alloc: std.mem.Allocator, io: Io, archive: Archive, ent: DirEntry) !File { var rdr = try archive.file.readerAt(io, archive.super.inode_start + ent.block_start, &[0]u8{}); - var meta: MetadataReader = .init(alloc, &rdr.interface, &archive.stateless_decomp); + var meta: MetadataReader = .init(alloc, &rdr.interface, archive.stateless_decomp); try meta.interface.discardAll(ent.block_offset); var in: Inode = try .read(alloc, &meta.interface, archive.super.block_size); @@ -54,7 +54,7 @@ pub fn open(self: File, alloc: std.mem.Allocator, io: Io, filepath: []const u8) alloc, io, self.archive.file, - &self.archive.stateless_decomp, + self.archive.stateless_decomp, self.archive.super.dir_start, ); defer { diff --git a/src/frag.zig b/src/frag.zig index 9333f24..f3be568 100644 --- a/src/frag.zig +++ b/src/frag.zig @@ -1,7 +1,84 @@ +const std = @import("std"); +const Io = std.Io; + const BlockSize = @import("inode_data/file.zig").BlockSize; +const LookupTable = @import("lookup_table.zig"); +const Decompressor = @import("util/decompressor.zig"); +const MetadataReader = @import("util/metadata.zig"); +const OffsetFile = @import("util/offset_file.zig"); + +const FragManager = @This(); pub const FragEntry = extern struct { start: u64, size: BlockSize, _: u32, }; + +alloc: std.mem.Allocator, +fil: OffsetFile, +decomp: *const Decompressor, +block_size: u32, + +entries: []FragEntry, + +frag_cache: 
std.array_hash_map.Auto(u32, []u8), +cache_mut: std.Io.Mutex = .init, + +pub fn init(alloc: std.mem.Allocator, io: Io, fil: OffsetFile, decomp: *const Decompressor, frag_start: u64, frag_num: u32, block_size: u32) !FragManager { + var buf: [8 * 1024]u8 = undefined; + var rdr = try fil.readerAt(io, frag_start, &buf); + var first_offset: u64 = undefined; + try rdr.interface.readSliceEndian(u64, @ptrCast(&first_offset), .little); + + rdr = try fil.readerAt(io, first_offset, &buf); + var meta: MetadataReader = .init(alloc, &rdr.interface, decomp); + + const entries = try alloc.alloc(FragEntry, frag_num); + errdefer alloc.free(entries); + + try meta.interface.readSliceEndian(FragEntry, entries, .little); + + return .{ + .alloc = alloc, + .fil = fil, + .decomp = decomp, + .block_size = block_size, + + .entries = entries, + + .frag_cache = .empty, + }; +} +pub fn deinit(self: *FragManager, io: Io) void { + self.cache_mut.lockUncancelable(io); + self.alloc.free(self.entries); + for (self.frag_cache.values()) |v| + self.alloc.free(v); + self.frag_cache.deinit(self.alloc); +} + +pub fn get(self: *FragManager, io: Io, idx: u32) ![]u8 { + if (self.frag_cache.contains(idx)) + return self.frag_cache.get(idx).?; + + try self.cache_mut.lock(io); + defer self.cache_mut.unlock(io); + + const entry = self.entries[idx]; + + const out = try self.alloc.alloc(u8, if (entry.size.uncompressed) entry.size.size else self.block_size); + + var buf: [1024 * 1024]u8 = undefined; + var rdr = try self.fil.readerAt(io, entry.start, &buf); + if (entry.size.uncompressed) { + try rdr.interface.readSliceAll(out); + } else { + @branchHint(.likely); + try rdr.interface.fill(entry.size.size); + _ = try self.decomp.Decompress(self.alloc, buf[0..entry.size.size], out); + } + + try self.frag_cache.put(self.alloc, idx, out); + return out; +} diff --git a/src/util/data_extractor.zig b/src/util/data_extractor.zig index 4e6a6ba..81419c1 100644 --- a/src/util/data_extractor.zig +++ b/src/util/data_extractor.zig 
@@ -10,7 +10,7 @@ const OffsetFile = @import("offset_file.zig"); // const SharedCache = @import("shared_cache.zig"); -pub const Error = error{OutOfMemory} || Io.File.Reader.SeekError || Io.Writer.Error; +pub const Error = error{OutOfMemory} || Io.File.Reader.SeekError || Io.Writer.Error || Io.File.Writer.Error; const DataExtractor = @This(); @@ -23,7 +23,7 @@ start: u64, blocks: []BlockSize, frag_offset: u32 = 0, -frag_entry: ?FragEntry = null, +frag_block: ?[]u8 = null, err: ?Error = null, @@ -38,14 +38,14 @@ pub fn init(fil: OffsetFile, decomp: *const Decompressor, block_size: u32, file_ .blocks = blocks, }; } -pub fn addFrag(self: *DataExtractor, frag_offset: u32, entry: FragEntry) void { +pub fn addFrag(self: *DataExtractor, frag_offset: u32, block: []u8) void { self.frag_offset = frag_offset; - self.frag_entry = entry; + self.frag_block = block; } fn numBlocks(self: DataExtractor) usize { var num = self.blocks.len; - if (self.frag_entry != null) num += 1; + if (self.frag_block != null) num += 1; return num; } @@ -60,8 +60,8 @@ pub fn extractAsync(self: DataExtractor, alloc: std.mem.Allocator, io: Io, fil: group.async(io, blockThread, .{ self, alloc, io, fil, read_offset, idx, &err }); read_offset += self.blocks[idx].size; } - if (self.frag_entry != null) - group.async(io, fragThread, .{ self, alloc, io, fil, &err }); + if (self.frag_block != null) + group.async(io, fragThread, .{ self, io, fil, &err }); group.await(io) catch |cancel| return err orelse cancel; } @@ -126,55 +126,26 @@ fn blockThread(self: DataExtractor, alloc: std.mem.Allocator, io: Io, fil: Io.Fi }; } } -fn fragThread(self: DataExtractor, alloc: std.mem.Allocator, io: Io, fil: Io.File, ret_err: *?Error) Io.Cancelable!void { - const frag = self.frag_entry.?; +fn fragThread(self: DataExtractor, io: Io, fil: Io.File, ret_err: *?Error) Io.Cancelable!void { const cur_block_size = self.file_size % self.block_size; - var wrt = fil.writer(io, &[0]u8{}); + var write_buf: [10 * 1024]u8 = undefined; + var 
wrt = fil.writer(io, &write_buf); wrt.seekTo(self.blocks.len * self.block_size) catch |err| { ret_err.* = err; if (err == error.Canceled) io.recancel(); return Io.Cancelable.Canceled; }; - defer wrt.flush() catch {}; - var rdr = self.fil.readerAt(io, frag.start, &[0]u8{}) catch |err| { + wrt.interface.writeAll(self.frag_block.?[self.frag_offset .. self.frag_offset + cur_block_size]) catch |err| { ret_err.* = err; if (err == error.Canceled) io.recancel(); return Io.Cancelable.Canceled; }; - if (frag.size.uncompressed) { - rdr.interface.discardAll(self.frag_offset) catch |err| { - ret_err.* = err; - if (err == error.Canceled) io.recancel(); - return Io.Cancelable.Canceled; - }; - rdr.interface.streamExact(&wrt.interface, cur_block_size) catch |err| { - ret_err.* = err; - if (err == error.Canceled) io.recancel(); - return Io.Cancelable.Canceled; - }; - return; - } else { - @branchHint(.likely); - var cache: [1024 * 1024]u8 = undefined; - var tmp: [1024 * 1024]u8 = undefined; - - rdr.interface.readSliceAll(cache[0..frag.size.size]) catch |err| { - ret_err.* = err; - if (err == error.Canceled) io.recancel(); - return Io.Cancelable.Canceled; - }; - _ = self.decomp.Decompress(alloc, cache[0..frag.size.size], tmp[0..self.block_size]) catch |err| { - ret_err.* = err; - if (err == error.Canceled) io.recancel(); - return Io.Cancelable.Canceled; - }; - wrt.interface.writeAll(tmp[self.frag_offset .. 
self.frag_offset + cur_block_size]) catch |err| { - ret_err.* = err; - if (err == error.Canceled) io.recancel(); - return Io.Cancelable.Canceled; - }; - } + wrt.flush() catch |err| { + ret_err.* = err; + if (err == error.Canceled) io.recancel(); + return Io.Cancelable.Canceled; + }; } diff --git a/src/util/data_reader.zig b/src/util/data_reader.zig index 09983ec..ddcce19 100644 --- a/src/util/data_reader.zig +++ b/src/util/data_reader.zig @@ -6,7 +6,6 @@ const Reader = Io.Reader; const Writer = Io.Writer; const Limit = Io.Limit; -const FragEntry = @import("../frag.zig").FragEntry; const BlockSize = @import("../inode_data/file.zig").BlockSize; const Decompressor = @import("decompressor.zig"); const OffsetFile = @import("offset_file.zig"); @@ -28,7 +27,7 @@ cur_offset: u64, blocks: []BlockSize, frag_offset: u32 = 0, -frag_entry: ?FragEntry = null, +frag_block: ?[]u8 = null, block_idx: usize = 0, sparse_block: bool = false, @@ -64,14 +63,14 @@ pub fn init(alloc: std.mem.Allocator, io: Io, fil: OffsetFile, decomp: *const De pub fn deinit(self: *DataReader) void { self.alloc.free(self.interface.buffer); } -pub fn addFrag(self: *DataReader, frag_offset: u32, entry: FragEntry) void { +pub fn addFrag(self: *DataReader, frag_offset: u32, block: []u8) void { self.frag_offset = frag_offset; - self.frag_entry = entry; + self.frag_block = block; } fn numBlocks(self: DataReader) usize { var num = self.blocks.len; - if (self.frag_entry != null) num += 1; + if (self.frag_block != null) num += 1; return num; } fn advanceBuffer(self: *DataReader) !void { @@ -81,26 +80,13 @@ fn advanceBuffer(self: *DataReader) !void { defer self.block_idx += 1; self.interface.end = if (self.block_idx == self.numBlocks() - 1) - self.size % self.block_size + self.file_size % self.block_size else self.block_size; // Fragment if (self.block_idx == self.blocks.len) { - const entry = self.frag_entry.?; - if (entry.size.uncompressed) { - var rdr = try self.fil.readerAt(self.io, entry.start + 
self.frag_offset, &[0]u8{}); - try rdr.interface.readSliceAll(self.interface.buffer[0..self.interface.end]); - } else { - @branchHint(.likely); - const tmp = try self.cache.getOne(self.io); - defer self.cache.putOne(tmp) catch {}; - - var rdr = try self.fil.readerAt(self.io, entry.start, &[0]u8{}); - try rdr.interface.readSliceAll(tmp.cache[0..entry.size.size]); - _ = try self.decomp.Decompress(self.alloc, tmp.cache[0..entry.size.size], self.interface.buffer[0..self.block_size]); - @memmove(self.interface.buffer[0..self.interface.end], self.interface.buffer[self.frag_offset .. self.frag_offset + self.interface.end]); - } + @memcpy(self.interface.buffer[0..self.interface.end], self.frag_block.?[self.frag_offset .. self.frag_offset + self.interface.end]); self.interface.seek = 0; return; } @@ -120,12 +106,13 @@ fn advanceBuffer(self: *DataReader) !void { } else { @branchHint(.likely); const tmp = try self.cache.getOne(self.io); - defer self.cache.putOne(tmp) catch {}; + defer self.cache.putOne(self.io, tmp) catch {}; - var rdr = try self.fil.readerAt(self.io, self.cur_offset, &[0]u8{}); - try rdr.interface.readSliceAll(tmp.cache[0..block.size]); + var rdr_buf: [50 * 1024]u8 = undefined; + var rdr = try self.fil.readerAt(self.io, self.cur_offset, &rdr_buf); + try rdr.interface.readSliceAll(tmp[0..block.size]); self.cur_offset += block.size; - _ = try self.decomp.Decompress(self.alloc, tmp.cache[0..block.size], self.interface.buffer[0..self.interface.end]); + _ = try self.decomp.Decompress(self.alloc, tmp[0..block.size], self.interface.buffer[0..self.interface.end]); } self.interface.seek = 0; }