From 75502da1d08e01c7144037bcfc15fc154aa5ec9d Mon Sep 17 00:00:00 2001 From: "Caleb J. Gardner" Date: Sat, 7 Feb 2026 05:04:22 -0600 Subject: [PATCH 1/6] Remove DecompMgr in favor of a much simpler fn ptr. Moved more functionality to Inode instead of File. Started doing some optimization around allocation. Slight rework of ExtractionOptions. --- README.md | 2 +- src/archive.zig | 21 +-- src/decomp.zig | 222 ++++--------------------------- src/dir_entry.zig | 17 +-- src/file.zig | 302 +----------------------------------------- src/inode.zig | 217 ++++++++++++++++++++++++++++++ src/options.zig | 25 ++-- src/table.zig | 6 +- src/util/data.zig | 16 +-- src/util/metadata.zig | 10 +- 10 files changed, 299 insertions(+), 539 deletions(-) diff --git a/README.md b/README.md index f9fa655..827dfbd 100644 --- a/README.md +++ b/README.md @@ -4,4 +4,4 @@ This is my experiments to learn Zig. Might amount to something. Might not. ## Current State -Kinda works as a library, but currently has known memory leaks. `unsquashfs` is missing a lot of features (and will probably never match the official unsquashfs). Extraction is stupidly slow and uses too many resources. +Kinda works as a library, but currently has known memory leaks. `unsquashfs` is missing a lot of features (and will probably never match the official unsquashfs). Extraction is stupidly slow and uses too many resources. Only properly work on Linux, any other OSes probably won't work fully. 
diff --git a/src/archive.zig b/src/archive.zig index c1a46e3..5dab2ad 100644 --- a/src/archive.zig +++ b/src/archive.zig @@ -4,7 +4,7 @@ const std = @import("std"); const File = std.fs.File; -const DecompMgr = @import("decomp.zig"); +const Decomp = @import("decomp.zig"); const ExtractionOptions = @import("options.zig"); const Inode = @import("inode.zig"); const InodeRef = Inode.Ref; @@ -40,7 +40,7 @@ super: Superblock, setup: bool = false, -decomp: DecompMgr = undefined, +decomp: Decomp.DecompFn, frag_table: Table(FragEntry) = undefined, id_table: Table(u16) = undefined, @@ -72,6 +72,13 @@ pub fn initAdvanced(alloc: std.mem.Allocator, fil: File, offset: u64, threads: u // .fixed_buf = fixed_buf, .thread_count = threads, .fil = .init(fil, offset), + .decomp = switch (super.compression) { + .gzip => Decomp.gzipDecompress, + .lzma => Decomp.lzmaDecompress, + .xz => Decomp.xzDecompress, + .zstd => Decomp.zstdDecompress, + else => return error.UnsupportedCompressionType, + }, .super = super, }; @@ -79,7 +86,6 @@ pub fn initAdvanced(alloc: std.mem.Allocator, fil: File, offset: u64, threads: u pub fn deinit(self: *Archive) void { // self.parent_alloc.free(self.fixed_buf); if (self.setup) { - self.decomp.deinit(); self.frag_table.deinit(); self.export_table.deinit(); self.id_table.deinit(); @@ -92,10 +98,9 @@ pub fn allocator(self: *Archive) std.mem.Allocator { fn setupValues(self: *Archive) !void { const alloc = self.allocator(); - self.decomp = try .init(alloc, self.super.compression, self.super.block_size, self.thread_count); - self.frag_table = try .init(alloc, self.fil, &self.decomp, self.super.frag_start, self.super.frag_count); - self.id_table = try .init(alloc, self.fil, &self.decomp, self.super.id_start, self.super.id_count); - self.export_table = try .init(alloc, self.fil, &self.decomp, self.super.export_start, self.super.inode_count); + self.frag_table = try .init(alloc, self.fil, self.decomp, self.super.frag_start, self.super.frag_count); + self.id_table = try 
.init(alloc, self.fil, self.decomp, self.super.id_start, self.super.id_count); + self.export_table = try .init(alloc, self.fil, self.decomp, self.super.export_start, self.super.inode_count); self.setup = true; } @@ -121,7 +126,7 @@ pub fn inode(self: *Archive, num: u32) !Inode { pub fn root(self: *Archive) !SfsFile { if (!self.setup) try self.setupValues(); var rdr = try self.fil.readerAt(self.super.root_ref.block_start + self.super.inode_start, &[0]u8{}); - var meta: MetadataReader = .init(self.allocator(), &rdr.interface, &self.decomp); + var meta: MetadataReader = .init(self.allocator(), &rdr.interface, self.decomp); try meta.interface.discardAll(self.super.root_ref.block_offset); const in: Inode = try .read(self.allocator(), &meta.interface, self.super.block_size); return .init(self, in, ""); diff --git a/src/decomp.zig b/src/decomp.zig index 3522582..97f7064 100644 --- a/src/decomp.zig +++ b/src/decomp.zig @@ -1,21 +1,5 @@ -//! Decompression manager. Can decompress either from an Io.Reader or from a byte slice. - const std = @import("std"); -const compress = std.compress; const Reader = std.Io.Reader; -const Thread = std.Thread; -const Futex = Thread.Futex; -const Mutex = Thread.Mutex; -const Condition = Thread.Condition; -const Node = std.DoublyLinkedList.Node; - -const Atomic = std.atomic.Value(u32); - -const DecompError = error{ - ThreadClosed, - LzoUnsupported, - Lz4Unsupported, -}; pub const CompressionType = enum(u16) { gzip = 1, @@ -26,193 +10,35 @@ pub const CompressionType = enum(u16) { zstd, }; -pub const DecompThread = struct { - mgr: *DecompMgr, +pub const DecompFn = *const fn (alloc: std.mem.Allocator, in: []u8, out: []u8) anyerror!usize; // TODO: replace anyerror to definitive error types. - /// Current thread status & signal value via Futex. 
- /// 0 - Unstarted, 1 - Waiting, 2 - Working, 3 - Closed, - status: Atomic = .{ .raw = 0 }, - thr: Thread = undefined, - node: Node = .{}, - buf: []u8, +// pub const DecompressError = error{ +// ReadFailed, +// anyerror, +// }; - dat: []u8 = &[0]u8{}, - rdr: ?*Reader = null, - res: []u8 = &[0]u8{}, - res_size: anyerror!usize = 0, - - pub fn init(mgr: *DecompMgr) !DecompThread { - return .{ - .mgr = mgr, - .buf = switch (mgr.comp_type) { - .gzip => try mgr.alloc.alloc(u8, compress.flate.max_window_len), - .zstd => try mgr.alloc.alloc(u8, compress.zstd.default_window_len + compress.zstd.block_size_max), - .lzma, .xz => &[0]u8{}, - else => unreachable, - }, - }; - } - - pub fn close(self: *DecompThread) void { - if (self.status.raw == 0) return; - while (self.status.raw == 2) Futex.wait(&self.status, 2); - self.status.store(3, .release); - Futex.wake(&self.status, 1); - self.thr.join(); - self.mgr.alloc.free(self.buf); - } - - pub fn submitData(self: *DecompThread, dat: []u8, res: []u8) anyerror!usize { - if (self.status.raw == 3) return DecompError.ThreadClosed; - if (self.status.raw == 0) { - self.thr = try .spawn(.{}, thread, .{self}); - } - self.dat = dat; - defer self.dat = &[0]u8{}; - self.res = res; - self.status.raw = 2; - while (self.status.raw == 2) Futex.wait(&self.status, 2); - return self.res_size; - } - pub fn submitReader(self: *DecompThread, rdr: *Reader, res: []u8) anyerror!usize { - if (self.status.raw == 3) return DecompError.ThreadClosed; - if (self.status.raw == 0) { - self.thr = try .spawn(.{}, thread, .{self}); - } - self.rdr = rdr; - defer self.rdr = null; - self.res = res; - self.status.store(2, .release); - Futex.wake(&self.status, 1); - while (self.status.raw == 2) Futex.wait(&self.status, 2); - return self.res_size; - } - - pub fn thread(self: *DecompThread) void { - const comp_type = self.mgr.comp_type; - while (self.status.raw != 3) { - while (self.status.raw == 1) Futex.wait(&self.status, 1); - if (self.status.raw == 3) return; - var 
dat_rdr: Reader = .fixed(self.dat); - var rdr: *Reader = if (self.rdr != null) self.rdr.? else &dat_rdr; - self.res_size = blk: switch (comp_type) { - .gzip => { - var decomp_rdr = compress.flate.Decompress.init(rdr, .zlib, self.buf); - break :blk decomp_rdr.reader.readSliceShort(self.res) catch |err| { - break :blk decomp_rdr.err orelse err; - }; - }, - .lzma => { - var decomp_rdr = compress.lzma.decompress(self.mgr.alloc, rdr.adaptToOldInterface()) catch |err| { - break :blk err; - }; - break :blk decomp_rdr.read(self.res); - }, - .xz => { - var decomp_rdr = compress.xz.decompress(self.mgr.alloc, rdr.adaptToOldInterface()) catch |err| { - break :blk err; - }; - break :blk decomp_rdr.read(self.res); - }, - .zstd => { - var decomp_rdr = compress.zstd.Decompress.init(rdr, self.buf, .{}); - break :blk decomp_rdr.reader.readSliceShort(self.res) catch |err| { - break :blk decomp_rdr.err orelse err; - }; - }, - else => unreachable, - }; - const orig = self.status.swap(1, .release); - Futex.wake(&self.status, 1); - if (orig == 3) return; - } - } -}; - -const DecompMgr = @This(); - -alloc: std.mem.Allocator, -comp_type: CompressionType, -block_size: u32, - -threads: []DecompThread, -queue: std.DoublyLinkedList = .{}, -mut: Mutex = .{}, -cond: Condition = .{}, -to_start: usize, - -pub fn init(alloc: std.mem.Allocator, comp_type: CompressionType, block_size: u32, threads: usize) !DecompMgr { - return switch (comp_type) { - .lzo => DecompError.LzoUnsupported, - .lz4 => DecompError.Lz4Unsupported, - else => .{ - .alloc = alloc, - .comp_type = comp_type, - .block_size = block_size, - .threads = try alloc.alloc(DecompThread, threads), - .to_start = threads, - }, - }; +pub fn gzipDecompress(alloc: std.mem.Allocator, in: []u8, out: []u8) anyerror!usize { + _ = alloc; + var rdr: Reader = .fixed(in); + var decomp = std.compress.flate.Decompress.init(&rdr, .zlib, &[0]u8{}); + return decomp.reader.readSliceShort(out); } -pub fn deinit(self: DecompMgr) void { - for 
(self.threads[self.to_start..]) |*t| { - t.close(); - } - self.alloc.free(self.threads); +pub fn lzmaDecompress(alloc: std.mem.Allocator, in: []u8, out: []u8) anyerror!usize { + var rdr: Reader = .fixed(in); + var decomp = try std.compress.lzma.decompress(alloc, rdr.adaptToOldInterface()); + return decomp.read(out); } -pub fn decompSlice(self: *DecompMgr, dat: []u8, res: []u8) !usize { - self.mut.lock(); - var thr: *DecompThread = undefined; - var node = self.queue.popFirst(); - if (self.node != null) { - self.mut.unlock(); - thr = @fieldParentPtr("node", node.?); - } else blk: { - defer self.mut.unlock(); - if (self.to_start > 0) { - self.threads[self.to_start - 1] = .init(self); - thr = &self.threads[self.to_start - 1]; - self.to_start -= 1; - break :blk; - } - while (node == null) { - self.cond.wait(&self.mut); - node = self.queue.popFirst(); - } - thr = @fieldParentPtr("node", node.?); - } - defer { - self.queue.append(&thr.node); - self.cond.signal(); - } - return thr.submitData(dat, res); +pub fn xzDecompress(alloc: std.mem.Allocator, in: []u8, out: []u8) anyerror!usize { + var rdr: Reader = .fixed(in); + var decomp = try std.compress.xz.decompress(alloc, rdr.adaptToOldInterface()); + return decomp.read(out); } -pub fn decompReader(self: *DecompMgr, rdr: *Reader, res: []u8) !usize { - self.mut.lock(); - var thr: *DecompThread = undefined; - var node = self.queue.popFirst(); - if (node != null) { - self.mut.unlock(); - thr = @fieldParentPtr("node", node.?); - } else blk: { - defer self.mut.unlock(); - if (self.to_start > 0) { - self.threads[self.to_start - 1] = try .init(self); - thr = &self.threads[self.to_start - 1]; - self.to_start -= 1; - break :blk; - } - while (node == null) { - self.cond.wait(&self.mut); - node = self.queue.popFirst(); - } - thr = @fieldParentPtr("node", node.?); - } - defer { - self.queue.append(&thr.node); - self.cond.signal(); - } - return thr.submitReader(rdr, res); + +pub fn zstdDecompress(alloc: std.mem.Allocator, in: []u8, out: 
[]u8) anyerror!usize { + _ = alloc; + var rdr: Reader = .fixed(in); + var decomp = std.compress.zstd.Decompress.init(&rdr, &[0]u8{}, .{}); + return decomp.reader.readSliceShort(out); } diff --git a/src/dir_entry.zig b/src/dir_entry.zig index 3e4bcaa..4909f82 100644 --- a/src/dir_entry.zig +++ b/src/dir_entry.zig @@ -30,19 +30,20 @@ pub fn readDir(alloc: std.mem.Allocator, rdr: *Reader, size: u32) ![]Entry { var cur_red: u32 = 3; // start at 3 due to "." & ".." being counted in the dir size. var hdr: Header = undefined; var raw: RawEntry = undefined; - var out: std.ArrayList(Entry) = .empty; - errdefer { - for (out.items) |i| - i.deinit(alloc); - out.deinit(alloc); - } + var out: std.ArrayList(Entry) = try .initCapacity(alloc, 25); // Start out with capacity instead of needing to allocate per header. + errdefer out.deinit(alloc); while (cur_red < size) { try rdr.readSliceEndian(Header, @ptrCast(&hdr), .little); cur_red += @sizeOf(Header); - try out.ensureUnusedCapacity(alloc, hdr.num + 1); - for (0..hdr.count + 1) |_| { + const count = hdr.count + 1; + if (out.capacity < count) { + // Make sure we have at least 25 capacity past current count. 
+ try out.ensureUnusedCapacity(alloc, ((count % 25) + 2) * 25); + } + for (0..count) |_| { try rdr.readSliceEndian(RawEntry, @ptrCast(&raw), .little); const name = try alloc.alloc(u8, raw.name_size + 1); + errdefer alloc.free(name); try rdr.readSliceEndian(u8, name, .little); const val = out.addOneAssumeCapacity(); val.* = .{ diff --git a/src/file.zig b/src/file.zig index efda187..c051533 100644 --- a/src/file.zig +++ b/src/file.zig @@ -56,28 +56,7 @@ pub fn deinit(self: SfsFile) void { } fn getEntries(self: SfsFile) ![]DirEntry { - if (!self.isDir()) return FileError.NotDirectory; - var block_start: u32 = undefined; - var block_offset: u16 = undefined; - var size: u32 = undefined; - switch (self.inode.data) { - .dir => |d| { - block_start = d.block_start; - block_offset = d.block_offset; - size = d.size; - }, - .ext_dir => |d| { - block_start = d.block_start; - block_offset = d.block_offset; - size = d.size; - }, - else => unreachable, - } - var rdr = try self.archive.fil.readerAt(self.archive.super.dir_start + block_start, &[0]u8{}); - const alloc = self.archive.allocator(); - var meta: MetadataReader = .init(alloc, &rdr.interface, &self.archive.decomp); - try meta.interface.discardAll(block_offset); - return DirEntry.readDir(alloc, &meta.interface, size); + return self.inode.dirEntries(self.archive); } pub fn ownerUid(self: SfsFile) !u16 { @@ -99,33 +78,7 @@ pub fn isRegular(self: SfsFile) bool { /// The returned DataReader will no longer work if the File's deinit function is called /// or, more specifically, it's inode's deinit function is called. 
pub fn dataReader(self: SfsFile) !DataReader { - if (!self.isRegular()) return FileError.NotRegularFile; - var frag_idx: u32 = undefined; - var frag_offset: u32 = undefined; - var size: u64 = undefined; - var blocks: []BlockSize = undefined; - var start: u64 = undefined; - switch (self.inode.data) { - .file => |f| { - frag_idx = f.frag_idx; - frag_offset = f.frag_block_offset; - size = f.size; - blocks = f.block_sizes; - start = f.block_start; - }, - .ext_file => |f| { - frag_idx = f.frag_idx; - frag_offset = f.frag_block_offset; - size = f.size; - blocks = f.block_sizes; - start = f.block_start; - }, - else => unreachable, - } - var out: DataReader = .init(self.archive, blocks, start, size); - if (frag_idx != 0xFFFFFFFF) - out.addFragment(try self.archive.frag(frag_idx), frag_offset); - return out; + return self.inode.dataReader(self.archive); } pub fn isDir(self: SfsFile) bool { @@ -245,255 +198,8 @@ pub fn extract(self: *SfsFile, path: []const u8, options: ExtractionOptions) !vo } } defer if (ext_path.len > path.len) alloc.free(ext_path); - var pool: std.Thread.Pool = undefined; - try pool.init(.{ .allocator = alloc, .n_jobs = 16 }); - var wg: WaitGroup = .{}; - defer pool.deinit(); - var err: ?anyerror = null; - wg.start(); - self.extractReal(ext_path, options, &pool, &wg, &err, null); - wg.wait(); - if (err != null) return err.?; -} - -const ParentInfo = struct { - sfs_fil: SfsFile, - path: []const u8, - mut: *Mutex, - dir_wg: *WaitGroup, - parent_wg: *WaitGroup, - options: ExtractionOptions, - err: *?anyerror, - - fn finish(self: *const ParentInfo) void { - self.mut.lock(); - if (!self.dir_wg.isDone()) { - self.mut.unlock(); - return; - } - self.mut.unlock(); - std.debug.print("finishing dir {}: {s}\n", .{ self.sfs_fil.inode.hdr.num, self.sfs_fil.name }); - self.sfs_fil.archive.allocator().destroy(self.mut); - self.sfs_fil.archive.allocator().destroy(self.dir_wg); - defer self.parent_wg.finish(); - var fil = std.fs.cwd().openFile(self.path, .{}) catch |err| { 
- std.log.err("Error opening folder {s} to set permissions: {}\n", .{ self.path, err }); - self.err.* = err; - return; - }; - defer fil.close(); - self.sfs_fil.setPerm(fil, self.options) catch |err| { - std.log.err("Error setting permissions to {s}: {}\n", .{ self.path, err }); - self.err.* = err; - return; - }; - } -}; - -fn extractReal(self: SfsFile, path: []const u8, options: ExtractionOptions, pol: *std.Thread.Pool, wg: *WaitGroup, out_err: *?anyerror, parent: ?ParentInfo) void { - std.log.info("Extracting {s} (inode {}) to {s}\n", .{ self.name, self.inode.hdr.num, path }); - defer { - if (parent != null) { - parent.?.finish(); - self.archive.allocator().free(path); - self.deinit(); - } else { - wg.finish(); - } - } - if (out_err.* != null) { - return; - } - switch (self.inode.hdr.inode_type) { - .file, .ext_file => { - var fil = std.fs.cwd().createFile(path, .{}) catch |err| { - std.log.err("Error creating {s}: {}\n", .{ path, err }); - out_err.* = err; - return; - }; - defer fil.close(); - var dat_rdr = self.dataReader() catch |err| { - std.log.err("Error getting data reader for {s} (inode {}): {}\n", .{ self.name, self.inode.hdr.num, err }); - out_err.* = err; - return; - }; - defer dat_rdr.deinit(); - var wrt = fil.writer(&[0]u8{}); - _ = dat_rdr.interface.streamRemaining(&wrt.interface) catch |err| { - std.log.err("Error writing data for {s} (inode {}) to {s}: {}\n", .{ self.name, self.inode.hdr.num, path, err }); - out_err.* = wrt.err orelse err; - return; - }; - wrt.interface.flush() catch |err| { - std.log.err("Error flushing data for {s} (inode {}) to {s}: {}\n", .{ self.name, self.inode.hdr.num, path, err }); - out_err.* = wrt.err orelse err; - return; - }; - self.setPerm(fil, options) catch |err| { - std.log.err("Error setting permissions/owner for {s}: {}\n", .{ path, err }); - out_err.* = err; - return; - }; - }, - .symlink, .ext_symlink => { - //TODO: deal with dereference symlink options - const target_path = self.symlinkPath() catch |err| { - 
std.log.err("Error getting symlink target path for {s} (inode {}): {}\n", .{ self.name, self.inode.hdr.num, err }); - out_err.* = err; - return; - }; - std.fs.cwd().symLink(target_path, path, .{}) catch |err| { - std.log.err("Error creating {s}: {}\n", .{ path, err }); - out_err.* = err; - return; - }; - // self.setPerm(fil, options) catch |err| { - // std.log.err("Error setting permissions/owner for {s}: {}\n", .{ path, err }); - // out_err.* = err; - // return; - // }; - }, - .block_dev, - .char_dev, - .fifo, - .ext_block_dev, - .ext_char_dev, - .ext_fifo, - => { - var mode: u32 = undefined; - var fil_dev: u32 = 0; - switch (self.inode.hdr.inode_type) { - .block_dev, .ext_block_dev => { - mode = std.posix.DT.BLK; - fil_dev = self.devNum() catch |err| { - std.log.err("Error getting device number for {s} (inode {}): {}\n", .{ self.name, self.inode.hdr.num, err }); - out_err.* = err; - return; - }; - }, - .char_dev, .ext_char_dev => { - mode = std.posix.DT.CHR; - fil_dev = self.devNum() catch |err| { - std.log.err("Error getting device number for {s} (inode {}): {}\n", .{ self.name, self.inode.hdr.num, err }); - out_err.* = err; - return; - }; - }, - else => mode = std.posix.DT.FIFO, - } - const res = std.os.linux.mknod(@ptrCast(path), mode, fil_dev); - if (res != 0) { - std.log.err("Error creating device file at {s} with code {}\n", .{ path, res }); - out_err.* = error.MknodError; - return; - } - const fil = std.fs.cwd().openFile(path, .{}) catch |err| { - std.log.err("Error openning {s} to set permissions: {}\n", .{ path, err }); - out_err.* = err; - return; - }; - defer fil.close(); - self.setPerm(fil, options) catch |err| { - std.log.err("Error setting permissions/owner for {s}: {}\n", .{ path, err }); - out_err.* = err; - return; - }; - }, - .dir, .ext_dir => { - std.debug.print("starting dir {}: {s}\n", .{ self.inode.hdr.num, self.name }); - if (std.fs.cwd().statFile(path)) |stat| { - if (stat.kind != .directory) { - std.log.err("{s} exists and is not a 
folder\n", .{path}); - out_err.* = FileError.ExtractionPathExists; - return; - } - } else |err| { - if (err == error.FileNotFound) { - std.fs.cwd().makeDir(path) catch |err_2| { - std.log.err("Error creating {s}: {}\n", .{ path, err_2 }); - out_err.* = err; - return; - }; - } else { - std.log.err("Error checking if {s} exists: {}\n", .{ path, err }); - out_err.* = err; - return; - } - } - var dir_wg: *WaitGroup = self.archive.allocator().create(WaitGroup) catch |err| { - std.log.err("Error allocating waitgroup for {s} (inode {}): {}\n", .{ path, self.inode.hdr.num, err }); - out_err.* = err; - return; - }; - const parent_info: ParentInfo = .{ - .sfs_fil = self, - .path = path, - .mut = self.archive.allocator().create(Mutex) catch |err| { - std.log.err("Error allocating mutex for {s} (inode {}): {}\n", .{ path, self.inode.hdr.num, err }); - out_err.* = err; - return; - }, - .dir_wg = dir_wg, - .parent_wg = wg, - .options = options, - .err = out_err, - }; - var iter: Iterator = self.iterate() catch |err| { - std.log.err("Error getting iterator for {s} (inode {}): {}\n", .{ path, self.inode.hdr.num, err }); - out_err.* = err; - return; - }; - defer iter.deinit(); - const path_has_end_sep = path[path.len - 1] == '/'; - while (true) { - const iter_fil = iter.next() catch |err| { - std.log.err("Error getting next iterator value {s} (inode {}): {}\n", .{ path, self.inode.hdr.num, err }); - out_err.* = err; - break; - }; - if (iter_fil == null) break; - const fil = iter_fil.?; - dir_wg.start(); - var path_len = path.len + fil.name.len; - if (!path_has_end_sep) path_len += 1; - var new_path = self.archive.allocator().alloc(u8, path_len) catch |err| { - std.log.err("Error allocating subpath for {s} (inode {}): {}\n", .{ path, self.inode.hdr.num, err }); - out_err.* = err; - dir_wg.finish(); - break; - }; - @memcpy(new_path[0..path.len], path); - @memcpy(new_path[new_path.len - fil.name.len ..], fil.name); - if (!path_has_end_sep) new_path[path.len] = '/'; - if (fil.isDir()) 
{ - fil.extractReal(new_path, options, pol, wg, out_err, parent_info); - } else { - pol.spawn(extractReal, .{ - fil, - new_path, - options, - pol, - wg, - out_err, - parent_info, - }) catch |err| { - std.log.err("Error starting sub-file extraction thread: {}\n", .{err}); - out_err.* = err; - dir_wg.finish(); - break; - }; - } - } - }, - .socket, .ext_socket => { - std.log.info("Ignoring socket file {s} (inode {})\n", .{ self.name, self.inode.hdr.num }); - }, - } -} - -fn setPerm(self: SfsFile, fil: File, options: ExtractionOptions) !void { - if (!options.ignoreOwner) try fil.chmod(self.inode.hdr.permissions); - if (!options.ignorePermissions) try fil.chown(try self.ownerUid(), try self.ownerGid()); + //TODO: switch to threaded version. + return self.inode.extractTo(self.archive, path, options); } /// Utility function. diff --git a/src/inode.zig b/src/inode.zig index 48f7deb..1b2a92d 100644 --- a/src/inode.zig +++ b/src/inode.zig @@ -2,10 +2,17 @@ const std = @import("std"); const Reader = std.Io.Reader; +const WaitGroup = std.Thread.WaitGroup; +const Pool = std.Thread.Pool; +const Archive = @import("archive.zig"); +const DirEntry = @import("dir_entry.zig"); +const ExtractionOptions = @import("options.zig"); const dir = @import("inode_data/dir.zig"); const file = @import("inode_data/file.zig"); const misc = @import("inode_data/misc.zig"); +const DataReader = @import("util/data.zig"); +const MetadataReader = @import("util/metadata.zig"); pub const Ref = packed struct { block_offset: u16, @@ -94,3 +101,213 @@ pub fn deinit(self: Inode, alloc: std.mem.Allocator) void { else => {}, } } + +/// Get the data reader for a file inode. 
+pub fn dataReader(self: Inode, archive: *Archive) !DataReader { + return switch (self.hdr.inode_type) { + .file => readerFromData(archive, self.data.file), + .ext_file => readerFromData(archive, self.data.ext_file), + else => error.NotRegularFile, + }; +} +fn readerFromData(archive: *Archive, data: anytype) !DataReader { + var out: DataReader = .init(archive, data.block_sizes, data.block_start, data.size); + if (data.frag_idx != 0xFFFFFFFF) + out.addFragment(try archive.frag(data.frag_idx), data.frag_block_offset); + return out; +} + +/// Get the directory entries for a directory inode. +pub fn dirEntries(self: Inode, archive: *Archive) ![]DirEntry { + return switch (self.hdr.inode_type) { + .dir => entriesFromData(archive, self.data.dir), + .ext_dir => entriesFromData(archive, self.data.ext_dir), + else => error.NotDirectory, + }; +} +fn entriesFromData(archive: *Archive, data: anytype) ![]DirEntry { + var rdr = try archive.fil.readerAt(archive.super.dir_start + data.block_start, &[0]u8{}); + const alloc = archive.allocator(); + var meta: MetadataReader = .init(alloc, &rdr.interface, archive.decomp); + try meta.interface.discardAll(data.block_offset); + return DirEntry.readDir(alloc, &meta.interface, data.size); +} + +pub fn extractTo(self: Inode, archive: *Archive, path: []const u8, options: ExtractionOptions) !void { + switch (self.hdr.inode_type) { + .dir, .ext_dir => { + // Removing any trailing separators since that's the easiest path forward. + if (path[path.len - 1] == '/') return self.extractTo(archive, path[0 .. 
path.len - 1], options); + var alloc = archive.allocator(); + const entries = try self.dirEntries(archive); + defer { + for (entries) |entry| entry.deinit(alloc); + alloc.free(entries); + } + for (entries) |entry| { + var new_path = try alloc.alloc(u8, path.len + 1 + entry.name.len); + @memcpy(new_path[0..path.len], path); + @memcpy(new_path[path.len + 1 ..], entry.name); + new_path[path.len] = '/'; + defer alloc.free(new_path); + + var rdr = try archive.fil.readerAt(archive.super.inode_start + entry.block_start, &[0]u8{}); + var meta: MetadataReader = .init(alloc, &rdr.interface, archive.decomp); + try meta.interface.discardAll(entry.block_offset); + var inode: Inode = try read(alloc, &meta.interface, archive.super.block_size); + defer inode.deinit(alloc); + try inode.extractTo(archive, new_path, options); + } + }, + .file, .ext_file => try self.extractRegFile(archive, path, options), + .symlink, .ext_symlink => try self.extractSymlink(path), + else => try self.extractDevice(archive, path, options), + } +} + +const Perms = struct { + path: []const u8, + owner: u16, + perm: u16, + mod_time: u32, +}; + +pub fn extractToThreaded(inode: Inode, archive: *Archive, path: []const u8, options: ExtractionOptions, threads: usize) !void { + _ = archive; + _ = path; + _ = options; + _ = threads; + switch (inode.hdr.inode_type) {} +} + +/// Extract threadedly the inode to the path. 
+fn extractThread(inode: Inode, archive: *Archive, path: []const u8, options: ExtractionOptions, wg: *WaitGroup, pool: *Pool, perms: ?*std.ArrayList(Perms)) !void {
+    // pool and perms stay unused until the dir/file branches below are implemented.
+    _ = pool;
+    _ = perms;
+    switch (inode.hdr.inode_type) {
+        .dir, .ext_dir => {
+            // TODO
+            return error.TODO;
+        },
+        .file, .ext_file => {
+            // TODO
+            return error.TODO;
+        },
+        .symlink, .ext_symlink => {
+            defer wg.finish();
+            try inode.extractSymlink(path);
+        },
+        else => {
+            defer wg.finish();
+            try inode.extractDevice(archive, path, options);
+        },
+    }
+}
+/// Creates and writes the inode file contents to the given path.
+/// Sets the modification time, and owner & permissions unless options.ignore_permissions is set.
+///
+/// Assumes the inode is a file or ext_file type.
+fn extractRegFile(self: Inode, archive: *Archive, path: []const u8, options: ExtractionOptions) !void {
+    const fil = try std.fs.cwd().createFile(path, .{});
+    defer fil.close();
+    var buf: [8192]u8 = undefined;
+    var wrt = fil.writer(&buf);
+    var dat_rdr = try self.dataReader(archive);
+    defer dat_rdr.deinit();
+    _ = try dat_rdr.interface.streamRemaining(&wrt.interface);
+    try wrt.interface.flush();
+    // updateTimes takes nanoseconds; mod_time is in seconds. Widen before scaling (`^` is XOR in Zig, not a power operator).
+    try fil.updateTimes(@as(i128, self.hdr.mod_time) * std.time.ns_per_s, @as(i128, self.hdr.mod_time) * std.time.ns_per_s);
+    if (!options.ignore_permissions) {
+        try fil.chmod(self.hdr.permissions);
+        try fil.chown(try archive.id(self.hdr.uid_idx), try archive.id(self.hdr.gid_idx));
+    }
+    if (!options.ignore_xattr) {
+        // TODO
+    }
+}
+/// TODO: not implemented
+/// Extract the inode file contents to the given path.
+/// The extraction will be done threaded using pool for threads and will call wg.finish() when done.
+///
+/// Optionally set owner & permissions.
+/// Assumes the inode is a file or ext_file type.
+fn extractRegFileThreaded(self: Inode, archive: *Archive, path: []const u8, options: ExtractionOptions, pool: *Pool, wg: *WaitGroup) !void {
+    _ = self;
+    _ = archive;
+    _ = path;
+    _ = options;
+    _ = pool;
+    _ = wg;
+    return error.TODO;
+}
+/// Creates the symlink described by the inode.
+///
+/// Assumes the inode is a symlink or ext_symlink type.
+fn extractSymlink(self: Inode, path: []const u8) !void {
+    const target = switch (self.data) {
+        .symlink => |s| s.target,
+        .ext_symlink => |s| s.target,
+        else => unreachable,
+    };
+    try std.fs.cwd().symLink(target, path, .{});
+}
+/// Creates the device described by the inode via mknod, then sets its modification time.
+///
+/// Sets owner & permissions unless options.ignore_permissions is set.
+/// Assumes the inode is a char_dev, block_dev, fifo, socket, or their extended counterparts.
+fn extractDevice(self: Inode, archive: *Archive, path: []const u8, options: ExtractionOptions) !void {
+    var mode: u32 = undefined;
+    var dev: u32 = 0;
+    switch (self.data) {
+        .char_dev => |d| {
+            mode = std.posix.S.IFCHR;
+            dev = d.dev;
+        },
+        .ext_char_dev => |d| {
+            mode = std.posix.S.IFCHR;
+            dev = d.dev;
+        },
+        .block_dev => |d| {
+            mode = std.posix.S.IFBLK;
+            dev = d.dev;
+        },
+        .ext_block_dev => |d| {
+            mode = std.posix.S.IFBLK;
+            dev = d.dev;
+        },
+        .fifo, .ext_fifo => mode = std.posix.S.IFIFO,
+        .socket, .ext_socket => mode = std.posix.S.IFSOCK,
+        else => unreachable,
+    }
+    const res = std.os.linux.E.init(std.os.linux.mknod(&(try std.posix.toPosixPath(path)), mode, dev)); // raw syscall needs a NUL-terminated path and returns -errno as usize
+    switch (res) {
+        .SUCCESS => {},
+        .ACCES => return std.fs.Dir.MakeError.AccessDenied,
+        .DQUOT => return std.fs.Dir.MakeError.DiskQuota,
+        .EXIST => return std.fs.Dir.MakeError.PathAlreadyExists,
+        .FAULT, .NOENT => return std.fs.Dir.MakeError.BadPathName,
+        .LOOP => return std.fs.Dir.MakeError.SymLinkLoop,
+        .NAMETOOLONG => return std.fs.Dir.MakeError.NameTooLong,
+        .NOMEM => return std.fs.Dir.MakeError.SystemResources,
+        .NOSPC => return std.fs.Dir.MakeError.NoSpaceLeft,
+        .NOTDIR => return std.fs.Dir.MakeError.NotDir,
+        .PERM => return std.fs.Dir.MakeError.PermissionDenied,
+        .ROFS => return std.fs.Dir.MakeError.ReadOnlyFileSystem,
+        else => return blk: {
+            std.debug.print("unhandled mknod result: {}\n", .{res});
+            break :blk std.fs.Dir.MakeError.Unexpected;
+        },
+    }
+    const fil = try std.fs.cwd().openFile(path, .{});
+    defer fil.close();
+    try fil.updateTimes(@as(i128, self.hdr.mod_time) * std.time.ns_per_s, @as(i128, self.hdr.mod_time) * std.time.ns_per_s); // mod_time is seconds; updateTimes takes nanoseconds
+    if (!options.ignore_permissions) {
+        try fil.chmod(self.hdr.permissions);
+        try fil.chown(try archive.id(self.hdr.uid_idx), try archive.id(self.hdr.gid_idx));
+    }
+    if (!options.ignore_xattr) {
+        // TODO
+    }
+}
diff --git a/src/options.zig b/src/options.zig
index 16402cf..94ae525 100644
--- a/src/options.zig
+++ b/src/options.zig
@@ -5,16 +5,21 @@ const Writer = std.Io.Writer;
 
 const ExtractionOptions = @This();
 
-/// Don't set the file's permissions after extraction
-ignorePermissions: bool = false,
-/// Don't set the file's owner after extraction.
-ignoreOwner: bool = false,
+/// Don't set the file's owner & permissions after extraction
+ignore_permissions: bool = false,
+/// Don't set xattr values. Currently xattrs are never set anyway.
+ignore_xattr: bool = false,
 /// Replace symlinks with their target.
-dereferenceSymlinks: bool = false,
-
-log_level: std.log.Level = .err,
-// /// If options verbose and verboseWriter not set, logs are printed to stdout.
-// verboseWriter: ?*Writer = null,
+dereference_symlinks: bool = false,
+/// Verbose logging. If true, verbose_writer must be set
+verbose: bool = false,
+/// Where to print verbose log.
+verbose_writer: ?*Writer = null, pub const Default: ExtractionOptions = .{}; -pub const VerboseDefault: ExtractionOptions = .{ .log_level = .debug }; +pub fn VerboseDefault(wrt: *Writer) ExtractionOptions { + return .{ + .verbose = true, + .verbose_writer = wrt, + }; +} diff --git a/src/table.zig b/src/table.zig index 51c16b4..c98e85d 100644 --- a/src/table.zig +++ b/src/table.zig @@ -1,7 +1,7 @@ const std = @import("std"); const Mutex = std.Thread.Mutex; -const DecompMgr = @import("decomp.zig"); +const DecompFn = @import("decomp.zig").DecompFn; const MetadataReader = @import("util/metadata.zig"); const OffsetFile = @import("util/offset_file.zig"); @@ -18,7 +18,7 @@ pub fn Table(T: anytype) type { alloc: std.mem.Allocator, fil: OffsetFile, - decomp: *DecompMgr, + decomp: DecompFn, tab_start: u64, tab: std.AutoHashMap(u32, []T), @@ -26,7 +26,7 @@ pub fn Table(T: anytype) type { mut: Mutex = .{}, - pub fn init(alloc: std.mem.Allocator, fil: OffsetFile, decomp: *DecompMgr, tab_start: u64, values: u32) !Self { + pub fn init(alloc: std.mem.Allocator, fil: OffsetFile, decomp: DecompFn, tab_start: u64, values: u32) !Self { return .{ .alloc = alloc, .fil = fil, diff --git a/src/util/data.zig b/src/util/data.zig index c803494..ae7c424 100644 --- a/src/util/data.zig +++ b/src/util/data.zig @@ -7,7 +7,7 @@ const Limit = std.Io.Limit; const Archive = @import("../archive.zig"); const FragEntry = Archive.FragEntry; -const DecompMgr = @import("../decomp.zig"); +const DecompFn = @import("../decomp.zig").DecompFn; const BlockSize = @import("../inode_data/file.zig").BlockSize; const OffsetFile = @import("offset_file.zig"); @@ -15,7 +15,7 @@ const DataReader = @This(); alloc: std.mem.Allocator, fil: OffsetFile, -decomp: *DecompMgr, +decomp: DecompFn, block_size: u32, blocks: []BlockSize, @@ -33,7 +33,7 @@ pub fn init(archive: *Archive, blocks: []BlockSize, start: u64, size: u64) DataR return .{ .alloc = archive.allocator(), .fil = archive.fil, - .decomp = &archive.decomp, + .decomp 
= archive.decomp, .block_size = archive.super.block_size, .blocks = blocks, .size = size, @@ -91,10 +91,10 @@ fn advance(self: *DataReader) !void { } const tmp_buf = try self.alloc.alloc(u8, self.frag.?.size.size); defer self.alloc.free(tmp_buf); - var limit_rdr = Reader.limited(&rdr.interface, @enumFromInt(self.frag.?.size.size), tmp_buf); + try rdr.interface.readSliceAll(tmp_buf); const needed_block = try self.alloc.alloc(u8, self.frag_offset + cur_block_size); defer self.alloc.free(needed_block); - _ = try self.decomp.decompReader(&limit_rdr.interface, needed_block); + _ = try self.decomp(self.alloc, tmp_buf, needed_block); @memcpy(self.interface.buffer, needed_block[self.frag_offset..]); return; } @@ -109,9 +109,9 @@ fn advance(self: *DataReader) !void { try rdr.interface.readSliceAll(self.interface.buffer); return; } - var buf: [8192]u8 = undefined; //TODO: possibly change for better performance/memory usage. Might need to be a full block in size. - var limit_rdr = Reader.limited(&rdr.interface, @enumFromInt(block.size), &buf); - _ = try self.decomp.decompReader(&limit_rdr.interface, self.interface.buffer); + const tmp_buf = try self.alloc.alloc(u8, block.size); + defer self.alloc.free(tmp_buf); + _ = try self.decomp(self.alloc, tmp_buf, self.interface.buffer); } /// Does not guarentee that data currently in the buffer is retained. 
fn resizeBuffer(self: *DataReader, size: usize) !void { diff --git a/src/util/metadata.zig b/src/util/metadata.zig index bb0419f..2984c7c 100644 --- a/src/util/metadata.zig +++ b/src/util/metadata.zig @@ -4,7 +4,7 @@ const Writer = std.Io.Writer; const Limit = std.Io.Limit; const StreamError = std.Io.Reader.StreamError; -const DecompMgr = @import("../decomp.zig"); +const DecompFn = @import("../decomp.zig").DecompFn; const BlockHeader = packed struct { size: u15, @@ -15,14 +15,14 @@ const This = @This(); alloc: std.mem.Allocator, rdr: *Reader, -decomp: *DecompMgr, +decomp: DecompFn, buf: [8192]u8 = undefined, interface: Reader, err: ?anyerror = null, -pub fn init(alloc: std.mem.Allocator, rdr: *Reader, decomp: *DecompMgr) This { +pub fn init(alloc: std.mem.Allocator, rdr: *Reader, decomp: DecompFn) This { return .{ .alloc = alloc, .rdr = rdr, @@ -51,8 +51,8 @@ fn advance(self: *This) !void { return; } var tmp_buf: [8192]u8 = undefined; - var limit_rdr = self.rdr.limited(@enumFromInt(hdr.size), &tmp_buf); - self.interface.end = try self.decomp.decompReader(&limit_rdr.interface, &self.buf); + try self.rdr.readSliceAll(tmp_buf[0..hdr.size]); + self.interface.end = try self.decomp(self.alloc, tmp_buf[0..hdr.size], &self.buf); self.interface.buffer = self.buf[0..self.interface.end]; } From bcfd983f8d8dd3b1813e4abbc8712faa846e97f8 Mon Sep 17 00:00:00 2001 From: "Caleb J. Gardner" Date: Sat, 7 Feb 2026 06:28:27 -0600 Subject: [PATCH 2/6] Fixed a handful of errors when extracting. 
Fixed issues with unsquashfs --- src/bin/unsquashfs.zig | 3 +++ src/decomp.zig | 14 +++++++++----- src/dir_entry.zig | 10 +++------- src/file.zig | 2 +- src/inode.zig | 5 ++++- src/test.zig | 4 ++-- src/util/data.zig | 1 + 7 files changed, 23 insertions(+), 16 deletions(-) diff --git a/src/bin/unsquashfs.zig b/src/bin/unsquashfs.zig index 4ff213e..0eb90d3 100644 --- a/src/bin/unsquashfs.zig +++ b/src/bin/unsquashfs.zig @@ -34,6 +34,7 @@ pub fn main() !void { fn handleArgs(alloc: std.mem.Allocator, out: *Writer) !void { var args = try std.process.argsWithAllocator(alloc); defer args.deinit(); + _ = args.next(); // args[0] is the application launch command. while (args.next()) |arg| { if (std.mem.eql(u8, arg, "-o")) { const nxt = args.next(); @@ -45,6 +46,7 @@ fn handleArgs(alloc: std.mem.Allocator, out: *Writer) !void { try out.print("-o must be followed by a number\n", .{}); return errors.InvalidArguments; }; + continue; } else if (std.mem.eql(u8, arg, "-d")) { const nxt = args.next(); if (nxt == null or nxt.?.len == 0) { @@ -52,6 +54,7 @@ fn handleArgs(alloc: std.mem.Allocator, out: *Writer) !void { return errors.InvalidArguments; } extLoc = nxt.?; + continue; } if (archive.len > 0) { try out.print("you can only provide one file at a time\n", .{}); diff --git a/src/decomp.zig b/src/decomp.zig index 97f7064..5e432da 100644 --- a/src/decomp.zig +++ b/src/decomp.zig @@ -18,9 +18,10 @@ pub const DecompFn = *const fn (alloc: std.mem.Allocator, in: []u8, out: []u8) a // }; pub fn gzipDecompress(alloc: std.mem.Allocator, in: []u8, out: []u8) anyerror!usize { - _ = alloc; var rdr: Reader = .fixed(in); - var decomp = std.compress.flate.Decompress.init(&rdr, .zlib, &[0]u8{}); + const buf = try alloc.alloc(u8, out.len); + defer alloc.free(buf); + var decomp = std.compress.flate.Decompress.init(&rdr, .zlib, buf); return decomp.reader.readSliceShort(out); } @@ -37,8 +38,11 @@ pub fn xzDecompress(alloc: std.mem.Allocator, in: []u8, out: []u8) anyerror!usiz } pub fn 
zstdDecompress(alloc: std.mem.Allocator, in: []u8, out: []u8) anyerror!usize { - _ = alloc; var rdr: Reader = .fixed(in); - var decomp = std.compress.zstd.Decompress.init(&rdr, &[0]u8{}, .{}); - return decomp.reader.readSliceShort(out); + const buf = try alloc.alloc(u8, std.compress.zstd.default_window_len + std.compress.zstd.block_size_max); + defer alloc.free(buf); + var decomp = std.compress.zstd.Decompress.init(&rdr, buf, .{}); + return decomp.reader.readSliceShort(out) catch |err| { + return decomp.err orelse err; + }; } diff --git a/src/dir_entry.zig b/src/dir_entry.zig index 4909f82..191da47 100644 --- a/src/dir_entry.zig +++ b/src/dir_entry.zig @@ -30,17 +30,13 @@ pub fn readDir(alloc: std.mem.Allocator, rdr: *Reader, size: u32) ![]Entry { var cur_red: u32 = 3; // start at 3 due to "." & ".." being counted in the dir size. var hdr: Header = undefined; var raw: RawEntry = undefined; - var out: std.ArrayList(Entry) = try .initCapacity(alloc, 25); // Start out with capacity instead of needing to allocate per header. + var out: std.ArrayList(Entry) = try .initCapacity(alloc, 100); // Start out with a decent capacity instead of needing to allocate per header. errdefer out.deinit(alloc); while (cur_red < size) { try rdr.readSliceEndian(Header, @ptrCast(&hdr), .little); cur_red += @sizeOf(Header); - const count = hdr.count + 1; - if (out.capacity < count) { - // Make sure we have at least 25 capacity past current count. 
- try out.ensureUnusedCapacity(alloc, ((count % 25) + 2) * 25); - } - for (0..count) |_| { + try out.ensureUnusedCapacity(alloc, hdr.count + 1); + for (0..hdr.count + 1) |_| { try rdr.readSliceEndian(RawEntry, @ptrCast(&raw), .little); const name = try alloc.alloc(u8, raw.name_size + 1); errdefer alloc.free(name); diff --git a/src/file.zig b/src/file.zig index c051533..0049692 100644 --- a/src/file.zig +++ b/src/file.zig @@ -42,7 +42,7 @@ pub fn init(archive: *Archive, inode: Inode, name: []const u8) !SfsFile { } pub fn fromEntry(archive: *Archive, entry: DirEntry) !SfsFile { var rdr = try archive.fil.readerAt(entry.block_start + archive.super.inode_start, &[0]u8{}); - var meta: MetadataReader = .init(archive.allocator(), &rdr.interface, &archive.decomp); + var meta: MetadataReader = .init(archive.allocator(), &rdr.interface, archive.decomp); try meta.interface.discardAll(entry.block_offset); const inode: Inode = try .read(archive.allocator(), &meta.interface, archive.super.block_size); errdefer inode.deinit(archive.allocator()); diff --git a/src/inode.zig b/src/inode.zig index 1b2a92d..1c11534 100644 --- a/src/inode.zig +++ b/src/inode.zig @@ -138,6 +138,9 @@ pub fn extractTo(self: Inode, archive: *Archive, path: []const u8, options: Extr .dir, .ext_dir => { // Removing any trailing separators since that's the easiest path forward. if (path[path.len - 1] == '/') return self.extractTo(archive, path[0 .. path.len - 1], options); + std.fs.cwd().makeDir(path) catch |err| { + if (err != std.fs.Dir.MakeError.PathAlreadyExists) return err; + }; var alloc = archive.allocator(); const entries = try self.dirEntries(archive); defer { @@ -218,7 +221,7 @@ fn extractRegFile(self: Inode, archive: *Archive, path: []const u8, options: Ext _ = try dat_rdr.interface.streamRemaining(&wrt.interface); try wrt.interface.flush(); // updateTime is in nanoseconds (a billionth of a second). mod_time is in seconds. 
- try fil.updateTimes(self.hdr.mod_time * (10 ^ 9), self.hdr.mod_time * (10 ^ 9)); + try fil.updateTimes(self.hdr.mod_time, self.hdr.mod_time); if (!options.ignore_permissions) { try fil.chmod(self.hdr.permissions); try fil.chown(try archive.id(self.hdr.uid_idx), try archive.id(self.hdr.gid_idx)); diff --git a/src/test.zig b/src/test.zig index fc99fbd..4bb58cb 100644 --- a/src/test.zig +++ b/src/test.zig @@ -28,7 +28,7 @@ test "ExtractSingleFile" { defer sfs.deinit(); var test_fil = try sfs.open(TestFile); defer test_fil.deinit(); - try test_fil.extract(TestFileExtractLocation, .VerboseDefault); + try test_fil.extract(TestFileExtractLocation, .Default); //TODO: validate extracted file. } @@ -40,7 +40,7 @@ test "ExtractCompleteArchive" { defer fil.close(); var sfs: Archive = try .init(std.testing.allocator, fil); defer sfs.deinit(); - try sfs.extract(TestFullExtractLocation, .VerboseDefault); + try sfs.extract(TestFullExtractLocation, .Default); } const LinuxPATestCorrectSuperblock: Superblock = .{ diff --git a/src/util/data.zig b/src/util/data.zig index ae7c424..aea7310 100644 --- a/src/util/data.zig +++ b/src/util/data.zig @@ -111,6 +111,7 @@ fn advance(self: *DataReader) !void { } const tmp_buf = try self.alloc.alloc(u8, block.size); defer self.alloc.free(tmp_buf); + try rdr.interface.readSliceAll(tmp_buf); _ = try self.decomp(self.alloc, tmp_buf, self.interface.buffer); } /// Does not guarentee that data currently in the buffer is retained. From 704215e1a9b2f12202b8257776b46ce60f4f4e47 Mon Sep 17 00:00:00 2001 From: "Caleb J. Gardner" Date: Sat, 7 Feb 2026 06:42:34 -0600 Subject: [PATCH 3/6] Remove updateTimes because I was running into integer overflow issues. --- README.md | 2 +- src/inode.zig | 6 ++++-- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index 827dfbd..e56856e 100644 --- a/README.md +++ b/README.md @@ -4,4 +4,4 @@ This is my experiments to learn Zig. Might amount to something. Might not. 
## Current State -Kinda works as a library, but currently has known memory leaks. `unsquashfs` is missing a lot of features (and will probably never match the official unsquashfs). Extraction is stupidly slow and uses too many resources. Only properly work on Linux, any other OSes probably won't work fully. +Overall works, but currently is completely single threaded and is missing some features. Extraction is slow. Only properly work on Linux, any other OSes probably won't work fully. diff --git a/src/inode.zig b/src/inode.zig index 1c11534..0468e88 100644 --- a/src/inode.zig +++ b/src/inode.zig @@ -221,7 +221,8 @@ fn extractRegFile(self: Inode, archive: *Archive, path: []const u8, options: Ext _ = try dat_rdr.interface.streamRemaining(&wrt.interface); try wrt.interface.flush(); // updateTime is in nanoseconds (a billionth of a second). mod_time is in seconds. - try fil.updateTimes(self.hdr.mod_time, self.hdr.mod_time); + // TODO: fix + // try fil.updateTimes(self.hdr.mod_time, self.hdr.mod_time); if (!options.ignore_permissions) { try fil.chmod(self.hdr.permissions); try fil.chown(try archive.id(self.hdr.uid_idx), try archive.id(self.hdr.gid_idx)); @@ -305,7 +306,8 @@ fn extractDevice(self: Inode, archive: *Archive, path: []const u8, options: Extr } var fil = try std.fs.cwd().openFile(path, .{}); // updateTime is in nanoseconds (a billionth of a second). mod_time is in seconds. - try fil.updateTimes(self.hdr.mod_time * (10 ^ 9), self.hdr.mod_time * (10 ^ 9)); + // TODO: fix + // try fil.updateTimes(self.hdr.mod_time, self.hdr.mod_time); if (!options.ignore_permissions) { try fil.chmod(self.hdr.permissions); try fil.chown(try archive.id(self.hdr.uid_idx), try archive.id(self.hdr.gid_idx)); From b64a3ec44a6c85f452a846f8dae5a2e3fe6a1013 Mon Sep 17 00:00:00 2001 From: "Caleb J. Gardner" Date: Sat, 7 Feb 2026 06:57:55 -0600 Subject: [PATCH 4/6] Archive.extract now directly uses Inode instead of File. 
--- build.zig | 4 +++- src/archive.zig | 23 ++++++++++++++++++++--- 2 files changed, 23 insertions(+), 4 deletions(-) diff --git a/build.zig b/build.zig index 0d51302..8212fbd 100644 --- a/build.zig +++ b/build.zig @@ -1,14 +1,16 @@ const std = @import("std"); pub fn build(b: *std.Build) void { + const static = b.option(bool, "static_build", "Build static"); const target = b.standardTargetOptions(.{}); const optimize = b.standardOptimizeOption(.{ .preferred_optimize_mode = .ReleaseFast }); const linkage: std.builtin.LinkMode = .static; // TODO: Add argument to set link mode. + const use_c_libs: bool = false; + _ = use_c_libs; const mod = b.addModule("zig_squashfs", .{ .root_source_file = b.path("src/root.zig"), .target = target, .optimize = optimize, - // .imports = &.{}, }); const exe = b.addExecutable(.{ .name = "unsquashfs", diff --git a/src/archive.zig b/src/archive.zig index 5dab2ad..ec41ca2 100644 --- a/src/archive.zig +++ b/src/archive.zig @@ -141,7 +141,24 @@ pub fn open(self: *Archive, path: []const u8) !SfsFile { pub fn extract(self: *Archive, path: []const u8, options: ExtractionOptions) !void { if (!self.setup) try self.setupValues(); - var root_fil = try self.root(); - defer root_fil.deinit(); - return root_fil.extract(path, options); + var alloc = self.allocator(); + var ext_path: []u8 = undefined; + if (std.fs.cwd().statFile(path)) |stat| { + if (stat.kind == .directory) { + ext_path = @constCast(path); + } else return error.ExtractionPathExists; + } else |err| { + if (err == error.FileNotFound) { + ext_path = @constCast(path); + } else { + std.log.err("Error stat-ing extraction path {s}: {}\n", .{ path, err }); + return err; + } + } + defer if (ext_path.len > path.len) alloc.free(ext_path); + var rdr = try self.fil.readerAt(self.super.root_ref.block_start + self.super.inode_start, &[0]u8{}); + var meta: MetadataReader = .init(self.allocator(), &rdr.interface, self.decomp); + try meta.interface.discardAll(self.super.root_ref.block_offset); + const 
in: Inode = try .read(self.allocator(), &meta.interface, self.super.block_size); + try in.extractTo(self, ext_path, options); } From 067eaa87c276cac2081a21bf45a6a0638d774131 Mon Sep 17 00:00:00 2001 From: "Caleb J. Gardner" Date: Sat, 7 Feb 2026 10:58:32 -0600 Subject: [PATCH 5/6] You can now set when building to use c or zig libraries. --- build.zig | 43 +++++++++----- src/bin/unsquashfs.zig | 16 +++++- src/decomp.zig | 126 ++++++++++++++++++++++++++++++++++++++--- src/util/data.zig | 4 +- 4 files changed, 164 insertions(+), 25 deletions(-) diff --git a/build.zig b/build.zig index 8212fbd..e75861a 100644 --- a/build.zig +++ b/build.zig @@ -1,29 +1,44 @@ const std = @import("std"); -pub fn build(b: *std.Build) void { - const static = b.option(bool, "static_build", "Build static"); +pub fn build(b: *std.Build) !void { + const static_option = b.option(bool, "static_build", "Build static"); + const use_c_libs_option = b.option(bool, "use_c_libs", "Use C versions of decompression libraries instead of the Zig standard library ones"); + const version_string_option = b.option([]const u8, "version", "Version of the library/binary"); + + const zig_squashfs_options = b.addOptions(); + zig_squashfs_options.addOption(bool, "use_c_libs", use_c_libs_option orelse false); + const target = b.standardTargetOptions(.{}); const optimize = b.standardOptimizeOption(.{ .preferred_optimize_mode = .ReleaseFast }); - const linkage: std.builtin.LinkMode = .static; // TODO: Add argument to set link mode. 
- const use_c_libs: bool = false; - _ = use_c_libs; const mod = b.addModule("zig_squashfs", .{ .root_source_file = b.path("src/root.zig"), .target = target, .optimize = optimize, + .link_libc = if (use_c_libs_option == true) true else false, }); + mod.addOptions("config", zig_squashfs_options); + if (use_c_libs_option == true) + mod.linkSystemLibrary("zstd", .{}); + + const unsquashfs_options = b.addOptions(); + unsquashfs_options.addOption(std.SemanticVersion, "version_string", try std.SemanticVersion.parse(version_string_option orelse "0.0.0-testing")); + + var exe_mod = b.createModule(.{ + .root_source_file = b.path("src/bin/unsquashfs.zig"), + .target = target, + .optimize = optimize, + .link_libc = if (use_c_libs_option == true) true else false, + .imports = &.{ + .{ .name = "zig_squashfs", .module = mod }, + }, + }); + exe_mod.addOptions("config", unsquashfs_options); const exe = b.addExecutable(.{ .name = "unsquashfs", - .linkage = linkage, - .root_module = b.createModule(.{ - .root_source_file = b.path("src/bin/unsquashfs.zig"), - .target = target, - .optimize = optimize, - .imports = &.{ - .{ .name = "zig_squashfs", .module = mod }, - }, - }), + .linkage = if (static_option == true) .static else .dynamic, + .root_module = exe_mod, }); + b.installArtifact(exe); const run_step = b.step("run", "Run the app"); const run_cmd = b.addRunArtifact(exe); diff --git a/src/bin/unsquashfs.zig b/src/bin/unsquashfs.zig index 0eb90d3..41b844f 100644 --- a/src/bin/unsquashfs.zig +++ b/src/bin/unsquashfs.zig @@ -1,6 +1,7 @@ const std = @import("std"); const Writer = std.Io.Writer; +const config = @import("config"); const squashfs = @import("zig_squashfs"); //TODO: Add more options @@ -8,8 +9,11 @@ const help_mgs = \\Usage: unsquashfs [options] \\ \\Options: - \\ -o Start reading the archive at the given offset. \\ -d Extract to the given location instead of "squashfs-root" + \\ + \\ -o Start reading the archive at the given offset. 
+ \\ + \\ --version Display the version ; const errors = error{InvalidArguments}; @@ -24,6 +28,10 @@ pub fn main() !void { var out = stdout.writer(&[0]u8{}); defer out.interface.flush() catch {}; try handleArgs(alloc, &out.interface); + if (archive.len == 0) { + try out.interface.print("You must provide a squashfs archive\n", .{}); + return; + } var fil: std.fs.File = try std.fs.cwd().openFile(archive, .{}); //TODO: Handle error gracefully. defer fil.close(); var arc: squashfs.Archive = try .initAdvanced(alloc, fil, offset, try std.Thread.getCpuCount(), 0); //TODO: Update when memory size matters. //TODO: Handle error gracefully. @@ -55,6 +63,12 @@ fn handleArgs(alloc: std.mem.Allocator, out: *Writer) !void { } extLoc = nxt.?; continue; + } else if (std.mem.eql(u8, arg, "--version")) { + _ = try out.write("v"); + try config.version_string.format(out); + _ = try out.write("\n"); + std.process.cleanExit(); + return; } if (archive.len > 0) { try out.print("you can only provide one file at a time\n", .{}); diff --git a/src/decomp.zig b/src/decomp.zig index 5e432da..975d0f8 100644 --- a/src/decomp.zig +++ b/src/decomp.zig @@ -1,6 +1,15 @@ +//! Implementations for decompression. +//! TODO: change to vtable interface to allow for shared decompressors for better performance/resource usage. + const std = @import("std"); const Reader = std.Io.Reader; +const config = @import("config"); + +const c = @cImport({ + @cInclude("zstd.h"); +}); + pub const CompressionType = enum(u16) { gzip = 1, lzma, @@ -12,32 +21,53 @@ pub const CompressionType = enum(u16) { pub const DecompFn = *const fn (alloc: std.mem.Allocator, in: []u8, out: []u8) anyerror!usize; // TODO: replace anyerror to definitive error types. 
-// pub const DecompressError = error{ -// ReadFailed, -// anyerror, -// }; +pub const gzipDecompress = if (config.use_c_libs) cGzip else zigGzip; -pub fn gzipDecompress(alloc: std.mem.Allocator, in: []u8, out: []u8) anyerror!usize { +fn zigGzip(alloc: std.mem.Allocator, in: []u8, out: []u8) anyerror!usize { var rdr: Reader = .fixed(in); const buf = try alloc.alloc(u8, out.len); defer alloc.free(buf); var decomp = std.compress.flate.Decompress.init(&rdr, .zlib, buf); return decomp.reader.readSliceShort(out); } +fn cGzip(alloc: std.mem.Allocator, in: []u8, out: []u8) anyerror!usize { + _ = alloc; + _ = in; + _ = out; + return error.TODO; +} -pub fn lzmaDecompress(alloc: std.mem.Allocator, in: []u8, out: []u8) anyerror!usize { +pub const lzmaDecompress = if (config.use_c_libs) cLzma else zigLzma; + +fn zigLzma(alloc: std.mem.Allocator, in: []u8, out: []u8) anyerror!usize { var rdr: Reader = .fixed(in); var decomp = try std.compress.lzma.decompress(alloc, rdr.adaptToOldInterface()); return decomp.read(out); } +fn cLzma(alloc: std.mem.Allocator, in: []u8, out: []u8) anyerror!usize { + _ = alloc; + _ = in; + _ = out; + return error.TODO; +} -pub fn xzDecompress(alloc: std.mem.Allocator, in: []u8, out: []u8) anyerror!usize { +pub const xzDecompress = if (config.use_c_libs) cXz else zigXz; + +fn zigXz(alloc: std.mem.Allocator, in: []u8, out: []u8) anyerror!usize { var rdr: Reader = .fixed(in); var decomp = try std.compress.xz.decompress(alloc, rdr.adaptToOldInterface()); return decomp.read(out); } +fn cXz(alloc: std.mem.Allocator, in: []u8, out: []u8) anyerror!usize { + _ = alloc; + _ = in; + _ = out; + return error.TODO; +} -pub fn zstdDecompress(alloc: std.mem.Allocator, in: []u8, out: []u8) anyerror!usize { +pub const zstdDecompress = if (config.use_c_libs) cZstd else zigZstd; + +pub fn zigZstd(alloc: std.mem.Allocator, in: []u8, out: []u8) anyerror!usize { var rdr: Reader = .fixed(in); const buf = try alloc.alloc(u8, std.compress.zstd.default_window_len + 
std.compress.zstd.block_size_max); defer alloc.free(buf); @@ -46,3 +76,83 @@ pub fn zstdDecompress(alloc: std.mem.Allocator, in: []u8, out: []u8) anyerror!us return decomp.err orelse err; }; } +fn cZstd(alloc: std.mem.Allocator, in: []u8, out: []u8) anyerror!usize { + _ = alloc; + const res = c.ZSTD_decompress(out.ptr, out.len, in.ptr, in.len); + if (c.ZSTD_isError(res) == 0) return res; + return switch (c.ZSTD_getErrorCode(res)) { + c.ZSTD_error_prefix_unknown => cZstdError.PrefixUnknown, + c.ZSTD_error_version_unsupported => cZstdError.VersionUnsupported, + c.ZSTD_error_frameParameter_unsupported => cZstdError.FrameParameterUnsupported, + c.ZSTD_error_frameParameter_windowTooLarge => cZstdError.FrameParameterWindowTooLarge, + c.ZSTD_error_corruption_detected => cZstdError.CorruptionDetected, + c.ZSTD_error_checksum_wrong => cZstdError.ChecksumWrong, + c.ZSTD_error_literals_headerWrong => cZstdError.LiteralsHeaderWrong, + c.ZSTD_error_dictionary_corrupted => cZstdError.DictionaryCorrupted, + c.ZSTD_error_dictionary_wrong => cZstdError.DictionaryWrong, + c.ZSTD_error_dictionaryCreation_failed => cZstdError.DictionaryCreationFailed, + c.ZSTD_error_parameter_unsupported => cZstdError.ParameterUnsupported, + c.ZSTD_error_parameter_combination_unsupported => cZstdError.ParameterCombinationUnsupported, + c.ZSTD_error_parameter_outOfBound => cZstdError.ParameterOutOfBound, + c.ZSTD_error_tableLog_tooLarge => cZstdError.TableLogTooLarge, + c.ZSTD_error_maxSymbolValue_tooLarge => cZstdError.MaxSymbolValueTooLarge, + c.ZSTD_error_maxSymbolValue_tooSmall => cZstdError.MaxSymbolValueTooSmall, + c.ZSTD_error_cannotProduce_uncompressedBlock => cZstdError.CannotProduceUncompressedBlock, + c.ZSTD_error_stabilityCondition_notRespected => cZstdError.StabilityConditionNotRespected, + c.ZSTD_error_stage_wrong => cZstdError.StageWrong, + c.ZSTD_error_init_missing => cZstdError.InitMissing, + c.ZSTD_error_memory_allocation => cZstdError.MemoryAllocation, + 
c.ZSTD_error_workSpace_tooSmall => cZstdError.WorkSpaceTooSmall, + c.ZSTD_error_dstSize_tooSmall => cZstdError.DstSizeTooSmall, + c.ZSTD_error_srcSize_wrong => cZstdError.SrcSizeWrong, + c.ZSTD_error_dstBuffer_null => cZstdError.DstBufferNull, + c.ZSTD_error_noForwardProgress_destFull => cZstdError.NoForwardProgressDestFull, + c.ZSTD_error_noForwardProgress_inputEmpty => cZstdError.NoForwardProgressInputEmpty, + c.ZSTD_error_frameIndex_tooLarge => cZstdError.FrameIndexTooLarge, + c.ZSTD_error_seekableIO => cZstdError.SeekableIo, + c.ZSTD_error_dstBuffer_wrong => cZstdError.DstBufferWrong, + c.ZSTD_error_srcBuffer_wrong => cZstdError.SrcBufferWrong, + c.ZSTD_error_sequenceProducer_failed => cZstdError.SequenceProducerFailed, + c.ZSTD_error_externalSequences_invalid => cZstdError.ExternalSequencesInvalid, + c.ZSTD_error_maxCode => cZstdError.MaxCode, + else => cZstdError.Generic, + }; +} + +pub const cZstdError = error{ + Generic, + PrefixUnknown, + VersionUnsupported, + FrameParameterUnsupported, + FrameParameterWindowTooLarge, + CorruptionDetected, + ChecksumWrong, + LiteralsHeaderWrong, + DictionaryCorrupted, + DictionaryWrong, + DictionaryCreationFailed, + ParameterUnsupported, + ParameterCombinationUnsupported, + ParameterOutOfBound, + TableLogTooLarge, + MaxSymbolValueTooLarge, + MaxSymbolValueTooSmall, + CannotProduceUncompressedBlock, + StabilityConditionNotRespected, + StageWrong, + InitMissing, + MemoryAllocation, + WorkSpaceTooSmall, + DstSizeTooSmall, + SrcSizeWrong, + DstBufferNull, + NoForwardProgressDestFull, + NoForwardProgressInputEmpty, + FrameIndexTooLarge, + SeekableIo, + DstBufferWrong, + SrcBufferWrong, + SequenceProducerFailed, + ExternalSequencesInvalid, + MaxCode, +}; diff --git a/src/util/data.zig b/src/util/data.zig index aea7310..2cde849 100644 --- a/src/util/data.zig +++ b/src/util/data.zig @@ -92,10 +92,10 @@ fn advance(self: *DataReader) !void { const tmp_buf = try self.alloc.alloc(u8, self.frag.?.size.size); defer 
self.alloc.free(tmp_buf); try rdr.interface.readSliceAll(tmp_buf); - const needed_block = try self.alloc.alloc(u8, self.frag_offset + cur_block_size); + const needed_block = try self.alloc.alloc(u8, self.block_size); defer self.alloc.free(needed_block); _ = try self.decomp(self.alloc, tmp_buf, needed_block); - @memcpy(self.interface.buffer, needed_block[self.frag_offset..]); + @memcpy(self.interface.buffer, needed_block[self.frag_offset .. self.frag_offset + cur_block_size]); return; } const block = self.blocks[self.block_idx]; From db2fb4b9f25a6eadff5103566ab6ad63e6d2e934 Mon Sep 17 00:00:00 2001 From: "Caleb J. Gardner" Date: Sat, 7 Feb 2026 11:12:48 -0600 Subject: [PATCH 6/6] Fixed compilation when using zig test. Remove option for static since -static should suffice in most situations --- README.md | 21 +++++++++++++++++++++ build.zig | 2 -- run_tests.sh | 3 +++ src/decomp.zig | 3 ++- 4 files changed, 26 insertions(+), 3 deletions(-) create mode 100755 run_tests.sh diff --git a/README.md b/README.md index e56856e..3174216 100644 --- a/README.md +++ b/README.md @@ -2,6 +2,27 @@ This is my experiments to learn Zig. Might amount to something. Might not. +A library and application to decompress or view squashfs archives. + ## Current State Overall works, but currently is completely single threaded and is missing some features. Extraction is slow. Only properly work on Linux, any other OSes probably won't work fully. + +## Build options + +> `-Duse_c_libs` + +Use C decompression libraries instead of Zig's standard library for decompression. + +> `-Dversion` + +Sets the version of `unsquashfs` shown when `--version` is passed. + +## Capabilities + +Most features are present except for the following: + +* mod_time is not set on extraction +* xattrs are not applied on extraction +* Only the zstd C library is implemented (all others result in error.TODO). +* When using the Zig decompression libraries, the lzo and lz4 compression types are unavailable.
I don't _really_ plan on spending the time to find and validate a library since neither is popular. diff --git a/build.zig b/build.zig index e75861a..342648c 100644 --- a/build.zig +++ b/build.zig @@ -1,7 +1,6 @@ const std = @import("std"); pub fn build(b: *std.Build) !void { - const static_option = b.option(bool, "static_build", "Build static"); const use_c_libs_option = b.option(bool, "use_c_libs", "Use C versions of decompression libraries instead of the Zig standard library ones"); const version_string_option = b.option([]const u8, "version", "Version of the library/binary"); @@ -35,7 +34,6 @@ pub fn build(b: *std.Build) !void { exe_mod.addOptions("config", unsquashfs_options); const exe = b.addExecutable(.{ .name = "unsquashfs", - .linkage = if (static_option == true) .static else .dynamic, .root_module = exe_mod, }); diff --git a/run_tests.sh b/run_tests.sh new file mode 100755 index 0000000..ec0ff6e --- /dev/null +++ b/run_tests.sh @@ -0,0 +1,3 @@ +#!/bin/sh + +zig test -lc -lzstd src/test.zig diff --git a/src/decomp.zig b/src/decomp.zig index 975d0f8..7357a85 100644 --- a/src/decomp.zig +++ b/src/decomp.zig @@ -3,8 +3,9 @@ const std = @import("std"); const Reader = std.Io.Reader; +const builtin = @import("builtin"); -const config = @import("config"); +const config = if (builtin.is_test) .{ .use_c_libs = true } else @import("config"); const c = @cImport({ @cInclude("zstd.h");