From 5c14b7db48db3a197e17bb6a42654219abb43eca Mon Sep 17 00:00:00 2001 From: Caleb Gardner Date: Wed, 9 Jul 2025 06:42:02 -0500 Subject: [PATCH 01/15] Try 4, lol. --- build.zig | 3 +- src/bin/unsquashfs.zig | 3 + src/decompress.zig | 85 -------- src/directory.zig | 66 ------ src/file.zig | 361 --------------------------------- src/fragment.zig | 26 --- src/inode.zig | 85 ++++++++ src/inode/dir.zig | 40 ++-- src/inode/file.zig | 87 ++++---- src/inode/inode.zig | 103 ---------- src/inode/misc.zig | 85 ++++++-- src/inode/sym.zig | 48 ----- src/reader.zig | 142 ++----------- src/reader/metadata.zig | 43 ++++ src/readers/data_extractor.zig | 202 ------------------ src/readers/data_reader.zig | 164 --------------- src/readers/file_holder.zig | 90 -------- src/readers/metadata.zig | 77 ------- src/root.zig | 1 - src/superblock.zig | 82 +++++--- src/table.zig | 56 ----- src/zig_unsquashfs.zig | 203 ------------------ 22 files changed, 331 insertions(+), 1721 deletions(-) create mode 100644 src/bin/unsquashfs.zig delete mode 100644 src/decompress.zig delete mode 100644 src/directory.zig delete mode 100644 src/file.zig delete mode 100644 src/fragment.zig create mode 100644 src/inode.zig delete mode 100644 src/inode/inode.zig delete mode 100644 src/inode/sym.zig create mode 100644 src/reader/metadata.zig delete mode 100644 src/readers/data_extractor.zig delete mode 100644 src/readers/data_reader.zig delete mode 100644 src/readers/file_holder.zig delete mode 100644 src/readers/metadata.zig delete mode 100644 src/table.zig delete mode 100644 src/zig_unsquashfs.zig diff --git a/build.zig b/build.zig index 070544a..c682bc7 100644 --- a/build.zig +++ b/build.zig @@ -23,10 +23,11 @@ pub fn build(b: *std.Build) !void { }); const exe_mod = b.createModule(.{ - .root_source_file = b.path("src/zig_unsquashfs.zig"), + .root_source_file = b.path("src/bin/unsquashfs.zig"), .target = target, .optimize = optimize, }); + exe_mod.addImport("squashfs", lib_mod); exe_mod.addOptions("config", opt); const exe = b.addExecutable(.{ .linkage = .static, diff --git a/src/bin/unsquashfs.zig b/src/bin/unsquashfs.zig new file mode 100644 index 0000000..66a7fbc --- /dev/null +++ b/src/bin/unsquashfs.zig @@ -0,0 +1,3 @@ +const import = @import("std"); + +pub fn main() void {} diff --git a/src/decompress.zig b/src/decompress.zig deleted file mode 100644 index d35a355..0000000 --- a/src/decompress.zig +++ /dev/null @@ -1,85 +0,0 @@ -const std = @import("std"); -const io = std.io; -const compress = std.compress; - -const DecompressError = error{ - LzoUnsupported, - Lz4Unsupported, -}; - -pub const DecompressType = enum(u16) { - zlib = 1, - lzma, - lzo, - xz, - lz4, - zstd, - - pub fn decompress(self: DecompressType, alloc: std.mem.Allocator, rdr: io.AnyReader) !std.ArrayList(u8) { - var out = std.ArrayList(u8).init(alloc); - errdefer out.deinit(); - switch (self) { - .zlib => try compress.zlib.decompress(rdr, out.writer()), - .lzma => { - var decomp = try compress.lzma.decompress(alloc, rdr); - defer decomp.deinit(); - try decomp.reader().readAllArrayList(&out, 1024 * 1024); - }, - .lzo => return DecompressError.LzoUnsupported, - .xz => { - var decomp = try compress.xz.decompress(alloc, rdr); - defer decomp.deinit(); - try decomp.reader().readAllArrayList(&out, 1024 * 1024); - }, - .lz4 => return DecompressError.Lz4Unsupported, - .zstd => { - const buf = try alloc.alloc(u8, compress.zstd.DecompressorOptions.default_window_buffer_len); - defer alloc.free(buf); - var decomp = compress.zstd.decompressor(rdr, .{ - .window_buffer = buf, - }); - try decomp.reader().readAllArrayList(&out, 1024 * 1024); - }, - } - return out; - } - - pub fn decompressTo(self: DecompressType, alloc: std.mem.Allocator, rdr: io.AnyReader, writer: io.AnyWriter) anyerror!void { - const buf_size: usize = 8192; - switch (self) { - .zlib => try compress.zlib.decompress(rdr, writer), - .lzma => { - var decomp = try compress.lzma.decompress(alloc, rdr); - defer decomp.deinit(); - var buf: [buf_size]u8 = undefined; - var red = try decomp.read(&buf); - while (red > 0) : (red = try decomp.read(&buf)) { - _ = try writer.writeAll(&buf); - } - }, - .lzo => return DecompressError.LzoUnsupported, - .xz => { - var decomp = try compress.xz.decompress(alloc, rdr); - defer decomp.deinit(); - var buf: [buf_size]u8 = undefined; - var red = try decomp.read(&buf); - while (red > 0) : (red = try decomp.read(&buf)) { - _ = try writer.writeAll(&buf); - } - }, - .lz4 => return DecompressError.Lz4Unsupported, - .zstd => { - const window_buf = try alloc.alloc(u8, compress.zstd.DecompressorOptions.default_window_buffer_len); - defer alloc.free(window_buf); - var decomp = compress.zstd.decompressor(rdr, .{ - .window_buffer = window_buf, - }); - var buf: [buf_size]u8 = undefined; - var red = try decomp.read(&buf); - while (red > 0) : (red = try decomp.read(&buf)) { - _ = try writer.writeAll(&buf); - } - }, - } - } -}; diff --git a/src/directory.zig b/src/directory.zig deleted file mode 100644 index 9ed1fad..0000000 --- a/src/directory.zig +++ /dev/null @@ -1,66 +0,0 @@ -const std = @import("std"); -const io = std.io; - -const InodeType = @import("inode/inode.zig").InodeType; - -const DirHeader = extern struct { - count: u32, - inode_block_start: u32, - inode_num: u32, -}; - -const RawDirEntryStart = packed struct { - inode_block_offset: u16, - /// Difference from the current DirHeader inode_num - inode_num_difference: i16, - /// Extended inodes will be their basic type. - inode_type: InodeType, - name_size: u16, -}; - -pub const DirEntry = struct { - block_start: u32, - offset: u16, - inode_num: u32, - name: []const u8, - - fn init(alloc: std.mem.Allocator, hdr: DirHeader, rdr: io.AnyReader) !DirEntry { - const raw = try rdr.readStruct(RawDirEntryStart); - const name = try alloc.alloc(u8, raw.name_size + 1); - errdefer alloc.free(name); - _ = try rdr.read(name); - return .{ - .block_start = hdr.inode_block_start, - .offset = raw.inode_block_offset, - .inode_num = if (raw.inode_num_difference > 0) - hdr.inode_num + @abs(raw.inode_num_difference) - else - hdr.inode_num - @abs(raw.inode_num_difference), - .name = name, - }; - } - - pub fn deinit(self: DirEntry, alloc: std.mem.Allocator) void { - alloc.free(self.name); - } -}; - -pub fn readDirectory(alloc: std.mem.Allocator, rdr: io.AnyReader, size: u64) !std.StringHashMap(DirEntry) { - var out: std.StringHashMap(DirEntry) = .init(alloc); - errdefer out.deinit(); - var red_size: u64 = 3; - var hdr: DirHeader = undefined; - while (red_size < size) { - hdr = try rdr.readStruct(DirHeader); - red_size += 12; - var i: u32 = 0; - try out.ensureUnusedCapacity(hdr.count + 1); - while (i <= hdr.count) : (i += 1) { - var tmp: DirEntry = try .init(alloc, hdr, rdr); - errdefer tmp.deinit(alloc); - out.putAssumeCapacity(tmp.name, tmp); - red_size += 8 + tmp.name.len; - } - } - return out; -} diff --git a/src/file.zig b/src/file.zig deleted file mode 100644 index a685797..0000000 --- a/src/file.zig +++ /dev/null @@ -1,361 +0,0 @@ -const std = @import("std"); -const io = std.io; -const fs = std.fs; -const builtin = @import("builtin"); - -const inode = @import("inode/inode.zig"); -const directory = @import("directory.zig"); - -const Reader = @import("reader.zig").Reader; -const DirEntry = @import("directory.zig").DirEntry; -const DataReader = @import("readers/data_reader.zig").DataReader; -const DataExtractor = @import("readers/data_extractor.zig").DataExtractor; -const MetadataReader = @import("readers/metadata.zig").MetadataReader; - -/// A file or directory inside of a squashfs. -/// Make sure to call deinit(); -pub const File = struct { - name: []const u8, - inode: inode.Inode, - parent_path: []const u8, - - dirEntries: ?std.StringHashMap(DirEntry) = null, - data_rdr: ?DataReader = null, - - pub const FileError = error{ - NotDirectory, - NotNormalFile, - NotSymlink, - NotFound, - }; - - fn fromDirEntry(rdr: *Reader, ent: DirEntry, parent_path: []const u8) !File { - var offset_rdr = rdr.holder.readerAt(ent.block_start + rdr.super.inode_table_start); - var meta_rdr: MetadataReader = .init( - rdr.alloc, - rdr.super.decomp, - offset_rdr.any(), - ); - defer meta_rdr.deinit(); - try meta_rdr.skip(ent.offset); - const name = try rdr.alloc.alloc(u8, ent.name.len); - errdefer rdr.alloc.free(name); - @memcpy(name, ent.name); - var out: File = .{ - .name = name, - .inode = try .init( - rdr.alloc, - meta_rdr.any(), - rdr.super.block_size, - ), - .parent_path = parent_path, - }; - switch (out.inode.header.inode_type) { - .file, .ext_file => { - out.data_rdr = try .init(&out, rdr); - }, - else => {}, - } - return out; - } - - pub fn file_path(self: File, alloc: std.mem.Allocator) ![]u8 { - if (self.parent_path.len == 0) { - const out = try alloc.alloc(u8, self.name.len); - @memcpy(out, self.name); - return out; - } - return std.mem.concat(alloc, u8, &[3][]const u8{ self.parent_path, "/", self.name }); - } - - pub fn uid(self: File, rdr: *Reader) !u32 { - return rdr.id_table.getValue(rdr, self.inode.header.uid_idx); - } - - pub fn gid(self: File, rdr: *Reader) !u32 { - return rdr.id_table.getValue(rdr, self.inode.header.gid_idx); - } - - pub fn deinit(self: *File, alloc: std.mem.Allocator) void { - self.inode.deinit(); - alloc.free(self.name); - alloc.free(self.parent_path); - if (self.data_rdr != null) self.data_rdr.?.deinit(); - if (self.dirEntries != null) { - var iter = self.dirEntries.?.iterator(); - while (iter.next()) |ent| { - ent.value_ptr.deinit(alloc); - } - self.dirEntries.?.deinit(); - } - } - - pub fn isDir(self: File) bool { - return switch (self.inode.header.inode_type) { - .dir, .ext_dir => true, - else => false, - }; - } - - /// If the File is a directory, tries to return the file at path. - /// An empty path returns itself. - pub fn open(self: *File, rdr: *Reader, path: []const u8) !File { - return self.realOpen(rdr, path, true); - } - - fn realOpen(self: *File, rdr: *Reader, path: []const u8, first: bool) (FileError || anyerror)!File { - const clean_path: []const u8 = std.mem.trim(u8, path, "/"); - if (clean_path.len == 0) { - return self.*; - } - defer if (!first) self.deinit(rdr.alloc); - switch (self.inode.header.inode_type) { - .dir, .ext_dir => {}, - else => return FileError.NotDirectory, - } - try self.readDirEntries(rdr); - const split_idx = std.mem.indexOf(u8, clean_path, "/") orelse clean_path.len; - const name = clean_path[0..split_idx]; - const ent = self.dirEntries.?.get(name); - if (ent == null) { - return FileError.NotFound; - } - var fil = try fromDirEntry(rdr, ent.?, try self.file_path(rdr.alloc)); - return fil.realOpen(rdr, clean_path[split_idx..], false); - } - - /// If the File is a symlink, returns the symlink's target path. - pub fn symPath(self: File) (FileError || anyerror)![]const u8 { - return switch (self.inode.data) { - .sym => |s| s.target, - .ext_sym => |s| s.target, - else => FileError.NotSymlink, - }; - } - - /// If the File is a directory, returns an iterator that iterates over it's children. - pub fn iterator(self: *File, rdr: *Reader) (FileError || anyerror)!FileIterator { - switch (self.inode.header.inode_type) { - .dir, .ext_dir => {}, - else => return FileError.NotDirectory, - } - try self.readDirEntries(rdr); - var files = try rdr.alloc.alloc(File, self.dirEntries.?.count()); - errdefer rdr.alloc.free(files); - var dirEntryIter = self.dirEntries.?.valueIterator(); - var i: u32 = 0; - while (dirEntryIter.next()) |ent| : (i += 1) { - files[i] = try .fromDirEntry(rdr, ent.*, try self.file_path(rdr.alloc)); - } - return .{ - .alloc = rdr.alloc, - .files = files, - }; - } - - fn readDirEntries(self: *File, rdr: *Reader) (FileError || anyerror)!void { - if (self.dirEntries != null) return; - var block_start: u32 = 0; - var offset: u16 = 0; - var siz: u32 = 0; - switch (self.inode.data) { - .dir => |d| { - block_start = d.block_start; - offset = d.offset; - siz = d.size; - }, - .ext_dir => |d| { - block_start = d.block_start; - offset = d.offset; - siz = d.size; - }, - else => return FileError.NotDirectory, - } - var offset_rdr = rdr.holder.readerAt(rdr.super.dir_table_start + block_start); - var meta_rdr: MetadataReader = .init( - rdr.alloc, - rdr.super.decomp, - offset_rdr.any(), - ); - defer meta_rdr.deinit(); - try meta_rdr.skip(offset); - self.dirEntries = try directory.readDirectory(rdr.alloc, meta_rdr.any(), siz); - } - - pub fn size(self: File) u64 { - return switch (self.inode.data) { - .file => |f| f.size, - .ext_file => |f| f.size, - else => 0, - }; - } - - /// If the file is a normal file, reads it's data. - pub fn read(self: *File, bytes: []u8) (FileError || anyerror)!usize { - if (self.data_rdr == null) { - return FileError.NotNormalFile; - } - return self.data_rdr.?.read(bytes); - } - - const FileReader = io.GenericReader(*File, (FileError || anyerror), read); - - pub fn reader(self: *File) FileReader { - return .{ - .context = self, - }; - } - - fn extractor(self: *File, rdr: *Reader) !DataExtractor { - return .init(self, rdr); - } - - pub const ExtractConfig = struct { - /// The amount of worker threads to spawn. Defaults to your cpu core count. - thread_count: u16, - /// The maximum amount of additional memory this extraction will use. - /// Default is 1GB or a quarter of your system memory, whichever is smaller. - /// Actually memory usage will be higher, as this does not account of vaious metadata (such as file names). - max_mem: u64, - deref_sym: bool = false, - unbreak_sym: bool = false, - verbose: bool = false, - pub fn init() !ExtractConfig { - const sys_mem = try std.process.totalSystemMemory(); - return .{ - .thread_count = @truncate(try std.Thread.getCpuCount()), - .max_mem = @min(sys_mem / 4, 1024 * 1024 * 1024), - }; - } - }; - - pub const ExtractError = error{ - FileExists, - }; - - /// Extract's the File to the path. - pub fn extract(self: *File, rdr: *Reader, config: ExtractConfig, path: []const u8) (ExtractError || anyerror)!void { - var pol: std.Thread.Pool = undefined; - try pol.init(.{ - .allocator = rdr.alloc, - .n_jobs = config.thread_count, - }); - defer pol.deinit(); - return self.extractReal(rdr, config, &pol, path, true); - } - - fn extractReal(self: *File, rdr: *Reader, config: ExtractConfig, pool: *std.Thread.Pool, path: []const u8, first: bool) (ExtractError || anyerror)!void { - const real_path = std.mem.trimRight(u8, path, "/"); - var exists = true; - var stat: ?fs.File.Stat = null; - if (fs.cwd().statFile(real_path)) |s| { - stat = s; - } else |err| { - if (err == fs.File.OpenError.FileNotFound) { - exists = false; - } else return err; - } - switch (self.inode.header.inode_type) { - .dir, .ext_dir => { - if (!exists) { - fs.cwd().makeDir(real_path) catch |err| { - if (config.verbose) - std.log.err("error creating directory {s}: {any}", .{ real_path, err }); - return err; - }; - } - var iter = try self.iterator(rdr); - defer iter.deinit(); - while (iter.next()) |f| { - const extr_path = try std.mem.concat(rdr.alloc, u8, &[3][]const u8{ real_path, "/", f.name }); - defer rdr.alloc.free(extr_path); - try f.extractReal(rdr, config, pool, extr_path, false); - } - }, - .file, .ext_file => { - if ((!first and exists) or - (first and exists and stat.?.kind != .directory)) return ExtractError.FileExists; - var extr_path: []u8 = undefined; - if (first and exists and stat.?.kind == .directory) { - extr_path = try std.mem.concat(rdr.alloc, u8, &[3][]const u8{ real_path, "/", self.name }); - } else { - extr_path = try rdr.alloc.alloc(u8, real_path.len); - @memcpy(extr_path, real_path); - } - defer rdr.alloc.free(extr_path); - var fil = fs.cwd().createFile(extr_path, .{}) catch |err| { - if (config.verbose) - std.log.err("error creating file {s}: {any}", .{ extr_path, err }); - return err; - }; - defer fil.close(); - if (config.thread_count > 1 and self.size() > rdr.super.block_size) { - var ext = try self.extractor(rdr); - defer ext.deinit(); - ext.writeToFile(pool, &fil) catch |err| { - if (config.verbose) - std.log.err("error writing file {s}: {any}", .{ self.name, err }); - return err; - }; - } else { - var buf = [1]u8{0} ** 8192; - var total_red: u64 = 0; - while (total_red < self.size()) { - const red = try self.read(&buf); - total_red += red; - } - } - }, - .sym, .ext_sym => { - //TODO: unbreak symlinks & dereference symlinks - if (exists) return ExtractError.FileExists; - fs.cwd().symLink(try self.symPath(), real_path, .{}) catch |err| { - if (config.verbose) - std.log.err("error creating symlink {s}: {any}", .{ self.name, err }); - return err; - }; - }, - .block, .ext_block, .char, .ext_char, .fifo, .ext_fifo => { - if (exists) return ExtractError.FileExists; - comptime if (builtin.os.tag != .linux) return; - const mode: u32 = switch (self.inode.header.inode_type) { - .block, .ext_block => std.posix.S.IFBLK, - .char, .ext_char => std.posix.S.IFCHR, - .fifo, .ext_fifo => std.posix.S.IFIFO, - else => unreachable, - }; - const dev = switch (self.inode.data) { - .block, .char => |b| b.device, - .ext_block, .ext_char => |b| b.device, - .fifo, .ext_fifo => 0, - else => unreachable, - }; - _ = std.os.linux.mknod(@ptrCast(real_path), mode, dev); - }, - .sock, .ext_sock => {}, //TODO - } - //TODO: permissions - } -}; - -const FileIterator = struct { - alloc: std.mem.Allocator, - files: []File, - - curIndex: u32 = 0, - - pub fn next(self: *FileIterator) ?*File { - if (self.curIndex >= self.files.len) return null; - defer self.curIndex += 1; - return &self.files[self.curIndex]; - } - pub fn reset(self: *FileIterator) void { - self.curIndex = 0; - } - pub fn deinit(self: *FileIterator) void { - for (self.files) |*f| { - f.deinit(self.alloc); - } - self.alloc.free(self.files); - } -}; diff --git a/src/fragment.zig b/src/fragment.zig deleted file mode 100644 index 8ac1336..0000000 --- a/src/fragment.zig +++ /dev/null @@ -1,26 +0,0 @@ -const std = @import("std"); - -const BlockSize = @import("inode/file.zig").BlockSize; -const Reader = @import("reader.zig").Reader; - -pub const FragEntry = packed struct { - start: u64, - size: BlockSize, - _: u32, - - pub fn getData(self: FragEntry, rdr: *Reader, offset: u32, frag_size: u32) ![]u8 { - var offset_rdr = rdr.holder.readerAt(self.start); - if (self.size.not_compressed) { - const buf = try rdr.alloc.alloc(u8, frag_size); - _ = try offset_rdr.read(buf); - return buf; - } - var limit_rdr = std.io.limitedReader(offset_rdr, self.size.size); - var decomp = try rdr.super.decomp.decompress(rdr.alloc, limit_rdr.reader().any()); - var frag_all = try decomp.toOwnedSlice(); - defer rdr.alloc.free(frag_all); - const out = try rdr.alloc.alloc(u8, frag_size); - @memcpy(out, frag_all[offset .. offset + frag_size]); - return out; - } -}; diff --git a/src/inode.zig b/src/inode.zig new file mode 100644 index 0000000..321d344 --- /dev/null +++ b/src/inode.zig @@ -0,0 +1,85 @@ +const std = @import("std"); + +const dir = @import("inode/dir.zig"); +const file = @import("inode/file.zig"); +const misc = @import("inode/misc.zig"); + +pub const Ref = packed struct { + offset: u16, + block: u32, + _: u16, +}; + +const Type = enum(u16) { + dir = 1, + file, + symlink, + block_dev, + char_dev, + fifo, + socket, + ext_dir, + ext_file, + ext_symlink, + ext_block_dev, + ext_char_dev, + ext_fifo, + ext_socket, +}; + +const Header = packed struct { + type: Type, + perm: u16, + uid_idx: u16, + gid_idx: u16, + mod_time: u32, + num: u32, +}; + +const Data = union(enum) { + dir: dir.Dir, + file: file.File, + symlink: misc.Symlink, + block_dev: misc.Dev, + char_dev: misc.Dev, + fifo: misc.IPC, + socket: misc.IPC, + ext_dir: dir.ExtDir, + ext_file: file.ExtFile, + ext_symlink: misc.ExtSymlink, + ext_block_dev: misc.ExtDev, + ext_char_dev: misc.ExtDev, + ext_fifo: misc.ExtIPC, + ext_socket: misc.ExtIPC, +}; + +const Self = @This(); + +hdr: Header, +data: Data, + +pub fn init(rdr: anytype, alloc: std.mem.Allocator, block_size: u32) !Self { + std.debug.assert(std.meta.hasFn(@TypeOf(rdr), "read")); + var hdr: Header = undefined; + _ = try rdr.read(std.mem.asBytes(&hdr)); + const data = switch (hdr.type) { + .dir => .{ .dir = .init(rdr) }, + .file => .{ .file = .init(rdr, alloc, block_size) }, + .symlink => .{ .symlink = .init(rdr, alloc) }, + .block_dev => .{ .block_dev = .init(rdr) }, + .char_dev => .{ .char_dev = .init(rdr) }, + .fifo => .{ .fifo = .init(rdr) }, + .socket => .{ .socket = .init(rdr) }, + .ext_dir => .{ .ext_dir = .init(rdr) }, + .ext_file => .{ .ext_file = .init(rdr, alloc, block_size) }, + .ext_symlink => .{ .ext_symlink = .init(rdr, alloc) }, + .ext_block_dev => .{ .ext_block_dev = .init(rdr) }, + .ext_char_dev => .{ .ext_char_dev = .init(rdr) }, + .ext_fifo => .{ .ext_fifo = .init(rdr) }, + .ext_socket => .{ .ext_socket = .init(rdr) }, + }; + return .{ + .hdr = hdr, + .data = data, + }; +} diff --git a/src/inode/dir.zig b/src/inode/dir.zig index b1afc21..8234393 100644 --- a/src/inode/dir.zig +++ b/src/inode/dir.zig @@ -1,37 +1,31 @@ -const io = @import("std").io; +const std = @import("std"); -pub const DirInode = packed struct { - block_start: u32, - hard_links: u32, - /// Note: size is 3 larger then the actual size, due to "." and ".." +pub const Dir = packed struct { + block: u32, + hard_link: u32, size: u16, offset: u16, parent_num: u32, - pub fn init(rdr: io.AnyReader) !DirInode { - return rdr.readStruct(DirInode); + pub fn init(rdr: anytype) !Dir { + const out: Dir = undefined; + _ = rdr.read(std.mem.asBytes(&out)); + return out; } }; -const DirIndex = struct { - offset: u32, - block_start: u32, - name_size: u32, - name: []const u8, -}; - -pub const ExtDirInode = packed struct { - hard_links: u32, - /// Note: size is 3 larger then the actual size, due to "." and ".." +pub const ExtDir = packed struct { + hard_link: u32, size: u32, - block_start: u32, + block: u32, parent_num: u32, - index_count: u16, + idx_count: u16, offset: u16, - xattr_inx: u32, - // TODO: possibly also read dir indexes. Maybe relagate to function... + xattr_idx: u32, - pub fn init(rdr: io.AnyReader) !ExtDirInode { - return rdr.readStruct(ExtDirInode); + pub fn init(rdr: anytype) !ExtDir { + const out: ExtDir = undefined; + _ = rdr.read(std.mem.asBytes(&out)); + return out; } }; diff --git a/src/inode/file.zig b/src/inode/file.zig index bd86862..5b464a5 100644 --- a/src/inode/file.zig +++ b/src/inode/file.zig @@ -1,76 +1,71 @@ const std = @import("std"); -const io = std.io; pub const BlockSize = packed struct { size: u24, - not_compressed: bool, + uncompressed: bool, _: u7, }; -pub const FileInode = struct { - data_start: u32, +pub const File = struct { + block: u32, frag_idx: u32, - frag_offset: u32, + offset: u32, size: u32, - blocks: []const BlockSize, + block_sizes: []BlockSize, - pub fn init(alloc: std.mem.Allocator, rdr: io.AnyReader, block_size: u32) !FileInode { - var fixed_buf: [16]u8 = undefined; - _ = try rdr.readAll(&fixed_buf); - const frag_idx = std.mem.bytesToValue(u32, fixed_buf[4..8]); - const size = std.mem.bytesToValue(u32, fixed_buf[12..16]); - var block_num = size / block_size; - if (frag_idx == 0xFFFFFFFF and size % block_size > 0) { - block_num += 1; + pub fn init(rdr: anytype, alloc: std.mem.Allocator, block_size: u32) !File { + var fixed: [16]u8 = undefined; + _ = try rdr.read(&fixed); + const frag_idx = std.mem.readInt(u32, fixed[4..8], .little); + const size = std.mem.readInt(u32, fixed[12..16], .little); + var blocks: u32 = size / block_size; + if (size % block_size > 0 and frag_idx != 0xffffffff) { + blocks += 1; } - const blocks = try alloc.alloc(BlockSize, block_num); - _ = try rdr.readAll(@ptrCast(blocks)); + const block_sizes = alloc.alloc(BlockSize, blocks); + errdefer alloc.free(block_sizes); + _ = try rdr.read(std.mem.sliceAsBytes(block_sizes)); return .{ - .data_start = std.mem.bytesToValue(u32, fixed_buf[0..4]), + .block = std.mem.readInt(u32, fixed[0..4], .little), .frag_idx = frag_idx, - .frag_offset = std.mem.bytesToValue(u32, fixed_buf[8..12]), + .offset = std.mem.readInt(u32, fixed[8..12], .little), .size = size, - .blocks = blocks, + .block_sizes = block_sizes, }; } - pub fn deinit(self: FileInode, alloc: std.mem.Allocator) void { - alloc.free(self.blocks); - } }; -pub const ExtFileInode = struct { - data_start: u64, +pub const ExtFile = struct { + block: u64, size: u64, sparse: u64, - hard_links: u32, + hard_link: u32, frag_idx: u32, - frag_offset: u32, + offset: u32, xattr_idx: u32, - blocks: []const BlockSize, + block_sizes: []BlockSize, - pub fn init(alloc: std.mem.Allocator, rdr: io.AnyReader, block_size: u32) !ExtFileInode { - var fixed_buf = [1]u8{0} ** 40; - _ = try rdr.readAll(&fixed_buf); - const size = std.mem.bytesToValue(u64, fixed_buf[8..16]); - const frag_idx = std.mem.bytesToValue(u32, fixed_buf[28..32]); - var block_num = size / block_size; - if (frag_idx == 0xFFFFFFFF and size % block_size > 0) { - block_num += 1; + pub fn init(rdr: anytype, alloc: std.mem.Allocator, block_size: u32) !ExtFile { + var fixed: [40]u8 = undefined; + _ = try rdr.read(&fixed); + const size = std.mem.readInt(u64, fixed[8..16], .little); + const frag_idx = std.mem.readInt(u32, fixed[28..32], .little); + var blocks: u32 = size / block_size; + if (size % block_size > 0 and frag_idx != 0xffffffff) { + blocks += 1; } - const blocks = try alloc.alloc(BlockSize, block_num); - _ = try rdr.readAll(@ptrCast(blocks)); + const block_sizes = alloc.alloc(BlockSize, blocks); + errdefer alloc.free(block_sizes); + _ = try rdr.read(std.mem.sliceAsBytes(block_sizes)); return .{ - .data_start = std.mem.bytesToValue(u64, fixed_buf[0..8]), + .block = std.mem.readInt(u64, fixed[0..8], .little), .size = size, - .sparse = std.mem.bytesToValue(u64, fixed_buf[16..24]), - .hard_links = std.mem.bytesToValue(u32, fixed_buf[24..28]), + .sparse = std.mem.readInt(u64, fixed[16..24], .little), + .hard_link = std.mem.readInt(u32, fixed[24..28], .little), .frag_idx = frag_idx, - .frag_offset = std.mem.bytesToValue(u32, fixed_buf[32..36]), - .xattr_idx = std.mem.bytesToValue(u32, fixed_buf[36..40]), - .blocks = blocks, + .offset = std.mem.readInt(u32, fixed[32..36], .little), + .xattr_idx = std.mem.readInt(u32, fixed[36..40], .little), + .block_sizes = blocks, }; } - pub fn deinit(self: ExtFileInode, alloc: std.mem.Allocator) void { - alloc.free(self.blocks); - } }; diff --git a/src/inode/inode.zig b/src/inode/inode.zig deleted file mode 100644 index a67a0c6..0000000 --- a/src/inode/inode.zig +++ /dev/null @@ -1,103 +0,0 @@ -const std = @import("std"); -const io = std.io; - -pub const InodeRef = packed struct { - offset: u16, - block_start: u32, - _: u16 = 0, - - pub fn init(block_start: u32, offset: u16) InodeRef { - return .{ - .offset = offset, - .block_start = block_start, - }; - } -}; - -pub const InodeType = enum(u16) { - dir = 1, - file, - sym, - block, - char, - fifo, - sock, - ext_dir, - ext_file, - ext_sym, - ext_block, - ext_char, - ext_fifo, - ext_sock, -}; - -const dir = @import("dir.zig"); -const file = @import("file.zig"); -const sym = @import("sym.zig"); -const misc = @import("misc.zig"); - -pub const InodeData = union(enum) { - dir: dir.DirInode, - file: file.FileInode, - sym: sym.SymInode, - block: misc.DeviceInode, - char: misc.DeviceInode, - fifo: misc.IPCInode, - sock: misc.IPCInode, - ext_dir: dir.ExtDirInode, - ext_file: file.ExtFileInode, - ext_sym: sym.ExtSymInode, - ext_block: misc.ExtDeviceInode, - ext_char: misc.ExtDeviceInode, - ext_fifo: misc.ExtIPCInode, - ext_sock: misc.ExtIPCInode, -}; - -pub const InodeHeader = packed struct { - inode_type: InodeType, - perm: u16, - uid_idx: u16, - gid_idx: u16, - mod_time: u32, - num: u32, -}; - -pub const Inode = struct { - alloc: std.mem.Allocator, - header: InodeHeader, - data: InodeData, - - pub fn init(alloc: std.mem.Allocator, rdr: io.AnyReader, block_size: u32) !Inode { - const hdr = try rdr.readStruct(InodeHeader); - const data: InodeData = switch (hdr.inode_type) { - .dir => .{ .dir = try .init(rdr) }, - .file => .{ .file = try .init(alloc, rdr, block_size) }, - .sym => .{ .sym = try .init(alloc, rdr) }, - .block => .{ .block = try .init(rdr) }, - .char => .{ .char = try .init(rdr) }, - .fifo => .{ .fifo = try .init(rdr) }, - .sock => .{ .sock = try .init(rdr) }, - .ext_dir => .{ .ext_dir = try .init(rdr) }, - .ext_file => .{ .ext_file = try .init(alloc, rdr, block_size) }, - .ext_sym => .{ .ext_sym = try .init(alloc, rdr) }, - .ext_block => .{ .ext_block = try .init(rdr) }, - .ext_char => .{ .ext_char = try .init(rdr) }, - .ext_fifo => .{ .ext_fifo = try .init(rdr) }, - .ext_sock => .{ .ext_sock = try .init(rdr) }, - }; - return .{ - .alloc = alloc, - .header = hdr, - .data = data, - }; - } - pub fn deinit(self: Inode) void { - switch (self.data) { - .file => |d| d.deinit(self.alloc), - .sym => |d| d.deinit(self.alloc), - .ext_file => |d| d.deinit(self.alloc), - .ext_sym => |d| d.deinit(self.alloc), - else => {}, - } - } -}; diff --git a/src/inode/misc.zig b/src/inode/misc.zig index 940a0a0..134f9de 100644 --- a/src/inode/misc.zig +++ b/src/inode/misc.zig @@ -1,38 +1,87 @@ const std = @import("std"); -const io = std.io; -pub const DeviceInode = packed struct { - hard_links: u32, - device: u32, +pub const Symlink = struct { + hard_link: u32, + // size: u32, + target: []const u8, - pub fn init(rdr: io.AnyReader) !DeviceInode { - return rdr.readStruct(DeviceInode); + pub fn init(rdr: anytype, alloc: std.mem.Allocator) !Symlink { + var fixed: [8]u8 = undefined; + _ = try rdr.read(&fixed); + const size = std.mem.readInt(u32, fixed[4..8], .little); + const target = alloc.alloc(u8, size); + errdefer alloc.free(target); + _ = try rdr.read(target); + return .{ + .hard_link = std.mem.readInt(u32, fixed[0..4], .little), + .target = target, + }; } }; -pub const ExtDeviceInode = packed struct { - hard_links: u32, +pub const ExtSymlink = struct { + hard_link: u32, + // size: u32, + target: []const u8, + xattr_idx: u32, + + pub fn init(rdr: anytype, alloc: std.mem.Allocator) !ExtSymlink { + var fixed: [8]u8 = undefined; + _ = try rdr.read(&fixed); + const size = std.mem.readInt(u32, fixed[4..8], .little); + const target = alloc.alloc(u8, size); + errdefer alloc.free(target); + _ = try rdr.read(target); + var xattr_idx: u32 = 0; + _ = try rdr.read(std.mem.asBytes(&xattr_idx)); + return .{ + .hard_link = std.mem.readInt(u32, fixed[0..4], .little), + .target = target, + .xattr_idx = xattr_idx, + }; + } +}; + +pub const Dev = packed struct { + hard_link: u32, + device: u32, + + pub fn init(rdr: anytype) !Dev { + const out: Dev = undefined; + _ = try rdr.read(std.mem.asBytes(&out)); + return out; + } +}; + +pub const ExtDev = packed struct { + hard_link: u32, device: u32, xattr_idx: u32, - pub fn init(rdr: io.AnyReader) !ExtDeviceInode { - return rdr.readStruct(ExtDeviceInode); + pub fn init(rdr: anytype) !ExtDev { + const out: ExtDev = undefined; + _ = try rdr.read(std.mem.asBytes(&out)); + return out; } }; -pub const IPCInode = packed struct { - hard_links: u32, +pub const IPC = packed struct { + hard_link: u32, - pub fn init(rdr: io.AnyReader) !IPCInode { - return rdr.readStruct(IPCInode); + pub fn init(rdr: anytype) !IPC { + const out: IPC = undefined; + _ = try rdr.read(std.mem.asBytes(&out)); + return out; } }; -pub const ExtIPCInode = packed struct { - hard_links: u32, +pub const ExtIPC = packed struct { + hard_link: u32, xattr_idx: u32, - pub fn init(rdr: io.AnyReader) !ExtIPCInode { - return rdr.readStruct(ExtIPCInode); + pub fn init(rdr: anytype) !ExtIPC { + const out: ExtIPC = undefined; + _ = try rdr.read(std.mem.asBytes(&out)); + return out; } }; diff --git a/src/inode/sym.zig b/src/inode/sym.zig deleted file mode 100644 index 8f68c85..0000000 --- a/src/inode/sym.zig +++ /dev/null @@ -1,48 +0,0 @@ -const std = @import("std"); -const io = std.io; - -pub const SymInode = struct { - hard_links: u32, - size: u32, - target: []const u8, - - pub fn init(alloc: std.mem.Allocator, rdr: io.AnyReader) !SymInode { - var fixed_buf = [_]u8{0} ** 8; - _ = try rdr.readAll(@ptrCast(&fixed_buf)); - const size = std.mem.bytesToValue(u32, fixed_buf[4..]); - const target = try alloc.alloc(u8, size); - _ = try rdr.readAll(target); - return .{ - .hard_links = std.mem.bytesToValue(u32, fixed_buf[0..4]), - .size = size, - .target = target, - }; - } - pub fn deinit(self: SymInode, alloc: std.mem.Allocator) void { - alloc.free(self.target); - } -}; - -pub const ExtSymInode = struct { - hard_links: u32, - size: u32, - target: []const u8, - xattr_idx: u32, - - pub fn init(alloc: std.mem.Allocator, rdr: io.AnyReader) !ExtSymInode { - var fixed_buf = [_]u8{0} ** 8; - _ = try rdr.readAll(&fixed_buf); - const size = std.mem.bytesToValue(u32, fixed_buf[4..]); - const target = try alloc.alloc(u8, size); - _ = try rdr.readAll(target); - return .{ - .hard_links = std.mem.bytesToValue(u32, fixed_buf[0..4]), - .size = size, - .target = target, - .xattr_idx = try rdr.readInt(u32, std.builtin.Endian.little), - }; - } - pub fn deinit(self: ExtSymInode, alloc: std.mem.Allocator) void { - alloc.free(self.target); - } -}; diff --git a/src/reader.zig b/src/reader.zig index b71bb9d..a21312a 100644 --- a/src/reader.zig +++ b/src/reader.zig @@ -1,139 +1,25 @@ const std = @import("std"); -const inode = @import("inode/inode.zig"); - -const Table = @import("table.zig").Table; -const FileHolder = @import("readers/file_holder.zig").FileHolder; const Superblock = @import("superblock.zig").Superblock; -const File = @import("file.zig").File; -const MetadataReader = @import("readers/metadata.zig").MetadataReader; -const DirEntry = @import("directory.zig").DirEntry; -const FragEntry = @import("fragment.zig").FragEntry; -/// A squashfs archive reader. Make sure to call deinit(). -/// For most actions, you'll want to use Reader.root. -pub const Reader = struct { - alloc: std.mem.Allocator, - holder: FileHolder, - super: Superblock, - root: File, +pub fn Reader(comptime T: type) type { + std.debug.assert(std.meta.hasFn(T, "pread")); - frag_table: Table(FragEntry), - export_table: Table(inode.InodeRef), - id_table: Table(u32), + return struct { + const Self = @This(); - pub fn init(alloc: std.mem.Allocator, filepath: []const u8, offset: u64) !Reader { - var holder: FileHolder = try .init(filepath, offset); - const super: Superblock = try holder.reader().readStruct(Superblock); - try super.validate(); - var out: Reader = .{ - .alloc = alloc, - .holder = holder, - .super = super, - .root = undefined, - .frag_table = undefined, - .export_table = undefined, - .id_table = undefined, - }; - out.frag_table = .init( - &out, - super.frag_table_start, - super.frag_count, - ); - out.export_table = .init( - &out, - super.export_table_start, - super.inode_count, - ); - out.id_table = .init( - &out, - super.id_table_start, - super.id_count, - ); - out.root = try out.rootFile(); - return out; - } - pub fn deinit(self: *Reader) void { - self.frag_table.deinit(self.alloc); - self.export_table.deinit(self.alloc); - self.id_table.deinit(self.alloc); - self.root.deinit(self.alloc); - self.holder.deinit(); - } + alloc: std.mem.Allocator, + rdr: T, - pub fn open(self: *Reader, path: []const u8) !File { - return self.root.open(self, path); - } + super: Superblock = undefined, - fn rootFile(self: *Reader) !File { - var offset_rdr = self.holder.readerAt(self.super.root_ref.block_start + self.super.inode_table_start); - var meta_rdr: MetadataReader = .init( - self.alloc, - self.super.decomp, - offset_rdr.any(), - ); - defer meta_rdr.deinit(); - try meta_rdr.skip(self.super.root_ref.offset); - return .{ - .name = "", - .inode = try .init( - self.alloc, - meta_rdr.any(), - self.super.block_size, - ), - .parent_path = "", - }; - } -}; - -const test_sfs_path = "testing/LinuxPATest.sfs"; - -test "root iter" { - var rdr: Reader = try .init(std.testing.allocator, test_sfs_path, 0); - defer rdr.deinit(); - var rootIter = try rdr.root.iterator(&rdr); - defer rootIter.deinit(); - while (rootIter.next()) |f| { - std.debug.print("{s}\n", .{f.name}); - } -} - -test "extract single file" { - const sfs_file_path = "PortableApps/Cool_Retro_Term-dac2b4f-x86_64.AppImage"; - const extract_path = "testing/Cool_Retro_Term-dac2b4f-x86_64.AppImage"; - std.fs.cwd().deleteFile(extract_path) catch |err| { - if (err != std.fs.Dir.DeleteFileError.FileNotFound) { - return err; + pub fn init(alloc: std.mem.Allocator, rdr: T) Self { + const out = Self{ + .alloc = alloc, + .rdr = rdr, + }; + _ = try rdr.pread(std.mem.asBytes(&out.super), 0); + return out; } }; - var rdr: Reader = try .init(std.testing.allocator, test_sfs_path, 0); - defer rdr.deinit(); - var fil = try rdr.open(sfs_file_path); - defer fil.deinit(std.testing.allocator); - try fil.extract(&rdr, try .init(), extract_path); -} - -test "extract single directory" { - const sfs_file_path = "Documents"; - const extract_path = "testing/Documents"; - try std.fs.cwd().deleteTree(extract_path); - var rdr: Reader = try .init(std.testing.allocator, test_sfs_path, 0); - defer rdr.deinit(); - var fil = try rdr.open(sfs_file_path); - defer fil.deinit(std.testing.allocator); - var config: File.ExtractConfig = try .init(); - config.verbose = true; - try fil.extract(&rdr, config, extract_path); -} - -test "full extract" { - const extract_path = "testing/testExtract"; - std.fs.cwd().deleteTree(extract_path) catch |err| { - if (err != std.fs.Dir.DeleteFileError.FileNotFound) { - return err; - } - }; - var rdr: Reader = try .init(std.testing.allocator, test_sfs_path, 0); - defer rdr.deinit(); - try rdr.root.extract(&rdr, try .init(), extract_path); } diff --git a/src/reader/metadata.zig b/src/reader/metadata.zig new file mode 100644 index 0000000..f468717 --- /dev/null +++ b/src/reader/metadata.zig @@ -0,0 +1,43 @@ +const std = @import("std"); + +const Compression = @import("../superblock.zig").Compression; + +const MetaHeader = packed struct { + size: u15, + uncompressed: bool, +}; + +pub fn MetadataReader(comptime T: type) type { + return struct { + const Self = @This(); + + alloc: std.mem.Allocator, + comp: Compression, + rdr: T, + + block: [8192]u8 = undefined, + block_size: usize = 0, + block_offset: u32 = 0, + + pub fn init(alloc: std.mem.Allocator, comp: Compression, rdr: T) !Self { + var out: Self = .{ + .alloc = alloc, + .comp = comp, + .rdr = rdr, + }; + try out.readNextBlock(); + return out; + } + + fn readNextBlock(self: *Self) !void { + const hdr: MetaHeader = undefined; + _ = try self.rdr.read(std.mem.asBytes(hdr)); + self.block_size = try self.comp.decompress( + 8192, + self.alloc, + std.io.limitedReader(self.rdr, hdr.size), + self.block, + ); + } + }; +} diff --git a/src/readers/data_extractor.zig b/src/readers/data_extractor.zig deleted file mode 100644 index b746a40..0000000 --- a/src/readers/data_extractor.zig +++ /dev/null @@ -1,202 +0,0 @@ -const std = @import("std"); -const fs = std.fs; -const io = std.io; - -const File = @import("../file.zig").File; -const Reader = @import("../reader.zig").Reader; -const BlockSize = @import("../inode/file.zig").BlockSize; -const DecompressionType = @import("../decompress.zig").DecompressType; -const FileHolder = @import("../readers/file_holder.zig").FileHolder; -const FileOffsetWriter = @import("../readers/file_holder.zig").FileOffsetWriter; -const DataReader = @import("data_reader.zig").DataReader; -const Config = @import("../file.zig").Config; - -/// A specialized File data reader that's meant to write all of it's data at once. -/// Can be re-used freely until deinit() is called. -pub const DataExtractor = struct { - alloc: std.mem.Allocator, - decomp: DecompressionType, - holder: *FileHolder, - block_size: u32, - file_size: u64, - sizes: []BlockSize, - block_offset: []u64, - frag_data: ?[]u8 = null, - - pub fn init(fil: *File, reader: *Reader) !DataExtractor { - var data_start: u64 = 0; - var sizes: []BlockSize = undefined; - var file_size: u64 = 0; - var frag_idx: u32 = 0; - var frag_offset: u32 = 0; - switch (fil.inode.data) { - .file => |f| { - data_start = f.data_start; - sizes = try reader.alloc.alloc(BlockSize, f.blocks.len); - @memcpy(sizes, f.blocks); - file_size = f.size; - frag_idx = f.frag_idx; - frag_offset = f.frag_offset; - }, - .ext_file => |f| { - data_start = f.data_start; - sizes = try reader.alloc.alloc(BlockSize, f.blocks.len); - @memcpy(sizes, f.blocks); - file_size = f.size; - frag_idx = f.frag_idx; - frag_offset = f.frag_offset; - }, - else => return File.FileError.NotNormalFile, - } - var out: DataExtractor = .{ - .alloc = reader.alloc, - .decomp = reader.super.decomp, - .holder = &reader.holder, - .block_size = reader.super.block_size, - .file_size = file_size, - .sizes = sizes, - .block_offset = try reader.alloc.alloc(u64, sizes.len), - }; - errdefer out.deinit(); - var offset: u64 = data_start; - for (0.., out.block_offset) |i, _| { - out.block_offset[i] = offset; - offset += out.sizes[i].size; - } - if (frag_idx != 0xFFFFFFFF) { - const frag_ent = try reader.frag_table.getValue(reader, frag_idx); - out.frag_data = try frag_ent.getData(reader, frag_offset, @truncate(file_size % reader.super.block_size)); - } - return out; - } - - pub fn deinit(self: *DataExtractor) void { - self.alloc.free(self.sizes); - self.alloc.free(self.block_offset); - if (self.frag_data != null) self.alloc.free(self.frag_data.?); - } - - fn processBlockToFile(self: *DataExtractor, wg: *std.Thread.WaitGroup, errs: *MutexList, block_ind: usize, fil: *fs.File) void { - defer wg.finish(); - if (self.sizes[block_ind].not_compressed) { - @branchHint(.unlikely); - if (self.sizes[block_ind].size == 0) { - if (block_ind == self.sizes.len - 1) { - fil.pwriteAll(&[1]u8{0}, self.file_size - 1) catch |err| { - std.debug.print("yo1\n", .{}); - errs.append(err) catch {}; - }; - } else { - fil.pwriteAll(&[1]u8{0}, ((block_ind + 1) * self.block_size) - 1) catch |err| { - std.debug.print("yo2\n", .{}); - errs.append(err) catch {}; - }; - } - return; - } - const dat = self.alloc.alloc(u8, self.sizes[block_ind].size) catch |err| { - errs.append(err) catch {}; - return; - }; - defer self.alloc.free(dat); - _ = self.holder.file.preadAll(dat, self.block_offset[block_ind]) catch |err| { - errs.append(err) catch {}; - return; - }; - fil.pwriteAll(dat, block_ind * self.block_size) catch |err| { - errs.append(err) catch {}; - }; - } else { - @branchHint(.likely); - const offset_rdr = self.holder.readerAt(self.block_offset[block_ind]); - var fil_wrtr: FileOffsetWriter = .init(fil, block_ind * self.block_size); - var limit = std.io.limitedReader(offset_rdr, self.sizes[block_ind].size); - self.decomp.decompressTo( - self.alloc, - limit.reader().any(), - fil_wrtr.any(), - ) catch |err| { - errs.append(err) catch {}; - }; - } - } - - fn fragmentToFile(self: *DataExtractor, wg: *std.Thread.WaitGroup, errs: *MutexList, fil: *fs.File) void { - defer wg.finish(); - fil.pwriteAll(self.frag_data.?, self.block_size * self.sizes.len) catch |err| { - errs.append(err) catch {}; - }; - } - - /// Write the data completely to the given file. - /// Ignores the file's current offset and writes from the beginning of the file. - /// Returns the amount of bytes written. - /// - /// Optimized for lower memory usage by using File.pwrite. - pub fn writeToFile(self: *DataExtractor, pool: *std.Thread.Pool, fil: *fs.File) !void { - var wg: std.Thread.WaitGroup = .{}; - var errs: MutexList = .init(self.alloc); - defer errs.deinit(); - for (0..self.sizes.len) |i| { - wg.start(); - try pool.spawn(processBlockToFile, .{ self, &wg, &errs, i, fil }); - } - if (self.frag_data != null) { - wg.start(); - try pool.spawn(fragmentToFile, .{ self, &wg, &errs, fil }); - } - wg.wait(); - if (errs.list.items.len > 0) { - //TODO: better handle all the errors - return errs.list.items[0]; - } - } - - // fn processBlock(self: *DataExtractor, errs: std.ArrayList(anyerror), data_out: std.AutoHashMap([]u8), block_ind: u32) void { - // const offset_rdr = self.holder.readerAt(self.block_offset[block_ind]); - // const out = self.decomp.decompress( - // self.alloc, - // std.io.limitedReader(offset_rdr, self.sizes[block_ind].size), - // ) catch |err| { - // errs.append(err); - // return; - // }; - // data_out.put(block_ind, ) - // } - - // Write the data completely to the given writer. - // Returns the amount of bytes written. - // - // To write data in order, some data may end up cached temporarily. - // pub fn writeToWriter(self: DataExtractor, pool: *std.Thread.Pool, writer: io.AnyWriter) !void { - // const wg: std.Thread.WaitGroup = .{}; - // const errs: std.ArrayList(anyerror) = .init(self.alloc); - // const data: std.AutoHashMap(u32, []u8) = .init(self.alloc); - // const cond: std.Thread. = .{}; - // defer errs.deinit(); - // for (0..self.sizes.len) |i| { - // pool.spawnWg(&wg, processBlock, .{ &self, i, fil }); - // } - // wg.wait(); - // } -}; - -const MutexList = struct { - list: std.ArrayList(anyerror), - mut: std.Thread.Mutex = .{}, - - fn init(alloc: std.mem.Allocator) MutexList { - return .{ - .list = .init(alloc), - }; - } - fn deinit(self: *MutexList) void { - self.list.deinit(); - } - - fn append(self: *MutexList, err: anyerror) !void { - self.mut.lock(); - defer self.mut.unlock(); - try self.list.append(err); - } -}; diff --git a/src/readers/data_reader.zig b/src/readers/data_reader.zig deleted file mode 100644 index 5f9e1d4..0000000 --- a/src/readers/data_reader.zig +++ /dev/null @@ -1,164 +0,0 @@ -const std = @import("std"); -const io = std.io; -const fs = std.fs; - -const File = @import("../file.zig").File; -const Reader = @import("../reader.zig").Reader; -const BlockSize = @import("../inode/file.zig").BlockSize; -const DecompressionType = @import("../decompress.zig").DecompressType; -const FileOffsetReader = @import("../readers/file_holder.zig").FileOffsetReader; -const FragEntry = @import("../fragment.zig").FragEntry; - -const DataReaderError = error{ - EOF, -}; - -pub const DataReader = struct { - alloc: std.mem.Allocator, - decomp: DecompressionType, - rdr: FileOffsetReader, - block_size: u32, - file_size: u64, - sizes: []BlockSize, - frag_data: ?[]u8 = null, - - next_block_num: u32 = 0, - cur_bloc: []u8 = &[0]u8{}, - cur_offset: u32 = 0, - - pub fn init(fil: *File, reader: *Reader) !DataReader { - var data_start: u64 = 0; - var sizes: []BlockSize = undefined; - var file_size: u64 = 0; - var frag_idx: u32 = 0; - var frag_offset: u32 = 0; - switch (fil.inode.data) { - .file => |f| { - sizes = try reader.alloc.alloc(BlockSize, f.blocks.len); - @memcpy(sizes, f.blocks); - data_start = f.data_start; - file_size = f.size; - frag_idx = f.frag_idx; - frag_offset = f.frag_offset; - }, - .ext_file => |f| { - sizes = try reader.alloc.alloc(BlockSize, f.blocks.len); - @memcpy(sizes, f.blocks); - data_start = f.data_start; - file_size = f.size; - frag_idx = f.frag_idx; - frag_offset = f.frag_offset; - }, - else => return File.FileError.NotNormalFile, - } - var out: DataReader = .{ - .alloc = reader.alloc, - .decomp = reader.super.decomp, - .rdr = reader.holder.readerAt(data_start), - .block_size = reader.super.block_size, - .file_size = file_size, - .sizes = sizes, - }; - errdefer out.deinit(); - if (frag_idx != 0xFFFFFFFF) { - const frag_ent = try reader.frag_table.getValue(reader, frag_idx); - out.frag_data = try frag_ent.getData(reader, frag_offset, @truncate(file_size % reader.super.block_size)); - } - return out; - } - pub fn fromFragEntry(reader: *Reader, ent: FragEntry) !DataReader { - const size = try reader.alloc.alloc(BlockSize, 1); - size[0] = ent.size; - return .{ - .alloc = reader.alloc, - .decomp = reader.super.decomp, - .rdr = reader.holder.readerAt(ent.start), - .block_size = reader.super.block_size, - .sizes = size, - }; - } - - pub fn deinit(self: *DataReader) void { - self.alloc.free(self.sizes); - if (self.cur_bloc.len > 0) self.alloc.free(self.cur_bloc); - if (self.frag_data != null) self.alloc.free(self.frag_data.?); - } - - pub fn skip(self: *DataReader, offset: u32) !void { - var cur_skip: u32 = 0; - var to_skip: u32 = 0; - while (cur_skip < offset) { - if (self.cur_offset >= self.cur_bloc.len) try self.readNextBlock(); - to_skip = @min(offset - cur_skip, self.cur_bloc.len - self.cur_offset); - cur_skip += to_skip; - self.cur_offset += to_skip; - } - } - - fn readNextBlock(self: *DataReader) !void { - defer self.next_block_num += 1; - if (self.next_block_num == self.sizes.len and self.frag_data != null) { - try self.sizeBlock(self.frag_data.?.len); - @memcpy(self.cur_bloc, self.frag_data.?); - return; - } else if (self.next_block_num >= self.sizes.len) { - return DataReaderError.EOF; - } - const siz = self.sizes[self.next_block_num]; - if (siz.size == 0) { - if (self.next_block_num == self.sizes.len) { - try self.sizeBlock(@truncate(self.file_size % self.block_size)); - } else { - try self.sizeBlock(self.block_size); - } - @memset(self.cur_bloc, 0); - return; - } - if (siz.not_compressed) { - try self.sizeBlock(siz.size); - _ = try self.rdr.any().readAll(self.cur_bloc); - } else { - self.alloc.free(self.cur_bloc); - var limit = std.io.limitedReader(self.rdr, siz.size); - var dat = try self.decomp.decompress(self.alloc, limit.reader().any()); - self.cur_bloc = try dat.toOwnedSlice(); - } - } - - fn sizeBlock(self: *DataReader, size: usize) !void { - if (!self.alloc.resize(self.cur_bloc, size)) { - self.alloc.free(self.cur_bloc); - self.cur_bloc = try self.alloc.alloc(u8, size); - } - } - - pub fn read(self: *DataReader, bytes: []u8) !usize { - var cur_read: usize = 0; - var to_read: usize = 0; - while (cur_read < bytes.len) { - if (self.cur_offset >= self.cur_bloc.len) { - self.readNextBlock() catch |err| { - if (err == DataReaderError.EOF) return cur_read; - return err; - }; - } - to_read = @min(bytes.len - cur_read, self.cur_bloc.len - self.cur_offset); - @memcpy(bytes[cur_read .. cur_read + to_read], self.cur_bloc[self.cur_offset .. @as(usize, self.cur_offset) + to_read]); - self.cur_offset += @truncate(to_read); - cur_read += to_read; - } - return cur_read; - } - - pub fn any(self: *DataReader) io.AnyReader { - return .{ - .context = @ptrCast(self), - .readFn = readOpaque, - }; - } - - fn readOpaque(context: *const anyopaque, bytes: []u8) !usize { - var self: *DataReader = @constCast(@ptrCast(@alignCast(context))); - return self.read(bytes); - } -}; diff --git a/src/readers/file_holder.zig b/src/readers/file_holder.zig deleted file mode 100644 index 703a02a..0000000 --- a/src/readers/file_holder.zig +++ /dev/null @@ -1,90 +0,0 @@ -const std = @import("std"); -const fs = std.fs; -const io = std.io; - -const File = std.fs.File; - -pub const FileHolder = struct { - file: File, - offset: u64, - - pub fn init(path: []const u8, offset: u64) !FileHolder { - return .{ - .file = try fs.cwd().openFile(path, .{ .mode = .read_write }), - .offset = offset, - }; - } - pub fn deinit(self: FileHolder) void { - self.file.close(); - } - - pub fn reader(self: *FileHolder) File.Reader { - return self.file.reader(); - } - pub fn readerAt(self: *FileHolder, offset: u64) FileOffsetReader { - return .{ - .file = &self.file, - .offset = self.offset + offset, - }; - } - - // pub fn writerAt(self: *FileHolder, offset: u64) FileOffsetWriter { - // return .{ - // .file = &self.file, - // .offset = self.offset + offset, - // }; - // } -}; - -pub const FileOffsetWriter = struct { - file: *File, - offset: u64, - - pub fn init(fil: *File, init_offset: u64) FileOffsetWriter { - return .{ - .file = fil, - .offset = init_offset, - }; - } - - pub const Error = fs.File.PWriteError; - - pub fn write(self: *FileOffsetWriter, bytes: []const u8) !usize { - try self.file.pwriteAll(bytes, self.offset); - self.offset += bytes.len; - return bytes.len; - } - pub fn any(self: *FileOffsetWriter) io.AnyWriter { - return .{ - .context = @ptrCast(self), - .writeFn = writeOpaque, - }; - } - fn writeOpaque(context: *const anyopaque, bytes: []const u8) anyerror!usize { - var rdr: *FileOffsetWriter = @constCast(@ptrCast(@alignCast(context))); - return try rdr.write(bytes); - } -}; - -pub const FileOffsetReader = struct { - file: *File, - offset: u64, - - pub const Error = fs.File.PReadError; - - pub fn read(self: *FileOffsetReader, bytes: []u8) !usize { - const red = try self.file.preadAll(bytes, self.offset); - self.offset += red; - return red; - } - pub fn any(self: *FileOffsetReader) io.AnyReader { - return .{ - .context = @ptrCast(self), - .readFn = readOpaque, - }; - } - fn readOpaque(context: *const anyopaque, bytes: []u8) !usize { - var rdr: *FileOffsetReader = @constCast(@ptrCast(@alignCast(context))); - return try rdr.read(bytes); - } -}; diff --git a/src/readers/metadata.zig b/src/readers/metadata.zig deleted file mode 100644 index 33d87ab..0000000 --- a/src/readers/metadata.zig +++ /dev/null @@ -1,77 +0,0 @@ -const std = @import("std"); -const io = std.io; - -const DecompressType = @import("../decompress.zig").DecompressType; - -const MetadataHeader = packed struct { - size: u15, - not_compressed: bool, -}; - -pub const MetadataReader = struct { - alloc: std.mem.Allocator, - decomp: DecompressType, - reader: io.AnyReader, - block: []u8 = &[0]u8{}, - offset: u32 = 0, - - pub fn init(alloc: std.mem.Allocator, decomp: DecompressType, rdr: io.AnyReader) MetadataReader { - return .{ - .alloc = alloc, - .decomp = decomp, - .reader = rdr, - }; - } - pub fn deinit(self: *MetadataReader) void { - self.alloc.free(self.block); - } - - pub fn skip(self: *MetadataReader, offset: u16) !void { - var cur_skip: u32 = 0; - var to_skip: u32 = 0; - while (cur_skip < offset) { - if (self.offset >= self.block.len) try self.readNextBlock(); - to_skip = @min(offset - cur_skip, self.block.len - self.offset); - cur_skip += to_skip; - self.offset += to_skip; - } - } - - fn readNextBlock(self: *MetadataReader) !void { - self.offset = 0; - if (self.block.len > 0) self.alloc.free(self.block); - const hdr = try self.reader.readStruct(MetadataHeader); - if (hdr.not_compressed) { - self.block = try self.alloc.alloc(u8, hdr.size); - _ = try self.reader.readAll(self.block); - } else { - var limit = std.io.limitedReader(self.reader, hdr.size); - var dat = try self.decomp.decompress(self.alloc, limit.reader().any()); - self.block = try dat.toOwnedSlice(); - } - } - - pub fn any(self: *MetadataReader) io.AnyReader { - return .{ - .context = @ptrCast(self), - .readFn = readOpaque, - }; - } - - pub fn read(self: *MetadataReader, bytes: []u8) !usize { - var cur_read: usize = 0; - var to_read: usize = 0; - while (cur_read < bytes.len) { - if (self.offset >= self.block.len) try self.readNextBlock(); - to_read = @min(bytes.len - cur_read, self.block.len - self.offset); - @memcpy(bytes[cur_read .. cur_read + to_read], self.block[self.offset .. @as(usize, self.offset) + to_read]); - self.offset += @truncate(to_read); - cur_read += to_read; - } - return cur_read; - } - fn readOpaque(context: *const anyopaque, bytes: []u8) !usize { - var rdr: *MetadataReader = @constCast(@ptrCast(@alignCast(context))); - return rdr.read(bytes); - } -}; diff --git a/src/root.zig b/src/root.zig index b64c197..e69de29 100644 --- a/src/root.zig +++ b/src/root.zig @@ -1 +0,0 @@ -pub const Reader = @import("reader.zig").Reader; diff --git a/src/superblock.zig b/src/superblock.zig index b1d255a..c17db3c 100644 --- a/src/superblock.zig +++ b/src/superblock.zig @@ -1,10 +1,4 @@ -const math = @import("std").math; - -const SuperblockError = error{ - InvalidMagic, - InvalidBlockLog, - InvalidVersion, -}; +const std = @import("std"); pub const Superblock = packed struct { magic: u32, @@ -12,28 +6,70 @@ pub const Superblock = packed struct { mod_time: u32, block_size: u32, frag_count: u32, - decomp: @import("decompress.zig").DecompressType, + comp: Compression, block_log: u16, - flags: u16, + flags: packed struct { + _: u4, + id_uncomp: bool, + comp_options: bool, + no_xattr: bool, + xattr_uncomp: bool, + has_export: bool, + de_dupe: bool, + frag_always: bool, + no_frag: bool, + frag_uncomp: bool, + check: bool, + data_uncomp: bool, + inode_uncomp: bool, + }, id_count: u16, ver_maj: u16, ver_min: u16, - root_ref: @import("inode/inode.zig").InodeRef, + root_ref: u64, size: u64, - id_table_start: u64, - xattr_table_start: u64, - inode_table_start: u64, - dir_table_start: u64, - frag_table_start: u64, - export_table_start: u64, + id_start: u64, + xattr_start: u64, + inode_start: u64, + dir_start: u64, + frag_start: u64, + export_start: u64, +}; - pub fn validate(self: Superblock) SuperblockError!void { - if (self.magic != 0x73717368) { - return SuperblockError.InvalidMagic; - } else if (self.block_log != math.log2(self.block_size)) { - return SuperblockError.InvalidBlockLog; - } else if (self.ver_maj != 4 or self.ver_min != 0) { - return SuperblockError.InvalidVersion; +const DecompressError = error{ + LzoUnavailable, + Lz4Unavailable, +}; + +pub const Compression = enum(u16) { + gzip = 1, + lzma, + lzo, + xz, + lz4, + zstd, + + pub fn decompress(self: Compression, comptime max_size: u16, alloc: std.mem.Allocator, source: anytype, dest: *[max_size]u8) !usize { + switch (self) { + .gzip => { + const decomp = std.compress.zlib.decompressor(source); + return decomp.read(dest); + }, + .lzma => { + const decomp = try std.compress.lzma.decompress(alloc, source); + return decomp.read(dest); + }, + .lzo => return DecompressError.LzoUnavailable, + .xz => { + const decomp = try std.compress.xz.decompress(alloc, source); + return decomp.read(dest); + }, + .lz4 => return DecompressError.Lz4Unavailable, + .zstd => { + const window: [@min(std.compress.zstd.DecompressorOptions.default_window_buffer_len, max_size)]u8 = undefined; + const decomp = std.compress.zstd.decompressor(source, .{ .window_buffer = window }); + return decomp.read(dest); + }, } } }; diff --git a/src/table.zig b/src/table.zig deleted file mode 100644 index e3acab4..0000000 --- a/src/table.zig +++ /dev/null @@ -1,56 +0,0 @@ -const std = @import("std"); - -const Reader = @import("reader.zig").Reader; -const DecompressType = @import("decompress.zig").DecompressType; -const FileHolder = @import("readers/file_holder.zig").FileHolder; -const FileOffsetReader = @import("readers/file_holder.zig").FileOffsetReader; -const MetadataReader = @import("readers/metadata.zig").MetadataReader; - -const TableError = error{InvalidIndex}; - -/// A lazily read squashfs table. -pub fn Table( - comptime T: type, -) type { - return struct { - decomp: DecompressType, - table: []T = &[0]T{}, - offset: u64, - item_count: u32, - - pub fn init(read: *Reader, offset: u64, item_count: u32) Self { - return .{ - .decomp = read.super.decomp, - .offset = offset, - .item_count = item_count, - }; - } - pub fn deinit(self: *Self, alloc: std.mem.Allocator) void { - if (self.table.len != 0) alloc.free(self.table); - } - - pub fn getValue(self: *Self, read: *Reader, i: u64) !T { - if (i >= self.item_count) return TableError.InvalidIndex; - if (self.table.len > i) return self.table[i]; - var offset_rdr: FileOffsetReader = undefined; - var meta_rdr: MetadataReader = undefined; - var meta_buf: [8]u8 = [1]u8{0} ** 8; - const meta_offset = std.mem.bytesAsValue(u64, &meta_buf); - var to_read: u32 = 0; - while (self.table.len <= i) { - _ = try read.holder.file.preadAll(&meta_buf, self.offset); - self.offset += 8; - offset_rdr = read.holder.readerAt(meta_offset.*); - meta_rdr = .init(read.alloc, self.decomp, offset_rdr.any()); - defer meta_rdr.deinit(); - to_read = @min(self.item_count - self.table.len, comptime 8192 / @sizeOf(T)); - const alloc_size = self.table.len + to_read; - if (self.table.len != 0) read.alloc.free(self.table); - self.table = try read.alloc.alloc(T, alloc_size); - _ = try meta_rdr.any().readAll(@ptrCast(self.table[self.table.len - to_read ..])); - } - return self.table[i]; - } - const Self: type = @This(); - }; -} diff --git a/src/zig_unsquashfs.zig b/src/zig_unsquashfs.zig deleted file mode 100644 index acab20c..0000000 --- a/src/zig_unsquashfs.zig +++ /dev/null @@ -1,203 +0,0 @@ -const std = @import("std"); -const config = @import("config"); - -const File = @import("file.zig").File; -const Reader = @import("reader.zig").Reader; -const ExtractConfig = @import("file.zig").File.ExtractConfig; - -const stdout = std.io.getStdOut(); - -var extr_files: std.ArrayList([]const u8) = undefined; -var offset: u64 = 0; -var verbose: bool = false; -var unbreak: bool = false; -var deref: bool = false; -var processors: u16 = 0; -var list: ListTypes = .None; - -var filename: []const u8 = ""; -var extr_location: []const u8 = ""; - -const ListTypes = enum { - None, - List, - ListAttr, - ListNumeric, -}; - -fn help() !void { - const help_msg = - \\Basic Usage: zig-unsquashfs [Options] SQUASHFS_FILE EXTRACT_LOCATION - \\ - \\General options: - \\ -e Path to a file or directory inside the archive to extract instead of the whole archive. - \\ Can be given multiple times. - \\ -o Skip before reading from the archive. - \\ -v Verbose output. - \\ --help Prints this help message. - \\ -h Same as --help - \\ - \\Extraction options: - \\ --unbreak-symlinks Attempt extract symlink targets along with symlinks. Will not place files outside of the extraction location. - \\ -us Same as --unbreak-symlinks - \\ --deref-symlinks Replace symlink files with their target. - \\ -ds Same as --deref-symlinks - \\ -p <#> Use at most # of processors. Defaults to logical core count. - \\ - \\Listing Options: - \\ -l List files instead of extracting. When used, you do not need to specify an extraction location. - \\ -ll Like -l, but with file attributes. - \\ -lln Like -ll, but with numeric uids and gids. - \\ - \\Other: - \\ --version Print version number. - \\ - ; - _ = try stdout.writeAll(help_msg); -} - -pub fn main() !void { - var alloc: std.heap.GeneralPurposeAllocator(.{}) = .init; - extr_files = .init(alloc.allocator()); - defer extr_files.deinit(); - var args = std.process.argsWithAllocator(alloc.allocator()) catch { - _ = try stdout.writeAll("Unable allocate memory"); - return; - }; - defer args.deinit(); - _ = args.next(); - while (args.next()) |arg| { - if (std.mem.eql(u8, arg, "--help") or std.mem.eql(u8, arg, "-h")) { - try help(); - return; - } else if (std.mem.eql(u8, arg, "-v")) { - verbose = true; - } else if (std.mem.eql(u8, arg, "--unbreak-symlinks") or std.mem.eql(u8, arg, "-us")) { - unbreak = true; - } else if (std.mem.eql(u8, arg, "--deref-symlinks") or std.mem.eql(u8, arg, "-ds")) { - deref = true; - } else if (std.mem.eql(u8, arg, "-l")) { - list = .List; - } else if (std.mem.eql(u8, arg, "-ll")) { - list = .ListAttr; - } else if (std.mem.eql(u8, arg, "-lln")) { - list = .ListNumeric; - } else if (std.mem.eql(u8, arg, "-e")) { - const next = args.next(); - if (next == null) { - _ = try stdout.writeAll("path required after -e\n"); - return; - } - try extr_files.append(next.?); - } else if (std.mem.eql(u8, arg, "-o")) { - const next = args.next(); - if (next == null) { - _ = try stdout.writeAll("offset required after -o\n"); - return; - } - offset = try std.fmt.parseInt(u64, next.?, 10); - } else if (std.mem.eql(u8, arg, "-p")) { - const next = args.next(); - if (next == null) { - _ = try stdout.writeAll("number required after -p\n"); - return; - } - processors = try std.fmt.parseInt(u16, next.?, 10); - } else if (std.mem.eql(u8, arg, "--version")) { - try config.version.format("", .{}, stdout.writer()); - _ = try stdout.write("\n"); - return; - } else if (filename.len == 0) { - filename = arg; - } else if (extr_location.len == 0) { - extr_location = arg; - } else { - _ = try stdout.writeAll("invalid or too many arguments\n"); - return; - } - } - if (filename.len == 0) { - _ = try stdout.writeAll("no archive given\n"); - return; - } - if (list == .None and extr_location.len == 0) { - _ = try stdout.writeAll("no extract location given\n"); - return; - } - var rdr = Reader.init( - alloc.allocator(), - filename, - offset, - ) catch |err| { - try std.fmt.format(stdout.writer(), "Error opening {s} as squashfs: {any}\n", .{ filename, err }); - return; - }; - defer rdr.deinit(); - switch (list) { - .None => { - var conf = ExtractConfig.init() catch |err| { - try std.fmt.format(stdout.writer(), "Error getting system info: {any}\n", .{err}); - return; - }; - conf.deref_sym = deref; - conf.unbreak_sym = unbreak; - conf.verbose = verbose; - if (extr_files.items.len == 0) { - rdr.root.extract(&rdr, conf, extr_location) catch |err| { - try std.fmt.format(stdout.writer(), "Error extracting archive: {any}\n", .{err}); - return; - }; - } else { - for (extr_files.items) |path| { - var fil = rdr.root.open(&rdr, path) catch |err| { - try std.fmt.format(stdout.writer(), "Error extracting {s}: {any}\n", .{ path, err }); - return; - }; - defer fil.deinit(alloc.allocator()); - fil.extract(&rdr, conf, extr_location) catch |err| { - try std.fmt.format(stdout.writer(), "Error extracting {s}: {any}\n", .{ path, err }); - return; - }; - } - } - }, - else => { - if (extr_files.items.len == 0) { - try printFile(alloc.allocator(), &rdr, &rdr.root); - } else { - for (extr_files.items) |path| { - var fil = rdr.root.open(&rdr, path) catch |err| { - try std.fmt.format(stdout.writer(), "Error finding {s}: {any}\n", .{ path, err }); - return; - }; - defer fil.deinit(alloc.allocator()); - try printFile(alloc.allocator(), &rdr, &fil); - } - } - }, - } -} - -fn printFile(alloc: std.mem.Allocator, rdr: *Reader, f: *File) anyerror!void { - const pth = try f.file_path(alloc); - defer alloc.free(pth); - if (list == .List) { - try std.fmt.format(stdout.writer(), "{s}\n", .{pth}); - if (f.isDir()) { - try printDir(alloc, rdr, f); - } - return; - } - try std.fmt.format(stdout.writer(), "{s} {d}/{d} {d} {s}\n", .{ "tmp-perm", try f.uid(rdr), try f.gid(rdr), f.size(), pth }); - if (f.isDir()) { - try printDir(alloc, rdr, f); - } -} - -fn printDir(alloc: std.mem.Allocator, rdr: *Reader, f: *File) anyerror!void { - var iter = try f.iterator(rdr); - defer iter.deinit(); - while (iter.next()) |fil| { - try printFile(alloc, rdr, fil); - } -} From 23687eabb05ff8c0ab68f57f71c401cb37d4c59a Mon Sep 17 00:00:00 2001 From: Caleb Gardner Date: Fri, 11 Jul 2025 04:32:02 -0500 Subject: [PATCH 02/15] Further progress in parsing format --- src/bin/unsquashfs.zig | 133 +++++++++++++++++++++++++++++++++++++++- src/directory.zig | 70 +++++++++++++++++++++ src/fragment.zig | 7 +++ src/inode.zig | 18 ++++-- src/reader.zig | 38 ++++++++++-- src/reader/metadata.zig | 26 ++++++++ src/reader/p_read.zig | 29 +++++++++ src/reader/to_read.zig | 24 ++++++++ src/root.zig | 5 ++ src/superblock.zig | 6 +- src/table.zig | 68 ++++++++++++++++++++ 11 files changed, 412 insertions(+), 12 deletions(-) create mode 100644 src/directory.zig create mode 100644 src/fragment.zig create mode 100644 src/reader/p_read.zig create mode 100644 src/reader/to_read.zig create mode 100644 src/table.zig diff --git a/src/bin/unsquashfs.zig b/src/bin/unsquashfs.zig index 66a7fbc..48d1f45 100644 --- a/src/bin/unsquashfs.zig +++ b/src/bin/unsquashfs.zig @@ -1,3 +1,132 @@ -const import = @import("std"); +const std = @import("std"); +const config = @import("config"); +const squashfs = @import("squashfs"); -pub fn main() void {} +const help_msg = + \\Basic Usage: zig-unsquashfs [Options] SQUASHFS_FILE + \\ + \\General options: + \\ -e Path to a file or directory inside the archive to extract instead of the whole archive. + \\ Can be given multiple times. + \\ -o Skip before reading from the archive. + \\ -v Verbose output. + \\ + \\Extraction options: + \\ --unbreak-symlinks Attempt extract symlink targets along with symlinks. Will not place files outside of the extraction location. + \\ -us Same as --unbreak-symlinks + \\ --deref-symlinks Replace symlink files with their target. + \\ -ds Same as --deref-symlinks + \\ -p <#> Use at most # of processors. Defaults to logical core count. + \\ + \\Listing Options: + \\ -l List files instead of extracting. When used, you do not need to specify an extraction location. + \\ -ll Similiar to -l, but with file attributes. + \\ -lln Similiar to -ll, but with numeric uids and gids. + \\ + \\Other: + \\ --help Prints this help message. + \\ -h Same as --help + \\ --version Print version number. + \\ +; + +const stdout = std.io.getStdOut(); + +var extr_files: std.ArrayList([]const u8) = undefined; +var offset: u64 = 0; +var verbose: bool = false; +var unbreak: bool = false; +var deref: bool = false; +var processors: u16 = 0; +var list: ListTypes = .None; + +var filename: []const u8 = ""; +var extr_location: []const u8 = ""; + +const ListTypes = enum { + None, + List, + ListAttr, + ListNumeric, +}; + +pub fn main() !void { + var alloc: std.heap.GeneralPurposeAllocator(.{}) = .init; + extr_files = .init(alloc.allocator()); + defer extr_files.deinit(); + var args = std.process.argsWithAllocator(alloc.allocator()) catch { + _ = try stdout.writeAll("Unable to allocate memory"); + return; + }; + defer args.deinit(); + _ = args.next(); + while (args.next()) |arg| { + if (std.mem.eql(u8, arg, "--help") or std.mem.eql(u8, arg, "-h")) { + _ = try stdout.writeAll(help_msg); + return; + } else if (std.mem.eql(u8, arg, "--version")) { + try config.version.format("", .{}, stdout.writer()); + _ = try stdout.write("\n"); + return; + } else if (std.mem.eql(u8, arg, "-v")) { + verbose = true; + } else if (std.mem.eql(u8, arg, "--unbreak-symlinks") or std.mem.eql(u8, arg, "-us")) { + unbreak = true; + } else if (std.mem.eql(u8, arg, "--deref-symlinks") or std.mem.eql(u8, arg, "-ds")) { + deref = true; + } else if (std.mem.eql(u8, arg, "-l")) { + list = .List; + } else if (std.mem.eql(u8, arg, "-ll")) { + list = .ListAttr; + } else if (std.mem.eql(u8, arg, "-lln")) { + list = .ListNumeric; + } else if (std.mem.eql(u8, arg, "-e")) { + const next = args.next(); + if (next == null) { + _ = try stdout.writeAll("path required after -e\n"); + return; + } + try extr_files.append(next.?); + } else if (std.mem.eql(u8, arg, "-o")) { + const next = args.next(); + if (next == null) { + _ = try stdout.writeAll("offset required after -o\n"); + return; + } + offset = try std.fmt.parseInt(u64, next.?, 10); + } else if (std.mem.eql(u8, arg, "-p")) { + const next = args.next(); + if (next == null) { + _ = try stdout.writeAll("number required after -p\n"); + return; + } + processors = try std.fmt.parseInt(u16, next.?, 10); + } else if (filename.len == 0) { + filename = arg; + } else if (extr_location.len == 0) { + extr_location = arg; + } else { + _ = try stdout.writeAll("invalid or too many arguments\n"); + return; + } + } + if (filename.len == 0) { + _ = try stdout.writeAll("no archive given\n"); + return; + } + if (list == .None and extr_location.len == 0) { + _ = try stdout.writeAll("no extract location given\n"); + return; + } + const fil = try std.fs.cwd().openFile(filename, .{}); + var rdr = squashfs.FileReader.init( + alloc.allocator(), + fil, + offset, + ) catch |err| { + try std.fmt.format(stdout.writer(), "Error opening {s} as squashfs: {any}\n", .{ filename, err }); + return; + }; + defer rdr.deinit(); + //TODO +} diff --git a/src/directory.zig b/src/directory.zig new file mode 100644 index 0000000..36257ff --- /dev/null +++ b/src/directory.zig @@ -0,0 +1,70 @@ +const std = @import("std"); + +const InodeType = @import("inode.zig").Type; +const Compression = @import("superblock.zig").Compression; + +const Header = extern struct { //use extern instead of packed, due to bit alignment + count: u32, + block: u32, + num: u32, +}; + +const RawEntry = struct { + offset: u16, + num_offset: i16, + type: InodeType, + size: u16, + name: []const u8, + + pub fn init(alloc: std.mem.Allocator, rdr: anytype) !RawEntry { + const fixed: [8]u8 = undefined; + _ = try rdr.read(&fixed); + const size = std.mem.readInt(u16, fixed[6..8], .little); + const name = try alloc.alloc(u8, size + 1); + _ = try rdr.read(name); + return .{ + .offset = std.mem.readInt(u16, fixed[0..2], .little), + .num_offset = std.mem.readInt(i16, fixed[2..4], .little), + .type = std.mem.readInt(u16, fixed[4..6], .little), + .size = size, + .name = name, + }; + } +}; + +pub const Entry = struct { + block: u32, + offset: u16, + num: u32, + type: InodeType, + name: []const u8, + + pub fn deinit(self: Entry, alloc: std.mem.Allocator) void { + alloc.free(self.name); + } +}; + +pub fn readDirectory(alloc: std.mem.Allocator, rdr: anytype, size: u32) []Entry { + const entries: std.ArrayList(Entry) = .init(alloc); + errdefer entries.deinit(); + var cur_red: u32 = 3; // dir size includes "." & "..", so its actual size is off by 3. + var hdr: Header = undefined; + while (cur_red < size) { + _ = try rdr.read(std.mem.asBytes(&hdr)); + cur_red += 12; + try entries.ensureUnusedCapacity(hdr.count + 1); + for (0..hdr.count + 1) |_| { + const raw_ent: RawEntry = try .init(alloc, rdr); + cur_red += 9 + raw_ent.size; + errdefer alloc.free(raw_ent.name); + entries.appendAssumeCapacity(.{ + .block = hdr.block, + .offset = raw_ent.offset, + .num = hdr.num + raw_ent.num_offset, + .type = raw_ent.type, + .name = raw_ent.name, + }); + } + } + return entries.toOwnedSlice(); +} diff --git a/src/fragment.zig b/src/fragment.zig new file mode 100644 index 0000000..49e27cf --- /dev/null +++ b/src/fragment.zig @@ -0,0 +1,7 @@ +const BlockSize = @import("inode/file.zig").BlockSize; + +pub const FragEntry = packed struct { + block: u64, + size: BlockSize, + _: u32, +}; diff --git a/src/inode.zig b/src/inode.zig index 321d344..b44583d 100644 --- a/src/inode.zig +++ b/src/inode.zig @@ -4,13 +4,17 @@ const dir = @import("inode/dir.zig"); const file = @import("inode/file.zig"); const misc = @import("inode/misc.zig"); +const ToRead = @import("reader/to_read.zig").ToRead; +const Compression = @import("superblock.zig").Compression; +const MetadataReader = @import("reader/metadata.zig").MetadataReader; + pub const Ref = packed struct { offset: u16, block: u32, _: u16, }; -const Type = enum(u16) { +pub const Type = enum(u16) { dir = 1, file, symlink, @@ -27,7 +31,7 @@ const Type = enum(u16) { ext_socket, }; -const Header = packed struct { +pub const Header = packed struct { type: Type, perm: u16, uid_idx: u16, @@ -36,7 +40,7 @@ const Header = packed struct { num: u32, }; -const Data = union(enum) { +pub const Data = union(enum) { dir: dir.Dir, file: file.File, symlink: misc.Symlink, @@ -59,7 +63,6 @@ hdr: Header, data: Data, pub fn init(rdr: anytype, alloc: std.mem.Allocator, block_size: u32) !Self { - std.debug.assert(std.meta.hasFn(@TypeOf(rdr), "read")); var hdr: Header = undefined; _ = try rdr.read(std.mem.asBytes(&hdr)); const data = switch (hdr.type) { @@ -83,3 +86,10 @@ pub fn init(rdr: anytype, alloc: std.mem.Allocator, block_size: u32) !Self { .data = data, }; } +pub fn initFromRef(p_rdr: anytype, comp: Compression, ref: Ref, table_start: u64, alloc: std.mem.Allocator, block_size: u32) !Self { + const rdr: ToRead(@TypeOf(p_rdr)) = .init(p_rdr, ref.block + table_start); + const meta_rdr: MetadataReader(ToRead(@TypeOf(p_rdr))) = try .init(alloc, comp, rdr); + defer meta_rdr.deinit(); + try meta_rdr.skip(ref.offset); + return init(meta_rdr, alloc, block_size); +} diff --git a/src/reader.zig b/src/reader.zig index a21312a..e907949 100644 --- a/src/reader.zig +++ b/src/reader.zig @@ -1,25 +1,53 @@ const std = @import("std"); +const Inode = @import("inode.zig"); +const Table = @import("table.zig").Table; +const PRead = @import("reader/p_read.zig").PRead; +const FragEntry = @import("fragment.zig").FragEntry; const Superblock = @import("superblock.zig").Superblock; pub fn Reader(comptime T: type) type { - std.debug.assert(std.meta.hasFn(T, "pread")); + comptime std.debug.assert(std.meta.hasFn(T, "pread")); return struct { const Self = @This(); alloc: std.mem.Allocator, - rdr: T, + rdr: PRead(T), super: Superblock = undefined, + /// ID table. Can be accessed directly + id_table: Table(u32, T) = undefined, + /// Fragment table. Can be accessed directly + frag_table: Table(FragEntry, T) = undefined, + /// Export table. Each element is an inode referce. + /// If accessing directly, keep in mind, the table starts at inode 1, as such it's recommended to use the InodeAt function instead. + export_table: Table(Inode.Ref, T) = undefined, + root: ?Inode = null, - pub fn init(alloc: std.mem.Allocator, rdr: T) Self { - const out = Self{ + pub fn init(alloc: std.mem.Allocator, rdr: T, offset: u64) !Self { + var out: Self = .{ .alloc = alloc, - .rdr = rdr, + .rdr = .init(rdr, offset), }; _ = try rdr.pread(std.mem.asBytes(&out.super), 0); + out.frag_table = .init(alloc, rdr, out.super.frag_start, out.super.frag_count); + out.id_table = .init(alloc, rdr, out.super.id_start, out.super.id_count); + out.export_table = .init(alloc, rdr, out.super.export_start, out.super.inode_count); return out; } + pub fn deinit(self: *Self) void { + self.id_table.deinit(); + self.frag_table.deinit(); + self.export_table.deinit(); + } + + /// Returns the inode with the given Inode Number. + /// Requires for the archive to have an export table. + pub fn InodeAt(self: Self, num: u32) !Inode { + const ref = try self.export_table.get(num - 1); + _ = ref; + return error{TODO}.TODO; + } }; } diff --git a/src/reader/metadata.zig b/src/reader/metadata.zig index f468717..66489fc 100644 --- a/src/reader/metadata.zig +++ b/src/reader/metadata.zig @@ -8,6 +8,7 @@ const MetaHeader = packed struct { }; pub fn MetadataReader(comptime T: type) type { + comptime std.debug.assert(std.meta.hasFn(T, "read")); return struct { const Self = @This(); @@ -38,6 +39,31 @@ pub fn MetadataReader(comptime T: type) type { std.io.limitedReader(self.rdr, hdr.size), self.block, ); + self.block_offset = 0; + } + + pub fn skip(self: *Self, offset: u32) !void { + var skipped = 0; + var to_skip = 0; + while (skipped < offset) { + if (self.block_offset >= self.block_size) try self.readNextBlock(); + to_skip = @min(self.block_size - self.block_offset, offset - skipped); + self.block_offset += to_skip; + skipped += to_skip; + } + } + + pub fn read(self: *Self, buf: []u8) !usize { + var cur_red: usize = 0; + var to_read: usize = 0; + while (cur_red < buf.len) { + if (self.block_offset >= self.block_size) try self.readNextBlock(); + to_read = @min(buf.len - cur_red, self.block_size - self.block_offset); + @memcpy(buf[cur_red .. cur_red + to_read], self.block[self.block_offset .. self.block_offset + to_read]); + cur_red += to_read; + self.block_offset += to_read; + } + return cur_red; } }; } diff --git a/src/reader/p_read.zig b/src/reader/p_read.zig new file mode 100644 index 0000000..ca9e476 --- /dev/null +++ b/src/reader/p_read.zig @@ -0,0 +1,29 @@ +const std = @import("std"); + +const ToRead = @import("to_read.zig").ToRead; + +/// A simple wrapper around a type with the pread([]u8, u64) function. +/// Provides a couple useful utility functions. +pub fn PRead(comptime T: type) type { + comptime std.debug.assert(std.meta.hasFn(T, "pread")); + return struct { + const Self = @This(); + + rdr: T, + offset: u64, + + pub fn init(rdr: T, offset: u64) Self { + return .{ + .rdr = rdr, + .offset = offset, + }; + } + + pub fn pread(self: Self, buf: []u8, offset: u64) !usize { + return self.rdr.pread(buf, self.offset + offset); + } + pub fn readerAt(self: Self, offset: u64) ToRead(T) { + return .init(self.rdr, self.offset + offset); + } + }; +} diff --git a/src/reader/to_read.zig b/src/reader/to_read.zig new file mode 100644 index 0000000..a311b4d --- /dev/null +++ b/src/reader/to_read.zig @@ -0,0 +1,24 @@ +const std = @import("std"); + +pub fn ToRead(comptime T: type) type { + comptime std.debug.assert(std.meta.hasFn(T, "pread")); + return struct { + const Self = @This(); + + rdr: T, + offset: u64, + + pub fn init(rdr: T, init_offset: u64) Self { + return .{ + .rdr = rdr, + .offset = init_offset, + }; + } + + pub fn read(self: *Self, buf: []u8) !usize { + const red = try self.rdr.pread(buf, self.offset); + self.offset += red; + return red; + } + }; +} diff --git a/src/root.zig b/src/root.zig index e69de29..c9d80e3 100644 --- a/src/root.zig +++ b/src/root.zig @@ -0,0 +1,5 @@ +const std = @import("std"); + +pub const Reader = @import("reader.zig").Reader; + +pub const FileReader = Reader(std.fs.File); diff --git a/src/superblock.zig b/src/superblock.zig index c17db3c..fea3886 100644 --- a/src/superblock.zig +++ b/src/superblock.zig @@ -1,5 +1,7 @@ const std = @import("std"); +const InodeRef = @import("inode.zig").Ref; + pub const Superblock = packed struct { magic: u32, inode_count: u32, @@ -26,7 +28,7 @@ pub const Superblock = packed struct { id_count: u16, ver_maj: u16, ver_min: u16, - root_ref: u64, + root_ref: InodeRef, size: u64, id_start: u64, xattr_start: u64, @@ -57,11 +59,13 @@ pub const Compression = enum(u16) { }, .lzma => { const decomp = try std.compress.lzma.decompress(alloc, source); + defer decomp.deinit(); return decomp.read(dest); }, .lzo => return DecompressError.LzoUnavailable, .xz => { const decomp = try std.compress.xz.decompress(alloc, source); + defer decomp.deinit(); return decomp.read(dest); }, .lz4 => return DecompressError.Lz4Unavailable, diff --git a/src/table.zig b/src/table.zig new file mode 100644 index 0000000..9e10ec4 --- /dev/null +++ b/src/table.zig @@ -0,0 +1,68 @@ +const std = @import("std"); + +const TableError = error{ + InvalidIndex, +}; + +pub fn Table(comptime T: type, comptime R: type) type { + comptime std.debug.assert(std.meta.hasFn(R, "pread")); + return struct { + const Self = @This(); + + alloc: std.mem.Allocator, + rdr: R, + + offset: u64, + table_count: u32, + mut: std.Thread.RwLock = .{}, + + table: []T = &[0]T{}, + + pub fn init(alloc: std.mem.Allocator, rdr: R, offset: u64, table_count: u32) Self { + return .{ + .alloc = alloc, + .rdr = rdr, + .offset = offset, + .table_count = table_count, + }; + } + pub fn deinit(self: Self) void { + self.alloc.free(self.table); + } + + fn resize(self: *Self, to_add: usize) !void { + if (!self.alloc.resize(self.table, self.table.len + to_add)) { + const new_table = try self.alloc.alloc(T, self.table.len + to_add); + @memcpy(new_table[0..self.table.len], self.table); + self.alloc.free(self.table); + self.table = new_table; + } + } + + pub fn get(self: *Self, idx: u32) !T { + if (idx >= self.table_count) return TableError.InvalidIndex; + self.mut.lockShared(); + defer self.mut.unlockShared(); + if (idx >= self.table.len) { + return self.getAndFill(idx); + } + return self.table[idx]; + } + fn getAndFill(self: *Self, idx: u32) !T { + self.mut.unlockShared(); + defer self.mut.lockShared(); + self.mut.lock(); + defer self.mut.unlock(); + var to_read: usize = 0; + var offset: u64 = 0; + while (idx >= self.table.len) { + to_read = @min(self.table_count - self.table.len, comptime 8192 / @sizeOf(T)); + try self.resize(to_read); + _ = try self.rdr.pread(std.mem.asBytes(&offset), self.offset); + self.offset += 8; + _ = try self.rdr.pread(std.mem.sliceAsBytes(self.table[self.table.len - to_read ..]), offset); + } + return self.table[idx]; + } + }; +} From 61c86c9feaf82f4e0be8f071879832097aa14354 Mon Sep 17 00:00:00 2001 From: Caleb Gardner Date: Fri, 11 Jul 2025 06:45:51 -0500 Subject: [PATCH 03/15] Laying down some framework --- src/extract_options.zig | 6 ++++++ src/file.zig | 3 +++ src/root.zig | 1 + 3 files changed, 10 insertions(+) create mode 100644 src/extract_options.zig create mode 100644 src/file.zig diff --git a/src/extract_options.zig b/src/extract_options.zig new file mode 100644 index 0000000..3857943 --- /dev/null +++ b/src/extract_options.zig @@ -0,0 +1,6 @@ +dereference_symlinks: bool, +unbreak_symlinks: bool, +ignore_permissions: bool, + +processor_count: u16, +// max_memory: u64, diff --git a/src/file.zig b/src/file.zig new file mode 100644 index 0000000..5f96003 --- /dev/null +++ b/src/file.zig @@ -0,0 +1,3 @@ +pub fn File(comptime T: type) type { + return struct {}; +} diff --git a/src/root.zig b/src/root.zig index c9d80e3..3fd6e81 100644 --- a/src/root.zig +++ b/src/root.zig @@ -1,5 +1,6 @@ const std = @import("std"); pub const Reader = @import("reader.zig").Reader; +pub const ExtractionOptions = @import("extract_options.zig"); pub const FileReader = Reader(std.fs.File); From 69d90242bac89cbe219c25ac4d849d8f14e5c866 Mon Sep 17 00:00:00 2001 From: Caleb Gardner Date: Sat, 12 Jul 2025 02:13:41 -0500 Subject: [PATCH 04/15] Further progress Mainly focusing on getting things more consistent --- src/extract_options.zig | 26 +++++++++++-- src/file.zig | 83 ++++++++++++++++++++++++++++++++++++++++- src/inode.zig | 15 +++++--- src/reader.zig | 23 +++++++++--- src/reader/data.zig | 55 +++++++++++++++++++++++++++ src/reader/metadata.zig | 23 ++++++++---- src/root.zig | 4 +- src/superblock.zig | 2 +- src/table.zig | 2 +- 9 files changed, 206 insertions(+), 27 deletions(-) create mode 100644 src/reader/data.zig diff --git a/src/extract_options.zig b/src/extract_options.zig index 3857943..4658940 100644 --- a/src/extract_options.zig +++ b/src/extract_options.zig @@ -1,6 +1,24 @@ -dereference_symlinks: bool, -unbreak_symlinks: bool, -ignore_permissions: bool, +const std = @import("std"); + +const Self = @This(); + +/// Replace symlinks with their targets +dereference_symlinks: bool = false, +/// Always extract a symlink's target if it's part of the archive. +/// May result in the symlink's target being changed. +unbreak_symlinks: bool = false, +/// Do not set file's permissions & owner when extracted. +ignore_permissions: bool = false, -processor_count: u16, // max_memory: u64, + +pol: std.Thread.Pool = undefined, + +pub fn init(alloc: std.mem.Allocator, thread_count: u16) !Self { + var out: Self = .{}; + out.pol.init(.{ + .allocator = alloc, + .n_jobs = thread_count, + }); + return out; +} diff --git a/src/file.zig b/src/file.zig index 5f96003..1dc4990 100644 --- a/src/file.zig +++ b/src/file.zig @@ -1,3 +1,84 @@ +const std = @import("std"); + +const dir = @import("directory.zig"); + +const DirEntry = dir.Entry; +const Inode = @import("inode.zig"); +const SfsReader = @import("reader.zig").SfsReader; +const ToReader = @import("reader/to_read.zig").ToRead; +const ExtractionOptions = @import("extract_options.zig"); +const Compression = @import("superblock.zig").Compression; +const MetadataReader = @import("reader/metadata.zig").MetadataReader; + +pub const FileError = error{ + NotRegular, + NotDirectory, +}; + pub fn File(comptime T: type) type { - return struct {}; + return struct { + const Self = @This(); + + rdr: *SfsReader(T), + + inode: Inode, + name: []const u8, + + /// Directory entries. Only populated on directories. + entries: ?[]DirEntry = null, + /// File reader. Only populated on regular files. + // data_reader: ?DataReader + + pub fn init(rdr: *SfsReader(T), inode: Inode, name: []const u8) !Self { + var out = Self{ + .rdr = rdr, + .inode = inode, + .name = name, + }; + switch (inode.data) { + .dir => |d| { + const meta = MetadataReader(T).init( + rdr.alloc, + rdr.super.comp, + rdr.rdr, + d.block + rdr.super.dir_start, + ); + try meta.skip(d.offset); + out.entries = try dir.readDirectory(rdr.alloc, meta, d.size); + }, + .ext_dir => |d| { + const meta = MetadataReader(T).init( + rdr.alloc, + rdr.super.comp, + rdr.rdr, + d.block + rdr.super.dir_start, + ); + try meta.skip(d.offset); + out.entries = try dir.readDirectory(rdr.alloc, meta, d.size); + }, + .file => |f| { + _ = f; + //TODO + }, + .ext_file => |f| { + _ = f; + //TODO + }, + } + return out; + } + pub fn deinit(self: Self) void { + self.rdr.alloc.free(self.name); + self.inode.deinit(self.rdr.alloc); + if (self.entries != null) { + for (self.entries.?) |e| { + e.deinit(self.rdr.alloc); + } + self.rdr.alloc.free(self.entries.?); + } + // if(self.data_reader != null){ + // self.data_reader.?.deinit(); + // } + } + }; } diff --git a/src/inode.zig b/src/inode.zig index b44583d..b36ceca 100644 --- a/src/inode.zig +++ b/src/inode.zig @@ -4,6 +4,8 @@ const dir = @import("inode/dir.zig"); const file = @import("inode/file.zig"); const misc = @import("inode/misc.zig"); +const Reader = @import("reader.zig"); +const DirEntry = @import("directory.zig").Entry; const ToRead = @import("reader/to_read.zig").ToRead; const Compression = @import("superblock.zig").Compression; const MetadataReader = @import("reader/metadata.zig").MetadataReader; @@ -86,10 +88,11 @@ pub fn init(rdr: anytype, alloc: std.mem.Allocator, block_size: u32) !Self { .data = data, }; } -pub fn initFromRef(p_rdr: anytype, comp: Compression, ref: Ref, table_start: u64, alloc: std.mem.Allocator, block_size: u32) !Self { - const rdr: ToRead(@TypeOf(p_rdr)) = .init(p_rdr, ref.block + table_start); - const meta_rdr: MetadataReader(ToRead(@TypeOf(p_rdr))) = try .init(alloc, comp, rdr); - defer meta_rdr.deinit(); - try meta_rdr.skip(ref.offset); - return init(meta_rdr, alloc, block_size); +pub fn deinit(self: Self, alloc: std.mem.Allocator) void { + switch (self.data) { + .file => |f| alloc.free(f.block_sizes), + .ext_file => |f| alloc.free(f.block_sizes), + .symlink => |s| alloc.free(s.target), + .ext_symlink => |s| alloc.free(s.target), + } } diff --git a/src/reader.zig b/src/reader.zig index e907949..9328de8 100644 --- a/src/reader.zig +++ b/src/reader.zig @@ -5,8 +5,13 @@ const Table = @import("table.zig").Table; const PRead = @import("reader/p_read.zig").PRead; const FragEntry = @import("fragment.zig").FragEntry; const Superblock = @import("superblock.zig").Superblock; +const MetadataReader = @import("reader/metadata.zig").MetadataReader; -pub fn Reader(comptime T: type) type { +pub const SfsError = error{ + NotExportable, +}; + +pub fn SfsReader(comptime T: type) type { comptime std.debug.assert(std.meta.hasFn(T, "pread")); return struct { @@ -40,14 +45,22 @@ pub fn Reader(comptime T: type) type { self.id_table.deinit(); self.frag_table.deinit(); self.export_table.deinit(); + // if (self.root != null) self.root.?.deinit(); } /// Returns the inode with the given Inode Number. - /// Requires for the archive to have an export table. - pub fn InodeAt(self: Self, num: u32) !Inode { + /// Requires the archive to have an export table. + pub fn inodeAt(self: Self, num: u32) !Inode { + if (!self.super.flags.has_export) return SfsError.NotExportable; const ref = try self.export_table.get(num - 1); - _ = ref; - return error{TODO}.TODO; + const meta = MetadataReader(T).init( + self.alloc, + self.super.comp, + self.rdr, + self.super.inode_start + ref.block, + ); + try meta.skip(ref.offset); + return .init(meta, self.alloc, self.super.block_size); } }; } diff --git a/src/reader/data.zig b/src/reader/data.zig new file mode 100644 index 0000000..258decd --- /dev/null +++ b/src/reader/data.zig @@ -0,0 +1,55 @@ +const std = @import("std"); + +const PRead = @import("p_read.zig").Pread; +const FragEntry = @import("../fragment.zig").FragEntry; +const BlockSize = @import("../inode/file.zig").BlockSize; +const Compression = @import("../superblock.zig").Compression; + +pub fn DataReader(comptime T: type) type { + return struct { + const Self = @This(); + + alloc: std.mem.Allocator, + + rdr: PRead(T), + comp: Compression, + offsets: []BlockSize, + + file_size: u64, + block_size: u32, + sizes: []BlockSize, + + frag: []u8 = undefined, + + pub fn init( + alloc: std.mem.Allocator, + rdr: PRead(T), + comp: Compression, + init_offset: u64, + file_size: u64, + sizes: []BlockSize, + block_size: u32, + ) !Self { + var cur_offset = init_offset; + const offsets = alloc.alloc(u64, sizes.len); + for (0..sizes.len) |i| { + offsets[i] = cur_offset; + cur_offset += sizes[i].size; + } + return .{ + .alloc = alloc, + .rdr = rdr, + .comp = comp, + .offsets = offsets, + .file_size = file_size, + .block_size = block_size, + .sizes = sizes, + }; + } + + pub fn addFragment(self: *Self, entry: FragEntry, offset: u32) void { + self.frag = self.alloc.alloc(u8, self.file_size % self.block_size); + //TODO: + } + }; +} diff --git a/src/reader/metadata.zig b/src/reader/metadata.zig index 66489fc..ee9f8a5 100644 --- a/src/reader/metadata.zig +++ b/src/reader/metadata.zig @@ -1,5 +1,6 @@ const std = @import("std"); +const PRead = @import("p_read.zig").PRead; const Compression = @import("../superblock.zig").Compression; const MetaHeader = packed struct { @@ -14,36 +15,44 @@ pub fn MetadataReader(comptime T: type) type { alloc: std.mem.Allocator, comp: Compression, - rdr: T, + rdr: PRead(T), + offset: u64, block: [8192]u8 = undefined, block_size: usize = 0, block_offset: u32 = 0, - pub fn init(alloc: std.mem.Allocator, comp: Compression, rdr: T) !Self { - var out: Self = .{ + pub fn init(alloc: std.mem.Allocator, comp: Compression, rdr: PRead(T), offset: u64) Self { + return .{ .alloc = alloc, .comp = comp, .rdr = rdr, + .offset = offset, }; - try out.readNextBlock(); - return out; } fn readNextBlock(self: *Self) !void { const hdr: MetaHeader = undefined; - _ = try self.rdr.read(std.mem.asBytes(hdr)); + _ = try self.rdr.pread(std.mem.asBytes(hdr), self.offset); + self.offset += 2; self.block_size = try self.comp.decompress( 8192, self.alloc, - std.io.limitedReader(self.rdr, hdr.size), + std.io.limitedReader(self.rdr.readerAt(self.offset), hdr.size), self.block, ); + self.offset += hdr.size; self.block_offset = 0; } pub fn skip(self: *Self, offset: u32) !void { var skipped = 0; + const hdr: MetaHeader = undefined; + while (offset - skipped >= 8192) { + _ = try self.rdr.pread(std.mem.asBytes(hdr), self.offset); + self.offset += 2 + hdr.size; + skipped += 8192; + } var to_skip = 0; while (skipped < offset) { if (self.block_offset >= self.block_size) try self.readNextBlock(); diff --git a/src/root.zig b/src/root.zig index 3fd6e81..fc291d4 100644 --- a/src/root.zig +++ b/src/root.zig @@ -1,6 +1,6 @@ const std = @import("std"); -pub const Reader = @import("reader.zig").Reader; +pub const SfsReader = @import("reader.zig").SfsReader; pub const ExtractionOptions = @import("extract_options.zig"); -pub const FileReader = Reader(std.fs.File); +pub const FileReader = SfsReader(std.fs.File); diff --git a/src/superblock.zig b/src/superblock.zig index fea3886..f20bddc 100644 --- a/src/superblock.zig +++ b/src/superblock.zig @@ -38,7 +38,7 @@ pub const Superblock = packed struct { export_start: u64, }; -const DecompressError = error{ +pub const DecompressError = error{ LzoUnavailable, Lz4Unavailable, }; diff --git a/src/table.zig b/src/table.zig index 9e10ec4..2f857d0 100644 --- a/src/table.zig +++ b/src/table.zig @@ -1,6 +1,6 @@ const std = @import("std"); -const TableError = error{ +pub const TableError = error{ InvalidIndex, }; From b0dced90bcdb210327b4baaf48e96c1dabfae252 Mon Sep 17 00:00:00 2001 From: Caleb Gardner Date: Mon, 14 Jul 2025 01:40:04 -0500 Subject: [PATCH 05/15] MORE PROGRESS --- src/inode.zig | 1 + src/reader.zig | 29 +++++++++++++- src/reader/data.zig | 85 +++++++++++++++++++++++++++++++++++++---- src/reader/metadata.zig | 3 +- src/superblock.zig | 4 +- 5 files changed, 109 insertions(+), 13 deletions(-) diff --git a/src/inode.zig b/src/inode.zig index b36ceca..e959a4f 100644 --- a/src/inode.zig +++ b/src/inode.zig @@ -94,5 +94,6 @@ pub fn deinit(self: Self, alloc: std.mem.Allocator) void { .ext_file => |f| alloc.free(f.block_sizes), .symlink => |s| alloc.free(s.target), .ext_symlink => |s| alloc.free(s.target), + else => {}, } } diff --git a/src/reader.zig b/src/reader.zig index 9328de8..4f982c4 100644 --- a/src/reader.zig +++ b/src/reader.zig @@ -1,6 +1,7 @@ const std = @import("std"); const Inode = @import("inode.zig"); +const File = @import("file.zig").File; const Table = @import("table.zig").Table; const PRead = @import("reader/p_read.zig").PRead; const FragEntry = @import("fragment.zig").FragEntry; @@ -28,7 +29,7 @@ pub fn SfsReader(comptime T: type) type { /// Export table. Each element is an inode referce. /// If accessing directly, keep in mind, the table starts at inode 1, as such it's recommended to use the InodeAt function instead. export_table: Table(Inode.Ref, T) = undefined, - root: ?Inode = null, + root: ?File(T) = null, pub fn init(alloc: std.mem.Allocator, rdr: T, offset: u64) !Self { var out: Self = .{ @@ -45,7 +46,31 @@ pub fn SfsReader(comptime T: type) type { self.id_table.deinit(); self.frag_table.deinit(); self.export_table.deinit(); - // if (self.root != null) self.root.?.deinit(); + if (self.root != null) self.root.?.deinit(); + } + + fn populateRoot(self: *Self) !void { + if (self.root != null) return; + const meta = MetadataReader(T).init( + self.alloc, + self.super.comp, + self.rdr, + self.super.inode_start + self.super.root_ref.block, + ); + try meta.skip(self.super.root_ref.offset); + const root_inode: Inode = try .init(meta, self.alloc, self.super.block_size); + self.root = try .init(self, root_inode, ""); + } + + pub fn archiveRoot(self: *Self) !File { + if (self.root == null) try self.populateRoot(); + return self.root.?; + } + pub fn open(self: *Self, path: []const u8) !File { + if (self.root == null) try self.populateRoot(); + _ = path; + // return self.root.?.open(path); + return error{TODO}.TODO; } /// Returns the inode with the given Inode Number. diff --git a/src/reader/data.zig b/src/reader/data.zig index 258decd..d71d924 100644 --- a/src/reader/data.zig +++ b/src/reader/data.zig @@ -1,6 +1,6 @@ const std = @import("std"); -const PRead = @import("p_read.zig").Pread; +const PRead = @import("p_read.zig").PRead; const FragEntry = @import("../fragment.zig").FragEntry; const BlockSize = @import("../inode/file.zig").BlockSize; const Compression = @import("../superblock.zig").Compression; @@ -10,16 +10,21 @@ pub fn DataReader(comptime T: type) type { const Self = @This(); alloc: std.mem.Allocator, + pool: ?*std.Thread.Pool = null, rdr: PRead(T), comp: Compression, - offsets: []BlockSize, + offsets: []u64, file_size: u64, block_size: u32, sizes: []BlockSize, - frag: []u8 = undefined, + frag: []u8 = &[0]u8{}, + + read_block: []u8, + read_offset: u64, + read_idx: u32 = 0, pub fn init( alloc: std.mem.Allocator, @@ -46,10 +51,76 @@ pub fn DataReader(comptime T: type) type { .sizes = sizes, }; } - - pub fn addFragment(self: *Self, entry: FragEntry, offset: u32) void { - self.frag = self.alloc.alloc(u8, self.file_size % self.block_size); - //TODO: + pub fn deinit(self: Self) void { + self.alloc.free(self.offsets); + self.alloc.free(self.frag); } + + pub fn addFragment(self: *Self, entry: FragEntry, offset: u32) !void { + self.frag = try self.alloc.alloc(u8, self.file_size % self.block_size); + if (entry.size.size == 0) { + @memset(self.frag, 0); + return; + } else if (entry.size.uncompressed) { + _ = try self.rdr.pread(self.frag, entry.block + offset); + return; + } + const block = try self.alloc.alloc(u8, offset + self.frag.len); + defer self.alloc.free(block); + _ = try self.comp.decompress( + self.alloc, + std.io.limitedReader( + self.rdr.readerAt(entry.block), + entry.size.size, + ), + block, + ); + @memcpy(self.frag, block[offset..]); + } + pub fn setPool(self: *Self, pool: *std.Thread.Pool) void { + self.pool = pool; + } + + fn blockAt(self: Self, idx: u32) ![]u8 { + const size = if (idx == self.sizes.len - 1 and self.frag.len == 0) { + self.file_size % self.block_size; + } else { + self.block_size; + }; + const block = try self.alloc.alloc(u8, size); + errdefer self.alloc.free(block); + if (self.sizes[idx].size == 0) { + @memset(block, 0); + return block; + } else if (self.sizes[idx].uncompressed) { + _ = try self.rdr.pread(block, self.offsets[idx]); + return block; + } + _ = try self.comp.decompress( + self.alloc, + std.io.limitedReader( + self.rdr.readerAt(self.offsets[idx]), + self.sizes[idx].size, + ), + block, + ); + return block; + } + + pub fn read(self: *Self, buf: []u8) !usize { + var cur_red: usize = 0; + while (cur_red < buf.len) { + if (self.read_offset >= self.read_block.len) { + //TODO: + } + //TODO: + } + return cur_red; + } + + /// Write the entire file's contents to the writer. + /// If availble, pwrite will be used. + /// If a thread pool is not set via setPool, one is created based on cpu thread count. + pub fn writeTo(self: Self, writer: anytype) !usize {} }; } diff --git a/src/reader/metadata.zig b/src/reader/metadata.zig index ee9f8a5..054a032 100644 --- a/src/reader/metadata.zig +++ b/src/reader/metadata.zig @@ -36,10 +36,9 @@ pub fn MetadataReader(comptime T: type) type { _ = try self.rdr.pread(std.mem.asBytes(hdr), self.offset); self.offset += 2; self.block_size = try self.comp.decompress( - 8192, self.alloc, std.io.limitedReader(self.rdr.readerAt(self.offset), hdr.size), - self.block, + &self.block, ); self.offset += hdr.size; self.block_offset = 0; diff --git a/src/superblock.zig b/src/superblock.zig index f20bddc..48ad5b5 100644 --- a/src/superblock.zig +++ b/src/superblock.zig @@ -51,7 +51,7 @@ pub const Compression = enum(u16) { lz4, zstd, - pub fn decompress(self: Compression, comptime max_size: u16, alloc: std.mem.Allocator, source: anytype, dest: *[max_size]u8) !usize { + pub fn decompress(self: Compression, alloc: std.mem.Allocator, source: anytype, dest: []u8) !usize { switch (self) { .gzip => { const decomp = std.compress.zlib.decompressor(source); @@ -70,7 +70,7 @@ pub const Compression = enum(u16) { }, .lz4 => return DecompressError.Lz4Unavailable, .zstd => { - const window: [@min(std.compress.zstd.DecompressorOptions.default_window_buffer_len, max_size)]u8 = undefined; + const window: [@min(std.compress.zstd.DecompressorOptions.default_window_buffer_len, dest.len)]u8 = undefined; const decomp = std.compress.zstd.decompressor(source, .{ .window_buffer = window }); return decomp.read(dest); }, From 87563e43a51908b659c75b6b1c4ce7ad38d95020 Mon Sep 17 00:00:00 2001 From: Caleb Gardner Date: Thu, 17 Jul 2025 02:24:57 -0500 Subject: [PATCH 06/15] Finished (?) data reader --- src/reader/data.zig | 131 ++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 127 insertions(+), 4 deletions(-) diff --git a/src/reader/data.zig b/src/reader/data.zig index d71d924..2d67237 100644 --- a/src/reader/data.zig +++ b/src/reader/data.zig @@ -5,6 +5,11 @@ const FragEntry = @import("../fragment.zig").FragEntry; const BlockSize = @import("../inode/file.zig").BlockSize; const Compression = @import("../superblock.zig").Compression; +const DataReaderError = error{ + EOF, + ThreadPoolNotSet, +}; + pub fn DataReader(comptime T: type) type { return struct { const Self = @This(); @@ -77,11 +82,14 @@ pub fn DataReader(comptime T: type) type { ); @memcpy(self.frag, block[offset..]); } + pub fn setPool(self: *Self, pool: *std.Thread.Pool) void { self.pool = pool; } fn blockAt(self: Self, idx: u32) ![]u8 { + if (self.frag.len > 0 and idx == self.sizes.len) return self.frag; + if (idx >= self.sizes.len) return DataReaderError.InvalidIndex; const size = if (idx == self.sizes.len - 1 and self.frag.len == 0) { self.file_size % self.block_size; } else { @@ -107,20 +115,135 @@ pub fn DataReader(comptime T: type) type { return block; } + fn numBlocks(self: Self) usize { + var out = self.sizes.len; + if (self.frag.len > 0) out += 1; + return out; + } + pub fn read(self: *Self, buf: []u8) !usize { var cur_red: usize = 0; + var to_read: usize = 0; while (cur_red < buf.len) { if (self.read_offset >= self.read_block.len) { - //TODO: + if (self.read_idx == self.sizes.len or (self.frag.len == 0 and self.read_idx == self.sizes.len - 1)) { + self.block_size = self.file_size % self.block_size; + } + self.read_block = self.blockAt(self.read_idx) catch |err| { + if (err == DataReaderError.EOF) return cur_red; + return err; + }; + self.read_idx += 1; } - //TODO: + to_read = @min(buf.len - cur_red, self.block_size - self.read_offset); + @memcpy(buf[cur_red .. cur_red + to_read], self.read_block[self.read_offset .. self.read_offset + to_read]); + cur_red += to_read; + self.read_offset += to_read; } return cur_red; } /// Write the entire file's contents to the writer. /// If availble, pwrite will be used. - /// If a thread pool is not set via setPool, one is created based on cpu thread count. - pub fn writeTo(self: Self, writer: anytype) !usize {} + pub fn writeTo(self: Self, writer: anytype) !usize { + if (comptime self.pool == null) return DataReaderError.ThreadPoolNotSet; + const mut: std.Thread.Mutex = .{}; + var cur_idx: usize = 0; + const wg: std.Thread.WaitGroup = .{}; + const completed = comptime if (std.meta.hasFn(@TypeOf(writer), "pwrite")) { + std.ArrayList(anyerror).init(self.alloc); + } else { + std.AutoArrayHashMap(usize, anyerror![]u8).init(self.alloc); + }; + defer completed.deinit(); + for (0..self.numBlocks()) |i| { + wg.start(); + self.pool.?.spawn( + comptime if (std.meta.hasFn(@TypeOf(writer), "pwrite")) { + extractThreadedPWrite; + } else { + extractThreaded; + }, + comptime if (std.meta.hasFn(@TypeOf(writer), "pwrite")) { + .{ self, &wg, &completed, i, writer }; + } else { + .{ self, &mut, &cur_idx, &wg, &completed, i, writer }; + }, + ); + } + wg.wait(); + if (completed.items.len > 0) { + return completed.items.get(0); + } + return self.file_size; + } + fn extractThreaded( + self: Self, + mut: *std.Thread.Mutex, + cur_idx: *usize, + wg: *std.Thread.WaitGroup, + completed: *std.AutoArrayHashMap(usize, anyerror![]u8), + idx: usize, + writer: anytype, + ) void { + if (cur_idx.* >= self.sizes.len + 1) return; + defer wg.finish(); + const block = self.blockAt(idx) catch |err| { + cur_idx.* = self.sizes.len + 1; + completed.put(idx, err) catch {}; + return; + }; + defer if (idx < self.sizes.len) { + self.alloc.free(block); + }; + mut.lock(); + defer mut.unlock(); + if (cur_idx.* == idx) { + _ = writer.write(block) catch |err| { + cur_idx.* = self.sizes.len + 1; + completed.put(idx, err) catch {}; + return; + }; + } else { + completed.put(idx, block) catch |err| { + cur_idx.* = self.sizes.len + 1; + completed.put(idx, err) catch {}; + return; + }; + } + if (completed.count() == 0) return; + for (cur_idx.*..self.numBlocks()) |i| { + const val = completed.get(i); + if (val == null) return; + _ = writer.write(block) catch |err| { + cur_idx.* = self.sizes.len + 1; + completed.put(i, err) catch {}; + return; + }; + cur_idx.* += 1; + if (completed.count() == 0) return; + } + } + fn extractThreadedPWrite( + self: Self, + wg: *std.Thread.WaitGroup, + completed: *std.ArrayList(anyerror), + idx: usize, + writer: anytype, + ) void { + if (completed.items.len > 0) return; + defer wg.finish(); + const block = self.blockAt(idx) catch |err| { + completed.append(err) catch {}; + return; + }; + defer if (idx < self.sizes.len) { + self.alloc.free(block); + }; + _ = writer.pwrite(idx * self.block_size, block) catch |err| { + completed.append(err) catch {}; + return; + }; + } }; } From d6b136bc8f767313f242c797e9d644a16fe70963 Mon Sep 17 00:00:00 2001 From: Caleb Gardner Date: Thu, 17 Jul 2025 03:50:09 -0500 Subject: [PATCH 07/15] Added basic test Fixed various bugs --- src/bin/unsquashfs.zig | 2 +- src/directory.zig | 10 ++++---- src/file.zig | 56 ++++++++++++++++++++++++++++++++--------- src/inode.zig | 36 +++++++++++--------------- src/inode/dir.zig | 8 +++--- src/inode/file.zig | 22 ++++++++++------ src/inode/misc.zig | 12 ++++----- src/reader.zig | 22 ++++++---------- src/reader/data.zig | 16 ++++-------- src/reader/metadata.zig | 16 ++++++------ src/reader/to_read.zig | 18 +++++++++++++ src/root.zig | 14 ++++++++++- src/superblock.zig | 10 ++++---- 13 files changed, 145 insertions(+), 97 deletions(-) diff --git a/src/bin/unsquashfs.zig b/src/bin/unsquashfs.zig index 48d1f45..ceca2e0 100644 --- a/src/bin/unsquashfs.zig +++ b/src/bin/unsquashfs.zig @@ -119,7 +119,7 @@ pub fn main() !void { return; } const fil = try std.fs.cwd().openFile(filename, .{}); - var rdr = squashfs.FileReader.init( + var rdr = squashfs.SfsFile.init( alloc.allocator(), fil, offset, diff --git a/src/directory.zig b/src/directory.zig index 36257ff..0c52b07 100644 --- a/src/directory.zig +++ b/src/directory.zig @@ -17,7 +17,7 @@ const RawEntry = struct { name: []const u8, pub fn init(alloc: std.mem.Allocator, rdr: anytype) !RawEntry { - const fixed: [8]u8 = undefined; + var fixed: [8]u8 = undefined; _ = try rdr.read(&fixed); const size = std.mem.readInt(u16, fixed[6..8], .little); const name = try alloc.alloc(u8, size + 1); @@ -25,7 +25,7 @@ const RawEntry = struct { return .{ .offset = std.mem.readInt(u16, fixed[0..2], .little), .num_offset = std.mem.readInt(i16, fixed[2..4], .little), - .type = std.mem.readInt(u16, fixed[4..6], .little), + .type = @enumFromInt(std.mem.readInt(u16, fixed[4..6], .little)), .size = size, .name = name, }; @@ -44,8 +44,8 @@ pub const Entry = struct { } }; -pub fn readDirectory(alloc: std.mem.Allocator, rdr: anytype, size: u32) []Entry { - const entries: std.ArrayList(Entry) = .init(alloc); +pub fn readDirectory(alloc: std.mem.Allocator, rdr: anytype, size: u32) ![]Entry { + var entries: std.ArrayList(Entry) = .init(alloc); errdefer entries.deinit(); var cur_red: u32 = 3; // dir size includes "." & "..", so its actual size is off by 3. var hdr: Header = undefined; @@ -60,7 +60,7 @@ pub fn readDirectory(alloc: std.mem.Allocator, rdr: anytype, size: u32) []Entry entries.appendAssumeCapacity(.{ .block = hdr.block, .offset = raw_ent.offset, - .num = hdr.num + raw_ent.num_offset, + .num = @truncate(@abs(@as(i64, hdr.num) + raw_ent.num_offset)), .type = raw_ent.type, .name = raw_ent.name, }); diff --git a/src/file.zig b/src/file.zig index 1dc4990..da5c0ab 100644 --- a/src/file.zig +++ b/src/file.zig @@ -7,6 +7,7 @@ const Inode = @import("inode.zig"); const SfsReader = @import("reader.zig").SfsReader; const ToReader = @import("reader/to_read.zig").ToRead; const ExtractionOptions = @import("extract_options.zig"); +const DataReader = @import("reader/data.zig").DataReader; const Compression = @import("superblock.zig").Compression; const MetadataReader = @import("reader/metadata.zig").MetadataReader; @@ -27,7 +28,7 @@ pub fn File(comptime T: type) type { /// Directory entries. Only populated on directories. entries: ?[]DirEntry = null, /// File reader. Only populated on regular files. - // data_reader: ?DataReader + data_reader: ?DataReader(T) = null, pub fn init(rdr: *SfsReader(T), inode: Inode, name: []const u8) !Self { var out = Self{ @@ -37,33 +38,60 @@ pub fn File(comptime T: type) type { }; switch (inode.data) { .dir => |d| { - const meta = MetadataReader(T).init( + var meta = MetadataReader(T).init( rdr.alloc, rdr.super.comp, rdr.rdr, d.block + rdr.super.dir_start, ); try meta.skip(d.offset); - out.entries = try dir.readDirectory(rdr.alloc, meta, d.size); + out.entries = try dir.readDirectory(rdr.alloc, &meta, d.size); }, .ext_dir => |d| { - const meta = MetadataReader(T).init( + var meta = MetadataReader(T).init( rdr.alloc, rdr.super.comp, rdr.rdr, d.block + rdr.super.dir_start, ); try meta.skip(d.offset); - out.entries = try dir.readDirectory(rdr.alloc, meta, d.size); + out.entries = try dir.readDirectory(rdr.alloc, &meta, d.size); }, .file => |f| { - _ = f; - //TODO + out.data_reader = try .init( + rdr.alloc, + rdr.rdr, + rdr.super.comp, + f.block, + f.size, + f.block_sizes, + rdr.super.block_size, + ); + if (f.hasFragment()) { + try out.data_reader.?.addFragment( + try rdr.frag_table.get(f.frag_idx), + f.frag_offset, + ); + } }, .ext_file => |f| { - _ = f; - //TODO + out.data_reader = try .init( + rdr.alloc, + rdr.rdr, + rdr.super.comp, + f.block, + f.size, + f.block_sizes, + rdr.super.block_size, + ); + if (f.hasFragment()) { + try out.data_reader.?.addFragment( + try rdr.frag_table.get(f.frag_idx), + f.frag_offset, + ); + } }, + else => {}, } return out; } @@ -76,9 +104,13 @@ pub fn File(comptime T: type) type { } self.rdr.alloc.free(self.entries.?); } - // if(self.data_reader != null){ - // self.data_reader.?.deinit(); - // } + if (self.data_reader != null) { + self.data_reader.?.deinit(); + } + } + + pub fn iter(self: Self) !void { + _ = self; } }; } diff --git a/src/inode.zig b/src/inode.zig index e959a4f..1acf6b1 100644 --- a/src/inode.zig +++ b/src/inode.zig @@ -4,12 +4,6 @@ const dir = @import("inode/dir.zig"); const file = @import("inode/file.zig"); const misc = @import("inode/misc.zig"); -const Reader = @import("reader.zig"); -const DirEntry = @import("directory.zig").Entry; -const ToRead = @import("reader/to_read.zig").ToRead; -const Compression = @import("superblock.zig").Compression; -const MetadataReader = @import("reader/metadata.zig").MetadataReader; - pub const Ref = packed struct { offset: u16, block: u32, @@ -67,21 +61,21 @@ data: Data, pub fn init(rdr: anytype, alloc: std.mem.Allocator, block_size: u32) !Self { var hdr: Header = undefined; _ = try rdr.read(std.mem.asBytes(&hdr)); - const data = switch (hdr.type) { - .dir => .{ .dir = .init(rdr) }, - .file => .{ .file = .init(rdr, alloc, block_size) }, - .symlink => .{ .symlink = .init(rdr, alloc) }, - .block_dev => .{ .block_dev = .init(rdr) }, - .char_dev => .{ .char_dev = .init(rdr) }, - .fifo => .{ .fifo = .init(rdr) }, - .socket => .{ .socket = .init(rdr) }, - .ext_dir => .{ .ext_dir = .init(rdr) }, - .ext_file => .{ .ext_file = .init(rdr, alloc, block_size) }, - .ext_symlink => .{ .ext_symlink = .init(rdr, alloc) }, - .ext_block_dev => .{ .ext_block_dev = .init(rdr) }, - .ext_char_dev => .{ .ext_char_dev = .init(rdr) }, - .ext_fifo => .{ .ext_fifo = .init(rdr) }, - .ext_socket => .{ .ext_socket = .init(rdr) }, + const data: Data = switch (hdr.type) { + .dir => .{ .dir = try .init(rdr) }, + .file => .{ .file = try .init(rdr, alloc, block_size) }, + .symlink => .{ .symlink = try .init(rdr, alloc) }, + .block_dev => .{ .block_dev = try .init(rdr) }, + .char_dev => .{ .char_dev = try .init(rdr) }, + .fifo => .{ .fifo = try .init(rdr) }, + .socket => .{ .socket = try .init(rdr) }, + .ext_dir => .{ .ext_dir = try .init(rdr) }, + .ext_file => .{ .ext_file = try .init(rdr, alloc, block_size) }, + .ext_symlink => .{ .ext_symlink = try .init(rdr, alloc) }, + .ext_block_dev => .{ .ext_block_dev = try .init(rdr) }, + .ext_char_dev => .{ .ext_char_dev = try .init(rdr) }, + .ext_fifo => .{ .ext_fifo = try .init(rdr) }, + .ext_socket => .{ .ext_socket = try .init(rdr) }, }; return .{ .hdr = hdr, diff --git a/src/inode/dir.zig b/src/inode/dir.zig index 8234393..dc02bf9 100644 --- a/src/inode/dir.zig +++ b/src/inode/dir.zig @@ -8,8 +8,8 @@ pub const Dir = packed struct { parent_num: u32, pub fn init(rdr: anytype) !Dir { - const out: Dir = undefined; - _ = rdr.read(std.mem.asBytes(&out)); + var out: Dir = undefined; + _ = try rdr.read(std.mem.asBytes(&out)); return out; } }; @@ -24,8 +24,8 @@ pub const ExtDir = packed struct { xattr_idx: u32, pub fn init(rdr: anytype) !ExtDir { - const out: ExtDir = undefined; - _ = rdr.read(std.mem.asBytes(&out)); + var out: ExtDir = undefined; + _ = try rdr.read(std.mem.asBytes(&out)); return out; } }; diff --git a/src/inode/file.zig b/src/inode/file.zig index 5b464a5..fb6b659 100644 --- a/src/inode/file.zig +++ b/src/inode/file.zig @@ -9,7 +9,7 @@ pub const BlockSize = packed struct { pub const File = struct { block: u32, frag_idx: u32, - offset: u32, + frag_offset: u32, size: u32, block_sizes: []BlockSize, @@ -22,17 +22,20 @@ pub const File = struct { if (size % block_size > 0 and frag_idx != 0xffffffff) { blocks += 1; } - const block_sizes = alloc.alloc(BlockSize, blocks); + const block_sizes = try alloc.alloc(BlockSize, blocks); errdefer alloc.free(block_sizes); _ = try rdr.read(std.mem.sliceAsBytes(block_sizes)); return .{ .block = std.mem.readInt(u32, fixed[0..4], .little), .frag_idx = frag_idx, - .offset = std.mem.readInt(u32, fixed[8..12], .little), + .frag_offset = std.mem.readInt(u32, fixed[8..12], .little), .size = size, .block_sizes = block_sizes, }; } + pub fn hasFragment(self: File) bool { + return self.frag_idx != 0xffffffff; + } }; pub const ExtFile = struct { @@ -41,7 +44,7 @@ pub const ExtFile = struct { sparse: u64, hard_link: u32, frag_idx: u32, - offset: u32, + frag_offset: u32, xattr_idx: u32, block_sizes: []BlockSize, @@ -50,11 +53,11 @@ pub const ExtFile = struct { _ = try rdr.read(&fixed); const size = std.mem.readInt(u64, fixed[8..16], .little); const frag_idx = std.mem.readInt(u32, fixed[28..32], .little); - var blocks: u32 = size / block_size; + var blocks: u32 = @truncate(size / block_size); if (size % block_size > 0 and frag_idx != 0xffffffff) { blocks += 1; } - const block_sizes = alloc.alloc(BlockSize, blocks); + const block_sizes = try alloc.alloc(BlockSize, blocks); errdefer alloc.free(block_sizes); _ = try rdr.read(std.mem.sliceAsBytes(block_sizes)); return .{ @@ -63,9 +66,12 @@ pub const ExtFile = struct { .sparse = std.mem.readInt(u64, fixed[16..24], .little), .hard_link = std.mem.readInt(u32, fixed[24..28], .little), .frag_idx = frag_idx, - .offset = std.mem.readInt(u32, fixed[32..36], .little), + .frag_offset = std.mem.readInt(u32, fixed[32..36], .little), .xattr_idx = std.mem.readInt(u32, fixed[36..40], .little), - .block_sizes = blocks, + .block_sizes = block_sizes, }; } + pub fn hasFragment(self: ExtFile) bool { + return self.frag_idx != 0xffffffff; + } }; diff --git a/src/inode/misc.zig b/src/inode/misc.zig index 134f9de..d9fb93a 100644 --- a/src/inode/misc.zig +++ b/src/inode/misc.zig @@ -9,7 +9,7 @@ pub const Symlink = struct { var fixed: [8]u8 = undefined; _ = try rdr.read(&fixed); const size = std.mem.readInt(u32, fixed[4..8], .little); - const target = alloc.alloc(u8, size); + const target = try alloc.alloc(u8, size); errdefer alloc.free(target); _ = try rdr.read(target); return .{ @@ -29,7 +29,7 @@ pub const ExtSymlink = struct { var fixed: [8]u8 = undefined; _ = try rdr.read(&fixed); const size = std.mem.readInt(u32, fixed[4..8], .little); - const target = alloc.alloc(u8, size); + const target = try alloc.alloc(u8, size); errdefer alloc.free(target); _ = try rdr.read(target); var xattr_idx: u32 = 0; @@ -47,7 +47,7 @@ pub const Dev = packed struct { device: u32, pub fn init(rdr: anytype) !Dev { - const out: Dev = undefined; + var out: Dev = undefined; _ = try rdr.read(std.mem.asBytes(&out)); return out; } @@ -59,7 +59,7 @@ pub const ExtDev = packed struct { xattr_idx: u32, pub fn init(rdr: anytype) !ExtDev { - const out: ExtDev = undefined; + var out: ExtDev = undefined; _ = try rdr.read(std.mem.asBytes(&out)); return out; } @@ -69,7 +69,7 @@ pub const IPC = packed struct { hard_link: u32, pub fn init(rdr: anytype) !IPC { - const out: IPC = undefined; + var out: IPC = undefined; _ = try rdr.read(std.mem.asBytes(&out)); return out; } @@ -80,7 +80,7 @@ pub const ExtIPC = packed struct { xattr_idx: u32, pub fn init(rdr: anytype) !ExtIPC { - const out: ExtIPC = undefined; + var out: ExtIPC = undefined; _ = try rdr.read(std.mem.asBytes(&out)); return out; } diff --git a/src/reader.zig b/src/reader.zig index 4f982c4..73b1482 100644 --- a/src/reader.zig +++ b/src/reader.zig @@ -29,7 +29,6 @@ pub fn SfsReader(comptime T: type) type { /// Export table. Each element is an inode referce. /// If accessing directly, keep in mind, the table starts at inode 1, as such it's recommended to use the InodeAt function instead. export_table: Table(Inode.Ref, T) = undefined, - root: ?File(T) = null, pub fn init(alloc: std.mem.Allocator, rdr: T, offset: u64) !Self { var out: Self = .{ @@ -46,28 +45,21 @@ pub fn SfsReader(comptime T: type) type { self.id_table.deinit(); self.frag_table.deinit(); self.export_table.deinit(); - if (self.root != null) self.root.?.deinit(); } - fn populateRoot(self: *Self) !void { - if (self.root != null) return; - const meta = MetadataReader(T).init( + pub fn archiveRoot(self: *Self) !File(T) { + var meta = MetadataReader(T).init( self.alloc, self.super.comp, self.rdr, self.super.inode_start + self.super.root_ref.block, ); try meta.skip(self.super.root_ref.offset); - const root_inode: Inode = try .init(meta, self.alloc, self.super.block_size); - self.root = try .init(self, root_inode, ""); + const root_inode: Inode = try .init(&meta, self.alloc, self.super.block_size); + return try .init(self, root_inode, ""); } - - pub fn archiveRoot(self: *Self) !File { - if (self.root == null) try self.populateRoot(); - return self.root.?; - } - pub fn open(self: *Self, path: []const u8) !File { - if (self.root == null) try self.populateRoot(); + pub fn open(self: *Self, path: []const u8) !File(T) { + _ = self; _ = path; // return self.root.?.open(path); return error{TODO}.TODO; @@ -78,7 +70,7 @@ pub fn SfsReader(comptime T: type) type { pub fn inodeAt(self: Self, num: u32) !Inode { if (!self.super.flags.has_export) return SfsError.NotExportable; const ref = try self.export_table.get(num - 1); - const meta = MetadataReader(T).init( + var meta = MetadataReader(T).init( self.alloc, self.super.comp, self.rdr, diff --git a/src/reader/data.zig b/src/reader/data.zig index 2d67237..abf1f76 100644 --- a/src/reader/data.zig +++ b/src/reader/data.zig @@ -27,8 +27,8 @@ pub fn DataReader(comptime T: type) type { frag: []u8 = &[0]u8{}, - read_block: []u8, - read_offset: u64, + read_block: []u8 = &[0]u8{}, + read_offset: u64 = 0, read_idx: u32 = 0, pub fn init( @@ -41,7 +41,7 @@ pub fn DataReader(comptime T: type) type { block_size: u32, ) !Self { var cur_offset = init_offset; - const offsets = alloc.alloc(u64, sizes.len); + const offsets = try alloc.alloc(u64, sizes.len); for (0..sizes.len) |i| { offsets[i] = cur_offset; cur_offset += sizes[i].size; @@ -74,10 +74,7 @@ pub fn DataReader(comptime T: type) type { defer self.alloc.free(block); _ = try self.comp.decompress( self.alloc, - std.io.limitedReader( - self.rdr.readerAt(entry.block), - entry.size.size, - ), + self.rdr.readerAt(entry.block).reader(), block, ); @memcpy(self.frag, block[offset..]); @@ -106,10 +103,7 @@ pub fn DataReader(comptime T: type) type { } _ = try self.comp.decompress( self.alloc, - std.io.limitedReader( - self.rdr.readerAt(self.offsets[idx]), - self.sizes[idx].size, - ), + self.rdr.readerAt(self.offsets[idx]).reader(), block, ); return block; diff --git a/src/reader/metadata.zig b/src/reader/metadata.zig index 054a032..89d74ff 100644 --- a/src/reader/metadata.zig +++ b/src/reader/metadata.zig @@ -32,12 +32,12 @@ pub fn MetadataReader(comptime T: type) type { } fn readNextBlock(self: *Self) !void { - const hdr: MetaHeader = undefined; - _ = try self.rdr.pread(std.mem.asBytes(hdr), self.offset); + var hdr: MetaHeader = undefined; + _ = try self.rdr.pread(std.mem.asBytes(&hdr), self.offset); self.offset += 2; self.block_size = try self.comp.decompress( self.alloc, - std.io.limitedReader(self.rdr.readerAt(self.offset), hdr.size), + self.rdr.readerAt(self.offset).reader(), &self.block, ); self.offset += hdr.size; @@ -45,14 +45,14 @@ pub fn MetadataReader(comptime T: type) type { } pub fn skip(self: *Self, offset: u32) !void { - var skipped = 0; - const hdr: MetaHeader = undefined; + var skipped: u32 = 0; + var hdr: MetaHeader = undefined; while (offset - skipped >= 8192) { - _ = try self.rdr.pread(std.mem.asBytes(hdr), self.offset); + _ = try self.rdr.pread(std.mem.asBytes(&hdr), self.offset); self.offset += 2 + hdr.size; skipped += 8192; } - var to_skip = 0; + var to_skip: u32 = 0; while (skipped < offset) { if (self.block_offset >= self.block_size) try self.readNextBlock(); to_skip = @min(self.block_size - self.block_offset, offset - skipped); @@ -69,7 +69,7 @@ pub fn MetadataReader(comptime T: type) type { to_read = @min(buf.len - cur_red, self.block_size - self.block_offset); @memcpy(buf[cur_red .. cur_red + to_read], self.block[self.block_offset .. self.block_offset + to_read]); cur_red += to_read; - self.block_offset += to_read; + self.block_offset += @truncate(to_read); } return cur_red; } diff --git a/src/reader/to_read.zig b/src/reader/to_read.zig index a311b4d..a72a063 100644 --- a/src/reader/to_read.zig +++ b/src/reader/to_read.zig @@ -5,6 +5,8 @@ pub fn ToRead(comptime T: type) type { return struct { const Self = @This(); + pub const Error = anyerror; + rdr: T, offset: u64, @@ -20,5 +22,21 @@ pub fn ToRead(comptime T: type) type { self.offset += red; return red; } + pub fn readAll(self: *Self, buf: []u8) !usize { + var cur_red = try self.read(buf); + if (cur_red == 0) return cur_red; + var res: usize = 0; + while (cur_red < buf.len) { + res = try self.read(buf[cur_red..]); + if (res == 0) break; + cur_red += res; + } + return cur_red; + } + pub fn reader(self: anytype) std.io.Reader(*Self, anyerror, read) { + return .{ + .context = @constCast(self), + }; + } }; } diff --git a/src/root.zig b/src/root.zig index fc291d4..2869353 100644 --- a/src/root.zig +++ b/src/root.zig @@ -3,4 +3,16 @@ const std = @import("std"); pub const SfsReader = @import("reader.zig").SfsReader; pub const ExtractionOptions = @import("extract_options.zig"); -pub const FileReader = SfsReader(std.fs.File); +pub const SfsFile = SfsReader(std.fs.File); + +const test_file = "testing/LinuxPATest.sfs"; + +test "OpenTest" { + const fil = try std.fs.cwd().openFile(test_file, .{}); + defer fil.close(); + var rdr: SfsFile = try .init(std.testing.allocator, fil, 0); + defer rdr.deinit(); + std.debug.print("{}\n", .{rdr.super}); + const root = try rdr.archiveRoot(); + defer root.deinit(); +} diff --git a/src/superblock.zig b/src/superblock.zig index 48ad5b5..c817ca0 100644 --- a/src/superblock.zig +++ b/src/superblock.zig @@ -54,24 +54,24 @@ pub const Compression = enum(u16) { pub fn decompress(self: Compression, alloc: std.mem.Allocator, source: anytype, dest: []u8) !usize { switch (self) { .gzip => { - const decomp = std.compress.zlib.decompressor(source); + var decomp = std.compress.zlib.decompressor(source); return decomp.read(dest); }, .lzma => { - const decomp = try std.compress.lzma.decompress(alloc, source); + var decomp = try std.compress.lzma.decompress(alloc, source); defer decomp.deinit(); return decomp.read(dest); }, .lzo => return DecompressError.LzoUnavailable, .xz => { - const decomp = try std.compress.xz.decompress(alloc, source); + var decomp = try std.compress.xz.decompress(alloc, source); defer decomp.deinit(); return decomp.read(dest); }, .lz4 => return DecompressError.Lz4Unavailable, .zstd => { - const window: [@min(std.compress.zstd.DecompressorOptions.default_window_buffer_len, dest.len)]u8 = undefined; - const decomp = std.compress.zstd.decompressor(source, .{ .window_buffer = window }); + var window: [std.compress.zstd.DecompressorOptions.default_window_buffer_len]u8 = undefined; + var decomp = std.compress.zstd.decompressor(source, .{ .window_buffer = &window }); return decomp.read(dest); }, } From 4d52627d5d8451c782fe28e6a167a14659df1c6c Mon Sep 17 00:00:00 2001 From: Caleb Gardner Date: Thu, 17 Jul 2025 09:16:32 -0500 Subject: [PATCH 08/15] Further tweaks & fixes --- src/file.zig | 54 +++++++++++++++++++++++++++++++++++++++++++++++--- src/reader.zig | 20 ++++++------------- src/root.zig | 21 ++++++++++++++++++-- 3 files changed, 76 insertions(+), 19 deletions(-) diff --git a/src/file.zig b/src/file.zig index da5c0ab..461c147 100644 --- a/src/file.zig +++ b/src/file.zig @@ -14,6 +14,7 @@ const MetadataReader = @import("reader/metadata.zig").MetadataReader; pub const FileError = error{ NotRegular, NotDirectory, + NotFound, }; pub fn File(comptime T: type) type { @@ -95,8 +96,19 @@ pub fn File(comptime T: type) type { } return out; } + pub fn initFromRef(rdr: *SfsReader(T), ref: Inode.Ref, name: []const u8) !Self { + var meta: MetadataReader(T) = .init(rdr.alloc, rdr.super.comp, rdr.rdr, ref.block + rdr.super.inode_start); + try meta.skip(ref.offset); + const inode: Inode = try .init(&meta, rdr.alloc, rdr.super.block_size); + return .init(rdr, inode, name); + } + pub fn initFromEntry(rdr: *SfsReader(T), ent: DirEntry) !Self { + var meta: MetadataReader(T) = .init(rdr.alloc, rdr.super.comp, rdr.rdr, ent.block + rdr.super.inode_start); + try meta.skip(ent.offset); + const inode: Inode = try .init(&meta, rdr.alloc, rdr.super.block_size); + return .init(rdr, inode, ent.name); + } pub fn deinit(self: Self) void { - self.rdr.alloc.free(self.name); self.inode.deinit(self.rdr.alloc); if (self.entries != null) { for (self.entries.?) |e| { @@ -109,8 +121,44 @@ pub fn File(comptime T: type) type { } } - pub fn iter(self: Self) !void { - _ = self; + pub fn open(self: Self, path: []const u8) !Self { + if (self.entries == null) return FileError.NotDirectory; + if (path.len == 0) return self; + const idx = std.mem.indexOf(u8, path, "/") orelse path.len; + if (idx == 0) return self.open(path[1..]); + const name = path[0..idx]; + for (self.entries.?) |e| { + if (std.mem.eql(u8, e.name, name)) { + var fil: Self = try .initFromEntry(self.rdr, e); + if (idx >= path.len - 1) return fil; + defer fil.deinit(); + return fil.open(path[idx + 1 ..]); + } + } + return FileError.NotFound; } + pub fn iterate(self: Self) Iterator { + return .{ + .rdr = self.rdr, + .entries = self.entries.?, + }; + } + + const Iterator = struct { + rdr: *SfsReader(T), + entries: []DirEntry, + + idx: u32 = 0, + + pub fn next(self: *Iterator) !?File(T) { + if (self.idx >= self.entries.len) return null; + const out = try Self.initFromEntry(self.rdr, self.entries[self.idx]); + self.idx += 1; + return out; + } + pub fn reset(self: *Iterator) void { + self.idx = 0; + } + }; }; } diff --git a/src/reader.zig b/src/reader.zig index 73b1482..4c20de0 100644 --- a/src/reader.zig +++ b/src/reader.zig @@ -47,22 +47,14 @@ pub fn SfsReader(comptime T: type) type { self.export_table.deinit(); } - pub fn archiveRoot(self: *Self) !File(T) { - var meta = MetadataReader(T).init( - self.alloc, - self.super.comp, - self.rdr, - self.super.inode_start + self.super.root_ref.block, - ); - try meta.skip(self.super.root_ref.offset); - const root_inode: Inode = try .init(&meta, self.alloc, self.super.block_size); - return try .init(self, root_inode, ""); + pub fn root(self: *Self) !File(T) { + return .initFromRef(self, self.super.root_ref, ""); } pub fn open(self: *Self, path: []const u8) !File(T) { - _ = self; - _ = path; - // return self.root.?.open(path); - return error{TODO}.TODO; + var rt = try self.root(); + if (path.len == 0 or (path.len == 1 and path[0] == '/')) return rt; + defer rt.deinit(); + return rt.open(path); } /// Returns the inode with the given Inode Number. diff --git a/src/root.zig b/src/root.zig index 2869353..ae7a282 100644 --- a/src/root.zig +++ b/src/root.zig @@ -7,12 +7,29 @@ pub const SfsFile = SfsReader(std.fs.File); const test_file = "testing/LinuxPATest.sfs"; -test "OpenTest" { +test "OpenFile" { const fil = try std.fs.cwd().openFile(test_file, .{}); defer fil.close(); var rdr: SfsFile = try .init(std.testing.allocator, fil, 0); defer rdr.deinit(); std.debug.print("{}\n", .{rdr.super}); - const root = try rdr.archiveRoot(); + const root = try rdr.root(); + defer root.deinit(); + var iter = root.iterate(); + while (try iter.next()) |f| { + defer f.deinit(); + std.debug.print("{s}\n", .{f.name}); + } + const start = try root.open("Start.exe"); + defer start.deinit(); +} + +test "ReadFile" { + const fil = try std.fs.cwd().openFile(test_file, .{}); + defer fil.close(); + var rdr: SfsFile = try .init(std.testing.allocator, fil, 0); + defer rdr.deinit(); + std.debug.print("{}\n", .{rdr.super}); + const root = try rdr.root(); defer root.deinit(); } From b4af1233e58210c861f8e0bb3bb28c7788d79e95 Mon Sep 17 00:00:00 2001 From: Caleb Gardner Date: Thu, 17 Jul 2025 22:17:58 -0500 Subject: [PATCH 09/15] Started work on file extraction --- src/extract_options.zig | 21 +++++++------ src/file.zig | 66 +++++++++++++++++++++++++++++++++++++++++ src/reader/data.zig | 8 +++++ src/root.zig | 4 ++- 4 files changed, 87 insertions(+), 12 deletions(-) diff --git a/src/extract_options.zig b/src/extract_options.zig index 4658940..604d9cf 100644 --- a/src/extract_options.zig +++ b/src/extract_options.zig @@ -9,16 +9,15 @@ dereference_symlinks: bool = false, unbreak_symlinks: bool = false, /// Do not set file's permissions & owner when extracted. ignore_permissions: bool = false, +/// Verbose logging +verbose: bool = false, +/// Verbose logging writer. If not set, stdout is used. +verbose_logger: ?std.io.AnyWriter = null, +/// Number of threads used during extraction. Defualts to std.Thread.getCpuCount(). +thread_count: u32, -// max_memory: u64, - -pol: std.Thread.Pool = undefined, - -pub fn init(alloc: std.mem.Allocator, thread_count: u16) !Self { - var out: Self = .{}; - out.pol.init(.{ - .allocator = alloc, - .n_jobs = thread_count, - }); - return out; +pub fn init() !Self { + return .{ + .thread_count = try std.Thread.getCpuCount(), + }; } diff --git a/src/file.zig b/src/file.zig index 461c147..dc465af 100644 --- a/src/file.zig +++ b/src/file.zig @@ -121,6 +121,17 @@ pub fn File(comptime T: type) type { } } + const Reader = std.io.GenericReader(*DataReader(T), anyerror, DataReader(T).read); + + pub fn read(self: *Self, buf: []u8) !usize { + if (self.data_reader == null) return FileError.NotRegular; + return self.data_reader.?.read(buf); + } + pub fn reader(self: *Self) !Reader { + if (self.data_reader == null) return FileError.NotRegular; + return self.data_reader.?.reader(); + } + pub fn open(self: Self, path: []const u8) !Self { if (self.entries == null) return FileError.NotDirectory; if (path.len == 0) return self; @@ -160,5 +171,60 @@ pub fn File(comptime T: type) type { self.idx = 0; } }; + + pub fn extract(self: Self, op: *ExtractionOptions, path: []const u8) !void { + if(op.verbose and op.verbose_logger == null){ + op.verbose_logger = std.io.getStdOut().writer().any(); + } + var wg: std.Thread.WaitGroup = .{}; + var pol: std.Thread.Pool = undefined; + try pol.init(.{ + .n_jobs = op.thread_count, + .allocator = self.rdr.alloc, + }); + } + fn extractReal(self: Self, op: *ExtractionOptions, path: []const u8) !void{ + switch (self.inode.hdr.type) { + .dir, .ext_dir => self.extractDir(path), + .file, .ext_file => self.extractReg(op, path), + .symlink, .ext_symlink => self.extractSymlink(op, path), + .block_dev, + .ext_block_dev, + .char_dev, + .ext_char_dev, + .fifo, + .ext_fifo, + => self.extractDev(path), + else => { + if(op.verbose){ + std.fmt.format(op.verbose_logger.?, "inode {} \"{}\" is a socket. Ignoring.\n"); + return; + } + } + } + } + fn extractDir(self: Self, op: *ExtractionOptions, path: []const u8) !void {} + fn extractReg(self: Self, op: *ExtractionOptions, path: []const u8) !void {} + fn extractSymlink(self: Self, op: *ExtractionOptions, path: []const u8) !void {} + fn extractDev(self: Self, op: *ExtractionOptions, path: []const u8) !void { + if (exists) return ExtractError.FileExists; + comptime if (builtin.os.tag != .linux) { + if(op.ver) + return; + } + const mode: u32 = switch (self.inode.header.inode_type) { + .block, .ext_block => std.posix.S.IFBLK, + .char, .ext_char => std.posix.S.IFCHR, + .fifo, .ext_fifo => std.posix.S.IFIFO, + else => unreachable, + }; + const dev = switch (self.inode.data) { + .block, .char => |b| b.device, + .ext_block, .ext_char => |b| b.device, + .fifo, .ext_fifo => 0, + else => unreachable, + }; + _ = std.os.linux.mknod(@ptrCast(real_path), mode, dev); + } }; } diff --git a/src/reader/data.zig b/src/reader/data.zig index abf1f76..2541ace 100644 --- a/src/reader/data.zig +++ b/src/reader/data.zig @@ -59,6 +59,7 @@ pub fn DataReader(comptime T: type) type { pub fn deinit(self: Self) void { self.alloc.free(self.offsets); self.alloc.free(self.frag); + if (self.read_idx < self.sizes.len) self.alloc.free(self.read_block); } pub fn addFragment(self: *Self, entry: FragEntry, offset: u32) !void { @@ -115,6 +116,8 @@ pub fn DataReader(comptime T: type) type { return out; } + const Reader = std.io.GenericReader(*Self, anyerror, read); + pub fn read(self: *Self, buf: []u8) !usize { var cur_red: usize = 0; var to_read: usize = 0; @@ -136,6 +139,9 @@ pub fn DataReader(comptime T: type) type { } return cur_red; } + pub fn reader(self: *Self) Reader { + return .{ .context = self }; + } /// Write the entire file's contents to the writer. /// If availble, pwrite will be used. @@ -165,12 +171,14 @@ pub fn DataReader(comptime T: type) type { }, ); } + std.Thread.yield(); wg.wait(); if (completed.items.len > 0) { return completed.items.get(0); } return self.file_size; } + pub fn writeToThreaded(self: Self, errs: *std.ArrayList(anyerror), wg: *std.Thread.WaitGroup, writer: anytype) void {} fn extractThreaded( self: Self, mut: *std.Thread.Mutex, diff --git a/src/root.zig b/src/root.zig index ae7a282..c65635e 100644 --- a/src/root.zig +++ b/src/root.zig @@ -20,8 +20,10 @@ test "OpenFile" { defer f.deinit(); std.debug.print("{s}\n", .{f.name}); } - const start = try root.open("Start.exe"); + var start = try root.open("Start.exe"); defer start.deinit(); + const startReader = try start.reader(); + _ = startReader; } test "ReadFile" { From de988f083fc7533409cbbb4127fb3ae9407fdc96 Mon Sep 17 00:00:00 2001 From: Caleb Gardner Date: Fri, 18 Jul 2025 05:41:27 -0500 Subject: [PATCH 10/15] Finished (?) file extraction --- src/extract_options.zig | 2 +- src/file.zig | 154 +++++++++++++++++++++------ src/inode/file.zig | 2 +- src/reader.zig | 14 ++- src/reader/data.zig | 230 ++++++++++++++++++++++++++++++---------- src/root.zig | 31 +++--- 6 files changed, 323 insertions(+), 110 deletions(-) diff --git a/src/extract_options.zig b/src/extract_options.zig index 604d9cf..36b2232 100644 --- a/src/extract_options.zig +++ b/src/extract_options.zig @@ -14,7 +14,7 @@ verbose: bool = false, /// Verbose logging writer. If not set, stdout is used. verbose_logger: ?std.io.AnyWriter = null, /// Number of threads used during extraction. Defualts to std.Thread.getCpuCount(). -thread_count: u32, +thread_count: usize, pub fn init() !Self { return .{ diff --git a/src/file.zig b/src/file.zig index dc465af..bb10126 100644 --- a/src/file.zig +++ b/src/file.zig @@ -1,4 +1,5 @@ const std = @import("std"); +const builtin = @import("builtin"); const dir = @import("directory.zig"); @@ -11,14 +12,14 @@ const DataReader = @import("reader/data.zig").DataReader; const Compression = @import("superblock.zig").Compression; const MetadataReader = @import("reader/metadata.zig").MetadataReader; -pub const FileError = error{ - NotRegular, - NotDirectory, - NotFound, -}; - pub fn File(comptime T: type) type { return struct { + pub const FileError = error{ + NotRegular, + NotDirectory, + NotFound, + }; + const Self = @This(); rdr: *SfsReader(T), @@ -32,10 +33,12 @@ pub fn File(comptime T: type) type { data_reader: ?DataReader(T) = null, pub fn init(rdr: *SfsReader(T), inode: Inode, name: []const u8) !Self { + const name_cpy: []u8 = try rdr.alloc.alloc(u8, name.len); + @memcpy(name_cpy, name); var out = Self{ .rdr = rdr, .inode = inode, - .name = name, + .name = name_cpy, }; switch (inode.data) { .dir => |d| { @@ -109,6 +112,7 @@ pub fn File(comptime T: type) type { return .init(rdr, inode, ent.name); } pub fn deinit(self: Self) void { + self.rdr.alloc.free(self.name); self.inode.deinit(self.rdr.alloc); if (self.entries != null) { for (self.entries.?) |e| { @@ -121,6 +125,13 @@ pub fn File(comptime T: type) type { } } + pub fn uid(self: Self) !u32 { + return self.rdr.id_table.get(self.inode.hdr.uid_idx); + } + pub fn gid(self: Self) !u32 { + return self.rdr.id_table.get(self.inode.hdr.uid_idx); + } + const Reader = std.io.GenericReader(*DataReader(T), anyerror, DataReader(T).read); pub fn read(self: *Self, buf: []u8) !usize { @@ -172,59 +183,136 @@ pub fn File(comptime T: type) type { } }; + const WaitGroup = std.Thread.WaitGroup; + const Pool = std.Thread.Pool; + + pub const ExtractError = error{FileExists}; + pub fn extract(self: Self, op: *ExtractionOptions, path: []const u8) !void { - if(op.verbose and op.verbose_logger == null){ + if (op.verbose and op.verbose_logger == null) { op.verbose_logger = std.io.getStdOut().writer().any(); } - var wg: std.Thread.WaitGroup = .{}; - var pol: std.Thread.Pool = undefined; + var exists = true; + var stat: ?std.fs.File.Stat = null; + if (std.fs.cwd().statFile(path)) |s| { + stat = s; + } else |err| { + if (err == std.fs.File.OpenError.FileNotFound) { + exists = false; + } else { + return err; + } + } + switch (self.inode.hdr.type) { + .dir, .ext_dir => { + if (exists and stat.?.kind != .directory) { + return ExtractError.FileExists; + } else if (!exists) { + try std.fs.cwd().makeDir(path); + } + }, + else => if (exists) return ExtractError.FileExists, + } + var wg: WaitGroup = .{}; + var pol: Pool = undefined; try pol.init(.{ .n_jobs = op.thread_count, .allocator = self.rdr.alloc, }); + defer pol.deinit(); + var errs: std.ArrayList(anyerror) = .init(self.rdr.alloc); + defer errs.deinit(); + try self.extractReal(op, &errs, &wg, &pol, path); + wg.wait(); + if (errs.items.len > 0) return errs.items[0]; } - fn extractReal(self: Self, op: *ExtractionOptions, path: []const u8) !void{ - switch (self.inode.hdr.type) { - .dir, .ext_dir => self.extractDir(path), - .file, .ext_file => self.extractReg(op, path), - .symlink, .ext_symlink => self.extractSymlink(op, path), + fn extractReal( + self: Self, + op: *ExtractionOptions, + errs: *std.ArrayList(anyerror), + wg: *WaitGroup, + pol: *Pool, + path: []const u8, + ) !void { + return switch (self.inode.hdr.type) { + .dir, .ext_dir => self.extractDir(op, errs, wg, pol, path), + .file, .ext_file => self.extractReg(op, errs, wg, pol, path), + .symlink, .ext_symlink => self.extractSymlink(op, errs, wg, pol, path), .block_dev, .ext_block_dev, .char_dev, .ext_char_dev, .fifo, .ext_fifo, - => self.extractDev(path), + => self.extractDev(op, path), else => { - if(op.verbose){ - std.fmt.format(op.verbose_logger.?, "inode {} \"{}\" is a socket. Ignoring.\n"); - return; + if (op.verbose) { + std.fmt.format( + op.verbose_logger.?, + "inode {} \"{s}\" is a socket file. Ignoring.\n", + .{ self.inode.hdr.num, self.name }, + ) catch {}; } - } - } + }, + }; + } + fn extractDir(self: Self, op: *ExtractionOptions, errs: *std.ArrayList(anyerror), wg: *WaitGroup, pol: *Pool, path: []const u8) !void { + if (errs.items.len > 0) return; + _ = self; + _ = op; + _ = wg; + _ = pol; + _ = path; + return error{TODO}.TODO; + } + fn extractReg(self: Self, op: *ExtractionOptions, errs: *std.ArrayList(anyerror), wg: *WaitGroup, pol: *Pool, path: []const u8) !void { + if (errs.items.len > 0) return; + const fil = try std.fs.cwd().createFile(path, .{}); + @constCast(&self.data_reader.?).setPool(pol); + wg.start(); + try self.data_reader.?.writeToNoBlock(errs, fil, wg, extractRegFinish, .{ self, fil }); + _ = op; + //TODO: add some way of verbose logging of the errors for this file in particular. + return; + } + fn extractRegFinish(self: Self, fil: std.fs.File) void { + defer fil.close(); + //TODO: set owners & permissions. Check if we need to call self.deinit(); + _ = self; + } + fn extractSymlink(self: Self, op: *ExtractionOptions, errs: *std.ArrayList(anyerror), wg: *WaitGroup, pol: *Pool, path: []const u8) !void { + if (errs.items.len > 0) return; + _ = self; + _ = op; + _ = wg; + _ = pol; + _ = path; + return error{TODO}.TODO; } - fn extractDir(self: Self, op: *ExtractionOptions, path: []const u8) !void {} - fn extractReg(self: Self, op: *ExtractionOptions, path: []const u8) !void {} - fn extractSymlink(self: Self, op: *ExtractionOptions, path: []const u8) !void {} fn extractDev(self: Self, op: *ExtractionOptions, path: []const u8) !void { - if (exists) return ExtractError.FileExists; - comptime if (builtin.os.tag != .linux) { - if(op.ver) + if (comptime builtin.os.tag != .linux) { + if (op.verbose) { + std.fmt.format( + op.verbose_logger.?, + "inode {} \"{s}\" is a device/fifo file and the OS is not Linux. Ignoring.\n", + .{ self.inode.hdr.num, self.name }, + ) catch {}; + } return; } - const mode: u32 = switch (self.inode.header.inode_type) { - .block, .ext_block => std.posix.S.IFBLK, - .char, .ext_char => std.posix.S.IFCHR, + const mode: u32 = switch (self.inode.hdr.type) { + .block_dev, .ext_block_dev => std.posix.S.IFBLK, + .char_dev, .ext_char_dev => std.posix.S.IFCHR, .fifo, .ext_fifo => std.posix.S.IFIFO, else => unreachable, }; const dev = switch (self.inode.data) { - .block, .char => |b| b.device, - .ext_block, .ext_char => |b| b.device, + .block_dev, .char_dev => |b| b.device, + .ext_block_dev, .ext_char_dev => |b| b.device, .fifo, .ext_fifo => 0, else => unreachable, }; - _ = std.os.linux.mknod(@ptrCast(real_path), mode, dev); + _ = std.os.linux.mknod(@ptrCast(path), mode, dev); } }; } diff --git a/src/inode/file.zig b/src/inode/file.zig index fb6b659..86af63b 100644 --- a/src/inode/file.zig +++ b/src/inode/file.zig @@ -19,7 +19,7 @@ pub const File = struct { const frag_idx = std.mem.readInt(u32, fixed[4..8], .little); const size = std.mem.readInt(u32, fixed[12..16], .little); var blocks: u32 = size / block_size; - if (size % block_size > 0 and frag_idx != 0xffffffff) { + if (size % block_size > 0 and frag_idx == 0xffffffff) { blocks += 1; } const block_sizes = try alloc.alloc(BlockSize, blocks); diff --git a/src/reader.zig b/src/reader.zig index 4c20de0..b0f69e2 100644 --- a/src/reader.zig +++ b/src/reader.zig @@ -6,6 +6,7 @@ const Table = @import("table.zig").Table; const PRead = @import("reader/p_read.zig").PRead; const FragEntry = @import("fragment.zig").FragEntry; const Superblock = @import("superblock.zig").Superblock; +const ExtractionOptions = @import("extract_options.zig"); const MetadataReader = @import("reader/metadata.zig").MetadataReader; pub const SfsError = error{ @@ -47,17 +48,26 @@ pub fn SfsReader(comptime T: type) type { self.export_table.deinit(); } + /// A representation of the archives root folder. pub fn root(self: *Self) !File(T) { return .initFromRef(self, self.super.root_ref, ""); } + /// Get the file at path. Equivelent to calling open on the root File. pub fn open(self: *Self, path: []const u8) !File(T) { var rt = try self.root(); - if (path.len == 0 or (path.len == 1 and path[0] == '/')) return rt; + if (path.len == 0 or (path.len == 1 and path[0] == '/') or path.len == 1 and path[0] == '.') return rt; defer rt.deinit(); return rt.open(path); } + /// Extract the entire archive to the given path & with the given options. + /// Equivelent to calling extract on the root File. + pub fn extract(self: *Self, op: *ExtractionOptions, path: []const u8) !void { + var rt = try self.root(); + defer rt.deinit(); + return rt.extract(op, path); + } - /// Returns the inode with the given Inode Number. + /// Returns the Inode with the given Inode Number. /// Requires the archive to have an export table. pub fn inodeAt(self: Self, num: u32) !Inode { if (!self.super.flags.has_export) return SfsError.NotExportable; diff --git a/src/reader/data.zig b/src/reader/data.zig index 2541ace..2cffe48 100644 --- a/src/reader/data.zig +++ b/src/reader/data.zig @@ -8,6 +8,7 @@ const Compression = @import("../superblock.zig").Compression; const DataReaderError = error{ EOF, ThreadPoolNotSet, + InvalidIndex, }; pub fn DataReader(comptime T: type) type { @@ -85,13 +86,14 @@ pub fn DataReader(comptime T: type) type { self.pool = pool; } - fn blockAt(self: Self, idx: u32) ![]u8 { + fn blockAt(self: Self, idx: usize) ![]u8 { if (self.frag.len > 0 and idx == self.sizes.len) return self.frag; if (idx >= self.sizes.len) return DataReaderError.InvalidIndex; - const size = if (idx == self.sizes.len - 1 and self.frag.len == 0) { - self.file_size % self.block_size; - } else { - self.block_size; + const size = blk: { + if (idx == self.sizes.len - 1 and self.frag.len == 0) { + break :blk self.file_size % self.block_size; + } + break :blk self.block_size; }; const block = try self.alloc.alloc(u8, size); errdefer self.alloc.free(block); @@ -143,56 +145,91 @@ pub fn DataReader(comptime T: type) type { return .{ .context = self }; } - /// Write the entire file's contents to the writer. + /// Write the entire file's contents to the writer using multiple threads. /// If availble, pwrite will be used. pub fn writeTo(self: Self, writer: anytype) !usize { - if (comptime self.pool == null) return DataReaderError.ThreadPoolNotSet; - const mut: std.Thread.Mutex = .{}; + if (self.pool == null) return DataReaderError.ThreadPoolNotSet; + var mut: std.Thread.Mutex = .{}; var cur_idx: usize = 0; - const wg: std.Thread.WaitGroup = .{}; - const completed = comptime if (std.meta.hasFn(@TypeOf(writer), "pwrite")) { - std.ArrayList(anyerror).init(self.alloc); - } else { - std.AutoArrayHashMap(usize, anyerror![]u8).init(self.alloc); - }; + var wg: std.Thread.WaitGroup = .{}; + var completed: std.AutoArrayHashMap(usize, anyerror![]u8) = .init(self.alloc); defer completed.deinit(); + var errs: std.ArrayList(anyerror) = .init(self.alloc); + defer errs.deinit(); for (0..self.numBlocks()) |i| { wg.start(); self.pool.?.spawn( - comptime if (std.meta.hasFn(@TypeOf(writer), "pwrite")) { - extractThreadedPWrite; - } else { - extractThreaded; + comptime blk: { + if (std.meta.hasFn(@TypeOf(writer), "pwrite")) { + break :blk writeToThreadPWrite; + } + break :blk writeToThread; }, - comptime if (std.meta.hasFn(@TypeOf(writer), "pwrite")) { - .{ self, &wg, &completed, i, writer }; - } else { - .{ self, &mut, &cur_idx, &wg, &completed, i, writer }; + blk: { + if (comptime std.meta.hasFn(@TypeOf(writer), "pwrite")) { + break :blk .{ self, &wg, &errs, i, writer }; + } + break :blk .{ self, &wg, &mut, &cur_idx, &completed, i, writer }; }, ); } - std.Thread.yield(); wg.wait(); - if (completed.items.len > 0) { - return completed.items.get(0); - } + if (errs.items.len > 0) return errs.items[0]; return self.file_size; } - pub fn writeToThreaded(self: Self, errs: *std.ArrayList(anyerror), wg: *std.Thread.WaitGroup, writer: anytype) void {} - fn extractThreaded( + /// Similiar to writeTo, but does not block until finished. + /// When all blocks have been written, on_finish and wg.finish() (in that order) will be called. + /// NOTE: wg.start() is not called; + pub fn writeToNoBlock( + self: Self, + errs: *std.ArrayList(anyerror), + writer: anytype, + wg: *std.Thread.WaitGroup, + comptime on_finish: anytype, + on_finish_args: anytype, + ) !void { + if (self.pool == null) return DataReaderError.ThreadPoolNotSet; + const mut: std.Thread.Mutex = .{}; + var cur_idx: usize = 0; + var block_wg: std.Thread.WaitGroup = .{}; + var finish_mut: std.Thread.Mutex = .{}; + var completed: ?std.AutoHashMap(usize, []u8) = null; + if (!comptime std.meta.hasFn(@TypeOf(writer), "pwrite")) { + completed = std.AutoHashMap(usize, []u8).init(self.alloc); + } + block_wg.startMany(self.numBlocks()); + for (0..self.numBlocks()) |i| { + try self.pool.?.spawn( + comptime blk: { + if (std.meta.hasFn(@TypeOf(writer), "pwrite")) { + break :blk noBlockThreadPWrite; + } + break :blk noBlockThread; + }, + blk: { + if (comptime std.meta.hasFn(@TypeOf(writer), "pwrite")) { + break :blk .{ self, &block_wg, errs, i, writer, wg, &finish_mut, on_finish, on_finish_args }; + } else { + break :blk .{ self, &block_wg, &mut, &cur_idx, errs, &completed.?, i, writer, wg, &finish_mut, on_finish, on_finish_args }; + } + }, + ); + } + } + + fn writeBlockTo( self: Self, mut: *std.Thread.Mutex, cur_idx: *usize, - wg: *std.Thread.WaitGroup, - completed: *std.AutoArrayHashMap(usize, anyerror![]u8), + errs: *std.ArrayList(anyerror), + completed: *std.AutoHashMap(usize, []u8), idx: usize, writer: anytype, - ) void { - if (cur_idx.* >= self.sizes.len + 1) return; - defer wg.finish(); + ) !void { + //TODO: We can marginally reduce memory usage if we don't store sparse blocks in completed. + if (errs.items.len > 0) return; // Indicates an error has occured in another thread. const block = self.blockAt(idx) catch |err| { - cur_idx.* = self.sizes.len + 1; - completed.put(idx, err) catch {}; + errs.append(err) catch {}; return; }; defer if (idx < self.sizes.len) { @@ -202,14 +239,12 @@ pub fn DataReader(comptime T: type) type { defer mut.unlock(); if (cur_idx.* == idx) { _ = writer.write(block) catch |err| { - cur_idx.* = self.sizes.len + 1; - completed.put(idx, err) catch {}; + errs.append(err) catch {}; return; }; } else { completed.put(idx, block) catch |err| { - cur_idx.* = self.sizes.len + 1; - completed.put(idx, err) catch {}; + errs.append(err) catch {}; return; }; } @@ -217,35 +252,114 @@ pub fn DataReader(comptime T: type) type { for (cur_idx.*..self.numBlocks()) |i| { const val = completed.get(i); if (val == null) return; - _ = writer.write(block) catch |err| { - cur_idx.* = self.sizes.len + 1; - completed.put(i, err) catch {}; + _ = writer.write(val.?) catch |err| { + errs.append(err) catch {}; return; }; + _ = completed.remove(i); cur_idx.* += 1; if (completed.count() == 0) return; } } - fn extractThreadedPWrite( + fn writeBlockToPWrite( self: Self, - wg: *std.Thread.WaitGroup, - completed: *std.ArrayList(anyerror), + errs: *std.ArrayList(anyerror), idx: usize, writer: anytype, ) void { - if (completed.items.len > 0) return; - defer wg.finish(); - const block = self.blockAt(idx) catch |err| { - completed.append(err) catch {}; - return; - }; - defer if (idx < self.sizes.len) { - self.alloc.free(block); - }; - _ = writer.pwrite(idx * self.block_size, block) catch |err| { - completed.append(err) catch {}; - return; - }; + if (errs.items.len > 0) return; + if (self.sizes[idx].size == 0) { + var pos = idx * self.block_size; + if (self.frag.len == 0 and idx == self.sizes.len - 1) { + pos += self.file_size % self.block_size; + } else { + pos += self.block_size; + } + _ = writer.pwrite(&[1]u8{0}, pos - 1) catch |err| { + errs.append(err) catch {}; + }; + } else { + const block = self.blockAt(idx) catch |err| { + errs.append(err) catch {}; + return; + }; + defer if (idx < self.sizes.len) { + self.alloc.free(block); + }; + _ = writer.pwrite(block, idx * self.block_size) catch |err| { + errs.append(err) catch {}; + return; + }; + } + } + + fn writeToThread( + self: Self, + wg: *std.Thread.WaitGroup, + mut: *std.Thread.Mutex, + cur_idx: *usize, + errs: *std.ArrayList(anyerror), + completed: *std.AutoArrayHashMap(usize, anyerror![]u8), + idx: usize, + writer: anytype, + ) void { + self.writeBlockTo(mut, cur_idx, errs, completed, idx, writer); + wg.finish(); + } + fn writeToThreadPWrite( + self: Self, + wg: *std.Thread.WaitGroup, + errs: *std.ArrayList(anyerror), + idx: usize, + writer: anytype, + ) void { + self.writeBlockToPWrite(errs, idx, writer); + wg.finish(); + } + + fn noBlockThread( + self: Self, + block_wg: *std.Thread.WaitGroup, + mut: *std.Thread.Mutex, + cur_idx: *usize, + errs: *std.ArrayList(anyerror), + completed: *std.AutoArrayHashMap(usize, anyerror![]u8), + idx: usize, + writer: anytype, + finish_wg: *std.Thread.WaitGroup, + finish_mut: *std.Thread.Mutex, + comptime on_finish: anytype, + on_finish_args: anytype, + ) void { + self.writeBlockTo(mut, cur_idx, errs, completed, idx, writer); + block_wg.finish(); + finish_mut.lock(); + defer finish_mut.unlock(); + if (block_wg.isDone()) { + @call(.auto, on_finish, on_finish_args); + finish_wg.finish(); + completed.deinit(); + } + } + fn noBlockThreadPWrite( + self: Self, + block_wg: *std.Thread.WaitGroup, + errs: *std.ArrayList(anyerror), + idx: usize, + writer: anytype, + finish_wg: *std.Thread.WaitGroup, + finish_mut: *std.Thread.Mutex, + comptime on_finish: anytype, + on_finish_args: anytype, + ) void { + self.writeBlockToPWrite(errs, idx, writer); + block_wg.finish(); + finish_mut.lock(); + defer finish_mut.unlock(); + if (block_wg.isDone()) { + @call(.auto, on_finish, on_finish_args); + finish_wg.finish(); + } } }; } diff --git a/src/root.zig b/src/root.zig index c65635e..a7fd31e 100644 --- a/src/root.zig +++ b/src/root.zig @@ -5,12 +5,14 @@ pub const ExtractionOptions = @import("extract_options.zig"); pub const SfsFile = SfsReader(std.fs.File); -const test_file = "testing/LinuxPATest.sfs"; +const test_archive = "testing/LinuxPATest.sfs"; +const test_file = "Start.exe"; +const file_extr_loc = "testing/Start.exe"; test "OpenFile" { - const fil = try std.fs.cwd().openFile(test_file, .{}); - defer fil.close(); - var rdr: SfsFile = try .init(std.testing.allocator, fil, 0); + const sfs_fil = try std.fs.cwd().openFile(test_archive, .{}); + defer sfs_fil.close(); + var rdr: SfsFile = try .init(std.testing.allocator, sfs_fil, 0); defer rdr.deinit(); std.debug.print("{}\n", .{rdr.super}); const root = try rdr.root(); @@ -20,18 +22,17 @@ test "OpenFile" { defer f.deinit(); std.debug.print("{s}\n", .{f.name}); } - var start = try root.open("Start.exe"); - defer start.deinit(); - const startReader = try start.reader(); - _ = startReader; } -test "ReadFile" { - const fil = try std.fs.cwd().openFile(test_file, .{}); - defer fil.close(); - var rdr: SfsFile = try .init(std.testing.allocator, fil, 0); +test "ExtractFile" { + std.fs.cwd().deleteFile(file_extr_loc) catch {}; + const sfs_fil = try std.fs.cwd().openFile(test_archive, .{}); + defer sfs_fil.close(); + var rdr: SfsFile = try .init(std.testing.allocator, sfs_fil, 0); defer rdr.deinit(); - std.debug.print("{}\n", .{rdr.super}); - const root = try rdr.root(); - defer root.deinit(); + const fil = try rdr.open(test_file); + defer fil.deinit(); + var op: ExtractionOptions = try .init(); + op.verbose = true; + try fil.extract(&op, file_extr_loc); } From eb214feefa211aab5ce19f42508bcc62ef76ef5b Mon Sep 17 00:00:00 2001 From: Caleb Gardner Date: Fri, 18 Jul 2025 22:30:30 -0500 Subject: [PATCH 11/15] Further progress on extraction --- src/extract_options.zig | 2 +- src/file.zig | 141 ++++++++++++++++++++++++++++++++-------- src/root.zig | 2 +- 3 files changed, 116 insertions(+), 29 deletions(-) diff --git a/src/extract_options.zig b/src/extract_options.zig index 36b2232..1d3c9b6 100644 --- a/src/extract_options.zig +++ b/src/extract_options.zig @@ -12,7 +12,7 @@ ignore_permissions: bool = false, /// Verbose logging verbose: bool = false, /// Verbose logging writer. If not set, stdout is used. -verbose_logger: ?std.io.AnyWriter = null, +verbose_logger: std.io.AnyWriter = std.io.getStdOut().writer().any(), /// Number of threads used during extraction. Defualts to std.Thread.getCpuCount(). thread_count: usize, diff --git a/src/file.zig b/src/file.zig index bb10126..6e70e70 100644 --- a/src/file.zig +++ b/src/file.zig @@ -185,13 +185,11 @@ pub fn File(comptime T: type) type { const WaitGroup = std.Thread.WaitGroup; const Pool = std.Thread.Pool; + const Mutex = std.Thread.Mutex; pub const ExtractError = error{FileExists}; - pub fn extract(self: Self, op: *ExtractionOptions, path: []const u8) !void { - if (op.verbose and op.verbose_logger == null) { - op.verbose_logger = std.io.getStdOut().writer().any(); - } + pub fn extract(self: Self, op: ExtractionOptions, path: []const u8) !void { var exists = true; var stat: ?std.fs.File.Stat = null; if (std.fs.cwd().statFile(path)) |s| { @@ -222,33 +220,43 @@ pub fn File(comptime T: type) type { defer pol.deinit(); var errs: std.ArrayList(anyerror) = .init(self.rdr.alloc); defer errs.deinit(); - try self.extractReal(op, &errs, &wg, &pol, path); + try self.extractReal(op, &errs, &wg, &pol, path, true); wg.wait(); if (errs.items.len > 0) return errs.items[0]; } fn extractReal( self: Self, - op: *ExtractionOptions, + op: ExtractionOptions, errs: *std.ArrayList(anyerror), wg: *WaitGroup, pol: *Pool, path: []const u8, + first: bool, + comptime on_finish: anytype, + finish_args: anytype, ) !void { + if (op.verbose) { + std.fmt.format(op.verbose_logger, "extracting inode {} \"{s}\" to {s}...\n", .{ self.inode.hdr.num, self.name, path }) catch {}; + } return switch (self.inode.hdr.type) { - .dir, .ext_dir => self.extractDir(op, errs, wg, pol, path), - .file, .ext_file => self.extractReg(op, errs, wg, pol, path), - .symlink, .ext_symlink => self.extractSymlink(op, errs, wg, pol, path), + .dir, .ext_dir => self.extractDir(op, errs, wg, pol, path, first), + .file, .ext_file => self.extractReg(op, errs, wg, pol, path, first), + .symlink, .ext_symlink => self.extractSymlink(op, errs, wg, pol, path, first), .block_dev, .ext_block_dev, .char_dev, .ext_char_dev, .fifo, .ext_fifo, - => self.extractDev(op, path), + => { + try self.extractDev(op, path); + if (!first) self.deinit(); + return; + }, else => { if (op.verbose) { std.fmt.format( - op.verbose_logger.?, + op.verbose_logger, "inode {} \"{s}\" is a socket file. Ignoring.\n", .{ self.inode.hdr.num, self.name }, ) catch {}; @@ -256,31 +264,104 @@ pub fn File(comptime T: type) type { }, }; } - fn extractDir(self: Self, op: *ExtractionOptions, errs: *std.ArrayList(anyerror), wg: *WaitGroup, pol: *Pool, path: []const u8) !void { + fn extractDir( + self: Self, + op: ExtractionOptions, + errs: *std.ArrayList(anyerror), + wg: *WaitGroup, + pol: *Pool, + path: []const u8, + comptime on_finish: anytype, + finish_args: anytype, + ) !void { if (errs.items.len > 0) return; - _ = self; - _ = op; - _ = wg; - _ = pol; - _ = path; + wg.start(); + var dir_wg: WaitGroup = .{}; + dir_wg.startMany(self.entries.?.len); + for (self.entries.?) |e| { + const fil: Self = try .initFromEntry(self.rdr, e); + } return error{TODO}.TODO; } - fn extractReg(self: Self, op: *ExtractionOptions, errs: *std.ArrayList(anyerror), wg: *WaitGroup, pol: *Pool, path: []const u8) !void { + fn extractReg( + self: Self, + op: ExtractionOptions, + errs: *std.ArrayList(anyerror), + wg: *WaitGroup, + pol: *Pool, + path: []const u8, + first: bool, + comptime on_finish: anytype, + finish_args: anytype, + ) !void { if (errs.items.len > 0) return; const fil = try std.fs.cwd().createFile(path, .{}); @constCast(&self.data_reader.?).setPool(pol); wg.start(); - try self.data_reader.?.writeToNoBlock(errs, fil, wg, extractRegFinish, .{ self, fil }); - _ = op; - //TODO: add some way of verbose logging of the errors for this file in particular. + var fil_errs: std.ArrayList(anyerror) = .init(self.rdr.alloc); + try self.data_reader.?.writeToNoBlock(fil_errs, fil, wg, extractRegFinish, .{ self, op, fil, &fil_errs, first }); return; } - fn extractRegFinish(self: Self, fil: std.fs.File) void { + fn extractRegFinish( + self: Self, + op: ExtractionOptions, + fil: std.fs.File, + errs: *std.ArrayList(anyerror), + fil_errs: *std.ArrayList(anyerror), + first: bool, + comptime on_finish: anytype, + finish_args: anytype, + ) void { defer fil.close(); - //TODO: set owners & permissions. Check if we need to call self.deinit(); - _ = self; + defer fil_errs.deinit(); + defer if (!first) self.deinit(); + if (fil_errs.items.len > 0) { + if (op.verbose) { + for (fil_errs.items) |err| { + std.fmt.format(op.verbose_logger, "error extracting inode {} \"{s}\": {}\n", .{ self.inode.num, self.name, err }) catch {}; + } + } + errs.append(fil_errs.items[0]) catch {}; + return; + } + if (!op.ignore_permissions) { + const fil_uid = self.uid() catch |err| { + if (op.verbose) { + std.fmt.format(op.verbose_logger, "error getting uid: {}\n", .{err}) catch {}; + return; + } + }; + const fil_gid = self.gid() catch |err| { + if (op.verbose) { + std.fmt.format(op.verbose_logger, "error getting gid: {}\n", .{err}) catch {}; + return; + } + }; + fil.chmod(self.inode.hdr.perm) catch |err| { + if (op.verbose) { + std.fmt.format(op.verbose_logger, "error setting permissions: {}\n", .{err}) catch {}; + return; + } + }; + fil.chown(fil_uid, fil_gid) catch |err| { + if (op.verbose) { + std.fmt.format(op.verbose_logger, "error setting owners: {}\n", .{err}) catch {}; + return; + } + }; + } } - fn extractSymlink(self: Self, op: *ExtractionOptions, errs: *std.ArrayList(anyerror), wg: *WaitGroup, pol: *Pool, path: []const u8) !void { + fn extractSymlink( + self: Self, + op: ExtractionOptions, + errs: *std.ArrayList(anyerror), + wg: *WaitGroup, + pol: *Pool, + path: []const u8, + first: bool, + comptime on_finish: anytype, + finish_args: anytype, + ) !void { if (errs.items.len > 0) return; _ = self; _ = op; @@ -289,11 +370,17 @@ pub fn File(comptime T: type) type { _ = path; return error{TODO}.TODO; } - fn extractDev(self: Self, op: *ExtractionOptions, path: []const u8) !void { + fn extractDev( + self: Self, + op: ExtractionOptions, + path: []const u8, + comptime on_finish: anytype, + finish_args: anytype, + ) !void { if (comptime builtin.os.tag != .linux) { if (op.verbose) { std.fmt.format( - op.verbose_logger.?, + op.verbose_logger, "inode {} \"{s}\" is a device/fifo file and the OS is not Linux. Ignoring.\n", .{ self.inode.hdr.num, self.name }, ) catch {}; diff --git a/src/root.zig b/src/root.zig index a7fd31e..48e3049 100644 --- a/src/root.zig +++ b/src/root.zig @@ -34,5 +34,5 @@ test "ExtractFile" { defer fil.deinit(); var op: ExtractionOptions = try .init(); op.verbose = true; - try fil.extract(&op, file_extr_loc); + try fil.extract(op, file_extr_loc); } From 9c1d90f60b182ea8cb0a2d1d0aeb7cbbc644d072 Mon Sep 17 00:00:00 2001 From: Caleb Gardner Date: Sat, 19 Jul 2025 06:30:24 -0500 Subject: [PATCH 12/15] Reset on extraction logic Fixed not using MetadataReader for Tables Fixed MetadataReaders not respecting uncompressed blocks --- src/file.zig | 157 +--------------------------------------- src/inode/file.zig | 2 +- src/reader.zig | 6 +- src/reader/data.zig | 13 +--- src/reader/metadata.zig | 14 ++-- src/root.zig | 3 + src/table.zig | 13 +++- 7 files changed, 34 insertions(+), 174 deletions(-) diff --git a/src/file.zig b/src/file.zig index 6e70e70..8e3ef4e 100644 --- a/src/file.zig +++ b/src/file.zig @@ -232,27 +232,15 @@ pub fn File(comptime T: type) type { pol: *Pool, path: []const u8, first: bool, - comptime on_finish: anytype, - finish_args: anytype, ) !void { if (op.verbose) { std.fmt.format(op.verbose_logger, "extracting inode {} \"{s}\" to {s}...\n", .{ self.inode.hdr.num, self.name, path }) catch {}; } return switch (self.inode.hdr.type) { - .dir, .ext_dir => self.extractDir(op, errs, wg, pol, path, first), - .file, .ext_file => self.extractReg(op, errs, wg, pol, path, first), - .symlink, .ext_symlink => self.extractSymlink(op, errs, wg, pol, path, first), - .block_dev, - .ext_block_dev, - .char_dev, - .ext_char_dev, - .fifo, - .ext_fifo, - => { - try self.extractDev(op, path); - if (!first) self.deinit(); - return; - }, + .dir, .ext_dir => {}, + .file, .ext_file => {}, + .symlink, .ext_symlink => {}, + .block_dev, .ext_block_dev, .char_dev, .ext_char_dev, .fifo, .ext_fifo => {}, else => { if (op.verbose) { std.fmt.format( @@ -264,142 +252,5 @@ pub fn File(comptime T: type) type { }, }; } - fn extractDir( - self: Self, - op: ExtractionOptions, - errs: *std.ArrayList(anyerror), - wg: *WaitGroup, - pol: *Pool, - path: []const u8, - comptime on_finish: anytype, - finish_args: anytype, - ) !void { - if (errs.items.len > 0) return; - wg.start(); - var dir_wg: WaitGroup = .{}; - dir_wg.startMany(self.entries.?.len); - for (self.entries.?) |e| { - const fil: Self = try .initFromEntry(self.rdr, e); - } - return error{TODO}.TODO; - } - fn extractReg( - self: Self, - op: ExtractionOptions, - errs: *std.ArrayList(anyerror), - wg: *WaitGroup, - pol: *Pool, - path: []const u8, - first: bool, - comptime on_finish: anytype, - finish_args: anytype, - ) !void { - if (errs.items.len > 0) return; - const fil = try std.fs.cwd().createFile(path, .{}); - @constCast(&self.data_reader.?).setPool(pol); - wg.start(); - var fil_errs: std.ArrayList(anyerror) = .init(self.rdr.alloc); - try self.data_reader.?.writeToNoBlock(fil_errs, fil, wg, extractRegFinish, .{ self, op, fil, &fil_errs, first }); - return; - } - fn extractRegFinish( - self: Self, - op: ExtractionOptions, - fil: std.fs.File, - errs: *std.ArrayList(anyerror), - fil_errs: *std.ArrayList(anyerror), - first: bool, - comptime on_finish: anytype, - finish_args: anytype, - ) void { - defer fil.close(); - defer fil_errs.deinit(); - defer if (!first) self.deinit(); - if (fil_errs.items.len > 0) { - if (op.verbose) { - for (fil_errs.items) |err| { - std.fmt.format(op.verbose_logger, "error extracting inode {} \"{s}\": {}\n", .{ self.inode.num, self.name, err }) catch {}; - } - } - errs.append(fil_errs.items[0]) catch {}; - return; - } - if (!op.ignore_permissions) { - const fil_uid = self.uid() catch |err| { - if (op.verbose) { - std.fmt.format(op.verbose_logger, "error getting uid: {}\n", .{err}) catch {}; - return; - } - }; - const fil_gid = self.gid() catch |err| { - if (op.verbose) { - std.fmt.format(op.verbose_logger, "error getting gid: {}\n", .{err}) catch {}; - return; - } - }; - fil.chmod(self.inode.hdr.perm) catch |err| { - if (op.verbose) { - std.fmt.format(op.verbose_logger, "error setting permissions: {}\n", .{err}) catch {}; - return; - } - }; - fil.chown(fil_uid, fil_gid) catch |err| { - if (op.verbose) { - std.fmt.format(op.verbose_logger, "error setting owners: {}\n", .{err}) catch {}; - return; - } - }; - } - } - fn extractSymlink( - self: Self, - op: ExtractionOptions, - errs: *std.ArrayList(anyerror), - wg: *WaitGroup, - pol: *Pool, - path: []const u8, - first: bool, - comptime on_finish: anytype, - finish_args: anytype, - ) !void { - if (errs.items.len > 0) return; - _ = self; - _ = op; - _ = wg; - _ = pol; - _ = path; - return error{TODO}.TODO; - } - fn extractDev( - self: Self, - op: ExtractionOptions, - path: []const u8, - comptime on_finish: anytype, - finish_args: anytype, - ) !void { - if (comptime builtin.os.tag != .linux) { - if (op.verbose) { - std.fmt.format( - op.verbose_logger, - "inode {} \"{s}\" is a device/fifo file and the OS is not Linux. Ignoring.\n", - .{ self.inode.hdr.num, self.name }, - ) catch {}; - } - return; - } - const mode: u32 = switch (self.inode.hdr.type) { - .block_dev, .ext_block_dev => std.posix.S.IFBLK, - .char_dev, .ext_char_dev => std.posix.S.IFCHR, - .fifo, .ext_fifo => std.posix.S.IFIFO, - else => unreachable, - }; - const dev = switch (self.inode.data) { - .block_dev, .char_dev => |b| b.device, - .ext_block_dev, .ext_char_dev => |b| b.device, - .fifo, .ext_fifo => 0, - else => unreachable, - }; - _ = std.os.linux.mknod(@ptrCast(path), mode, dev); - } }; } diff --git a/src/inode/file.zig b/src/inode/file.zig index 86af63b..4f70051 100644 --- a/src/inode/file.zig +++ b/src/inode/file.zig @@ -54,7 +54,7 @@ pub const ExtFile = struct { const size = std.mem.readInt(u64, fixed[8..16], .little); const frag_idx = std.mem.readInt(u32, fixed[28..32], .little); var blocks: u32 = @truncate(size / block_size); - if (size % block_size > 0 and frag_idx != 0xffffffff) { + if (size % block_size > 0 and frag_idx == 0xffffffff) { blocks += 1; } const block_sizes = try alloc.alloc(BlockSize, blocks); diff --git a/src/reader.zig b/src/reader.zig index b0f69e2..71e1096 100644 --- a/src/reader.zig +++ b/src/reader.zig @@ -37,9 +37,9 @@ pub fn SfsReader(comptime T: type) type { .rdr = .init(rdr, offset), }; _ = try rdr.pread(std.mem.asBytes(&out.super), 0); - out.frag_table = .init(alloc, rdr, out.super.frag_start, out.super.frag_count); - out.id_table = .init(alloc, rdr, out.super.id_start, out.super.id_count); - out.export_table = .init(alloc, rdr, out.super.export_start, out.super.inode_count); + out.frag_table = .init(alloc, out.rdr, out.super.comp, out.super.frag_start, out.super.frag_count); + out.id_table = .init(alloc, out.rdr, out.super.comp, out.super.id_start, out.super.id_count); + out.export_table = .init(alloc, out.rdr, out.super.comp, out.super.export_start, out.super.inode_count); return out; } pub fn deinit(self: *Self) void { diff --git a/src/reader/data.zig b/src/reader/data.zig index 2cffe48..4462138 100644 --- a/src/reader/data.zig +++ b/src/reader/data.zig @@ -184,7 +184,6 @@ pub fn DataReader(comptime T: type) type { self: Self, errs: *std.ArrayList(anyerror), writer: anytype, - wg: *std.Thread.WaitGroup, comptime on_finish: anytype, on_finish_args: anytype, ) !void { @@ -208,9 +207,9 @@ pub fn DataReader(comptime T: type) type { }, blk: { if (comptime std.meta.hasFn(@TypeOf(writer), "pwrite")) { - break :blk .{ self, &block_wg, errs, i, writer, wg, &finish_mut, on_finish, on_finish_args }; + break :blk .{ self, &block_wg, errs, i, writer, &finish_mut, on_finish, on_finish_args }; } else { - break :blk .{ self, &block_wg, &mut, &cur_idx, errs, &completed.?, i, writer, wg, &finish_mut, on_finish, on_finish_args }; + break :blk .{ self, &block_wg, &mut, &cur_idx, errs, &completed.?, i, writer, &finish_mut, on_finish, on_finish_args }; } }, ); @@ -326,18 +325,16 @@ pub fn DataReader(comptime T: type) type { completed: *std.AutoArrayHashMap(usize, anyerror![]u8), idx: usize, writer: anytype, - finish_wg: *std.Thread.WaitGroup, finish_mut: *std.Thread.Mutex, comptime on_finish: anytype, on_finish_args: anytype, ) void { self.writeBlockTo(mut, cur_idx, errs, completed, idx, writer); - block_wg.finish(); finish_mut.lock(); + block_wg.finish(); defer finish_mut.unlock(); if (block_wg.isDone()) { @call(.auto, on_finish, on_finish_args); - finish_wg.finish(); completed.deinit(); } } @@ -347,18 +344,16 @@ pub fn DataReader(comptime T: type) type { errs: *std.ArrayList(anyerror), idx: usize, writer: anytype, - finish_wg: *std.Thread.WaitGroup, finish_mut: *std.Thread.Mutex, comptime on_finish: anytype, on_finish_args: anytype, ) void { self.writeBlockToPWrite(errs, idx, writer); - block_wg.finish(); finish_mut.lock(); + block_wg.finish(); defer finish_mut.unlock(); if (block_wg.isDone()) { @call(.auto, on_finish, on_finish_args); - finish_wg.finish(); } } }; diff --git a/src/reader/metadata.zig b/src/reader/metadata.zig index 89d74ff..289ebe0 100644 --- a/src/reader/metadata.zig +++ b/src/reader/metadata.zig @@ -35,11 +35,15 @@ pub fn MetadataReader(comptime T: type) type { var hdr: MetaHeader = undefined; _ = try self.rdr.pread(std.mem.asBytes(&hdr), self.offset); self.offset += 2; - self.block_size = try self.comp.decompress( - self.alloc, - self.rdr.readerAt(self.offset).reader(), - &self.block, - ); + if (hdr.uncompressed) { + self.block_size = try self.rdr.pread(self.block[0..hdr.size], self.offset); + } else { + self.block_size = try self.comp.decompress( + self.alloc, + self.rdr.readerAt(self.offset).reader(), + &self.block, + ); + } self.offset += hdr.size; self.block_offset = 0; } diff --git a/src/root.zig b/src/root.zig index 48e3049..df3b4b4 100644 --- a/src/root.zig +++ b/src/root.zig @@ -14,6 +14,9 @@ test "OpenFile" { defer sfs_fil.close(); var rdr: SfsFile = try .init(std.testing.allocator, sfs_fil, 0); defer rdr.deinit(); + _ = try rdr.frag_table.get(rdr.super.frag_count - 1); + _ = try rdr.id_table.get(rdr.super.id_count - 1); + _ = try rdr.export_table.get(rdr.super.inode_count - 1); std.debug.print("{}\n", .{rdr.super}); const root = try rdr.root(); defer root.deinit(); diff --git a/src/table.zig b/src/table.zig index 2f857d0..c60761d 100644 --- a/src/table.zig +++ b/src/table.zig @@ -1,5 +1,9 @@ const std = @import("std"); +const PRead = @import("reader/p_read.zig").PRead; +const Compression = @import("superblock.zig").Compression; +const MetadataReader = @import("reader/metadata.zig").MetadataReader; + pub const TableError = error{ InvalidIndex, }; @@ -10,7 +14,8 @@ pub fn Table(comptime T: type, comptime R: type) type { const Self = @This(); alloc: std.mem.Allocator, - rdr: R, + rdr: PRead(R), + comp: Compression, offset: u64, table_count: u32, @@ -18,10 +23,11 @@ pub fn Table(comptime T: type, comptime R: type) type { table: []T = &[0]T{}, - pub fn init(alloc: std.mem.Allocator, rdr: R, offset: u64, table_count: u32) Self { + pub fn init(alloc: std.mem.Allocator, rdr: PRead(R), comp: Compression, offset: u64, table_count: u32) Self { return .{ .alloc = alloc, .rdr = rdr, + .comp = comp, .offset = offset, .table_count = table_count, }; @@ -60,7 +66,8 @@ pub fn Table(comptime T: type, comptime R: type) type { try self.resize(to_read); _ = try self.rdr.pread(std.mem.asBytes(&offset), self.offset); self.offset += 8; - _ = try self.rdr.pread(std.mem.sliceAsBytes(self.table[self.table.len - to_read ..]), offset); + var meta: MetadataReader(R) = .init(self.alloc, self.comp, self.rdr, offset); + _ = try meta.read(std.mem.sliceAsBytes(self.table[self.table.len - to_read ..])); } return self.table[idx]; } From 8998d28253b9e77dfe2fd6d23c22a4a812cefff9 Mon Sep 17 00:00:00 2001 From: Caleb Gardner Date: Sat, 19 Jul 2025 16:40:54 -0500 Subject: [PATCH 13/15] Data reader fixes Re-started extraction logic --- src/file.zig | 102 +++++++++++++++++++++++++++++++++++++++++--- src/reader/data.zig | 14 +++--- 2 files changed, 103 insertions(+), 13 deletions(-) diff --git a/src/file.zig b/src/file.zig index 8e3ef4e..e1914ab 100644 --- a/src/file.zig +++ b/src/file.zig @@ -23,6 +23,7 @@ pub fn File(comptime T: type) type { const Self = @This(); rdr: *SfsReader(T), + // parent: *File(T), inode: Inode, name: []const u8, @@ -220,37 +221,126 @@ pub fn File(comptime T: type) type { defer pol.deinit(); var errs: std.ArrayList(anyerror) = .init(self.rdr.alloc); defer errs.deinit(); - try self.extractReal(op, &errs, &wg, &pol, path, true); + try self.extractReal(op, path, &errs, &wg, &pol, true); wg.wait(); if (errs.items.len > 0) return errs.items[0]; } fn extractReal( self: Self, op: ExtractionOptions, + path: []const u8, errs: *std.ArrayList(anyerror), wg: *WaitGroup, pol: *Pool, - path: []const u8, first: bool, ) !void { + if (errs.items.len > 0) return; if (op.verbose) { std.fmt.format(op.verbose_logger, "extracting inode {} \"{s}\" to {s}...\n", .{ self.inode.hdr.num, self.name, path }) catch {}; } return switch (self.inode.hdr.type) { - .dir, .ext_dir => {}, - .file, .ext_file => {}, + .dir, .ext_dir => { + wg.start(); + errdefer wg.finish(); + for (self.entries.?) |ent| { + var fil = initFromEntry(self.rdr, ent) catch |err| { + continue; + }; + } + }, + .file, .ext_file => { + wg.start(); + errdefer wg.finish(); + var ext_fil = try std.fs.cwd().createFile(path, .{}); + errdefer ext_fil.close(); + var fil_errs: std.ArrayList(anyerror) = .init(self.rdr.alloc); + errdefer fil_errs.deinit(); + @constCast(&self.data_reader.?).setPool(pol); + try self.data_reader.?.writeToNoBlock(errs, ext_fil, filExtractFinish, .{ + self, + op, + path, + &fil_errs, + errs, + wg, + ext_fil, + first, + }); + }, .symlink, .ext_symlink => {}, - .block_dev, .ext_block_dev, .char_dev, .ext_char_dev, .fifo, .ext_fifo => {}, + .block_dev, .ext_block_dev, .char_dev, .ext_char_dev, .fifo, .ext_fifo => { + //TODO: check for all oses that accept unix permissions. + }, else => { if (op.verbose) { std.fmt.format( op.verbose_logger, "inode {} \"{s}\" is a socket file. Ignoring.\n", - .{ self.inode.hdr.num, self.name }, + .{ self.inode.hdr.num, path }, ) catch {}; } }, }; } + fn filExtractFinish( + self: Self, + op: ExtractionOptions, + path: []const u8, + fil_errs: *std.ArrayList(anyerror), + errs: *std.ArrayList(anyerror), + wg: *WaitGroup, + fil: std.fs.File, + first: bool, + ) void { + defer wg.finish(); + defer fil.close(); + defer if (!first) self.deinit(); + if (fil_errs.items.len > 0) { + if (op.verbose) { + for (fil_errs.items) |err| { + std.fmt.format( + op.verbose_logger, + "error extracting inode {} to \"{s}\": {}\n", + .{ self.inode.hdr.num, path, err }, + ) catch {}; + } + } + errs.append(fil_errs.items[0]) catch {}; + return; + } + if (op.ignore_permissions) return; + const fil_uid = self.uid() catch |err| { + std.fmt.format( + op.verbose_logger, + "error getting uid for inode {} \"{s}\": {}\n", + .{ self.inode.hdr.num, path, err }, + ) catch {}; + return; + }; + const fil_gid = self.gid() catch |err| { + std.fmt.format( + op.verbose_logger, + "error getting gid for inode {} \"{s}\": {}\n", + .{ self.inode.hdr.num, path, err }, + ) catch {}; + return; + }; + fil.chmod(self.inode.hdr.perm) catch |err| { + std.fmt.format( + op.verbose_logger, + "error setting permissions for inode {} \"{s}\": {}\n", + .{ self.inode.hdr.num, path, err }, + ) catch {}; + return; + }; + fil.chown(fil_uid, fil_gid) catch |err| { + std.fmt.format( + op.verbose_logger, + "error setting owner for inode {} \"{s}\": {}\n", + .{ self.inode.hdr.num, path, err }, + ) catch {}; + return; + }; + } }; } diff --git a/src/reader/data.zig b/src/reader/data.zig index 4462138..f2521ac 100644 --- a/src/reader/data.zig +++ b/src/reader/data.zig @@ -152,7 +152,7 @@ pub fn DataReader(comptime T: type) type { var mut: std.Thread.Mutex = .{}; var cur_idx: usize = 0; var wg: std.Thread.WaitGroup = .{}; - var completed: std.AutoArrayHashMap(usize, anyerror![]u8) = .init(self.alloc); + var completed: std.AutoHashMap(usize, []u8) = .init(self.alloc); defer completed.deinit(); var errs: std.ArrayList(anyerror) = .init(self.alloc); defer errs.deinit(); @@ -169,7 +169,7 @@ pub fn DataReader(comptime T: type) type { if (comptime std.meta.hasFn(@TypeOf(writer), "pwrite")) { break :blk .{ self, &wg, &errs, i, writer }; } - break :blk .{ self, &wg, &mut, &cur_idx, &completed, i, writer }; + break :blk .{ self, &wg, &mut, &cur_idx, &errs, &completed, i, writer }; }, ); } @@ -188,7 +188,7 @@ pub fn DataReader(comptime T: type) type { on_finish_args: anytype, ) !void { if (self.pool == null) return DataReaderError.ThreadPoolNotSet; - const mut: std.Thread.Mutex = .{}; + var mut: std.Thread.Mutex = .{}; var cur_idx: usize = 0; var block_wg: std.Thread.WaitGroup = .{}; var finish_mut: std.Thread.Mutex = .{}; @@ -224,7 +224,7 @@ pub fn DataReader(comptime T: type) type { completed: *std.AutoHashMap(usize, []u8), idx: usize, writer: anytype, - ) !void { + ) void { //TODO: We can marginally reduce memory usage if we don't store sparse blocks in completed. if (errs.items.len > 0) return; // Indicates an error has occured in another thread. const block = self.blockAt(idx) catch |err| { @@ -298,7 +298,7 @@ pub fn DataReader(comptime T: type) type { mut: *std.Thread.Mutex, cur_idx: *usize, errs: *std.ArrayList(anyerror), - completed: *std.AutoArrayHashMap(usize, anyerror![]u8), + completed: *std.AutoHashMap(usize, []u8), idx: usize, writer: anytype, ) void { @@ -308,7 +308,7 @@ pub fn DataReader(comptime T: type) type { fn writeToThreadPWrite( self: Self, wg: *std.Thread.WaitGroup, - errs: *std.ArrayList(anyerror), + errs: std.ArrayList(anyerror), idx: usize, writer: anytype, ) void { @@ -322,7 +322,7 @@ pub fn DataReader(comptime T: type) type { mut: *std.Thread.Mutex, cur_idx: *usize, errs: *std.ArrayList(anyerror), - completed: *std.AutoArrayHashMap(usize, anyerror![]u8), + completed: *std.AutoHashMap(usize, []u8), idx: usize, writer: anytype, finish_mut: *std.Thread.Mutex, From d5c50b19f2870d4d1d6974520cdd9cd41db8b85b Mon Sep 17 00:00:00 2001 From: Caleb Gardner Date: Mon, 21 Jul 2025 06:40:18 -0500 Subject: [PATCH 14/15] Further progress on extraction --- src/file.zig | 89 ++++++++++++++++++++++++++++++++++++++------- src/reader.zig | 2 +- src/reader/data.zig | 19 +++++++--- src/root.zig | 26 ++++++++++--- 4 files changed, 109 insertions(+), 27 deletions(-) diff --git a/src/file.zig b/src/file.zig index e1914ab..4c637e6 100644 --- a/src/file.zig +++ b/src/file.zig @@ -206,8 +206,6 @@ pub fn File(comptime T: type) type { .dir, .ext_dir => { if (exists and stat.?.kind != .directory) { return ExtractError.FileExists; - } else if (!exists) { - try std.fs.cwd().makeDir(path); } }, else => if (exists) return ExtractError.FileExists, @@ -236,31 +234,85 @@ pub fn File(comptime T: type) type { ) !void { if (errs.items.len > 0) return; if (op.verbose) { - std.fmt.format(op.verbose_logger, "extracting inode {} \"{s}\" to {s}...\n", .{ self.inode.hdr.num, self.name, path }) catch {}; + std.fmt.format( + op.verbose_logger, + "extracting inode {} \"{s}\" to {s}...\n", + .{ self.inode.hdr.num, self.name, path }, + ) catch {}; } return switch (self.inode.hdr.type) { .dir, .ext_dir => { wg.start(); + defer std.debug.print("{}\n", .{wg.state.raw}); errdefer wg.finish(); + std.fs.cwd().makeDir(path) catch |err| { + if (err != std.fs.Dir.MakeError.PathAlreadyExists) { + return err; + } + }; + var dir_wg = try self.rdr.alloc.create(WaitGroup); + dir_wg.* = .{}; for (self.entries.?) |ent| { var fil = initFromEntry(self.rdr, ent) catch |err| { + std.fmt.format( + op.verbose_logger, + "error extracting inode {} \"{s}\": {}\n", + .{ ent.num, path, err }, + ) catch {}; + continue; + }; + const ext_path = blk: { + if (path[path.len - 1] == '/') { + var new = self.rdr.alloc.alloc(u8, path.len + ent.name.len) catch |err| { + break :blk err; + }; + @memcpy(new[0..path.len], path); + @memcpy(new[path.len..], ent.name); + break :blk new; + } + var new = self.rdr.alloc.alloc(u8, path.len + ent.name.len + 1) catch |err| { + break :blk err; + }; + @memcpy(new[0..path.len], path); + new[path.len] = '/'; + @memcpy(new[path.len + 1 ..], ent.name); + break :blk new; + } catch |err| { + std.fmt.format( + op.verbose_logger, + "error extracting inode {} \"{s}\": {}\n", + .{ ent.num, path, err }, + ) catch {}; + continue; + }; + fil.extractReal(op, ext_path, errs, dir_wg, pol, false) catch |err| { + std.fmt.format( + op.verbose_logger, + "error extracting inode {} \"{s}\": {}\n", + .{ ent.num, path, err }, + ) catch {}; continue; }; } + dir_wg.wait(); + wg.finish(); + std.debug.print("finished: {s}\n", .{path}); }, .file, .ext_file => { wg.start(); errdefer wg.finish(); var ext_fil = try std.fs.cwd().createFile(path, .{}); errdefer ext_fil.close(); - var fil_errs: std.ArrayList(anyerror) = .init(self.rdr.alloc); + var fil_errs = try self.rdr.alloc.create(std.ArrayList(anyerror)); + errdefer self.rdr.alloc.destroy(fil_errs); + fil_errs.* = .init(self.rdr.alloc); errdefer fil_errs.deinit(); @constCast(&self.data_reader.?).setPool(pol); - try self.data_reader.?.writeToNoBlock(errs, ext_fil, filExtractFinish, .{ + try self.data_reader.?.writeToNoBlock(errs, ext_fil, extractRegFinish, .{ self, op, path, - &fil_errs, + fil_errs, errs, wg, ext_fil, @@ -282,7 +334,15 @@ pub fn File(comptime T: type) type { }, }; } - fn filExtractFinish( + // fn extractFileFinish( + // self: Self, + // op: ExtractionOptions, + // path: []const u8, + // dir_wg: *WaitGroup, + // dir_wg_mut: *Mutex, + // wg: *WaitGroup, + // ) void {} + fn extractRegFinish( self: Self, op: ExtractionOptions, path: []const u8, @@ -292,18 +352,19 @@ pub fn File(comptime T: type) type { fil: std.fs.File, first: bool, ) void { + defer std.debug.print("{}\n", .{wg.state.raw}); defer wg.finish(); defer fil.close(); + defer self.rdr.alloc.destroy(fil_errs); defer if (!first) self.deinit(); + defer if (!first) self.rdr.alloc.free(path); if (fil_errs.items.len > 0) { if (op.verbose) { - for (fil_errs.items) |err| { - std.fmt.format( - op.verbose_logger, - "error extracting inode {} to \"{s}\": {}\n", - .{ self.inode.hdr.num, path, err }, - ) catch {}; - } + std.fmt.format( + op.verbose_logger, + "error extracting inode {} to \"{s}\": {}\n", + .{ self.inode.hdr.num, path, fil_errs.items[0] }, + ) catch {}; } errs.append(fil_errs.items[0]) catch {}; return; diff --git a/src/reader.zig b/src/reader.zig index 71e1096..1989ad3 100644 --- a/src/reader.zig +++ b/src/reader.zig @@ -61,7 +61,7 @@ pub fn SfsReader(comptime T: type) type { } /// Extract the entire archive to the given path & with the given options. /// Equivelent to calling extract on the root File. - pub fn extract(self: *Self, op: *ExtractionOptions, path: []const u8) !void { + pub fn extract(self: *Self, op: ExtractionOptions, path: []const u8) !void { var rt = try self.root(); defer rt.deinit(); return rt.extract(op, path); diff --git a/src/reader/data.zig b/src/reader/data.zig index f2521ac..aa6e8ff 100644 --- a/src/reader/data.zig +++ b/src/reader/data.zig @@ -190,8 +190,10 @@ pub fn DataReader(comptime T: type) type { if (self.pool == null) return DataReaderError.ThreadPoolNotSet; var mut: std.Thread.Mutex = .{}; var cur_idx: usize = 0; - var block_wg: std.Thread.WaitGroup = .{}; - var finish_mut: std.Thread.Mutex = .{}; + var block_wg = try self.alloc.create(std.Thread.WaitGroup); + block_wg.* = .{}; + const finish_mut = try self.alloc.create(std.Thread.Mutex); + finish_mut.* = .{}; var completed: ?std.AutoHashMap(usize, []u8) = null; if (!comptime std.meta.hasFn(@TypeOf(writer), "pwrite")) { completed = std.AutoHashMap(usize, []u8).init(self.alloc); @@ -207,9 +209,9 @@ pub fn DataReader(comptime T: type) type { }, blk: { if (comptime std.meta.hasFn(@TypeOf(writer), "pwrite")) { - break :blk .{ self, &block_wg, errs, i, writer, &finish_mut, on_finish, on_finish_args }; + break :blk .{ self, block_wg, errs, i, writer, finish_mut, on_finish, on_finish_args }; } else { - break :blk .{ self, &block_wg, &mut, &cur_idx, errs, &completed.?, i, writer, &finish_mut, on_finish, on_finish_args }; + break :blk .{ self, block_wg, &mut, &cur_idx, errs, &completed.?, i, writer, finish_mut, on_finish, on_finish_args }; } }, ); @@ -267,7 +269,7 @@ pub fn DataReader(comptime T: type) type { writer: anytype, ) void { if (errs.items.len > 0) return; - if (self.sizes[idx].size == 0) { + if (idx < self.sizes.len and self.sizes[idx].size == 0) { var pos = idx * self.block_size; if (self.frag.len == 0 and idx == self.sizes.len - 1) { pos += self.file_size % self.block_size; @@ -351,8 +353,13 @@ pub fn DataReader(comptime T: type) type { self.writeBlockToPWrite(errs, idx, writer); finish_mut.lock(); block_wg.finish(); - defer finish_mut.unlock(); + defer { + const done = block_wg.isDone(); + finish_mut.unlock(); + if (done) self.alloc.destroy(finish_mut); + } if (block_wg.isDone()) { + self.alloc.destroy(block_wg); @call(.auto, on_finish, on_finish_args); } } diff --git a/src/root.zig b/src/root.zig index df3b4b4..35391c5 100644 --- a/src/root.zig +++ b/src/root.zig @@ -6,8 +6,6 @@ pub const ExtractionOptions = @import("extract_options.zig"); pub const SfsFile = SfsReader(std.fs.File); const test_archive = "testing/LinuxPATest.sfs"; -const test_file = "Start.exe"; -const file_extr_loc = "testing/Start.exe"; test "OpenFile" { const sfs_fil = try std.fs.cwd().openFile(test_archive, .{}); @@ -27,15 +25,31 @@ test "OpenFile" { } } -test "ExtractFile" { - std.fs.cwd().deleteFile(file_extr_loc) catch {}; +test "ExtractSingleFile" { + const single_file = "Start.exe"; + const single_file_extr_loc = "testing/Start.exe"; + + std.fs.cwd().deleteFile(single_file_extr_loc) catch {}; const sfs_fil = try std.fs.cwd().openFile(test_archive, .{}); defer sfs_fil.close(); var rdr: SfsFile = try .init(std.testing.allocator, sfs_fil, 0); defer rdr.deinit(); - const fil = try rdr.open(test_file); + const fil = try rdr.open(single_file); defer fil.deinit(); var op: ExtractionOptions = try .init(); op.verbose = true; - try fil.extract(op, file_extr_loc); + try fil.extract(op, single_file_extr_loc); +} + +test "ExtractAll" { + const extr_dir = "testing/testExtract"; + + std.fs.cwd().deleteTree(extr_dir) catch {}; + const sfs_fil = try std.fs.cwd().openFile(test_archive, .{}); + defer sfs_fil.close(); + var rdr: SfsFile = try .init(std.testing.allocator, sfs_fil, 0); + defer rdr.deinit(); + var op: ExtractionOptions = try .init(); + op.verbose = true; + try rdr.extract(op, extr_dir); } From a96ad46a6c160ba7094fb72ed2a8aece58a7ddb5 Mon Sep 17 00:00:00 2001 From: Caleb Gardner Date: Tue, 22 Jul 2025 05:13:34 -0500 Subject: [PATCH 15/15] Things are working (though slowly) --- build.zig | 2 +- src/bin/unsquashfs.zig | 24 ++++- src/file.zig | 217 ++++++++++++++++++++++++++++++++--------- src/reader/data.zig | 14 ++- src/root.zig | 8 +- 5 files changed, 207 insertions(+), 58 deletions(-) diff --git a/build.zig b/build.zig index c682bc7..34e9b29 100644 --- a/build.zig +++ b/build.zig @@ -31,7 +31,7 @@ pub fn build(b: *std.Build) !void { exe_mod.addOptions("config", opt); const exe = b.addExecutable(.{ .linkage = .static, - .name = "zig-unsquashfs", + .name = "unsquashfs", .root_module = exe_mod, .version = sem_ver, }); diff --git a/src/bin/unsquashfs.zig b/src/bin/unsquashfs.zig index ceca2e0..7727b15 100644 --- a/src/bin/unsquashfs.zig +++ b/src/bin/unsquashfs.zig @@ -51,10 +51,10 @@ const ListTypes = enum { }; pub fn main() !void { - var alloc: std.heap.GeneralPurposeAllocator(.{}) = .init; - extr_files = .init(alloc.allocator()); + const alloc = std.heap.smp_allocator; + extr_files = .init(alloc); defer extr_files.deinit(); - var args = std.process.argsWithAllocator(alloc.allocator()) catch { + var args = std.process.argsWithAllocator(alloc) catch { _ = try stdout.writeAll("Unable to allocate memory"); return; }; @@ -119,8 +119,10 @@ pub fn main() !void { return; } const fil = try std.fs.cwd().openFile(filename, .{}); + defer fil.close(); + var th_alloc: std.heap.ThreadSafeAllocator = .{ .child_allocator = std.heap.smp_allocator }; var rdr = squashfs.SfsFile.init( - alloc.allocator(), + th_alloc.allocator(), fil, offset, ) catch |err| { @@ -128,5 +130,17 @@ pub fn main() !void { return; }; defer rdr.deinit(); - //TODO + //TODO: list and extr_files; + var op: squashfs.ExtractionOptions = squashfs.ExtractionOptions.init() catch |err| { + try std.fmt.format(stdout.writer(), "Error setting extraction options: {any}\n", .{err}); + return; + }; + op.verbose = verbose; + op.dereference_symlinks = deref; + op.unbreak_symlinks = unbreak; + if (processors != 0) op.thread_count = processors; + rdr.extract(op, extr_location) catch |err| { + try std.fmt.format(stdout.writer(), "Error extracting archive: {any}\n", .{err}); + return; + }; } diff --git a/src/file.zig b/src/file.zig index 4c637e6..8c18f86 100644 --- a/src/file.zig +++ b/src/file.zig @@ -219,7 +219,7 @@ pub fn File(comptime T: type) type { defer pol.deinit(); var errs: std.ArrayList(anyerror) = .init(self.rdr.alloc); defer errs.deinit(); - try self.extractReal(op, path, &errs, &wg, &pol, true); + self.extractReal(op, path, &errs, &wg, &pol, true); wg.wait(); if (errs.items.len > 0) return errs.items[0]; } @@ -231,7 +231,7 @@ pub fn File(comptime T: type) type { wg: *WaitGroup, pol: *Pool, first: bool, - ) !void { + ) void { if (errs.items.len > 0) return; if (op.verbose) { std.fmt.format( @@ -242,18 +242,22 @@ pub fn File(comptime T: type) type { } return switch (self.inode.hdr.type) { .dir, .ext_dir => { + var complete = false; wg.start(); - defer std.debug.print("{}\n", .{wg.state.raw}); - errdefer wg.finish(); + defer if (!complete) wg.finish(); std.fs.cwd().makeDir(path) catch |err| { if (err != std.fs.Dir.MakeError.PathAlreadyExists) { - return err; + errs.append(err) catch {}; + return; } }; - var dir_wg = try self.rdr.alloc.create(WaitGroup); + const dir_wg = self.rdr.alloc.create(WaitGroup) catch |err| { + errs.append(err) catch {}; + return; + }; dir_wg.* = .{}; for (self.entries.?) |ent| { - var fil = initFromEntry(self.rdr, ent) catch |err| { + const fil = initFromEntry(self.rdr, ent) catch |err| { std.fmt.format( op.verbose_logger, "error extracting inode {} \"{s}\": {}\n", @@ -285,39 +289,109 @@ pub fn File(comptime T: type) type { ) catch {}; continue; }; - fil.extractReal(op, ext_path, errs, dir_wg, pol, false) catch |err| { - std.fmt.format( - op.verbose_logger, - "error extracting inode {} \"{s}\": {}\n", - .{ ent.num, path, err }, - ) catch {}; + var thr = std.Thread.spawn(.{ .allocator = self.rdr.alloc }, extractReal, .{ + fil, + op, + ext_path, + errs, + dir_wg, + pol, + false, + }) catch |err| { + self.rdr.alloc.free(ext_path); + if (op.verbose) { + std.fmt.format( + op.verbose_logger, + "error extracting inode {} \"{s}\": {}\n", + .{ ent.num, path, err }, + ) catch {}; + } continue; }; + thr.detach(); } - dir_wg.wait(); - wg.finish(); - std.debug.print("finished: {s}\n", .{path}); + var thr = std.Thread.spawn( + .{ .allocator = self.rdr.alloc }, + extractDirWait, + .{ + self, + op, + path, + dir_wg, + wg, + first, + }, + ) catch |err| { + if (op.verbose) { + std.fmt.format( + op.verbose_logger, + "error spawning wait thread for \"{s}\": {}\n", + .{ path, err }, + ) catch {}; + } + self.extractDirWait(op, path, dir_wg, wg, first); + return; + }; + thr.detach(); + complete = true; }, .file, .ext_file => { + var complete = false; wg.start(); - errdefer wg.finish(); - var ext_fil = try std.fs.cwd().createFile(path, .{}); - errdefer ext_fil.close(); - var fil_errs = try self.rdr.alloc.create(std.ArrayList(anyerror)); - errdefer self.rdr.alloc.destroy(fil_errs); + defer if (!complete) wg.finish(); + var ext_fil = std.fs.cwd().createFile(path, .{}) catch |err| { + if (op.verbose) { + std.fmt.format( + op.verbose_logger, + "error creating file \"{s}\": {}\n", + .{ path, err }, + ) catch {}; + } + errs.append(err) catch {}; + return; + }; + defer if (!complete) ext_fil.close(); + var fil_errs = self.rdr.alloc.create(std.ArrayList(anyerror)) catch |err| { + if (op.verbose) { + std.fmt.format( + op.verbose_logger, + "error allocating memory: {}\n", + .{err}, + ) catch {}; + } + errs.append(err) catch {}; + return; + }; + defer if (!complete) self.rdr.alloc.destroy(fil_errs); fil_errs.* = .init(self.rdr.alloc); - errdefer fil_errs.deinit(); + defer if (!complete) fil_errs.deinit(); @constCast(&self.data_reader.?).setPool(pol); - try self.data_reader.?.writeToNoBlock(errs, ext_fil, extractRegFinish, .{ - self, - op, - path, - fil_errs, + self.data_reader.?.writeToNoBlock( errs, - wg, ext_fil, - first, - }); + extractRegFinish, + .{ + self, + op, + path, + fil_errs, + errs, + wg, + ext_fil, + first, + }, + ) catch |err| { + if (op.verbose) { + std.fmt.format( + op.verbose_logger, + "error extracting file \"{s}\": {}\n", + .{ path, err }, + ) catch {}; + } + errs.append(err) catch {}; + return; + }; + complete = true; }, .symlink, .ext_symlink => {}, .block_dev, .ext_block_dev, .char_dev, .ext_char_dev, .fifo, .ext_fifo => { @@ -334,14 +408,66 @@ pub fn File(comptime T: type) type { }, }; } - // fn extractFileFinish( - // self: Self, - // op: ExtractionOptions, - // path: []const u8, - // dir_wg: *WaitGroup, - // dir_wg_mut: *Mutex, - // wg: *WaitGroup, - // ) void {} + fn extractDirWait( + self: Self, + op: ExtractionOptions, + path: []const u8, + dir_wg: *WaitGroup, + wg: *WaitGroup, + first: bool, + ) void { + dir_wg.wait(); + self.rdr.alloc.destroy(dir_wg); + defer { + wg.finish(); + if (!first) { + self.rdr.alloc.free(path); + self.deinit(); + } + } + if (op.ignore_permissions) return; + const dir_uid = self.uid() catch |err| { + std.fmt.format( + op.verbose_logger, + "error getting uid for inode {} \"{s}\": {}\n", + .{ self.inode.hdr.num, path, err }, + ) catch {}; + return; + }; + const dir_gid = self.gid() catch |err| { + std.fmt.format( + op.verbose_logger, + "error getting gid for inode {} \"{s}\": {}\n", + .{ self.inode.hdr.num, path, err }, + ) catch {}; + return; + }; + var ext_dir = std.fs.cwd().openFile(path, .{}) catch |err| { + std.fmt.format( + op.verbose_logger, + "error setting owner & permissions for \"{s}\": {}\n", + .{ path, err }, + ) catch {}; + return; + }; + defer ext_dir.close(); + ext_dir.chmod(self.inode.hdr.perm) catch |err| { + std.fmt.format( + op.verbose_logger, + "error setting permissions for inode {} \"{s}\": {}\n", + .{ self.inode.hdr.num, path, err }, + ) catch {}; + return; + }; + ext_dir.chown(dir_uid, dir_gid) catch |err| { + std.fmt.format( + op.verbose_logger, + "error setting owner for inode {} \"{s}\": {}\n", + .{ self.inode.hdr.num, path, err }, + ) catch {}; + return; + }; + } fn extractRegFinish( self: Self, op: ExtractionOptions, @@ -352,12 +478,15 @@ pub fn File(comptime T: type) type { fil: std.fs.File, first: bool, ) void { - defer std.debug.print("{}\n", .{wg.state.raw}); - defer wg.finish(); - defer fil.close(); - defer self.rdr.alloc.destroy(fil_errs); - defer if (!first) self.deinit(); - defer if (!first) self.rdr.alloc.free(path); + defer { + wg.finish(); + fil.close(); + self.rdr.alloc.destroy(fil_errs); + if (!first) { + self.deinit(); + self.rdr.alloc.free(path); + } + } if (fil_errs.items.len > 0) { if (op.verbose) { std.fmt.format( diff --git a/src/reader/data.zig b/src/reader/data.zig index aa6e8ff..3002a00 100644 --- a/src/reader/data.zig +++ b/src/reader/data.zig @@ -188,6 +188,10 @@ pub fn DataReader(comptime T: type) type { on_finish_args: anytype, ) !void { if (self.pool == null) return DataReaderError.ThreadPoolNotSet; + if (self.numBlocks() == 0) { + @call(.auto, on_finish, on_finish_args); + return; + } var mut: std.Thread.Mutex = .{}; var cur_idx: usize = 0; var block_wg = try self.alloc.create(std.Thread.WaitGroup); @@ -200,7 +204,8 @@ pub fn DataReader(comptime T: type) type { } block_wg.startMany(self.numBlocks()); for (0..self.numBlocks()) |i| { - try self.pool.?.spawn( + var thr = try std.Thread.spawn( + .{ .allocator = self.alloc }, comptime blk: { if (std.meta.hasFn(@TypeOf(writer), "pwrite")) { break :blk noBlockThreadPWrite; @@ -215,6 +220,7 @@ pub fn DataReader(comptime T: type) type { } }, ); + thr.detach(); } } @@ -353,12 +359,12 @@ pub fn DataReader(comptime T: type) type { self.writeBlockToPWrite(errs, idx, writer); finish_mut.lock(); block_wg.finish(); + const isDone = block_wg.isDone(); defer { - const done = block_wg.isDone(); finish_mut.unlock(); - if (done) self.alloc.destroy(finish_mut); + if (isDone) self.alloc.destroy(finish_mut); } - if (block_wg.isDone()) { + if (isDone) { self.alloc.destroy(block_wg); @call(.auto, on_finish, on_finish_args); } diff --git a/src/root.zig b/src/root.zig index 35391c5..b120a1f 100644 --- a/src/root.zig +++ b/src/root.zig @@ -26,8 +26,8 @@ test "OpenFile" { } test "ExtractSingleFile" { - const single_file = "Start.exe"; - const single_file_extr_loc = "testing/Start.exe"; + const single_file = "PortableApps/Notepad++Portable/App/Notepad++/doLocalConf.xml"; + const single_file_extr_loc = "testing/doLocalConf.xml"; std.fs.cwd().deleteFile(single_file_extr_loc) catch {}; const sfs_fil = try std.fs.cwd().openFile(test_archive, .{}); @@ -49,7 +49,7 @@ test "ExtractAll" { defer sfs_fil.close(); var rdr: SfsFile = try .init(std.testing.allocator, sfs_fil, 0); defer rdr.deinit(); - var op: ExtractionOptions = try .init(); - op.verbose = true; + const op: ExtractionOptions = try .init(); + // op.verbose = true; try rdr.extract(op, extr_dir); }