From 23687eabb05ff8c0ab68f57f71c401cb37d4c59a Mon Sep 17 00:00:00 2001 From: Caleb Gardner Date: Fri, 11 Jul 2025 04:32:02 -0500 Subject: [PATCH] Further progress in parsing format --- src/bin/unsquashfs.zig | 133 +++++++++++++++++++++++++++++++++++++++- src/directory.zig | 70 +++++++++++++++++++++ src/fragment.zig | 7 +++ src/inode.zig | 18 ++++-- src/reader.zig | 38 ++++++++++-- src/reader/metadata.zig | 26 ++++++++ src/reader/p_read.zig | 29 +++++++++ src/reader/to_read.zig | 24 ++++++++ src/root.zig | 5 ++ src/superblock.zig | 6 +- src/table.zig | 68 ++++++++++++++++++++ 11 files changed, 412 insertions(+), 12 deletions(-) create mode 100644 src/directory.zig create mode 100644 src/fragment.zig create mode 100644 src/reader/p_read.zig create mode 100644 src/reader/to_read.zig create mode 100644 src/table.zig diff --git a/src/bin/unsquashfs.zig b/src/bin/unsquashfs.zig index 66a7fbc..48d1f45 100644 --- a/src/bin/unsquashfs.zig +++ b/src/bin/unsquashfs.zig @@ -1,3 +1,132 @@ -const import = @import("std"); +const std = @import("std"); +const config = @import("config"); +const squashfs = @import("squashfs"); -pub fn main() void {} +const help_msg = + \\Basic Usage: zig-unsquashfs [Options] SQUASHFS_FILE + \\ + \\General options: + \\ -e Path to a file or directory inside the archive to extract instead of the whole archive. + \\ Can be given multiple times. + \\ -o Skip before reading from the archive. + \\ -v Verbose output. + \\ + \\Extraction options: + \\ --unbreak-symlinks Attempt extract symlink targets along with symlinks. Will not place files outside of the extraction location. + \\ -us Same as --unbreak-symlinks + \\ --deref-symlinks Replace symlink files with their target. + \\ -ds Same as --deref-symlinks + \\ -p <#> Use at most # of processors. Defaults to logical core count. + \\ + \\Listing Options: + \\ -l List files instead of extracting. When used, you do not need to specify an extraction location. + \\ -ll Similiar to -l, but with file attributes. 
+ \\ -lln Similiar to -ll, but with numeric uids and gids. + \\ + \\Other: + \\ --help Prints this help message. + \\ -h Same as --help + \\ --version Print version number. + \\ +; + +const stdout = std.io.getStdOut(); + +var extr_files: std.ArrayList([]const u8) = undefined; +var offset: u64 = 0; +var verbose: bool = false; +var unbreak: bool = false; +var deref: bool = false; +var processors: u16 = 0; +var list: ListTypes = .None; + +var filename: []const u8 = ""; +var extr_location: []const u8 = ""; + +const ListTypes = enum { + None, + List, + ListAttr, + ListNumeric, +}; + +pub fn main() !void { + var alloc: std.heap.GeneralPurposeAllocator(.{}) = .init; + extr_files = .init(alloc.allocator()); + defer extr_files.deinit(); + var args = std.process.argsWithAllocator(alloc.allocator()) catch { + _ = try stdout.writeAll("Unable to allocate memory"); + return; + }; + defer args.deinit(); + _ = args.next(); + while (args.next()) |arg| { + if (std.mem.eql(u8, arg, "--help") or std.mem.eql(u8, arg, "-h")) { + _ = try stdout.writeAll(help_msg); + return; + } else if (std.mem.eql(u8, arg, "--version")) { + try config.version.format("", .{}, stdout.writer()); + _ = try stdout.write("\n"); + return; + } else if (std.mem.eql(u8, arg, "-v")) { + verbose = true; + } else if (std.mem.eql(u8, arg, "--unbreak-symlinks") or std.mem.eql(u8, arg, "-us")) { + unbreak = true; + } else if (std.mem.eql(u8, arg, "--deref-symlinks") or std.mem.eql(u8, arg, "-ds")) { + deref = true; + } else if (std.mem.eql(u8, arg, "-l")) { + list = .List; + } else if (std.mem.eql(u8, arg, "-ll")) { + list = .ListAttr; + } else if (std.mem.eql(u8, arg, "-lln")) { + list = .ListNumeric; + } else if (std.mem.eql(u8, arg, "-e")) { + const next = args.next(); + if (next == null) { + _ = try stdout.writeAll("path required after -e\n"); + return; + } + try extr_files.append(next.?); + } else if (std.mem.eql(u8, arg, "-o")) { + const next = args.next(); + if (next == null) { + _ = try stdout.writeAll("offset 
required after -o\n"); + return; + } + offset = try std.fmt.parseInt(u64, next.?, 10); + } else if (std.mem.eql(u8, arg, "-p")) { + const next = args.next(); + if (next == null) { + _ = try stdout.writeAll("number required after -p\n"); + return; + } + processors = try std.fmt.parseInt(u16, next.?, 10); + } else if (filename.len == 0) { + filename = arg; + } else if (extr_location.len == 0) { + extr_location = arg; + } else { + _ = try stdout.writeAll("invalid or too many arguments\n"); + return; + } + } + if (filename.len == 0) { + _ = try stdout.writeAll("no archive given\n"); + return; + } + if (list == .None and extr_location.len == 0) { + _ = try stdout.writeAll("no extract location given\n"); + return; + } + const fil = try std.fs.cwd().openFile(filename, .{}); + var rdr = squashfs.FileReader.init( + alloc.allocator(), + fil, + offset, + ) catch |err| { + try std.fmt.format(stdout.writer(), "Error opening {s} as squashfs: {any}\n", .{ filename, err }); + return; + }; + defer rdr.deinit(); + //TODO +} diff --git a/src/directory.zig b/src/directory.zig new file mode 100644 index 0000000..36257ff --- /dev/null +++ b/src/directory.zig @@ -0,0 +1,70 @@ +const std = @import("std"); + +const InodeType = @import("inode.zig").Type; +const Compression = @import("superblock.zig").Compression; + +const Header = extern struct { //use extern instead of packed, due to bit alignment + count: u32, + block: u32, + num: u32, +}; + +const RawEntry = struct { + offset: u16, + num_offset: i16, + type: InodeType, + size: u16, + name: []const u8, + + pub fn init(alloc: std.mem.Allocator, rdr: anytype) !RawEntry { + const fixed: [8]u8 = undefined; + _ = try rdr.read(&fixed); + const size = std.mem.readInt(u16, fixed[6..8], .little); + const name = try alloc.alloc(u8, size + 1); + _ = try rdr.read(name); + return .{ + .offset = std.mem.readInt(u16, fixed[0..2], .little), + .num_offset = std.mem.readInt(i16, fixed[2..4], .little), + .type = std.mem.readInt(u16, fixed[4..6], .little), + 
.size = size, + .name = name, + }; + } +}; + +pub const Entry = struct { + block: u32, + offset: u16, + num: u32, + type: InodeType, + name: []const u8, + + pub fn deinit(self: Entry, alloc: std.mem.Allocator) void { + alloc.free(self.name); + } +}; + +pub fn readDirectory(alloc: std.mem.Allocator, rdr: anytype, size: u32) []Entry { + const entries: std.ArrayList(Entry) = .init(alloc); + errdefer entries.deinit(); + var cur_red: u32 = 3; // dir size includes "." & "..", so its actual size is off by 3. + var hdr: Header = undefined; + while (cur_red < size) { + _ = try rdr.read(std.mem.asBytes(&hdr)); + cur_red += 12; + try entries.ensureUnusedCapacity(hdr.count + 1); + for (0..hdr.count + 1) |_| { + const raw_ent: RawEntry = try .init(alloc, rdr); + cur_red += 9 + raw_ent.size; + errdefer alloc.free(raw_ent.name); + entries.appendAssumeCapacity(.{ + .block = hdr.block, + .offset = raw_ent.offset, + .num = hdr.num + raw_ent.num_offset, + .type = raw_ent.type, + .name = raw_ent.name, + }); + } + } + return entries.toOwnedSlice(); +} diff --git a/src/fragment.zig b/src/fragment.zig new file mode 100644 index 0000000..49e27cf --- /dev/null +++ b/src/fragment.zig @@ -0,0 +1,7 @@ +const BlockSize = @import("inode/file.zig").BlockSize; + +pub const FragEntry = packed struct { + block: u64, + size: BlockSize, + _: u32, +}; diff --git a/src/inode.zig b/src/inode.zig index 321d344..b44583d 100644 --- a/src/inode.zig +++ b/src/inode.zig @@ -4,13 +4,17 @@ const dir = @import("inode/dir.zig"); const file = @import("inode/file.zig"); const misc = @import("inode/misc.zig"); +const ToRead = @import("reader/to_read.zig").ToRead; +const Compression = @import("superblock.zig").Compression; +const MetadataReader = @import("reader/metadata.zig").MetadataReader; + pub const Ref = packed struct { offset: u16, block: u32, _: u16, }; -const Type = enum(u16) { +pub const Type = enum(u16) { dir = 1, file, symlink, @@ -27,7 +31,7 @@ const Type = enum(u16) { ext_socket, }; -const Header = 
packed struct { +pub const Header = packed struct { type: Type, perm: u16, uid_idx: u16, @@ -36,7 +40,7 @@ const Header = packed struct { num: u32, }; -const Data = union(enum) { +pub const Data = union(enum) { dir: dir.Dir, file: file.File, symlink: misc.Symlink, @@ -59,7 +63,6 @@ hdr: Header, data: Data, pub fn init(rdr: anytype, alloc: std.mem.Allocator, block_size: u32) !Self { - std.debug.assert(std.meta.hasFn(@TypeOf(rdr), "read")); var hdr: Header = undefined; _ = try rdr.read(std.mem.asBytes(&hdr)); const data = switch (hdr.type) { @@ -83,3 +86,10 @@ pub fn init(rdr: anytype, alloc: std.mem.Allocator, block_size: u32) !Self { .data = data, }; } +pub fn initFromRef(p_rdr: anytype, comp: Compression, ref: Ref, table_start: u64, alloc: std.mem.Allocator, block_size: u32) !Self { + const rdr: ToRead(@TypeOf(p_rdr)) = .init(p_rdr, ref.block + table_start); + const meta_rdr: MetadataReader(ToRead(@TypeOf(p_rdr))) = try .init(alloc, comp, rdr); + defer meta_rdr.deinit(); + try meta_rdr.skip(ref.offset); + return init(meta_rdr, alloc, block_size); +} diff --git a/src/reader.zig b/src/reader.zig index a21312a..e907949 100644 --- a/src/reader.zig +++ b/src/reader.zig @@ -1,25 +1,53 @@ const std = @import("std"); +const Inode = @import("inode.zig"); +const Table = @import("table.zig").Table; +const PRead = @import("reader/p_read.zig").PRead; +const FragEntry = @import("fragment.zig").FragEntry; const Superblock = @import("superblock.zig").Superblock; pub fn Reader(comptime T: type) type { - std.debug.assert(std.meta.hasFn(T, "pread")); + comptime std.debug.assert(std.meta.hasFn(T, "pread")); return struct { const Self = @This(); alloc: std.mem.Allocator, - rdr: T, + rdr: PRead(T), super: Superblock = undefined, + /// ID table. Can be accessed directly + id_table: Table(u32, T) = undefined, + /// Fragment table. Can be accessed directly + frag_table: Table(FragEntry, T) = undefined, + /// Export table. Each element is an inode reference.
+ /// If accessing directly, keep in mind that the table starts at inode 1; as such, it's recommended to use the InodeAt function instead. + export_table: Table(Inode.Ref, T) = undefined, + root: ?Inode = null, - pub fn init(alloc: std.mem.Allocator, rdr: T) Self { - const out = Self{ + pub fn init(alloc: std.mem.Allocator, rdr: T, offset: u64) !Self { + var out: Self = .{ .alloc = alloc, - .rdr = rdr, + .rdr = .init(rdr, offset), }; _ = try rdr.pread(std.mem.asBytes(&out.super), 0); + out.frag_table = .init(alloc, rdr, out.super.frag_start, out.super.frag_count); + out.id_table = .init(alloc, rdr, out.super.id_start, out.super.id_count); + out.export_table = .init(alloc, rdr, out.super.export_start, out.super.inode_count); return out; } + pub fn deinit(self: *Self) void { + self.id_table.deinit(); + self.frag_table.deinit(); + self.export_table.deinit(); + } + + /// Returns the inode with the given Inode Number. + /// Requires the archive to have an export table. + pub fn InodeAt(self: Self, num: u32) !Inode { + const ref = try self.export_table.get(num - 1); + _ = ref; + return error{TODO}.TODO; + } }; } diff --git a/src/reader/metadata.zig b/src/reader/metadata.zig index f468717..66489fc 100644 --- a/src/reader/metadata.zig +++ b/src/reader/metadata.zig @@ -8,6 +8,7 @@ const MetaHeader = packed struct { }; pub fn MetadataReader(comptime T: type) type { + comptime std.debug.assert(std.meta.hasFn(T, "read")); return struct { const Self = @This(); @@ -38,6 +39,31 @@ pub fn MetadataReader(comptime T: type) type { std.io.limitedReader(self.rdr, hdr.size), self.block, ); + self.block_offset = 0; + } + + pub fn skip(self: *Self, offset: u32) !void { + var skipped = 0; + var to_skip = 0; + while (skipped < offset) { + if (self.block_offset >= self.block_size) try self.readNextBlock(); + to_skip = @min(self.block_size - self.block_offset, offset - skipped); + self.block_offset += to_skip; + skipped += to_skip; + } + } + + pub fn read(self: *Self, buf: []u8) !usize { + var
cur_red: usize = 0; + var to_read: usize = 0; + while (cur_red < buf.len) { + if (self.block_offset >= self.block_size) try self.readNextBlock(); + to_read = @min(buf.len - cur_red, self.block_size - self.block_offset); + @memcpy(buf[cur_red .. cur_red + to_read], self.block[self.block_offset .. self.block_offset + to_read]); + cur_red += to_read; + self.block_offset += to_read; + } + return cur_red; } }; } diff --git a/src/reader/p_read.zig b/src/reader/p_read.zig new file mode 100644 index 0000000..ca9e476 --- /dev/null +++ b/src/reader/p_read.zig @@ -0,0 +1,29 @@ +const std = @import("std"); + +const ToRead = @import("to_read.zig").ToRead; + +/// A simple wrapper around a type with the pread([]u8, u64) function. +/// Provides a couple useful utility functions. +pub fn PRead(comptime T: type) type { + comptime std.debug.assert(std.meta.hasFn(T, "pread")); + return struct { + const Self = @This(); + + rdr: T, + offset: u64, + + pub fn init(rdr: T, offset: u64) Self { + return .{ + .rdr = rdr, + .offset = offset, + }; + } + + pub fn pread(self: Self, buf: []u8, offset: u64) !usize { + return self.rdr.pread(buf, self.offset + offset); + } + pub fn readerAt(self: Self, offset: u64) ToRead(T) { + return .init(self.rdr, self.offset + offset); + } + }; +} diff --git a/src/reader/to_read.zig b/src/reader/to_read.zig new file mode 100644 index 0000000..a311b4d --- /dev/null +++ b/src/reader/to_read.zig @@ -0,0 +1,24 @@ +const std = @import("std"); + +pub fn ToRead(comptime T: type) type { + comptime std.debug.assert(std.meta.hasFn(T, "pread")); + return struct { + const Self = @This(); + + rdr: T, + offset: u64, + + pub fn init(rdr: T, init_offset: u64) Self { + return .{ + .rdr = rdr, + .offset = init_offset, + }; + } + + pub fn read(self: *Self, buf: []u8) !usize { + const red = try self.rdr.pread(buf, self.offset); + self.offset += red; + return red; + } + }; +} diff --git a/src/root.zig b/src/root.zig index e69de29..c9d80e3 100644 --- a/src/root.zig +++ b/src/root.zig 
@@ -0,0 +1,5 @@ +const std = @import("std"); + +pub const Reader = @import("reader.zig").Reader; + +pub const FileReader = Reader(std.fs.File); diff --git a/src/superblock.zig b/src/superblock.zig index c17db3c..fea3886 100644 --- a/src/superblock.zig +++ b/src/superblock.zig @@ -1,5 +1,7 @@ const std = @import("std"); +const InodeRef = @import("inode.zig").Ref; + pub const Superblock = packed struct { magic: u32, inode_count: u32, @@ -26,7 +28,7 @@ pub const Superblock = packed struct { id_count: u16, ver_maj: u16, ver_min: u16, - root_ref: u64, + root_ref: InodeRef, size: u64, id_start: u64, xattr_start: u64, @@ -57,11 +59,13 @@ pub const Compression = enum(u16) { }, .lzma => { const decomp = try std.compress.lzma.decompress(alloc, source); + defer decomp.deinit(); return decomp.read(dest); }, .lzo => return DecompressError.LzoUnavailable, .xz => { const decomp = try std.compress.xz.decompress(alloc, source); + defer decomp.deinit(); return decomp.read(dest); }, .lz4 => return DecompressError.Lz4Unavailable, diff --git a/src/table.zig b/src/table.zig new file mode 100644 index 0000000..9e10ec4 --- /dev/null +++ b/src/table.zig @@ -0,0 +1,68 @@ +const std = @import("std"); + +const TableError = error{ + InvalidIndex, +}; + +pub fn Table(comptime T: type, comptime R: type) type { + comptime std.debug.assert(std.meta.hasFn(R, "pread")); + return struct { + const Self = @This(); + + alloc: std.mem.Allocator, + rdr: R, + + offset: u64, + table_count: u32, + mut: std.Thread.RwLock = .{}, + + table: []T = &[0]T{}, + + pub fn init(alloc: std.mem.Allocator, rdr: R, offset: u64, table_count: u32) Self { + return .{ + .alloc = alloc, + .rdr = rdr, + .offset = offset, + .table_count = table_count, + }; + } + pub fn deinit(self: Self) void { + self.alloc.free(self.table); + } + + fn resize(self: *Self, to_add: usize) !void { + if (!self.alloc.resize(self.table, self.table.len + to_add)) { + const new_table = try self.alloc.alloc(T, self.table.len + to_add); + 
@memcpy(new_table[0..self.table.len], self.table); + self.alloc.free(self.table); + self.table = new_table; + } + } + + pub fn get(self: *Self, idx: u32) !T { + if (idx >= self.table_count) return TableError.InvalidIndex; + self.mut.lockShared(); + defer self.mut.unlockShared(); + if (idx >= self.table.len) { + return self.getAndFill(idx); + } + return self.table[idx]; + } + fn getAndFill(self: *Self, idx: u32) !T { + self.mut.unlockShared(); + defer self.mut.lockShared(); + self.mut.lock(); + defer self.mut.unlock(); + var to_read: usize = 0; + var offset: u64 = 0; + while (idx >= self.table.len) { + to_read = @min(self.table_count - self.table.len, comptime 8192 / @sizeOf(T)); + try self.resize(to_read); + _ = try self.rdr.pread(std.mem.asBytes(&offset), self.offset); + self.offset += 8; + _ = try self.rdr.pread(std.mem.sliceAsBytes(self.table[self.table.len - to_read ..]), offset); + } + return self.table[idx]; + } + }; +}