Further progress in parsing format

This commit is contained in:
Caleb Gardner
2025-07-11 04:32:02 -05:00
parent 5c14b7db48
commit 23687eabb0
11 changed files with 412 additions and 12 deletions
+131 -2
View File
@@ -1,3 +1,132 @@
const import = @import("std");
const std = @import("std");
const config = @import("config");
const squashfs = @import("squashfs");
pub fn main() void {}
/// Usage text written to stdout for `--help` / `-h`.
const help_msg =
    \\Basic Usage: zig-unsquashfs [Options] SQUASHFS_FILE <EXTRACT_LOCATION>
    \\
    \\General options:
    \\ -e <path> Path to a file or directory inside the archive to extract instead of the whole archive.
    \\ Can be given multiple times.
    \\ -o <bytes> Skip <bytes> before reading from the archive.
    \\ -v Verbose output.
    \\
    \\Extraction options:
    \\ --unbreak-symlinks Attempt to extract symlink targets along with symlinks. Will not place files outside of the extraction location.
    \\ -us Same as --unbreak-symlinks
    \\ --deref-symlinks Replace symlink files with their target.
    \\ -ds Same as --deref-symlinks
    \\ -p <#> Use at most # of processors. Defaults to logical core count.
    \\
    \\Listing Options:
    \\ -l List files instead of extracting. When used, you do not need to specify an extraction location.
    \\ -ll Similar to -l, but with file attributes.
    \\ -lln Similar to -ll, but with numeric uids and gids.
    \\
    \\Other:
    \\ --help Prints this help message.
    \\ -h Same as --help
    \\ --version Print version number.
    \\
;
// Handle used by main() for every message the program prints.
const stdout = std.io.getStdOut();
// Command-line state filled in by main() while it walks the arguments.
// Paths collected from each `-e <path>`; initialised and freed in main().
var extr_files: std.ArrayList([]const u8) = undefined;
// Value of `-o <bytes>`; handed to squashfs.FileReader.init.
var offset: u64 = 0;
// Set by `-v`.
var verbose: bool = false;
// Set by `--unbreak-symlinks` / `-us`.
var unbreak: bool = false;
// Set by `--deref-symlinks` / `-ds`.
var deref: bool = false;
// Value of `-p <#>`; stays 0 when the flag is not given.
var processors: u16 = 0;
// Listing mode selected by `-l`, `-ll` or `-lln`.
var list: ListTypes = .None;
// First positional argument: the archive to open.
var filename: []const u8 = "";
// Second positional argument; required by main() only when `list` is `.None`.
var extr_location: []const u8 = "";
// Listing modes selectable from the command line.
const ListTypes = enum {
// No listing flag was given.
None,
// `-l`
List,
// `-ll`
ListAttr,
// `-lln`
ListNumeric,
};
/// Program entry point.
///
/// Parses the command line into the file-level option variables, validates
/// that an archive (and, unless listing, an extraction location) was given,
/// then opens the archive as a squashfs image. Usage problems are reported on
/// stdout and end the program without an error; I/O failures while writing
/// to stdout or opening the archive file are propagated.
pub fn main() !void {
    var alloc: std.heap.GeneralPurposeAllocator(.{}) = .init;
    // Declared first so it runs last, after everything allocated below is freed.
    defer _ = alloc.deinit();
    const gpa = alloc.allocator();

    extr_files = .init(gpa);
    defer extr_files.deinit();

    var args = std.process.argsWithAllocator(gpa) catch {
        try stdout.writeAll("Unable to allocate memory\n");
        return;
    };
    // The slices stored in `filename`, `extr_location` and `extr_files` point
    // into `args`, so it must outlive every use of them below.
    defer args.deinit();
    _ = args.next(); // skip the program name

    while (args.next()) |arg| {
        if (std.mem.eql(u8, arg, "--help") or std.mem.eql(u8, arg, "-h")) {
            try stdout.writeAll(help_msg);
            return;
        } else if (std.mem.eql(u8, arg, "--version")) {
            try config.version.format("", .{}, stdout.writer());
            try stdout.writeAll("\n");
            return;
        } else if (std.mem.eql(u8, arg, "-v")) {
            verbose = true;
        } else if (std.mem.eql(u8, arg, "--unbreak-symlinks") or std.mem.eql(u8, arg, "-us")) {
            unbreak = true;
        } else if (std.mem.eql(u8, arg, "--deref-symlinks") or std.mem.eql(u8, arg, "-ds")) {
            deref = true;
        } else if (std.mem.eql(u8, arg, "-l")) {
            list = .List;
        } else if (std.mem.eql(u8, arg, "-ll")) {
            list = .ListAttr;
        } else if (std.mem.eql(u8, arg, "-lln")) {
            list = .ListNumeric;
        } else if (std.mem.eql(u8, arg, "-e")) {
            const path = args.next() orelse {
                try stdout.writeAll("path required after -e\n");
                return;
            };
            try extr_files.append(path);
        } else if (std.mem.eql(u8, arg, "-o")) {
            const value = args.next() orelse {
                try stdout.writeAll("offset required after -o\n");
                return;
            };
            // Report malformed input like every other usage error instead of
            // letting the raw parse error escape from main.
            offset = std.fmt.parseInt(u64, value, 10) catch {
                try stdout.writeAll("invalid offset after -o\n");
                return;
            };
        } else if (std.mem.eql(u8, arg, "-p")) {
            const value = args.next() orelse {
                try stdout.writeAll("number required after -p\n");
                return;
            };
            processors = std.fmt.parseInt(u16, value, 10) catch {
                try stdout.writeAll("invalid number after -p\n");
                return;
            };
        } else if (filename.len == 0) {
            // First positional argument: the archive.
            filename = arg;
        } else if (extr_location.len == 0) {
            // Second positional argument: where to extract to.
            extr_location = arg;
        } else {
            try stdout.writeAll("invalid or too many arguments\n");
            return;
        }
    }

    if (filename.len == 0) {
        try stdout.writeAll("no archive given\n");
        return;
    }
    if (list == .None and extr_location.len == 0) {
        try stdout.writeAll("no extract location given\n");
        return;
    }

    const fil = try std.fs.cwd().openFile(filename, .{});
    // Declared before the reader so the file is closed after rdr.deinit() runs.
    defer fil.close();

    var rdr = squashfs.FileReader.init(
        gpa,
        fil,
        offset,
    ) catch |err| {
        try std.fmt.format(stdout.writer(), "Error opening {s} as squashfs: {any}\n", .{ filename, err });
        return;
    };
    defer rdr.deinit();
    //TODO
}
+70
View File
@@ -0,0 +1,70 @@
const std = @import("std");
const InodeType = @import("inode.zig").Type;
const Compression = @import("superblock.zig").Compression;
// Header that precedes each run of directory entries; readDirectory fills it
// directly from the reader with std.mem.asBytes (12 bytes).
const Header = extern struct { //use extern instead of packed, due to bit alignment
// Number of entries that follow, minus one (readDirectory reads `count + 1`).
count: u32,
// Copied unchanged into `Entry.block` for every entry in the run.
block: u32,
// Base value that each entry's `num_offset` is added to.
num: u32,
};
/// A directory entry as stored after a `Header`: an 8-byte little-endian
/// fixed part followed by `size + 1` bytes of name.
const RawEntry = struct {
    offset: u16,
    /// Signed difference from the owning header's `num`.
    num_offset: i16,
    type: InodeType,
    /// Stored name length minus one.
    size: u16,
    /// Owned by the caller; allocated with the allocator passed to `init`.
    name: []const u8,

    /// Reads one raw entry from `rdr` (anything with `read([]u8) !usize`).
    ///
    /// Returns an error if reading or allocating fails, or if the stored type
    /// value is not a valid `InodeType`. On success the caller owns `name`.
    pub fn init(alloc: std.mem.Allocator, rdr: anytype) !RawEntry {
        // Must be `var`: the reader writes into it.
        var fixed: [8]u8 = undefined;
        _ = try rdr.read(&fixed);

        // Validate the type tag before allocating so a bad tag cannot leak.
        const raw_type = std.mem.readInt(u16, fixed[4..6], .little);
        const entry_type = try std.meta.intToEnum(InodeType, raw_type);

        const size = std.mem.readInt(u16, fixed[6..8], .little);
        // Widen before adding one so size == 0xFFFF does not overflow u16.
        const name = try alloc.alloc(u8, @as(usize, size) + 1);
        errdefer alloc.free(name);
        _ = try rdr.read(name);

        return .{
            .offset = std.mem.readInt(u16, fixed[0..2], .little),
            .num_offset = std.mem.readInt(i16, fixed[2..4], .little),
            .type = entry_type,
            .size = size,
            .name = name,
        };
    }
};
/// A directory entry as returned by `readDirectory`.
pub const Entry = struct {
    /// Taken from the header of the run this entry belongs to.
    block: u32,
    /// Taken from the raw entry.
    offset: u16,
    /// Header `num` combined with the raw entry's `num_offset`.
    num: u32,
    type: InodeType,
    /// Heap-allocated; released by `deinit`.
    name: []const u8,

    /// Frees `name`. `alloc` must be the allocator the name was allocated with.
    pub fn deinit(self: @This(), alloc: std.mem.Allocator) void {
        const owned_name = self.name;
        alloc.free(owned_name);
    }
};
/// Reads every entry of a directory listing of `size` bytes from `rdr`
/// (anything with `read([]u8) !usize`).
///
/// The listing is a sequence of runs, each a `Header` followed by
/// `count + 1` raw entries. The caller owns the returned slice and must call
/// `Entry.deinit` on each element before freeing it with `alloc`.
///
/// Returns an error if reading or allocating fails, or if an entry's inode
/// number does not fit in a `u32`. Nothing is leaked on error.
pub fn readDirectory(alloc: std.mem.Allocator, rdr: anytype, size: u32) ![]Entry {
    var entries: std.ArrayList(Entry) = .init(alloc);
    errdefer {
        // Names already stored in the list are owned by it until handed out.
        for (entries.items) |ent| ent.deinit(alloc);
        entries.deinit();
    }

    var cur_red: u32 = 3; // dir size includes "." & "..", so its actual size is off by 3.
    var hdr: Header = undefined;
    while (cur_red < size) {
        _ = try rdr.read(std.mem.asBytes(&hdr));
        // Saturate so corrupt sizes terminate the loop instead of overflowing.
        cur_red +|= 12;

        // Widen before adding one so count == maxInt(u32) cannot overflow.
        const ent_count: usize = @as(usize, hdr.count) + 1;
        try entries.ensureUnusedCapacity(ent_count);

        for (0..ent_count) |_| {
            const raw_ent: RawEntry = try .init(alloc, rdr);
            // Covers the window before the name is handed to `entries`.
            errdefer alloc.free(raw_ent.name);

            // num_offset is signed, so do the sum in a wider signed type.
            const num = std.math.cast(u32, @as(i64, hdr.num) + raw_ent.num_offset) orelse
                return error.InvalidInodeNumber;

            // 8 fixed bytes + (size + 1) name bytes.
            cur_red +|= 9 + @as(u32, raw_ent.size);

            entries.appendAssumeCapacity(.{
                .block = hdr.block,
                .offset = raw_ent.offset,
                .num = num,
                .type = raw_ent.type,
                .name = raw_ent.name,
            });
        }
    }
    return entries.toOwnedSlice();
}
+7
View File
@@ -0,0 +1,7 @@
const BlockSize = @import("inode/file.zig").BlockSize;
// One element of the fragment table (used as `Table(FragEntry, ...)` by the reader).
pub const FragEntry = packed struct {
// NOTE(review): presumably the on-disk start of the fragment block — confirm against the format spec.
block: u64,
// Uses the same `BlockSize` type as file inodes.
size: BlockSize,
// Padding; never read in the code shown here.
_: u32,
};
+14 -4
View File
@@ -4,13 +4,17 @@ const dir = @import("inode/dir.zig");
const file = @import("inode/file.zig");
const misc = @import("inode/misc.zig");
const ToRead = @import("reader/to_read.zig").ToRead;
const Compression = @import("superblock.zig").Compression;
const MetadataReader = @import("reader/metadata.zig").MetadataReader;
// Reference to an inode, as consumed by initFromRef.
pub const Ref = packed struct {
// Number of bytes initFromRef skips in the metadata reader before decoding the inode.
offset: u16,
// Added to the table start to find where the metadata reader begins.
block: u32,
// Padding; not read by initFromRef.
_: u16,
};
const Type = enum(u16) {
pub const Type = enum(u16) {
dir = 1,
file,
symlink,
@@ -27,7 +31,7 @@ const Type = enum(u16) {
ext_socket,
};
const Header = packed struct {
pub const Header = packed struct {
type: Type,
perm: u16,
uid_idx: u16,
@@ -36,7 +40,7 @@ const Header = packed struct {
num: u32,
};
const Data = union(enum) {
pub const Data = union(enum) {
dir: dir.Dir,
file: file.File,
symlink: misc.Symlink,
@@ -59,7 +63,6 @@ hdr: Header,
data: Data,
pub fn init(rdr: anytype, alloc: std.mem.Allocator, block_size: u32) !Self {
std.debug.assert(std.meta.hasFn(@TypeOf(rdr), "read"));
var hdr: Header = undefined;
_ = try rdr.read(std.mem.asBytes(&hdr));
const data = switch (hdr.type) {
@@ -83,3 +86,10 @@ pub fn init(rdr: anytype, alloc: std.mem.Allocator, block_size: u32) !Self {
.data = data,
};
}
/// Decodes the inode that `ref` points at.
///
/// Positions a reader at `table_start + ref.block`, wraps it in a metadata
/// reader using `comp`, skips `ref.offset` bytes, then decodes the inode with
/// `init`. `p_rdr` must provide `pread`. Errors from creating the metadata
/// reader, skipping, or `init` are propagated.
pub fn initFromRef(p_rdr: anytype, comp: Compression, ref: Ref, table_start: u64, alloc: std.mem.Allocator, block_size: u32) !Self {
    const rdr: ToRead(@TypeOf(p_rdr)) = .init(p_rdr, ref.block + table_start);
    // Must be `var`: skip() and read() take `*Self` and advance the reader.
    var meta_rdr: MetadataReader(ToRead(@TypeOf(p_rdr))) = try .init(alloc, comp, rdr);
    defer meta_rdr.deinit();
    try meta_rdr.skip(ref.offset);
    // Pass by pointer so init() reads through the same, mutable reader state.
    return init(&meta_rdr, alloc, block_size);
}
+33 -5
View File
@@ -1,25 +1,53 @@
const std = @import("std");
const Inode = @import("inode.zig");
const Table = @import("table.zig").Table;
const PRead = @import("reader/p_read.zig").PRead;
const FragEntry = @import("fragment.zig").FragEntry;
const Superblock = @import("superblock.zig").Superblock;
/// Builds a squashfs archive reader over `T`, which must provide
/// `pread([]u8, u64)`.
pub fn Reader(comptime T: type) type {
    comptime std.debug.assert(std.meta.hasFn(T, "pread"));
    return struct {
        const Self = @This();
        /// `T` wrapped so that every position is relative to the archive start.
        const Source = PRead(T);

        alloc: std.mem.Allocator,
        rdr: Source,
        super: Superblock = undefined,
        /// ID table. Can be accessed directly
        id_table: Table(u32, Source) = undefined,
        /// Fragment table. Can be accessed directly
        frag_table: Table(FragEntry, Source) = undefined,
        /// Export table. Each element is an inode reference.
        /// If accessing directly, keep in mind, the table starts at inode 1, as such it's recommended to use the InodeAt function instead.
        export_table: Table(Inode.Ref, Source) = undefined,
        root: ?Inode = null,

        /// Reads the superblock found `offset` bytes into `rdr` and prepares
        /// the lazily filled id, fragment and export tables.
        ///
        /// All reads, including the tables', go through the offset-aware
        /// wrapper so archives embedded at a non-zero offset work.
        pub fn init(alloc: std.mem.Allocator, rdr: T, offset: u64) !Self {
            var out: Self = .{
                .alloc = alloc,
                .rdr = .init(rdr, offset),
            };
            _ = try out.rdr.pread(std.mem.asBytes(&out.super), 0);
            out.frag_table = .init(alloc, out.rdr, out.super.frag_start, out.super.frag_count);
            out.id_table = .init(alloc, out.rdr, out.super.id_start, out.super.id_count);
            out.export_table = .init(alloc, out.rdr, out.super.export_start, out.super.inode_count);
            return out;
        }

        /// Frees the memory held by the three tables. Does not close `rdr`.
        pub fn deinit(self: *Self) void {
            self.id_table.deinit();
            self.frag_table.deinit();
            self.export_table.deinit();
        }

        /// Returns the inode with the given Inode Number.
        /// Requires for the archive to have an export table.
        /// Inode numbers start at 1; 0 yields `error.InvalidInodeNumber`.
        pub fn InodeAt(self: *Self, num: u32) !Inode {
            if (num == 0) return error.InvalidInodeNumber;
            // Takes `*Self` because Table.get may fill the table on demand.
            const ref = try self.export_table.get(num - 1);
            _ = ref;
            return error.TODO;
        }
    };
}
+26
View File
@@ -8,6 +8,7 @@ const MetaHeader = packed struct {
};
pub fn MetadataReader(comptime T: type) type {
comptime std.debug.assert(std.meta.hasFn(T, "read"));
return struct {
const Self = @This();
@@ -38,6 +39,31 @@ pub fn MetadataReader(comptime T: type) type {
std.io.limitedReader(self.rdr, hdr.size),
self.block,
);
self.block_offset = 0;
}
/// Advances the read position by `offset` bytes without copying any data,
/// loading the next block whenever the current one is exhausted.
/// Errors from `readNextBlock` are propagated.
pub fn skip(self: *Self, offset: u32) !void {
    // Needs an explicit type: a runtime `var` cannot be `comptime_int`.
    var skipped: u32 = 0;
    while (skipped < offset) {
        if (self.block_offset >= self.block_size) try self.readNextBlock();
        // Bounded by `offset - skipped`, so the value always fits in u32.
        const to_skip: u32 = @intCast(@min(self.block_size - self.block_offset, offset - skipped));
        self.block_offset += to_skip;
        skipped += to_skip;
    }
}
/// Fills `buf` completely from the block data, loading further blocks as
/// each one is used up. Returns `buf.len` on success; errors from
/// `readNextBlock` are propagated.
pub fn read(self: *Self, buf: []u8) !usize {
    var filled: usize = 0;
    while (filled < buf.len) {
        if (self.block_offset >= self.block_size) try self.readNextBlock();
        // Copy as much as both the destination and the current block allow.
        const chunk: usize = @min(buf.len - filled, self.block_size - self.block_offset);
        const src = self.block[self.block_offset..][0..chunk];
        @memcpy(buf[filled..][0..chunk], src);
        filled += chunk;
        self.block_offset += chunk;
    }
    return filled;
}
};
}
+29
View File
@@ -0,0 +1,29 @@
const std = @import("std");
const ToRead = @import("to_read.zig").ToRead;
/// Wraps a type exposing `pread([]u8, u64)` and shifts every position by a
/// fixed base offset. Also hands out sequential readers over the same source.
pub fn PRead(comptime T: type) type {
    comptime std.debug.assert(std.meta.hasFn(T, "pread"));
    return struct {
        const Self = @This();

        rdr: T,
        /// Base added to every position passed to `pread` / `readerAt`.
        offset: u64,

        /// Wraps `rdr` so that position 0 maps to `offset` in the underlying source.
        pub fn init(rdr: T, offset: u64) Self {
            return Self{ .rdr = rdr, .offset = offset };
        }

        /// Reads into `buf` from `offset`, measured from the base offset.
        pub fn pread(self: Self, buf: []u8, offset: u64) !usize {
            const absolute = self.offset + offset;
            return self.rdr.pread(buf, absolute);
        }

        /// Returns a sequential reader that starts at `offset`, measured from the base offset.
        pub fn readerAt(self: Self, offset: u64) ToRead(T) {
            const absolute = self.offset + offset;
            return ToRead(T).init(self.rdr, absolute);
        }
    };
}
+24
View File
@@ -0,0 +1,24 @@
const std = @import("std");
/// Adapts a type exposing `pread([]u8, u64)` into a sequential reader that
/// tracks its own position.
pub fn ToRead(comptime T: type) type {
    comptime std.debug.assert(std.meta.hasFn(T, "pread"));
    return struct {
        const Self = @This();

        rdr: T,
        /// Position of the next read in the underlying source.
        offset: u64,

        /// Creates a reader whose first read happens at `init_offset`.
        pub fn init(rdr: T, init_offset: u64) Self {
            return Self{ .rdr = rdr, .offset = init_offset };
        }

        /// Reads into `buf` at the current position and advances by the number
        /// of bytes the underlying `pread` reported.
        pub fn read(self: *Self, buf: []u8) !usize {
            const position = self.offset;
            const count = try self.rdr.pread(buf, position);
            self.offset = position + count;
            return count;
        }
    };
}
+5
View File
@@ -0,0 +1,5 @@
const std = @import("std");
pub const Reader = @import("reader.zig").Reader;
pub const FileReader = Reader(std.fs.File);
+5 -1
View File
@@ -1,5 +1,7 @@
const std = @import("std");
const InodeRef = @import("inode.zig").Ref;
pub const Superblock = packed struct {
magic: u32,
inode_count: u32,
@@ -26,7 +28,7 @@ pub const Superblock = packed struct {
id_count: u16,
ver_maj: u16,
ver_min: u16,
root_ref: u64,
root_ref: InodeRef,
size: u64,
id_start: u64,
xattr_start: u64,
@@ -57,11 +59,13 @@ pub const Compression = enum(u16) {
},
.lzma => {
const decomp = try std.compress.lzma.decompress(alloc, source);
defer decomp.deinit();
return decomp.read(dest);
},
.lzo => return DecompressError.LzoUnavailable,
.xz => {
const decomp = try std.compress.xz.decompress(alloc, source);
defer decomp.deinit();
return decomp.read(dest);
},
.lz4 => return DecompressError.Lz4Unavailable,
+68
View File
@@ -0,0 +1,68 @@
const std = @import("std");
// Errors produced by Table itself (reader and allocator errors are separate).
const TableError = error{
// Returned by Table.get when the requested index is >= table_count.
InvalidIndex,
};
/// A lazily filled table of `T` values read through `R`, which must provide
/// `pread([]u8, u64)`.
///
/// `offset` points at a list of u64 positions. Each position locates a chunk
/// of up to `8192 / @sizeOf(T)` elements. Chunks are loaded in order, on
/// demand, the first time an index inside them (or beyond) is requested.
///
/// NOTE(review): chunks are read as raw bytes straight from the position; no
/// metadata header or decompression is applied here, and short reads are not
/// detected. Confirm whether that is intended.
pub fn Table(comptime T: type, comptime R: type) type {
    comptime std.debug.assert(std.meta.hasFn(R, "pread"));
    return struct {
        const Self = @This();
        /// Maximum number of elements per chunk.
        const per_chunk = 8192 / @sizeOf(T);

        alloc: std.mem.Allocator,
        rdr: R,
        /// Position of the next unread chunk pointer.
        offset: u64,
        /// Total number of elements the table holds once fully loaded.
        table_count: u32,

        /// Guards `table` and `offset`.
        mut: std.Thread.RwLock = .{},
        /// Elements loaded so far.
        table: []T = &[0]T{},

        /// Creates an empty table; nothing is read until `get` is called.
        pub fn init(alloc: std.mem.Allocator, rdr: R, offset: u64, table_count: u32) Self {
            return .{
                .alloc = alloc,
                .rdr = rdr,
                .offset = offset,
                .table_count = table_count,
            };
        }

        /// Frees the loaded elements.
        pub fn deinit(self: Self) void {
            self.alloc.free(self.table);
        }

        /// Grows `table` by `to_add` elements, keeping the existing contents.
        fn resize(self: *Self, to_add: usize) !void {
            // realloc returns the grown slice whether or not the memory moved.
            // Allocator.resize only reports success and leaves `self.table.len`
            // unchanged, which made the fill loop spin forever.
            self.table = try self.alloc.realloc(self.table, self.table.len + to_add);
        }

        /// Returns element `idx`, loading further chunks if it is not yet in
        /// memory. Returns `TableError.InvalidIndex` when `idx >= table_count`;
        /// reader and allocator errors are propagated.
        pub fn get(self: *Self, idx: u32) !T {
            if (idx >= self.table_count) return TableError.InvalidIndex;
            {
                // Fast path: already loaded, shared lock is enough.
                self.mut.lockShared();
                defer self.mut.unlockShared();
                if (idx < self.table.len) return self.table[idx];
            }
            return self.getAndFill(idx);
        }

        /// Loads chunks under the exclusive lock until `idx` is available.
        fn getAndFill(self: *Self, idx: u32) !T {
            self.mut.lock();
            defer self.mut.unlock();
            // Re-checked under the lock: another thread may have filled the
            // table between the caller's shared unlock and this lock.
            while (idx >= self.table.len) {
                const to_read: usize = @min(self.table_count - self.table.len, per_chunk);
                try self.resize(to_read);
                var chunk_pos: u64 = 0;
                _ = try self.rdr.pread(std.mem.asBytes(&chunk_pos), self.offset);
                self.offset += 8;
                _ = try self.rdr.pread(std.mem.sliceAsBytes(self.table[self.table.len - to_read ..]), chunk_pos);
            }
            return self.table[idx];
        }
    };
}