From 428f938c3a939c2fa07329e02423f28cdf74b008 Mon Sep 17 00:00:00 2001
From: "Caleb J. Gardner"
Date: Thu, 15 Jan 2026 06:40:59 -0600
Subject: [PATCH] Restart (once again)

---
 .gitignore               |  2 +
 LICENSE                  |  2 +-
 build.zig                | 45 ++++++++++++++++++++++
 build.zig.zon            | 81 ++++++++++++++++++++++++++++++++++++++++
 src/archive.zig          | 62 ++++++++++++++++++++++++++++++
 src/bin/unsquashfs.zig   |  5 +++
 src/decomp.zig           | 12 ++++++
 src/inode.zig            |  8 ++++
 src/root.zig             |  6 +++
 src/super.zig            | 67 +++++++++++++++++++++++++++++++++
 src/table.zig            | 80 +++++++++++++++++++++++++++++++++++++++
 src/test.zig             |  8 ++++
 src/util/metadata.zig    | 44 ++++++++++++++++++++++
 src/util/offset_file.zig | 23 ++++++++++++
 14 files changed, 444 insertions(+), 1 deletion(-)
 create mode 100644 build.zig
 create mode 100644 build.zig.zon
 create mode 100644 src/archive.zig
 create mode 100644 src/bin/unsquashfs.zig
 create mode 100644 src/decomp.zig
 create mode 100644 src/inode.zig
 create mode 100644 src/root.zig
 create mode 100644 src/super.zig
 create mode 100644 src/table.zig
 create mode 100644 src/test.zig
 create mode 100644 src/util/metadata.zig
 create mode 100644 src/util/offset_file.zig

diff --git a/.gitignore b/.gitignore
index 3389c86..3400e46 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,2 +1,4 @@
+testing/
+
 .zig-cache/
 zig-out/
diff --git a/LICENSE b/LICENSE
index cb10259..0399227 100644
--- a/LICENSE
+++ b/LICENSE
@@ -1,6 +1,6 @@
 MIT License
 
-Copyright (c) 2025 Caleb Gardner
+Copyright (c) 2026 Caleb Gardner
 
 Permission is hereby granted, free of charge, to any person obtaining a copy
 of this software and associated documentation files (the "Software"), to deal
diff --git a/build.zig b/build.zig
new file mode 100644
index 0000000..688bfb5
--- /dev/null
+++ b/build.zig
@@ -0,0 +1,45 @@
+const std = @import("std");
+
+/// Declares the build graph: the `zig_squashfs` library module, the
+/// `unsquashfs` executable that imports it, and the `run` and `test` steps.
+pub fn build(b: *std.Build) void {
+    const target = b.standardTargetOptions(.{});
+    const optimize = b.standardOptimizeOption(.{});
+    // Library module; `addModule` also exposes it to dependent packages.
+    const mod = b.addModule("zig_squashfs", .{
+        .root_source_file = b.path("src/root.zig"),
+        .target = target,
+    });
+    const exe = b.addExecutable(.{
+        .name = "unsquashfs",
+        .root_module = b.createModule(.{
+            .root_source_file = b.path("src/bin/unsquashfs.zig"),
+            .target = target,
+            .optimize = optimize,
+            .imports = &.{
+                .{ .name = "zig_squashfs", .module = mod },
+            },
+        }),
+    });
+    b.installArtifact(exe);
+    // `zig build run -- args` installs first, then runs the executable.
+    const run_step = b.step("run", "Run the app");
+    const run_cmd = b.addRunArtifact(exe);
+    run_step.dependOn(&run_cmd.step);
+    run_cmd.step.dependOn(b.getInstallStep());
+    if (b.args) |args| {
+        run_cmd.addArgs(args);
+    }
+    // `zig build test` runs the tests of both the module and the executable.
+    const mod_tests = b.addTest(.{
+        .root_module = mod,
+    });
+    const run_mod_tests = b.addRunArtifact(mod_tests);
+    const exe_tests = b.addTest(.{
+        .root_module = exe.root_module,
+    });
+    const run_exe_tests = b.addRunArtifact(exe_tests);
+    const test_step = b.step("test", "Run tests");
+    test_step.dependOn(&run_mod_tests.step);
+    test_step.dependOn(&run_exe_tests.step);
+}
diff --git a/build.zig.zon b/build.zig.zon
new file mode 100644
index 0000000..ac82177
--- /dev/null
+++ b/build.zig.zon
@@ -0,0 +1,81 @@
+.{
+    // This is the default name used by packages depending on this one. For
+    // example, when a user runs `zig fetch --save <url>`, this field is used
+    // as the key in the `dependencies` table. Although the user can choose a
+    // different name, most users will stick with this provided value.
+    //
+    // It is redundant to include "zig" in this name because it is already
+    // within the Zig package namespace.
+    .name = .zig_squashfs,
+    // This is a [Semantic Version](https://semver.org/).
+    // In a future version of Zig it will be used for package deduplication.
+    .version = "0.0.0",
+    // Together with name, this represents a globally unique package
+    // identifier. This field is generated by the Zig toolchain when the
+    // package is first created, and then *never changes*. This allows
+    // unambiguous detection of one package being an updated version of
+    // another.
+    //
+    // When forking a Zig project, this id should be regenerated (delete the
+    // field and run `zig build`) if the upstream project is still maintained.
+    // Otherwise, the fork is *hostile*, attempting to take control over the
+    // original project's identity. Thus it is recommended to leave the comment
+    // on the following line intact, so that it shows up in code reviews that
+    // modify the field.
+    .fingerprint = 0x527960c74dddb509, // Changing this has security and trust implications.
+    // Tracks the earliest Zig version that the package considers to be a
+    // supported use case.
+    .minimum_zig_version = "0.15.2",
+    // This field is optional.
+    // Each dependency must either provide a `url` and `hash`, or a `path`.
+    // `zig build --fetch` can be used to fetch all dependencies of a package, recursively.
+    // Once all dependencies are fetched, `zig build` no longer requires
+    // internet connectivity.
+    .dependencies = .{
+        // See `zig fetch --save <url>` for a command-line interface for adding dependencies.
+        //.example = .{
+        //    // When updating this field to a new URL, be sure to delete the corresponding
+        //    // `hash`, otherwise you are communicating that you expect to find the old hash at
+        //    // the new URL. If the contents of a URL change this will result in a hash mismatch
+        //    // which will prevent zig from using it.
+        //    .url = "https://example.com/foo.tar.gz",
+        //
+        //    // This is computed from the file contents of the directory of files that is
+        //    // obtained after fetching `url` and applying the inclusion rules given by
+        //    // `paths`.
+        //    //
+        //    // This field is the source of truth; packages do not come from a `url`; they
+        //    // come from a `hash`. `url` is just one of many possible mirrors for how to
+        //    // obtain a package matching this `hash`.
+        //    //
+        //    // Uses the [multihash](https://multiformats.io/multihash/) format.
+        //    .hash = "...",
+        //
+        //    // When this is provided, the package is found in a directory relative to the
+        //    // build root. In this case the package's hash is irrelevant and therefore not
+        //    // computed. This field and `url` are mutually exclusive.
+        //    .path = "foo",
+        //
+        //    // When this is set to `true`, a package is declared to be lazily
+        //    // fetched. This makes the dependency only get fetched if it is
+        //    // actually used.
+        //    .lazy = false,
+        //},
+    },
+    // Specifies the set of files and directories that are included in this package.
+    // Only files and directories listed here are included in the `hash` that
+    // is computed for this package. Only files listed here will remain on disk
+    // when using the zig package manager. As a rule of thumb, one should list
+    // files required for compilation plus any license(s).
+    // Paths are relative to the build root. Use the empty string (`""`) to refer to
+    // the build root itself.
+    // A directory listed here means that all files within, recursively, are included.
+    .paths = .{
+        "build.zig",
+        "build.zig.zon",
+        "src",
+        // For example...
+        //"LICENSE",
+        //"README.md",
+    },
+}
diff --git a/src/archive.zig b/src/archive.zig
new file mode 100644
index 0000000..8926e8d
--- /dev/null
+++ b/src/archive.zig
@@ -0,0 +1,62 @@
+//! A squashfs archive read from a file.
+//! Can be used to directly access File's contents or extract to the filesystem.
+
+const std = @import("std");
+const File = std.fs.File;
+
+const Superblock = @import("super.zig").Superblock;
+const OffsetFile = @import("util/offset_file.zig");
+
+const Archive = @This();
+
+/// Upper bound for the default memory budget chosen by `init`: 4 GiB.
+const DEFAULT_MEM_CAP = 4 * 1024 * 1024 * 1024;
+
+/// Allocator `fixed_buf` was allocated from; `deinit` frees it through this.
+parent_alloc: std.mem.Allocator,
+/// Fixed buffer allocator backed by `fixed_buf`.
+alloc: std.heap.FixedBufferAllocator,
+fixed_buf: []u8,
+fil: OffsetFile,
+
+super: Superblock,
+
+/// Default settings: archive at offset 0, std.Thread.getCpuCount() threads and half of system memory (at most 4 GiB) for memory usage.
+pub fn init(alloc: std.mem.Allocator, fil: File) !Archive {
+    const half_of_system = (try std.process.totalSystemMemory()) / 2;
+    return initAdvanced(
+        alloc,
+        fil,
+        0,
+        try std.Thread.getCpuCount(),
+        // Saturates instead of overflowing when usize is narrower than 64 bits.
+        std.math.lossyCast(usize, @min(DEFAULT_MEM_CAP, half_of_system)),
+    );
+}
+/// Create the Archive dictating the amount of threads & memory used.
+/// If trying to extract a full archive, a large memory size & thread count could help.
+/// If you're planning on only interacting with a small number of files, it should be fine to use few threads and a small memory size.
+/// Fails with `error.EndOfStream` when the file ends before a full superblock could be read,
+/// and with the error from `Superblock.validate` when the superblock is not valid.
+pub fn initAdvanced(alloc: std.mem.Allocator, fil: File, offset: u64, threads: usize, mem: usize) !Archive {
+    _ = threads; // TODO: not used yet.
+    var super: Superblock = undefined;
+    // NOTE(review): the bytes are read straight into the packed struct; presumably only correct on little-endian hosts - confirm.
+    const red = try fil.preadAll(std.mem.asBytes(&super), offset);
+    if (red != @sizeOf(Superblock)) return error.EndOfStream;
+    try super.validate();
+    const fixed_buf = try alloc.alloc(u8, mem);
+    return .{
+        .parent_alloc = alloc,
+        .alloc = .init(fixed_buf),
+        .fixed_buf = fixed_buf,
+        .fil = .init(fil, offset),
+
+        .super = super,
+    };
+}
+
+/// Frees the memory budget allocated by `initAdvanced`. Does not close the file.
+pub fn deinit(self: *Archive) void {
+    self.parent_alloc.free(self.fixed_buf);
+}
diff --git a/src/bin/unsquashfs.zig b/src/bin/unsquashfs.zig
new file mode 100644
index 0000000..43b93a1
--- /dev/null
+++ b/src/bin/unsquashfs.zig
@@ -0,0 +1,5 @@
+const std = @import("std");
+
+const squashfs = @import("zig_squashfs");
+
+pub fn main() !void {}
diff --git a/src/decomp.zig b/src/decomp.zig
new file mode 100644
index 0000000..13b618b
--- /dev/null
+++ b/src/decomp.zig
@@ -0,0 +1,12 @@
+const std = @import("std");
+const compress = std.compress;
+
+/// Compression ids as stored in the superblock's `compression` field.
+pub const CompressionType = enum(u16) {
+    gzip = 1,
+    lzma,
+    lzo,
+    xz,
+    lz4,
+    zstd,
+};
diff --git a/src/inode.zig b/src/inode.zig
new file mode 100644
index 0000000..1e92a2d
--- /dev/null
+++ b/src/inode.zig
@@ -0,0 +1,8 @@
+/// A 64-bit inode reference.
+/// Packed struct fields start at the least significant bit, so the offset
+/// inside the metadata block comes first and the unused top 16 bits last.
+pub const Ref = packed struct {
+    block_offset: u16,
+    table_offset: u32,
+    _: u16,
+};
diff --git a/src/root.zig b/src/root.zig
new file mode 100644
index 0000000..9f4c6cd
--- /dev/null
+++ b/src/root.zig
@@ -0,0 +1,6 @@
+pub const Archive = @import("archive.zig");
+
+test {
+    // Reference the test file so `zig build test` actually runs its tests.
+    _ = @import("test.zig");
+}
diff --git a/src/super.zig b/src/super.zig
new file mode 100644
index 0000000..fc28f5c
--- /dev/null
+++ b/src/super.zig
@@ -0,0 +1,67 @@
+const math = @import("std").math;
+
+const CompressionType = @import("decomp.zig").CompressionType;
+const InodeRef = @import("inode.zig").Ref;
+
+/// The bytes "hsqs" read as a little-endian u32.
+const SQUASHFS_MAGIC: u32 = 0x73717368;
+
+const SuperblockError = error{
+    InvalidMagic,
+    InvalidBlockLog,
+    InvalidVersion,
+    InvalidCheck,
+};
+
+/// A squashfs Superblock
+pub const Superblock = packed struct {
+    magic: u32,
+    inode_count: u32,
+    mod_time: u32,
+    block_size: u32,
+    frag_count: u32,
+    compression: CompressionType,
+    block_log: u16,
+    flags: packed struct {
+        inode_uncompressed: bool,
+        data_uncompressed: bool,
+        check: bool,
+        frag_uncompressed: bool,
+        fragment_never: bool,
+        fragment_always: bool,
+        duplicates: bool,
+        exportable: bool,
+        xattr_uncompressed: bool,
+        xattr_never: bool,
+        compression_options: bool,
+        ids_uncompressed: bool,
+        _: u4,
+    },
+    id_count: u16,
+    ver_maj: u16,
+    ver_min: u16,
+    root_ref: InodeRef,
+    size: u64,
+    id_start: u64,
+    xattr_start: u64,
+    inode_start: u64,
+    dir_start: u64,
+    frag_start: u64,
+    export_start: u64,
+
+    /// Validate the Superblock. If an error is returned, it's likely the archive is corrupted or not a squashfs archive.
+    pub fn validate(self: Superblock) SuperblockError!void {
+        if (self.magic != SQUASHFS_MAGIC)
+            return SuperblockError.InvalidMagic;
+        if (self.flags.check)
+            return SuperblockError.InvalidCheck;
+        if (self.ver_maj != 4 or self.ver_min != 0)
+            return SuperblockError.InvalidVersion;
+        // block_size must be exactly 1 << block_log. Unlike math.log2 this also
+        // rejects sizes that are not a power of two and never asserts on 0.
+        const shift = math.cast(u5, self.block_log) orelse
+            return SuperblockError.InvalidBlockLog;
+        if (self.block_size != @as(u32, 1) << shift)
+            return SuperblockError.InvalidBlockLog;
+    }
+};
diff --git a/src/table.zig b/src/table.zig
new file mode 100644
index 0000000..c8b6264
--- /dev/null
+++ b/src/table.zig
@@ -0,0 +1,80 @@
+const std = @import("std");
+const Mutex = std.Thread.Mutex;
+
+const DecompMgr = @import("decomp.zig");
+const MetadataReader = @import("util/metadata.zig");
+const OffsetFile = @import("util/offset_file.zig");
+
+const TableError = error{
+    InvalidIndex,
+};
+
+/// A table of `T` values that is loaded one block at a time, on demand.
+/// Block `n` holds up to `8192 / @sizeOf(T)` values; its position is the
+/// `n`th u64 of the list stored at `tab_start`.
+pub fn Table(comptime T: type) type {
+    return struct {
+        const This = @This();
+
+        const VALS_PER_BLOCK = 8192 / @sizeOf(T);
+
+        alloc: std.mem.Allocator,
+        fil: OffsetFile,
+        decomp: *DecompMgr,
+        tab_start: u64,
+
+        mut: Mutex = .{},
+
+        /// Blocks loaded so far, keyed by block number. The slices are allocated from `alloc`.
+        tab: std.AutoHashMap(u32, []T),
+        /// Total number of values in the table.
+        values: u32,
+
+        pub fn init(alloc: std.mem.Allocator, fil: OffsetFile, decomp: *DecompMgr, tab_start: u64, values: u32) !This {
+            return .{
+                .alloc = alloc,
+                .fil = fil,
+                .decomp = decomp,
+                .tab_start = tab_start,
+
+                .tab = .init(alloc),
+                .values = values,
+            };
+        }
+
+        /// Frees every cached block, then the cache itself.
+        pub fn deinit(self: *This) void {
+            var blocks = self.tab.valueIterator();
+            while (blocks.next()) |block| self.alloc.free(block.*);
+            self.tab.deinit();
+        }
+
+        /// Returns value `idx`, reading and caching the block that holds it on first use.
+        /// Fails with `TableError.InvalidIndex` when `idx` is not below `values`.
+        pub fn get(self: *This, idx: u32) !T {
+            if (idx >= self.values) return TableError.InvalidIndex;
+            const block_num = idx / VALS_PER_BLOCK;
+            const idx_offset = idx - (block_num * VALS_PER_BLOCK);
+            // The hash map must not be read while another thread inserts into it,
+            // so cache hits take the lock as well.
+            self.mut.lock();
+            defer self.mut.unlock();
+            if (self.tab.get(block_num)) |block| return block[idx_offset];
+
+            const is_last = (self.values - 1) / VALS_PER_BLOCK == block_num;
+            const slice_size = if (is_last) self.values - (block_num * VALS_PER_BLOCK) else VALS_PER_BLOCK;
+            const slice = try self.alloc.alloc(T, slice_size);
+            errdefer self.alloc.free(slice);
+            // Entry `block_num` of the u64 list at `tab_start` is the position of the block.
+            var rdr = try self.fil.readerAt(self.tab_start + (8 * @as(u64, block_num)), &.{});
+            var raw_pos: u64 = 0;
+            try rdr.interface.readSliceAll(std.mem.asBytes(&raw_pos));
+            const block_pos = std.mem.littleToNative(u64, raw_pos);
+            rdr = try self.fil.readerAt(block_pos, &.{});
+            var meta: MetadataReader = .init(&rdr.interface, self.decomp);
+            try meta.interface.readSliceAll(std.mem.sliceAsBytes(slice));
+            try self.tab.put(block_num, slice);
+            return slice[idx_offset];
+        }
+    };
+}
diff --git a/src/test.zig b/src/test.zig
new file mode 100644
index 0000000..d688c50
--- /dev/null
+++ b/src/test.zig
@@ -0,0 +1,8 @@
+const std = @import("std");
+const stuff = @import("builtin");
+
+test "Basics" {}
+
+test "ExtractSingleFile" {}
+
+test "ExtractCompleteArchive" {}
diff --git a/src/util/metadata.zig b/src/util/metadata.zig
new file mode 100644
index 0000000..14379f9
--- /dev/null
+++ b/src/util/metadata.zig
@@ -0,0 +1,44 @@
+const std = @import("std");
+const Reader = std.Io.Reader;
+const Writer = std.Io.Writer;
+const Limit = std.Io.Limit;
+const StreamError = std.Io.Reader.StreamError;
+
+const DecompMgr = @import("../decomp.zig");
+
+const This = @This();
+
+/// Reader the metadata blocks are read from. Borrowed; it must outlive this value.
+rdr: *Reader,
+decomp: *DecompMgr,
+
+buf: [8192]u8 = undefined,
+
+interface: Reader,
+
+/// Creates a metadata reader on top of `rdr`.
+/// `interface` starts with an empty buffer and `stream` as its only vtable entry.
+pub fn init(rdr: *Reader, decomp: *DecompMgr) This {
+    return .{
+        .rdr = rdr,
+        .decomp = decomp,
+        .interface = .{
+            .buffer = &.{},
+            .end = 0,
+            .seek = 0,
+            .vtable = &.{
+                .stream = stream,
+            },
+        },
+    };
+}
+
+/// `stream` implementation for `interface`.
+/// Not implemented yet: always fails with `error.ReadFailed`.
+fn stream(rdr: *Reader, wrt: *Writer, limit: Limit) StreamError!usize {
+    // TODO: read a metadata block from the underlying reader, decompress it and forward it to `wrt`.
+    _ = rdr;
+    _ = wrt;
+    _ = limit;
+    return error.ReadFailed;
+}
diff --git a/src/util/offset_file.zig b/src/util/offset_file.zig
new file mode 100644
index 0000000..8ba8be7
--- /dev/null
+++ b/src/util/offset_file.zig
@@ -0,0 +1,23 @@
+//! A File whose meaningful content starts at a given offset.
+
+const std = @import("std");
+const File = std.fs.File;
+const Reader = std.fs.File.Reader;
+
+const OffsetFile = @This();
+
+fil: File,
+offset: u64,
+
+/// Wraps `fil` so that positions passed to `readerAt` are relative to `init_offset`.
+pub fn init(fil: File, init_offset: u64) OffsetFile {
+    return .{ .fil = fil, .offset = init_offset };
+}
+
+/// Returns a reader over the file, using `buffer`, that has been seeked to
+/// `offset` bytes past the start offset given to `init`.
+pub fn readerAt(self: OffsetFile, offset: u64, buffer: []u8) !Reader {
+    var rdr = self.fil.reader(buffer);
+    try rdr.seekTo(self.offset + offset);
+    return rdr;
+}