Restart (once again)

2026-01-15 06:40:59 -06:00
parent ad7aa271ea
commit 428f938c3a
14 changed files with 395 additions and 1 deletions
@@ -1,2 +1,4 @@
 testing/
 .zig-cache/
 zig-out/
@@ -1,6 +1,6 @@
 MIT License
-Copyright (c) 2025 Caleb Gardner
+Copyright (c) 2026 Caleb Gardner
 Permission is hereby granted, free of charge, to any person obtaining a copy
 of this software and associated documentation files (the "Software"), to deal
@@ -0,0 +1,40 @@
 const std = @import("std");
 /// Build script entry point: declares the `zig_squashfs` library module, the
 /// `unsquashfs` executable that imports it, and the `run` and `test` steps.
 pub fn build(b: *std.Build) void {
    // Options selectable on the command line (-Dtarget=..., -Doptimize=...).
    const target = b.standardTargetOptions(.{});
    const optimize = b.standardOptimizeOption(.{});
    // Library module, exposed to dependents under the name "zig_squashfs".
    const lib_module = b.addModule("zig_squashfs", .{
        .root_source_file = b.path("src/root.zig"),
        .target = target,
    });
    // Root module of the executable; it imports the library module.
    const exe_module = b.createModule(.{
        .root_source_file = b.path("src/bin/unsquashfs.zig"),
        .target = target,
        .optimize = optimize,
        .imports = &.{
            .{ .name = "zig_squashfs", .module = lib_module },
        },
    });
    const unsquashfs = b.addExecutable(.{
        .name = "unsquashfs",
        .root_module = exe_module,
    });
    b.installArtifact(unsquashfs);
    // `zig build run [-- args...]`: install first, then run with forwarded args.
    const run_step = b.step("run", "Run the app");
    const run_exe = b.addRunArtifact(unsquashfs);
    run_step.dependOn(&run_exe.step);
    run_exe.step.dependOn(b.getInstallStep());
    if (b.args) |forwarded| run_exe.addArgs(forwarded);
    // `zig build test`: runs the tests of both the library and the executable.
    const lib_tests = b.addTest(.{ .root_module = lib_module });
    const run_lib_tests = b.addRunArtifact(lib_tests);
    const exe_tests = b.addTest(.{ .root_module = exe_module });
    const run_exe_tests = b.addRunArtifact(exe_tests);
    const test_step = b.step("test", "Run tests");
    test_step.dependOn(&run_lib_tests.step);
    test_step.dependOn(&run_exe_tests.step);
 }
@@ -0,0 +1,81 @@
 .{
    // This is the default name used by packages depending on this one. For
    // example, when a user runs `zig fetch --save <url>`, this field is used
    // as the key in the `dependencies` table. Although the user can choose a
    // different name, most users will stick with this provided value.
    //
    // It is redundant to include "zig" in this name because it is already
    // within the Zig package namespace.
    .name = .zig_squashfs,
    // This is a [Semantic Version](https://semver.org/).
    // In a future version of Zig it will be used for package deduplication.
    .version = "0.0.0",
    // Together with name, this represents a globally unique package
    // identifier. This field is generated by the Zig toolchain when the
    // package is first created, and then *never changes*. This allows
    // unambiguous detection of one package being an updated version of
    // another.
    //
    // When forking a Zig project, this id should be regenerated (delete the
    // field and run `zig build`) if the upstream project is still maintained.
    // Otherwise, the fork is *hostile*, attempting to take control over the
    // original project's identity. Thus it is recommended to leave the comment
    // on the following line intact, so that it shows up in code reviews that
    // modify the field.
    .fingerprint = 0x527960c74dddb509, // Changing this has security and trust implications.
    // Tracks the earliest Zig version that the package considers to be a
    // supported use case.
    .minimum_zig_version = "0.15.2",
    // This field is optional.
    // Each dependency must either provide a `url` and `hash`, or a `path`.
    // `zig build --fetch` can be used to fetch all dependencies of a package, recursively.
    // Once all dependencies are fetched, `zig build` no longer requires
    // internet connectivity.
    .dependencies = .{
        // See `zig fetch --save <url>` for a command-line interface for adding dependencies.
        //.example = .{
        //    // When updating this field to a new URL, be sure to delete the corresponding
        //    // `hash`, otherwise you are communicating that you expect to find the old hash at
        //    // the new URL. If the contents of a URL change this will result in a hash mismatch
        //    // which will prevent zig from using it.
        //    .url = "https://example.com/foo.tar.gz",
        //
        //    // This is computed from the file contents of the directory of files that is
        //    // obtained after fetching `url` and applying the inclusion rules given by
        //    // `paths`.
        //    //
        //    // This field is the source of truth; packages do not come from a `url`; they
        //    // come from a `hash`. `url` is just one of many possible mirrors for how to
        //    // obtain a package matching this `hash`.
        //    //
        //    // Uses the [multihash](https://multiformats.io/multihash/) format.
        //    .hash = "...",
        //
        //    // When this is provided, the package is found in a directory relative to the
        //    // build root. In this case the package's hash is irrelevant and therefore not
        //    // computed. This field and `url` are mutually exclusive.
        //    .path = "foo",
        //
        //    // When this is set to `true`, a package is declared to be lazily
        //    // fetched. This makes the dependency only get fetched if it is
        //    // actually used.
        //    .lazy = false,
        //},
    },
    // Specifies the set of files and directories that are included in this package.
    // Only files and directories listed here are included in the `hash` that
    // is computed for this package. Only files listed here will remain on disk
    // when using the zig package manager. As a rule of thumb, one should list
    // files required for compilation plus any license(s).
    // Paths are relative to the build root. Use the empty string (`""`) to refer to
    // the build root itself.
    // A directory listed here means that all files within, recursively, are included.
    .paths = .{
        "build.zig",
        "build.zig.zon",
        "src",
        // For example...
        //"LICENSE",
        //"README.md",
    },
 }
@@ -0,0 +1,53 @@
 //! A squashfs archive read from a file.
 //! Can be used to directly access File's contents or extract to the filesystem.
 const std = @import("std");
 const File = std.fs.File;
 const Superblock = @import("super.zig").Superblock;
 const OffsetFile = @import("util/offset_file.zig");
 const Archive = @This();
 // 4 GiB. Despite the name, `init` uses this as an upper bound on the default
 // buffer size: it takes the smaller of this value and half of system memory.
 const MIN_MEM_SIZE = 4 * 1024 * 1024 * 1024;
 /// Allocator that `fixed_buf` is obtained from and returned to in `deinit`.
 parent_alloc: std.mem.Allocator,
 /// Fixed-buffer allocator initialised over `fixed_buf`.
 alloc: std.heap.FixedBufferAllocator,
 /// Backing memory for `alloc`; freed in `deinit`.
 fixed_buf: []u8,
 /// The archive file together with the offset given to `initAdvanced`.
 fil: OffsetFile,
 /// Superblock bytes read from the file in `initAdvanced`.
 super: Superblock,
 /// Create an Archive with default settings: std.Thread.getCpuCount() threads and
 /// a memory budget of 4 GiB or half of system memory, whichever is smaller.
 /// The archive is read starting at offset 0 of `fil`.
 /// Returns any error from querying the CPU count or the system memory size,
 /// as well as any error from `initAdvanced`.
 pub fn init(alloc: std.mem.Allocator, fil: File) !Archive {
    // totalSystemMemory() returns an error union, so unwrap it before dividing.
    const half_system_mem = (try std.process.totalSystemMemory()) / 2;
    // Also clamp to usize so the budget is representable on 32-bit targets.
    const budget = @min(MIN_MEM_SIZE, half_system_mem, std.math.maxInt(usize));
    return initAdvanced(
        alloc,
        fil,
        0,
        try std.Thread.getCpuCount(),
        @intCast(budget),
    );
 }
 /// Create the Archive dictating the amount of threads & memory used.
 /// If trying to extract a full archive, a large memory size & thread count could help.
 /// If you're planning on only interacting with a small number of files, it should be fine to use few threads and a small memory size.
 ///
 /// `offset` is the byte position in `fil` at which the superblock is read.
 /// `mem` bytes are allocated from `alloc` up front; release them with `deinit`.
 /// Returns `error.EndOfStream` if the file ends before a full superblock could
 /// be read, plus any read or allocation error.
 pub fn initAdvanced(alloc: std.mem.Allocator, fil: File, offset: u64, threads: usize, mem: usize) !Archive {
    // Accepted for API stability; not used by this function yet.
    _ = threads;
    var super: Superblock = undefined;
    const super_bytes = std.mem.asBytes(&super);
    // preadAll keeps reading until the buffer is full or EOF is reached, so a
    // short count can only mean a truncated file. Report that as an error
    // rather than asserting, since the file contents are untrusted input.
    const red = try fil.preadAll(super_bytes, offset);
    if (red != super_bytes.len) return error.EndOfStream;
    // Allocate only after the read succeeded so nothing needs cleanup on failure.
    const fixed_buf = try alloc.alloc(u8, mem);
    return .{
        .parent_alloc = alloc,
        .alloc = .init(fixed_buf),
        .fixed_buf = fixed_buf,
        .fil = .init(fil, offset),
        .super = super,
    };
 }
 /// Return the backing buffer to the allocator it was obtained from.
 pub fn deinit(self: *Archive) void {
    const backing = self.fixed_buf;
    self.parent_alloc.free(backing);
 }
@@ -0,0 +1,5 @@
 const std = @import("std");
 const squashfs = @import("zig_squashfs");
 /// Entry point of the `unsquashfs` executable. The body is currently empty.
 pub fn main() !void {}
@@ -0,0 +1,11 @@
 const std = @import("std");
 const compress = std.compress;
 /// Compressor identifier as stored in the superblock's 16-bit `compression`
 /// field. Values are assigned sequentially starting at 1.
 /// NOTE(review): the enum is exhaustive, so an on-disk value outside 1..6 is
 /// not a valid `CompressionType`; confirm callers check for this before
 /// switching on a value read from a file.
 pub const CompressionType = enum(u16) {
    gzip = 1,
    lzma,
    lzo,
    xz,
    lz4,
    zstd,
    /// Deprecated alias for `gzip`, kept so code written against the original
    /// misspelling keeps compiling.
    pub const gzig: CompressionType = .gzip;
 };
@@ -0,0 +1,5 @@
 /// A 64-bit squashfs inode reference.
 /// Packed-struct fields are laid out from the least significant bit upwards,
 /// so the declaration order below is: bits 0..15, bits 16..47, bits 48..63.
 pub const Ref = packed struct(u64) {
    /// Byte offset of the inode inside its decompressed metadata block (low 16 bits).
    block_offset: u16,
    /// Position of the metadata block relative to the start of the inode table.
    table_offset: u32,
    /// Unused upper 16 bits.
    _: u16 = 0,
 };
@@ -0,0 +1 @@
 /// The squashfs archive reader. Declared `pub` so that code importing this
 /// module can actually reach it; a private declaration exports nothing.
 pub const Archive = @import("archive.zig");
@@ -0,0 +1,62 @@
 const math = @import("std").math;
 const CompressionType = @import("decomp.zig").CompressionType;
 const InodeRef = @import("inode.zig").Ref;
 /// Superblock magic: the ASCII bytes "hsqs" read as a little-endian u32.
 /// (A string literal cannot initialise a u32, so the value is spelled out.)
 const SQUASHFS_MAGIC: u32 = 0x73717368;
 /// Errors returned by `Superblock.validate`.
 const SuperblockError = error{
    /// The `magic` field does not equal `SQUASHFS_MAGIC`.
    InvalidMagic,
    /// `block_log` does not agree with `block_size`.
    InvalidBlockLog,
    /// The version is not 4.0.
    InvalidVersion,
    /// The `check` flag is set.
    InvalidCheck,
 };
 /// A squashfs Superblock.
 /// Declared as a packed struct so that its in-memory layout follows the field
 /// order below with no padding. Multi-byte fields are interpreted in native
 /// byte order when the struct is filled from raw bytes.
 pub const Superblock = packed struct {
    /// Must equal `SQUASHFS_MAGIC`.
    magic: u32,
    inode_count: u32,
    mod_time: u32,
    /// Data block size in bytes; must equal `1 << block_log`.
    block_size: u32,
    frag_count: u32,
    compression: CompressionType,
    /// Base-2 logarithm of `block_size`.
    block_log: u16,
    /// Bit flags, listed from the least significant bit upwards.
    flags: packed struct {
        inode_uncompressed: bool,
        data_uncompressed: bool,
        /// Must be clear; `validate` rejects superblocks with this bit set.
        check: bool,
        frag_uncompressed: bool,
        fragment_never: bool,
        fragment_always: bool,
        duplicates: bool,
        exportable: bool,
        xattr_uncompressed: bool,
        xattr_never: bool,
        compression_options: bool,
        ids_uncompressed: bool,
        _: u4,
    },
    id_count: u16,
    /// Major version; `validate` requires 4.
    ver_maj: u16,
    /// Minor version; `validate` requires 0.
    ver_min: u16,
    root_ref: InodeRef,
    size: u64,
    id_start: u64,
    xattr_start: u64,
    inode_start: u64,
    dir_start: u64,
    frag_start: u64,
    export_start: u64,
    /// Validate the Superblock. If an error is returned, it's likely the archive is corrupted or not a squashfs archive.
    ///
    /// Checks, in order: the magic number, that the `check` flag is clear, that
    /// the version is 4.0, and that `block_size == 1 << block_log`.
    pub fn validate(self: Superblock) SuperblockError!void {
        if (self.magic != SQUASHFS_MAGIC)
            return SuperblockError.InvalidMagic;
        if (self.flags.check)
            return SuperblockError.InvalidCheck;
        if (self.ver_maj != 4 or self.ver_min != 0)
            return SuperblockError.InvalidVersion;
        // A shift of 32 or more cannot be expressed for a u32, and no valid
        // block size could match it, so reject it before shifting.
        if (self.block_log >= @bitSizeOf(u32))
            return SuperblockError.InvalidBlockLog;
        // Compare against the exact power of two. Unlike a floor(log2) check,
        // this rejects a zero block size and sizes that are not powers of two.
        const shift: math.Log2Int(u32) = @intCast(self.block_log);
        if (self.block_size != @as(u32, 1) << shift)
            return SuperblockError.InvalidBlockLog;
    }
 };
@@ -0,0 +1,72 @@
 const std = @import("std");
 const Mutex = std.Thread.Mutex;
 const DecompMgr = @import("decomp.zig");
 const MetadataReader = @import("util/metadata.zig");
 const OffsetFile = @import("util/offset_file.zig");
 /// Errors returned by `Table(T).get`.
 const TableError = error{
    /// The requested index is not smaller than the table's value count.
    InvalidIndex,
 };
 /// A lazily loaded lookup table of `T` values stored in metadata blocks.
 /// Values are grouped into blocks of `8192 / @sizeOf(T)` entries. A block is
 /// read the first time one of its entries is requested and then cached.
 pub fn Table(comptime T: type) type {
    return struct {
        const This = @This();
        /// Number of `T` values held by one full 8192-byte block.
        const VALS_PER_BLOCK = 8192 / @sizeOf(T);
        /// Allocator used for the cached block slices.
        alloc: std.mem.Allocator,
        fil: OffsetFile,
        decomp: *DecompMgr,
        /// Offset of the list of 8-byte block locations.
        tab_start: u64,
        /// Guards `tab`.
        mut: Mutex = .{},
        /// Cache: block number -> decoded values of that block.
        tab: std.AutoHashMap(u32, []T),
        /// Total number of values in the table.
        values: u32,
        /// Create an empty table; no I/O happens until `get` is called.
        pub fn init(alloc: std.mem.Allocator, fil: OffsetFile, decomp: *DecompMgr, tab_start: u64, values: u32) !This {
            return .{
                .alloc = alloc,
                .fil = fil,
                .decomp = decomp,
                .tab_start = tab_start,
                .tab = .init(alloc),
                .values = values,
            };
        }
        /// Free every cached block and the cache itself.
        pub fn deinit(self: *This) void {
            var cached = self.tab.valueIterator();
            while (cached.next()) |block| self.alloc.free(block.*);
            self.tab.deinit();
        }
        /// Return the value at `idx`, loading and caching its block if needed.
        /// Returns `TableError.InvalidIndex` when `idx >= values`, plus any
        /// allocation or read error hit while loading the block.
        pub fn get(self: *This, idx: u32) !T {
            if (idx >= self.values) return TableError.InvalidIndex;
            const block_num = idx / VALS_PER_BLOCK;
            const idx_offset = idx % VALS_PER_BLOCK;
            // The hash map is not safe to read while another thread inserts,
            // so the lock is held for the lookup as well as for the load.
            self.mut.lock();
            defer self.mut.unlock();
            if (self.tab.get(block_num)) |block| return block[idx_offset];
            // The last block may be only partially filled.
            const is_last = (self.values - 1) / VALS_PER_BLOCK == block_num;
            const slice_size = if (is_last) self.values - (block_num * VALS_PER_BLOCK) else VALS_PER_BLOCK;
            const slice = try self.alloc.alloc(T, slice_size);
            errdefer self.alloc.free(slice);
            // Each block has an 8-byte entry at tab_start giving its location.
            var rdr = try self.fil.readerAt(self.tab_start + (8 * @as(u64, block_num)), &.{});
            var raw_offset: u64 = undefined;
            try rdr.interface.readSliceAll(std.mem.asBytes(&raw_offset));
            // NOTE(review): assumes the stored location is little-endian — confirm.
            const offset = std.mem.littleToNative(u64, raw_offset);
            rdr = try self.fil.readerAt(offset, &.{});
            // The metadata reader is responsible for reading the block contents.
            var meta: MetadataReader = .init(&rdr.interface, self.decomp);
            try meta.interface.readSliceAll(std.mem.sliceAsBytes(slice));
            try self.tab.put(block_num, slice);
            return slice[idx_offset];
        }
    };
 }
@@ -0,0 +1,8 @@
 const std = @import("std");
 const stuff = @import("builtin");
 // Placeholder: no assertions yet.
 test "Basics" {}
 // Placeholder: no assertions yet.
 test "ExtractSingleFile" {}
 // Placeholder: no assertions yet.
 test "ExtractCompleteArchive" {}
@@ -0,0 +1,34 @@
 const std = @import("std");
 const Reader = std.Io.Reader;
 const Writer = std.Io.Writer;
 const Limit = std.Io.Limit;
 const StreamError = std.Io.Reader.StreamError;
 const DecompMgr = @import("../decomp.zig");
 const This = @This();
 /// Underlying reader that the raw metadata bytes come from.
 /// Held by pointer: copying a `std.Io.Reader` out of the object that embeds
 /// it would detach it from that object's state.
 rdr: *Reader,
 decomp: *DecompMgr,
 /// Scratch space for one metadata block. Not referenced by `init`.
 buf: [8192]u8 = undefined,
 /// The `std.Io.Reader` callers read from; its vtable points at `stream`.
 /// Use it only through a pointer into this struct, never through a copy.
 interface: Reader,
 /// Create a metadata reader on top of `rdr`.
 /// `rdr` and `decomp` must outlive the returned value.
 pub fn init(rdr: *Reader, decomp: *DecompMgr) This {
    return .{
        .rdr = rdr,
        .decomp = decomp,
        .interface = .{
            // Unbuffered: reads go straight to `stream`.
            .buffer = &.{},
            .end = 0,
            .seek = 0,
            .vtable = &.{
                .stream = stream,
            },
        },
    };
 }
 /// `stream` implementation for this reader's vtable.
 /// Not implemented yet: every call fails with `error.ReadFailed`, so that an
 /// attempt to read surfaces as an error instead of the file failing to compile.
 fn stream(rdr: *Reader, wrt: *Writer, limit: Limit) StreamError!usize {
    // TODO: read the next metadata block from the underlying reader,
    // decompress it if necessary and write at most `limit` bytes to `wrt`.
    _ = rdr;
    _ = wrt;
    _ = limit;
    return error.ReadFailed;
 }
@@ -0,0 +1,20 @@
 //! A File whose meaningful content starts at a given offset.
 const std = @import("std");
 const File = std.fs.File;
 const Reader = std.fs.File.Reader;
 const OffsetFile = @This();
 /// The underlying file.
 fil: File,
 /// Base offset that `readerAt` adds to every requested position.
 offset: u64,
 /// Wrap `fil` so that position 0 corresponds to byte `init_offset` of the file.
 pub fn init(fil: File, init_offset: u64) OffsetFile {
    const wrapped: OffsetFile = .{
        .fil = fil,
        .offset = init_offset,
    };
    return wrapped;
 }
 /// Return a file reader using `buffer`, positioned at `offset` relative to
 /// this OffsetFile's base offset.
 /// Returns `error.Overflow` if base offset plus `offset` does not fit in a
 /// u64, plus any error from seeking.
 pub fn readerAt(self: OffsetFile, offset: u64, buffer: []u8) !Reader {
    // Offsets can originate from archive contents, so add with an overflow
    // check rather than risking an overflow trap on a corrupt value.
    const absolute = try std.math.add(u64, self.offset, offset);
    var rdr = self.fil.reader(buffer);
    try rdr.seekTo(absolute);
    return rdr;
 }