From 2c392cf25050004083929a46b77ae905db5b7ce5 Mon Sep 17 00:00:00 2001 From: "Caleb J. Gardner" Date: Wed, 28 Jan 2026 06:50:26 -0600 Subject: [PATCH] Fixed a few bugs preventing basic functions Worked on extraction, including creating DataReader Added proper access to id, fragment, and export tables --- src/archive.zig | 34 ++++++++--- src/decomp.zig | 13 ++-- src/dir_entry.zig | 2 +- src/file.zig | 112 ++++++++++++++++++++++++++++++---- src/inode_data/file.zig | 7 ++- src/inode_data/misc.zig | 4 +- src/options.zig | 9 +-- src/util/data.zig | 130 ++++++++++++++++++++++++++++++++++++++++ src/util/metadata.zig | 2 +- 9 files changed, 276 insertions(+), 37 deletions(-) create mode 100644 src/util/data.zig diff --git a/src/archive.zig b/src/archive.zig index 8ece980..aa991df 100644 --- a/src/archive.zig +++ b/src/archive.zig @@ -8,26 +8,25 @@ const DecompMgr = @import("decomp.zig"); const ExtractionOptions = @import("options.zig"); const Inode = @import("inode.zig"); const InodeRef = Inode.Ref; +const BlockSize = @import("inode_data/file.zig").BlockSize; const SfsFile = @import("file.zig"); const Superblock = @import("super.zig").Superblock; const Table = @import("table.zig").Table; const MetadataReader = @import("util/metadata.zig"); const OffsetFile = @import("util/offset_file.zig"); -const FragEntry = packed struct { +/// Information about a fragment section. Multiple fragments are contained in the block described by a single FragEntry. +/// The offset into the block and fragment size is stored in the file's inode. 
+pub const FragEntry = packed struct { start: u64, - size: packed struct { - size: u24, - uncompressed: bool, - _: u7, - }, + size: BlockSize, _: u32, }; const Archive = @This(); // 4 Gigs -const MEM_SIZE = 4 * 1024 * 1024 * 1024; +const DEFAULT_MEM_SIZE = 4 * 1024 * 1024 * 1024; parent_alloc: std.mem.Allocator, alloc: std.heap.FixedBufferAllocator, @@ -53,7 +52,7 @@ pub fn init(alloc: std.mem.Allocator, fil: File) !Archive { fil, 0, try std.Thread.getCpuCount(), - @min(MEM_SIZE, try std.process.totalSystemMemory() / 2), + @min(DEFAULT_MEM_SIZE, try std.process.totalSystemMemory() / 2), ); } /// Create the Archive dictating the amount of threads & memory used. @@ -96,6 +95,25 @@ fn setupValues(self: *Archive) !void { self.export_table = try .init(self.allocator(), self.fil, &self.decomp, self.super.export_start, self.super.inode_count); } +pub fn id(self: *Archive, idx: u32) !u16 { + if (!self.setup) try self.setupValues(); + return self.id_table.get(idx); +} + +pub fn frag(self: *Archive, idx: u32) !FragEntry { + if (!self.setup) try self.setupValues(); + return self.frag_table.get(idx); +} + +pub fn inode(self: *Archive, num: u32) !Inode { + if (!self.setup) try self.setupValues(); + const ref = try self.export_table.get(num - 1); + var rdr = try self.fil.readerAt(ref.block_start + self.super.inode_start, &[0]u8{}); + var meta: MetadataReader = .init(self.allocator(), &rdr.interface, &self.decomp); + try meta.interface.discardAll(ref.block_offset); + return try .read(self.allocator(), &meta.interface, self.super.block_size); +} + pub fn root(self: *Archive) !SfsFile { if (!self.setup) try self.setupValues(); var rdr = try self.fil.readerAt(self.super.root_ref.block_start + self.super.inode_start, &[0]u8{}); diff --git a/src/decomp.zig b/src/decomp.zig index 711f9d8..8b8241d 100644 --- a/src/decomp.zig +++ b/src/decomp.zig @@ -79,7 +79,8 @@ pub const DecompThread = struct { self.rdr = rdr; defer self.rdr = null; self.res = res; - self.status.raw = 2; + 
self.status.store(2, .release); + Futex.wake(&self.status, 1); while (self.status.raw == 2) Futex.wait(&self.status, 2); return self.res_size; } @@ -94,7 +95,9 @@ pub const DecompThread = struct { self.res_size = blk: switch (comp_type) { .gzip => { var decomp_rdr = compress.flate.Decompress.init(rdr, .zlib, self.buf); - break :blk decomp_rdr.reader.readSliceShort(self.res); + break :blk decomp_rdr.reader.readSliceShort(self.res) catch |err| { + break :blk decomp_rdr.err orelse err; + }; }, .lzma => { var decomp_rdr = compress.lzma.decompress(self.mgr.alloc, rdr.adaptToOldInterface()) catch |err| { @@ -110,7 +113,9 @@ pub const DecompThread = struct { }, .zstd => { var decomp_rdr = compress.zstd.Decompress.init(rdr, self.buf, .{}); - break :blk decomp_rdr.reader.readSliceShort(self.res); + break :blk decomp_rdr.reader.readSliceShort(self.res) catch |err| { + break :blk decomp_rdr.err orelse err; + }; }, else => unreachable, }; @@ -182,8 +187,6 @@ pub fn decompSlice(self: *DecompMgr, dat: []u8, res: []u8) !usize { return thr.submitData(dat, res); } pub fn decompReader(self: *DecompMgr, rdr: *Reader, res: []u8) !usize { - std.debug.print("HELLO\n", .{}); - defer std.debug.print("GOODBYE\n", .{}); self.mut.lock(); var thr: *DecompThread = undefined; var node = self.queue.popFirst(); diff --git a/src/dir_entry.zig b/src/dir_entry.zig index 0eee76a..c3dd882 100644 --- a/src/dir_entry.zig +++ b/src/dir_entry.zig @@ -38,7 +38,7 @@ pub fn readDir(alloc: std.mem.Allocator, rdr: *Reader, size: u32) ![]Entry { try rdr.readSliceEndian(Header, @ptrCast(&hdr), .little); cur_red += @sizeOf(Header); try out.ensureUnusedCapacity(alloc, hdr.num + 1); - for (0..hdr.num + 1) |_| { + for (0..hdr.count + 1) |_| { try rdr.readSliceEndian(RawEntry, @ptrCast(&raw), .little); const name = try alloc.alloc(u8, raw.name_size + 1); try rdr.readSliceEndian(u8, name, .little); diff --git a/src/file.zig b/src/file.zig index 90dff2b..822a8cc 100644 --- a/src/file.zig +++ b/src/file.zig @@ -12,6 
+12,7 @@ const FileError = error{ NotDirectory, NotRegularFile, NotFound, + InvalidExtractionPath, }; const File = @This(); @@ -49,6 +50,13 @@ pub fn deinit(self: File) void { self.inode.deinit(alloc); } +pub fn ownerUid(self: File) !u16 { + return self.archive.id(self.inode.hdr.uid_idx); +} +pub fn ownerGid(self: File) !u16 { + return self.archive.id(self.inode.hdr.gid_idx); +} + fn getEntries(self: File) ![]DirEntry { if (!self.isDir()) return FileError.NotDirectory; var block_start: u32 = undefined; @@ -71,7 +79,7 @@ fn getEntries(self: File) ![]DirEntry { const alloc = self.archive.allocator(); var meta: MetadataReader = .init(alloc, &rdr.interface, &self.archive.decomp); try meta.interface.discardAll(block_offset); - return DirEntry.readDir(alloc, &rdr.interface, size); + return DirEntry.readDir(alloc, &meta.interface, size); } pub fn isDir(self: File) bool { @@ -80,6 +88,10 @@ pub fn isDir(self: File) bool { else => false, }; } +pub fn iter(self: File) !Iterator { + var entries = try self.getEntries(); + return error.TODO; +} /// Open a file/folder within a directory at the given path. /// If path is ".", "/", or "./", this File is returned. @@ -92,13 +104,13 @@ pub fn open(self: File, path: []const u8) !File { if (path[path.len - 1] == '/') return self.open(path[0 .. 
path.len - 1]); const idx = std.mem.indexOf(u8, path, "/") orelse path.len; const first_element = path[0..idx]; - if (std.mem.eql(u8, first_element, ".")) return self; + if (std.mem.eql(u8, first_element, ".")) return self.open(path[idx + 1 ..]); const entries = try self.getEntries(); var cur_slice = entries; var split = cur_slice.len / 2; - while (cur_slice.len == 0) { + while (cur_slice.len > 0) { split = cur_slice.len / 2; - const comp = std.mem.order(u8, entries[split].name, first_element); + const comp = std.mem.order(u8, first_element, cur_slice[split].name); switch (comp) { .eq => { var fil: File = try .fromEntry(self.archive, cur_slice[split]); @@ -109,27 +121,101 @@ pub fn open(self: File, path: []const u8) !File { return fil.open(path[idx + 1 ..]); }, .lt => cur_slice = cur_slice[0..split], - .gt => cur_slice = cur_slice[split..], + .gt => cur_slice = cur_slice[split + 1 ..], } } return FileError.NotFound; } pub fn extract(self: *File, path: []const u8, options: ExtractionOptions) !void { - _ = self; - _ = path; - _ = options; - return error.TODO; + std.Options = .{ + .log_level = options.log_level, + }; + var alloc = self.archive.allocator(); + var ext_path: []u8 = undefined; + if (std.fs.cwd().statFile(path)) |stat| { + if (stat.kind == .directory) { + if (!self.isDir()) { + const has_end_sep = path[path.len - 1] == '/'; + const alloc_size = if (has_end_sep) + path.len + self.name.len + else + path.len + self.name.len + 1; + ext_path = try alloc.alloc(u8, alloc_size); + @memcpy(ext_path[0..path.len], path); + @memcpy(ext_path[ext_path.len - self.name.len ..], self.name); + if (!has_end_sep) ext_path[path.len] = '/'; + } else { + ext_path = path; + } + } else return FileError.InvalidExtractionPath; + } else |err| { + if (err == error.FileNotFound) { + ext_path = path; + } else { + std.log.err("Error stat-ing extraction path {s}: {}\n", .{ path, err }); + return err; + } + } + defer if (ext_path.len > path.len) alloc.free(ext_path); + var pool: std.Thread.Pool = 
.{}; + try pool.init(.{ .allocator = alloc }); + var wg: WaitGroup = .{}; + defer pool.deinit(); + var err: ?anyerror = null; + self.extractReal(ext_path, options, &pool, &wg, &err, null); + wg.wait(); + if (err != null) return err.?; } const ParentInfo = struct { fil: *File, + mut: Mutex = .{}, + + fn finish(self: *ParentInfo) void {} }; -fn extractReal(self: *File, path: []const u8, options: ExtractionOptions) void { - _ = self; - _ = path; - _ = options; +fn extractReal(self: *File, path: []const u8, options: ExtractionOptions, pol: *std.Thread.Pool, wg: *WaitGroup, out_err: *?anyerror, parent: ?ParentInfo) void { + std.log.info("Extracting {s} (inode {}) to {s}\n", .{ self.name, self.inode.hdr.num, path }); + defer if (parent != null) parent.?.finish(); + switch (self.inode.hdr.inode_type) { + .file, .ext_file => { + var fil = std.fs.cwd().createFile(path, .{}) catch |err| { + std.log.err("Error creating {s}: {}\n", .{ path, err }); + out_err.* = err; + return; + }; + //TODO: + self.setPerm(&fil, options) catch |err| { + std.log.err("Error setting permissions for {s}: {}\n", .{ path, err }); + out_err.* = err; + return; + }; + }, + .symlink, .ext_symlink => {}, + .block_dev, + .char_dev, + .fifo, + .ext_block_dev, + .ext_char_dev, + .ext_fifo, + => {}, + .dir, .ext_dir => { + var parent_info: ParentInfo = .{ + .fil = self, + }; + var dir_wg: WaitGroup = .{}; + var iter: Iterator = self.iter() catch |err| {}; + }, + .socket, .ext_socket => { + std.log.info("Ignoring socket file {s} (inode {})\n", .{ self.name, self.inode.hdr.num }); + }, + } +} + +pub fn setPerm(self: File, fil: *std.fs.File, options: ExtractionOptions) !void { + if (!options.ignorePermissions) try fil.chmod(self.inode.hdr.permissions); + if (!options.ignoreOwner) try fil.chown(try self.ownerUid(), try self.ownerGid()); } pub fn pathIsSelf(path: []const u8) bool { diff --git a/src/inode_data/file.zig b/src/inode_data/file.zig index 12240f8..826fc65 100644 --- a/src/inode_data/file.zig +++ 
b/src/inode_data/file.zig @@ -2,8 +2,9 @@ const std = @import("std"); const Reader = std.Io.Reader; pub const BlockSize = packed struct { - size: u31, + size: u24, uncompressed: bool, + _: u7, }; pub const File = struct { @@ -15,7 +16,7 @@ pub const File = struct { pub fn read(alloc: std.mem.Allocator, rdr: *Reader, block_size: u32) !File { var start: [16]u8 = undefined; - try rdr.readSliceEndian(u8, &start, .little); + try rdr.readSliceAll(&start); const frag_idx: u32 = std.mem.readInt(u32, start[4..8], .little); const size: u32 = std.mem.readInt(u32, start[12..16], .little); var num_blocks: u32 = size / block_size; @@ -49,7 +50,7 @@ pub const ExtFile = struct { pub fn read(alloc: std.mem.Allocator, rdr: *Reader, block_size: u32) !ExtFile { var start: [40]u8 = undefined; - try rdr.readSliceEndian(u8, &start, .little); + try rdr.readSliceAll(&start); const frag_idx: u32 = std.mem.readInt(u32, start[28..32], .little); const size: u64 = std.mem.readInt(u64, start[8..16], .little); var num_blocks: u32 = @truncate(size / block_size); diff --git a/src/inode_data/misc.zig b/src/inode_data/misc.zig index 2bed10e..e1c8a01 100644 --- a/src/inode_data/misc.zig +++ b/src/inode_data/misc.zig @@ -7,7 +7,7 @@ pub const Symlink = struct { pub fn read(alloc: std.mem.Allocator, rdr: *Reader) !Symlink { var start: [8]u8 = undefined; - try rdr.readSliceEndian(u8, &start, .little); + try rdr.readSliceAll(&start); const target_size = std.mem.readInt(u32, start[4..8], .little); const target = try alloc.alloc(u8, target_size + 1); errdefer alloc.free(target); @@ -30,7 +30,7 @@ pub const ExtSymlink = struct { pub fn read(alloc: std.mem.Allocator, rdr: *Reader) !ExtSymlink { var start: [8]u8 = undefined; - try rdr.readSliceEndian(u8, &start, .little); + try rdr.readSliceAll(&start); const target_size = std.mem.readInt(u32, start[4..8], .little); const target = try alloc.alloc(u8, target_size + 1); errdefer alloc.free(target); diff --git 
a/src/options.zig b/src/options.zig index e681331..00a88f8 100644 --- a/src/options.zig +++ b/src/options.zig @@ -1,4 +1,5 @@ -const Writer = @import("std").Io.Writer; +const std = @import("std"); +const Writer = std.Io.Writer; const ExtractionOptions = @This(); @@ -9,9 +10,9 @@ ignoreOwner: bool = false, /// Replace symlinks with their target. dereferenceSymlinks: bool = false, -verbose: bool = false, -/// If options verbose and verboseWriter not set, logs are printed to stdout. -verboseWriter: ?Writer = null, +log_level: std.log.Level = .err, +// /// If options verbose and verboseWriter not set, logs are printed to stdout. +// verboseWriter: ?*Writer = null, pub const Default: ExtractionOptions = .{}; pub const VerboseDefault: ExtractionOptions = .{ .verbose = true }; diff --git a/src/util/data.zig b/src/util/data.zig new file mode 100644 index 0000000..274474f --- /dev/null +++ b/src/util/data.zig @@ -0,0 +1,130 @@ +const std = @import("std"); +const Reader = std.Io.Reader; +const Writer = std.Io.Writer; +const Limit = std.Io.Limit; + +const Archive = @import("../archive.zig"); +const FragEntry = Archive.FragEntry; +const DecompMgr = @import("../decomp.zig"); +const BlockSize = @import("../inode_data/file.zig").BlockSize; +const OffsetFile = @import("offset_file.zig"); + +const DataReader = @This(); + +alloc: std.mem.Allocator, +fil: OffsetFile, +decomp: *DecompMgr, +block_size: u32, + +blocks: []BlockSize, + +frag: ?FragEntry, // TODO: do something better? 
+frag_offset: u32 = 0, +size: u64, + +interface: Reader, + +cur_offset: u64, +block_idx: u32 = 0, + +pub fn init(archive: *Archive, blocks: []BlockSize, start: u64, size: u64) DataReader { + return .{ + .alloc = archive.allocator(), + .fil = archive.fil, + .decomp = &archive.decomp, + .block_size = archive.super.block_size, + .blocks = blocks, + .size = size, + .cur_offset = start, + .interface = .{ + .end = 0, + .seek = 0, + .buffer = &[0]u8{}, + .vtable = &.{ + .stream = stream, + .discard = discard, + .readVec = readVec, + }, + }, + }; +} +pub fn deinit(self: *DataReader) void { + self.alloc.free(self.interface.buffer); +} + +pub fn addFragment(self: *DataReader, entry: FragEntry, frag_offset: u32) void { + self.frag = entry; + self.frag_offset = frag_offset; +} + +fn blockNum(self: DataReader) u32 { + var res = self.blocks.len; + if (self.frag != null) res += 1; + return @intCast(res); +} + +fn advance(self: *DataReader) !void { + if (self.block_idx > self.blocks.len) return Reader.Error.EndOfStream; + defer self.block_idx += 1; + self.interface.seek = 0; + self.alloc.free(self.interface.buffer); + const cur_block_size = if (self.block_idx == self.blockNum() - 1) self.size % self.block_size else self.block_size; + if (self.block_idx == self.blocks.len) { + if (self.frag == null) return Reader.Error.EndOfStream; + // TODO: Fragment + return error.TODO; + } + const block = self.blocks[self.block_idx]; + if (block.uncompressed) { + var rdr = try self.fil.readerAt(self.cur_offset, &[0]u8{}); + self.interface.buffer = try rdr.interface.readAlloc(self.alloc, cur_block_size); + self.interface.end = self.interface.buffer.len; + return; + } + return error.TODO; +} + +fn stream(rdr: *Reader, wrt: *Writer, limit: Limit) Reader.StreamError!usize { + var self: *DataReader = @fieldParentPtr("interface", rdr); + if (rdr.seek >= rdr.end) self.advance() catch |err| { + if (err == error.EndOfStream) return error.EndOfStream; + std.log.err("Error advancing data reader: {}\n", .{err}); + return 
Reader.Error.ReadFailed; + }; + if (limit == .nothing) return 0; + const to_read = @min(rdr.end - rdr.seek, @intFromEnum(limit)); + const res = try wrt.write(rdr.buffer[rdr.seek .. rdr.seek + to_read]); + rdr.seek += res; + return res; +} + +fn discard(rdr: *Reader, limit: Limit) Reader.Error!usize { + var self: *DataReader = @fieldParentPtr("interface", rdr); + if (rdr.seek >= rdr.end) self.advance() catch |err| { + if (err == error.EndOfStream) return error.EndOfStream; + std.log.err("Error advancing data reader: {}\n", .{err}); + return Reader.Error.ReadFailed; + }; + if (limit == .nothing) return 0; + const to_adv = @min(rdr.end - rdr.seek, @intFromEnum(limit)); + rdr.seek += to_adv; + return to_adv; +} + +fn readVec(rdr: *Reader, vec: [][]u8) Reader.Error!usize { + var self: *DataReader = @fieldParentPtr("interface", rdr); + if (rdr.seek >= rdr.end) self.advance() catch |err| { + if (err == error.EndOfStream) return error.EndOfStream; + std.log.err("Error advancing data reader: {}\n", .{err}); + return Reader.Error.ReadFailed; + }; + var cur_red: usize = 0; + for (vec) |s| { + const to_copy: usize = @min(rdr.end - rdr.seek, s.len); + @memcpy(s[0..to_copy], rdr.buffer[rdr.seek .. rdr.seek + to_copy]); + rdr.seek += to_copy; + cur_red += to_copy; + if (rdr.end == rdr.seek) break; + } + return cur_red; +} diff --git a/src/util/metadata.zig b/src/util/metadata.zig index e9b5801..bb0419f 100644 --- a/src/util/metadata.zig +++ b/src/util/metadata.zig @@ -50,7 +50,7 @@ fn advance(self: *This) !void { self.interface.buffer = self.buf[0..hdr.size]; return; } - var tmp_buf: [1024]u8 = undefined; + var tmp_buf: [8192]u8 = undefined; var limit_rdr = self.rdr.limited(@enumFromInt(hdr.size), &tmp_buf); self.interface.end = try self.decomp.decompReader(&limit_rdr.interface, &self.buf); self.interface.buffer = self.buf[0..self.interface.end];