From 3c98cf2cdb4357f899b464f070e741c1c9dbb007 Mon Sep 17 00:00:00 2001 From: "Caleb J. Gardner" Date: Wed, 28 Jan 2026 11:19:38 -0600 Subject: [PATCH] EXTRACTION --- src/file.zig | 227 +++++++++++++++++++++++++++++++++++++++------- src/util/data.zig | 39 +++++--- 2 files changed, 219 insertions(+), 47 deletions(-) diff --git a/src/file.zig b/src/file.zig index 822a8cc..875a129 100644 --- a/src/file.zig +++ b/src/file.zig @@ -1,4 +1,5 @@ const std = @import("std"); +const File = std.fs.File; const WaitGroup = std.Thread.WaitGroup; const Mutex = std.Thread.Mutex; @@ -11,11 +12,13 @@ const MetadataReader = @import("util/metadata.zig"); const FileError = error{ NotDirectory, NotRegularFile, + NotSymlink, + NotDevice, NotFound, InvalidExtractionPath, }; -const File = @This(); +const SfsFile = @This(); archive: *Archive, @@ -24,7 +27,7 @@ name: []const u8, /// Initialize a new File. /// name is copied to the File so can be safely freed afterwards. -pub fn init(archive: *Archive, inode: Inode, name: []const u8) !File { +pub fn init(archive: *Archive, inode: Inode, name: []const u8) !SfsFile { const new_name = try archive.allocator().alloc(u8, name.len); @memcpy(new_name, name); return .{ @@ -33,7 +36,7 @@ pub fn init(archive: *Archive, inode: Inode, name: []const u8) !File { .name = new_name, }; } -pub fn fromEntry(archive: *Archive, entry: DirEntry) !File { +pub fn fromEntry(archive: *Archive, entry: DirEntry) !SfsFile { var rdr = try archive.fil.readerAt(entry.block_start + archive.super.inode_start, &[0]u8{}); var meta: MetadataReader = .init(archive.allocator(), &rdr.interface, &archive.decomp); try meta.interface.discardAll(entry.block_offset); @@ -44,20 +47,13 @@ pub fn fromEntry(archive: *Archive, entry: DirEntry) !File { return .init(archive, inode, new_name); } -pub fn deinit(self: File) void { +pub fn deinit(self: SfsFile) void { var alloc = self.archive.allocator(); alloc.free(self.name); self.inode.deinit(alloc); } -pub fn ownerUid(self: File) !u16 { - return 
self.archive.id(self.inode.hdr.uid_idx); -} -pub fn ownerGid(self: File) !u16 { - return self.archive.id(self.inode.hdr.gid_idx); -} - -fn getEntries(self: File) ![]DirEntry { +fn getEntries(self: SfsFile) ![]DirEntry { if (!self.isDir()) return FileError.NotDirectory; var block_start: u32 = undefined; var block_offset: u16 = undefined; @@ -82,24 +78,36 @@ fn getEntries(self: File) ![]DirEntry { return DirEntry.readDir(alloc, &meta.interface, size); } -pub fn isDir(self: File) bool { +pub fn ownerUid(self: SfsFile) !u16 { + return self.archive.id(self.inode.hdr.uid_idx); +} +pub fn ownerGid(self: SfsFile) !u16 { + return self.archive.id(self.inode.hdr.gid_idx); +} +pub fn permissions(self: SfsFile) u16 { + return self.inode.hdr.permissions; +} + +pub fn isDir(self: SfsFile) bool { return switch (self.inode.hdr.inode_type) { .dir, .ext_dir => true, else => false, }; } -pub fn iter(self: File) !Iterator { - var entries = try self.getEntries(); - return error.TODO; +pub fn iterate(self: SfsFile) !Iterator { + if (!self.isDir()) return FileError.NotDirectory; + return .{ + .entries = try self.getEntries(), + .archive = self.archive, + }; } - /// Open a file/folder within a directory at the given path. /// If path is ".", "/", or "./", this File is returned. -pub fn open(self: File, path: []const u8) !File { +pub fn open(self: SfsFile, path: []const u8) !SfsFile { if (!self.isDir()) return FileError.NotDirectory; if (pathIsSelf(path)) return self; // Recursively stip ending & leading path separators. - // TODO: potentially do this more efficiently or have stricter requirements. + // TODO: potentially do this more efficiently or have stricter path requirements. if (path[0] == '/') return self.open(path[1..]); if (path[path.len - 1] == '/') return self.open(path[0 .. 
path.len - 1]); const idx = std.mem.indexOf(u8, path, "/") orelse path.len; @@ -113,7 +121,7 @@ pub fn open(self: File, path: []const u8) !File { const comp = std.mem.order(u8, first_element, cur_slice[split].name); switch (comp) { .eq => { - var fil: File = try .fromEntry(self.archive, cur_slice[split]); + var fil: SfsFile = try .fromEntry(self.archive, cur_slice[split]); if (idx == path.len) { return fil; } @@ -127,7 +135,41 @@ pub fn open(self: File, path: []const u8) !File { return FileError.NotFound; } -pub fn extract(self: *File, path: []const u8, options: ExtractionOptions) !void { +pub fn isSymlink(self: SfsFile) bool { + return switch (self.inode.hdr.inode_type) { + .symlink, .ext_symlink => true, + else => false, + }; +} +pub fn symlinkPath(self: SfsFile) ![]const u8 { + if (!self.isSymlink()) FileError.NotSymlink; + return switch (self.inode.data) { + .symlink => |s| s.target, + .ext_symlink => |s| s.target, + else => unreachable, + }; +} + +/// Check if the File is a block or character device. +pub fn isDevice(self: SfsFile) bool { + return switch (self.inode.hdr.inode_type) { + .block_dev, .char_dev, .ext_block_dev, .ext_char_dev => true, + else => false, + }; +} +/// If the File is a block or character device, gets its device number. +pub fn dev(self: SfsFile) !u32 { + if (!self.isDevice()) return FileError.NotDevice; + return switch (self.inode.data) { + .block_dev, .char_dev => |d| d.dev, + .ext_block_dev, .ext_char_dev => |d| d.dev, + else => unreachable, + }; +} + +/// Extract the given File to the path. If File is a regular file, the path must be a directory or not exist. +/// If the given path is a folder, the File's contents will be extracted within. 
+pub fn extract(self: *SfsFile, path: []const u8, options: ExtractionOptions) !void { std.Options = .{ .log_level = options.log_level, }; @@ -163,32 +205,72 @@ pub fn extract(self: *File, path: []const u8, options: ExtractionOptions) !void var wg: WaitGroup = .{}; defer pool.deinit(); var err: ?anyerror = null; + wg.start(); self.extractReal(ext_path, options, &pool, &wg, &err, null); wg.wait(); if (err != null) return err.?; } const ParentInfo = struct { - fil: *File, - mut: Mutex = .{}, + sfs_fil: SfsFile, + path: []const u8, + mut: *Mutex, + dir_wg: *WaitGroup, + parent_wg: *WaitGroup, + options: ExtractionOptions, + err: *?anyerror, - fn finish(self: *ParentInfo) void {} + fn finish(self: *ParentInfo) void { + { + self.mut.lock(); + defer self.mut.unlock(); + self.dir_wg.finish(); + if (!self.dir_wg.isDone()) { + return; + } + } + self.sfs_fil.archive.allocator().destroy(self.mut); + defer self.parent_wg.finish(); + var fil = std.fs.cwd().openFile(self.path, .{}) catch |err| { + std.log.err("Error opening folder {s} to set permissions: {}\n", .{ self.path, err }); + self.err.* = err; + return; + }; + defer fil.close(); + self.sfs_fil.setPerm(fil, self.options) catch |err| { + std.log.err("Error setting permissions to {s}: {}\n", .{ self.path, err }); + self.err.* = err; + return; + }; + } }; -fn extractReal(self: *File, path: []const u8, options: ExtractionOptions, pol: *std.Thread.Pool, wg: *WaitGroup, out_err: *?anyerror, parent: ?ParentInfo) void { +fn extractReal(self: SfsFile, path: []const u8, options: ExtractionOptions, pol: *std.Thread.Pool, wg: *WaitGroup, out_err: *?anyerror, parent: ?ParentInfo) void { std.log.info("Extracting {s} (inode {}) to {s}\n", .{ self.name, self.inode.hdr.num, path }); - defer if (parent != null) parent.?.finish(); + defer { + if (parent != null) { + parent.?.finish(); + self.archive.allocator().free(path); + self.deinit(); + } else { + wg.finish(); + } + } + if (out_err.* != null) { + return; + } switch 
(self.inode.hdr.inode_type) { .file, .ext_file => { var fil = std.fs.cwd().createFile(path, .{}) catch |err| { - std.log.err("Error creating {}: {}\n", .{ path, err }); - out_err = err; + std.log.err("Error creating {s}: {}\n", .{ path, err }); + out_err.* = err; return; }; + defer fil.close(); //TODO: self.setPerm(fil, options) catch |err| { - std.log.err("Error setting permissions for {}: {}\n", .{ path, err }); - out_err = err; + std.log.err("Error setting permissions for {s}: {}\n", .{ path, err }); + out_err.* = err; return; }; }, @@ -201,11 +283,65 @@ fn extractReal(self: *File, path: []const u8, options: ExtractionOptions, pol: * .ext_fifo, => {}, .dir, .ext_dir => { - var parent_info: ParentInfo = .{ - .fil = self, + _ = std.fs.cwd().statFile(path) catch |err| { + if (err == .NotFound) {} }; - var dir_wg: WaitGroup = .{}; - var iter: Iterator = self.iter() catch |err| {}; + var dir_wg: *WaitGroup = self.archive.allocator().create(WaitGroup) catch |err| { + std.log.err("Error allocating mutex for {s} (inode {}): {}\n", .{ path, self.inode.hdr.num, err }); + out_err.* = err; + return; + }; + const parent_info: ParentInfo = .{ + .fil = self, + .path = path, + .dir_wg = dir_wg, + .parent_wg = wg, + .options = options, + .err = out_err, + }; + var iter: Iterator = self.iterate() catch |err| { + std.log.err("Error getting iterator for {s} (inode {}): {}\n", .{ path, self.inode.hdr.num, err }); + out_err.* = err; + return; + }; + defer iter.deinit(); + const path_has_end_sep = path[path.len - 1] == '/'; + while (true) { + const iter_fil = iter.next() catch |err| { + std.log.err("Error getting next iterator value {s} (inode {}): {}\n", .{ path, self.inode.hdr.num, err }); + out_err.* = err; + break; + }; + if (iter_fil == null) break; + var fil = iter_fil.?; + dir_wg.start(); + const path_len = path.len + fil.name.len; + if (!path_has_end_sep) path_len += 1; + var new_path = self.archive.allocator().alloc(u8, path_len) catch |err| { + std.log.err("Error allocating 
subpath for {s} (inode {}): {}\n", .{ path, self.inode.hdr.num, err }); + out_err.* = err; + dir_wg.finish(); + break; + }; + @memcpy(new_path[0..path.len], path); + @memcpy(new_path[new_path.len - fil.name.len ..], fil.name.len); + if (!path_has_end_sep) new_path[path.len] = '/'; + pol.spawn(extractReal, .{ + fil, + new_path, + options, + pol, + wg, + out_err, + parent_info, + }) catch |err| { + std.log.err("Error starting sub-file extraction thread: {}\n", .{err}); + out_err.* = err; + dir_wg.finish(); + break; + }; + fil.extractReal; + } }, .socket, .ext_socket => { std.log.info("Ignoring socket file {s} (inode {})\n", .{ self.name, self.inode.hdr.num }); @@ -213,14 +349,35 @@ fn extractReal(self: *File, path: []const u8, options: ExtractionOptions, pol: * } } -pub fn setPerm(self: File, fil: *std.fs.File, options: ExtractionOptions) !void { +fn setPerm(self: SfsFile, fil: File, options: ExtractionOptions) !void { if (!options.ignoreOwner) try fil.chmod(self.inode.hdr.permissions); if (!options.ignorePermissions) try fil.chown(try self.ownerUid(), try self.ownerGid()); } +/// Utility function. Returns true when path is empty, "/", ".", or "./". pub fn pathIsSelf(path: []const u8) bool { if (path.len == 0) return true; if (path.len == 1 and (path[0] == '/' or path[0] == '.')) return true; if (path.len == 2 and (path[0] == '.' 
and path[1] == '/')) return true; return false; } + +pub const Iterator = struct { + entries: []DirEntry, + archive: *Archive, + + idx: u32 = 0, + + pub fn next(self: *Iterator) !?SfsFile { + if (self.idx >= self.entries.len) return null; + defer self.idx += 1; + return try SfsFile.fromEntry(self.archive, self.entries[self.idx]); + } + pub fn deinit(self: Iterator) void { + var alloc = self.archive.allocator(); + for (self.entries) |e| { + e.deinit(alloc); + } + alloc.free(self.entries); + } +}; diff --git a/src/util/data.zig b/src/util/data.zig index 274474f..24d244e 100644 --- a/src/util/data.zig +++ b/src/util/data.zig @@ -64,24 +64,39 @@ fn blockNum(self: DataReader) u32 { } fn advance(self: *DataReader) !void { - if (self.block_idx > self.blocks.len) return Reader.Error.EndOfStream; + if (self.block_idx > self.blocks.len or (self.block_idx == self.blocks.len and self.frag == null)) return Reader.Error.EndOfStream; defer self.block_idx += 1; self.interface.seek = 0; self.alloc.free(self.interface.buffer); - const cur_block_size = if (self.block_idx == self.blockNum() - 1) self.size % self.block_size else self.block_size; - if (self.block_idx == self.blocks.len) { - if (self.frag == null) return Reader.Error.EndOfStream; - // TODO: Fragment - return error.TODO; - } - const block = self.blocks[self.block_idx]; - if (block.uncompressed) { - var rdr = try self.fil.readerAt(self.cur_offset, &[0]u8); - self.interface.buffer = try rdr.interface.readAlloc(self.alloc, cur_block_size); + if (self.block_idx == self.blocks.len) { // fragment + var rdr = try self.fil.readerAt(self.frag.?.start + self.frag_offset, &[0]u8); + self.interface.buffer = try rdr.interface.readAlloc(self.alloc, self.size % self.block_size); self.interface.end = self.interface.buffer.len; + if (self.frag.?.size.uncompressed) { + try rdr.interface.discardAll(self.frag_offset); + try rdr.interface.readSliceAll(self.interface.buffer); + return; + } + const tmp_buf = try self.alloc.alloc(u8, 
self.frag.?.size.size); + defer self.alloc.free(tmp_buf); + var limit_rdr = Reader.limited(&rdr.interface, self.frag.?.size.size, tmp_buf); + const needed_block = try self.alloc.alloc(u8, self.frag_offset + self.interface.buffer.len); + defer self.alloc.free(needed_block); + _ = try self.decomp.decompReader(&limit_rdr.interface, needed_block); + @memcpy(self.interface.buffer, needed_block[self.frag_offset..]); return; } - return error.TODO; + const cur_block_size = if (self.block_idx == self.blockNum() - 1) self.size % self.block_size else self.block_size; + const block = self.blocks[self.block_idx]; + var rdr = try self.fil.readerAt(self.cur_offset, &[0]u8); + self.interface.end = cur_block_size; + if (block.uncompressed) { + self.interface.buffer = try rdr.interface.readAlloc(self.alloc, cur_block_size); + return; + } + var buf: [8192]u8 = undefined; + var limit_rdr = Reader.limited(&rdr.interface, block.size, &buf); + _ = try self.decomp.decompReader(&limit_rdr.interface, self.interface.buffer); } fn stream(rdr: *Reader, wrt: *Writer, limit: Limit) Reader.StreamError!usize {