From d1d453ac296b2b8adbc50caeb0f004f9f31a8d78 Mon Sep 17 00:00:00 2001
From: Caleb Gardner
Date: Thu, 21 May 2026 05:07:02 -0500
Subject: [PATCH] Finished an initial version of extraction

It works, but is very slow.
---
 src/decomp/zstd.zig         |  47 ++++----
 src/directory.zig           |   4 +-
 src/inode.zig               | 227 +++++++++++++++++++++++++++++++-----
 src/lookup_table.zig        |  33 +++++-
 src/test.zig                |   6 +
 src/util/data_extractor.zig |  14 ++-
 src/util/offset_file.zig    |   9 +-
 7 files changed, 276 insertions(+), 64 deletions(-)

diff --git a/src/decomp/zstd.zig b/src/decomp/zstd.zig
index 33998bc..4f41d83 100644
--- a/src/decomp/zstd.zig
+++ b/src/decomp/zstd.zig
@@ -1,4 +1,5 @@
 const std = @import("std");
+const Io = std.Io;
 const Reader = std.Io.Reader;
 const zstd = std.compress.zstd;
 const Node = std.SinglyLinkedList.Node;
@@ -6,33 +7,39 @@ const Node = std.SinglyLinkedList.Node;
 const Decompressor = @import("../util/decompressor.zig");
 const Error = Decompressor.Error;
 
-const Self = @This();
+const Queue = std.Io.Queue([]u8);
 
-const Buffer = struct {
-    node: Node,
-    buf: []u8,
-};
+const Self = @This();
 
 interface: Decompressor = .{ .decomp_fn = decomp },
 
 alloc: std.mem.Allocator,
+io: Io,
 block_size: u32,
 
-buffers: std.ArrayList(Buffer),
-buffer_queue: std.SinglyLinkedList = .{},
+buf: [][]u8,
+buf_queue: Queue,
+
+pub fn init(alloc: std.mem.Allocator, io: Io, block_size: u32) !Self {
+    const buf = try alloc.alloc([]u8, 20); // TODO: Choose a better number instead of a random one.
+    var queue: Queue = .init(buf);
+    for (0..20) |_|
+        try queue.putOne(io, try alloc.alloc(u8, block_size + zstd.block_size_max));
 
-pub fn init(alloc: std.mem.Allocator, block_size: u32) !Self {
     return .{
         .alloc = alloc,
+        .io = io,
         .block_size = block_size,
-        .buffers = try .initCapacity(alloc, 5),
+        .buf = buf,
+        .buf_queue = queue,
     };
 }
 
 pub fn deinit(self: *Self) void {
-    for (self.buffers.items) |buf|
-        self.alloc.free(buf.buf);
-    self.buffers.deinit(self.alloc);
+    self.buf_queue.close(self.io);
+    for (self.buf) |buf|
+        self.alloc.free(buf);
+    self.alloc.free(self.buf);
 }
 
 fn decomp(d: ?*const Decompressor, alloc: std.mem.Allocator, in: []u8, out: []u8) Error!usize {
@@ -42,17 +49,11 @@ fn decomp(d: ?*const Decompressor, alloc: std.mem.Allocator, in: []u8, out: []u8
         return zstdDecomp(buf, in, out);
     }
     var self: *Self = @fieldParentPtr("interface", @constCast(d.?));
-    const buf_node = self.buffer_queue.popFirst();
-    var buf: *Buffer = undefined;
-    if (buf_node == null) {
-        const new_buf = try self.buffers.addOne(self.alloc);
-        new_buf.* = .{ .node = .{}, .buf = try self.alloc.alloc(u8, self.block_size + zstd.block_size_max) };
-        buf = new_buf;
-    } else {
-        buf = @fieldParentPtr("node", buf_node.?);
-    }
-    defer self.buffer_queue.prepend(&buf.node);
-    return zstdDecomp(buf.buf, in, out);
+
+    const buf = self.buf_queue.getOne(self.io) catch return Error.ReadFailed;
+    defer self.buf_queue.putOne(self.io, buf) catch {};
+
+    return zstdDecomp(buf, in, out);
 }
 
 inline fn zstdDecomp(buffer: []u8, in: []u8, out: []u8) !usize {
diff --git a/src/directory.zig b/src/directory.zig
index d3fd314..8af793a 100644
--- a/src/directory.zig
+++ b/src/directory.zig
@@ -3,6 +3,8 @@ const Reader = std.Io.Reader;
 
 const Inode = @import("inode.zig");
 
+pub const Error = error{OutOfMemory} || Reader.Error;
+
 const DirEntry = @This();
 
 block_start: u32,
@@ -14,7 +16,7 @@ pub fn deinit(self: DirEntry, alloc: std.mem.Allocator) void {
     alloc.free(self.name);
 }
 
-pub fn readDirectory(alloc: std.mem.Allocator, rdr: *Reader, size: u32) ![]DirEntry {
+pub fn readDirectory(alloc: std.mem.Allocator, rdr: *Reader, size: u32) Error![]DirEntry {
     var hdr: Header = undefined;
     var raw: RawEntry = undefined;
     var out: std.ArrayList(DirEntry) = try .initCapacity(alloc, 30);
diff --git a/src/inode.zig b/src/inode.zig
index 8b608d4..ba53fd9 100644
--- a/src/inode.zig
+++ b/src/inode.zig
@@ -207,16 +207,16 @@ pub const Header = extern struct {
 
 // Extract
 
+const ExtractError = error{ MknodFailed, CannotSetXattr, ConcurrencyUnavailable } || DataExtractor.Error || Io.Dir.CreateFileAtomicError || LookupTable.Error ||
+    Io.File.Reader.SeekError || Io.File.Atomic.LinkError || Io.Dir.CreateDirError || Io.File.OpenError ||
+    Io.File.SetPermissionsError || Io.File.SetOwnerError || Io.Dir.SymLinkError || Io.Dir.CreateDirPathError;
 const PathRet = struct {
     path: []const u8,
-    permissions: u16,
-    uid_idx: u16,
-    gid_idx: u16,
+    inode: Inode,
     xattr_idx: ?u32 = null,
 };
 const ExtractReturnUnion = union(enum) {
-    path_ret: anyerror!PathRet, // TODO: convert to concrete error type instead of anyerror.
-    void_ret: anyerror!void,
+    path_ret: ExtractError!PathRet,
 };
 const Tables = struct {
     id: LookupTable.CachedTable(u16),
@@ -224,12 +224,24 @@ const Tables = struct {
     xattr: XattrTable,
 };
 
-pub fn extract(self: Inode, alloc: std.mem.Allocator, io: Io, fil: OffsetFile, super: Archive.Superblock, path: []const u8, options: ExtractionOptions) !void {
+/// Extracts the given inode to the given path. If the inode is not a directory, the given path must not exist.
+/// If the inode is a directory the path must not exist or be a directory.
+pub fn extract(
+    self: Inode,
+    alloc: std.mem.Allocator,
+    io: Io,
+    fil: OffsetFile,
+    super: Archive.Superblock,
+    filepath: []const u8,
+    options: ExtractionOptions,
+) !void {
+    const path = std.mem.trimEnd(u8, filepath, "/");
+
     var decomp_base: Decomp = switch (super.compression) {
         .gzip => .{ .gzip = try .init(alloc, super.block_size) },
         .lzma => .{ .lzma = try .init(alloc, super.block_size) },
         .xz => .{ .xz = try .init(alloc, super.block_size) },
-        .zstd => .{ .zstd = try .init(alloc, super.block_size) },
+        .zstd => .{ .zstd = try .init(alloc, io, super.block_size) },
         else => unreachable,
     };
     defer decomp_base.deinit();
@@ -237,14 +249,29 @@ pub fn extract(self: Inode, alloc: std.mem.Allocator, io: Io, fil: OffsetFile, s
 
     var frag_table: CachedTable(FragEntry) = .init(alloc, fil, decomp, super.frag_start, super.frag_count);
     defer if (!options.ignore_permissions) frag_table.deinit(io);
+    try frag_table.fill(io);
+
+    var arena: std.heap.ArenaAllocator = .init(alloc);
+    defer arena.deinit();
 
     var sel_buf = [1]ExtractReturnUnion{undefined} ** 10;
     var sel: Io.Select(ExtractReturnUnion) = .init(io, &sel_buf);
     defer sel.cancelDiscard();
 
     switch (self.hdr.inode_type) {
-        .file, .ext_file => sel.async(.path_ret, extractFile, .{ self, alloc, io, fil, decomp, &frag_table, super.block_size, path }),
-        else => return error.TODO,
+        .dir, .ext_dir => try sel.concurrent(
+            .path_ret,
+            extractDir,
+            .{ self, alloc, io, fil, decomp, &sel, &arena, super.dir_start, super.inode_start, &frag_table, super.block_size, path },
+        ),
+        .file, .ext_file => try sel.concurrent(
+            .path_ret,
+            extractFile,
+            .{ self, alloc, io, fil, decomp, &frag_table, super.block_size, path },
+        ),
+        .symlink, .ext_symlink => try sel.concurrent(.path_ret, extractSymlink, .{ self, io, path }),
+        else => if (@import("builtin").os.tag == .linux)
+            try sel.concurrent(.path_ret, extractDevOrIPC, .{ self, alloc, path }),
     }
 
     var xattr_table: ?XattrTable = if (!options.ignore_xattr)
@@ -252,26 +279,24 @@ pub fn extract(self: Inode, alloc: std.mem.Allocator, io: Io, fil: OffsetFile, s
     else
         null;
     defer if (!options.ignore_xattr) xattr_table.?.deinit(io);
+    if (xattr_table != null) try xattr_table.?.table.fill(io);
 
     var id_table: ?CachedTable(u16) = if (!options.ignore_xattr)
         .init(alloc, fil, decomp, super.id_start, super.id_count)
     else
         null;
     defer if (!options.ignore_xattr) id_table.?.deinit(io);
+    if (id_table != null) try id_table.?.fill(io);
 
     while (true) {
-        if (sel.group.token.load(.unordered) == null) break;
+        const group_token = sel.group.token.load(.acquire);
+        if (group_token == null) break;
+        // std.debug.print("{any}\n", .{sel.group.state});
 
-        const ret = try sel.queue.getOne(io);
-        switch (ret) {
-            .void_ret => {
-                try ret.void_ret;
-                continue;
-            },
-            else => {},
-        }
+        // std.debug.print("Waiting for return...", .{});
+        const ret = try sel.await();
+        // std.debug.print("Got One...\n", .{});
         const path_ret = try ret.path_ret;
-        defer if (path_ret.path.len != path.len) alloc.free(path_ret.path);
 
         if (options.ignore_permissions and options.ignore_xattr) continue;
         if (options.ignore_permissions and path_ret.xattr_idx == null) continue;
@@ -280,10 +305,14 @@ pub fn extract(self: Inode, alloc: std.mem.Allocator, io: Io, fil: OffsetFile, s
         defer ret_file.close(io);
 
         if (!options.ignore_permissions) {
-            try ret_file.setPermissions(io, @enumFromInt(path_ret.permissions));
-            try ret_file.setOwner(io, try id_table.?.get(io, path_ret.uid_idx), try id_table.?.get(io, path_ret.gid_idx));
+            try ret_file.setPermissions(io, @enumFromInt(path_ret.inode.hdr.permissions));
+            try ret_file.setOwner(
+                io,
+                try id_table.?.get(io, path_ret.inode.hdr.uid_idx),
+                try id_table.?.get(io, path_ret.inode.hdr.gid_idx),
+            );
         }
-        if (!options.ignore_xattr and path_ret.xattr_idx != null) {
+        if (@import("builtin").os.tag == .linux and !options.ignore_xattr and path_ret.xattr_idx != null) {
             const xattrs = try xattr_table.?.get(alloc, io, path_ret.xattr_idx.?);
             defer {
                 for (xattrs) |x|
@@ -299,14 +328,95 @@ pub fn extract(self: Inode, alloc: std.mem.Allocator, io: Io, fil: OffsetFile, s
         }
     }
 }
-pub fn extractFile(self: Inode, alloc: std.mem.Allocator, io: Io, fil: OffsetFile, decomp: *const Decompressor, frag: *CachedTable(FragEntry), block_size: u32, path: []const u8) anyerror!PathRet {
+fn extractDir(
+    self: Inode,
+    alloc: std.mem.Allocator,
+    io: Io,
+    fil: OffsetFile,
+    decomp: *const Decompressor,
+    parent_select: *Io.Select(ExtractReturnUnion),
+    arena: *std.heap.ArenaAllocator,
+    dir_start: u64,
+    inode_start: u64,
+    frag: *CachedTable(FragEntry),
+    block_size: u32,
+    path: []const u8,
+) ExtractError!PathRet {
+    try Io.Dir.cwd().createDirPath(io, path);
+
+    var sel_buf = [1]ExtractReturnUnion{undefined} ** 10;
+    var sel: Io.Select(ExtractReturnUnion) = .init(io, &sel_buf);
+    defer sel.cancelDiscard();
+
+    var num: usize = 0;
+    {
+        const dir_entries = self.readDirectory(alloc, io, fil, decomp, dir_start) catch |err| switch (err) {
+            Error.NotDirectory => unreachable,
+            else => return @errorCast(err),
+        };
+        num = dir_entries.len;
+        defer {
+            for (dir_entries) |d|
+                d.deinit(alloc);
+            alloc.free(dir_entries);
+        }
+
+        for (dir_entries) |d| {
+            var rdr = try fil.readerAt(io, d.block_start + inode_start, &[0]u8{});
+            var meta_rdr: MetadataReader = .init(alloc, &rdr.interface, decomp);
+            try meta_rdr.interface.discardAll(d.block_offset);
+            const inode = try read(arena.allocator(), &meta_rdr.interface, block_size);
+            errdefer inode.deinit(arena.allocator());
+
+            const new_path = try std.mem.concat(arena.allocator(), u8, &[_][]const u8{ path, "/", d.name });
+            errdefer arena.allocator().free(new_path);
+
+            switch (d.type) {
+                .dir => try sel.concurrent(
+                    .path_ret,
+                    extractDir,
+                    .{ inode, alloc, io, fil, decomp, parent_select, arena, dir_start, inode_start, frag, block_size, new_path },
+                ),
+                .file => try sel.concurrent(
+                    .path_ret,
+                    extractFile,
+                    .{ inode, alloc, io, fil, decomp, frag, block_size, new_path },
+                ),
+                .symlink => try sel.concurrent(.path_ret, extractSymlink, .{ inode, io, new_path }),
+                else => try sel.concurrent(.path_ret, extractDevOrIPC, .{ inode, alloc, new_path }),
+            }
+        }
+    }
+
+    while (num > 0) {
+        const ret = sel.await() catch break;
+        num -= 1;
+
+        parent_select.queue.putOne(io, ret) catch |err| switch (err) {
+            error.Canceled => return error.Canceled,
+            else => break,
+        };
+    }
+    return .{
+        .path = path,
+        .inode = self,
+    };
+}
+fn extractFile(
+    self: Inode,
+    alloc: std.mem.Allocator,
+    io: Io,
+    fil: OffsetFile,
+    decomp: *const Decompressor,
+    frag: *CachedTable(FragEntry),
+    block_size: u32,
+    path: []const u8,
+) ExtractError!PathRet {
     var atomic = try Io.Dir.cwd().createFileAtomic(io, path, .{});
     defer atomic.deinit(io);
 
     var ret: PathRet = .{
-        .gid_idx = self.hdr.gid_idx,
-        .uid_idx = self.hdr.uid_idx,
-        .permissions = self.hdr.permissions,
+        .inode = self,
         .path = path,
     };
     const data: DataExtractor = blk: {
@@ -335,3 +445,68 @@ pub fn extractFile(self: Inode, alloc: std.mem.Allocator, io: Io, fil: OffsetFil
 
     return ret;
 }
+fn extractSymlink(self: Inode, io: Io, path: []const u8) ExtractError!PathRet {
+    const target = switch (self.data) {
+        .symlink => |s| s.target,
+        .ext_symlink => |s| s.target,
+        else => unreachable,
+    };
+
+    try Io.Dir.cwd().symLink(io, target, path, .{});
+
+    return .{
+        .path = path,
+        .inode = self,
+    };
+}
+fn extractDevOrIPC(self: Inode, alloc: std.mem.Allocator, path: []const u8) ExtractError!PathRet {
+    var dev_num: u32 = 0;
+    var mode: u32 = 0;
+
+    const S = std.os.linux.S; // mknod(2) takes S_IF* file-type bits, not DT_* dirent type codes.
+
+    var ret: PathRet = .{
+        .inode = self,
+        .path = path,
+    };
+
+    switch (self.data) {
+        .block_dev => |d| {
+            dev_num = d.dev;
+            mode = S.IFBLK;
+        },
+        .ext_block_dev => |d| {
+            dev_num = d.dev;
+            mode = S.IFBLK;
+            if (d.xattr_idx != 0xFFFFFFFF) ret.xattr_idx = d.xattr_idx;
+        },
+        .char_dev => |d| {
+            dev_num = d.dev;
+            mode = S.IFCHR;
+        },
+        .ext_char_dev => |d| {
+            dev_num = d.dev;
+            mode = S.IFCHR;
+            if (d.xattr_idx != 0xFFFFFFFF) ret.xattr_idx = d.xattr_idx;
+        },
+        .fifo => mode = S.IFIFO,
+        .ext_fifo => |f| {
+            mode = S.IFIFO;
+            if (f.xattr_idx != 0xFFFFFFFF) ret.xattr_idx = f.xattr_idx;
+        },
+        .socket => mode = S.IFSOCK,
+        .ext_socket => |s| {
+            mode = S.IFSOCK;
+            if (s.xattr_idx != 0xFFFFFFFF) ret.xattr_idx = s.xattr_idx;
+        },
+        else => unreachable,
+    }
+
+    const sentinel_path = try std.mem.concatWithSentinel(alloc, u8, &[_][]const u8{path}, 0);
+    defer alloc.free(sentinel_path);
+
+    const res = std.os.linux.mknod(sentinel_path, mode, dev_num);
+    if (res != 0) return ExtractError.MknodFailed;
+
+    return ret;
+}
diff --git a/src/lookup_table.zig b/src/lookup_table.zig
index e60a476..fc395f0 100644
--- a/src/lookup_table.zig
+++ b/src/lookup_table.zig
@@ -24,6 +24,8 @@ pub fn lookupValue(comptime T: anytype, alloc: std.mem.Allocator, io: Io, decomp
     return out;
 }
 
+pub const Error = Io.Cancelable || Io.File.Reader.SeekError || Io.Reader.ReadAllocError;
+
 pub fn CachedTable(comptime T: anytype) type {
     return struct {
         const T_PER_BLOCK: u16 = 8192 / @sizeOf(T);
@@ -55,10 +57,39 @@ pub fn CachedTable(comptime T: anytype) type {
         }
         pub fn deinit(self: *Table, io: Io) void {
             self.mut.lockUncancelable(io);
+            var iter = self.table.valueIterator();
+            while (iter.next()) |val|
+                self.alloc.free(val.*);
             self.table.deinit();
         }
 
-        pub fn get(self: *Table, io: Io, idx: u32) !T {
+        pub fn fill(self: *Table, io: Io) Error!void {
+            try self.mut.lock(io);
+            defer self.mut.unlock(io);
+
+            var num_blocks = self.total_num / T_PER_BLOCK;
+            if (self.total_num % T_PER_BLOCK > 0)
+                num_blocks += 1;
+
+            for (0..num_blocks) |block| {
+                var rdr = try self.fil.readerAt(io, self.table_start + (8 * block), &[0]u8{});
+                var offset: u64 = undefined;
+                try rdr.interface.readSliceEndian(u64, @ptrCast(&offset), .little);
+
+                const len: u16 = if (self.total_num % T_PER_BLOCK != 0 and block == (self.total_num - 1) / T_PER_BLOCK)
+                    @truncate(self.total_num % T_PER_BLOCK)
+                else
+                    T_PER_BLOCK;
+
+                rdr = try self.fil.readerAt(io, offset, &[0]u8{});
+                var meta: MetadataReader = .init(self.alloc, &rdr.interface, self.decomp);
+
+                const slice = try meta.interface.readSliceEndianAlloc(self.alloc, T, len, .little);
+                try self.table.put(@truncate(block), slice);
+            }
+        }
+
+        pub fn get(self: *Table, io: Io, idx: u32) Error!T {
             const block = idx / T_PER_BLOCK;
             const block_offset = idx % T_PER_BLOCK;
             if (self.table.contains(block))
diff --git a/src/test.zig b/src/test.zig
index ae74f1f..0f5032b 100644
--- a/src/test.zig
+++ b/src/test.zig
@@ -10,6 +10,8 @@ const Superblock = Archive.Superblock;
 const TestArchive = "testing/LinuxPATest.sfs";
 
 test "Basics" {
+    std.debug.print("Starting test: Basics...\n", .{});
+
     var fil = try Io.Dir.cwd().openFile(io, TestArchive, .{});
     defer fil.close(io);
     var sfs: Archive = try .init(io, fil, 0);
@@ -22,6 +24,8 @@ const TestFile = "Start.exe";
 const TestFileExtractLocation = "testing/Start.exe";
 
 test "ExtractSingleFile" {
+    std.debug.print("Starting test: ExtractSingleFile...\n", .{});
+
     Io.Dir.cwd().deleteFile(io, TestFileExtractLocation) catch {};
     var fil = try Io.Dir.cwd().openFile(io, TestArchive, .{});
     defer fil.close(io);
@@ -35,6 +39,8 @@ test "ExtractSingleFile" {
 const TestFullExtractLocation = "testing/TestExtract";
 
 test "ExtractCompleteArchive" {
+    std.debug.print("Starting test: ExtractCompleteArchive...\n", .{});
+
     Io.Dir.cwd().deleteTree(io, TestFullExtractLocation) catch {};
     var fil = try Io.Dir.cwd().openFile(io, TestArchive, .{});
     defer fil.close(io);
diff --git a/src/util/data_extractor.zig b/src/util/data_extractor.zig
index abc0404..4e6a6ba 100644
--- a/src/util/data_extractor.zig
+++ b/src/util/data_extractor.zig
@@ -10,6 +10,8 @@ const OffsetFile = @import("offset_file.zig");
 
 // const SharedCache = @import("shared_cache.zig");
 
+pub const Error = error{OutOfMemory} || Io.File.Reader.SeekError || Io.Writer.Error;
+
 const DataExtractor = @This();
 
 fil: OffsetFile,
@@ -23,7 +25,7 @@ blocks: []BlockSize,
 frag_offset: u32 = 0,
 frag_entry: ?FragEntry = null,
 
-err: ?anyerror = null,
+err: ?Error = null,
 
 pub fn init(fil: OffsetFile, decomp: *const Decompressor, block_size: u32, file_size: u64, data_start: u64, blocks: []BlockSize) DataExtractor {
     return .{
@@ -48,10 +50,10 @@ fn numBlocks(self: DataExtractor) usize {
 }
 
 /// Starts extracting the data using the given group to spawn async tasks.
-pub fn extractAsync(self: DataExtractor, alloc: std.mem.Allocator, io: Io, fil: Io.File) !void {
+pub fn extractAsync(self: DataExtractor, alloc: std.mem.Allocator, io: Io, fil: Io.File) Error!void {
     var group: Io.Group = .init;
     defer group.cancel(io);
-    var err: ?anyerror = null;
+    var err: ?Error = null;
 
     var read_offset: u64 = self.start;
     for (0..self.blocks.len) |idx| {
@@ -64,7 +66,7 @@ pub fn extractAsync(self: DataExtractor, alloc: std.mem.Allocator, io: Io, fil:
     group.await(io) catch |cancel| return err orelse cancel;
 }
 
-fn blockThread(self: DataExtractor, alloc: std.mem.Allocator, io: Io, fil: Io.File, read_offset: u64, idx: usize, ret_err: *?anyerror) Io.Cancelable!void {
+fn blockThread(self: DataExtractor, alloc: std.mem.Allocator, io: Io, fil: Io.File, read_offset: u64, idx: usize, ret_err: *?Error) Io.Cancelable!void {
     const block = self.blocks[idx];
 
     const cur_block_size = if (idx == self.numBlocks() - 1)
@@ -124,7 +126,7 @@ fn blockThread(self: DataExtractor, alloc: std.mem.Allocator, io: Io, fil: Io.Fi
         };
     }
 }
-fn fragThread(self: DataExtractor, alloc: std.mem.Allocator, io: Io, fil: Io.File, ret_err: *?anyerror) Io.Cancelable!void {
+fn fragThread(self: DataExtractor, alloc: std.mem.Allocator, io: Io, fil: Io.File, ret_err: *?Error) Io.Cancelable!void {
     const frag = self.frag_entry.?;
 
     const cur_block_size = self.file_size % self.block_size;
@@ -169,7 +171,7 @@ fn fragThread(self: DataExtractor, alloc: std.mem.Allocator, io: Io, fil: Io.Fil
         if (err == error.Canceled) io.recancel();
         return Io.Cancelable.Canceled;
     };
-    wrt.interface.writeAll(tmp[0..cur_block_size]) catch |err| {
+    wrt.interface.writeAll(tmp[self.frag_offset .. self.frag_offset + cur_block_size]) catch |err| {
         ret_err.* = err;
         if (err == error.Canceled) io.recancel();
         return Io.Cancelable.Canceled;
diff --git a/src/util/offset_file.zig b/src/util/offset_file.zig
index 84c6588..4852c49 100644
--- a/src/util/offset_file.zig
+++ b/src/util/offset_file.zig
@@ -14,16 +14,11 @@ pub fn init(fil: File, init_offset: u64) OffsetFile {
     return .{ .fil = fil, .offset = init_offset };
 }
 
-pub fn readerAt(self: OffsetFile, io: Io, offset: u64, buffer: []u8) !Reader {
+pub fn readerAt(self: OffsetFile, io: Io, offset: u64, buffer: []u8) Reader.SeekError!Reader {
     var rdr = self.fil.reader(io, buffer);
     try rdr.seekTo(self.offset + offset);
     return rdr;
 }
-pub fn readAt(self: OffsetFile, io: Io, offset: u64, buf: []u8) !void {
+pub fn readAt(self: OffsetFile, io: Io, offset: u64, buf: []u8) File.ReadPositionalError!void {
     _ = try self.fil.readPositionalAll(io, buf, self.offset + offset);
 }
-pub fn readValueAt(self: OffsetFile, comptime T: anytype, io: Io, offset: u64) !void {
-    //TODO: check for endianess and decode accordingly.
-    var new: T = undefined;
-    _ = try self.fil.readPositionalAll(io, @ptrCast(&new), self.offset + offset);
-}