Re-doing extraction with learning about Io

This commit is contained in:
Caleb Gardner
2026-05-17 12:32:58 -05:00
parent 10e9b66ac6
commit 69ce562b6c
2 changed files with 183 additions and 333 deletions
+95 -303
View File
@@ -13,6 +13,7 @@ const dir = @import("inode_data/dir.zig");
const file = @import("inode_data/file.zig"); const file = @import("inode_data/file.zig");
const misc = @import("inode_data/misc.zig"); const misc = @import("inode_data/misc.zig");
const LookupTable = @import("lookup_table.zig"); const LookupTable = @import("lookup_table.zig");
const CachedTable = LookupTable.CachedTable;
const DataExtractor = @import("util/data_extractor.zig"); const DataExtractor = @import("util/data_extractor.zig");
const DataReader = @import("util/data_reader.zig"); const DataReader = @import("util/data_reader.zig");
const Decompressor = @import("util/decompressor.zig"); const Decompressor = @import("util/decompressor.zig");
@@ -206,23 +207,16 @@ pub const Header = extern struct {
// Extract // Extract
const FileRet = struct {
file: Io.File,
permissions: u16,
uid_idx: u16,
gid_idx: u16,
xattr_idx: ?u32,
};
const PathRet = struct { const PathRet = struct {
path: []const u8, path: []const u8,
permissions: u16, permissions: u16,
uid_idx: u16, uid_idx: u16,
gid_idx: u16, gid_idx: u16,
xattr_idx: ?u32, xattr_idx: ?u32 = null,
}; };
const ReturnUnion = union(enum) { const ExtractReturnUnion = union(enum) {
file_ret: anyerror!FileRet, path_ret: anyerror!PathRet, // TODO: convert to concrete error type instead of anyerror.
path_ret: anyerror!PathRet, void_ret: anyerror!void,
}; };
const Tables = struct { const Tables = struct {
id: LookupTable.CachedTable(u16), id: LookupTable.CachedTable(u16),
@@ -231,315 +225,113 @@ const Tables = struct {
}; };
pub fn extract(self: Inode, alloc: std.mem.Allocator, io: Io, fil: OffsetFile, super: Archive.Superblock, path: []const u8, options: ExtractionOptions) !void { pub fn extract(self: Inode, alloc: std.mem.Allocator, io: Io, fil: OffsetFile, super: Archive.Superblock, path: []const u8, options: ExtractionOptions) !void {
var decomp: Decomp = switch (super.compression) { var decomp_base: Decomp = switch (super.compression) {
.gzip => .{ .gzip = try @import("decomp/zlib.zig").init(alloc, super.block_size) }, .gzip => .{ .gzip = try .init(alloc, super.block_size) },
.lzma => .{ .lzma = try @import("decomp/lzma.zig").init(alloc, super.block_size) }, .lzma => .{ .lzma = try .init(alloc, super.block_size) },
.xz => .{ .xz = try @import("decomp/xz.zig").init(alloc, super.block_size) }, .xz => .{ .xz = try .init(alloc, super.block_size) },
.zstd => .{ .zstd = try @import("decomp/zstd.zig").init(alloc, super.block_size) }, .zstd => .{ .zstd = try .init(alloc, super.block_size) },
else => unreachable, else => unreachable,
}; };
defer decomp.deinit(); defer decomp_base.deinit();
const decomp = decomp_base.decompressor();
var frag_table: LookupTable.CachedTable(FragEntry) = .init(alloc, fil, decomp.decompressor(), super.frag_start, super.frag_count); var frag_table: CachedTable(FragEntry) = .init(alloc, fil, decomp, super.frag_start, super.frag_count);
defer frag_table.deinit(io); defer if (!options.ignore_permissions) frag_table.deinit(io);
// var group: Io.Group = .init; var sel_buf = [1]ExtractReturnUnion{undefined} ** 10;
// defer group.cancel(io); var sel: Io.Select(ExtractReturnUnion) = .init(io, &sel_buf);
defer sel.cancelDiscard();
var que_arr = [1]FileRet{undefined} ** 12;
var que: Io.Queue(FileRet) = .init(&que_arr);
defer que.close(io);
const cache_buf = try alloc.alloc([1024 * 1024]u8, 12);
defer alloc.free(cache_buf);
var cache: Io.Queue([1024 * 1024]u8) = .init(cache_buf);
defer cache.close(io);
const sel_buf: []ReturnUnion = try alloc.alloc(ReturnUnion, 10);
var group: Io.Select(ReturnUnion) = .init(io, sel_buf);
defer group.cancelDiscard();
switch (self.hdr.inode_type) { switch (self.hdr.inode_type) {
.dir, .ext_dir => group.async(.file_ret, extractDir, .{ .file, .ext_file => sel.async(.path_ret, extractFile, .{ self, alloc, io, fil, decomp, &frag_table, super.block_size, path }),
self, else => return error.TODO,
alloc,
io,
fil,
decomp.decompressor(),
&frag_table,
super.block_size,
super.dir_start,
super.inode_start,
path,
options,
&que,
&cache,
}),
.file, .ext_file => group.async(.file_ret, extractRegFile, .{
self,
alloc,
io,
fil,
decomp.decompressor(),
&frag_table,
super.block_size,
path,
&que,
&cache,
}),
.symlink, .ext_symlink => group.async(.file_ret, extractSymlink, .{ self, alloc, io, path, options, &que }),
else => group.async(.file_ret, extractDevice, .{ self, alloc, io, path, options, &que }),
} }
var id_table: LookupTable.CachedTable(u16) = .init(alloc, fil, decomp.decompressor(), super.id_start, super.id_count); var xattr_table: ?XattrTable = if (!options.ignore_xattr)
defer id_table.deinit(io); try .init(alloc, io, fil, decomp, super.xattr_start)
var xattr_table: XattrTable = try .init(alloc, io, fil, decomp.decompressor(), super.xattr_start); else
defer xattr_table.deinit(io); null;
defer if (!options.ignore_xattr) xattr_table.?.deinit(io);
var id_table: ?CachedTable(u16) = if (!options.ignore_xattr)
.init(alloc, fil, decomp, super.id_start, super.id_count)
else
null;
defer if (!options.ignore_xattr) id_table.?.deinit(io);
while (true) { while (true) {
const ret = que.getOne(io) catch break; if (sel.group.token.load(.unordered) == null) break;
const inode: Inode = ret.inode; const ret = try sel.queue.getOne(io);
defer inode.deinit(alloc); switch (ret) {
const ret_file: Io.File = ret.file; .void_ret => {
try ret.void_ret;
continue;
},
else => {},
}
const path_ret = try ret.path_ret;
defer if (path_ret.path.len != path.len) alloc.free(path_ret.path);
if (options.ignore_permissions and options.ignore_xattr) continue;
if (options.ignore_permissions and path_ret.xattr_idx == null) continue;
var ret_file = try Io.Dir.cwd().openFile(io, path_ret.path, .{});
defer ret_file.close(io); defer ret_file.close(io);
if (!options.ignore_xattr) {
if (inode.xattrIndex()) |idx| {
const xattrs = try xattr_table.get(alloc, io, idx);
for (xattrs) |x| {
// TODO: Check error.
const xattr_res = std.os.linux.fsetxattr(ret_file.handle, x.key, x.value.ptr, x.value.len, 0);
if (xattr_res != 0 and options.verbose)
options.verbose_writer.?.print("setxattr failed with code: {}\n", .{xattr_res}) catch {};
alloc.free(x.key);
}
alloc.free(xattrs);
} else |_| {}
}
if (!options.ignore_permissions) { if (!options.ignore_permissions) {
try ret_file.setPermissions(io, @enumFromInt(inode.hdr.permissions)); try ret_file.setPermissions(io, @enumFromInt(path_ret.permissions));
try ret_file.setOwner(io, try id_table.get(io, inode.hdr.uid_idx), try id_table.get(io, inode.hdr.gid_idx)); try ret_file.setOwner(io, try id_table.?.get(io, path_ret.uid_idx), try id_table.?.get(io, path_ret.gid_idx));
} }
if (!que.type_erased.closed and group.token.raw == null) que.close(io); if (!options.ignore_xattr and path_ret.xattr_idx != null) {
} const xattrs = try xattr_table.?.get(alloc, io, path_ret.xattr_idx.?);
} defer {
fn extractDir( for (xattrs) |x|
self: Inode, alloc.free(x.key);
alloc: std.mem.Allocator, alloc.free(xattrs);
io: Io, }
fil: OffsetFile,
decomp: *const Decompressor,
frag: *LookupTable.CachedTable(FragEntry),
block_size: u32,
dir_start: u64,
inode_start: u64,
path: []const u8,
options: ExtractionOptions,
que: *Io.Queue(FileRet),
cache: *Io.Queue([1024 * 1024]u8),
) !void {
defer alloc.free(path);
const dirs = try self.readDirectory(alloc, io, fil, decomp, dir_start); for (xattrs) |x| {
defer { const res = std.os.linux.fsetxattr(ret_file.handle, x.key, x.value.ptr, x.value.len, 0);
for (dirs) |d| if (res != 0)
d.deinit(alloc); return error.CannotSetXattr;
alloc.free(dirs);
}
var group: Io.Select() = .init;
defer group.cancel(io);
for (dirs) |d| {
var rdr = try fil.readerAt(io, d.block_start + inode_start, &[0]u8{});
var meta: MetadataReader = .init(alloc, &rdr.interface, decomp);
try meta.interface.discardAll(d.block_offset);
const inode = try read(alloc, &meta.interface, block_size);
const new_path = try std.mem.concat(alloc, u8, &[_][]const u8{ path, "/", d.name });
switch (inode.hdr.inode_type) {
.dir, .ext_dir => group.async(io, extractDir, .{
self,
alloc,
io,
fil,
decomp,
frag,
block_size,
dir_start,
inode_start,
new_path,
options,
que,
cache,
}),
.file, .ext_file => group.async(io, extractRegFile, .{
self,
alloc,
io,
fil,
decomp,
frag,
block_size,
new_path,
que,
cache,
}),
.symlink, .ext_symlink => group.async(io, extractSymlink, .{ self, alloc, io, new_path, options, que }),
else => group.async(io, extractDevice, .{ self, alloc, io, new_path, options, que }),
}
}
try group.await(io);
try que.putOne(io, .{ .file = try Io.Dir.cwd().openFile(io, path, .{}), .inode = self });
}
fn extractRegFile(
self: Inode,
alloc: std.mem.Allocator,
io: Io,
fil: OffsetFile,
decomp: *const Decompressor,
frag: *LookupTable.CachedTable(FragEntry),
block_size: u32,
path: []const u8,
que: *Io.Queue(FileRet),
cache: *Io.Queue([1024 * 1024]u8),
) !void {
defer alloc.free(path);
var atom = try Io.Dir.cwd().createFileAtomic(io, path, .{});
defer atom.deinit(io);
var size: u64 = undefined;
var start: u64 = undefined;
var blocks: []file.BlockSize = undefined;
var frag_idx: u32 = undefined;
var frag_offset: u32 = undefined;
switch (self.data) {
.file => |f| {
size = f.size;
start = f.block_start;
blocks = f.block_sizes;
frag_idx = f.frag_idx;
frag_offset = f.frag_block_offset;
},
.ext_file => |f| {
size = f.size;
start = f.block_start;
blocks = f.block_sizes;
frag_idx = f.frag_idx;
frag_offset = f.frag_block_offset;
},
else => unreachable,
}
var ext: DataExtractor = .init(fil, decomp, cache, block_size, size, start, blocks);
ext.addFrag(frag_offset, try frag.get(io, frag_idx));
var group: Io.Group = .init;
defer group.cancel(io);
ext.extractAsync(alloc, io, &group, atom.file);
try group.await(io);
try atom.link(io);
try que.putOne(io, .{ .file = atom.file, .inode = self });
}
fn extractSymlink(
self: Inode,
alloc: std.mem.Allocator,
io: Io,
path: []const u8,
options: ExtractionOptions,
que: *Io.Queue(FileRet),
) !void {
defer alloc.free(path);
_ = options;
_ = que;
// TODO: handle symlink options
const target = try self.symlinkTarget();
try Io.Dir.cwd().symLink(io, target, path, .{});
// TODO: On Linux you can't set permission & xattrs on symlinks (they inherit from their target), but on Mac you can.
}
fn extractDevice(
self: Inode,
alloc: std.mem.Allocator,
io: Io,
path: []const u8,
options: ExtractionOptions,
que: *Io.Queue(FileRet),
) !void {
defer alloc.free(path);
var dev: u32 = 0;
var mode: u32 = undefined;
switch (self.data) {
.char_dev => |d| {
dev = d.dev;
mode = std.posix.DT.CHR;
},
.block_dev => |d| {
dev = d.dev;
mode = std.posix.DT.BLK;
},
.ext_char_dev => |d| {
dev = d.dev;
mode = std.posix.DT.BLK;
},
.ext_block_dev => |d| {
dev = d.dev;
mode = std.posix.DT.BLK;
},
.fifo, .ext_fifo => mode = std.posix.DT.FIFO,
.socket, .ext_socket => mode = std.posix.DT.SOCK,
else => unreachable,
}
const sentinel_path = try std.mem.concatMaybeSentinel(alloc, u8, &[1][]const u8{path}, 0);
defer alloc.free(sentinel_path);
const res = std.os.linux.mknod(@ptrCast(sentinel_path), mode, dev);
if (res != 0 and options.verbose)
options.verbose_writer.?.print("mknod failed with code: {}\n", .{res}) catch {};
try que.putOne(io, .{
.file = try Io.Dir.cwd().openFile(io, path, .{}),
.inode = self,
});
}
fn applyMetadataLoop(alloc: std.mem.Allocator, io: Io, fil: OffsetFile, decomp: *const Decompressor, super: Archive.Superblock, que: *Io.Queue(FileRet), options: ExtractionOptions) !void {
var id_table: LookupTable.CachedTable(u16) = .init(alloc, fil, decomp, super.id_start, super.id_count);
defer id_table.deinit(io);
var xattr_table: XattrTable = try .init(alloc, io, fil, decomp, super.xattr_start);
defer xattr_table.deinit(io);
for (try que.getOne(io)) |ret| {
const inode: Inode = ret.inode;
defer inode.deinit(alloc);
const ret_file: Io.File = ret.file;
defer ret_file.close(io);
if (!options.ignore_xattr) {
if (inode.xattrIndex()) |idx| {
const xattrs = try xattr_table.get(io, idx);
for (xattrs) |x| {
// TODO: Check error.
_ = std.os.linux.fsetxattr(ret_file.handle, x.key, x.value.ptr, x.value.len, 0);
}
} }
} }
if (!options.ignore_permissions) {
try ret_file.setPermissions(io, inode.hdr.permissions);
try ret_file.setOwner(io, try id_table.get(io, inode.hdr.uid_idx), try id_table.get(io, inode.hdr.gid_idx));
}
} }
} }
/// Extracts a regular-file inode (`.file` or `.ext_file`) to `path`, relative to the
/// current working directory.
///
/// The data is written through an atomic file created at `path`; the atomic file is
/// linked only after `DataExtractor.extractAsync` returns without error, and it is
/// always deinitialized on exit.
///
/// Returns the `path` slice that was passed in (no copy is made) together with the
/// permissions and uid/gid table indices from the inode header. `xattr_idx` is set
/// only for `.ext_file` inodes whose index is not the 0xFFFFFFFF sentinel.
///
/// Any error from file creation, the fragment table lookup, extraction, or linking is
/// returned. Calling this on an inode whose data is neither `.file` nor `.ext_file`
/// hits `unreachable`.
pub fn extractFile(self: Inode, alloc: std.mem.Allocator, io: Io, fil: OffsetFile, decomp: *const Decompressor, frag: *CachedTable(FragEntry), block_size: u32, path: []const u8) anyerror!PathRet {
    // Sentinel used by both the fragment index and the xattr index to mean "none".
    const no_index = 0xFFFFFFFF;

    var out_file = try Io.Dir.cwd().createFileAtomic(io, path, .{});
    defer out_file.deinit(io);

    var result: PathRet = .{
        .path = path,
        .permissions = self.hdr.permissions,
        .uid_idx = self.hdr.uid_idx,
        .gid_idx = self.hdr.gid_idx,
    };

    const extractor: DataExtractor = switch (self.data) {
        .file => |f| basic: {
            var built: DataExtractor = .init(fil, decomp, block_size, f.size, f.block_start, f.block_sizes);
            if (f.frag_idx != no_index) {
                const entry = try frag.get(io, f.frag_idx);
                built.addFrag(f.frag_block_offset, entry);
            }
            break :basic built;
        },
        .ext_file => |f| extended: {
            // Only the extended variant carries an xattr index.
            if (f.xattr_idx != no_index) result.xattr_idx = f.xattr_idx;

            var built: DataExtractor = .init(fil, decomp, block_size, f.size, f.block_start, f.block_sizes);
            if (f.frag_idx != no_index) {
                const entry = try frag.get(io, f.frag_idx);
                built.addFrag(f.frag_block_offset, entry);
            }
            break :extended built;
        },
        else => unreachable,
    };

    try extractor.extractAsync(alloc, io, out_file.file);
    try out_file.link(io);
    return result;
}
+88 -30
View File
@@ -14,7 +14,6 @@ const DataExtractor = @This();
fil: OffsetFile, fil: OffsetFile,
decomp: *const Decompressor, decomp: *const Decompressor,
cache: *Io.Queue([1024 * 1024]u8),
block_size: u32, block_size: u32,
file_size: u64, file_size: u64,
@@ -24,11 +23,12 @@ blocks: []BlockSize,
frag_offset: u32 = 0, frag_offset: u32 = 0,
frag_entry: ?FragEntry = null, frag_entry: ?FragEntry = null,
pub fn init(fil: OffsetFile, decomp: *const Decompressor, cache: *Io.Queue([1024 * 1024]u8), block_size: u32, file_size: u64, data_start: u64, blocks: []BlockSize) DataExtractor { err: ?anyerror = null,
pub fn init(fil: OffsetFile, decomp: *const Decompressor, block_size: u32, file_size: u64, data_start: u64, blocks: []BlockSize) DataExtractor {
return .{ return .{
.fil = fil, .fil = fil,
.decomp = decomp, .decomp = decomp,
.cache = cache,
.block_size = block_size, .block_size = block_size,
.file_size = file_size, .file_size = file_size,
@@ -48,17 +48,23 @@ fn numBlocks(self: DataExtractor) usize {
} }
/// Starts extracting the data using the given group to spawn async tasks. /// Extracts the data using async tasks spawned on an internal group, returning once the group has been awaited.
pub fn extractAsync(self: DataExtractor, alloc: std.mem.Allocator, io: Io, group: *Io.Group, fil: Io.File) void { pub fn extractAsync(self: DataExtractor, alloc: std.mem.Allocator, io: Io, fil: Io.File) !void {
var group: Io.Group = .init;
defer group.cancel(io);
var err: ?anyerror = null;
var read_offset: u64 = self.start; var read_offset: u64 = self.start;
for (0..self.blocks.len) |idx| { for (0..self.blocks.len) |idx| {
group.async(io, blockThread, .{ self, alloc, io, fil, read_offset, idx }); group.async(io, blockThread, .{ self, alloc, io, fil, read_offset, idx, &err });
read_offset += self.blocks[idx].size; read_offset += self.blocks[idx].size;
} }
if (self.frag_entry != null) if (self.frag_entry != null)
group.async(io, fragThread, .{ self, alloc, io, fil }); group.async(io, fragThread, .{ self, alloc, io, fil, &err });
group.await(io) catch |cancel| return err orelse cancel;
} }
fn blockThread(self: DataExtractor, alloc: std.mem.Allocator, io: Io, fil: Io.File, read_offset: u64, idx: usize) !void { fn blockThread(self: DataExtractor, alloc: std.mem.Allocator, io: Io, fil: Io.File, read_offset: u64, idx: usize, ret_err: *?anyerror) Io.Cancelable!void {
const block = self.blocks[idx]; const block = self.blocks[idx];
const cur_block_size = if (idx == self.numBlocks() - 1) const cur_block_size = if (idx == self.numBlocks() - 1)
@@ -67,54 +73,106 @@ fn blockThread(self: DataExtractor, alloc: std.mem.Allocator, io: Io, fil: Io.Fi
self.block_size; self.block_size;
var wrt = fil.writer(io, &[0]u8{}); var wrt = fil.writer(io, &[0]u8{});
try wrt.seekTo(self.block_size * idx); wrt.seekTo(self.block_size * idx) catch |err| {
ret_err.* = err;
if (err == error.Canceled) io.recancel();
return Io.Cancelable.Canceled;
};
defer wrt.flush() catch {}; defer wrt.flush() catch {};
if (block.size == 0) { if (block.size == 0) {
try wrt.interface.splatByteAll(0, cur_block_size); wrt.interface.splatByteAll(0, cur_block_size) catch |err| {
ret_err.* = err;
if (err == error.Canceled) io.recancel();
return Io.Cancelable.Canceled;
};
return; return;
} }
var rdr = try self.fil.readerAt(io, read_offset, &[0]u8{}); var rdr = self.fil.readerAt(io, read_offset, &[0]u8{}) catch |err| {
ret_err.* = err;
if (err == error.Canceled) io.recancel();
return Io.Cancelable.Canceled;
};
if (block.uncompressed) { if (block.uncompressed) {
try rdr.interface.streamExact(&wrt.interface, cur_block_size); rdr.interface.streamExact(&wrt.interface, cur_block_size) catch |err| {
ret_err.* = err;
if (err == error.Canceled) io.recancel();
return Io.Cancelable.Canceled;
};
return; return;
} else { } else {
@branchHint(.likely); @branchHint(.likely);
var cache = try self.cache.getOne(io);
defer self.cache.putOne(io, cache) catch {};
var tmp = try self.cache.getOne(io); var cache: [1024 * 1024]u8 = undefined;
defer self.cache.putOne(io, tmp) catch {}; var tmp: [1024 * 1024]u8 = undefined;
try rdr.interface.readSliceAll(cache[0..block.size]); rdr.interface.readSliceAll(cache[0..block.size]) catch |err| {
_ = try self.decomp.Decompress(alloc, cache[0..block.size], tmp[0..cur_block_size]); ret_err.* = err;
try wrt.interface.writeAll(tmp[0..cur_block_size]); if (err == error.Canceled) io.recancel();
return Io.Cancelable.Canceled;
};
_ = self.decomp.Decompress(alloc, cache[0..block.size], tmp[0..cur_block_size]) catch |err| {
ret_err.* = err;
if (err == error.Canceled) io.recancel();
return Io.Cancelable.Canceled;
};
wrt.interface.writeAll(tmp[0..cur_block_size]) catch |err| {
ret_err.* = err;
if (err == error.Canceled) io.recancel();
return Io.Cancelable.Canceled;
};
} }
} }
fn fragThread(self: DataExtractor, alloc: std.mem.Allocator, io: Io, fil: Io.File) !void { fn fragThread(self: DataExtractor, alloc: std.mem.Allocator, io: Io, fil: Io.File, ret_err: *?anyerror) Io.Cancelable!void {
const frag = self.frag_entry.?; const frag = self.frag_entry.?;
const cur_block_size = self.file_size % self.block_size; const cur_block_size = self.file_size % self.block_size;
var wrt = fil.writer(io, &[0]u8{}); var wrt = fil.writer(io, &[0]u8{});
try wrt.seekTo(self.blocks.len * self.block_size); wrt.seekTo(self.blocks.len * self.block_size) catch |err| {
ret_err.* = err;
if (err == error.Canceled) io.recancel();
return Io.Cancelable.Canceled;
};
defer wrt.flush() catch {}; defer wrt.flush() catch {};
var rdr = try self.fil.readerAt(io, frag.start, &[0]u8{}); var rdr = self.fil.readerAt(io, frag.start, &[0]u8{}) catch |err| {
ret_err.* = err;
if (err == error.Canceled) io.recancel();
return Io.Cancelable.Canceled;
};
if (frag.size.uncompressed) { if (frag.size.uncompressed) {
try rdr.interface.discardAll(self.frag_offset); rdr.interface.discardAll(self.frag_offset) catch |err| {
try rdr.interface.streamExact(&wrt.interface, cur_block_size); ret_err.* = err;
if (err == error.Canceled) io.recancel();
return Io.Cancelable.Canceled;
};
rdr.interface.streamExact(&wrt.interface, cur_block_size) catch |err| {
ret_err.* = err;
if (err == error.Canceled) io.recancel();
return Io.Cancelable.Canceled;
};
return; return;
} else { } else {
@branchHint(.likely); @branchHint(.likely);
var cache = try self.cache.getOne(io);
defer self.cache.putOne(io, cache) catch {};
var tmp = try self.cache.getOne(io); var cache: [1024 * 1024]u8 = undefined;
defer self.cache.putOne(io, tmp) catch {}; var tmp: [1024 * 1024]u8 = undefined;
try rdr.interface.readSliceAll(cache[0..frag.size.size]); rdr.interface.readSliceAll(cache[0..frag.size.size]) catch |err| {
_ = try self.decomp.Decompress(alloc, cache[0..frag.size.size], tmp[0..self.block_size]); ret_err.* = err;
try wrt.interface.writeAll(tmp[0..cur_block_size]); if (err == error.Canceled) io.recancel();
return Io.Cancelable.Canceled;
};
_ = self.decomp.Decompress(alloc, cache[0..frag.size.size], tmp[0..self.block_size]) catch |err| {
ret_err.* = err;
if (err == error.Canceled) io.recancel();
return Io.Cancelable.Canceled;
};
wrt.interface.writeAll(tmp[0..cur_block_size]) catch |err| {
ret_err.* = err;
if (err == error.Canceled) io.recancel();
return Io.Cancelable.Canceled;
};
} }
} }