Move extract logic to util/extract.zig to make it easier to read.

This commit is contained in:
Caleb J. Gardner
2026-03-05 03:02:41 -06:00
parent a4e23a840d
commit a606f5e11a
2 changed files with 309 additions and 348 deletions
+2 -348
View File
@@ -14,6 +14,7 @@ const file = @import("inode_data/file.zig");
const misc = @import("inode_data/misc.zig");
const DataReader = @import("util/data.zig");
const ThreadedDataReader = @import("util/data_threaded.zig");
const InodeExtract = @import("util/extract.zig");
const InodeFinish = @import("util/inode_finish.zig");
const FinishUnion = InodeFinish.FinishUnion;
const MetadataReader = @import("util/metadata.zig");
@@ -126,20 +127,6 @@ fn readerFromData(alloc: std.mem.Allocator, archive: *Archive, data: anytype) !D
out.addFragment(try archive.frag_table.get(data.frag_idx), data.frag_block_offset);
return out;
}
/// Get a threaded data reader for a file inode.
pub fn threadedDataReader(self: Inode, alloc: std.mem.Allocator, archive: *Archive) !ThreadedDataReader {
return switch (self.hdr.inode_type) {
.file => threadedReaderFromData(alloc, archive, self.data.file),
.ext_file => threadedReaderFromData(alloc, archive, self.data.ext_file),
else => error.NotRegularFile,
};
}
fn threadedReaderFromData(alloc: std.mem.Allocator, archive: *Archive, data: anytype) !ThreadedDataReader {
var out: ThreadedDataReader = .init(alloc, archive.*, data.block_sizes, data.block_start, data.size);
if (data.frag_idx != 0xFFFFFFFF)
out.addFragment(try archive.frag_table.get(data.frag_idx), data.frag_block_offset);
return out;
}
/// Get the directory entries for a directory inode.
pub fn dirEntries(self: Inode, alloc: std.mem.Allocator, archive: Archive) ![]DirEntry {
@@ -198,338 +185,5 @@ pub fn setMetadata(self: Inode, alloc: std.mem.Allocator, archive: *Archive, fil
/// Extract the inode to the given path.
pub fn extractTo(self: Inode, alloc: std.mem.Allocator, archive: *Archive, path: []const u8, options: ExtractionOptions) !void {
if (options.threads > 1) return self.extractToThreaded(alloc, archive, path, options);
switch (self.hdr.inode_type) {
.dir, .ext_dir => {
// Removing any trailing separators since that's the easiest path forward.
if (path[path.len - 1] == '/') return self.extractTo(alloc, archive, path[0 .. path.len - 1], options);
std.fs.cwd().makeDir(path) catch |err| {
if (err != std.fs.Dir.MakeError.PathAlreadyExists) return err;
};
const entries = try self.dirEntries(alloc, archive.*);
defer {
for (entries) |entry| entry.deinit(alloc);
alloc.free(entries);
}
for (entries) |entry| {
var new_path = try alloc.alloc(u8, path.len + 1 + entry.name.len);
@memcpy(new_path[0..path.len], path);
@memcpy(new_path[path.len + 1 ..], entry.name);
new_path[path.len] = '/';
defer alloc.free(new_path);
var inode: Inode = try readFromEntry(alloc, archive, entry);
defer inode.deinit(alloc);
try inode.extractTo(alloc, archive, new_path, options);
}
var fil = try std.fs.cwd().openFile(path, .{});
defer fil.close();
try self.setMetadata(alloc, archive, fil, options);
},
.file, .ext_file => try self.extractRegFile(alloc, archive, path, options),
.symlink, .ext_symlink => try self.extractSymlink(path),
else => try self.extractDevice(alloc, archive, path, options),
}
}
/// Extract the inode to the given path. Multi-threaded.
/// Functions identically to extractTo on all but regular files and directories.
fn extractToThreaded(self: Inode, allocator: std.mem.Allocator, archive: *Archive, path: []const u8, options: ExtractionOptions) !void {
switch (self.hdr.inode_type) {
.dir, .ext_dir => {
// Removing any trailing separators since that's the easiest path forward.
if (path[path.len - 1] == '/') return self.extractToThreaded(allocator, archive, path[0 .. path.len - 1], options);
// Arena Allocator
var stack_alloc = std.heap.stackFallback(1024 * 1024, allocator);
var arena_alloc: std.heap.ArenaAllocator = .init(stack_alloc.get());
defer arena_alloc.deinit();
var thread_alloc: std.heap.ThreadSafeAllocator = .{ .child_allocator = arena_alloc.allocator() };
const alloc = thread_alloc.allocator();
var wg: WaitGroup = .{};
// defer if(!options.ignore_permissions) perms.?.deinit(alloc); We don't need to do this due to ArenaAllocator
var pool: Pool = undefined;
try pool.init(.{ .allocator = alloc, .n_jobs = options.threads - 1 });
defer pool.deinit();
var out_err: ?anyerror = null;
wg.start();
self.extractThread(alloc, archive, path, options, .{ .wg = &wg }, &pool, &out_err);
pool.waitAndWork(&wg);
if (out_err != null) return out_err.?;
var fil = try std.fs.cwd().openFile(path, .{});
defer fil.close();
try self.setMetadata(alloc, archive, fil, options);
},
.file, .ext_file => {
var pool: Pool = undefined;
try pool.init(.{ .allocator = allocator, .n_jobs = options.threads - 1 });
defer pool.deinit();
// Arena Allocator
var stack_alloc = std.heap.stackFallback(1024 * 1024, allocator);
var arena_alloc: std.heap.ArenaAllocator = .init(stack_alloc.get());
defer arena_alloc.deinit();
var thread_alloc: std.heap.ThreadSafeAllocator = .{ .child_allocator = arena_alloc.allocator() };
const alloc = thread_alloc.allocator();
var wg: WaitGroup = .{};
var out_err: ?anyerror = null;
self.extractThread(alloc, archive, path, options, .{ .wg = &wg }, &pool, &out_err);
pool.waitAndWork(&wg);
if (out_err != null) return out_err.?;
var fil = try std.fs.cwd().openFile(path, .{});
defer fil.close();
try self.setMetadata(alloc, archive, fil, options);
},
.symlink, .ext_symlink => try self.extractSymlink(path),
else => try self.extractDevice(allocator, archive, path, options),
}
}
fn extractThreadEntry(
entry: DirEntry,
alloc: std.mem.Allocator,
archive: *Archive,
path: []const u8,
options: ExtractionOptions,
finish: FinishUnion,
pool: *Pool,
out_err: *?anyerror,
) void {
var new_path = alloc.alloc(u8, path.len + entry.name.len + 1) catch |err| {
finish.finish();
out_err.* = err;
return;
};
@memcpy(new_path[0..path.len], path);
@memcpy(new_path[path.len + 1 ..], entry.name);
new_path[path.len] = '/';
var inode = readFromEntry(alloc, archive, entry) catch |err| {
out_err.* = err;
finish.finish();
return;
};
inode.extractThread(alloc, archive, new_path, options, finish, pool, out_err);
}
/// Extract threadedly the inode to the path.
fn extractThread(
self: Inode,
alloc: std.mem.Allocator,
archive: *Archive,
path: []const u8,
options: ExtractionOptions,
finish: FinishUnion,
pool: *Pool,
out_err: *?anyerror,
) void {
if (options.verbose)
options.verbose_writer.?.print("Extracting inode #{} to {s}\n", .{ self.hdr.num, path }) catch {};
defer finish.finish();
if (out_err.* != null) return;
switch (self.hdr.inode_type) {
.dir, .ext_dir => {
_ = std.fs.cwd().makePathStatus(path) catch |err| {
if (options.verbose)
options.verbose_writer.?.print("Error creating {s}: {}\n", .{ path, err }) catch {};
out_err.* = err;
return;
};
const entries = self.dirEntries(alloc, archive.*) catch |err| {
if (options.verbose)
options.verbose_writer.?.print("Error getting directory entries for inode #{} (extracting to {s}): {}\n", .{ self.hdr.num, path, err }) catch {};
out_err.* = err;
return;
};
const fin = InodeFinish.create(
alloc,
self,
path,
archive,
options,
finish,
out_err,
null,
entries.len,
) catch |err| {
if (options.verbose)
options.verbose_writer.?.print("Error allocating memory\n", .{}) catch {};
out_err.* = err;
return;
};
// defer files.deinit(alloc); We don't need to do this due to ArenaAllocator
for (entries) |entry| {
if (entry.inode_type == .dir) {
extractThreadEntry(
entry,
alloc,
archive,
path,
options,
.{ .fin = fin },
pool,
out_err,
);
continue;
}
pool.spawn(
extractThreadEntry,
.{
entry,
alloc,
archive,
path,
options,
FinishUnion{ .fin = fin },
pool,
out_err,
},
) catch |err| {
fin.finish();
if (options.verbose)
options.verbose_writer.?.print("Error starting extraction thread: {}\n", .{err}) catch {};
out_err.* = err;
continue;
};
}
},
.file, .ext_file => {
const fil = std.fs.cwd().createFile(path, .{}) catch |err| {
if (options.verbose)
options.verbose_writer.?.print("Error creating {s}: {}\n", .{ path, err }) catch {};
out_err.* = err;
return;
};
var data = self.threadedDataReader(alloc, archive) catch |err| {
if (options.verbose)
options.verbose_writer.?.print(
"Error creating data reader for inode #{} (extracting to {s}): {}\n",
.{ self.hdr.num, path, err },
) catch {};
out_err.* = err;
return;
};
const fin = InodeFinish.create(
alloc,
self,
path,
archive,
options,
finish,
out_err,
fil,
data.num_blocks,
) catch |err| {
if (options.verbose)
options.verbose_writer.?.print("Error allocating memory\n", .{}) catch {};
out_err.* = err;
return;
};
data.extractThreaded(fil, pool, fin) catch |err| {
if (options.verbose)
options.verbose_writer.?.print("Error spawning threads: {}\n", .{err}) catch {};
out_err.* = err;
return;
};
},
.symlink, .ext_symlink => {
self.extractSymlink(path) catch |err| {
if (options.verbose)
options.verbose_writer.?.print("Error extracting symlink inode #{} to {s}: {}\n", .{ self.hdr.num, path, err }) catch {};
out_err.* = err;
};
},
else => {
self.extractDevice(alloc, archive, path, options) catch |err| {
if (options.verbose)
options.verbose_writer.?.print("Error extracting device/IPC inode #{} to {s}: {}\n", .{ self.hdr.num, path, err }) catch {};
out_err.* = err;
};
},
}
}
/// Creates and writes the inode file contents to the given path.
/// Optionally set owner & permissions.
///
/// Assumes the inode is a file or ext_file type.
fn extractRegFile(self: Inode, alloc: std.mem.Allocator, archive: *Archive, path: []const u8, options: ExtractionOptions) !void {
var fil = try std.fs.cwd().createFile(path, .{ .exclusive = true });
defer fil.close();
var wrt = fil.writer(&[0]u8{});
var dat_rdr = try self.dataReader(alloc, archive);
defer dat_rdr.deinit();
_ = try dat_rdr.interface.streamRemaining(&wrt.interface);
try wrt.interface.flush();
try self.setMetadata(alloc, archive, fil, options);
}
/// Creates the symlink described by the inode.
///
/// Assumes the inode is a symlink or ext_symlink type.
fn extractSymlink(self: Inode, path: []const u8) !void {
const target = switch (self.data) {
.symlink => |s| s.target,
.ext_symlink => |s| s.target,
else => unreachable,
};
try std.fs.cwd().symLink(target, path, .{});
}
/// Creates the device described by the inode.
///
/// Optionally set owner & permissions.
/// Assumes the inode is a char_dev, block_dev, fifo, socket, or their extended counterparts.
fn extractDevice(self: Inode, alloc: std.mem.Allocator, archive: *Archive, path: []const u8, options: ExtractionOptions) !void {
var mode: u32 = undefined;
var dev: u32 = 0;
switch (self.data) {
.char_dev => |d| {
mode = std.posix.S.IFCHR;
dev = d.dev;
},
.ext_char_dev => |d| {
mode = std.posix.S.IFCHR;
dev = d.dev;
},
.block_dev => |d| {
mode = std.posix.S.IFBLK;
dev = d.dev;
},
.ext_block_dev => |d| {
mode = std.posix.S.IFBLK;
dev = d.dev;
},
.fifo, .ext_fifo => mode = std.posix.S.IFIFO,
.socket, .ext_socket => mode = std.posix.S.IFSOCK,
else => unreachable,
}
const res: std.os.linux.E = @enumFromInt(std.os.linux.mknod(@ptrCast(path), mode, dev));
switch (res) {
.SUCCESS => {},
.ACCES => return std.fs.Dir.MakeError.AccessDenied,
.DQUOT => return std.fs.Dir.MakeError.DiskQuota,
.EXIST => return std.fs.Dir.MakeError.PathAlreadyExists,
.FAULT, .NOENT => return std.fs.Dir.MakeError.BadPathName,
.LOOP => return std.fs.Dir.MakeError.SymLinkLoop,
.NAMETOOLONG => return std.fs.Dir.MakeError.NameTooLong,
.NOMEM => return std.fs.Dir.MakeError.SystemResources,
.NOSPC => return std.fs.Dir.MakeError.NoSpaceLeft,
.NOTDIR => return std.fs.Dir.MakeError.NotDir,
.PERM => return std.fs.Dir.MakeError.PermissionDenied,
.ROFS => return std.fs.Dir.MakeError.ReadOnlyFileSystem,
else => return blk: {
std.debug.print("unhandled mknod result: {}\n", .{res});
break :blk std.fs.Dir.MakeError.Unexpected;
},
}
var fil = try std.fs.cwd().openFile(path, .{});
defer fil.close();
try self.setMetadata(alloc, archive, fil, options);
return InodeExtract.extractTo(alloc, self, archive, path, options);
}
+307
View File
@@ -0,0 +1,307 @@
const std = @import("std");
const Allocator = std.mem.Allocator;
const Pool = std.Thread.Pool;
const WaitGroup = std.Thread.WaitGroup;
const Archive = @import("../archive.zig");
const DirEntry = @import("../dir_entry.zig");
const Inode = @import("../inode.zig");
const ExtractionOptions = @import("../options.zig");
const InodeFinish = @import("inode_finish.zig");
const FinishUnion = InodeFinish.FinishUnion;
const ThreadedDataReader = @import("data_threaded.zig");
// Size in bytes (1 MiB) of the stack buffer that `extractTo` hands to
// `std.heap.stackFallback`; the caller-supplied allocator is the fallback.
const STACK_ALLOC_SIZE = 1024 * 1024;
/// Extract `inode` (and, for directories, everything below it) from `archive` to `path`.
///
/// All temporary memory comes from an arena layered over a `STACK_ALLOC_SIZE` stack
/// buffer that falls back to `allocator`; the arena is released when this function returns.
///
/// When `options.threads <= 1` the extraction runs on the calling thread via
/// `extractToSingle`. Otherwise a `std.Thread.Pool` with `options.threads - 1` workers is
/// started and the calling thread joins in through `Pool.waitAndWork`.
///
/// Returns the error recorded by the extraction, or any error from starting the pool.
pub fn extractTo(
    allocator: Allocator,
    inode: Inode,
    archive: *Archive,
    path: []const u8,
    options: ExtractionOptions,
) !void {
    // Drop one trailing separator per recursion step. The length guard keeps a bare
    // "/" (and the empty string) intact instead of indexing out of bounds.
    if (path.len > 1 and path[path.len - 1] == '/')
        return extractTo(allocator, inode, archive, path[0 .. path.len - 1], options);

    var stack_alloc = std.heap.stackFallback(STACK_ALLOC_SIZE, allocator);
    var arena: std.heap.ArenaAllocator = .init(stack_alloc.get());
    defer arena.deinit();

    if (options.threads <= 1)
        return extractToSingle(arena.allocator(), inode, archive, path, options);

    // The arena is shared between worker threads, so wrap it in a locking allocator.
    var thread_alloc = std.heap.ThreadSafeAllocator{ .child_allocator = arena.allocator() };
    const alloc = thread_alloc.allocator();

    var pool_alloc = std.heap.stackFallback(10 * 1024, alloc);
    var pool: Pool = undefined;
    try pool.init(.{ .allocator = pool_alloc.get(), .n_jobs = options.threads - 1 });
    // Declared after the arena's defer, so the workers are joined before the arena
    // (which backs everything they touch) is torn down.
    defer pool.deinit();

    // NOTE(review): nothing here calls `wg.start()`; confirm that the
    // FinishUnion/InodeFinish machinery balances the `finish()` it performs on this
    // WaitGroup, otherwise `waitAndWork` can return before the work is done.
    var wg: WaitGroup = .{};
    var err: ?anyerror = null;
    extractToMulti(
        alloc,
        inode,
        archive,
        path,
        options,
        &pool,
        .{ .wg = &wg },
        &err,
    );
    pool.waitAndWork(&wg);
    if (err) |extract_err| return extract_err;
}
/// Extract `inode` to `path` on the calling thread, recursing into directories.
///
/// - Directories: created with `makeDir` (an already existing directory is accepted),
///   every entry is extracted to `path/<name>`, then the directory's metadata is applied.
/// - Regular files: created exclusively (fails if `path` exists), filled from the
///   inode's data reader, then metadata is applied.
/// - Symlinks and everything else are delegated to `extractSymlink` /
///   `extractDeviceAndIPC`.
///
/// `alloc` is expected to be arena-backed: entry lists, child inodes and child paths
/// are never freed individually.
fn extractToSingle(
    alloc: Allocator,
    inode: Inode,
    archive: *Archive,
    path: []const u8,
    options: ExtractionOptions,
) !void {
    switch (inode.hdr.inode_type) {
        .dir, .ext_dir => {
            std.fs.cwd().makeDir(path) catch |err| switch (err) {
                error.PathAlreadyExists => {},
                else => return err,
            };
            // Currently we are ignoring any deinit or free calls since we know we are under an ArenaAllocator.
            // Possibly in the future, do some simple math to see if it would be safe to ONLY deinit via Arena,
            // otherwise be more conscientious about freeing memory.
            // For now, this is good enough.
            const entries = try inode.dirEntries(alloc, archive.*);
            for (entries) |ent| {
                const sub_inode = try Inode.readFromEntry(alloc, archive, ent);
                const new_path = try std.mem.concat(alloc, u8, &.{ path, "/", ent.name });
                try extractToSingle(alloc, sub_inode, archive, new_path, options);
            }
            // Metadata is applied only after the children exist, so restrictive
            // directory permissions cannot get in the way of creating them.
            var fil = try std.fs.cwd().openFile(path, .{});
            defer fil.close();
            try inode.setMetadata(alloc, archive, fil, options);
        },
        .file, .ext_file => {
            var fil = try std.fs.cwd().createFile(path, .{ .exclusive = true });
            defer fil.close();
            // Zero-length buffer: the writer is unbuffered and data is streamed straight through.
            var wrt = fil.writer(&[0]u8{});
            var dat_rdr = try inode.dataReader(alloc, archive);
            defer dat_rdr.deinit();
            _ = try dat_rdr.interface.streamRemaining(&wrt.interface);
            try wrt.interface.flush();
            try inode.setMetadata(alloc, archive, fil, options);
        },
        .symlink, .ext_symlink => return extractSymlink(inode, path),
        else => return extractDeviceAndIPC(inode, alloc, archive, path, options),
    }
}
/// Extract `inode` to `path`, fanning work out over `pool`.
///
/// Never returns an error: the first failure observed is stored in `err.*`, and `fin`
/// is finished on every path where this function itself is responsible for completing
/// the inode (early failures, symlinks, devices/IPC nodes). For directories and regular
/// files an `InodeFinish` is created with `fin` as its parent and handed to the
/// children / block tasks.
///
/// - Directories: sub-directories are processed on the current thread, every other
///   entry is queued on `pool`.
/// - Regular files: the file is created exclusively and its blocks are written through
///   the threaded data reader.
///
/// `alloc` must be safe to use from several threads and is expected to be arena-backed
/// (nothing allocated here is freed individually).
///
/// NOTE(review): `err` is shared with tasks running on the pool and is read and written
/// here without any locking.
fn extractToMulti(
    alloc: Allocator,
    inode: Inode,
    archive: *Archive,
    path: []const u8,
    options: ExtractionOptions,
    pool: *Pool,
    fin: FinishUnion,
    err: *?anyerror,
) void {
    // Another task already failed: do no further work, just account for this inode.
    if (err.* != null) {
        fin.finish();
        return;
    }
    switch (inode.hdr.inode_type) {
        .dir, .ext_dir => {
            std.fs.cwd().makeDir(path) catch |res_err| switch (res_err) {
                error.PathAlreadyExists => {},
                else => {
                    err.* = res_err;
                    fin.finish();
                    return;
                },
            };
            // Currently we are ignoring any deinit or free calls since we know we are under an ArenaAllocator.
            // Possibly in the future, do some simple math to see if it would be safe to ONLY deinit via Arena,
            // otherwise be more conscientious about freeing memory.
            // For now, this is good enough.
            const entries = inode.dirEntries(alloc, archive.*) catch |res_err| {
                err.* = res_err;
                fin.finish();
                return;
            };
            const dir_fin = InodeFinish.create(
                alloc,
                inode,
                path,
                archive,
                options,
                fin,
                err,
                null,
                entries.len,
            ) catch |res_err| {
                err.* = res_err;
                fin.finish();
                return;
            };
            for (entries) |ent| {
                if (ent.inode_type == .dir) {
                    // Sub-directories are walked inline; they must not also be queued below.
                    extractEntry(alloc, ent, archive, path, options, pool, .{ .fin = dir_fin }, err);
                    continue;
                }
                pool.spawn(
                    extractEntry,
                    .{ alloc, ent, archive, path, options, pool, FinishUnion{ .fin = dir_fin }, err },
                ) catch |res_err| {
                    err.* = res_err;
                    // Account for the entry that could not be queued, then keep going so
                    // every remaining entry is still counted against `dir_fin`.
                    dir_fin.finish();
                    continue;
                };
            }
        },
        .file, .ext_file => {
            const fil = std.fs.cwd().createFile(path, .{ .exclusive = true }) catch |res_err| {
                err.* = res_err;
                fin.finish();
                return;
            };
            var data_rdr = threadedDataReader(inode, alloc, archive) catch |res_err| {
                // Nothing has taken ownership of the file handle yet.
                fil.close();
                err.* = res_err;
                fin.finish();
                return;
            };
            const file_fin = InodeFinish.create(
                alloc,
                inode,
                path,
                archive,
                options,
                fin,
                err,
                fil,
                data_rdr.num_blocks,
            ) catch |res_err| {
                fil.close();
                err.* = res_err;
                fin.finish();
                return;
            };
            data_rdr.extractThreaded(fil, pool, file_fin) catch |res_err| {
                // NOTE(review): some block tasks may already be queued at this point, so
                // only the error is recorded; confirm how `file_fin` should be settled
                // for the blocks that were never spawned.
                err.* = res_err;
            };
        },
        .symlink, .ext_symlink => {
            extractSymlink(inode, path) catch |res_err| {
                err.* = res_err;
            };
            fin.finish();
        },
        else => {
            extractDeviceAndIPC(inode, alloc, archive, path, options) catch |res_err| {
                err.* = res_err;
            };
            fin.finish();
        },
    }
}
/// Resolve one directory entry and extract it below `path`.
///
/// Builds `path/<ent.name>`, reads the entry's inode and hands both to
/// `extractToMulti`. Used both as a direct call and as a `Pool.spawn` task, hence the
/// `void` return: failures are stored in `err.*` and `fin` is finished so the parent
/// directory still sees this entry as accounted for.
///
/// The path and inode are allocated from `alloc` and never freed here (arena-backed).
fn extractEntry(
    alloc: Allocator,
    ent: DirEntry,
    archive: *Archive,
    path: []const u8,
    options: ExtractionOptions,
    pool: *Pool,
    fin: FinishUnion,
    err: *?anyerror,
) void {
    const new_path = std.mem.concat(alloc, u8, &.{ path, "/", ent.name }) catch |res_err| {
        err.* = res_err;
        fin.finish();
        return;
    };
    const inode = Inode.readFromEntry(alloc, archive, ent) catch |res_err| {
        err.* = res_err;
        fin.finish();
        return;
    };
    extractToMulti(alloc, inode, archive, new_path, options, pool, fin, err);
}
/// Build a `ThreadedDataReader` for a regular-file inode.
///
/// Dispatches on the header's inode type to pick the matching payload
/// (`file` or `ext_file`); every other type yields `error.NotRegularFile`.
fn threadedDataReader(self: Inode, alloc: std.mem.Allocator, archive: *Archive) !ThreadedDataReader {
    switch (self.hdr.inode_type) {
        .file => return threadedReaderFromData(alloc, archive, self.data.file),
        .ext_file => return threadedReaderFromData(alloc, archive, self.data.ext_file),
        else => return error.NotRegularFile,
    }
}
/// Construct a `ThreadedDataReader` from a file payload.
///
/// `data` must expose `block_sizes`, `block_start`, `size`, `frag_idx` and
/// `frag_block_offset`. A fragment is attached only when `frag_idx` is not
/// 0xFFFFFFFF; looking it up in the archive's fragment table may fail.
fn threadedReaderFromData(alloc: std.mem.Allocator, archive: *Archive, data: anytype) !ThreadedDataReader {
    // Index value for which no fragment lookup is performed.
    const no_fragment = 0xFFFFFFFF;

    var reader = ThreadedDataReader.init(alloc, archive.*, data.block_sizes, data.block_start, data.size);
    if (data.frag_idx == no_fragment) return reader;

    const frag_entry = try archive.frag_table.get(data.frag_idx);
    reader.addFragment(frag_entry, data.frag_block_offset);
    return reader;
}
/// Creates the symlink described by the inode at `path` (resolved against the
/// current working directory).
///
/// Only the link itself is created; this function makes no metadata call.
/// The inode must be a symlink or ext_symlink: any other payload hits `unreachable`.
fn extractSymlink(self: Inode, path: []const u8) !void {
const target = switch (self.data) {
.symlink => |s| s.target,
.ext_symlink => |s| s.target,
else => unreachable,
};
try std.fs.cwd().symLink(target, path, .{});
}
/// Creates the device or IPC node described by the inode at `path`, then applies metadata.
///
/// The node is created with the raw Linux `mknod` syscall; its mode carries only the
/// file-type bits (character device, block device, FIFO or socket). Device inodes pass
/// their `dev` number, FIFOs and sockets pass 0.
///
/// The inode must be a char_dev, block_dev, fifo, socket or one of their extended
/// counterparts: any other payload hits `unreachable`.
///
/// Errors: `error.NameTooLong` when `path` does not fit a POSIX path buffer, the
/// `std.fs.Dir.MakeError` value mapped from the syscall's errno, or anything returned
/// while opening the node and setting its metadata.
fn extractDeviceAndIPC(self: Inode, alloc: std.mem.Allocator, archive: *Archive, path: []const u8, options: ExtractionOptions) !void {
    const mode: u32 = switch (self.data) {
        .char_dev, .ext_char_dev => std.posix.S.IFCHR,
        .block_dev, .ext_block_dev => std.posix.S.IFBLK,
        .fifo, .ext_fifo => std.posix.S.IFIFO,
        .socket, .ext_socket => std.posix.S.IFSOCK,
        else => unreachable,
    };
    const dev: u32 = switch (self.data) {
        .char_dev => |d| d.dev,
        .ext_char_dev => |d| d.dev,
        .block_dev => |d| d.dev,
        .ext_block_dev => |d| d.dev,
        else => 0,
    };

    // The syscall needs a NUL-terminated string; `path` is a plain slice with no
    // terminator guarantee, so copy it into a sentinel-terminated buffer first.
    const path_z = try std.posix.toPosixPath(path);
    // Raw syscalls report failure as a negated errno in the return value; `E.init`
    // decodes that (a plain integer-to-enum cast does not).
    const res = std.os.linux.E.init(std.os.linux.mknod(&path_z, mode, dev));
    switch (res) {
        .SUCCESS => {},
        .ACCES => return std.fs.Dir.MakeError.AccessDenied,
        .DQUOT => return std.fs.Dir.MakeError.DiskQuota,
        .EXIST => return std.fs.Dir.MakeError.PathAlreadyExists,
        .FAULT, .NOENT => return std.fs.Dir.MakeError.BadPathName,
        .LOOP => return std.fs.Dir.MakeError.SymLinkLoop,
        .NAMETOOLONG => return std.fs.Dir.MakeError.NameTooLong,
        .NOMEM => return std.fs.Dir.MakeError.SystemResources,
        .NOSPC => return std.fs.Dir.MakeError.NoSpaceLeft,
        .NOTDIR => return std.fs.Dir.MakeError.NotDir,
        .PERM => return std.fs.Dir.MakeError.PermissionDenied,
        .ROFS => return std.fs.Dir.MakeError.ReadOnlyFileSystem,
        else => return blk: {
            std.debug.print("unhandled mknod result: {}\n", .{res});
            break :blk std.fs.Dir.MakeError.Unexpected;
        },
    }

    // NOTE(review): this opens the freshly created node itself; for a FIFO a plain
    // read-only open may block until a writer appears — confirm this is intended.
    var fil = try std.fs.cwd().openFile(path, .{});
    defer fil.close();
    try self.setMetadata(alloc, archive, fil, options);
}