Finished (?) file extraction

This commit is contained in:
Caleb Gardner
2025-07-18 05:41:27 -05:00
parent b4af1233e5
commit de988f083f
6 changed files with 323 additions and 110 deletions
+1 -1
View File
@@ -14,7 +14,7 @@ verbose: bool = false,
/// Verbose logging writer. If not set, stdout is used.
verbose_logger: ?std.io.AnyWriter = null,
/// Number of threads used during extraction. Defaults to std.Thread.getCpuCount().
thread_count: u32,
thread_count: usize,
pub fn init() !Self {
return .{
+121 -33
View File
@@ -1,4 +1,5 @@
const std = @import("std");
const builtin = @import("builtin");
const dir = @import("directory.zig");
@@ -11,14 +12,14 @@ const DataReader = @import("reader/data.zig").DataReader;
const Compression = @import("superblock.zig").Compression;
const MetadataReader = @import("reader/metadata.zig").MetadataReader;
pub const FileError = error{
NotRegular,
NotDirectory,
NotFound,
};
pub fn File(comptime T: type) type {
return struct {
pub const FileError = error{
NotRegular,
NotDirectory,
NotFound,
};
const Self = @This();
rdr: *SfsReader(T),
@@ -32,10 +33,12 @@ pub fn File(comptime T: type) type {
data_reader: ?DataReader(T) = null,
pub fn init(rdr: *SfsReader(T), inode: Inode, name: []const u8) !Self {
const name_cpy: []u8 = try rdr.alloc.alloc(u8, name.len);
@memcpy(name_cpy, name);
var out = Self{
.rdr = rdr,
.inode = inode,
.name = name,
.name = name_cpy,
};
switch (inode.data) {
.dir => |d| {
@@ -109,6 +112,7 @@ pub fn File(comptime T: type) type {
return .init(rdr, inode, ent.name);
}
pub fn deinit(self: Self) void {
self.rdr.alloc.free(self.name);
self.inode.deinit(self.rdr.alloc);
if (self.entries != null) {
for (self.entries.?) |e| {
@@ -121,6 +125,13 @@ pub fn File(comptime T: type) type {
}
}
/// Returns the entry of the reader's id table selected by the inode header's
/// `uid_idx`. Any error from the table lookup is passed through.
pub fn uid(self: Self) !u32 {
    const table_idx = self.inode.hdr.uid_idx;
    return self.rdr.id_table.get(table_idx);
}
/// Returns the entry of the reader's id table selected by the inode header's
/// group index. Any error from the table lookup is passed through.
pub fn gid(self: Self) !u32 {
    // This used to index with `uid_idx`, which made gid() return the uid.
    // NOTE(review): assumes the header field is named `gid_idx`, mirroring
    // `uid_idx` — confirm against the inode header definition.
    return self.rdr.id_table.get(self.inode.hdr.gid_idx);
}
const Reader = std.io.GenericReader(*DataReader(T), anyerror, DataReader(T).read);
pub fn read(self: *Self, buf: []u8) !usize {
@@ -172,59 +183,136 @@ pub fn File(comptime T: type) type {
}
};
const WaitGroup = std.Thread.WaitGroup;
const Pool = std.Thread.Pool;
pub const ExtractError = error{FileExists};
pub fn extract(self: Self, op: *ExtractionOptions, path: []const u8) !void {
if(op.verbose and op.verbose_logger == null){
if (op.verbose and op.verbose_logger == null) {
op.verbose_logger = std.io.getStdOut().writer().any();
}
var wg: std.Thread.WaitGroup = .{};
var pol: std.Thread.Pool = undefined;
var exists = true;
var stat: ?std.fs.File.Stat = null;
if (std.fs.cwd().statFile(path)) |s| {
stat = s;
} else |err| {
if (err == std.fs.File.OpenError.FileNotFound) {
exists = false;
} else {
return err;
}
}
switch (self.inode.hdr.type) {
.dir, .ext_dir => {
if (exists and stat.?.kind != .directory) {
return ExtractError.FileExists;
} else if (!exists) {
try std.fs.cwd().makeDir(path);
}
},
else => if (exists) return ExtractError.FileExists,
}
var wg: WaitGroup = .{};
var pol: Pool = undefined;
try pol.init(.{
.n_jobs = op.thread_count,
.allocator = self.rdr.alloc,
});
defer pol.deinit();
var errs: std.ArrayList(anyerror) = .init(self.rdr.alloc);
defer errs.deinit();
try self.extractReal(op, &errs, &wg, &pol, path);
wg.wait();
if (errs.items.len > 0) return errs.items[0];
}
fn extractReal(self: Self, op: *ExtractionOptions, path: []const u8) !void{
switch (self.inode.hdr.type) {
.dir, .ext_dir => self.extractDir(path),
.file, .ext_file => self.extractReg(op, path),
.symlink, .ext_symlink => self.extractSymlink(op, path),
fn extractReal(
self: Self,
op: *ExtractionOptions,
errs: *std.ArrayList(anyerror),
wg: *WaitGroup,
pol: *Pool,
path: []const u8,
) !void {
return switch (self.inode.hdr.type) {
.dir, .ext_dir => self.extractDir(op, errs, wg, pol, path),
.file, .ext_file => self.extractReg(op, errs, wg, pol, path),
.symlink, .ext_symlink => self.extractSymlink(op, errs, wg, pol, path),
.block_dev,
.ext_block_dev,
.char_dev,
.ext_char_dev,
.fifo,
.ext_fifo,
=> self.extractDev(path),
=> self.extractDev(op, path),
else => {
if(op.verbose){
std.fmt.format(op.verbose_logger.?, "inode {} \"{}\" is a socket. Ignoring.\n");
return;
if (op.verbose) {
std.fmt.format(
op.verbose_logger.?,
"inode {} \"{s}\" is a socket file. Ignoring.\n",
.{ self.inode.hdr.num, self.name },
) catch {};
}
}
}
},
};
}
/// Placeholder for directory extraction; not implemented yet.
/// Returns without doing anything when `errs` already holds an error,
/// otherwise fails with `error.TODO`.
fn extractDir(
    self: Self,
    op: *ExtractionOptions,
    errs: *std.ArrayList(anyerror),
    wg: *WaitGroup,
    pol: *Pool,
    path: []const u8,
) !void {
    // Only `errs` is consulted until the real implementation exists.
    _ = path;
    _ = pol;
    _ = wg;
    _ = op;
    _ = self;
    const already_failed = errs.items.len != 0;
    if (!already_failed) return error.TODO;
}
/// Starts extracting a regular file to `path` without waiting for the copy.
///
/// Creates the destination file, registers one unit of work on `wg`, and hands
/// the write to `DataReader.writeToNoBlock` with `extractRegFinish` as the
/// completion callback. Does nothing when `errs` already holds an error.
/// Returns `FileError.NotRegular` when this file has no data reader.
fn extractReg(
    self: Self,
    op: *ExtractionOptions,
    errs: *std.ArrayList(anyerror),
    wg: *WaitGroup,
    pol: *Pool,
    path: []const u8,
) !void {
    if (errs.items.len > 0) return;
    // Work on a local copy instead of `@constCast`-ing the by-value `self`
    // parameter: parameters are immutable, and writeToNoBlock takes the
    // reader by value anyway. Checking before createFile also avoids leaving
    // an empty file behind when there is nothing to read from.
    var data_rdr = self.data_reader orelse return FileError.NotRegular;
    data_rdr.setPool(pol);

    const fil = try std.fs.cwd().createFile(path, .{});
    wg.start();
    // writeToNoBlock only reaches wg.finish() once every block has been
    // written, so a failure to start the write has to balance wg.start() here.
    errdefer wg.finish();
    // NOTE(review): `fil` is deliberately not closed on failure, because tasks
    // spawned before the failing one may still be writing to it. Decide who
    // owns the handle in that case.
    try data_rdr.writeToNoBlock(errs, fil, wg, extractRegFinish, .{ self, fil });
    _ = op;
    //TODO: add some way of verbose logging of the errors for this file in particular.
}
/// Completion callback that `extractReg` passes to `writeToNoBlock`.
/// Currently it only closes the extracted file.
fn extractRegFinish(self: Self, fil: std.fs.File) void {
    //TODO: set owners & permissions. Check if we need to call self.deinit();
    _ = self;
    fil.close();
}
/// Placeholder for symlink extraction; not implemented yet.
/// Returns without doing anything when `errs` already holds an error,
/// otherwise fails with `error.TODO`.
fn extractSymlink(
    self: Self,
    op: *ExtractionOptions,
    errs: *std.ArrayList(anyerror),
    wg: *WaitGroup,
    pol: *Pool,
    path: []const u8,
) !void {
    // Only `errs` is consulted until the real implementation exists.
    _ = path;
    _ = pol;
    _ = wg;
    _ = op;
    _ = self;
    const already_failed = errs.items.len != 0;
    if (!already_failed) return error.TODO;
}
fn extractDir(self: Self, op: *ExtractionOptions, path: []const u8) !void {}
fn extractReg(self: Self, op: *ExtractionOptions, path: []const u8) !void {}
fn extractSymlink(self: Self, op: *ExtractionOptions, path: []const u8) !void {}
fn extractDev(self: Self, op: *ExtractionOptions, path: []const u8) !void {
if (exists) return ExtractError.FileExists;
comptime if (builtin.os.tag != .linux) {
if(op.ver)
if (comptime builtin.os.tag != .linux) {
if (op.verbose) {
std.fmt.format(
op.verbose_logger.?,
"inode {} \"{s}\" is a device/fifo file and the OS is not Linux. Ignoring.\n",
.{ self.inode.hdr.num, self.name },
) catch {};
}
return;
}
const mode: u32 = switch (self.inode.header.inode_type) {
.block, .ext_block => std.posix.S.IFBLK,
.char, .ext_char => std.posix.S.IFCHR,
const mode: u32 = switch (self.inode.hdr.type) {
.block_dev, .ext_block_dev => std.posix.S.IFBLK,
.char_dev, .ext_char_dev => std.posix.S.IFCHR,
.fifo, .ext_fifo => std.posix.S.IFIFO,
else => unreachable,
};
const dev = switch (self.inode.data) {
.block, .char => |b| b.device,
.ext_block, .ext_char => |b| b.device,
.block_dev, .char_dev => |b| b.device,
.ext_block_dev, .ext_char_dev => |b| b.device,
.fifo, .ext_fifo => 0,
else => unreachable,
};
_ = std.os.linux.mknod(@ptrCast(real_path), mode, dev);
_ = std.os.linux.mknod(@ptrCast(path), mode, dev);
}
};
}
+1 -1
View File
@@ -19,7 +19,7 @@ pub const File = struct {
const frag_idx = std.mem.readInt(u32, fixed[4..8], .little);
const size = std.mem.readInt(u32, fixed[12..16], .little);
var blocks: u32 = size / block_size;
if (size % block_size > 0 and frag_idx != 0xffffffff) {
if (size % block_size > 0 and frag_idx == 0xffffffff) {
blocks += 1;
}
const block_sizes = try alloc.alloc(BlockSize, blocks);
+12 -2
View File
@@ -6,6 +6,7 @@ const Table = @import("table.zig").Table;
const PRead = @import("reader/p_read.zig").PRead;
const FragEntry = @import("fragment.zig").FragEntry;
const Superblock = @import("superblock.zig").Superblock;
const ExtractionOptions = @import("extract_options.zig");
const MetadataReader = @import("reader/metadata.zig").MetadataReader;
pub const SfsError = error{
@@ -47,17 +48,26 @@ pub fn SfsReader(comptime T: type) type {
self.export_table.deinit();
}
/// A representation of the archive's root folder.
pub fn root(self: *Self) !File(T) {
return .initFromRef(self, self.super.root_ref, "");
}
/// Get the file at path. Equivalent to calling open on the root File.
pub fn open(self: *Self, path: []const u8) !File(T) {
var rt = try self.root();
if (path.len == 0 or (path.len == 1 and path[0] == '/')) return rt;
if (path.len == 0 or (path.len == 1 and path[0] == '/') or path.len == 1 and path[0] == '.') return rt;
defer rt.deinit();
return rt.open(path);
}
/// Extract the entire archive to the given path with the given options.
/// Equivalent to calling extract on the root File.
/// The root File is released again before returning; errors from opening the
/// root or from the extraction are returned to the caller.
pub fn extract(self: *Self, op: *ExtractionOptions, path: []const u8) !void {
    // `const`: File.extract and File.deinit both take the File by value, and
    // Zig rejects a `var` that is never mutated.
    const rt = try self.root();
    defer rt.deinit();
    return rt.extract(op, path);
}
/// Returns the inode with the given Inode Number.
/// Returns the Inode with the given Inode Number.
/// Requires the archive to have an export table.
pub fn inodeAt(self: Self, num: u32) !Inode {
if (!self.super.flags.has_export) return SfsError.NotExportable;
+172 -58
View File
@@ -8,6 +8,7 @@ const Compression = @import("../superblock.zig").Compression;
const DataReaderError = error{
EOF,
ThreadPoolNotSet,
InvalidIndex,
};
pub fn DataReader(comptime T: type) type {
@@ -85,13 +86,14 @@ pub fn DataReader(comptime T: type) type {
self.pool = pool;
}
fn blockAt(self: Self, idx: u32) ![]u8 {
fn blockAt(self: Self, idx: usize) ![]u8 {
if (self.frag.len > 0 and idx == self.sizes.len) return self.frag;
if (idx >= self.sizes.len) return DataReaderError.InvalidIndex;
const size = if (idx == self.sizes.len - 1 and self.frag.len == 0) {
self.file_size % self.block_size;
} else {
self.block_size;
const size = blk: {
if (idx == self.sizes.len - 1 and self.frag.len == 0) {
break :blk self.file_size % self.block_size;
}
break :blk self.block_size;
};
const block = try self.alloc.alloc(u8, size);
errdefer self.alloc.free(block);
@@ -143,56 +145,91 @@ pub fn DataReader(comptime T: type) type {
return .{ .context = self };
}
/// Write the entire file's contents to the writer.
/// Write the entire file's contents to the writer using multiple threads.
/// If available, pwrite will be used.
pub fn writeTo(self: Self, writer: anytype) !usize {
if (comptime self.pool == null) return DataReaderError.ThreadPoolNotSet;
const mut: std.Thread.Mutex = .{};
if (self.pool == null) return DataReaderError.ThreadPoolNotSet;
var mut: std.Thread.Mutex = .{};
var cur_idx: usize = 0;
const wg: std.Thread.WaitGroup = .{};
const completed = comptime if (std.meta.hasFn(@TypeOf(writer), "pwrite")) {
std.ArrayList(anyerror).init(self.alloc);
} else {
std.AutoArrayHashMap(usize, anyerror![]u8).init(self.alloc);
};
var wg: std.Thread.WaitGroup = .{};
var completed: std.AutoArrayHashMap(usize, anyerror![]u8) = .init(self.alloc);
defer completed.deinit();
var errs: std.ArrayList(anyerror) = .init(self.alloc);
defer errs.deinit();
for (0..self.numBlocks()) |i| {
wg.start();
self.pool.?.spawn(
comptime if (std.meta.hasFn(@TypeOf(writer), "pwrite")) {
extractThreadedPWrite;
} else {
extractThreaded;
comptime blk: {
if (std.meta.hasFn(@TypeOf(writer), "pwrite")) {
break :blk writeToThreadPWrite;
}
break :blk writeToThread;
},
comptime if (std.meta.hasFn(@TypeOf(writer), "pwrite")) {
.{ self, &wg, &completed, i, writer };
} else {
.{ self, &mut, &cur_idx, &wg, &completed, i, writer };
blk: {
if (comptime std.meta.hasFn(@TypeOf(writer), "pwrite")) {
break :blk .{ self, &wg, &errs, i, writer };
}
break :blk .{ self, &wg, &mut, &cur_idx, &completed, i, writer };
},
);
}
std.Thread.yield();
wg.wait();
if (completed.items.len > 0) {
return completed.items.get(0);
}
if (errs.items.len > 0) return errs.items[0];
return self.file_size;
}
pub fn writeToThreaded(self: Self, errs: *std.ArrayList(anyerror), wg: *std.Thread.WaitGroup, writer: anytype) void {}
fn extractThreaded(
/// Similar to writeTo, but does not block until finished.
/// When all blocks have been written, on_finish and wg.finish() (in that order) will be called.
/// If there are no blocks at all, both are called directly before returning.
/// NOTE: wg.start() is not called; the caller is expected to have done so.
///
/// Returns DataReaderError.ThreadPoolNotSet when no pool has been set, and
/// passes through any error from spawning a pool task. Errors hit while
/// writing individual blocks are appended to `errs` by the worker tasks.
pub fn writeToNoBlock(
    self: Self,
    errs: *std.ArrayList(anyerror),
    writer: anytype,
    wg: *std.Thread.WaitGroup,
    comptime on_finish: anytype,
    on_finish_args: anytype,
) !void {
    const pool = self.pool orelse return DataReaderError.ThreadPoolNotSet;
    const use_pwrite = comptime std.meta.hasFn(@TypeOf(writer), "pwrite");

    const block_count = self.numBlocks();
    if (block_count == 0) {
        // No task would ever reach the "last block finished" branch, so the
        // completion callback and wg.finish() must happen here or the
        // caller's wg.wait() never returns.
        @call(.auto, on_finish, on_finish_args);
        wg.finish();
        return;
    }

    // NOTE(review): the addresses of the locals below are handed to pool
    // tasks, but this function returns without waiting for those tasks.
    // Confirm the tasks cannot touch them after return; they most likely need
    // to live on the heap or be owned by the caller.
    // `mut` has to be `var`: the workers take a `*std.Thread.Mutex`.
    var mut: std.Thread.Mutex = .{};
    var cur_idx: usize = 0;
    var block_wg: std.Thread.WaitGroup = .{};
    var finish_mut: std.Thread.Mutex = .{};
    // Out-of-order blocks are only buffered when the writer cannot pwrite.
    var completed: ?std.AutoHashMap(usize, []u8) = null;
    if (!use_pwrite) {
        completed = std.AutoHashMap(usize, []u8).init(self.alloc);
    }

    block_wg.startMany(block_count);
    for (0..block_count) |i| {
        if (use_pwrite) {
            try pool.spawn(
                noBlockThreadPWrite,
                .{ self, &block_wg, errs, i, writer, wg, &finish_mut, on_finish, on_finish_args },
            );
        } else {
            try pool.spawn(
                noBlockThread,
                .{ self, &block_wg, &mut, &cur_idx, errs, &completed.?, i, writer, wg, &finish_mut, on_finish, on_finish_args },
            );
        }
    }
}
fn writeBlockTo(
self: Self,
mut: *std.Thread.Mutex,
cur_idx: *usize,
wg: *std.Thread.WaitGroup,
completed: *std.AutoArrayHashMap(usize, anyerror![]u8),
errs: *std.ArrayList(anyerror),
completed: *std.AutoHashMap(usize, []u8),
idx: usize,
writer: anytype,
) void {
if (cur_idx.* >= self.sizes.len + 1) return;
defer wg.finish();
) !void {
//TODO: We can marginally reduce memory usage if we don't store sparse blocks in completed.
if (errs.items.len > 0) return; // Indicates an error has occured in another thread.
const block = self.blockAt(idx) catch |err| {
cur_idx.* = self.sizes.len + 1;
completed.put(idx, err) catch {};
errs.append(err) catch {};
return;
};
defer if (idx < self.sizes.len) {
@@ -202,14 +239,12 @@ pub fn DataReader(comptime T: type) type {
defer mut.unlock();
if (cur_idx.* == idx) {
_ = writer.write(block) catch |err| {
cur_idx.* = self.sizes.len + 1;
completed.put(idx, err) catch {};
errs.append(err) catch {};
return;
};
} else {
completed.put(idx, block) catch |err| {
cur_idx.* = self.sizes.len + 1;
completed.put(idx, err) catch {};
errs.append(err) catch {};
return;
};
}
@@ -217,35 +252,114 @@ pub fn DataReader(comptime T: type) type {
for (cur_idx.*..self.numBlocks()) |i| {
const val = completed.get(i);
if (val == null) return;
_ = writer.write(block) catch |err| {
cur_idx.* = self.sizes.len + 1;
completed.put(i, err) catch {};
_ = writer.write(val.?) catch |err| {
errs.append(err) catch {};
return;
};
_ = completed.remove(i);
cur_idx.* += 1;
if (completed.count() == 0) return;
}
}
fn extractThreadedPWrite(
fn writeBlockToPWrite(
self: Self,
wg: *std.Thread.WaitGroup,
completed: *std.ArrayList(anyerror),
errs: *std.ArrayList(anyerror),
idx: usize,
writer: anytype,
) void {
if (completed.items.len > 0) return;
defer wg.finish();
const block = self.blockAt(idx) catch |err| {
completed.append(err) catch {};
return;
};
defer if (idx < self.sizes.len) {
self.alloc.free(block);
};
_ = writer.pwrite(idx * self.block_size, block) catch |err| {
completed.append(err) catch {};
return;
};
if (errs.items.len > 0) return;
if (self.sizes[idx].size == 0) {
var pos = idx * self.block_size;
if (self.frag.len == 0 and idx == self.sizes.len - 1) {
pos += self.file_size % self.block_size;
} else {
pos += self.block_size;
}
_ = writer.pwrite(&[1]u8{0}, pos - 1) catch |err| {
errs.append(err) catch {};
};
} else {
const block = self.blockAt(idx) catch |err| {
errs.append(err) catch {};
return;
};
defer if (idx < self.sizes.len) {
self.alloc.free(block);
};
_ = writer.pwrite(block, idx * self.block_size) catch |err| {
errs.append(err) catch {};
return;
};
}
}
/// Pool task used by writeTo for writers without pwrite: writes block `idx`
/// through writeBlockTo and then signals `wg`.
fn writeToThread(
    self: Self,
    wg: *std.Thread.WaitGroup,
    mut: *std.Thread.Mutex,
    cur_idx: *usize,
    errs: *std.ArrayList(anyerror),
    completed: *std.AutoArrayHashMap(usize, anyerror![]u8),
    idx: usize,
    writer: anytype,
) void {
    // Signal the wait group on every exit path.
    defer wg.finish();
    // NOTE(review): writeBlockTo declares `completed` as
    // *std.AutoHashMap(usize, []u8), which does not match the type accepted
    // here. This signature, writeTo and writeBlockTo need to be aligned together.
    //
    // writeBlockTo returns an error union, which may not be ignored; record a
    // failure the same way its own error paths do.
    self.writeBlockTo(mut, cur_idx, errs, completed, idx, writer) catch |err| {
        errs.append(err) catch {};
    };
}
/// Pool task used by writeTo for writers that provide pwrite: writes block
/// `idx` through writeBlockToPWrite and then signals `wg`.
fn writeToThreadPWrite(
    self: Self,
    wg: *std.Thread.WaitGroup,
    errs: *std.ArrayList(anyerror),
    idx: usize,
    writer: anytype,
) void {
    // Runs after the block write below has returned.
    defer wg.finish();
    self.writeBlockToPWrite(errs, idx, writer);
}
/// Pool task used by writeToNoBlock for writers without pwrite.
/// Writes block `idx` through writeBlockTo and marks it finished on `block_wg`.
/// The task that finishes the last block frees `completed`, calls `on_finish`
/// with `on_finish_args`, and finally calls `finish_wg.finish()`.
fn noBlockThread(
    self: Self,
    block_wg: *std.Thread.WaitGroup,
    mut: *std.Thread.Mutex,
    cur_idx: *usize,
    errs: *std.ArrayList(anyerror),
    // Same map type that writeToNoBlock creates and writeBlockTo accepts.
    completed: *std.AutoHashMap(usize, []u8),
    idx: usize,
    writer: anytype,
    finish_wg: *std.Thread.WaitGroup,
    finish_mut: *std.Thread.Mutex,
    comptime on_finish: anytype,
    on_finish_args: anytype,
) void {
    // writeBlockTo returns an error union, which may not be ignored; record a
    // failure the same way its own error paths do.
    self.writeBlockTo(mut, cur_idx, errs, completed, idx, writer) catch |err| {
        errs.append(err) catch {};
    };

    // finish() and isDone() run under one lock so that exactly one task (the
    // one finishing the last block) observes isDone() == true. With finish()
    // outside the lock, the last two tasks could both see the group as done
    // and run on_finish twice.
    finish_mut.lock();
    defer finish_mut.unlock();
    block_wg.finish();
    if (block_wg.isDone()) {
        // Release the buffer map before signalling the outer wait group, so
        // nothing owned by this write is touched after the waiter is woken.
        completed.deinit();
        @call(.auto, on_finish, on_finish_args);
        finish_wg.finish();
    }
}
/// Pool task used by writeToNoBlock for writers that provide pwrite.
/// Writes block `idx` through writeBlockToPWrite and marks it finished on
/// `block_wg`. The task that finishes the last block calls `on_finish` with
/// `on_finish_args` and then `finish_wg.finish()`.
fn noBlockThreadPWrite(
    self: Self,
    block_wg: *std.Thread.WaitGroup,
    errs: *std.ArrayList(anyerror),
    idx: usize,
    writer: anytype,
    finish_wg: *std.Thread.WaitGroup,
    finish_mut: *std.Thread.Mutex,
    comptime on_finish: anytype,
    on_finish_args: anytype,
) void {
    self.writeBlockToPWrite(errs, idx, writer);

    // finish() and isDone() run under one lock so that exactly one task (the
    // one finishing the last block) observes isDone() == true. With finish()
    // outside the lock, the last two tasks could both see the group as done
    // and run on_finish twice.
    finish_mut.lock();
    defer finish_mut.unlock();
    block_wg.finish();
    if (block_wg.isDone()) {
        @call(.auto, on_finish, on_finish_args);
        finish_wg.finish();
    }
}
};
}
+16 -15
View File
@@ -5,12 +5,14 @@ pub const ExtractionOptions = @import("extract_options.zig");
pub const SfsFile = SfsReader(std.fs.File);
const test_file = "testing/LinuxPATest.sfs";
const test_archive = "testing/LinuxPATest.sfs";
const test_file = "Start.exe";
const file_extr_loc = "testing/Start.exe";
test "OpenFile" {
const fil = try std.fs.cwd().openFile(test_file, .{});
defer fil.close();
var rdr: SfsFile = try .init(std.testing.allocator, fil, 0);
const sfs_fil = try std.fs.cwd().openFile(test_archive, .{});
defer sfs_fil.close();
var rdr: SfsFile = try .init(std.testing.allocator, sfs_fil, 0);
defer rdr.deinit();
std.debug.print("{}\n", .{rdr.super});
const root = try rdr.root();
@@ -20,18 +22,17 @@ test "OpenFile" {
defer f.deinit();
std.debug.print("{s}\n", .{f.name});
}
var start = try root.open("Start.exe");
defer start.deinit();
const startReader = try start.reader();
_ = startReader;
}
test "ReadFile" {
const fil = try std.fs.cwd().openFile(test_file, .{});
defer fil.close();
var rdr: SfsFile = try .init(std.testing.allocator, fil, 0);
test "ExtractFile" {
std.fs.cwd().deleteFile(file_extr_loc) catch {};
const sfs_fil = try std.fs.cwd().openFile(test_archive, .{});
defer sfs_fil.close();
var rdr: SfsFile = try .init(std.testing.allocator, sfs_fil, 0);
defer rdr.deinit();
std.debug.print("{}\n", .{rdr.super});
const root = try rdr.root();
defer root.deinit();
const fil = try rdr.open(test_file);
defer fil.deinit();
var op: ExtractionOptions = try .init();
op.verbose = true;
try fil.extract(&op, file_extr_loc);
}