diff --git a/src/extract_options.zig b/src/extract_options.zig index 604d9cf..36b2232 100644 --- a/src/extract_options.zig +++ b/src/extract_options.zig @@ -14,7 +14,7 @@ verbose: bool = false, /// Verbose logging writer. If not set, stdout is used. verbose_logger: ?std.io.AnyWriter = null, /// Number of threads used during extraction. Defualts to std.Thread.getCpuCount(). -thread_count: u32, +thread_count: usize, pub fn init() !Self { return .{ diff --git a/src/file.zig b/src/file.zig index dc465af..bb10126 100644 --- a/src/file.zig +++ b/src/file.zig @@ -1,4 +1,5 @@ const std = @import("std"); +const builtin = @import("builtin"); const dir = @import("directory.zig"); @@ -11,14 +12,14 @@ const DataReader = @import("reader/data.zig").DataReader; const Compression = @import("superblock.zig").Compression; const MetadataReader = @import("reader/metadata.zig").MetadataReader; -pub const FileError = error{ - NotRegular, - NotDirectory, - NotFound, -}; - pub fn File(comptime T: type) type { return struct { + pub const FileError = error{ + NotRegular, + NotDirectory, + NotFound, + }; + const Self = @This(); rdr: *SfsReader(T), @@ -32,10 +33,12 @@ pub fn File(comptime T: type) type { data_reader: ?DataReader(T) = null, pub fn init(rdr: *SfsReader(T), inode: Inode, name: []const u8) !Self { + const name_cpy: []u8 = try rdr.alloc.alloc(u8, name.len); + @memcpy(name_cpy, name); var out = Self{ .rdr = rdr, .inode = inode, - .name = name, + .name = name_cpy, }; switch (inode.data) { .dir => |d| { @@ -109,6 +112,7 @@ pub fn File(comptime T: type) type { return .init(rdr, inode, ent.name); } pub fn deinit(self: Self) void { + self.rdr.alloc.free(self.name); self.inode.deinit(self.rdr.alloc); if (self.entries != null) { for (self.entries.?) |e| { @@ -121,6 +125,13 @@ pub fn File(comptime T: type) type { } } + pub fn uid(self: Self) !u32 { + return self.rdr.id_table.get(self.inode.hdr.uid_idx); + } + pub fn gid(self: Self) !u32 { + return self.rdr.id_table.get(self.inode.hdr.uid_idx); + } + const Reader = std.io.GenericReader(*DataReader(T), anyerror, DataReader(T).read); pub fn read(self: *Self, buf: []u8) !usize { @@ -172,59 +183,136 @@ pub fn File(comptime T: type) type { } }; + const WaitGroup = std.Thread.WaitGroup; + const Pool = std.Thread.Pool; + + pub const ExtractError = error{FileExists}; + pub fn extract(self: Self, op: *ExtractionOptions, path: []const u8) !void { - if(op.verbose and op.verbose_logger == null){ + if (op.verbose and op.verbose_logger == null) { op.verbose_logger = std.io.getStdOut().writer().any(); } - var wg: std.Thread.WaitGroup = .{}; - var pol: std.Thread.Pool = undefined; + var exists = true; + var stat: ?std.fs.File.Stat = null; + if (std.fs.cwd().statFile(path)) |s| { + stat = s; + } else |err| { + if (err == std.fs.File.OpenError.FileNotFound) { + exists = false; + } else { + return err; + } + } + switch (self.inode.hdr.type) { + .dir, .ext_dir => { + if (exists and stat.?.kind != .directory) { + return ExtractError.FileExists; + } else if (!exists) { + try std.fs.cwd().makeDir(path); + } + }, + else => if (exists) return ExtractError.FileExists, + } + var wg: WaitGroup = .{}; + var pol: Pool = undefined; try pol.init(.{ .n_jobs = op.thread_count, .allocator = self.rdr.alloc, }); + defer pol.deinit(); + var errs: std.ArrayList(anyerror) = .init(self.rdr.alloc); + defer errs.deinit(); + try self.extractReal(op, &errs, &wg, &pol, path); + wg.wait(); + if (errs.items.len > 0) return errs.items[0]; } - fn extractReal(self: Self, op: *ExtractionOptions, path: []const u8) !void{ - switch (self.inode.hdr.type) { - .dir, .ext_dir => self.extractDir(path), - .file, .ext_file => self.extractReg(op, path), - .symlink, .ext_symlink => self.extractSymlink(op, path), + fn extractReal( + self: Self, + op: *ExtractionOptions, + errs: *std.ArrayList(anyerror), + wg: *WaitGroup, + pol: *Pool, + path: []const u8, + ) !void { + return switch (self.inode.hdr.type) { + .dir, .ext_dir => self.extractDir(op, errs, wg, pol, path), + .file, .ext_file => self.extractReg(op, errs, wg, pol, path), + .symlink, .ext_symlink => self.extractSymlink(op, errs, wg, pol, path), .block_dev, .ext_block_dev, .char_dev, .ext_char_dev, .fifo, .ext_fifo, - => self.extractDev(path), + => self.extractDev(op, path), else => { - if(op.verbose){ - std.fmt.format(op.verbose_logger.?, "inode {} \"{}\" is a socket. Ignoring.\n"); - return; + if (op.verbose) { + std.fmt.format( + op.verbose_logger.?, + "inode {} \"{s}\" is a socket file. Ignoring.\n", + .{ self.inode.hdr.num, self.name }, + ) catch {}; } - } - } + }, + }; + } + fn extractDir(self: Self, op: *ExtractionOptions, errs: *std.ArrayList(anyerror), wg: *WaitGroup, pol: *Pool, path: []const u8) !void { + if (errs.items.len > 0) return; + _ = self; + _ = op; + _ = wg; + _ = pol; + _ = path; + return error{TODO}.TODO; + } + fn extractReg(self: Self, op: *ExtractionOptions, errs: *std.ArrayList(anyerror), wg: *WaitGroup, pol: *Pool, path: []const u8) !void { + if (errs.items.len > 0) return; + const fil = try std.fs.cwd().createFile(path, .{}); + @constCast(&self.data_reader.?).setPool(pol); + wg.start(); + try self.data_reader.?.writeToNoBlock(errs, fil, wg, extractRegFinish, .{ self, fil }); + _ = op; + //TODO: add some way of verbose logging of the errors for this file in particular. + return; + } + fn extractRegFinish(self: Self, fil: std.fs.File) void { + defer fil.close(); + //TODO: set owners & permissions. Check if we need to call self.deinit(); + _ = self; + } + fn extractSymlink(self: Self, op: *ExtractionOptions, errs: *std.ArrayList(anyerror), wg: *WaitGroup, pol: *Pool, path: []const u8) !void { + if (errs.items.len > 0) return; + _ = self; + _ = op; + _ = wg; + _ = pol; + _ = path; + return error{TODO}.TODO; } - fn extractDir(self: Self, op: *ExtractionOptions, path: []const u8) !void {} - fn extractReg(self: Self, op: *ExtractionOptions, path: []const u8) !void {} - fn extractSymlink(self: Self, op: *ExtractionOptions, path: []const u8) !void {} fn extractDev(self: Self, op: *ExtractionOptions, path: []const u8) !void { - if (exists) return ExtractError.FileExists; - comptime if (builtin.os.tag != .linux) { - if(op.ver) + if (comptime builtin.os.tag != .linux) { + if (op.verbose) { + std.fmt.format( + op.verbose_logger.?, + "inode {} \"{s}\" is a device/fifo file and the OS is not Linux. Ignoring.\n", + .{ self.inode.hdr.num, self.name }, + ) catch {}; + } return; } - const mode: u32 = switch (self.inode.header.inode_type) { - .block, .ext_block => std.posix.S.IFBLK, - .char, .ext_char => std.posix.S.IFCHR, + const mode: u32 = switch (self.inode.hdr.type) { + .block_dev, .ext_block_dev => std.posix.S.IFBLK, + .char_dev, .ext_char_dev => std.posix.S.IFCHR, .fifo, .ext_fifo => std.posix.S.IFIFO, else => unreachable, }; const dev = switch (self.inode.data) { - .block, .char => |b| b.device, - .ext_block, .ext_char => |b| b.device, + .block_dev, .char_dev => |b| b.device, + .ext_block_dev, .ext_char_dev => |b| b.device, .fifo, .ext_fifo => 0, else => unreachable, }; - _ = std.os.linux.mknod(@ptrCast(real_path), mode, dev); + _ = std.os.linux.mknod(@ptrCast(path), mode, dev); } }; } diff --git a/src/inode/file.zig b/src/inode/file.zig index fb6b659..86af63b 100644 --- a/src/inode/file.zig +++ b/src/inode/file.zig @@ -19,7 +19,7 @@ pub const File = struct { const frag_idx = std.mem.readInt(u32, fixed[4..8], .little); const size = std.mem.readInt(u32, fixed[12..16], .little); var blocks: u32 = size / block_size; - if (size % block_size > 0 and frag_idx != 0xffffffff) { + if (size % block_size > 0 and frag_idx == 0xffffffff) { blocks += 1; } const block_sizes = try alloc.alloc(BlockSize, blocks); diff --git a/src/reader.zig b/src/reader.zig index 4c20de0..b0f69e2 100644 --- a/src/reader.zig +++ b/src/reader.zig @@ -6,6 +6,7 @@ const Table = @import("table.zig").Table; const PRead = @import("reader/p_read.zig").PRead; const FragEntry = @import("fragment.zig").FragEntry; const Superblock = @import("superblock.zig").Superblock; +const ExtractionOptions = @import("extract_options.zig"); const MetadataReader = @import("reader/metadata.zig").MetadataReader; pub const SfsError = error{ @@ -47,17 +48,26 @@ pub fn SfsReader(comptime T: type) type { self.export_table.deinit(); } + /// A representation of the archives root folder. pub fn root(self: *Self) !File(T) { return .initFromRef(self, self.super.root_ref, ""); } + /// Get the file at path. Equivelent to calling open on the root File. pub fn open(self: *Self, path: []const u8) !File(T) { var rt = try self.root(); - if (path.len == 0 or (path.len == 1 and path[0] == '/')) return rt; + if (path.len == 0 or (path.len == 1 and path[0] == '/') or path.len == 1 and path[0] == '.') return rt; defer rt.deinit(); return rt.open(path); } + /// Extract the entire archive to the given path & with the given options. + /// Equivelent to calling extract on the root File. + pub fn extract(self: *Self, op: *ExtractionOptions, path: []const u8) !void { + var rt = try self.root(); + defer rt.deinit(); + return rt.extract(op, path); + } - /// Returns the inode with the given Inode Number. + /// Returns the Inode with the given Inode Number. /// Requires the archive to have an export table. pub fn inodeAt(self: Self, num: u32) !Inode { if (!self.super.flags.has_export) return SfsError.NotExportable; diff --git a/src/reader/data.zig b/src/reader/data.zig index 2541ace..2cffe48 100644 --- a/src/reader/data.zig +++ b/src/reader/data.zig @@ -8,6 +8,7 @@ const Compression = @import("../superblock.zig").Compression; const DataReaderError = error{ EOF, ThreadPoolNotSet, + InvalidIndex, }; pub fn DataReader(comptime T: type) type { @@ -85,13 +86,14 @@ pub fn DataReader(comptime T: type) type { self.pool = pool; } - fn blockAt(self: Self, idx: u32) ![]u8 { + fn blockAt(self: Self, idx: usize) ![]u8 { if (self.frag.len > 0 and idx == self.sizes.len) return self.frag; if (idx >= self.sizes.len) return DataReaderError.InvalidIndex; - const size = if (idx == self.sizes.len - 1 and self.frag.len == 0) { - self.file_size % self.block_size; - } else { - self.block_size; + const size = blk: { + if (idx == self.sizes.len - 1 and self.frag.len == 0) { + break :blk self.file_size % self.block_size; + } + break :blk self.block_size; }; const block = try self.alloc.alloc(u8, size); errdefer self.alloc.free(block); @@ -143,56 +145,91 @@ pub fn DataReader(comptime T: type) type { return .{ .context = self }; } - /// Write the entire file's contents to the writer. + /// Write the entire file's contents to the writer using multiple threads. /// If availble, pwrite will be used. pub fn writeTo(self: Self, writer: anytype) !usize { - if (comptime self.pool == null) return DataReaderError.ThreadPoolNotSet; - const mut: std.Thread.Mutex = .{}; + if (self.pool == null) return DataReaderError.ThreadPoolNotSet; + var mut: std.Thread.Mutex = .{}; var cur_idx: usize = 0; - const wg: std.Thread.WaitGroup = .{}; - const completed = comptime if (std.meta.hasFn(@TypeOf(writer), "pwrite")) { - std.ArrayList(anyerror).init(self.alloc); - } else { - std.AutoArrayHashMap(usize, anyerror![]u8).init(self.alloc); - }; + var wg: std.Thread.WaitGroup = .{}; + var completed: std.AutoArrayHashMap(usize, anyerror![]u8) = .init(self.alloc); defer completed.deinit(); + var errs: std.ArrayList(anyerror) = .init(self.alloc); + defer errs.deinit(); for (0..self.numBlocks()) |i| { wg.start(); self.pool.?.spawn( - comptime if (std.meta.hasFn(@TypeOf(writer), "pwrite")) { - extractThreadedPWrite; - } else { - extractThreaded; + comptime blk: { + if (std.meta.hasFn(@TypeOf(writer), "pwrite")) { + break :blk writeToThreadPWrite; + } + break :blk writeToThread; }, - comptime if (std.meta.hasFn(@TypeOf(writer), "pwrite")) { - .{ self, &wg, &completed, i, writer }; - } else { - .{ self, &mut, &cur_idx, &wg, &completed, i, writer }; + blk: { + if (comptime std.meta.hasFn(@TypeOf(writer), "pwrite")) { + break :blk .{ self, &wg, &errs, i, writer }; + } + break :blk .{ self, &wg, &mut, &cur_idx, &completed, i, writer }; }, ); } - std.Thread.yield(); wg.wait(); - if (completed.items.len > 0) { - return completed.items.get(0); - } + if (errs.items.len > 0) return errs.items[0]; return self.file_size; } - pub fn writeToThreaded(self: Self, errs: *std.ArrayList(anyerror), wg: *std.Thread.WaitGroup, writer: anytype) void {} - fn extractThreaded( + /// Similiar to writeTo, but does not block until finished. + /// When all blocks have been written, on_finish and wg.finish() (in that order) will be called. + /// NOTE: wg.start() is not called; + pub fn writeToNoBlock( + self: Self, + errs: *std.ArrayList(anyerror), + writer: anytype, + wg: *std.Thread.WaitGroup, + comptime on_finish: anytype, + on_finish_args: anytype, + ) !void { + if (self.pool == null) return DataReaderError.ThreadPoolNotSet; + const mut: std.Thread.Mutex = .{}; + var cur_idx: usize = 0; + var block_wg: std.Thread.WaitGroup = .{}; + var finish_mut: std.Thread.Mutex = .{}; + var completed: ?std.AutoHashMap(usize, []u8) = null; + if (!comptime std.meta.hasFn(@TypeOf(writer), "pwrite")) { + completed = std.AutoHashMap(usize, []u8).init(self.alloc); + } + block_wg.startMany(self.numBlocks()); + for (0..self.numBlocks()) |i| { + try self.pool.?.spawn( + comptime blk: { + if (std.meta.hasFn(@TypeOf(writer), "pwrite")) { + break :blk noBlockThreadPWrite; + } + break :blk noBlockThread; + }, + blk: { + if (comptime std.meta.hasFn(@TypeOf(writer), "pwrite")) { + break :blk .{ self, &block_wg, errs, i, writer, wg, &finish_mut, on_finish, on_finish_args }; + } else { + break :blk .{ self, &block_wg, &mut, &cur_idx, errs, &completed.?, i, writer, wg, &finish_mut, on_finish, on_finish_args }; + } + }, + ); + } + } + + fn writeBlockTo( self: Self, mut: *std.Thread.Mutex, cur_idx: *usize, - wg: *std.Thread.WaitGroup, - completed: *std.AutoArrayHashMap(usize, anyerror![]u8), + errs: *std.ArrayList(anyerror), + completed: *std.AutoHashMap(usize, []u8), idx: usize, writer: anytype, - ) void { - if (cur_idx.* >= self.sizes.len + 1) return; - defer wg.finish(); + ) !void { + //TODO: We can marginally reduce memory usage if we don't store sparse blocks in completed. + if (errs.items.len > 0) return; // Indicates an error has occured in another thread. const block = self.blockAt(idx) catch |err| { - cur_idx.* = self.sizes.len + 1; - completed.put(idx, err) catch {}; + errs.append(err) catch {}; return; }; defer if (idx < self.sizes.len) { @@ -202,14 +239,12 @@ pub fn DataReader(comptime T: type) type { defer mut.unlock(); if (cur_idx.* == idx) { _ = writer.write(block) catch |err| { - cur_idx.* = self.sizes.len + 1; - completed.put(idx, err) catch {}; + errs.append(err) catch {}; return; }; } else { completed.put(idx, block) catch |err| { - cur_idx.* = self.sizes.len + 1; - completed.put(idx, err) catch {}; + errs.append(err) catch {}; return; }; } @@ -217,35 +252,114 @@ pub fn DataReader(comptime T: type) type { for (cur_idx.*..self.numBlocks()) |i| { const val = completed.get(i); if (val == null) return; - _ = writer.write(block) catch |err| { - cur_idx.* = self.sizes.len + 1; - completed.put(i, err) catch {}; + _ = writer.write(val.?) catch |err| { + errs.append(err) catch {}; return; }; + _ = completed.remove(i); cur_idx.* += 1; if (completed.count() == 0) return; } } - fn extractThreadedPWrite( + fn writeBlockToPWrite( self: Self, - wg: *std.Thread.WaitGroup, - completed: *std.ArrayList(anyerror), + errs: *std.ArrayList(anyerror), idx: usize, writer: anytype, ) void { - if (completed.items.len > 0) return; - defer wg.finish(); - const block = self.blockAt(idx) catch |err| { - completed.append(err) catch {}; - return; - }; - defer if (idx < self.sizes.len) { - self.alloc.free(block); - }; - _ = writer.pwrite(idx * self.block_size, block) catch |err| { - completed.append(err) catch {}; - return; - }; + if (errs.items.len > 0) return; + if (self.sizes[idx].size == 0) { + var pos = idx * self.block_size; + if (self.frag.len == 0 and idx == self.sizes.len - 1) { + pos += self.file_size % self.block_size; + } else { + pos += self.block_size; + } + _ = writer.pwrite(&[1]u8{0}, pos - 1) catch |err| { + errs.append(err) catch {}; + }; + } else { + const block = self.blockAt(idx) catch |err| { + errs.append(err) catch {}; + return; + }; + defer if (idx < self.sizes.len) { + self.alloc.free(block); + }; + _ = writer.pwrite(block, idx * self.block_size) catch |err| { + errs.append(err) catch {}; + return; + }; + } + } + + fn writeToThread( + self: Self, + wg: *std.Thread.WaitGroup, + mut: *std.Thread.Mutex, + cur_idx: *usize, + errs: *std.ArrayList(anyerror), + completed: *std.AutoArrayHashMap(usize, anyerror![]u8), + idx: usize, + writer: anytype, + ) void { + self.writeBlockTo(mut, cur_idx, errs, completed, idx, writer); + wg.finish(); + } + fn writeToThreadPWrite( + self: Self, + wg: *std.Thread.WaitGroup, + errs: *std.ArrayList(anyerror), + idx: usize, + writer: anytype, + ) void { + self.writeBlockToPWrite(errs, idx, writer); + wg.finish(); + } + + fn noBlockThread( + self: Self, + block_wg: *std.Thread.WaitGroup, + mut: *std.Thread.Mutex, + cur_idx: *usize, + errs: *std.ArrayList(anyerror), + completed: *std.AutoArrayHashMap(usize, anyerror![]u8), + idx: usize, + writer: anytype, + finish_wg: *std.Thread.WaitGroup, + finish_mut: *std.Thread.Mutex, + comptime on_finish: anytype, + on_finish_args: anytype, + ) void { + self.writeBlockTo(mut, cur_idx, errs, completed, idx, writer); + block_wg.finish(); + finish_mut.lock(); + defer finish_mut.unlock(); + if (block_wg.isDone()) { + @call(.auto, on_finish, on_finish_args); + finish_wg.finish(); + completed.deinit(); + } + } + fn noBlockThreadPWrite( + self: Self, + block_wg: *std.Thread.WaitGroup, + errs: *std.ArrayList(anyerror), + idx: usize, + writer: anytype, + finish_wg: *std.Thread.WaitGroup, + finish_mut: *std.Thread.Mutex, + comptime on_finish: anytype, + on_finish_args: anytype, + ) void { + self.writeBlockToPWrite(errs, idx, writer); + block_wg.finish(); + finish_mut.lock(); + defer finish_mut.unlock(); + if (block_wg.isDone()) { + @call(.auto, on_finish, on_finish_args); + finish_wg.finish(); + } } }; } diff --git a/src/root.zig b/src/root.zig index c65635e..a7fd31e 100644 --- a/src/root.zig +++ b/src/root.zig @@ -5,12 +5,14 @@ pub const ExtractionOptions = @import("extract_options.zig"); pub const SfsFile = SfsReader(std.fs.File); -const test_file = "testing/LinuxPATest.sfs"; +const test_archive = "testing/LinuxPATest.sfs"; +const test_file = "Start.exe"; +const file_extr_loc = "testing/Start.exe"; test "OpenFile" { - const fil = try std.fs.cwd().openFile(test_file, .{}); - defer fil.close(); - var rdr: SfsFile = try .init(std.testing.allocator, fil, 0); + const sfs_fil = try std.fs.cwd().openFile(test_archive, .{}); + defer sfs_fil.close(); + var rdr: SfsFile = try .init(std.testing.allocator, sfs_fil, 0); defer rdr.deinit(); std.debug.print("{}\n", .{rdr.super}); const root = try rdr.root(); @@ -20,18 +22,17 @@ test "OpenFile" { defer f.deinit(); std.debug.print("{s}\n", .{f.name}); } - var start = try root.open("Start.exe"); - defer start.deinit(); - const startReader = try start.reader(); - _ = startReader; } -test "ReadFile" { - const fil = try std.fs.cwd().openFile(test_file, .{}); - defer fil.close(); - var rdr: SfsFile = try .init(std.testing.allocator, fil, 0); +test "ExtractFile" { + std.fs.cwd().deleteFile(file_extr_loc) catch {}; + const sfs_fil = try std.fs.cwd().openFile(test_archive, .{}); + defer sfs_fil.close(); + var rdr: SfsFile = try .init(std.testing.allocator, sfs_fil, 0); defer rdr.deinit(); - std.debug.print("{}\n", .{rdr.super}); - const root = try rdr.root(); - defer root.deinit(); + const fil = try rdr.open(test_file); + defer fil.deinit(); + var op: ExtractionOptions = try .init(); + op.verbose = true; + try fil.extract(&op, file_extr_loc); }