From 0df14b8adc329c1912297251c410479b4866904d Mon Sep 17 00:00:00 2001 From: Caleb Gardner Date: Fri, 22 May 2026 12:49:07 -0500 Subject: [PATCH] Moved to File.MemoryMap instead of direct file I/O --- build.zig | 4 +- src/archive.zig | 30 ++-- src/bin/unsquashfs.zig | 15 +- src/file.zig | 4 +- src/frag.zig | 27 ++-- src/inode.zig | 277 ++---------------------------------- src/lookup_table.zig | 39 ++--- src/options.zig | 4 +- src/util/data_extractor.zig | 77 +++------- src/util/data_reader.zig | 18 +-- src/util/misc.zig | 6 +- src/util/offset_file.zig | 27 ++-- src/xattr_table.zig | 25 ++-- 13 files changed, 126 insertions(+), 427 deletions(-) diff --git a/build.zig b/build.zig index 76103bc..492d302 100644 --- a/build.zig +++ b/build.zig @@ -3,7 +3,7 @@ const std = @import("std"); pub fn build(b: *std.Build) !void { // const use_zig_decomp = b.option(bool, "use_zig_decomp", "Use zig standard library for decompression.") orelse false; // const allow_lzo = b.option(bool, "allow_lzo", "Compile with lzo support") orelse false; - const debug = b.option(bool, "debug", "Enable options to make debugging easier."); + var debug = b.option(bool, "debug", "Enable options to make debugging easier."); const version_string_option = b.option([]const u8, "version", "Version of the library/binary"); // const zig_squashfs_options = b.addOptions(); @@ -13,6 +13,8 @@ pub fn build(b: *std.Build) !void { const target = b.standardTargetOptions(.{}); const optimize = b.standardOptimizeOption(.{}); + if (optimize == .Debug) debug = true; + const lib = b.addLibrary(.{ .name = "squashfs", .root_module = b.createModule(.{ diff --git a/src/archive.zig b/src/archive.zig index d91878c..6a199c2 100644 --- a/src/archive.zig +++ b/src/archive.zig @@ -24,7 +24,7 @@ pub fn init(io: Io, file: std.Io.File, offset: u64) !Archive { var super: Superblock = undefined; try rdr.interface.readSliceEndian(Superblock, @ptrCast(&super), .little); return .{ - .file = .init(file, offset), + .file = try .init(io, 
file, super.size, offset), .super = super, .stateless_decomp = try Decomp.StatelessDecomp(super.compression), @@ -53,19 +53,6 @@ pub fn open(self: Archive, alloc: std.mem.Allocator, io: Io, filepath: []const u defer root_file.deinit(); return root_file.open(alloc, io, filepath); } -/// Extract the entire archive contents to the given directory. -pub fn extract(self: Archive, alloc: std.mem.Allocator, io: Io, extract_dir: []const u8, options: ExtractionOptions) !void { - const root_inode = try Utils.inodeFromRef( - alloc, - io, - self.file, - self.stateless_decomp, - self.super.inode_start, - self.super.block_size, - self.super.root_ref, - ); - return root_inode.extract(alloc, io, self.file, self.super, extract_dir, options); -} /// Returns the inode with the given inode number. /// Requires that the archive is exportable (has an export lookup table). @@ -163,3 +150,18 @@ pub const Superblock = extern struct { return SuperblockError.InvalidBlockLog; } }; + +// Extraction + +/// Extract the entire archive contents to the given directory. 
+pub fn extract(self: Archive, alloc: std.mem.Allocator, io: Io, extract_dir: []const u8, options: ExtractionOptions) !void { + const root_inode = try Utils.inodeFromRef( + alloc, + self.file, + self.stateless_decomp, + self.super.inode_start, + self.super.block_size, + self.super.root_ref, + ); + return root_inode.extract(alloc, io, self.file, self.super, extract_dir, options); +} diff --git a/src/bin/unsquashfs.zig b/src/bin/unsquashfs.zig index 53c3f08..c278ec3 100644 --- a/src/bin/unsquashfs.zig +++ b/src/bin/unsquashfs.zig @@ -48,21 +48,18 @@ pub fn main(init: std.process.Init) !void { var out = stdout.writer(io, &[0]u8{}); defer out.interface.flush() catch {}; - // try handleArgs(init.minimal.args, &out.interface); - // if (archive.len == 0) { - // try out.interface.print("You must provide a squashfs archive\n", .{}); - // try out.interface.print(help_mgs, .{}); - // return; - // } - archive = "testing/LinuxPATest.sfs"; - extLoc = "testing/LinuxPABinTest"; + try handleArgs(init.minimal.args, &out.interface); + if (archive.len == 0) { + try out.interface.print("You must provide a squashfs archive\n", .{}); + try out.interface.print(help_mgs, .{}); + return; + } var fil = try Io.Dir.cwd().openFile(io, archive, .{}); //TODO: Handle error gracefully. defer fil.close(io); var arc: squashfs.Archive = try .init(io, fil, offset); //TODO: Handle error gracefully. 
const options: squashfs.ExtractionOptions = .{ - .threads = if (threads == 0) try std.Thread.getCpuCount() else threads, .verbose = verbose, .verbose_writer = if (verbose) &out.interface else null, .ignore_xattr = ignore_xattrs, diff --git a/src/file.zig b/src/file.zig index 7933e70..878d2cf 100644 --- a/src/file.zig +++ b/src/file.zig @@ -35,8 +35,8 @@ pub fn init(alloc: std.mem.Allocator, archive: Archive, in: Inode, name: []const .name = new_name, }; } -pub fn fromDirEntry(alloc: std.mem.Allocator, io: Io, archive: Archive, ent: DirEntry) !File { - var rdr = try archive.file.readerAt(io, archive.super.inode_start + ent.block_start, &[0]u8{}); +pub fn fromDirEntry(alloc: std.mem.Allocator, archive: Archive, ent: DirEntry) !File { + var rdr = archive.file.readerAt(archive.super.inode_start + ent.block_start); var meta: MetadataReader = .init(alloc, &rdr.interface, archive.stateless_decomp); try meta.interface.discardAll(ent.block_offset); diff --git a/src/frag.zig b/src/frag.zig index f3be568..e07fa4e 100644 --- a/src/frag.zig +++ b/src/frag.zig @@ -23,15 +23,14 @@ block_size: u32, entries: []FragEntry, frag_cache: std.array_hash_map.Auto(u32, []u8), -cache_mut: std.Io.Mutex = .init, +cache_mut: std.Io.RwLock = .init, -pub fn init(alloc: std.mem.Allocator, io: Io, fil: OffsetFile, decomp: *const Decompressor, frag_start: u64, frag_num: u32, block_size: u32) !FragManager { - var buf: [8 * 1024]u8 = undefined; - var rdr = try fil.readerAt(io, frag_start, &buf); +pub fn init(alloc: std.mem.Allocator, fil: OffsetFile, decomp: *const Decompressor, frag_start: u64, frag_num: u32, block_size: u32) !FragManager { + var rdr = fil.readerAt(frag_start); var first_offset: u64 = undefined; try rdr.interface.readSliceEndian(u64, @ptrCast(&first_offset), .little); - rdr = try fil.readerAt(io, first_offset, &buf); + rdr = fil.readerAt(first_offset); var meta: MetadataReader = .init(alloc, &rdr.interface, decomp); const entries = try alloc.alloc(FragEntry, frag_num); @@ -59,24 
+58,28 @@ pub fn deinit(self: *FragManager, io: Io) void { } pub fn get(self: *FragManager, io: Io, idx: u32) ![]u8 { - if (self.frag_cache.contains(idx)) - return self.frag_cache.get(idx).?; + { + try self.cache_mut.lockShared(io); + defer self.cache_mut.unlockShared(io); + if (self.frag_cache.contains(idx)) + return self.frag_cache.get(idx).?; + } try self.cache_mut.lock(io); defer self.cache_mut.unlock(io); + if (self.frag_cache.contains(idx)) + return self.frag_cache.get(idx).?; + const entry = self.entries[idx]; const out = try self.alloc.alloc(u8, if (entry.size.uncompressed) entry.size.size else self.block_size); - var buf: [1024 * 1024]u8 = undefined; - var rdr = try self.fil.readerAt(io, entry.start, &buf); if (entry.size.uncompressed) { - try rdr.interface.readSliceAll(out); + @memcpy(out, self.fil.map.memory[entry.start .. entry.start + entry.size.size]); } else { @branchHint(.likely); - try rdr.interface.fill(entry.size.size); - _ = try self.decomp.Decompress(self.alloc, buf[0..entry.size.size], out); + _ = try self.decomp.Decompress(self.alloc, self.fil.map.memory[entry.start .. 
entry.start + entry.size.size], out); } try self.frag_cache.put(self.alloc, idx, out); diff --git a/src/inode.zig b/src/inode.zig index 7791d01..8f1c08f 100644 --- a/src/inode.zig +++ b/src/inode.zig @@ -71,8 +71,8 @@ pub fn readDirectory(self: Inode, alloc: std.mem.Allocator, io: Io, fil: OffsetF else => Error.NotDirectory, }; } -fn readDirFromData(alloc: std.mem.Allocator, io: Io, fil: OffsetFile, decomp: *const Decompressor, dir_offset: u64, d: anytype) ![]DirEntry { - var rdr = try fil.readerAt(io, dir_offset + d.block_start, &[0]u8{}); +fn readDirFromData(alloc: std.mem.Allocator, fil: OffsetFile, decomp: *const Decompressor, dir_offset: u64, d: anytype) ![]DirEntry { + var rdr = fil.readerAt(dir_offset + d.block_start); var meta: MetadataReader = .init(alloc, &rdr.interface, decomp); try meta.interface.discardAll(d.block_offset); @@ -216,8 +216,8 @@ const PathRet = struct { inode: Inode, xattr_idx: ?u32 = null, }; -fn DirCompare(_: void, a: PathRet, b: PathRet) std.math.Order{ - +fn DirCompare(_: void, a: PathRet, b: PathRet) std.math.Order { + return std.math.order(std.mem.count(u8, a.path, "/"), std.mem.count(u8, b.path, "/")); } const ExtractReturnUnion = union(enum) { path_ret: ExtractError!PathRet, @@ -239,265 +239,12 @@ pub fn extract( filepath: []const u8, options: ExtractionOptions, ) !void { - const path = std.mem.trimEnd(u8, filepath, "/"); - - var decomp_base: Decomp = switch (super.compression) { - .gzip => .{ .gzip = try .init(alloc, super.block_size) }, - .lzma => .{ .lzma = try .init(alloc, super.block_size) }, - .xz => .{ .xz = try .init(alloc, super.block_size) }, - .zstd => .{ .zstd = try .init(alloc, io, super.block_size) }, - else => unreachable, - }; - defer decomp_base.deinit(); - const decomp = decomp_base.decompressor(); - - var frags: FragManager = try .init(alloc, io, fil, decomp, super.frag_start, super.frag_count, super.block_size); - defer frags.deinit(io); - - var sel_buf = [1]ExtractReturnUnion{undefined} ** 10; - var sel: 
Io.Select(ExtractReturnUnion) = .init(io, &sel_buf); - defer sel.cancelDiscard(); - - var fold_queu: std.PriorityDequeue(PathRet, void, comptime compareFn: fn (Context, T, T) Order) - - try sel.concurrent(.path_ret, extractReal, .{ self, alloc, io, fil, decomp, super, &frags, &sel, path }); - - var xattr_table: ?XattrTable = if (!options.ignore_xattr) - try .init(alloc, io, fil, decomp, super.xattr_start) - else - null; - defer if (!options.ignore_xattr) xattr_table.?.deinit(io); - if (xattr_table != null) try xattr_table.?.table.fill(io); - - var id_table: ?CachedTable(u16) = if (!options.ignore_xattr) - .init(alloc, fil, decomp, super.id_start, super.id_count) - else - null; - defer if (!options.ignore_xattr) id_table.?.deinit(io); - if (id_table != null) try id_table.?.fill(io); - - while (true) { - const group_token = sel.group.token.load(.acquire); - if (group_token == null) break; - // std.debug.print("{any}\n", .{sel.group.state}); - - // std.debug.print("Waiting for return...", .{}); - const ret = try sel.await(); - defer sel.queue.putOneUncancelable(io, ret) catch {}; - // std.debug.print("Got One...\n", .{}); - const path_ret = try ret.path_ret; - - if (options.ignore_permissions and options.ignore_xattr) continue; - if (options.ignore_permissions and path_ret.xattr_idx == null) continue; - - var ret_file = try Io.Dir.cwd().openFile(io, path_ret.path, .{}); - defer ret_file.close(io); - - if (!options.ignore_permissions) { - try ret_file.setPermissions(io, @enumFromInt(path_ret.inode.hdr.permissions)); - try ret_file.setOwner( - io, - try id_table.?.get(io, path_ret.inode.hdr.uid_idx), - try id_table.?.get(io, path_ret.inode.hdr.gid_idx), - ); - } - if (@hasField(std.os, "linux") and !options.ignore_xattr and path_ret.xattr_idx != null) { - const xattrs = try xattr_table.?.get(alloc, io, path_ret.xattr_idx.?); - defer { - for (xattrs) |x| - alloc.free(x.key); - alloc.free(xattrs); - } - - for (xattrs) |x| { - const res = 
std.os.linux.fsetxattr(ret_file.handle, x.key, x.value.ptr, x.value.len, 0); - if (res != 0) - return error.CannotSetXattr; - } - } - } -} -pub fn extractReal( - self: Inode, - alloc: std.mem.Allocator, - io: Io, - fil: OffsetFile, - decomp: *const Decompressor, - super: Archive.Superblock, - frags: *FragManager, - sel: *Io.Select(ExtractReturnUnion), - path: []const u8, -) ExtractError!PathRet { - return switch (self.hdr.inode_type) { - .dir, .ext_dir => extractDir(self, alloc, io, fil, decomp, super, sel, frags, path), - .file, .ext_file => extractFile(self, alloc, io, fil, decomp, frags, super.block_size, path), - .symlink, .ext_symlink => extractSymlink(self, io, path), - else => extractDevOrIPC(self, alloc, path), - }; -} -fn extractDir( - self: Inode, - alloc: std.mem.Allocator, - io: Io, - fil: OffsetFile, - decomp: *const Decompressor, - super: Archive.Superblock, - parent_select: *Io.Select(ExtractReturnUnion), - frags: *FragManager, - path: []const u8, -) ExtractError!PathRet { - try Io.Dir.cwd().createDirPath(io, path); - - var sel_buf = [1]ExtractReturnUnion{undefined} ** 10; - var sel: Io.Select(ExtractReturnUnion) = .init(io, &sel_buf); - defer sel.cancelDiscard(); - - var num: usize = 0; - { - const dir_entries = self.readDirectory(alloc, io, fil, decomp, super.dir_start) catch |err| switch (err) { - Error.NotDirectory => unreachable, - else => return @errorCast(err), - }; - num = dir_entries.len; - defer { - for (dir_entries) |d| - d.deinit(alloc); - alloc.free(dir_entries); - } - - for (dir_entries) |d| { - var rdr = try fil.readerAt(io, d.block_start + super.inode_start, &[0]u8{}); - var meta_rdr: MetadataReader = .init(alloc, &rdr.interface, decomp); - try meta_rdr.interface.discardAll(d.block_offset); - const inode = try read(alloc, &meta_rdr.interface, super.block_size); - errdefer inode.deinit(alloc); - - const new_path = try std.mem.concat(alloc, u8, &[_][]const u8{ path, "/", d.name }); - errdefer alloc.free(new_path); - - try 
sel.concurrent(.path_ret, extractReal, .{ self, alloc, io, fil, decomp, super, frags, &sel, new_path }); - } - } - - while (num > 0) { - const ret = sel.await() catch break; - num -= 1; - - parent_select.queue.putOne(io, ret) catch |err| switch (err) { - error.Canceled => return error.Canceled, - else => break, - }; - } - return .{ - .path = path, - .inode = self, - }; -} -fn extractFile( - self: Inode, - alloc: std.mem.Allocator, - io: Io, - fil: OffsetFile, - decomp: *const Decompressor, - frag: *FragManager, - block_size: u32, - path: []const u8, -) ExtractError!PathRet { - var atomic = try Io.Dir.cwd().createFileAtomic(io, path, .{}); - defer atomic.deinit(io); - - var ret: PathRet = .{ - .inode = self, - .path = path, - }; - const data: DataExtractor = blk: { - switch (self.data) { - .file => |f| { - var data: DataExtractor = .init(fil, decomp, block_size, f.size, f.block_start, f.block_sizes); - if (f.frag_idx != 0xFFFFFFFF) - data.addFrag(f.frag_block_offset, try frag.get(io, f.frag_idx)); - - break :blk data; - }, - .ext_file => |f| { - if (f.xattr_idx != 0xFFFFFFFF) ret.xattr_idx = f.xattr_idx; - var data: DataExtractor = .init(fil, decomp, block_size, f.size, f.block_start, f.block_sizes); - if (f.frag_idx != 0xFFFFFFFF) - data.addFrag(f.frag_block_offset, try frag.get(io, f.frag_idx)); - - break :blk data; - }, - else => unreachable, - } - }; - - try data.extractAsync(alloc, io, atomic.file); - try atomic.link(io); - - return ret; -} -fn extractSymlink(self: Inode, io: Io, path: []const u8) ExtractError!PathRet { - const target = switch (self.data) { - .symlink => |s| s.target, - .ext_symlink => |s| s.target, - else => unreachable, - }; - - try Io.Dir.cwd().symLink(io, target, path, .{}); - - return .{ - .path = path, - .inode = self, - }; -} -fn extractDevOrIPC(self: Inode, alloc: std.mem.Allocator, path: []const u8) ExtractError!PathRet { - var dev_num: u32 = 0; - var mode: u32 = 0; - - const DT = std.posix.DT; - - var ret: PathRet = .{ - .inode = 
self, - .path = path, - }; - - switch (self.data) { - .block_dev => |d| { - dev_num = d.dev; - mode = DT.BLK; - }, - .ext_block_dev => |d| { - dev_num = d.dev; - mode = DT.BLK; - if (d.xattr_idx != 0xFFFFFFFF) ret.xattr_idx = d.xattr_idx; - }, - .char_dev => |d| { - dev_num = d.dev; - mode = DT.CHR; - }, - .ext_char_dev => |d| { - dev_num = d.dev; - mode = DT.CHR; - if (d.xattr_idx != 0xFFFFFFFF) ret.xattr_idx = d.xattr_idx; - }, - .fifo => mode = DT.FIFO, - .ext_fifo => |f| { - mode = DT.FIFO; - if (f.xattr_idx != 0xFFFFFFFF) ret.xattr_idx = f.xattr_idx; - }, - .socket => mode = DT.SOCK, - .ext_socket => |s| { - mode = DT.SOCK; - if (s.xattr_idx != 0xFFFFFFFF) ret.xattr_idx = s.xattr_idx; - }, - else => unreachable, - } - - const sentinel_path = try std.mem.concatWithSentinel(alloc, u8, &[_][]const u8{path}, 0); - defer alloc.free(sentinel_path); - - const res = std.os.linux.mknod(sentinel_path, mode, dev_num); - if (res != 0) return ExtractError.MknodFailed; - - return ret; + _ = self; + _ = alloc; + _ = io; + _ = fil; + _ = super; + _ = filepath; + _ = options; + return error.TODO; } diff --git a/src/lookup_table.zig b/src/lookup_table.zig index 9169141..7532bb6 100644 --- a/src/lookup_table.zig +++ b/src/lookup_table.zig @@ -5,18 +5,17 @@ const Decompressor = @import("util/decompressor.zig"); const MetadataReader = @import("util/metadata.zig"); const OffsetFile = @import("util/offset_file.zig"); -pub fn lookupValue(comptime T: anytype, alloc: std.mem.Allocator, io: Io, decomp: *const Decompressor, file: OffsetFile, table_start: u64, idx: u32) !T { +pub fn lookupValue(comptime T: anytype, alloc: std.mem.Allocator, decomp: *const Decompressor, file: OffsetFile, table_start: u64, idx: u32) !T { const T_PER_BLOCK: u16 = 8192 / @sizeOf(T); const block = idx / T_PER_BLOCK; const block_offset = idx % T_PER_BLOCK; - var rdr = try file.readerAt(io, table_start + (8 * block), &[0]u8{}); - var offset: u64 = undefined; - try rdr.interface.readSliceEndian(u64, 
@ptrCast(&offset), .little); + const offset_pos = table_start + (8 * block); + const offset: u64 = std.mem.readInt(u64, file.map.memory[offset_pos..][0..8], .little); - rdr = try file.readerAt(io, offset, &[0]u8{}); - var meta: MetadataReader = .init(alloc, &rdr.interface, decomp); + var rdr = file.readerAt(offset); + var meta: MetadataReader = .init(alloc, &rdr, decomp); try meta.interface.discardAll(@sizeOf(T) * block_offset); var out: T = undefined; @@ -41,7 +40,7 @@ pub fn CachedTable(comptime T: anytype) type { table: std.AutoHashMap(u32, []T), - mut: Io.Mutex = .init, + mut: Io.RwLock = .init, pub fn init(alloc: std.mem.Allocator, fil: OffsetFile, decomp: *const Decompressor, offset: u64, total_num: u32) Table { return .{ @@ -72,16 +71,15 @@ pub fn CachedTable(comptime T: anytype) type { num_blocks += 1; for (0..num_blocks) |block| { - var rdr = try self.fil.readerAt(io, self.table_start + (8 * block), &[0]u8{}); - var offset: u64 = undefined; - try rdr.interface.readSliceEndian(u64, @ptrCast(&offset), .little); + const offset_pos = self.table_start + (8 * block); + const offset: u64 = std.mem.readInt(u64, self.fil.map.memory[offset_pos..][0..8], .little); 
const len: u16 = if (self.total_num % T_PER_BLOCK != 0 and block == (self.total_num - 1) / T_PER_BLOCK) @truncate(self.total_num % T_PER_BLOCK) else T_PER_BLOCK; - rdr = try self.fil.readerAt(io, offset, &[0]u8{}); + var rdr = self.fil.readerAt(offset); var meta: MetadataReader = .init(self.alloc, &rdr.interface, self.decomp); const slice = try meta.interface.readSliceEndianAlloc(self.alloc, T, len, .little); @@ -92,8 +90,14 @@ pub fn CachedTable(comptime T: anytype) type { pub fn get(self: *Table, io: Io, idx: u32) Error!T { const block = idx / T_PER_BLOCK; const block_offset = idx % T_PER_BLOCK; - if (self.table.contains(block)) - return self.table.get(block).?[block_offset]; + + { + try self.mut.lockShared(io); + defer self.mut.unlockShared(io); + + if (self.table.contains(block)) + return self.table.get(block).?[block_offset]; + } try self.mut.lock(io); defer self.mut.unlock(io); @@ -101,20 +105,19 @@ pub fn CachedTable(comptime T: anytype) type { if (self.table.contains(block)) return self.table.get(block).?[block_offset]; - var rdr = try self.fil.readerAt(io, self.table_start + (8 * block), &[0]u8{}); - var offset: u64 = undefined; - try rdr.interface.readSliceEndian(u64, @ptrCast(&offset), .little); + const offset_pos = self.table_start + (8 * block); + const offset: u64 = std.mem.readInt(u64, self.fil.map.memory[offset_pos..][0..8], .little); 
const len: u16 = if (self.total_num % T_PER_BLOCK != 0 and block == (self.total_num - 1) / T_PER_BLOCK) @truncate(self.total_num % T_PER_BLOCK) else T_PER_BLOCK; - rdr = try self.fil.readerAt(io, offset, &[0]u8{}); + var rdr = self.fil.readerAt(offset); var meta: MetadataReader = .init(self.alloc, &rdr.interface, self.decomp); const slice = try meta.interface.readSliceEndianAlloc(self.alloc, T, len, .little); - try self.table.put(block, slice); + try self.table.put(@truncate(block), slice); return slice[block_offset]; } diff --git a/src/options.zig b/src/options.zig index 44cb816..1f6ba00 100644 --- a/src/options.zig +++ b/src/options.zig @@ -5,8 +5,8 @@ const Writer = std.Io.Writer; const ExtractionOptions = @This(); -/// The number of threads used for extraction. 0 implies single threaded. -threads: usize = 1, // TODO: Update to better integrate with zig 0.16 Io. Maybe limit to only single or multi-threaded. +// /// The number of threads used for extraction. 0 implies single threaded. +// threads: usize = 1, // As of Zig 0.16 this should no longer be necessary, instead this should be set by the io instance used. /// Don't set the file's owner & permissions after extraction ignore_permissions: bool = false, /// Don't set xattr values. Currently xattrs are never set anyway. diff --git a/src/util/data_extractor.zig b/src/util/data_extractor.zig index 81419c1..4e5a8b1 100644 --- a/src/util/data_extractor.zig +++ b/src/util/data_extractor.zig @@ -10,7 +10,7 @@ const OffsetFile = @import("offset_file.zig"); // const SharedCache = @import("shared_cache.zig"); -pub const Error = error{OutOfMemory} || Io.File.Reader.SeekError || Io.Writer.Error || Io.File.Writer.Error; +pub const Error = Decompressor.Error || Io.File.MemoryMap.CreateError || Io.File.WritePositionalError; const DataExtractor = @This(); @@ -51,22 +51,27 @@ fn numBlocks(self: DataExtractor) usize { /// Starts extracting the data using the given group to spawn async tasks. 
pub fn extractAsync(self: DataExtractor, alloc: std.mem.Allocator, io: Io, fil: Io.File) Error!void { + var map = try fil.createMemoryMap(io, .{ .len = self.file_size, .protection = .{ .write = true } }); + defer map.destroy(io); + var group: Io.Group = .init; defer group.cancel(io); var err: ?Error = null; var read_offset: u64 = self.start; for (0..self.blocks.len) |idx| { - group.async(io, blockThread, .{ self, alloc, io, fil, read_offset, idx, &err }); + group.async(io, blockThread, .{ self, alloc, map, read_offset, idx, &err }); read_offset += self.blocks[idx].size; } if (self.frag_block != null) - group.async(io, fragThread, .{ self, io, fil, &err }); + group.async(io, fragThread, .{ self, map }); group.await(io) catch |cancel| return err orelse cancel; + + try map.write(io); } -fn blockThread(self: DataExtractor, alloc: std.mem.Allocator, io: Io, fil: Io.File, read_offset: u64, idx: usize, ret_err: *?Error) Io.Cancelable!void { +fn blockThread(self: DataExtractor, alloc: std.mem.Allocator, map: Io.File.MemoryMap, read_offset: u64, idx: usize, ret_err: *?Error) Io.Cancelable!void { const block = self.blocks[idx]; const cur_block_size = if (idx == self.numBlocks() - 1) @@ -74,78 +79,28 @@ fn blockThread(self: DataExtractor, alloc: std.mem.Allocator, io: Io, fil: Io.Fi else self.block_size; - var wrt = fil.writer(io, &[0]u8{}); - wrt.seekTo(self.block_size * idx) catch |err| { - ret_err.* = err; - if (err == error.Canceled) io.recancel(); - return Io.Cancelable.Canceled; - }; - defer wrt.flush() catch {}; + const write_offset = self.block_size * idx; if (block.size == 0) { - wrt.interface.splatByteAll(0, cur_block_size) catch |err| { - ret_err.* = err; - if (err == error.Canceled) io.recancel(); - return Io.Cancelable.Canceled; - }; + @memset(map.memory[write_offset .. 
write_offset + cur_block_size], 0); return; } - var rdr = self.fil.readerAt(io, read_offset, &[0]u8{}) catch |err| { - ret_err.* = err; - if (err == error.Canceled) io.recancel(); - return Io.Cancelable.Canceled; - }; if (block.uncompressed) { - rdr.interface.streamExact(&wrt.interface, cur_block_size) catch |err| { - ret_err.* = err; - if (err == error.Canceled) io.recancel(); - return Io.Cancelable.Canceled; - }; - return; + @memcpy(map.memory[write_offset .. write_offset + cur_block_size], self.fil.map.memory[read_offset .. read_offset + cur_block_size]); } else { @branchHint(.likely); - var cache: [1024 * 1024]u8 = undefined; - var tmp: [1024 * 1024]u8 = undefined; - - rdr.interface.readSliceAll(cache[0..block.size]) catch |err| { + _ = self.decomp.Decompress(alloc, self.fil.map.memory[read_offset .. read_offset + block.size], map.memory[write_offset .. write_offset + cur_block_size]) catch |err| { ret_err.* = err; - if (err == error.Canceled) io.recancel(); - return Io.Cancelable.Canceled; - }; - _ = self.decomp.Decompress(alloc, cache[0..block.size], tmp[0..cur_block_size]) catch |err| { - ret_err.* = err; - if (err == error.Canceled) io.recancel(); - return Io.Cancelable.Canceled; - }; - wrt.interface.writeAll(tmp[0..cur_block_size]) catch |err| { - ret_err.* = err; - if (err == error.Canceled) io.recancel(); return Io.Cancelable.Canceled; }; } } -fn fragThread(self: DataExtractor, io: Io, fil: Io.File, ret_err: *?Error) Io.Cancelable!void { +fn fragThread(self: DataExtractor, map: Io.File.MemoryMap) Io.Cancelable!void { const cur_block_size = self.file_size % self.block_size; - var write_buf: [10 * 1024]u8 = undefined; - var wrt = fil.writer(io, &write_buf); - wrt.seekTo(self.blocks.len * self.block_size) catch |err| { - ret_err.* = err; - if (err == error.Canceled) io.recancel(); - return Io.Cancelable.Canceled; - }; + const write_offset = self.blocks.len * self.block_size; - wrt.interface.writeAll(self.frag_block.?[self.frag_offset .. 
self.frag_offset + cur_block_size]) catch |err| { - ret_err.* = err; - if (err == error.Canceled) io.recancel(); - return Io.Cancelable.Canceled; - }; - - wrt.flush() catch |err| { - ret_err.* = err; - if (err == error.Canceled) io.recancel(); - return Io.Cancelable.Canceled; - }; + @memcpy(map.memory[write_offset .. write_offset + cur_block_size], self.frag_block.?[self.frag_offset .. self.frag_offset + cur_block_size]); } diff --git a/src/util/data_reader.zig b/src/util/data_reader.zig index ddcce19..5989a47 100644 --- a/src/util/data_reader.zig +++ b/src/util/data_reader.zig @@ -19,7 +19,6 @@ alloc: std.mem.Allocator, fil: OffsetFile, io: Io, decomp: *const Decompressor, -cache: *Io.Queue([]u8), block_size: u32, file_size: u64, @@ -34,14 +33,13 @@ sparse_block: bool = false, interface: Io.Reader, -pub fn init(alloc: std.mem.Allocator, io: Io, fil: OffsetFile, decomp: *const Decompressor, cache: *Io.Queue([]u8), block_size: u32, file_size: u64, data_start: u64, blocks: []BlockSize) !DataReader { +pub fn init(alloc: std.mem.Allocator, io: Io, fil: OffsetFile, decomp: *const Decompressor, block_size: u32, file_size: u64, data_start: u64, blocks: []BlockSize) !DataReader { return .{ .alloc = alloc, .fil = fil, .io = io, .decomp = decomp, - .cache = cache, .block_size = block_size, .file_size = file_size, @@ -74,9 +72,10 @@ fn numBlocks(self: DataReader) usize { return num; } fn advanceBuffer(self: *DataReader) !void { - if (self.block_idx >= self.numBlocks()) { + if (self.block_idx >= self.numBlocks()) return Reader.Error.EndOfStream; - } + + errdefer self.interface.end = 0; defer self.block_idx += 1; self.interface.end = if (self.block_idx == self.numBlocks() - 1) @@ -101,18 +100,13 @@ fn advanceBuffer(self: *DataReader) !void { self.sparse_block = false; } if (block.uncompressed) { - try self.fil.readAt(self.io, self.cur_offset, self.interface.buffer[0..self.interface.end]); + @memcpy(self.interface.buffer[0..self.interface.end], 
self.fil.map.memory[self.cur_offset .. self.cur_offset + self.interface.end]); self.cur_offset += self.interface.end; } else { @branchHint(.likely); - const tmp = try self.cache.getOne(self.io); - defer self.cache.putOne(self.io, tmp) catch {}; - var rdr_buf: [50 * 1024]u8 = undefined; - var rdr = try self.fil.readerAt(self.io, self.cur_offset, &rdr_buf); - try rdr.interface.readSliceAll(tmp[0..block.size]); + _ = try self.decomp.Decompress(self.alloc, self.fil.map.memory[self.cur_offset .. self.cur_offset + block.size], self.interface.buffer[0..self.interface.end]); self.cur_offset += block.size; - _ = try self.decomp.Decompress(self.alloc, tmp[0..block.size], self.interface.buffer[0..self.interface.end]); } self.interface.seek = 0; } diff --git a/src/util/misc.zig b/src/util/misc.zig index 6666caa..e7d5d63 100644 --- a/src/util/misc.zig +++ b/src/util/misc.zig @@ -16,9 +16,9 @@ pub fn pathIsSelf(path: []const u8) bool { return path[0] == '.'; } /// Creates an Inode from an Inode.Ref. -pub fn inodeFromRef(alloc: std.mem.Allocator, io: Io, file: OffsetFile, decomp: *const Decompressor, inode_start: u64, block_size: u32, ref: Inode.Ref) !Inode { - var rdr = try file.readerAt(io, inode_start + ref.block_start, &[0]u8{}); - var meta: MetadataReader = .init(alloc, &rdr.interface, decomp); +pub fn inodeFromRef(alloc: std.mem.Allocator, file: OffsetFile, decomp: *const Decompressor, inode_start: u64, block_size: u32, ref: Inode.Ref) !Inode { + var rdr = file.readerAt(inode_start + ref.block_start); + var meta: MetadataReader = .init(alloc, &rdr, decomp); try meta.interface.discardAll(ref.block_offset); return .read(alloc, &meta.interface, block_size); diff --git a/src/util/offset_file.zig b/src/util/offset_file.zig index 4852c49..45e3688 100644 --- a/src/util/offset_file.zig +++ b/src/util/offset_file.zig @@ -3,22 +3,25 @@ const std = @import("std"); const Io = std.Io; const File = Io.File; -const Reader = File.Reader; +const Reader = Io.Reader; const OffsetFile = 
@This(); -fil: File, -offset: u64, +map: Io.File.MemoryMap, -pub fn init(fil: File, init_offset: u64) OffsetFile { - return .{ .fil = fil, .offset = init_offset }; +pub fn init(io: Io, fil: File, archive_size: u64, init_offset: u64) !OffsetFile { + return .{ + .map = try fil.createMemoryMap(io, .{ + .protection = .{ .read = true, .write = false, .execute = false }, + .len = archive_size, + .offset = init_offset, + }), + }; +} +pub fn deinit(self: @This(), io: Io) void { + self.map.destroy(io); } -pub fn readerAt(self: OffsetFile, io: Io, offset: u64, buffer: []u8) Reader.SeekError!Reader { - var rdr = self.fil.reader(io, buffer); - try rdr.seekTo(self.offset + offset); - return rdr; -} -pub fn readAt(self: OffsetFile, io: Io, offset: u64, buf: []u8) File.ReadPositionalError!void { - _ = try self.fil.readPositionalAll(io, buf, self.offset + offset); +pub fn readerAt(self: OffsetFile, offset: u64) Reader { + return .fixed(self.map.memory[offset..]); } diff --git a/src/xattr_table.zig b/src/xattr_table.zig index c12cc0a..abc9bc9 100644 --- a/src/xattr_table.zig +++ b/src/xattr_table.zig @@ -18,15 +18,11 @@ kv_start: u64, table: LookupTable.CachedTable(TableValue), value_cache: std.AutoHashMap(InodeRef, []const u8), -value_mut: Io.Mutex = .init, +value_mut: Io.RwLock = .init, -pub fn init(alloc: std.mem.Allocator, io: Io, fil: OffsetFile, decomp: *const Decompressor, xattr_start: u64) !XattrCachedTable { - var rdr = try fil.readerAt(io, xattr_start, &[0]u8{}); - - var start: u64 = undefined; - try rdr.interface.readSliceEndian(u64, @ptrCast(&start), .little); - var num: u32 = undefined; - try rdr.interface.readSliceEndian(u32, @ptrCast(&num), .little); +pub fn init(alloc: std.mem.Allocator, fil: OffsetFile, decomp: *const Decompressor, xattr_start: u64) !XattrCachedTable { + const start: u64 = std.mem.readInt(u64, fil.map.memory[xattr_start..][0..8], .little); + const num: u32 = std.mem.readInt(u32, fil.map.memory[xattr_start + 8 ..][0..4], .little); 
return .{ .alloc = alloc, @@ -48,7 +44,7 @@ pub fn deinit(self: *XattrCachedTable, io: Io) void { pub fn get(self: *XattrCachedTable, alloc: std.mem.Allocator, io: Io, idx: u32) ![]XattrSemiOwned { const lookup = try self.table.get(io, idx); - var rdr = try self.fil.readerAt(io, self.kv_start + lookup.ref.block_start, &[0]u8{}); + var rdr = self.fil.readerAt(self.kv_start + lookup.ref.block_start); var meta: MetadataReader = .init(alloc, &rdr.interface, self.decomp); try meta.interface.discardAll(lookup.ref.block_offset); @@ -131,7 +127,7 @@ fn valueAt(self: *XattrCachedTable, io: Io, ref: InodeRef) ![]const u8 { if (self.value_cache.contains(ref)) return self.value_cache.get(ref).?; - var rdr = try self.fil.readerAt(io, self.kv_start + ref.block_start, &[0]u8{}); + var rdr = self.fil.readerAt(self.kv_start + ref.block_start); var meta: MetadataReader = .init(self.alloc, &rdr.interface, self.decomp); try meta.interface.discardAll(ref.block_offset); @@ -204,14 +200,11 @@ const XattrPrefix = packed struct(u16) { // Stateless pub fn statelessLookup(alloc: std.mem.Allocator, io: Io, decomp: *const Decompressor, fil: OffsetFile, table_start: u64, idx: u16) ![]XattrOwned { - var rdr = try fil.readerAt(io, table_start, &[0]u8{}); - - var kv_start: u64 = undefined; - try rdr.interface.readSliceEndian(u64, @ptrCast(&kv_start), .little); + const kv_start: u64 = std.mem.readInt(u64, fil.map.memory[table_start..][0..8], .little); 
const lookup = try LookupTable.lookupValue(TableValue, alloc, io, decomp, fil, table_start + 16, idx); - rdr = try fil.readerAt(io, kv_start + lookup.ref.block_start, &[0]u8{}); + var rdr = fil.readerAt(kv_start + lookup.ref.block_start); var meta: MetadataReader = .init(alloc, &rdr.interface, decomp); try meta.interface.discardAll(lookup.ref.block_offset); @@ -252,7 +245,7 @@ pub fn statelessLookup(alloc: std.mem.Allocator, io: Io, decomp: *const Decompre const value: ValueOutOfLineEntry = undefined; try meta.interface.readSliceEndian(ValueOutOfLineEntry, @ptrCast(&value), .little); - var ool_rdr = try fil.readerAt(io, kv_start + value.ref.block_start, &[0]u8{}); + var ool_rdr = fil.readerAt(kv_start + value.ref.block_start); var ool_meta: MetadataReader = .init(alloc, &ool_rdr.interface, decomp); try ool_meta.interface.discardAll(value.ref.block_offset);