IT WORKS AND IS FAST

This commit is contained in:
Caleb Gardner
2026-05-22 15:45:44 -05:00
parent 0df14b8adc
commit 3239bf0e01
10 changed files with 287 additions and 64 deletions
Executable
+8
View File
@@ -0,0 +1,8 @@
#! /usr/bin/env bash
ARCHIVE="testing/LinuxPATest.sfs"
REF_EXT_LOC="testing/LinuxPAReference"
PROG_EXT_LOC="testing/LinuxPABinTest"
hyperfine --warmup 5 --prepare "rm -rf $REF_EXT_LOC && rm -rf $PROG_EXT_LOC" "unsquashfs -d $REF_EXT_LOC $ARCHIVE" "zig-out/bin/unsquashfs -d $PROG_EXT_LOC $ARCHIVE"
+1
View File
@@ -22,6 +22,7 @@ pub fn build(b: *std.Build) !void {
.target = target,
.valgrind = debug,
.root_source_file = b.path("src/root.zig"),
.link_libc = true,
}),
.use_llvm = debug,
});
-10
View File
@@ -1,10 +0,0 @@
#!/bin/sh
zig test \
-lc \
-lz \
-llzma \
-lminilzo \
-llz4 \
-lzstd \
src/test.zig
+1
View File
@@ -23,6 +23,7 @@ pub fn init(io: Io, file: std.Io.File, offset: u64) !Archive {
try rdr.seekTo(offset);
var super: Superblock = undefined;
try rdr.interface.readSliceEndian(Superblock, @ptrCast(&super), .little);
return .{
.file = try .init(io, file, super.size, offset),
.super = super,
+1 -2
View File
@@ -69,8 +69,7 @@ inline fn zstdDecomp(buffer: []u8, in: []u8, out: []u8) !usize {
pub const stateless_decompressor: Decompressor = .{ .decomp_fn = statelessDecomp };
fn statelessDecomp(d: ?*const Decompressor, alloc: std.mem.Allocator, in: []u8, out: []u8) Error!usize {
_ = d;
fn statelessDecomp(_: ?*const Decompressor, alloc: std.mem.Allocator, in: []u8, out: []u8) Error!usize {
_ = alloc;
const res = c.ZSTD_decompress(out.ptr, out.len, in.ptr, in.len);
if (c.ZSTD_isError(res) == 1)
+3 -5
View File
@@ -26,12 +26,10 @@ frag_cache: std.array_hash_map.Auto(u32, []u8),
cache_mut: std.Io.RwLock = .init,
pub fn init(alloc: std.mem.Allocator, fil: OffsetFile, decomp: *const Decompressor, frag_start: u64, frag_num: u32, block_size: u32) !FragManager {
var rdr = fil.readerAt(frag_start);
var first_offset: u64 = undefined;
try rdr.interface.readSliceEndian(u64, @ptrCast(&first_offset), .little);
const first_offset: u64 = std.mem.readInt(u64, @ptrCast(fil.map.memory[frag_start .. frag_start + 8]), .little);
rdr = fil.readerAt(first_offset);
var meta: MetadataReader = .init(alloc, &rdr.interface, decomp);
var rdr = fil.readerAt(first_offset);
var meta: MetadataReader = .init(alloc, &rdr, decomp);
const entries = try alloc.alloc(FragEntry, frag_num);
errdefer alloc.free(entries);
+191 -17
View File
@@ -64,16 +64,16 @@ pub fn deinit(self: Inode, alloc: std.mem.Allocator) void {
// Utility Functions
/// Read the directory entries
pub fn readDirectory(self: Inode, alloc: std.mem.Allocator, io: Io, fil: OffsetFile, decomp: *const Decompressor, dir_offset: u64) ![]DirEntry {
pub fn readDirectory(self: Inode, alloc: std.mem.Allocator, fil: OffsetFile, decomp: *const Decompressor, dir_offset: u64) ![]DirEntry {
return switch (self.data) {
.dir => |d| readDirFromData(alloc, io, fil, decomp, dir_offset, d),
.ext_dir => |d| readDirFromData(alloc, io, fil, decomp, dir_offset, d),
.dir => |d| readDirFromData(alloc, fil, decomp, dir_offset, d),
.ext_dir => |d| readDirFromData(alloc, fil, decomp, dir_offset, d),
else => Error.NotDirectory,
};
}
fn readDirFromData(alloc: std.mem.Allocator, fil: OffsetFile, decomp: *const Decompressor, dir_offset: u64, d: anytype) ![]DirEntry {
var rdr = fil.readerAt(dir_offset + d.block_start);
var meta: MetadataReader = .init(alloc, &rdr.interface, decomp);
var meta: MetadataReader = .init(alloc, &rdr, decomp);
try meta.interface.discardAll(d.block_offset);
return DirEntry.readDirectory(alloc, &meta.interface, d.size);
@@ -137,9 +137,9 @@ pub fn xattrIndex(self: Inode) !u32 {
.ext_char_dev => |e| e.xattr_idx,
.ext_fifo => |e| e.xattr_idx,
.ext_socket => |e| e.xattr_idx,
else => Error.NoXattr,
else => return error.NoXattr,
};
if (idx == 0xFFFFFFFF) return Error.NoXattr;
if (idx == 0xFFFFFFFF) return error.NoXattr;
return idx;
}
// Get an inode's xattr values. If the inode does not have xattr values (including if the inode is not an extended type), an empty slice is returned.
@@ -208,13 +208,43 @@ pub const Header = extern struct {
// Extract
const ExtractError = error{ MknodFailed, CannotSetXattr, ConcurrencyUnavailable } || DataExtractor.Error || Io.Dir.CreateFileAtomicError || LookupTable.Error ||
Io.File.Reader.SeekError || Io.File.Atomic.LinkError || Io.Dir.CreateDirError || Io.File.OpenError ||
Io.File.SetPermissionsError || Io.File.SetOwnerError || Io.Dir.SymLinkError || Io.Dir.CreateDirPathError;
const ExtractError = error{ MknodFailed, CannotSetXattr } || DataExtractor.Error || DirEntry.Error ||
Decompressor.Error || Io.File.Atomic.InitError || Io.File.Atomic.LinkError || Io.Dir.SymLinkError;
const PathRet = struct {
path: []const u8,
inode: Inode,
xattr_idx: ?u32 = null,
fn setMetadata(path_ret: PathRet, alloc: std.mem.Allocator, io: Io, id_table: *CachedTable(u16), xattr_table: ?*XattrTable, options: ExtractionOptions) !void {
var fil = Io.Dir.cwd().openFile(io, path_ret.path, .{}) catch |err| {
std.debug.print("{s}: {}\n", .{ path_ret.path, err });
return err;
};
defer fil.close(io);
if (!options.ignore_permissions) {
try fil.setPermissions(io, @enumFromInt(path_ret.inode.hdr.permissions));
try fil.setOwner(io, try id_table.get(io, path_ret.inode.hdr.uid_idx), try id_table.get(io, path_ret.inode.hdr.gid_idx));
}
if (xattr_table != null) {
const idx = path_ret.inode.xattrIndex() catch return;
const xattrs = try xattr_table.?.get(alloc, io, idx);
defer {
for (xattrs) |x|
x.deinit(alloc);
alloc.free(xattrs);
}
const sentinel_path = try std.mem.concatWithSentinel(alloc, u8, &[_][]const u8{path_ret.path}, 0);
defer alloc.free(sentinel_path);
for (xattrs) |x| {
const xattr_ret = std.os.linux.fsetxattr(fil.handle, x.key, x.value.ptr, x.value.len, 0);
if (xattr_ret != 0)
return ExtractError.CannotSetXattr;
}
}
}
};
fn DirCompare(_: void, a: PathRet, b: PathRet) std.math.Order {
return std.math.order(std.mem.count(u8, a.path, "/"), std.mem.count(u8, b.path, "/"));
@@ -239,12 +269,156 @@ pub fn extract(
filepath: []const u8,
options: ExtractionOptions,
) !void {
_ = self;
_ = alloc;
_ = io;
_ = fil;
_ = super;
_ = filepath;
_ = options;
return error.TODO;
const path = std.mem.trimEnd(u8, filepath, "/");
const decomp = try @import("decomp.zig").StatelessDecomp(super.compression);
var frag_mgr: FragManager = try .init(alloc, fil, decomp, super.frag_start, super.frag_count, super.block_size);
defer frag_mgr.deinit(io);
var arena: std.heap.ArenaAllocator = .init(alloc);
defer arena.deinit();
var sel_buf: [10]ExtractReturnUnion = undefined;
var sel: Io.Select(ExtractReturnUnion) = .init(io, &sel_buf);
defer sel.cancelDiscard();
sel.async(.path_ret, extractReal, .{ self, alloc, io, fil, super, decomp, &arena, &sel, &frag_mgr, path });
var id_table: CachedTable(u16) = .init(alloc, fil, decomp, super.id_start, super.id_count);
defer id_table.deinit(io);
var xattr_table: ?XattrTable = if (super.flags.xattr_never or options.ignore_xattr or !@hasField(std.os, "linux"))
null
else
try .init(alloc, fil, decomp, super.xattr_start);
defer if (xattr_table != null) xattr_table.?.deinit(io);
var dir_queue: std.PriorityDequeue(PathRet, void, DirCompare) = .empty;
defer dir_queue.deinit(alloc);
while (true) {
if (sel.group.token.load(.unordered) == null) break;
const ret = try sel.await();
const path_ret = try ret.path_ret;
if (options.ignore_permissions and xattr_table == null) continue;
if (path_ret.inode.hdr.inode_type == .dir or path_ret.inode.hdr.inode_type == .ext_dir) {
try dir_queue.push(alloc, path_ret);
continue;
}
try path_ret.setMetadata(alloc, io, &id_table, if (xattr_table == null) null else &xattr_table.?, options);
}
var iter = dir_queue.iterator();
while (iter.next()) |path_ret|
try path_ret.setMetadata(alloc, io, &id_table, if (xattr_table == null) null else &xattr_table.?, options);
}
pub fn extractReal(
self: Inode,
alloc: std.mem.Allocator,
io: Io,
fil: OffsetFile,
super: Archive.Superblock,
decomp: *const Decompressor,
inode_arena: *std.heap.ArenaAllocator,
sel: *Io.Select(ExtractReturnUnion),
frag_mgr: *FragManager,
path: []const u8,
) ExtractError!PathRet {
switch (self.hdr.inode_type) {
.dir, .ext_dir => {
try Io.Dir.cwd().createDir(io, path, @enumFromInt(0o777));
const entries = self.readDirectory(alloc, fil, decomp, super.dir_start) catch |err| switch (err) {
Error.NotDirectory, Error.NotExtended, Error.NotRegularFile, Error.NotSymlink => unreachable,
else => |e| return e,
};
defer {
for (entries) |e|
e.deinit(alloc);
alloc.free(entries);
}
const inode_alloc = inode_arena.allocator();
for (entries) |e| {
const new_path = try std.mem.concat(inode_alloc, u8, &[_][]const u8{ path, "/", e.name });
var rdr = fil.readerAt(super.inode_start + e.block_start);
var meta: MetadataReader = .init(alloc, &rdr, decomp);
try meta.interface.discardAll(e.block_offset);
const new_inode = try read(inode_alloc, &meta.interface, super.block_size);
sel.async(.path_ret, extractReal, .{ new_inode, alloc, io, fil, super, decomp, inode_arena, sel, frag_mgr, new_path });
}
},
.file, .ext_file => {
var atomic = try Io.Dir.cwd().createFileAtomic(io, path, .{ .make_path = true });
defer atomic.deinit(io);
var ext: DataExtractor = switch (self.data) {
.file => |f| blk: {
var ext: DataExtractor = .init(fil, decomp, super.block_size, f.size, f.block_start, f.block_sizes);
if (f.frag_idx != 0xFFFFFFFF)
ext.addFrag(f.frag_block_offset, try frag_mgr.get(io, f.frag_idx));
break :blk ext;
},
.ext_file => |f| blk: {
var ext: DataExtractor = .init(fil, decomp, super.block_size, f.size, f.block_start, f.block_sizes);
if (f.frag_idx != 0xFFFFFFFF)
ext.addFrag(f.frag_block_offset, try frag_mgr.get(io, f.frag_idx));
break :blk ext;
},
else => unreachable,
};
try ext.extractAsync(alloc, io, atomic.file);
try atomic.link(io);
},
.symlink, .ext_symlink => try Io.Dir.cwd().symLink(io, self.symlinkTarget() catch unreachable, path, .{}),
else => {
var mode: u32 = undefined;
var dev: u32 = 0;
const DT = std.posix.DT;
switch (self.data) {
.char_dev => |d| {
dev = d.dev;
mode = DT.CHR;
},
.ext_char_dev => |d| {
dev = d.dev;
mode = DT.CHR;
},
.block_dev => |d| {
dev = d.dev;
mode = DT.BLK;
},
.ext_block_dev => |d| {
dev = d.dev;
mode = DT.BLK;
},
.fifo, .ext_fifo => mode = DT.FIFO,
.socket, .ext_socket => mode = DT.SOCK,
else => unreachable,
}
const sentinel_path = try std.mem.concatWithSentinel(alloc, u8, &[_][]const u8{path}, 0);
const res = std.os.linux.mknod(sentinel_path, mode, dev);
alloc.free(sentinel_path);
if (res != 0)
return ExtractError.MknodFailed;
},
}
return .{
.path = path,
.inode = self,
};
}
+4 -4
View File
@@ -12,7 +12,7 @@ pub fn lookupValue(comptime T: anytype, alloc: std.mem.Allocator, decomp: *const
const block_offset = idx % T_PER_BLOCK;
const offset_pos = table_start + (8 * block);
const offset: u64 = std.mem.readInt(u64, file.map.memory[offset_pos .. offset_pos + 8], .little);
const offset: u64 = std.mem.readInt(u64, @ptrCast(file.map.memory[offset_pos .. offset_pos + 8]), .little);
var rdr = file.readerAt(offset);
var meta: MetadataReader = .init(alloc, &rdr, decomp);
@@ -72,7 +72,7 @@ pub fn CachedTable(comptime T: anytype) type {
for (0..num_blocks) |block| {
const offset_pos = self.table_start + (8 * block);
const offset: u64 = std.mem.readInt(u64, self.fil.map.memory[offset_pos .. offset_pos + 8], .little);
const offset: u64 = std.mem.readInt(u64, @ptrCast(self.fil.map.memory[offset_pos .. offset_pos + 8]), .little);
const len: u16 = if (self.total_num % T_PER_BLOCK != 0 and block == (self.total_num - 1) / T_PER_BLOCK)
@truncate(self.total_num % T_PER_BLOCK)
@@ -106,7 +106,7 @@ pub fn CachedTable(comptime T: anytype) type {
return self.table.get(block).?[block_offset];
const offset_pos = self.table_start + (8 * block);
const offset: u64 = std.mem.readInt(u64, self.fil.map.memory[offset_pos .. offset_pos + 8], .little);
const offset: u64 = std.mem.readInt(u64, @ptrCast(self.fil.map.memory[offset_pos .. offset_pos + 8]), .little);
const len: u16 = if (self.total_num % T_PER_BLOCK != 0 and block == (self.total_num - 1) / T_PER_BLOCK)
@truncate(self.total_num % T_PER_BLOCK)
@@ -114,7 +114,7 @@ pub fn CachedTable(comptime T: anytype) type {
T_PER_BLOCK;
var rdr = self.fil.readerAt(offset);
var meta: MetadataReader = .init(self.alloc, &rdr.interface, self.decomp);
var meta: MetadataReader = .init(self.alloc, &rdr, self.decomp);
const slice = try meta.interface.readSliceEndianAlloc(self.alloc, T, len, .little);
try self.table.put(@truncate(block), slice);
+59 -20
View File
@@ -51,27 +51,22 @@ fn numBlocks(self: DataExtractor) usize {
/// Starts extracting the data using the given group to spawn async tasks.
pub fn extractAsync(self: DataExtractor, alloc: std.mem.Allocator, io: Io, fil: Io.File) Error!void {
var map = try fil.createMemoryMap(io, .{ .len = self.file_size, .protection = .{ .write = true } });
defer map.destroy(io);
var group: Io.Group = .init;
defer group.cancel(io);
var err: ?Error = null;
var read_offset: u64 = self.start;
for (0..self.blocks.len) |idx| {
group.async(io, blockThread, .{ self, alloc, fil, read_offset, idx, &err });
group.async(io, blockThread, .{ self, alloc, io, fil, read_offset, idx, &err });
read_offset += self.blocks[idx].size;
}
if (self.frag_block != null)
group.async(io, fragThread, .{ self, map });
group.async(io, fragThread, .{ self, io, fil, &err });
group.await(io) catch |cancel| return err orelse cancel;
try map.write(io);
}
fn blockThread(self: DataExtractor, alloc: std.mem.Allocator, map: Io.File.MemoryMap, read_offset: u64, idx: usize, ret_err: *?Error) Io.Cancelable!void {
fn blockThread(self: DataExtractor, alloc: std.mem.Allocator, io: Io, fil: Io.File, read_offset: u64, idx: usize, ret_err: *?Error) Io.Cancelable!void {
const block = self.blocks[idx];
const cur_block_size = if (idx == self.numBlocks() - 1)
@@ -81,26 +76,70 @@ fn blockThread(self: DataExtractor, alloc: std.mem.Allocator, map: Io.File.Memor
const write_offset = self.block_size * idx;
var wrt = fil.writer(io, &[0]u8{});
wrt.seekTo(write_offset) catch |err| {
ret_err.* = err;
if (err == error.Canceled) io.recancel();
return Io.Cancelable.Canceled;
};
if (block.size == 0) {
@memset(map.memory[write_offset .. write_offset + cur_block_size], 0);
return;
}
if (block.uncompressed) {
@memcpy(map.memory[write_offset .. write_offset + cur_block_size], self.fil.map.memory[read_offset .. read_offset + cur_block_size]);
} else {
@branchHint(.likely);
_ = self.decomp.Decompress(alloc, self.fil.map.memory[read_offset .. read_offset + block.size], map.memory[write_offset .. write_offset + cur_block_size]) catch |err| {
wrt.interface.splatByteAll(0, cur_block_size) catch |err| {
ret_err.* = err;
if (err == error.Canceled) io.recancel();
return Io.Cancelable.Canceled;
};
} else {
if (block.uncompressed) {
wrt.interface.writeAll(self.fil.map.memory[read_offset..][0..cur_block_size]) catch |err| {
ret_err.* = err;
if (err == error.Canceled) io.recancel();
return Io.Cancelable.Canceled;
};
} else {
@branchHint(.likely);
var tmp: [1024 * 1024]u8 = undefined;
_ = self.decomp.Decompress(alloc, self.fil.map.memory[read_offset..][0..block.size], tmp[0..cur_block_size]) catch |err| {
ret_err.* = err;
return Io.Cancelable.Canceled;
};
wrt.interface.writeAll(tmp[0..cur_block_size]) catch |err| {
ret_err.* = err;
if (err == error.Canceled) io.recancel();
return Io.Cancelable.Canceled;
};
}
}
wrt.flush() catch |err| {
ret_err.* = err;
if (err == error.Canceled) io.recancel();
return Io.Cancelable.Canceled;
};
}
fn fragThread(self: DataExtractor, map: Io.File.MemoryMap) Io.Cancelable!void {
fn fragThread(self: DataExtractor, io: Io, fil: Io.File, ret_err: *?Error) Io.Cancelable!void {
const cur_block_size = self.file_size % self.block_size;
const write_offset = self.blocks.len * self.block_size;
@memcpy(map.memory[write_offset .. write_offset + cur_block_size], self.frag_block.?[self.frag_offset .. self.frag_offset + cur_block_size]);
var wrt = fil.writer(io, &[0]u8{});
wrt.seekTo(write_offset) catch |err| {
ret_err.* = err;
if (err == error.Canceled) io.recancel();
return Io.Cancelable.Canceled;
};
wrt.interface.writeAll(self.frag_block.?[self.frag_offset..][0..cur_block_size]) catch |err| {
ret_err.* = err;
if (err == error.Canceled) io.recancel();
return Io.Cancelable.Canceled;
};
wrt.flush() catch |err| {
ret_err.* = err;
if (err == error.Canceled) io.recancel();
return Io.Cancelable.Canceled;
};
}
+19 -6
View File
@@ -18,11 +18,11 @@ kv_start: u64,
table: LookupTable.CachedTable(TableValue),
value_cache: std.AutoHashMap(InodeRef, []const u8),
value_mut: Io.RWLock = .init,
value_mut: Io.RwLock = .init,
pub fn init(alloc: std.mem.Allocator, fil: OffsetFile, decomp: *const Decompressor, xattr_start: u64) !XattrCachedTable {
const start: u64 = std.mem.readInt(u64, fil.map.memory[xattr_start .. xattr_start + 8], .little);
const num: u64 = std.mem.readInt(u64, fil.map.memory[xattr_start + 8 .. xattr_start + 16], .little);
const start: u64 = std.mem.readInt(u64, @ptrCast(fil.map.memory[xattr_start .. xattr_start + 8]), .little);
const num: u64 = std.mem.readInt(u64, @ptrCast(fil.map.memory[xattr_start + 8 .. xattr_start + 16]), .little);
return .{
.alloc = alloc,
@@ -37,6 +37,7 @@ pub fn init(alloc: std.mem.Allocator, fil: OffsetFile, decomp: *const Decompress
};
}
pub fn deinit(self: *XattrCachedTable, io: Io) void {
self.value_mut.lockUncancelable(io);
self.table.deinit(io);
self.value_cache.deinit();
}
@@ -45,7 +46,7 @@ pub fn get(self: *XattrCachedTable, alloc: std.mem.Allocator, io: Io, idx: u32)
const lookup = try self.table.get(io, idx);
var rdr = self.fil.readerAt(self.kv_start + lookup.ref.block_start);
var meta: MetadataReader = .init(alloc, &rdr.interface, self.decomp);
var meta: MetadataReader = .init(alloc, &rdr, self.decomp);
try meta.interface.discardAll(lookup.ref.block_offset);
const out = try alloc.alloc(XattrSemiOwned, lookup.count);
@@ -95,8 +96,20 @@ pub fn get(self: *XattrCachedTable, alloc: std.mem.Allocator, io: Io, idx: u32)
}
const val_ref: InodeRef = .{ .block_start = meta.cur_block_start, .block_offset = @truncate(meta.interface.seek) };
{
try self.value_mut.lockShared(io);
defer self.value_mut.unlockShared(io);
if (self.value_cache.contains(val_ref)) {
out[i] = .{
.key = key,
.value = try self.valueAt(io, val_ref),
};
continue;
}
}
try self.value_mut.lock(io);
defer self.value_mut.unlock(io);
if (self.value_cache.contains(val_ref)) {
out[i] = .{
.key = key,
@@ -128,7 +141,7 @@ fn valueAt(self: *XattrCachedTable, io: Io, ref: InodeRef) ![]const u8 {
if (self.value_cache.contains(ref)) return self.value_cache.get(ref).?;
var rdr = self.fil.readerAt(self.kv_start + ref.block_start);
var meta: MetadataReader = .init(self.alloc, &rdr.interface, self.decomp);
var meta: MetadataReader = .init(self.alloc, &rdr, self.decomp);
try meta.interface.discardAll(ref.block_offset);
var val_size: u32 = undefined;
@@ -200,7 +213,7 @@ const XattrPrefix = packed struct(u16) {
// Stateless
pub fn statelessLookup(alloc: std.mem.Allocator, io: Io, decomp: *const Decompressor, fil: OffsetFile, table_start: u64, idx: u16) ![]XattrOwned {
const kv_start: u64 = std.mem.readInt(u64, fil.map.memory[table_start .. table_start + 8], .little);
const kv_start: u64 = std.mem.readInt(u64, @ptrCast(fil.map.memory[table_start .. table_start + 8]), .little);
const lookup = try LookupTable.lookupValue(TableValue, alloc, io, decomp, fil, table_start + 16, idx);