Work on extraction

Created DataExtractor & DataReader
Created Lookup tables
This commit is contained in:
Caleb Gardner
2026-05-30 06:22:26 -05:00
parent 56ad79ba94
commit 578911ba67
5 changed files with 569 additions and 19 deletions
+102
View File
@@ -0,0 +1,102 @@
const std = @import("std");
const Io = std.Io;
const DecompCache = @import("../decomp_cache.zig");
const DataBlock = @import("../inode.zig").DataBlock;
const Extractor = @This();
/// Decompression cache that block tasks fetch their data from.
cache: *DecompCache,
/// Size of a full block in the output; block `i` is written at `i * block_size`.
block_size: u32,
/// Read offset of the first block; each later block follows at the running
/// sum of the preceding `DataBlock.size` values.
start: u64,
/// Total size in bytes of the extracted file.
size: u64,
/// Per-block descriptors, in output order.
blocks: []DataBlock,
/// Optional fragment buffer holding the file's tail; set via `addFragment`.
frag_data: ?[]u8 = null,
/// Byte offset inside `frag_data` at which this file's tail begins.
frag_offset: u32 = 0,
/// Builds an extractor for a file of `size` bytes whose first block is read
/// from offset `start`. No fragment is attached; use `addFragment` for that.
pub fn init(cache: *DecompCache, block_size: u32, size: u64, start: u64, blocks: []DataBlock) Extractor {
    const extractor: Extractor = .{
        .blocks = blocks,
        .size = size,
        .start = start,
        .block_size = block_size,
        .cache = cache,
    };
    return extractor;
}
/// Attaches the fragment buffer holding the file's tail. `offset` is the
/// position inside `data` where this file's bytes start.
pub fn addFragment(self: *Extractor, data: []u8, offset: u32) void {
    self.frag_offset = offset;
    self.frag_data = data;
}
/// Extracts the whole file into `fil`.
///
/// The file is first grown to `self.size` by writing a single zero byte at
/// its last position and is then memory-mapped for writing. Every data block
/// (and the fragment, when one is attached) is filled in by its own task in
/// an `Io.Group`; the map is written back once all tasks have finished.
///
/// Returns the first error recorded by a block task, or the error produced
/// while waiting on the group.
pub fn asyncExtract(self: Extractor, io: Io, fil: Io.File) Error!void {
    // An empty file has nothing to map, and `self.size - 1` below would
    // underflow.
    if (self.size == 0) return;

    try fil.writePositionalAll(io, &.{&.{0}}, self.size - 1);
    var map = try fil.createMemoryMap(io, .{ .len = self.size, .protection = .{ .write = true } });
    defer map.destroy(io);

    var group: Io.Group = .init;
    defer group.cancel(io);

    // Block tasks can only return `error.Canceled`; the real cause travels
    // back through this slot.
    var ret_err: ?Error = null;
    var offset = self.start;
    for (self.blocks, 0..) |block, i| {
        group.async(io, blockThread, .{ self, io, map, offset, i, &ret_err });
        offset += block.size;
    }
    if (self.frag_data != null)
        group.async(io, fragThread, .{ self, map });

    group.await(io) catch |err| return ret_err orelse err;
    // Do not rely on `await` failing when a task failed: surface a recorded
    // error even if the wait itself succeeded.
    if (ret_err) |err| return err;
    try map.write(io);
}
/// Fills the region of `map` that belongs to block `idx`.
///
/// `read_offset` is where the block's stored bytes begin. Any failure other
/// than cancellation is stored in `ret_err` and reported as `error.Canceled`,
/// because this task may only return `error{Canceled}`.
fn blockThread(self: Extractor, io: Io, map: Io.File.MemoryMap, read_offset: u64, idx: usize, ret_err: *?Error) error{Canceled}!void {
    const write_pos = idx * self.block_size;
    // Only the final block of a fragment-less file can be short. A remainder
    // of zero means the file ends exactly on a block boundary, so that block
    // is full-sized, not empty.
    const is_tail = self.frag_data == null and idx == self.blocks.len - 1;
    const tail_len = self.size % self.block_size;
    const size = if (is_tail and tail_len != 0) tail_len else self.block_size;

    const block = self.blocks[idx];
    const dest = map.memory[write_pos..][0..size];

    // A stored size of zero is treated as a hole: the region is zero-filled.
    if (block.size == 0) {
        @memset(dest, 0);
        return;
    }
    if (block.uncompressed) {
        @memcpy(dest, self.cache.map.memory[read_offset..][0..size]);
        return;
    }

    const data = self.cache.get(io, read_offset, block.size, size) catch |err| switch (err) {
        error.Canceled => {
            io.recancel();
            return error.Canceled;
        },
        else => |e| {
            ret_err.* = e;
            return error.Canceled;
        },
    };
    defer self.cache.finished(io, read_offset);

    if (data.len != size) {
        std.debug.print("Size of decompression at {} is {} and should be {}\n", .{ read_offset, data.len, size });
        // `BadDecompressionSize` is not part of this function's error set,
        // so hand it back the same way as other failures.
        ret_err.* = Error.BadDecompressionSize;
        return error.Canceled;
    }
    @memcpy(dest, data);
}
/// Copies the file's tail out of the attached fragment into `map`, directly
/// after the last block. Must only be scheduled when `frag_data` is set.
fn fragThread(self: Extractor, map: Io.File.MemoryMap) error{Canceled}!void {
    const tail_len = self.size % self.block_size;
    const dest_start = self.blocks.len * self.block_size;
    @memcpy(
        map.memory[dest_start..][0..tail_len],
        self.frag_data.?[self.frag_offset..][0..tail_len],
    );
}
// Types
/// Errors `asyncExtract` can return: a block that decompressed to an
/// unexpected length, or a failure while sizing or memory-mapping the output
/// file.
pub const Error = error{BadDecompressionSize} || Io.File.WritePositionalError || Io.File.MemoryMap.CreateError;
+151
View File
@@ -0,0 +1,151 @@
const std = @import("std");
const Io = std.Io;
const DecompCache = @import("../decomp_cache.zig");
const DataBlock = @import("../inode.zig").DataBlock;
const Reader = @This();
/// Io handle passed to the cache on every call.
io: Io,
/// Decompression cache that blocks are fetched from.
cache: *DecompCache,
/// Size of a full block; also used to derive the length of the file's tail.
block_size: u32,
/// Total size in bytes of the file being read.
size: u64,
/// Per-block descriptors, consumed in order by `advance`.
blocks: []DataBlock,
/// Optional fragment buffer holding the file's tail; set via `addFragment`.
frag_data: ?[]u8 = null,
/// Byte offset inside `frag_data` at which this file's tail begins.
frag_offset: u32 = 0,
/// Offset of the block currently exposed through `interface`; passed to
/// `cache.finished` on the next `advance`.
cur_offset: u64 = 0,
/// Offset of the next stored block; starts at the `start` given to `init`.
next_offset: u64,
/// Index of the block `advance` will load next; `blocks.len` selects the fragment.
idx: u32 = 0,
/// True while the current block has a stored size of zero: `interface.buffer`
/// is empty and the read callbacks produce zeros instead.
cur_block_sparse: bool = false,
/// `Io.Reader` interface. `advance` repoints `buffer` at each block's data,
/// so the buffer is never owned by the interface itself.
interface: Io.Reader = .{
.buffer = &[0]u8{},
.end = 0,
.seek = 0,
.vtable = &.{
.stream = stream,
.discard = discard,
.readVec = readVec,
},
},
/// Builds a reader over a file of `size` bytes whose first block is stored at
/// offset `start`. No fragment is attached; use `addFragment` for that.
pub fn init(io: Io, cache: *DecompCache, block_size: u32, size: u64, start: u64, blocks: []DataBlock) Reader {
    const reader: Reader = .{
        .next_offset = start,
        .blocks = blocks,
        .size = size,
        .block_size = block_size,
        .cache = cache,
        .io = io,
    };
    return reader;
}
/// Releases the cache entry of the block currently exposed by `interface`.
pub fn deinit(self: Reader) void {
    // `finished` takes the block's offset, as in `advance`.
    self.cache.finished(self.io, self.cur_offset);
}
/// Attaches the fragment buffer holding the file's tail. `offset` is the
/// position inside `data` where this file's bytes start.
pub fn addFragment(self: *Reader, data: []u8, offset: u32) void {
    self.frag_offset = offset;
    self.frag_data = data;
}
/// Moves `interface` on to the next block, or to the fragment once all
/// blocks have been consumed.
///
/// Resets `seek` to 0 and points `buffer`/`end` at the new data. Returns
/// `error.EndOfStream` when nothing is left, and `error.ReadFailed` when the
/// block cannot be fetched or decompresses to an unexpected length.
fn advance(self: *Reader) Io.Reader.Error!void {
    errdefer self.interface.end = 0;
    self.interface.seek = 0;
    if (self.idx > self.blocks.len) return error.EndOfStream;
    defer self.idx += 1;
    self.cache.finished(self.io, self.cur_offset);

    const tail_len = self.size % self.block_size;

    if (self.idx == self.blocks.len) {
        const frag = self.frag_data orelse return error.EndOfStream;
        self.cur_offset = 0;
        // The previous block may have been sparse; the fragment holds real
        // bytes and must not be replaced by zeros.
        self.cur_block_sparse = false;
        self.interface.buffer = frag[self.frag_offset..][0..tail_len];
        self.interface.end = tail_len;
        return;
    }

    const block = self.blocks[self.idx];
    // Only the final block of a fragment-less file can be short. A remainder
    // of zero means the file ends exactly on a block boundary, so that block
    // is full-sized, not empty.
    const is_tail = self.idx == self.blocks.len - 1 and self.frag_data == null;
    const size = if (is_tail and tail_len != 0) tail_len else self.block_size;

    if (block.size == 0) {
        // NOTE(review): `end` exceeds `buffer.len` here. The vtable callbacks
        // honour `cur_block_sparse`, but anything that reads
        // `buffer[seek..end]` directly would go out of bounds — confirm that
        // no such path is used.
        self.interface.buffer = &[0]u8{};
        self.cur_block_sparse = true;
        self.interface.end = size;
        return;
    }
    self.cur_block_sparse = false;

    self.cur_offset = self.next_offset;
    self.next_offset = self.cur_offset + block.size;

    if (block.uncompressed) {
        self.interface.buffer = self.cache.map.memory[self.cur_offset..][0..size];
        self.interface.end = size;
        return;
    }

    // The reader error set cannot carry the cache's own errors, so every
    // fetch failure is reported as `ReadFailed`.
    const data = self.cache.get(self.io, self.cur_offset, block.size, size) catch
        return error.ReadFailed;
    if (data.len != size) {
        std.debug.print("Size of decompression at {} is {} and should be {}\n", .{ self.cur_offset, data.len, size });
        return error.ReadFailed;
    }
    self.interface.buffer = data;
    self.interface.end = size;
}
/// `Io.Reader` stream callback: forwards up to `limit` bytes of the current
/// block to `w`, loading the next block first when the current one is used up.
/// Sparse blocks are emitted as zero bytes.
fn stream(r: *Io.Reader, w: *Io.Writer, limit: Io.Limit) Io.Reader.StreamError!usize {
    const self: *Reader = @fieldParentPtr("interface", r);
    if (r.seek >= r.end) try self.advance();

    const remaining = r.end - r.seek;
    const want = @min(@intFromEnum(limit), remaining);
    const written = if (self.cur_block_sparse)
        try w.splatByte(0, want)
    else
        try w.write(r.buffer[r.seek..][0..want]);

    r.seek += written;
    return written;
}
/// `Io.Reader` discard callback: skips up to `limit` bytes of the current
/// block, loading the next block first when the current one is used up.
fn discard(r: *Io.Reader, limit: Io.Limit) Io.Reader.Error!usize {
    if (r.seek >= r.end) {
        const self: *Reader = @fieldParentPtr("interface", r);
        try self.advance();
    }
    const remaining = r.end - r.seek;
    const skipped = @min(@intFromEnum(limit), remaining);
    r.seek += skipped;
    return skipped;
}
/// `Io.Reader` readVec callback: fills the buffers in `vec` from the current
/// block until either the block or the buffers run out. Never crosses into
/// the next block within one call. Sparse blocks yield zeros.
fn readVec(r: *Io.Reader, vec: [][]u8) Io.Reader.Error!usize {
    const self: *Reader = @fieldParentPtr("interface", r);
    if (r.seek >= r.end) try self.advance();

    var copied: usize = 0;
    for (vec) |dest| {
        const n = @min(dest.len, r.end - r.seek);
        const out = dest[0..n];
        if (self.cur_block_sparse) {
            @memset(out, 0);
        } else {
            @memcpy(out, r.buffer[r.seek..][0..n]);
        }
        copied += n;
        r.seek += n;
        if (r.seek >= r.end) break;
    }
    return copied;
}
+3 -1
View File
@@ -8,7 +8,7 @@ const Directory = @This();
entries: []Entry,
pub fn init(alloc: std.mem.Allocator, rdr: *Reader, size: u32) !Directory {
pub fn init(alloc: std.mem.Allocator, rdr: *Reader, size: u32) Error!Directory {
if (size <= 3) return .{ .entries = &[0]Entry{} };
var entries: std.ArrayList(Entry) = try .initCapacity(alloc, 50);
@@ -56,6 +56,8 @@ pub fn deinit(self: Directory, alloc: std.mem.Allocator) void {
// Types
pub const Error = Reader.Error || std.mem.Allocator.Error;
pub const Entry = struct {
inode_num: u32,
block_start: u32,
+209 -18
View File
@@ -1,51 +1,242 @@
const std = @import("std");
const Io = std.Io;
const Atomic = std.atomic.Value;
const DecompCache = @import("decomp_cache.zig");
const ExtractionOptions = @import("options.zig");
const Inode = @import("inode.zig");
const Superblock = @import("archive.zig").Superblock;
const Directory = @import("directory.zig");
const DataExtractor = @import("data/extractor.zig");
const DataReader = @import("data/reader.zig");
/// Extracts `inode` to `ext_loc`.
///
/// Work is scheduled on an `Io.Select` by `extractReal`, while `returnLoop`
/// runs concurrently and consumes the finished results. Returns the first
/// error from either scheduling or the result loop.
pub fn extract(alloc: std.mem.Allocator, io: Io, inode: Inode, cache: *DecompCache, super: Superblock, ext_loc: []const u8, options: ExtractionOptions) !void {
    // Strip trailing separators only: trimming both ends would turn an
    // absolute destination such as "/tmp/out" into the relative "tmp/out".
    const path = std.mem.trimEnd(u8, ext_loc, "/");

    var buf: [50]ReturnUnion = undefined;
    var sel: Io.Select(ReturnUnion) = .init(io, &buf);
    defer sel.cancelDiscard();

    var ret_loop = io.async(returnLoop, .{ alloc, &sel, options });
    // The loop holds pointers to `sel` (and through it `buf`); make sure it
    // has stopped before this frame goes away, including when scheduling
    // below fails. Its result on that path is deliberately discarded because
    // the scheduling error is the one being returned.
    defer ret_loop.cancel(io) catch {};

    // `path` borrows from `ext_loc` and `inode` belongs to the caller, so
    // this is the origin entry: tasks must not free either of them.
    try extractReal(alloc, io, cache, super, &sel, path, inode, null, true);
    ret_loop.await(io) catch |err| {
        // TODO: Drain sel
        return err;
    };
}
pub fn extractDir(alloc: std.mem.Allocator, io: Io, path: []const u8, d: anytype) Error!PathReturn {}
pub fn extractFile(alloc: std.mem.Allocator, io: Io, path: []const u8, d: anytype) Error!PathReturn {
/// Schedules the extraction of `inode` to `path` on `sel`.
///
/// Directories are handed to `extractDir` (result tag `dir_ret`) and regular
/// files to `extractFile` (result tag `file_ret`). Remaining inode kinds are
/// not handled yet and fail with `error.Canceled`.
///
/// `parent` and `origin` are passed through unchanged to the scheduled task.
/// Fails before scheduling anything if the current task has been cancelled.
fn extractReal(
    alloc: std.mem.Allocator,
    io: Io,
    cache: *DecompCache,
    super: Superblock,
    sel: *Io.Select(ReturnUnion),
    path: []const u8,
    inode: Inode,
    parent: ?*Atomic(usize),
    origin: bool,
) Error!void {
    try io.checkCancel();
    switch (inode.data) {
        .dir, .ext_dir => sel.async(
            .dir_ret,
            extractDir,
            .{ alloc, io, cache, super, sel, path, inode, parent, origin },
        ),
        // Regular files were previously caught by the `else` arm, so any
        // archive containing a file aborted the whole extraction.
        .file, .ext_file => sel.async(
            .file_ret,
            extractFile,
            .{ alloc, io, cache, super.block_size, path, inode, parent, origin },
        ),
        // TODO: remaining inode kinds.
        else => return error.Canceled,
    }
}
/// Task body for one directory: reads its entries and schedules the
/// extraction of every child through `extractReal`.
///
/// On exit (success or failure) the `parent` counter, when present, is
/// decremented, and `inode` is released unless this is the origin entry.
/// `path` is freed on failure only, and only when it is not the origin's;
/// on success ownership moves into the returned `DirReturn`.
///
/// The returned `sub_files` counter starts at the number of entries and is
/// shared with every scheduled child.
fn extractDir(
    alloc: std.mem.Allocator,
    io: Io,
    cache: *DecompCache,
    super: Superblock,
    sel: *Io.Select(ReturnUnion),
    path: []const u8,
    inode: Inode,
    parent: ?*Atomic(usize),
    origin: bool,
) Error!DirReturn {
    defer {
        if (parent) |p| _ = p.fetchSub(1, .acquire);
        if (!origin) inode.deinit(alloc);
    }
    errdefer if (!origin) alloc.free(path);

    const dir = inode.directory(alloc, io, cache, super.dir_start) catch |err| switch (err) {
        // `extractReal` only schedules this task for directory inodes.
        error.NotDirectory => unreachable,
        else => |e| return e,
    };
    defer dir.deinit(alloc);

    // NOTE(review): if the loop below fails part-way, this counter is never
    // freed. Children scheduled earlier still hold the pointer, so it cannot
    // simply be destroyed here — needs a proper drain strategy.
    const sub_files = try alloc.create(Atomic(usize));
    sub_files.* = .init(dir.entries.len);

    const ret: DirReturn = .{
        .path = path,
        .sub_files = sub_files,
        .origin = origin,
        .uid_idx = inode.hdr.uid_idx,
        .gid_idx = inode.hdr.gid_idx,
        .mod_time = inode.hdr.mod_time,
        .permissions = inode.hdr.permission,
        .xattr_idx = switch (inode.data) {
            // 0xFFFFFFFF is mapped to "no xattr index".
            .ext_dir => |d| if (d.xattr_idx != 0xFFFFFFFF) d.xattr_idx else null,
            else => null,
        },
    };

    for (dir.entries) |entry| {
        const new_inode: Inode = try .initDirEntry(alloc, io, cache, super.inode_start, super.block_size, entry);
        errdefer new_inode.deinit(alloc);
        const new_path = try std.mem.concat(alloc, u8, &.{ path, "/", entry.name });
        // `extractReal` only fails before handing the path to a task, so on
        // failure the path is still ours to free.
        errdefer alloc.free(new_path);
        try extractReal(
            alloc,
            io,
            cache,
            super,
            sel,
            new_path,
            new_inode,
            sub_files,
            false,
        );
    }
    return ret;
}
/// Task body for one regular file (work in progress).
///
/// Creates an atomic file at `path`, prepares the metadata to report and a
/// `DataExtractor` over the inode's blocks, then links the atomic file.
/// On exit the `parent` counter, when present, is decremented and `inode` is
/// released unless this is the origin entry; `path` is freed on failure only.
///
/// NOTE(review): as written, no data is extracted and every path through the
/// function ends in an error — `ret` and `data` are built but never used.
fn extractFile(
alloc: std.mem.Allocator,
io: Io,
cache: *DecompCache,
block_size: u32,
path: []const u8,
inode: Inode,
parent: ?*Atomic(usize),
origin: bool,
) Error!FileReturn {
defer {
if (parent != null)
_ = parent.?.fetchSub(1, .acquire);
if (!origin) inode.deinit(alloc);
}
errdefer if (!origin) alloc.free(path);
const atomic = try Io.Dir.cwd().createFileAtomic(io, path, .{});
defer atomic.deinit(io);
// TODO
// Metadata copied from the inode header for the result loop.
var ret: FileReturn = .{
.path = path,
.origin = origin,
.uid_idx = inode.hdr.uid_idx,
.gid_idx = inode.hdr.gid_idx,
.permissions = inode.hdr.permission,
.mod_time = inode.hdr.mod_time,
};
// Both file variants are set up identically; a `frag_idx` other than
// 0xFFFFFFFF enters the (still empty) fragment branch.
var data: DataExtractor = switch (inode.data) {
.file => |f| blk: {
var data: DataExtractor = .init(cache, block_size, f.size, f.data_start, f.blocks);
if (f.frag_idx != 0xFFFFFFFF) {
// TODO
}
break :blk data;
},
.ext_file => |f| blk: {
var data: DataExtractor = .init(cache, block_size, f.size, f.data_start, f.blocks);
if (f.frag_idx != 0xFFFFFFFF) {
//TODO
}
break :blk data;
},
// Presumably only reached for file inodes — confirm against the caller.
else => unreachable,
};
try atomic.link(io);
// NOTE(review): the commented-out return and `error.TODO` below look like
// leftovers from the previous stub; the second `return` is unreachable.
// return .{
// .path = path,
// };
return error.TODO;
return error.Canceled;
}
// Loop
/// Consumes finished extraction tasks from `sel` until none remain.
///
/// For each result the task's error (if any) is propagated. Directory results
/// whose `sub_files` counter is still non-zero are put back on the queue and
/// revisited later; otherwise the path (unless it is the origin's) and the
/// counter are freed. File results only have their path freed.
fn returnLoop(alloc: std.mem.Allocator, sel: *Io.Select(ReturnUnion), options: ExtractionOptions) !void {
while (true) {
const finished = try sel.await();
switch (finished) {
.dir_ret => |d| {
const ret = try d;
// Children still pending: requeue this result and look at it again later.
// NOTE(review): this re-polls the same entry until the counter reaches
// zero, and the failure path below frees the path but not `sub_files`.
if (ret.sub_files.load(.unordered) != 0) {
sel.queue.putOne(sel.io, .{ .dir_ret = ret }) catch |err| {
if (!ret.origin) alloc.free(ret.path);
return err;
};
continue;
}
if (!ret.origin) alloc.free(ret.path);
alloc.destroy(ret.sub_files);
// NOTE(review): this runs only when BOTH options are off; permissions
// and xattrs probably want independent checks — confirm.
if (!options.ignore_permissions and !options.ignore_xattr) {
// TODO: set permissions & xattr.
}
},
.file_ret => |f| {
const ret = try f;
if (!ret.origin) alloc.free(ret.path);
if (!options.ignore_permissions and !options.ignore_xattr) {
// TODO: set permissions & xattr.
}
},
.void_ret => |v| try v,
}
// Presumably a null token means the group has no tasks left — confirm
// against the `Io.Select` / `Io.Group` implementation.
if (sel.group.token.load(.unordered) == null) break;
}
}
// Utility types
const ReturnUnion = union {
path_ret: Error!PathReturn,
const ReturnUnion = union(enum) {
file_ret: Error!FileReturn,
dir_ret: Error!DirReturn,
void_ret: Error!void,
};
const Error = error{};
const Error = error{Canceled} || Directory.Error;
const PathReturn = struct {
const FileReturn = struct {
path: []const u8,
origin: bool,
uid_idx: u32,
gid_idx: u32,
mod_time: u32,
permission: u16,
permissions: u16,
xattr_idx: ?u32,
xattr_idx: ?u32 = null,
};
/// Result of a finished `extractDir` task, consumed by `returnLoop`.
const DirReturn = struct {
/// Destination path of the directory; freed by `returnLoop` unless `origin` is set.
path: []const u8,
/// Heap-allocated count of child entries that have not finished yet;
/// destroyed by `returnLoop` once it reads zero.
sub_files: *Atomic(usize),
/// True for the entry extraction started from; its path is not freed.
origin: bool,
/// Copied from the inode header's `uid_idx`.
uid_idx: u32,
/// Copied from the inode header's `gid_idx`.
gid_idx: u32,
/// Copied from the inode header's `mod_time`.
mod_time: u32,
/// Copied from the inode header's `permission`.
permissions: u16,
/// Extended-attribute index; null unless the inode is an `ext_dir` whose
/// index is not 0xFFFFFFFF.
xattr_idx: ?u32 = null,
};
+104
View File
@@ -0,0 +1,104 @@
const std = @import("std");
const Io = std.Io;
const DecompCache = @import("decomp_cache.zig");
const MetadataReader = @import("meta_rdr.zig");
/// Reads entry `idx` of a lookup table without caching anything.
///
/// `table_start` is the position of the table's list of 8-byte little-endian
/// block offsets. Each referenced metadata block holds `8192 / @sizeOf(T)`
/// entries; the wanted one is reached by skipping the entries before it.
pub fn stateless(comptime T: type, io: Io, cache: *DecompCache, table_start: u64, idx: u32) !T {
    const PER_BLOCK = 8192 / @sizeOf(T);
    // Widen before multiplying so the byte offset cannot overflow `u32`.
    const block: u64 = idx / PER_BLOCK;
    const block_idx = idx % PER_BLOCK;

    const offset_offset = table_start + (block * 8);
    // A `u64` needs all eight bytes of the offset slot.
    const offset = std.mem.readInt(u64, cache.map.memory[offset_offset..][0..8], .little);

    var meta: MetadataReader = .init(io, cache, offset);
    defer meta.deinit(io);

    try meta.interface.discardAll(block_idx * @sizeOf(T));
    var new: T = undefined;
    try meta.interface.readSliceEndian(T, (&new)[0..1], .little);
    return new;
}
/// Returns a caching lookup table of `T` entries.
///
/// Entries are loaded one metadata block (`8192 / @sizeOf(T)` entries) at a
/// time on first access and kept until `deinit`. Access is guarded by a
/// reader/writer lock: cached blocks are read under the shared lock, missing
/// blocks are loaded under the exclusive lock.
pub fn Table(comptime T: type) type {
    return struct {
        const PER_BLOCK = 8192 / @sizeOf(T);
        // Not named `Table`: that would collide with the enclosing function.
        const Self = @This();

        alloc: std.mem.Allocator,
        cache: *DecompCache,
        /// Position of the list of 8-byte little-endian block offsets.
        table_start: u64,
        /// Total number of entries in the table.
        num: u32,
        /// Loaded blocks, keyed by block number.
        values: std.AutoHashMap(u32, []T),
        mut: Io.RwLock,

        /// Creates an empty table; nothing is read until the first `get`.
        pub fn init(alloc: std.mem.Allocator, cache: *DecompCache, table_start: u64, num_values: u32) Self {
            return .{
                .alloc = alloc,
                .cache = cache,
                .table_start = table_start,
                .num = num_values,
                .values = .init(alloc),
                .mut = .init,
            };
        }

        /// Frees every cached block and the map itself.
        pub fn deinit(self: *Self) void {
            var iter = self.values.valueIterator();
            // The iterator yields pointers to the stored slices.
            while (iter.next()) |v|
                self.alloc.free(v.*);
            self.values.deinit();
        }

        /// Returns entry `idx`, loading its block on first use.
        /// `idx` must be less than the entry count given to `init`.
        pub fn get(self: *Self, io: Io, idx: u32) Error!T {
            std.debug.assert(idx < self.num);
            const block = idx / PER_BLOCK;
            const block_idx = idx % PER_BLOCK;

            {
                try self.mut.lockShared(io);
                defer self.mut.unlockShared(io);
                if (self.values.get(block)) |cached| return cached[block_idx];
            }

            try self.mut.lock(io);
            defer self.mut.unlock(io);

            // Another task may have loaded the block between the two locks.
            const val = try self.values.getOrPut(block);
            if (val.found_existing)
                return val.value_ptr.*[block_idx];
            errdefer self.values.removeByPtr(val.key_ptr);

            const offset_offset = self.table_start + (@as(u64, block) * 8);
            // A `u64` needs all eight bytes of the offset slot.
            const offset = std.mem.readInt(u64, self.cache.map.memory[offset_offset..][0..8], .little);
            var meta: MetadataReader = .init(io, self.cache, offset);
            defer meta.deinit(io);

            // Only the last block can be partial; a remainder of zero means
            // it is completely full, not empty.
            const last_block = (self.num - 1) / PER_BLOCK;
            const remainder = self.num % PER_BLOCK;
            const size = if (block == last_block and remainder != 0)
                remainder
            else
                PER_BLOCK;

            const new_block = try self.alloc.alloc(T, size);
            errdefer self.alloc.free(new_block);
            try meta.interface.readSliceEndian(T, new_block, .little);
            val.value_ptr.* = new_block;
            return new_block[block_idx];
        }
    };
}
// Types
/// Errors `Table.get` can return: allocation failure, cancellation while
/// waiting for the table lock, or a failed metadata read.
pub const Error = std.mem.Allocator.Error || Io.Cancelable || Io.Reader.Error;
// NOTE(review): both entry types are still empty placeholders. An empty
// struct presumably has size 0, which would make `8192 / @sizeOf(T)` in
// `stateless` and `Table` a division by zero — fill in the stored fields
// before instantiating either with these types.
pub const FragmentEntry = extern struct {};
pub const XattrEntry = extern struct {};