diff --git a/src/data/extractor.zig b/src/data/extractor.zig
new file mode 100644
index 0000000..1f7d1d2
--- /dev/null
+++ b/src/data/extractor.zig
@@ -0,0 +1,126 @@
+//! Extracts a regular file's data blocks, plus its optional fragment tail, into a memory-mapped file.
+const std = @import("std");
+const Io = std.Io;
+
+const DecompCache = @import("../decomp_cache.zig");
+const DataBlock = @import("../inode.zig").DataBlock;
+
+const Extractor = @This();
+
+cache: *DecompCache,
+block_size: u32,
+
+start: u64,
+size: u64,
+blocks: []DataBlock,
+
+frag_data: ?[]u8 = null,
+frag_offset: u32 = 0,
+
+/// Creates an extractor for a file of `size` bytes whose blocks are read starting at offset `start`.
+pub fn init(cache: *DecompCache, block_size: u32, size: u64, start: u64, blocks: []DataBlock) Extractor {
+    return .{
+        .cache = cache,
+        .block_size = block_size,
+
+        .start = start,
+        .size = size,
+        .blocks = blocks,
+    };
+}
+
+/// Registers the fragment holding the file's tail; the tail is read from `data[offset..]`.
+pub fn addFragment(self: *Extractor, data: []u8, offset: u32) void {
+    self.frag_data = data;
+    self.frag_offset = offset;
+}
+
+/// Sizes `fil` to `self.size`, maps it, and fills it with one task per block plus one for the fragment.
+/// Returns the failure recorded by a block task if there is one, otherwise the group's own error.
+pub fn asyncExtract(self: Extractor, io: Io, fil: Io.File) Error!void {
+    // An empty file has nothing to write, and `self.size - 1` below would underflow.
+    if (self.size == 0) return;
+
+    // Write one zero byte at the final position so the file spans `self.size` bytes before mapping.
+    try fil.writePositionalAll(io, &.{&.{0}}, self.size - 1);
+
+    var map = try fil.createMemoryMap(io, .{ .len = self.size, .protection = .{ .write = true } });
+    defer map.destroy(io);
+
+    var group: Io.Group = .init;
+    defer group.cancel(io);
+
+    // Block tasks can only return `error.Canceled`; the real cause is parked here.
+    var ret_err: ?Error = null;
+
+    var offset = self.start;
+    for (self.blocks, 0..) |block, i| {
+        group.async(io, blockThread, .{ self, io, map, offset, i, &ret_err });
+
+        offset += block.size;
+    }
+    if (self.frag_data != null)
+        group.async(io, fragThread, .{ self, map });
+
+    group.await(io) catch |err| return ret_err orelse err;
+
+    try map.write(io);
+}
+
+/// Size in bytes of the final data block when the file has no fragment.
+/// A remainder of zero means the last block is completely full, not empty.
+fn lastBlockSize(self: Extractor) u64 {
+    const rem = self.size % self.block_size;
+    return if (rem == 0) self.block_size else rem;
+}
+
+/// Copies or decompresses block `idx`, stored at `read_offset`, into its slot in `map`.
+/// Failures other than cancelation are stored in `ret_err` and reported as `error.Canceled`.
+fn blockThread(self: Extractor, io: Io, map: Io.File.MemoryMap, read_offset: u64, idx: usize, ret_err: *?Error) error{Canceled}!void {
+    const write_pos = idx * self.block_size;
+    const size = if (self.frag_data == null and idx == self.blocks.len - 1)
+        self.lastBlockSize()
+    else
+        self.block_size;
+    const block = self.blocks[idx];
+
+    // A stored size of zero is treated as a sparse block: the output is all zeroes.
+    if (block.size == 0) {
+        @memset(map.memory[write_pos..][0..size], 0);
+        return;
+    }
+    if (block.uncompressed) {
+        @memcpy(map.memory[write_pos..][0..size], self.cache.map.memory[read_offset..][0..size]);
+        return;
+    }
+    const data = self.cache.get(io, read_offset, block.size, size) catch |err| switch (err) {
+        error.Canceled => {
+            io.recancel();
+            return error.Canceled;
+        },
+        else => |e| {
+            ret_err.* = e;
+            return error.Canceled;
+        },
+    };
+    defer self.cache.finished(io, read_offset);
+    if (data.len != size) {
+        std.debug.print("Size of decompression at {} is {} and should be {}\n", .{ read_offset, data.len, size });
+        // Only `error.Canceled` may be returned from here, so park the real error for the caller.
+        ret_err.* = Error.BadDecompressionSize;
+        return error.Canceled;
+    }
+    @memcpy(map.memory[write_pos..][0..size], data);
+}
+
+/// Copies the file's tail out of the fragment into `map`, right after the last block slot.
+fn fragThread(self: Extractor, map: Io.File.MemoryMap) error{Canceled}!void {
+    const write_pos = self.blocks.len * self.block_size;
+    const size = self.size % self.block_size;
+
+    @memcpy(map.memory[write_pos..][0..size], self.frag_data.?[self.frag_offset..][0..size]);
+}
+
+// Types
+
+pub const Error = error{BadDecompressionSize} || Io.File.WritePositionalError || Io.File.MemoryMap.CreateError;
diff --git a/src/data/reader.zig b/src/data/reader.zig
new file mode 100644
index 0000000..78d064a
--- /dev/null
+++ b/src/data/reader.zig
@@ -0,0 +1,176 @@
+//! `Io.Reader` implementation that walks a file's data blocks, then its optional fragment tail.
+const std = @import("std");
+const Io = std.Io;
+
+const DecompCache = @import("../decomp_cache.zig");
+const DataBlock = @import("../inode.zig").DataBlock;
+
+const Reader = @This();
+
+io: Io,
+
+cache: *DecompCache,
+block_size: u32,
+
+size: u64,
+blocks: []DataBlock,
+
+frag_data: ?[]u8 = null,
+frag_offset: u32 = 0,
+
+cur_offset: u64 = 0,
+next_offset: u64,
+
+idx: u32 = 0,
+cur_block_sparse: bool = false,
+
+interface: Io.Reader = .{
+    .buffer = &[0]u8{},
+    .end = 0,
+    .seek = 0,
+    .vtable = &.{
+        .stream = stream,
+        .discard = discard,
+        .readVec = readVec,
+    },
+},
+
+/// Creates a reader for a file of `size` bytes whose blocks are read starting at offset `start`.
+pub fn init(io: Io, cache: *DecompCache, block_size: u32, size: u64, start: u64, blocks: []DataBlock) Reader {
+    return .{
+        .io = io,
+
+        .cache = cache,
+        .block_size = block_size,
+
+        .size = size,
+        .blocks = blocks,
+
+        .next_offset = start,
+    };
+}
+/// Reports the block at `cur_offset` to the cache as finished, matching the call made in `advance`.
+pub fn deinit(self: Reader) void {
+    self.cache.finished(self.io, self.cur_offset);
+}
+
+/// Registers the fragment holding the file's tail; the tail is read from `data[offset..]`.
+pub fn addFragment(self: *Reader, data: []u8, offset: u32) void {
+    self.frag_data = data;
+    self.frag_offset = offset;
+}
+
+/// Size in bytes of the final data block when the file has no fragment.
+/// A remainder of zero means the last block is completely full, not empty.
+fn lastBlockSize(self: Reader) u64 {
+    const rem = self.size % self.block_size;
+    return if (rem == 0) self.block_size else rem;
+}
+
+/// Moves to the next block (or the fragment tail) and points `interface.buffer` at its data.
+/// Sparse blocks get an empty buffer with `cur_block_sparse` set; the vtable functions emit the zeroes.
+fn advance(self: *Reader) Io.Reader.Error!void {
+    errdefer self.interface.end = 0;
+    self.interface.seek = 0;
+
+    if (self.idx > self.blocks.len) return error.EndOfStream;
+    defer self.idx += 1;
+    self.cache.finished(self.io, self.cur_offset);
+
+    if (self.idx == self.blocks.len) {
+        const frag = self.frag_data orelse return error.EndOfStream;
+        self.cur_offset = 0;
+        // A preceding sparse block must not make the fragment read back as zeroes.
+        self.cur_block_sparse = false;
+
+        const size = self.size % self.block_size;
+        self.interface.buffer = frag[self.frag_offset..][0..size];
+        self.interface.end = size;
+        return;
+    }
+
+    const block = self.blocks[self.idx];
+
+    const size = if (self.idx == self.blocks.len - 1 and self.frag_data == null)
+        self.lastBlockSize()
+    else
+        self.block_size;
+
+    if (block.size == 0) {
+        self.interface.buffer = &[0]u8{};
+        self.cur_block_sparse = true;
+        self.interface.end = size;
+        return;
+    } else {
+        self.cur_block_sparse = false;
+    }
+
+    self.cur_offset = self.next_offset;
+    self.next_offset = self.cur_offset + block.size;
+
+    if (block.uncompressed) {
+        self.interface.buffer = self.cache.map.memory[self.cur_offset..][0..size];
+        self.interface.end = size;
+        return;
+    }
+    // `Io.Reader.Error` cannot carry the cache's errors, so every failure becomes `ReadFailed`.
+    const data = self.cache.get(self.io, self.cur_offset, block.size, size) catch |err| switch (err) {
+        error.Canceled => {
+            self.io.recancel();
+            return error.ReadFailed;
+        },
+        else => return error.ReadFailed,
+    };
+    if (data.len != size) {
+        std.debug.print("Size of decompression at {} is {} and should be {}\n", .{ self.cur_offset, data.len, size });
+        return Io.Reader.Error.ReadFailed;
+    }
+    self.interface.buffer = data;
+    self.interface.end = size;
+}
+
+/// Writes up to `limit` bytes of the current block to `w`, advancing to the next block when exhausted.
+fn stream(r: *Io.Reader, w: *Io.Writer, limit: Io.Limit) Io.Reader.StreamError!usize {
+    const self: *Reader = @fieldParentPtr("interface", r);
+    if (r.seek >= r.end) {
+        try self.advance();
+    }
+    const to_write = @min(@intFromEnum(limit), r.end - r.seek);
+    const wrote = try if (self.cur_block_sparse)
+        w.splatByte(0, to_write)
+    else
+        w.write(r.buffer[r.seek..][0..to_write]);
+    r.seek += wrote;
+    return wrote;
+}
+/// Skips up to `limit` bytes of the current block, advancing to the next block when exhausted.
+fn discard(r: *Io.Reader, limit: Io.Limit) Io.Reader.Error!usize {
+    if (r.seek >= r.end) {
+        const self: *Reader = @fieldParentPtr("interface", r);
+        try self.advance();
+    }
+    const to_discard = @min(@intFromEnum(limit), r.end - r.seek);
+    r.seek += to_discard;
+    return to_discard;
+}
+/// Fills the buffers in `vec` from the current block only, stopping once that block is exhausted.
+fn readVec(r: *Io.Reader, vec: [][]u8) Io.Reader.Error!usize {
+    const self: *Reader = @fieldParentPtr("interface", r);
+    if (r.seek >= r.end) {
+        try self.advance();
+    }
+    var total: usize = 0;
+    for (vec) |v| {
+        const to_copy = @min(v.len, r.end - r.seek);
+        if (self.cur_block_sparse) {
+            @memset(v[0..to_copy], 0);
+        } else {
+            @memcpy(v[0..to_copy], r.buffer[r.seek..][0..to_copy]);
+        }
+        total += to_copy;
+        r.seek += to_copy;
+
+        if (r.seek >= r.end) break;
+    }
+    return total;
+}
diff --git a/src/directory.zig b/src/directory.zig
index 7eee8f8..93fa5b4 100644
--- a/src/directory.zig
+++ b/src/directory.zig
@@ -8,7 +8,7 @@ const Directory = @This();
 
 entries: []Entry,
 
-pub fn init(alloc: std.mem.Allocator, rdr: *Reader, size: u32) !Directory {
+pub fn init(alloc: std.mem.Allocator, rdr: *Reader, size: u32) Error!Directory {
     if (size <= 3) return .{ .entries = &[0]Entry{} };
 
     var entries: std.ArrayList(Entry) = try .initCapacity(alloc, 50);
@@ -56,6 +56,8 @@ pub fn deinit(self: Directory, alloc: std.mem.Allocator) void {
 
 // Types
 
+pub const Error = Reader.Error || std.mem.Allocator.Error;
+
 pub const Entry = struct {
     inode_num: u32,
     block_start: u32,
diff --git a/src/extract.zig b/src/extract.zig
index 914cab2..6d571f7 100644
--- a/src/extract.zig
+++ b/src/extract.zig
@@ -1,51 +1,253 @@
 const std = @import("std");
 const Io = std.Io;
 
+const Atomic = std.atomic.Value;
+
 const DecompCache = @import("decomp_cache.zig");
 const ExtractionOptions = @import("options.zig");
 const Inode = @import("inode.zig");
 const Superblock = @import("archive.zig").Superblock;
+const Directory = @import("directory.zig");
+const DataExtractor = @import("data/extractor.zig");
+const DataReader = @import("data/reader.zig");
 
+/// Extracts `inode` to `ext_loc` (surrounding slashes trimmed); work is scheduled on an `Io.Select`.
 pub fn extract(alloc: std.mem.Allocator, io: Io, inode: Inode, cache: *DecompCache, super: Superblock, ext_loc: []const u8, options: ExtractionOptions) !void {
-    _ = alloc;
-    _ = io;
-    _ = inode;
-    _ = cache;
-    _ = super;
-    _ = ext_loc;
-    _ = options;
-    return error.TODO;
+    const path = std.mem.trim(u8, ext_loc, "/");
+
+    var buf: [50]ReturnUnion = undefined;
+    var sel: Io.Select(ReturnUnion) = .init(io, &buf);
+    defer sel.cancelDiscard();
+
+    var ret_loop = io.async(returnLoop, .{ alloc, &sel, options });
+    // The future must be awaited or canceled on every path, including a failed `extractReal`.
+    errdefer ret_loop.cancel(io) catch {};
+
+    try extractReal(alloc, io, cache, super, &sel, path, inode, null, false);
+
+    ret_loop.await(io) catch |err| {
+        // TODO: Drain sel
+        return err;
+    };
 }
 
-pub fn extractDir(alloc: std.mem.Allocator, io: Io, path: []const u8, d: anytype) Error!PathReturn {}
-pub fn extractFile(alloc: std.mem.Allocator, io: Io, path: []const u8, d: anytype) Error!PathReturn {
+/// Schedules extraction of `inode` at `path` on `sel`; only directory inodes are handled so far.
+/// `parent`, when non-null, is the pending-children counter of the enclosing directory.
+fn extractReal(
+    alloc: std.mem.Allocator,
+    io: Io,
+    cache: *DecompCache,
+    super: Superblock,
+    sel: *Io.Select(ReturnUnion),
+    path: []const u8,
+    inode: Inode,
+    parent: ?*Atomic(usize),
+    origin: bool,
+) Error!void {
+    try io.checkCancel();
+
+    switch (inode.data) {
+        .dir, .ext_dir => sel.async(
+            .dir_ret,
+            extractDir,
+            .{ alloc, io, cache, super, sel, path, inode, parent, origin },
+        ),
+        // TODO: regular files and the remaining inode types are not wired up yet.
+        else => return error.Canceled,
+    }
+}
+
+/// Reads the directory behind `inode` and schedules extraction of every entry beneath `path`.
+/// Unless `origin` is set, `inode` is released on exit and `path` is freed on error.
+fn extractDir(
+    alloc: std.mem.Allocator,
+    io: Io,
+    cache: *DecompCache,
+    super: Superblock,
+    sel: *Io.Select(ReturnUnion),
+    path: []const u8,
+    inode: Inode,
+    parent: ?*Atomic(usize),
+    origin: bool,
+) Error!DirReturn {
+    defer {
+        // Release pairs with the acquire load in `returnLoop` that waits for the counter to reach zero.
+        if (parent) |p| _ = p.fetchSub(1, .release);
+        if (!origin) inode.deinit(alloc);
+    }
+    errdefer if (!origin) alloc.free(path);
+
+    const dir = inode.directory(alloc, io, cache, super.dir_start) catch |err| switch (err) {
+        error.NotDirectory => unreachable,
+        else => |e| return e,
+    };
+    defer dir.deinit(alloc);
+
+    const sub_files = try alloc.create(Atomic(usize));
+    sub_files.* = .init(dir.entries.len);
+
+    const ret: DirReturn = .{
+        .path = path,
+        .sub_files = sub_files,
+        .origin = origin,
+
+        .uid_idx = inode.hdr.uid_idx,
+        .gid_idx = inode.hdr.gid_idx,
+        .mod_time = inode.hdr.mod_time,
+        .permissions = inode.hdr.permission,
+
+        .xattr_idx = switch (inode.data) {
+            .ext_dir => |d| if (d.xattr_idx != 0xFFFFFFFF) d.xattr_idx else null,
+            else => null,
+        },
+    };
+
+    for (dir.entries) |entry| {
+        const new_inode: Inode = try .initDirEntry(alloc, io, cache, super.inode_start, super.block_size, entry);
+        errdefer new_inode.deinit(alloc);
+
+        const new_path = try std.mem.concat(alloc, u8, &.{ path, "/", entry.name });
+        // The child task owns the path only once it has been scheduled; free it if that fails.
+        errdefer alloc.free(new_path);
+
+        try extractReal(
+            alloc,
+            io,
+            cache,
+            super,
+            sel,
+            new_path,
+            new_inode,
+            sub_files,
+            false,
+        );
+    }
+    return ret;
+}
+
+/// Work in progress: creates the output file and builds the data extractor, but writes no data yet.
+/// Currently always fails; the final statement returns `error.Canceled`.
+fn extractFile(
+    alloc: std.mem.Allocator,
+    io: Io,
+    cache: *DecompCache,
+    block_size: u32,
+    path: []const u8,
+    inode: Inode,
+    parent: ?*Atomic(usize),
+    origin: bool,
+) Error!FileReturn {
+    defer {
+        if (parent) |p| _ = p.fetchSub(1, .release);
+        if (!origin) inode.deinit(alloc);
+    }
+    errdefer if (!origin) alloc.free(path);
+
     const atomic = try Io.Dir.cwd().createFileAtomic(io, path, .{});
     defer atomic.deinit(io);
 
-    // TODO
+    const ret: FileReturn = .{
+        .path = path,
+        .origin = origin,
+
+        .uid_idx = inode.hdr.uid_idx,
+        .gid_idx = inode.hdr.gid_idx,
+        .permissions = inode.hdr.permission,
+        .mod_time = inode.hdr.mod_time,
+    };
+
+    const data: DataExtractor = switch (inode.data) {
+        inline .file, .ext_file => |f| blk: {
+            const extractor: DataExtractor = .init(cache, block_size, f.size, f.data_start, f.blocks);
+            if (f.frag_idx != 0xFFFFFFFF) {
+                // TODO: resolve the fragment and attach it with `addFragment`.
+            }
+            break :blk extractor;
+        },
+        else => unreachable,
+    };
+    // TODO: run `data` against the new file and return `ret`; discarded until that exists.
+    _ = ret;
+    _ = data;
 
     try atomic.link(io);
     // return .{
     //     .path = path,
     // };
-    return error.TODO;
+    return error.Canceled;
+}
+
+// Loop
+
+/// Awaits finished tasks from `sel`, frees the paths they own, and re-queues directories whose
+/// children are still pending. Stops once `sel.group.token` reads null.
+fn returnLoop(alloc: std.mem.Allocator, sel: *Io.Select(ReturnUnion), options: ExtractionOptions) !void {
+    while (true) {
+        const finished = try sel.await();
+
+        switch (finished) {
+            .dir_ret => |d| {
+                const ret = try d;
+                // Acquire pairs with the release decrement performed by each child task.
+                if (ret.sub_files.load(.acquire) != 0) {
+                    sel.queue.putOne(sel.io, .{ .dir_ret = ret }) catch |err| {
+                        if (!ret.origin) alloc.free(ret.path);
+                        return err;
+                    };
+                    continue;
+                }
+                if (!ret.origin) alloc.free(ret.path);
+                alloc.destroy(ret.sub_files);
+
+                if (!options.ignore_permissions and !options.ignore_xattr) {
+                    // TODO: set permissions & xattr.
+                }
+            },
+            .file_ret => |f| {
+                const ret = try f;
+                if (!ret.origin) alloc.free(ret.path);
+
+                if (!options.ignore_permissions and !options.ignore_xattr) {
+                    // TODO: set permissions & xattr.
+                }
+            },
+            .void_ret => |v| try v,
+        }
+
+        if (sel.group.token.load(.unordered) == null) break;
+    }
 }
 
 // Utility types
 
-const ReturnUnion = union {
-    path_ret: Error!PathReturn,
+const ReturnUnion = union(enum) {
+    file_ret: Error!FileReturn,
+    dir_ret: Error!DirReturn,
+    void_ret: Error!void,
 };
 
-const Error = error{};
+const Error = error{Canceled} || Directory.Error;
 
-const PathReturn = struct {
+const FileReturn = struct {
     path: []const u8,
+    origin: bool,
 
     uid_idx: u32,
     gid_idx: u32,
     mod_time: u32,
-    permission: u16,
+    permissions: u16,
 
-    xattr_idx: ?u32,
+    xattr_idx: ?u32 = null,
+};
+const DirReturn = struct {
+    path: []const u8,
+    sub_files: *Atomic(usize),
+    origin: bool,
+
+    uid_idx: u32,
+    gid_idx: u32,
+    mod_time: u32,
+    permissions: u16,
+
+    xattr_idx: ?u32 = null,
 };
diff --git a/src/lookup.zig b/src/lookup.zig
new file mode 100644
index 0000000..44cd2cf
--- /dev/null
+++ b/src/lookup.zig
@@ -0,0 +1,115 @@
+//! Lookup helpers for tables reached through an array of u64 block offsets at `table_start`.
+const std = @import("std");
+const Io = std.Io;
+
+const DecompCache = @import("decomp_cache.zig");
+const MetadataReader = @import("meta_rdr.zig");
+
+/// Reads entry `idx` of the table at `table_start` directly, without caching the decoded block.
+pub fn stateless(comptime T: type, io: Io, cache: *DecompCache, table_start: u64, idx: u32) !T {
+    const PER_BLOCK = 8192 / @sizeOf(T);
+
+    const block = idx / PER_BLOCK;
+    const block_idx = idx % PER_BLOCK;
+
+    // Offset slots are 8 bytes apart and hold a little-endian u64, so read all 8 bytes.
+    const offset_offset = table_start + (block * 8);
+    const offset: u64 = std.mem.readInt(u64, cache.map.memory[offset_offset..][0..8], .little);
+
+    var meta: MetadataReader = .init(io, cache, offset);
+    defer meta.deinit(io);
+    try meta.discardAll(block_idx * @sizeOf(T));
+
+    var new: T = undefined;
+    try meta.interface.readSliceEndian(T, @ptrCast(&new), .little);
+    return new;
+}
+
+/// Returns a lookup table type for entries of type `T` that decodes a block of entries on first
+/// access and keeps it in a hash map guarded by a reader/writer lock.
+pub fn Table(comptime T: type) type {
+    return struct {
+        const PER_BLOCK = 8192 / @sizeOf(T);
+
+        // Not named `Table`: that would clash with the enclosing function of the same name.
+        const Self = @This();
+
+        alloc: std.mem.Allocator,
+
+        cache: *DecompCache,
+        table_start: u64,
+
+        num: u32,
+        values: std.AutoHashMap(u32, []T),
+        mut: Io.RwLock,
+
+        /// Creates an empty table; `num_values` is the total number of entries the table holds.
+        pub fn init(alloc: std.mem.Allocator, cache: *DecompCache, table_start: u64, num_values: u32) Self {
+            return .{
+                .alloc = alloc,
+
+                .cache = cache,
+                .table_start = table_start,
+
+                .num = num_values,
+                .values = .init(alloc),
+                // NOTE(review): assumes Io.RwLock provides an `init` declaration, as Io.Group does - confirm.
+                .mut = .init,
+            };
+        }
+        /// Frees every cached block and the map itself.
+        pub fn deinit(self: *Self) void {
+            var iter = self.values.valueIterator();
+            while (iter.next()) |v|
+                self.alloc.free(v.*);
+            self.values.deinit();
+        }
+
+        /// Returns entry `idx`, decoding and caching its block on first use.
+        /// Takes the shared lock for the cache hit path and the exclusive lock to load a block.
+        pub fn get(self: *Self, io: Io, idx: u32) Error!T {
+            const block = idx / PER_BLOCK;
+            const block_idx = idx % PER_BLOCK;
+            {
+                try self.mut.lockShared(io);
+                defer self.mut.unlockShared(io);
+
+                if (self.values.get(block)) |cached| return cached[block_idx];
+            }
+            try self.mut.lock(io);
+            defer self.mut.unlock(io);
+
+            // Another task may have loaded the block between the two locks, so check again.
+            const val = try self.values.getOrPut(block);
+            if (val.found_existing)
+                return val.value_ptr.*[block_idx];
+            errdefer self.values.removeByPtr(val.key_ptr);
+
+            const offset_offset = self.table_start + (block * 8);
+            const offset: u64 = std.mem.readInt(u64, self.cache.map.memory[offset_offset..][0..8], .little);
+
+            var meta: MetadataReader = .init(io, self.cache, offset);
+            defer meta.deinit(io);
+
+            // Only a trailing partial block is short; a zero remainder means every block is full.
+            const rem = self.num % PER_BLOCK;
+            const size = if (rem != 0 and block == self.num / PER_BLOCK) rem else PER_BLOCK;
+
+            const new_block = try self.alloc.alloc(T, size);
+            errdefer self.alloc.free(new_block);
+            try meta.interface.readSliceEndian(T, new_block, .little);
+
+            val.value_ptr.* = new_block;
+
+            return new_block[block_idx];
+        }
+    };
+}
+
+// Types
+
+pub const Error = error{Canceled} || Io.Reader.Error || std.mem.Allocator.Error;
+
+pub const FragmentEntry = extern struct {};
+
+pub const XattrEntry = extern struct {};