Extraction?

There's still a problem with fragments...
This commit is contained in:
Caleb Gardner
2025-05-26 07:06:03 -05:00
parent 7a4105bebd
commit 985e2bd7e5
5 changed files with 139 additions and 65 deletions
+7 -7
View File
@@ -44,16 +44,16 @@ pub const DecompressType = enum(u16) {
return out;
}
pub fn decompressTo(self: DecompressType, alloc: std.mem.Allocator, rdr: io.AnyReader, writer: io.AnyWriter) !void {
pub fn decompressTo(self: DecompressType, alloc: std.mem.Allocator, rdr: io.AnyReader, writer: io.AnyWriter) anyerror!void {
const buf_size: usize = 1024;
switch (self) {
.zlib => try compress.zlib.decompress(rdr, writer),
.lzma => {
var decomp = try compress.lzma.decompress(alloc, rdr);
defer decomp.deinit();
const buf: [buf_size]u8 = {};
var buf: [buf_size]u8 = undefined;
var red = try decomp.read(&buf);
while (red > 0) : (red = try decomp.read()) {
while (red > 0) : (red = try decomp.read(&buf)) {
_ = try writer.writeAll(&buf);
}
},
@@ -61,9 +61,9 @@ pub const DecompressType = enum(u16) {
.xz => {
var decomp = try compress.xz.decompress(alloc, rdr);
defer decomp.deinit();
const buf: [buf_size]u8 = {};
var buf: [buf_size]u8 = undefined;
var red = try decomp.read(&buf);
while (red > 0) : (red = try decomp.read()) {
while (red > 0) : (red = try decomp.read(&buf)) {
_ = try writer.writeAll(&buf);
}
},
@@ -74,9 +74,9 @@ pub const DecompressType = enum(u16) {
var decomp = compress.zstd.decompressor(rdr, .{
.window_buffer = window_buf,
});
const buf: [buf_size]u8 = {};
var buf: [buf_size]u8 = undefined;
var red = try decomp.read(&buf);
while (red > 0) : (red = try decomp.read()) {
while (red > 0) : (red = try decomp.read(&buf)) {
_ = try writer.writeAll(&buf);
}
},
+44 -12
View File
@@ -128,17 +128,17 @@ pub const File = struct {
if (self.dirEntries != null) return;
var block_start: u32 = 0;
var offset: u16 = 0;
var size: u32 = 0;
var siz: u32 = 0;
switch (self.inode.data) {
.dir => |d| {
block_start = d.block_start;
offset = d.offset;
size = d.size;
siz = d.size;
},
.ext_dir => |d| {
block_start = d.block_start;
offset = d.offset;
size = d.size;
siz = d.size;
},
else => return FileError.NotDirectory,
}
@@ -150,7 +150,15 @@ pub const File = struct {
);
defer meta_rdr.deinit();
try meta_rdr.skip(offset);
self.dirEntries = try directory.readDirectory(rdr.alloc, meta_rdr.any(), size);
self.dirEntries = try directory.readDirectory(rdr.alloc, meta_rdr.any(), siz);
}
/// Returns the size recorded in this file's inode.
/// Only `.file` and `.ext_file` inodes carry a size here; every other inode kind yields 0.
pub fn size(self: File) u64 {
    // `switch` is an expression: its value has to be returned explicitly,
    // otherwise the arms' results are discarded and the function yields nothing.
    return switch (self.inode.data) {
        .file => |f| f.size,
        .ext_file => |f| f.size,
        else => 0,
    };
}
/// If the file is a normal file, reads its data.
@@ -169,21 +177,45 @@ pub const File = struct {
};
}
/// Returns a struct meant to read the file's complete data at once.
pub fn extractor(self: *File, rdr: *Reader) !DataExtractor {
/// Builds a `DataExtractor` bound to this file and the given reader.
fn extractor(self: *File, rdr: *Reader) !DataExtractor {
    const data_extractor = try DataExtractor.init(self, rdr);
    return data_extractor;
}
/// Options accepted by `extract`.
pub const ExtractConfig = struct {
    /// The number of worker threads to spawn. `init()` sets this to your cpu core count.
    thread_count: u16,
    /// The maximum amount of additional memory this extraction will use.
    /// `init()` sets this to 1GB or a quarter of your system memory, whichever is smaller.
    /// Actual memory usage will be higher, as this does not account for various metadata (such as file names).
    max_mem: u64,
    // NOTE(review): the three flags below are not read anywhere in the visible code;
    // their names suggest symlink handling and logging options — confirm before relying on them.
    deref_sym: bool = false,
    unbreak_sym: bool = false,
    verbose: bool = false,

    /// Builds a config from the current machine: one thread per reported CPU and
    /// a memory cap of min(total system memory / 4, 1GB).
    /// Fails if the CPU count or the total system memory cannot be queried.
    pub fn init() !ExtractConfig {
        const sys_mem = try std.process.totalSystemMemory();
        const cpu_count = try std.Thread.getCpuCount();
        return .{
            // Saturate instead of truncating: `@truncate` would wrap a count of
            // 65536 down to 0 and produce a pool without any worker.
            .thread_count = std.math.cast(u16, cpu_count) orelse std.math.maxInt(u16),
            .max_mem = @min(sys_mem / 4, 1024 * 1024 * 1024),
        };
    }
};
/// Errors produced by the extraction logic itself, on top of whatever the underlying calls return.
pub const ExtractError = error{
/// The destination already exists; returned, for example, by the symlink branch of extraction.
FileExists,
};
/// Extracts the File to the path.
pub fn extract(self: *File, rdr: *Reader, path: []const u8) (ExtractError || anyerror)!void {
return self.extractReal(rdr, path, true);
/// Extracts this File to `path` using a thread pool of `config.thread_count` jobs.
/// The pool is created for this call only and torn down before returning.
pub fn extract(self: *File, rdr: *Reader, config: ExtractConfig, path: []const u8) (ExtractError || anyerror)!void {
    // Thread.Pool is initialised in place, hence the `undefined` starting value.
    var workers: std.Thread.Pool = undefined;
    try workers.init(.{
        .n_jobs = config.thread_count,
        .allocator = std.heap.smp_allocator,
    });
    defer workers.deinit();

    // `true` is passed for extractReal's `first` parameter.
    try self.extractReal(rdr, config, &workers, path, true);
}
pub fn extractReal(self: *File, rdr: *Reader, path: []const u8, first: bool) (ExtractError || anyerror)!void {
fn extractReal(self: *File, rdr: *Reader, config: ExtractConfig, pool: *std.Thread.Pool, path: []const u8, first: bool) (ExtractError || anyerror)!void {
const real_path = std.mem.trimRight(u8, path, "/");
var exists = true;
var stat: ?fs.File.Stat = null;
@@ -204,7 +236,7 @@ pub const File = struct {
while (iter.next()) |*f| {
const extr_path = try std.mem.concat(rdr.alloc, u8, &[3][]const u8{ real_path, "/", f.name });
defer rdr.alloc.free(extr_path);
try @constCast(f).extractReal(rdr, extr_path, false);
try @constCast(f).extractReal(rdr, config, pool, extr_path, false);
}
},
.file, .ext_file => {
@@ -220,9 +252,9 @@ pub const File = struct {
defer rdr.alloc.free(extr_path);
var ext = try self.extractor(rdr);
defer ext.deinit();
const fil = try fs.cwd().createFile(extr_path, .{});
var fil = try fs.cwd().createFile(extr_path, .{});
defer fil.close();
try ext.writeToFile(try .init(), &fil);
try ext.writeToFile(pool, &fil);
},
.sym, .ext_sym => {
if (exists) return ExtractError.FileExists;
+1 -1
View File
@@ -95,5 +95,5 @@ test "extract" {
try std.fs.cwd().deleteTree(extract_path);
var rdr: Reader = try .init(std.testing.allocator, test_sfs_path, 0);
defer rdr.deinit();
try rdr.root.extract(&rdr, extract_path);
try rdr.root.extract(&rdr, try .init(), extract_path);
}
+50 -45
View File
@@ -7,7 +7,9 @@ const Reader = @import("../reader.zig").Reader;
const BlockSize = @import("../inode/file.zig").BlockSize;
const DecompressionType = @import("../decompress.zig").DecompressType;
const FileHolder = @import("../readers/file_holder.zig").FileHolder;
const FileOffsetWriter = @import("../readers/file_holder.zig").FileOffsetWriter;
const DataReader = @import("data_reader.zig").DataReader;
const Config = @import("../file.zig").Config;
/// A specialized File data reader that's meant to write all of its data at once.
/// Can be re-used freely until deinit() is called.
@@ -20,20 +22,6 @@ pub const DataExtractor = struct {
block_offset: []u64,
frag_data: ?[]u8 = null,
/// Options controlling how a `DataExtractor` writes its data.
pub const Config = struct {
    /// The number of worker threads to spawn. `init()` sets this to your cpu core count.
    thread_count: u16,
    /// The maximum amount of additional memory this extraction will use.
    /// `init()` sets this to 1GB.
    max_mem: u64,

    /// Builds a config with one thread per reported CPU and a 1GB memory cap.
    /// Fails if the CPU count cannot be queried.
    pub fn init() !Config {
        const cpu_count = try std.Thread.getCpuCount();
        return .{
            // Saturate instead of truncating so a very large count can never wrap to 0.
            .thread_count = std.math.cast(u16, cpu_count) orelse std.math.maxInt(u16),
            .max_mem = 1024 * 1024 * 1024,
        };
    }
};
pub fn init(fil: *File, reader: *Reader) !DataExtractor {
var data_start: u64 = 0;
var sizes: []BlockSize = undefined;
@@ -91,17 +79,19 @@ pub const DataExtractor = struct {
if (self.frag_data != null) self.alloc.free(self.frag_data.?);
}
/// Placeholder for per-block processing; the body is not implemented yet.
fn processBlock(self: DataExtractor, block_ind: u32) ![]u8 {
// Both parameters are explicitly discarded until a real body exists.
_ = self;
_ = block_ind;
// NOTE(review): nothing is returned here although the signature promises `![]u8`.
//TODO
}
fn processBlockToFile(self: DataExtractor, block_ind: u32, fil: *const fs.File) !void {
_ = self;
_ = block_ind;
_ = fil;
//TODO
/// Thread-pool task: decompresses block `block_ind` and writes the result into `fil`
/// starting at offset `block_ind * self.block_size`.
///
/// The function returns `void`, so a decompression/write failure is appended to `errs`
/// instead of being returned. If that append itself fails, the append error is printed.
fn processBlockToFile(self: *DataExtractor, errs: *std.ArrayList(anyerror), block_ind: usize, fil: *fs.File) void {
    // `errs` is shared by every task spawned for the same file, and ArrayList.append
    // is not safe to call concurrently. A function-local static mutex serialises the
    // appends without changing this function's signature.
    const ErrGuard = struct {
        var mutex: std.Thread.Mutex = .{};
    };

    const offset_rdr = self.holder.readerAt(self.block_offset[block_ind]);
    var fil_wrtr: FileOffsetWriter = .init(fil, block_ind * self.block_size);
    // Restrict the decompressor to this block's stored size.
    var limit = std.io.limitedReader(offset_rdr, self.sizes[block_ind].size);
    self.decomp.decompressTo(
        self.alloc,
        limit.reader().any(),
        fil_wrtr.any(),
    ) catch |err| {
        ErrGuard.mutex.lock();
        defer ErrGuard.mutex.unlock();
        errs.append(err) catch |ignored_err| {
            std.debug.print("{}\n", .{ignored_err});
        };
    };
}
/// Write the data completely to the given file.
@@ -109,26 +99,41 @@ pub const DataExtractor = struct {
/// Returns the amount of bytes written.
///
/// Optimized for lower memory usage by using File.pwrite.
pub fn writeToFile(self: DataExtractor, conf: Config, fil: *const fs.File) !void {
_ = self;
_ = fil;
_ = conf;
//TODO
/// Writes every block of this extractor to `fil`, one pool task per entry in `self.sizes`.
///
/// Blocks until all tasks have finished. If any task recorded an error, one of the
/// recorded errors is returned (which one is unspecified when several blocks fail,
/// since tasks complete in arbitrary order).
///
/// NOTE(review): only the blocks listed in `self.sizes` are written; `frag_data` is
/// not touched by this function.
pub fn writeToFile(self: *DataExtractor, pool: *std.Thread.Pool, fil: *fs.File) !void {
    var wg: std.Thread.WaitGroup = .{};
    var errs: std.ArrayList(anyerror) = .init(self.alloc);
    defer errs.deinit();
    for (0..self.sizes.len) |i| {
        pool.spawnWg(&wg, processBlockToFile, .{ self, &errs, i, fil });
    }
    wg.wait();
    // Tasks cannot propagate failures themselves; surface what they collected
    // instead of reporting success for a partially written file.
    if (errs.items.len > 0) return errs.items[0];
}
/// Write the data completely to the given writer.
/// Returns the amount of bytes written.
///
/// To write data in order, some data may end up cached temporarily.
/// Currently a stub: a thread pool is set up but no data is written.
pub fn writeToWriter(self: DataExtractor, conf: Config, writer: io.AnyWriter) !void {
// NOTE(review): the job count is hard-coded to 5; `conf.thread_count` is not consulted.
var pol: std.Thread.Pool = .{};
// NOTE(review): the result of `init` is not handled and the pool is never deinit'd.
pol.init(std.Thread.Pool.Options{
.allocator = std.heap.page_allocator,
.n_jobs = 5,
});
// All parameters are explicitly discarded until the write logic exists.
_ = self;
_ = writer;
_ = conf;
//TODO
}
// fn processBlock(self: *DataExtractor, errs: std.ArrayList(anyerror), data_out: std.AutoHashMap([]u8), block_ind: u32) void {
// const offset_rdr = self.holder.readerAt(self.block_offset[block_ind]);
// const out = self.decomp.decompress(
// self.alloc,
// std.io.limitedReader(offset_rdr, self.sizes[block_ind].size),
// ) catch |err| {
// errs.append(err);
// return;
// };
// data_out.put(block_ind, )
// }
// Write the data completely to the given writer.
// Returns the amount of bytes written.
//
// To write data in order, some data may end up cached temporarily.
// pub fn writeToWriter(self: DataExtractor, pool: *std.Thread.Pool, writer: io.AnyWriter) !void {
// const wg: std.Thread.WaitGroup = .{};
// const errs: std.ArrayList(anyerror) = .init(self.alloc);
// const data: std.AutoHashMap(u32, []u8) = .init(self.alloc);
// const cond: std.Thread. = .{};
// defer errs.deinit();
// for (0..self.sizes.len) |i| {
// pool.spawnWg(&wg, processBlock, .{ &self, i, fil });
// }
// wg.wait();
// }
};
+37
View File
@@ -27,6 +27,43 @@ pub const FileHolder = struct {
.offset = self.offset + offset,
};
}
// pub fn writerAt(self: *FileHolder, offset: u64) FileOffsetWriter {
// return .{
// .file = &self.file,
// .offset = self.offset + offset,
// };
// }
};
/// A writer that targets an explicit position in a file and moves that position
/// forward by the length of every chunk it writes.
pub const FileOffsetWriter = struct {
    file: *File,
    offset: u64,

    pub const Error = fs.File.PWriteError;

    /// Creates a writer whose first write lands at `init_offset` in `fil`.
    pub fn init(fil: *File, init_offset: u64) FileOffsetWriter {
        return FileOffsetWriter{ .file = fil, .offset = init_offset };
    }

    /// Writes all of `bytes` at the current offset with `pwriteAll`, then advances
    /// the offset. On success the full length of `bytes` is reported as written.
    pub fn write(self: *FileOffsetWriter, bytes: []const u8) !usize {
        const count = bytes.len;
        try self.file.pwriteAll(bytes, self.offset);
        self.offset += count;
        return count;
    }

    /// Type-erases this writer. The returned AnyWriter stores a pointer to `self`.
    pub fn any(self: *FileOffsetWriter) io.AnyWriter {
        return io.AnyWriter{ .context = @ptrCast(self), .writeFn = writeOpaque };
    }

    /// AnyWriter callback: recovers the FileOffsetWriter from the opaque context and forwards to `write`.
    fn writeOpaque(context: *const anyopaque, bytes: []const u8) anyerror!usize {
        const writer: *FileOffsetWriter = @constCast(@ptrCast(@alignCast(context)));
        const written = try writer.write(bytes);
        return written;
    }
};
pub const FileOffsetReader = struct {