Extraction?

There's still a problem with fragments...
This commit is contained in:
Caleb Gardner
2025-05-26 07:06:03 -05:00
parent 7a4105bebd
commit 985e2bd7e5
5 changed files with 139 additions and 65 deletions
+7 -7
View File
@@ -44,16 +44,16 @@ pub const DecompressType = enum(u16) {
return out; return out;
} }
pub fn decompressTo(self: DecompressType, alloc: std.mem.Allocator, rdr: io.AnyReader, writer: io.AnyWriter) !void { pub fn decompressTo(self: DecompressType, alloc: std.mem.Allocator, rdr: io.AnyReader, writer: io.AnyWriter) anyerror!void {
const buf_size: usize = 1024; const buf_size: usize = 1024;
switch (self) { switch (self) {
.zlib => try compress.zlib.decompress(rdr, writer), .zlib => try compress.zlib.decompress(rdr, writer),
.lzma => { .lzma => {
var decomp = try compress.lzma.decompress(alloc, rdr); var decomp = try compress.lzma.decompress(alloc, rdr);
defer decomp.deinit(); defer decomp.deinit();
const buf: [buf_size]u8 = {}; var buf: [buf_size]u8 = undefined;
var red = try decomp.read(&buf); var red = try decomp.read(&buf);
while (red > 0) : (red = try decomp.read()) { while (red > 0) : (red = try decomp.read(&buf)) {
_ = try writer.writeAll(&buf); _ = try writer.writeAll(&buf);
} }
}, },
@@ -61,9 +61,9 @@ pub const DecompressType = enum(u16) {
.xz => { .xz => {
var decomp = try compress.xz.decompress(alloc, rdr); var decomp = try compress.xz.decompress(alloc, rdr);
defer decomp.deinit(); defer decomp.deinit();
const buf: [buf_size]u8 = {}; var buf: [buf_size]u8 = undefined;
var red = try decomp.read(&buf); var red = try decomp.read(&buf);
while (red > 0) : (red = try decomp.read()) { while (red > 0) : (red = try decomp.read(&buf)) {
_ = try writer.writeAll(&buf); _ = try writer.writeAll(&buf);
} }
}, },
@@ -74,9 +74,9 @@ pub const DecompressType = enum(u16) {
var decomp = compress.zstd.decompressor(rdr, .{ var decomp = compress.zstd.decompressor(rdr, .{
.window_buffer = window_buf, .window_buffer = window_buf,
}); });
const buf: [buf_size]u8 = {}; var buf: [buf_size]u8 = undefined;
var red = try decomp.read(&buf); var red = try decomp.read(&buf);
while (red > 0) : (red = try decomp.read()) { while (red > 0) : (red = try decomp.read(&buf)) {
_ = try writer.writeAll(&buf); _ = try writer.writeAll(&buf);
} }
}, },
+44 -12
View File
@@ -128,17 +128,17 @@ pub const File = struct {
if (self.dirEntries != null) return; if (self.dirEntries != null) return;
var block_start: u32 = 0; var block_start: u32 = 0;
var offset: u16 = 0; var offset: u16 = 0;
var size: u32 = 0; var siz: u32 = 0;
switch (self.inode.data) { switch (self.inode.data) {
.dir => |d| { .dir => |d| {
block_start = d.block_start; block_start = d.block_start;
offset = d.offset; offset = d.offset;
size = d.size; siz = d.size;
}, },
.ext_dir => |d| { .ext_dir => |d| {
block_start = d.block_start; block_start = d.block_start;
offset = d.offset; offset = d.offset;
size = d.size; siz = d.size;
}, },
else => return FileError.NotDirectory, else => return FileError.NotDirectory,
} }
@@ -150,7 +150,15 @@ pub const File = struct {
); );
defer meta_rdr.deinit(); defer meta_rdr.deinit();
try meta_rdr.skip(offset); try meta_rdr.skip(offset);
self.dirEntries = try directory.readDirectory(rdr.alloc, meta_rdr.any(), size); self.dirEntries = try directory.readDirectory(rdr.alloc, meta_rdr.any(), siz);
}
/// Returns the `size` field stored in the inode for regular files
/// (`.file` and `.ext_file`). Every other inode kind yields 0.
pub fn size(self: File) u64 {
    // The switch must be returned as an expression; as a bare statement its
    // arm values are discarded and the function never produces a result.
    return switch (self.inode.data) {
        .file => |f| f.size,
        .ext_file => |f| f.size,
        else => 0,
    };
}
/// If the file is a normal file, reads its data. /// If the file is a normal file, reads its data.
@@ -169,21 +177,45 @@ pub const File = struct {
}; };
} }
/// Returns a struct meant to read the file's complete data at once. fn extractor(self: *File, rdr: *Reader) !DataExtractor {
pub fn extractor(self: *File, rdr: *Reader) !DataExtractor {
return .init(self, rdr); return .init(self, rdr);
} }
/// Configuration passed to `extract`.
pub const ExtractConfig = struct {
    /// The number of worker threads to spawn. `init` sets this to your cpu core count.
    thread_count: u16,
    /// The maximum amount of additional memory this extraction will use.
    /// `init` sets this to 1GB or a quarter of your system memory, whichever is smaller.
    /// Actual memory usage will be higher, as this does not account for various metadata (such as file names).
    max_mem: u64,
    // NOTE(review): presumably "dereference symlinks" - not used in the visible code, confirm.
    deref_sym: bool = false,
    // NOTE(review): presumably "repair broken symlinks" - not used in the visible code, confirm.
    unbreak_sym: bool = false,
    // NOTE(review): presumably enables progress output - not used in the visible code, confirm.
    verbose: bool = false,

    /// Builds a config from the current machine: one thread per reported CPU
    /// and a memory cap of min(total system memory / 4, 1GB).
    /// Fails if the CPU count or total system memory cannot be queried.
    pub fn init() !ExtractConfig {
        const sys_mem = try std.process.totalSystemMemory();
        const cpu_count = try std.Thread.getCpuCount();
        return .{
            // Saturate instead of @truncate: truncating a count above 65535
            // would wrap around, possibly to 0 worker threads.
            .thread_count = std.math.cast(u16, cpu_count) orelse std.math.maxInt(u16),
            .max_mem = @min(sys_mem / 4, 1024 * 1024 * 1024),
        };
    }
};
pub const ExtractError = error{ pub const ExtractError = error{
FileExists, FileExists,
}; };
/// Extracts this File to `path`.
/// A thread pool with `config.thread_count` jobs is set up for the duration
/// of the call and torn down again before returning.
pub fn extract(self: *File, rdr: *Reader, config: ExtractConfig, path: []const u8) (ExtractError || anyerror)!void {
    var workers: std.Thread.Pool = undefined;
    try std.Thread.Pool.init(&workers, .{
        .n_jobs = config.thread_count,
        .allocator = std.heap.smp_allocator,
    });
    defer workers.deinit();

    // `true` marks this as the top-level call of the recursive extraction.
    return self.extractReal(rdr, config, &workers, path, true);
}
pub fn extractReal(self: *File, rdr: *Reader, path: []const u8, first: bool) (ExtractError || anyerror)!void { fn extractReal(self: *File, rdr: *Reader, config: ExtractConfig, pool: *std.Thread.Pool, path: []const u8, first: bool) (ExtractError || anyerror)!void {
const real_path = std.mem.trimRight(u8, path, "/"); const real_path = std.mem.trimRight(u8, path, "/");
var exists = true; var exists = true;
var stat: ?fs.File.Stat = null; var stat: ?fs.File.Stat = null;
@@ -204,7 +236,7 @@ pub const File = struct {
while (iter.next()) |*f| { while (iter.next()) |*f| {
const extr_path = try std.mem.concat(rdr.alloc, u8, &[3][]const u8{ real_path, "/", f.name }); const extr_path = try std.mem.concat(rdr.alloc, u8, &[3][]const u8{ real_path, "/", f.name });
defer rdr.alloc.free(extr_path); defer rdr.alloc.free(extr_path);
try @constCast(f).extractReal(rdr, extr_path, false); try @constCast(f).extractReal(rdr, config, pool, extr_path, false);
} }
}, },
.file, .ext_file => { .file, .ext_file => {
@@ -220,9 +252,9 @@ pub const File = struct {
defer rdr.alloc.free(extr_path); defer rdr.alloc.free(extr_path);
var ext = try self.extractor(rdr); var ext = try self.extractor(rdr);
defer ext.deinit(); defer ext.deinit();
const fil = try fs.cwd().createFile(extr_path, .{}); var fil = try fs.cwd().createFile(extr_path, .{});
defer fil.close(); defer fil.close();
try ext.writeToFile(try .init(), &fil); try ext.writeToFile(pool, &fil);
}, },
.sym, .ext_sym => { .sym, .ext_sym => {
if (exists) return ExtractError.FileExists; if (exists) return ExtractError.FileExists;
+1 -1
View File
@@ -95,5 +95,5 @@ test "extract" {
try std.fs.cwd().deleteTree(extract_path); try std.fs.cwd().deleteTree(extract_path);
var rdr: Reader = try .init(std.testing.allocator, test_sfs_path, 0); var rdr: Reader = try .init(std.testing.allocator, test_sfs_path, 0);
defer rdr.deinit(); defer rdr.deinit();
try rdr.root.extract(&rdr, extract_path); try rdr.root.extract(&rdr, try .init(), extract_path);
} }
+50 -45
View File
@@ -7,7 +7,9 @@ const Reader = @import("../reader.zig").Reader;
const BlockSize = @import("../inode/file.zig").BlockSize; const BlockSize = @import("../inode/file.zig").BlockSize;
const DecompressionType = @import("../decompress.zig").DecompressType; const DecompressionType = @import("../decompress.zig").DecompressType;
const FileHolder = @import("../readers/file_holder.zig").FileHolder; const FileHolder = @import("../readers/file_holder.zig").FileHolder;
const FileOffsetWriter = @import("../readers/file_holder.zig").FileOffsetWriter;
const DataReader = @import("data_reader.zig").DataReader; const DataReader = @import("data_reader.zig").DataReader;
const Config = @import("../file.zig").Config;
/// A specialized File data reader that's meant to write all of its data at once. /// A specialized File data reader that's meant to write all of its data at once.
/// Can be re-used freely until deinit() is called. /// Can be re-used freely until deinit() is called.
@@ -20,20 +22,6 @@ pub const DataExtractor = struct {
block_offset: []u64, block_offset: []u64,
frag_data: ?[]u8 = null, frag_data: ?[]u8 = null,
pub const Config = struct {
/// The amount of worker threads to spawn. Defaults to your cpu core count.
thread_count: u16,
/// The maximum amount of additional memory this extraction will use.
/// Default is 1GB.
max_mem: u64,
pub fn init() !Config {
return .{
.thread_count = @truncate(try std.Thread.getCpuCount()),
.max_mem = comptime 1024 * 1024 * 1024,
};
}
};
pub fn init(fil: *File, reader: *Reader) !DataExtractor { pub fn init(fil: *File, reader: *Reader) !DataExtractor {
var data_start: u64 = 0; var data_start: u64 = 0;
var sizes: []BlockSize = undefined; var sizes: []BlockSize = undefined;
@@ -91,17 +79,19 @@ pub const DataExtractor = struct {
if (self.frag_data != null) self.alloc.free(self.frag_data.?); if (self.frag_data != null) self.alloc.free(self.frag_data.?);
} }
/// Thread-pool job: decompresses the data block at index `block_ind` and
/// writes the result into `fil` starting at offset `block_ind * self.block_size`.
/// A job cannot return an error, so a decompression failure is appended to `errs`.
fn processBlockToFile(self: *DataExtractor, errs: *std.ArrayList(anyerror), block_ind: usize, fil: *fs.File) void {
    // `errs` is shared between jobs running on different pool threads and
    // std.ArrayList is not thread-safe, so appends are serialized here.
    // Only the error path takes the lock.
    const ErrGuard = struct {
        var mutex: std.Thread.Mutex = .{};
    };

    const offset_rdr = self.holder.readerAt(self.block_offset[block_ind]);
    var fil_wrtr: FileOffsetWriter = .init(fil, block_ind * self.block_size);
    // Restrict the reader to this block's stored size.
    var limit = std.io.limitedReader(offset_rdr, self.sizes[block_ind].size);
    self.decomp.decompressTo(
        self.alloc,
        limit.reader().any(),
        fil_wrtr.any(),
    ) catch |err| {
        ErrGuard.mutex.lock();
        defer ErrGuard.mutex.unlock();
        errs.append(err) catch |ignored_err| {
            // Recording the failure itself failed; print it so it is not lost silently.
            std.debug.print("{}\n", .{ignored_err});
        };
    };
}
/// Write the data completely to the given file. /// Write the data completely to the given file.
@@ -109,26 +99,41 @@ pub const DataExtractor = struct {
/// Spawns one `processBlockToFile` job on `pool` for every entry in `self.sizes`
/// and blocks until all of them have finished.
/// Returns the first error recorded by any job, if there was one.
///
/// Optimized for lower memory usage by using File.pwrite.
// NOTE(review): `self.frag_data` is not written here - confirm whether fragment
// data is meant to be handled by this function.
pub fn writeToFile(self: *DataExtractor, pool: *std.Thread.Pool, fil: *fs.File) !void {
    var wg: std.Thread.WaitGroup = .{};
    var errs: std.ArrayList(anyerror) = .init(self.alloc);
    defer errs.deinit();
    for (0..self.sizes.len) |i| {
        pool.spawnWg(&wg, processBlockToFile, .{ self, &errs, i, fil });
    }
    wg.wait();
    // All jobs are done, so `errs` can be read without further synchronisation.
    // Surface a failure instead of reporting success for a broken file.
    if (errs.items.len > 0) return errs.items[0];
}
/// Write the data completely to the given writer. // fn processBlock(self: *DataExtractor, errs: std.ArrayList(anyerror), data_out: std.AutoHashMap([]u8), block_ind: u32) void {
/// Returns the amount of bytes written. // const offset_rdr = self.holder.readerAt(self.block_offset[block_ind]);
/// // const out = self.decomp.decompress(
/// To write data in order, some data may end up cached temporarily. // self.alloc,
pub fn writeToWriter(self: DataExtractor, conf: Config, writer: io.AnyWriter) !void { // std.io.limitedReader(offset_rdr, self.sizes[block_ind].size),
var pol: std.Thread.Pool = .{}; // ) catch |err| {
pol.init(std.Thread.Pool.Options{ // errs.append(err);
.allocator = std.heap.page_allocator, // return;
.n_jobs = 5, // };
}); // data_out.put(block_ind, )
_ = self; // }
_ = writer;
_ = conf; // Write the data completely to the given writer.
//TODO // Returns the amount of bytes written.
} //
// To write data in order, some data may end up cached temporarily.
// pub fn writeToWriter(self: DataExtractor, pool: *std.Thread.Pool, writer: io.AnyWriter) !void {
// const wg: std.Thread.WaitGroup = .{};
// const errs: std.ArrayList(anyerror) = .init(self.alloc);
// const data: std.AutoHashMap(u32, []u8) = .init(self.alloc);
// const cond: std.Thread. = .{};
// defer errs.deinit();
// for (0..self.sizes.len) |i| {
// pool.spawnWg(&wg, processBlock, .{ &self, i, fil });
// }
// wg.wait();
// }
}; };
+37
View File
@@ -27,6 +27,43 @@ pub const FileHolder = struct {
.offset = self.offset + offset, .offset = self.offset + offset,
}; };
} }
// pub fn writerAt(self: *FileHolder, offset: u64) FileOffsetWriter {
// return .{
// .file = &self.file,
// .offset = self.offset + offset,
// };
// }
};
/// A writer that writes to a file at an explicit offset using positional
/// writes, advancing its own offset after every write.
pub const FileOffsetWriter = struct {
    file: *File,
    /// Offset in `file` where the next write will land.
    offset: u64,

    /// Creates a writer that starts writing to `fil` at `init_offset`.
    pub fn init(fil: *File, init_offset: u64) FileOffsetWriter {
        return .{
            .file = fil,
            .offset = init_offset,
        };
    }

    pub const Error = fs.File.PWriteError;

    /// Writes all of `bytes` at the current offset, then advances the offset
    /// by `bytes.len`. Always returns `bytes.len` on success.
    pub fn write(self: *FileOffsetWriter, bytes: []const u8) Error!usize {
        try self.file.pwriteAll(bytes, self.offset);
        self.offset += bytes.len;
        return bytes.len;
    }

    /// Returns a type-erased writer backed by this struct.
    /// `self` must outlive the returned writer, which stores a pointer to it.
    pub fn any(self: *FileOffsetWriter) io.AnyWriter {
        return .{
            .context = @ptrCast(self),
            .writeFn = writeOpaque,
        };
    }

    fn writeOpaque(context: *const anyopaque, bytes: []const u8) anyerror!usize {
        // `any` stored a mutable pointer behind the const opaque context, so
        // the constness is cast away to update `offset`.
        const wrtr: *FileOffsetWriter = @constCast(@ptrCast(@alignCast(context)));
        return wrtr.write(bytes);
    }
};
pub const FileOffsetReader = struct { pub const FileOffsetReader = struct {