Start work on File data extraction

This commit is contained in:
Caleb Gardner
2025-05-23 03:58:26 -05:00
parent 66f6cfa069
commit 6f02f9f14d
6 changed files with 189 additions and 48 deletions
+11
View File
@@ -0,0 +1,11 @@
name: New model
version: 0.0.1
schema: v1
models:
- provider: anthropic
model: claude-3-7-sonnet-latest
apiKey: ${{ secrets.ANTHROPIC_API_KEY }}
name: Claude 3.7 Sonnet
roles:
- chat
- edit
+68 -41
View File
@@ -4,10 +4,11 @@ const fs = std.fs;
const inode = @import("inode/inode.zig"); const inode = @import("inode/inode.zig");
const directory = @import("directory.zig"); const directory = @import("directory.zig");
const data = @import("readers/data.zig");
const Reader = @import("reader.zig").Reader; const Reader = @import("reader.zig").Reader;
const DirEntry = @import("directory.zig").DirEntry; const DirEntry = @import("directory.zig").DirEntry;
const DataReader = @import("readers/data_reader.zig").DataReader;
const DataExtractor = @import("readers/data_extractor.zig").DataExtractor;
const MetadataReader = @import("readers/metadata.zig").MetadataReader; const MetadataReader = @import("readers/metadata.zig").MetadataReader;
/// A file or directory inside of a squashfs. /// A file or directory inside of a squashfs.
@@ -17,7 +18,7 @@ pub const File = struct {
inode: inode.Inode, inode: inode.Inode,
dirEntries: ?std.StringHashMap(DirEntry) = null, dirEntries: ?std.StringHashMap(DirEntry) = null,
data_rdr: ?data.DataReader = null, data_rdr: ?DataReader = null,
pub const FileError = error{ pub const FileError = error{
NotDirectory, NotDirectory,
@@ -43,7 +44,7 @@ pub const File = struct {
rdr.super.block_size, rdr.super.block_size,
), ),
}; };
std.mem.copyForwards(u8, @constCast(out.name), ent.name); @memcpy(out.name, ent.name);
return out; return out;
} }
@@ -72,7 +73,7 @@ pub const File = struct {
return self.realOpen(rdr, path, true); return self.realOpen(rdr, path, true);
} }
fn realOpen(self: *File, rdr: *Reader, path: []const u8, first: bool) !File { fn realOpen(self: *File, rdr: *Reader, path: []const u8, first: bool) (FileError || anyerror)!File {
const clean_path: []const u8 = std.mem.trim(u8, path, "/"); const clean_path: []const u8 = std.mem.trim(u8, path, "/");
if (clean_path.len == 0) { if (clean_path.len == 0) {
return self.*; return self.*;
@@ -94,7 +95,7 @@ pub const File = struct {
} }
/// If the File is a symlink, returns the symlink's target path. /// If the File is a symlink, returns the symlink's target path.
pub fn symPath(self: File) ![]const u8 { pub fn symPath(self: File) (FileError || anyerror)![]const u8 {
return switch (self.inode.data) { return switch (self.inode.data) {
.sym => |s| s.target, .sym => |s| s.target,
.ext_sym => |s| s.target, .ext_sym => |s| s.target,
@@ -103,7 +104,7 @@ pub const File = struct {
} }
/// If the File is a directory, returns an iterator that iterates over it's children. /// If the File is a directory, returns an iterator that iterates over it's children.
pub fn iterator(self: *File, rdr: *Reader) !FileIterator { pub fn iterator(self: *File, rdr: *Reader) (FileError || anyerror)!FileIterator {
switch (self.inode.header.inode_type) { switch (self.inode.header.inode_type) {
.dir, .ext_dir => {}, .dir, .ext_dir => {},
else => return FileError.NotDirectory, else => return FileError.NotDirectory,
@@ -121,7 +122,7 @@ pub const File = struct {
}; };
} }
fn readDirEntries(self: *File, rdr: *Reader) !void { fn readDirEntries(self: *File, rdr: *Reader) (FileError || anyerror)!void {
if (self.dirEntries != null) return; if (self.dirEntries != null) return;
var block_start: u32 = 0; var block_start: u32 = 0;
var offset: u16 = 0; var offset: u16 = 0;
@@ -151,14 +152,14 @@ pub const File = struct {
} }
/// If the file is a normal file, reads it's data. /// If the file is a normal file, reads it's data.
pub fn read(self: *File, bytes: []u8) !usize { pub fn read(self: *File, bytes: []u8) (FileError || anyerror)!usize {
if (self.data_rdr == null) { if (self.data_rdr == null) {
return FileError.NotNormalFile; return FileError.NotNormalFile;
} }
return self.data_rdr.?.read(bytes); return self.data_rdr.?.read(bytes);
} }
pub const FileReader = io.GenericReader(*File, anyerror, read); pub const FileReader = io.GenericReader(*File, (FileError || anyerror), read);
pub fn reader(self: *File) FileReader { pub fn reader(self: *File) FileReader {
return .{ return .{
@@ -166,39 +167,65 @@ pub const File = struct {
}; };
} }
/// Extract's the File to the path. /// Returns a struct meant to read the file's complete data at once.
// pub fn extract(self: *File, rdr: *Reader, path: []const u8) !void { pub fn extractor(self: *File, rdr: *Reader) !DataExtractor {
// return self.extractReal(rdr, path, true); return .init(self, rdr);
// } }
// pub fn extractReal(self: *File, rdr: *Reader, path: []const u8, first: bool) !void { pub const ExtractError = error{
// var real_path = try rdr.alloc.alloc(u8, path.len); FileExists,
// @memcpy(real_path, path); };
// defer rdr.alloc.free(real_path);
// real_path = std.mem.trimRight(u8, real_path, "/"); /// Extract's the File to the path.
// switch (self.inode.header.inode_type) { pub fn extract(self: *File, rdr: *Reader, path: []const u8) (ExtractError || anyerror)!void {
// .dir, .ext_dir => {}, return self.extractReal(rdr, path, true);
// .file, .ext_file => { }
// if(first){
/// Recursive worker behind `extract`.
///
/// Writes this File to `path` (trailing '/' is ignored) relative to the
/// current working directory:
/// - directories are created when missing and every child is extracted
///   into them;
/// - regular files are written through a `DataExtractor`;
/// - symlinks and device nodes are currently skipped.
///
/// `first` is true only for the top-level call. In that case an existing
/// directory at `path` is treated as the destination folder and the file is
/// written to `path/<self.name>`. Any other pre-existing target yields
/// `ExtractError.FileExists`.
pub fn extractReal(self: *File, rdr: *Reader, path: []const u8, first: bool) (ExtractError || anyerror)!void {
    const real_path = std.mem.trimRight(u8, path, "/");
    // A missing target is expected; every other stat failure is propagated.
    const stat: ?fs.File.Stat = fs.cwd().statFile(real_path) catch |err| switch (err) {
        error.FileNotFound => null,
        else => return err,
    };
    switch (self.inode.header.inode_type) {
        .dir, .ext_dir => {
            if (stat) |s| {
                // Children cannot be placed under something that is not a directory.
                if (s.kind != .directory) return ExtractError.FileExists;
            } else {
                try fs.cwd().makeDir(real_path);
            }
            var iter = try self.iterator(rdr);
            defer iter.deinit();
            // NOTE(review): extractReal takes *File; if next() yields a File by
            // value this capture needs a mutable copy — confirm against FileIterator.
            while (iter.next()) |f| {
                //TODO: Threading
                // Slices cannot be concatenated with `+`; build the child path explicitly.
                const child_path = try fs.path.join(rdr.alloc, &.{ real_path, f.name });
                defer rdr.alloc.free(child_path);
                try f.extractReal(rdr, child_path, false);
            }
        },
        .file, .ext_file => {
            var target: []const u8 = real_path;
            var joined: ?[]u8 = null;
            defer if (joined) |p| rdr.alloc.free(p);
            if (stat) |s| {
                // Only the top-level call may drop the file into an existing directory.
                if (!first or s.kind != .directory) return ExtractError.FileExists;
                joined = try fs.path.join(rdr.alloc, &.{ real_path, self.name });
                target = joined.?;
            }
            // deinit takes *DataExtractor, so the extractor must be mutable.
            var ext = try self.extractor(rdr);
            defer ext.deinit();
            var fil = try fs.cwd().createFile(target, .{});
            defer fil.close();
            try ext.writeToFile(&fil);
        },
        .sym, .ext_sym => {},
        .block, .ext_block, .char, .ext_char => {},
    }
    //TODO: permissions
}
}; };
const FileIterator = struct { const FileIterator = struct {
+2 -2
View File
@@ -5,8 +5,8 @@ const Reader = @import("reader.zig");
const stdout = std.io.getStdOut(); const stdout = std.io.getStdOut();
pub fn main() !void { pub fn main() !void {
const alloc: std.heap.GeneralPurposeAllocator(.{}) = .init(); var alloc: std.heap.GeneralPurposeAllocator(.{}) = .init;
const args = try std.process.argsWithAllocator(alloc.allocator()); var args = try std.process.argsWithAllocator(alloc.allocator());
defer args.deinit(); defer args.deinit();
while (args.next()) |arg| { while (args.next()) |arg| {
if (std.mem.eql(u8, arg, "--help")) { if (std.mem.eql(u8, arg, "--help")) {
+104
View File
@@ -0,0 +1,104 @@
const std = @import("std");
const fs = std.fs;
const File = @import("../file.zig").File;
const Reader = @import("../reader.zig").Reader;
const BlockSize = @import("../inode/file.zig").BlockSize;
const DecompressionType = @import("../decompress.zig").DecompressType;
const FileHolder = @import("../readers/file_holder.zig").FileHolder;
const DataReader = @import("data_reader.zig").DataReader;
/// A specialized File data reader that's meant to write all of its data at once.
/// Can be re-used freely until deinit() is called.
pub const DataExtractor = struct {
    /// Allocator that owns `sizes`, `block_offset` and `frag_data`.
    alloc: std.mem.Allocator,
    decomp: DecompressionType,
    holder: *FileHolder,
    block_size: u32,
    /// Copy of the inode's block size entries, one per data block.
    sizes: []BlockSize,
    /// Running offset of each data block, starting at the inode's data_start;
    /// `block_offset[i]` pairs with `sizes[i]`.
    block_offset: []u64,
    /// The file's fragment bytes, or null when the inode's frag_idx is 0xFFFFFFFF.
    frag_data: ?[]u8 = null,

    /// Builds an extractor for a regular file.
    ///
    /// Copies the inode's block list, precomputes every block's offset and,
    /// when the inode references a fragment, reads the fragment bytes up front.
    /// Returns `File.FileError.NotNormalFile` for any other inode kind.
    /// The caller must call `deinit` on the result.
    pub fn init(fil: *File, reader: *Reader) !DataExtractor {
        const Layout = struct {
            blocks: []const BlockSize,
            data_start: u64,
            size: u64,
            frag_idx: u32,
            frag_offset: u32,
        };
        const layout: Layout = switch (fil.inode.data) {
            .file => |f| .{
                .blocks = f.blocks,
                .data_start = f.data_start,
                .size = f.size,
                .frag_idx = f.frag_idx,
                .frag_offset = f.frag_offset,
            },
            .ext_file => |f| .{
                .blocks = f.blocks,
                .data_start = f.data_start,
                .size = f.size,
                .frag_idx = f.frag_idx,
                .frag_offset = f.frag_offset,
            },
            else => return File.FileError.NotNormalFile,
        };

        const alloc = reader.alloc;
        const block_size = reader.super.block_size;

        // Each allocation gets its own errdefer so a later failure cannot leak it.
        const sizes = try alloc.alloc(BlockSize, layout.blocks.len);
        errdefer alloc.free(sizes);
        @memcpy(sizes, layout.blocks);

        const block_offset = try alloc.alloc(u64, sizes.len);
        errdefer alloc.free(block_offset);
        var offset: u64 = layout.data_start;
        for (block_offset, sizes) |*slot, blk| {
            slot.* = offset;
            offset += blk.size;
        }

        const frag_data: ?[]u8 = if (layout.frag_idx == 0xFFFFFFFF) null else blk: {
            const frag_entry = try reader.frag_table.getValue(layout.frag_idx);
            var frag_rdr: DataReader = try .fromFragEntry(reader, frag_entry);
            defer frag_rdr.deinit();
            try frag_rdr.skip(layout.frag_offset);
            // The fragment holds whatever does not fill a whole block.
            const tail = try alloc.alloc(u8, @intCast(layout.size % block_size));
            errdefer alloc.free(tail);
            _ = try frag_rdr.any().readAll(tail);
            break :blk tail;
        };

        return .{
            .alloc = alloc,
            .decomp = reader.super.decomp,
            .holder = reader.holder,
            .block_size = block_size,
            .sizes = sizes,
            .block_offset = block_offset,
            .frag_data = frag_data,
        };
    }

    /// Releases every buffer owned by the extractor.
    pub fn deinit(self: *DataExtractor) void {
        self.alloc.free(self.sizes);
        self.alloc.free(self.block_offset);
        if (self.frag_data) |frag| self.alloc.free(frag);
    }

    fn processBlock(self: DataExtractor, block_ind: u32) ![]u8 {
        //TODO
        _ = self;
        _ = block_ind;
        return error.NotImplemented;
    }

    fn processBlockToFile(self: DataExtractor, block_ind: u32, fil: *fs.File) !void {
        //TODO
        _ = self;
        _ = block_ind;
        _ = fil;
        return error.NotImplemented;
    }

    /// Write the data completely to the given file.
    /// Ignores the file's current offset and writes from the beginning of the file.
    ///
    /// Optimized for lower memory usage by using File.pwrite.
    ///
    /// Not implemented yet: currently always fails with `error.NotImplemented`
    /// instead of reporting success without writing anything.
    pub fn writeToFile(self: DataExtractor, fil: *fs.File) !void {
        //TODO
        _ = self;
        _ = fil;
        return error.NotImplemented;
    }

    /// Write the data completely to the given writer.
    ///
    /// To write data in order, some data may end up cached temporarily.
    ///
    /// Not implemented yet: currently always fails with `error.NotImplemented`
    /// instead of reporting success without writing anything.
    pub fn writeToWriter(self: DataExtractor, writer: std.io.AnyWriter) !void {
        //TODO
        _ = self;
        _ = writer;
        return error.NotImplemented;
    }
};
@@ -1,5 +1,6 @@
const std = @import("std"); const std = @import("std");
const io = std.io; const io = std.io;
const fs = std.fs;
const File = @import("../file.zig").File; const File = @import("../file.zig").File;
const Reader = @import("../reader.zig").Reader; const Reader = @import("../reader.zig").Reader;
@@ -68,7 +69,7 @@ pub const DataReader = struct {
} }
return out; return out;
} }
fn fromFragEntry(reader: *Reader, ent: FragEntry) !DataReader { pub fn fromFragEntry(reader: *Reader, ent: FragEntry) !DataReader {
const size = try reader.alloc.alloc(BlockSize, 1); const size = try reader.alloc.alloc(BlockSize, 1);
size[0] = ent.size; size[0] = ent.size;
return .{ return .{
@@ -143,7 +144,7 @@ pub const DataReader = struct {
} }
} }
to_read = @min(bytes.len - cur_read, self.block.len - self.offset); to_read = @min(bytes.len - cur_read, self.block.len - self.offset);
std.mem.copyForwards(u8, bytes[cur_read..], self.block[self.offset .. @as(usize, self.offset) + to_read]); @memcpy(bytes[cur_read..], self.block[self.offset .. @as(usize, self.offset) + to_read]);
self.offset += @truncate(to_read); self.offset += @truncate(to_read);
cur_read += to_read; cur_read += to_read;
} }
@@ -162,5 +163,3 @@ pub const DataReader = struct {
return self.read(bytes); return self.read(bytes);
} }
}; };
pub const DataExtractor = struct {};
+1 -1
View File
@@ -64,7 +64,7 @@ pub const MetadataReader = struct {
while (cur_read < bytes.len) { while (cur_read < bytes.len) {
if (self.offset >= self.block.len) try self.readNextBlock(); if (self.offset >= self.block.len) try self.readNextBlock();
to_read = @min(bytes.len - cur_read, self.block.len - self.offset); to_read = @min(bytes.len - cur_read, self.block.len - self.offset);
std.mem.copyForwards(u8, bytes[cur_read..], self.block[self.offset .. @as(usize, self.offset) + to_read]); @memcpy(bytes[cur_read..], self.block[self.offset .. @as(usize, self.offset) + to_read]);
self.offset += @truncate(to_read); self.offset += @truncate(to_read);
cur_read += to_read; cur_read += to_read;
} }