Merge pull request #41 from CalebQ42/redo-extract

Redo extract
This commit is contained in:
Caleb Gardner
2025-06-07 03:12:55 -05:00
committed by GitHub
21 changed files with 708 additions and 706 deletions
+5
View File
@@ -1,2 +1,7 @@
testing testing
/go-unsquashfs /go-unsquashfs
squashfs.test
# Memory and CPU pprof profiles
mem.out
cpu.out
+14 -26
View File
@@ -4,46 +4,34 @@ import (
"io" "io"
"io/fs" "io/fs"
"runtime" "runtime"
"sync"
"github.com/CalebQ42/squashfs/internal/routinemanager"
) )
type ExtractionOptions struct { type ExtractionOptions struct {
manager *routinemanager.Manager dispatcher chan struct{} // Limits the amount of work being done simultaneously.
LogOutput io.Writer //Where the verbose log should write. fullRdrPool sync.Pool // Pool for data.FullReader results.
DereferenceSymlink bool //Replace symlinks with the target file. LogOutput io.Writer //Where the verbose log should write.
UnbreakSymlink bool //Try to make sure symlinks remain unbroken when extracted, without changing the symlink. DereferenceSymlink bool //Replace symlinks with the target file.
Verbose bool //Prints extra info to log on an error. UnbreakSymlink bool //Try to make sure symlinks remain unbroken when extracted, without changing the symlink.
IgnorePerm bool //Ignore file's permissions and instead use Perm. Verbose bool //Prints extra info to log on an error.
Perm fs.FileMode //Permission to use when IgnorePerm. Defaults to 0777. IgnorePerm bool //Ignore file's permissions and instead use Perm.
SimultaneousFiles uint16 //Number of files to process in parallel. Default set based on runtime.NumCPU(). Perm fs.FileMode //Permission to use when IgnorePerm. Defaults to 0777.
ExtractionRoutines uint16 //Number of goroutines to use for each file's extraction. Only applies to regular files. Default set based on runtime.NumCPU(). ExtractionRoutines uint16 //The number of threads to use during extraction. Defaults to a number based on runtime.NumCPU().
SimultaneousFiles uint16 // Deprecated: Only use ExtractionRoutines.
} }
// The default extraction options. // The default extraction options. Uses half of your CPU cores.
func DefaultOptions() *ExtractionOptions { func DefaultOptions() *ExtractionOptions {
cores := uint16(runtime.NumCPU() / 2)
var files, routines uint16
if cores <= 4 {
files = 1
routines = cores
} else {
files = cores - 4
routines = 4
}
return &ExtractionOptions{ return &ExtractionOptions{
Perm: 0777, Perm: 0777,
SimultaneousFiles: files, ExtractionRoutines: uint16(runtime.NumCPU() / 2),
ExtractionRoutines: routines,
} }
} }
// Less limited default options. Can run up 2x faster than DefaultOptions. // Faster extraction option. Uses all CPU cores.
// Tends to use all available CPU resources.
func FastOptions() *ExtractionOptions { func FastOptions() *ExtractionOptions {
return &ExtractionOptions{ return &ExtractionOptions{
Perm: 0777, Perm: 0777,
SimultaneousFiles: uint16(runtime.NumCPU()),
ExtractionRoutines: uint16(runtime.NumCPU()), ExtractionRoutines: uint16(runtime.NumCPU()),
} }
} }
+30 -8
View File
@@ -10,8 +10,8 @@ import (
"path/filepath" "path/filepath"
"runtime" "runtime"
"strconv" "strconv"
"sync"
"github.com/CalebQ42/squashfs/internal/routinemanager"
squashfslow "github.com/CalebQ42/squashfs/low" squashfslow "github.com/CalebQ42/squashfs/low"
"github.com/CalebQ42/squashfs/low/data" "github.com/CalebQ42/squashfs/low/data"
"github.com/CalebQ42/squashfs/low/inode" "github.com/CalebQ42/squashfs/low/inode"
@@ -54,6 +54,7 @@ func (f File) FS() (FS, error) {
func (f *File) Close() error { func (f *File) Close() error {
f.rdr.Close() f.rdr.Close()
f.full.Close() f.full.Close()
f.rdrInit = false
return nil return nil
} }
@@ -216,8 +217,16 @@ func (f File) Extract(folder string) error {
// Extract the file to the given folder. If the file is a folder, the folder's contents will be extracted to the folder. // Extract the file to the given folder. If the file is a folder, the folder's contents will be extracted to the folder.
// Allows setting various extraction options via ExtractionOptions. // Allows setting various extraction options via ExtractionOptions.
func (f File) ExtractWithOptions(path string, op *ExtractionOptions) error { func (f File) ExtractWithOptions(path string, op *ExtractionOptions) error {
if op.manager == nil { if op.dispatcher == nil {
op.manager = routinemanager.NewManager(op.SimultaneousFiles) op.fullRdrPool = sync.Pool{
New: func() any {
return &data.BlockResults{}
},
}
op.dispatcher = make(chan struct{}, op.ExtractionRoutines)
for range op.ExtractionRoutines {
op.dispatcher <- struct{}{}
}
if op.LogOutput != nil { if op.LogOutput != nil {
log.SetOutput(op.LogOutput) log.SetOutput(op.LogOutput)
} }
@@ -231,11 +240,13 @@ func (f File) ExtractWithOptions(path string, op *ExtractionOptions) error {
} }
switch f.Low.Inode.Type { switch f.Low.Inode.Type {
case inode.Dir, inode.EDir: case inode.Dir, inode.EDir:
<-op.dispatcher
d, err := f.Low.ToDir(f.r.Low) d, err := f.Low.ToDir(f.r.Low)
if err != nil { if err != nil {
if op.Verbose { if op.Verbose {
log.Println("Failed to create squashfs.Directory for", path) log.Println("Failed to create squashfs.Directory for", path)
} }
op.dispatcher <- struct{}{}
return errors.Join(errors.New("failed to create squashfs.Directory: "+path), err) return errors.Join(errors.New("failed to create squashfs.Directory: "+path), err)
} }
errChan := make(chan error, len(d.Entries)) errChan := make(chan error, len(d.Entries))
@@ -248,19 +259,21 @@ func (f File) ExtractWithOptions(path string, op *ExtractionOptions) error {
return errors.Join(errors.New("failed to get base from entry: "+path), err) return errors.Join(errors.New("failed to get base from entry: "+path), err)
} }
go func(b squashfslow.FileBase, path string) { go func(b squashfslow.FileBase, path string) {
i := op.manager.Lock()
if b.IsDir() { if b.IsDir() {
<-op.dispatcher
extDir := filepath.Join(path, b.Name) extDir := filepath.Join(path, b.Name)
err = os.Mkdir(extDir, 0777) err = os.Mkdir(extDir, 0777)
op.manager.Unlock(i)
if err != nil { if err != nil {
if op.Verbose { if op.Verbose {
log.Println("Failed to create directory", path) log.Println("Failed to create directory", path)
} }
op.dispatcher <- struct{}{}
errChan <- errors.Join(errors.New("failed to create directory: "+path), err) errChan <- errors.Join(errors.New("failed to create directory: "+path), err)
return return
} }
err = f.r.FileFromBase(b, f.r.FSFromDirectory(d, f.parent)).ExtractWithOptions(extDir, op) fil := f.r.FileFromBase(b, f.r.FSFromDirectory(d, f.parent))
op.dispatcher <- struct{}{}
err = fil.ExtractWithOptions(extDir, op)
if err != nil { if err != nil {
if op.Verbose { if op.Verbose {
log.Println("Failed to extract directory", path) log.Println("Failed to extract directory", path)
@@ -272,12 +285,12 @@ func (f File) ExtractWithOptions(path string, op *ExtractionOptions) error {
} else { } else {
fil := f.r.FileFromBase(b, f.r.FSFromDirectory(d, f.parent)) fil := f.r.FileFromBase(b, f.r.FSFromDirectory(d, f.parent))
err = fil.ExtractWithOptions(path, op) err = fil.ExtractWithOptions(path, op)
op.manager.Unlock(i)
fil.Close() fil.Close()
errChan <- err errChan <- err
} }
}(b, path) }(b, path)
} }
op.dispatcher <- struct{}{}
var errCache []error var errCache []error
for range d.Entries { for range d.Entries {
err := <-errChan err := <-errChan
@@ -289,23 +302,28 @@ func (f File) ExtractWithOptions(path string, op *ExtractionOptions) error {
return errors.Join(errors.New("failed to extract folder: "+path), errors.Join(errCache...)) return errors.Join(errors.New("failed to extract folder: "+path), errors.Join(errCache...))
} }
case inode.Fil, inode.EFil: case inode.Fil, inode.EFil:
<-op.dispatcher
path = filepath.Join(path, f.Low.Name) path = filepath.Join(path, f.Low.Name)
outFil, err := os.Create(path) outFil, err := os.Create(path)
if err != nil { if err != nil {
if op.Verbose { if op.Verbose {
log.Println("Failed to create file", path) log.Println("Failed to create file", path)
} }
op.dispatcher <- struct{}{}
return errors.Join(errors.New("failed to create file: "+path), err) return errors.Join(errors.New("failed to create file: "+path), err)
} }
defer outFil.Close() defer outFil.Close()
full, err := f.Low.GetFullReader(&f.r.Low) full, err := f.Low.GetFullReader(&f.r.Low)
defer full.Close()
if err != nil { if err != nil {
if op.Verbose { if op.Verbose {
log.Println("Failed to create full reader for", path) log.Println("Failed to create full reader for", path)
} }
op.dispatcher <- struct{}{}
return errors.Join(errors.New("failed to create full reader: "+path), err) return errors.Join(errors.New("failed to create full reader: "+path), err)
} }
full.SetGoroutineLimit(op.ExtractionRoutines) full.SetDispatcherPool(op.dispatcher, &op.fullRdrPool)
op.dispatcher <- struct{}{}
_, err = full.WriteTo(outFil) _, err = full.WriteTo(outFil)
if err != nil { if err != nil {
if op.Verbose { if op.Verbose {
@@ -314,6 +332,8 @@ func (f File) ExtractWithOptions(path string, op *ExtractionOptions) error {
return errors.Join(errors.New("failed to write file: "+path), err) return errors.Join(errors.New("failed to write file: "+path), err)
} }
case inode.Sym, inode.ESym: case inode.Sym, inode.ESym:
<-op.dispatcher
defer func() { op.dispatcher <- struct{}{} }()
symPath := f.SymlinkPath() symPath := f.SymlinkPath()
if op.DereferenceSymlink { if op.DereferenceSymlink {
filTmp := f.GetSymlinkFile() filTmp := f.GetSymlinkFile()
@@ -361,6 +381,8 @@ func (f File) ExtractWithOptions(path string, op *ExtractionOptions) error {
} }
} }
case inode.Char, inode.EChar, inode.Block, inode.EBlock, inode.Fifo, inode.EFifo: case inode.Char, inode.EChar, inode.Block, inode.EBlock, inode.Fifo, inode.EFifo:
<-op.dispatcher
defer func() { op.dispatcher <- struct{}{} }()
if runtime.GOOS == "windows" { if runtime.GOOS == "windows" {
if op.Verbose { if op.Verbose {
log.Println(f.path(), "ignored. A device link and can't be created on Windows.") log.Println(f.path(), "ignored. A device link and can't be created on Windows.")
+5 -1
View File
@@ -16,5 +16,9 @@ func NewZstd() Zstd {
} }
func (z Zstd) Decompress(data []byte) ([]byte, error) { func (z Zstd) Decompress(data []byte) ([]byte, error) {
return z.rdr.DecodeAll(data, nil) dat, err := z.rdr.DecodeAll(data, nil)
if err != nil {
return nil, err
}
return dat, err
} }
+2
View File
@@ -1,3 +1,5 @@
# Lower-Level Squashfs # Lower-Level Squashfs
This library is a lower level version of the main [squashfs](https://github.com/CalebQ42/squashfs) library that doesn't try to be easy to use and exposes a lot of information that is not necessary for most use cases. This library is a lower level version of the main [squashfs](https://github.com/CalebQ42/squashfs) library that doesn't try to be easy to use and exposes a lot of information that is not necessary for most use cases.
I will try to keep the API stable, but it is not guaranteed.
+110 -113
View File
@@ -1,130 +1,127 @@
package squashfslow package squashfslow
import ( // TODO: Make work
"errors" // func requireNoError(t *testing.T, err error) {
"testing" // t.Helper()
) // if err != nil {
// t.Fatal(err)
// }
// }
func requireNoError(t *testing.T, err error) { // func assertEqual(t *testing.T, want int, got int) {
t.Helper() // t.Helper()
if err != nil { // if want != got {
t.Fatal(err) // t.Errorf("want %d, got %d", want, got)
} // }
} // }
func assertEqual(t *testing.T, want int, got int) { // func assertLength(t *testing.T, want int, slice []int) {
t.Helper() // t.Helper()
if want != got { // if len(slice) != want {
t.Errorf("want %d, got %d", want, got) // t.Errorf("want len %d, got %d", want, len(slice))
} // }
} // }
func assertLength(t *testing.T, want int, slice []int) { // func assertErrorIs(t *testing.T, err error, wantErr error) {
t.Helper() // t.Helper()
if len(slice) != want { // if err == nil {
t.Errorf("want len %d, got %d", want, len(slice)) // t.Errorf("want %s, got nil", wantErr)
} // return
} // }
// if !errors.Is(err, wantErr) {
// t.Errorf("want %s, got %v", wantErr, err)
// }
// }
func assertErrorIs(t *testing.T, err error, wantErr error) { // func TestCachingPagedReader(t *testing.T) {
t.Helper() // // Mock readBlocks function
if err == nil { // mockReadNMore := func(startBlock, numItems int) ([]int, error) {
t.Errorf("want %s, got nil", wantErr) // if startBlock < 0 {
return // return nil, errors.New("invalid block start")
} // }
if !errors.Is(err, wantErr) { // var result []int
t.Errorf("want %s, got %v", wantErr, err) // for i := 0; i < numItems; i++ {
} // result = append(result, startBlock*512+i)
} // }
// return result, nil
// }
func TestCachingPagedReader(t *testing.T) { // t.Run("ValidRequestWithinFirstBlock", func(t *testing.T) {
// Mock readBlocks function // tab := NewTable[int]()
mockReadNMore := func(startBlock, numItems int) ([]int, error) { // currentItems := make([]int, 0)
if startBlock < 0 { // item, err := readPagedItems(300, 512, &currentItems, 2048, mockReadNMore)
return nil, errors.New("invalid block start") // requireNoError(t, err)
} // assertEqual(t, 300, item)
var result []int // assertLength(t, 512, currentItems) // Ensure one block is read
for i := 0; i < numItems; i++ { // })
result = append(result, startBlock*512+i)
}
return result, nil
}
t.Run("ValidRequestWithinFirstBlock", func(t *testing.T) { // t.Run("ValidRequestAcrossMultipleBlocks", func(t *testing.T) {
currentItems := make([]int, 0) // currentItems := make([]int, 0)
item, err := readPagedItems(300, 512, &currentItems, 2048, mockReadNMore) // item, err := readPagedItems(600, 512, &currentItems, 2048, mockReadNMore)
requireNoError(t, err) // requireNoError(t, err)
assertEqual(t, 300, item) // assertEqual(t, 600, item)
assertLength(t, 512, currentItems) // Ensure one block is read // assertLength(t, 1024, currentItems)
}) // })
t.Run("ValidRequestAcrossMultipleBlocks", func(t *testing.T) { // t.Run("SequentialRequestsWithinBlocks", func(t *testing.T) {
currentItems := make([]int, 0) // currentItems := make([]int, 0)
item, err := readPagedItems(600, 512, &currentItems, 2048, mockReadNMore) // // First request
requireNoError(t, err) // item, err := readPagedItems(300, 512, &currentItems, 2048, mockReadNMore)
assertEqual(t, 600, item) // requireNoError(t, err)
assertLength(t, 1024, currentItems) // assertEqual(t, 300, item)
})
t.Run("SequentialRequestsWithinBlocks", func(t *testing.T) { // // Second request in the same block
currentItems := make([]int, 0) // item, err = readPagedItems(400, 512, &currentItems, 2048, mockReadNMore)
// First request // requireNoError(t, err)
item, err := readPagedItems(300, 512, &currentItems, 2048, mockReadNMore) // assertEqual(t, 400, item)
requireNoError(t, err) // assertLength(t, 512, currentItems)
assertEqual(t, 300, item) // })
// Second request in the same block // t.Run("RequestExactBlockBoundary", func(t *testing.T) {
item, err = readPagedItems(400, 512, &currentItems, 2048, mockReadNMore) // currentItems := make([]int, 0)
requireNoError(t, err) // item, err := readPagedItems(511, 512, &currentItems, 2048, mockReadNMore)
assertEqual(t, 400, item) // requireNoError(t, err)
assertLength(t, 512, currentItems) // assertEqual(t, 511, item)
}) // assertLength(t, 512, currentItems)
t.Run("RequestExactBlockBoundary", func(t *testing.T) { // // Request the next block's first item
currentItems := make([]int, 0) // item, err = readPagedItems(512, 512, &currentItems, 2048, mockReadNMore)
item, err := readPagedItems(511, 512, &currentItems, 2048, mockReadNMore) // requireNoError(t, err)
requireNoError(t, err) // assertEqual(t, 512, item)
assertEqual(t, 511, item) // assertLength(t, 1024, currentItems)
assertLength(t, 512, currentItems) // })
// Request the next block's first item // t.Run("OutOfBoundsRequest", func(t *testing.T) {
item, err = readPagedItems(512, 512, &currentItems, 2048, mockReadNMore) // currentItems := make([]int, 0)
requireNoError(t, err) // _, err := readPagedItems(2048, 512, &currentItems, 2048, mockReadNMore)
assertEqual(t, 512, item) // assertErrorIs(t, err, errOutOfBounds)
assertLength(t, 1024, currentItems) // })
})
t.Run("OutOfBoundsRequest", func(t *testing.T) { // t.Run("RequestBeyondReadBlocks", func(t *testing.T) {
currentItems := make([]int, 0) // readFail := errors.New("failed to read block")
_, err := readPagedItems(2048, 512, &currentItems, 2048, mockReadNMore) // failingReadBlocks := func(startBlock, numBlocks int) ([]int, error) {
assertErrorIs(t, err, errOutOfBounds) // if startBlock > 1 {
}) // return nil, readFail
// }
// var result []int
// for i := 0; i < numBlocks*512; i++ {
// result = append(result, startBlock*512+i)
// }
// return result, nil
// }
t.Run("RequestBeyondReadBlocks", func(t *testing.T) { // currentItems := make([]int, 0)
readFail := errors.New("failed to read block") // _, err := readPagedItems(1024, 512, &currentItems, 2048, failingReadBlocks)
failingReadBlocks := func(startBlock, numBlocks int) ([]int, error) { // assertErrorIs(t, err, readFail)
if startBlock > 1 { // })
return nil, readFail
}
var result []int
for i := 0; i < numBlocks*512; i++ {
result = append(result, startBlock*512+i)
}
return result, nil
}
currentItems := make([]int, 0) // t.Run("partial last page", func(t *testing.T) {
_, err := readPagedItems(1024, 512, &currentItems, 2048, failingReadBlocks) // currentItems := make([]int, 0)
assertErrorIs(t, err, readFail)
})
t.Run("partial last page", func(t *testing.T) { // // Request the next block's first item
currentItems := make([]int, 0) // item, err := readPagedItems(512, 512, &currentItems, 612, mockReadNMore)
// requireNoError(t, err)
// Request the next block's first item // assertEqual(t, 512, item)
item, err := readPagedItems(512, 512, &currentItems, 612, mockReadNMore) // assertLength(t, 612, currentItems)
requireNoError(t, err) // })
assertEqual(t, 512, item) // }
assertLength(t, 612, currentItems)
})
}
+188 -216
View File
@@ -3,258 +3,230 @@ package data
import ( import (
"errors" "errors"
"io" "io"
"io/fs"
"math"
"runtime" "runtime"
"sync" "sync"
"github.com/CalebQ42/squashfs/internal/decompress" "github.com/CalebQ42/squashfs/internal/decompress"
) )
type FragReaderConstructor func() (io.Reader, error)
type FullReader struct { type FullReader struct {
r io.ReaderAt fileSize uint64
d decompress.Decompressor blockSize uint32
frag FragReaderConstructor dispatcher chan struct{}
sizes []uint32 pool *sync.Pool
initialOffset int64 rdr io.ReaderAt
finalBlockSize uint64 decomp decompress.Decompressor
blockSize uint32 sizes []uint32
goroutineLimit uint16 blockOffsets []uint64
closed bool fragDat []byte
} }
func NewFullReader(r io.ReaderAt, initialOffset int64, d decompress.Decompressor, sizes []uint32, finalBlockSize uint64, blockSize uint32) FullReader { func NewFullReader(rdr io.ReaderAt, decomp decompress.Decompressor, blockSize uint32, size uint64, start uint64, sizes []uint32) FullReader {
return FullReader{ out := FullReader{
r: r, fileSize: size,
d: d, blockSize: blockSize,
sizes: sizes, rdr: rdr,
initialOffset: initialOffset, decomp: decomp,
goroutineLimit: uint16(runtime.NumCPU()), sizes: sizes,
finalBlockSize: finalBlockSize,
blockSize: blockSize,
} }
out.blockOffsets = make([]uint64, len(sizes))
curOffset := start
for i := range sizes {
out.blockOffsets[i] = curOffset
curOffset += uint64(sizes[i]) &^ (1 << 24)
}
return out
} }
func (r *FullReader) Close() error { func (f *FullReader) Close() error {
r.closed = true f.fragDat = nil
r.r = nil f.sizes = nil
r.d = nil f.blockOffsets = nil
r.frag = nil
r.sizes = nil
return nil return nil
} }
func (r *FullReader) AddFrag(frag FragReaderConstructor) { func (f *FullReader) AddFragData(blockStart uint64, blockSize uint32, offset uint32) error {
r.frag = frag realSize := blockSize &^ (1 << 24)
} dat := make([]byte, realSize)
_, err := f.rdr.ReadAt(dat, int64(blockStart))
func (r *FullReader) SetGoroutineLimit(limit uint16) { if err != nil {
if limit <= 0 { return err
r.goroutineLimit = 1
} }
r.goroutineLimit = limit if blockSize == realSize {
dat, err = f.decomp.Decompress(dat)
if err != nil {
return err
}
}
f.fragDat = make([]byte, f.fileSize%uint64(f.blockSize))
copy(f.fragDat, dat[offset:])
dat = nil
return nil
} }
type retValue struct { func (f *FullReader) SetDispatcherPool(dispatcher chan struct{}, pool *sync.Pool) {
err error f.dispatcher = dispatcher
data []byte f.pool = pool
index uint64
} }
func (r FullReader) process(index uint64, fileOffset uint64, pool *sync.Pool, retChan chan *retValue) { // The number of blocks, including the fragment block if present
ret := pool.Get().(*retValue) func (f FullReader) BlockNum() uint32 {
ret.index = index out := len(f.sizes)
realSize := r.sizes[index] &^ (1 << 24) if f.fragDat != nil {
out++
}
return uint32(out)
}
// Returns the data block at the given index
func (f FullReader) Block(i uint32) ([]byte, error) {
if i == uint32(len(f.sizes)) && f.fragDat != nil {
return f.fragDat, nil
}
if i >= uint32(len(f.sizes)) {
return nil, errors.New("invalid block index")
}
realSize := f.sizes[i] &^ (1 << 24)
if realSize == 0 { if realSize == 0 {
if index == uint64(len(r.sizes))-1 && r.frag == nil { if i == uint32(len(f.sizes)-1) && f.fragDat == nil {
ret.data = make([]byte, r.finalBlockSize) return make([]byte, f.fileSize%uint64(f.blockSize)), nil
} else {
ret.data = make([]byte, r.blockSize)
} }
ret.err = nil return make([]byte, f.blockSize), nil
retChan <- ret
return
} }
ret.data = make([]byte, realSize) dat := make([]byte, realSize)
_, ret.err = r.r.ReadAt(ret.data, r.initialOffset+int64(fileOffset)) _, err := f.rdr.ReadAt(dat, int64(f.blockOffsets[i]))
if r.sizes[index] == realSize { if err != nil {
ret.data, ret.err = r.d.Decompress(ret.data) return nil, err
} }
retChan <- ret if realSize == f.sizes[i] {
dat, err = f.decomp.Decompress(dat)
}
return dat, err
} }
func (r FullReader) WriteTo(w io.Writer) (int64, error) { func (f FullReader) blockFromPool(i uint32) *BlockResults {
if r.closed { out := f.pool.Get().(*BlockResults)
return 0, fs.ErrClosed out.idx = i
out.err = nil
if i == uint32(len(f.sizes)) && f.fragDat != nil {
out.block = f.fragDat
return out
} }
// if wa, is := w.(io.WriterAt); is { if i >= uint32(len(f.sizes)) {
// return r.writeToWriteAt(wa) out.err = errors.New("invalid block index")
// } return out
var curIndex uint64
var curOffset uint64
var toProcess uint16
var wrote int64
cache := make(map[uint64]*retValue)
var errCache []error
retChan := make(chan *retValue, r.goroutineLimit)
pool := &sync.Pool{
New: func() any {
return &retValue{}
},
} }
for i := uint64(0); i < uint64(math.Ceil(float64(len(r.sizes))/float64(r.goroutineLimit))); i++ { realSize := f.sizes[i] &^ (1 << 24)
toProcess = min(uint16(len(r.sizes))-(uint16(i)*r.goroutineLimit), r.goroutineLimit) if realSize == 0 {
// Start all the goroutines if i == uint32(len(f.sizes)-1) && f.fragDat == nil {
for j := uint16(0); j < toProcess; j++ { out.block = make([]byte, f.fileSize%uint64(f.blockSize))
go r.process((i*uint64(r.goroutineLimit))+uint64(j), curOffset, pool, retChan) return out
curOffset += uint64(r.sizes[(i*uint64(r.goroutineLimit))+uint64(j)]) &^ (1 << 24)
} }
// Then consume the results on retChan out.block = make([]byte, f.blockSize)
for j := uint16(0); j < toProcess; j++ { }
res := <-retChan out.block = make([]byte, realSize)
// If there's an error, we don't care about the results. _, out.err = f.rdr.ReadAt(out.block, int64(f.blockOffsets[i]))
if res.err != nil { if out.err != nil {
errCache = append(errCache, res.err) return out
if len(cache) > 0 { }
clear(cache) if realSize == f.sizes[i] {
} out.block, out.err = f.decomp.Decompress(out.block)
continue }
return out
}
// BlockResults carries the outcome of reading a single data block. Values are
// recycled through a sync.Pool and passed from the per-block goroutines
// started by FullReader.WriteTo back to its collecting loop.
type BlockResults struct {
// idx is the index of the block this result belongs to; WriteTo uses it to
// place or order the block in the output.
idx uint32
// block holds the block's bytes as produced by blockFromPool.
block []byte
// err is the error hit while reading or decompressing the block, or nil.
err error
}
func (f FullReader) WriteTo(w io.Writer) (wrote int64, err error) {
if f.dispatcher == nil {
f.dispatcher = make(chan struct{}, runtime.NumCPU())
for range runtime.NumCPU() {
f.dispatcher <- struct{}{}
}
}
if f.pool == nil {
f.pool = &sync.Pool{
New: func() any {
return &BlockResults{}
},
}
}
open := true
resChan := make(chan *BlockResults, len(f.dispatcher))
var results map[uint32]*BlockResults
if _, is := w.(io.WriterAt); !is {
results = make(map[uint32]*BlockResults)
}
for i := range f.BlockNum() {
go func(idx uint32) {
<-f.dispatcher
defer func() { f.dispatcher <- struct{}{} }()
if !open {
resChan <- f.pool.Get().(*BlockResults)
return
} }
// If there has been an error previously, we don't care about the results. resChan <- f.blockFromPool(idx)
// We still want to wait for all the goroutines to prevent resources being wasted. }(i)
if len(errCache) > 0 { }
continue out := int64(0)
} errOut := make([]error, 0)
// If we don't need the data yet, we cache it and move on for i := uint32(0); i < f.BlockNum(); {
if res.index != curIndex { res := <-resChan
cache[res.index] = res defer f.pool.Put(res)
continue if res.err != nil {
} open = false
// If we do need the data, we write it errOut = append(errOut, res.err)
wr, err := w.Write(res.data) }
wrote += int64(wr) if len(errOut) > 0 {
i++
continue
}
if wa, is := w.(io.WriterAt); is {
_, err := wa.WriteAt(res.block, int64(res.idx)*int64(f.blockSize))
if err != nil { if err != nil {
errCache = append(errCache, err) errOut = append(errOut, err)
if len(cache) > 0 { } else {
clear(cache) out = max(out, int64(res.idx)*int64(f.blockSize)+int64(len(res.block)))
}
continue
} }
pool.Put(res) i++
curIndex++ continue
// Now we recursively try to clear the cache }
for len(cache) > 0 { var err error
res, ok := cache[curIndex] if res.idx == i {
if !ok { _, err = w.Write(res.block)
break if err != nil {
} errOut = append(errOut, err)
wr, err := w.Write(res.data) } else {
wrote += int64(wr) out = max(out, int64(res.idx)*int64(f.blockSize)+int64(len(res.block)))
}
i++
} else {
results[res.idx] = res
}
var has bool
for {
res, has = results[i]
if has {
_, err = w.Write(res.block)
if err != nil { if err != nil {
errCache = append(errCache, err) errOut = append(errOut, err)
if len(cache) > 0 { } else {
clear(cache) out = max(out, int64(res.idx)*int64(f.blockSize)+int64(len(res.block)))
}
break
} }
delete(cache, curIndex) i++
pool.Put(res) delete(results, i)
curIndex++ f.pool.Put(res)
} else {
break
} }
} }
if len(errCache) > 0 {
return wrote, errors.Join(errCache...)
}
} }
if r.frag != nil { if len(errOut) > 0 {
rdr, err := r.frag() return out, errors.Join(errOut...)
if err != nil {
return wrote, err
}
wr, err := io.Copy(w, rdr)
wrote += wr
if l, ok := rdr.(*io.LimitedReader); ok {
if cl, ok := l.R.(io.Closer); ok {
cl.Close()
}
}
if err != nil {
return wrote, err
}
} }
return wrote, nil return out, nil
} }
// func (r FullReader) writeToWriteAt(w io.WriterAt) (out int64, outErr error) {
// wait := &sync.WaitGroup{}
// wait.Add(len(r.sizes))
// mgr := routinemanager.NewManager(r.goroutineLimit)
// curOffset := r.initialOffset
// for i := uint64(0); i < uint64(len(r.sizes)); i++ {
// go func(index uint64, fileOffset int64) {
// lckNum := mgr.Lock()
// defer mgr.Unlock(lckNum)
// defer wait.Done()
// realSize := r.sizes[index] &^ (1 << 24)
// if realSize == 0 {
// if index == uint64(len(r.sizes))-1 && r.frag == nil {
// _, err := w.WriteAt([]byte{0}, int64((uint64(r.blockSize)*index)+r.finalBlockSize)-1)
// if err != nil {
// outErr = errors.Join(outErr, err)
// return
// }
// out = max(out, int64((uint64(r.blockSize)*index)+r.finalBlockSize))
// }
// return
// }
// data := make([]byte, realSize)
// err := binary.Read(toreader.NewReader(r.r, int64(fileOffset)), binary.LittleEndian, &data)
// if err != nil {
// outErr = errors.Join(outErr, err)
// return
// }
// if r.sizes[index] == realSize {
// data, err = r.d.Decompress(data)
// }
// if err != nil {
// outErr = errors.Join(outErr, err)
// return
// }
// _, err = w.WriteAt(data, int64(uint64(r.blockSize)*index))
// if err != nil {
// outErr = errors.Join(outErr, err)
// return
// }
// out = max(out, int64(uint64(r.blockSize)*(index+1)))
// }(i, curOffset)
// curOffset += int64(r.sizes[i]) &^ (1 << 24)
// }
// if r.frag != nil {
// wait.Add(1)
// go func() {
// lckNum := mgr.Lock()
// defer mgr.Unlock(lckNum)
// defer wait.Done()
// rdr, err := r.frag()
// if err != nil {
// outErr = errors.Join(outErr, err)
// return
// }
// dat, err := io.ReadAll(rdr)
// if err != nil {
// outErr = errors.Join(outErr, err)
// return
// }
// _, err = w.WriteAt(dat, int64(int(r.blockSize)*len(r.sizes)))
// if err != nil {
// outErr = errors.Join(outErr, err)
// return
// }
// out = int64(int(r.blockSize)*len(r.sizes)) + int64(r.finalBlockSize)
// }()
// }
// wait.Wait()
// return
// }
+42 -86
View File
@@ -1,104 +1,60 @@
package data package data
import ( import "io"
"io"
"io/fs"
"github.com/CalebQ42/squashfs/internal/decompress"
)
type Reader struct { type Reader struct {
r io.Reader f *FullReader
d decompress.Decompressor curBlock []byte
frag io.Reader nextIdx uint32
sizes []uint32 curOffset uint32
dat []byte
curOffset int
curIndex uint64
finalBlockSize uint64
blockSize uint32
closed bool
} }
func NewReader(r io.Reader, d decompress.Decompressor, sizes []uint32, finalBlockSize uint64, blockSize uint32) Reader { func NewReader(f *FullReader) (Reader, error) {
return Reader{ dat, err := f.Block(0)
r: r, if err != nil {
d: d, return Reader{}, err
sizes: sizes,
finalBlockSize: finalBlockSize,
blockSize: blockSize,
} }
return Reader{
f: f,
curBlock: dat,
nextIdx: 1,
curOffset: 0,
}, nil
} }
func (r *Reader) AddFrag(fragRdr io.Reader) { func (d *Reader) Close() error {
r.frag = fragRdr d.curBlock = nil
return nil
} }
func (r *Reader) advance() error { func (d *Reader) advanceBlock() error {
r.curOffset = 0 if d.nextIdx >= d.f.BlockNum() {
defer func() { r.curIndex++ }() d.curBlock = nil
var err error
if r.curIndex == uint64(len(r.sizes)) && r.frag != nil {
r.dat, err = io.ReadAll(r.frag)
return err
} else if r.curIndex >= uint64(len(r.sizes)) {
r.dat = []byte{}
return io.EOF return io.EOF
} }
realSize := r.sizes[r.curIndex] &^ (1 << 24) var err error
if realSize == 0 { d.curBlock, err = d.f.Block(d.nextIdx)
if r.curIndex == uint64(len(r.sizes))-1 && r.frag == nil {
r.dat = make([]byte, r.finalBlockSize)
} else {
r.dat = make([]byte, r.blockSize)
}
return nil
}
r.dat = make([]byte, realSize)
_, err = r.r.Read(r.dat)
if err != nil { if err != nil {
return err return err
} }
if r.sizes[r.curIndex] != realSize { d.nextIdx++
return nil d.curOffset = 0
}
r.dat, err = r.d.Decompress(r.dat)
return err
}
func (r *Reader) Read(b []byte) (int, error) {
if r.closed {
return 0, fs.ErrClosed
}
curRead := 0
var toRead int
for curRead < len(b) {
if r.curOffset >= len(r.dat) {
if err := r.advance(); err != nil {
return curRead, err
}
}
toRead = min(len(b)-curRead, len(r.dat)-r.curOffset)
toRead = copy(b[curRead:], r.dat[r.curOffset:r.curOffset+toRead])
r.curOffset += toRead
curRead += toRead
}
return curRead, nil
}
func (r *Reader) Close() error {
r.closed = true
r.r = nil
r.d = nil
if r.frag != nil {
if l, ok := r.frag.(*io.LimitedReader); ok {
if cl, ok := l.R.(io.Closer); ok {
cl.Close()
}
}
}
r.frag = nil
r.sizes = nil
r.dat = nil
return nil return nil
} }
// Read implements io.Reader. It fills buf with file data, pulling further
// blocks from the underlying FullReader through advanceBlock whenever the
// current block has been fully consumed.
//
// It returns the number of bytes copied into buf. When advanceBlock fails
// (including with io.EOF once every block has been consumed) the bytes copied
// so far are returned together with that error, so a final short read may
// report n > 0 alongside the error.
func (d *Reader) Read(buf []byte) (int, error) {
	totRed := 0
	for totRed < len(buf) {
		if int(d.curOffset) >= len(d.curBlock) {
			if err := d.advanceBlock(); err != nil {
				return totRed, err
			}
			// Re-check the bounds: the freshly loaded block may be empty.
			continue
		}
		// copy is bounded by the shorter of the two slices, so no explicit
		// min() is needed. Both cursors must advance by what was copied,
		// otherwise the loop never terminates.
		n := copy(buf[totRed:], d.curBlock[d.curOffset:])
		d.curOffset += uint32(n)
		totRed += n
	}
	return totRed, nil
}
+39 -11
View File
@@ -11,12 +11,45 @@ type header struct {
Num uint32 Num uint32
} }
type decEntry struct { func readHeader(r io.Reader) (h header, err error) {
dat := make([]byte, 12)
_, err = r.Read(dat)
if err != nil {
return
}
h.Count = binary.LittleEndian.Uint32(dat)
h.BlockStart = binary.LittleEndian.Uint32(dat[4:])
h.Num = binary.LittleEndian.Uint32(dat[8:])
return
}
type dirEntry struct {
Offset uint16 Offset uint16
NumOffset int16 NumOffset int16
InodeType uint16 InodeType uint16
NameSize uint16 NameSize uint16
// Name []byte (not decoded along with decEntry) Name []byte
}
func readEntry(r io.Reader) (e dirEntry, err error) {
dat := make([]byte, 8)
_, err = r.Read(dat)
if err != nil {
return
}
e.Offset = binary.LittleEndian.Uint16(dat)
_, err = binary.Decode(dat[2:], binary.LittleEndian, &e.NumOffset)
if err != nil {
return
}
e.InodeType = binary.LittleEndian.Uint16(dat[4:])
e.NameSize = binary.LittleEndian.Uint16(dat[6:])
e.Name = make([]byte, e.NameSize+1)
_, err = r.Read(e.Name)
if err != nil {
return
}
return
} }
type Entry struct { type Entry struct {
@@ -31,20 +64,15 @@ func ReadDirectory(r io.Reader, size uint32) (out []Entry, err error) {
size -= 3 size -= 3
var curRead uint32 var curRead uint32
var h header var h header
var de decEntry var de dirEntry
for curRead < size { for curRead < size {
err = binary.Read(r, binary.LittleEndian, &h) h, err = readHeader(r)
if err != nil { if err != nil {
return return
} }
curRead += 12 curRead += 12
for i := uint32(0); i < h.Count+1 && curRead < size; i++ { for i := uint32(0); i < h.Count+1 && curRead < size; i++ {
err = binary.Read(r, binary.LittleEndian, &de) de, err = readEntry(r)
if err != nil {
return
}
nameTmp := make([]byte, de.NameSize+1)
err = binary.Read(r, binary.LittleEndian, &nameTmp)
if err != nil { if err != nil {
return return
} }
@@ -52,7 +80,7 @@ func ReadDirectory(r io.Reader, size uint32) (out []Entry, err error) {
out = append(out, Entry{ out = append(out, Entry{
BlockStart: h.BlockStart, BlockStart: h.BlockStart,
Offset: de.Offset, Offset: de.Offset,
Name: string(nameTmp), Name: string(de.Name),
InodeType: de.InodeType, InodeType: de.InodeType,
Num: h.Num + uint32(de.NumOffset), Num: h.Num + uint32(de.NumOffset),
}) })
+21 -68
View File
@@ -2,7 +2,6 @@ package squashfslow
import ( import (
"errors" "errors"
"io"
"github.com/CalebQ42/squashfs/internal/metadata" "github.com/CalebQ42/squashfs/internal/metadata"
"github.com/CalebQ42/squashfs/internal/toreader" "github.com/CalebQ42/squashfs/internal/toreader"
@@ -84,6 +83,8 @@ func (b FileBase) IsRegular() bool {
return b.Inode.Type == inode.Fil || b.Inode.Type == inode.EFil return b.Inode.Type == inode.Fil || b.Inode.Type == inode.EFil
} }
// GetRegFileReaders returns a regular file's readers. The two are linked: the data.Reader reads through the data.FullReader,
// so closing the FullReader also breaks the Reader.
func (b FileBase) GetRegFileReaders(r Reader) (data.Reader, data.FullReader, error) { func (b FileBase) GetRegFileReaders(r Reader) (data.Reader, data.FullReader, error) {
if !b.IsRegular() { if !b.IsRegular() {
return data.Reader{}, data.FullReader{}, errors.New("not a regular file") return data.Reader{}, data.FullReader{}, errors.New("not a regular file")
@@ -91,41 +92,32 @@ func (b FileBase) GetRegFileReaders(r Reader) (data.Reader, data.FullReader, err
var blockStart uint64 var blockStart uint64
var fragIndex uint32 var fragIndex uint32
var fragOffset uint32 var fragOffset uint32
var fragSize uint64
var sizes []uint32 var sizes []uint32
var fileSize uint64
if b.Inode.Type == inode.Fil { if b.Inode.Type == inode.Fil {
blockStart = uint64(b.Inode.Data.(inode.File).BlockStart) blockStart = uint64(b.Inode.Data.(inode.File).BlockStart)
fragIndex = b.Inode.Data.(inode.File).FragInd fragIndex = b.Inode.Data.(inode.File).FragInd
fragOffset = b.Inode.Data.(inode.File).FragOffset fragOffset = b.Inode.Data.(inode.File).FragOffset
sizes = b.Inode.Data.(inode.File).BlockSizes sizes = b.Inode.Data.(inode.File).BlockSizes
fragSize = uint64(b.Inode.Data.(inode.File).Size % r.Superblock.BlockSize) fileSize = uint64(b.Inode.Data.(inode.File).Size)
} else { } else {
blockStart = b.Inode.Data.(inode.EFile).BlockStart blockStart = b.Inode.Data.(inode.EFile).BlockStart
fragIndex = b.Inode.Data.(inode.EFile).FragInd fragIndex = b.Inode.Data.(inode.EFile).FragInd
fragOffset = b.Inode.Data.(inode.EFile).FragOffset fragOffset = b.Inode.Data.(inode.EFile).FragOffset
sizes = b.Inode.Data.(inode.EFile).BlockSizes sizes = b.Inode.Data.(inode.EFile).BlockSizes
fragSize = b.Inode.Data.(inode.EFile).Size % uint64(r.Superblock.BlockSize) fileSize = b.Inode.Data.(inode.EFile).Size
} }
frag := func() (io.Reader, error) { outFull := data.NewFullReader(r.r, r.d, r.Superblock.BlockSize, fileSize, blockStart, sizes)
if fragIndex != 0xFFFFFFFF {
ent, err := r.fragEntry(fragIndex) ent, err := r.fragEntry(fragIndex)
if err != nil {
return nil, err
}
frag := data.NewReader(toreader.NewReader(r.r, int64(ent.Start)), r.d, []uint32{ent.Size}, uint64(r.Superblock.BlockSize), r.Superblock.BlockSize)
frag.Read(make([]byte, fragOffset))
return io.LimitReader(&frag, int64(fragSize)), nil
}
outRdr := data.NewReader(toreader.NewReader(r.r, int64(blockStart)), r.d, sizes, fragSize, r.Superblock.BlockSize)
if fragIndex != 0xffffffff {
f, err := frag()
if err != nil { if err != nil {
return data.Reader{}, data.FullReader{}, err return data.Reader{}, data.FullReader{}, err
} }
outRdr.AddFrag(f) outFull.AddFragData(ent.Start, ent.Size, fragOffset)
} }
outFull := data.NewFullReader(r.r, int64(blockStart), r.d, sizes, fragSize, r.Superblock.BlockSize) outRdr, err := data.NewReader(&outFull)
if fragIndex != 0xffffffff { if err != nil {
outFull.AddFrag(frag) return data.Reader{}, data.FullReader{}, err
} }
return outRdr, outFull, nil return outRdr, outFull, nil
} }
@@ -137,67 +129,28 @@ func (b FileBase) GetFullReader(r *Reader) (data.FullReader, error) {
var blockStart uint64 var blockStart uint64
var fragIndex uint32 var fragIndex uint32
var fragOffset uint32 var fragOffset uint32
var fragSize uint64
var sizes []uint32 var sizes []uint32
var fileSize uint64
if b.Inode.Type == inode.Fil { if b.Inode.Type == inode.Fil {
blockStart = uint64(b.Inode.Data.(inode.File).BlockStart) blockStart = uint64(b.Inode.Data.(inode.File).BlockStart)
fragIndex = b.Inode.Data.(inode.File).FragInd fragIndex = b.Inode.Data.(inode.File).FragInd
fragOffset = b.Inode.Data.(inode.File).FragOffset fragOffset = b.Inode.Data.(inode.File).FragOffset
sizes = b.Inode.Data.(inode.File).BlockSizes sizes = b.Inode.Data.(inode.File).BlockSizes
fragSize = uint64(b.Inode.Data.(inode.File).Size % r.Superblock.BlockSize) fileSize = uint64(b.Inode.Data.(inode.File).Size)
} else { } else {
blockStart = b.Inode.Data.(inode.EFile).BlockStart blockStart = b.Inode.Data.(inode.EFile).BlockStart
fragIndex = b.Inode.Data.(inode.EFile).FragInd fragIndex = b.Inode.Data.(inode.EFile).FragInd
fragOffset = b.Inode.Data.(inode.EFile).FragOffset fragOffset = b.Inode.Data.(inode.EFile).FragOffset
sizes = b.Inode.Data.(inode.EFile).BlockSizes sizes = b.Inode.Data.(inode.EFile).BlockSizes
fragSize = b.Inode.Data.(inode.EFile).Size % uint64(r.Superblock.BlockSize) fileSize = b.Inode.Data.(inode.EFile).Size
} }
outFull := data.NewFullReader(r.r, int64(blockStart), r.d, sizes, fragSize, r.Superblock.BlockSize) outFull := data.NewFullReader(r.r, r.d, r.Superblock.BlockSize, fileSize, blockStart, sizes)
if fragIndex != 0xffffffff { if fragIndex != 0xFFFFFFFF {
outFull.AddFrag(func() (io.Reader, error) { ent, err := r.fragEntry(fragIndex)
ent, err := r.fragEntry(fragIndex) if err != nil {
if err != nil { return data.FullReader{}, err
return nil, err }
} outFull.AddFragData(ent.Start, ent.Size, fragOffset)
frag := data.NewReader(toreader.NewReader(r.r, int64(ent.Start)), r.d, []uint32{ent.Size}, uint64(r.Superblock.BlockSize), r.Superblock.BlockSize)
frag.Read(make([]byte, fragOffset))
return io.LimitReader(&frag, int64(fragSize)), nil
})
} }
return outFull, nil return outFull, nil
} }
func (b FileBase) GetReader(r *Reader) (data.Reader, error) {
if !b.IsRegular() {
return data.Reader{}, errors.New("not a regular file")
}
var blockStart uint64
var fragIndex uint32
var fragOffset uint32
var fragSize uint64
var sizes []uint32
if b.Inode.Type == inode.Fil {
blockStart = uint64(b.Inode.Data.(inode.File).BlockStart)
fragIndex = b.Inode.Data.(inode.File).FragInd
fragOffset = b.Inode.Data.(inode.File).FragOffset
sizes = b.Inode.Data.(inode.File).BlockSizes
fragSize = uint64(b.Inode.Data.(inode.File).Size % r.Superblock.BlockSize)
} else {
blockStart = b.Inode.Data.(inode.EFile).BlockStart
fragIndex = b.Inode.Data.(inode.EFile).FragInd
fragOffset = b.Inode.Data.(inode.EFile).FragOffset
sizes = b.Inode.Data.(inode.EFile).BlockSizes
fragSize = b.Inode.Data.(inode.EFile).Size % uint64(r.Superblock.BlockSize)
}
outRdr := data.NewReader(toreader.NewReader(r.r, int64(blockStart)), r.d, sizes, fragSize, r.Superblock.BlockSize)
if fragIndex != 0xffffffff {
ent, err := r.fragEntry(fragIndex)
if err != nil {
return data.Reader{}, err
}
frag := data.NewReader(toreader.NewReader(r.r, int64(ent.Start)), r.d, []uint32{ent.Size}, uint64(r.Superblock.BlockSize), r.Superblock.BlockSize)
frag.Read(make([]byte, fragOffset))
outRdr.AddFrag(io.LimitReader(&frag, int64(fragSize)))
}
return outRdr, nil
}
+3 -1
View File
@@ -7,7 +7,9 @@ import (
"github.com/CalebQ42/squashfs/low/inode" "github.com/CalebQ42/squashfs/low/inode"
) )
func (r Reader) InodeFromRef(ref uint64) (inode.Inode, error) { type InodeRef = uint64
func (r Reader) InodeFromRef(ref InodeRef) (inode.Inode, error) {
offset, meta := (ref>>16)+r.Superblock.InodeTableStart, ref&0xFFFF offset, meta := (ref>>16)+r.Superblock.InodeTableStart, ref&0xFFFF
rdr := metadata.NewReader(toreader.NewReader(r.r, int64(offset)), r.d) rdr := metadata.NewReader(toreader.NewReader(r.r, int64(offset)), r.d)
defer rdr.Close() defer rdr.Close()
+14 -14
View File
@@ -13,6 +13,20 @@ type Directory struct {
ParentNum uint32 ParentNum uint32
} }
// ReadDir decodes a Directory from the next 16 bytes of r (all fields
// little-endian).
//
// The bytes are fetched with io.ReadFull, so a reader that delivers the data
// in several short reads is handled correctly and truncated input is reported
// as an error instead of being decoded from a partially filled buffer.
func ReadDir(r io.Reader) (d Directory, err error) {
	var dat [16]byte
	if _, err = io.ReadFull(r, dat[:]); err != nil {
		return Directory{}, err
	}
	d.BlockStart = binary.LittleEndian.Uint32(dat[0:4])
	d.LinkCount = binary.LittleEndian.Uint32(dat[4:8])
	d.Size = binary.LittleEndian.Uint16(dat[8:10])
	d.Offset = binary.LittleEndian.Uint16(dat[10:12])
	d.ParentNum = binary.LittleEndian.Uint32(dat[12:16])
	return d, nil
}
type EDirectory struct { type EDirectory struct {
LinkCount uint32 LinkCount uint32
Size uint32 Size uint32
@@ -31,20 +45,6 @@ type DirectoryIndex struct {
Name []byte Name []byte
} }
func ReadDir(r io.Reader) (d Directory, err error) {
dat := make([]byte, 16)
_, err = r.Read(dat)
if err != nil {
return
}
d.BlockStart = binary.LittleEndian.Uint32(dat)
d.LinkCount = binary.LittleEndian.Uint32(dat[4:])
d.Size = binary.LittleEndian.Uint16(dat[8:])
d.Offset = binary.LittleEndian.Uint16(dat[10:])
d.ParentNum = binary.LittleEndian.Uint32(dat[12:])
return
}
func ReadEDir(r io.Reader) (d EDirectory, err error) { func ReadEDir(r io.Reader) (d EDirectory, err error) {
dat := make([]byte, 24) dat := make([]byte, 24)
_, err = r.Read(dat) _, err = r.Read(dat)
+33 -22
View File
@@ -14,31 +14,16 @@ type File struct {
BlockSizes []uint32 BlockSizes []uint32
} }
type eFileInit struct {
BlockStart uint64
Size uint64
Sparse uint64
LinkCount uint32
FragInd uint32
FragOffset uint32
XattrInd uint32
}
type EFile struct {
eFileInit
BlockSizes []uint32
}
func ReadFile(r io.Reader, blockSize uint32) (f File, err error) { func ReadFile(r io.Reader, blockSize uint32) (f File, err error) {
dat := make([]byte, 16) dat := make([]byte, 16)
_, err = r.Read(dat) _, err = r.Read(dat)
if err != nil { if err != nil {
return return
} }
f.BlockStart = binary.LittleEndian.Uint32(dat) f.BlockStart = binary.LittleEndian.Uint32(dat[0:4])
f.FragInd = binary.LittleEndian.Uint32(dat[4:]) f.FragInd = binary.LittleEndian.Uint32(dat[4:8])
f.FragOffset = binary.LittleEndian.Uint32(dat[8:]) f.FragOffset = binary.LittleEndian.Uint32(dat[8:12])
f.Size = binary.LittleEndian.Uint32(dat[12:]) f.Size = binary.LittleEndian.Uint32(dat[12:16])
toRead := int(math.Floor(float64(f.Size) / float64(blockSize))) toRead := int(math.Floor(float64(f.Size) / float64(blockSize)))
if f.FragInd == 0xFFFFFFFF && f.Size%blockSize > 0 { if f.FragInd == 0xFFFFFFFF && f.Size%blockSize > 0 {
toRead++ toRead++
@@ -55,16 +40,42 @@ func ReadFile(r io.Reader, blockSize uint32) (f File, err error) {
return return
} }
type EFile struct {
BlockStart uint64
Size uint64
Sparse uint64
LinkCount uint32
FragInd uint32
FragOffset uint32
XattrInd uint32
BlockSizes []uint32
}
func ReadEFile(r io.Reader, blockSize uint32) (f EFile, err error) { func ReadEFile(r io.Reader, blockSize uint32) (f EFile, err error) {
err = binary.Read(r, binary.LittleEndian, &f.eFileInit) dat := make([]byte, 40)
_, err = r.Read(dat)
if err != nil { if err != nil {
return return
} }
toRead := int(math.Floor(float64(f.Size) / float64(blockSize))) f.BlockStart = binary.LittleEndian.Uint64(dat[0:8])
f.Size = binary.LittleEndian.Uint64(dat[8:16])
f.Sparse = binary.LittleEndian.Uint64(dat[16:24])
f.LinkCount = binary.LittleEndian.Uint32(dat[24:28])
f.FragInd = binary.LittleEndian.Uint32(dat[28:32])
f.FragOffset = binary.LittleEndian.Uint32(dat[32:36])
f.XattrInd = binary.LittleEndian.Uint32(dat[36:40])
toRead := f.Size / uint64(blockSize)
if f.FragInd == 0xFFFFFFFF && f.Size%uint64(blockSize) > 0 { if f.FragInd == 0xFFFFFFFF && f.Size%uint64(blockSize) > 0 {
toRead++ toRead++
} }
dat = make([]byte, toRead*4)
_, err = r.Read(dat)
if err != nil {
return
}
f.BlockSizes = make([]uint32, toRead) f.BlockSizes = make([]uint32, toRead)
err = binary.Read(r, binary.LittleEndian, &f.BlockSizes) for i := range toRead {
f.BlockSizes[i] = binary.LittleEndian.Uint32(dat[i*4:])
}
return return
} }
+8 -1
View File
@@ -40,10 +40,17 @@ type Inode struct {
} }
func Read(r io.Reader, blockSize uint32) (i Inode, err error) { func Read(r io.Reader, blockSize uint32) (i Inode, err error) {
err = binary.Read(r, binary.LittleEndian, &i.Header) dat := make([]byte, 16)
_, err = r.Read(dat)
if err != nil { if err != nil {
return return
} }
i.Type = binary.LittleEndian.Uint16(dat[0:2])
i.Perm = binary.LittleEndian.Uint16(dat[2:4])
i.UidInd = binary.LittleEndian.Uint16(dat[4:6])
i.GidInd = binary.LittleEndian.Uint16(dat[6:8])
i.ModTime = binary.LittleEndian.Uint32(dat[8:12])
i.Num = binary.LittleEndian.Uint32(dat[12:16])
switch i.Type { switch i.Type {
case Dir: case Dir:
i.Data, err = ReadDir(r) i.Data, err = ReadDir(r)
+36 -12
View File
@@ -10,18 +10,31 @@ type Device struct {
Dev uint32 Dev uint32
} }
// ReadDevice decodes a Device from the next 8 bytes of r (all fields
// little-endian).
//
// io.ReadFull is used so that short reads are retried and truncated input is
// returned as an error rather than decoded from a partially filled buffer.
func ReadDevice(r io.Reader) (d Device, err error) {
	var dat [8]byte
	if _, err = io.ReadFull(r, dat[:]); err != nil {
		return Device{}, err
	}
	d.LinkCount = binary.LittleEndian.Uint32(dat[0:4])
	d.Dev = binary.LittleEndian.Uint32(dat[4:8])
	return d, nil
}
type EDevice struct { type EDevice struct {
Device Device
XattrInd uint32 XattrInd uint32
} }
func ReadDevice(r io.Reader) (d Device, err error) {
err = binary.Read(r, binary.LittleEndian, &d)
return
}
func ReadEDevice(r io.Reader) (d EDevice, err error) { func ReadEDevice(r io.Reader) (d EDevice, err error) {
err = binary.Read(r, binary.LittleEndian, &d) dat := make([]byte, 12)
_, err = r.Read(dat)
if err != nil {
return
}
d.LinkCount = binary.LittleEndian.Uint32(dat)
d.Dev = binary.LittleEndian.Uint32(dat[4:])
d.XattrInd = binary.LittleEndian.Uint32(dat[8:])
return return
} }
@@ -29,17 +42,28 @@ type IPC struct {
LinkCount uint32 LinkCount uint32
} }
// ReadIPC decodes an IPC from the next 4 bytes of r (little-endian).
//
// io.ReadFull is used so that short reads are retried and truncated input is
// returned as an error rather than decoded from a partially filled buffer.
func ReadIPC(r io.Reader) (i IPC, err error) {
	var dat [4]byte
	if _, err = io.ReadFull(r, dat[:]); err != nil {
		return IPC{}, err
	}
	i.LinkCount = binary.LittleEndian.Uint32(dat[:])
	return i, nil
}
type EIPC struct { type EIPC struct {
IPC IPC
XattrInd uint32 XattrInd uint32
} }
func ReadIPC(r io.Reader) (i IPC, err error) {
err = binary.Read(r, binary.LittleEndian, &i)
return
}
func ReadEIPC(r io.Reader) (i EIPC, err error) { func ReadEIPC(r io.Reader) (i EIPC, err error) {
err = binary.Read(r, binary.LittleEndian, &i) dat := make([]byte, 8)
_, err = r.Read(dat)
if err != nil {
return
}
i.LinkCount = binary.LittleEndian.Uint32(dat)
i.XattrInd = binary.LittleEndian.Uint32(dat[4:])
return return
} }
+25 -17
View File
@@ -5,42 +5,50 @@ import (
"io" "io"
) )
type symlinkInit struct { type Symlink struct {
LinkCount uint32 LinkCount uint32
TargetSize uint32 TargetSize uint32
} Target []byte
type Symlink struct {
symlinkInit
Target []byte
}
type ESymlink struct {
symlinkInit
Target []byte
XattrInd uint32
} }
func ReadSym(r io.Reader) (s Symlink, err error) { func ReadSym(r io.Reader) (s Symlink, err error) {
err = binary.Read(r, binary.LittleEndian, &s.symlinkInit) dat := make([]byte, 8)
_, err = r.Read(dat)
if err != nil { if err != nil {
return return
} }
s.LinkCount = binary.LittleEndian.Uint32(dat)
s.TargetSize = binary.LittleEndian.Uint32(dat[4:])
s.Target = make([]byte, s.TargetSize) s.Target = make([]byte, s.TargetSize)
err = binary.Read(r, binary.LittleEndian, &s.Target) _, err = r.Read(s.Target)
return return
} }
type ESymlink struct {
LinkCount uint32
TargetSize uint32
Target []byte
XattrInd uint32
}
func ReadESym(r io.Reader) (s ESymlink, err error) { func ReadESym(r io.Reader) (s ESymlink, err error) {
err = binary.Read(r, binary.LittleEndian, &s.symlinkInit) dat := make([]byte, 8)
_, err = r.Read(dat)
if err != nil { if err != nil {
return return
} }
s.LinkCount = binary.LittleEndian.Uint32(dat)
s.TargetSize = binary.LittleEndian.Uint32(dat[4:])
s.Target = make([]byte, s.TargetSize) s.Target = make([]byte, s.TargetSize)
err = binary.Read(r, binary.LittleEndian, &s.Target) _, err = r.Read(s.Target)
if err != nil { if err != nil {
return return
} }
err = binary.Read(r, binary.LittleEndian, &s.XattrInd) dat = make([]byte, 4)
_, err = r.Read(dat)
if err != nil {
return
}
s.XattrInd = binary.LittleEndian.Uint32(dat)
return return
} }
+43 -105
View File
@@ -4,10 +4,8 @@ import (
"encoding/binary" "encoding/binary"
"errors" "errors"
"io" "io"
"math"
"github.com/CalebQ42/squashfs/internal/decompress" "github.com/CalebQ42/squashfs/internal/decompress"
"github.com/CalebQ42/squashfs/internal/metadata"
"github.com/CalebQ42/squashfs/internal/toreader" "github.com/CalebQ42/squashfs/internal/toreader"
"github.com/CalebQ42/squashfs/low/inode" "github.com/CalebQ42/squashfs/low/inode"
) )
@@ -30,13 +28,13 @@ var (
) )
type Reader struct { type Reader struct {
Root Directory
Superblock superblock
r io.ReaderAt r io.ReaderAt
d decompress.Decompressor d decompress.Decompressor
Root Directory fragTable *Table[fragEntry]
fragTable []fragEntry idTable *Table[uint32]
idTable []uint32 exportTable *Table[InodeRef]
exportTable []uint64
Superblock superblock
} }
func NewReader(r io.ReaderAt) (rdr Reader, err error) { func NewReader(r io.ReaderAt) (rdr Reader, err error) {
@@ -80,119 +78,59 @@ func NewReader(r io.ReaderAt) (rdr Reader, err error) {
if err != nil { if err != nil {
return rdr, errors.Join(errors.New("failed to read root directory"), err) return rdr, errors.Join(errors.New("failed to read root directory"), err)
} }
rdr.fragTable = NewTable(&rdr, rdr.Superblock.FragTableStart, rdr.Superblock.FragCount, readFrag)
rdr.idTable = NewTable(&rdr, rdr.Superblock.IdTableStart, uint32(rdr.Superblock.IdCount), readId)
rdr.exportTable = NewTable(&rdr, rdr.Superblock.ExportTableStart, rdr.Superblock.InodeCount, readRef)
return return
} }
// readFrag decodes one 16-byte fragment table record from r. Only the first
// 12 bytes (Start and Size, little-endian) are stored; the trailing 4 bytes
// are consumed but not kept.
//
// io.ReadFull is used so that short reads are retried and a truncated record
// is reported as an error instead of being decoded from a partial buffer.
func readFrag(r io.Reader) (fragEntry, error) {
	var dat [16]byte
	if _, err := io.ReadFull(r, dat[:]); err != nil {
		return fragEntry{}, err
	}
	return fragEntry{
		Start: binary.LittleEndian.Uint64(dat[0:8]),
		Size:  binary.LittleEndian.Uint32(dat[8:12]),
	}, nil
}
// readId decodes one little-endian 32-bit id from r.
//
// io.ReadFull is used so that a reader delivering the four bytes across
// several short reads still yields the right value, and truncated input is
// reported as an error instead of being decoded from a partial buffer.
func readId(r io.Reader) (uint32, error) {
	var dat [4]byte
	if _, err := io.ReadFull(r, dat[:]); err != nil {
		return 0, err
	}
	return binary.LittleEndian.Uint32(dat[:]), nil
}
// readRef decodes one little-endian 64-bit inode reference from r.
//
// io.ReadFull is used so that short reads are retried and truncated input is
// reported as an error instead of being decoded from a partial buffer.
func readRef(r io.Reader) (InodeRef, error) {
	var dat [8]byte
	if _, err := io.ReadFull(r, dat[:]); err != nil {
		return 0, err
	}
	return binary.LittleEndian.Uint64(dat[:]), nil
}
// Get a uid/gid at the given index. Lazily populates the reader's Id table as necessary. // Get a uid/gid at the given index. Lazily populates the reader's Id table as necessary.
func (r *Reader) Id(i uint16) (uint32, error) { func (r *Reader) Id(i uint16) (uint32, error) {
if len(r.idTable) > int(i) { return r.idTable.Get(uint32(i))
return r.idTable[i], nil
} else if i >= r.Superblock.IdCount {
return 0, errors.New("id out of bounds")
}
// Populate the id table as needed
var blockNum uint32
if i != 0 { // If i == 0, we go negatives causing issues with uint32s
blockNum = uint32(math.Ceil(float64(i+1)/2048)) - 1
} else {
blockNum = 0
}
blocksRead := len(r.idTable) / 2048
blocksToRead := int(blockNum) - blocksRead + 1
var offset uint64
var idsToRead uint16
var idsTmp []uint32
var err error
var rdr metadata.Reader
// We can *maybe* have a slight speed increase by manually decoding instead of using reflection via binary.Read
for i := blocksRead; i < int(blocksRead)+blocksToRead; i++ {
err = binary.Read(toreader.NewReader(r.r, int64(r.Superblock.IdTableStart)+int64(8*i)), binary.LittleEndian, &offset)
if err != nil {
return 0, err
}
idsToRead = min(r.Superblock.IdCount-uint16(len(r.idTable)), 2048)
idsTmp = make([]uint32, idsToRead)
rdr = metadata.NewReader(toreader.NewReader(r.r, int64(offset)), r.d)
err = binary.Read(&rdr, binary.LittleEndian, &idsTmp)
rdr.Close()
if err != nil {
return 0, err
}
r.idTable = append(r.idTable, idsTmp...)
}
return r.idTable[i], nil
} }
// Get a fragment entry at the given index. Lazily populates the reader's fragment table as necessary. // Get a fragment entry at the given index. Lazily populates the reader's fragment table as necessary.
func (r *Reader) fragEntry(i uint32) (fragEntry, error) { func (r *Reader) fragEntry(i uint32) (fragEntry, error) {
return readPagedItems(int(i), 512, &r.fragTable, int(r.Superblock.FragCount), return r.fragTable.Get(i)
func(startBlock, fragsToRead int) ([]fragEntry, error) {
// get the offset of the next block of fragments
var offset uint64
err := binary.Read(toreader.NewReader(r.r, int64(r.Superblock.FragTableStart)+int64(8*startBlock)), binary.LittleEndian, &offset)
if err != nil {
return nil, err
}
fragsTmp := make([]fragEntry, fragsToRead)
rdr := metadata.NewReader(toreader.NewReader(r.r, int64(offset)), r.d)
defer rdr.Close()
err = binary.Read(rdr, binary.LittleEndian, &fragsTmp)
if err != nil {
return nil, err
}
return fragsTmp, nil
})
} }
// Get an inode reference at the given index. Lazily populates the reader's export table as necessary. // Get an inode reference at the given index. Lazily populates the reader's export table as necessary.
func (r *Reader) inodeRef(i uint32) (uint64, error) { func (r *Reader) inodeRef(i uint32) (InodeRef, error) {
if !r.Superblock.Exportable() { return r.exportTable.Get(i)
return 0, ErrorNotExportable
}
if len(r.exportTable) > int(i) {
return r.exportTable[i], nil
} else if i >= r.Superblock.InodeCount {
return 0, errors.New("inode out of bounds")
}
// Populate the export table as needed
var blockNum uint32
if i != 0 { // If i == 0, we go negatives causing issues with uint32s
blockNum = uint32(math.Ceil(float64(i+1)/1024)) - 1
} else {
blockNum = 0
}
blocksRead := len(r.exportTable) / 1024
blocksToRead := int(blockNum) - blocksRead + 1
var offset uint64
var refsToRead uint32
var refsTmp []uint64
var err error
var rdr metadata.Reader
// We can *maybe* have a slight speed increase by manually decoding instead of using reflection via binary.Read
for i := blocksRead; i < int(blocksRead)+blocksToRead; i++ {
err = binary.Read(toreader.NewReader(r.r, int64(r.Superblock.ExportTableStart)+int64(8*i)), binary.LittleEndian, &offset)
if err != nil {
return 0, err
}
refsToRead = min(r.Superblock.InodeCount-uint32(len(r.exportTable)), 1024)
refsTmp = make([]uint64, refsToRead)
rdr = metadata.NewReader(toreader.NewReader(r.r, int64(offset)), r.d)
err = binary.Read(&rdr, binary.LittleEndian, &refsTmp)
rdr.Close()
if err != nil {
return 0, err
}
r.exportTable = append(r.exportTable, refsTmp...)
}
return r.exportTable[i], nil
} }
func (r Reader) Inode(i uint32) (inode.Inode, error) { func (r Reader) Inode(i uint32) (inode.Inode, error) {
ref, err := r.inodeRef(i) ref, err := r.inodeRef(i - 1) // Inode table is 1 indexed
if err != nil { if err != nil {
return inode.Inode{}, err return inode.Inode{}, err
} }
+2 -2
View File
@@ -77,7 +77,7 @@ func TestReader(t *testing.T) {
path := filepath.Join(tmpDir, "extractTest") path := filepath.Join(tmpDir, "extractTest")
os.RemoveAll(path) os.RemoveAll(path)
os.MkdirAll(path, 0777) os.MkdirAll(path, 0777)
err = extractToDir(rdr, &rdr.Root.FileBase, path) err = extractToDir(rdr, rdr.Root.FileBase, path)
if err != nil { if err != nil {
t.Fatal(err) t.Fatal(err)
} }
@@ -103,7 +103,7 @@ func TestSingleFile(t *testing.T) {
if err != nil { if err != nil {
t.Fatal(err) t.Fatal(err)
} }
err = extractToDir(rdr, &b, path) err = extractToDir(rdr, b, path)
if err != nil { if err != nil {
t.Fatal(err) t.Fatal(err)
} }
+1 -1
View File
@@ -14,7 +14,7 @@ type superblock struct {
IdCount uint16 IdCount uint16
VerMaj uint16 VerMaj uint16
VerMin uint16 VerMin uint16
RootInodeRef uint64 RootInodeRef InodeRef
Size uint64 Size uint64
IdTableStart uint64 IdTableStart uint64
XattrTableStart uint64 XattrTableStart uint64
+85
View File
@@ -0,0 +1,85 @@
package squashfslow
import (
"encoding/binary"
"errors"
"io"
"sync"
"github.com/CalebQ42/squashfs/internal/metadata"
"github.com/CalebQ42/squashfs/internal/toreader"
)
// Sentinel errors for table lookups.
var (
	// errOutOfBounds is returned when a requested index is not below the
	// table's total item count.
	errOutOfBounds           = errors.New("out of bounds")
	errUnexpectedOutOfBounds = errors.New("unexpected out of bounds")
	errNilCollection         = errors.New("nil collection")
)
// CreateFunction decodes a single value of type T from the given reader.
// A Table calls it once per item while loading a block.
type CreateFunction[T any] = func(io.Reader) (T, error)
// Table is a lazily populated, index-addressable list of items of type T.
// Items are decoded one block at a time the first time an index inside that
// block is requested, and are kept in currentItems afterwards.
type Table[T any] struct {
// totalItems is the item count given to NewTable; Get rejects any index at or beyond it.
totalItems uint32
// itemsPerBlock is the most items decoded from one block (8192 divided by binary.Size of T).
itemsPerBlock uint32
// offset is the position in rdr.r of the next 8-byte block pointer; it advances by 8 per block loaded.
offset uint64
// mut guards currentItems and offset.
mut sync.RWMutex
// currentItems holds every item decoded so far, in index order.
currentItems []T
// rdr supplies the underlying ReaderAt (rdr.r) and the decompressor (rdr.d).
rdr *Reader
// createFunc decodes one item from a block's metadata reader.
createFunc CreateFunction[T]
}
// NewTable builds an empty Table whose block pointers begin at start within
// rdr's underlying data and which holds totalItems items in total. Nothing is
// read here; items are decoded with createFunc the first time they are
// requested. The per-block item count is 8192 divided by the encoded size of
// T as reported by binary.Size.
func NewTable[T any](rdr *Reader, start uint64, totalItems uint32, createFunc CreateFunction[T]) *Table[T] {
	var sample T
	tbl := new(Table[T])
	tbl.rdr = rdr
	tbl.offset = start
	tbl.totalItems = totalItems
	tbl.createFunc = createFunc
	tbl.itemsPerBlock = 8192 / uint32(binary.Size(sample))
	return tbl
}
// Get returns the item at requestedItemIndex. It returns errOutOfBounds when
// the index is not below the table's total item count. Items that are already
// loaded are served under the read lock; otherwise the missing blocks are
// loaded via fillAndGet.
func (t *Table[T]) Get(requestedItemIndex uint32) (T, error) {
	t.mut.RLock()
	if requestedItemIndex >= t.totalItems {
		t.mut.RUnlock()
		var zero T
		return zero, errOutOfBounds
	}
	if requestedItemIndex < uint32(len(t.currentItems)) {
		// Copy the item out while the read lock is still held: fillAndGet
		// appends to currentItems under the write lock, so touching the slice
		// after RUnlock would be a data race.
		item := t.currentItems[requestedItemIndex]
		t.mut.RUnlock()
		return item, nil
	}
	t.mut.RUnlock()
	return t.fillAndGet(requestedItemIndex)
}
// fillAndGet loads further blocks, in order, until the item at
// requestedItemIndex has been decoded, then returns it. It holds the write
// lock for its whole duration.
//
// A block is only committed to the table once every item in it has decoded
// successfully. If anything fails, currentItems and offset are left exactly
// as they were, so a later call can retry and no zero-valued placeholder
// items become visible through Get.
func (t *Table[T]) fillAndGet(requestedItemIndex uint32) (T, error) {
	t.mut.Lock()
	defer t.mut.Unlock()
	var zero T
	// The condition is evaluated under the write lock, so a block loaded by
	// another goroutine while this one was waiting is not loaded twice.
	for uint32(len(t.currentItems)) <= requestedItemIndex {
		// Each 8-byte little-endian pointer gives the location of one block.
		var blockOffset uint64
		err := binary.Read(toreader.NewReader(t.rdr.r, int64(t.offset)), binary.LittleEndian, &blockOffset)
		if err != nil {
			return zero, err
		}
		toRead := min(t.itemsPerBlock, t.totalItems-uint32(len(t.currentItems)))
		items, err := t.readBlock(blockOffset, toRead)
		if err != nil {
			return zero, err
		}
		t.currentItems = append(t.currentItems, items...)
		t.offset += 8
	}
	return t.currentItems[requestedItemIndex], nil
}

// readBlock decodes count items from the metadata block located at offset and
// closes the metadata reader before returning.
func (t *Table[T]) readBlock(offset uint64, count uint32) ([]T, error) {
	metaRdr := metadata.NewReader(toreader.NewReader(t.rdr.r, int64(offset)), t.rdr.d)
	defer metaRdr.Close()
	items := make([]T, count)
	for i := range items {
		var err error
		items[i], err = t.createFunc(&metaRdr)
		if err != nil {
			return nil, err
		}
	}
	return items, nil
}
+2 -2
View File
@@ -116,8 +116,8 @@ func BenchmarkRace(b *testing.B) {
b.Log("Unsquashfs error:", err) b.Log("Unsquashfs error:", err)
} }
unsquashTime = time.Since(start) unsquashTime = time.Since(start)
b.Log("Library took:", libTime.Round(time.Millisecond)) // b.Log("Library took:", libTime.Round(time.Millisecond))
b.Log("unsquashfs took:", unsquashTime.Round(time.Millisecond)) // b.Log("unsquashfs took:", unsquashTime.Round(time.Millisecond))
b.Log("unsquashfs is", strconv.FormatFloat(float64(libTime.Milliseconds())/float64(unsquashTime.Milliseconds()), 'f', 2, 64), "times faster") b.Log("unsquashfs is", strconv.FormatFloat(float64(libTime.Milliseconds())/float64(unsquashTime.Milliseconds()), 'f', 2, 64), "times faster")
} }