Merge pull request #41 from CalebQ42/redo-extract

Redo extract
This commit is contained in:
Caleb Gardner
2025-06-07 03:12:55 -05:00
committed by GitHub
21 changed files with 708 additions and 706 deletions
+5
View File
@@ -1,2 +1,7 @@
testing testing
/go-unsquashfs /go-unsquashfs
squashfs.test
# Memory and CPU pprof profiles
mem.out
cpu.out
+8 -20
View File
@@ -4,46 +4,34 @@ import (
"io" "io"
"io/fs" "io/fs"
"runtime" "runtime"
"sync"
"github.com/CalebQ42/squashfs/internal/routinemanager"
) )
type ExtractionOptions struct { type ExtractionOptions struct {
manager *routinemanager.Manager dispatcher chan struct{} // Limits the amount of work being done simultaneously.
fullRdrPool sync.Pool // Pool for data.FullReader results.
LogOutput io.Writer //Where the verbose log should write. LogOutput io.Writer //Where the verbose log should write.
DereferenceSymlink bool //Replace symlinks with the target file. DereferenceSymlink bool //Replace symlinks with the target file.
UnbreakSymlink bool //Try to make sure symlinks remain unbroken when extracted, without changing the symlink. UnbreakSymlink bool //Try to make sure symlinks remain unbroken when extracted, without changing the symlink.
Verbose bool //Prints extra info to log on an error. Verbose bool //Prints extra info to log on an error.
IgnorePerm bool //Ignore file's permissions and instead use Perm. IgnorePerm bool //Ignore file's permissions and instead use Perm.
Perm fs.FileMode //Permission to use when IgnorePerm. Defaults to 0777. Perm fs.FileMode //Permission to use when IgnorePerm. Defaults to 0777.
SimultaneousFiles uint16 //Number of files to process in parallel. Default set based on runtime.NumCPU(). ExtractionRoutines uint16 //The number of threads to use during extraction. Defaults to a number based on runtime.NumCPU().
ExtractionRoutines uint16 //Number of goroutines to use for each file's extraction. Only applies to regular files. Default set based on runtime.NumCPU(). SimultaneousFiles uint16 //Deprecated: Only use ExtractionRoutines
} }
// The default extraction options. // The default extraction options. Uses half of your CPU cores.
func DefaultOptions() *ExtractionOptions { func DefaultOptions() *ExtractionOptions {
cores := uint16(runtime.NumCPU() / 2)
var files, routines uint16
if cores <= 4 {
files = 1
routines = cores
} else {
files = cores - 4
routines = 4
}
return &ExtractionOptions{ return &ExtractionOptions{
Perm: 0777, Perm: 0777,
SimultaneousFiles: files, ExtractionRoutines: uint16(runtime.NumCPU() / 2),
ExtractionRoutines: routines,
} }
} }
// Less limited default options. Can run up 2x faster than DefaultOptions. // Faster extraction option. Uses all CPU cores.
// Tends to use all available CPU resources.
func FastOptions() *ExtractionOptions { func FastOptions() *ExtractionOptions {
return &ExtractionOptions{ return &ExtractionOptions{
Perm: 0777, Perm: 0777,
SimultaneousFiles: uint16(runtime.NumCPU()),
ExtractionRoutines: uint16(runtime.NumCPU()), ExtractionRoutines: uint16(runtime.NumCPU()),
} }
} }
+30 -8
View File
@@ -10,8 +10,8 @@ import (
"path/filepath" "path/filepath"
"runtime" "runtime"
"strconv" "strconv"
"sync"
"github.com/CalebQ42/squashfs/internal/routinemanager"
squashfslow "github.com/CalebQ42/squashfs/low" squashfslow "github.com/CalebQ42/squashfs/low"
"github.com/CalebQ42/squashfs/low/data" "github.com/CalebQ42/squashfs/low/data"
"github.com/CalebQ42/squashfs/low/inode" "github.com/CalebQ42/squashfs/low/inode"
@@ -54,6 +54,7 @@ func (f File) FS() (FS, error) {
func (f *File) Close() error { func (f *File) Close() error {
f.rdr.Close() f.rdr.Close()
f.full.Close() f.full.Close()
f.rdrInit = false
return nil return nil
} }
@@ -216,8 +217,16 @@ func (f File) Extract(folder string) error {
// Extract the file to the given folder. If the file is a folder, the folder's contents will be extracted to the folder. // Extract the file to the given folder. If the file is a folder, the folder's contents will be extracted to the folder.
// Allows setting various extraction options via ExtractionOptions. // Allows setting various extraction options via ExtractionOptions.
func (f File) ExtractWithOptions(path string, op *ExtractionOptions) error { func (f File) ExtractWithOptions(path string, op *ExtractionOptions) error {
if op.manager == nil { if op.dispatcher == nil {
op.manager = routinemanager.NewManager(op.SimultaneousFiles) op.fullRdrPool = sync.Pool{
New: func() any {
return &data.BlockResults{}
},
}
op.dispatcher = make(chan struct{}, op.ExtractionRoutines)
for range op.ExtractionRoutines {
op.dispatcher <- struct{}{}
}
if op.LogOutput != nil { if op.LogOutput != nil {
log.SetOutput(op.LogOutput) log.SetOutput(op.LogOutput)
} }
@@ -231,11 +240,13 @@ func (f File) ExtractWithOptions(path string, op *ExtractionOptions) error {
} }
switch f.Low.Inode.Type { switch f.Low.Inode.Type {
case inode.Dir, inode.EDir: case inode.Dir, inode.EDir:
<-op.dispatcher
d, err := f.Low.ToDir(f.r.Low) d, err := f.Low.ToDir(f.r.Low)
if err != nil { if err != nil {
if op.Verbose { if op.Verbose {
log.Println("Failed to create squashfs.Directory for", path) log.Println("Failed to create squashfs.Directory for", path)
} }
op.dispatcher <- struct{}{}
return errors.Join(errors.New("failed to create squashfs.Directory: "+path), err) return errors.Join(errors.New("failed to create squashfs.Directory: "+path), err)
} }
errChan := make(chan error, len(d.Entries)) errChan := make(chan error, len(d.Entries))
@@ -248,19 +259,21 @@ func (f File) ExtractWithOptions(path string, op *ExtractionOptions) error {
return errors.Join(errors.New("failed to get base from entry: "+path), err) return errors.Join(errors.New("failed to get base from entry: "+path), err)
} }
go func(b squashfslow.FileBase, path string) { go func(b squashfslow.FileBase, path string) {
i := op.manager.Lock()
if b.IsDir() { if b.IsDir() {
<-op.dispatcher
extDir := filepath.Join(path, b.Name) extDir := filepath.Join(path, b.Name)
err = os.Mkdir(extDir, 0777) err = os.Mkdir(extDir, 0777)
op.manager.Unlock(i)
if err != nil { if err != nil {
if op.Verbose { if op.Verbose {
log.Println("Failed to create directory", path) log.Println("Failed to create directory", path)
} }
op.dispatcher <- struct{}{}
errChan <- errors.Join(errors.New("failed to create directory: "+path), err) errChan <- errors.Join(errors.New("failed to create directory: "+path), err)
return return
} }
err = f.r.FileFromBase(b, f.r.FSFromDirectory(d, f.parent)).ExtractWithOptions(extDir, op) fil := f.r.FileFromBase(b, f.r.FSFromDirectory(d, f.parent))
op.dispatcher <- struct{}{}
err = fil.ExtractWithOptions(extDir, op)
if err != nil { if err != nil {
if op.Verbose { if op.Verbose {
log.Println("Failed to extract directory", path) log.Println("Failed to extract directory", path)
@@ -272,12 +285,12 @@ func (f File) ExtractWithOptions(path string, op *ExtractionOptions) error {
} else { } else {
fil := f.r.FileFromBase(b, f.r.FSFromDirectory(d, f.parent)) fil := f.r.FileFromBase(b, f.r.FSFromDirectory(d, f.parent))
err = fil.ExtractWithOptions(path, op) err = fil.ExtractWithOptions(path, op)
op.manager.Unlock(i)
fil.Close() fil.Close()
errChan <- err errChan <- err
} }
}(b, path) }(b, path)
} }
op.dispatcher <- struct{}{}
var errCache []error var errCache []error
for range d.Entries { for range d.Entries {
err := <-errChan err := <-errChan
@@ -289,23 +302,28 @@ func (f File) ExtractWithOptions(path string, op *ExtractionOptions) error {
return errors.Join(errors.New("failed to extract folder: "+path), errors.Join(errCache...)) return errors.Join(errors.New("failed to extract folder: "+path), errors.Join(errCache...))
} }
case inode.Fil, inode.EFil: case inode.Fil, inode.EFil:
<-op.dispatcher
path = filepath.Join(path, f.Low.Name) path = filepath.Join(path, f.Low.Name)
outFil, err := os.Create(path) outFil, err := os.Create(path)
if err != nil { if err != nil {
if op.Verbose { if op.Verbose {
log.Println("Failed to create file", path) log.Println("Failed to create file", path)
} }
op.dispatcher <- struct{}{}
return errors.Join(errors.New("failed to create file: "+path), err) return errors.Join(errors.New("failed to create file: "+path), err)
} }
defer outFil.Close() defer outFil.Close()
full, err := f.Low.GetFullReader(&f.r.Low) full, err := f.Low.GetFullReader(&f.r.Low)
defer full.Close()
if err != nil { if err != nil {
if op.Verbose { if op.Verbose {
log.Println("Failed to create full reader for", path) log.Println("Failed to create full reader for", path)
} }
op.dispatcher <- struct{}{}
return errors.Join(errors.New("failed to create full reader: "+path), err) return errors.Join(errors.New("failed to create full reader: "+path), err)
} }
full.SetGoroutineLimit(op.ExtractionRoutines) full.SetDispatcherPool(op.dispatcher, &op.fullRdrPool)
op.dispatcher <- struct{}{}
_, err = full.WriteTo(outFil) _, err = full.WriteTo(outFil)
if err != nil { if err != nil {
if op.Verbose { if op.Verbose {
@@ -314,6 +332,8 @@ func (f File) ExtractWithOptions(path string, op *ExtractionOptions) error {
return errors.Join(errors.New("failed to write file: "+path), err) return errors.Join(errors.New("failed to write file: "+path), err)
} }
case inode.Sym, inode.ESym: case inode.Sym, inode.ESym:
<-op.dispatcher
defer func() { op.dispatcher <- struct{}{} }()
symPath := f.SymlinkPath() symPath := f.SymlinkPath()
if op.DereferenceSymlink { if op.DereferenceSymlink {
filTmp := f.GetSymlinkFile() filTmp := f.GetSymlinkFile()
@@ -361,6 +381,8 @@ func (f File) ExtractWithOptions(path string, op *ExtractionOptions) error {
} }
} }
case inode.Char, inode.EChar, inode.Block, inode.EBlock, inode.Fifo, inode.EFifo: case inode.Char, inode.EChar, inode.Block, inode.EBlock, inode.Fifo, inode.EFifo:
<-op.dispatcher
defer func() { op.dispatcher <- struct{}{} }()
if runtime.GOOS == "windows" { if runtime.GOOS == "windows" {
if op.Verbose { if op.Verbose {
log.Println(f.path(), "ignored. A device link and can't be created on Windows.") log.Println(f.path(), "ignored. A device link and can't be created on Windows.")
+5 -1
View File
@@ -16,5 +16,9 @@ func NewZstd() Zstd {
} }
func (z Zstd) Decompress(data []byte) ([]byte, error) { func (z Zstd) Decompress(data []byte) ([]byte, error) {
return z.rdr.DecodeAll(data, nil) dat, err := z.rdr.DecodeAll(data, nil)
if err != nil {
return nil, err
}
return dat, err
} }
+2
View File
@@ -1,3 +1,5 @@
# Lower-Level Squashfs # Lower-Level Squashfs
This library is a lower level version of the main [squashfs](https://github.com/CalebQ42/squashfs) library that doesn't try to be easy to use and exposes a lot of information that is not necessary for most use cases. This library is a lower level version of the main [squashfs](https://github.com/CalebQ42/squashfs) library that doesn't try to be easy to use and exposes a lot of information that is not necessary for most use cases.
I will try to keep the API stable, but it is not guaranteed.
+110 -113
View File
@@ -1,130 +1,127 @@
package squashfslow package squashfslow
import ( // TODO: Make work
"errors" // func requireNoError(t *testing.T, err error) {
"testing" // t.Helper()
) // if err != nil {
// t.Fatal(err)
// }
// }
func requireNoError(t *testing.T, err error) { // func assertEqual(t *testing.T, want int, got int) {
t.Helper() // t.Helper()
if err != nil { // if want != got {
t.Fatal(err) // t.Errorf("want %d, got %d", want, got)
} // }
} // }
func assertEqual(t *testing.T, want int, got int) { // func assertLength(t *testing.T, want int, slice []int) {
t.Helper() // t.Helper()
if want != got { // if len(slice) != want {
t.Errorf("want %d, got %d", want, got) // t.Errorf("want len %d, got %d", want, len(slice))
} // }
} // }
func assertLength(t *testing.T, want int, slice []int) { // func assertErrorIs(t *testing.T, err error, wantErr error) {
t.Helper() // t.Helper()
if len(slice) != want { // if err == nil {
t.Errorf("want len %d, got %d", want, len(slice)) // t.Errorf("want %s, got nil", wantErr)
} // return
} // }
// if !errors.Is(err, wantErr) {
// t.Errorf("want %s, got %v", wantErr, err)
// }
// }
func assertErrorIs(t *testing.T, err error, wantErr error) { // func TestCachingPagedReader(t *testing.T) {
t.Helper() // // Mock readBlocks function
if err == nil { // mockReadNMore := func(startBlock, numItems int) ([]int, error) {
t.Errorf("want %s, got nil", wantErr) // if startBlock < 0 {
return // return nil, errors.New("invalid block start")
} // }
if !errors.Is(err, wantErr) { // var result []int
t.Errorf("want %s, got %v", wantErr, err) // for i := 0; i < numItems; i++ {
} // result = append(result, startBlock*512+i)
} // }
// return result, nil
// }
func TestCachingPagedReader(t *testing.T) { // t.Run("ValidRequestWithinFirstBlock", func(t *testing.T) {
// Mock readBlocks function // tab := NewTable[int]()
mockReadNMore := func(startBlock, numItems int) ([]int, error) { // currentItems := make([]int, 0)
if startBlock < 0 { // item, err := readPagedItems(300, 512, &currentItems, 2048, mockReadNMore)
return nil, errors.New("invalid block start") // requireNoError(t, err)
} // assertEqual(t, 300, item)
var result []int // assertLength(t, 512, currentItems) // Ensure one block is read
for i := 0; i < numItems; i++ { // })
result = append(result, startBlock*512+i)
}
return result, nil
}
t.Run("ValidRequestWithinFirstBlock", func(t *testing.T) { // t.Run("ValidRequestAcrossMultipleBlocks", func(t *testing.T) {
currentItems := make([]int, 0) // currentItems := make([]int, 0)
item, err := readPagedItems(300, 512, &currentItems, 2048, mockReadNMore) // item, err := readPagedItems(600, 512, &currentItems, 2048, mockReadNMore)
requireNoError(t, err) // requireNoError(t, err)
assertEqual(t, 300, item) // assertEqual(t, 600, item)
assertLength(t, 512, currentItems) // Ensure one block is read // assertLength(t, 1024, currentItems)
}) // })
t.Run("ValidRequestAcrossMultipleBlocks", func(t *testing.T) { // t.Run("SequentialRequestsWithinBlocks", func(t *testing.T) {
currentItems := make([]int, 0) // currentItems := make([]int, 0)
item, err := readPagedItems(600, 512, &currentItems, 2048, mockReadNMore) // // First request
requireNoError(t, err) // item, err := readPagedItems(300, 512, &currentItems, 2048, mockReadNMore)
assertEqual(t, 600, item) // requireNoError(t, err)
assertLength(t, 1024, currentItems) // assertEqual(t, 300, item)
})
t.Run("SequentialRequestsWithinBlocks", func(t *testing.T) { // // Second request in the same block
currentItems := make([]int, 0) // item, err = readPagedItems(400, 512, &currentItems, 2048, mockReadNMore)
// First request // requireNoError(t, err)
item, err := readPagedItems(300, 512, &currentItems, 2048, mockReadNMore) // assertEqual(t, 400, item)
requireNoError(t, err) // assertLength(t, 512, currentItems)
assertEqual(t, 300, item) // })
// Second request in the same block // t.Run("RequestExactBlockBoundary", func(t *testing.T) {
item, err = readPagedItems(400, 512, &currentItems, 2048, mockReadNMore) // currentItems := make([]int, 0)
requireNoError(t, err) // item, err := readPagedItems(511, 512, &currentItems, 2048, mockReadNMore)
assertEqual(t, 400, item) // requireNoError(t, err)
assertLength(t, 512, currentItems) // assertEqual(t, 511, item)
}) // assertLength(t, 512, currentItems)
t.Run("RequestExactBlockBoundary", func(t *testing.T) { // // Request the next block's first item
currentItems := make([]int, 0) // item, err = readPagedItems(512, 512, &currentItems, 2048, mockReadNMore)
item, err := readPagedItems(511, 512, &currentItems, 2048, mockReadNMore) // requireNoError(t, err)
requireNoError(t, err) // assertEqual(t, 512, item)
assertEqual(t, 511, item) // assertLength(t, 1024, currentItems)
assertLength(t, 512, currentItems) // })
// Request the next block's first item // t.Run("OutOfBoundsRequest", func(t *testing.T) {
item, err = readPagedItems(512, 512, &currentItems, 2048, mockReadNMore) // currentItems := make([]int, 0)
requireNoError(t, err) // _, err := readPagedItems(2048, 512, &currentItems, 2048, mockReadNMore)
assertEqual(t, 512, item) // assertErrorIs(t, err, errOutOfBounds)
assertLength(t, 1024, currentItems) // })
})
t.Run("OutOfBoundsRequest", func(t *testing.T) { // t.Run("RequestBeyondReadBlocks", func(t *testing.T) {
currentItems := make([]int, 0) // readFail := errors.New("failed to read block")
_, err := readPagedItems(2048, 512, &currentItems, 2048, mockReadNMore) // failingReadBlocks := func(startBlock, numBlocks int) ([]int, error) {
assertErrorIs(t, err, errOutOfBounds) // if startBlock > 1 {
}) // return nil, readFail
// }
// var result []int
// for i := 0; i < numBlocks*512; i++ {
// result = append(result, startBlock*512+i)
// }
// return result, nil
// }
t.Run("RequestBeyondReadBlocks", func(t *testing.T) { // currentItems := make([]int, 0)
readFail := errors.New("failed to read block") // _, err := readPagedItems(1024, 512, &currentItems, 2048, failingReadBlocks)
failingReadBlocks := func(startBlock, numBlocks int) ([]int, error) { // assertErrorIs(t, err, readFail)
if startBlock > 1 { // })
return nil, readFail
}
var result []int
for i := 0; i < numBlocks*512; i++ {
result = append(result, startBlock*512+i)
}
return result, nil
}
currentItems := make([]int, 0) // t.Run("partial last page", func(t *testing.T) {
_, err := readPagedItems(1024, 512, &currentItems, 2048, failingReadBlocks) // currentItems := make([]int, 0)
assertErrorIs(t, err, readFail)
})
t.Run("partial last page", func(t *testing.T) { // // Request the next block's first item
currentItems := make([]int, 0) // item, err := readPagedItems(512, 512, &currentItems, 612, mockReadNMore)
// requireNoError(t, err)
// Request the next block's first item // assertEqual(t, 512, item)
item, err := readPagedItems(512, 512, &currentItems, 612, mockReadNMore) // assertLength(t, 612, currentItems)
requireNoError(t, err) // })
assertEqual(t, 512, item) // }
assertLength(t, 612, currentItems)
})
}
+179 -207
View File
@@ -3,258 +3,230 @@ package data
import ( import (
"errors" "errors"
"io" "io"
"io/fs"
"math"
"runtime" "runtime"
"sync" "sync"
"github.com/CalebQ42/squashfs/internal/decompress" "github.com/CalebQ42/squashfs/internal/decompress"
) )
type FragReaderConstructor func() (io.Reader, error)
type FullReader struct { type FullReader struct {
r io.ReaderAt fileSize uint64
d decompress.Decompressor
frag FragReaderConstructor
sizes []uint32
initialOffset int64
finalBlockSize uint64
blockSize uint32 blockSize uint32
goroutineLimit uint16 dispatcher chan struct{}
closed bool pool *sync.Pool
rdr io.ReaderAt
decomp decompress.Decompressor
sizes []uint32
blockOffsets []uint64
fragDat []byte
} }
func NewFullReader(r io.ReaderAt, initialOffset int64, d decompress.Decompressor, sizes []uint32, finalBlockSize uint64, blockSize uint32) FullReader { func NewFullReader(rdr io.ReaderAt, decomp decompress.Decompressor, blockSize uint32, size uint64, start uint64, sizes []uint32) FullReader {
return FullReader{ out := FullReader{
r: r, fileSize: size,
d: d,
sizes: sizes,
initialOffset: initialOffset,
goroutineLimit: uint16(runtime.NumCPU()),
finalBlockSize: finalBlockSize,
blockSize: blockSize, blockSize: blockSize,
rdr: rdr,
decomp: decomp,
sizes: sizes,
} }
out.blockOffsets = make([]uint64, len(sizes))
curOffset := start
for i := range sizes {
out.blockOffsets[i] = curOffset
curOffset += uint64(sizes[i]) &^ (1 << 24)
}
return out
} }
func (r *FullReader) Close() error { func (f *FullReader) Close() error {
r.closed = true f.fragDat = nil
r.r = nil f.sizes = nil
r.d = nil f.blockOffsets = nil
r.frag = nil
r.sizes = nil
return nil return nil
} }
func (r *FullReader) AddFrag(frag FragReaderConstructor) { func (f *FullReader) AddFragData(blockStart uint64, blockSize uint32, offset uint32) error {
r.frag = frag realSize := blockSize &^ (1 << 24)
dat := make([]byte, realSize)
_, err := f.rdr.ReadAt(dat, int64(blockStart))
if err != nil {
return err
}
if blockSize == realSize {
dat, err = f.decomp.Decompress(dat)
if err != nil {
return err
}
}
f.fragDat = make([]byte, f.fileSize%uint64(f.blockSize))
copy(f.fragDat, dat[offset:])
dat = nil
return nil
} }
func (r *FullReader) SetGoroutineLimit(limit uint16) { func (f *FullReader) SetDispatcherPool(dispatcher chan struct{}, pool *sync.Pool) {
if limit <= 0 { f.dispatcher = dispatcher
r.goroutineLimit = 1 f.pool = pool
}
r.goroutineLimit = limit
} }
type retValue struct { // The number of blocks, including the fragment block if present
err error func (f FullReader) BlockNum() uint32 {
data []byte out := len(f.sizes)
index uint64 if f.fragDat != nil {
out++
}
return uint32(out)
} }
func (r FullReader) process(index uint64, fileOffset uint64, pool *sync.Pool, retChan chan *retValue) { // Returns the data block at the given index
ret := pool.Get().(*retValue) func (f FullReader) Block(i uint32) ([]byte, error) {
ret.index = index if i == uint32(len(f.sizes)) && f.fragDat != nil {
realSize := r.sizes[index] &^ (1 << 24) return f.fragDat, nil
}
if i >= uint32(len(f.sizes)) {
return nil, errors.New("invalid block index")
}
realSize := f.sizes[i] &^ (1 << 24)
if realSize == 0 { if realSize == 0 {
if index == uint64(len(r.sizes))-1 && r.frag == nil { if i == uint32(len(f.sizes)-1) && f.fragDat == nil {
ret.data = make([]byte, r.finalBlockSize) return make([]byte, f.fileSize%uint64(f.blockSize)), nil
} else {
ret.data = make([]byte, r.blockSize)
} }
ret.err = nil return make([]byte, f.blockSize), nil
retChan <- ret
return
} }
ret.data = make([]byte, realSize) dat := make([]byte, realSize)
_, ret.err = r.r.ReadAt(ret.data, r.initialOffset+int64(fileOffset)) _, err := f.rdr.ReadAt(dat, int64(f.blockOffsets[i]))
if r.sizes[index] == realSize { if err != nil {
ret.data, ret.err = r.d.Decompress(ret.data) return nil, err
} }
retChan <- ret if realSize == f.sizes[i] {
dat, err = f.decomp.Decompress(dat)
}
return dat, err
} }
func (r FullReader) WriteTo(w io.Writer) (int64, error) { func (f FullReader) blockFromPool(i uint32) *BlockResults {
if r.closed { out := f.pool.Get().(*BlockResults)
return 0, fs.ErrClosed out.idx = i
out.err = nil
if i == uint32(len(f.sizes)) && f.fragDat != nil {
out.block = f.fragDat
return out
} }
// if wa, is := w.(io.WriterAt); is { if i >= uint32(len(f.sizes)) {
// return r.writeToWriteAt(wa) out.err = errors.New("invalid block index")
// } return out
var curIndex uint64 }
var curOffset uint64 realSize := f.sizes[i] &^ (1 << 24)
var toProcess uint16 if realSize == 0 {
var wrote int64 if i == uint32(len(f.sizes)-1) && f.fragDat == nil {
cache := make(map[uint64]*retValue) out.block = make([]byte, f.fileSize%uint64(f.blockSize))
var errCache []error return out
retChan := make(chan *retValue, r.goroutineLimit) }
pool := &sync.Pool{ out.block = make([]byte, f.blockSize)
}
out.block = make([]byte, realSize)
_, out.err = f.rdr.ReadAt(out.block, int64(f.blockOffsets[i]))
if out.err != nil {
return out
}
if realSize == f.sizes[i] {
out.block, out.err = f.decomp.Decompress(out.block)
}
return out
}
type BlockResults struct {
idx uint32
block []byte
err error
}
func (f FullReader) WriteTo(w io.Writer) (wrote int64, err error) {
if f.dispatcher == nil {
f.dispatcher = make(chan struct{}, runtime.NumCPU())
for range runtime.NumCPU() {
f.dispatcher <- struct{}{}
}
}
if f.pool == nil {
f.pool = &sync.Pool{
New: func() any { New: func() any {
return &retValue{} return &BlockResults{}
}, },
} }
for i := uint64(0); i < uint64(math.Ceil(float64(len(r.sizes))/float64(r.goroutineLimit))); i++ {
toProcess = min(uint16(len(r.sizes))-(uint16(i)*r.goroutineLimit), r.goroutineLimit)
// Start all the goroutines
for j := uint16(0); j < toProcess; j++ {
go r.process((i*uint64(r.goroutineLimit))+uint64(j), curOffset, pool, retChan)
curOffset += uint64(r.sizes[(i*uint64(r.goroutineLimit))+uint64(j)]) &^ (1 << 24)
} }
// Then consume the results on retChan open := true
for j := uint16(0); j < toProcess; j++ { resChan := make(chan *BlockResults, len(f.dispatcher))
res := <-retChan var results map[uint32]*BlockResults
// If there's an error, we don't care about the results. if _, is := w.(io.WriterAt); !is {
results = make(map[uint32]*BlockResults)
}
for i := range f.BlockNum() {
go func(idx uint32) {
<-f.dispatcher
defer func() { f.dispatcher <- struct{}{} }()
if !open {
resChan <- f.pool.Get().(*BlockResults)
return
}
resChan <- f.blockFromPool(idx)
}(i)
}
out := int64(0)
errOut := make([]error, 0)
for i := uint32(0); i < f.BlockNum(); {
res := <-resChan
defer f.pool.Put(res)
if res.err != nil { if res.err != nil {
errCache = append(errCache, res.err) open = false
if len(cache) > 0 { errOut = append(errOut, res.err)
clear(cache)
} }
if len(errOut) > 0 {
i++
continue continue
} }
// If there has been an error previously, we don't care about the results. if wa, is := w.(io.WriterAt); is {
// We still want to wait for all the goroutines to prevent resources being wasted. _, err := wa.WriteAt(res.block, int64(res.idx)*int64(f.blockSize))
if len(errCache) > 0 {
continue
}
// If we don't need the data yet, we cache it and move on
if res.index != curIndex {
cache[res.index] = res
continue
}
// If we do need the data, we write it
wr, err := w.Write(res.data)
wrote += int64(wr)
if err != nil { if err != nil {
errCache = append(errCache, err) errOut = append(errOut, err)
if len(cache) > 0 { } else {
clear(cache) out = max(out, int64(res.idx)*int64(f.blockSize)+int64(len(res.block)))
} }
i++
continue continue
} }
pool.Put(res) var err error
curIndex++ if res.idx == i {
// Now we recursively try to clear the cache _, err = w.Write(res.block)
for len(cache) > 0 { if err != nil {
res, ok := cache[curIndex] errOut = append(errOut, err)
if !ok { } else {
out = max(out, int64(res.idx)*int64(f.blockSize)+int64(len(res.block)))
}
i++
} else {
results[res.idx] = res
}
var has bool
for {
res, has = results[i]
if has {
_, err = w.Write(res.block)
if err != nil {
errOut = append(errOut, err)
} else {
out = max(out, int64(res.idx)*int64(f.blockSize)+int64(len(res.block)))
}
i++
delete(results, i)
f.pool.Put(res)
} else {
break break
} }
wr, err := w.Write(res.data)
wrote += int64(wr)
if err != nil {
errCache = append(errCache, err)
if len(cache) > 0 {
clear(cache)
}
break
}
delete(cache, curIndex)
pool.Put(res)
curIndex++
} }
} }
if len(errCache) > 0 { if len(errOut) > 0 {
return wrote, errors.Join(errCache...) return out, errors.Join(errOut...)
} }
return out, nil
} }
if r.frag != nil {
rdr, err := r.frag()
if err != nil {
return wrote, err
}
wr, err := io.Copy(w, rdr)
wrote += wr
if l, ok := rdr.(*io.LimitedReader); ok {
if cl, ok := l.R.(io.Closer); ok {
cl.Close()
}
}
if err != nil {
return wrote, err
}
}
return wrote, nil
}
// func (r FullReader) writeToWriteAt(w io.WriterAt) (out int64, outErr error) {
// wait := &sync.WaitGroup{}
// wait.Add(len(r.sizes))
// mgr := routinemanager.NewManager(r.goroutineLimit)
// curOffset := r.initialOffset
// for i := uint64(0); i < uint64(len(r.sizes)); i++ {
// go func(index uint64, fileOffset int64) {
// lckNum := mgr.Lock()
// defer mgr.Unlock(lckNum)
// defer wait.Done()
// realSize := r.sizes[index] &^ (1 << 24)
// if realSize == 0 {
// if index == uint64(len(r.sizes))-1 && r.frag == nil {
// _, err := w.WriteAt([]byte{0}, int64((uint64(r.blockSize)*index)+r.finalBlockSize)-1)
// if err != nil {
// outErr = errors.Join(outErr, err)
// return
// }
// out = max(out, int64((uint64(r.blockSize)*index)+r.finalBlockSize))
// }
// return
// }
// data := make([]byte, realSize)
// err := binary.Read(toreader.NewReader(r.r, int64(fileOffset)), binary.LittleEndian, &data)
// if err != nil {
// outErr = errors.Join(outErr, err)
// return
// }
// if r.sizes[index] == realSize {
// data, err = r.d.Decompress(data)
// }
// if err != nil {
// outErr = errors.Join(outErr, err)
// return
// }
// _, err = w.WriteAt(data, int64(uint64(r.blockSize)*index))
// if err != nil {
// outErr = errors.Join(outErr, err)
// return
// }
// out = max(out, int64(uint64(r.blockSize)*(index+1)))
// }(i, curOffset)
// curOffset += int64(r.sizes[i]) &^ (1 << 24)
// }
// if r.frag != nil {
// wait.Add(1)
// go func() {
// lckNum := mgr.Lock()
// defer mgr.Unlock(lckNum)
// defer wait.Done()
// rdr, err := r.frag()
// if err != nil {
// outErr = errors.Join(outErr, err)
// return
// }
// dat, err := io.ReadAll(rdr)
// if err != nil {
// outErr = errors.Join(outErr, err)
// return
// }
// _, err = w.WriteAt(dat, int64(int(r.blockSize)*len(r.sizes)))
// if err != nil {
// outErr = errors.Join(outErr, err)
// return
// }
// out = int64(int(r.blockSize)*len(r.sizes)) + int64(r.finalBlockSize)
// }()
// }
// wait.Wait()
// return
// }
+39 -83
View File
@@ -1,104 +1,60 @@
package data package data
import ( import "io"
"io"
"io/fs"
"github.com/CalebQ42/squashfs/internal/decompress"
)
type Reader struct { type Reader struct {
r io.Reader f *FullReader
d decompress.Decompressor curBlock []byte
frag io.Reader nextIdx uint32
sizes []uint32 curOffset uint32
dat []byte
curOffset int
curIndex uint64
finalBlockSize uint64
blockSize uint32
closed bool
} }
func NewReader(r io.Reader, d decompress.Decompressor, sizes []uint32, finalBlockSize uint64, blockSize uint32) Reader { func NewReader(f *FullReader) (Reader, error) {
dat, err := f.Block(0)
if err != nil {
return Reader{}, err
}
return Reader{ return Reader{
r: r, f: f,
d: d, curBlock: dat,
sizes: sizes, nextIdx: 1,
finalBlockSize: finalBlockSize, curOffset: 0,
blockSize: blockSize, }, nil
}
} }
func (r *Reader) AddFrag(fragRdr io.Reader) { func (d *Reader) Close() error {
r.frag = fragRdr d.curBlock = nil
}
func (r *Reader) advance() error {
r.curOffset = 0
defer func() { r.curIndex++ }()
var err error
if r.curIndex == uint64(len(r.sizes)) && r.frag != nil {
r.dat, err = io.ReadAll(r.frag)
return err
} else if r.curIndex >= uint64(len(r.sizes)) {
r.dat = []byte{}
return io.EOF
}
realSize := r.sizes[r.curIndex] &^ (1 << 24)
if realSize == 0 {
if r.curIndex == uint64(len(r.sizes))-1 && r.frag == nil {
r.dat = make([]byte, r.finalBlockSize)
} else {
r.dat = make([]byte, r.blockSize)
}
return nil return nil
} }
r.dat = make([]byte, realSize)
_, err = r.r.Read(r.dat) func (d *Reader) advanceBlock() error {
if d.nextIdx >= d.f.BlockNum() {
d.curBlock = nil
return io.EOF
}
var err error
d.curBlock, err = d.f.Block(d.nextIdx)
if err != nil { if err != nil {
return err return err
} }
if r.sizes[r.curIndex] != realSize { d.nextIdx++
d.curOffset = 0
return nil return nil
} }
r.dat, err = r.d.Decompress(r.dat)
return err
}
func (r *Reader) Read(b []byte) (int, error) { func (d *Reader) Read(buf []byte) (int, error) {
if r.closed { totRed := 0
return 0, fs.ErrClosed toRead := 0
} var err error
curRead := 0 for totRed < len(buf) {
var toRead int if int(d.curOffset) >= len(d.curBlock) {
for curRead < len(b) { err = d.advanceBlock()
if r.curOffset >= len(r.dat) { if err != nil {
if err := r.advance(); err != nil { return totRed, err
return curRead, err
} }
} }
toRead = min(len(b)-curRead, len(r.dat)-r.curOffset) toRead = min(len(d.curBlock)-int(d.curOffset), len(buf)-totRed)
toRead = copy(b[curRead:], r.dat[r.curOffset:r.curOffset+toRead]) copy(buf[totRed:], d.curBlock[d.curOffset:d.curOffset+uint32(toRead)])
r.curOffset += toRead
curRead += toRead
} }
return curRead, nil return totRed, nil
}
func (r *Reader) Close() error {
r.closed = true
r.r = nil
r.d = nil
if r.frag != nil {
if l, ok := r.frag.(*io.LimitedReader); ok {
if cl, ok := l.R.(io.Closer); ok {
cl.Close()
}
}
}
r.frag = nil
r.sizes = nil
r.dat = nil
return nil
} }
+39 -11
View File
@@ -11,12 +11,45 @@ type header struct {
Num uint32 Num uint32
} }
type decEntry struct { func readHeader(r io.Reader) (h header, err error) {
dat := make([]byte, 12)
_, err = r.Read(dat)
if err != nil {
return
}
h.Count = binary.LittleEndian.Uint32(dat)
h.BlockStart = binary.LittleEndian.Uint32(dat[4:])
h.Num = binary.LittleEndian.Uint32(dat[8:])
return
}
type dirEntry struct {
Offset uint16 Offset uint16
NumOffset int16 NumOffset int16
InodeType uint16 InodeType uint16
NameSize uint16 NameSize uint16
// Name []byte (not decoded along with decEntry) Name []byte
}
func readEntry(r io.Reader) (e dirEntry, err error) {
dat := make([]byte, 8)
_, err = r.Read(dat)
if err != nil {
return
}
e.Offset = binary.LittleEndian.Uint16(dat)
_, err = binary.Decode(dat[2:], binary.LittleEndian, &e.NumOffset)
if err != nil {
return
}
e.InodeType = binary.LittleEndian.Uint16(dat[4:])
e.NameSize = binary.LittleEndian.Uint16(dat[6:])
e.Name = make([]byte, e.NameSize+1)
_, err = r.Read(e.Name)
if err != nil {
return
}
return
} }
type Entry struct { type Entry struct {
@@ -31,20 +64,15 @@ func ReadDirectory(r io.Reader, size uint32) (out []Entry, err error) {
size -= 3 size -= 3
var curRead uint32 var curRead uint32
var h header var h header
var de decEntry var de dirEntry
for curRead < size { for curRead < size {
err = binary.Read(r, binary.LittleEndian, &h) h, err = readHeader(r)
if err != nil { if err != nil {
return return
} }
curRead += 12 curRead += 12
for i := uint32(0); i < h.Count+1 && curRead < size; i++ { for i := uint32(0); i < h.Count+1 && curRead < size; i++ {
err = binary.Read(r, binary.LittleEndian, &de) de, err = readEntry(r)
if err != nil {
return
}
nameTmp := make([]byte, de.NameSize+1)
err = binary.Read(r, binary.LittleEndian, &nameTmp)
if err != nil { if err != nil {
return return
} }
@@ -52,7 +80,7 @@ func ReadDirectory(r io.Reader, size uint32) (out []Entry, err error) {
out = append(out, Entry{ out = append(out, Entry{
BlockStart: h.BlockStart, BlockStart: h.BlockStart,
Offset: de.Offset, Offset: de.Offset,
Name: string(nameTmp), Name: string(de.Name),
InodeType: de.InodeType, InodeType: de.InodeType,
Num: h.Num + uint32(de.NumOffset), Num: h.Num + uint32(de.NumOffset),
}) })
+18 -65
View File
@@ -2,7 +2,6 @@ package squashfslow
import ( import (
"errors" "errors"
"io"
"github.com/CalebQ42/squashfs/internal/metadata" "github.com/CalebQ42/squashfs/internal/metadata"
"github.com/CalebQ42/squashfs/internal/toreader" "github.com/CalebQ42/squashfs/internal/toreader"
@@ -84,6 +83,8 @@ func (b FileBase) IsRegular() bool {
return b.Inode.Type == inode.Fil || b.Inode.Type == inode.EFil return b.Inode.Type == inode.Fil || b.Inode.Type == inode.EFil
} }
// GetRegFileReaders returns a regular file's data.Reader and data.FullReader.
// The two are linked: the data.Reader reads through the data.FullReader, so closing the FullReader breaks the Reader.
func (b FileBase) GetRegFileReaders(r Reader) (data.Reader, data.FullReader, error) { func (b FileBase) GetRegFileReaders(r Reader) (data.Reader, data.FullReader, error) {
if !b.IsRegular() { if !b.IsRegular() {
return data.Reader{}, data.FullReader{}, errors.New("not a regular file") return data.Reader{}, data.FullReader{}, errors.New("not a regular file")
@@ -91,41 +92,32 @@ func (b FileBase) GetRegFileReaders(r Reader) (data.Reader, data.FullReader, err
var blockStart uint64 var blockStart uint64
var fragIndex uint32 var fragIndex uint32
var fragOffset uint32 var fragOffset uint32
var fragSize uint64
var sizes []uint32 var sizes []uint32
var fileSize uint64
if b.Inode.Type == inode.Fil { if b.Inode.Type == inode.Fil {
blockStart = uint64(b.Inode.Data.(inode.File).BlockStart) blockStart = uint64(b.Inode.Data.(inode.File).BlockStart)
fragIndex = b.Inode.Data.(inode.File).FragInd fragIndex = b.Inode.Data.(inode.File).FragInd
fragOffset = b.Inode.Data.(inode.File).FragOffset fragOffset = b.Inode.Data.(inode.File).FragOffset
sizes = b.Inode.Data.(inode.File).BlockSizes sizes = b.Inode.Data.(inode.File).BlockSizes
fragSize = uint64(b.Inode.Data.(inode.File).Size % r.Superblock.BlockSize) fileSize = uint64(b.Inode.Data.(inode.File).Size)
} else { } else {
blockStart = b.Inode.Data.(inode.EFile).BlockStart blockStart = b.Inode.Data.(inode.EFile).BlockStart
fragIndex = b.Inode.Data.(inode.EFile).FragInd fragIndex = b.Inode.Data.(inode.EFile).FragInd
fragOffset = b.Inode.Data.(inode.EFile).FragOffset fragOffset = b.Inode.Data.(inode.EFile).FragOffset
sizes = b.Inode.Data.(inode.EFile).BlockSizes sizes = b.Inode.Data.(inode.EFile).BlockSizes
fragSize = b.Inode.Data.(inode.EFile).Size % uint64(r.Superblock.BlockSize) fileSize = b.Inode.Data.(inode.EFile).Size
} }
frag := func() (io.Reader, error) { outFull := data.NewFullReader(r.r, r.d, r.Superblock.BlockSize, fileSize, blockStart, sizes)
if fragIndex != 0xFFFFFFFF {
ent, err := r.fragEntry(fragIndex) ent, err := r.fragEntry(fragIndex)
if err != nil {
return nil, err
}
frag := data.NewReader(toreader.NewReader(r.r, int64(ent.Start)), r.d, []uint32{ent.Size}, uint64(r.Superblock.BlockSize), r.Superblock.BlockSize)
frag.Read(make([]byte, fragOffset))
return io.LimitReader(&frag, int64(fragSize)), nil
}
outRdr := data.NewReader(toreader.NewReader(r.r, int64(blockStart)), r.d, sizes, fragSize, r.Superblock.BlockSize)
if fragIndex != 0xffffffff {
f, err := frag()
if err != nil { if err != nil {
return data.Reader{}, data.FullReader{}, err return data.Reader{}, data.FullReader{}, err
} }
outRdr.AddFrag(f) outFull.AddFragData(ent.Start, ent.Size, fragOffset)
} }
outFull := data.NewFullReader(r.r, int64(blockStart), r.d, sizes, fragSize, r.Superblock.BlockSize) outRdr, err := data.NewReader(&outFull)
if fragIndex != 0xffffffff { if err != nil {
outFull.AddFrag(frag) return data.Reader{}, data.FullReader{}, err
} }
return outRdr, outFull, nil return outRdr, outFull, nil
} }
@@ -137,67 +129,28 @@ func (b FileBase) GetFullReader(r *Reader) (data.FullReader, error) {
var blockStart uint64 var blockStart uint64
var fragIndex uint32 var fragIndex uint32
var fragOffset uint32 var fragOffset uint32
var fragSize uint64
var sizes []uint32 var sizes []uint32
var fileSize uint64
if b.Inode.Type == inode.Fil { if b.Inode.Type == inode.Fil {
blockStart = uint64(b.Inode.Data.(inode.File).BlockStart) blockStart = uint64(b.Inode.Data.(inode.File).BlockStart)
fragIndex = b.Inode.Data.(inode.File).FragInd fragIndex = b.Inode.Data.(inode.File).FragInd
fragOffset = b.Inode.Data.(inode.File).FragOffset fragOffset = b.Inode.Data.(inode.File).FragOffset
sizes = b.Inode.Data.(inode.File).BlockSizes sizes = b.Inode.Data.(inode.File).BlockSizes
fragSize = uint64(b.Inode.Data.(inode.File).Size % r.Superblock.BlockSize) fileSize = uint64(b.Inode.Data.(inode.File).Size)
} else { } else {
blockStart = b.Inode.Data.(inode.EFile).BlockStart blockStart = b.Inode.Data.(inode.EFile).BlockStart
fragIndex = b.Inode.Data.(inode.EFile).FragInd fragIndex = b.Inode.Data.(inode.EFile).FragInd
fragOffset = b.Inode.Data.(inode.EFile).FragOffset fragOffset = b.Inode.Data.(inode.EFile).FragOffset
sizes = b.Inode.Data.(inode.EFile).BlockSizes sizes = b.Inode.Data.(inode.EFile).BlockSizes
fragSize = b.Inode.Data.(inode.EFile).Size % uint64(r.Superblock.BlockSize) fileSize = b.Inode.Data.(inode.EFile).Size
} }
outFull := data.NewFullReader(r.r, int64(blockStart), r.d, sizes, fragSize, r.Superblock.BlockSize) outFull := data.NewFullReader(r.r, r.d, r.Superblock.BlockSize, fileSize, blockStart, sizes)
if fragIndex != 0xffffffff { if fragIndex != 0xFFFFFFFF {
outFull.AddFrag(func() (io.Reader, error) {
ent, err := r.fragEntry(fragIndex) ent, err := r.fragEntry(fragIndex)
if err != nil { if err != nil {
return nil, err return data.FullReader{}, err
} }
frag := data.NewReader(toreader.NewReader(r.r, int64(ent.Start)), r.d, []uint32{ent.Size}, uint64(r.Superblock.BlockSize), r.Superblock.BlockSize) outFull.AddFragData(ent.Start, ent.Size, fragOffset)
frag.Read(make([]byte, fragOffset))
return io.LimitReader(&frag, int64(fragSize)), nil
})
} }
return outFull, nil return outFull, nil
} }
func (b FileBase) GetReader(r *Reader) (data.Reader, error) {
if !b.IsRegular() {
return data.Reader{}, errors.New("not a regular file")
}
var blockStart uint64
var fragIndex uint32
var fragOffset uint32
var fragSize uint64
var sizes []uint32
if b.Inode.Type == inode.Fil {
blockStart = uint64(b.Inode.Data.(inode.File).BlockStart)
fragIndex = b.Inode.Data.(inode.File).FragInd
fragOffset = b.Inode.Data.(inode.File).FragOffset
sizes = b.Inode.Data.(inode.File).BlockSizes
fragSize = uint64(b.Inode.Data.(inode.File).Size % r.Superblock.BlockSize)
} else {
blockStart = b.Inode.Data.(inode.EFile).BlockStart
fragIndex = b.Inode.Data.(inode.EFile).FragInd
fragOffset = b.Inode.Data.(inode.EFile).FragOffset
sizes = b.Inode.Data.(inode.EFile).BlockSizes
fragSize = b.Inode.Data.(inode.EFile).Size % uint64(r.Superblock.BlockSize)
}
outRdr := data.NewReader(toreader.NewReader(r.r, int64(blockStart)), r.d, sizes, fragSize, r.Superblock.BlockSize)
if fragIndex != 0xffffffff {
ent, err := r.fragEntry(fragIndex)
if err != nil {
return data.Reader{}, err
}
frag := data.NewReader(toreader.NewReader(r.r, int64(ent.Start)), r.d, []uint32{ent.Size}, uint64(r.Superblock.BlockSize), r.Superblock.BlockSize)
frag.Read(make([]byte, fragOffset))
outRdr.AddFrag(io.LimitReader(&frag, int64(fragSize)))
}
return outRdr, nil
}
+3 -1
View File
@@ -7,7 +7,9 @@ import (
"github.com/CalebQ42/squashfs/low/inode" "github.com/CalebQ42/squashfs/low/inode"
) )
func (r Reader) InodeFromRef(ref uint64) (inode.Inode, error) { type InodeRef = uint64
func (r Reader) InodeFromRef(ref InodeRef) (inode.Inode, error) {
offset, meta := (ref>>16)+r.Superblock.InodeTableStart, ref&0xFFFF offset, meta := (ref>>16)+r.Superblock.InodeTableStart, ref&0xFFFF
rdr := metadata.NewReader(toreader.NewReader(r.r, int64(offset)), r.d) rdr := metadata.NewReader(toreader.NewReader(r.r, int64(offset)), r.d)
defer rdr.Close() defer rdr.Close()
+14 -14
View File
@@ -13,6 +13,20 @@ type Directory struct {
ParentNum uint32 ParentNum uint32
} }
// ReadDir decodes the 16-byte, little-endian body of a basic directory inode
// from r.
//
// The whole body is read with io.ReadFull, because a single io.Reader.Read
// call may legally return fewer bytes than requested without an error; a
// short stream therefore surfaces as io.EOF / io.ErrUnexpectedEOF instead of
// a silently half-decoded Directory.
func ReadDir(r io.Reader) (d Directory, err error) {
	var dat [16]byte
	if _, err = io.ReadFull(r, dat[:]); err != nil {
		return d, err
	}
	d.BlockStart = binary.LittleEndian.Uint32(dat[0:4])
	d.LinkCount = binary.LittleEndian.Uint32(dat[4:8])
	d.Size = binary.LittleEndian.Uint16(dat[8:10])
	d.Offset = binary.LittleEndian.Uint16(dat[10:12])
	d.ParentNum = binary.LittleEndian.Uint32(dat[12:16])
	return d, nil
}
type EDirectory struct { type EDirectory struct {
LinkCount uint32 LinkCount uint32
Size uint32 Size uint32
@@ -31,20 +45,6 @@ type DirectoryIndex struct {
Name []byte Name []byte
} }
func ReadDir(r io.Reader) (d Directory, err error) {
dat := make([]byte, 16)
_, err = r.Read(dat)
if err != nil {
return
}
d.BlockStart = binary.LittleEndian.Uint32(dat)
d.LinkCount = binary.LittleEndian.Uint32(dat[4:])
d.Size = binary.LittleEndian.Uint16(dat[8:])
d.Offset = binary.LittleEndian.Uint16(dat[10:])
d.ParentNum = binary.LittleEndian.Uint32(dat[12:])
return
}
func ReadEDir(r io.Reader) (d EDirectory, err error) { func ReadEDir(r io.Reader) (d EDirectory, err error) {
dat := make([]byte, 24) dat := make([]byte, 24)
_, err = r.Read(dat) _, err = r.Read(dat)
+33 -22
View File
@@ -14,31 +14,16 @@ type File struct {
BlockSizes []uint32 BlockSizes []uint32
} }
type eFileInit struct {
BlockStart uint64
Size uint64
Sparse uint64
LinkCount uint32
FragInd uint32
FragOffset uint32
XattrInd uint32
}
type EFile struct {
eFileInit
BlockSizes []uint32
}
func ReadFile(r io.Reader, blockSize uint32) (f File, err error) { func ReadFile(r io.Reader, blockSize uint32) (f File, err error) {
dat := make([]byte, 16) dat := make([]byte, 16)
_, err = r.Read(dat) _, err = r.Read(dat)
if err != nil { if err != nil {
return return
} }
f.BlockStart = binary.LittleEndian.Uint32(dat) f.BlockStart = binary.LittleEndian.Uint32(dat[0:4])
f.FragInd = binary.LittleEndian.Uint32(dat[4:]) f.FragInd = binary.LittleEndian.Uint32(dat[4:8])
f.FragOffset = binary.LittleEndian.Uint32(dat[8:]) f.FragOffset = binary.LittleEndian.Uint32(dat[8:12])
f.Size = binary.LittleEndian.Uint32(dat[12:]) f.Size = binary.LittleEndian.Uint32(dat[12:16])
toRead := int(math.Floor(float64(f.Size) / float64(blockSize))) toRead := int(math.Floor(float64(f.Size) / float64(blockSize)))
if f.FragInd == 0xFFFFFFFF && f.Size%blockSize > 0 { if f.FragInd == 0xFFFFFFFF && f.Size%blockSize > 0 {
toRead++ toRead++
@@ -55,16 +40,42 @@ func ReadFile(r io.Reader, blockSize uint32) (f File, err error) {
return return
} }
// EFile is the decoded body of an extended regular-file inode.
//
// ReadEFile fills the first seven fields, in declaration order, from a
// 40-byte little-endian header and then reads BlockSizes.
type EFile struct {
	BlockStart uint64
	Size       uint64
	Sparse     uint64
	LinkCount  uint32
	// FragInd is compared against 0xFFFFFFFF by the readers; that value is
	// treated as "no fragment", in which case a trailing partial block is
	// counted in BlockSizes instead.
	FragInd    uint32
	FragOffset uint32
	XattrInd   uint32
	// BlockSizes holds one 32-bit entry per data block, as read by ReadEFile.
	BlockSizes []uint32
}
func ReadEFile(r io.Reader, blockSize uint32) (f EFile, err error) { func ReadEFile(r io.Reader, blockSize uint32) (f EFile, err error) {
err = binary.Read(r, binary.LittleEndian, &f.eFileInit) dat := make([]byte, 40)
_, err = r.Read(dat)
if err != nil { if err != nil {
return return
} }
toRead := int(math.Floor(float64(f.Size) / float64(blockSize))) f.BlockStart = binary.LittleEndian.Uint64(dat[0:8])
f.Size = binary.LittleEndian.Uint64(dat[8:16])
f.Sparse = binary.LittleEndian.Uint64(dat[16:24])
f.LinkCount = binary.LittleEndian.Uint32(dat[24:28])
f.FragInd = binary.LittleEndian.Uint32(dat[28:32])
f.FragOffset = binary.LittleEndian.Uint32(dat[32:36])
f.XattrInd = binary.LittleEndian.Uint32(dat[36:40])
toRead := f.Size / uint64(blockSize)
if f.FragInd == 0xFFFFFFFF && f.Size%uint64(blockSize) > 0 { if f.FragInd == 0xFFFFFFFF && f.Size%uint64(blockSize) > 0 {
toRead++ toRead++
} }
dat = make([]byte, toRead*4)
_, err = r.Read(dat)
if err != nil {
return
}
f.BlockSizes = make([]uint32, toRead) f.BlockSizes = make([]uint32, toRead)
err = binary.Read(r, binary.LittleEndian, &f.BlockSizes) for i := range toRead {
f.BlockSizes[i] = binary.LittleEndian.Uint32(dat[i*4:])
}
return return
} }
+8 -1
View File
@@ -40,10 +40,17 @@ type Inode struct {
} }
func Read(r io.Reader, blockSize uint32) (i Inode, err error) { func Read(r io.Reader, blockSize uint32) (i Inode, err error) {
err = binary.Read(r, binary.LittleEndian, &i.Header) dat := make([]byte, 16)
_, err = r.Read(dat)
if err != nil { if err != nil {
return return
} }
i.Type = binary.LittleEndian.Uint16(dat[0:2])
i.Perm = binary.LittleEndian.Uint16(dat[2:4])
i.UidInd = binary.LittleEndian.Uint16(dat[4:6])
i.GidInd = binary.LittleEndian.Uint16(dat[6:8])
i.ModTime = binary.LittleEndian.Uint32(dat[8:12])
i.Num = binary.LittleEndian.Uint32(dat[12:16])
switch i.Type { switch i.Type {
case Dir: case Dir:
i.Data, err = ReadDir(r) i.Data, err = ReadDir(r)
+35 -11
View File
@@ -10,18 +10,31 @@ type Device struct {
Dev uint32 Dev uint32
} }
// ReadDevice decodes the 8-byte, little-endian body of a basic device inode
// from r.
//
// io.ReadFull is used so that a reader returning a short count cannot leave
// part of the buffer zeroed and still be decoded; a truncated stream is
// reported as io.EOF / io.ErrUnexpectedEOF.
func ReadDevice(r io.Reader) (d Device, err error) {
	var dat [8]byte
	if _, err = io.ReadFull(r, dat[:]); err != nil {
		return d, err
	}
	d.LinkCount = binary.LittleEndian.Uint32(dat[0:4])
	d.Dev = binary.LittleEndian.Uint32(dat[4:8])
	return d, nil
}
type EDevice struct { type EDevice struct {
Device Device
XattrInd uint32 XattrInd uint32
} }
func ReadDevice(r io.Reader) (d Device, err error) { func ReadEDevice(r io.Reader) (d EDevice, err error) {
err = binary.Read(r, binary.LittleEndian, &d) dat := make([]byte, 12)
_, err = r.Read(dat)
if err != nil {
return return
} }
d.LinkCount = binary.LittleEndian.Uint32(dat)
func ReadEDevice(r io.Reader) (d EDevice, err error) { d.Dev = binary.LittleEndian.Uint32(dat[4:])
err = binary.Read(r, binary.LittleEndian, &d) d.XattrInd = binary.LittleEndian.Uint32(dat[8:])
return return
} }
@@ -29,17 +42,28 @@ type IPC struct {
LinkCount uint32 LinkCount uint32
} }
// ReadIPC decodes the 4-byte, little-endian body of a basic IPC inode from r.
//
// The read goes through io.ReadFull so a short read is reported as an error
// (io.EOF / io.ErrUnexpectedEOF) rather than decoded from a partly filled
// buffer.
func ReadIPC(r io.Reader) (i IPC, err error) {
	var dat [4]byte
	if _, err = io.ReadFull(r, dat[:]); err != nil {
		return i, err
	}
	i.LinkCount = binary.LittleEndian.Uint32(dat[:])
	return i, nil
}
type EIPC struct { type EIPC struct {
IPC IPC
XattrInd uint32 XattrInd uint32
} }
func ReadIPC(r io.Reader) (i IPC, err error) {
err = binary.Read(r, binary.LittleEndian, &i)
return
}
func ReadEIPC(r io.Reader) (i EIPC, err error) { func ReadEIPC(r io.Reader) (i EIPC, err error) {
err = binary.Read(r, binary.LittleEndian, &i) dat := make([]byte, 8)
_, err = r.Read(dat)
if err != nil {
return
}
i.LinkCount = binary.LittleEndian.Uint32(dat)
i.XattrInd = binary.LittleEndian.Uint32(dat[4:])
return return
} }
+27 -19
View File
@@ -5,42 +5,50 @@ import (
"io" "io"
) )
type symlinkInit struct { type Symlink struct {
LinkCount uint32 LinkCount uint32
TargetSize uint32 TargetSize uint32
}
type Symlink struct {
symlinkInit
Target []byte Target []byte
} }
// ReadSym decodes a basic symlink inode body from r: an 8-byte little-endian
// header (LinkCount, TargetSize) followed by TargetSize bytes of target path.
//
// Both reads use io.ReadFull. A plain Read may return fewer bytes than asked
// for without an error, which would previously yield a truncated or
// zero-padded Target; now a short stream is reported as io.EOF /
// io.ErrUnexpectedEOF.
func ReadSym(r io.Reader) (s Symlink, err error) {
	var hdr [8]byte
	if _, err = io.ReadFull(r, hdr[:]); err != nil {
		return s, err
	}
	s.LinkCount = binary.LittleEndian.Uint32(hdr[0:4])
	s.TargetSize = binary.LittleEndian.Uint32(hdr[4:8])
	s.Target = make([]byte, s.TargetSize)
	_, err = io.ReadFull(r, s.Target)
	return s, err
}
type ESymlink struct { type ESymlink struct {
symlinkInit LinkCount uint32
TargetSize uint32
Target []byte Target []byte
XattrInd uint32 XattrInd uint32
} }
func ReadSym(r io.Reader) (s Symlink, err error) {
err = binary.Read(r, binary.LittleEndian, &s.symlinkInit)
if err != nil {
return
}
s.Target = make([]byte, s.TargetSize)
err = binary.Read(r, binary.LittleEndian, &s.Target)
return
}
func ReadESym(r io.Reader) (s ESymlink, err error) { func ReadESym(r io.Reader) (s ESymlink, err error) {
err = binary.Read(r, binary.LittleEndian, &s.symlinkInit) dat := make([]byte, 8)
_, err = r.Read(dat)
if err != nil { if err != nil {
return return
} }
s.LinkCount = binary.LittleEndian.Uint32(dat)
s.TargetSize = binary.LittleEndian.Uint32(dat[4:])
s.Target = make([]byte, s.TargetSize) s.Target = make([]byte, s.TargetSize)
err = binary.Read(r, binary.LittleEndian, &s.Target) _, err = r.Read(s.Target)
if err != nil { if err != nil {
return return
} }
err = binary.Read(r, binary.LittleEndian, &s.XattrInd) dat = make([]byte, 4)
_, err = r.Read(dat)
if err != nil {
return
}
s.XattrInd = binary.LittleEndian.Uint32(dat)
return return
} }
+43 -105
View File
@@ -4,10 +4,8 @@ import (
"encoding/binary" "encoding/binary"
"errors" "errors"
"io" "io"
"math"
"github.com/CalebQ42/squashfs/internal/decompress" "github.com/CalebQ42/squashfs/internal/decompress"
"github.com/CalebQ42/squashfs/internal/metadata"
"github.com/CalebQ42/squashfs/internal/toreader" "github.com/CalebQ42/squashfs/internal/toreader"
"github.com/CalebQ42/squashfs/low/inode" "github.com/CalebQ42/squashfs/low/inode"
) )
@@ -30,13 +28,13 @@ var (
) )
type Reader struct { type Reader struct {
Root Directory
Superblock superblock
r io.ReaderAt r io.ReaderAt
d decompress.Decompressor d decompress.Decompressor
Root Directory fragTable *Table[fragEntry]
fragTable []fragEntry idTable *Table[uint32]
idTable []uint32 exportTable *Table[InodeRef]
exportTable []uint64
Superblock superblock
} }
func NewReader(r io.ReaderAt) (rdr Reader, err error) { func NewReader(r io.ReaderAt) (rdr Reader, err error) {
@@ -80,119 +78,59 @@ func NewReader(r io.ReaderAt) (rdr Reader, err error) {
if err != nil { if err != nil {
return rdr, errors.Join(errors.New("failed to read root directory"), err) return rdr, errors.Join(errors.New("failed to read root directory"), err)
} }
rdr.fragTable = NewTable(&rdr, rdr.Superblock.FragTableStart, rdr.Superblock.FragCount, readFrag)
rdr.idTable = NewTable(&rdr, rdr.Superblock.IdTableStart, uint32(rdr.Superblock.IdCount), readId)
rdr.exportTable = NewTable(&rdr, rdr.Superblock.ExportTableStart, rdr.Superblock.InodeCount, readRef)
return return
} }
// readFrag decodes one 16-byte fragment table entry from r. Only the first
// 12 bytes (Start, Size) are decoded; the remaining 4 bytes are consumed but
// not interpreted.
//
// io.ReadFull guarantees the whole entry was read before decoding; a short
// stream is reported as io.EOF / io.ErrUnexpectedEOF.
func readFrag(r io.Reader) (fragEntry, error) {
	var dat [16]byte
	if _, err := io.ReadFull(r, dat[:]); err != nil {
		return fragEntry{}, err
	}
	return fragEntry{
		Start: binary.LittleEndian.Uint64(dat[0:8]),
		Size:  binary.LittleEndian.Uint32(dat[8:12]),
	}, nil
}
// readId decodes one little-endian 32-bit uid/gid value from r.
//
// The value is read with io.ReadFull: an io.Reader is allowed to return fewer
// bytes than requested with a nil error, and decoding after such a short read
// would produce a wrong id. An empty stream yields io.EOF, a truncated one
// io.ErrUnexpectedEOF.
func readId(r io.Reader) (uint32, error) {
	var dat [4]byte
	if _, err := io.ReadFull(r, dat[:]); err != nil {
		return 0, err
	}
	return binary.LittleEndian.Uint32(dat[:]), nil
}
// readRef decodes one little-endian 64-bit inode reference from r.
//
// io.ReadFull is used so that a short read cannot result in a reference
// decoded from a partly filled buffer; a short stream is reported as io.EOF /
// io.ErrUnexpectedEOF.
func readRef(r io.Reader) (InodeRef, error) {
	var dat [8]byte
	if _, err := io.ReadFull(r, dat[:]); err != nil {
		return 0, err
	}
	return binary.LittleEndian.Uint64(dat[:]), nil
}
// Get a uid/gid at the given index. Lazily populates the reader's Id table as necessary. // Get a uid/gid at the given index. Lazily populates the reader's Id table as necessary.
func (r *Reader) Id(i uint16) (uint32, error) { func (r *Reader) Id(i uint16) (uint32, error) {
if len(r.idTable) > int(i) { return r.idTable.Get(uint32(i))
return r.idTable[i], nil
} else if i >= r.Superblock.IdCount {
return 0, errors.New("id out of bounds")
}
// Populate the id table as needed
var blockNum uint32
if i != 0 { // If i == 0, we go negatives causing issues with uint32s
blockNum = uint32(math.Ceil(float64(i+1)/2048)) - 1
} else {
blockNum = 0
}
blocksRead := len(r.idTable) / 2048
blocksToRead := int(blockNum) - blocksRead + 1
var offset uint64
var idsToRead uint16
var idsTmp []uint32
var err error
var rdr metadata.Reader
// We can *maybe* have a slight speed increase by manually decoding instead of using reflection via binary.Read
for i := blocksRead; i < int(blocksRead)+blocksToRead; i++ {
err = binary.Read(toreader.NewReader(r.r, int64(r.Superblock.IdTableStart)+int64(8*i)), binary.LittleEndian, &offset)
if err != nil {
return 0, err
}
idsToRead = min(r.Superblock.IdCount-uint16(len(r.idTable)), 2048)
idsTmp = make([]uint32, idsToRead)
rdr = metadata.NewReader(toreader.NewReader(r.r, int64(offset)), r.d)
err = binary.Read(&rdr, binary.LittleEndian, &idsTmp)
rdr.Close()
if err != nil {
return 0, err
}
r.idTable = append(r.idTable, idsTmp...)
}
return r.idTable[i], nil
} }
// Get a fragment entry at the given index. Lazily populates the reader's fragment table as necessary. // Get a fragment entry at the given index. Lazily populates the reader's fragment table as necessary.
func (r *Reader) fragEntry(i uint32) (fragEntry, error) { func (r *Reader) fragEntry(i uint32) (fragEntry, error) {
return readPagedItems(int(i), 512, &r.fragTable, int(r.Superblock.FragCount), return r.fragTable.Get(i)
func(startBlock, fragsToRead int) ([]fragEntry, error) {
// get the offset of the next block of fragments
var offset uint64
err := binary.Read(toreader.NewReader(r.r, int64(r.Superblock.FragTableStart)+int64(8*startBlock)), binary.LittleEndian, &offset)
if err != nil {
return nil, err
}
fragsTmp := make([]fragEntry, fragsToRead)
rdr := metadata.NewReader(toreader.NewReader(r.r, int64(offset)), r.d)
defer rdr.Close()
err = binary.Read(rdr, binary.LittleEndian, &fragsTmp)
if err != nil {
return nil, err
}
return fragsTmp, nil
})
} }
// Get an inode reference at the given index. Lazily populates the reader's export table as necessary. // Get an inode reference at the given index. Lazily populates the reader's export table as necessary.
func (r *Reader) inodeRef(i uint32) (uint64, error) { func (r *Reader) inodeRef(i uint32) (InodeRef, error) {
if !r.Superblock.Exportable() { return r.exportTable.Get(i)
return 0, ErrorNotExportable
}
if len(r.exportTable) > int(i) {
return r.exportTable[i], nil
} else if i >= r.Superblock.InodeCount {
return 0, errors.New("inode out of bounds")
}
// Populate the export table as needed
var blockNum uint32
if i != 0 { // If i == 0, we go negatives causing issues with uint32s
blockNum = uint32(math.Ceil(float64(i+1)/1024)) - 1
} else {
blockNum = 0
}
blocksRead := len(r.exportTable) / 1024
blocksToRead := int(blockNum) - blocksRead + 1
var offset uint64
var refsToRead uint32
var refsTmp []uint64
var err error
var rdr metadata.Reader
// We can *maybe* have a slight speed increase by manually decoding instead of using reflection via binary.Read
for i := blocksRead; i < int(blocksRead)+blocksToRead; i++ {
err = binary.Read(toreader.NewReader(r.r, int64(r.Superblock.ExportTableStart)+int64(8*i)), binary.LittleEndian, &offset)
if err != nil {
return 0, err
}
refsToRead = min(r.Superblock.InodeCount-uint32(len(r.exportTable)), 1024)
refsTmp = make([]uint64, refsToRead)
rdr = metadata.NewReader(toreader.NewReader(r.r, int64(offset)), r.d)
err = binary.Read(&rdr, binary.LittleEndian, &refsTmp)
rdr.Close()
if err != nil {
return 0, err
}
r.exportTable = append(r.exportTable, refsTmp...)
}
return r.exportTable[i], nil
} }
func (r Reader) Inode(i uint32) (inode.Inode, error) { func (r Reader) Inode(i uint32) (inode.Inode, error) {
ref, err := r.inodeRef(i) ref, err := r.inodeRef(i - 1) // Inode table is 1 indexed
if err != nil { if err != nil {
return inode.Inode{}, err return inode.Inode{}, err
} }
+2 -2
View File
@@ -77,7 +77,7 @@ func TestReader(t *testing.T) {
path := filepath.Join(tmpDir, "extractTest") path := filepath.Join(tmpDir, "extractTest")
os.RemoveAll(path) os.RemoveAll(path)
os.MkdirAll(path, 0777) os.MkdirAll(path, 0777)
err = extractToDir(rdr, &rdr.Root.FileBase, path) err = extractToDir(rdr, rdr.Root.FileBase, path)
if err != nil { if err != nil {
t.Fatal(err) t.Fatal(err)
} }
@@ -103,7 +103,7 @@ func TestSingleFile(t *testing.T) {
if err != nil { if err != nil {
t.Fatal(err) t.Fatal(err)
} }
err = extractToDir(rdr, &b, path) err = extractToDir(rdr, b, path)
if err != nil { if err != nil {
t.Fatal(err) t.Fatal(err)
} }
+1 -1
View File
@@ -14,7 +14,7 @@ type superblock struct {
IdCount uint16 IdCount uint16
VerMaj uint16 VerMaj uint16
VerMin uint16 VerMin uint16
RootInodeRef uint64 RootInodeRef InodeRef
Size uint64 Size uint64
IdTableStart uint64 IdTableStart uint64
XattrTableStart uint64 XattrTableStart uint64
+85
View File
@@ -0,0 +1,85 @@
package squashfslow
import (
"encoding/binary"
"errors"
"io"
"sync"
"github.com/CalebQ42/squashfs/internal/metadata"
"github.com/CalebQ42/squashfs/internal/toreader"
)
// Sentinel errors for table and collection lookups in this package.
var (
	// errOutOfBounds reports an index at or beyond a table's item count.
	errOutOfBounds           = errors.New("out of bounds")
	errUnexpectedOutOfBounds = errors.New("unexpected out of bounds")
	errNilCollection         = errors.New("nil collection")
)
// CreateFunction decodes a single item of type T from the given reader.
// Table invokes it once per item while filling itself from a metadata stream.
type CreateFunction[T any] = func(io.Reader) (T, error)
// Table is a lazily filled lookup table of items of type T stored in the
// archive. Items are decoded block by block on demand and cached in index
// order.
type Table[T any] struct {
	rdr        *Reader           // archive reader; its r and d fields are used to open metadata streams
	createFunc CreateFunction[T] // decodes one item from a metadata stream

	totalItems    uint32 // number of items the table holds in total
	itemsPerBlock uint32 // number of items decoded per block

	mut          sync.RWMutex // guards offset and currentItems
	offset       uint64       // position of the next unread 8-byte block pointer
	currentItems []T          // items decoded so far
}
// NewTable builds an empty Table whose block pointers begin at start and
// which holds totalItems items in total. Nothing is read from rdr until the
// first Get.
//
// The number of items per block is 8192 divided by binary.Size of T.
// NOTE(review): this assumes T has a fixed, non-zero encoded size
// (binary.Size returns -1 otherwise) — confirm for every instantiation.
func NewTable[T any](rdr *Reader, start uint64, totalItems uint32, createFunc CreateFunction[T]) *Table[T] {
	var sample T
	itemSize := uint32(binary.Size(sample))

	tbl := new(Table[T]) // the zero-value RWMutex is ready to use
	tbl.rdr = rdr
	tbl.createFunc = createFunc
	tbl.offset = start
	tbl.totalItems = totalItems
	tbl.itemsPerBlock = 8192 / itemSize
	return tbl
}
// Get returns the item at requestedItemIndex, decoding further blocks of the
// table if the item has not been loaded yet.
//
// It returns errOutOfBounds when the index is at or beyond the table's total
// item count, and any error produced while filling the table.
func (t *Table[T]) Get(requestedItemIndex uint32) (T, error) {
	// totalItems is only assigned in NewTable, so it can be checked without
	// taking the lock.
	if requestedItemIndex >= t.totalItems {
		var zero T
		return zero, errOutOfBounds
	}
	t.mut.RLock()
	if requestedItemIndex < uint32(len(t.currentItems)) {
		// Copy the item out while the read lock is still held: fillAndGet
		// reassigns t.currentItems under the write lock, so touching the
		// slice after RUnlock is a data race.
		item := t.currentItems[requestedItemIndex]
		t.mut.RUnlock()
		return item, nil
	}
	t.mut.RUnlock()
	// fillAndGet re-checks the length under the write lock, so a concurrent
	// fill between RUnlock and Lock is harmless.
	return t.fillAndGet(requestedItemIndex)
}
// fillAndGet decodes blocks of the table, under the write lock, until the
// item at requestedItemIndex is available, then returns it.
//
// A block is committed to the table (items appended, block-pointer offset
// advanced) only after every item in it decoded successfully. On error the
// table is left exactly as it was, so a failed fill can never make later
// lookups return zero-valued placeholders with a nil error.
func (t *Table[T]) fillAndGet(requestedItemIndex uint32) (T, error) {
	t.mut.Lock()
	defer t.mut.Unlock()
	var zero T
	for uint32(len(t.currentItems)) <= requestedItemIndex {
		loaded := uint32(len(t.currentItems))
		toRead := min(t.itemsPerBlock, t.totalItems-loaded)
		if toRead == 0 {
			// Nothing more can be loaded (index past the end, or a zero
			// itemsPerBlock); bail out instead of looping forever.
			return zero, errUnexpectedOutOfBounds
		}
		block, err := t.readBlock(toRead)
		if err != nil {
			return zero, err
		}
		t.currentItems = append(t.currentItems, block...)
		t.offset += 8 // step to the next 8-byte block pointer
	}
	return t.currentItems[requestedItemIndex], nil
}

// readBlock reads the block pointer at t.offset and decodes count items from
// the metadata stream it points to. It does not modify the table; the caller
// must hold the write lock.
func (t *Table[T]) readBlock(count uint32) ([]T, error) {
	var blockStart uint64
	err := binary.Read(toreader.NewReader(t.rdr.r, int64(t.offset)), binary.LittleEndian, &blockStart)
	if err != nil {
		return nil, err
	}
	metaRdr := metadata.NewReader(toreader.NewReader(t.rdr.r, int64(blockStart)), t.rdr.d)
	// Closed per block, mirroring how InodeFromRef treats its metadata reader.
	defer metaRdr.Close()
	items := make([]T, count)
	for i := range items {
		items[i], err = t.createFunc(&metaRdr)
		if err != nil {
			return nil, err
		}
	}
	return items, nil
}
+2 -2
View File
@@ -116,8 +116,8 @@ func BenchmarkRace(b *testing.B) {
b.Log("Unsquashfs error:", err) b.Log("Unsquashfs error:", err)
} }
unsquashTime = time.Since(start) unsquashTime = time.Since(start)
b.Log("Library took:", libTime.Round(time.Millisecond)) // b.Log("Library took:", libTime.Round(time.Millisecond))
b.Log("unsquashfs took:", unsquashTime.Round(time.Millisecond)) // b.Log("unsquashfs took:", unsquashTime.Round(time.Millisecond))
b.Log("unsquashfs is", strconv.FormatFloat(float64(libTime.Milliseconds())/float64(unsquashTime.Milliseconds()), 'f', 2, 64), "times faster") b.Log("unsquashfs is", strconv.FormatFloat(float64(libTime.Milliseconds())/float64(unsquashTime.Milliseconds()), 'f', 2, 64), "times faster")
} }