Finishing touches
Added FastOptions as an alternative to DefaultOptions. A few performance improvements. A few bug fixes.
This commit is contained in:
@@ -11,18 +11,27 @@ Currently has support for reading squashfs files and extracting files and folder
|
||||
Special thanks to <https://dr-emann.github.io/squashfs/> for some VERY important information in an easy to understand format.
|
||||
Thanks also to [distri's squashfs library](https://github.com/distr1/distri/tree/master/internal/squashfs) as I referenced it to figure some things out (and double check others).
|
||||
|
||||
## FUSE
|
||||
|
||||
As of `v1.0`, FUSE capabilities have been moved to [a separate library](https://github.com/CalebQ42/squashfuse).
|
||||
|
||||
## Limitations
|
||||
|
||||
* No Xattr parsing. This is simply because I haven't done any research on it and how to apply these in a pure go way.
|
||||
* No Xattr parsing.
|
||||
* Socket files are not extracted.
|
||||
* From my research, it seems like a socket file would be useless if it could be created. They are still exposed when fuse mounted.
|
||||
* From my research, it seems like a socket file would be useless if it could be created.
|
||||
* Fifo files are ignored on `darwin`
|
||||
|
||||
## Issues
|
||||
|
||||
* Significantly slower than `unsquashfs` when extracting folders (about 5 ~ 7 times slower on a ~100MB archive using zstd compression)
|
||||
* Significantly slower than `unsquashfs` when extracting folders
|
||||
* This seems to be related to the above along with the general optimization of `unsquashfs` and its compression libraries.
|
||||
* The larger the file's tree, the slower the extraction will be. Arch Linux's Live USB's airootfs.sfs takes ~35x longer for a full extraction.
|
||||
* Times seem to be largely dependent on file tree size and compression type.
|
||||
* My main testing image (~100MB) using Zstd takes about 6x longer.
|
||||
* An Arch Linux airootfs image (~780MB) using XZ compression with LZMA filters takes about 32x longer.
|
||||
* A Tensorflow docker image (~3.3GB) using Zstd takes about 12x longer.
|
||||
|
||||
Note: These numbers are using `FastOptions()`. `DefaultOptions()` takes about 2x longer.
|
||||
|
||||
## Recommendations on Usage
|
||||
|
||||
|
||||
+26
-7
@@ -3,28 +3,47 @@ package squashfs
|
||||
import (
|
||||
"io"
|
||||
"io/fs"
|
||||
"os"
|
||||
"runtime"
|
||||
|
||||
"github.com/CalebQ42/squashfs/internal/routinemanager"
|
||||
)
|
||||
|
||||
type ExtractionOptions struct {
|
||||
manager *routinemanager.Manager
|
||||
LogOutput io.Writer //Where the verbose log should write. Defaults to os.Stdout.
|
||||
LogOutput io.Writer //Where the verbose log should write.
|
||||
DereferenceSymlink bool //Replace symlinks with the target file.
|
||||
UnbreakSymlink bool //Try to make sure symlinks remain unbroken when extracted, without changing the symlink.
|
||||
Verbose bool //Prints extra info to log on an error.
|
||||
IgnorePerm bool //Ignore file's permissions and instead use Perm.
|
||||
Perm fs.FileMode //Permission to use when IgnorePerm. Defaults to 0777.
|
||||
SimultaneousFiles uint16 //Number of files to process in parallel. Defaults to 10.
|
||||
ExtractionRoutines uint16 //Number of goroutines to use for each file's extraction. Only applies to regular files. Defaults to 10.
|
||||
SimultaneousFiles uint16 //Number of files to process in parallel. Default set based on runtime.NumCPU().
|
||||
ExtractionRoutines uint16 //Number of goroutines to use for each file's extraction. Only applies to regular files. Default set based on runtime.NumCPU().
|
||||
}
|
||||
|
||||
// The default extraction options.
|
||||
func DefaultOptions() *ExtractionOptions {
|
||||
cores := uint16(runtime.NumCPU() / 2)
|
||||
var files, routines uint16
|
||||
if cores <= 4 {
|
||||
files = 1
|
||||
routines = cores
|
||||
} else {
|
||||
files = cores - 4
|
||||
routines = 4
|
||||
}
|
||||
return &ExtractionOptions{
|
||||
LogOutput: os.Stdout,
|
||||
Perm: 0777,
|
||||
SimultaneousFiles: 10,
|
||||
ExtractionRoutines: 10,
|
||||
SimultaneousFiles: files,
|
||||
ExtractionRoutines: routines,
|
||||
}
|
||||
}
|
||||
|
||||
// Less limited default options. Can run up 2x faster than DefaultOptions.
|
||||
// Tends to use all available CPU resources.
|
||||
func FastOptions() *ExtractionOptions {
|
||||
return &ExtractionOptions{
|
||||
Perm: 0777,
|
||||
SimultaneousFiles: uint16(runtime.NumCPU()),
|
||||
ExtractionRoutines: uint16(runtime.NumCPU()),
|
||||
}
|
||||
}
|
||||
|
||||
@@ -27,6 +27,15 @@ type File struct {
|
||||
dirsRead int
|
||||
}
|
||||
|
||||
// Creates a new *File from the given *squashfs.Base
|
||||
func (r *Reader) FileFromBase(b *squashfs.Base, parent *FS) *File {
|
||||
return &File{
|
||||
b: b,
|
||||
parent: parent,
|
||||
r: r,
|
||||
}
|
||||
}
|
||||
|
||||
func (f *File) FS() (*FS, error) {
|
||||
if !f.IsDir() {
|
||||
return nil, errors.New("not a directory")
|
||||
@@ -179,6 +188,9 @@ func (f *File) deviceDevices() (maj uint32, min uint32) {
|
||||
}
|
||||
|
||||
func (f *File) path() string {
|
||||
if f.parent == nil {
|
||||
return f.b.Name
|
||||
}
|
||||
return filepath.Join(f.parent.path(), f.b.Name)
|
||||
}
|
||||
|
||||
@@ -193,7 +205,16 @@ func (f *File) Extract(folder string) error {
|
||||
func (f *File) ExtractWithOptions(path string, op *ExtractionOptions) error {
|
||||
if op.manager == nil {
|
||||
op.manager = routinemanager.NewManager(op.SimultaneousFiles)
|
||||
log.SetOutput(op.LogOutput)
|
||||
if op.LogOutput != nil {
|
||||
log.SetOutput(op.LogOutput)
|
||||
}
|
||||
err := os.MkdirAll(path, 0777)
|
||||
if err != nil {
|
||||
if op.Verbose {
|
||||
log.Println("Failed to create initial directory", path)
|
||||
}
|
||||
return err
|
||||
}
|
||||
}
|
||||
switch f.b.Inode.Type {
|
||||
case inode.Dir, inode.EDir:
|
||||
@@ -205,7 +226,6 @@ func (f *File) ExtractWithOptions(path string, op *ExtractionOptions) error {
|
||||
return errors.Join(errors.New("failed to create squashfs.Directory: "+path), err)
|
||||
}
|
||||
errChan := make(chan error, len(d.Entries))
|
||||
files := len(d.Entries)
|
||||
for i := range d.Entries {
|
||||
b, err := f.r.r.BaseFromEntry(d.Entries[i])
|
||||
if err != nil {
|
||||
@@ -214,37 +234,39 @@ func (f *File) ExtractWithOptions(path string, op *ExtractionOptions) error {
|
||||
}
|
||||
return errors.Join(errors.New("failed to get base from entry: "+path), err)
|
||||
}
|
||||
if b.IsDir() {
|
||||
files--
|
||||
extDir := filepath.Join(path, b.Name)
|
||||
err = os.Mkdir(extDir, 0777)
|
||||
if err != nil {
|
||||
if op.Verbose {
|
||||
log.Println("Failed to create directory", path)
|
||||
go func(b *squashfs.Base, path string) {
|
||||
i := op.manager.Lock()
|
||||
if b.IsDir() {
|
||||
extDir := filepath.Join(path, b.Name)
|
||||
err = os.Mkdir(extDir, 0777)
|
||||
op.manager.Unlock(i)
|
||||
if err != nil {
|
||||
if op.Verbose {
|
||||
log.Println("Failed to create directory", path)
|
||||
}
|
||||
errChan <- errors.Join(errors.New("failed to create directory: "+path), err)
|
||||
return
|
||||
}
|
||||
return errors.Join(errors.New("failed to create directory: "+path), err)
|
||||
}
|
||||
err = f.ExtractWithOptions(extDir, op)
|
||||
if err != nil {
|
||||
if op.Verbose {
|
||||
log.Println("Failed to extract directory", path)
|
||||
err = f.r.FileFromBase(b, f.r.FSFromDirectory(d, f.parent)).ExtractWithOptions(extDir, op)
|
||||
if err != nil {
|
||||
if op.Verbose {
|
||||
log.Println("Failed to extract directory", path)
|
||||
}
|
||||
errChan <- errors.Join(errors.New("failed to extract directory: "+path), err)
|
||||
return
|
||||
}
|
||||
return errors.Join(errors.New("failed to extract directory: "+path), err)
|
||||
errChan <- nil
|
||||
} else {
|
||||
fil := f.r.FileFromBase(b, f.r.FSFromDirectory(d, f.parent))
|
||||
err = fil.ExtractWithOptions(path, op)
|
||||
op.manager.Unlock(i)
|
||||
fil.Close()
|
||||
errChan <- err
|
||||
}
|
||||
} else {
|
||||
fil := &File{
|
||||
b: b,
|
||||
r: f.r,
|
||||
}
|
||||
go func(fil *File, folder string) {
|
||||
i := op.manager.Lock()
|
||||
defer op.manager.Unlock(i)
|
||||
errChan <- fil.ExtractWithOptions(folder, op)
|
||||
}(fil, path)
|
||||
}
|
||||
}(b, path)
|
||||
}
|
||||
var errCache []error
|
||||
for i := 0; i < files; i++ {
|
||||
for i := 0; i < len(d.Entries); i++ {
|
||||
err := <-errChan
|
||||
if err != nil {
|
||||
errCache = append(errCache, err)
|
||||
@@ -278,9 +300,6 @@ func (f *File) ExtractWithOptions(path string, op *ExtractionOptions) error {
|
||||
}
|
||||
return errors.Join(errors.New("failed to write file: "+path), err)
|
||||
}
|
||||
if op.Verbose {
|
||||
log.Println(f.path(), "extracted to", path)
|
||||
}
|
||||
case inode.Sym, inode.ESym:
|
||||
symPath := f.SymlinkPath()
|
||||
if op.DereferenceSymlink {
|
||||
|
||||
@@ -20,6 +20,15 @@ type FS struct {
|
||||
parent *FS
|
||||
}
|
||||
|
||||
// Creates a new *FS from the given squashfs.directory
|
||||
func (r *Reader) FSFromDirectory(d *squashfs.Directory, parent *FS) *FS {
|
||||
return &FS{
|
||||
d: d,
|
||||
r: r,
|
||||
parent: parent,
|
||||
}
|
||||
}
|
||||
|
||||
// Glob returns the name of the files at the given pattern.
|
||||
// All paths are relative to the FS.
|
||||
// Uses filepath.Match to compare names.
|
||||
@@ -101,9 +110,9 @@ func (f *FS) Open(name string) (fs.File, error) {
|
||||
Path: name,
|
||||
Err: fs.ErrNotExist,
|
||||
}
|
||||
} else {
|
||||
return f.parent.Open(strings.Join(split[1:], "/"))
|
||||
}
|
||||
} else {
|
||||
return f.parent.Open(strings.Join(split[1:], "/"))
|
||||
}
|
||||
i, found := slices.BinarySearchFunc(f.d.Entries, split[0], func(e directory.Entry, name string) int {
|
||||
return strings.Compare(e.Name, name)
|
||||
@@ -137,11 +146,7 @@ func (f *FS) Open(name string) (fs.File, error) {
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return (&FS{
|
||||
d: d,
|
||||
r: f.r,
|
||||
parent: f,
|
||||
}).Open(strings.Join(split[1:], "/"))
|
||||
return f.r.FSFromDirectory(d, f).Open(strings.Join(split[1:], "/"))
|
||||
}
|
||||
|
||||
// Returns all DirEntry's for the directory at name.
|
||||
|
||||
@@ -17,12 +17,14 @@ func NewReader(r io.ReaderAt) (*Reader, error) {
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return &Reader{
|
||||
out := &Reader{
|
||||
r: rdr,
|
||||
FS: &FS{
|
||||
d: rdr.Root,
|
||||
},
|
||||
}, nil
|
||||
}
|
||||
out.FS = &FS{
|
||||
d: rdr.Root,
|
||||
r: out,
|
||||
}
|
||||
return out, nil
|
||||
}
|
||||
|
||||
func (r *Reader) ModTime() time.Time {
|
||||
|
||||
@@ -5,6 +5,7 @@ import (
|
||||
"errors"
|
||||
"io"
|
||||
"math"
|
||||
"runtime"
|
||||
"sync"
|
||||
|
||||
"github.com/CalebQ42/squashfs/internal/decompress"
|
||||
@@ -31,7 +32,7 @@ func NewFullReader(r io.ReaderAt, initialOffset int64, d decompress.Decompressor
|
||||
d: d,
|
||||
sizes: sizes,
|
||||
initialOffset: initialOffset,
|
||||
goroutineLimit: 10,
|
||||
goroutineLimit: uint16(runtime.NumCPU()),
|
||||
finalBlockSize: finalBlockSize,
|
||||
blockSize: blockSize,
|
||||
retPool: &sync.Pool{
|
||||
|
||||
@@ -2,7 +2,6 @@ package squashfs
|
||||
|
||||
import (
|
||||
"errors"
|
||||
"fmt"
|
||||
"io/fs"
|
||||
"path/filepath"
|
||||
"slices"
|
||||
@@ -22,7 +21,6 @@ type Directory struct {
|
||||
func (r *Reader) directoryFromRef(ref uint64, name string) (*Directory, error) {
|
||||
i, err := r.InodeFromRef(ref)
|
||||
if err != nil {
|
||||
fmt.Println("yo")
|
||||
return nil, err
|
||||
}
|
||||
var blockStart uint32
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
package squashfs
|
||||
package squashfs_test
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
@@ -8,6 +8,8 @@ import (
|
||||
"os/exec"
|
||||
"path/filepath"
|
||||
"testing"
|
||||
|
||||
"github.com/CalebQ42/squashfs/squashfs"
|
||||
)
|
||||
|
||||
const (
|
||||
@@ -55,7 +57,7 @@ func TestReader(t *testing.T) {
|
||||
t.Fatal(err)
|
||||
}
|
||||
defer fil.Close()
|
||||
rdr, err := NewReader(fil)
|
||||
rdr, err := squashfs.NewReader(fil)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
@@ -75,7 +77,7 @@ func TestSingleFile(t *testing.T) {
|
||||
t.Fatal(err)
|
||||
}
|
||||
defer fil.Close()
|
||||
rdr, err := NewReader(fil)
|
||||
rdr, err := squashfs.NewReader(fil)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
@@ -90,7 +92,7 @@ func TestSingleFile(t *testing.T) {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
func extractToDir(rdr *Reader, b *Base, folder string) error {
|
||||
func extractToDir(rdr *squashfs.Reader, b *squashfs.Base, folder string) error {
|
||||
path := filepath.Join(folder, b.Name)
|
||||
if b.IsDir() {
|
||||
d, err := b.ToDir(rdr)
|
||||
@@ -101,7 +103,7 @@ func extractToDir(rdr *Reader, b *Base, folder string) error {
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
var nestBast *Base
|
||||
var nestBast *squashfs.Base
|
||||
for _, e := range d.Entries {
|
||||
nestBast, err = rdr.BaseFromEntry(e)
|
||||
if err != nil {
|
||||
@@ -115,7 +117,6 @@ func extractToDir(rdr *Reader, b *Base, folder string) error {
|
||||
} else if b.IsRegular() {
|
||||
_, full, err := b.GetRegFileReaders(rdr)
|
||||
if err != nil {
|
||||
fmt.Println("yo", path)
|
||||
return err
|
||||
}
|
||||
fil, err := os.Create(path)
|
||||
|
||||
@@ -0,0 +1,193 @@
|
||||
package squashfs_test
|
||||
|
||||
//Actually proper tests go here.
|
||||
|
||||
import (
|
||||
"errors"
|
||||
"io"
|
||||
"io/fs"
|
||||
"net/http"
|
||||
"os"
|
||||
"os/exec"
|
||||
"path/filepath"
|
||||
"strconv"
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
"github.com/CalebQ42/squashfs"
|
||||
)
|
||||
|
||||
const (
|
||||
squashfsURL = "https://darkstorm.tech/files/LinuxPATest.sfs"
|
||||
squashfsName = "airootfs.sfs"
|
||||
)
|
||||
|
||||
func preTest(dir string) (fil *os.File, err error) {
|
||||
fil, err = os.Open(filepath.Join(dir, squashfsName))
|
||||
if err != nil {
|
||||
_, err = os.Open(dir)
|
||||
if os.IsNotExist(err) {
|
||||
err = os.Mkdir(dir, 0755)
|
||||
}
|
||||
if err != nil {
|
||||
return
|
||||
}
|
||||
os.Remove(filepath.Join(dir, squashfsName))
|
||||
fil, err = os.Create(filepath.Join(dir, squashfsName))
|
||||
if err != nil {
|
||||
return
|
||||
}
|
||||
var resp *http.Response
|
||||
resp, err = http.DefaultClient.Get(squashfsURL)
|
||||
if err != nil {
|
||||
return
|
||||
}
|
||||
_, err = io.Copy(fil, resp.Body)
|
||||
if err != nil {
|
||||
return
|
||||
}
|
||||
}
|
||||
_, err = exec.LookPath("unsquashfs")
|
||||
if err != nil {
|
||||
return
|
||||
}
|
||||
_, err = exec.LookPath("mksquashfs")
|
||||
return
|
||||
}
|
||||
|
||||
func TestMisc(t *testing.T) {
|
||||
tmpDir := "testing"
|
||||
fil, err := preTest(tmpDir)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
rdr, err := squashfs.NewReader(fil)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
_ = rdr
|
||||
// Put testing here
|
||||
t.Fatal("UM")
|
||||
}
|
||||
|
||||
func BenchmarkRace(b *testing.B) {
|
||||
tmpDir := "testing"
|
||||
fil, err := preTest(tmpDir)
|
||||
if err != nil {
|
||||
b.Fatal(err)
|
||||
}
|
||||
libPath := filepath.Join(tmpDir, "ExtractLib")
|
||||
unsquashPath := filepath.Join(tmpDir, "ExtractSquashfs")
|
||||
os.RemoveAll(libPath)
|
||||
os.RemoveAll(unsquashPath)
|
||||
var libTime, unsquashTime time.Duration
|
||||
op := squashfs.FastOptions()
|
||||
start := time.Now()
|
||||
rdr, err := squashfs.NewReader(fil)
|
||||
if err != nil {
|
||||
b.Fatal(err)
|
||||
}
|
||||
err = rdr.ExtractWithOptions(libPath, op)
|
||||
if err != nil {
|
||||
b.Fatal(err)
|
||||
}
|
||||
libTime = time.Since(start)
|
||||
cmd := exec.Command("unsquashfs", "-d", unsquashPath, fil.Name())
|
||||
cmd.Stdout = os.Stdout
|
||||
cmd.Stderr = os.Stderr
|
||||
start = time.Now()
|
||||
err = cmd.Run()
|
||||
if err != nil {
|
||||
b.Log("Unsquashfs error:", err)
|
||||
}
|
||||
unsquashTime = time.Since(start)
|
||||
b.Log("Library took:", libTime.Round(time.Millisecond))
|
||||
b.Log("unsquashfs took:", unsquashTime.Round(time.Millisecond))
|
||||
b.Log("unsquashfs is", strconv.FormatFloat(float64(libTime.Milliseconds())/float64(unsquashTime.Milliseconds()), 'f', 2, 64), "times faster")
|
||||
}
|
||||
|
||||
func TestExtractQuick(t *testing.T) {
|
||||
//First, setup everything and extract the archive using the library and unsquashfs
|
||||
|
||||
// tmpDir := b.TempDir()
|
||||
tmpDir := "testing"
|
||||
fil, err := preTest(tmpDir)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
libPath := filepath.Join(tmpDir, "ExtractLib")
|
||||
unsquashPath := filepath.Join(tmpDir, "ExtractSquashfs")
|
||||
os.RemoveAll(libPath)
|
||||
os.RemoveAll(unsquashPath)
|
||||
rdr, err := squashfs.NewReader(fil)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
os.RemoveAll(filepath.Join(tmpDir, "testLog.txt"))
|
||||
logFil, _ := os.Create(filepath.Join(tmpDir, "testLog.txt"))
|
||||
op := squashfs.DefaultOptions()
|
||||
op.Verbose = true
|
||||
op.IgnorePerm = true
|
||||
op.LogOutput = logFil
|
||||
err = rdr.ExtractWithOptions(libPath, op)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
cmd := exec.Command("unsquashfs", "-d", unsquashPath, fil.Name())
|
||||
err = cmd.Run()
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
//Then compare the sizes and existance between the two (using unsquashfs as a reference).
|
||||
//If the file doesn't exist, or the size is different, we exit.
|
||||
//TODO: Add long test that checks contents.
|
||||
|
||||
squashFils := os.DirFS(unsquashPath)
|
||||
err = fs.WalkDir(squashFils, ".", func(path string, _ fs.DirEntry, _ error) error {
|
||||
libFil, e := os.Open(filepath.Join(libPath, path))
|
||||
if e != nil {
|
||||
return e
|
||||
}
|
||||
sfsFile, e := os.Open(filepath.Join(unsquashPath, path))
|
||||
if e != nil {
|
||||
return e
|
||||
}
|
||||
sfsStat, _ := sfsFile.Stat()
|
||||
libStat, _ := libFil.Stat()
|
||||
if sfsStat.Size() != libStat.Size() {
|
||||
t.Log(libFil.Name(), "not the same size between library and unsquashfs")
|
||||
t.Log("File is", libStat.Size())
|
||||
t.Log("Should be", sfsStat.Size())
|
||||
return errors.New("file not the correct size")
|
||||
}
|
||||
return nil
|
||||
})
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
}
|
||||
|
||||
var filePath = "bin"
|
||||
|
||||
func TestSingleFile(t *testing.T) {
|
||||
tmpDir := "testing"
|
||||
fil, err := preTest(tmpDir)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
os.Remove(filepath.Join(tmpDir, filePath))
|
||||
rdr, err := squashfs.NewReader(fil)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
f, err := rdr.Open(filePath)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
err = f.(*squashfs.File).ExtractWithOptions("testing", &squashfs.ExtractionOptions{Verbose: true})
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
t.Fatal("HI")
|
||||
}
|
||||
Reference in New Issue
Block a user