From 17d45eea505ef4a8191728ce4275bd0c816394a1 Mon Sep 17 00:00:00 2001 From: Caleb Gardner Date: Wed, 27 Dec 2023 21:35:40 -0600 Subject: [PATCH] Finishing touches Added FastOptions as an alternative to DefaultOptions A few performance improvements A few bug fixes --- README.md | 17 +++- extraction_options.go | 33 ++++-- file.go | 81 +++++++++------ fs.go | 19 ++-- reader.go | 12 ++- squashfs/data/fullreader.go | 3 +- squashfs/directory.go | 2 - squashfs/reader_test.go | 13 +-- squashfs_test.go | 193 ++++++++++++++++++++++++++++++++++++ 9 files changed, 310 insertions(+), 63 deletions(-) create mode 100644 squashfs_test.go diff --git a/README.md b/README.md index fcd6f07..92dd55a 100644 --- a/README.md +++ b/README.md @@ -11,18 +11,27 @@ Currently has support for reading squashfs files and extracting files and folder Special thanks to for some VERY important information in an easy to understand format. Thanks also to [distri's squashfs library](https://github.com/distr1/distri/tree/master/internal/squashfs) as I referenced it to figure some things out (and double check others). +## FUSE + +As of `v1.0`, FUSE capabilities have been moved to [a separate library](https://github.com/CalebQ42/squashfuse). + ## Limitations -* No Xattr parsing. This is simply because I haven't done any research on it and how to apply these in a pure go way. +* No Xattr parsing. * Socket files are not extracted. - * From my research, it seems like a socket file would be useless if it could be created. They are still exposed when fuse mounted. + * From my research, it seems like a socket file would be useless if it could be created. 
* Fifo files are ignored on `darwin` ## Issues -* Significantly slower then `unsquashfs` when extracting folders (about 5 ~ 7 times slower on a ~100MB archive using zstd compression) +* Significantly slower than `unsquashfs` when extracting folders * This seems to be related to above along with the general optimization of `unsquashfs` and it's compression libraries. - * The larger the file's tree, the slower the extraction will be. Arch Linux's Live USB's airootfs.sfs takes ~35x longer for a full extraction. + * Times seem to be largely dependent on file tree size and compression type. + * My main testing image (~100MB) using Zstd takes about 6x longer. + * An Arch Linux airootfs image (~780MB) using XZ compression with LZMA filters takes about 32x longer. + * A Tensorflow docker image (~3.3GB) using Zstd takes about 12x longer. + +Note: These numbers are using `FastOptions()`. `DefaultOptions()` takes about 2x longer. ## Recommendations on Usage diff --git a/extraction_options.go b/extraction_options.go index ad917de..c55a5ec 100644 --- a/extraction_options.go +++ b/extraction_options.go @@ -3,28 +3,47 @@ package squashfs import ( "io" "io/fs" - "os" + "runtime" "github.com/CalebQ42/squashfs/internal/routinemanager" ) type ExtractionOptions struct { manager *routinemanager.Manager - LogOutput io.Writer //Where the verbose log should write. Defaults to os.Stdout. + LogOutput io.Writer //Where the verbose log should write. DereferenceSymlink bool //Replace symlinks with the target file. UnbreakSymlink bool //Try to make sure symlinks remain unbroken when extracted, without changing the symlink. Verbose bool //Prints extra info to log on an error. IgnorePerm bool //Ignore file's permissions and instead use Perm. Perm fs.FileMode //Permission to use when IgnorePerm. Defaults to 0777. - SimultaneousFiles uint16 //Number of files to process in parallel. Defaults to 10. - ExtractionRoutines uint16 //Number of goroutines to use for each file's extraction. 
Only applies to regular files. Defaults to 10. + SimultaneousFiles uint16 //Number of files to process in parallel. Default set based on runtime.NumCPU(). + ExtractionRoutines uint16 //Number of goroutines to use for each file's extraction. Only applies to regular files. Default set based on runtime.NumCPU(). } +// The default extraction options. func DefaultOptions() *ExtractionOptions { + cores := uint16(runtime.NumCPU() / 2) + var files, routines uint16 + if cores <= 4 { + files = 1 + routines = cores + } else { + files = cores - 4 + routines = 4 + } return &ExtractionOptions{ - LogOutput: os.Stdout, Perm: 0777, - SimultaneousFiles: 10, - ExtractionRoutines: 10, + SimultaneousFiles: files, + ExtractionRoutines: routines, + } +} + +// Less limited default options. Can run up to 2x faster than DefaultOptions. +// Tends to use all available CPU resources. +func FastOptions() *ExtractionOptions { + return &ExtractionOptions{ + Perm: 0777, + SimultaneousFiles: uint16(runtime.NumCPU()), + ExtractionRoutines: uint16(runtime.NumCPU()), } } diff --git a/file.go b/file.go index 2c51214..1b7cbd1 100644 --- a/file.go +++ b/file.go @@ -27,6 +27,15 @@ type File struct { dirsRead int } +// Creates a new *File from the given *squashfs.Base +func (r *Reader) FileFromBase(b *squashfs.Base, parent *FS) *File { + return &File{ + b: b, + parent: parent, + r: r, + } +} + func (f *File) FS() (*FS, error) { if !f.IsDir() { return nil, errors.New("not a directory") @@ -179,6 +188,9 @@ func (f *File) deviceDevices() (maj uint32, min uint32) { } func (f *File) path() string { + if f.parent == nil { + return f.b.Name + } return filepath.Join(f.parent.path(), f.b.Name) } @@ -193,7 +205,16 @@ func (f *File) Extract(folder string) error { func (f *File) ExtractWithOptions(path string, op *ExtractionOptions) error { if op.manager == nil { op.manager = routinemanager.NewManager(op.SimultaneousFiles) - log.SetOutput(op.LogOutput) + if op.LogOutput != nil { + log.SetOutput(op.LogOutput) + } + err := 
os.MkdirAll(path, 0777) + if err != nil { + if op.Verbose { + log.Println("Failed to create initial directory", path) + } + return err + } } switch f.b.Inode.Type { case inode.Dir, inode.EDir: @@ -205,7 +226,6 @@ func (f *File) ExtractWithOptions(path string, op *ExtractionOptions) error { return errors.Join(errors.New("failed to create squashfs.Directory: "+path), err) } errChan := make(chan error, len(d.Entries)) - files := len(d.Entries) for i := range d.Entries { b, err := f.r.r.BaseFromEntry(d.Entries[i]) if err != nil { @@ -214,37 +234,39 @@ func (f *File) ExtractWithOptions(path string, op *ExtractionOptions) error { } return errors.Join(errors.New("failed to get base from entry: "+path), err) } - if b.IsDir() { - files-- - extDir := filepath.Join(path, b.Name) - err = os.Mkdir(extDir, 0777) - if err != nil { - if op.Verbose { - log.Println("Failed to create directory", path) + go func(b *squashfs.Base, path string) { + i := op.manager.Lock() + if b.IsDir() { + extDir := filepath.Join(path, b.Name) + err = os.Mkdir(extDir, 0777) + op.manager.Unlock(i) + if err != nil { + if op.Verbose { + log.Println("Failed to create directory", path) + } + errChan <- errors.Join(errors.New("failed to create directory: "+path), err) + return } - return errors.Join(errors.New("failed to create directory: "+path), err) - } - err = f.ExtractWithOptions(extDir, op) - if err != nil { - if op.Verbose { - log.Println("Failed to extract directory", path) + err = f.r.FileFromBase(b, f.r.FSFromDirectory(d, f.parent)).ExtractWithOptions(extDir, op) + if err != nil { + if op.Verbose { + log.Println("Failed to extract directory", path) + } + errChan <- errors.Join(errors.New("failed to extract directory: "+path), err) + return } - return errors.Join(errors.New("failed to extract directory: "+path), err) + errChan <- nil + } else { + fil := f.r.FileFromBase(b, f.r.FSFromDirectory(d, f.parent)) + err = fil.ExtractWithOptions(path, op) + op.manager.Unlock(i) + fil.Close() + errChan <- err 
} - } else { - fil := &File{ - b: b, - r: f.r, - } - go func(fil *File, folder string) { - i := op.manager.Lock() - defer op.manager.Unlock(i) - errChan <- fil.ExtractWithOptions(folder, op) - }(fil, path) - } + }(b, path) } var errCache []error - for i := 0; i < files; i++ { + for i := 0; i < len(d.Entries); i++ { err := <-errChan if err != nil { errCache = append(errCache, err) @@ -278,9 +300,6 @@ func (f *File) ExtractWithOptions(path string, op *ExtractionOptions) error { } return errors.Join(errors.New("failed to write file: "+path), err) } - if op.Verbose { - log.Println(f.path(), "extracted to", path) - } case inode.Sym, inode.ESym: symPath := f.SymlinkPath() if op.DereferenceSymlink { diff --git a/fs.go b/fs.go index 39f08cd..e81acb0 100644 --- a/fs.go +++ b/fs.go @@ -20,6 +20,15 @@ type FS struct { parent *FS } +// Creates a new *FS from the given squashfs.directory +func (r *Reader) FSFromDirectory(d *squashfs.Directory, parent *FS) *FS { + return &FS{ + d: d, + r: r, + parent: parent, + } +} + // Glob returns the name of the files at the given pattern. // All paths are relative to the FS. // Uses filepath.Match to compare names. @@ -101,9 +110,9 @@ func (f *FS) Open(name string) (fs.File, error) { Path: name, Err: fs.ErrNotExist, } + } else { + return f.parent.Open(strings.Join(split[1:], "/")) } - } else { - return f.parent.Open(strings.Join(split[1:], "/")) } i, found := slices.BinarySearchFunc(f.d.Entries, split[0], func(e directory.Entry, name string) int { return strings.Compare(e.Name, name) @@ -137,11 +146,7 @@ func (f *FS) Open(name string) (fs.File, error) { if err != nil { return nil, err } - return (&FS{ - d: d, - r: f.r, - parent: f, - }).Open(strings.Join(split[1:], "/")) + return f.r.FSFromDirectory(d, f).Open(strings.Join(split[1:], "/")) } // Returns all DirEntry's for the directory at name. 
diff --git a/reader.go b/reader.go index 61946f4..43058c1 100644 --- a/reader.go +++ b/reader.go @@ -17,12 +17,14 @@ func NewReader(r io.ReaderAt) (*Reader, error) { if err != nil { return nil, err } - return &Reader{ + out := &Reader{ r: rdr, - FS: &FS{ - d: rdr.Root, - }, - }, nil + } + out.FS = &FS{ + d: rdr.Root, + r: out, + } + return out, nil } func (r *Reader) ModTime() time.Time { diff --git a/squashfs/data/fullreader.go b/squashfs/data/fullreader.go index d085e90..d31853d 100644 --- a/squashfs/data/fullreader.go +++ b/squashfs/data/fullreader.go @@ -5,6 +5,7 @@ import ( "errors" "io" "math" + "runtime" "sync" "github.com/CalebQ42/squashfs/internal/decompress" @@ -31,7 +32,7 @@ func NewFullReader(r io.ReaderAt, initialOffset int64, d decompress.Decompressor d: d, sizes: sizes, initialOffset: initialOffset, - goroutineLimit: 10, + goroutineLimit: uint16(runtime.NumCPU()), finalBlockSize: finalBlockSize, blockSize: blockSize, retPool: &sync.Pool{ diff --git a/squashfs/directory.go b/squashfs/directory.go index 07fcb82..1681e89 100644 --- a/squashfs/directory.go +++ b/squashfs/directory.go @@ -2,7 +2,6 @@ package squashfs import ( "errors" - "fmt" "io/fs" "path/filepath" "slices" @@ -22,7 +21,6 @@ type Directory struct { func (r *Reader) directoryFromRef(ref uint64, name string) (*Directory, error) { i, err := r.InodeFromRef(ref) if err != nil { - fmt.Println("yo") return nil, err } var blockStart uint32 diff --git a/squashfs/reader_test.go b/squashfs/reader_test.go index 38bd4ad..fa85bf9 100644 --- a/squashfs/reader_test.go +++ b/squashfs/reader_test.go @@ -1,4 +1,4 @@ -package squashfs +package squashfs_test import ( "fmt" @@ -8,6 +8,8 @@ import ( "os/exec" "path/filepath" "testing" + + "github.com/CalebQ42/squashfs/squashfs" ) const ( @@ -55,7 +57,7 @@ func TestReader(t *testing.T) { t.Fatal(err) } defer fil.Close() - rdr, err := NewReader(fil) + rdr, err := squashfs.NewReader(fil) if err != nil { t.Fatal(err) } @@ -75,7 +77,7 @@ func TestSingleFile(t 
*testing.T) { t.Fatal(err) } defer fil.Close() - rdr, err := NewReader(fil) + rdr, err := squashfs.NewReader(fil) if err != nil { t.Fatal(err) } @@ -90,7 +92,7 @@ func TestSingleFile(t *testing.T) { t.Fatal(err) } -func extractToDir(rdr *Reader, b *Base, folder string) error { +func extractToDir(rdr *squashfs.Reader, b *squashfs.Base, folder string) error { path := filepath.Join(folder, b.Name) if b.IsDir() { d, err := b.ToDir(rdr) @@ -101,7 +103,7 @@ func extractToDir(rdr *Reader, b *Base, folder string) error { if err != nil { return err } - var nestBast *Base + var nestBast *squashfs.Base for _, e := range d.Entries { nestBast, err = rdr.BaseFromEntry(e) if err != nil { @@ -115,7 +117,6 @@ func extractToDir(rdr *Reader, b *Base, folder string) error { } else if b.IsRegular() { _, full, err := b.GetRegFileReaders(rdr) if err != nil { - fmt.Println("yo", path) return err } fil, err := os.Create(path) diff --git a/squashfs_test.go b/squashfs_test.go new file mode 100644 index 0000000..0ae1658 --- /dev/null +++ b/squashfs_test.go @@ -0,0 +1,193 @@ +package squashfs_test + +//Actually proper tests go here. 
+ +import ( + "errors" + "io" + "io/fs" + "net/http" + "os" + "os/exec" + "path/filepath" + "strconv" + "testing" + "time" + + "github.com/CalebQ42/squashfs" +) + +const ( + squashfsURL = "https://darkstorm.tech/files/LinuxPATest.sfs" + squashfsName = "airootfs.sfs" +) + +func preTest(dir string) (fil *os.File, err error) { + fil, err = os.Open(filepath.Join(dir, squashfsName)) + if err != nil { + _, err = os.Open(dir) + if os.IsNotExist(err) { + err = os.Mkdir(dir, 0755) + } + if err != nil { + return + } + os.Remove(filepath.Join(dir, squashfsName)) + fil, err = os.Create(filepath.Join(dir, squashfsName)) + if err != nil { + return + } + var resp *http.Response + resp, err = http.DefaultClient.Get(squashfsURL) + if err != nil { + return + } + _, err = io.Copy(fil, resp.Body) + if err != nil { + return + } + } + _, err = exec.LookPath("unsquashfs") + if err != nil { + return + } + _, err = exec.LookPath("mksquashfs") + return +} + +func TestMisc(t *testing.T) { + tmpDir := "testing" + fil, err := preTest(tmpDir) + if err != nil { + t.Fatal(err) + } + rdr, err := squashfs.NewReader(fil) + if err != nil { + t.Fatal(err) + } + _ = rdr + // Put testing here + t.Fatal("UM") +} + +func BenchmarkRace(b *testing.B) { + tmpDir := "testing" + fil, err := preTest(tmpDir) + if err != nil { + b.Fatal(err) + } + libPath := filepath.Join(tmpDir, "ExtractLib") + unsquashPath := filepath.Join(tmpDir, "ExtractSquashfs") + os.RemoveAll(libPath) + os.RemoveAll(unsquashPath) + var libTime, unsquashTime time.Duration + op := squashfs.FastOptions() + start := time.Now() + rdr, err := squashfs.NewReader(fil) + if err != nil { + b.Fatal(err) + } + err = rdr.ExtractWithOptions(libPath, op) + if err != nil { + b.Fatal(err) + } + libTime = time.Since(start) + cmd := exec.Command("unsquashfs", "-d", unsquashPath, fil.Name()) + cmd.Stdout = os.Stdout + cmd.Stderr = os.Stderr + start = time.Now() + err = cmd.Run() + if err != nil { + b.Log("Unsquashfs error:", err) + } + unsquashTime = 
time.Since(start) + b.Log("Library took:", libTime.Round(time.Millisecond)) + b.Log("unsquashfs took:", unsquashTime.Round(time.Millisecond)) + b.Log("unsquashfs is", strconv.FormatFloat(float64(libTime.Milliseconds())/float64(unsquashTime.Milliseconds()), 'f', 2, 64), "times faster") +} + +func TestExtractQuick(t *testing.T) { + //First, set up everything and extract the archive using the library and unsquashfs + + // tmpDir := b.TempDir() + tmpDir := "testing" + fil, err := preTest(tmpDir) + if err != nil { + t.Fatal(err) + } + libPath := filepath.Join(tmpDir, "ExtractLib") + unsquashPath := filepath.Join(tmpDir, "ExtractSquashfs") + os.RemoveAll(libPath) + os.RemoveAll(unsquashPath) + rdr, err := squashfs.NewReader(fil) + if err != nil { + t.Fatal(err) + } + os.RemoveAll(filepath.Join(tmpDir, "testLog.txt")) + logFil, _ := os.Create(filepath.Join(tmpDir, "testLog.txt")) + op := squashfs.DefaultOptions() + op.Verbose = true + op.IgnorePerm = true + op.LogOutput = logFil + err = rdr.ExtractWithOptions(libPath, op) + if err != nil { + t.Fatal(err) + } + cmd := exec.Command("unsquashfs", "-d", unsquashPath, fil.Name()) + err = cmd.Run() + if err != nil { + t.Fatal(err) + } + + //Then compare the sizes and existence between the two (using unsquashfs as a reference). + //If the file doesn't exist, or the size is different, we exit. + //TODO: Add long test that checks contents. 
+ + squashFils := os.DirFS(unsquashPath) + err = fs.WalkDir(squashFils, ".", func(path string, _ fs.DirEntry, _ error) error { + libFil, e := os.Open(filepath.Join(libPath, path)) + if e != nil { + return e + } + sfsFile, e := os.Open(filepath.Join(unsquashPath, path)) + if e != nil { + return e + } + sfsStat, _ := sfsFile.Stat() + libStat, _ := libFil.Stat() + if sfsStat.Size() != libStat.Size() { + t.Log(libFil.Name(), "not the same size between library and unsquashfs") + t.Log("File is", libStat.Size()) + t.Log("Should be", sfsStat.Size()) + return errors.New("file not the correct size") + } + return nil + }) + if err != nil { + t.Fatal(err) + } +} + +var filePath = "bin" + +func TestSingleFile(t *testing.T) { + tmpDir := "testing" + fil, err := preTest(tmpDir) + if err != nil { + t.Fatal(err) + } + os.Remove(filepath.Join(tmpDir, filePath)) + rdr, err := squashfs.NewReader(fil) + if err != nil { + t.Fatal(err) + } + f, err := rdr.Open(filePath) + if err != nil { + t.Fatal(err) + } + err = f.(*squashfs.File).ExtractWithOptions("testing", &squashfs.ExtractionOptions{Verbose: true}) + if err != nil { + t.Fatal(err) + } + t.Fatal("HI") +}