diff --git a/README.md b/README.md index 791abc8..ffb180c 100644 --- a/README.md +++ b/README.md @@ -11,4 +11,8 @@ The only major thing missing from squashfs reading is Xattr parsing. Special thanks to for some VERY important information in an easy to understand format. Thanks also to [distri's squashfs library](https://github.com/distr1/distri/tree/master/internal/squashfs) as I referenced it to figure some things out (and double check others). +## Performance + +This library, decompressing the Firefox AppImage and using Go tests, takes about twice as long as `unsquashfs` on my quad-core laptop. (~1 second with the library and about half a second with `unsquashfs`) + ## [TODO](https://github.com/CalebQ42/squashfs/projects/1?fullscreen=true) diff --git a/datareader.go b/datareader.go index 5d61fbe..71402e9 100644 --- a/datareader.go +++ b/datareader.go @@ -18,39 +18,19 @@ var ( //DataReader reads data from data blocks. type dataReader struct { r *Reader - blocks []dataBlock curData []byte + sizes []uint32 offset int64 //offset relative to the beginning of the squash file curBlock int //Which block in sizes is currently cached curReadOffset int //offset relative to the currently cached data } -//DataBlock holds info about a given data block from it's size -type dataBlock struct { - begOffset int64 //The offset relative to the beginning of the squash file. Makes it easier to seek to it. - size uint32 - compressed bool - uncompressedSize uint32 -} - -//NewDataBlock creates a new squashfs.datablock from a given size. 
-func newDataBlock(raw uint32) (dbs dataBlock) { - dbs.compressed = raw&(1<<24) != (1 << 24) - dbs.size = raw &^ (1 << 24) - if !dbs.compressed { - dbs.uncompressedSize = dbs.size - } - return -} - //NewDataReader creates a new data reader at the given offset, with the blocks defined by sizes func (r *Reader) newDataReader(offset int64, sizes []uint32) (*dataReader, error) { var dr dataReader dr.r = r dr.offset = offset - for _, size := range sizes { - dr.blocks = append(dr.blocks, newDataBlock(size)) - } + dr.sizes = sizes err := dr.readCurBlock() if err != nil { return nil, err @@ -70,10 +50,10 @@ func (r *Reader) newDataReaderFromInode(i *inode.Inode) (*dataReader, error) { } rdr.offset = int64(fil.BlockStart) for _, sizes := range fil.BlockSizes { - rdr.blocks = append(rdr.blocks, newDataBlock(sizes)) + rdr.sizes = append(rdr.sizes, sizes) } if fil.Fragmented { - rdr.blocks = rdr.blocks[:len(rdr.blocks)-1] + rdr.sizes = rdr.sizes[:len(rdr.sizes)-1] } case inode.ExtFileType: fil := i.Info.(inode.ExtFile) @@ -82,10 +62,10 @@ func (r *Reader) newDataReaderFromInode(i *inode.Inode) (*dataReader, error) { } rdr.offset = int64(fil.BlockStart) for _, sizes := range fil.BlockSizes { - rdr.blocks = append(rdr.blocks, newDataBlock(sizes)) + rdr.sizes = append(rdr.sizes, sizes) } if fil.Fragmented { - rdr.blocks = rdr.blocks[:len(rdr.blocks)-1] + rdr.sizes = rdr.sizes[:len(rdr.sizes)-1] } default: return nil, errInodeNotFile @@ -97,9 +77,14 @@ func (r *Reader) newDataReaderFromInode(i *inode.Inode) (*dataReader, error) { return &rdr, nil } +//removed the compression bit from a data block size +func actualDataSize(size uint32) uint32 { + return size &^ (1 << 24) +} + func (d *dataReader) readNextBlock() error { d.curBlock++ - if d.curBlock >= len(d.blocks) { + if d.curBlock >= len(d.sizes) { d.curBlock-- return io.EOF } @@ -112,37 +97,47 @@ func (d *dataReader) readNextBlock() error { return nil } -func (d *dataReader) readCurBlock() error { - if d.curBlock >= 
len(d.blocks) { - return io.EOF +func (d *dataReader) readBlockAt(offset int64, size uint32) ([]byte, error) { + compressed := size&(1<<24) != (1 << 24) + size = size &^ (1 << 24) + if d.sizes[d.curBlock] == 0 { + return make([]byte, d.r.super.BlockSize), nil } - if d.blocks[d.curBlock].size == 0 { - d.curData = make([]byte, d.r.super.BlockSize) - d.blocks[d.curBlock].uncompressedSize = d.r.super.BlockSize - d.blocks[d.curBlock].begOffset = d.offset - return nil - } - sec := io.NewSectionReader(d.r.r, d.offset, int64(d.blocks[d.curBlock].size)) - if d.blocks[d.curBlock].compressed { + sec := io.NewSectionReader(d.r.r, offset, int64(size)) + if compressed { btys, err := d.r.decompressor.Decompress(sec) if err != nil { - return err + return nil, err } - d.blocks[d.curBlock].uncompressedSize = uint32(len(btys)) - d.curData = btys - d.blocks[d.curBlock].begOffset = d.offset - d.offset += int64(d.blocks[d.curBlock].size) - return nil + return btys, nil } var buf bytes.Buffer _, err := io.Copy(&buf, sec) + if err != nil { + return nil, err + } + return buf.Bytes(), nil +} + +func (d *dataReader) offsetForBlock(index int) int64 { + out := d.offset + for i := 0; i < index; i++ { + out += int64(actualDataSize(d.sizes[i])) + } + return out +} + +func (d *dataReader) readCurBlock() error { + if d.curBlock >= len(d.sizes) { + return io.EOF + } + offset := d.offsetForBlock(d.curBlock) + data, err := d.readBlockAt(offset, d.sizes[d.curBlock]) if err != nil { return err } - d.curData = buf.Bytes() - d.blocks[d.curBlock].begOffset = d.offset - d.offset += int64(d.blocks[d.curBlock].size) - return err + d.curData = data + return nil } func (d *dataReader) Read(p []byte) (int, error) { @@ -182,3 +177,71 @@ func (d *dataReader) Read(p []byte) (int, error) { } return read, nil } + +// WriteTo writes all the data in the datablock to the writer. MUST BE USED ON A FRESH DATA READER. 
+func (d *dataReader) WriteTo(w io.Writer) (int64, error) { + type dataCache struct { + err error + data []byte + index int + } + dataChan := make(chan *dataCache) + for i := range d.sizes { + go func(index int, c chan *dataCache) { + var cache dataCache + cache.index = index + defer func() { + c <- &cache + }() + data, err := d.readBlockAt(d.offsetForBlock(index), d.sizes[index]) + if err != nil { + cache.err = err + return + } + cache.data = data + return + }(i, dataChan) + } + curIndex := 0 + totalWrite := int64(0) + var backlog []*dataCache +mainLoop: + for { + if curIndex == len(d.sizes) { + return totalWrite, nil + } + if len(backlog) > 0 { + for i, cache := range backlog { + if cache.index == curIndex { + writen, err := w.Write(cache.data) + totalWrite += int64(writen) + if err != nil { + return totalWrite, err + } + if len(backlog) > 0 { + backlog[i] = backlog[len(backlog)-1] + backlog = backlog[:len(backlog)-1] + } else { + backlog = nil + } + curIndex++ + continue mainLoop + } + } + } + cache := <-dataChan + if cache.err != nil { + return totalWrite, cache.err + } + if cache.index == curIndex { + writen, err := w.Write(cache.data) + totalWrite += int64(writen) + if err != nil { + return totalWrite, err + } + } else { + backlog = append(backlog, cache) + } + curIndex++ + } +} diff --git a/file.go b/file.go index 706c0be..0a72bbd 100644 --- a/file.go +++ b/file.go @@ -398,7 +398,18 @@ func (f *File) ExtractWithOptions(path string, dereferenceSymlink, unbreakSymlin errs = append(errs, err) return } //Since we will be reading from the file - _, err = io.Copy(fil, f) + if f.Reader == nil && f.r != nil { + f.Reader, err = f.r.newFileReader(f.in) + if err != nil { + if verbose { + fmt.Println("Error while Copying data to:", path+"/"+f.name) + fmt.Println(err) + } + errs = append(errs, err) + return + } + } + _, err = io.Copy(fil, f.Reader) if err != nil { if verbose { fmt.Println("Error while Copying data to:", path+"/"+f.name) @@ -480,7 +491,7 @@ func (f *File) 
ExtractWithOptions(path string, dereferenceSymlink, unbreakSymlin //Read from the file. Doesn't do anything fancy, just pases it to the underlying io.Reader. If a directory, return io.EOF. func (f *File) Read(p []byte) (int, error) { - if f.IsDir() { + if !f.IsFile() { return 0, io.EOF } var err error diff --git a/filereader.go b/filereader.go index d9bf52e..73be928 100644 --- a/filereader.go +++ b/filereader.go @@ -83,3 +83,19 @@ func (f *fileReader) Read(p []byte) (int, error) { } return read, nil } + +func (f *fileReader) WriteTo(w io.Writer) (int64, error) { + if f.fragOnly { + n, err := w.Write(f.fragmentData) + return int64(n), err + } + if !f.fragged { + return f.data.WriteTo(w) + } + n, err := f.data.WriteTo(w) + if err != nil { + return int64(n), err + } + nn, err := w.Write(f.fragmentData) + return int64(nn) + n, err +} diff --git a/internal/compression/gzip.go b/internal/compression/gzip.go index a5cc629..cdc64fd 100644 --- a/internal/compression/gzip.go +++ b/internal/compression/gzip.go @@ -15,6 +15,7 @@ type gzipInit struct { //Gzip is a decompressor for gzip type compression. Uses zlib for compression and decompression type Gzip struct { + wrt *zlib.Writer gzipInit HasCustomWindow bool HasStrategies bool @@ -50,6 +51,17 @@ func (g *Gzip) Decompress(r io.Reader) ([]byte, error) { //Compress compresses the given data (as a byte array) and returns the compressed data. 
func (g *Gzip) Compress(data []byte) ([]byte, error) { var buf bytes.Buffer + var err error + if g.wrt == nil { + if g.CompressionLevel == 0 { + g.wrt = zlib.NewWriter(&buf) + } else { + g.wrt, err = zlib.NewWriterLevel(&buf, int(g.CompressionLevel)) + if err != nil { + return nil, err + } + } + } wrt, err := zlib.NewWriterLevel(&buf, int(g.CompressionLevel)) if err != nil { return nil, err diff --git a/squash_test.go b/squash_test.go index fd105af..3633a6e 100644 --- a/squash_test.go +++ b/squash_test.go @@ -56,7 +56,10 @@ func TestAppImage(t *testing.T) { } aiFil, err := os.Open(wd + "/testing/" + appImageName) if os.IsNotExist(err) { - downloadTestAppImage(t, wd+"/testing") + err = downloadTestAppImage(wd + "/testing") + if err != nil { + t.Fatal(err) + } aiFil, err = os.Open(wd + "/testing/" + appImageName) if err != nil { t.Fatal(err) @@ -66,13 +69,13 @@ func TestAppImage(t *testing.T) { } defer aiFil.Close() stat, _ := aiFil.Stat() + os.RemoveAll(wd + "/testing/firefox") ai := goappimage.NewAppImage(wd + "/testing/" + appImageName) + start := time.Now() rdr, err := NewSquashfsReader(io.NewSectionReader(aiFil, ai.Offset, stat.Size()-ai.Offset)) if err != nil { t.Fatal(err) } - start := time.Now() - // os.RemoveAll(wd + "testing/firefox") errs := rdr.ExtractTo(wd + "/testing/firefox") if len(errs) > 0 { t.Fatal(errs) @@ -93,7 +96,10 @@ func TestUnsquashfs(t *testing.T) { } aiFil, err := os.Open(wd + "/testing/" + appImageName) if os.IsNotExist(err) { - downloadTestAppImage(t, wd+"/testing") + err = downloadTestAppImage(wd + "/testing") + if err != nil { + t.Fatal(err) + } aiFil, err = os.Open(wd + "/testing/" + appImageName) if err != nil { t.Fatal(err) @@ -101,6 +107,8 @@ func TestUnsquashfs(t *testing.T) { } else if err != nil { t.Fatal(err) } + os.RemoveAll(wd + "/testing/unsquashFirefox") + os.RemoveAll(wd + "/testing/firefox") ai := goappimage.NewAppImage(wd + "/testing/" + appImageName) fmt.Println("Command:", "unsquashfs", "-d", 
wd+"/testing/unsquashFirefox", "-o", strconv.Itoa(int(ai.Offset)), aiFil.Name()) cmd := exec.Command("unsquashfs", "-d", wd+"/testing/unsquashFirefox", "-o", strconv.Itoa(int(ai.Offset)), aiFil.Name()) @@ -113,12 +121,56 @@ func TestUnsquashfs(t *testing.T) { t.Fatal("HI") } -func downloadTestAppImage(t *testing.T, dir string) { +func BenchmarkDragRace(b *testing.B) { + wd, err := os.Getwd() + if err != nil { + b.Fatal(err) + } + aiFil, err := os.Open(wd + "/testing/" + appImageName) + if os.IsNotExist(err) { + err = downloadTestAppImage(wd + "/testing") + if err != nil { + b.Fatal(err) + } + aiFil, err = os.Open(wd + "/testing/" + appImageName) + if err != nil { + b.Fatal(err) + } + } else if err != nil { + b.Fatal(err) + } + stat, _ := aiFil.Stat() + ai := goappimage.NewAppImage(wd + "/testing/" + appImageName) + os.RemoveAll(wd + "/testing/unsquashFirefox") + os.RemoveAll(wd + "/testing/firefox") + cmd := exec.Command("unsquashfs", "-d", wd+"/testing/unsquashFirefox", "-o", strconv.Itoa(int(ai.Offset)), aiFil.Name()) + start := time.Now() + err = cmd.Run() + if err != nil { + b.Fatal(err) + } + unsquashTime := time.Since(start) + start = time.Now() + rdr, err := NewSquashfsReader(io.NewSectionReader(aiFil, ai.Offset, stat.Size()-ai.Offset)) + if err != nil { + b.Fatal(err) + } + errs := rdr.ExtractTo(wd + "/testing/firefox") + if len(errs) > 0 { + b.Fatal(errs) + } + libTime := time.Since(start) + b.Log("Unsqushfs:", unsquashTime.Round(time.Millisecond)) + b.Log("Library:", libTime.Round(time.Millisecond)) + b.Log("unsquashfs is " + strconv.FormatFloat(float64(libTime.Milliseconds())/float64(unsquashTime.Milliseconds()), 'f', 2, 64) + "x faster") +} + +func downloadTestAppImage(dir string) error { //seems to time out on slow connections. Might fix that at some point... or not. It's just a test... 
os.Mkdir(dir, os.ModePerm) appImage, err := os.Create(dir + "/" + appImageName) if err != nil { - t.Fatal(err) + return err } defer appImage.Close() check := http.Client{ @@ -129,13 +181,14 @@ func downloadTestAppImage(t *testing.T, dir string) { } resp, err := check.Get(downloadURL) if err != nil { - t.Fatal(err) + return err } defer resp.Body.Close() _, err = io.Copy(appImage, resp.Body) if err != nil { - t.Fatal(err) + return err } + return nil } func TestCreateSquashFromAppImage(t *testing.T) { @@ -149,7 +202,10 @@ func TestCreateSquashFromAppImage(t *testing.T) { } _, err = os.Open(wd + "/testing/" + appImageName) if os.IsNotExist(err) { - downloadTestAppImage(t, wd+"/testing") + err = downloadTestAppImage(wd + "/testing") + if err != nil { + t.Fatal(err) + } _, err = os.Open(wd + "/testing/" + appImageName) if err != nil { t.Fatal(err)