Implemented WriteTo which halves decompress times.

Added a drag race benchmark (for the fun of it)
This commit is contained in:
Caleb Gardner
2021-01-10 03:33:33 -06:00
parent ee9406513c
commit 76649fde7f
6 changed files with 221 additions and 59 deletions
+4
View File
@@ -11,4 +11,8 @@ The only major thing missing from squashfs reading is Xattr parsing.
Special thanks to <https://dr-emann.github.io/squashfs/> for some VERY important information in an easy to understand format. Special thanks to <https://dr-emann.github.io/squashfs/> for some VERY important information in an easy to understand format.
Thanks also to [distri's squashfs library](https://github.com/distr1/distri/tree/master/internal/squashfs) as I referenced it to figure some things out (and double check others). Thanks also to [distri's squashfs library](https://github.com/distr1/distri/tree/master/internal/squashfs) as I referenced it to figure some things out (and double check others).
## Performance
When extracting the Firefox AppImage via the Go tests, this library takes about twice as long as `unsquashfs` on my quad-core laptop (~1 second with the library versus about half a second with `unsquashfs`).
## [TODO](https://github.com/CalebQ42/squashfs/projects/1?fullscreen=true) ## [TODO](https://github.com/CalebQ42/squashfs/projects/1?fullscreen=true)
+111 -48
View File
@@ -18,39 +18,19 @@ var (
//DataReader reads data from data blocks. //DataReader reads data from data blocks.
type dataReader struct { type dataReader struct {
r *Reader r *Reader
blocks []dataBlock
curData []byte curData []byte
sizes []uint32
offset int64 //offset relative to the beginning of the squash file offset int64 //offset relative to the beginning of the squash file
curBlock int //Which block in sizes is currently cached curBlock int //Which block in sizes is currently cached
curReadOffset int //offset relative to the currently cached data curReadOffset int //offset relative to the currently cached data
} }
//DataBlock holds info about a given data block from it's size
type dataBlock struct {
begOffset int64 //The offset relative to the beginning of the squash file. Makes it easier to seek to it.
size uint32
compressed bool
uncompressedSize uint32
}
//NewDataBlock creates a new squashfs.datablock from a given size.
func newDataBlock(raw uint32) (dbs dataBlock) {
dbs.compressed = raw&(1<<24) != (1 << 24)
dbs.size = raw &^ (1 << 24)
if !dbs.compressed {
dbs.uncompressedSize = dbs.size
}
return
}
//NewDataReader creates a new data reader at the given offset, with the blocks defined by sizes //NewDataReader creates a new data reader at the given offset, with the blocks defined by sizes
func (r *Reader) newDataReader(offset int64, sizes []uint32) (*dataReader, error) { func (r *Reader) newDataReader(offset int64, sizes []uint32) (*dataReader, error) {
var dr dataReader var dr dataReader
dr.r = r dr.r = r
dr.offset = offset dr.offset = offset
for _, size := range sizes { dr.sizes = sizes
dr.blocks = append(dr.blocks, newDataBlock(size))
}
err := dr.readCurBlock() err := dr.readCurBlock()
if err != nil { if err != nil {
return nil, err return nil, err
@@ -70,10 +50,10 @@ func (r *Reader) newDataReaderFromInode(i *inode.Inode) (*dataReader, error) {
} }
rdr.offset = int64(fil.BlockStart) rdr.offset = int64(fil.BlockStart)
for _, sizes := range fil.BlockSizes { for _, sizes := range fil.BlockSizes {
rdr.blocks = append(rdr.blocks, newDataBlock(sizes)) rdr.sizes = append(rdr.sizes, sizes)
} }
if fil.Fragmented { if fil.Fragmented {
rdr.blocks = rdr.blocks[:len(rdr.blocks)-1] rdr.sizes = rdr.sizes[:len(rdr.sizes)-1]
} }
case inode.ExtFileType: case inode.ExtFileType:
fil := i.Info.(inode.ExtFile) fil := i.Info.(inode.ExtFile)
@@ -82,10 +62,10 @@ func (r *Reader) newDataReaderFromInode(i *inode.Inode) (*dataReader, error) {
} }
rdr.offset = int64(fil.BlockStart) rdr.offset = int64(fil.BlockStart)
for _, sizes := range fil.BlockSizes { for _, sizes := range fil.BlockSizes {
rdr.blocks = append(rdr.blocks, newDataBlock(sizes)) rdr.sizes = append(rdr.sizes, sizes)
} }
if fil.Fragmented { if fil.Fragmented {
rdr.blocks = rdr.blocks[:len(rdr.blocks)-1] rdr.sizes = rdr.sizes[:len(rdr.sizes)-1]
} }
default: default:
return nil, errInodeNotFile return nil, errInodeNotFile
@@ -97,9 +77,14 @@ func (r *Reader) newDataReaderFromInode(i *inode.Inode) (*dataReader, error) {
return &rdr, nil return &rdr, nil
} }
// actualDataSize strips the flag stored in bit 24 of a raw data block size
// entry and returns the remaining bits, i.e. the number of bytes the block
// occupies on disk.
func actualDataSize(size uint32) uint32 {
	const sizeMask = ^uint32(1 << 24) // every bit except the flag bit
	return size & sizeMask
}
func (d *dataReader) readNextBlock() error { func (d *dataReader) readNextBlock() error {
d.curBlock++ d.curBlock++
if d.curBlock >= len(d.blocks) { if d.curBlock >= len(d.sizes) {
d.curBlock-- d.curBlock--
return io.EOF return io.EOF
} }
@@ -112,37 +97,47 @@ func (d *dataReader) readNextBlock() error {
return nil return nil
} }
func (d *dataReader) readCurBlock() error { func (d *dataReader) readBlockAt(offset int64, size uint32) ([]byte, error) {
if d.curBlock >= len(d.blocks) { compressed := size&(1<<24) != (1 << 24)
return io.EOF size = size &^ (1 << 24)
if d.sizes[d.curBlock] == 0 {
return make([]byte, d.r.super.BlockSize), nil
} }
if d.blocks[d.curBlock].size == 0 { sec := io.NewSectionReader(d.r.r, offset, int64(size))
d.curData = make([]byte, d.r.super.BlockSize) if compressed {
d.blocks[d.curBlock].uncompressedSize = d.r.super.BlockSize
d.blocks[d.curBlock].begOffset = d.offset
return nil
}
sec := io.NewSectionReader(d.r.r, d.offset, int64(d.blocks[d.curBlock].size))
if d.blocks[d.curBlock].compressed {
btys, err := d.r.decompressor.Decompress(sec) btys, err := d.r.decompressor.Decompress(sec)
if err != nil { if err != nil {
return err return nil, err
} }
d.blocks[d.curBlock].uncompressedSize = uint32(len(btys)) return btys, nil
d.curData = btys
d.blocks[d.curBlock].begOffset = d.offset
d.offset += int64(d.blocks[d.curBlock].size)
return nil
} }
var buf bytes.Buffer var buf bytes.Buffer
_, err := io.Copy(&buf, sec) _, err := io.Copy(&buf, sec)
if err != nil {
return nil, err
}
return buf.Bytes(), nil
}
// offsetForBlock returns the offset at which the block with the given index
// starts: the reader's base offset plus the on-disk sizes (flag bit 24
// cleared) of every block that precedes it.
func (d *dataReader) offsetForBlock(index int) int64 {
	pos := d.offset
	for blk := 0; blk < index; blk++ {
		// Same masking as actualDataSize: drop the flag bit, keep the byte count.
		onDisk := d.sizes[blk] &^ (1 << 24)
		pos += int64(onDisk)
	}
	return pos
}
func (d *dataReader) readCurBlock() error {
if d.curBlock >= len(d.sizes) {
return io.EOF
}
offset := d.offsetForBlock(d.curBlock)
data, err := d.readBlockAt(offset, d.sizes[d.curBlock])
if err != nil { if err != nil {
return err return err
} }
d.curData = buf.Bytes() d.curData = data
d.blocks[d.curBlock].begOffset = d.offset return nil
d.offset += int64(d.blocks[d.curBlock].size)
return err
} }
func (d *dataReader) Read(p []byte) (int, error) { func (d *dataReader) Read(p []byte) (int, error) {
@@ -182,3 +177,71 @@ func (d *dataReader) Read(p []byte) (int, error) {
} }
return read, nil return read, nil
} }
// WriteTo writes all the data in the data blocks to w, in block order, and
// returns the total number of bytes written. MUST BE USED ON A FRESH DATA READER.
//
// Every block is read (and decompressed if needed) in its own goroutine. The
// results arrive in arbitrary order, so blocks that finish early are parked in
// a backlog until all of their predecessors have been written.
//
// If reading any block fails, or w returns an error, WriteTo stops and returns
// the bytes written so far together with that error.
func (d *dataReader) WriteTo(w io.Writer) (int64, error) {
	type dataCache struct {
		err   error
		data  []byte
		index int
	}
	// Buffered to the number of blocks so that every goroutine can deliver its
	// result and exit even when we return early; an unbuffered channel would
	// leave the remaining senders blocked forever.
	dataChan := make(chan *dataCache, len(d.sizes))
	for i := range d.sizes {
		go func(index int) {
			data, err := d.readBlockAt(d.offsetForBlock(index), d.sizes[index])
			dataChan <- &dataCache{err: err, data: data, index: index}
		}(i)
	}

	var totalWrite int64
	backlog := make(map[int][]byte) // finished blocks waiting for their turn, keyed by block index
	curIndex := 0                   // index of the next block that must be written
	for curIndex < len(d.sizes) {
		data, ready := backlog[curIndex]
		if ready {
			delete(backlog, curIndex)
		} else {
			cache := <-dataChan
			if cache.err != nil {
				return totalWrite, cache.err
			}
			if cache.index != curIndex {
				// Out of order: park it and keep waiting. curIndex must NOT
				// advance here, otherwise the block we are waiting for is skipped.
				backlog[cache.index] = cache.data
				continue
			}
			data = cache.data
		}
		written, err := w.Write(data)
		totalWrite += int64(written)
		if err != nil {
			return totalWrite, err
		}
		curIndex++
	}
	return totalWrite, nil
}
+13 -2
View File
@@ -398,7 +398,18 @@ func (f *File) ExtractWithOptions(path string, dereferenceSymlink, unbreakSymlin
errs = append(errs, err) errs = append(errs, err)
return return
} //Since we will be reading from the file } //Since we will be reading from the file
_, err = io.Copy(fil, f) if f.Reader == nil && f.r != nil {
f.Reader, err = f.r.newFileReader(f.in)
if err != nil {
if verbose {
fmt.Println("Error while Copying data to:", path+"/"+f.name)
fmt.Println(err)
}
errs = append(errs, err)
return
}
}
_, err = io.Copy(fil, f.Reader)
if err != nil { if err != nil {
if verbose { if verbose {
fmt.Println("Error while Copying data to:", path+"/"+f.name) fmt.Println("Error while Copying data to:", path+"/"+f.name)
@@ -480,7 +491,7 @@ func (f *File) ExtractWithOptions(path string, dereferenceSymlink, unbreakSymlin
//Read from the file. Doesn't do anything fancy, just pases it to the underlying io.Reader. If a directory, return io.EOF. //Read from the file. Doesn't do anything fancy, just pases it to the underlying io.Reader. If a directory, return io.EOF.
func (f *File) Read(p []byte) (int, error) { func (f *File) Read(p []byte) (int, error) {
if f.IsDir() { if !f.IsFile() {
return 0, io.EOF return 0, io.EOF
} }
var err error var err error
+16
View File
@@ -83,3 +83,19 @@ func (f *fileReader) Read(p []byte) (int, error) {
} }
return read, nil return read, nil
} }
// WriteTo writes the file's contents to w and returns the number of bytes
// written. A fragment-only file writes just its fragment data. Otherwise the
// data blocks are written through f.data.WriteTo, followed by the fragment
// data when the file is fragged. The first error stops the write and is
// returned with the byte count so far.
func (f *fileReader) WriteTo(w io.Writer) (int64, error) {
	if f.fragOnly {
		written, err := w.Write(f.fragmentData)
		return int64(written), err
	}
	total, err := f.data.WriteTo(w)
	if err != nil || !f.fragged {
		// Nothing follows the data blocks, or they failed part-way.
		return total, err
	}
	fragWritten, err := w.Write(f.fragmentData)
	return total + int64(fragWritten), err
}
+12
View File
@@ -15,6 +15,7 @@ type gzipInit struct {
//Gzip is a decompressor for gzip type compression. Uses zlib for compression and decompression //Gzip is a decompressor for gzip type compression. Uses zlib for compression and decompression
type Gzip struct { type Gzip struct {
wrt *zlib.Writer
gzipInit gzipInit
HasCustomWindow bool HasCustomWindow bool
HasStrategies bool HasStrategies bool
@@ -50,6 +51,17 @@ func (g *Gzip) Decompress(r io.Reader) ([]byte, error) {
//Compress compresses the given data (as a byte array) and returns the compressed data. //Compress compresses the given data (as a byte array) and returns the compressed data.
func (g *Gzip) Compress(data []byte) ([]byte, error) { func (g *Gzip) Compress(data []byte) ([]byte, error) {
var buf bytes.Buffer var buf bytes.Buffer
var err error
if g.wrt == nil {
if g.CompressionLevel == 0 {
g.wrt = zlib.NewWriter(&buf)
} else {
g.wrt, err = zlib.NewWriterLevel(&buf, int(g.CompressionLevel))
if err != nil {
return nil, err
}
}
}
wrt, err := zlib.NewWriterLevel(&buf, int(g.CompressionLevel)) wrt, err := zlib.NewWriterLevel(&buf, int(g.CompressionLevel))
if err != nil { if err != nil {
return nil, err return nil, err
+65 -9
View File
@@ -56,7 +56,10 @@ func TestAppImage(t *testing.T) {
} }
aiFil, err := os.Open(wd + "/testing/" + appImageName) aiFil, err := os.Open(wd + "/testing/" + appImageName)
if os.IsNotExist(err) { if os.IsNotExist(err) {
downloadTestAppImage(t, wd+"/testing") err = downloadTestAppImage(wd + "/testing")
if err != nil {
t.Fatal(err)
}
aiFil, err = os.Open(wd + "/testing/" + appImageName) aiFil, err = os.Open(wd + "/testing/" + appImageName)
if err != nil { if err != nil {
t.Fatal(err) t.Fatal(err)
@@ -66,13 +69,13 @@ func TestAppImage(t *testing.T) {
} }
defer aiFil.Close() defer aiFil.Close()
stat, _ := aiFil.Stat() stat, _ := aiFil.Stat()
os.RemoveAll(wd + "/testing/firefox")
ai := goappimage.NewAppImage(wd + "/testing/" + appImageName) ai := goappimage.NewAppImage(wd + "/testing/" + appImageName)
start := time.Now()
rdr, err := NewSquashfsReader(io.NewSectionReader(aiFil, ai.Offset, stat.Size()-ai.Offset)) rdr, err := NewSquashfsReader(io.NewSectionReader(aiFil, ai.Offset, stat.Size()-ai.Offset))
if err != nil { if err != nil {
t.Fatal(err) t.Fatal(err)
} }
start := time.Now()
// os.RemoveAll(wd + "testing/firefox")
errs := rdr.ExtractTo(wd + "/testing/firefox") errs := rdr.ExtractTo(wd + "/testing/firefox")
if len(errs) > 0 { if len(errs) > 0 {
t.Fatal(errs) t.Fatal(errs)
@@ -93,7 +96,10 @@ func TestUnsquashfs(t *testing.T) {
} }
aiFil, err := os.Open(wd + "/testing/" + appImageName) aiFil, err := os.Open(wd + "/testing/" + appImageName)
if os.IsNotExist(err) { if os.IsNotExist(err) {
downloadTestAppImage(t, wd+"/testing") err = downloadTestAppImage(wd + "/testing")
if err != nil {
t.Fatal(err)
}
aiFil, err = os.Open(wd + "/testing/" + appImageName) aiFil, err = os.Open(wd + "/testing/" + appImageName)
if err != nil { if err != nil {
t.Fatal(err) t.Fatal(err)
@@ -101,6 +107,8 @@ func TestUnsquashfs(t *testing.T) {
} else if err != nil { } else if err != nil {
t.Fatal(err) t.Fatal(err)
} }
os.RemoveAll(wd + "/testing/unsquashFirefox")
os.RemoveAll(wd + "/testing/firefox")
ai := goappimage.NewAppImage(wd + "/testing/" + appImageName) ai := goappimage.NewAppImage(wd + "/testing/" + appImageName)
fmt.Println("Command:", "unsquashfs", "-d", wd+"/testing/unsquashFirefox", "-o", strconv.Itoa(int(ai.Offset)), aiFil.Name()) fmt.Println("Command:", "unsquashfs", "-d", wd+"/testing/unsquashFirefox", "-o", strconv.Itoa(int(ai.Offset)), aiFil.Name())
cmd := exec.Command("unsquashfs", "-d", wd+"/testing/unsquashFirefox", "-o", strconv.Itoa(int(ai.Offset)), aiFil.Name()) cmd := exec.Command("unsquashfs", "-d", wd+"/testing/unsquashFirefox", "-o", strconv.Itoa(int(ai.Offset)), aiFil.Name())
@@ -113,12 +121,56 @@ func TestUnsquashfs(t *testing.T) {
t.Fatal("HI") t.Fatal("HI")
} }
func downloadTestAppImage(t *testing.T, dir string) { func BenchmarkDragRace(b *testing.B) {
wd, err := os.Getwd()
if err != nil {
b.Fatal(err)
}
aiFil, err := os.Open(wd + "/testing/" + appImageName)
if os.IsNotExist(err) {
err = downloadTestAppImage(wd + "/testing")
if err != nil {
b.Fatal(err)
}
aiFil, err = os.Open(wd + "/testing/" + appImageName)
if err != nil {
b.Fatal(err)
}
} else if err != nil {
b.Fatal(err)
}
stat, _ := aiFil.Stat()
ai := goappimage.NewAppImage(wd + "/testing/" + appImageName)
os.RemoveAll(wd + "/testing/unsquashFirefox")
os.RemoveAll(wd + "/testing/firefox")
cmd := exec.Command("unsquashfs", "-d", wd+"/testing/unsquashFirefox", "-o", strconv.Itoa(int(ai.Offset)), aiFil.Name())
start := time.Now()
err = cmd.Run()
if err != nil {
b.Fatal(err)
}
unsquashTime := time.Since(start)
start = time.Now()
rdr, err := NewSquashfsReader(io.NewSectionReader(aiFil, ai.Offset, stat.Size()-ai.Offset))
if err != nil {
b.Fatal(err)
}
errs := rdr.ExtractTo(wd + "/testing/firefox")
if len(errs) > 0 {
b.Fatal(errs)
}
libTime := time.Since(start)
b.Log("Unsqushfs:", unsquashTime.Round(time.Millisecond))
b.Log("Library:", libTime.Round(time.Millisecond))
b.Log("unsquashfs is " + strconv.FormatFloat(float64(libTime.Milliseconds())/float64(unsquashTime.Milliseconds()), 'f', 2, 64) + "x faster")
}
func downloadTestAppImage(dir string) error {
//seems to time out on slow connections. Might fix that at some point... or not. It's just a test... //seems to time out on slow connections. Might fix that at some point... or not. It's just a test...
os.Mkdir(dir, os.ModePerm) os.Mkdir(dir, os.ModePerm)
appImage, err := os.Create(dir + "/" + appImageName) appImage, err := os.Create(dir + "/" + appImageName)
if err != nil { if err != nil {
t.Fatal(err) return err
} }
defer appImage.Close() defer appImage.Close()
check := http.Client{ check := http.Client{
@@ -129,13 +181,14 @@ func downloadTestAppImage(t *testing.T, dir string) {
} }
resp, err := check.Get(downloadURL) resp, err := check.Get(downloadURL)
if err != nil { if err != nil {
t.Fatal(err) return err
} }
defer resp.Body.Close() defer resp.Body.Close()
_, err = io.Copy(appImage, resp.Body) _, err = io.Copy(appImage, resp.Body)
if err != nil { if err != nil {
t.Fatal(err) return err
} }
return nil
} }
func TestCreateSquashFromAppImage(t *testing.T) { func TestCreateSquashFromAppImage(t *testing.T) {
@@ -149,7 +202,10 @@ func TestCreateSquashFromAppImage(t *testing.T) {
} }
_, err = os.Open(wd + "/testing/" + appImageName) _, err = os.Open(wd + "/testing/" + appImageName)
if os.IsNotExist(err) { if os.IsNotExist(err) {
downloadTestAppImage(t, wd+"/testing") err = downloadTestAppImage(wd + "/testing")
if err != nil {
t.Fatal(err)
}
_, err = os.Open(wd + "/testing/" + appImageName) _, err = os.Open(wd + "/testing/" + appImageName)
if err != nil { if err != nil {
t.Fatal(err) t.Fatal(err)