diff --git a/file.go b/file.go index 8c99b27..b06819c 100644 --- a/file.go +++ b/file.go @@ -13,14 +13,13 @@ import ( "github.com/CalebQ42/squashfs/internal/routinemanager" squashfslow "github.com/CalebQ42/squashfs/low" - "github.com/CalebQ42/squashfs/low/data" "github.com/CalebQ42/squashfs/low/inode" ) // File represents a file inside a squashfs archive. type File struct { - full data.FullReader - rdr data.Reader + // full data.FullReader + // rdr data.Reader rdrInit bool parent FS r *Reader diff --git a/low/data/fullreader.go b/low/data/fullreader.go index 8ab50d6..59c9e9a 100644 --- a/low/data/fullreader.go +++ b/low/data/fullreader.go @@ -3,258 +3,79 @@ package data import ( "errors" "io" - "io/fs" - "math" - "runtime" - "sync" "github.com/CalebQ42/squashfs/internal/decompress" ) -type FragReaderConstructor func() (io.Reader, error) - type FullReader struct { - r io.ReaderAt - d decompress.Decompressor - frag FragReaderConstructor - sizes []uint32 - initialOffset int64 - finalBlockSize uint64 - blockSize uint32 - goroutineLimit uint16 - closed bool + fileSize uint64 + blockSize uint32 + rdr io.ReaderAt + decomp decompress.Decompressor + sizes []uint32 + blockOffsets []uint64 + fragDat []byte } -func NewFullReader(r io.ReaderAt, initialOffset int64, d decompress.Decompressor, sizes []uint32, finalBlockSize uint64, blockSize uint32) FullReader { - return FullReader{ - r: r, - d: d, - sizes: sizes, - initialOffset: initialOffset, - goroutineLimit: uint16(runtime.NumCPU()), - finalBlockSize: finalBlockSize, - blockSize: blockSize, +func NewFullReader(rdr io.ReaderAt, decomp decompress.Decompressor, size uint64, start uint64, blockSizes []uint32) FullReader { + out := FullReader{ + fileSize: size, + rdr: rdr, + decomp: decomp, + sizes: blockSizes, } + out.blockOffsets = make([]uint64, len(blockSizes)) + curOffset := start + for i := range blockSizes { + out.blockOffsets[i] = curOffset + curOffset += uint64(blockSizes[i]) &^ (1 << 24) + } + return out } -func (r *FullReader) Close() error { - r.closed = true - r.r = nil - r.d = nil - r.frag = nil - r.sizes = nil +func (f *FullReader) AddFragData(blockStart uint64, offset uint32, blockSize uint32) error { + realSize := blockSize &^ (1 << 24) + dat := make([]byte, realSize) + _, err := f.rdr.ReadAt(dat, int64(blockStart)) + if err != nil { + return err + } + if blockSize == realSize { + dat, err = f.decomp.Decompress(dat) + if err != nil { + return err + } + } + f.fragDat = dat[offset : offset+uint32(f.fileSize%uint64(f.blockSize))] return nil } -func (r *FullReader) AddFrag(frag FragReaderConstructor) { - r.frag = frag -} - -func (r *FullReader) SetGoroutineLimit(limit uint16) { - if limit <= 0 { - r.goroutineLimit = 1 +// Returns the data block at the given index +func (f FullReader) Block(i int) ([]byte, error) { + if i == len(f.sizes) && f.fragDat != nil { + return f.fragDat, nil } - r.goroutineLimit = limit -} - -type retValue struct { - err error - data []byte - index uint64 -} - -func (r FullReader) process(index uint64, fileOffset uint64, pool *sync.Pool, retChan chan *retValue) { - ret := pool.Get().(*retValue) - ret.index = index - realSize := r.sizes[index] &^ (1 << 24) + if i >= len(f.sizes) { + return nil, errors.New("invalid block index") + } + realSize := f.sizes[i] &^ (1 << 24) if realSize == 0 { - if index == uint64(len(r.sizes))-1 && r.frag == nil { - ret.data = make([]byte, r.finalBlockSize) - } else { - ret.data = make([]byte, r.blockSize) + if i == len(f.sizes)-1 && f.fragDat == nil { + return make([]byte, f.fileSize%uint64(f.blockSize)), nil } - ret.err = nil - retChan <- ret - return + return make([]byte, f.blockSize), nil } - ret.data = make([]byte, realSize) - _, ret.err = r.r.ReadAt(ret.data, r.initialOffset+int64(fileOffset)) - if r.sizes[index] == realSize { - ret.data, ret.err = r.d.Decompress(ret.data) + dat := make([]byte, realSize) + _, err := f.rdr.ReadAt(dat, int64(f.blockOffsets[i])) + if err != nil { + return nil, err } - retChan <- ret + if realSize == f.sizes[i] { + return f.decomp.Decompress(dat) + } + return dat, nil } -func (r FullReader) WriteTo(w io.Writer) (int64, error) { - if r.closed { - return 0, fs.ErrClosed - } - // if wa, is := w.(io.WriterAt); is { - // return r.writeToWriteAt(wa) - // } - var curIndex uint64 - var curOffset uint64 - var toProcess uint16 - var wrote int64 - cache := make(map[uint64]*retValue) - var errCache []error - retChan := make(chan *retValue, r.goroutineLimit) - pool := &sync.Pool{ - New: func() any { - return &retValue{} - }, - } - for i := uint64(0); i < uint64(math.Ceil(float64(len(r.sizes))/float64(r.goroutineLimit))); i++ { - toProcess = min(uint16(len(r.sizes))-(uint16(i)*r.goroutineLimit), r.goroutineLimit) - // Start all the goroutines - for j := uint16(0); j < toProcess; j++ { - go r.process((i*uint64(r.goroutineLimit))+uint64(j), curOffset, pool, retChan) - curOffset += uint64(r.sizes[(i*uint64(r.goroutineLimit))+uint64(j)]) &^ (1 << 24) - } - // Then consume the results on retChan - for j := uint16(0); j < toProcess; j++ { - res := <-retChan - // If there's an error, we don't care about the results. - if res.err != nil { - errCache = append(errCache, res.err) - if len(cache) > 0 { - clear(cache) - } - continue - } - // If there has been an error previously, we don't care about the results. - // We still want to wait for all the goroutines to prevent resources being wasted. - if len(errCache) > 0 { - continue - } - // If we don't need the data yet, we cache it and move on - if res.index != curIndex { - cache[res.index] = res - continue - } - // If we do need the data, we write it - wr, err := w.Write(res.data) - wrote += int64(wr) - if err != nil { - errCache = append(errCache, err) - if len(cache) > 0 { - clear(cache) - } - continue - } - pool.Put(res) - curIndex++ - // Now we recursively try to clear the cache - for len(cache) > 0 { - res, ok := cache[curIndex] - if !ok { - break - } - wr, err := w.Write(res.data) - wrote += int64(wr) - if err != nil { - errCache = append(errCache, err) - if len(cache) > 0 { - clear(cache) - } - break - } - delete(cache, curIndex) - pool.Put(res) - curIndex++ - } - } - if len(errCache) > 0 { - return wrote, errors.Join(errCache...) - } - } - if r.frag != nil { - rdr, err := r.frag() - if err != nil { - return wrote, err - } - wr, err := io.Copy(w, rdr) - wrote += wr - if l, ok := rdr.(*io.LimitedReader); ok { - if cl, ok := l.R.(io.Closer); ok { - cl.Close() - } - } - if err != nil { - return wrote, err - } - } - return wrote, nil -} +func (f FullReader) WriteTo(w io.Writer) (int64, error) { -// func (r FullReader) writeToWriteAt(w io.WriterAt) (out int64, outErr error) { -// wait := &sync.WaitGroup{} -// wait.Add(len(r.sizes)) -// mgr := routinemanager.NewManager(r.goroutineLimit) -// curOffset := r.initialOffset -// for i := uint64(0); i < uint64(len(r.sizes)); i++ { -// go func(index uint64, fileOffset int64) { -// lckNum := mgr.Lock() -// defer mgr.Unlock(lckNum) -// defer wait.Done() -// realSize := r.sizes[index] &^ (1 << 24) -// if realSize == 0 { -// if index == uint64(len(r.sizes))-1 && r.frag == nil { -// _, err := w.WriteAt([]byte{0}, int64((uint64(r.blockSize)*index)+r.finalBlockSize)-1) -// if err != nil { -// outErr = errors.Join(outErr, err) -// return -// } -// out = max(out, int64((uint64(r.blockSize)*index)+r.finalBlockSize)) -// } -// return -// } -// data := make([]byte, realSize) -// err := binary.Read(toreader.NewReader(r.r, int64(fileOffset)), binary.LittleEndian, &data) -// if err != nil { -// outErr = errors.Join(outErr, err) -// return -// } -// if r.sizes[index] == realSize { -// data, err = r.d.Decompress(data) -// } -// if err != nil { -// outErr = errors.Join(outErr, err) -// return -// } -// _, err = w.WriteAt(data, int64(uint64(r.blockSize)*index)) -// if err != nil { -// outErr = errors.Join(outErr, err) -// return -// } -// out = max(out, int64(uint64(r.blockSize)*(index+1))) -// }(i, curOffset) -// curOffset += int64(r.sizes[i]) &^ (1 << 24) -// } -// if r.frag != nil { -// wait.Add(1) -// go func() { -// lckNum := mgr.Lock() -// defer mgr.Unlock(lckNum) -// defer wait.Done() -// rdr, err := r.frag() -// if err != nil { -// outErr = errors.Join(outErr, err) -// return -// } -// dat, err := io.ReadAll(rdr) -// if err != nil { -// outErr = errors.Join(outErr, err) -// return -// } -// _, err = w.WriteAt(dat, int64(int(r.blockSize)*len(r.sizes))) -// if err != nil { -// outErr = errors.Join(outErr, err) -// return -// } -// out = int64(int(r.blockSize)*len(r.sizes)) + int64(r.finalBlockSize) -// }() -// } -// wait.Wait() -// return -// } +} diff --git a/low/data/reader.go b/low/data/reader.go index 898ab09..0ad59c2 100644 --- a/low/data/reader.go +++ b/low/data/reader.go @@ -1,104 +1 @@ package data - -import ( - "io" - "io/fs" - - "github.com/CalebQ42/squashfs/internal/decompress" -) - -type Reader struct { - r io.Reader - d decompress.Decompressor - frag io.Reader - sizes []uint32 - dat []byte - curOffset int - curIndex uint64 - finalBlockSize uint64 - blockSize uint32 - closed bool -} - -func NewReader(r io.Reader, d decompress.Decompressor, sizes []uint32, finalBlockSize uint64, blockSize uint32) Reader { - return Reader{ - r: r, - d: d, - sizes: sizes, - finalBlockSize: finalBlockSize, - blockSize: blockSize, - } -} - -func (r *Reader) AddFrag(fragRdr io.Reader) { - r.frag = fragRdr -} - -func (r *Reader) advance() error { - r.curOffset = 0 - defer func() { r.curIndex++ }() - var err error - if r.curIndex == uint64(len(r.sizes)) && r.frag != nil { - r.dat, err = io.ReadAll(r.frag) - return err - } else if r.curIndex >= uint64(len(r.sizes)) { - r.dat = []byte{} - return io.EOF - } - realSize := r.sizes[r.curIndex] &^ (1 << 24) - if realSize == 0 { - if r.curIndex == uint64(len(r.sizes))-1 && r.frag == nil { - r.dat = make([]byte, r.finalBlockSize) - } else { - r.dat = make([]byte, r.blockSize) - } - return nil - } - r.dat = make([]byte, realSize) - _, err = r.r.Read(r.dat) - if err != nil { - return err - } - if r.sizes[r.curIndex] != realSize { - return nil - } - r.dat, err = r.d.Decompress(r.dat) - return err -} - -func (r *Reader) Read(b []byte) (int, error) { - if r.closed { - return 0, fs.ErrClosed - } - curRead := 0 - var toRead int - for curRead < len(b) { - if r.curOffset >= len(r.dat) { - if err := r.advance(); err != nil { - return curRead, err - } - } - toRead = min(len(b)-curRead, len(r.dat)-r.curOffset) - toRead = copy(b[curRead:], r.dat[r.curOffset:r.curOffset+toRead]) - r.curOffset += toRead - curRead += toRead - } - return curRead, nil -} - -func (r *Reader) Close() error { - r.closed = true - r.r = nil - r.d = nil - if r.frag != nil { - if l, ok := r.frag.(*io.LimitedReader); ok { - if cl, ok := l.R.(io.Closer); ok { - cl.Close() - } - } - } - r.frag = nil - r.sizes = nil - r.dat = nil - return nil -} diff --git a/low/directory/directory.go b/low/directory/directory.go index be58199..47abff0 100644 --- a/low/directory/directory.go +++ b/low/directory/directory.go @@ -11,12 +11,45 @@ type header struct { Num uint32 } -type decEntry struct { +func readHeader(r io.Reader) (h header, err error) { + dat := make([]byte, 12) + _, err = r.Read(dat) + if err != nil { + return + } + h.Count = binary.LittleEndian.Uint32(dat) + h.BlockStart = binary.LittleEndian.Uint32(dat[4:]) + h.Num = binary.LittleEndian.Uint32(dat[8:]) + return +} + +type dirEntry struct { Offset uint16 NumOffset int16 InodeType uint16 NameSize uint16 - // Name []byte (not decoded along with decEntry) + Name []byte +} + +func readEntry(r io.Reader) (e dirEntry, err error) { + dat := make([]byte, 8) + _, err = r.Read(dat) + if err != nil { + return + } + e.Offset = binary.LittleEndian.Uint16(dat) + _, err = binary.Decode(dat[2:], binary.LittleEndian, &e.NumOffset) + if err != nil { + return + } + e.InodeType = binary.LittleEndian.Uint16(dat[4:]) + e.NameSize = binary.LittleEndian.Uint16(dat[6:]) + e.Name = make([]byte, e.NameSize+1) + _, err = r.Read(e.Name) + if err != nil { + return + } + return } type Entry struct { @@ -31,20 +64,15 @@ func ReadDirectory(r io.Reader, size uint32) (out []Entry, err error) { size -= 3 var curRead uint32 var h header - var de decEntry + var de dirEntry for curRead < size { - err = binary.Read(r, binary.LittleEndian, &h) + h, err = readHeader(r) if err != nil { return } curRead += 12 for i := uint32(0); i < h.Count+1 && curRead < size; i++ { - err = binary.Read(r, binary.LittleEndian, &de) - if err != nil { - return - } - nameTmp := make([]byte, de.NameSize+1) - err = binary.Read(r, binary.LittleEndian, &nameTmp) + de, err = readEntry(r) if err != nil { return } @@ -52,7 +80,7 @@ func ReadDirectory(r io.Reader, size uint32) (out []Entry, err error) { out = append(out, Entry{ BlockStart: h.BlockStart, Offset: de.Offset, - Name: string(nameTmp), + Name: string(de.Name), InodeType: de.InodeType, Num: h.Num + uint32(de.NumOffset), }) diff --git a/low/inode/dir.go b/low/inode/dir.go index ec5ff7e..3cb0cea 100644 --- a/low/inode/dir.go +++ b/low/inode/dir.go @@ -13,6 +13,20 @@ type Directory struct { ParentNum uint32 } +func ReadDir(r io.Reader) (d Directory, err error) { + dat := make([]byte, 16) + _, err = r.Read(dat) + if err != nil { + return + } + d.BlockStart = binary.LittleEndian.Uint32(dat) + d.LinkCount = binary.LittleEndian.Uint32(dat[4:]) + d.Size = binary.LittleEndian.Uint16(dat[8:]) + d.Offset = binary.LittleEndian.Uint16(dat[10:]) + d.ParentNum = binary.LittleEndian.Uint32(dat[12:]) + return +} + type EDirectory struct { LinkCount uint32 Size uint32 @@ -31,20 +45,6 @@ type DirectoryIndex struct { Name []byte } -func ReadDir(r io.Reader) (d Directory, err error) { - dat := make([]byte, 16) - _, err = r.Read(dat) - if err != nil { - return - } - d.BlockStart = binary.LittleEndian.Uint32(dat) - d.LinkCount = binary.LittleEndian.Uint32(dat[4:]) - d.Size = binary.LittleEndian.Uint16(dat[8:]) - d.Offset = binary.LittleEndian.Uint16(dat[10:]) - d.ParentNum = binary.LittleEndian.Uint32(dat[12:]) - return -} - func ReadEDir(r io.Reader) (d EDirectory, err error) { dat := make([]byte, 24) _, err = r.Read(dat) diff --git a/low/inode/file.go b/low/inode/file.go index 2b81014..a82b75b 100644 --- a/low/inode/file.go +++ b/low/inode/file.go @@ -14,21 +14,6 @@ type File struct { BlockSizes []uint32 } -type eFileInit struct { - BlockStart uint64 - Size uint64 - Sparse uint64 - LinkCount uint32 - FragInd uint32 - FragOffset uint32 - XattrInd uint32 -} - -type EFile struct { - eFileInit - BlockSizes []uint32 -} - func ReadFile(r io.Reader, blockSize uint32) (f File, err error) { dat := make([]byte, 16) _, err = r.Read(dat) @@ -55,16 +40,42 @@ func ReadFile(r io.Reader, blockSize uint32) (f File, err error) { return } +type EFile struct { + BlockStart uint64 + Size uint64 + Sparse uint64 + LinkCount uint32 + FragInd uint32 + FragOffset uint32 + XattrInd uint32 + BlockSizes []uint32 +} + func ReadEFile(r io.Reader, blockSize uint32) (f EFile, err error) { - err = binary.Read(r, binary.LittleEndian, &f.eFileInit) + dat := make([]byte, 40) + _, err = r.Read(dat) if err != nil { return } + f.BlockStart = binary.LittleEndian.Uint64(dat) + f.Size = binary.LittleEndian.Uint64(dat[8:]) + f.Sparse = binary.LittleEndian.Uint64(dat[16:]) + f.LinkCount = binary.LittleEndian.Uint32(dat[24:]) + f.FragInd = binary.LittleEndian.Uint32(dat[28:]) + f.FragOffset = binary.LittleEndian.Uint32(dat[32:]) + f.XattrInd = binary.LittleEndian.Uint32(dat[36:]) toRead := int(math.Floor(float64(f.Size) / float64(blockSize))) if f.FragInd == 0xFFFFFFFF && f.Size%uint64(blockSize) > 0 { toRead++ } + dat = make([]byte, toRead*4) + _, err = r.Read(dat) + if err != nil { + return + } f.BlockSizes = make([]uint32, toRead) - err = binary.Read(r, binary.LittleEndian, &f.BlockSizes) + for i := range toRead { + f.BlockSizes[i] = binary.LittleEndian.Uint32(dat[i*4:]) + } return } diff --git a/low/inode/misc.go b/low/inode/misc.go index 8ba0b61..e39e6f6 100644 --- a/low/inode/misc.go +++ b/low/inode/misc.go @@ -10,18 +10,31 @@ type Device struct { Dev uint32 } +func ReadDevice(r io.Reader) (d Device, err error) { + dat := make([]byte, 8) + _, err = r.Read(dat) + if err != nil { + return + } + d.LinkCount = binary.LittleEndian.Uint32(dat) + d.Dev = binary.LittleEndian.Uint32(dat[4:]) + return +} + type EDevice struct { Device XattrInd uint32 } -func ReadDevice(r io.Reader) (d Device, err error) { - err = binary.Read(r, binary.LittleEndian, &d) - return -} - func ReadEDevice(r io.Reader) (d EDevice, err error) { - err = binary.Read(r, binary.LittleEndian, &d) + dat := make([]byte, 12) + _, err = r.Read(dat) + if err != nil { + return + } + d.LinkCount = binary.LittleEndian.Uint32(dat) + d.Dev = binary.LittleEndian.Uint32(dat[4:]) + d.XattrInd = binary.LittleEndian.Uint32(dat[8:]) return } @@ -29,17 +42,28 @@ type IPC struct { LinkCount uint32 } +func ReadIPC(r io.Reader) (i IPC, err error) { + dat := make([]byte, 4) + _, err = r.Read(dat) + if err != nil { + return + } + i.LinkCount = binary.LittleEndian.Uint32(dat) + return +} + type EIPC struct { IPC XattrInd uint32 } -func ReadIPC(r io.Reader) (i IPC, err error) { - err = binary.Read(r, binary.LittleEndian, &i) - return -} - func ReadEIPC(r io.Reader) (i EIPC, err error) { - err = binary.Read(r, binary.LittleEndian, &i) + dat := make([]byte, 8) + _, err = r.Read(dat) + if err != nil { + return + } + i.LinkCount = binary.LittleEndian.Uint32(dat) + i.XattrInd = binary.LittleEndian.Uint32(dat[4:]) return } diff --git a/low/inode/sym.go b/low/inode/sym.go index 43659bb..6b9480a 100644 --- a/low/inode/sym.go +++ b/low/inode/sym.go @@ -5,42 +5,50 @@ import ( "io" ) -type symlinkInit struct { +type Symlink struct { LinkCount uint32 TargetSize uint32 -} - -type Symlink struct { - symlinkInit - Target []byte -} - -type ESymlink struct { - symlinkInit - Target []byte - XattrInd uint32 + Target []byte } func ReadSym(r io.Reader) (s Symlink, err error) { - err = binary.Read(r, binary.LittleEndian, &s.symlinkInit) + dat := make([]byte, 8) + _, err = r.Read(dat) if err != nil { return } + s.LinkCount = binary.LittleEndian.Uint32(dat) + s.TargetSize = binary.LittleEndian.Uint32(dat[4:]) s.Target = make([]byte, s.TargetSize) - err = binary.Read(r, binary.LittleEndian, &s.Target) + _, err = r.Read(s.Target) return } +type ESymlink struct { + LinkCount uint32 + TargetSize uint32 + Target []byte + XattrInd uint32 +} + func ReadESym(r io.Reader) (s ESymlink, err error) { - err = binary.Read(r, binary.LittleEndian, &s.symlinkInit) + dat := make([]byte, 8) + _, err = r.Read(dat) if err != nil { return } + s.LinkCount = binary.LittleEndian.Uint32(dat) + s.TargetSize = binary.LittleEndian.Uint32(dat[4:]) s.Target = make([]byte, s.TargetSize) - err = binary.Read(r, binary.LittleEndian, &s.Target) + _, err = r.Read(s.Target) if err != nil { return } - err = binary.Read(r, binary.LittleEndian, &s.XattrInd) + dat = make([]byte, 4) + _, err = r.Read(dat) + if err != nil { + return + } + s.XattrInd = binary.LittleEndian.Uint32(dat) return } diff --git a/squashfs_test.go b/squashfs_test.go index 83c59c8..b679ad9 100644 --- a/squashfs_test.go +++ b/squashfs_test.go @@ -17,7 +17,7 @@ import ( const ( squashfsURL = "https://darkstorm.tech/files/LinuxPATest.sfs" - squashfsName = "tensorflow.sqfs" + squashfsName = "test.sfs" ) func preTest(dir string) (fil *os.File, err error) {