diff --git a/README.md b/README.md index 2206edf..fcd6f07 100644 --- a/README.md +++ b/README.md @@ -4,13 +4,13 @@ A PURE Go library to read squashfs. There is currently no plans to add archive creation support as it will almost always be better to just call `mksquashfs`. I could see some possible use cases, but probably won't spend time on it unless it's requested (open a discussion if you want this feature). +The library has two parts. The top-level package, `github.com/CalebQ42/squashfs`, is easy to use: it implements the `io/fs` interfaces and doesn't expose unnecessary information. 95% of the time this is the library you want. If you need lower-level access to the archive's information, use `github.com/CalebQ42/squashfs/squashfs`, where far more information is exposed. + Currently has support for reading squashfs files and extracting files and folders. Special thanks to for some VERY important information in an easy to understand format. Thanks also to [distri's squashfs library](https://github.com/distr1/distri/tree/master/internal/squashfs) as I referenced it to figure some things out (and double check others). -## [TODO](https://github.com/CalebQ42/squashfs/projects/1?fullscreen=true) - ## Limitations * No Xattr parsing. This is simply because I haven't done any research on it and how to apply these in a pure go way. diff --git a/extraction_options.go b/extraction_options.go new file mode 100644 index 0000000..56535b8 --- /dev/null +++ b/extraction_options.go @@ -0,0 +1,21 @@ +package squashfs + +import ( + "io" + "io/fs" +) + +type ExtractionOptions struct { + LogOutput io.Writer //Where error log should write. + DereferenceSymlink bool //Replace symlinks with the target file. + UnbreakSymlink bool //Try to make sure symlinks remain unbroken when extracted, without changing the symlink. + Verbose bool //Prints extra info to log on an error. + IgnorePerm bool //Ignore file's permissions and instead use Perm. + Perm fs.FileMode //Permission to use when IgnorePerm. Defaults to 0777. 
+} + +func DefaultOptions() *ExtractionOptions { + return &ExtractionOptions{ + Perm: 0777, + } +} diff --git a/file.go b/file.go new file mode 100644 index 0000000..197e4bf --- /dev/null +++ b/file.go @@ -0,0 +1,175 @@ +package squashfs + +import ( + "errors" + "io" + "io/fs" + "path/filepath" + + "github.com/CalebQ42/squashfs/squashfs" + "github.com/CalebQ42/squashfs/squashfs/data" + "github.com/CalebQ42/squashfs/squashfs/inode" +) + +// File represents a file inside a squashfs archive. +type File struct { + b *squashfs.Base + full *data.FullReader + rdr *data.Reader + parent *FS + r *Reader + dirsRead int +} + +func (f *File) FS() (*FS, error) { + if !f.IsDir() { + return nil, errors.New("not a directory") + } + d, err := f.b.ToDir(f.r.r) + if err != nil { + return nil, err + } + return &FS{d: d, parent: f.parent, r: f.r}, nil +} + +// Closes the underlying readers. +// Further calls to Read and WriteTo will re-create the readers. +// Never returns an error. +func (f *File) Close() error { + if f.rdr != nil { + return f.rdr.Close() + } + f.rdr = nil + f.full = nil + return nil +} + +// Returns the file the symlink points to. +// If the file isn't a symlink, or points to a file outside the archive, returns nil. +func (f *File) GetSymlinkFile() fs.File { + if !f.IsSymlink() { + return nil + } + if filepath.IsAbs(f.SymlinkPath()) { + return nil + } + fil, err := f.parent.Open(f.SymlinkPath()) + if err != nil { + return nil + } + return fil +} + +// Returns whether the file is a directory. +func (f *File) IsDir() bool { + return f.b.IsDir() +} + +// Returns whether the file is a regular file. +func (f *File) IsRegular() bool { + return f.b.IsRegular() +} + +// Returns whether the file is a symlink. +func (f *File) IsSymlink() bool { + return f.b.Inode.Type == inode.Sym || f.b.Inode.Type == inode.ESym +} + +func (f *File) Mode() fs.FileMode { + return f.b.Inode.Mode() +} + +// Read reads the data from the file. Only works if file is a normal file. 
+func (f *File) Read(b []byte) (int, error) { + if !f.IsRegular() { + return 0, errors.New("file is not a regular file") + } + if f.rdr == nil { + err := f.initializeReaders() + if err != nil { + return 0, err + } + } + return f.rdr.Read(b) +} + +// ReadDir returns n fs.DirEntry's that's contained in the File (if it's a directory). +// If n <= 0 all fs.DirEntry's are returned. +func (f *File) ReadDir(n int) ([]fs.DirEntry, error) { + if !f.IsDir() { + return nil, errors.New("file is not a directory") + } + d, err := f.b.ToDir(f.r.r) + if err != nil { + return nil, err + } + start, end := 0, len(d.Entries) + if n > 0 { + start, end = f.dirsRead, f.dirsRead+n + if end > len(d.Entries) { + end = len(d.Entries) + err = io.EOF + } + } + var out []fs.DirEntry + var fi fileInfo + for _, e := range d.Entries[start:end] { + fi, err = f.r.newFileInfo(e) + if err != nil { + f.dirsRead += len(out) + return out, err + } + out = append(out, fs.FileInfoToDirEntry(fi)) + } + f.dirsRead += len(out) + return out, err +} + +// Returns the file's fs.FileInfo +func (f *File) Stat() (fs.FileInfo, error) { + return newFileInfo(f.b.Name, f.b.Inode), nil +} + +// SymlinkPath returns the symlink's target path. Is the File isn't a symlink, returns an empty string. +func (f *File) SymlinkPath() string { + switch f.b.Inode.Type { + case inode.Sym: + return string(f.b.Inode.Data.(inode.Symlink).Target) + case inode.ESym: + return string(f.b.Inode.Data.(inode.ESymlink).Target) + } + return "" +} + +// Writes all data from the file to the given writer in a multi-threaded manner. 
+// The underlying reader is separate +func (f *File) WriteTo(w io.Writer) (int64, error) { + if !f.IsRegular() { + return 0, errors.New("file is not a regular file") + } + if f.full == nil { + err := f.initializeReaders() + if err != nil { + return 0, err + } + } + return f.full.WriteTo(w) +} + +func (f *File) initializeReaders() error { + var err error + f.rdr, f.full, err = f.b.GetRegFileReaders(f.r.r) + return err +} + +// Extract the file to the given folder. If the file is a folder, the folder's contents will be extracted to the folder. +// Uses default extraction options. +func (f *File) Extract(folder string) error { + return f.ExtractWithOptions(folder, DefaultOptions()) +} + +// Extract the file to the given folder. If the file is a folder, the folder's contents will be extracted to the folder. +// Allows setting various extraction options via ExtractionOptions. +func (f *File) ExtractWithOptions(folder string, op *ExtractionOptions) error { + //TODO +} diff --git a/file_info.go b/file_info.go new file mode 100644 index 0000000..b4552a7 --- /dev/null +++ b/file_info.go @@ -0,0 +1,68 @@ +package squashfs + +import ( + "io/fs" + "time" + + "github.com/CalebQ42/squashfs/squashfs/directory" + "github.com/CalebQ42/squashfs/squashfs/inode" +) + +type fileInfo struct { + name string + size int64 + perm uint32 + modTime uint32 + fileType uint16 +} + +func (r Reader) newFileInfo(e directory.Entry) (fileInfo, error) { + i, err := r.r.InodeFromEntry(e) + if err != nil { + return fileInfo{}, err + } + return newFileInfo(e.Name, i), nil +} + +func newFileInfo(name string, i *inode.Inode) fileInfo { + var size int64 + if i.Type == inode.Fil { + size = int64(i.Data.(inode.File).Size) + } else if i.Type == inode.EFil { + size = int64(i.Data.(inode.EFile).Size) + } + return fileInfo{ + name: name, + size: size, + perm: uint32(i.Perm), + modTime: i.ModTime, + fileType: i.Type, + } +} + +func (f fileInfo) Name() string { + return f.name +} + +func (f fileInfo) Size() int64 
{ + return f.size +} + +func (f fileInfo) Mode() fs.FileMode { + if f.IsDir() { + return fs.FileMode(f.perm | uint32(fs.ModeDir)) + } + return fs.FileMode(f.perm) +} + +func (f fileInfo) ModTime() time.Time { + return time.Unix(int64(f.modTime), 0) +} + +func (f fileInfo) IsDir() bool { + return f.fileType == inode.Dir || f.fileType == inode.EDir +} + +func (f fileInfo) Sys() any { + return nil +} diff --git a/fs.go b/fs.go new file mode 100644 index 0000000..7f62f44 --- /dev/null +++ b/fs.go @@ -0,0 +1,241 @@ +package squashfs + +import ( + "io" + "io/fs" + "path" + "path/filepath" + "slices" + "strings" + + "github.com/CalebQ42/squashfs/squashfs" + "github.com/CalebQ42/squashfs/squashfs/directory" +) + +// FS is a fs.FS representation of a squashfs directory. +// Implements fs.GlobFS, fs.ReadDirFS, fs.ReadFileFS, fs.StatFS, and fs.SubFS +type FS struct { + d *squashfs.Directory + r *Reader + parent *FS +} + +// Glob returns the name of the files at the given pattern. +// All paths are relative to the FS. +// Uses filepath.Match to compare names. 
+func (f *FS) Glob(pattern string) (out []string, err error) { + pattern = filepath.Clean(pattern) + if !fs.ValidPath(pattern) { + return nil, &fs.PathError{ + Op: "glob", + Path: pattern, + Err: fs.ErrInvalid, + } + } + split := strings.Split(pattern, "/") + for i := 0; i < len(f.d.Entries); i++ { + if match, _ := path.Match(split[0], f.d.Entries[i].Name); match { + if len(split) == 1 { + out = append(out, f.d.Entries[i].Name) + continue + } + sub, err := f.Sub(split[0]) + if err != nil { + if pathErr, ok := err.(*fs.PathError); ok { + if pathErr.Err == fs.ErrNotExist { + continue + } + pathErr.Op = "glob" + pathErr.Path = pattern + return nil, pathErr + } + return nil, &fs.PathError{ + Op: "glob", + Path: pattern, + Err: err, + } + } + subGlob, err := sub.(fs.GlobFS).Glob(strings.Join(split[1:], "/")) + if err != nil { + if pathErr, ok := err.(*fs.PathError); ok { + if pathErr.Err == fs.ErrNotExist { + continue + } + pathErr.Op = "glob" + pathErr.Path = pattern + return nil, pathErr + } + return nil, &fs.PathError{ + Op: "glob", + Path: pattern, + Err: err, + } + } + for i := 0; i < len(subGlob); i++ { + subGlob[i] = f.d.Name + "/" + subGlob[i] + } + out = append(out, subGlob...) + } + } + return +} + +// Opens the file at name. Returns a *File as an fs.File. +func (f *FS) Open(name string) (fs.File, error) { + name = filepath.Clean(name) + if !fs.ValidPath(name) { + return nil, &fs.PathError{ + Op: "open", + Path: name, + Err: fs.ErrInvalid, + } + } + if name == "." 
|| name == "" { + return &File{ + b: &f.d.Base, + r: f.r, + parent: f.parent, + }, nil + } + split := strings.Split(name, "/") + i, found := slices.BinarySearchFunc(f.d.Entries, split[0], func(e directory.Entry, name string) int { + return strings.Compare(e.Name, name) + }) + if !found { + return nil, &fs.PathError{ + Op: "open", + Path: name, + Err: fs.ErrNotExist, + } + } + b, err := f.r.r.BaseFromEntry(f.d.Entries[i]) + if err != nil { + return nil, err + } + if len(split) == 1 { + return &File{ + b: b, + r: f.r, + parent: f.parent, + }, nil + } + if !b.IsDir() { + return nil, &fs.PathError{ + Op: "open", + Path: name, + Err: fs.ErrNotExist, + } + } + d, err := b.ToDir(f.r.r) + if err != nil { + return nil, err + } + return (&FS{ + d: d, + r: f.r, + parent: f, + }).Open(strings.Join(split[1:], "/")) +} + +// Returns all DirEntry's for the directory at name. +// If name is not a directory, returns an error. +func (f *FS) ReadDir(name string) ([]fs.DirEntry, error) { + name = filepath.Clean(name) + if !fs.ValidPath(name) { + return nil, &fs.PathError{ + Op: "readdir", + Path: name, + Err: fs.ErrInvalid, + } + } + if name == "." || name == "" { + return (&File{ + b: &f.d.Base, + parent: f.parent, + r: f.r, + }).ReadDir(-1) + } + fil, err := f.Open(name) + if err != nil { + return nil, err + } + return fil.(*File).ReadDir(-1) +} + +// Returns the contents of the file at name. +func (f *FS) ReadFile(name string) (out []byte, err error) { + name = filepath.Clean(name) + if !fs.ValidPath(name) { + return nil, &fs.PathError{ + Op: "readfile", + Path: name, + Err: fs.ErrInvalid, + } + } + if name == "." || name == "" { + return nil, fs.ErrInvalid + } + fil, err := f.Open(name) + if err != nil { + return nil, err + } + if !fil.(*File).IsRegular() { + return nil, fs.ErrInvalid + } + return io.ReadAll(fil) +} + +// Returns the fs.FileInfo for the file at name. 
+func (f *FS) Stat(name string) (fs.FileInfo, error) { + name = filepath.Clean(name) + if !fs.ValidPath(name) { + return nil, &fs.PathError{ + Op: "stat", + Path: name, + Err: fs.ErrInvalid, + } + } + if name == "." || name == "" { + return (&File{ + b: &f.d.Base, + parent: f.parent, + r: f.r, + }).Stat() + } + fil, err := f.Open(name) + if err != nil { + return nil, err + } + return fil.(*File).Stat() +} + +// Returns the FS at dir +func (f *FS) Sub(dir string) (fs.FS, error) { + dir = filepath.Clean(dir) + if !fs.ValidPath(dir) { + return nil, &fs.PathError{ + Op: "dir", + Path: dir, + Err: fs.ErrInvalid, + } + } + if dir == "." || dir == "" { + return f, nil + } + fil, err := f.Open(dir) + if err != nil { + return nil, err + } + if !fil.(*File).IsDir() { + return nil, &fs.PathError{ + Op: "dir", + Path: dir, + Err: fs.ErrInvalid, + } + } + return fil.(*File).FS() +} + +func (f *FS) path() string { + return filepath.Join(f.parent.path(), f.d.Name) +} diff --git a/reader.go b/reader.go new file mode 100644 index 0000000..61946f4 --- /dev/null +++ b/reader.go @@ -0,0 +1,30 @@ +package squashfs + +import ( + "io" + "time" + + "github.com/CalebQ42/squashfs/squashfs" +) + +type Reader struct { + *FS + r *squashfs.Reader +} + +func NewReader(r io.ReaderAt) (*Reader, error) { + rdr, err := squashfs.NewReader(r) + if err != nil { + return nil, err + } + return &Reader{ + r: rdr, + FS: &FS{ + d: rdr.Root, + }, + }, nil +} + +func (r *Reader) ModTime() time.Time { + return time.Unix(int64(r.r.Superblock.ModTime), 0) +} diff --git a/squashfs/base.go b/squashfs/base.go index 2bac355..f799f1a 100644 --- a/squashfs/base.go +++ b/squashfs/base.go @@ -21,10 +21,7 @@ func (r *Reader) BaseFromInode(i *inode.Inode, name string) *Base { } func (r *Reader) BaseFromEntry(e directory.Entry) (*Base, error) { - rdr := metadata.NewReader(toreader.NewReader(r.r, int64(r.Superblock.InodeTableStart)+int64(e.BlockStart)), r.d) - defer rdr.Close() - rdr.Read(make([]byte, e.Offset)) - in, err 
:= inode.Read(rdr, r.Superblock.BlockSize) + in, err := r.InodeFromEntry(e) if err != nil { return nil, err } @@ -32,7 +29,7 @@ func (r *Reader) BaseFromEntry(e directory.Entry) (*Base, error) { } func (r *Reader) BaseFromRef(ref uint64, name string) (*Base, error) { - in, err := r.inodeFromRef(ref) + in, err := r.InodeFromRef(ref) if err != nil { return nil, err } diff --git a/squashfs/directory.go b/squashfs/directory.go index 9e7db15..07fcb82 100644 --- a/squashfs/directory.go +++ b/squashfs/directory.go @@ -20,7 +20,7 @@ type Directory struct { } func (r *Reader) directoryFromRef(ref uint64, name string) (*Directory, error) { - i, err := r.inodeFromRef(ref) + i, err := r.InodeFromRef(ref) if err != nil { fmt.Println("yo") return nil, err diff --git a/squashfs/inode.go b/squashfs/inode.go index 205c5ce..d1cb5a2 100644 --- a/squashfs/inode.go +++ b/squashfs/inode.go @@ -3,10 +3,11 @@ package squashfs import ( "github.com/CalebQ42/squashfs/internal/metadata" "github.com/CalebQ42/squashfs/internal/toreader" + "github.com/CalebQ42/squashfs/squashfs/directory" "github.com/CalebQ42/squashfs/squashfs/inode" ) -func (r *Reader) inodeFromRef(ref uint64) (*inode.Inode, error) { +func (r *Reader) InodeFromRef(ref uint64) (*inode.Inode, error) { offset, meta := (ref>>16)+r.Superblock.InodeTableStart, ref&0xFFFF rdr := metadata.NewReader(toreader.NewReader(r.r, int64(offset)), r.d) defer rdr.Close() @@ -16,3 +17,10 @@ func (r *Reader) inodeFromRef(ref uint64) (*inode.Inode, error) { } return inode.Read(rdr, r.Superblock.BlockSize) } + +func (r *Reader) InodeFromEntry(e directory.Entry) (*inode.Inode, error) { + rdr := metadata.NewReader(toreader.NewReader(r.r, int64(r.Superblock.InodeTableStart)+int64(e.BlockStart)), r.d) + defer rdr.Close() + rdr.Read(make([]byte, e.Offset)) + return inode.Read(rdr, r.Superblock.BlockSize) +} diff --git a/squashfs/reader.go b/squashfs/reader.go index 3e8ec89..302cbc9 100644 --- a/squashfs/reader.go +++ b/squashfs/reader.go @@ -5,7 +5,6 @@ 
import ( "errors" "io" "math" - "time" "github.com/CalebQ42/squashfs/internal/decompress" "github.com/CalebQ42/squashfs/internal/metadata" @@ -79,11 +78,6 @@ func NewReader(r io.ReaderAt) (rdr *Reader, err error) { return } -// Returns the last time the archive was modified. -func (r *Reader) ModTime() time.Time { - return time.Unix(int64(r.Superblock.ModTime), 0) -} - // Get a uid/gid at the given index. Lazily populates the reader's Id table as necessary. func (r *Reader) Id(i uint16) (uint32, error) { if len(r.idTable) > int(i) { @@ -221,5 +215,5 @@ func (r *Reader) Inode(i uint32) (*inode.Inode, error) { if err != nil { return nil, err } - return r.inodeFromRef(ref) + return r.InodeFromRef(ref) }