447 lines
18 KiB
Go
447 lines
18 KiB
Go
// Copyright 2016 the Go-FUSE Authors. All rights reserved.
|
|
// Use of this source code is governed by a BSD-style
|
|
// license that can be found in the LICENSE file.
|
|
|
|
// Package fuse provides APIs to implement filesystems in
|
|
// userspace in terms of raw FUSE protocol.
|
|
//
|
|
// A filesystem is implemented by implementing its server that provides a
|
|
// RawFileSystem interface. Typically the server embeds
|
|
// NewDefaultRawFileSystem() and implements only subset of filesystem methods:
|
|
//
|
|
// type MyFS struct {
|
|
// fuse.RawFileSystem
|
|
// ...
|
|
// }
|
|
//
|
|
// func NewMyFS() *MyFS {
|
|
// return &MyFS{
|
|
// RawFileSystem: fuse.NewDefaultRawFileSystem(),
|
|
// ...
|
|
// }
|
|
// }
|
|
//
|
|
// // Mkdir implements "mkdir" request handler.
|
|
// //
|
|
// // For other requests - not explicitly implemented by MyFS - ENOSYS
|
|
// // will be typically returned to client.
|
|
// func (fs *MyFS) Mkdir(...) {
|
|
// ...
|
|
// }
|
|
//
|
|
// Then the filesystem can be mounted and served to a client (typically OS
|
|
// kernel) by creating Server:
|
|
//
|
|
// fs := NewMyFS() // implements RawFileSystem
|
|
// fssrv, err := fuse.NewServer(fs, mountpoint, &fuse.MountOptions{...})
|
|
// if err != nil {
|
|
// ...
|
|
// }
|
|
//
|
|
// and letting the server do its work:
|
|
//
|
|
// // either synchronously - .Serve() blocks until the filesystem is unmounted.
|
|
// fssrv.Serve()
|
|
//
|
|
// // or in the background - .Serve() is spawned in another goroutine, but
|
|
// // before interacting with fssrv from current context we have to wait
|
|
// // until the filesystem mounting is complete.
|
|
// go fssrv.Serve()
|
|
// err = fssrv.WaitMount()
|
|
// if err != nil {
|
|
// ...
|
|
// }
|
|
//
|
|
// The server will serve clients by dispatching their requests to the
|
|
// filesystem implementation and conveying responses back. For example "mkdir"
|
|
// FUSE request dispatches to call
|
|
//
|
|
// fs.Mkdir(*MkdirIn, ..., *EntryOut)
|
|
//
|
|
// "stat" to call
|
|
//
|
|
// fs.GetAttr(*GetAttrIn, *AttrOut)
|
|
//
|
|
// etc. Please refer to RawFileSystem documentation for details.
|
|
//
|
|
// Typically, each call of the API happens in its own
|
|
// goroutine, so take care to make the file system thread-safe.
|
|
//
|
|
// Be careful when you access the FUSE mount from the same process. An access can
|
|
// tie up two OS threads (one on the request side and one on the FUSE server side).
|
|
// This can deadlock if there is no free thread to handle the FUSE server side.
|
|
// Run your program with GOMAXPROCS=1 to make the problem easier to reproduce,
|
|
// see https://github.com/hanwen/go-fuse/issues/261 for an example of that
|
|
// problem.
|
|
//
|
|
// # Higher level interfaces
|
|
//
|
|
// As said above this packages provides way to implement filesystems in terms of
|
|
// raw FUSE protocol.
|
|
//
|
|
// Package github.com/hanwen/go-fuse/v2/fs provides way to implement
|
|
// filesystems in terms of paths and/or inodes.
|
|
//
|
|
// # Mount styles
|
|
//
|
|
// The NewServer() handles mounting the filesystem, which
|
|
// involves opening `/dev/fuse` and calling the
|
|
// `mount(2)` syscall. The latter needs root permissions.
|
|
// This is handled in one of three ways:
|
|
//
|
|
// 1) go-fuse opens `/dev/fuse` and executes the `fusermount`
|
|
// setuid-root helper to call `mount(2)` for us. This is the default.
|
|
// Does not need root permissions but needs `fusermount` installed.
|
|
//
|
|
// 2) If `MountOptions.DirectMount` is set, go-fuse calls `mount(2)` itself.
|
|
// Needs root permissions, but works without `fusermount`.
|
|
//
|
|
// 3) If `mountPoint` has the magic `/dev/fd/N` syntax, it means that that a
|
|
// privileged parent process:
|
|
//
|
|
// * Opened /dev/fuse
|
|
//
|
|
// * Called mount(2) on a real mountpoint directory that we don't know about
|
|
//
|
|
// * Inherited the fd to /dev/fuse to us
|
|
//
|
|
// * Informs us about the fd number via /dev/fd/N
|
|
//
|
|
// This magic syntax originates from libfuse [1] and allows the FUSE server to
|
|
// run without any privileges and without needing `fusermount`, as the parent
|
|
// process performs all privileged operations.
|
|
//
|
|
// The "privileged parent" is usually a container manager like Singularity [2],
|
|
// but for testing, it can also be the `mount.fuse3` helper with the
|
|
// `drop_privileges,setuid=$USER` flags. Example below for gocryptfs:
|
|
//
|
|
// $ sudo mount.fuse3 "/usr/local/bin/gocryptfs#/tmp/cipher" /tmp/mnt -o drop_privileges,setuid=$USER
|
|
//
|
|
// [1] https://github.com/libfuse/libfuse/commit/64e11073b9347fcf9c6d1eea143763ba9e946f70
|
|
//
|
|
// [2] https://sylabs.io/guides/3.7/user-guide/bind_paths_and_mounts.html#fuse-mounts
|
|
//
|
|
// # Aborting a file system
|
|
//
|
|
// A caller that has an open file in a buggy or crashed FUSE
|
|
// filesystem will be hung. The easiest way to clean up this situation
|
|
// is through the fusectl filesystem. By writing into
|
|
// /sys/fs/fuse/connection/$ID/abort, reads from the FUSE device fail,
|
|
// and all callers receive ENOTCONN (transport endpoint not connected)
|
|
// on their pending syscalls. The FUSE connection ID can be found as
|
|
// the Dev field in the Stat_t result for a file in the mount.
|
|
package fuse
|
|
|
|
import "log"
|
|
|
|
// Types for users to implement.
|
|
|
|
// The result of Read is an array of bytes, but for performance
|
|
// reasons, we can also return data as a file-descriptor/offset/size
|
|
// tuple. If the backing store for a file is another filesystem, this
|
|
// reduces the amount of copying between the kernel and the FUSE
|
|
// server. The ReadResult interface captures both cases.
|
|
type ReadResult interface {
|
|
// Returns the raw bytes for the read, possibly using the
|
|
// passed buffer. The buffer should be larger than the return
|
|
// value from Size.
|
|
Bytes(buf []byte) ([]byte, Status)
|
|
|
|
// Size returns how many bytes this return value takes at most.
|
|
Size() int
|
|
|
|
// Done() is called after sending the data to the kernel.
|
|
Done()
|
|
}
|
|
|
|
type MountOptions struct {
|
|
AllowOther bool
|
|
|
|
// Options are passed as -o string to fusermount.
|
|
Options []string
|
|
|
|
// Default is _DEFAULT_BACKGROUND_TASKS, 12. This numbers
|
|
// controls the allowed number of requests that relate to
|
|
// async I/O. Concurrency for synchronous I/O is not limited.
|
|
MaxBackground int
|
|
|
|
// MaxWrite is the max size for read and write requests. If 0, use
|
|
// go-fuse default (currently 64 kiB).
|
|
// This number is internally capped at MAX_KERNEL_WRITE (higher values don't make
|
|
// sense).
|
|
//
|
|
// Non-direct-io reads are mostly served via kernel readahead, which is
|
|
// additionally subject to the MaxReadAhead limit.
|
|
//
|
|
// Implementation notes:
|
|
//
|
|
// There's four values the Linux kernel looks at when deciding the request size:
|
|
// * MaxWrite, passed via InitOut.MaxWrite. Limits the WRITE size.
|
|
// * max_read, passed via a string mount option. Limits the READ size.
|
|
// go-fuse sets max_read equal to MaxWrite.
|
|
// You can see the current max_read value in /proc/self/mounts .
|
|
// * MaxPages, passed via InitOut.MaxPages. In Linux 4.20 and later, the value
|
|
// can go up to 1 MiB and go-fuse calculates the MaxPages value acc.
|
|
// to MaxWrite, rounding up.
|
|
// On older kernels, the value is fixed at 128 kiB and the
|
|
// passed value is ignored. No request can be larger than MaxPages, so
|
|
// READ and WRITE are effectively capped at MaxPages.
|
|
// * MaxReadAhead, passed via InitOut.MaxReadAhead.
|
|
MaxWrite int
|
|
|
|
// MaxReadAhead is the max read ahead size to use. It controls how much data the
|
|
// kernel reads in advance to satisfy future read requests from applications.
|
|
// How much exactly is subject to clever heuristics in the kernel
|
|
// (see https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/mm/readahead.c?h=v6.2-rc5#n375
|
|
// if you are brave) and hence also depends on the kernel version.
|
|
//
|
|
// If 0, use kernel default. This number is capped at the kernel maximum
|
|
// (128 kiB on Linux) and cannot be larger than MaxWrite.
|
|
//
|
|
// MaxReadAhead only affects buffered reads (=non-direct-io), but even then, the
|
|
// kernel can and does send larger reads to satisfy read reqests from applications
|
|
// (up to MaxWrite or VM_READAHEAD_PAGES=128 kiB, whichever is less).
|
|
MaxReadAhead int
|
|
|
|
// If IgnoreSecurityLabels is set, all security related xattr
|
|
// requests will return NO_DATA without passing through the
|
|
// user defined filesystem. You should only set this if you
|
|
// file system implements extended attributes, and you are not
|
|
// interested in security labels.
|
|
IgnoreSecurityLabels bool // ignoring labels should be provided as a fusermount mount option.
|
|
|
|
// If RememberInodes is set, we will never forget inodes.
|
|
// This may be useful for NFS.
|
|
RememberInodes bool
|
|
|
|
// Values shown in "df -T" and friends
|
|
// First column, "Filesystem"
|
|
FsName string
|
|
|
|
// Second column, "Type", will be shown as "fuse." + Name
|
|
Name string
|
|
|
|
// If set, wrap the file system in a single-threaded locking wrapper.
|
|
SingleThreaded bool
|
|
|
|
// If set, return ENOSYS for Getxattr calls, so the kernel does not issue any
|
|
// Xattr operations at all.
|
|
DisableXAttrs bool
|
|
|
|
// If set, print debugging information.
|
|
Debug bool
|
|
|
|
// If set, sink for debug statements.
|
|
//
|
|
// To increase signal/noise ratio Go-FUSE uses abbreviations in its debug log
|
|
// output. Here is how to read it:
|
|
//
|
|
// - `iX` means `inode X`;
|
|
// - `gX` means `generation X`;
|
|
// - `tA` and `tE` means timeout for attributes and directory entry correspondingly;
|
|
// - `[<off> +<size>)` means data range from `<off>` inclusive till `<off>+<size>` exclusive;
|
|
// - `Xb` means `X bytes`.
|
|
// - `pX` means the request originated from PID `x`. 0 means the request originated from the kernel.
|
|
//
|
|
// Every line is prefixed with either `rx <unique>` (receive from kernel) or `tx <unique>` (send to kernel)
|
|
//
|
|
// Example debug log output:
|
|
//
|
|
// rx 2: LOOKUP i1 [".wcfs"] 6b p5874
|
|
// tx 2: OK, {i3 g2 tE=1s tA=1s {M040755 SZ=0 L=0 1000:1000 B0*0 i0:3 A 0.000000 M 0.000000 C 0.000000}}
|
|
// rx 3: LOOKUP i3 ["zurl"] 5b p5874
|
|
// tx 3: OK, {i4 g3 tE=1s tA=1s {M0100644 SZ=33 L=1 1000:1000 B0*0 i0:4 A 0.000000 M 0.000000 C 0.000000}}
|
|
// rx 4: OPEN i4 {O_RDONLY,0x8000} p5874
|
|
// tx 4: 38=function not implemented, {Fh 0 }
|
|
// rx 5: READ i4 {Fh 0 [0 +4096) L 0 RDONLY,0x8000} p5874
|
|
// tx 5: OK, 33b data "file:///"...
|
|
// rx 6: GETATTR i4 {Fh 0} p5874
|
|
// tx 6: OK, {tA=1s {M0100644 SZ=33 L=1 1000:1000 B0*0 i0:4 A 0.000000 M 0.000000 C 0.000000}}
|
|
// rx 7: FLUSH i4 {Fh 0} p5874
|
|
// tx 7: OK
|
|
// rx 8: LOOKUP i1 ["head"] 5b p5874
|
|
// tx 8: OK, {i5 g4 tE=1s tA=1s {M040755 SZ=0 L=0 1000:1000 B0*0 i0:5 A 0.000000 M 0.000000 C 0.000000}}
|
|
// rx 9: LOOKUP i5 ["bigfile"] 8b p5874
|
|
// tx 9: OK, {i6 g5 tE=1s tA=1s {M040755 SZ=0 L=0 1000:1000 B0*0 i0:6 A 0.000000 M 0.000000 C 0.000000}}
|
|
// rx 10: FLUSH i4 {Fh 0} p5874
|
|
// tx 10: OK
|
|
// rx 11: GETATTR i1 {Fh 0} p5874
|
|
// tx 11: OK, {tA=1s {M040755 SZ=0 L=1 1000:1000 B0*0 i0:1 A 0.000000 M 0.000000 C 0.000000}}
|
|
Logger *log.Logger
|
|
|
|
// If set, ask kernel to forward file locks to FUSE. If using,
|
|
// you must implement the GetLk/SetLk/SetLkw methods.
|
|
EnableLocks bool
|
|
|
|
// If set, the kernel caches all Readlink return values. The
|
|
// filesystem must use content notification to force the
|
|
// kernel to issue a new Readlink call.
|
|
EnableSymlinkCaching bool
|
|
|
|
// If set, ask kernel not to do automatic data cache invalidation.
|
|
// The filesystem is fully responsible for invalidating data cache.
|
|
ExplicitDataCacheControl bool
|
|
|
|
// SyncRead is off by default, which means that go-fuse enable the
|
|
// FUSE_CAP_ASYNC_READ capability.
|
|
// The kernel then submits multiple concurrent reads to service
|
|
// userspace requests and kernel readahead.
|
|
//
|
|
// Setting SyncRead disables the FUSE_CAP_ASYNC_READ capability.
|
|
// The kernel then only sends one read request per file handle at a time,
|
|
// and orders the requests by offset.
|
|
//
|
|
// This is useful if reading out of order or concurrently is expensive for
|
|
// (example: Amazon Cloud Drive).
|
|
//
|
|
// See the comment to FUSE_CAP_ASYNC_READ in
|
|
// https://github.com/libfuse/libfuse/blob/master/include/fuse_common.h
|
|
// for more details.
|
|
SyncRead bool
|
|
|
|
// If set, fuse will first attempt to use syscall.Mount instead of
|
|
// fusermount to mount the filesystem. This will not update /etc/mtab
|
|
// but might be needed if fusermount is not available.
|
|
// Also, Server.Unmount will attempt syscall.Unmount before calling
|
|
// fusermount.
|
|
DirectMount bool
|
|
|
|
// DirectMountStrict is like DirectMount but no fallback to fusermount is
|
|
// performed. If both DirectMount and DirectMountStrict are set,
|
|
// DirectMountStrict wins.
|
|
DirectMountStrict bool
|
|
|
|
// DirectMountFlags are the mountflags passed to syscall.Mount. If zero, the
|
|
// default value used by fusermount are used: syscall.MS_NOSUID|syscall.MS_NODEV.
|
|
//
|
|
// If you actually *want* zero flags, pass syscall.MS_MGC_VAL, which is ignored
|
|
// by the kernel. See `man 2 mount` for details about MS_MGC_VAL.
|
|
DirectMountFlags uintptr
|
|
|
|
// EnableAcls enables kernel ACL support.
|
|
//
|
|
// See the comments to FUSE_CAP_POSIX_ACL
|
|
// in https://github.com/libfuse/libfuse/blob/master/include/fuse_common.h
|
|
// for details.
|
|
EnableAcl bool
|
|
|
|
// Disable ReadDirPlus capability so ReadDir is used instead. Simple
|
|
// directory queries (i.e. 'ls' without '-l') can be faster with
|
|
// ReadDir, as no per-file stat calls are needed
|
|
DisableReadDirPlus bool
|
|
}
|
|
|
|
// RawFileSystem is an interface close to the FUSE wire protocol.
|
|
//
|
|
// Unless you really know what you are doing, you should not implement
|
|
// this, but rather the interfaces associated with
|
|
// fs.InodeEmbedder. The details of getting interactions with open
|
|
// files, renames, and threading right etc. are somewhat tricky and
|
|
// not very interesting.
|
|
//
|
|
// Each FUSE request results in a corresponding method called by Server.
|
|
// Several calls may be made simultaneously, because the server typically calls
|
|
// each method in separate goroutine.
|
|
//
|
|
// A null implementation is provided by NewDefaultRawFileSystem.
|
|
//
|
|
// After a successful FUSE API call returns, you may not read input or
|
|
// write output data: for performance reasons, memory is reused for
|
|
// following requests, and reading/writing the request data will lead
|
|
// to race conditions. If you spawn a background routine from a FUSE
|
|
// API call, any incoming request data it wants to reference should be
|
|
// copied over.
|
|
//
|
|
// If a FUSE API call is canceled (which is signaled by closing the
|
|
// `cancel` channel), the API call should return EINTR. In this case,
|
|
// the outstanding request data is not reused, so the API call may
|
|
// return EINTR without ensuring that child contexts have successfully
|
|
// completed.
|
|
type RawFileSystem interface {
|
|
String() string
|
|
|
|
// If called, provide debug output through the log package.
|
|
SetDebug(debug bool)
|
|
|
|
// Lookup is called by the kernel when the VFS wants to know
|
|
// about a file inside a directory. Many lookup calls can
|
|
// occur in parallel, but only one call happens for each (dir,
|
|
// name) pair.
|
|
Lookup(cancel <-chan struct{}, header *InHeader, name string, out *EntryOut) (status Status)
|
|
|
|
// Forget is called when the kernel discards entries from its
|
|
// dentry cache. This happens on unmount, and when the kernel
|
|
// is short on memory. Since it is not guaranteed to occur at
|
|
// any moment, and since there is no return value, Forget
|
|
// should not do I/O, as there is no channel to report back
|
|
// I/O errors.
|
|
Forget(nodeid, nlookup uint64)
|
|
|
|
// Attributes.
|
|
GetAttr(cancel <-chan struct{}, input *GetAttrIn, out *AttrOut) (code Status)
|
|
SetAttr(cancel <-chan struct{}, input *SetAttrIn, out *AttrOut) (code Status)
|
|
|
|
// Modifying structure.
|
|
Mknod(cancel <-chan struct{}, input *MknodIn, name string, out *EntryOut) (code Status)
|
|
Mkdir(cancel <-chan struct{}, input *MkdirIn, name string, out *EntryOut) (code Status)
|
|
Unlink(cancel <-chan struct{}, header *InHeader, name string) (code Status)
|
|
Rmdir(cancel <-chan struct{}, header *InHeader, name string) (code Status)
|
|
Rename(cancel <-chan struct{}, input *RenameIn, oldName string, newName string) (code Status)
|
|
Link(cancel <-chan struct{}, input *LinkIn, filename string, out *EntryOut) (code Status)
|
|
|
|
Symlink(cancel <-chan struct{}, header *InHeader, pointedTo string, linkName string, out *EntryOut) (code Status)
|
|
Readlink(cancel <-chan struct{}, header *InHeader) (out []byte, code Status)
|
|
Access(cancel <-chan struct{}, input *AccessIn) (code Status)
|
|
|
|
// Extended attributes.
|
|
|
|
// GetXAttr reads an extended attribute, and should return the
|
|
// number of bytes. If the buffer is too small, return ERANGE,
|
|
// with the required buffer size.
|
|
GetXAttr(cancel <-chan struct{}, header *InHeader, attr string, dest []byte) (sz uint32, code Status)
|
|
|
|
// ListXAttr lists extended attributes as '\0' delimited byte
|
|
// slice, and return the number of bytes. If the buffer is too
|
|
// small, return ERANGE, with the required buffer size.
|
|
ListXAttr(cancel <-chan struct{}, header *InHeader, dest []byte) (uint32, Status)
|
|
|
|
// SetAttr writes an extended attribute.
|
|
SetXAttr(cancel <-chan struct{}, input *SetXAttrIn, attr string, data []byte) Status
|
|
|
|
// RemoveXAttr removes an extended attribute.
|
|
RemoveXAttr(cancel <-chan struct{}, header *InHeader, attr string) (code Status)
|
|
|
|
// File handling.
|
|
Create(cancel <-chan struct{}, input *CreateIn, name string, out *CreateOut) (code Status)
|
|
Open(cancel <-chan struct{}, input *OpenIn, out *OpenOut) (status Status)
|
|
Read(cancel <-chan struct{}, input *ReadIn, buf []byte) (ReadResult, Status)
|
|
Lseek(cancel <-chan struct{}, in *LseekIn, out *LseekOut) Status
|
|
|
|
// File locking
|
|
GetLk(cancel <-chan struct{}, input *LkIn, out *LkOut) (code Status)
|
|
SetLk(cancel <-chan struct{}, input *LkIn) (code Status)
|
|
SetLkw(cancel <-chan struct{}, input *LkIn) (code Status)
|
|
|
|
Release(cancel <-chan struct{}, input *ReleaseIn)
|
|
Write(cancel <-chan struct{}, input *WriteIn, data []byte) (written uint32, code Status)
|
|
CopyFileRange(cancel <-chan struct{}, input *CopyFileRangeIn) (written uint32, code Status)
|
|
|
|
Flush(cancel <-chan struct{}, input *FlushIn) Status
|
|
Fsync(cancel <-chan struct{}, input *FsyncIn) (code Status)
|
|
Fallocate(cancel <-chan struct{}, input *FallocateIn) (code Status)
|
|
|
|
// Directory handling
|
|
OpenDir(cancel <-chan struct{}, input *OpenIn, out *OpenOut) (status Status)
|
|
ReadDir(cancel <-chan struct{}, input *ReadIn, out *DirEntryList) Status
|
|
ReadDirPlus(cancel <-chan struct{}, input *ReadIn, out *DirEntryList) Status
|
|
ReleaseDir(input *ReleaseIn)
|
|
FsyncDir(cancel <-chan struct{}, input *FsyncIn) (code Status)
|
|
|
|
StatFs(cancel <-chan struct{}, input *InHeader, out *StatfsOut) (code Status)
|
|
|
|
// This is called on processing the first request. The
|
|
// filesystem implementation can use the server argument to
|
|
// talk back to the kernel (through notify methods).
|
|
Init(*Server)
|
|
}
|