Skip to content

Commit

Permalink
Merge pull request #3967 from cyphar/remove-mount-fallback-flag
Browse files Browse the repository at this point in the history
rootfs: remove --no-mount-fallback and finally fix MS_REMOUNT
  • Loading branch information
lifubang committed Oct 24, 2023
2 parents 3272edf + 7c71a22 commit a68529c
Show file tree
Hide file tree
Showing 10 changed files with 548 additions and 132 deletions.
3 changes: 0 additions & 3 deletions contrib/completions/bash/runc
Original file line number Diff line number Diff line change
Expand Up @@ -461,7 +461,6 @@ _runc_run() {
--no-subreaper
--no-pivot
--no-new-keyring
--no-mount-fallback
"

local options_with_args="
Expand Down Expand Up @@ -568,7 +567,6 @@ _runc_create() {
--help
--no-pivot
--no-new-keyring
--no-mount-fallback
"

local options_with_args="
Expand Down Expand Up @@ -629,7 +627,6 @@ _runc_restore() {
--no-pivot
--auto-dedup
--lazy-pages
--no-mount-fallback
"

local options_with_args="
Expand Down
4 changes: 0 additions & 4 deletions create.go
Original file line number Diff line number Diff line change
Expand Up @@ -51,10 +51,6 @@ command(s) that get executed on start, edit the args parameter of the spec. See
Name: "preserve-fds",
Usage: "Pass N additional file descriptors to the container (stdio + $LISTEN_FDS + N in total)",
},
cli.BoolFlag{
Name: "no-mount-fallback",
Usage: "Do not fallback when the specific configuration is not applicable (e.g., do not try to remount a bind mount again after the first attempt failed on source filesystems that have nodev, noexec, nosuid, noatime, relatime, strictatime, nodiratime set)",
},
},
Action: func(context *cli.Context) error {
if err := checkArgs(context, 1, exactArgs); err != nil {
Expand Down
4 changes: 0 additions & 4 deletions libcontainer/configs/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -214,10 +214,6 @@ type Config struct {
// When RootlessCgroups is set, cgroups errors are ignored.
RootlessCgroups bool `json:"rootless_cgroups,omitempty"`

// Do not try to remount a bind mount again after the first attempt failed on source
// filesystems that have nodev, noexec, nosuid, noatime, relatime, strictatime, nodiratime set
NoMountFallback bool `json:"no_mount_fallback,omitempty"`

// TimeOffsets specifies the offset for supporting time namespaces.
TimeOffsets map[string]specs.LinuxTimeOffset `json:"time_offsets,omitempty"`

Expand Down
4 changes: 4 additions & 0 deletions libcontainer/configs/mount_linux.go
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,10 @@ type Mount struct {
// Mount flags.
Flags int `json:"flags"`

// Mount flags that were explicitly cleared in the configuration (meaning
// the user explicitly requested that these flags *not* be set).
ClearedFlags int `json:"cleared_flags"`

// Propagation Flags
PropagationFlags []int `json:"propagation_flags"`

Expand Down
174 changes: 136 additions & 38 deletions libcontainer/rootfs_linux.go
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,6 @@ type mountConfig struct {
cgroup2Path string
rootlessCgroups bool
cgroupns bool
noMountFallback bool
}

// mountEntry contains mount data specific to a mount point.
Expand Down Expand Up @@ -83,7 +82,6 @@ func prepareRootfs(pipe *syncSocket, iConfig *initConfig, mountFds mountFds) (er
cgroup2Path: iConfig.Cgroup2Path,
rootlessCgroups: iConfig.RootlessCgroups,
cgroupns: config.Namespaces.Contains(configs.NEWCGROUP),
noMountFallback: config.NoMountFallback,
}
for i, m := range config.Mounts {
entry := mountEntry{Mount: m}
Expand Down Expand Up @@ -409,6 +407,51 @@ func doTmpfsCopyUp(m mountEntry, rootfs, mountLabel string) (Err error) {
})
}

const (
// The atime "enum" flags (which are mutually exclusive).
mntAtimeEnumFlags = unix.MS_NOATIME | unix.MS_RELATIME | unix.MS_STRICTATIME
// All atime-related flags.
mntAtimeFlags = mntAtimeEnumFlags | unix.MS_NODIRATIME
// Flags which can be locked when inheriting mounts in a different userns.
// In the kernel, these are the mounts that are locked using MNT_LOCK_*.
mntLockFlags = unix.MS_RDONLY | unix.MS_NODEV | unix.MS_NOEXEC |
unix.MS_NOSUID | mntAtimeFlags
)

func statfsToMountFlags(st unix.Statfs_t) int {
// From <linux/statfs.h>.
const ST_NOSYMFOLLOW = 0x2000 //nolint:revive

var flags int
for _, f := range []struct {
st, ms int
}{
// See calculate_f_flags() in fs/statfs.c.
{unix.ST_RDONLY, unix.MS_RDONLY},
{unix.ST_NOSUID, unix.MS_NOSUID},
{unix.ST_NODEV, unix.MS_NODEV},
{unix.ST_NOEXEC, unix.MS_NOEXEC},
{unix.ST_MANDLOCK, unix.MS_MANDLOCK},
{unix.ST_SYNCHRONOUS, unix.MS_SYNCHRONOUS},
{unix.ST_NOATIME, unix.MS_NOATIME},
{unix.ST_NODIRATIME, unix.MS_NODIRATIME},
{unix.ST_RELATIME, unix.MS_RELATIME},
{ST_NOSYMFOLLOW, unix.MS_NOSYMFOLLOW},
// There is no ST_STRICTATIME -- see below.
} {
if int(st.Flags)&f.st == f.st {
flags |= f.ms
}
}
// MS_STRICTATIME is a "fake" MS_* flag. It isn't stored in mnt->mnt_flags,
// and so it doesn't show up in statfs(2). If none of the other flags in
// atime enum are present, the mount is MS_STRICTATIME.
if flags&mntAtimeEnumFlags == 0 {
flags |= unix.MS_STRICTATIME
}
return flags
}

func mountToRootfs(c *mountConfig, m mountEntry) error {
rootfs := c.root

Expand Down Expand Up @@ -509,11 +552,97 @@ func mountToRootfs(c *mountConfig, m mountEntry) error {
return err
}
}
// bind mount won't change mount options, we need remount to make mount options effective.
// first check that we have non-default options required before attempting a remount
if m.Flags&^(unix.MS_REC|unix.MS_REMOUNT|unix.MS_BIND) != 0 {
// only remount if unique mount options are set
if err := remount(m, rootfs, c.noMountFallback); err != nil {

// The initial MS_BIND won't change the mount options, we need to do a
// separate MS_BIND|MS_REMOUNT to apply the mount options. We skip
// doing this if the user has not specified any mount flags at all
// (including cleared flags) -- in which case we just keep the original
// mount flags.
//
// Note that the fact we check whether any clearing flags are set is in
// contrast to mount(8)'s current behaviour, but is what users probably
// expect. See <https://github.com/util-linux/util-linux/issues/2433>.
if m.Flags & ^(unix.MS_BIND|unix.MS_REC|unix.MS_REMOUNT) != 0 || m.ClearedFlags != 0 {
if err := utils.WithProcfd(rootfs, m.Destination, func(dstFD string) error {
flags := m.Flags | unix.MS_BIND | unix.MS_REMOUNT
// The runtime-spec says we SHOULD map to the relevant mount(8)
// behaviour. However, it's not clear whether we want the
// "mount --bind -o ..." or "mount --bind -o remount,..."
// behaviour here -- both of which are somewhat broken[1].
//
// So, if the user has passed "remount" as a mount option, we
// implement the "mount --bind -o remount" behaviour, otherwise
// we implement the spiritual intent of the "mount --bind -o"
// behaviour, which should match what users expect. Maybe
// mount(8) will eventually implement this behaviour too..
//
// [1]: https://github.com/util-linux/util-linux/issues/2433

// Initially, we emulate "mount --bind -o ..." where we set
// only the requested flags (clearing any existing flags). The
// only difference from mount(8) is that we do this
// unconditionally, regardless of whether any set-me mount
// options have been requested.
//
// TODO: We are not doing any special handling of the atime
// flags here, which means that the mount will inherit the old
// atime flags if the user didn't explicitly request a
// different set of flags. This also has the mount(8) bug where
// "nodiratime,norelatime" will result in a
// "nodiratime,relatime" mount.
mountErr := mountViaFDs("", nil, m.Destination, dstFD, "", uintptr(flags), "")
if mountErr == nil {
return nil
}

// If the mount failed, the mount may contain locked mount
// flags. In that case, we emulate "mount --bind -o
// remount,...", where we take the existing mount flags of the
// mount and apply the request flags (including clearing flags)
// on top. The main divergence we have from mount(8) here is
// that we handle atimes correctly to make sure we error out if
// we cannot fulfil the requested mount flags.

var st unix.Statfs_t
if err := unix.Statfs(m.src(), &st); err != nil {
return &os.PathError{Op: "statfs", Path: m.src(), Err: err}
}
srcFlags := statfsToMountFlags(st)
// If the user explicitly request one of the locked flags *not*
// be set, we need to return an error to avoid producing mounts
// that don't match the user's request.
if srcFlags&m.ClearedFlags&mntLockFlags != 0 {
return mountErr
}

// If an MS_*ATIME flag was requested, it must match the
// existing one. This handles two separate kernel bugs, and
// matches the logic of can_change_locked_flags() but without
// these bugs:
//
// * (2.6.30+) Since commit 613cbe3d4870 ("Don't set relatime
// when noatime is specified"), MS_RELATIME is ignored when
// MS_NOATIME is set. This means that us inheriting MS_NOATIME
// from a mount while requesting MS_RELATIME would *silently*
// produce an MS_NOATIME mount.
//
// * (2.6.30+) Since its introduction in commit d0adde574b84
// ("Add a strictatime mount option"), MS_STRICTATIME has
// caused any passed MS_RELATIME and MS_NOATIME flags to be
// ignored which results in us *silently* producing
// MS_STRICTATIME mounts even if the user requested MS_RELATIME
// or MS_NOATIME.
if m.Flags&mntAtimeFlags != 0 && m.Flags&mntAtimeFlags != srcFlags&mntAtimeFlags {
return mountErr
}

// Retry the mount with the existing lockable mount flags
// applied.
flags |= srcFlags & mntLockFlags
mountErr = mountViaFDs("", nil, m.Destination, dstFD, "", uintptr(flags), "")
logrus.Debugf("remount retry: srcFlags=0x%x flagsSet=0x%x flagsClr=0x%x: %v", srcFlags, m.Flags, m.ClearedFlags, mountErr)
return mountErr
}); err != nil {
return err
}
}
Expand Down Expand Up @@ -1103,37 +1232,6 @@ func writeSystemProperty(key, value string) error {
return os.WriteFile(path.Join("/proc/sys", keyPath), []byte(value), 0o644)
}

func remount(m mountEntry, rootfs string, noMountFallback bool) error {
return utils.WithProcfd(rootfs, m.Destination, func(dstFD string) error {
flags := uintptr(m.Flags | unix.MS_REMOUNT)
err := mountViaFDs("", nil, m.Destination, dstFD, m.Device, flags, "")
if err == nil {
return nil
}
// Check if the source has flags set according to noMountFallback
src := m.src()
var s unix.Statfs_t
if err := unix.Statfs(src, &s); err != nil {
return &os.PathError{Op: "statfs", Path: src, Err: err}
}
var checkflags int
if noMountFallback {
// Check for ro only
checkflags = unix.MS_RDONLY
} else {
// Check for ro, nodev, noexec, nosuid, noatime, relatime, strictatime,
// nodiratime
checkflags = unix.MS_RDONLY | unix.MS_NODEV | unix.MS_NOEXEC | unix.MS_NOSUID | unix.MS_NOATIME | unix.MS_RELATIME | unix.MS_STRICTATIME | unix.MS_NODIRATIME
}
if int(s.Flags)&checkflags == 0 {
return err
}
// ... and retry the mount with flags found above.
flags |= uintptr(int(s.Flags) & checkflags)
return mountViaFDs("", nil, m.Destination, dstFD, m.Device, flags, "")
})
}

// Do the mount operation followed by additional mounts required to take care
// of propagation flags. This will always be scoped inside the container rootfs.
func mountPropagate(m mountEntry, rootfs string, mountLabel string) error {
Expand Down
7 changes: 5 additions & 2 deletions libcontainer/specconv/spec_linux.go
Original file line number Diff line number Diff line change
Expand Up @@ -313,7 +313,6 @@ type CreateOpts struct {
Spec *specs.Spec
RootlessEUID bool
RootlessCgroups bool
NoMountFallback bool
}

// getwd is a wrapper similar to os.Getwd, except it always gets
Expand Down Expand Up @@ -359,7 +358,6 @@ func CreateLibcontainerConfig(opts *CreateOpts) (*configs.Config, error) {
NoNewKeyring: opts.NoNewKeyring,
RootlessEUID: opts.RootlessEUID,
RootlessCgroups: opts.RootlessCgroups,
NoMountFallback: opts.NoMountFallback,
}

for _, m := range spec.Mounts {
Expand Down Expand Up @@ -977,10 +975,15 @@ func parseMountOptions(options []string) *configs.Mount {
// or the flag is not supported on the platform,
// then it is a data value for a specific fs type.
if f, exists := mountFlags[o]; exists && f.flag != 0 {
// FIXME: The *atime flags are special (they are more of an enum
// with quite hairy semantics) and thus arguably setting some of
// them should clear unrelated flags.
if f.clear {
m.Flags &= ^f.flag
m.ClearedFlags |= f.flag
} else {
m.Flags |= f.flag
m.ClearedFlags &= ^f.flag
}
} else if f, exists := mountPropagationMapping[o]; exists && f != 0 {
m.PropagationFlags = append(m.PropagationFlags, f)
Expand Down
4 changes: 0 additions & 4 deletions restore.go
Original file line number Diff line number Diff line change
Expand Up @@ -98,10 +98,6 @@ using the runc checkpoint command.`,
Value: "",
Usage: "Specify an LSM mount context to be used during restore.",
},
cli.BoolFlag{
Name: "no-mount-fallback",
Usage: "Do not fallback when the specific configuration is not applicable (e.g., do not try to remount a bind mount again after the first attempt failed on source filesystems that have nodev, noexec, nosuid, noatime, relatime, strictatime, nodiratime set)",
},
},
Action: func(context *cli.Context) error {
if err := checkArgs(context, 1, exactArgs); err != nil {
Expand Down
4 changes: 0 additions & 4 deletions run.go
Original file line number Diff line number Diff line change
Expand Up @@ -64,10 +64,6 @@ command(s) that get executed on start, edit the args parameter of the spec. See
Name: "preserve-fds",
Usage: "Pass N additional file descriptors to the container (stdio + $LISTEN_FDS + N in total)",
},
cli.BoolFlag{
Name: "no-mount-fallback",
Usage: "Do not fallback when the specific configuration is not applicable (e.g., do not try to remount a bind mount again after the first attempt failed on source filesystems that have nodev, noexec, nosuid, noatime, relatime, strictatime, nodiratime set)",
},
},
Action: func(context *cli.Context) error {
if err := checkArgs(context, 1, exactArgs); err != nil {
Expand Down
Loading

0 comments on commit a68529c

Please sign in to comment.