Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion libcontainer/mount_linux.go
Original file line number Diff line number Diff line change
Expand Up @@ -139,7 +139,7 @@ func (e *mountError) Unwrap() error {

// mount is a simple unix.Mount wrapper, returning an error with more context
// in case it failed.
func mount(source, target, fstype string, flags uintptr, data string) error {
func mount(source, target, fstype string, flags uintptr, data string) error { //nolint:unparam // mount(2) wrapper
return mountViaFds(source, nil, target, "", fstype, flags, data)
}

Expand Down
97 changes: 91 additions & 6 deletions libcontainer/rootfs_linux.go
Original file line number Diff line number Diff line change
Expand Up @@ -419,7 +419,7 @@ func mountCgroupV2(m mountEntry, c *mountConfig) error {
// Mask `/sys/fs/cgroup` to ensure it is read-only, even when `/sys` is mounted
// with `rbind,ro` (`runc spec --rootless` produces `rbind,ro` for `/sys`).
err = utils.WithProcfdFile(m.dstFile, func(procfd string) error {
return maskPaths([]string{procfd}, c.label)
return maskPaths(c.root, []string{procfd}, c.label)
})
}
return err
Expand Down Expand Up @@ -1328,12 +1328,64 @@ func verifyDevNull(f *os.File) error {
})
}

// mountFunc matches mountViaFds; tests replace it with a recorder.
type mountFunc func(source string, srcFile *mountSource, target, dstFd, fstype string, flags uintptr, data string) error

// isProcFdPath reports whether path is one of the procfs fd aliases runc uses
// for mount targets. These aliases should not be kept as reusable sources.
//
// The accepted forms are (where <pid> and <tid> are decimal integers):
//
// /proc/thread-self/fd/...
// /proc/self/fd/...
// /proc/<pid>/fd/...
// /proc/self/task/<tid>/fd/...
// /proc/<pid>/task/<tid>/fd/...
func isProcFdPath(path string) bool {
Comment thread
dims marked this conversation as resolved.
parts := strings.Split(strings.Trim(pathrs.LexicallyCleanPath(path), "/"), "/")
if len(parts) < 3 || parts[0] != "proc" {
return false
}
if parts[1] == "thread-self" {
return parts[2] == "fd"
}
if parts[2] == "fd" && (parts[1] == "self" || isProcPidOrTid(parts[1])) {
return true
}
if len(parts) < 5 || parts[2] != "task" || parts[4] != "fd" {
return false
}
return isProcPidOrTid(parts[3]) && (parts[1] == "self" || isProcPidOrTid(parts[1]))
}

func isProcPidOrTid(value string) bool {
_, err := strconv.ParseUint(value, 10, 64)
return err == nil
}

func maskPathsAfterChroot(paths []string, mountLabel string) error {
if len(paths) == 0 {
return nil
}
rootFd, err := os.OpenFile("/", unix.O_DIRECTORY|unix.O_CLOEXEC|unix.O_PATH, 0)
if err != nil {
return fmt.Errorf("open rootfs handle for masked paths: %w", err)
}
defer rootFd.Close()
return maskPaths(rootFd, paths, mountLabel)
}

func maskPaths(rootFd *os.File, paths []string, mountLabel string) error {
return maskPathsWithMount(rootFd, paths, mountLabel, mountViaFds)
}

// maskPaths masks the top of the specified paths inside a container to avoid
// security issues from processes reading information from non-namespace aware
// mounts ( proc/kcore ).
// For files, maskPath bind mounts /dev/null over the top of the specified path.
// For directories, maskPath mounts read-only tmpfs over the top of the specified path.
func maskPaths(paths []string, mountLabel string) error {
// For directories, maskPath uses a shared read-only tmpfs: the first directory
// gets a tmpfs mount, and later directories bind-mount the same tmpfs.
func maskPathsWithMount(rootFd *os.File, paths []string, mountLabel string, mountFn mountFunc) error {
devNull, err := os.OpenFile("/dev/null", unix.O_PATH, 0)
if err != nil {
return fmt.Errorf("can't mask paths: %w", err)
Expand All @@ -1346,6 +1398,15 @@ func maskPaths(paths []string, mountLabel string) error {
procSelfFd, closer := utils.ProcThreadSelf("fd/")
defer closer()

var sharedDirMask *os.File
defer func() {
if sharedDirMask != nil {
_ = sharedDirMask.Close()
}
}()
var sharedDirMaskSrc *mountSource
maskedDirs := make(map[string]struct{})

for _, path := range paths {
// Open the target path; skip if it doesn't exist.
dstFh, err := os.OpenFile(path, unix.O_PATH|unix.O_CLOEXEC, 0)
Expand All @@ -1362,14 +1423,38 @@ func maskPaths(paths []string, mountLabel string) error {
}
var dstType string
if st.IsDir() {
// Destination is a directory: bind mount a ro tmpfs over it.
// Directory targets use a read-only tmpfs.
dstType = "dir"
err = mount("tmpfs", path, "tmpfs", unix.MS_RDONLY, label.FormatMountLabel("", mountLabel))
cleanPath := pathrs.LexicallyCleanPath(path)
if _, ok := maskedDirs[cleanPath]; ok {
dstFh.Close()
continue
}
maskedDirs[cleanPath] = struct{}{}

dstFd := filepath.Join(procSelfFd, strconv.Itoa(int(dstFh.Fd())))
if sharedDirMaskSrc == nil {
err = mountFn("tmpfs", nil, path, dstFd, "tmpfs", unix.MS_RDONLY, label.FormatMountLabel("", mountLabel))
// Procfs fd aliases are not durable rootfs paths; let the
// next ordinary directory become the reusable source.
if err == nil && !isProcFdPath(path) {
sharedDirMask, err = reopenAfterMount(rootFd, dstFh, unix.O_PATH|unix.O_CLOEXEC)
if err != nil {
err = fmt.Errorf("can't reopen shared directory mask: %w", err)
}
if err == nil {
sharedDirMaskSrc = &mountSource{Type: mountSourcePlain, file: sharedDirMask}
}
}
} else {
// clone_mnt copies MNT_READONLY from the source vfsmount.
err = mountFn("", sharedDirMaskSrc, path, dstFd, "", unix.MS_BIND, "")
}
} else {
// Destination is a file: mount it to /dev/null.
dstType = "path"
dstFd := filepath.Join(procSelfFd, strconv.Itoa(int(dstFh.Fd())))
err = mountViaFds("", devNullSrc, path, dstFd, "", unix.MS_BIND, "")
err = mountFn("", devNullSrc, path, dstFd, "", unix.MS_BIND, "")
}
dstFh.Close()
if err != nil {
Expand Down
152 changes: 152 additions & 0 deletions libcontainer/rootfs_linux_test.go
Original file line number Diff line number Diff line change
@@ -1,9 +1,12 @@
package libcontainer

import (
"os"
"path/filepath"
"testing"

"github.com/opencontainers/runc/libcontainer/configs"
"github.com/opencontainers/runc/libcontainer/utils"

"golang.org/x/sys/unix"
)
Expand Down Expand Up @@ -208,3 +211,152 @@ func TestNeedsSetupDevStrangeSourceDest(t *testing.T) {
t.Fatal("expected needsSetupDev to be true, got false")
}
}

type recordedMount struct {
source string
srcFileName string
srcFileType mountSourceType
target string
dstFd string
fstype string
flags uintptr
data string
}

func recordMounts(calls *[]recordedMount) mountFunc {
return func(source string, srcFile *mountSource, target, dstFd, fstype string, flags uintptr, data string) error {
call := recordedMount{
source: source,
target: target,
dstFd: dstFd,
fstype: fstype,
flags: flags,
data: data,
}
if srcFile != nil {
call.srcFileName = srcFile.file.Name()
call.srcFileType = srcFile.Type
}
*calls = append(*calls, call)
return nil
}
}

func TestMaskPathsWithSharedDirMask(t *testing.T) {
root := t.TempDir()
dir1 := filepath.Join(root, "dir1")
dir2 := filepath.Join(root, "dir2")
file := filepath.Join(root, "file")
missing := filepath.Join(root, "missing")
rootFd, err := os.OpenFile(root, unix.O_DIRECTORY|unix.O_CLOEXEC|unix.O_PATH, 0)
if err != nil {
t.Fatal(err)
}
defer rootFd.Close()
for _, dir := range []string{dir1, dir2} {
if err := os.Mkdir(dir, 0o755); err != nil {
t.Fatal(err)
}
}
if err := os.WriteFile(file, []byte("secret"), 0o644); err != nil {
t.Fatal(err)
}

var calls []recordedMount
mountLabel := "system_u:object_r:container_file_t:s0"
if err := maskPathsWithMount(rootFd, []string{
missing,
dir1,
dir2,
filepath.Join(dir1, "."),
file,
}, mountLabel, recordMounts(&calls)); err != nil {
t.Fatal(err)
}

if len(calls) != 3 {
t.Fatalf("expected 3 mount calls, got %d: %#v", len(calls), calls)
}

if call := calls[0]; call.source != "tmpfs" || call.fstype != "tmpfs" || call.flags != unix.MS_RDONLY ||
call.target != dir1 || call.dstFd == "" || call.data != `context="system_u:object_r:container_file_t:s0"` {
t.Fatalf("unexpected shared tmpfs mount call: %#v", call)
}
if call := calls[1]; call.srcFileType != mountSourcePlain ||
call.target != dir2 || call.dstFd == "" || call.fstype != "" || call.flags != unix.MS_BIND || call.data != "" {
t.Fatalf("unexpected shared tmpfs bind mount call: %#v", call)
}
if call := calls[2]; call.srcFileName != "/dev/null" || call.srcFileType != mountSourcePlain ||
call.target != file || call.dstFd == "" || call.fstype != "" || call.flags != unix.MS_BIND || call.data != "" {
t.Fatalf("unexpected file mask mount call: %#v", call)
}
}

func TestIsProcFdPath(t *testing.T) {
for _, path := range []string{
"/proc/thread-self/fd/7",
"/proc/self/fd/7",
"/proc/self/task/123/fd/7",
"/proc/self/task/123/fd/../fd/7",
"/proc/123/fd/7",
"/proc/123/task/456/fd/7",
} {
if !isProcFdPath(path) {
t.Errorf("expected %q to be a procfd path", path)
}
}
for _, path := range []string{
"/proc/acpi",
"/proc/self/fdinfo/7",
"/proc/self/task/123/fdinfo/7",
"/proc/self/task/foo/fd/7",
"/proc/foo/fd/7",
"/proc/123/task/foo/fd/7",
"/sys/devices/system/cpu/cpu0/thermal_throttle",
} {
if isProcFdPath(path) {
t.Errorf("expected %q not to be a procfd path", path)
}
}
}

func TestMaskPathsDoesNotReuseProcFdMaskAsSharedSource(t *testing.T) {
root := t.TempDir()
dir1 := filepath.Join(root, "dir1")
dir2 := filepath.Join(root, "dir2")
dir3 := filepath.Join(root, "dir3")
rootFd, err := os.OpenFile(root, unix.O_DIRECTORY|unix.O_CLOEXEC|unix.O_PATH, 0)
if err != nil {
t.Fatal(err)
}
defer rootFd.Close()
for _, dir := range []string{dir1, dir2, dir3} {
if err := os.Mkdir(dir, 0o755); err != nil {
t.Fatal(err)
}
}
dir1File, err := os.OpenFile(dir1, unix.O_PATH|unix.O_CLOEXEC, 0)
if err != nil {
t.Fatal(err)
}
defer dir1File.Close()
procFd, closer := utils.ProcThreadSelfFd(dir1File.Fd())
defer closer()

var calls []recordedMount
if err := maskPathsWithMount(rootFd, []string{procFd, dir2, dir3}, "", recordMounts(&calls)); err != nil {
t.Fatal(err)
}
if len(calls) != 3 {
t.Fatalf("expected 3 mount calls, got %d: %#v", len(calls), calls)
}
for _, call := range calls[:2] {
if call.fstype != "tmpfs" || call.flags != unix.MS_RDONLY {
t.Fatalf("expected procfd source to force separate tmpfs mounts, got %#v", calls)
}
}
if call := calls[2]; call.srcFileType != mountSourcePlain ||
call.target != dir3 || call.fstype != "" || call.flags != unix.MS_BIND {
t.Fatalf("expected third directory to bind mount from second directory, got %#v", call)
}
}
2 changes: 1 addition & 1 deletion libcontainer/standard_init_linux.go
Original file line number Diff line number Diff line change
Expand Up @@ -142,7 +142,7 @@ func (l *linuxStandardInit) Init() error {
}
}

if err := maskPaths(l.config.Config.MaskPaths, l.config.Config.MountLabel); err != nil {
if err := maskPathsAfterChroot(l.config.Config.MaskPaths, l.config.Config.MountLabel); err != nil {
return err
}
pdeath, err := system.GetParentDeathSignal()
Expand Down
32 changes: 30 additions & 2 deletions tests/integration/mask.bats
Original file line number Diff line number Diff line change
Expand Up @@ -6,11 +6,11 @@ function setup() {
setup_busybox

# Create fake rootfs.
mkdir rootfs/testdir
mkdir rootfs/testdir rootfs/testdir2 rootfs/testdir3
echo "Forbidden information!" >rootfs/testfile

# add extra masked paths
update_config '(.. | select(.maskedPaths? != null)) .maskedPaths += ["/testdir", "/testfile"]'
update_config '(.. | select(.maskedPaths? != null)) .maskedPaths += ["/testdir", "/testdir2", "/testdir3", "/testfile"]'
}

function teardown() {
Expand Down Expand Up @@ -55,6 +55,34 @@ function teardown() {
[[ "${output}" == *"Operation not permitted"* ]]
}

@test "mask paths [directories share tmpfs]" {
runc run -d --console-socket "$CONSOLE_SOCKET" test_busybox
[ "$status" -eq 0 ]

# shellcheck disable=SC2016
runc exec test_busybox sh -euc '
set -- $(stat -c %d /testdir /testdir2 /testdir3)
[ "$1" = "$2" ]
[ "$2" = "$3" ]
'
[ "$status" -eq 0 ]

runc exec test_busybox touch /testdir2/foo
[ "$status" -eq 1 ]
[[ "${output}" == *"Read-only file system"* ]]
}

@test "mask paths [directory with read-only rootfs]" {
update_config '.root.readonly = true'

runc run -d --console-socket "$CONSOLE_SOCKET" test_busybox
[ "$status" -eq 0 ]

runc exec test_busybox ls /testdir
[ "$status" -eq 0 ]
[ -z "$output" ]
}

@test "mask paths [prohibit symlink /proc]" {
ln -s /symlink rootfs/proc
runc run -d --console-socket "$CONSOLE_SOCKET" test_busybox
Expand Down
Loading