From 8fe1e8bf8c02f7a5e687252dc85e59dd914aa4f9 Mon Sep 17 00:00:00 2001 From: Kir Kolyshkin Date: Wed, 22 Sep 2021 20:16:08 -0700 Subject: [PATCH 1/6] libct/specconv: rm some init allocations Eliminate some of these allocations when starting runc: > init github.com/opencontainers/runc/libcontainer/specconv @10 ms, 0.11 ms clock, 5408 bytes, 70 allocs Most of this (4K) is the two regexes, which are left intact for now. Signed-off-by: Kir Kolyshkin --- libcontainer/specconv/spec_linux.go | 51 ++++++++++++++++++----------- 1 file changed, 32 insertions(+), 19 deletions(-) diff --git a/libcontainer/specconv/spec_linux.go b/libcontainer/specconv/spec_linux.go index 44b035a86e6..363c371d8fe 100644 --- a/libcontainer/specconv/spec_linux.go +++ b/libcontainer/specconv/spec_linux.go @@ -9,6 +9,7 @@ import ( "path/filepath" "regexp" "strings" + "sync" "time" systemdDbus "github.com/coreos/go-systemd/v22/dbus" @@ -24,26 +25,36 @@ import ( "golang.org/x/sys/unix" ) -var namespaceMapping = map[specs.LinuxNamespaceType]configs.NamespaceType{ - specs.PIDNamespace: configs.NEWPID, - specs.NetworkNamespace: configs.NEWNET, - specs.MountNamespace: configs.NEWNS, - specs.UserNamespace: configs.NEWUSER, - specs.IPCNamespace: configs.NEWIPC, - specs.UTSNamespace: configs.NEWUTS, - specs.CgroupNamespace: configs.NEWCGROUP, -} +var ( + initMapsOnce sync.Once + namespaceMapping map[specs.LinuxNamespaceType]configs.NamespaceType + mountPropagationMapping map[string]int +) -var mountPropagationMapping = map[string]int{ - "rprivate": unix.MS_PRIVATE | unix.MS_REC, - "private": unix.MS_PRIVATE, - "rslave": unix.MS_SLAVE | unix.MS_REC, - "slave": unix.MS_SLAVE, - "rshared": unix.MS_SHARED | unix.MS_REC, - "shared": unix.MS_SHARED, - "runbindable": unix.MS_UNBINDABLE | unix.MS_REC, - "unbindable": unix.MS_UNBINDABLE, - "": 0, +func initMaps() { + initMapsOnce.Do(func() { + namespaceMapping = map[specs.LinuxNamespaceType]configs.NamespaceType{ + specs.PIDNamespace: configs.NEWPID, + specs.NetworkNamespace: configs.NEWNET, + specs.MountNamespace: configs.NEWNS, + specs.UserNamespace: configs.NEWUSER, + specs.IPCNamespace: configs.NEWIPC, + specs.UTSNamespace: configs.NEWUTS, + specs.CgroupNamespace: configs.NEWCGROUP, + } + + mountPropagationMapping = map[string]int{ + "rprivate": unix.MS_PRIVATE | unix.MS_REC, + "private": unix.MS_PRIVATE, + "rslave": unix.MS_SLAVE | unix.MS_REC, + "slave": unix.MS_SLAVE, + "rshared": unix.MS_SHARED | unix.MS_REC, + "shared": unix.MS_SHARED, + "runbindable": unix.MS_UNBINDABLE | unix.MS_REC, + "unbindable": unix.MS_UNBINDABLE, + "": 0, + } + }) } // AllowedDevices is the set of devices which are automatically included for @@ -256,6 +267,8 @@ func CreateLibcontainerConfig(opts *CreateOpts) (*configs.Config, error) { config.Cgroups = c // set linux-specific config if spec.Linux != nil { + initMaps() + var exists bool if config.RootPropagation, exists = mountPropagationMapping[spec.Linux.RootfsPropagation]; !exists { return nil, fmt.Errorf("rootfsPropagation=%v is not supported", spec.Linux.RootfsPropagation) From 81586e19355b35dacdf7e15dd2cd057c9c9c8e14 Mon Sep 17 00:00:00 2001 From: Kir Kolyshkin Date: Fri, 12 Nov 2021 14:57:30 -0800 Subject: [PATCH 2/6] libct/specconv: reuse mountPropagationMapping in parseMountOptions These two maps are the same, except that mountPropagationMapping has an extra element with key of "" and value of 0. Since the code already checks for f != 0, this extra element is not a problem. Signed-off-by: Kir Kolyshkin --- libcontainer/specconv/spec_linux.go | 13 ++----------- 1 file changed, 2 insertions(+), 11 deletions(-) diff --git a/libcontainer/specconv/spec_linux.go b/libcontainer/specconv/spec_linux.go index 363c371d8fe..d67820a49c4 100644 --- a/libcontainer/specconv/spec_linux.go +++ b/libcontainer/specconv/spec_linux.go @@ -781,6 +781,7 @@ func parseMountOptions(options []string) (int, []int, string, int) { data []string extFlags int ) + initMaps() flags := map[string]struct { clear bool flag int @@ -819,16 +820,6 @@ func parseMountOptions(options []string) (int, []int, string, int) { "suid": {true, unix.MS_NOSUID}, "sync": {false, unix.MS_SYNCHRONOUS}, } - propagationFlags := map[string]int{ - "private": unix.MS_PRIVATE, - "shared": unix.MS_SHARED, - "slave": unix.MS_SLAVE, - "unbindable": unix.MS_UNBINDABLE, - "rprivate": unix.MS_PRIVATE | unix.MS_REC, - "rshared": unix.MS_SHARED | unix.MS_REC, - "rslave": unix.MS_SLAVE | unix.MS_REC, - "runbindable": unix.MS_UNBINDABLE | unix.MS_REC, - } extensionFlags := map[string]struct { clear bool flag int @@ -845,7 +836,7 @@ func parseMountOptions(options []string) (int, []int, string, int) { } else { flag |= f.flag } - } else if f, exists := propagationFlags[o]; exists && f != 0 { + } else if f, exists := mountPropagationMapping[o]; exists && f != 0 { pgflag = append(pgflag, f) } else if f, exists := extensionFlags[o]; exists && f.flag != 0 { if f.clear { From 2c3792baf9e7219af5ca5586af39461d5b0ba93f Mon Sep 17 00:00:00 2001 From: Kir Kolyshkin Date: Fri, 12 Nov 2021 15:01:40 -0800 Subject: [PATCH 3/6] libct/specconv: make mountFlags and extensionFlags global This makes the repeated calls to parseMountOptions faster, and decreases the amount of garbage to collect. Signed-off-by: Kir Kolyshkin --- libcontainer/specconv/spec_linux.go | 107 +++++++++++++++------------- 1 file changed, 56 insertions(+), 51 deletions(-) diff --git a/libcontainer/specconv/spec_linux.go b/libcontainer/specconv/spec_linux.go index d67820a49c4..35f3215361c 100644 --- a/libcontainer/specconv/spec_linux.go +++ b/libcontainer/specconv/spec_linux.go @@ -26,9 +26,13 @@ import ( ) var ( - initMapsOnce sync.Once - namespaceMapping map[specs.LinuxNamespaceType]configs.NamespaceType - mountPropagationMapping map[string]int + initMapsOnce sync.Once + namespaceMapping map[specs.LinuxNamespaceType]configs.NamespaceType + mountPropagationMapping map[string]int + mountFlags, extensionFlags map[string]struct { + clear bool + flag int + } ) func initMaps() { @@ -54,6 +58,51 @@ func initMaps() { "unbindable": unix.MS_UNBINDABLE, "": 0, } + + mountFlags = map[string]struct { + clear bool + flag int + }{ + "acl": {false, unix.MS_POSIXACL}, + "async": {true, unix.MS_SYNCHRONOUS}, + "atime": {true, unix.MS_NOATIME}, + "bind": {false, unix.MS_BIND}, + "defaults": {false, 0}, + "dev": {true, unix.MS_NODEV}, + "diratime": {true, unix.MS_NODIRATIME}, + "dirsync": {false, unix.MS_DIRSYNC}, + "exec": {true, unix.MS_NOEXEC}, + "iversion": {false, unix.MS_I_VERSION}, + "lazytime": {false, unix.MS_LAZYTIME}, + "loud": {true, unix.MS_SILENT}, + "mand": {false, unix.MS_MANDLOCK}, + "noacl": {true, unix.MS_POSIXACL}, + "noatime": {false, unix.MS_NOATIME}, + "nodev": {false, unix.MS_NODEV}, + "nodiratime": {false, unix.MS_NODIRATIME}, + "noexec": {false, unix.MS_NOEXEC}, + "noiversion": {true, unix.MS_I_VERSION}, + "nolazytime": {true, unix.MS_LAZYTIME}, + "nomand": {true, unix.MS_MANDLOCK}, + "norelatime": {true, unix.MS_RELATIME}, + "nostrictatime": {true, unix.MS_STRICTATIME}, + "nosuid": {false, unix.MS_NOSUID}, + "rbind": {false, unix.MS_BIND | unix.MS_REC}, + "relatime": {false, unix.MS_RELATIME}, + "remount": {false, unix.MS_REMOUNT}, + "ro": {false, unix.MS_RDONLY}, + "rw": {true, unix.MS_RDONLY}, + "silent": {false, unix.MS_SILENT}, + "strictatime": {false, unix.MS_STRICTATIME}, + "suid": {true, unix.MS_NOSUID}, + "sync": {false, unix.MS_SYNCHRONOUS}, + } + extensionFlags = map[string]struct { + clear bool + flag int + }{ + "tmpcopyup": {false, configs.EXT_COPYUP}, + } }) } @@ -782,55 +831,11 @@ func parseMountOptions(options []string) (int, []int, string, int) { extFlags int ) initMaps() - flags := map[string]struct { - clear bool - flag int - }{ - "acl": {false, unix.MS_POSIXACL}, - "async": {true, unix.MS_SYNCHRONOUS}, - "atime": {true, unix.MS_NOATIME}, - "bind": {false, unix.MS_BIND}, - "defaults": {false, 0}, - "dev": {true, unix.MS_NODEV}, - "diratime": {true, unix.MS_NODIRATIME}, - "dirsync": {false, unix.MS_DIRSYNC}, - "exec": {true, unix.MS_NOEXEC}, - "iversion": {false, unix.MS_I_VERSION}, - "lazytime": {false, unix.MS_LAZYTIME}, - "loud": {true, unix.MS_SILENT}, - "mand": {false, unix.MS_MANDLOCK}, - "noacl": {true, unix.MS_POSIXACL}, - "noatime": {false, unix.MS_NOATIME}, - "nodev": {false, unix.MS_NODEV}, - "nodiratime": {false, unix.MS_NODIRATIME}, - "noexec": {false, unix.MS_NOEXEC}, - "noiversion": {true, unix.MS_I_VERSION}, - "nolazytime": {true, unix.MS_LAZYTIME}, - "nomand": {true, unix.MS_MANDLOCK}, - "norelatime": {true, unix.MS_RELATIME}, - "nostrictatime": {true, unix.MS_STRICTATIME}, - "nosuid": {false, unix.MS_NOSUID}, - "rbind": {false, unix.MS_BIND | unix.MS_REC}, - "relatime": {false, unix.MS_RELATIME}, - "remount": {false, unix.MS_REMOUNT}, - "ro": {false, unix.MS_RDONLY}, - "rw": {true, unix.MS_RDONLY}, - "silent": {false, unix.MS_SILENT}, - "strictatime": {false, unix.MS_STRICTATIME}, - "suid": {true, unix.MS_NOSUID}, - "sync": {false, unix.MS_SYNCHRONOUS}, - } - extensionFlags := map[string]struct { - clear bool - flag int - }{ - "tmpcopyup": {false, configs.EXT_COPYUP}, - } for _, o := range options { - // If the option does not exist in the flags table or the flag - // is not supported on the platform, - // then it is a data value for a specific fs type - if f, exists := flags[o]; exists && f.flag != 0 { + // If the option does not exist in the mountFlags table, + // or the flag is not supported on the platform, + // then it is a data value for a specific fs type. + if f, exists := mountFlags[o]; exists && f.flag != 0 { if f.clear { flag &= ^f.flag } else { From 37c5fd554e58ca586415863e768c40537d3b532a Mon Sep 17 00:00:00 2001 From: Kir Kolyshkin Date: Fri, 12 Nov 2021 15:54:35 -0800 Subject: [PATCH 4/6] libct/specconv: make parseMountOptions return Mount parseMountOption already returns way too many values, making the code kind of hard to read. Since all of the return values are used as is to populate the fields of configs.Mount, let's change it to return (semi-)populated *configs.Mount instead. Signed-off-by: Kir Kolyshkin --- libcontainer/specconv/spec_linux.go | 55 +++++++++++++---------------- 1 file changed, 24 insertions(+), 31 deletions(-) diff --git a/libcontainer/specconv/spec_linux.go b/libcontainer/specconv/spec_linux.go index 35f3215361c..c1c7b94bac1 100644 --- a/libcontainer/specconv/spec_linux.go +++ b/libcontainer/specconv/spec_linux.go @@ -394,27 +394,21 @@ func createLibcontainerMount(cwd string, m specs.Mount) (*configs.Mount, error) // return nil, fmt.Errorf("mount destination %s is not absolute", m.Destination) logrus.Warnf("mount destination %s is not absolute. Support for non-absolute mount destinations will be removed in a future release.", m.Destination) } - flags, pgflags, data, ext := parseMountOptions(m.Options) - source := m.Source - device := m.Type - if flags&unix.MS_BIND != 0 { + mnt := parseMountOptions(m.Options) + + mnt.Destination = m.Destination + mnt.Source = m.Source + mnt.Device = m.Type + if mnt.Flags&unix.MS_BIND != 0 { // Any "type" the user specified is meaningless (and ignored) for // bind-mounts -- so we set it to "bind" because rootfs_linux.go // (incorrectly) relies on this for some checks. - device = "bind" - if !filepath.IsAbs(source) { - source = filepath.Join(cwd, m.Source) - } - } - return &configs.Mount{ - Device: device, - Source: source, - Destination: m.Destination, - Data: data, - Flags: flags, - PropagationFlags: pgflags, - Extensions: ext, - }, nil + mnt.Device = "bind" + if !filepath.IsAbs(mnt.Source) { + mnt.Source = filepath.Join(cwd, m.Source) + } + } + return mnt, nil } // systemd property name check: latin letters only, at least 3 of them @@ -821,14 +815,12 @@ func setupUserNamespace(spec *specs.Spec, config *configs.Config) error { return nil } -// parseMountOptions parses the string and returns the flags, propagation -// flags and any mount data that it contains. -func parseMountOptions(options []string) (int, []int, string, int) { +// parseMountOptions parses options and returns a configs.Mount +// structure with fields that depends on options set accordingly. +func parseMountOptions(options []string) *configs.Mount { var ( - flag int - pgflag []int - data []string - extFlags int + data []string + m configs.Mount ) initMaps() for _, o := range options { @@ -837,23 +829,24 @@ func parseMountOptions(options []string) (int, []int, string, int) { // then it is a data value for a specific fs type. if f, exists := mountFlags[o]; exists && f.flag != 0 { if f.clear { - flag &= ^f.flag + m.Flags &= ^f.flag } else { - flag |= f.flag + m.Flags |= f.flag } } else if f, exists := mountPropagationMapping[o]; exists && f != 0 { - pgflag = append(pgflag, f) + m.PropagationFlags = append(m.PropagationFlags, f) } else if f, exists := extensionFlags[o]; exists && f.flag != 0 { if f.clear { - extFlags &= ^f.flag + m.Extensions &= ^f.flag } else { - extFlags |= f.flag + m.Extensions |= f.flag } } else { data = append(data, o) } } - return flag, pgflag, strings.Join(data, ","), extFlags + m.Data = strings.Join(data, ",") + return &m } func SetupSeccomp(config *specs.LinuxSeccomp) (*configs.Seccomp, error) { From 6907becaf9e124958225050eb8cf8cf32f0050e1 Mon Sep 17 00:00:00 2001 From: Kir Kolyshkin Date: Fri, 12 Nov 2021 16:03:17 -0800 Subject: [PATCH 5/6] libct/specconv: remove isSecSuffix regex Commit 1cd71dfd7 added isSecSuffix, but the same thing can be done easily without a regex. This is faster and saves some init time and memory. Signed-off-by: Kir Kolyshkin --- libcontainer/specconv/spec_linux.go | 17 ++++++++++------- libcontainer/specconv/spec_linux_test.go | 5 +++++ 2 files changed, 15 insertions(+), 7 deletions(-) diff --git a/libcontainer/specconv/spec_linux.go b/libcontainer/specconv/spec_linux.go index c1c7b94bac1..77ceef5d303 100644 --- a/libcontainer/specconv/spec_linux.go +++ b/libcontainer/specconv/spec_linux.go @@ -414,8 +414,6 @@ func createLibcontainerMount(cwd string, m specs.Mount) (*configs.Mount, error) // systemd property name check: latin letters only, at least 3 of them var isValidName = regexp.MustCompile(`^[a-zA-Z]{3,}$`).MatchString -var isSecSuffix = regexp.MustCompile(`[a-z]Sec$`).MatchString - // Some systemd properties are documented as having "Sec" suffix // (e.g. TimeoutStopSec) but are expected to have "USec" suffix // here, so let's provide conversion to improve compatibility. @@ -462,11 +460,16 @@ func initSystemdProps(spec *specs.Spec) ([]systemdDbus.Property, error) { if err != nil { return nil, fmt.Errorf("Annotation %s=%s value parse error: %w", k, v, err) } - if isSecSuffix(name) { - name = strings.TrimSuffix(name, "Sec") + "USec" - value, err = convertSecToUSec(value) - if err != nil { - return nil, fmt.Errorf("Annotation %s=%s value parse error: %w", k, v, err) + // Check for Sec suffix. + if trimName := strings.TrimSuffix(name, "Sec"); len(trimName) < len(name) { + // Check for a lowercase ascii a-z just before Sec. + if ch := trimName[len(trimName)-1]; ch >= 'a' && ch <= 'z' { + // Convert from Sec to USec. + name = trimName + "USec" + value, err = convertSecToUSec(value) + if err != nil { + return nil, fmt.Errorf("Annotation %s=%s value parse error: %w", k, v, err) + } } } sp = append(sp, systemdDbus.Property{Name: name, Value: value}) diff --git a/libcontainer/specconv/spec_linux_test.go b/libcontainer/specconv/spec_linux_test.go index 567f55b5329..96f31e8a06c 100644 --- a/libcontainer/specconv/spec_linux_test.go +++ b/libcontainer/specconv/spec_linux_test.go @@ -697,6 +697,11 @@ func TestInitSystemdProps(t *testing.T) { in: inT{"org.systemd.property.TimeoutStopSec", "'covfefe'"}, exp: expT{true, "", ""}, }, + { + desc: "convert USec to Sec (bad variable name, no conversion)", + in: inT{"org.systemd.property.FOOSec", "123"}, + exp: expT{false, "FOOSec", 123}, + }, { in: inT{"org.systemd.property.CollectMode", "'inactive-or-failed'"}, exp: expT{false, "CollectMode", "inactive-or-failed"}, From 029b73c1b0bc5abf89ec8495fca7d175499a6dbe Mon Sep 17 00:00:00 2001 From: Kir Kolyshkin Date: Fri, 12 Nov 2021 17:03:28 -0800 Subject: [PATCH 6/6] libct/spec: replace isValidName regex with a function MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Also, add a simple test and a benchmark (just out of sheer curiosity). Benchmark results: name old time/op new time/op delta IsValidName-4 540ns ± 3% 45ns ± 1% -91.76% (p=0.008 n=5+5) Signed-off-by: Kir Kolyshkin --- libcontainer/specconv/spec_linux.go | 18 ++++++++++--- libcontainer/specconv/spec_linux_test.go | 33 ++++++++++++++++++++++++ 2 files changed, 48 insertions(+), 3 deletions(-) diff --git a/libcontainer/specconv/spec_linux.go b/libcontainer/specconv/spec_linux.go index 77ceef5d303..3676654ac1c 100644 --- a/libcontainer/specconv/spec_linux.go +++ b/libcontainer/specconv/spec_linux.go @@ -7,7 +7,6 @@ import ( "fmt" "os" "path/filepath" - "regexp" "strings" "sync" "time" @@ -411,8 +410,21 @@ func createLibcontainerMount(cwd string, m specs.Mount) (*configs.Mount, error) return mnt, nil } -// systemd property name check: latin letters only, at least 3 of them -var isValidName = regexp.MustCompile(`^[a-zA-Z]{3,}$`).MatchString +// isValidName checks if systemd property name is valid. It should consists +// of latin letters only, and have least 3 of them. +func isValidName(s string) bool { + if len(s) < 3 { + return false + } + // Check ASCII characters rather than Unicode runes. + for _, ch := range s { + if (ch >= 'A' && ch <= 'Z') || (ch >= 'a' && ch <= 'z') { + continue + } + return false + } + return true +} // Some systemd properties are documented as having "Sec" suffix // (e.g. TimeoutStopSec) but are expected to have "USec" suffix diff --git a/libcontainer/specconv/spec_linux_test.go b/libcontainer/specconv/spec_linux_test.go index 96f31e8a06c..ffcac128eef 100644 --- a/libcontainer/specconv/spec_linux_test.go +++ b/libcontainer/specconv/spec_linux_test.go @@ -762,6 +762,39 @@ func TestInitSystemdProps(t *testing.T) { } } +func TestIsValidName(t *testing.T) { + testCases := []struct { + in string + valid bool + }{ + {"", false}, // too short + {"xx", false}, // too short + {"xxx", true}, + {"someValidName", true}, + {"A name", false}, // space + {"3335", false}, // numbers + {"Name1", false}, // numbers + {"Кир", false}, // non-ascii + {"მადლობა", false}, // non-ascii + {"合い言葉", false}, // non-ascii + } + + for _, tc := range testCases { + valid := isValidName(tc.in) + if valid != tc.valid { + t.Errorf("case %q: expected %v, got %v", tc.in, tc.valid, valid) + } + } +} + +func BenchmarkIsValidName(b *testing.B) { + for i := 0; i < b.N; i++ { + for _, s := range []string{"", "xx", "xxx", "someValidName", "A name", "Кир", "მადლობა", "合い言葉"} { + _ = isValidName(s) + } + } +} + func TestNullProcess(t *testing.T) { spec := Example() spec.Process = nil