Merge pull request #3697 from AkihiroSuda/fix-3098

rootless: support Bottlerocket OS
This commit is contained in:
Tõnis Tiigi
2023-03-10 18:36:59 -08:00
committed by GitHub
8 changed files with 204 additions and 0 deletions

8
cache/refs.go vendored
View File

@@ -14,6 +14,7 @@ import (
"github.com/containerd/containerd/images"
"github.com/containerd/containerd/leases"
"github.com/containerd/containerd/mount"
"github.com/containerd/containerd/pkg/userns"
"github.com/containerd/containerd/snapshots"
"github.com/docker/docker/pkg/idtools"
"github.com/hashicorp/go-multierror"
@@ -27,6 +28,7 @@ import (
"github.com/moby/buildkit/util/flightcontrol"
"github.com/moby/buildkit/util/leaseutil"
"github.com/moby/buildkit/util/progress"
rootlessmountopts "github.com/moby/buildkit/util/rootless/mountopts"
"github.com/moby/buildkit/util/winlayers"
"github.com/moby/sys/mountinfo"
digest "github.com/opencontainers/go-digest"
@@ -1640,6 +1642,12 @@ func (sm *sharableMountable) Mount() (_ []mount.Mount, _ func() error, retErr er
os.Remove(dir)
}
}()
if userns.RunningInUserNS() {
mounts, err = rootlessmountopts.FixUp(mounts)
if err != nil {
return nil, nil, err
}
}
if err := mount.All(mounts, dir); err != nil {
return nil, nil, err
}

View File

@@ -24,6 +24,12 @@ spec:
See also the [example manifests](#Kubernetes).
### Bottlerocket OS
You need to run `sysctl -w user.max_user_namespaces=N` (where N is a positive integer, e.g. 63359) on the host nodes.
See [`../examples/kubernetes/sysctl-userns.privileged.yaml`](../examples/kubernetes/sysctl-userns.privileged.yaml).
<details>
<summary>Old distributions</summary>
@@ -104,6 +110,11 @@ See https://rootlesscontaine.rs/getting-started/common/subuid/
### Error `Options:[rbind ro]}]: operation not permitted`
Make sure to mount an `emptyDir` volume on `/home/user/.local/share/buildkit`.
### Error `fork/exec /proc/self/exe: no space left on device` with `level=warning msg="/proc/sys/user/max_user_namespaces needs to be set to non-zero."`
Run `sysctl -w user.max_user_namespaces=N` (N=positive integer, like 63359) on the host nodes.
See [`../examples/kubernetes/sysctl-userns.privileged.yaml`](../examples/kubernetes/sysctl-userns.privileged.yaml).
## Containerized deployment
### Kubernetes

View File

@@ -0,0 +1,26 @@
# Runs `sysctl -w user.max_user_namespaces=63359` on all the nodes,
# to fix errors like "/proc/sys/user/max_user_namespaces needs to be set to non-zero"
# when running rootless buildkitd pods.
#
# This workaround is known to be needed on Bottlerocket OS.
apiVersion: apps/v1
kind: DaemonSet
metadata:
labels:
app: sysctl-userns
name: sysctl-userns
spec:
selector:
matchLabels:
app: sysctl-userns
template:
metadata:
labels:
app: sysctl-userns
spec:
containers:
- name: sysctl-userns
image: busybox
command: ["sh", "-euxc", "sysctl -w user.max_user_namespaces=63359 && sleep infinity"]
securityContext:
privileged: true

View File

@@ -11,12 +11,14 @@ import (
"github.com/containerd/containerd/mount"
"github.com/containerd/containerd/namespaces"
"github.com/containerd/containerd/oci"
"github.com/containerd/containerd/pkg/userns"
"github.com/containerd/continuity/fs"
"github.com/docker/docker/pkg/idtools"
"github.com/mitchellh/hashstructure/v2"
"github.com/moby/buildkit/executor"
"github.com/moby/buildkit/snapshot"
"github.com/moby/buildkit/util/network"
rootlessmountopts "github.com/moby/buildkit/util/rootless/mountopts"
traceexec "github.com/moby/buildkit/util/tracing/exec"
specs "github.com/opencontainers/runtime-spec/specs-go"
"github.com/opencontainers/selinux/go-selinux"
@@ -193,6 +195,14 @@ func GenerateSpec(ctx context.Context, meta executor.Meta, mounts []executor.Mou
}
s.Mounts = dedupMounts(s.Mounts)
if userns.RunningInUserNS() {
s.Mounts, err = rootlessmountopts.FixUpOCI(s.Mounts)
if err != nil {
return nil, nil, err
}
}
return s, releaseAll, nil
}

View File

@@ -8,6 +8,8 @@ import (
"syscall"
"github.com/containerd/containerd/mount"
"github.com/containerd/containerd/pkg/userns"
rootlessmountopts "github.com/moby/buildkit/util/rootless/mountopts"
"github.com/pkg/errors"
)
@@ -24,6 +26,14 @@ func (lm *localMounter) Mount() (string, error) {
lm.release = release
}
if userns.RunningInUserNS() {
var err error
lm.mounts, err = rootlessmountopts.FixUp(lm.mounts)
if err != nil {
return "", err
}
}
if len(lm.mounts) == 1 && (lm.mounts[0].Type == "bind" || lm.mounts[0].Type == "rbind") {
ro := false
for _, opt := range lm.mounts[0].Options {

View File

@@ -0,0 +1,88 @@
package mountopts
import (
"github.com/containerd/containerd/mount"
"github.com/moby/buildkit/util/strutil"
specs "github.com/opencontainers/runtime-spec/specs-go"
"github.com/pkg/errors"
"golang.org/x/sys/unix"
)
// UnprivilegedMountFlags gets the set of mount flags that are set on the mount that contains the given
// path and are locked by CL_UNPRIVILEGED. This is necessary to ensure that
// bind-mounting "with options" will not fail with user namespaces, due to
// kernel restrictions that require user namespace mounts to preserve
// CL_UNPRIVILEGED locked flags.
//
// The flags are returned as mount option strings ("ro", "nodev", ...) in a
// fixed order; the result is nil when none of them is set. An error is
// returned only when statfs(2) on path fails.
//
// From https://github.com/moby/moby/blob/v23.0.1/daemon/oci_linux.go#L430-L460
func UnprivilegedMountFlags(path string) ([]string, error) {
	var statfs unix.Statfs_t
	if err := unix.Statfs(path, &statfs); err != nil {
		return nil, err
	}

	// The set of flags comes from https://github.com/torvalds/linux/blob/v4.13/fs/namespace.c#L1034-L1048.
	//
	// statfs(2) reports the mount flags in f_flags using the ST_* encoding,
	// which is not bit-compatible with the MS_* encoding for every flag
	// (ST_RELATIME is 0x1000 whereas MS_RELATIME is 1<<21), so the masks
	// checked here must be the ST_* ones.
	// A slice is used rather than a map so that the order of the returned
	// options is the same on every call.
	unprivilegedFlags := []struct {
		mask uint64
		opt  string
	}{
		{unix.ST_RDONLY, "ro"},
		{unix.ST_NODEV, "nodev"},
		{unix.ST_NOEXEC, "noexec"},
		{unix.ST_NOSUID, "nosuid"},
		{unix.ST_NOATIME, "noatime"},
		{unix.ST_RELATIME, "relatime"},
		{unix.ST_NODIRATIME, "nodiratime"},
	}

	// Convert once so the comparison below is done in the mask's type.
	set := uint64(statfs.Flags)

	var flags []string
	for _, f := range unprivilegedFlags {
		if set&f.mask == f.mask {
			flags = append(flags, f.opt)
		}
	}
	return flags, nil
}
// FixUp is for https://github.com/moby/buildkit/issues/3098
//
// For every mount whose options contain "bind" or "rbind", the flags reported
// by UnprivilegedMountFlags for the mount's Source are appended to its options
// and duplicates are dropped. Other mounts are left untouched. The input slice
// is updated in place and returned; on failure nil and a wrapped error are
// returned.
func FixUp(mounts []mount.Mount) ([]mount.Mount, error) {
	for idx := range mounts {
		cur := mounts[idx]

		bind := false
		for _, opt := range cur.Options {
			if opt == "bind" || opt == "rbind" {
				bind = true
				break
			}
		}

		if bind {
			locked, err := UnprivilegedMountFlags(cur.Source)
			if err != nil {
				return nil, errors.Wrapf(err, "failed to get unprivileged mount flags for %+v", cur)
			}
			cur.Options = strutil.DedupeSlice(append(cur.Options, locked...))
			mounts[idx] = cur
		}
	}
	return mounts, nil
}
// FixUpOCI does for OCI runtime-spec mounts what FixUp does for containerd
// mounts: each mount that has "bind" or "rbind" among its options gets the
// flags reported by UnprivilegedMountFlags for its Source appended, with
// duplicates removed. The input slice is updated in place and returned; on
// failure nil and a wrapped error are returned.
func FixUpOCI(mounts []specs.Mount) ([]specs.Mount, error) {
	hasBindOption := func(opts []string) bool {
		for _, opt := range opts {
			if opt == "bind" || opt == "rbind" {
				return true
			}
		}
		return false
	}

	for idx, entry := range mounts {
		if !hasBindOption(entry.Options) {
			continue
		}
		locked, err := UnprivilegedMountFlags(entry.Source)
		if err != nil {
			return nil, errors.Wrapf(err, "failed to get unprivileged mount flags for %+v", entry)
		}
		merged := append(entry.Options, locked...)
		entry.Options = strutil.DedupeSlice(merged)
		mounts[idx] = entry
	}
	return mounts, nil
}

View File

@@ -0,0 +1,21 @@
//go:build !linux
// +build !linux
package mountopts
import (
"github.com/containerd/containerd/mount"
specs "github.com/opencontainers/runtime-spec/specs-go"
)
// UnprivilegedMountFlags is the stub used on non-Linux builds: it ignores
// path and always reports an empty, non-nil list of flags with a nil error.
func UnprivilegedMountFlags(path string) ([]string, error) {
	none := make([]string, 0)
	return none, nil
}
// FixUp is the stub used on non-Linux builds: it returns mounts unchanged
// together with a nil error.
func FixUp(mounts []mount.Mount) ([]mount.Mount, error) {
return mounts, nil
}
// FixUpOCI is the stub used on non-Linux builds: it returns mounts unchanged
// together with a nil error.
func FixUpOCI(mounts []specs.Mount) ([]specs.Mount, error) {
return mounts, nil
}

30
util/strutil/strutil.go Normal file
View File

@@ -0,0 +1,30 @@
/*
Copyright The containerd Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package strutil
// DedupeSlice returns the strings of in with duplicates removed. The first
// occurrence of each string is kept and the original order is preserved.
// The result is nil when in has no elements.
//
// DedupeSlice is from https://github.com/containerd/nerdctl/blob/v1.2.1/pkg/strutil/strutil.go#L72-L82
func DedupeSlice(in []string) []string {
	var unique []string
	seen := map[string]bool{}
	for idx := 0; idx < len(in); idx++ {
		item := in[idx]
		if seen[item] {
			continue
		}
		seen[item] = true
		unique = append(unique, item)
	}
	return unique
}