Files
buildkit/executor/oci/spec.go
Sebastiaan van Stijn 50b548a135 vendor: migrate to github.com/gohugoio/hashstructure
The github.com/mitchellh/hashstructure/v2 module was archived, and
there's a maintained fork in the gohugoio org.

Signed-off-by: Sebastiaan van Stijn <github@gone.nl>
2026-06-17 10:02:52 +02:00

369 lines
9.0 KiB
Go

package oci
import (
"context"
"os"
"path"
"path/filepath"
"runtime"
"strings"
"sync"
"github.com/containerd/containerd/v2/core/containers"
"github.com/containerd/containerd/v2/core/mount"
"github.com/containerd/containerd/v2/pkg/namespaces"
"github.com/containerd/containerd/v2/pkg/oci"
"github.com/gohugoio/hashstructure"
"github.com/moby/buildkit/executor"
"github.com/moby/buildkit/snapshot"
"github.com/moby/buildkit/solver/llbsolver/cdidevices"
"github.com/moby/buildkit/util/network"
rootlessmountopts "github.com/moby/buildkit/util/rootless/mountopts"
"github.com/moby/buildkit/util/system"
"github.com/moby/buildkit/util/tracing/childprocess"
"github.com/moby/sys/user"
"github.com/moby/sys/userns"
specs "github.com/opencontainers/runtime-spec/specs-go"
"github.com/opencontainers/selinux/go-selinux"
"github.com/pkg/errors"
)
// ProcessMode selects how the PID namespace of a build container is set up.
type ProcessMode int

const (
	// ProcessSandbox gives the container its own PID namespace and mounts a
	// fresh procfs. It is the zero value of ProcessMode.
	ProcessSandbox ProcessMode = iota

	// NoProcessSandbox keeps the container in the host PID namespace and
	// bind-mounts procfs.
	//
	// In this mode a build container can kill (and potentially ptrace) an
	// arbitrary process in the BuildKit host namespace, so it should be
	// enabled only when BuildKit itself runs in a container as an
	// unprivileged user.
	NoProcessSandbox
)
// tracingEnvVars are the OpenTelemetry exporter settings that GenerateSpec
// appends to the process environment when a tracing socket is configured.
// The endpoint value is computed once, at package initialization, from
// getTracingSocket().
var tracingEnvVars = []string{
	"OTEL_TRACES_EXPORTER=otlp",
	"OTEL_EXPORTER_OTLP_TRACES_ENDPOINT=" + getTracingSocket(),
	"OTEL_EXPORTER_OTLP_TRACES_PROTOCOL=grpc",
}
// String returns the textual name of the process mode: "sandbox" for
// ProcessSandbox, "no-sandbox" for NoProcessSandbox, and the empty string
// for any other value.
func (pm ProcessMode) String() string {
	if pm == ProcessSandbox {
		return "sandbox"
	}
	if pm == NoProcessSandbox {
		return "no-sandbox"
	}
	return ""
}
// GenerateSpec generates an OCI runtime spec using containerd functionality.
// opts are ignored for s.Process, s.Hostname, and s.Mounts.
//
// On success it returns the spec together with a release function that the
// caller must invoke once the spec is no longer needed: it unmounts every
// sub-mount created here, runs the release callbacks of all mounted sources
// and releases the SELinux process label if one was set. On failure the
// returned spec and release function are nil and everything acquired up to
// that point has already been released.
//
// Ideally we would not have to import the whole of containerd just for the
// default spec.
func GenerateSpec(ctx context.Context, meta executor.Meta, mounts []executor.Mount, id, resolvConf, hostsFile string, namespace network.Namespace, cgroupParent string, processMode ProcessMode, idmap *user.IdentityMapping, apparmorProfile string, selinuxB bool, tracingSocket string, cdiManager *cdidevices.Manager, opts ...oci.SpecOpts) (*specs.Spec, func(), error) {
	c := &containers.Container{
		ID: id,
	}

	// A cgroup parent requested by the build step overrides the default one.
	if len(meta.CgroupParent) > 0 {
		cgroupParent = meta.CgroupParent
	}
	if cgroupParent != "" {
		var cgroupsPath string
		if strings.Contains(cgroupParent, ".slice") && strings.HasSuffix(cgroupParent, ":") {
			// "<name>.slice:<prefix>:" form: the container id is appended directly.
			cgroupsPath = cgroupParent + id
		} else {
			cgroupsPath = filepath.Join("/", cgroupParent, "buildkit", id)
		}
		opts = append(opts, oci.WithCgroup(cgroupsPath))
	}

	// containerd/oci.GenerateSpec requires a namespace, which
	// will be used to namespace specs.Linux.CgroupsPath if generated
	if _, ok := namespaces.Namespace(ctx); !ok {
		ctx = namespaces.WithNamespace(ctx, "buildkit")
	}

	opts = append(opts, generateMountOpts(resolvConf, hostsFile)...)

	securityOpts, err := generateSecurityOpts(meta.SecurityMode, apparmorProfile, selinuxB)
	if err != nil {
		return nil, nil, err
	}
	opts = append(opts, securityOpts...)

	processModeOpts, err := generateProcessModeOpts(processMode)
	if err != nil {
		return nil, nil, err
	}
	opts = append(opts, processModeOpts...)

	idmapOpts, err := generateIDmapOpts(idmap)
	if err != nil {
		return nil, nil, err
	}
	opts = append(opts, idmapOpts...)

	rlimitsOpts, err := generateRlimitOpts(meta.Ulimit)
	if err != nil {
		return nil, nil, err
	}
	opts = append(opts, rlimitsOpts...)

	linuxResOpts, err := generateLinuxResourceOpts(meta.LinuxResources)
	if err != nil {
		return nil, nil, err
	}
	opts = append(opts, linuxResOpts...)

	hostname := defaultHostname
	if meta.Hostname != "" {
		hostname = meta.Hostname
	}

	if tracingSocket != "" {
		// https://github.com/open-telemetry/opentelemetry-specification/blob/main/specification/protocol/exporter.md
		meta.Env = append(meta.Env, tracingEnvVars...)
		meta.Env = append(meta.Env, childprocess.Environ(ctx)...)
	}

	opts = append(opts,
		withProcessArgs(meta.Args...),
		oci.WithEnv(meta.Env),
		oci.WithProcessCwd(meta.Cwd),
		oci.WithNewPrivileges,
		oci.WithHostname(hostname),
	)

	if cdiManager != nil {
		cdiOpts, err := generateCDIOpts(cdiManager, meta.CDIDevices)
		if err != nil {
			return nil, nil, err
		}
		opts = append(opts, cdiOpts...)
	}

	s, err := oci.GenerateSpec(ctx, nil, c, opts...)
	if err != nil {
		return nil, nil, errors.WithStack(err)
	}

	// From here on the spec may carry an SELinux label, and mounts get
	// acquired below; every error path must go through releaseAll.
	sm := &submounts{}
	var releasers []func() error
	releaseAll := func() {
		sm.cleanup()
		for _, f := range releasers {
			// Best-effort cleanup: release errors are intentionally ignored.
			f()
		}
		if s.Process.SelinuxLabel != "" {
			selinux.ReleaseLabel(s.Process.SelinuxLabel)
		}
	}

	if cgroupV2NamespaceSupported() {
		s.Linux.Namespaces = append(s.Linux.Namespaces, specs.LinuxNamespace{
			Type: specs.CgroupNamespace,
		})
	}

	if len(meta.Ulimit) == 0 {
		// reset open files limit
		s.Process.Rlimits = nil
	}

	// set the networking information on the spec
	if err := namespace.Set(s); err != nil {
		releaseAll()
		return nil, nil, errors.WithStack(err)
	}

	for _, m := range mounts {
		if m.Src == nil {
			// Mounts from earlier iterations are already active; release
			// them instead of leaking them.
			releaseAll()
			return nil, nil, errors.Errorf("mount %s has no source", m.Dest)
		}
		mountable, err := m.Src.Mount(ctx, m.Readonly)
		if err != nil {
			releaseAll()
			return nil, nil, errors.Wrapf(err, "failed to mount %s", m.Dest)
		}
		srcMounts, release, err := mountable.Mount()
		if err != nil {
			releaseAll()
			return nil, nil, errors.WithStack(err)
		}
		releasers = append(releasers, release)

		for _, srcMount := range srcMounts {
			compacted, releaseCompacted, err := compactLongOverlayMount(srcMount, m.Readonly)
			if err != nil {
				releaseAll()
				return nil, nil, err
			}
			if releaseCompacted != nil {
				releasers = append(releasers, releaseCompacted)
			}

			subMnt, err := sm.subMount(compacted, m.Selector)
			if err != nil {
				releaseAll()
				// Report the selector as the caller wrote it rather than the
				// path it resolved to under the temporary mount point.
				var pathErr *os.PathError
				if errors.As(err, &pathErr) && strings.HasSuffix(pathErr.Path, m.Selector) {
					pathErr.Path = m.Selector
				}
				return nil, nil, err
			}

			s.Mounts = append(s.Mounts, specs.Mount{
				Destination: system.GetAbsolutePath(m.Dest),
				Type:        normalizeMountType(subMnt.Type),
				Source:      subMnt.Source,
				Options:     subMnt.Options,
			})
		}
	}

	if tracingSocket != "" {
		// moby/buildkit#4764
		// Only mount the tracing socket when it exists on the host.
		if _, err := os.Stat(tracingSocket); err == nil {
			if socketMount := getTracingSocketMount(tracingSocket); socketMount != nil {
				s.Mounts = append(s.Mounts, *socketMount)
			}
		}
	}

	s.Mounts = dedupMounts(s.Mounts)

	if userns.RunningInUserNS() {
		s.Mounts, err = rootlessmountopts.FixUpOCI(s.Mounts)
		if err != nil {
			releaseAll()
			return nil, nil, err
		}
	}

	return s, releaseAll, nil
}
// mountRef tracks one active mount together with the function that undoes it.
type mountRef struct {
	// mount is the mount handed out to callers.
	mount mount.Mount
	// unmount releases the mount; it is invoked by submounts.cleanup.
	unmount func() error
	// subRefs caches the sub-mounts derived from this mount, keyed by the
	// sub-path that was requested.
	subRefs map[string]mountRef
}
// submounts caches locally mounted sources and the sub-mounts derived from
// them. The zero value is ready to use; the map is allocated lazily by
// subMount.
type submounts struct {
	// m is keyed by the hashstructure hash of the source mount.
	m map[uint64]mountRef
}
// subMount returns a mount that exposes only subPath of m.
//
// When subPath resolves to the root, m is returned unchanged (except on
// Windows, which always goes through the sub-mounting process). Otherwise m
// is mounted locally once per distinct mount (identified by its hash), and
// each requested sub-path is resolved at most once per mount; repeated
// requests are served from the cache. Everything mounted here is released by
// cleanup.
func (s *submounts) subMount(m mount.Mount, subPath string) (mount.Mount, error) {
	// for Windows, always go through the sub-mounting process
	if runtime.GOOS != "windows" && path.Join("/", subPath) == "/" {
		return m, nil
	}
	if s.m == nil {
		s.m = map[uint64]mountRef{}
	}

	key, err := hashstructure.Hash(m, nil)
	if err != nil {
		return mount.Mount{}, errors.WithStack(err)
	}

	parent, known := s.m[key]
	if known {
		if cached, hit := parent.subRefs[subPath]; hit {
			return cached.mount, nil
		}
	} else {
		// First time this mount is seen: mount it locally and register it so
		// that cleanup can unmount it later.
		mounter := snapshot.LocalMounterWithMounts([]mount.Mount{m})
		target, err := mounter.Mount()
		if err != nil {
			return mount.Mount{}, err
		}
		parent = mountRef{
			mount:   bind(target, m.ReadOnly()),
			unmount: mounter.Unmount,
			subRefs: map[string]mountRef{},
		}
		s.m[key] = parent
	}

	child, unmountChild, err := sub(parent.mount, subPath)
	if err != nil {
		return mount.Mount{}, err
	}
	// subRefs is a map, so this write is visible through the copy stored in s.m.
	parent.subRefs[subPath] = mountRef{
		mount:   child,
		unmount: unmountChild,
	}
	return child, nil
}
// cleanup unmounts every cached mount. Each top-level mount is handled in its
// own goroutine: its sub-mounts are unmounted first, then the mount itself.
// cleanup returns once all goroutines have finished. Unmount errors are
// ignored.
func (s *submounts) cleanup() {
	var wg sync.WaitGroup
	for _, ref := range s.m {
		wg.Add(1)
		go func(ref mountRef) {
			defer wg.Done()
			for _, child := range ref.subRefs {
				child.unmount()
			}
			ref.unmount()
		}(ref)
	}
	wg.Wait()
}
// bind describes a bind mount of the path p, marked read-only when ro is set.
func bind(p string, ro bool) mount.Mount {
	var (
		mountType string
		options   []string
	)
	// Windows uses a mechanism similar to bind mounts, but will err out if we
	// request a mount type it does not understand. Leaving the mount type
	// empty on Windows will yield the same result.
	if runtime.GOOS != "windows" {
		mountType = "bind"
		options = []string{"rbind"}
	}
	if ro {
		options = append(options, "ro")
	}
	return mount.Mount{
		Type:    mountType,
		Source:  p,
		Options: options,
	}
}
// compactLongOverlayMount replaces an overlay mount whose options are too
// long with a bind mount of the locally mounted overlay.
//
// Non-overlay mounts, and overlay mounts whose options are short enough, are
// returned unchanged with a nil release function. Otherwise m is mounted
// locally and a bind mount of that mount point is returned, along with the
// function that unmounts it.
func compactLongOverlayMount(m mount.Mount, ro bool) (mount.Mount, func() error, error) {
	if m.Type != "overlay" {
		return m, nil, nil
	}

	// Total option length, counting one extra byte per option.
	optionBytes := len(m.Options)
	for _, option := range m.Options {
		optionBytes += len(option)
	}

	// can fit to single page, no need to compact
	if optionBytes < 4096-512 {
		return m, nil, nil
	}

	mounter := snapshot.LocalMounterWithMounts([]mount.Mount{m})
	target, err := mounter.Mount()
	if err != nil {
		return mount.Mount{}, nil, err
	}
	return bind(target, ro), mounter.Unmount, nil
}