From 44751e28b378ec8d1bc45ea9d8f1444d75fe0186 Mon Sep 17 00:00:00 2001 From: Andrew Halaney Date: Tue, 28 Oct 2025 17:13:12 -0500 Subject: [PATCH 1/3] core/mount: Don't apply uidmap/gidmap during ro instrospection containerd will do a read only mount of the mount string to inspect things like /etc/passwd for a username to uid mapping. It doesn't need to do a uidmap/gidmap for this, that's entirely for the actual container mount later. Let's stop doing the idmap here, its extra work and not necessary. Further, it causes complications when the mount manager is involved. Mount manager will by default create erofs layers at /run/containerd/io.containerd.mount-manager.v1.bolt/t/8103/1, and the snapshotter's upperdir is at (if configured to use a different path) /mnt/containerd/io.containerd.snapshotter.v1.erofs/snapshots/25795/fs. During the read only mount this upperdir is shoved into the lowerdirs list, and then the idmap code tries to find the common parent directory of all lowerdirs. In this case its "/", which is invalid, so the process fails. Let's stop doing the extra work and get away from this class of problems by doing less. Signed-off-by: Andrew Halaney --- core/mount/mount.go | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/core/mount/mount.go b/core/mount/mount.go index e8e0fe31c0..5b2d949ed2 100644 --- a/core/mount/mount.go +++ b/core/mount/mount.go @@ -129,13 +129,15 @@ func readonlyMounts(mounts []Mount) []Mount { // readonlyOverlay takes mount options for overlay mounts and makes them readonly by // removing workdir and upperdir (and appending the upperdir layer to lowerdir) - see: // https://www.kernel.org/doc/html/latest/filesystems/overlayfs.html#multiple-lower-layers +// It also strips the uidmap/gidmap options to avoid needlessly doing an idmap of this +// temporary mount func readonlyOverlay(opt []string) []string { out := make([]string, 0, len(opt)) upper := "" for _, o := range opt { if strings.HasPrefix(o, "upperdir=") { upper = strings.TrimPrefix(o, "upperdir=") - } else if !strings.HasPrefix(o, "workdir=") { + } else if !isSkippedReadonlyOption(o) { out = append(out, o) } } @@ -149,6 +151,15 @@ func readonlyOverlay(opt []string) []string { return out } +// isSkippedReadonlyOption takes an overlayfs option string and returns +// true if such an option should be skipped when converting the mount +// to a readonly mount +func isSkippedReadonlyOption(o string) bool { + return strings.HasPrefix(o, "workdir=") || + strings.HasPrefix(o, "uidmap=") || + strings.HasPrefix(o, "gidmap=") +} + // ToProto converts from [Mount] to the containerd // APIs protobuf definition of a Mount. func ToProto(mounts []Mount) []*types.Mount { From 552500360870ba8706a9f3c9939242f449106f81 Mon Sep 17 00:00:00 2001 From: Andrew Halaney Date: Tue, 28 Oct 2025 17:16:12 -0500 Subject: [PATCH 2/3] core/mount/*linux: Do idmap bind mounts as private and recursive Recursive is needed to catch mounts under the bind mount, for example erofs mount points. i.e. with that in place things like: /foo/erofs1 /foo/erofs2 bind mount + idmap /foo/ -> /foo-idmapped/ makes it so erofs1 and erofs2 are at /foo-idmapped/, allowing us to use that when constructing the final overlay mount erofs snapshotters use. This let's the snapshotter claim to support idmap support later without issue (right now it goes through remapfs). As part of this, we want to make the bind mount have private mount propagation. If you umount /foo-idmapped/erofs1 it shouldn't affect /foo/erofs1, which could be reused in another container's lowerdirs and idmapped bind mount. Signed-off-by: Andrew Halaney --- core/mount/mount_idmapped_linux.go | 6 +++--- core/mount/mount_linux.go | 3 ++- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/core/mount/mount_idmapped_linux.go b/core/mount/mount_idmapped_linux.go index f8157a7ceb..c93b4cef5b 100644 --- a/core/mount/mount_idmapped_linux.go +++ b/core/mount/mount_idmapped_linux.go @@ -90,16 +90,16 @@ func IDMapMountWithAttrs(source, target string, usernsFd int, attrSet uint64, at attr.Attr_set = unix.MOUNT_ATTR_IDMAP | attrSet attr.Attr_clr = attrClr - attr.Propagation = 0 + attr.Propagation = unix.MS_PRIVATE attr.Userns_fd = uint64(usernsFd) - dFd, err := unix.OpenTree(-int(unix.EBADF), source, uint(unix.OPEN_TREE_CLONE|unix.OPEN_TREE_CLOEXEC|unix.AT_EMPTY_PATH)) + dFd, err := unix.OpenTree(-int(unix.EBADF), source, uint(unix.OPEN_TREE_CLONE|unix.OPEN_TREE_CLOEXEC|unix.AT_EMPTY_PATH|unix.AT_RECURSIVE)) if err != nil { return fmt.Errorf("unable to open tree for %s: %w", target, err) } defer unix.Close(dFd) - if err = unix.MountSetattr(dFd, "", unix.AT_EMPTY_PATH, &attr); err != nil { + if err = unix.MountSetattr(dFd, "", unix.AT_EMPTY_PATH|unix.AT_RECURSIVE, &attr); err != nil { return fmt.Errorf("unable to shift GID/UID or set mount attrs for %s: %w", target, err) } diff --git a/core/mount/mount_linux.go b/core/mount/mount_linux.go index e124797aa3..2f33ea9831 100644 --- a/core/mount/mount_linux.go +++ b/core/mount/mount_linux.go @@ -265,11 +265,12 @@ func doPrepareIDMappedOverlay(tmpDir string, lowerDirs []string, usernsFd int) ( if err := IDMapMountWithAttrs(commonDir, tempRemountsLocation, usernsFd, unix.MOUNT_ATTR_RDONLY, 0); err != nil { return nil, nil, err } + cleanMount := func() { // Use the Unmount helper that does retries because there can be easily an open fd // to the idmapped directory and when containerd forks to create a userns fd (maybe // for another container), it will make the mount busy for a few ms. - err := Unmount(tempRemountsLocation, 0) + err := UnmountRecursive(tempRemountsLocation, 0) if err != nil { log.L.WithError(err).Warnf("failed to unmount idmapped directory %s: %v", tempRemountsLocation, err) } From 9b50650d5c492962f5da15ff261c7d168736e741 Mon Sep 17 00:00:00 2001 From: Andrew Halaney Date: Tue, 28 Oct 2025 17:28:34 -0500 Subject: [PATCH 3/3] snapshots/erofs: Support idmap mounts This disables the slow_chown feature (nobody in their right mind is going to be choosing erofs and want to slowly chown each file), indicates that we support idmaps if the kernel supports it, and makes sure to chown the upperdir. This is more or less exactly how the overlay snapshotter does things, minus the slow_chown part (which has discussions about dropping altogether at some point anyways). Signed-off-by: Andrew Halaney --- docs/snapshotters/erofs.md | 3 +- plugins/snapshots/erofs/erofs.go | 68 +++++++++++++++++-- plugins/snapshots/erofs/erofs_linux.go | 12 ++-- plugins/snapshots/erofs/erofs_other.go | 8 +-- plugins/snapshots/erofs/plugin/plugin.go | 12 ++++ .../snapshots/erofs/plugin/plugin_linux.go | 27 ++++++++ .../snapshots/erofs/plugin/plugin_other.go | 23 +++++++ 7 files changed, 135 insertions(+), 18 deletions(-) create mode 100644 plugins/snapshots/erofs/plugin/plugin_linux.go create mode 100644 plugins/snapshots/erofs/plugin/plugin_other.go diff --git a/docs/snapshotters/erofs.md b/docs/snapshotters/erofs.md index 36d4a9fde7..6b509be60b 100644 --- a/docs/snapshotters/erofs.md +++ b/docs/snapshotters/erofs.md @@ -239,6 +239,5 @@ For the EROFS differ: ## TODO - - ID-mapped mount spport; - - DMVerity support. + - DMVerity support. \ No newline at end of file diff --git a/plugins/snapshots/erofs/erofs.go b/plugins/snapshots/erofs/erofs.go index 01ac743b9c..2ef5553ad1 100644 --- a/plugins/snapshots/erofs/erofs.go +++ b/plugins/snapshots/erofs/erofs.go @@ -34,6 +34,7 @@ import ( "github.com/containerd/containerd/v2/core/snapshots" "github.com/containerd/containerd/v2/core/snapshots/storage" "github.com/containerd/containerd/v2/internal/fsverity" + "github.com/containerd/containerd/v2/internal/userns" ) // SnapshotterConfig is used to configure the erofs snapshotter instance @@ -48,6 +49,7 @@ type SnapshotterConfig struct { defaultSize int64 // fsMergeThreshold (>0) enables fsmerge when the number of image layers exceeds this value fsMergeThreshold uint + remapIDs bool } // Opt is an option to configure the erofs snapshotter @@ -88,6 +90,13 @@ func WithFsMergeThreshold(v uint) Opt { } } +// WithRemapIDs enables kernel ID-mapped mounts for user namespace support +func WithRemapIDs() Opt { + return func(config *SnapshotterConfig) { + config.remapIDs = true + } +} + type MetaStore interface { TransactionContext(ctx context.Context, writable bool) (context.Context, storage.Transactor, error) WithTransaction(ctx context.Context, writable bool, fn storage.TransactionCallback) error @@ -103,6 +112,7 @@ type snapshotter struct { defaultWritable int64 blockMode bool fsMergeThreshold uint + remapIDs bool } // NewSnapshotter returns a Snapshotter which uses EROFS+OverlayFS. The layers @@ -160,6 +170,7 @@ func NewSnapshotter(root string, opts ...Opt) (snapshots.Snapshotter, error) { defaultWritable: config.defaultSize, blockMode: config.defaultSize > 0, fsMergeThreshold: config.fsMergeThreshold, + remapIDs: config.remapIDs, }, nil } @@ -241,7 +252,7 @@ func (s *snapshotter) mountFsMeta(snap storage.Snapshot, id int) (mount.Mount, b return m, true } -func (s *snapshotter) mounts(snap storage.Snapshot, _ snapshots.Info) ([]mount.Mount, error) { +func (s *snapshotter) mounts(snap storage.Snapshot, info snapshots.Info) ([]mount.Mount, error) { var options []string if len(snap.ParentIDs) == 0 { @@ -377,6 +388,16 @@ func (s *snapshotter) mounts(snap storage.Snapshot, _ snapshots.Info) ([]mount.M } else { options = append(options, fmt.Sprintf("lowerdir={{ overlay %d %d }}", first, len(mounts)-1)) } + + if s.remapIDs { + if v, ok := info.Labels[snapshots.LabelSnapshotUIDMapping]; ok { + options = append(options, fmt.Sprintf("uidmap=%s", v)) + } + if v, ok := info.Labels[snapshots.LabelSnapshotGIDMapping]; ok { + options = append(options, fmt.Sprintf("gidmap=%s", v)) + } + } + options = append(options, s.ovlOptions...) return append(mounts, mount.Mount{ @@ -426,9 +447,48 @@ func (s *snapshotter) createSnapshot(ctx context.Context, kind snapshots.Kind, k return fmt.Errorf("failed to get snapshot info: %w", err) } - if len(snap.ParentIDs) > 0 { - if err := upperDirectoryPermission(filepath.Join(td, "fs"), s.upperPath(snap.ParentIDs[0])); err != nil { - return err + var ( + mappedUID, mappedGID = -1, -1 + uidmapLabel, gidmapLabel string + needsRemap = false + ) + if v, ok := info.Labels[snapshots.LabelSnapshotUIDMapping]; ok { + uidmapLabel = v + needsRemap = true + } + if v, ok := info.Labels[snapshots.LabelSnapshotGIDMapping]; ok { + gidmapLabel = v + needsRemap = true + } + + if needsRemap { + var idMap userns.IDMap + if err = idMap.Unmarshal(uidmapLabel, gidmapLabel); err != nil { + return fmt.Errorf("failed to unmarshal snapshot ID mapped labels: %w", err) + } + root, err := idMap.RootPair() + if err != nil { + return fmt.Errorf("failed to find root pair: %w", err) + } + mappedUID, mappedGID = int(root.Uid), int(root.Gid) + } + + // Fall back to copying ownership from parent if no ID mapping labels + if mappedUID == -1 || mappedGID == -1 { + if len(snap.ParentIDs) > 0 { + uid, gid, err := getParentOwnership(s.upperPath(snap.ParentIDs[0])) + if err != nil { + return fmt.Errorf("failed to get parent ownership: %w", err) + } + mappedUID = uid + mappedGID = gid + } + } + + // Apply the ownership if we have valid UID/GID + if mappedUID != -1 && mappedGID != -1 { + if err := os.Lchown(filepath.Join(td, "fs"), mappedUID, mappedGID); err != nil { + return fmt.Errorf("failed to chown: %w", err) } } diff --git a/plugins/snapshots/erofs/erofs_linux.go b/plugins/snapshots/erofs/erofs_linux.go index 0ed6fcd401..9175f5ee81 100644 --- a/plugins/snapshots/erofs/erofs_linux.go +++ b/plugins/snapshots/erofs/erofs_linux.go @@ -122,16 +122,12 @@ func convertDirToErofs(ctx context.Context, layerBlob, upperDir string) error { return nil } -func upperDirectoryPermission(p, parent string) error { - st, err := os.Stat(parent) +func getParentOwnership(parentPath string) (uid, gid int, err error) { + st, err := os.Stat(parentPath) if err != nil { - return fmt.Errorf("failed to stat parent: %w", err) + return -1, -1, fmt.Errorf("failed to stat parent: %w", err) } stat := st.Sys().(*syscall.Stat_t) - if err := os.Lchown(p, int(stat.Uid), int(stat.Gid)); err != nil { - return fmt.Errorf("failed to chown: %w", err) - } - - return nil + return int(stat.Uid), int(stat.Gid), nil } diff --git a/plugins/snapshots/erofs/erofs_other.go b/plugins/snapshots/erofs/erofs_other.go index 92fdc311df..b2bec8b966 100644 --- a/plugins/snapshots/erofs/erofs_other.go +++ b/plugins/snapshots/erofs/erofs_other.go @@ -41,10 +41,10 @@ func cleanupUpper(upper string) error { return nil } -func upperDirectoryPermission(p, parent string) error { - return nil -} - func convertDirToErofs(ctx context.Context, layerBlob, upperDir string) error { return errdefs.ErrNotImplemented } + +func getParentOwnership(parentPath string) (uid, gid int, err error) { + return -1, -1, nil +} diff --git a/plugins/snapshots/erofs/plugin/plugin.go b/plugins/snapshots/erofs/plugin/plugin.go index e083c47411..7af5041544 100644 --- a/plugins/snapshots/erofs/plugin/plugin.go +++ b/plugins/snapshots/erofs/plugin/plugin.go @@ -29,6 +29,11 @@ import ( "github.com/docker/go-units" ) +const ( + capaRemapIDs = "remap-ids" + capaOnlyRemapIDs = "only-remap-ids" +) + // Config represents configuration for the native plugin. type Config struct { // Root directory for the plugin @@ -94,6 +99,13 @@ func init() { opts = append(opts, erofs.WithFsMergeThreshold(config.MaxUnmergedLayers)) } + // Don't bother supporting overlay's slow_chown, only RemapIDs + ic.Meta.Capabilities = append(ic.Meta.Capabilities, capaOnlyRemapIDs) + if ok, err := supportsIDMappedMounts(); err == nil && ok { + opts = append(opts, erofs.WithRemapIDs()) + ic.Meta.Capabilities = append(ic.Meta.Capabilities, capaRemapIDs) + } + ic.Meta.Exports[plugins.SnapshotterRootDir] = root ic.Meta.Capabilities = append(ic.Meta.Capabilities, "rebase") return erofs.NewSnapshotter(root, opts...) diff --git a/plugins/snapshots/erofs/plugin/plugin_linux.go b/plugins/snapshots/erofs/plugin/plugin_linux.go new file mode 100644 index 0000000000..c1382544c2 --- /dev/null +++ b/plugins/snapshots/erofs/plugin/plugin_linux.go @@ -0,0 +1,27 @@ +//go:build linux + +/* + Copyright The containerd Authors. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ + +package plugin + +import ( + overlayutils "github.com/containerd/containerd/v2/plugins/snapshots/overlay/overlayutils" +) + +func supportsIDMappedMounts() (bool, error) { + return overlayutils.SupportsIDMappedMounts() +} diff --git a/plugins/snapshots/erofs/plugin/plugin_other.go b/plugins/snapshots/erofs/plugin/plugin_other.go new file mode 100644 index 0000000000..9fdf48d897 --- /dev/null +++ b/plugins/snapshots/erofs/plugin/plugin_other.go @@ -0,0 +1,23 @@ +//go:build !linux + +/* + Copyright The containerd Authors. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ + +package plugin + +func supportsIDMappedMounts() (bool, error) { + return false, nil +}