From cff8184ffbcbd508e27995f339c227fd49290f64 Mon Sep 17 00:00:00 2001 From: Jin Dong Date: Sun, 30 Mar 2025 14:02:54 +0000 Subject: [PATCH] support image volume sub path Signed-off-by: Jin Dong --- integration/image_volume_linux_test.go | 111 ++++++++++++++---- integration/main_test.go | 7 +- internal/cri/server/container_image_mount.go | 113 ++++++++++++------- 3 files changed, 165 insertions(+), 66 deletions(-) diff --git a/integration/image_volume_linux_test.go b/integration/image_volume_linux_test.go index 1767908b40..440f824556 100644 --- a/integration/image_volume_linux_test.go +++ b/integration/image_volume_linux_test.go @@ -40,20 +40,22 @@ func TestImageVolumeBasic(t *testing.T) { snSrv := containerdClient.SnapshotService("overlayfs") for _, tc := range []struct { - name string - containerImage string - selinuxLevel string - imageVolumeImage, imageVolumePath string + name string + containerImage string + selinuxLevel string + imageVolumeImage, imageSubPath string + containerPath string - execSyncCommands []string - execSyncError string - execSyncOutput string + createContainerError string + execSyncCommands []string + execSyncError string + execSyncOutput string }{ { name: "should be readonly content", containerImage: images.Get(images.Alpine), imageVolumeImage: images.Get(images.Pause), - imageVolumePath: "/image-mount", + containerPath: "/image-mount", execSyncCommands: []string{"rm", "/image-mount/pause"}, execSyncError: "can't remove '/image-mount/pause': Read-only file system", }, @@ -62,7 +64,7 @@ func TestImageVolumeBasic(t *testing.T) { containerImage: images.Get(images.ResourceConsumer), selinuxLevel: "s0:c4,c5", imageVolumeImage: images.Get(images.Pause), - imageVolumePath: "/image-mount", + containerPath: "/image-mount", execSyncCommands: []string{"ls", "-Z", "/image-mount"}, execSyncOutput: "system_u:object_r:container_file_t:s0:c4,c5 pause", }, @@ -71,10 +73,59 @@ func TestImageVolumeBasic(t *testing.T) { containerImage: images.Get(images.ResourceConsumer), selinuxLevel: "s0:c200,c100", imageVolumeImage: images.Get(images.Pause), - imageVolumePath: "/image-mount", + containerPath: "/image-mount", execSyncCommands: []string{"ls", "-Z", "/image-mount"}, execSyncOutput: "system_u:object_r:container_file_t:s0:c100,c200 pause", }, + { + name: "should only mount image subpath", + containerImage: images.Get(images.Alpine), + imageVolumeImage: images.Get(images.Alpine), + imageSubPath: "etc", + containerPath: "/image-mount", + execSyncCommands: []string{"ls", filepath.Join("/image-mount", "os-release")}, + execSyncOutput: filepath.Join("/image-mount", "os-release"), + }, + { + name: "fail to mount single file subpath", + containerImage: images.Get(images.Alpine), + imageVolumeImage: images.Get(images.Pause), + imageSubPath: "pause", + containerPath: "/image-mount", + createContainerError: "only directory subpath is supported", + }, + { + name: "fail to mount non-existent subpath", + containerImage: images.Get(images.Alpine), + imageVolumeImage: images.Get(images.Alpine), + imageSubPath: "non-existent-subpath", + containerPath: "/image-mount", + createContainerError: "no such file or directory", + }, + { + name: "fail to mount absolute subpath", + containerImage: images.Get(images.Alpine), + imageVolumeImage: images.Get(images.Alpine), + imageSubPath: "/etc", + containerPath: "/image-mount", + createContainerError: "path escapes from parent", + }, + { + name: "fail to mount escaped subpath", + containerImage: images.Get(images.Alpine), + imageVolumeImage: images.Get(images.Alpine), + imageSubPath: "etc/../../..", + containerPath: "/image-mount", + createContainerError: "path escapes from parent", + }, + { + name: "fail to mount a symlink file that escapes subpath", + containerImage: images.Get(images.Alpine), + imageVolumeImage: images.Get(images.Alpine), + imageSubPath: "bin/sh", // `bin/sh` is a symlink to `/bin/busybox` in the mount image + containerPath: "/image-mount", + createContainerError: "path escapes from parent", + }, } { t.Run(tc.name, func(t *testing.T) { if tc.selinuxLevel != "" { @@ -83,7 +134,13 @@ func TestImageVolumeBasic(t *testing.T) { } } - podCtx, cnID := setupRunningContainerWithImageVolume(t, tc.selinuxLevel, tc.containerImage, tc.imageVolumeImage, tc.imageVolumePath) + podCtx, cnID, err := setupRunningContainerWithImageVolume(t, tc.selinuxLevel, tc.containerImage, tc.imageVolumeImage, tc.imageSubPath, tc.containerPath) + if err != nil { + require.NotEmpty(t, tc.createContainerError) + require.Contains(t, err.Error(), tc.createContainerError) + return + } + require.Empty(t, tc.createContainerError) cleanup := true defer func() { @@ -105,7 +162,7 @@ func TestImageVolumeBasic(t *testing.T) { cleanup = false t.Log("Check snapshot after deleting pod") - for i := 0; i < 30; i++ { + for range 30 { _, err := snSrv.Mounts(ctx, volumeImgTarget) if errdefs.IsNotFound(err) { return @@ -129,7 +186,7 @@ func TestImageVolumeBasic(t *testing.T) { } } -func setupRunningContainerWithImageVolume(t *testing.T, selinuxLevel string, containerImage string, imageVolumeName, imageVolumePath string) (*podTCtx, string) { +func setupRunningContainerWithImageVolume(t *testing.T, selinuxLevel string, containerImage string, imageVolumeName, imageSubPath, containerPath string) (podCtx *podTCtx, cnID string, err error) { podLogDir := t.TempDir() podOpts := []PodSandboxOpts{ @@ -138,9 +195,9 @@ func setupRunningContainerWithImageVolume(t *testing.T, selinuxLevel string, con if selinuxLevel != "" { podOpts = append(podOpts, WithSelinuxLevel(selinuxLevel)) } - podCtx := newPodTCtx(t, runtimeService, t.Name(), "image-voloume", podOpts...) + podCtx = newPodTCtx(t, runtimeService, t.Name(), "image-voloume", podOpts...) defer func() { - if t.Failed() { + if t.Failed() || err != nil { podCtx.stop(true) } }() @@ -148,13 +205,18 @@ func setupRunningContainerWithImageVolume(t *testing.T, selinuxLevel string, con pullImagesByCRI(t, imageService, containerImage, imageVolumeName) containerName := "running" - cnID := podCtx.createContainer(containerName, - containerImage, - criruntime.ContainerState_CONTAINER_RUNNING, + cfg := ContainerConfig(containerName, containerImage, WithCommand("sleep", "1d"), - WithImageVolumeMount(imageVolumeName, imageVolumePath), - WithLogPath(containerName)) - return podCtx, cnID + WithImageVolumeMount(imageVolumeName, imageSubPath, containerPath), + WithLogPath(containerName), + ) + cnID, err = podCtx.rSvc.CreateContainer(podCtx.id, cfg, podCtx.cfg) + if err != nil { + return podCtx, "", err + } + + require.NoError(t, podCtx.rSvc.StartContainer(cnID)) + return podCtx, cnID, nil } func TestImageVolumeCheckVolatileOption(t *testing.T) { @@ -168,7 +230,8 @@ func TestImageVolumeCheckVolatileOption(t *testing.T) { } containerImage := images.Get(images.Alpine) - podCtx, _ := setupRunningContainerWithImageVolume(t, "", containerImage, containerImage, "/alpine") + podCtx, _, err := setupRunningContainerWithImageVolume(t, "", containerImage, containerImage, "", "/alpine") + require.NoError(t, err) t.Cleanup(func() { podCtx.stop(true) }) @@ -242,7 +305,7 @@ func TestImageVolumeSetupIfContainerdRestarts(t *testing.T) { alpineImage, criruntime.ContainerState_CONTAINER_RUNNING, WithCommand("sleep", "1d"), - WithImageVolumeMount(alpineImage, "/alpine-2")) + WithImageVolumeMount(alpineImage, "", "/alpine-2")) mpInfo1, err := mount.Lookup(targetVolumeMount) require.NoError(t, err) @@ -252,7 +315,7 @@ func TestImageVolumeSetupIfContainerdRestarts(t *testing.T) { alpineImage, criruntime.ContainerState_CONTAINER_RUNNING, WithCommand("sleep", "1d"), - WithImageVolumeMount(alpineImage, "/alpine-2")) + WithImageVolumeMount(alpineImage, "", "/alpine-2")) mpInfo2, err := mount.Lookup(targetVolumeMount) require.NoError(t, err) diff --git a/integration/main_test.go b/integration/main_test.go index a0b97fd97e..3d73590d40 100644 --- a/integration/main_test.go +++ b/integration/main_test.go @@ -353,11 +353,11 @@ func WithIDMapVolumeMount(hostPath, containerPath string, uidMaps, gidMaps []*ru } } -func WithImageVolumeMount(image, containerPath string) ContainerOpts { - return WithIDMapImageVolumeMount(image, containerPath, nil, nil) +func WithImageVolumeMount(image, imageSubPath, containerPath string) ContainerOpts { + return WithIDMapImageVolumeMount(image, imageSubPath, containerPath, nil, nil) } -func WithIDMapImageVolumeMount(image string, containerPath string, uidMaps, gidMaps []*runtime.IDMapping) ContainerOpts { +func WithIDMapImageVolumeMount(image, imageSubPath, containerPath string, uidMaps, gidMaps []*runtime.IDMapping) ContainerOpts { return func(c *runtime.ContainerConfig) { containerPath, _ = filepath.Abs(containerPath) mount := &runtime.Mount{ @@ -367,6 +367,7 @@ func WithIDMapImageVolumeMount(image string, containerPath string, uidMaps, gidM Image: &runtime.ImageSpec{ Image: image, }, + ImageSubPath: imageSubPath, Readonly: true, SelinuxRelabel: true, } diff --git a/internal/cri/server/container_image_mount.go b/internal/cri/server/container_image_mount.go index 1f5aa55cf3..fb2c21a082 100644 --- a/internal/cri/server/container_image_mount.go +++ b/internal/cri/server/container_image_mount.go @@ -31,6 +31,7 @@ import ( "github.com/opencontainers/image-spec/identity" imagespec "github.com/opencontainers/image-spec/specs-go/v1" runtime "k8s.io/cri-api/pkg/apis/runtime/v1" + crierrors "k8s.io/cri-api/pkg/errors" ) func (c *criService) mutateMounts( @@ -48,7 +49,7 @@ func (c *criService) mutateMounts( for _, m := range extraMounts { err := c.mutateImageMount(ctx, m, snapshotter, sandboxID, platform) if err != nil { - return err + return fmt.Errorf("%w: %w", crierrors.ErrImageVolumeMountFailed, err) } } return nil @@ -106,51 +107,56 @@ func (c *criService) mutateImageMount( if err != nil { return fmt.Errorf("failed to ensure %s is mounted: %w", target, err) } - if mounted { - extraMount.HostPath = target - return nil - } + if !mounted { + img, err := c.client.ImageService().Get(ctx, ref) + if err != nil { + return fmt.Errorf("failed to get image volume ref %q: %w", ref, err) + } - img, err := c.client.ImageService().Get(ctx, ref) - if err != nil { - return fmt.Errorf("failed to get image volume ref %q: %w", ref, err) - } + i := containerd.NewImageWithPlatform(c.client, img, platforms.Only(platform)) + if err := i.Unpack(ctx, snapshotter); err != nil { + return fmt.Errorf("failed to unpack image volume: %w", err) + } - i := containerd.NewImageWithPlatform(c.client, img, platforms.Only(platform)) - if err := i.Unpack(ctx, snapshotter); err != nil { - return fmt.Errorf("failed to unpack image volume: %w", err) - } + diffIDs, err := i.RootFS(ctx) + if err != nil { + return fmt.Errorf("failed to get diff IDs for image volume %q: %w", ref, err) + } + chainID := identity.ChainID(diffIDs).String() - diffIDs, err := i.RootFS(ctx) - if err != nil { - return fmt.Errorf("failed to get diff IDs for image volume %q: %w", ref, err) - } - chainID := identity.ChainID(diffIDs).String() + s := c.client.SnapshotService(snapshotter) + mounts, err := s.Prepare(ctx, target, chainID) + if err != nil { + if errdefs.IsAlreadyExists(err) { + mounts, err = s.Mounts(ctx, target) + } + } + if err != nil { + return fmt.Errorf("failed to prepare for image volume %q: %w", ref, err) + } + defer func() { + if retErr != nil { + _ = s.Remove(ctx, target) + } + }() - s := c.client.SnapshotService(snapshotter) - mounts, err := s.Prepare(ctx, target, chainID) - if err != nil { - if errdefs.IsAlreadyExists(err) { - mounts, err = s.Mounts(ctx, target) + err = os.MkdirAll(target, 0755) + if err != nil { + return fmt.Errorf("failed to create directory to image volume target path %q: %w", target, err) + } + + mounts = addVolatileOptionOnImageVolumeMount(mounts) + if err := mount.All(mounts, target); err != nil { + return fmt.Errorf("failed to mount image volume component %q: %w", target, err) } } - if err != nil { - return fmt.Errorf("failed to prepare for image volume %q: %w", ref, err) - } - defer func() { - if retErr != nil { - _ = s.Remove(ctx, target) + + if imageSubPath := extraMount.GetImageSubPath(); imageSubPath != "" { + mountPoint, err := ensureImageSubPath(target, imageSubPath) + if err != nil { + return fmt.Errorf("failed to ensure image subpath %q in %q: %w", imageSubPath, target, err) } - }() - - err = os.MkdirAll(target, 0755) - if err != nil { - return fmt.Errorf("failed to create directory to image volume target path %q: %w", target, err) - } - - mounts = addVolatileOptionOnImageVolumeMount(mounts) - if err := mount.All(mounts, target); err != nil { - return fmt.Errorf("failed to mount image volume component %q: %w", target, err) + target = mountPoint } extraMount.HostPath = target @@ -204,3 +210,32 @@ func (c *criService) cleanupImageMounts( } return nil } + +// ensureImageSubPath ensures the subPath exists **within** the mountPoint (i.e. +// not escape outside of mountPoint) and it's a directory. +// It returns the final absolute path of `subPath`. +func ensureImageSubPath(mountPoint, subPath string) (string, error) { + if subPath == "" { + return mountPoint, nil + } + + file, err := os.OpenInRoot(mountPoint, subPath) + if err != nil { + return "", err + } + defer file.Close() + + stat, err := file.Stat() + if err != nil { + return "", err + } + + if !stat.IsDir() { + // the current OCI volume source treats mounting a single file as non-goal + // and limits the mount output to directories. + // https://github.com/kubernetes/enhancements/tree/f3fa3a12d303a6b749efd072987a39aab159f9d5/keps/sig-node/4639-oci-volume-source#non-goals + return "", fmt.Errorf("only directory subpath is supported, subpath: %q, mountpoint: %q ", subPath, mountPoint) + } + + return file.Name(), nil +}