mirror of
https://github.com/containerd/containerd.git
synced 2026-06-24 08:48:48 +00:00
Previously, to address issue #11708, PR #11793 changed containerd to always invoke the shim binary to establish shim connections, rather than reusing the sandbox shim. However, this change did not ensure that the Shutdown API was called to stop the shim process. Starting with containerd v2.0.0, the Shutdown API is only invoked for sandbox containers (when container.SandboxID is empty). This approach works for groupable shims, where multiple containers share a single socket address and only require a single Shutdown call. However, for non-groupable shims, each container requires its own Shutdown call during cleanup to avoid leaking shim processes.

Additionally, PR #11793 introduced a corner case during upgrades:

- T1: An old containerd-shim-runc-v2 (<=v1.7.X) is running for pod A.
- T2: containerd is upgraded to v2.X.Y.
- T3: A new container A-C1 is created in pod A using the new shim-runc-v2 binary.
- T4: bootstrap.json indicates the version:3 protocol, but it is downgraded to version:2 in memory.
- T5: containerd is restarted.
- T6: containerd fails to connect to A-C1.
- T7: The A-C1 container is left in EXITED status in the CRI plugin.

To address this, ensure that loadShimTask downgrades to version:2 if necessary, and always invoke the Shutdown API for each non-groupable shim during cleanup to prevent resource leaks and handle upgrade scenarios correctly. (Introduced by #11793)

Signed-off-by: Wei Fu <fuweid89@gmail.com>
122 lines
3.9 KiB
Go
122 lines
3.9 KiB
Go
/*
|
|
Copyright The containerd Authors.
|
|
|
|
Licensed under the Apache License, Version 2.0 (the "License");
|
|
you may not use this file except in compliance with the License.
|
|
You may obtain a copy of the License at
|
|
|
|
http://www.apache.org/licenses/LICENSE-2.0
|
|
|
|
Unless required by applicable law or agreed to in writing, software
|
|
distributed under the License is distributed on an "AS IS" BASIS,
|
|
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
See the License for the specific language governing permissions and
|
|
limitations under the License.
|
|
*/
|
|
|
|
package integration
|
|
|
|
import (
|
|
"fmt"
|
|
"path/filepath"
|
|
"syscall"
|
|
"testing"
|
|
"time"
|
|
|
|
"github.com/containerd/continuity/fs"
|
|
"github.com/stretchr/testify/require"
|
|
"go.etcd.io/bbolt"
|
|
)
|
|
|
|
// TestIssue10467 tests the migration of sandboxes into the proper bucket. Prior to v1.7.21, the
|
|
// sandboxes were stored incorrectly in the root bucket. In order to verify the migration, a v1.7.20
|
|
// must run and create a sandbox, then check the migration after upgrading to a newer version.
|
|
func TestIssue10467(t *testing.T) {
|
|
latestVersion := "v1.7.20"
|
|
|
|
releaseBinDir := t.TempDir()
|
|
|
|
downloadReleaseBinary(t, releaseBinDir, latestVersion)
|
|
|
|
t.Logf("Install config for release %s", latestVersion)
|
|
workDir := t.TempDir()
|
|
oneSevenCtrdConfig(t, releaseBinDir, workDir)
|
|
|
|
t.Log("Starting the previous release's containerd")
|
|
previousCtrdBinPath := filepath.Join(releaseBinDir, "bin", "containerd")
|
|
previousProc := newCtrdProc(t, previousCtrdBinPath, workDir, []string{"ENABLE_CRI_SANDBOXES=yes"})
|
|
|
|
boltdbPath := filepath.Join(workDir, "root", "io.containerd.metadata.v1.bolt", "meta.db")
|
|
|
|
ctrdLogPath := previousProc.logPath()
|
|
t.Cleanup(func() {
|
|
if t.Failed() {
|
|
dumpFileContent(t, ctrdLogPath)
|
|
}
|
|
})
|
|
|
|
require.NoError(t, previousProc.isReady())
|
|
|
|
needToCleanup := true
|
|
t.Cleanup(func() {
|
|
if t.Failed() && needToCleanup {
|
|
t.Logf("Try to cleanup leaky pods")
|
|
cleanupPods(t, previousProc.criRuntimeService(t))
|
|
}
|
|
})
|
|
|
|
t.Log("Prepare pods for current release")
|
|
upgradeCaseFuncs, hookFunc := shouldManipulateContainersInPodAfterUpgrade("")(t, 2, previousProc.criRuntimeService(t), previousProc.criImageService(t))
|
|
upgradeCaseFunc := upgradeCaseFuncs[0]
|
|
needToCleanup = false
|
|
require.Nil(t, hookFunc)
|
|
|
|
t.Log("Gracefully stop previous release's containerd process")
|
|
require.NoError(t, previousProc.kill(syscall.SIGTERM))
|
|
require.NoError(t, previousProc.wait(5*time.Minute))
|
|
|
|
t.Logf("%s should have bucket k8s.io in root", boltdbPath)
|
|
db, err := bbolt.Open(boltdbPath, 0600, &bbolt.Options{ReadOnly: true})
|
|
require.NoError(t, err)
|
|
require.NoError(t, db.View(func(tx *bbolt.Tx) error {
|
|
if tx.Bucket([]byte("k8s.io")) == nil {
|
|
return fmt.Errorf("expected k8s.io bucket")
|
|
}
|
|
return nil
|
|
}))
|
|
require.NoError(t, db.Close())
|
|
|
|
t.Log("Install default config for current release")
|
|
currentReleaseCtrdDefaultConfig(t, workDir)
|
|
|
|
t.Log("Starting the current release's containerd")
|
|
currentProc := newCtrdProc(t, "containerd", workDir, nil)
|
|
require.NoError(t, currentProc.isReady())
|
|
|
|
t.Cleanup(func() {
|
|
t.Log("Cleanup all the pods")
|
|
cleanupPods(t, currentProc.criRuntimeService(t))
|
|
|
|
t.Log("Stopping current release's containerd process")
|
|
require.NoError(t, currentProc.kill(syscall.SIGTERM))
|
|
require.NoError(t, currentProc.wait(5*time.Minute))
|
|
})
|
|
|
|
t.Logf("%s should not have bucket k8s.io in root after restart", boltdbPath)
|
|
copiedBoltdbPath := filepath.Join(t.TempDir(), "meta.db.new")
|
|
require.NoError(t, fs.CopyFile(copiedBoltdbPath, boltdbPath))
|
|
|
|
db, err = bbolt.Open(copiedBoltdbPath, 0600, &bbolt.Options{ReadOnly: true})
|
|
require.NoError(t, err)
|
|
require.NoError(t, db.View(func(tx *bbolt.Tx) error {
|
|
if tx.Bucket([]byte("k8s.io")) != nil {
|
|
return fmt.Errorf("unexpected k8s.io bucket")
|
|
}
|
|
return nil
|
|
}))
|
|
require.NoError(t, db.Close())
|
|
|
|
t.Log("Verifing")
|
|
upgradeCaseFunc(t, currentProc.criRuntimeService(t), currentProc.criImageService(t))
|
|
}
|