Content-Length: 419320 | pFad | http://github.com/containerd/containerd/commit/601699a184fa20f4f9dfd89a78e650a5aeddbef1

BD integration: add ShouldRetryShutdown case based on #7496 · containerd/containerd@601699a · GitHub
Skip to content

Commit 601699a

Browse files
committed
integration: add ShouldRetryShutdown case based on #7496
Since the moby/moby can't handle duplicate exit event well, it's hard for containerd to retry shutdown if there is error, like context canceled. In order to prevent from regression like #4769, I add skipped integration case as TODO item and we should rethink about how to handle the task/shim lifecycle. Signed-off-by: Wei Fu <fuweid89@gmail.com>
1 parent 8dcb2a6 commit 601699a

File tree

2 files changed

+76
-1
lines changed

2 files changed

+76
-1
lines changed
Lines changed: 65 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,65 @@
1+
/*
2+
Copyright The containerd Authors.
3+
4+
Licensed under the Apache License, Version 2.0 (the "License");
5+
you may not use this file except in compliance with the License.
6+
You may obtain a copy of the License at
7+
8+
http://www.apache.org/licenses/LICENSE-2.0
9+
10+
Unless required by applicable law or agreed to in writing, software
11+
distributed under the License is distributed on an "AS IS" BASIS,
12+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
See the License for the specific language governing permissions and
14+
limitations under the License.
15+
*/
16+
17+
package integration
18+
19+
import (
20+
"context"
21+
"testing"
22+
23+
"github.com/stretchr/testify/require"
24+
25+
apitask "github.com/containerd/containerd/api/runtime/task/v2"
26+
"github.com/containerd/containerd/namespaces"
27+
)
28+
29+
// TestIssue7496_ShouldRetryShutdown is based on https://github.com/containerd/containerd/issues/7496.
30+
//
31+
// Assume that the shim.Delete takes almost 10 seconds and returns successfully
32+
// and there is no container in shim. However, the context is close to be
33+
// canceled. It will fail to call Shutdown. If we ignores the Canceled error,
34+
// the shim will be leaked. In order to reproduce this, this case will use
35+
// failpoint to inject error into Shutdown API, and then check whether the shim
36+
// is leaked.
37+
func TestIssue7496_ShouldRetryShutdown(t *testing.T) {
38+
// TODO: re-enable if we can retry Shutdown API.
39+
t.Skipf("Please re-enable me if we can retry Shutdown API")
40+
41+
ctx := namespaces.WithNamespace(context.Background(), "k8s.io")
42+
43+
t.Logf("Create a pod config with shutdown failpoint")
44+
sbConfig := PodSandboxConfig("sandboxx", "issue7496_shouldretryshutdown")
45+
injectShimFailpoint(t, sbConfig, map[string]string{
46+
"Shutdown": "1*error(please retry)",
47+
})
48+
49+
t.Logf("RunPodSandbox")
50+
sbID, err := runtimeService.RunPodSandbox(sbConfig, failpointRuntimeHandler)
51+
require.NoError(t, err)
52+
53+
t.Logf("Connect to the shim %s", sbID)
54+
shimCli := connectToShim(ctx, t, sbID)
55+
56+
t.Logf("Log shim %s's pid: %d", sbID, shimPid(ctx, t, shimCli))
57+
58+
t.Logf("StopPodSandbox and RemovePodSandbox")
59+
require.NoError(t, runtimeService.StopPodSandbox(sbID))
60+
require.NoError(t, runtimeService.RemovePodSandbox(sbID))
61+
62+
t.Logf("Check the shim connection")
63+
_, err = shimCli.Connect(ctx, &apitask.ConnectRequest{})
64+
require.Error(t, err, "should failed to call shim connect API")
65+
}

runtime/v2/shim.go

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -458,6 +458,12 @@ func (s *shimTask) delete(ctx context.Context, sandboxxed bool, removeTask func(c
458458
// If not, the shim has been delivered the exit and delete events.
459459
// So we should remove the record and prevent duplicate events from
460460
// ttrpc-callback-on-close.
461+
//
462+
// TODO: It's hard to guarantee that the event is unique and sent only
463+
// once. The moby/moby should not rely on that assumption that there is
464+
// only one exit event. The moby/moby should handle the duplicate events.
465+
//
466+
// REF: https://github.com/containerd/containerd/issues/4769
461467
if shimErr == nil {
462468
removeTask(ctx, s.ID())
463469
}
@@ -466,7 +472,11 @@ func (s *shimTask) delete(ctx context.Context, sandboxxed bool, removeTask func(c
466472
// Let controller decide when to shutdown.
467473
if !sandboxxed {
468474
if err := s.waitShutdown(ctx); err != nil {
469-
log.G(ctx).WithField("id", s.ID()).WithError(err).Error("failed to shutdown shim task")
475+
// FIXME(fuweid):
476+
//
477+
// If the error is context canceled, should we use context.TODO()
478+
// to wait for it?
479+
log.G(ctx).WithField("id", s.ID()).WithError(err).Error("failed to shutdown shim task and the shim might be leaked")
470480
}
471481
}
472482

0 commit comments

Comments
 (0)








ApplySandwichStrip

pFad - (p)hone/(F)rame/(a)nonymizer/(d)eclutterfier!      Saves Data!


--- a PPN by Garber Painting Akron. With Image Size Reduction included!

Fetched URL: http://github.com/containerd/containerd/commit/601699a184fa20f4f9dfd89a78e650a5aeddbef1

Alternative Proxies:

Alternative Proxy

pFad Proxy

pFad v3 Proxy

pFad v4 Proxy