Skip to content

Commit c6f17b7

Browse files
fix: stuck at 'Progressing' #15317
Signed-off-by: Roelof Kuijpers <roelof.kuijpers@energyessentials.nl>
1 parent c617562 commit c6f17b7

File tree

2 files changed

+103
-10
lines changed

2 files changed

+103
-10
lines changed

pkg/health/health_pod.go

+24-10
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,10 @@ import (
1212
"github.com/argoproj/gitops-engine/pkg/utils/kube"
1313
)
1414

15+
const (
16+
AnnotationIgnoreRestartPolicy = "argocd.argoproj.io/ignore-restart-policy"
17+
)
18+
1519
func getPodHealth(obj *unstructured.Unstructured) (*HealthStatus, error) {
1620
gvk := obj.GroupVersionKind()
1721
switch gvk {
@@ -93,9 +97,9 @@ func getCorev1PodHealth(pod *corev1.Pod) (*HealthStatus, error) {
9397
}
9498

9599
return &HealthStatus{Status: HealthStatusDegraded, Message: ""}, nil
100+
96101
case corev1.PodRunning:
97-
switch pod.Spec.RestartPolicy {
98-
case corev1.RestartPolicyAlways:
102+
getHealthStatus := func(pod *corev1.Pod) (*HealthStatus, error) {
99103
// if pod is ready, it is automatically healthy
100104
if podutils.IsPodReady(pod) {
101105
return &HealthStatus{
@@ -117,14 +121,24 @@ func getCorev1PodHealth(pod *corev1.Pod) (*HealthStatus, error) {
117121
Status: HealthStatusProgressing,
118122
Message: pod.Status.Message,
119123
}, nil
120-
case corev1.RestartPolicyOnFailure, corev1.RestartPolicyNever:
121-
// pods set with a restart policy of OnFailure or Never, have a finite life.
122-
// These pods are typically resource hooks. Thus, we consider these as Progressing
123-
// instead of healthy.
124-
return &HealthStatus{
125-
Status: HealthStatusProgressing,
126-
Message: pod.Status.Message,
127-
}, nil
124+
}
125+
if _, hook := pod.Annotations[AnnotationIgnoreRestartPolicy]; hook {
126+
return getHealthStatus(pod)
127+
} else {
128+
switch pod.Spec.RestartPolicy {
129+
case corev1.RestartPolicyAlways:
130+
return getHealthStatus(pod)
131+
case corev1.RestartPolicyOnFailure, corev1.RestartPolicyNever:
132+
// Most pods with a restart policy of OnFailure or Never have a finite life.
133+
// These pods are typically resource hooks. Thus, we consider these as Progressing
134+
// instead of healthy. If this is unwanted, e.g., when the pod is managed by an
135+
// operator and therefore has a restart policy of OnFailure or Never, then use the
136+
// AnnotationIgnoreRestartPolicy annotation.
137+
return &HealthStatus{
138+
Status: HealthStatusProgressing,
139+
Message: pod.Status.Message,
140+
}, nil
141+
}
128142
}
129143
}
130144
return &HealthStatus{
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,79 @@
1+
apiVersion: v1
2+
kind: Pod
3+
metadata:
4+
creationTimestamp: 2018-12-02T09:15:16Z
5+
name: my-pod
6+
namespace: argocd
7+
resourceVersion: "151053"
8+
selfLink: /api/v1/namespaces/argocd/pods/my-pod
9+
uid: c86e909c-f612-11e8-a057-fe5f49266390
10+
annotations:
11+
argocd.argoproj.io/ignore-restart-policy: "true"
12+
spec:
13+
containers:
14+
- command:
15+
- sh
16+
- -c
17+
- sleep 10
18+
image: alpine:latest
19+
imagePullPolicy: Always
20+
name: main
21+
resources: {}
22+
terminationMessagePath: /dev/termination-log
23+
terminationMessagePolicy: File
24+
volumeMounts:
25+
- mountPath: /var/run/secrets/kubernetes.io/serviceaccount
26+
name: default-token-f9jvj
27+
readOnly: true
28+
dnsPolicy: ClusterFirst
29+
nodeName: minikube
30+
restartPolicy: Never
31+
schedulerName: default-scheduler
32+
securityContext: {}
33+
serviceAccount: default
34+
serviceAccountName: default
35+
terminationGracePeriodSeconds: 30
36+
tolerations:
37+
- effect: NoExecute
38+
key: node.kubernetes.io/not-ready
39+
operator: Exists
40+
tolerationSeconds: 300
41+
- effect: NoExecute
42+
key: node.kubernetes.io/unreachable
43+
operator: Exists
44+
tolerationSeconds: 300
45+
volumes:
46+
- name: default-token-f9jvj
47+
secret:
48+
defaultMode: 420
49+
secretName: default-token-f9jvj
50+
status:
51+
conditions:
52+
- lastProbeTime: null
53+
lastTransitionTime: 2018-12-02T09:15:16Z
54+
status: "True"
55+
type: Initialized
56+
- lastProbeTime: null
57+
lastTransitionTime: 2018-12-02T09:15:19Z
58+
status: "True"
59+
type: Ready
60+
- lastProbeTime: null
61+
lastTransitionTime: 2018-12-02T09:15:16Z
62+
status: "True"
63+
type: PodScheduled
64+
containerStatuses:
65+
- containerID: docker://acfb261d6c1fe8c543438a202de62cb06c137fa93a2d59262d764470e96f3195
66+
image: alpine:latest
67+
imageID: docker-pullable://alpine@sha256:621c2f39f8133acb8e64023a94dbdf0d5ca81896102b9e57c0dc184cadaf5528
68+
lastState: {}
69+
name: main
70+
ready: true
71+
restartCount: 0
72+
state:
73+
running:
74+
startedAt: 2018-12-02T09:15:19Z
75+
hostIP: 192.168.64.41
76+
phase: Running
77+
podIP: 172.17.0.9
78+
qosClass: BestEffort
79+
startTime: 2018-12-02T09:15:16Z

0 commit comments

Comments
 (0)