argoproj · RoelofKuijpers · Apr 9, 2025 · Apr 9, 2025 · Apr 9, 2025 · Apr 9, 2025
@@ -12,6 +12,10 @@ import (
 	"github.com/argoproj/gitops-engine/pkg/utils/kube"
 )
 
+const (
+	// AnnotationIgnoreRestartPolicy is a Pod annotation key. When it is present
+	// (only the key is checked, so any value, even "false", counts), a Running pod
+	// with restart policy OnFailure or Never has its health assessed the same way
+	// as a pod with restart policy Always, instead of being reported as Progressing.
+	AnnotationIgnoreRestartPolicy = "argocd.argoproj.io/ignore-restart-policy"
+)
+
 func getPodHealth(obj *unstructured.Unstructured) (*HealthStatus, error) {
 	gvk := obj.GroupVersionKind()
 	switch gvk {
@@ -93,9 +97,9 @@ func getCorev1PodHealth(pod *corev1.Pod) (*HealthStatus, error) {
 		}
 
 		return &HealthStatus{Status: HealthStatusDegraded, Message: ""}, nil
+
 	case corev1.PodRunning:
-		switch pod.Spec.RestartPolicy {
-		case corev1.RestartPolicyAlways:
+		getHealthStatus := func(pod *corev1.Pod) (*HealthStatus, error) {
 			// if pod is ready, it is automatically healthy
 			if podutils.IsPodReady(pod) {
 				return &HealthStatus{
@@ -117,10 +121,18 @@ func getCorev1PodHealth(pod *corev1.Pod) (*HealthStatus, error) {
 				Status:  HealthStatusProgressing,
 				Message: pod.Status.Message,
 			}, nil
-		case corev1.RestartPolicyOnFailure, corev1.RestartPolicyNever:
-			// pods set with a restart policy of OnFailure or Never, have a finite life.
+		}
+		policy := pod.Spec.RestartPolicy
+		if _, ok := pod.Annotations[AnnotationIgnoreRestartPolicy]; ok || policy == corev1.RestartPolicyAlways {
+			return getHealthStatus(pod)
+		}
+
+		if policy == corev1.RestartPolicyOnFailure || policy == corev1.RestartPolicyNever {
+			// Most pods set with a restart policy of OnFailure or Never, have a finite life.
 			// These pods are typically resource hooks. Thus, we consider these as Progressing
-			// instead of healthy.
+			// instead of healthy. If this is unwanted, e.g., when the pod is managed by an
+			// operator and therefore has a restart policy of OnFailure or Never, then use the
+			// AnnotationIgnoreRestartPolicy annotation.
 			return &HealthStatus{
 				Status:  HealthStatusProgressing,
 				Message: pod.Status.Message,

@@ -103,6 +103,7 @@ func TestPod(t *testing.T) {
 	assertAppHealth(t, "./testdata/pod-error.yaml", HealthStatusDegraded)
 	assertAppHealth(t, "./testdata/pod-running-restart-always.yaml", HealthStatusHealthy)
 	assertAppHealth(t, "./testdata/pod-running-restart-never.yaml", HealthStatusProgressing)
+	assertAppHealth(t, "./testdata/pod-running-restart-never-with-ignore-annotation.yaml", HealthStatusHealthy)
 	assertAppHealth(t, "./testdata/pod-running-restart-onfailure.yaml", HealthStatusProgressing)
 	assertAppHealth(t, "./testdata/pod-failed.yaml", HealthStatusDegraded)
 	assertAppHealth(t, "./testdata/pod-succeeded.yaml", HealthStatusHealthy)

@@ -0,0 +1,87 @@
+# Test fixture: a Pod in phase Running whose Ready condition is "True", with
+# restartPolicy Never and the ignore-restart-policy annotation set. TestPod
+# expects this pod to be reported as HealthStatusHealthy.
+apiVersion: v1
+kind: Pod
+metadata:
+  creationTimestamp: 2018-12-02T09:15:16Z
+  name: my-pod
+  namespace: argocd
+  resourceVersion: "151053"
+  selfLink: /api/v1/namespaces/argocd/pods/my-pod
+  uid: c86e909c-f612-11e8-a057-fe5f49266390
+  annotations:
+    # Opts this pod out of the restart-policy-based Progressing status.
+    argocd.argoproj.io/ignore-restart-policy: "true"
+spec:
+  containers:
+  - command:
+    - sh
+    - -c
+    - sleep 10
+    image: alpine:3.21
+    imagePullPolicy: Always
+    name: main
+    resources:
+      requests:
+        ephemeral-storage: "100Mi"
+        memory: "128Mi"
+        cpu: "250m"
+      limits:
+        memory: "256Mi"
+        cpu: "500m"
+    terminationMessagePath: /dev/termination-log
+    terminationMessagePolicy: File
+    volumeMounts:
+    - mountPath: /var/run/secrets/kubernetes.io/serviceaccount
+      name: default-token-f9jvj
+      readOnly: true
+  dnsPolicy: ClusterFirst
+  nodeName: minikube
+  restartPolicy: Never
+  schedulerName: default-scheduler
+  securityContext: {}
+  serviceAccount: default
+  serviceAccountName: default
+  automountServiceAccountToken: false
+  terminationGracePeriodSeconds: 30
+  tolerations:
+  - effect: NoExecute
+    key: node.kubernetes.io/not-ready
+    operator: Exists
+    tolerationSeconds: 300
+  - effect: NoExecute
+    key: node.kubernetes.io/unreachable
+    operator: Exists
+    tolerationSeconds: 300
+  volumes:
+  - name: default-token-f9jvj
+    secret:
+      defaultMode: 420
+      secretName: default-token-f9jvj
+status:
+  conditions:
+  - lastProbeTime: null
+    lastTransitionTime: 2018-12-02T09:15:16Z
+    status: "True"
+    type: Initialized
+  - lastProbeTime: null
+    lastTransitionTime: 2018-12-02T09:15:19Z
+    status: "True"
+    type: Ready
+  - lastProbeTime: null
+    lastTransitionTime: 2018-12-02T09:15:16Z
+    status: "True"
+    type: PodScheduled
+  containerStatuses:
+  - containerID: containerd://adc73c2c0ae3f1fd9bf294abd834e740042ee375de680c0cfcdd90d863a73b8b
+    image: alpine:3.21
+    imageID: docker.io/library/alpine@sha256:a8560b36e8b8210634f77d9f7f9efd7ffa463e380b75e2e74aff4511df3ef88c
+    lastState: {}
+    name: main
+    ready: true
+    restartCount: 0
+    state:
+      running:
+        startedAt: 2018-12-02T09:15:19Z
+  hostIP: 192.168.64.41
+  phase: Running
+  podIP: 172.17.0.9
+  qosClass: BestEffort
+  startTime: 2018-12-02T09:15:16Z