diff --git a/pkg/monitor/sqsevent/asg-lifecycle-event.go b/pkg/monitor/sqsevent/asg-lifecycle-event.go index 6fd4d852..9716ba02 100644 --- a/pkg/monitor/sqsevent/asg-lifecycle-event.go +++ b/pkg/monitor/sqsevent/asg-lifecycle-event.go @@ -126,7 +126,18 @@ func (m SQSMonitor) asgTerminationToInterruptionEvent(event *EventBridgeEvent, m go m.SendHeartbeats(nthConfig.HeartbeatInterval, nthConfig.HeartbeatUntil, lifecycleDetail, stopHeartbeatCh, cancelHeartbeatCh) } - err := n.TaintASGLifecycleTermination(interruptionEvent.NodeName, interruptionEvent.EventID) + // Use provider ID to resolve the actual Kubernetes node name if UseProviderId is configured + nodeName := interruptionEvent.NodeName + if nthConfig.UseProviderId && interruptionEvent.ProviderID != "" { + resolvedNodeName, err := n.GetNodeNameFromProviderID(interruptionEvent.ProviderID) + if err != nil { + log.Warn().Err(err).Str("provider_id", interruptionEvent.ProviderID).Msg("Failed to resolve node name from provider ID, falling back to NodeName from event") + } else { + nodeName = resolvedNodeName + } + } + + err := n.TaintASGLifecycleTermination(nodeName, interruptionEvent.EventID) if err != nil { log.Err(err).Msgf("unable to taint node with taint %s:%s", node.ASGLifecycleTerminationTaint, interruptionEvent.EventID) } diff --git a/pkg/monitor/sqsevent/rebalance-recommendation-event.go b/pkg/monitor/sqsevent/rebalance-recommendation-event.go index 0ecc30fb..baa0aad2 100644 --- a/pkg/monitor/sqsevent/rebalance-recommendation-event.go +++ b/pkg/monitor/sqsevent/rebalance-recommendation-event.go @@ -78,7 +78,19 @@ func (m SQSMonitor) rebalanceRecommendationToInterruptionEvent(event *EventBridg return nil } interruptionEvent.PreDrainTask = func(interruptionEvent monitor.InterruptionEvent, n node.Node) error { - err := n.TaintRebalanceRecommendation(interruptionEvent.NodeName, interruptionEvent.EventID) + // Use provider ID to resolve the actual Kubernetes node name if UseProviderId is configured + nthConfig := n.GetNthConfig() + nodeName := interruptionEvent.NodeName + if nthConfig.UseProviderId && interruptionEvent.ProviderID != "" { + resolvedNodeName, err := n.GetNodeNameFromProviderID(interruptionEvent.ProviderID) + if err != nil { + log.Warn().Err(err).Str("provider_id", interruptionEvent.ProviderID).Msg("Failed to resolve node name from provider ID, falling back to NodeName from event") + } else { + nodeName = resolvedNodeName + } + } + + err := n.TaintRebalanceRecommendation(nodeName, interruptionEvent.EventID) if err != nil { log.Err(err).Msgf("Unable to taint node with taint %s:%s", node.RebalanceRecommendationTaint, interruptionEvent.EventID) } diff --git a/pkg/monitor/sqsevent/scheduled-change-event.go b/pkg/monitor/sqsevent/scheduled-change-event.go index 1fe68cda..f688b9d5 100644 --- a/pkg/monitor/sqsevent/scheduled-change-event.go +++ b/pkg/monitor/sqsevent/scheduled-change-event.go @@ -113,7 +113,19 @@ func (m SQSMonitor) scheduledEventToInterruptionEvents(event *EventBridgeEvent, return nil } interruptionEvent.PreDrainTask = func(interruptionEvent monitor.InterruptionEvent, n node.Node) error { - if err := n.TaintScheduledMaintenance(interruptionEvent.NodeName, interruptionEvent.EventID); err != nil { + // Use provider ID to resolve the actual Kubernetes node name if UseProviderId is configured + nthConfig := n.GetNthConfig() + nodeName := interruptionEvent.NodeName + if nthConfig.UseProviderId && interruptionEvent.ProviderID != "" { + resolvedNodeName, err := n.GetNodeNameFromProviderID(interruptionEvent.ProviderID) + if err != nil { + log.Warn().Err(err).Str("provider_id", interruptionEvent.ProviderID).Msg("Failed to resolve node name from provider ID, falling back to NodeName from event") + } else { + nodeName = resolvedNodeName + } + } + + if err := n.TaintScheduledMaintenance(nodeName, interruptionEvent.EventID); err != nil { log.Err(err).Msgf("Unable to taint node with taint %s:%s", node.ScheduledMaintenanceTaint, interruptionEvent.EventID) } return nil diff --git a/pkg/monitor/sqsevent/spot-itn-event.go b/pkg/monitor/sqsevent/spot-itn-event.go index d0aa476e..a5f21e1e 100644 --- a/pkg/monitor/sqsevent/spot-itn-event.go +++ b/pkg/monitor/sqsevent/spot-itn-event.go @@ -80,7 +80,19 @@ func (m SQSMonitor) spotITNTerminationToInterruptionEvent(event *EventBridgeEven return nil } interruptionEvent.PreDrainTask = func(interruptionEvent monitor.InterruptionEvent, n node.Node) error { - err := n.TaintSpotItn(interruptionEvent.NodeName, interruptionEvent.EventID) + // Use provider ID to resolve the actual Kubernetes node name if UseProviderId is configured + nthConfig := n.GetNthConfig() + nodeName := interruptionEvent.NodeName + if nthConfig.UseProviderId && interruptionEvent.ProviderID != "" { + resolvedNodeName, err := n.GetNodeNameFromProviderID(interruptionEvent.ProviderID) + if err != nil { + log.Warn().Err(err).Str("provider_id", interruptionEvent.ProviderID).Msg("Failed to resolve node name from provider ID, falling back to NodeName from event") + } else { + nodeName = resolvedNodeName + } + } + + err := n.TaintSpotItn(nodeName, interruptionEvent.EventID) if err != nil { log.Err(err).Msgf("Unable to taint node with taint %s:%s", node.SpotInterruptionTaint, interruptionEvent.EventID) } diff --git a/pkg/node/node.go b/pkg/node/node.go index 2e1022bf..b80d6ebf 100644 --- a/pkg/node/node.go +++ b/pkg/node/node.go @@ -662,6 +662,7 @@ func (n Node) fetchKubernetesNode(nodeName string) (*corev1.Node, error) { }, }, } + listOptions := metav1.ListOptions{LabelSelector: metav1.FormatLabelSelector(&labelSelector)} matchingNodes, err := n.drainHelper.Client.CoreV1().Nodes().List(context.TODO(), listOptions) if err != nil || len(matchingNodes.Items) == 0 {