Check for Node Restarts in Cluster gke_runwhen-nonprod-sandbox_us-central1_sandbox-cluster-1-cluster
What does it do?
This Bash script fetches node-related events within a specified time range from a Kubernetes context. It groups the events by node, labels each node as a preemptible/spot instance (for GCP, AWS, or Azure) or as unidentified/unplanned, and counts the unique nodes that were started and stopped.
#!/bin/bash
#
# Collect recent node lifecycle events from a Kubernetes cluster.
#
# Environment:
#   CONTEXT  - kubectl context to query. When unset or empty, kubectl's
#              current context is used.
#   INTERVAL - look-back window as a GNU date relative expression,
#              e.g. "24 hours". Defaults to 24 hours when unset or empty.
#
# Output:
#   node_events.txt - one "timestamp node reason message" record per line,
#                     de-duplicated, restricted to start/stop related events.
#
# Requires GNU date (for `date -d`), kubectl, awk, grep and sort.
set -euo pipefail

# Read the context passed as an environment variable.
context=${CONTEXT:-}

# Time interval for fetching the events (e.g., 24 hours).
interval=${INTERVAL:-24 hours}

# Current date and time in ISO 8601 format (UTC).
CURRENT_DATE=$(date -u +"%Y-%m-%dT%H:%M:%SZ")

# Start of the look-back window, computed with GNU date.
START_DATE=$(date -u -d "$interval ago" +"%Y-%m-%dT%H:%M:%SZ")

# Only pass --context when one was supplied; an empty value would otherwise
# make kubectl consume the following flag as the context name.
kubectl_args=(get events -A)
if [[ -n "$context" ]]; then
  kubectl_args+=(--context "$context")
fi

# Fetch all node-related events, keep those inside the time window whose text
# mentions a start/stop keyword, and write the unique records to a file.
# ISO 8601 UTC timestamps compare correctly as plain strings in awk.
# grep exits 1 when nothing matches; that is a valid "no events" result, so
# only a real grep error (status > 1) is allowed to fail the pipeline.
if ! kubectl "${kubectl_args[@]}" \
    --field-selector involvedObject.kind=Node \
    --output=jsonpath='{range .items[*]}{.lastTimestamp}{" "}{.involvedObject.name}{" "}{.reason}{" "}{.message}{"\n"}{end}' \
  | awk -v start="$START_DATE" -v end="$CURRENT_DATE" '$1 >= start && $1 <= end' \
  | { grep -E "(Preempt|Shutdown|Drain|Termination|Removed|RemovingNode|Deleted|NodeReady|RegisteredNode)" || [[ $? -eq 1 ]]; } \
  | sort -u > node_events.txt; then
  printf 'error: failed to collect node events\n' >&2
  exit 1
fi
#######################################
# Classify a node as a preemptible/spot instance from its cloud-provider
# labels (GCP, AWS, Azure).
# Globals:
#   context / CONTEXT (read) - kubectl context to query; when both are unset
#                              or empty, kubectl's current context is used.
# Arguments:
#   $1 - node name
# Outputs:
#   Writes exactly one of "Preemptible (GCP)", "Spot (AWS)", "Spot (Azure)"
#   or "Unidentified/Unplanned" to stdout.
# Returns:
#   0 always; lookup failures are treated as "label not present".
#######################################
check_preemptible_node() {
  local node=$1
  local ctx=${context:-${CONTEXT:-}}
  local label_value

  # Query the same cluster the events came from, not whatever context
  # happens to be current.
  local -a kubectl_cmd=(kubectl)
  if [[ -n "$ctx" ]]; then
    kubectl_cmd+=(--context "$ctx")
  fi

  # Lookup errors are deliberately ignored (stderr dropped, status masked):
  # a node that has already been removed simply yields an empty label.

  # GCP preemptible VMs.
  label_value=$("${kubectl_cmd[@]}" get node "$node" \
    -o jsonpath='{.metadata.labels.cloud\.google\.com/gke-preemptible}' 2>/dev/null) || true
  if [[ "$label_value" == "true" ]]; then
    echo "Preemptible (GCP)"
    return 0
  fi

  # AWS EKS spot capacity.
  label_value=$("${kubectl_cmd[@]}" get node "$node" \
    -o jsonpath='{.metadata.labels.eks\.amazonaws\.com/capacityType}' 2>/dev/null) || true
  if [[ "$label_value" == "SPOT" ]]; then
    echo "Spot (AWS)"
    return 0
  fi

  # Azure spot scale sets.
  label_value=$("${kubectl_cmd[@]}" get node "$node" \
    -o jsonpath='{.metadata.labels.kubernetes\.azure\.com/scalesetpriority}' 2>/dev/null) || true
  if [[ "$label_value" == "spot" ]]; then
    echo "Spot (Azure)"
    return 0
  fi

  echo "Unidentified/Unplanned"
}

# Track unique nodes started and stopped using associative arrays
# (node name -> 1).
declare -A nodes_started=()
declare -A nodes_stopped=()
# Maps of node name -> 1 for nodes seen starting / stopping. Initialised here
# so the counts below always start from empty associative arrays.
declare -A nodes_started=()
declare -A nodes_stopped=()

#######################################
# Print a per-node activity report and record which nodes were started or
# stopped.
# Globals:
#   nodes_started, nodes_stopped (written) - node name -> 1
# Arguments:
#   $1 - events file with one "timestamp node reason message" record per
#        line (defaults to node_events.txt)
# Outputs:
#   Report on stdout; a warning on stderr when the file cannot be read.
# Returns:
#   0 always; an unreadable file is treated as "no events".
#######################################
summarize_node_events() {
  local events_file=${1:-node_events.txt}
  local line node preempt_status
  local current_node=""
  local -r started_re='(NodeReady|RegisteredNode)'
  local -r stopped_re='(Shutdown|Preempt|Termination|Removed)'

  if [[ ! -r "$events_file" ]]; then
    printf 'warning: cannot read %s; no node events to summarize\n' "$events_file" >&2
    return 0
  fi

  # Order records by node, then by timestamp, so that every node gets exactly
  # one section even when events of different nodes interleave in time.
  while read -r line; do
    # Second whitespace-separated field is the node name.
    read -r _ node _ <<<"$line"
    if [[ -z "$node" ]]; then
      continue # blank or malformed record: nothing to attribute it to
    fi

    # Print the node header once per node.
    if [[ "$current_node" != "$node" ]]; then
      if [[ -n "$current_node" ]]; then
        echo "" # Empty line between different nodes for readability
      fi
      # Classified once per node rather than once per event line; stdin is
      # detached so the lookup cannot consume the remaining event records.
      preempt_status=$(check_preemptible_node "$node" </dev/null)
      echo "Node: $node"
      echo "Type: $preempt_status"
      echo "Activities:"
      current_node="$node"
    fi

    # Determine whether the record marks a start or a stop. The whole record
    # (reason and message) is matched, and "started" takes precedence.
    # NOTE(review): "Drain", "RemovingNode" and "Deleted" records are reported
    # but counted as neither start nor stop - confirm that is intended.
    if [[ "$line" =~ $started_re ]]; then
      nodes_started["$node"]=1
    elif [[ "$line" =~ $stopped_re ]]; then
      nodes_stopped["$node"]=1
    fi

    # Print the event details for the node.
    printf ' - %s\n' "$line"
  done < <(LC_ALL=C sort -k2,2 -k1,1 -- "$events_file")
}

summarize_node_events node_events.txt

# Summary of unique nodes started and stopped.
unique_nodes_started=${#nodes_started[@]}
unique_nodes_stopped=${#nodes_stopped[@]}
# NOTE(review): this is the sum of the two unique-node counts, not a count of
# individual events, although the label below says "events".
total_node_events=$((unique_nodes_started + unique_nodes_stopped))

echo ""
echo "Summary:"
echo "Unique nodes started: $unique_nodes_started"
echo "Unique nodes stopped: $unique_nodes_stopped"
echo "Total start/stop events: $total_node_events"