From 31b79461d8b02587618c658f5ad0f87a18236138 Mon Sep 17 00:00:00 2001 From: alimaazamat Date: Tue, 29 Jul 2025 16:20:35 -0700 Subject: [PATCH 1/4] fix: supports seamless upgrade socket path name with uid --- cmd/dra-example-kubeletplugin/health.go | 68 +++++++++++++++++++++++-- 1 file changed, 64 insertions(+), 4 deletions(-) diff --git a/cmd/dra-example-kubeletplugin/health.go b/cmd/dra-example-kubeletplugin/health.go index 47f813d7..23e1a450 100644 --- a/cmd/dra-example-kubeletplugin/health.go +++ b/cmd/dra-example-kubeletplugin/health.go @@ -21,8 +21,11 @@ import ( "fmt" "net" "net/url" + "os" "path" + "path/filepath" "strconv" + "strings" "sync" "google.golang.org/grpc" @@ -63,9 +66,19 @@ func startHealthcheck(ctx context.Context, config *Config) (*healthcheck, error) regSockPath := (&url.URL{ Scheme: "unix", - // TODO: this needs to adapt when seamless upgrades - // are enabled and the filename includes a uid. - Path: path.Join(config.flags.kubeletRegistrarDirectoryPath, consts.DriverName+"-reg.sock"), + // Support both legacy and seamless upgrade socket naming conventions + Path: func() string { + socketPath, err := findSocketPath( + config.flags.kubeletRegistrarDirectoryPath, + consts.DriverName, + "-reg.sock", + ) + if err != nil { + // Fallback to legacy path name + return path.Join(config.flags.kubeletRegistrarDirectoryPath, consts.DriverName+"-reg.sock") + } + return socketPath + }(), }).String() log.Info("connecting to registration socket", "path", regSockPath) regConn, err := grpc.NewClient( @@ -78,7 +91,18 @@ func startHealthcheck(ctx context.Context, config *Config) (*healthcheck, error) draSockPath := (&url.URL{ Scheme: "unix", - Path: path.Join(config.DriverPluginPath(), "dra.sock"), + Path: func() string { + socketPath, err := findSocketPath( + config.DriverPluginPath(), + "dra", + ".sock", + ) + if err != nil { + // Fallback to legacy path name + return path.Join(config.DriverPluginPath(), "dra.sock") + } + return socketPath + }(), }).String() 
log.Info("connecting to DRA socket", "path", draSockPath) draConn, err := grpc.NewClient( @@ -147,3 +171,39 @@ func (h *healthcheck) Check(ctx context.Context, req *grpc_health_v1.HealthCheck status.Status = grpc_health_v1.HealthCheckResponse_SERVING return status, nil } + +// Finds driver's socket paths whether its legacy (fixed filename) or seamless upgrade (UID-based filename) formats. +func findSocketPath(dir, baseName, suffix string) (string, error) { + // First try the legacy path name: {baseName}{suffix} + legacyPath := filepath.Join(dir, baseName+suffix) + if _, err := os.Stat(legacyPath); err == nil { + return legacyPath, nil + } + + // Then try the seamless upgrade format: {baseName}-{uid}{suffix} + entries, err := os.ReadDir(dir) + if err != nil { + return "", fmt.Errorf("failed to read directory %s: %w", dir, err) + } + + for _, entry := range entries { + if entry.IsDir() { + continue + } + name := entry.Name() + + // Look for files matching pattern: {baseName}-{uid}{suffix} + if strings.HasPrefix(name, baseName+"-") && strings.HasSuffix(name, suffix) { + // Verify it's not the legacy format (which would be {baseName}{suffix}) + if name != baseName+suffix { + socketPath := filepath.Join(dir, name) + klog.Info("Found seamless upgrade socket", "path", socketPath, "uid", strings.TrimPrefix(strings.TrimSuffix(name, suffix), baseName+"-")) + return socketPath, nil + } + } + } + + // If neither path name types are found, return the legacy path for error reporting + return legacyPath, fmt.Errorf("socket file not found in dir %s (tried legacy path name %s and seamless upgrade with uid path name %s-*%s)", + dir, baseName+suffix, baseName, suffix) +} From 9bd686f91d0b181bad1884b4a07eaf73740c38f5 Mon Sep 17 00:00:00 2001 From: alimaazamat Date: Wed, 30 Jul 2025 15:18:36 -0700 Subject: [PATCH 2/4] feat: helm chart value + conditional POD_UID env var for seamless upgrades --- cmd/dra-example-kubeletplugin/driver.go | 15 +++-- cmd/dra-example-kubeletplugin/health.go 
| 64 ++----------------- cmd/dra-example-kubeletplugin/main.go | 7 ++ .../templates/kubeletplugin.yaml | 14 ++++ .../helm/dra-example-driver/values.yaml | 2 + 5 files changed, 40 insertions(+), 62 deletions(-) diff --git a/cmd/dra-example-kubeletplugin/driver.go b/cmd/dra-example-kubeletplugin/driver.go index 0f6e224c..d7b30b11 100644 --- a/cmd/dra-example-kubeletplugin/driver.go +++ b/cmd/dra-example-kubeletplugin/driver.go @@ -53,15 +53,22 @@ func NewDriver(ctx context.Context, config *Config) (*driver, error) { } driver.state = state - helper, err := kubeletplugin.Start( - ctx, - driver, + kubeletPluginOptions := []kubeletplugin.Option{ kubeletplugin.KubeClient(config.coreclient), kubeletplugin.NodeName(config.flags.nodeName), kubeletplugin.DriverName(consts.DriverName), kubeletplugin.RegistrarDirectoryPath(config.flags.kubeletRegistrarDirectoryPath), kubeletplugin.PluginDataDirectoryPath(config.DriverPluginPath()), - ) + } + + // Enable seamless upgrades when POD_UID is available (which gets set by Helm when seamlessUpgrades.enabled=true) + if config.flags.podUID != "" { + kubeletPluginOptions = append(kubeletPluginOptions, + kubeletplugin.RollingUpdate(types.UID(config.flags.podUID)), + ) + } + + helper, err := kubeletplugin.Start(ctx, driver, kubeletPluginOptions...) 
if err != nil { return nil, err } diff --git a/cmd/dra-example-kubeletplugin/health.go b/cmd/dra-example-kubeletplugin/health.go index 23e1a450..6c119a61 100644 --- a/cmd/dra-example-kubeletplugin/health.go +++ b/cmd/dra-example-kubeletplugin/health.go @@ -21,11 +21,8 @@ import ( "fmt" "net" "net/url" - "os" "path" - "path/filepath" "strconv" - "strings" "sync" "google.golang.org/grpc" @@ -66,18 +63,11 @@ func startHealthcheck(ctx context.Context, config *Config) (*healthcheck, error) regSockPath := (&url.URL{ Scheme: "unix", - // Support both legacy and seamless upgrade socket naming conventions Path: func() string { - socketPath, err := findSocketPath( - config.flags.kubeletRegistrarDirectoryPath, - consts.DriverName, - "-reg.sock", - ) - if err != nil { - // Fallback to legacy path name - return path.Join(config.flags.kubeletRegistrarDirectoryPath, consts.DriverName+"-reg.sock") + if config.flags.podUID != "" { + return path.Join(config.flags.kubeletRegistrarDirectoryPath, consts.DriverName+"-"+config.flags.podUID+"-reg.sock") } - return socketPath + return path.Join(config.flags.kubeletRegistrarDirectoryPath, consts.DriverName+"-reg.sock") }(), }).String() log.Info("connecting to registration socket", "path", regSockPath) @@ -92,16 +82,10 @@ func startHealthcheck(ctx context.Context, config *Config) (*healthcheck, error) draSockPath := (&url.URL{ Scheme: "unix", Path: func() string { - socketPath, err := findSocketPath( - config.DriverPluginPath(), - "dra", - ".sock", - ) - if err != nil { - // Fallback to legacy path name - return path.Join(config.DriverPluginPath(), "dra.sock") + if config.flags.podUID != "" { + return path.Join(config.DriverPluginPath(), "dra-"+config.flags.podUID+".sock") } - return socketPath + return path.Join(config.DriverPluginPath(), "dra.sock") }(), }).String() log.Info("connecting to DRA socket", "path", draSockPath) @@ -171,39 +155,3 @@ func (h *healthcheck) Check(ctx context.Context, req *grpc_health_v1.HealthCheck status.Status = 
grpc_health_v1.HealthCheckResponse_SERVING return status, nil } - -// Finds driver's socket paths whether its legacy (fixed filename) or seamless upgrade (UID-based filename) formats. -func findSocketPath(dir, baseName, suffix string) (string, error) { - // First try the legacy path name: {baseName}{suffix} - legacyPath := filepath.Join(dir, baseName+suffix) - if _, err := os.Stat(legacyPath); err == nil { - return legacyPath, nil - } - - // Then try the seamless upgrade format: {baseName}-{uid}{suffix} - entries, err := os.ReadDir(dir) - if err != nil { - return "", fmt.Errorf("failed to read directory %s: %w", dir, err) - } - - for _, entry := range entries { - if entry.IsDir() { - continue - } - name := entry.Name() - - // Look for files matching pattern: {baseName}-{uid}{suffix} - if strings.HasPrefix(name, baseName+"-") && strings.HasSuffix(name, suffix) { - // Verify it's not the legacy format (which would be {baseName}{suffix}) - if name != baseName+suffix { - socketPath := filepath.Join(dir, name) - klog.Info("Found seamless upgrade socket", "path", socketPath, "uid", strings.TrimPrefix(strings.TrimSuffix(name, suffix), baseName+"-")) - return socketPath, nil - } - } - } - - // If neither path name types are found, return the legacy path for error reporting - return legacyPath, fmt.Errorf("socket file not found in dir %s (tried legacy path name %s and seamless upgrade with uid path name %s-*%s)", - dir, baseName+suffix, baseName, suffix) -} diff --git a/cmd/dra-example-kubeletplugin/main.go b/cmd/dra-example-kubeletplugin/main.go index e0cfaf9c..30a9a05b 100644 --- a/cmd/dra-example-kubeletplugin/main.go +++ b/cmd/dra-example-kubeletplugin/main.go @@ -49,6 +49,7 @@ type Flags struct { kubeletRegistrarDirectoryPath string kubeletPluginsDirectoryPath string healthcheckPort int + podUID string } type Config struct { @@ -115,6 +116,12 @@ func newApp() *cli.App { Destination: &flags.healthcheckPort, EnvVars: []string{"HEALTHCHECK_PORT"}, }, + &cli.StringFlag{ + 
Name: "pod-uid", + Usage: "UID of the pod (used for seamless upgrades to create unique socket names).", + Destination: &flags.podUID, + EnvVars: []string{"POD_UID"}, + }, } cliFlags = append(cliFlags, flags.kubeClientConfig.Flags()...) cliFlags = append(cliFlags, flags.loggingConfig.Flags()...) diff --git a/deployments/helm/dra-example-driver/templates/kubeletplugin.yaml b/deployments/helm/dra-example-driver/templates/kubeletplugin.yaml index bcf44ffd..b376c793 100644 --- a/deployments/helm/dra-example-driver/templates/kubeletplugin.yaml +++ b/deployments/helm/dra-example-driver/templates/kubeletplugin.yaml @@ -12,10 +12,18 @@ spec: matchLabels: {{- include "dra-example-driver.selectorLabels" . | nindent 6 }} app.kubernetes.io/component: kubeletplugin + {{- if .Values.kubeletPlugin.seamlessUpgrades.enabled }} + updateStrategy: + type: RollingUpdate + rollingUpdate: + maxSurge: 1 + maxUnavailable: 0 + {{- else }} {{- with .Values.kubeletPlugin.updateStrategy }} updateStrategy: {{- toYaml . 
| nindent 4 }} {{- end }} + {{- end }} template: metadata: {{- with .Values.kubeletPlugin.podAnnotations }} @@ -80,6 +88,12 @@ spec: - name: HEALTHCHECK_PORT value: {{ .Values.kubeletPlugin.containers.plugin.healthcheckPort | quote }} {{- end }} + {{- if .Values.kubeletPlugin.seamlessUpgrades.enabled }} + - name: POD_UID + valueFrom: + fieldRef: + fieldPath: metadata.uid + {{- end }} volumeMounts: - name: plugins-registry mountPath: {{ .Values.kubeletPlugin.kubeletRegistrarDirectoryPath | quote }} diff --git a/deployments/helm/dra-example-driver/values.yaml b/deployments/helm/dra-example-driver/values.yaml index e58c9d33..51ff8de8 100644 --- a/deployments/helm/dra-example-driver/values.yaml +++ b/deployments/helm/dra-example-driver/values.yaml @@ -47,6 +47,8 @@ controller: kubeletPlugin: numDevices: 8 priorityClassName: "system-node-critical" + seamlessUpgrades: + enabled: false updateStrategy: type: RollingUpdate podAnnotations: {} From ce936de629ca9b620af12ba638d93524fa1b3ced Mon Sep 17 00:00:00 2001 From: alimaazamat Date: Tue, 26 Aug 2025 15:05:17 -0700 Subject: [PATCH 3/4] seamless upgrades cli flag + kubeletplugin yaml accounts for updateStrategy values --- cmd/dra-example-kubeletplugin/driver.go | 4 ++-- cmd/dra-example-kubeletplugin/health.go | 4 ++-- cmd/dra-example-kubeletplugin/main.go | 7 +++++++ .../dra-example-driver/templates/kubeletplugin.yaml | 12 +++++++----- 4 files changed, 18 insertions(+), 9 deletions(-) diff --git a/cmd/dra-example-kubeletplugin/driver.go b/cmd/dra-example-kubeletplugin/driver.go index d7b30b11..a8cfa1e4 100644 --- a/cmd/dra-example-kubeletplugin/driver.go +++ b/cmd/dra-example-kubeletplugin/driver.go @@ -61,8 +61,8 @@ func NewDriver(ctx context.Context, config *Config) (*driver, error) { kubeletplugin.PluginDataDirectoryPath(config.DriverPluginPath()), } - // Enable seamless upgrades when POD_UID is available (which gets set by Helm when seamlessUpgrades.enabled=true) - if config.flags.podUID != "" { + // Enable seamless 
upgrades when both seamless upgrades are enabled and POD_UID is available + if config.flags.seamlessUpgrades && config.flags.podUID != "" { kubeletPluginOptions = append(kubeletPluginOptions, kubeletplugin.RollingUpdate(types.UID(config.flags.podUID)), ) diff --git a/cmd/dra-example-kubeletplugin/health.go b/cmd/dra-example-kubeletplugin/health.go index 6c119a61..67d7fd3f 100644 --- a/cmd/dra-example-kubeletplugin/health.go +++ b/cmd/dra-example-kubeletplugin/health.go @@ -64,7 +64,7 @@ func startHealthcheck(ctx context.Context, config *Config) (*healthcheck, error) regSockPath := (&url.URL{ Scheme: "unix", Path: func() string { - if config.flags.podUID != "" { + if config.flags.seamlessUpgrades && config.flags.podUID != "" { return path.Join(config.flags.kubeletRegistrarDirectoryPath, consts.DriverName+"-"+config.flags.podUID+"-reg.sock") } return path.Join(config.flags.kubeletRegistrarDirectoryPath, consts.DriverName+"-reg.sock") @@ -82,7 +82,7 @@ func startHealthcheck(ctx context.Context, config *Config) (*healthcheck, error) draSockPath := (&url.URL{ Scheme: "unix", Path: func() string { - if config.flags.podUID != "" { + if config.flags.seamlessUpgrades && config.flags.podUID != "" { return path.Join(config.DriverPluginPath(), "dra-"+config.flags.podUID+".sock") } return path.Join(config.DriverPluginPath(), "dra.sock") diff --git a/cmd/dra-example-kubeletplugin/main.go b/cmd/dra-example-kubeletplugin/main.go index 30a9a05b..4ea40c4a 100644 --- a/cmd/dra-example-kubeletplugin/main.go +++ b/cmd/dra-example-kubeletplugin/main.go @@ -50,6 +50,7 @@ type Flags struct { kubeletPluginsDirectoryPath string healthcheckPort int podUID string + seamlessUpgrades bool } type Config struct { @@ -122,6 +123,12 @@ func newApp() *cli.App { Destination: &flags.podUID, EnvVars: []string{"POD_UID"}, }, + &cli.BoolFlag{ + Name: "seamless-upgrades", + Usage: "Enable seamless upgrades support. 
When enabled, the driver will use rolling update mode if pod-uid is available.", + Destination: &flags.seamlessUpgrades, + EnvVars: []string{"SEAMLESS_UPGRADES"}, + }, } cliFlags = append(cliFlags, flags.kubeClientConfig.Flags()...) cliFlags = append(cliFlags, flags.loggingConfig.Flags()...) diff --git a/deployments/helm/dra-example-driver/templates/kubeletplugin.yaml b/deployments/helm/dra-example-driver/templates/kubeletplugin.yaml index b376c793..2077523e 100644 --- a/deployments/helm/dra-example-driver/templates/kubeletplugin.yaml +++ b/deployments/helm/dra-example-driver/templates/kubeletplugin.yaml @@ -12,16 +12,16 @@ spec: matchLabels: {{- include "dra-example-driver.selectorLabels" . | nindent 6 }} app.kubernetes.io/component: kubeletplugin + {{- with .Values.kubeletPlugin.updateStrategy }} + updateStrategy: + {{- toYaml . | nindent 4 }} + {{- else }} {{- if .Values.kubeletPlugin.seamlessUpgrades.enabled }} updateStrategy: type: RollingUpdate rollingUpdate: maxSurge: 1 maxUnavailable: 0 - {{- else }} - {{- with .Values.kubeletPlugin.updateStrategy }} - updateStrategy: - {{- toYaml . 
| nindent 4 }} {{- end }} {{- end }} template: @@ -88,11 +88,13 @@ spec: - name: HEALTHCHECK_PORT value: {{ .Values.kubeletPlugin.containers.plugin.healthcheckPort | quote }} {{- end }} - {{- if .Values.kubeletPlugin.seamlessUpgrades.enabled }} - name: POD_UID valueFrom: fieldRef: fieldPath: metadata.uid + {{- if .Values.kubeletPlugin.seamlessUpgrades.enabled }} + - name: SEAMLESS_UPGRADES + value: "true" {{- end }} volumeMounts: - name: plugins-registry From ed5054b5c99db7d2f2d40f94a79b8ddad1b863b7 Mon Sep 17 00:00:00 2001 From: alimaazamat Date: Wed, 27 Aug 2025 16:23:07 -0700 Subject: [PATCH 4/4] overwrite maxSurge/maxUnavailable if seamlessUpgrades enabled + seamlessUpgrades enabled for e2e tests --- .../templates/kubeletplugin.yaml | 29 +++++++++++++++++-- .../helm/dra-example-driver/values.yaml | 2 +- 2 files changed, 27 insertions(+), 4 deletions(-) diff --git a/deployments/helm/dra-example-driver/templates/kubeletplugin.yaml b/deployments/helm/dra-example-driver/templates/kubeletplugin.yaml index 2077523e..36f2eec8 100644 --- a/deployments/helm/dra-example-driver/templates/kubeletplugin.yaml +++ b/deployments/helm/dra-example-driver/templates/kubeletplugin.yaml @@ -14,15 +14,38 @@ spec: app.kubernetes.io/component: kubeletplugin {{- with .Values.kubeletPlugin.updateStrategy }} updateStrategy: + {{- if $.Values.kubeletPlugin.seamlessUpgrades.enabled }} + {{- $strategy := . }} + {{- range $key, $value := . }} + {{- if eq $key "rollingUpdate" }} + {{ $key }}: + maxSurge: 1 + maxUnavailable: 0 + {{- range $subkey, $subvalue := $value }} + {{- if and (ne $subkey "maxSurge") (ne $subkey "maxUnavailable") }} + {{ $subkey }}: {{ $subvalue }} + {{- end }} + {{- end }} + {{- else }} + {{ $key }}: {{ $value }} + {{- end }} + {{- end }} + {{- else }} {{- toYaml .
| nindent 4 }} + {{- end }} {{- else }} - {{- if .Values.kubeletPlugin.seamlessUpgrades.enabled }} updateStrategy: - type: RollingUpdate + type: {{ .Values.kubeletPlugin.updateStrategy.type | default "RollingUpdate" }} + {{- if eq (.Values.kubeletPlugin.updateStrategy.type | default "RollingUpdate") "RollingUpdate" }} rollingUpdate: + {{- if .Values.kubeletPlugin.seamlessUpgrades.enabled }} maxSurge: 1 maxUnavailable: 0 - {{- end }} + {{- else }} + maxSurge: {{ .Values.kubeletPlugin.updateStrategy.rollingUpdate.maxSurge | default 0 }} + maxUnavailable: {{ .Values.kubeletPlugin.updateStrategy.rollingUpdate.maxUnavailable | default 1 }} + {{- end }} + {{- end }} {{- end }} template: metadata: diff --git a/deployments/helm/dra-example-driver/values.yaml b/deployments/helm/dra-example-driver/values.yaml index 51ff8de8..4fcf3722 100644 --- a/deployments/helm/dra-example-driver/values.yaml +++ b/deployments/helm/dra-example-driver/values.yaml @@ -48,7 +48,7 @@ kubeletPlugin: numDevices: 8 priorityClassName: "system-node-critical" seamlessUpgrades: - enabled: false + enabled: true updateStrategy: type: RollingUpdate podAnnotations: {}