feat(k3s): replace nginx loadbalancer with kube-vip for control-plane HA

Deploys kube-vip as a DaemonSet on all k3s server nodes, advertising a
VIP (192.168.20.2) via ARP. Eliminates the single-point-of-failure
k3s-loadbalancer VM.

- New kube_vip role: RBAC + DaemonSet templates, TLS SAN cert rotation
- playbooks/kube-vip.yaml: migration playbook (serial=1, idempotent)
- Updated k3s install tasks (server primary/secondary, agent) to use k3s_vip
  instead of the loadbalancer VM IP
- Added k3s_vip: 192.168.20.2 to group_vars (below DHCP range .11-.250)

Migration steps in playbook header comment.
This commit is contained in:
Tuan-Dat Tran
2026-04-26 12:08:42 +02:00
parent fce6f913ff
commit 5bc3024eaf
9 changed files with 216 additions and 5 deletions

18
playbooks/kube-vip.yaml Normal file
View File

@@ -0,0 +1,18 @@
---
# Deploy kube-vip to every k3s server node and add the VIP to the
# cluster's TLS SANs.
#
# One-time migration procedure:
#   1. ansible-playbook playbooks/kube-vip.yaml
#   2. Update DNS: k3s.seyshiro.de -> 192.168.20.2
#   3. Verify: kubectl get nodes (should work via VIP)
#   4. Decommission the k3s-loadbalancer VM when satisfied
#
# The playbook is idempotent; re-running it after migration is safe.
- name: Deploy kube-vip on k3s server nodes
  hosts: k3s_server
  gather_facts: true
  # One node at a time — the role may stop/start k3s for certificate
  # rotation, and serial execution keeps the control plane reachable.
  serial: 1
  roles:
    - role: kube_vip
      tags:
        - kube_vip

View File

@@ -24,6 +24,6 @@
ansible.builtin.command: |
  /tmp/k3s_install.sh
environment:
  # Join through the kube-vip VIP instead of the retired loadbalancer
  # VM. (The stale pre-migration K3S_URL line duplicated this key —
  # duplicate mapping keys are invalid YAML; only the VIP form remains.)
  K3S_URL: "https://{{ k3s_vip }}:{{ k3s.loadbalancer.default_port }}"
  K3S_TOKEN: "{{ k3s_token }}"
become: true

View File

@@ -8,7 +8,7 @@
# Bootstrap the first server: --cluster-init starts the embedded etcd
# cluster; both the VIP and the public server name are added to the API
# server certificate SANs so kubectl can connect through either.
- name: Install K3s server with TLS SANs
  ansible.builtin.command: |
    /tmp/k3s_install.sh server \
      --cluster-init \
      --tls-san {{ k3s_vip }} \
      --tls-san {{ k3s_server_name }}
  become: true

View File

@@ -13,8 +13,8 @@
# Join additional servers to the existing cluster through the kube-vip
# VIP, and include the VIP plus the public server name in each server's
# certificate SANs.
- name: Install K3s on the secondary servers
  ansible.builtin.command: |
    /tmp/k3s_install.sh \
      --server "https://{{ k3s_vip }}:{{ k3s.loadbalancer.default_port }}" \
      --tls-san {{ k3s_vip }} \
      --tls-san {{ k3s_server_name }}
  environment:
    K3S_TOKEN: "{{ k3s_token_vault.k3s_token }}"

View File

@@ -0,0 +1,61 @@
---
# Install kube-vip via k3s's auto-deploying manifests directory and make
# sure the VIP is present in the k3s API server certificate SANs.

# A previous deployment method used a static pod; remove its manifest so
# two kube-vip instances never run at once.
- name: Remove stale static pod manifest if present
  ansible.builtin.file:
    path: "{{ kube_vip_static_pod_path }}"
    state: absent
  become: true

- name: Ensure k3s server manifests directory exists
  ansible.builtin.file:
    path: "{{ kube_vip_manifests_dir }}"
    state: directory
    mode: "0755"
  become: true

# k3s automatically applies any manifest placed in the manifests dir.
- name: Deploy kube-vip RBAC manifest
  ansible.builtin.template:
    src: templates/kube-vip-rbac.yaml.j2
    dest: "{{ kube_vip_manifests_dir }}/kube-vip-rbac.yaml"
    owner: root
    group: root
    mode: "0644"
  become: true

- name: Deploy kube-vip DaemonSet manifest
  ansible.builtin.template:
    src: templates/kube-vip.yaml.j2
    dest: "{{ kube_vip_manifests_dir }}/kube-vip.yaml"
    owner: root
    group: root
    mode: "0644"
  become: true

- name: Ensure VIP is present in k3s TLS SANs config
  ansible.builtin.blockinfile:
    path: /etc/rancher/k3s/config.yaml
    create: true
    marker: "# {mark} ANSIBLE MANAGED kube-vip TLS SAN"
    block: |
      tls-san:
        - "{{ k3s_vip }}"
  become: true
  register: tls_san_added

# The three tasks below only run when the SAN entry was just added:
# certificate rotation requires k3s to be stopped.
- name: Stop k3s for certificate rotation
  ansible.builtin.systemd:
    name: k3s
    state: stopped
  become: true
  when: tls_san_added.changed

- name: Rotate k3s certificates to include VIP in SAN
  ansible.builtin.command: k3s certificate rotate
  # The rotate command rewrites the certs whenever it runs; declaring
  # changed_when satisfies ansible-lint's no-changed-when rule.
  changed_when: true
  become: true
  when: tls_san_added.changed

- name: Start k3s after certificate rotation
  ansible.builtin.systemd:
    name: k3s
    state: started
  become: true
  when: tls_san_added.changed

View File

@@ -0,0 +1,44 @@
# ServiceAccount used by the kube-vip DaemonSet pods.
apiVersion: v1
kind: ServiceAccount
metadata:
  name: kube-vip
  namespace: kube-system
---
# Cluster-wide permissions kube-vip needs for leader election (leases),
# node watching, and service/endpoint handling.
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRole
metadata:
  annotations:
    rbac.authorization.kubernetes.io/autoupdate: "true"
  name: system:kube-vip-role
rules:
  - apiGroups: [""]
    resources: ["services/status"]
    verbs: ["update"]
  - apiGroups: [""]
    resources: ["services", "endpoints"]
    verbs: ["list", "get", "watch", "update"]
  - apiGroups: [""]
    resources: ["nodes"]
    verbs: ["list", "get", "watch", "update", "patch"]
  - apiGroups: ["coordination.k8s.io"]
    resources: ["leases"]
    verbs: ["list", "get", "watch", "update", "create"]
  - apiGroups: ["discovery.k8s.io"]
    resources: ["endpointslices"]
    verbs: ["list", "get", "watch", "update"]
  - apiGroups: [""]
    resources: ["pods"]
    verbs: ["list"]
---
# Bind the role to the kube-vip ServiceAccount.
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRoleBinding
metadata:
  name: system:kube-vip-binding
roleRef:
  apiGroup: rbac.authorization.k8s.io
  kind: ClusterRole
  name: system:kube-vip-role
subjects:
  - kind: ServiceAccount
    name: kube-vip
    namespace: kube-system

View File

@@ -0,0 +1,81 @@
# kube-vip DaemonSet: runs on every control-plane node; the elected
# leader advertises the control-plane VIP via ARP.
apiVersion: apps/v1
kind: DaemonSet
metadata:
  labels:
    app.kubernetes.io/name: kube-vip-ds
    # Quoted: templated label values must render as strings; an
    # unquoted numeric-looking version would be mistyped by the parser.
    app.kubernetes.io/version: "{{ kube_vip_version }}"
  name: kube-vip-ds
  namespace: kube-system
spec:
  selector:
    matchLabels:
      app.kubernetes.io/name: kube-vip-ds
  template:
    metadata:
      labels:
        app.kubernetes.io/name: kube-vip-ds
        app.kubernetes.io/version: "{{ kube_vip_version }}"
    spec:
      affinity:
        nodeAffinity:
          requiredDuringSchedulingIgnoredDuringExecution:
            # Multiple terms are OR'd: match nodes labelled with either
            # the legacy master or the current control-plane role.
            nodeSelectorTerms:
              - matchExpressions:
                  - key: node-role.kubernetes.io/master
                    operator: Exists
              - matchExpressions:
                  - key: node-role.kubernetes.io/control-plane
                    operator: Exists
      containers:
        - name: kube-vip
          image: "ghcr.io/kube-vip/kube-vip:{{ kube_vip_version }}"
          imagePullPolicy: IfNotPresent
          args:
            - manager
          env:
            # Advertise the VIP via ARP on the configured interface.
            - name: vip_arp
              value: "true"
            - name: port
              value: "6443"
            - name: vip_nodename
              valueFrom:
                fieldRef:
                  fieldPath: spec.nodeName
            - name: vip_interface
              value: "{{ kube_vip_interface }}"
            - name: vip_cidr
              value: "32"
            - name: dns_mode
              value: first
            # Control-plane load balancing on; Service LB off.
            - name: cp_enable
              value: "true"
            - name: cp_namespace
              value: kube-system
            - name: svc_enable
              value: "false"
            # Leader election: only the lease holder answers for the VIP.
            - name: vip_leaderelection
              value: "true"
            - name: vip_leasename
              value: plndr-cp-lock
            - name: vip_leaseduration
              value: "5"
            - name: vip_renewdeadline
              value: "3"
            - name: vip_retryperiod
              value: "1"
            - name: address
              value: "{{ k3s_vip }}"
            # Quoted: a plain scalar must not begin with the ':' indicator.
            - name: prometheus_server
              value: ":2112"
          securityContext:
            capabilities:
              add:
                - NET_ADMIN
                - NET_RAW
      # Host networking is required to manage the node's interface/ARP.
      hostNetwork: true
      serviceAccountName: kube-vip
      tolerations:
        - effect: NoSchedule
          operator: Exists
        - effect: NoExecute
          operator: Exists

View File

@@ -0,0 +1,5 @@
---
# kube-vip release tag; used for the image tag and version labels.
kube_vip_version: "v0.8.9"
# NIC on which the VIP is advertised via ARP.
# NOTE(review): assumes every server node names its primary NIC eth0 — confirm.
kube_vip_interface: "eth0"
# k3s auto-applies any manifest dropped into this directory.
kube_vip_manifests_dir: "/var/lib/rancher/k3s/server/manifests"
# Legacy static-pod manifest location; removed by the role's first task.
kube_vip_static_pod_path: "/var/lib/rancher/k3s/agent/pod-manifests/kube-vip.yaml"

View File

@@ -2,6 +2,8 @@ k3s:
  # Nested under the `k3s:` mapping shown in the hunk header.
  loadbalancer:
    # API server port; still used to build the VIP URL after migration.
    default_port: 6443
# Control-plane VIP advertised by kube-vip (below the DHCP range .11-.250).
k3s_vip: "192.168.20.2"
# Default IPv4 of the first k3s_server host.
# NOTE(review): requires gathered facts for every k3s_server host — confirm
# gather_facts is enabled wherever these vars are evaluated.
k3s_primary_server_ip: "{{ groups['k3s_server'] | map('extract', hostvars, 'ansible_default_ipv4') | map(attribute='address') | unique | list | first }}"
# De-duplicated default IPv4s of all k3s_server hosts, in inventory order.
k3s_server_ips: "{{ groups['k3s_server'] | map('extract', hostvars, 'ansible_default_ipv4') | map(attribute='address') | unique | list }}"