mirror of
https://github.com/puppetmaster/typhoon.git
synced 2024-12-27 09:59:33 +01:00
393a38deff
* Configure Kubelet Graceful Node Shutdown to detect system shutdown events and stop running containers gracefully when possible * Allow up to 30s for critical pods to gracefully shutdown * Allow up to 15s for regular pods to gracefully shutdown * Node will be marked as NotReady promptly, instead of having to wait for health checks * Kubelet uses systemd inhibitor locks to delay shutdown for a limited number of seconds * Raise the default max inhibitor time from 5s to 45s Verify systemd inhibitor locks are present: ``` sudo systemd-inhibit --list WHO UID USER PID COMM WHAT WHY MODE kubelet 0 root 4581 kubelet shutdown Kubelet needs time to handle node shutdown delay ``` Tail journal logs and then shutdown a node via systemctl reboot or via the cloud console to watch container shutdown Rel: * https://kubernetes.io/blog/2021/04/21/graceful-node-shutdown-beta/ * https://kubernetes.io/docs/reference/config-api/kubelet-config.v1beta1/ * https://github.com/kubernetes/kubernetes/issues/107043 * https://github.com/coreos/fedora-coreos-tracker/issues/821 * https://www.freedesktop.org/software/systemd/man/systemd-inhibit.html * https://github.com/kubernetes/kubernetes/blob/release-1.24/pkg/kubelet/nodeshutdown/nodeshutdown_manager_linux.go * https://github.com/godbus/dbus/blob/master/conn.go
230 lines
8.2 KiB
YAML
230 lines
8.2 KiB
YAML
variant: flatcar
|
|
version: 1.0.0
|
|
systemd:
|
|
units:
|
|
- name: etcd-member.service
|
|
enabled: true
|
|
contents: |
|
|
[Unit]
|
|
Description=etcd (System Container)
|
|
Documentation=https://github.com/etcd-io/etcd
|
|
Requires=docker.service
|
|
After=docker.service
|
|
[Service]
|
|
Environment=ETCD_IMAGE=quay.io/coreos/etcd:v3.5.4
|
|
ExecStartPre=/usr/bin/docker run -d \
|
|
--name etcd \
|
|
--network host \
|
|
--env-file /etc/etcd/etcd.env \
|
|
--user 232:232 \
|
|
--volume /etc/ssl/etcd:/etc/ssl/certs:ro \
|
|
--volume /var/lib/etcd:/var/lib/etcd:rw \
|
|
$${ETCD_IMAGE}
|
|
ExecStart=docker logs -f etcd
|
|
ExecStop=docker stop etcd
|
|
ExecStopPost=docker rm etcd
|
|
Restart=always
|
|
RestartSec=10s
|
|
TimeoutStartSec=0
|
|
LimitNOFILE=40000
|
|
[Install]
|
|
WantedBy=multi-user.target
|
|
- name: docker.service
|
|
enabled: true
|
|
- name: locksmithd.service
|
|
mask: true
|
|
- name: wait-for-dns.service
|
|
enabled: true
|
|
contents: |
|
|
[Unit]
|
|
Description=Wait for DNS entries
|
|
Wants=systemd-resolved.service
|
|
Before=kubelet.service
|
|
[Service]
|
|
Type=oneshot
|
|
RemainAfterExit=true
|
|
ExecStart=/bin/sh -c 'while ! /usr/bin/grep '^[^#[:space:]]' /etc/resolv.conf > /dev/null; do sleep 1; done'
|
|
[Install]
|
|
RequiredBy=kubelet.service
|
|
RequiredBy=etcd-member.service
|
|
- name: kubelet.service
|
|
enabled: true
|
|
contents: |
|
|
[Unit]
|
|
Description=Kubelet (System Container)
|
|
Requires=docker.service
|
|
After=docker.service
|
|
Wants=rpc-statd.service
|
|
[Service]
|
|
Environment=KUBELET_IMAGE=quay.io/poseidon/kubelet:v1.25.0
|
|
ExecStartPre=/bin/mkdir -p /etc/cni/net.d
|
|
ExecStartPre=/bin/mkdir -p /etc/kubernetes/manifests
|
|
ExecStartPre=/bin/mkdir -p /opt/cni/bin
|
|
ExecStartPre=/bin/mkdir -p /var/lib/calico
|
|
ExecStartPre=/bin/mkdir -p /var/lib/kubelet/volumeplugins
|
|
ExecStartPre=/usr/bin/bash -c "grep 'certificate-authority-data' /etc/kubernetes/kubeconfig | awk '{print $2}' | base64 -d > /etc/kubernetes/ca.crt"
|
|
ExecStartPre=/usr/bin/docker run -d \
|
|
--name kubelet \
|
|
--privileged \
|
|
--pid host \
|
|
--network host \
|
|
-v /etc/cni/net.d:/etc/cni/net.d:ro \
|
|
-v /etc/kubernetes:/etc/kubernetes:ro \
|
|
-v /etc/machine-id:/etc/machine-id:ro \
|
|
-v /usr/lib/os-release:/etc/os-release:ro \
|
|
-v /lib/modules:/lib/modules:ro \
|
|
-v /run:/run \
|
|
-v /sys/fs/cgroup:/sys/fs/cgroup \
|
|
-v /var/lib/calico:/var/lib/calico:ro \
|
|
-v /var/lib/containerd:/var/lib/containerd \
|
|
-v /var/lib/kubelet:/var/lib/kubelet:rshared \
|
|
-v /var/log:/var/log \
|
|
-v /opt/cni/bin:/opt/cni/bin \
|
|
$${KUBELET_IMAGE} \
|
|
--bootstrap-kubeconfig=/etc/kubernetes/kubeconfig \
|
|
--config=/etc/kubernetes/kubelet.yaml \
|
|
--container-runtime-endpoint=unix:///run/containerd/containerd.sock \
|
|
--kubeconfig=/var/lib/kubelet/kubeconfig \
|
|
--node-labels=node.kubernetes.io/controller="true" \
|
|
--register-with-taints=node-role.kubernetes.io/controller=:NoSchedule
|
|
ExecStart=docker logs -f kubelet
|
|
ExecStop=docker stop kubelet
|
|
ExecStopPost=docker rm kubelet
|
|
Restart=always
|
|
RestartSec=10
|
|
[Install]
|
|
WantedBy=multi-user.target
|
|
- name: bootstrap.service
|
|
contents: |
|
|
[Unit]
|
|
Description=Kubernetes control plane
|
|
Wants=docker.service
|
|
After=docker.service
|
|
ConditionPathExists=!/opt/bootstrap/bootstrap.done
|
|
[Service]
|
|
Type=oneshot
|
|
RemainAfterExit=true
|
|
WorkingDirectory=/opt/bootstrap
|
|
Environment=KUBELET_IMAGE=quay.io/poseidon/kubelet:v1.25.0
|
|
ExecStart=/usr/bin/docker run \
|
|
-v /etc/kubernetes/pki:/etc/kubernetes/pki:ro \
|
|
-v /opt/bootstrap/assets:/assets:ro \
|
|
-v /opt/bootstrap/apply:/apply:ro \
|
|
--entrypoint=/apply \
|
|
$${KUBELET_IMAGE}
|
|
ExecStartPost=/bin/touch /opt/bootstrap/bootstrap.done
|
|
[Install]
|
|
WantedBy=multi-user.target
|
|
storage:
|
|
directories:
|
|
- path: /var/lib/etcd
|
|
mode: 0700
|
|
overwrite: true
|
|
files:
|
|
- path: /etc/kubernetes/kubeconfig
|
|
mode: 0644
|
|
contents:
|
|
inline: |
|
|
${kubeconfig}
|
|
- path: /etc/kubernetes/kubelet.yaml
|
|
mode: 0644
|
|
contents:
|
|
inline: |
|
|
apiVersion: kubelet.config.k8s.io/v1beta1
|
|
kind: KubeletConfiguration
|
|
authentication:
|
|
anonymous:
|
|
enabled: false
|
|
webhook:
|
|
enabled: true
|
|
x509:
|
|
clientCAFile: /etc/kubernetes/ca.crt
|
|
authorization:
|
|
mode: Webhook
|
|
cgroupDriver: systemd
|
|
clusterDNS:
|
|
- ${cluster_dns_service_ip}
|
|
clusterDomain: ${cluster_domain_suffix}
|
|
healthzPort: 0
|
|
featureGates:
|
|
LocalStorageCapacityIsolationFSQuotaMonitoring: false
|
|
rotateCertificates: true
|
|
shutdownGracePeriod: 45s
|
|
shutdownGracePeriodCriticalPods: 30s
|
|
staticPodPath: /etc/kubernetes/manifests
|
|
readOnlyPort: 0
|
|
resolvConf: /run/systemd/resolve/resolv.conf
|
|
volumePluginDir: /var/lib/kubelet/volumeplugins
|
|
- path: /opt/bootstrap/layout
|
|
mode: 0544
|
|
contents:
|
|
inline: |
|
|
#!/bin/bash -e
|
|
mkdir -p -- auth tls/etcd tls/k8s static-manifests manifests/coredns manifests-networking
|
|
awk '/#####/ {filename=$2; next} {print > filename}' assets
|
|
mkdir -p /etc/ssl/etcd/etcd
|
|
mkdir -p /etc/kubernetes/pki
|
|
mv tls/etcd/{peer*,server*} /etc/ssl/etcd/etcd/
|
|
mv tls/etcd/etcd-client* /etc/kubernetes/pki/
|
|
chown -R etcd:etcd /etc/ssl/etcd
|
|
chmod -R 500 /etc/ssl/etcd
|
|
chmod -R 700 /var/lib/etcd
|
|
mv auth/* /etc/kubernetes/pki/
|
|
mv tls/k8s/* /etc/kubernetes/pki/
|
|
mkdir -p /etc/kubernetes/manifests
|
|
mv static-manifests/* /etc/kubernetes/manifests/
|
|
mkdir -p /opt/bootstrap/assets
|
|
mv manifests /opt/bootstrap/assets/manifests
|
|
mv manifests-networking/* /opt/bootstrap/assets/manifests/
|
|
rm -rf assets auth static-manifests tls manifests-networking
|
|
- path: /opt/bootstrap/apply
|
|
mode: 0544
|
|
contents:
|
|
inline: |
|
|
#!/bin/bash -e
|
|
export KUBECONFIG=/etc/kubernetes/pki/admin.conf
|
|
until kubectl version; do
|
|
echo "Waiting for static pod control plane"
|
|
sleep 5
|
|
done
|
|
until kubectl apply -f /assets/manifests -R; do
|
|
echo "Retry applying manifests"
|
|
sleep 5
|
|
done
|
|
- path: /etc/systemd/logind.conf.d/inhibitors.conf
|
|
contents:
|
|
inline: |
|
|
[Login]
|
|
InhibitDelayMaxSec=45s
|
|
- path: /etc/sysctl.d/max-user-watches.conf
|
|
mode: 0644
|
|
contents:
|
|
inline: |
|
|
fs.inotify.max_user_watches=16184
|
|
- path: /etc/etcd/etcd.env
|
|
mode: 0644
|
|
contents:
|
|
inline: |
|
|
ETCD_NAME=${etcd_name}
|
|
ETCD_DATA_DIR=/var/lib/etcd
|
|
ETCD_ADVERTISE_CLIENT_URLS=https://${etcd_domain}:2379
|
|
ETCD_INITIAL_ADVERTISE_PEER_URLS=https://${etcd_domain}:2380
|
|
ETCD_LISTEN_CLIENT_URLS=https://0.0.0.0:2379
|
|
ETCD_LISTEN_PEER_URLS=https://0.0.0.0:2380
|
|
ETCD_LISTEN_METRICS_URLS=http://0.0.0.0:2381
|
|
ETCD_INITIAL_CLUSTER=${etcd_initial_cluster}
|
|
ETCD_STRICT_RECONFIG_CHECK=true
|
|
ETCD_TRUSTED_CA_FILE=/etc/ssl/certs/etcd/server-ca.crt
|
|
ETCD_CERT_FILE=/etc/ssl/certs/etcd/server.crt
|
|
ETCD_KEY_FILE=/etc/ssl/certs/etcd/server.key
|
|
ETCD_CLIENT_CERT_AUTH=true
|
|
ETCD_PEER_TRUSTED_CA_FILE=/etc/ssl/certs/etcd/peer-ca.crt
|
|
ETCD_PEER_CERT_FILE=/etc/ssl/certs/etcd/peer.crt
|
|
ETCD_PEER_KEY_FILE=/etc/ssl/certs/etcd/peer.key
|
|
ETCD_PEER_CLIENT_CERT_AUTH=true
|
|
passwd:
|
|
users:
|
|
- name: core
|
|
ssh_authorized_keys:
|
|
- "${ssh_authorized_key}"
|