Add Kubelet System Container and bootkube bootstrap

* First semi-working cluster using 30.307-metal-bios
* Enable CPU, Memory, and BlockIO accounting
* Mount /var/lib/kubelet with `rshare` so mounted tmpfs Secrets
(e.g. serviceaccount's) are visible within appropriate containers
* SELinux relabel /etc/kubernetes so install-cni init containers
can write the CNI config to the host /etc/kubernetes/net.d
* SELinux relabel /var/lib/kubelet so ConfigMaps can be read
by containers
* SELinux relabel /opt/cni/bin so install-cni containers can
write CNI binaries to the host
* Set net.ipv4_conf.all.rp_filter to 1 (not 2, loose mode) to
satisfy Calico requirement
* Enable the QoS cgroup hierarchy for pod workloads (kubepods,
burstable, besteffort). Mount /sys/fs/cgroup and
/sys/fs/cgroup/systemd into the Kubelet. Its still rather racy
whether Kubelet will fail on ContainerManager Delegation
This commit is contained in:
Dalton Hubble 2019-07-14 01:39:08 -07:00
parent aab14c5573
commit 72c94f1c6a
5 changed files with 302 additions and 41 deletions

View File

@ -14,6 +14,8 @@ module "bootkube" {
cluster_domain_suffix = var.cluster_domain_suffix
enable_reporting = var.enable_reporting
enable_aggregation = var.enable_aggregation
trusted_certs_dir = "/etc/pki/tls/certs"
}

View File

@ -20,19 +20,151 @@ systemd:
RestartSec=10s
TimeoutStartSec=0
LimitNOFILE=40000
ExecStartPre=/bin/chcon -t bin_t /opt/bin/etcd-wrapper
ExecStartPre=/bin/mkdir -p /var/lib/etcd
ExecStartPre=-/usr/bin/podman rm etcd
ExecStart=/opt/bin/etcd-wrapper
#--volume $${NOTIFY_SOCKET}:/run/systemd/notify \
ExecStart=/usr/bin/podman run --name etcd \
--env-file /etc/etcd/etcd.env \
--network host \
--volume /var/lib/etcd:/var/lib/etcd:rw,Z \
--volume /etc/ssl/etcd:/etc/ssl/certs:ro,Z \
quay.io/coreos/etcd:v3.3.13
ExecStop=/usr/bin/podman stop etcd
[Install]
WantedBy=multi-user.target
- name: docker.service
enabled: true
- name: wait-for-dns.service
enabled: true
contents: |
[Unit]
Description=Wait for DNS entries
Before=kubelet.service
[Service]
Type=oneshot
RemainAfterExit=true
ExecStart=/bin/sh -c 'while ! /usr/bin/grep '^[^#[:space:]]' /etc/resolv.conf > /dev/null; do sleep 1; done'
[Install]
RequiredBy=kubelet.service
RequiredBy=etcd-member.service
- name: kubelet.service
contents: |
[Unit]
Description=Kubelet via Hyperkube (System Container)
Wants=rpc-statd.service
[Service]
ExecStartPre=/bin/mkdir -p /etc/kubernetes/cni/net.d
ExecStartPre=/bin/mkdir -p /etc/kubernetes/manifests
ExecStartPre=/bin/mkdir -p /var/lib/calico
ExecStartPre=/bin/mkdir -p /var/lib/kubelet/volumeplugins
ExecStartPre=/bin/mkdir -p /opt/cni/bin
ExecStartPre=/usr/bin/bash -c "grep 'certificate-authority-data' /etc/kubernetes/kubeconfig | awk '{print $2}' | base64 -d > /etc/kubernetes/ca.crt"
ExecStartPre=-/usr/bin/podman rm kubelet
ExecStart=/usr/bin/podman run --name kubelet \
--privileged \
--pid host \
--network host \
--volume /etc/kubernetes:/etc/kubernetes:ro,z \
--volume /usr/lib/os-release:/etc/os-release:ro \
--volume /etc/ssl/certs:/etc/ssl/certs:ro \
--volume /lib/modules:/lib/modules:ro \
--volume /run:/run \
--volume /sys/fs/cgroup:/sys/fs/cgroup:ro \
--volume /sys/fs/cgroup/systemd:/sys/fs/cgroup/systemd \
--volume /etc/pki/tls/certs:/usr/share/ca-certificates:ro \
--volume /var/lib/calico:/var/lib/calico \
--volume /var/lib/docker:/var/lib/docker \
--volume /var/lib/kubelet:/var/lib/kubelet:rshared,z \
--volume /var/log:/var/log \
--volume /var/run:/var/run \
--volume /var/run/lock:/var/run/lock:z \
--volume /opt/cni/bin:/opt/cni/bin:z \
--volume /etc/iscsi:/etc/iscsi \
--volume /sbin/iscsiadm:/sbin/iscsiadm \
k8s.gcr.io/hyperkube:v1.15.0 /hyperkube kubelet \
--anonymous-auth=false \
--authentication-token-webhook \
--authorization-mode=Webhook \
--cgroup-driver=systemd \
--cgroups-per-qos=true \
--enforce-node-allocatable=pods \
--client-ca-file=/etc/kubernetes/ca.crt \
--cluster_dns=${cluster_dns_service_ip} \
--cluster_domain=${cluster_domain_suffix} \
--cni-conf-dir=/etc/kubernetes/cni/net.d \
--exit-on-lock-contention \
--hostname-override=${domain_name} \
--kubeconfig=/etc/kubernetes/kubeconfig \
--lock-file=/var/run/lock/kubelet.lock \
--network-plugin=cni \
--node-labels=node-role.kubernetes.io/master \
--node-labels=node-role.kubernetes.io/controller="true" \
--pod-manifest-path=/etc/kubernetes/manifests \
--read-only-port=0 \
--register-with-taints=node-role.kubernetes.io/master=:NoSchedule \
--volume-plugin-dir=/var/lib/kubelet/volumeplugins
ExecStop=-/usr/bin/podman stop kubelet
Delegate=yes
Restart=always
RestartSec=10
[Install]
WantedBy=multi-user.target
- name: kubelet.path
enabled: true
contents: |
[Unit]
Description=Watch for kubeconfig
[Path]
PathExists=/etc/kubernetes/kubeconfig
[Install]
WantedBy=multi-user.target
- name: bootkube.service
contents: |
[Unit]
Description=Bootstrap a Kubernetes control plane
ConditionPathExists=!/opt/bootkube/init_bootkube.done
[Service]
Type=oneshot
RemainAfterExit=true
WorkingDirectory=/opt/bootkube
ExecStart=/usr/bin/bash -c 'set -x && \
[ -n "$(ls /opt/bootkube/assets/manifests-*/* 2>/dev/null)" ] && mv /opt/bootkube/assets/manifests-*/* /opt/bootkube/assets/manifests && rm -rf /opt/bootkube/assets/manifests-* && exec podman run --name bootkube --privileged \
--network host \
--volume /opt/bootkube/assets:/assets \
--volume /etc/kubernetes:/etc/kubernetes \
quay.io/coreos/bootkube:v0.14.0 \
/bootkube start --asset-dir=/assets'
ExecStartPost=/bin/touch /opt/bootkube/init_bootkube.done
storage:
directories:
- path: /etc/kubernetes
- path: /opt/bootkube
files:
- path: /etc/hostname
mode: 0644
contents:
inline:
${domain_name}
- path: /etc/sysctl.d/reverse-path-filter.conf
contents:
inline: |
net.ipv4.conf.all.rp_filter=1
- path: /etc/sysctl.d/max-user-watches.conf
contents:
inline: |
fs.inotify.max_user_watches=16184
- path: /etc/systemd/system.conf.d/accounting.conf
contents:
inline: |
[Manager]
DefaultCPUAccounting=yes
DefaultMemoryAccounting=yes
DefaultBlockIOAccounting=yes
- path: /etc/etcd/etcd.env
mode: 0644
contents:
inline: |
# TODO: Use a systemd dropin once podman v1.4.5 is avail.
NOTIFY_SOCKET=/run/systemd/notify
ETCD_NAME=${etcd_name}
ETCD_DATA_DIR=/var/lib/etcd
@ -51,35 +183,6 @@ storage:
ETCD_PEER_CERT_FILE=/etc/ssl/certs/etcd/peer.crt
ETCD_PEER_KEY_FILE=/etc/ssl/certs/etcd/peer.key
ETCD_PEER_CLIENT_CERT_AUTH=true
- path: /opt/bin/etcd-wrapper
mode: 0544
contents:
inline: |
#!/usr/bin/bash -e
#--volume $${NOTIFY_SOCKET}:/run/systemd/notify \
set -x
mkdir -p /var/lib/etcd
exec podman run --name etcd \
--env-file /etc/etcd/etcd.env \
--network host \
--volume /var/lib/etcd:/var/lib/etcd:rw,Z \
--volume /etc/ssl/etcd:/etc/ssl/certs:ro,Z \
quay.io/coreos/etcd:v3.3.13
- path: /etc/kubernetes/kubelet.env
mode: 0644
contents:
inline: |
KUBELET_IMAGE_URL=docker://k8s.gcr.io/hyperkube
KUBELET_IMAGE_TAG=v1.15.0
- path: /etc/hostname
mode: 0644
contents:
inline:
${domain_name}
- path: /etc/sysctl.d/max-user-watches.conf
contents:
inline: |
fs.inotify.max_user_watches=16184
passwd:
users:
- name: core

View File

@ -1,26 +1,116 @@
---
variant: fcos
version: 1.0.0
systemd:
units:
- name: docker.service
enabled: true
- name: wait-for-dns.service
enabled: true
contents: |
[Unit]
Description=Wait for DNS entries
Before=kubelet.service
[Service]
Type=oneshot
RemainAfterExit=true
ExecStart=/bin/sh -c 'while ! /usr/bin/grep '^[^#[:space:]]' /etc/resolv.conf > /dev/null; do sleep 1; done'
[Install]
RequiredBy=kubelet.service
- name: kubelet.service
contents: |
[Unit]
Description=Kubelet via Hyperkube (System Container)
Wants=rpc-statd.service
[Service]
ExecStartPre=/bin/mkdir -p /etc/kubernetes/cni/net.d
ExecStartPre=/bin/mkdir -p /etc/kubernetes/manifests
ExecStartPre=/bin/mkdir -p /var/lib/calico
ExecStartPre=/bin/mkdir -p /var/lib/kubelet/volumeplugins
ExecStartPre=/bin/mkdir -p /opt/cni/bin
ExecStartPre=/usr/bin/bash -c "grep 'certificate-authority-data' /etc/kubernetes/kubeconfig | awk '{print $2}' | base64 -d > /etc/kubernetes/ca.crt"
ExecStartPre=-/usr/bin/podman rm kubelet
ExecStart=/usr/bin/podman run --name kubelet \
--privileged \
--pid host \
--network host \
--volume /etc/kubernetes:/etc/kubernetes:ro,z \
--volume /usr/lib/os-release:/etc/os-release:ro \
--volume /etc/ssl/certs:/etc/ssl/certs:ro \
--volume /lib/modules:/lib/modules:ro \
--volume /run:/run \
--volume /sys/fs/cgroup:/sys/fs/cgroup:ro \
--volume /sys/fs/cgroup/systemd:/sys/fs/cgroup/systemd \
--volume /etc/pki/tls/certs:/usr/share/ca-certificates:ro \
--volume /var/lib/calico:/var/lib/calico \
--volume /var/lib/docker:/var/lib/docker \
--volume /var/lib/kubelet:/var/lib/kubelet:rshared,z \
--volume /var/log:/var/log \
--volume /var/run:/var/run \
--volume /var/run/lock:/var/run/lock:z \
--volume /opt/cni/bin:/opt/cni/bin:z \
--volume /etc/iscsi:/etc/iscsi \
--volume /sbin/iscsiadm:/sbin/iscsiadm \
k8s.gcr.io/hyperkube:v1.15.0 /hyperkube kubelet \
--anonymous-auth=false \
--authentication-token-webhook \
--authorization-mode=Webhook \
--cgroup-driver=systemd \
--cgroups-per-qos=true \
--enforce-node-allocatable=pods \
--client-ca-file=/etc/kubernetes/ca.crt \
--cluster_dns=${cluster_dns_service_ip} \
--cluster_domain=${cluster_domain_suffix} \
--cni-conf-dir=/etc/kubernetes/cni/net.d \
--exit-on-lock-contention \
--hostname-override=${domain_name} \
--kubeconfig=/etc/kubernetes/kubeconfig \
--lock-file=/var/run/lock/kubelet.lock \
--network-plugin=cni \
--node-labels=node-role.kubernetes.io/node \
--pod-manifest-path=/etc/kubernetes/manifests \
--read-only-port=0 \
--volume-plugin-dir=/var/lib/kubelet/volumeplugins
ExecStop=-/usr/bin/podman stop kubelet
Delegate=yes
Restart=always
RestartSec=10
[Install]
WantedBy=multi-user.target
- name: kubelet.path
enabled: true
contents: |
[Unit]
Description=Watch for kubeconfig
[Path]
PathExists=/etc/kubernetes/kubeconfig
[Install]
WantedBy=multi-user.target
storage:
directories:
- path: /etc/kubernetes
- path: /opt/bootkube
files:
- path: /etc/kubernetes/kubelet.env
filesystem: root
mode: 0644
contents:
inline: |
KUBELET_IMAGE_URL=docker://k8s.gcr.io/hyperkube
KUBELET_IMAGE_TAG=v1.15.0
- path: /etc/hostname
filesystem: root
mode: 0644
contents:
inline:
${domain_name}
- path: /etc/sysctl.d/reverse-path-filter.conf
contents:
inline: |
net.ipv4.conf.all.rp_filter=1
- path: /etc/sysctl.d/max-user-watches.conf
filesystem: root
contents:
inline: |
fs.inotify.max_user_watches=16184
- path: /etc/systemd/system.conf.d/accounting.conf
contents:
inline: |
[Manager]
DefaultCPUAccounting=yes
DefaultMemoryAccounting=yes
DefaultBlockIOAccounting=yes
passwd:
users:
- name: core

View File

@ -0,0 +1,4 @@
output "kubeconfig-admin" {
value = module.bootkube.kubeconfig-admin
}

View File

@ -71,3 +71,65 @@ resource "null_resource" "copy-controller-secrets" {
}
}
# Secure copy kubeconfig to all workers. Activates kubelet.service
resource "null_resource" "copy-worker-secrets" {
count = length(var.worker_names)
# Without depends_on, remote-exec could start and wait for machines before
# matchbox groups are written, causing a deadlock.
depends_on = [
matchbox_group.controller,
matchbox_group.worker,
]
connection {
type = "ssh"
host = var.worker_domains[count.index]
user = "core"
timeout = "60m"
}
provisioner "file" {
content = module.bootkube.kubeconfig-kubelet
destination = "$HOME/kubeconfig"
}
provisioner "remote-exec" {
inline = [
"sudo mv $HOME/kubeconfig /etc/kubernetes/kubeconfig",
]
}
}
# Secure copy bootkube assets to ONE controller and start bootkube to perform
# one-time self-hosted cluster bootstrapping.
resource "null_resource" "bootkube-start" {
# Without depends_on, this remote-exec may start before the kubeconfig copy.
# Terraform only does one task at a time, so it would try to bootstrap
# while no Kubelets are running.
depends_on = [
null_resource.copy-controller-secrets,
null_resource.copy-worker-secrets,
]
connection {
type = "ssh"
host = var.controller_domains[0]
user = "core"
timeout = "15m"
}
provisioner "file" {
source = var.asset_dir
destination = "$HOME/assets"
}
provisioner "remote-exec" {
inline = [
"sudo mv $HOME/assets /opt/bootkube",
"sudo systemctl start bootkube",
]
}
}