From 72c94f1c6afc17ef3fe52c73156aa3e9a04cce86 Mon Sep 17 00:00:00 2001 From: Dalton Hubble Date: Sun, 14 Jul 2019 01:39:08 -0700 Subject: [PATCH] Add Kubelet System Container and bootkube bootstrap * First semi-working cluster using 30.307-metal-bios * Enable CPU, Memory, and BlockIO accounting * Mount /var/lib/kubelet with `rshared` so mounted tmpfs Secrets (e.g. serviceaccount's) are visible within appropriate containers * SELinux relabel /etc/kubernetes so install-cni init containers can write the CNI config to the host /etc/kubernetes/cni/net.d * SELinux relabel /var/lib/kubelet so ConfigMaps can be read by containers * SELinux relabel /opt/cni/bin so install-cni containers can write CNI binaries to the host * Set net.ipv4.conf.all.rp_filter to 1 (not 2, loose mode) to satisfy Calico requirement * Enable the QoS cgroup hierarchy for pod workloads (kubepods, burstable, besteffort). Mount /sys/fs/cgroup and /sys/fs/cgroup/systemd into the Kubelet. It's still rather racy whether Kubelet will fail on ContainerManager Delegation --- .../fedora-coreos/kubernetes/bootkube.tf | 2 + .../kubernetes/fcc/controller.yaml | 167 ++++++++++++++---- .../fedora-coreos/kubernetes/fcc/worker.yaml | 108 ++++++++++- .../fedora-coreos/kubernetes/outputs.tf | 4 + bare-metal/fedora-coreos/kubernetes/ssh.tf | 62 +++++++ 5 files changed, 302 insertions(+), 41 deletions(-) create mode 100644 bare-metal/fedora-coreos/kubernetes/outputs.tf diff --git a/bare-metal/fedora-coreos/kubernetes/bootkube.tf b/bare-metal/fedora-coreos/kubernetes/bootkube.tf index 33df7bd0..06c50989 100644 --- a/bare-metal/fedora-coreos/kubernetes/bootkube.tf +++ b/bare-metal/fedora-coreos/kubernetes/bootkube.tf @@ -14,6 +14,8 @@ module "bootkube" { cluster_domain_suffix = var.cluster_domain_suffix enable_reporting = var.enable_reporting enable_aggregation = var.enable_aggregation + + trusted_certs_dir = "/etc/pki/tls/certs" } diff --git a/bare-metal/fedora-coreos/kubernetes/fcc/controller.yaml 
b/bare-metal/fedora-coreos/kubernetes/fcc/controller.yaml index b1d8bdbc..bd7b4b7a 100644 --- a/bare-metal/fedora-coreos/kubernetes/fcc/controller.yaml +++ b/bare-metal/fedora-coreos/kubernetes/fcc/controller.yaml @@ -20,19 +20,151 @@ systemd: RestartSec=10s TimeoutStartSec=0 LimitNOFILE=40000 - - ExecStartPre=/bin/chcon -t bin_t /opt/bin/etcd-wrapper + ExecStartPre=/bin/mkdir -p /var/lib/etcd ExecStartPre=-/usr/bin/podman rm etcd - ExecStart=/opt/bin/etcd-wrapper + #--volume $${NOTIFY_SOCKET}:/run/systemd/notify \ + ExecStart=/usr/bin/podman run --name etcd \ + --env-file /etc/etcd/etcd.env \ + --network host \ + --volume /var/lib/etcd:/var/lib/etcd:rw,Z \ + --volume /etc/ssl/etcd:/etc/ssl/certs:ro,Z \ + quay.io/coreos/etcd:v3.3.13 ExecStop=/usr/bin/podman stop etcd [Install] WantedBy=multi-user.target + - name: docker.service + enabled: true + - name: wait-for-dns.service + enabled: true + contents: | + [Unit] + Description=Wait for DNS entries + Before=kubelet.service + [Service] + Type=oneshot + RemainAfterExit=true + ExecStart=/bin/sh -c 'while ! 
/usr/bin/grep '^[^#[:space:]]' /etc/resolv.conf > /dev/null; do sleep 1; done' + [Install] + RequiredBy=kubelet.service + RequiredBy=etcd-member.service + - name: kubelet.service + contents: | + [Unit] + Description=Kubelet via Hyperkube (System Container) + Wants=rpc-statd.service + [Service] + ExecStartPre=/bin/mkdir -p /etc/kubernetes/cni/net.d + ExecStartPre=/bin/mkdir -p /etc/kubernetes/manifests + ExecStartPre=/bin/mkdir -p /var/lib/calico + ExecStartPre=/bin/mkdir -p /var/lib/kubelet/volumeplugins + ExecStartPre=/bin/mkdir -p /opt/cni/bin + ExecStartPre=/usr/bin/bash -c "grep 'certificate-authority-data' /etc/kubernetes/kubeconfig | awk '{print $2}' | base64 -d > /etc/kubernetes/ca.crt" + ExecStartPre=-/usr/bin/podman rm kubelet + ExecStart=/usr/bin/podman run --name kubelet \ + --privileged \ + --pid host \ + --network host \ + --volume /etc/kubernetes:/etc/kubernetes:ro,z \ + --volume /usr/lib/os-release:/etc/os-release:ro \ + --volume /etc/ssl/certs:/etc/ssl/certs:ro \ + --volume /lib/modules:/lib/modules:ro \ + --volume /run:/run \ + --volume /sys/fs/cgroup:/sys/fs/cgroup:ro \ + --volume /sys/fs/cgroup/systemd:/sys/fs/cgroup/systemd \ + --volume /etc/pki/tls/certs:/usr/share/ca-certificates:ro \ + --volume /var/lib/calico:/var/lib/calico \ + --volume /var/lib/docker:/var/lib/docker \ + --volume /var/lib/kubelet:/var/lib/kubelet:rshared,z \ + --volume /var/log:/var/log \ + --volume /var/run:/var/run \ + --volume /var/run/lock:/var/run/lock:z \ + --volume /opt/cni/bin:/opt/cni/bin:z \ + --volume /etc/iscsi:/etc/iscsi \ + --volume /sbin/iscsiadm:/sbin/iscsiadm \ + k8s.gcr.io/hyperkube:v1.15.0 /hyperkube kubelet \ + --anonymous-auth=false \ + --authentication-token-webhook \ + --authorization-mode=Webhook \ + --cgroup-driver=systemd \ + --cgroups-per-qos=true \ + --enforce-node-allocatable=pods \ + --client-ca-file=/etc/kubernetes/ca.crt \ + --cluster_dns=${cluster_dns_service_ip} \ + --cluster_domain=${cluster_domain_suffix} \ + 
--cni-conf-dir=/etc/kubernetes/cni/net.d \ + --exit-on-lock-contention \ + --hostname-override=${domain_name} \ + --kubeconfig=/etc/kubernetes/kubeconfig \ + --lock-file=/var/run/lock/kubelet.lock \ + --network-plugin=cni \ + --node-labels=node-role.kubernetes.io/master \ + --node-labels=node-role.kubernetes.io/controller="true" \ + --pod-manifest-path=/etc/kubernetes/manifests \ + --read-only-port=0 \ + --register-with-taints=node-role.kubernetes.io/master=:NoSchedule \ + --volume-plugin-dir=/var/lib/kubelet/volumeplugins + ExecStop=-/usr/bin/podman stop kubelet + Delegate=yes + Restart=always + RestartSec=10 + [Install] + WantedBy=multi-user.target + - name: kubelet.path + enabled: true + contents: | + [Unit] + Description=Watch for kubeconfig + [Path] + PathExists=/etc/kubernetes/kubeconfig + [Install] + WantedBy=multi-user.target + - name: bootkube.service + contents: | + [Unit] + Description=Bootstrap a Kubernetes control plane + ConditionPathExists=!/opt/bootkube/init_bootkube.done + [Service] + Type=oneshot + RemainAfterExit=true + WorkingDirectory=/opt/bootkube + ExecStart=/usr/bin/bash -c 'set -x && \ + [ -n "$(ls /opt/bootkube/assets/manifests-*/* 2>/dev/null)" ] && mv /opt/bootkube/assets/manifests-*/* /opt/bootkube/assets/manifests && rm -rf /opt/bootkube/assets/manifests-* && exec podman run --name bootkube --privileged \ + --network host \ + --volume /opt/bootkube/assets:/assets \ + --volume /etc/kubernetes:/etc/kubernetes \ + quay.io/coreos/bootkube:v0.14.0 \ + /bootkube start --asset-dir=/assets' + ExecStartPost=/bin/touch /opt/bootkube/init_bootkube.done storage: + directories: + - path: /etc/kubernetes + - path: /opt/bootkube files: + - path: /etc/hostname + mode: 0644 + contents: + inline: + ${domain_name} + - path: /etc/sysctl.d/reverse-path-filter.conf + contents: + inline: | + net.ipv4.conf.all.rp_filter=1 + - path: /etc/sysctl.d/max-user-watches.conf + contents: + inline: | + fs.inotify.max_user_watches=16184 + - path: 
/etc/systemd/system.conf.d/accounting.conf + contents: + inline: | + [Manager] + DefaultCPUAccounting=yes + DefaultMemoryAccounting=yes + DefaultBlockIOAccounting=yes - path: /etc/etcd/etcd.env mode: 0644 contents: inline: | + # TODO: Use a systemd dropin once podman v1.4.5 is avail. NOTIFY_SOCKET=/run/systemd/notify ETCD_NAME=${etcd_name} ETCD_DATA_DIR=/var/lib/etcd @@ -51,35 +183,6 @@ storage: ETCD_PEER_CERT_FILE=/etc/ssl/certs/etcd/peer.crt ETCD_PEER_KEY_FILE=/etc/ssl/certs/etcd/peer.key ETCD_PEER_CLIENT_CERT_AUTH=true - - path: /opt/bin/etcd-wrapper - mode: 0544 - contents: - inline: | - #!/usr/bin/bash -e - #--volume $${NOTIFY_SOCKET}:/run/systemd/notify \ - set -x - mkdir -p /var/lib/etcd - exec podman run --name etcd \ - --env-file /etc/etcd/etcd.env \ - --network host \ - --volume /var/lib/etcd:/var/lib/etcd:rw,Z \ - --volume /etc/ssl/etcd:/etc/ssl/certs:ro,Z \ - quay.io/coreos/etcd:v3.3.13 - - path: /etc/kubernetes/kubelet.env - mode: 0644 - contents: - inline: | - KUBELET_IMAGE_URL=docker://k8s.gcr.io/hyperkube - KUBELET_IMAGE_TAG=v1.15.0 - - path: /etc/hostname - mode: 0644 - contents: - inline: - ${domain_name} - - path: /etc/sysctl.d/max-user-watches.conf - contents: - inline: | - fs.inotify.max_user_watches=16184 passwd: users: - name: core diff --git a/bare-metal/fedora-coreos/kubernetes/fcc/worker.yaml b/bare-metal/fedora-coreos/kubernetes/fcc/worker.yaml index 8e18b2f1..482bf2c7 100644 --- a/bare-metal/fedora-coreos/kubernetes/fcc/worker.yaml +++ b/bare-metal/fedora-coreos/kubernetes/fcc/worker.yaml @@ -1,26 +1,116 @@ --- variant: fcos version: 1.0.0 +systemd: + units: + - name: docker.service + enabled: true + - name: wait-for-dns.service + enabled: true + contents: | + [Unit] + Description=Wait for DNS entries + Before=kubelet.service + [Service] + Type=oneshot + RemainAfterExit=true + ExecStart=/bin/sh -c 'while ! 
/usr/bin/grep '^[^#[:space:]]' /etc/resolv.conf > /dev/null; do sleep 1; done' + [Install] + RequiredBy=kubelet.service + - name: kubelet.service + contents: | + [Unit] + Description=Kubelet via Hyperkube (System Container) + Wants=rpc-statd.service + [Service] + ExecStartPre=/bin/mkdir -p /etc/kubernetes/cni/net.d + ExecStartPre=/bin/mkdir -p /etc/kubernetes/manifests + ExecStartPre=/bin/mkdir -p /var/lib/calico + ExecStartPre=/bin/mkdir -p /var/lib/kubelet/volumeplugins + ExecStartPre=/bin/mkdir -p /opt/cni/bin + ExecStartPre=/usr/bin/bash -c "grep 'certificate-authority-data' /etc/kubernetes/kubeconfig | awk '{print $2}' | base64 -d > /etc/kubernetes/ca.crt" + ExecStartPre=-/usr/bin/podman rm kubelet + ExecStart=/usr/bin/podman run --name kubelet \ + --privileged \ + --pid host \ + --network host \ + --volume /etc/kubernetes:/etc/kubernetes:ro,z \ + --volume /usr/lib/os-release:/etc/os-release:ro \ + --volume /etc/ssl/certs:/etc/ssl/certs:ro \ + --volume /lib/modules:/lib/modules:ro \ + --volume /run:/run \ + --volume /sys/fs/cgroup:/sys/fs/cgroup:ro \ + --volume /sys/fs/cgroup/systemd:/sys/fs/cgroup/systemd \ + --volume /etc/pki/tls/certs:/usr/share/ca-certificates:ro \ + --volume /var/lib/calico:/var/lib/calico \ + --volume /var/lib/docker:/var/lib/docker \ + --volume /var/lib/kubelet:/var/lib/kubelet:rshared,z \ + --volume /var/log:/var/log \ + --volume /var/run:/var/run \ + --volume /var/run/lock:/var/run/lock:z \ + --volume /opt/cni/bin:/opt/cni/bin:z \ + --volume /etc/iscsi:/etc/iscsi \ + --volume /sbin/iscsiadm:/sbin/iscsiadm \ + k8s.gcr.io/hyperkube:v1.15.0 /hyperkube kubelet \ + --anonymous-auth=false \ + --authentication-token-webhook \ + --authorization-mode=Webhook \ + --cgroup-driver=systemd \ + --cgroups-per-qos=true \ + --enforce-node-allocatable=pods \ + --client-ca-file=/etc/kubernetes/ca.crt \ + --cluster_dns=${cluster_dns_service_ip} \ + --cluster_domain=${cluster_domain_suffix} \ + --cni-conf-dir=/etc/kubernetes/cni/net.d \ + 
--exit-on-lock-contention \ + --hostname-override=${domain_name} \ + --kubeconfig=/etc/kubernetes/kubeconfig \ + --lock-file=/var/run/lock/kubelet.lock \ + --network-plugin=cni \ + --node-labels=node-role.kubernetes.io/node \ + --pod-manifest-path=/etc/kubernetes/manifests \ + --read-only-port=0 \ + --volume-plugin-dir=/var/lib/kubelet/volumeplugins + ExecStop=-/usr/bin/podman stop kubelet + Delegate=yes + Restart=always + RestartSec=10 + [Install] + WantedBy=multi-user.target + - name: kubelet.path + enabled: true + contents: | + [Unit] + Description=Watch for kubeconfig + [Path] + PathExists=/etc/kubernetes/kubeconfig + [Install] + WantedBy=multi-user.target storage: + directories: + - path: /etc/kubernetes + - path: /opt/bootkube files: - - path: /etc/kubernetes/kubelet.env - filesystem: root - mode: 0644 - contents: - inline: | - KUBELET_IMAGE_URL=docker://k8s.gcr.io/hyperkube - KUBELET_IMAGE_TAG=v1.15.0 - path: /etc/hostname - filesystem: root mode: 0644 contents: inline: ${domain_name} + - path: /etc/sysctl.d/reverse-path-filter.conf + contents: + inline: | + net.ipv4.conf.all.rp_filter=1 - path: /etc/sysctl.d/max-user-watches.conf - filesystem: root contents: inline: | fs.inotify.max_user_watches=16184 + - path: /etc/systemd/system.conf.d/accounting.conf + contents: + inline: | + [Manager] + DefaultCPUAccounting=yes + DefaultMemoryAccounting=yes + DefaultBlockIOAccounting=yes passwd: users: - name: core diff --git a/bare-metal/fedora-coreos/kubernetes/outputs.tf b/bare-metal/fedora-coreos/kubernetes/outputs.tf new file mode 100644 index 00000000..1fd43af6 --- /dev/null +++ b/bare-metal/fedora-coreos/kubernetes/outputs.tf @@ -0,0 +1,4 @@ +output "kubeconfig-admin" { + value = module.bootkube.kubeconfig-admin +} + diff --git a/bare-metal/fedora-coreos/kubernetes/ssh.tf b/bare-metal/fedora-coreos/kubernetes/ssh.tf index f11ad79f..da8329ab 100644 --- a/bare-metal/fedora-coreos/kubernetes/ssh.tf +++ b/bare-metal/fedora-coreos/kubernetes/ssh.tf @@ -71,3 +71,65 @@ 
resource "null_resource" "copy-controller-secrets" { } } +# Secure copy kubeconfig to all workers. Activates kubelet.service +resource "null_resource" "copy-worker-secrets" { + count = length(var.worker_names) + + # Without depends_on, remote-exec could start and wait for machines before + # matchbox groups are written, causing a deadlock. + depends_on = [ + matchbox_group.controller, + matchbox_group.worker, + ] + + connection { + type = "ssh" + host = var.worker_domains[count.index] + user = "core" + timeout = "60m" + } + + provisioner "file" { + content = module.bootkube.kubeconfig-kubelet + destination = "$HOME/kubeconfig" + } + + provisioner "remote-exec" { + inline = [ + "sudo mv $HOME/kubeconfig /etc/kubernetes/kubeconfig", + ] + } +} + +# Secure copy bootkube assets to ONE controller and start bootkube to perform +# one-time self-hosted cluster bootstrapping. +resource "null_resource" "bootkube-start" { + # Without depends_on, this remote-exec may start before the kubeconfig copy. + # Terraform only does one task at a time, so it would try to bootstrap + # while no Kubelets are running. + depends_on = [ + null_resource.copy-controller-secrets, + null_resource.copy-worker-secrets, + ] + + connection { + type = "ssh" + host = var.controller_domains[0] + user = "core" + timeout = "15m" + } + + provisioner "file" { + source = var.asset_dir + destination = "$HOME/assets" + } + + provisioner "remote-exec" { + inline = [ + "sudo mv $HOME/assets /opt/bootkube", + "sudo systemctl start bootkube", + ] + } +} + +