mirror of
https://github.com/puppetmaster/typhoon.git
synced 2025-08-03 19:31:34 +02:00
Compare commits
13 Commits
Author | SHA1 | Date | |
---|---|---|---|
f00ecde854 | |||
d85300f947 | |||
65f006e6cc | |||
8d3817e0ae | |||
5f5eec1175 | |||
5308fde3d3 | |||
9ab61d7bf5 | |||
6483f613c5 | |||
56c6bf431a | |||
63ab117205 | |||
1cd262e712 | |||
32bdda1b6c | |||
07d257aa7b |
@ -4,6 +4,15 @@ Notable changes between versions.
|
||||
|
||||
## Latest
|
||||
|
||||
## v1.8.4
|
||||
|
||||
* Kubernetes v1.8.4
|
||||
* Calico related bug fixes
|
||||
* Update Calico from v2.6.1 to v2.6.3
|
||||
* Update flannel from v0.9.0 to v0.9.1
|
||||
* Service accounts for kube-proxy and pod-checkpointer
|
||||
* Use kubernetes-incubator/bootkube v0.9.0
|
||||
|
||||
## v1.8.3
|
||||
|
||||
* Kubernetes v1.8.3
|
||||
|
12
README.md
12
README.md
@ -1,4 +1,4 @@
|
||||
# Typhoon []() <img align="right" src="https://storage.googleapis.com/dghubble/spin.png">
|
||||
# Typhoon []() <img align="right" src="https://storage.googleapis.com/poseidon/typhoon-logo.png">
|
||||
|
||||
Typhoon is a minimal and free Kubernetes distribution.
|
||||
|
||||
@ -9,9 +9,9 @@ Typhoon is a minimal and free Kubernetes distribution.
|
||||
|
||||
Typhoon distributes upstream Kubernetes, architectural conventions, and cluster addons, much like a GNU/Linux distribution provides the Linux kernel and userspace components.
|
||||
|
||||
## Features
|
||||
## Features <a href="https://www.cncf.io/certification/software-conformance/"><img align="right" src="https://storage.googleapis.com/poseidon/certified-kubernetes.png"></a>
|
||||
|
||||
* Kubernetes v1.8.3 (upstream, via [kubernetes-incubator/bootkube](https://github.com/kubernetes-incubator/bootkube))
|
||||
* Kubernetes v1.8.4 (upstream, via [kubernetes-incubator/bootkube](https://github.com/kubernetes-incubator/bootkube))
|
||||
* Single or multi-master, workloads isolated on workers, [Calico](https://www.projectcalico.org/) or [flannel](https://github.com/coreos/flannel) networking
|
||||
* On-cluster etcd with TLS, [RBAC](https://kubernetes.io/docs/admin/authorization/rbac/)-enabled, [network policy](https://kubernetes.io/docs/concepts/services-networking/network-policies/)
|
||||
* Ready for Ingress, Dashboards, Metrics, and other optional [addons](https://typhoon.psdn.io/addons/overview/)
|
||||
@ -78,9 +78,9 @@ In 4-8 minutes (varies by platform), the cluster will be ready. This Google Clou
|
||||
$ KUBECONFIG=/home/user/.secrets/clusters/yavin/auth/kubeconfig
|
||||
$ kubectl get nodes
|
||||
NAME STATUS AGE VERSION
|
||||
yavin-controller-0.c.example-com.internal Ready 6m v1.8.3
|
||||
yavin-worker-jrbf.c.example-com.internal Ready 5m v1.8.3
|
||||
yavin-worker-mzdm.c.example-com.internal Ready 5m v1.8.3
|
||||
yavin-controller-0.c.example-com.internal Ready 6m v1.8.4
|
||||
yavin-worker-jrbf.c.example-com.internal Ready 5m v1.8.4
|
||||
yavin-worker-mzdm.c.example-com.internal Ready 5m v1.8.4
|
||||
```
|
||||
|
||||
List the pods.
|
||||
|
@ -21,7 +21,7 @@ spec:
|
||||
spec:
|
||||
containers:
|
||||
- name: grafana
|
||||
image: grafana/grafana:4.6.1
|
||||
image: grafana/grafana:4.6.2
|
||||
env:
|
||||
- name: GF_SERVER_HTTP_PORT
|
||||
value: "8080"
|
||||
|
@ -19,7 +19,7 @@ spec:
|
||||
hostNetwork: true
|
||||
containers:
|
||||
- name: nginx-ingress-controller
|
||||
image: quay.io/kubernetes-ingress-controller/nginx-ingress-controller:0.9.0-beta.17
|
||||
image: quay.io/kubernetes-ingress-controller/nginx-ingress-controller:0.9.0-beta.19
|
||||
args:
|
||||
- /nginx-ingress-controller
|
||||
- --default-backend-service=$(POD_NAMESPACE)/default-backend
|
||||
|
@ -19,7 +19,7 @@ spec:
|
||||
hostNetwork: true
|
||||
containers:
|
||||
- name: nginx-ingress-controller
|
||||
image: quay.io/kubernetes-ingress-controller/nginx-ingress-controller:0.9.0-beta.17
|
||||
image: quay.io/kubernetes-ingress-controller/nginx-ingress-controller:0.9.0-beta.19
|
||||
args:
|
||||
- /nginx-ingress-controller
|
||||
- --default-backend-service=$(POD_NAMESPACE)/default-backend
|
||||
|
@ -7,7 +7,7 @@ data:
|
||||
# Rules adapted from those provided by coreos/prometheus-operator and SoundCloud
|
||||
alertmanager.rules.yaml: |+
|
||||
groups:
|
||||
- name: ./alertmanager.rules
|
||||
- name: alertmanager.rules
|
||||
rules:
|
||||
- alert: AlertmanagerConfigInconsistent
|
||||
expr: count_values("config_hash", alertmanager_config_hash) BY (service) / ON(service)
|
||||
@ -19,7 +19,6 @@ data:
|
||||
annotations:
|
||||
description: The configuration of the instances of the Alertmanager cluster
|
||||
`{{$labels.service}}` are out of sync.
|
||||
summary: Alertmanager configurations are inconsistent
|
||||
- alert: AlertmanagerDownOrMissing
|
||||
expr: label_replace(prometheus_operator_alertmanager_spec_replicas, "job", "alertmanager-$1",
|
||||
"alertmanager", "(.*)") / ON(job) GROUP_RIGHT() sum(up) BY (job) != 1
|
||||
@ -29,8 +28,7 @@ data:
|
||||
annotations:
|
||||
description: An unexpected number of Alertmanagers are scraped or Alertmanagers
|
||||
disappeared from discovery.
|
||||
summary: Alertmanager down or not discovered
|
||||
- alert: FailedReload
|
||||
- alert: AlertmanagerFailedReload
|
||||
expr: alertmanager_config_last_reload_successful == 0
|
||||
for: 10m
|
||||
labels:
|
||||
@ -38,7 +36,6 @@ data:
|
||||
annotations:
|
||||
description: Reloading Alertmanager's configuration has failed for {{ $labels.namespace
|
||||
}}/{{ $labels.pod}}.
|
||||
summary: Alertmanager configuration reload has failed
|
||||
etcd3.rules.yaml: |+
|
||||
groups:
|
||||
- name: ./etcd3.rules
|
||||
@ -165,7 +162,7 @@ data:
|
||||
summary: high commit durations
|
||||
general.rules.yaml: |+
|
||||
groups:
|
||||
- name: ./general.rules
|
||||
- name: general.rules
|
||||
rules:
|
||||
- alert: TargetDown
|
||||
expr: 100 * (count(up == 0) BY (job) / count(up) BY (job)) > 10
|
||||
@ -173,63 +170,31 @@ data:
|
||||
labels:
|
||||
severity: warning
|
||||
annotations:
|
||||
description: '{{ $value }}% or more of {{ $labels.job }} targets are down.'
|
||||
description: '{{ $value }}% of {{ $labels.job }} targets are down.'
|
||||
summary: Targets are down
|
||||
- alert: TooManyOpenFileDescriptors
|
||||
expr: 100 * (process_open_fds / process_max_fds) > 95
|
||||
for: 10m
|
||||
labels:
|
||||
severity: critical
|
||||
annotations:
|
||||
description: '{{ $labels.job }}: {{ $labels.namespace }}/{{ $labels.pod }} ({{
|
||||
$labels.instance }}) is using {{ $value }}% of the available file/socket descriptors.'
|
||||
summary: too many open file descriptors
|
||||
- record: instance:fd_utilization
|
||||
- record: fd_utilization
|
||||
expr: process_open_fds / process_max_fds
|
||||
- alert: FdExhaustionClose
|
||||
expr: predict_linear(instance:fd_utilization[1h], 3600 * 4) > 1
|
||||
expr: predict_linear(fd_utilization[1h], 3600 * 4) > 1
|
||||
for: 10m
|
||||
labels:
|
||||
severity: warning
|
||||
annotations:
|
||||
description: '{{ $labels.job }}: {{ $labels.namespace }}/{{ $labels.pod }} ({{
|
||||
$labels.instance }}) instance will exhaust in file/socket descriptors soon'
|
||||
description: '{{ $labels.job }}: {{ $labels.namespace }}/{{ $labels.pod }} instance
|
||||
will exhaust in file/socket descriptors within the next 4 hours'
|
||||
summary: file descriptors soon exhausted
|
||||
- alert: FdExhaustionClose
|
||||
expr: predict_linear(instance:fd_utilization[10m], 3600) > 1
|
||||
expr: predict_linear(fd_utilization[10m], 3600) > 1
|
||||
for: 10m
|
||||
labels:
|
||||
severity: critical
|
||||
annotations:
|
||||
description: '{{ $labels.job }}: {{ $labels.namespace }}/{{ $labels.pod }} ({{
|
||||
$labels.instance }}) instance will exhaust in file/socket descriptors soon'
|
||||
description: '{{ $labels.job }}: {{ $labels.namespace }}/{{ $labels.pod }} instance
|
||||
will exhaust in file/socket descriptors within the next hour'
|
||||
summary: file descriptors soon exhausted
|
||||
kube-apiserver.rules.yaml: |+
|
||||
groups:
|
||||
- name: ./kube-apiserver.rules
|
||||
rules:
|
||||
- alert: K8SApiserverDown
|
||||
expr: absent(up{job="kubernetes-apiservers"} == 1)
|
||||
for: 5m
|
||||
labels:
|
||||
severity: critical
|
||||
annotations:
|
||||
description: Prometheus failed to scrape API server(s), or all API servers have
|
||||
disappeared from service discovery.
|
||||
summary: API server unreachable
|
||||
- alert: K8SApiServerLatency
|
||||
expr: histogram_quantile(0.99, sum(apiserver_request_latencies_bucket{subresource!="log",verb!~"^(?:CONNECT|WATCHLIST|WATCH|PROXY)$"})
|
||||
WITHOUT (instance, resource)) / 1e+06 > 1
|
||||
for: 10m
|
||||
labels:
|
||||
severity: warning
|
||||
annotations:
|
||||
description: 99th percentile Latency for {{ $labels.verb }} requests to the
|
||||
kube-apiserver is higher than 1s.
|
||||
summary: Kubernetes apiserver latency is high
|
||||
kube-controller-manager.rules.yaml: |+
|
||||
groups:
|
||||
- name: ./kube-controller-manager.rules
|
||||
- name: kube-controller-manager.rules
|
||||
rules:
|
||||
- alert: K8SControllerManagerDown
|
||||
expr: absent(up{kubernetes_name="kube-controller-manager"} == 1)
|
||||
@ -242,8 +207,53 @@ data:
|
||||
summary: Controller manager is down
|
||||
kube-scheduler.rules.yaml: |+
|
||||
groups:
|
||||
- name: ./kube-scheduler.rules
|
||||
- name: kube-scheduler.rules
|
||||
rules:
|
||||
- record: cluster:scheduler_e2e_scheduling_latency_seconds:quantile
|
||||
expr: histogram_quantile(0.99, sum(scheduler_e2e_scheduling_latency_microseconds_bucket)
|
||||
BY (le, cluster)) / 1e+06
|
||||
labels:
|
||||
quantile: "0.99"
|
||||
- record: cluster:scheduler_e2e_scheduling_latency_seconds:quantile
|
||||
expr: histogram_quantile(0.9, sum(scheduler_e2e_scheduling_latency_microseconds_bucket)
|
||||
BY (le, cluster)) / 1e+06
|
||||
labels:
|
||||
quantile: "0.9"
|
||||
- record: cluster:scheduler_e2e_scheduling_latency_seconds:quantile
|
||||
expr: histogram_quantile(0.5, sum(scheduler_e2e_scheduling_latency_microseconds_bucket)
|
||||
BY (le, cluster)) / 1e+06
|
||||
labels:
|
||||
quantile: "0.5"
|
||||
- record: cluster:scheduler_scheduling_algorithm_latency_seconds:quantile
|
||||
expr: histogram_quantile(0.99, sum(scheduler_scheduling_algorithm_latency_microseconds_bucket)
|
||||
BY (le, cluster)) / 1e+06
|
||||
labels:
|
||||
quantile: "0.99"
|
||||
- record: cluster:scheduler_scheduling_algorithm_latency_seconds:quantile
|
||||
expr: histogram_quantile(0.9, sum(scheduler_scheduling_algorithm_latency_microseconds_bucket)
|
||||
BY (le, cluster)) / 1e+06
|
||||
labels:
|
||||
quantile: "0.9"
|
||||
- record: cluster:scheduler_scheduling_algorithm_latency_seconds:quantile
|
||||
expr: histogram_quantile(0.5, sum(scheduler_scheduling_algorithm_latency_microseconds_bucket)
|
||||
BY (le, cluster)) / 1e+06
|
||||
labels:
|
||||
quantile: "0.5"
|
||||
- record: cluster:scheduler_binding_latency_seconds:quantile
|
||||
expr: histogram_quantile(0.99, sum(scheduler_binding_latency_microseconds_bucket)
|
||||
BY (le, cluster)) / 1e+06
|
||||
labels:
|
||||
quantile: "0.99"
|
||||
- record: cluster:scheduler_binding_latency_seconds:quantile
|
||||
expr: histogram_quantile(0.9, sum(scheduler_binding_latency_microseconds_bucket)
|
||||
BY (le, cluster)) / 1e+06
|
||||
labels:
|
||||
quantile: "0.9"
|
||||
- record: cluster:scheduler_binding_latency_seconds:quantile
|
||||
expr: histogram_quantile(0.5, sum(scheduler_binding_latency_microseconds_bucket)
|
||||
BY (le, cluster)) / 1e+06
|
||||
labels:
|
||||
quantile: "0.5"
|
||||
- alert: K8SSchedulerDown
|
||||
expr: absent(up{kubernetes_name="kube-scheduler"} == 1)
|
||||
for: 5m
|
||||
@ -253,9 +263,65 @@ data:
|
||||
description: There is no running K8S scheduler. New pods are not being assigned
|
||||
to nodes.
|
||||
summary: Scheduler is down
|
||||
kube-state-metrics.rules.yaml: |+
|
||||
groups:
|
||||
- name: kube-state-metrics.rules
|
||||
rules:
|
||||
- alert: DeploymentGenerationMismatch
|
||||
expr: kube_deployment_status_observed_generation != kube_deployment_metadata_generation
|
||||
for: 15m
|
||||
labels:
|
||||
severity: warning
|
||||
annotations:
|
||||
description: Observed deployment generation does not match expected one for
|
||||
deployment {{$labels.namespaces}}{{$labels.deployment}}
|
||||
- alert: DeploymentReplicasNotUpdated
|
||||
expr: ((kube_deployment_status_replicas_updated != kube_deployment_spec_replicas)
|
||||
or (kube_deployment_status_replicas_available != kube_deployment_spec_replicas))
|
||||
unless (kube_deployment_spec_paused == 1)
|
||||
for: 15m
|
||||
labels:
|
||||
severity: warning
|
||||
annotations:
|
||||
description: Replicas are not updated and available for deployment {{$labels.namespaces}}/{{$labels.deployment}}
|
||||
- alert: DaemonSetRolloutStuck
|
||||
expr: kube_daemonset_status_current_number_ready / kube_daemonset_status_desired_number_scheduled
|
||||
* 100 < 100
|
||||
for: 15m
|
||||
labels:
|
||||
severity: warning
|
||||
annotations:
|
||||
description: Only {{$value}}% of desired pods scheduled and ready for daemon
|
||||
set {{$labels.namespaces}}/{{$labels.daemonset}}
|
||||
- alert: K8SDaemonSetsNotScheduled
|
||||
expr: kube_daemonset_status_desired_number_scheduled - kube_daemonset_status_current_number_scheduled
|
||||
> 0
|
||||
for: 10m
|
||||
labels:
|
||||
severity: warning
|
||||
annotations:
|
||||
description: A number of daemonsets are not scheduled.
|
||||
summary: Daemonsets are not scheduled correctly
|
||||
- alert: DaemonSetsMissScheduled
|
||||
expr: kube_daemonset_status_number_misscheduled > 0
|
||||
for: 10m
|
||||
labels:
|
||||
severity: warning
|
||||
annotations:
|
||||
description: A number of daemonsets are running where they are not supposed
|
||||
to run.
|
||||
summary: Daemonsets are not scheduled correctly
|
||||
- alert: PodFrequentlyRestarting
|
||||
expr: increase(kube_pod_container_status_restarts[1h]) > 5
|
||||
for: 10m
|
||||
labels:
|
||||
severity: warning
|
||||
annotations:
|
||||
description: Pod {{$labels.namespaces}}/{{$labels.pod}} is was restarted {{$value}}
|
||||
times within the last hour
|
||||
kubelet.rules.yaml: |+
|
||||
groups:
|
||||
- name: ./kubelet.rules
|
||||
- name: kubelet.rules
|
||||
rules:
|
||||
- alert: K8SNodeNotReady
|
||||
expr: kube_node_status_condition{condition="Ready",status="true"} == 0
|
||||
@ -274,20 +340,17 @@ data:
|
||||
labels:
|
||||
severity: critical
|
||||
annotations:
|
||||
description: '{{ $value }} Kubernetes nodes (more than 10% are in the NotReady
|
||||
state).'
|
||||
summary: Many Kubernetes nodes are Not Ready
|
||||
description: '{{ $value }}% of Kubernetes nodes are not ready'
|
||||
- alert: K8SKubeletDown
|
||||
expr: count(up{job="kubernetes-nodes"} == 0) / count(up{job="kubernetes-nodes"}) > 0.03
|
||||
expr: count(up{job="kubernetes-nodes"} == 0) / count(up{job="kubernetes-nodes"}) * 100 > 3
|
||||
for: 1h
|
||||
labels:
|
||||
severity: warning
|
||||
annotations:
|
||||
description: Prometheus failed to scrape {{ $value }}% of kubelets.
|
||||
summary: Many Kubelets cannot be scraped
|
||||
- alert: K8SKubeletDown
|
||||
expr: absent(up{job="kubernetes-nodes"} == 1) or count(up{job="kubernetes-nodes"} == 0) / count(up{job="kubernetes-nodes"})
|
||||
> 0.1
|
||||
expr: (absent(up{job="kubernetes-nodes"} == 1) or count(up{job="kubernetes-nodes"} == 0) / count(up{job="kubernetes-nodes"}))
|
||||
* 100 > 1
|
||||
for: 1h
|
||||
labels:
|
||||
severity: critical
|
||||
@ -297,6 +360,7 @@ data:
|
||||
summary: Many Kubelets cannot be scraped
|
||||
- alert: K8SKubeletTooManyPods
|
||||
expr: kubelet_running_pod_count > 100
|
||||
for: 10m
|
||||
labels:
|
||||
severity: warning
|
||||
annotations:
|
||||
@ -305,124 +369,112 @@ data:
|
||||
summary: Kubelet is close to pod limit
|
||||
kubernetes.rules.yaml: |+
|
||||
groups:
|
||||
- name: ./kubernetes.rules
|
||||
- name: kubernetes.rules
|
||||
rules:
|
||||
- record: cluster_namespace_controller_pod_container:spec_memory_limit_bytes
|
||||
expr: sum(label_replace(container_spec_memory_limit_bytes{container_name!=""},
|
||||
"controller", "$1", "pod_name", "^(.*)-[a-z0-9]+")) BY (cluster, namespace,
|
||||
controller, pod_name, container_name)
|
||||
- record: cluster_namespace_controller_pod_container:spec_cpu_shares
|
||||
expr: sum(label_replace(container_spec_cpu_shares{container_name!=""}, "controller",
|
||||
"$1", "pod_name", "^(.*)-[a-z0-9]+")) BY (cluster, namespace, controller, pod_name,
|
||||
container_name)
|
||||
- record: cluster_namespace_controller_pod_container:cpu_usage:rate
|
||||
expr: sum(label_replace(irate(container_cpu_usage_seconds_total{container_name!=""}[5m]),
|
||||
"controller", "$1", "pod_name", "^(.*)-[a-z0-9]+")) BY (cluster, namespace,
|
||||
controller, pod_name, container_name)
|
||||
- record: cluster_namespace_controller_pod_container:memory_usage:bytes
|
||||
expr: sum(label_replace(container_memory_usage_bytes{container_name!=""}, "controller",
|
||||
"$1", "pod_name", "^(.*)-[a-z0-9]+")) BY (cluster, namespace, controller, pod_name,
|
||||
container_name)
|
||||
- record: cluster_namespace_controller_pod_container:memory_working_set:bytes
|
||||
expr: sum(label_replace(container_memory_working_set_bytes{container_name!=""},
|
||||
"controller", "$1", "pod_name", "^(.*)-[a-z0-9]+")) BY (cluster, namespace,
|
||||
controller, pod_name, container_name)
|
||||
- record: cluster_namespace_controller_pod_container:memory_rss:bytes
|
||||
expr: sum(label_replace(container_memory_rss{container_name!=""}, "controller",
|
||||
"$1", "pod_name", "^(.*)-[a-z0-9]+")) BY (cluster, namespace, controller, pod_name,
|
||||
container_name)
|
||||
- record: cluster_namespace_controller_pod_container:memory_cache:bytes
|
||||
expr: sum(label_replace(container_memory_cache{container_name!=""}, "controller",
|
||||
"$1", "pod_name", "^(.*)-[a-z0-9]+")) BY (cluster, namespace, controller, pod_name,
|
||||
container_name)
|
||||
- record: cluster_namespace_controller_pod_container:disk_usage:bytes
|
||||
expr: sum(label_replace(container_disk_usage_bytes{container_name!=""}, "controller",
|
||||
"$1", "pod_name", "^(.*)-[a-z0-9]+")) BY (cluster, namespace, controller, pod_name,
|
||||
container_name)
|
||||
- record: cluster_namespace_controller_pod_container:memory_pagefaults:rate
|
||||
expr: sum(label_replace(irate(container_memory_failures_total{container_name!=""}[5m]),
|
||||
"controller", "$1", "pod_name", "^(.*)-[a-z0-9]+")) BY (cluster, namespace,
|
||||
controller, pod_name, container_name, scope, type)
|
||||
- record: cluster_namespace_controller_pod_container:memory_oom:rate
|
||||
expr: sum(label_replace(irate(container_memory_failcnt{container_name!=""}[5m]),
|
||||
"controller", "$1", "pod_name", "^(.*)-[a-z0-9]+")) BY (cluster, namespace,
|
||||
controller, pod_name, container_name, scope, type)
|
||||
- record: cluster:memory_allocation:percent
|
||||
expr: 100 * sum(container_spec_memory_limit_bytes{pod_name!=""}) BY (cluster)
|
||||
/ sum(machine_memory_bytes) BY (cluster)
|
||||
- record: cluster:memory_used:percent
|
||||
expr: 100 * sum(container_memory_usage_bytes{pod_name!=""}) BY (cluster) / sum(machine_memory_bytes)
|
||||
BY (cluster)
|
||||
- record: cluster:cpu_allocation:percent
|
||||
expr: 100 * sum(container_spec_cpu_shares{pod_name!=""}) BY (cluster) / sum(container_spec_cpu_shares{id="/"}
|
||||
* ON(cluster, instance) machine_cpu_cores) BY (cluster)
|
||||
- record: cluster:node_cpu_use:percent
|
||||
expr: 100 * sum(rate(node_cpu{mode!="idle"}[5m])) BY (cluster) / sum(machine_cpu_cores)
|
||||
BY (cluster)
|
||||
- record: cluster_resource_verb:apiserver_latency:quantile_seconds
|
||||
expr: histogram_quantile(0.99, sum(apiserver_request_latencies_bucket) BY (le,
|
||||
cluster, job, resource, verb)) / 1e+06
|
||||
- record: pod_name:container_memory_usage_bytes:sum
|
||||
expr: sum(container_memory_usage_bytes{container_name!="POD",pod_name!=""}) BY
|
||||
(pod_name)
|
||||
- record: pod_name:container_spec_cpu_shares:sum
|
||||
expr: sum(container_spec_cpu_shares{container_name!="POD",pod_name!=""}) BY (pod_name)
|
||||
- record: pod_name:container_cpu_usage:sum
|
||||
expr: sum(rate(container_cpu_usage_seconds_total{container_name!="POD",pod_name!=""}[5m]))
|
||||
BY (pod_name)
|
||||
- record: pod_name:container_fs_usage_bytes:sum
|
||||
expr: sum(container_fs_usage_bytes{container_name!="POD",pod_name!=""}) BY (pod_name)
|
||||
- record: namespace:container_memory_usage_bytes:sum
|
||||
expr: sum(container_memory_usage_bytes{container_name!=""}) BY (namespace)
|
||||
- record: namespace:container_spec_cpu_shares:sum
|
||||
expr: sum(container_spec_cpu_shares{container_name!=""}) BY (namespace)
|
||||
- record: namespace:container_cpu_usage:sum
|
||||
expr: sum(rate(container_cpu_usage_seconds_total{container_name!="POD"}[5m]))
|
||||
BY (namespace)
|
||||
- record: cluster:memory_usage:ratio
|
||||
expr: sum(container_memory_usage_bytes{container_name!="POD",pod_name!=""}) BY
|
||||
(cluster) / sum(machine_memory_bytes) BY (cluster)
|
||||
- record: cluster:container_spec_cpu_shares:ratio
|
||||
expr: sum(container_spec_cpu_shares{container_name!="POD",pod_name!=""}) / 1000
|
||||
/ sum(machine_cpu_cores)
|
||||
- record: cluster:container_cpu_usage:ratio
|
||||
expr: sum(rate(container_cpu_usage_seconds_total{container_name!="POD",pod_name!=""}[5m]))
|
||||
/ sum(machine_cpu_cores)
|
||||
- record: apiserver_latency_seconds:quantile
|
||||
expr: histogram_quantile(0.99, rate(apiserver_request_latencies_bucket[5m])) /
|
||||
1e+06
|
||||
labels:
|
||||
quantile: "0.99"
|
||||
- record: cluster_resource_verb:apiserver_latency:quantile_seconds
|
||||
expr: histogram_quantile(0.9, sum(apiserver_request_latencies_bucket) BY (le,
|
||||
cluster, job, resource, verb)) / 1e+06
|
||||
- record: apiserver_latency:quantile_seconds
|
||||
expr: histogram_quantile(0.9, rate(apiserver_request_latencies_bucket[5m])) /
|
||||
1e+06
|
||||
labels:
|
||||
quantile: "0.9"
|
||||
- record: cluster_resource_verb:apiserver_latency:quantile_seconds
|
||||
expr: histogram_quantile(0.5, sum(apiserver_request_latencies_bucket) BY (le,
|
||||
cluster, job, resource, verb)) / 1e+06
|
||||
- record: apiserver_latency_seconds:quantile
|
||||
expr: histogram_quantile(0.5, rate(apiserver_request_latencies_bucket[5m])) /
|
||||
1e+06
|
||||
labels:
|
||||
quantile: "0.5"
|
||||
- record: cluster:scheduler_e2e_scheduling_latency:quantile_seconds
|
||||
expr: histogram_quantile(0.99, sum(scheduler_e2e_scheduling_latency_microseconds_bucket)
|
||||
BY (le, cluster)) / 1e+06
|
||||
- alert: APIServerLatencyHigh
|
||||
expr: apiserver_latency_seconds:quantile{quantile="0.99",subresource!="log",verb!~"^(?:WATCH|WATCHLIST|PROXY|CONNECT)$"}
|
||||
> 1
|
||||
for: 10m
|
||||
labels:
|
||||
quantile: "0.99"
|
||||
- record: cluster:scheduler_e2e_scheduling_latency:quantile_seconds
|
||||
expr: histogram_quantile(0.9, sum(scheduler_e2e_scheduling_latency_microseconds_bucket)
|
||||
BY (le, cluster)) / 1e+06
|
||||
severity: warning
|
||||
annotations:
|
||||
description: the API server has a 99th percentile latency of {{ $value }} seconds
|
||||
for {{$labels.verb}} {{$labels.resource}}
|
||||
- alert: APIServerLatencyHigh
|
||||
expr: apiserver_latency_seconds:quantile{quantile="0.99",subresource!="log",verb!~"^(?:WATCH|WATCHLIST|PROXY|CONNECT)$"}
|
||||
> 4
|
||||
for: 10m
|
||||
labels:
|
||||
quantile: "0.9"
|
||||
- record: cluster:scheduler_e2e_scheduling_latency:quantile_seconds
|
||||
expr: histogram_quantile(0.5, sum(scheduler_e2e_scheduling_latency_microseconds_bucket)
|
||||
BY (le, cluster)) / 1e+06
|
||||
severity: critical
|
||||
annotations:
|
||||
description: the API server has a 99th percentile latency of {{ $value }} seconds
|
||||
for {{$labels.verb}} {{$labels.resource}}
|
||||
- alert: APIServerErrorsHigh
|
||||
expr: rate(apiserver_request_count{code=~"^(?:5..)$"}[5m]) / rate(apiserver_request_count[5m])
|
||||
* 100 > 2
|
||||
for: 10m
|
||||
labels:
|
||||
quantile: "0.5"
|
||||
- record: cluster:scheduler_scheduling_algorithm_latency:quantile_seconds
|
||||
expr: histogram_quantile(0.99, sum(scheduler_scheduling_algorithm_latency_microseconds_bucket)
|
||||
BY (le, cluster)) / 1e+06
|
||||
severity: warning
|
||||
annotations:
|
||||
description: API server returns errors for {{ $value }}% of requests
|
||||
- alert: APIServerErrorsHigh
|
||||
expr: rate(apiserver_request_count{code=~"^(?:5..)$"}[5m]) / rate(apiserver_request_count[5m])
|
||||
* 100 > 5
|
||||
for: 10m
|
||||
labels:
|
||||
quantile: "0.99"
|
||||
- record: cluster:scheduler_scheduling_algorithm_latency:quantile_seconds
|
||||
expr: histogram_quantile(0.9, sum(scheduler_scheduling_algorithm_latency_microseconds_bucket)
|
||||
BY (le, cluster)) / 1e+06
|
||||
severity: critical
|
||||
annotations:
|
||||
description: API server returns errors for {{ $value }}% of requests
|
||||
- alert: K8SApiserverDown
|
||||
expr: absent(up{job="kubernetes-apiservers"} == 1)
|
||||
for: 20m
|
||||
labels:
|
||||
quantile: "0.9"
|
||||
- record: cluster:scheduler_scheduling_algorithm_latency:quantile_seconds
|
||||
expr: histogram_quantile(0.5, sum(scheduler_scheduling_algorithm_latency_microseconds_bucket)
|
||||
BY (le, cluster)) / 1e+06
|
||||
labels:
|
||||
quantile: "0.5"
|
||||
- record: cluster:scheduler_binding_latency:quantile_seconds
|
||||
expr: histogram_quantile(0.99, sum(scheduler_binding_latency_microseconds_bucket)
|
||||
BY (le, cluster)) / 1e+06
|
||||
labels:
|
||||
quantile: "0.99"
|
||||
- record: cluster:scheduler_binding_latency:quantile_seconds
|
||||
expr: histogram_quantile(0.9, sum(scheduler_binding_latency_microseconds_bucket)
|
||||
BY (le, cluster)) / 1e+06
|
||||
labels:
|
||||
quantile: "0.9"
|
||||
- record: cluster:scheduler_binding_latency:quantile_seconds
|
||||
expr: histogram_quantile(0.5, sum(scheduler_binding_latency_microseconds_bucket)
|
||||
BY (le, cluster)) / 1e+06
|
||||
labels:
|
||||
quantile: "0.5"
|
||||
severity: critical
|
||||
annotations:
|
||||
description: No API servers are reachable or all have disappeared from service
|
||||
discovery
|
||||
node.rules.yaml: |+
|
||||
groups:
|
||||
- name: ./node.rules
|
||||
- name: node.rules
|
||||
rules:
|
||||
- record: instance:node_cpu:rate:sum
|
||||
expr: sum(rate(node_cpu{mode!="idle",mode!="iowait",mode!~"^(?:guest.*)$"}[3m]))
|
||||
BY (instance)
|
||||
- record: instance:node_filesystem_usage:sum
|
||||
expr: sum((node_filesystem_size{mountpoint="/"} - node_filesystem_free{mountpoint="/"}))
|
||||
BY (instance)
|
||||
- record: instance:node_network_receive_bytes:rate:sum
|
||||
expr: sum(rate(node_network_receive_bytes[3m])) BY (instance)
|
||||
- record: instance:node_network_transmit_bytes:rate:sum
|
||||
expr: sum(rate(node_network_transmit_bytes[3m])) BY (instance)
|
||||
- record: instance:node_cpu:ratio
|
||||
expr: sum(rate(node_cpu{mode!="idle"}[5m])) WITHOUT (cpu, mode) / ON(instance)
|
||||
GROUP_LEFT() count(sum(node_cpu) BY (instance, cpu)) BY (instance)
|
||||
- record: cluster:node_cpu:sum_rate5m
|
||||
expr: sum(rate(node_cpu{mode!="idle"}[5m]))
|
||||
- record: cluster:node_cpu:ratio
|
||||
expr: cluster:node_cpu:rate5m / count(sum(node_cpu) BY (instance, cpu))
|
||||
- alert: NodeExporterDown
|
||||
expr: absent(up{kubernetes_name="node-exporter"} == 1)
|
||||
for: 10m
|
||||
@ -430,43 +482,65 @@ data:
|
||||
severity: warning
|
||||
annotations:
|
||||
description: Prometheus could not scrape a node-exporter for more than 10m,
|
||||
or node-exporters have disappeared from discovery.
|
||||
summary: node-exporter cannot be scraped
|
||||
- alert: K8SNodeOutOfDisk
|
||||
expr: kube_node_status_condition{condition="OutOfDisk",status="true"} == 1
|
||||
or node-exporters have disappeared from discovery
|
||||
- alert: NodeDiskRunningFull
|
||||
expr: predict_linear(node_filesystem_free[6h], 3600 * 24) < 0
|
||||
for: 30m
|
||||
labels:
|
||||
severity: warning
|
||||
annotations:
|
||||
description: device {{$labels.device}} on node {{$labels.instance}} is running
|
||||
full within the next 24 hours (mounted at {{$labels.mountpoint}})
|
||||
- alert: NodeDiskRunningFull
|
||||
expr: predict_linear(node_filesystem_free[30m], 3600 * 2) < 0
|
||||
for: 10m
|
||||
labels:
|
||||
service: k8s
|
||||
severity: critical
|
||||
annotations:
|
||||
description: '{{ $labels.node }} has run out of disk space.'
|
||||
summary: Node ran out of disk space.
|
||||
- alert: K8SNodeMemoryPressure
|
||||
expr: kube_node_status_condition{condition="MemoryPressure",status="true"} ==
|
||||
1
|
||||
labels:
|
||||
service: k8s
|
||||
severity: warning
|
||||
annotations:
|
||||
description: '{{ $labels.node }} is under memory pressure.'
|
||||
summary: Node is under memory pressure.
|
||||
- alert: K8SNodeDiskPressure
|
||||
expr: kube_node_status_condition{condition="DiskPressure",status="true"} == 1
|
||||
labels:
|
||||
service: k8s
|
||||
severity: warning
|
||||
annotations:
|
||||
description: '{{ $labels.node }} is under disk pressure.'
|
||||
summary: Node is under disk pressure.
|
||||
description: device {{$labels.device}} on node {{$labels.instance}} is running
|
||||
full within the next 2 hours (mounted at {{$labels.mountpoint}})
|
||||
prometheus.rules.yaml: |+
|
||||
groups:
|
||||
- name: ./prometheus.rules
|
||||
- name: prometheus.rules
|
||||
rules:
|
||||
- alert: FailedReload
|
||||
- alert: PrometheusConfigReloadFailed
|
||||
expr: prometheus_config_last_reload_successful == 0
|
||||
for: 10m
|
||||
labels:
|
||||
severity: warning
|
||||
annotations:
|
||||
description: Reloading Prometheus' configuration has failed for {{ $labels.namespace
|
||||
}}/{{ $labels.pod}}.
|
||||
summary: Prometheus configuration reload has failed
|
||||
description: Reloading Prometheus' configuration has failed for {{$labels.namespace}}/{{$labels.pod}}
|
||||
- alert: PrometheusNotificationQueueRunningFull
|
||||
expr: predict_linear(prometheus_notifications_queue_length[5m], 60 * 30) > prometheus_notifications_queue_capacity
|
||||
for: 10m
|
||||
labels:
|
||||
severity: warning
|
||||
annotations:
|
||||
description: Prometheus' alert notification queue is running full for {{$labels.namespace}}/{{
|
||||
$labels.pod}}
|
||||
- alert: PrometheusErrorSendingAlerts
|
||||
expr: rate(prometheus_notifications_errors_total[5m]) / rate(prometheus_notifications_sent_total[5m])
|
||||
> 0.01
|
||||
for: 10m
|
||||
labels:
|
||||
severity: warning
|
||||
annotations:
|
||||
description: Errors while sending alerts from Prometheus {{$labels.namespace}}/{{
|
||||
$labels.pod}} to Alertmanager {{$labels.Alertmanager}}
|
||||
- alert: PrometheusErrorSendingAlerts
|
||||
expr: rate(prometheus_notifications_errors_total[5m]) / rate(prometheus_notifications_sent_total[5m])
|
||||
> 0.03
|
||||
for: 10m
|
||||
labels:
|
||||
severity: critical
|
||||
annotations:
|
||||
description: Errors while sending alerts from Prometheus {{$labels.namespace}}/{{
|
||||
$labels.pod}} to Alertmanager {{$labels.Alertmanager}}
|
||||
- alert: PrometheusNotConnectedToAlertmanagers
|
||||
expr: prometheus_notifications_alertmanagers_discovered < 1
|
||||
for: 10m
|
||||
labels:
|
||||
severity: warning
|
||||
annotations:
|
||||
description: Prometheus {{ $labels.namespace }}/{{ $labels.pod}} is not connected
|
||||
to any Alertmanagers
|
||||
|
@ -1,4 +1,4 @@
|
||||
# Typhoon
|
||||
# Typhoon <img align="right" src="https://storage.googleapis.com/poseidon/typhoon-logo.png">
|
||||
|
||||
Typhoon is a minimal and free Kubernetes distribution.
|
||||
|
||||
@ -9,9 +9,9 @@ Typhoon is a minimal and free Kubernetes distribution.
|
||||
|
||||
Typhoon distributes upstream Kubernetes, architectural conventions, and cluster addons, much like a GNU/Linux distribution provides the Linux kernel and userspace components.
|
||||
|
||||
## Features
|
||||
## Features <a href="https://www.cncf.io/certification/software-conformance/"><img align="right" src="https://storage.googleapis.com/poseidon/certified-kubernetes.png"></a>
|
||||
|
||||
* Kubernetes v1.8.3 (upstream, via [kubernetes-incubator/bootkube](https://github.com/kubernetes-incubator/bootkube))
|
||||
* Kubernetes v1.8.4 (upstream, via [kubernetes-incubator/bootkube](https://github.com/kubernetes-incubator/bootkube))
|
||||
* Single or multi-master, workloads isolated on workers, [Calico](https://www.projectcalico.org/) or [flannel](https://github.com/coreos/flannel) networking
|
||||
* On-cluster etcd with TLS, [RBAC](https://kubernetes.io/docs/admin/authorization/rbac/)-enabled, [network policy](https://kubernetes.io/docs/concepts/services-networking/network-policies/)
|
||||
* Ready for Ingress, Dashboards, Metrics, and other optional [addons](https://typhoon.psdn.io/addons/overview/)
|
||||
|
@ -1,6 +1,6 @@
|
||||
# Self-hosted Kubernetes assets (kubeconfig, manifests)
|
||||
module "bootkube" {
|
||||
source = "git::https://github.com/poseidon/terraform-render-bootkube.git?ref=v0.8.2"
|
||||
source = "git::https://github.com/poseidon/terraform-render-bootkube.git?ref=v0.9.0"
|
||||
|
||||
cluster_name = "${var.cluster_name}"
|
||||
api_servers = ["${format("%s.%s", var.cluster_name, var.dns_zone)}"]
|
||||
|
@ -128,7 +128,7 @@ storage:
|
||||
contents:
|
||||
inline: |
|
||||
KUBELET_IMAGE_URL=docker://gcr.io/google_containers/hyperkube
|
||||
KUBELET_IMAGE_TAG=v1.8.3
|
||||
KUBELET_IMAGE_TAG=v1.8.4
|
||||
- path: /etc/sysctl.d/max-user-watches.conf
|
||||
filesystem: root
|
||||
contents:
|
||||
@ -151,7 +151,7 @@ storage:
|
||||
[ -d /opt/bootkube/assets/experimental/manifests ] && mv /opt/bootkube/assets/experimental/manifests/* /opt/bootkube/assets/manifests && rm -r /opt/bootkube/assets/experimental/manifests
|
||||
[ -d /opt/bootkube/assets/experimental/bootstrap-manifests ] && mv /opt/bootkube/assets/experimental/bootstrap-manifests/* /opt/bootkube/assets/bootstrap-manifests && rm -r /opt/bootkube/assets/experimental/bootstrap-manifests
|
||||
BOOTKUBE_ACI="$${BOOTKUBE_ACI:-quay.io/coreos/bootkube}"
|
||||
BOOTKUBE_VERSION="$${BOOTKUBE_VERSION:-v0.8.2}"
|
||||
BOOTKUBE_VERSION="$${BOOTKUBE_VERSION:-v0.9.0}"
|
||||
BOOTKUBE_ASSETS="$${BOOTKUBE_ASSETS:-/opt/bootkube/assets}"
|
||||
exec /usr/bin/rkt run \
|
||||
--trust-keys-from-https \
|
||||
|
@ -103,7 +103,7 @@ storage:
|
||||
contents:
|
||||
inline: |
|
||||
KUBELET_IMAGE_URL=docker://gcr.io/google_containers/hyperkube
|
||||
KUBELET_IMAGE_TAG=v1.8.3
|
||||
KUBELET_IMAGE_TAG=v1.8.4
|
||||
- path: /etc/sysctl.d/max-user-watches.conf
|
||||
filesystem: root
|
||||
contents:
|
||||
@ -121,7 +121,7 @@ storage:
|
||||
--volume config,kind=host,source=/etc/kubernetes \
|
||||
--mount volume=config,target=/etc/kubernetes \
|
||||
--insecure-options=image \
|
||||
docker://gcr.io/google_containers/hyperkube:v1.8.3 \
|
||||
docker://gcr.io/google_containers/hyperkube:v1.8.4 \
|
||||
--net=host \
|
||||
--dns=host \
|
||||
--exec=/kubectl -- --kubeconfig=/etc/kubernetes/kubeconfig delete node $(hostname)
|
||||
|
@ -1,4 +1,4 @@
|
||||
# Typhoon
|
||||
# Typhoon <img align="right" src="https://storage.googleapis.com/poseidon/typhoon-logo.png">
|
||||
|
||||
Typhoon is a minimal and free Kubernetes distribution.
|
||||
|
||||
@ -9,9 +9,9 @@ Typhoon is a minimal and free Kubernetes distribution.
|
||||
|
||||
Typhoon distributes upstream Kubernetes, architectural conventions, and cluster addons, much like a GNU/Linux distribution provides the Linux kernel and userspace components.
|
||||
|
||||
## Features
|
||||
## Features <a href="https://www.cncf.io/certification/software-conformance/"><img align="right" src="https://storage.googleapis.com/poseidon/certified-kubernetes.png"></a>
|
||||
|
||||
* Kubernetes v1.8.3 (upstream, via [kubernetes-incubator/bootkube](https://github.com/kubernetes-incubator/bootkube))
|
||||
* Kubernetes v1.8.4 (upstream, via [kubernetes-incubator/bootkube](https://github.com/kubernetes-incubator/bootkube))
|
||||
* Single or multi-master, workloads isolated on workers, [Calico](https://www.projectcalico.org/) or [flannel](https://github.com/coreos/flannel) networking
|
||||
* On-cluster etcd with TLS, [RBAC](https://kubernetes.io/docs/admin/authorization/rbac/)-enabled, [network policy](https://kubernetes.io/docs/concepts/services-networking/network-policies/)
|
||||
* Ready for Ingress, Dashboards, Metrics, and other optional [addons](https://typhoon.psdn.io/addons/overview/)
|
||||
|
@ -1,6 +1,6 @@
|
||||
# Self-hosted Kubernetes assets (kubeconfig, manifests)
|
||||
module "bootkube" {
|
||||
source = "git::https://github.com/poseidon/terraform-render-bootkube.git?ref=v0.8.2"
|
||||
source = "git::https://github.com/poseidon/terraform-render-bootkube.git?ref=v0.9.0"
|
||||
|
||||
cluster_name = "${var.cluster_name}"
|
||||
api_servers = ["${var.k8s_domain_name}"]
|
||||
|
@ -114,7 +114,7 @@ storage:
|
||||
contents:
|
||||
inline: |
|
||||
KUBELET_IMAGE_URL=docker://gcr.io/google_containers/hyperkube
|
||||
KUBELET_IMAGE_TAG=v1.8.3
|
||||
KUBELET_IMAGE_TAG=v1.8.4
|
||||
- path: /etc/hostname
|
||||
filesystem: root
|
||||
mode: 0644
|
||||
@ -143,7 +143,7 @@ storage:
|
||||
[ -d /opt/bootkube/assets/experimental/manifests ] && mv /opt/bootkube/assets/experimental/manifests/* /opt/bootkube/assets/manifests && rm -r /opt/bootkube/assets/experimental/manifests
|
||||
[ -d /opt/bootkube/assets/experimental/bootstrap-manifests ] && mv /opt/bootkube/assets/experimental/bootstrap-manifests/* /opt/bootkube/assets/bootstrap-manifests && rm -r /opt/bootkube/assets/experimental/bootstrap-manifests
|
||||
BOOTKUBE_ACI="$${BOOTKUBE_ACI:-quay.io/coreos/bootkube}"
|
||||
BOOTKUBE_VERSION="$${BOOTKUBE_VERSION:-v0.8.2}"
|
||||
BOOTKUBE_VERSION="$${BOOTKUBE_VERSION:-v0.9.0}"
|
||||
BOOTKUBE_ASSETS="$${BOOTKUBE_ASSETS:-/opt/bootkube/assets}"
|
||||
exec /usr/bin/rkt run \
|
||||
--trust-keys-from-https \
|
||||
|
@ -80,7 +80,7 @@ storage:
|
||||
contents:
|
||||
inline: |
|
||||
KUBELET_IMAGE_URL=docker://gcr.io/google_containers/hyperkube
|
||||
KUBELET_IMAGE_TAG=v1.8.3
|
||||
KUBELET_IMAGE_TAG=v1.8.4
|
||||
- path: /etc/hostname
|
||||
filesystem: root
|
||||
mode: 0644
|
||||
|
@ -8,6 +8,7 @@ resource "matchbox_profile" "container-linux-install" {
|
||||
]
|
||||
|
||||
args = [
|
||||
"initrd=coreos_production_pxe_image.cpio.gz",
|
||||
"coreos.config.url=${var.matchbox_http_endpoint}/ignition?uuid=$${uuid}&mac=$${mac:hexhyp}",
|
||||
"coreos.first_boot=yes",
|
||||
"console=tty0",
|
||||
@ -44,6 +45,7 @@ resource "matchbox_profile" "cached-container-linux-install" {
|
||||
]
|
||||
|
||||
args = [
|
||||
"initrd=coreos_production_pxe_image.cpio.gz",
|
||||
"coreos.config.url=${var.matchbox_http_endpoint}/ignition?uuid=$${uuid}&mac=$${mac:hexhyp}",
|
||||
"coreos.first_boot=yes",
|
||||
"console=tty0",
|
||||
|
@ -96,7 +96,7 @@ storage:
|
||||
contents:
|
||||
inline: |
|
||||
KUBELET_IMAGE_URL=docker://gcr.io/google_containers/hyperkube
|
||||
KUBELET_IMAGE_TAG=v1.8.3
|
||||
KUBELET_IMAGE_TAG=v1.8.4
|
||||
- path: /etc/hostname
|
||||
filesystem: root
|
||||
mode: 0644
|
||||
|
@ -8,6 +8,7 @@ resource "matchbox_profile" "bootkube-worker-pxe" {
|
||||
]
|
||||
|
||||
args = [
|
||||
"initrd=coreos_production_pxe_image.cpio.gz",
|
||||
"coreos.config.url=${var.matchbox_http_endpoint}/ignition?uuid=$${uuid}&mac=$${mac:hexhyp}",
|
||||
"coreos.first_boot=yes",
|
||||
"console=tty0",
|
||||
|
@ -1,4 +1,4 @@
|
||||
# Typhoon
|
||||
# Typhoon <img align="right" src="https://storage.googleapis.com/poseidon/typhoon-logo.png">
|
||||
|
||||
Typhoon is a minimal and free Kubernetes distribution.
|
||||
|
||||
@ -9,9 +9,9 @@ Typhoon is a minimal and free Kubernetes distribution.
|
||||
|
||||
Typhoon distributes upstream Kubernetes, architectural conventions, and cluster addons, much like a GNU/Linux distribution provides the Linux kernel and userspace components.
|
||||
|
||||
## Features
|
||||
## Features <a href="https://www.cncf.io/certification/software-conformance/"><img align="right" src="https://storage.googleapis.com/poseidon/certified-kubernetes.png"></a>
|
||||
|
||||
* Kubernetes v1.8.3 (upstream, via [kubernetes-incubator/bootkube](https://github.com/kubernetes-incubator/bootkube))
|
||||
* Kubernetes v1.8.4 (upstream, via [kubernetes-incubator/bootkube](https://github.com/kubernetes-incubator/bootkube))
|
||||
* Single or multi-master, workloads isolated on workers, [Calico](https://www.projectcalico.org/) or [flannel](https://github.com/coreos/flannel) networking
|
||||
* On-cluster etcd with TLS, [RBAC](https://kubernetes.io/docs/admin/authorization/rbac/)-enabled, [network policy](https://kubernetes.io/docs/concepts/services-networking/network-policies/)
|
||||
* Ready for Ingress, Dashboards, Metrics, and other optional [addons](https://typhoon.psdn.io/addons/overview/)
|
||||
|
@ -1,6 +1,6 @@
|
||||
# Self-hosted Kubernetes assets (kubeconfig, manifests)
|
||||
module "bootkube" {
|
||||
source = "git::https://github.com/poseidon/terraform-render-bootkube.git?ref=v0.8.2"
|
||||
source = "git::https://github.com/poseidon/terraform-render-bootkube.git?ref=v0.9.0"
|
||||
|
||||
cluster_name = "${var.cluster_name}"
|
||||
api_servers = ["${format("%s.%s", var.cluster_name, var.dns_zone)}"]
|
||||
|
@ -119,7 +119,7 @@ storage:
|
||||
contents:
|
||||
inline: |
|
||||
KUBELET_IMAGE_URL=docker://gcr.io/google_containers/hyperkube
|
||||
KUBELET_IMAGE_TAG=v1.8.3
|
||||
KUBELET_IMAGE_TAG=v1.8.4
|
||||
- path: /etc/sysctl.d/max-user-watches.conf
|
||||
filesystem: root
|
||||
contents:
|
||||
@ -142,7 +142,7 @@ storage:
|
||||
[ -d /opt/bootkube/assets/experimental/manifests ] && mv /opt/bootkube/assets/experimental/manifests/* /opt/bootkube/assets/manifests && rm -r /opt/bootkube/assets/experimental/manifests
|
||||
[ -d /opt/bootkube/assets/experimental/bootstrap-manifests ] && mv /opt/bootkube/assets/experimental/bootstrap-manifests/* /opt/bootkube/assets/bootstrap-manifests && rm -r /opt/bootkube/assets/experimental/bootstrap-manifests
|
||||
BOOTKUBE_ACI="$${BOOTKUBE_ACI:-quay.io/coreos/bootkube}"
|
||||
BOOTKUBE_VERSION="$${BOOTKUBE_VERSION:-v0.8.2}"
|
||||
BOOTKUBE_VERSION="$${BOOTKUBE_VERSION:-v0.9.0}"
|
||||
BOOTKUBE_ASSETS="$${BOOTKUBE_ASSETS:-/opt/bootkube/assets}"
|
||||
exec /usr/bin/rkt run \
|
||||
--trust-keys-from-https \
|
||||
|
@ -94,7 +94,7 @@ storage:
|
||||
contents:
|
||||
inline: |
|
||||
KUBELET_IMAGE_URL=docker://gcr.io/google_containers/hyperkube
|
||||
KUBELET_IMAGE_TAG=v1.8.3
|
||||
KUBELET_IMAGE_TAG=v1.8.4
|
||||
- path: /etc/sysctl.d/max-user-watches.conf
|
||||
filesystem: root
|
||||
contents:
|
||||
@ -112,7 +112,7 @@ storage:
|
||||
--volume config,kind=host,source=/etc/kubernetes \
|
||||
--mount volume=config,target=/etc/kubernetes \
|
||||
--insecure-options=image \
|
||||
docker://gcr.io/google_containers/hyperkube:v1.8.3 \
|
||||
docker://gcr.io/google_containers/hyperkube:v1.8.4 \
|
||||
--net=host \
|
||||
--dns=host \
|
||||
--exec=/kubectl -- --kubeconfig=/etc/kubernetes/kubeconfig delete node $(hostname)
|
||||
|
14
docs/aws.md
14
docs/aws.md
@ -1,6 +1,6 @@
|
||||
# AWS
|
||||
|
||||
In this tutorial, we'll create a Kubernetes v1.8.3 cluster on AWS.
|
||||
In this tutorial, we'll create a Kubernetes v1.8.4 cluster on AWS.
|
||||
|
||||
We'll declare a Kubernetes cluster in Terraform using the Typhoon Terraform module. On apply, a VPC, gateway, subnets, auto-scaling groups of controllers and workers, network load balancers for controllers and workers, and security groups will be created.
|
||||
|
||||
@ -10,11 +10,11 @@ Controllers and workers are provisioned to run a `kubelet`. A one-time [bootkube
|
||||
|
||||
* AWS Account and IAM credentials
|
||||
* AWS Route53 DNS Zone (registered Domain Name or delegated subdomain)
|
||||
* Terraform v0.10.4+ and [terraform-provider-ct](https://github.com/coreos/terraform-provider-ct) installed locally
|
||||
* Terraform v0.10.x and [terraform-provider-ct](https://github.com/coreos/terraform-provider-ct) installed locally
|
||||
|
||||
## Terraform Setup
|
||||
|
||||
Install [Terraform](https://www.terraform.io/downloads.html) v0.10.1 on your system.
|
||||
Install [Terraform](https://www.terraform.io/downloads.html) v0.10.x on your system.
|
||||
|
||||
```sh
|
||||
$ terraform version
|
||||
@ -119,7 +119,7 @@ Get or update Terraform modules.
|
||||
$ terraform get # downloads missing modules
|
||||
$ terraform get --update # updates all modules
|
||||
Get: git::https://github.com/poseidon/typhoon (update)
|
||||
Get: git::https://github.com/poseidon/bootkube-terraform.git?ref=v0.8.2 (update)
|
||||
Get: git::https://github.com/poseidon/bootkube-terraform.git?ref=v0.9.0 (update)
|
||||
```
|
||||
|
||||
Plan the resources to be created.
|
||||
@ -151,9 +151,9 @@ In 4-8 minutes, the Kubernetes cluster will be ready.
|
||||
$ KUBECONFIG=/home/user/.secrets/clusters/tempest/auth/kubeconfig
|
||||
$ kubectl get nodes
|
||||
NAME STATUS AGE VERSION
|
||||
ip-10-0-12-221 Ready 34m v1.8.3
|
||||
ip-10-0-19-112 Ready 34m v1.8.3
|
||||
ip-10-0-4-22 Ready 34m v1.8.3
|
||||
ip-10-0-12-221 Ready 34m v1.8.4
|
||||
ip-10-0-19-112 Ready 34m v1.8.4
|
||||
ip-10-0-4-22 Ready 34m v1.8.4
|
||||
```
|
||||
|
||||
List the pods.
|
||||
|
@ -1,6 +1,6 @@
|
||||
# Bare-Metal
|
||||
|
||||
In this tutorial, we'll network boot and provison a Kubernetes v1.8.3 cluster on bare-metal.
|
||||
In this tutorial, we'll network boot and provison a Kubernetes v1.8.4 cluster on bare-metal.
|
||||
|
||||
First, we'll deploy a [Matchbox](https://github.com/coreos/matchbox) service and setup a network boot environment. Then, we'll declare a Kubernetes cluster in Terraform using the Typhoon Terraform module and power on machines. On PXE boot, machines will install Container Linux to disk, reboot into the disk install, and provision themselves as Kubernetes controllers or workers.
|
||||
|
||||
@ -12,7 +12,7 @@ Controllers are provisioned as etcd peers and run `etcd-member` (etcd3) and `kub
|
||||
* PXE-enabled [network boot](https://coreos.com/matchbox/docs/latest/network-setup.html) environment
|
||||
* Matchbox v0.6+ deployment with API enabled
|
||||
* Matchbox credentials `client.crt`, `client.key`, `ca.crt`
|
||||
* Terraform v0.10.4+ and [terraform-provider-matchbox](https://github.com/coreos/terraform-provider-matchbox) installed locally
|
||||
* Terraform v0.10.x and [terraform-provider-matchbox](https://github.com/coreos/terraform-provider-matchbox) installed locally
|
||||
|
||||
## Machines
|
||||
|
||||
@ -109,7 +109,7 @@ Read about the [many ways](https://coreos.com/matchbox/docs/latest/network-setup
|
||||
|
||||
## Terraform Setup
|
||||
|
||||
Install [Terraform](https://www.terraform.io/downloads.html) v0.9.2+ on your system.
|
||||
Install [Terraform](https://www.terraform.io/downloads.html) v0.10.x on your system.
|
||||
|
||||
```sh
|
||||
$ terraform version
|
||||
@ -219,7 +219,7 @@ Get or update Terraform modules.
|
||||
$ terraform get # downloads missing modules
|
||||
$ terraform get --update # updates all modules
|
||||
Get: git::https://github.com/poseidon/typhoon (update)
|
||||
Get: git::https://github.com/poseidon/bootkube-terraform.git?ref=v0.8.2 (update)
|
||||
Get: git::https://github.com/poseidon/bootkube-terraform.git?ref=v0.9.0 (update)
|
||||
```
|
||||
|
||||
Plan the resources to be created.
|
||||
@ -290,9 +290,9 @@ bootkube[5]: Tearing down temporary bootstrap control plane...
|
||||
$ KUBECONFIG=/home/user/.secrets/clusters/mercury/auth/kubeconfig
|
||||
$ kubectl get nodes
|
||||
NAME STATUS AGE VERSION
|
||||
node1.example.com Ready 11m v1.8.3
|
||||
node2.example.com Ready 11m v1.8.3
|
||||
node3.example.com Ready 11m v1.8.3
|
||||
node1.example.com Ready 11m v1.8.4
|
||||
node2.example.com Ready 11m v1.8.4
|
||||
node3.example.com Ready 11m v1.8.4
|
||||
```
|
||||
|
||||
List the pods.
|
||||
|
@ -1,6 +1,6 @@
|
||||
# Digital Ocean
|
||||
|
||||
In this tutorial, we'll create a Kubernetes v1.8.3 cluster on Digital Ocean.
|
||||
In this tutorial, we'll create a Kubernetes v1.8.4 cluster on Digital Ocean.
|
||||
|
||||
We'll declare a Kubernetes cluster in Terraform using the Typhoon Terraform module. On apply, firewall rules, DNS records, tags, and droplets for Kubernetes controllers and workers will be created.
|
||||
|
||||
@ -10,11 +10,11 @@ Controllers and workers are provisioned to run a `kubelet`. A one-time [bootkube
|
||||
|
||||
* Digital Ocean Account and Token
|
||||
* Digital Ocean Domain (registered Domain Name or delegated subdomain)
|
||||
* Terraform v0.10.4+ and [terraform-provider-ct](https://github.com/coreos/terraform-provider-ct) installed locally
|
||||
* Terraform v0.10.x and [terraform-provider-ct](https://github.com/coreos/terraform-provider-ct) installed locally
|
||||
|
||||
## Terraform Setup
|
||||
|
||||
Install [Terraform](https://www.terraform.io/downloads.html) v0.10.1+ on your system.
|
||||
Install [Terraform](https://www.terraform.io/downloads.html) v0.10.x on your system.
|
||||
|
||||
```sh
|
||||
$ terraform version
|
||||
@ -114,7 +114,7 @@ Get or update Terraform modules.
|
||||
$ terraform get # downloads missing modules
|
||||
$ terraform get --update # updates all modules
|
||||
Get: git::https://github.com/poseidon/typhoon (update)
|
||||
Get: git::https://github.com/poseidon/bootkube-terraform.git?ref=v0.8.2 (update)
|
||||
Get: git::https://github.com/poseidon/bootkube-terraform.git?ref=v0.9.0 (update)
|
||||
```
|
||||
|
||||
Plan the resources to be created.
|
||||
@ -147,9 +147,9 @@ In 3-6 minutes, the Kubernetes cluster will be ready.
|
||||
$ KUBECONFIG=/home/user/.secrets/clusters/nemo/auth/kubeconfig
|
||||
$ kubectl get nodes
|
||||
NAME STATUS AGE VERSION
|
||||
10.132.110.130 Ready 10m v1.8.3
|
||||
10.132.115.81 Ready 10m v1.8.3
|
||||
10.132.124.107 Ready 10m v1.8.3
|
||||
10.132.110.130 Ready 10m v1.8.4
|
||||
10.132.115.81 Ready 10m v1.8.4
|
||||
10.132.124.107 Ready 10m v1.8.4
|
||||
```
|
||||
|
||||
List the pods.
|
||||
|
@ -1,6 +1,6 @@
|
||||
# Google Cloud
|
||||
|
||||
In this tutorial, we'll create a Kubernetes v1.8.3 cluster on Google Compute Engine (not GKE).
|
||||
In this tutorial, we'll create a Kubernetes v1.8.4 cluster on Google Compute Engine (not GKE).
|
||||
|
||||
We'll declare a Kubernetes cluster in Terraform using the Typhoon Terraform module. On apply, a network, firewall rules, managed instance groups of Kubernetes controllers and workers, network load balancers for controllers and workers, and health checks will be created.
|
||||
|
||||
@ -10,11 +10,11 @@ Controllers and workers are provisioned to run a `kubelet`. A one-time [bootkube
|
||||
|
||||
* Google Cloud Account and Service Account
|
||||
* Google Cloud DNS Zone (registered Domain Name or delegated subdomain)
|
||||
* Terraform v0.10.4+ and [terraform-provider-ct](https://github.com/coreos/terraform-provider-ct) installed locally
|
||||
* Terraform v0.10.x and [terraform-provider-ct](https://github.com/coreos/terraform-provider-ct) installed locally
|
||||
|
||||
## Terraform Setup
|
||||
|
||||
Install [Terraform](https://www.terraform.io/downloads.html) v0.9.2+ on your system.
|
||||
Install [Terraform](https://www.terraform.io/downloads.html) v0.10.x on your system.
|
||||
|
||||
```sh
|
||||
$ terraform version
|
||||
@ -120,7 +120,7 @@ Get or update Terraform modules.
|
||||
$ terraform get # downloads missing modules
|
||||
$ terraform get --update # updates all modules
|
||||
Get: git::https://github.com/poseidon/typhoon (update)
|
||||
Get: git::https://github.com/poseidon/bootkube-terraform.git?ref=v0.8.2 (update)
|
||||
Get: git::https://github.com/poseidon/bootkube-terraform.git?ref=v0.9.0 (update)
|
||||
```
|
||||
|
||||
Plan the resources to be created.
|
||||
@ -154,9 +154,9 @@ In 4-8 minutes, the Kubernetes cluster will be ready.
|
||||
$ KUBECONFIG=/home/user/.secrets/clusters/yavin/auth/kubeconfig
|
||||
$ kubectl get nodes
|
||||
NAME STATUS AGE VERSION
|
||||
yavin-controller-0.c.example-com.internal Ready 6m v1.8.3
|
||||
yavin-worker-jrbf.c.example-com.internal Ready 5m v1.8.3
|
||||
yavin-worker-mzdm.c.example-com.internal Ready 5m v1.8.3
|
||||
yavin-controller-0.c.example-com.internal Ready 6m v1.8.4
|
||||
yavin-worker-jrbf.c.example-com.internal Ready 5m v1.8.4
|
||||
yavin-worker-mzdm.c.example-com.internal Ready 5m v1.8.4
|
||||
```
|
||||
|
||||
List the pods.
|
||||
|
BIN
docs/img/typhoon-logo.png
Normal file
BIN
docs/img/typhoon-logo.png
Normal file
Binary file not shown.
After Width: | Height: | Size: 20 KiB |
BIN
docs/img/typhoon.png
Normal file
BIN
docs/img/typhoon.png
Normal file
Binary file not shown.
After Width: | Height: | Size: 22 KiB |
@ -1,4 +1,4 @@
|
||||
# Typhoon <img align="right" src="https://storage.googleapis.com/dghubble/spin.png">
|
||||
# Typhoon <img align="right" src="https://storage.googleapis.com/poseidon/typhoon-logo.png">
|
||||
|
||||
Typhoon is a minimal and free Kubernetes distribution.
|
||||
|
||||
@ -9,9 +9,9 @@ Typhoon is a minimal and free Kubernetes distribution.
|
||||
|
||||
Typhoon distributes upstream Kubernetes, architectural conventions, and cluster addons, much like a GNU/Linux distribution provides the Linux kernel and userspace components.
|
||||
|
||||
## Features
|
||||
## Features <a href="https://www.cncf.io/certification/software-conformance/"><img align="right" src="https://storage.googleapis.com/poseidon/certified-kubernetes.png"></a>
|
||||
|
||||
* Kubernetes v1.8.3 (upstream, via [kubernetes-incubator/bootkube](https://github.com/kubernetes-incubator/bootkube))
|
||||
* Kubernetes v1.8.4 (upstream, via [kubernetes-incubator/bootkube](https://github.com/kubernetes-incubator/bootkube))
|
||||
* Single or multi-master, workloads isolated on workers, [Calico](https://www.projectcalico.org/) or [flannel](https://github.com/coreos/flannel) networking
|
||||
* On-cluster etcd with TLS, [RBAC](https://kubernetes.io/docs/admin/authorization/rbac/)-enabled, [network policy](https://kubernetes.io/docs/concepts/services-networking/network-policies/)
|
||||
* Ready for Ingress, Dashboards, Metrics and other optional [addons](addons/overview.md)
|
||||
@ -77,9 +77,9 @@ In 4-8 minutes (varies by platform), the cluster will be ready. This Google Clou
|
||||
$ KUBECONFIG=/home/user/.secrets/clusters/yavin/auth/kubeconfig
|
||||
$ kubectl get nodes
|
||||
NAME STATUS AGE VERSION
|
||||
yavin-controller-0.c.example-com.internal Ready 6m v1.8.3
|
||||
yavin-worker-jrbf.c.example-com.internal Ready 5m v1.8.3
|
||||
yavin-worker-mzdm.c.example-com.internal Ready 5m v1.8.3
|
||||
yavin-controller-0.c.example-com.internal Ready 6m v1.8.4
|
||||
yavin-worker-jrbf.c.example-com.internal Ready 5m v1.8.4
|
||||
yavin-worker-mzdm.c.example-com.internal Ready 5m v1.8.4
|
||||
```
|
||||
|
||||
List the pods.
|
||||
|
@ -1,4 +1,4 @@
|
||||
# Typhoon
|
||||
# Typhoon <img align="right" src="https://storage.googleapis.com/poseidon/typhoon-logo.png">
|
||||
|
||||
Typhoon is a minimal and free Kubernetes distribution.
|
||||
|
||||
@ -9,9 +9,9 @@ Typhoon is a minimal and free Kubernetes distribution.
|
||||
|
||||
Typhoon distributes upstream Kubernetes, architectural conventions, and cluster addons, much like a GNU/Linux distribution provides the Linux kernel and userspace components.
|
||||
|
||||
## Features
|
||||
## Features <a href="https://www.cncf.io/certification/software-conformance/"><img align="right" src="https://storage.googleapis.com/poseidon/certified-kubernetes.png"></a>
|
||||
|
||||
* Kubernetes v1.8.3 (upstream, via [kubernetes-incubator/bootkube](https://github.com/kubernetes-incubator/bootkube))
|
||||
* Kubernetes v1.8.4 (upstream, via [kubernetes-incubator/bootkube](https://github.com/kubernetes-incubator/bootkube))
|
||||
* Single or multi-master, workloads isolated on workers, [Calico](https://www.projectcalico.org/) or [flannel](https://github.com/coreos/flannel) networking
|
||||
* On-cluster etcd with TLS, [RBAC](https://kubernetes.io/docs/admin/authorization/rbac/)-enabled, [network policy](https://kubernetes.io/docs/concepts/services-networking/network-policies/)
|
||||
* Ready for Ingress, Dashboards, Metrics, and other optional [addons](https://typhoon.psdn.io/addons/overview/)
|
||||
|
@ -1,6 +1,6 @@
|
||||
# Self-hosted Kubernetes assets (kubeconfig, manifests)
|
||||
module "bootkube" {
|
||||
source = "git::https://github.com/poseidon/terraform-render-bootkube.git?ref=v0.8.2"
|
||||
source = "git::https://github.com/poseidon/terraform-render-bootkube.git?ref=v0.9.0"
|
||||
|
||||
cluster_name = "${var.cluster_name}"
|
||||
api_servers = ["${format("%s.%s", var.cluster_name, var.dns_zone)}"]
|
||||
|
@ -129,7 +129,7 @@ storage:
|
||||
contents:
|
||||
inline: |
|
||||
KUBELET_IMAGE_URL=docker://gcr.io/google_containers/hyperkube
|
||||
KUBELET_IMAGE_TAG=v1.8.3
|
||||
KUBELET_IMAGE_TAG=v1.8.4
|
||||
- path: /etc/sysctl.d/max-user-watches.conf
|
||||
filesystem: root
|
||||
contents:
|
||||
@ -152,7 +152,7 @@ storage:
|
||||
[ -d /opt/bootkube/assets/experimental/manifests ] && mv /opt/bootkube/assets/experimental/manifests/* /opt/bootkube/assets/manifests && rm -r /opt/bootkube/assets/experimental/manifests
|
||||
[ -d /opt/bootkube/assets/experimental/bootstrap-manifests ] && mv /opt/bootkube/assets/experimental/bootstrap-manifests/* /opt/bootkube/assets/bootstrap-manifests && rm -r /opt/bootkube/assets/experimental/bootstrap-manifests
|
||||
BOOTKUBE_ACI="$${BOOTKUBE_ACI:-quay.io/coreos/bootkube}"
|
||||
BOOTKUBE_VERSION="$${BOOTKUBE_VERSION:-v0.8.2}"
|
||||
BOOTKUBE_VERSION="$${BOOTKUBE_VERSION:-v0.9.0}"
|
||||
BOOTKUBE_ASSETS="$${BOOTKUBE_ASSETS:-/opt/bootkube/assets}"
|
||||
exec /usr/bin/rkt run \
|
||||
--trust-keys-from-https \
|
||||
|
@ -104,7 +104,7 @@ storage:
|
||||
contents:
|
||||
inline: |
|
||||
KUBELET_IMAGE_URL=docker://gcr.io/google_containers/hyperkube
|
||||
KUBELET_IMAGE_TAG=v1.8.3
|
||||
KUBELET_IMAGE_TAG=v1.8.4
|
||||
- path: /etc/sysctl.d/max-user-watches.conf
|
||||
filesystem: root
|
||||
contents:
|
||||
@ -122,7 +122,7 @@ storage:
|
||||
--volume config,kind=host,source=/etc/kubernetes \
|
||||
--mount volume=config,target=/etc/kubernetes \
|
||||
--insecure-options=image \
|
||||
docker://gcr.io/google_containers/hyperkube:v1.8.3 \
|
||||
docker://gcr.io/google_containers/hyperkube:v1.8.4 \
|
||||
--net=host \
|
||||
--dns=host \
|
||||
--exec=/kubectl -- --kubeconfig=/etc/kubernetes/kubeconfig delete node $(hostname)
|
||||
|
Reference in New Issue
Block a user