Commit 00d25db7 authored by František Dvořák

Unify k8s playbook - inventory hostname for kube server, but with possible override

It is better to prefer the inventory hostname for the kube server, but sites without a public IPv4 address need to use the local IPv4 address instead. Use an inventory host variable for the override.
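The mechanism is inventory-driven: the worker plays look up an optional kube_server host variable on the first master and fall back to the master's inventory hostname. A minimal sketch of the two pieces, based on the hunks in this commit (the master hostname below is illustrative):

    # inventory: override only where the inventory hostname is not usable (e.g. IPv6-only public address)
    master:
      hosts:
        k8s-master.example.org:
          # must be IPv4 address or hostname
          kube_server: 192.168.0.243

    # playbook: worker nodes join via the override, or the inventory hostname otherwise
    kube_server: "{{ hostvars[groups['master'][0]].kube_server | default(groups['master'][0]) }}"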
parent a58b4c04
@@ -150,7 +150,8 @@
       vars:
         # do not downgrade docker
         kube_docker_version: latest
-        kube_server: "{{ groups['master'][0] }}"
+        # must be IPv4 address or hostname
+        kube_server: "{{ hostvars[groups['master'][0]].kube_server | default(groups['master'][0]) }}"
         kube_type_of_node: wn
         kube_version: 1.28.2
         kubelet_extra_args: '--volume-stats-agg-period 0'
@@ -6,6 +6,8 @@ fip:
 master:
   hosts:
     2001:718:801:432:f816:3eff:febb:5fc8:
+      # must be IPv4 address or hostname
+      kube_server: 192.168.0.243
 ingress:
   hosts:
---
- name: Basic setup and NFS common
  hosts: allnodes
  become: true
  tasks:
    - name: Add SSH keys
      authorized_key:
        user: egi
        state: present
        key: '{{ item }}'
      with_file:
        - public_keys/andrea-manzi
        - public_keys/enolfc
        - public_keys/jhradil
        - public_keys/pospisilp
        - public_keys/sustr
        - public_keys/valtri
    - name: Install nfs-common
      apt:
        name: nfs-common
        update_cache: true
    - name: Site install packages
      package:
        name:
          - atop
          - cron-apt
          - fail2ban
          - mc
          - vim
          - postfix
    - name: Site remove packages
      package:
        name:
          - unattended-upgrades
        state: absent
    - name: Site cron-apt config
      copy:
        dest: /etc/cron-apt/config
        content: |
          MAILTO=valtri@civ.zcu.cz
          MAILON=upgrade
          RUNSLEEP=600
        mode: 0644
    - name: Site cron-apt action
      copy:
        dest: /etc/cron-apt/action.d/9-upgrade
        content: -q -q dist-upgrade
        mode: 0644
    - name: Site touch
      file:
        path: "/EOSC-{{ site_name | upper }}"
        state: touch
        mode: 0644

- name: NFS server
  hosts: nfs
  become: true
  tasks:
    - name: Install nfs-server
      apt:
        name: nfs-kernel-server
        state: present
        update_cache: true
    - name: Create user for NFS
      user:
        name: volumes
        create_home: false
        uid: 5005
    - name: Create /exports dir
      file:
        path: /exports
        state: directory
        mode: 0755
        owner: volumes
    - name: Create exports
      template:
        src: templates/etc/exports
        dest: /etc/exports
        mode: 0644
      notify: Reload exports
    - name: Start NFS service
      service:
        name: nfs-server
        state: started
  handlers:
    - name: Reload exports
      command: exportfs -ra
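# Note: templates/etc/exports is referenced above but is not part of this diff.
# A minimal sketch of what such a template might contain (illustrative only,
# exporting /exports read-write to the cluster's private network):
#   /exports 192.168.0.0/24(rw,sync,no_subtree_check,no_root_squash)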

- name: K8s master deployment
  hosts: master
  become: true
  roles:
    - role: 'grycap.kubernetes'
      vars:
        # do not downgrade docker
        kube_docker_version: latest
        kube_version: 1.28.2
        kube_network: 'none'  # custom network installation
        kube_install_helm: true
        kube_install_helm_version: 'v3.13.0'
        kube_install_metrics: true
  tasks:
    - name: Create kubectl config dir
      file:
        path: "~{{ ansible_user }}/.kube"
        mode: 0750
        owner: "{{ ansible_user }}"
        state: directory
    - name: Copy kubectl config to regular user
      copy:
        remote_src: true
        src: /etc/kubernetes/admin.conf
        dest: "~{{ ansible_user }}/.kube/config"
        mode: 0600
        owner: "{{ ansible_user }}"
    - name: Site k8s cheat sheets
      copy:
        dest: /etc/profile.d/k8s-cheats.sh
        src: files/k8s-cheats.sh
        mode: preserve

- name: K8s network deployment
  hosts: master
  vars:
    calicoctl_version: 3.27.0
  tasks:
    - name: Calico config
      copy:
        # https://raw.githubusercontent.com/projectcalico/calico/v3.27.0/manifests/calico.yaml
        src: files/calico.yaml
        dest: /tmp/calico-net.yaml
        mode: 0644
    - name: Calico installation
      command:
        cmd: kubectl apply -f /tmp/calico-net.yaml
        creates: /var/etcd/calico-data
      environment:
        KUBECONFIG: /etc/kubernetes/admin.conf
    - name: Download calicoctl
      get_url:
        url: https://github.com/projectcalico/calico/releases/download/v{{ calicoctl_version }}/calicoctl-linux-amd64
        dest: /usr/local/sbin/calicoctl
        mode: 0755
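# A quick post-install check could be run on the master once Calico is up
# (sketch only; calicoctl needs to be told where the datastore lives), e.g.:
#   DATASTORE_TYPE=kubernetes KUBECONFIG=/etc/kubernetes/admin.conf calicoctl get nodes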

- name: K8s nodes deployment
  hosts: nfs, ingress, worker
  become: true
  roles:
    - role: 'grycap.kubernetes'
      vars:
        # do not downgrade docker
        kube_docker_version: latest
        # must be IPv4 address or hostname
        kube_server: "{{ hostvars[groups['master'][0]].ansible_default_ipv4.address }}"
        kube_type_of_node: wn
        kube_version: 1.28.2
        kubelet_extra_args: '--volume-stats-agg-period 0'
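# kube_server here relies on facts gathered from the first master host
# (ansible_default_ipv4 is only available in hostvars once facts for that
# host have been collected), so the master plays must run first.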

- name: K8s customization
  hosts: master
  become: true
  tasks:
    - name: Wait for helm
      command: helm version
      register: result
      until: result.rc == 0
      retries: 20
      delay: 10
      environment:
        KUBECONFIG: /etc/kubernetes/admin.conf
      when: true
    - name: Create custom fact directory
      file:
        path: "/etc/ansible/facts.d"
        mode: 0755
        recurse: true
        state: "directory"
    - name: Create helm repos custom fact
      copy:
        src: files/helm_repos.fact
        dest: /etc/ansible/facts.d/helm_repos.fact
        mode: 0755
    - name: Reload custom facts
      setup:
        filter: ansible_local
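    # The when: conditions below iterate over ansible_local.helm_repos and read a
    # 'name' attribute, so files/helm_repos.fact is presumably an executable local
    # fact that emits the configured repositories as JSON, e.g. something like:
    #   #!/bin/sh
    #   helm repo list -o json 2>/dev/null || echo '[]'
    # (sketch only; the actual fact script is not part of this diff)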
    - name: Helm repo add stable
      shell: |-
        helm repo add stable https://charts.helm.sh/stable/
        helm repo update
      when: "'stable' not in ansible_local.helm_repos | map(attribute='name') | list"
    - name: Helm repo add nfs-subdir-external-provisioner
      shell: |-
        helm repo add nfs-subdir-external-provisioner https://kubernetes-sigs.github.io/nfs-subdir-external-provisioner
        helm repo update
      when: "'nfs-subdir-external-provisioner' not in ansible_local.helm_repos | map(attribute='name') | list"
    - name: NFS provisioner
      vars:
        config: >-
          --set nfs.server={{ groups['nfs'][0] }}
          --set storageClass.defaultClass=true
          --set nfs.path=/exports
      shell: |-
        helm status --namespace kube-system nfs-provisioner
        if [ $? -ne 0 ]; then
          helm install --namespace kube-system {{ config }} nfs-provisioner nfs-subdir-external-provisioner/nfs-subdir-external-provisioner
        else
          helm upgrade --namespace kube-system {{ config }} nfs-provisioner nfs-subdir-external-provisioner/nfs-subdir-external-provisioner
        fi
      environment:
        KUBECONFIG: /etc/kubernetes/admin.conf
        PATH: /sbin:/bin:/usr/sbin:/usr/bin:/usr/local/bin
      when: true
    - name: Helm repo add ingress-nginx
      shell: |-
        helm repo add ingress-nginx https://kubernetes.github.io/ingress-nginx
        helm repo update
      when: "'ingress-nginx' not in ansible_local.helm_repos | map(attribute='name') | list"
    - name: Ingress
      vars:
        config: >-
          --set controller.service.type=NodePort
          --set controller.service.externalIPs={{ '{' + hostvars[groups['ingress'][0]].ansible_default_ipv4.address + '}' }}
          --set controller.config.proxy-body-size=0
          --set controller.allowSnippetAnnotations=false
      shell: |-
        helm status --namespace kube-system cluster-ingress
        if [ $? -ne 0 ]; then
          helm install cluster-ingress --namespace kube-system {{ config }} ingress-nginx/ingress-nginx
        else
          helm upgrade --namespace kube-system {{ config }} cluster-ingress ingress-nginx/ingress-nginx
        fi
      environment:
        KUBECONFIG: /etc/kubernetes/admin.conf
        PATH: /sbin:/bin:/usr/sbin:/usr/bin:/usr/local/bin
      when: true
    - name: Cert-manager
      vars:
        version: 1.13.3
        config: >-
          --version={{ version }}
          --set ingressShim.defaultIssuerName=letsencrypt-prod
          --set ingressShim.defaultIssuerKind=ClusterIssuer
          --set ingressShim.defaultIssuerGroup=cert-manager.io
      shell: |-
        helm status --namespace cert-manager certs-man
        if [ $? -ne 0 ]; then
          kubectl create namespace cert-manager
          kubectl apply -f https://github.com/cert-manager/cert-manager/releases/download/v{{ version }}/cert-manager.crds.yaml
          helm repo add jetstack https://charts.jetstack.io
          helm repo update
          helm install --namespace cert-manager {{ config }} certs-man jetstack/cert-manager
        else
          helm upgrade --namespace cert-manager {{ config }} certs-man jetstack/cert-manager
        fi
      environment:
        KUBECONFIG: /etc/kubernetes/admin.conf
        PATH: /sbin:/bin:/usr/sbin:/usr/bin:/usr/local/bin
      when: true
    - name: Cluster issuer file
      copy:
        dest: /tmp/clusterissuer.yaml
        mode: 0644
        content: |
          apiVersion: cert-manager.io/v1
          kind: ClusterIssuer
          metadata:
            name: letsencrypt-prod
          spec:
            acme:
              email: valtri@civ.zcu.cz
              server: https://acme-v02.api.letsencrypt.org/directory
              privateKeySecretRef:
                name: cluster-issuer-account-key
              # Add a single challenge solver, HTTP01 using nginx
              solvers:
                - http01:
                    ingress:
                      class: nginx
    - name: Cluster issuer
      command:
        kubectl apply -f /tmp/clusterissuer.yaml
      environment:
        KUBECONFIG: /etc/kubernetes/admin.conf
        PATH: /sbin:/bin:/usr/sbin:/usr/bin:/usr/local/bin
      when: true
    # Accounting / monitoring needs
    - name: Helm repo add prometheus-community
      shell: |-
        helm repo add prometheus-community https://prometheus-community.github.io/helm-charts
        helm repo update
      when: "'prometheus-community' not in ansible_local.helm_repos | map(attribute='name') | list"
    - name: Prometheus configuration
      vars:
        smtp_from: "noreply@{{ groups['ingress'][0] }}"
        limit_memory_warn: 80
        limit_cpu_warn: 80
        limit_disk_warn: 80
      copy:
        dest: /tmp/prometheus.yaml
        mode: 0600
        content: |
          alertmanagerFiles:
            alertmanager.yml:
              global:
                smtp_from: "{{ smtp_from }}"
              receivers:
                - name: default-receiver
                  email_configs:
                    - send_resolved: true
                      to: valtri@civ.zcu.cz
                - name: 'null'
              route:
                group_by: ['job']
          kube-state-metrics:
            metricAnnotationsAllowList:
              - pods=[hub.jupyter.org/username,egi.eu/primary_group]
          serverFiles:
            alerting_rules.yml:
              groups:
                - name: limits
                  rules:
                    - alert: HighCpuLoad
                      expr: 100 * (1 - avg by(kubernetes_node) (rate(node_cpu_seconds_total{mode="idle"}[5m]))) > {{ limit_cpu_warn }}
                      for: 15m
                      labels:
                        job: "eosc-{{ site_name }}"
                      annotations:
                        summary: "Host high CPU load ({{ '{{ $labels.kubernetes_node }}' }})"
                        description: "CPU load {{ '{{ $value | printf \"%.2f\" }}' }}% (limit {{ limit_cpu_warn }}%)"
                    - alert: OutOfMemory
                      expr: 100 * (1 - avg by(kubernetes_node) (node_memory_MemAvailable_bytes / node_memory_MemTotal_bytes)) > {{ limit_memory_warn }}
                      for: 20m
                      labels:
                        job: "eosc-{{ site_name }}"
                      annotations:
                        summary: "Host out of memory ({{ '{{ $labels.kubernetes_node }}' }})"
                        description: "Node memory {{ '{{ $value | printf \"%.2f\" }}' }}% (limit {{ limit_memory_warn }}%)"
                    - alert: OutOfDiskSpace
                      expr: 100 * (1 - avg by (kubernetes_node, mountpoint) (node_filesystem_avail_bytes{device=~"/dev/.*"} / node_filesystem_size_bytes))
                        > {{ limit_disk_warn }}
                      for: 20m
                      labels:
                        job: "eosc-{{ site_name }}"
                      annotations:
                        summary: "Host out of disk space ({{ '{{ $labels.kubernetes_node }}' }})"
                        description: "Disk is almost full {{ '{{ $value | printf \"%.2f\" }}' }}% (limit {{ limit_disk_warn }}%)"
    - name: Prometheus
      vars:
        config: >-
          --version=25.8.2
          -f /tmp/prometheus.yaml
      shell: |-
        helm status --namespace prometheus prometheus
        if [ $? -ne 0 ]; then
          kubectl create ns prometheus >/dev/null 2>&1 || true
          helm install --namespace prometheus {{ config }} prometheus prometheus-community/prometheus
        else
          helm upgrade --namespace prometheus {{ config }} prometheus prometheus-community/prometheus
        fi
      environment:
        KUBECONFIG: /etc/kubernetes/admin.conf
        PATH: /sbin:/bin:/usr/sbin:/usr/bin:/usr/local/bin
      when: true
    - name: Grafana configuration
      copy:
        dest: /tmp/grafana.yaml
        mode: 0640
        content: |
          ingress:
            enabled: true
            annotations:
              kubernetes.io/ingress.class: "nginx"
              kubernetes.io/tls-acme: "true"
            hosts:
              - "{{ grafana_hostname }}"
            tls:
              - hosts:
                  - "{{ grafana_hostname }}"
                secretName: acme-tls-grafana
          datasources:
            datasources.yaml:
              apiVersion: 1
              datasources:
                - name: Prometheus
                  type: prometheus
                  access: Server
                  orgId: 1
                  url: http://prometheus-server.prometheus.svc.cluster.local
                  isDefault: true
                  version: 1
                  editable: false
          sidecar:
            dashboards:
              enabled: true
    - name: Grafana
      vars:
        config: >-
          --version=7.0.3
          -f /tmp/grafana.yaml
      shell: |-
        helm status --namespace grafana grafana
        if [ $? -ne 0 ]; then
          kubectl create ns grafana
          helm repo add grafana https://grafana.github.io/helm-charts
          helm repo update
          helm install --namespace grafana {{ config }} grafana grafana/grafana
        else
          helm upgrade --namespace grafana {{ config }} grafana grafana/grafana
        fi
      environment:
        KUBECONFIG: /etc/kubernetes/admin.conf
        PATH: /sbin:/bin:/usr/sbin:/usr/bin:/usr/local/bin
      when: true
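    # The Grafana chart generates an admin password at install time; it can be
    # read back from the cluster with, for example:
    #   kubectl --namespace grafana get secret grafana \
    #     -o jsonpath='{.data.admin-password}' | base64 -d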
../../cesnet-central/playbooks/k8s.yaml
\ No newline at end of file
@@ -262,6 +262,8 @@ fip:
 master:
   hosts:
     ${local.master_ip}:
+      # must be IPv4 address or hostname
+      kube_server: ${openstack_compute_instance_v2.master.network[0].fixed_ip_v4}
 ingress:
   hosts: