From 00d25db7943b6f2c931bd464ac61bfde4b160325 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Franti=C5=A1ek=20Dvo=C5=99=C3=A1k?= <valtri@civ.zcu.cz>
Date: Thu, 8 Feb 2024 17:30:57 +0000
Subject: [PATCH] Unify k8s playbook - inventory hostname for kube server,
 but with possible override

It is better to prefer the inventory hostname for the kube server, but
sites without a public IPv4 address need to use a local IPv4 address
instead.

Use an inventory host variable for the override.
---
 cesnet-central/playbooks/k8s.yaml  |   3 +-
 cesnet-mcc/inventory/1-cesnet.yaml |   2 +
 cesnet-mcc/playbooks/k8s.yaml      | 414 +----------------------------
 cesnet-mcc/terraform/vms.tf        |   2 +
 4 files changed, 7 insertions(+), 414 deletions(-)
 mode change 100644 => 120000 cesnet-mcc/playbooks/k8s.yaml

diff --git a/cesnet-central/playbooks/k8s.yaml b/cesnet-central/playbooks/k8s.yaml
index eb4424d..3646604 100644
--- a/cesnet-central/playbooks/k8s.yaml
+++ b/cesnet-central/playbooks/k8s.yaml
@@ -150,7 +150,8 @@
       vars:
         # do not downgrade docker
         kube_docker_version: latest
-        kube_server: "{{ groups['master'][0] }}"
+        # must be IPv4 address or hostname
+        kube_server: "{{ hostvars[groups['master'][0]].kube_server | default(groups['master'][0]) }}"
         kube_type_of_node: wn
         kube_version: 1.28.2
         kubelet_extra_args: '--volume-stats-agg-period 0'
diff --git a/cesnet-mcc/inventory/1-cesnet.yaml b/cesnet-mcc/inventory/1-cesnet.yaml
index 1b20f12..78e28dd 100644
--- a/cesnet-mcc/inventory/1-cesnet.yaml
+++ b/cesnet-mcc/inventory/1-cesnet.yaml
@@ -6,6 +6,8 @@ fip:
 master:
   hosts:
     2001:718:801:432:f816:3eff:febb:5fc8:
+      # must be IPv4 address or hostname
+      kube_server: 192.168.0.243

 ingress:
   hosts:
diff --git a/cesnet-mcc/playbooks/k8s.yaml b/cesnet-mcc/playbooks/k8s.yaml
deleted file mode 100644
index a72f84a..0000000
--- a/cesnet-mcc/playbooks/k8s.yaml
+++ /dev/null
@@ -1,413 +0,0 @@
----
-- name: Basic setup and NFS common
-  hosts: allnodes
-  become: true
-  tasks:
-    - name: Add SSH keys
-      authorized_key:
-        user: egi
-        state: present
-        key: '{{ item }}'
-      with_file:
-        - public_keys/andrea-manzi
-        - public_keys/enolfc
-        - public_keys/jhradil
-        - public_keys/pospisilp
-        - public_keys/sustr
-        - public_keys/valtri
-    - name: Install nfs-common
-      apt:
-        name: nfs-common
-        update_cache: true
-    - name: Site install packages
-      package:
-        name:
-          - atop
-          - cron-apt
-          - fail2ban
-          - mc
-          - vim
-          - postfix
-    - name: Site remove packages
-      package:
-        name:
-          - unattended-upgrades
-        state: absent
-    - name: Site cron-apt config
-      copy:
-        dest: /etc/cron-apt/config
-        content: |
-          MAILTO=valtri@civ.zcu.cz
-          MAILON=upgrade
-          RUNSLEEP=600
-        mode: 0644
-    - name: Site cron-apt action
-      copy:
-        dest: /etc/cron-apt/action.d/9-upgrade
-        content: -q -q dist-upgrade
-        mode: 0644
-    - name: Site touch
-      file:
-        path: "/EOSC-{{ site_name | upper }}"
-        state: touch
-        mode: 0644
-
-- name: NFS server
-  hosts: nfs
-  become: true
-  tasks:
-    - name: Install nfs-server
-      apt:
-        name: nfs-kernel-server
-        state: present
-        update_cache: true
-    - name: Create user for NFS
-      user:
-        name: volumes
-        create_home: false
-        uid: 5005
-    - name: Create /exports dir
-      file:
-        path: /exports
-        state: directory
-        mode: 0755
-        owner: volumes
-    - name: Create exports
-      template:
-        src: templates/etc/exports
-        dest: /etc/exports
-        mode: 0644
-      notify: Reload exports
-    - name: Start NFS service
-      service:
-        name: nfs-server
-        state: started
-  handlers:
-    - name: Reload exports
-      command: exportfs -ra
-
-- name: K8s master deployment
-  hosts: master
-  become: true
-  roles:
-    - role: 'grycap.kubernetes'
-      vars:
-        # do not downgrade docker
-        kube_docker_version: latest
-        kube_version: 1.28.2
-        kube_network: 'none' # custom network installation
-        kube_install_helm: true
-        kube_install_helm_version: 'v3.13.0'
-        kube_install_metrics: true
-  tasks:
-    - name: Create kubectl config dir
-      file:
-        path: "~{{ ansible_user }}/.kube"
-        mode: 0750
-        owner: "{{ ansible_user }}"
-        state: directory
-    - name: Copy kubectl config to regular user
-      copy:
-        remote_src: true
-        src: /etc/kubernetes/admin.conf
-        dest: "~{{ ansible_user }}/.kube/config"
-        mode: 0600
-        owner: "{{ ansible_user }}"
-    - name: Site k8s cheat sheets
-      copy:
-        dest: /etc/profile.d/k8s-cheats.sh
-        src: files/k8s-cheats.sh
-        mode: preserve
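Note: the override added by this patch is resolved per master host. A
minimal sketch of how the new kube_server expression in
cesnet-central/playbooks/k8s.yaml evaluates (hostnames below are
illustrative, not taken from the real inventories):

    # master host WITH the override (the cesnet-mcc case):
    #   master:
    #     hosts:
    #       2001:db8::1:
    #         kube_server: 192.168.0.243
    # "{{ hostvars[groups['master'][0]].kube_server | default(groups['master'][0]) }}"
    #   -> 192.168.0.243
    #
    # master host WITHOUT the override (site with a public IPv4 hostname):
    #   master:
    #     hosts:
    #       k8s-master.example.org:
    # -> kube_server is undefined, so default() falls back to the
    #    inventory hostname k8s-master.example.org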
-
-- name: K8s network deployment
-  hosts: master
-  vars:
-    calicoctl_version: 3.27.0
-  tasks:
-    - name: Calico config
-      copy:
-        # https://raw.githubusercontent.com/projectcalico/calico/v3.27.0/manifests/calico.yaml
-        src: files/calico.yaml
-        dest: /tmp/calico-net.yaml
-        mode: 0644
-    - name: Calico installation
-      command:
-        cmd: kubectl apply -f /tmp/calico-net.yaml
-        creates: /var/etcd/calico-data
-      environment:
-        KUBECONFIG: /etc/kubernetes/admin.conf
-    - name: Download calicoctl
-      get_url:
-        url: https://github.com/projectcalico/calico/releases/download/v{{ calicoctl_version }}/calicoctl-linux-amd64
-        dest: /usr/local/sbin/calicoctl
-        mode: 0755
-
-- name: K8s nodes deployment
-  hosts: nfs, ingress, worker
-  become: true
-  roles:
-    - role: 'grycap.kubernetes'
-      vars:
-        # do not downgrade docker
-        kube_docker_version: latest
-        # must be IPv4 address or hostname
-        kube_server: "{{ hostvars[groups['master'][0]].ansible_default_ipv4.address }}"
-        kube_type_of_node: wn
-        kube_version: 1.28.2
-        kubelet_extra_args: '--volume-stats-agg-period 0'
-
-- name: K8s customization
-  hosts: master
-  become: true
-  tasks:
-    - name: Wait for helm
-      command: helm version
-      register: result
-      until: result.rc == 0
-      retries: 20
-      delay: 10
-      environment:
-        KUBECONFIG: /etc/kubernetes/admin.conf
-      when: true
-    - name: Create custom fact directory
-      file:
-        path: "/etc/ansible/facts.d"
-        mode: 0755
-        recurse: true
-        state: "directory"
-    - name: Create helm repos custom fact
-      copy:
-        src: files/helm_repos.fact
-        dest: /etc/ansible/facts.d/helm_repos.fact
-        mode: 0755
-    - name: Reload custom facts
-      setup:
-        filter: ansible_local
-    - name: Helm repo add stable
-      shell: |-
-        helm repo add stable https://charts.helm.sh/stable/
-        helm repo update
-      when: "'stable' not in ansible_local.helm_repos | map(attribute='name') | list"
-    - name: Helm repo add nfs-subdir-external-provisioner
-      shell: |-
-        helm repo add nfs-subdir-external-provisioner https://kubernetes-sigs.github.io/nfs-subdir-external-provisioner
-        helm repo update
-      when: "'nfs-subdir-external-provisioner' not in ansible_local.helm_repos | map(attribute='name') | list"
-    - name: NFS provisioner
-      vars:
-        config: >-
-          --set nfs.server={{ groups['nfs'][0] }}
-          --set storageClass.defaultClass=true
-          --set nfs.path=/exports
-      shell: |-
-        helm status --namespace kube-system nfs-provisioner
-        if [ $? -ne 0 ]; then
-          helm install --namespace kube-system {{ config }} nfs-provisioner nfs-subdir-external-provisioner/nfs-subdir-external-provisioner
-        else
-          helm upgrade --namespace kube-system {{ config }} nfs-provisioner nfs-subdir-external-provisioner/nfs-subdir-external-provisioner
-        fi
-      environment:
-        KUBECONFIG: /etc/kubernetes/admin.conf
-        PATH: /sbin:/bin:/usr/sbin:/usr/bin:/usr/local/bin
-      when: true
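Note: the "helm status, then install or upgrade" shell pattern above is
repeated for every chart in this playbook (and lives on unchanged in the
shared cesnet-central copy). With Helm 3 the same idempotent behaviour
is available as a single command, so each task could arguably shrink to
a sketch like this (same chart and values as the NFS provisioner task):

    helm upgrade --install --namespace kube-system {{ config }} \
      nfs-provisioner nfs-subdir-external-provisioner/nfs-subdir-external-provisioner

The kubernetes.core.helm Ansible module offers similar semantics without
shelling out; either change would be a separate cleanup.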
-    - name: Helm repo add ingress-nginx
-      shell: |-
-        helm repo add ingress-nginx https://kubernetes.github.io/ingress-nginx
-        helm repo update
-      when: "'ingress-nginx' not in ansible_local.helm_repos | map(attribute='name') | list"
-    - name: Ingress
-      vars:
-        config: >-
-          --set controller.service.type=NodePort
-          --set controller.service.externalIPs={{ '{' + hostvars[groups['ingress'][0]].ansible_default_ipv4.address + '}' }}
-          --set controller.config.proxy-body-size=0
-          --set controller.allowSnippetAnnotations=false
-      shell: |-
-        helm status --namespace kube-system cluster-ingress
-        if [ $? -ne 0 ]; then
-          helm install cluster-ingress --namespace kube-system {{ config }} ingress-nginx/ingress-nginx
-        else
-          helm upgrade --namespace kube-system {{ config }} cluster-ingress ingress-nginx/ingress-nginx
-        fi
-      environment:
-        KUBECONFIG: /etc/kubernetes/admin.conf
-        PATH: /sbin:/bin:/usr/sbin:/usr/bin:/usr/local/bin
-      when: true
-    - name: Cert-manager
-      vars:
-        version: 1.13.3
-        config: >-
-          --version={{ version }}
-          --set ingressShim.defaultIssuerName=letsencrypt-prod
-          --set ingressShim.defaultIssuerKind=ClusterIssuer
-          --set ingressShim.defaultIssuerGroup=cert-manager.io
-      shell: |-
-        helm status --namespace cert-manager certs-man
-        if [ $? -ne 0 ]; then
-          kubectl create namespace cert-manager
-          kubectl apply -f https://github.com/cert-manager/cert-manager/releases/download/v{{ version }}/cert-manager.crds.yaml
-          helm repo add jetstack https://charts.jetstack.io
-          helm repo update
-          helm install --namespace cert-manager {{ config }} certs-man jetstack/cert-manager
-        else
-          helm upgrade --namespace cert-manager {{ config }} certs-man jetstack/cert-manager
-        fi
-      environment:
-        KUBECONFIG: /etc/kubernetes/admin.conf
-        PATH: /sbin:/bin:/usr/sbin:/usr/bin:/usr/local/bin
-      when: true
-    - name: Cluster issuer file
-      copy:
-        dest: /tmp/clusterissuer.yaml
-        mode: 0644
-        content: |
-          apiVersion: cert-manager.io/v1
-          kind: ClusterIssuer
-          metadata:
-            name: letsencrypt-prod
-          spec:
-            acme:
-              email: valtri@civ.zcu.cz
-              server: https://acme-v02.api.letsencrypt.org/directory
-              privateKeySecretRef:
-                name: cluster-issuer-account-key
-              # Add a single challenge solver, HTTP01 using nginx
-              solvers:
-                - http01:
-                    ingress:
-                      class: nginx
-    - name: Cluster issuer
-      command:
-        kubectl apply -f /tmp/clusterissuer.yaml
-      environment:
-        KUBECONFIG: /etc/kubernetes/admin.conf
-        PATH: /sbin:/bin:/usr/sbin:/usr/bin:/usr/local/bin
-      when: true
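Note: the ClusterIssuer above works together with the ingressShim
defaults passed to cert-manager: any Ingress carrying the
kubernetes.io/tls-acme annotation gets a certificate from
letsencrypt-prod automatically (the Grafana ingress further below relies
on exactly this). A minimal sketch, with hypothetical name and host:

    apiVersion: networking.k8s.io/v1
    kind: Ingress
    metadata:
      name: example
      annotations:
        kubernetes.io/ingress.class: "nginx"
        # picked up by ingress-shim, which creates a Certificate
        # using the default ClusterIssuer letsencrypt-prod
        kubernetes.io/tls-acme: "true"
    spec:
      tls:
        - hosts:
            - example.org
          secretName: acme-tls-example
      rules:
        - host: example.org
          http:
            paths:
              - path: /
                pathType: Prefix
                backend:
                  service:
                    name: example
                    port:
                      number: 80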
-    # Accounting / monitoring needs
-    - name: Helm repo add prometheus-community
-      shell: |-
-        helm repo add prometheus-community https://prometheus-community.github.io/helm-charts
-        helm repo update
-      when: "'prometheus-community' not in ansible_local.helm_repos | map(attribute='name') | list"
-    - name: Prometheus configuration
-      vars:
-        smtp_from: "noreply@{{ groups['ingress'][0] }}"
-        limit_memory_warn: 80
-        limit_cpu_warn: 80
-        limit_disk_warn: 80
-      copy:
-        dest: /tmp/prometheus.yaml
-        mode: 0600
-        content: |
-          alertmanagerFiles:
-            alertmanager.yml:
-              global:
-                smtp_from: "{{ smtp_from }}"
-              receivers:
-                - name: default-receiver
-                  email_configs:
-                    - send_resolved: true
-                      to: valtri@civ.zcu.cz
-                - name: 'null'
-              route:
-                group_by: ['job']
-          kube-state-metrics:
-            metricAnnotationsAllowList:
-              - pods=[hub.jupyter.org/username,egi.eu/primary_group]
-          serverFiles:
-            alerting_rules.yml:
-              groups:
-                - name: limits
-                  rules:
-                    - alert: HighCpuLoad
-                      expr: 100 * (1 - avg by(kubernetes_node) (rate(node_cpu_seconds_total{mode="idle"}[5m]))) > {{ limit_cpu_warn }}
-                      for: 15m
-                      labels:
-                        job: "eosc-{{ site_name }}"
-                      annotations:
-                        summary: "Host high CPU load ({{ '{{ $labels.kubernetes_node }}' }})"
-                        description: "CPU load {{ '{{ $value | printf \"%.2f\" }}' }}% (limit {{ limit_cpu_warn }}%)"
-                    - alert: OutOfMemory
-                      expr: 100 * (1 - avg by(kubernetes_node) (node_memory_MemAvailable_bytes / node_memory_MemTotal_bytes)) > {{ limit_memory_warn }}
-                      for: 20m
-                      labels:
-                        job: "eosc-{{ site_name }}"
-                      annotations:
-                        summary: "Host out of memory ({{ '{{ $labels.kubernetes_node }}' }})"
-                        description: "Node memory {{ '{{ $value | printf \"%.2f\" }}' }}% (limit {{ limit_memory_warn }}%)"
-                    - alert: OutOfDiskSpace
-                      expr: 100 * (1 - avg by (kubernetes_node, mountpoint) (node_filesystem_avail_bytes{device=~"/dev/.*"} / node_filesystem_size_bytes))
-                        > {{ limit_disk_warn }}
-                      for: 20m
-                      labels:
-                        job: "eosc-{{ site_name }}"
-                      annotations:
-                        summary: "Host out of disk space ({{ '{{ $labels.kubernetes_node }}' }})"
-                        description: "Disk is almost full {{ '{{ $value | printf \"%.2f\" }}' }}% (limit {{ limit_disk_warn }}%)"
-    - name: Prometheus
-      vars:
-        config: >-
-          --version=25.8.2
-          -f /tmp/prometheus.yaml
-      shell: |-
-        helm status --namespace prometheus prometheus
-        if [ $? -ne 0 ]; then
-          kubectl create ns prometheus >/dev/null 2>&1 || true
-          helm install --namespace prometheus {{ config }} prometheus prometheus-community/prometheus
-        else
-          helm upgrade --namespace prometheus {{ config }} prometheus prometheus-community/prometheus
-        fi
-      environment:
-        KUBECONFIG: /etc/kubernetes/admin.conf
-        PATH: /sbin:/bin:/usr/sbin:/usr/bin:/usr/local/bin
-      when: true
-    - name: Grafana configuration
-      copy:
-        dest: /tmp/grafana.yaml
-        mode: 0640
-        content: |
-          ingress:
-            enabled: true
-            annotations:
-              kubernetes.io/ingress.class: "nginx"
-              kubernetes.io/tls-acme: "true"
-            hosts:
-              - "{{ grafana_hostname }}"
-            tls:
-              - hosts:
-                  - "{{ grafana_hostname }}"
-                secretName: acme-tls-grafana
-          datasources:
-            datasources.yaml:
-              apiVersion: 1
-              datasources:
-                - name: Prometheus
-                  type: prometheus
-                  access: Server
-                  orgId: 1
-                  url: http://prometheus-server.prometheus.svc.cluster.local
-                  isDefault: true
-                  version: 1
-                  editable: false
-          sidecar:
-            dashboards:
-              enabled: true
-    - name: Grafana
-      vars:
-        config: >-
-          --version=7.0.3
-          -f /tmp/grafana.yaml
-      shell: |-
-        helm status --namespace grafana grafana
-        if [ $? -ne 0 ]; then
-          kubectl create ns grafana
-          helm repo add grafana https://grafana.github.io/helm-charts
-          helm repo update
-          helm install --namespace grafana {{ config }} grafana grafana/grafana
-        else
-          helm upgrade --namespace grafana {{ config }} grafana grafana/grafana
-        fi
-      environment:
-        KUBECONFIG: /etc/kubernetes/admin.conf
-        PATH: /sbin:/bin:/usr/sbin:/usr/bin:/usr/local/bin
-      when: true
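Note: the playbook deleted above is recreated below as a symbolic link
(mode 120000); git stores the link target as the blob content, which is
why the new-file hunk adds the single line
"../../cesnet-central/playbooks/k8s.yaml" with no trailing newline.
After applying the patch this can be verified with, e.g. (object hash
abbreviated):

    $ git ls-files -s cesnet-mcc/playbooks/k8s.yaml
    120000 5e18112... 0	cesnet-mcc/playbooks/k8s.yaml
    $ readlink cesnet-mcc/playbooks/k8s.yaml
    ../../cesnet-central/playbooks/k8s.yaml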
diff --git a/cesnet-mcc/playbooks/k8s.yaml b/cesnet-mcc/playbooks/k8s.yaml
new file mode 120000
index 0000000..5e18112
--- /dev/null
+++ b/cesnet-mcc/playbooks/k8s.yaml
@@ -0,0 +1 @@
+../../cesnet-central/playbooks/k8s.yaml
\ No newline at end of file
diff --git a/cesnet-mcc/terraform/vms.tf b/cesnet-mcc/terraform/vms.tf
index 9ad44ab..7837a72 100644
--- a/cesnet-mcc/terraform/vms.tf
+++ b/cesnet-mcc/terraform/vms.tf
@@ -262,6 +262,8 @@ fip:
 master:
   hosts:
     ${local.master_ip}:
+      # must be IPv4 address or hostname
+      kube_server: ${openstack_compute_instance_v2.master.network[0].fixed_ip_v4}

 ingress:
   hosts:
--
GitLab
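Note: the Terraform change emits the same kube_server hostvar into the
generated inventory, so the cluster keeps working without a public IPv4
address: the inventory hostname stays the master's IPv6 address, while
kube_server carries the fixed private IPv4 of the master instance's
first network port. A hypothetical output block for sanity-checking the
rendered value against the existing master resource:

    output "master_kube_server" {
      value = openstack_compute_instance_v2.master.network[0].fixed_ip_v4
    }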