diff --git a/cesnet-central/playbooks/k8s.yaml b/cesnet-central/playbooks/k8s.yaml index eb4424dc3bcc85a59aef69a17b36c1b9af3acd13..3646604fb55088d2bb7872016805411e8166b919 100644 --- a/cesnet-central/playbooks/k8s.yaml +++ b/cesnet-central/playbooks/k8s.yaml @@ -150,7 +150,8 @@ vars: # do not downgrade docker kube_docker_version: latest - kube_server: "{{ groups['master'][0] }}" + # must be IPv4 address or hostname + kube_server: "{{ hostvars[groups['master'][0]].kube_server | default(groups['master'][0]) }}" kube_type_of_node: wn kube_version: 1.28.2 kubelet_extra_args: '--volume-stats-agg-period 0' diff --git a/cesnet-mcc/inventory/1-cesnet.yaml b/cesnet-mcc/inventory/1-cesnet.yaml index 1b20f12503c6334ae012f8cb0b279bbf4f52e769..78e28dd528dd79b345c8f7a2dd25d0c68e38787e 100644 --- a/cesnet-mcc/inventory/1-cesnet.yaml +++ b/cesnet-mcc/inventory/1-cesnet.yaml @@ -6,6 +6,8 @@ fip: master: hosts: 2001:718:801:432:f816:3eff:febb:5fc8: + # must be IPv4 address or hostname + kube_server: 192.168.0.243 ingress: hosts: diff --git a/cesnet-mcc/playbooks/k8s.yaml b/cesnet-mcc/playbooks/k8s.yaml deleted file mode 100644 index a72f84a21d8272171ef635d59aaf5f19d8b7abbc..0000000000000000000000000000000000000000 --- a/cesnet-mcc/playbooks/k8s.yaml +++ /dev/null @@ -1,413 +0,0 @@ ---- -- name: Basic setup and NFS common - hosts: allnodes - become: true - tasks: - - name: Add SSH keys - authorized_key: - user: egi - state: present - key: '{{ item }}' - with_file: - - public_keys/andrea-manzi - - public_keys/enolfc - - public_keys/jhradil - - public_keys/pospisilp - - public_keys/sustr - - public_keys/valtri - - name: Install nfs-common - apt: - name: nfs-common - update_cache: true - - name: Site install packages - package: - name: - - atop - - cron-apt - - fail2ban - - mc - - vim - - postfix - - name: Site remove packages - package: - name: - - unattended-upgrades - state: absent - - name: Site cron-apt config - copy: - dest: /etc/cron-apt/config - content: | - MAILTO=valtri@civ.zcu.cz - MAILON=upgrade - RUNSLEEP=600 - mode: 0644 - - name: Site cron-apt action - copy: - dest: /etc/cron-apt/action.d/9-upgrade - content: -q -q dist-upgrade - mode: 0644 - - name: Site touch - file: - path: "/EOSC-{{ site_name | upper }}" - state: touch - mode: 0644 - -- name: NFS server - hosts: nfs - become: true - tasks: - - name: Install nfs-server - apt: - name: nfs-kernel-server - state: present - update_cache: true - - name: Create user for NFS - user: - name: volumes - create_home: false - uid: 5005 - - name: Create /exports dir - file: - path: /exports - state: directory - mode: 0755 - owner: volumes - - name: Create exports - template: - src: templates/etc/exports - dest: /etc/exports - mode: 0644 - notify: Reload exports - - name: Start NFS service - service: - name: nfs-server - state: started - handlers: - - name: Reload exports - command: exportfs -ra - -- name: K8s master deployment - hosts: master - become: true - roles: - - role: 'grycap.kubernetes' - vars: - # do not downgrade docker - kube_docker_version: latest - kube_version: 1.28.2 - kube_network: 'none' # custom network installation - kube_install_helm: true - kube_install_helm_version: 'v3.13.0' - kube_install_metrics: true - tasks: - - name: Create kubectl config dir - file: - path: "~{{ ansible_user }}/.kube" - mode: 0750 - owner: "{{ ansible_user }}" - state: directory - - name: Copy kubectl config to regular user - copy: - remote_src: true - src: /etc/kubernetes/admin.conf - dest: "~{{ ansible_user }}/.kube/config" - mode: 0600 - owner: "{{ ansible_user }}" - - name: Site k8s cheat sheets - copy: - dest: /etc/profile.d/k8s-cheats.sh - src: files/k8s-cheats.sh - mode: preserve - -- name: K8s network deployment - hosts: master - vars: - calicoctl_version: 3.27.0 - tasks: - - name: Calico config - copy: - # https://raw.githubusercontent.com/projectcalico/calico/v3.27.0/manifests/calico.yaml - src: files/calico.yaml - dest: /tmp/calico-net.yaml - mode: 0644 - - name: Calico installation - command: - cmd: kubectl apply -f /tmp/calico-net.yaml - creates: /var/etcd/calico-data - environment: - KUBECONFIG: /etc/kubernetes/admin.conf - - name: Download calicoctl - get_url: - url: https://github.com/projectcalico/calico/releases/download/v{{ calicoctl_version }}/calicoctl-linux-amd64 - dest: /usr/local/sbin/calicoctl - mode: 0755 - -- name: K8s nodes deployment - hosts: nfs, ingress, worker - become: true - roles: - - role: 'grycap.kubernetes' - vars: - # do not downgrade docker - kube_docker_version: latest - # must be IPv4 address or hostname - kube_server: "{{ hostvars[groups['master'][0]].ansible_default_ipv4.address }}" - kube_type_of_node: wn - kube_version: 1.28.2 - kubelet_extra_args: '--volume-stats-agg-period 0' - -- name: K8s customization - hosts: master - become: true - tasks: - - name: Wait for helm - command: helm version - register: result - until: result.rc == 0 - retries: 20 - delay: 10 - environment: - KUBECONFIG: /etc/kubernetes/admin.conf - when: true - - name: Create custom fact directory - file: - path: "/etc/ansible/facts.d" - mode: 0755 - recurse: true - state: "directory" - - name: Create helm repos custom fact - copy: - src: files/helm_repos.fact - dest: /etc/ansible/facts.d/helm_repos.fact - mode: 0755 - - name: Reload custom facts - setup: - filter: ansible_local - - name: Helm repo add stable - shell: |- - helm repo add stable https://charts.helm.sh/stable/ - helm repo update - when: "'stable' not in ansible_local.helm_repos | map(attribute='name') | list" - - name: Helm repo add nfs-subdir-external-provisioner - shell: |- - helm repo add nfs-subdir-external-provisioner https://kubernetes-sigs.github.io/nfs-subdir-external-provisioner - helm repo update - when: "'nfs-subdir-external-provisioner' not in ansible_local.helm_repos | map(attribute='name') | list" - - name: NFS provisioner - vars: - config: >- - --set nfs.server={{ groups['nfs'][0] }} - --set storageClass.defaultClass=true - --set nfs.path=/exports - shell: |- - helm status --namespace kube-system nfs-provisioner - if [ $? -ne 0 ]; then - helm install --namespace kube-system {{ config }} nfs-provisioner nfs-subdir-external-provisioner/nfs-subdir-external-provisioner - else - helm upgrade --namespace kube-system {{ config }} nfs-provisioner nfs-subdir-external-provisioner/nfs-subdir-external-provisioner - fi - environment: - KUBECONFIG: /etc/kubernetes/admin.conf - PATH: /sbin:/bin:/usr/sbin:/usr/bin:/usr/local/bin - when: true - - name: Helm repo add ingress-nginx - shell: |- - helm repo add ingress-nginx https://kubernetes.github.io/ingress-nginx - helm repo update - when: "'ingress-nginx' not in ansible_local.helm_repos | map(attribute='name') | list" - - name: Ingress - vars: - config: >- - --set controller.service.type=NodePort - --set controller.service.externalIPs={{ '{' + hostvars[groups['ingress'][0]].ansible_default_ipv4.address + '}' }} - --set controller.config.proxy-body-size=0 - --set controller.allowSnippetAnnotations=false - shell: |- - helm status --namespace kube-system cluster-ingress - if [ $? -ne 0 ]; then - helm install cluster-ingress --namespace kube-system {{ config }} ingress-nginx/ingress-nginx - else - helm upgrade --namespace kube-system {{ config }} cluster-ingress ingress-nginx/ingress-nginx - fi - environment: - KUBECONFIG: /etc/kubernetes/admin.conf - PATH: /sbin:/bin:/usr/sbin:/usr/bin:/usr/local/bin - when: true - - name: Cert-manager - vars: - version: 1.13.3 - config: >- - --version={{ version }} - --set ingressShim.defaultIssuerName=letsencrypt-prod - --set ingressShim.defaultIssuerKind=ClusterIssuer - --set ingressShim.defaultIssuerGroup=cert-manager.io - shell: |- - helm status --namespace cert-manager certs-man - if [ $? -ne 0 ]; then - kubectl create namespace cert-manager - kubectl apply -f https://github.com/cert-manager/cert-manager/releases/download/v{{ version }}/cert-manager.crds.yaml - helm repo add jetstack https://charts.jetstack.io - helm repo update - helm install --namespace cert-manager {{ config }} certs-man jetstack/cert-manager - else - helm upgrade --namespace cert-manager {{ config }} certs-man jetstack/cert-manager - fi - environment: - KUBECONFIG: /etc/kubernetes/admin.conf - PATH: /sbin:/bin:/usr/sbin:/usr/bin:/usr/local/bin - when: true - - name: Cluster issuer file - copy: - dest: /tmp/clusterissuer.yaml - mode: 0644 - content: | - apiVersion: cert-manager.io/v1 - kind: ClusterIssuer - metadata: - name: letsencrypt-prod - spec: - acme: - email: valtri@civ.zcu.cz - server: https://acme-v02.api.letsencrypt.org/directory - privateKeySecretRef: - name: cluster-issuer-account-key - # Add a single challenge solver, HTTP01 using nginx - solvers: - - http01: - ingress: - class: nginx - - name: Cluster issuer - command: - kubectl apply -f /tmp/clusterissuer.yaml - environment: - KUBECONFIG: /etc/kubernetes/admin.conf - PATH: /sbin:/bin:/usr/sbin:/usr/bin:/usr/local/bin - when: true - # Accounting / monitoring needs - - name: Helm repo add prometheus-community - shell: |- - helm repo add prometheus-community https://prometheus-community.github.io/helm-charts - helm repo update - when: "'prometheus-community' not in ansible_local.helm_repos | map(attribute='name') | list" - - name: Prometheus configuration - vars: - smtp_from: "noreply@{{ groups['ingress'][0] }}" - limit_memory_warn: 80 - limit_cpu_warn: 80 - limit_disk_warn: 80 - copy: - dest: /tmp/prometheus.yaml - mode: 0600 - content: | - alertmanagerFiles: - alertmanager.yml: - global: - smtp_from: "{{ smtp_from }}" - receivers: - - name: default-receiver - email_configs: - - send_resolved: true - to: valtri@civ.zcu.cz - - name: 'null' - route: - group_by: ['job'] - kube-state-metrics: - metricAnnotationsAllowList: - - pods=[hub.jupyter.org/username,egi.eu/primary_group] - serverFiles: - alerting_rules.yml: - groups: - - name: limits - rules: - - alert: HighCpuLoad - expr: 100 * (1 - avg by(kubernetes_node) (rate(node_cpu_seconds_total{mode="idle"}[5m]))) > {{ limit_cpu_warn }} - for: 15m - labels: - job: "eosc-{{ site_name }}" - annotations: - summary: "Host high CPU load ({{ '{{ $labels.kubernetes_node }}' }})" - description: "CPU load {{ '{{ $value | printf \"%.2f\" }}' }}% (limit {{ limit_cpu_warn }}%)" - - alert: OutOfMemory - expr: 100 * (1 - avg by(kubernetes_node) (node_memory_MemAvailable_bytes / node_memory_MemTotal_bytes)) > {{ limit_memory_warn }} - for: 20m - labels: - job: "eosc-{{ site_name }}" - annotations: - summary: "Host out of memory ({{ '{{ $labels.kubernetes_node }}' }})" - description: "Node memory {{ '{{ $value | printf \"%.2f\" }}' }}% (limit {{ limit_memory_warn }}%)" - - alert: OutOfDiskSpace - expr: 100 * (1 - avg by (kubernetes_node, mountpoint) (node_filesystem_avail_bytes{device=~"/dev/.*"} / node_filesystem_size_bytes)) - > {{ limit_disk_warn }} - for: 20m - labels: - job: "eosc-{{ site_name }}" - annotations: - summary: "Host out of disk space ({{ '{{ $labels.kubernetes_node }}' }})" - description: "Disk is almost full {{ '{{ $value | printf \"%.2f\" }}' }}% (limit {{ limit_disk_warn }}%)" - - name: Prometheus - vars: - config: >- - --version=25.8.2 - -f /tmp/prometheus.yaml - shell: |- - helm status --namespace prometheus prometheus - if [ $? -ne 0 ]; then - kubectl create ns prometheus >/dev/null 2>&1 || true - helm install --namespace prometheus {{ config }} prometheus prometheus-community/prometheus - else - helm upgrade --namespace prometheus {{ config }} prometheus prometheus-community/prometheus - fi - environment: - KUBECONFIG: /etc/kubernetes/admin.conf - PATH: /sbin:/bin:/usr/sbin:/usr/bin:/usr/local/bin - when: true - - name: Grafana configuration - copy: - dest: /tmp/grafana.yaml - mode: 0640 - content: | - ingress: - enabled: true - annotations: - kubernetes.io/ingress.class: "nginx" - kubernetes.io/tls-acme: "true" - hosts: - - "{{ grafana_hostname }}" - tls: - - hosts: - - "{{ grafana_hostname }}" - secretName: acme-tls-grafana - datasources: - datasources.yaml: - apiVersion: 1 - datasources: - - name: Prometheus - type: prometheus - access: Server - orgId: 1 - url: http://prometheus-server.prometheus.svc.cluster.local - isDefault: true - version: 1 - editable: false - sidecar: - dashboards: - enabled: true - - name: Grafana - vars: - config: >- - --version=7.0.3 - -f /tmp/grafana.yaml - shell: |- - helm status --namespace grafana grafana - if [ $? -ne 0 ]; then - kubectl create ns grafana - helm repo add grafana https://grafana.github.io/helm-charts - helm repo update - helm install --namespace grafana {{ config }} grafana grafana/grafana - else - helm upgrade --namespace grafana {{ config }} grafana grafana/grafana - fi - environment: - KUBECONFIG: /etc/kubernetes/admin.conf - PATH: /sbin:/bin:/usr/sbin:/usr/bin:/usr/local/bin - when: true diff --git a/cesnet-mcc/playbooks/k8s.yaml b/cesnet-mcc/playbooks/k8s.yaml new file mode 120000 index 0000000000000000000000000000000000000000..5e18112ca53b79e98249019a43b02a8f6b19235a --- /dev/null +++ b/cesnet-mcc/playbooks/k8s.yaml @@ -0,0 +1 @@ +../../cesnet-central/playbooks/k8s.yaml \ No newline at end of file diff --git a/cesnet-mcc/terraform/vms.tf b/cesnet-mcc/terraform/vms.tf index 9ad44ab219c5d3a50f620ebbdcae3fe14b142149..7837a72b3a4fad7d2f4d8e80a7ddf70f693937f8 100644 --- a/cesnet-mcc/terraform/vms.tf +++ b/cesnet-mcc/terraform/vms.tf @@ -262,6 +262,8 @@ fip: master: hosts: ${local.master_ip}: + # must be IPv4 address or hostname + kube_server: ${openstack_compute_instance_v2.master.network[0].fixed_ip_v4} ingress: hosts: