From 28ab8bb2823aa5206c23d13b256549b58ba7ffe1 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Franti=C5=A1ek=20Dvo=C5=99=C3=A1k?= <valtri@civ.zcu.cz>
Date: Fri, 19 Jan 2024 13:51:29 +0000
Subject: [PATCH] Example 3rd-party site

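Add an example configuration for deploying a site on 3rd-party
OpenStack resources (CESNET MCC): Terraform creates the servers,
volumes, networks, and security groups and writes the Ansible
inventory; the k8s.yaml playbook (now a regular file instead of a
symlink to cesnet-central) installs Kubernetes with Calico, the
NFS subdir provisioner, ingress-nginx, cert-manager, Prometheus,
and Grafana. deploy.sh drives the whole flow.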
---
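Notes:
    A sketch of a typical run, assuming OpenStack credentials are
    already exported and vault is logged in (both are site-local
    assumptions, not enforced by this patch):

        cd cesnet-mcc
        ./deploy.sh

    deploy.sh runs `terraform init && terraform apply`, registers the
    floating IP with the nsupdate.fedcloud.eu dynamic DNS, waits for
    the nodes to come up, prepares the NFS, squid, and docker volumes,
    and finally runs `ansible-playbook playbooks/k8s.yaml`, looping
    until all pods report Running.
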
 cesnet-mcc/ansible.cfg                     |   5 +
 cesnet-mcc/deploy.sh                       |  48 +++
 cesnet-mcc/inventory/99-all.yaml           |   4 +
 cesnet-mcc/playbooks/k8s.yaml              | 414 ++++++++++++++++++++-
 cesnet-mcc/playbooks/templates/etc/exports |   2 +-
 cesnet-mcc/terraform/.gitignore            |   8 +
 cesnet-mcc/terraform/cloud-init.yaml       |  18 +
 cesnet-mcc/terraform/terraform.tfvars      |  22 ++
 cesnet-mcc/terraform/vars.tf               |  54 +++
 cesnet-mcc/terraform/versions.tf           |  10 +
 cesnet-mcc/terraform/vms.tf                | 303 +++++++++++++++
 11 files changed, 886 insertions(+), 2 deletions(-)
 create mode 100644 cesnet-mcc/ansible.cfg
 create mode 100755 cesnet-mcc/deploy.sh
 mode change 120000 => 100644 cesnet-mcc/playbooks/k8s.yaml
 create mode 100644 cesnet-mcc/terraform/.gitignore
 create mode 100644 cesnet-mcc/terraform/cloud-init.yaml
 create mode 100644 cesnet-mcc/terraform/terraform.tfvars
 create mode 100644 cesnet-mcc/terraform/vars.tf
 create mode 100644 cesnet-mcc/terraform/versions.tf
 create mode 100644 cesnet-mcc/terraform/vms.tf

diff --git a/cesnet-mcc/ansible.cfg b/cesnet-mcc/ansible.cfg
new file mode 100644
index 0000000..c3a73be
--- /dev/null
+++ b/cesnet-mcc/ansible.cfg
@@ -0,0 +1,5 @@
+[defaults]
+inventory=inventory
+
+[diff]
+always=true
diff --git a/cesnet-mcc/deploy.sh b/cesnet-mcc/deploy.sh
new file mode 100755
index 0000000..5ca3b71
--- /dev/null
+++ b/cesnet-mcc/deploy.sh
@@ -0,0 +1,48 @@
+#! /bin/bash -xe
+
+#
+# Example to set up a new site with 3rd-party nodes
+#
+
+cd terraform && terraform init && terraform apply
+cd -
+cp -pv terraform/inventory.yaml inventory/1-cesnet.yaml
+
+# dynamic DNS
+ip="$(head -n 1 < terraform/fip.txt)"
+shellstate=$(shopt -po xtrace)
+set +o xtrace
+# https://nsupdate.fedcloud.eu
+FEDCLOUD_DYNAMIC_DNS=$(vault kv get -mount=eosc/dev -field 'data' -format table FEDCLOUD_DYNAMIC_DNS | grep ^map | head -n 1 | sed 's/map\[\(.*\)\]/\1/')
+for auth in $FEDCLOUD_DYNAMIC_DNS; do
+	echo "curl -i -X GET -u $(echo "$auth" | cut -d: -f1):XXX https://nsupdate.fedcloud.eu/nic/update?myip=$ip"
+	curl -i -X GET -u "$auth" "https://nsupdate.fedcloud.eu/nic/update?myip=$ip"
+done
+eval "$shellstate"
+
+# wait for ping and ssh
+for ip in $(cat terraform/hosts.txt); do
+	while ! ping -c 1 "$ip"; do sleep 5; done
+	ssh-keygen -R "$ip"
+	while ! ssh egi@"$ip" -o ConnectTimeout=10 -o PreferredAuthentications=publickey -o StrictHostKeyChecking=no :; do sleep 10; done
+done
+
+# check ssh access
+ansible -m command -a 'uname -a' allnodes
+
+# wait for cloud-init to finish
+ansible -m shell -a 'while ! test -f /var/lib/cloud/instance/boot-finished; do sleep 2; done' allnodes
+
+# set up volumes (become root: the scripts go to /root and run mkfs)
+ansible -b -m copy -a 'src=terraform/nfs-volume.sh dest=/root/ mode=preserve' nfs
+ansible -b -m command -a '/root/nfs-volume.sh' nfs
+ansible -b -m copy -a 'src=terraform/squid-volume.sh dest=/root/ mode=preserve' 'ingress[0]'
+ansible -b -m command -a '/root/squid-volume.sh' 'ingress[0]'
+ansible -b -m copy -a 'src=terraform/docker-volume.sh dest=/root/ mode=preserve' 'ingress:nfs:worker'
+ansible -b -m command -a '/root/docker-volume.sh' 'ingress:nfs:worker'
+
+# k8s + notebooks
+ansible-playbook playbooks/k8s.yaml
+# ansible-playbook playbooks/squid.yaml
+# ansible-playbook playbooks/cvmfs.yaml
+while ansible -i ./inventory -m command -a 'kubectl get pods --all-namespaces' master | tail -n +3 | grep -v ' Running '; do sleep 5; done
diff --git a/cesnet-mcc/inventory/99-all.yaml b/cesnet-mcc/inventory/99-all.yaml
index 75a797a..d39c6a7 100644
--- a/cesnet-mcc/inventory/99-all.yaml
+++ b/cesnet-mcc/inventory/99-all.yaml
@@ -2,6 +2,8 @@
 allnodes:
   children:
     master:
+    ingress:
+    nfs:
     worker:
 
 all:
@@ -10,3 +12,5 @@ all:
     ansible_user: egi
     site_name: cesnet-mcc
     vault_mount_point: eosc/dev
+
+    grafana_hostname: grafana-cesnet.eosc.zcu.cz
diff --git a/cesnet-mcc/playbooks/k8s.yaml b/cesnet-mcc/playbooks/k8s.yaml
deleted file mode 120000
index 5e18112..0000000
--- a/cesnet-mcc/playbooks/k8s.yaml
+++ /dev/null
@@ -1 +0,0 @@
-../../cesnet-central/playbooks/k8s.yaml
\ No newline at end of file
diff --git a/cesnet-mcc/playbooks/k8s.yaml b/cesnet-mcc/playbooks/k8s.yaml
new file mode 100644
index 0000000..a72f84a
--- /dev/null
+++ b/cesnet-mcc/playbooks/k8s.yaml
@@ -0,0 +1,413 @@
+---
+- name: Basic setup and NFS common
+  hosts: allnodes
+  become: true
+  tasks:
+    - name: Add SSH keys
+      authorized_key:
+        user: egi
+        state: present
+        key: '{{ item }}'
+      with_file:
+        - public_keys/andrea-manzi
+        - public_keys/enolfc
+        - public_keys/jhradil
+        - public_keys/pospisilp
+        - public_keys/sustr
+        - public_keys/valtri
+    - name: Install nfs-common
+      apt:
+        name: nfs-common
+        update_cache: true
+    - name: Site install packages
+      package:
+        name:
+          - atop
+          - cron-apt
+          - fail2ban
+          - mc
+          - vim
+          - postfix
+    - name: Site remove packages
+      package:
+        name:
+          - unattended-upgrades
+        state: absent
+    - name: Site cron-apt config
+      copy:
+        dest: /etc/cron-apt/config
+        content: |
+          MAILTO=valtri@civ.zcu.cz
+          MAILON=upgrade
+          RUNSLEEP=600
+        mode: 0644
+    - name: Site cron-apt action
+      copy:
+        dest: /etc/cron-apt/action.d/9-upgrade
+        content: -q -q dist-upgrade
+        mode: 0644
+    - name: Site touch
+      file:
+        path: "/EOSC-{{ site_name | upper }}"
+        state: touch
+        mode: 0644
+
+- name: NFS server
+  hosts: nfs
+  become: true
+  tasks:
+    - name: Install nfs-server
+      apt:
+        name: nfs-kernel-server
+        state: present
+        update_cache: true
+    - name: Create user for NFS
+      user:
+        name: volumes
+        create_home: false
+        uid: 5005
+    - name: Create /exports dir
+      file:
+        path: /exports
+        state: directory
+        mode: 0755
+        owner: volumes
+    - name: Create exports
+      template:
+        src: templates/etc/exports
+        dest: /etc/exports
+        mode: 0644
+      notify: Reload exports
+    - name: Start NFS service
+      service:
+        name: nfs-server
+        state: started
+  handlers:
+    - name: Reload exports
+      command: exportfs -ra
+
+- name: K8s master deployment
+  hosts: master
+  become: true
+  roles:
+    - role: 'grycap.kubernetes'
+      vars:
+        # do not downgrade docker
+        kube_docker_version: latest
+        kube_version: 1.28.2
+        kube_network: 'none'  # custom network installation
+        kube_install_helm: true
+        kube_install_helm_version: 'v3.13.0'
+        kube_install_metrics: true
+  tasks:
+    - name: Create kubectl config dir
+      file:
+        path: "~{{ ansible_user }}/.kube"
+        mode: 0750
+        owner: "{{ ansible_user }}"
+        state: directory
+    - name: Copy kubectl config to regular user
+      copy:
+        remote_src: true
+        src: /etc/kubernetes/admin.conf
+        dest: "~{{ ansible_user }}/.kube/config"
+        mode: 0600
+        owner: "{{ ansible_user }}"
+    - name: Site k8s cheat sheets
+      copy:
+        dest: /etc/profile.d/k8s-cheats.sh
+        src: files/k8s-cheats.sh
+        mode: preserve
+
+- name: K8s network deployment
+  hosts: master
+  become: true
+  vars: {calicoctl_version: 3.27.0}
+  tasks:
+    - name: Calico config
+      copy:
+        # https://raw.githubusercontent.com/projectcalico/calico/v3.27.0/manifests/calico.yaml
+        src: files/calico.yaml
+        dest: /tmp/calico-net.yaml
+        mode: 0644
+    - name: Calico installation
+      command:
+        cmd: kubectl apply -f /tmp/calico-net.yaml
+        creates: /var/etcd/calico-data
+      environment:
+        KUBECONFIG: /etc/kubernetes/admin.conf
+    - name: Download calicoctl
+      get_url:
+        url: https://github.com/projectcalico/calico/releases/download/v{{ calicoctl_version }}/calicoctl-linux-amd64
+        dest: /usr/local/sbin/calicoctl
+        mode: 0755
+
+- name: K8s nodes deployment
+  hosts: nfs, ingress, worker
+  become: true
+  roles:
+    - role: 'grycap.kubernetes'
+      vars:
+        # do not downgrade docker
+        kube_docker_version: latest
+        # must be IPv4 address or hostname
+        kube_server: "{{ hostvars[groups['master'][0]].ansible_default_ipv4.address }}"
+        kube_type_of_node: wn
+        kube_version: 1.28.2
+        kubelet_extra_args: '--volume-stats-agg-period 0'
+
+- name: K8s customization
+  hosts: master
+  become: true
+  tasks:
+    - name: Wait for helm
+      command: helm version
+      register: result
+      until: result.rc == 0
+      retries: 20
+      delay: 10
+      environment:
+        KUBECONFIG: /etc/kubernetes/admin.conf
+      when: true
+    - name: Create custom fact directory
+      file:
+        path: "/etc/ansible/facts.d"
+        mode: 0755
+        recurse: true
+        state: "directory"
+    - name: Create helm repos custom fact
+      copy:
+        src: files/helm_repos.fact
+        dest: /etc/ansible/facts.d/helm_repos.fact
+        mode: 0755
+    - name: Reload custom facts
+      setup:
+        filter: ansible_local
+    - name: Helm repo add stable
+      shell: |-
+        helm repo add stable https://charts.helm.sh/stable/
+        helm repo update
+      when: "'stable' not in ansible_local.helm_repos | map(attribute='name') | list"
+    - name: Helm repo add nfs-subdir-external-provisioner
+      shell: |-
+        helm repo add nfs-subdir-external-provisioner https://kubernetes-sigs.github.io/nfs-subdir-external-provisioner
+        helm repo update
+      when: "'nfs-subdir-external-provisioner' not in ansible_local.helm_repos | map(attribute='name') | list"
+    - name: NFS provisioner
+      vars:
+        config: >-
+          --set nfs.server={{ groups['nfs'][0] }}
+          --set storageClass.defaultClass=true
+          --set nfs.path=/exports
+      shell: |-
+        helm status --namespace kube-system nfs-provisioner
+        if [ $? -ne 0 ]; then
+            helm install --namespace kube-system {{ config }} nfs-provisioner nfs-subdir-external-provisioner/nfs-subdir-external-provisioner
+        else
+            helm upgrade --namespace kube-system {{ config }} nfs-provisioner nfs-subdir-external-provisioner/nfs-subdir-external-provisioner
+        fi
+      environment:
+        KUBECONFIG: /etc/kubernetes/admin.conf
+        PATH: /sbin:/bin:/usr/sbin:/usr/bin:/usr/local/bin
+      when: true
+    - name: Helm repo add ingress-nginx
+      shell: |-
+        helm repo add ingress-nginx https://kubernetes.github.io/ingress-nginx
+        helm repo update
+      when: "'ingress-nginx' not in ansible_local.helm_repos | map(attribute='name') | list"
+    - name: Ingress
+      vars:
+        config: >-
+          --set controller.service.type=NodePort
+          --set controller.service.externalIPs={{ '{' + hostvars[groups['ingress'][0]].ansible_default_ipv4.address + '}' }}
+          --set controller.config.proxy-body-size=0
+          --set controller.allowSnippetAnnotations=false
+      shell: |-
+        helm status --namespace kube-system cluster-ingress
+        if [ $? -ne 0 ]; then
+            helm install cluster-ingress --namespace kube-system {{ config }} ingress-nginx/ingress-nginx
+        else
+            helm upgrade --namespace kube-system {{ config }} cluster-ingress ingress-nginx/ingress-nginx
+        fi
+      environment:
+        KUBECONFIG: /etc/kubernetes/admin.conf
+        PATH: /sbin:/bin:/usr/sbin:/usr/bin:/usr/local/bin
+      when: true
+    - name: Cert-manager
+      vars:
+        version: 1.13.3
+        config: >-
+          --version={{ version }}
+          --set ingressShim.defaultIssuerName=letsencrypt-prod
+          --set ingressShim.defaultIssuerKind=ClusterIssuer
+          --set ingressShim.defaultIssuerGroup=cert-manager.io
+      shell: |-
+        helm status --namespace cert-manager certs-man
+        if [ $? -ne 0 ]; then
+            kubectl create namespace cert-manager >/dev/null 2>&1 || true
+            kubectl apply -f https://github.com/cert-manager/cert-manager/releases/download/v{{ version }}/cert-manager.crds.yaml
+            helm repo add jetstack https://charts.jetstack.io
+            helm repo update
+            helm install --namespace cert-manager {{ config }} certs-man jetstack/cert-manager
+        else
+            helm upgrade --namespace cert-manager {{ config }} certs-man jetstack/cert-manager
+        fi
+      environment:
+        KUBECONFIG: /etc/kubernetes/admin.conf
+        PATH: /sbin:/bin:/usr/sbin:/usr/bin:/usr/local/bin
+      when: true
+    - name: Cluster issuer file
+      copy:
+        dest: /tmp/clusterissuer.yaml
+        mode: 0644
+        content: |
+          apiVersion: cert-manager.io/v1
+          kind: ClusterIssuer
+          metadata:
+            name: letsencrypt-prod
+          spec:
+            acme:
+              email: valtri@civ.zcu.cz
+              server: https://acme-v02.api.letsencrypt.org/directory
+              privateKeySecretRef:
+                name: cluster-issuer-account-key
+              # Add a single challenge solver, HTTP01 using nginx
+              solvers:
+              - http01:
+                  ingress:
+                    class: nginx
+    - name: Cluster issuer
+      command:
+        kubectl apply -f /tmp/clusterissuer.yaml
+      environment:
+        KUBECONFIG: /etc/kubernetes/admin.conf
+        PATH: /sbin:/bin:/usr/sbin:/usr/bin:/usr/local/bin
+      when: true
+    # Accounting / monitoring needs
+    - name: Helm repo add prometheus-community
+      shell: |-
+        helm repo add prometheus-community https://prometheus-community.github.io/helm-charts
+        helm repo update
+      when: "'prometheus-community' not in ansible_local.helm_repos | map(attribute='name') | list"
+    - name: Prometheus configuration
+      vars:
+        smtp_from: "noreply@{{ groups['ingress'][0] }}"
+        limit_memory_warn: 80
+        limit_cpu_warn: 80
+        limit_disk_warn: 80
+      copy:
+        dest: /tmp/prometheus.yaml
+        mode: 0600
+        content: |
+          alertmanagerFiles:
+            alertmanager.yml:
+              global:
+                smtp_from: "{{ smtp_from }}"
+              receivers:
+                - name: default-receiver
+                  email_configs:
+                    - send_resolved: true
+                      to: valtri@civ.zcu.cz
+                - name: 'null'
+              route:
+                group_by: ['job']
+          kube-state-metrics:
+            metricAnnotationsAllowList:
+              - pods=[hub.jupyter.org/username,egi.eu/primary_group]
+          serverFiles:
+            alerting_rules.yml:
+              groups:
+                - name: limits
+                  rules:
+                    - alert: HighCpuLoad
+                      expr: 100 * (1 - avg by(kubernetes_node) (rate(node_cpu_seconds_total{mode="idle"}[5m]))) > {{ limit_cpu_warn }}
+                      for: 15m
+                      labels:
+                        job: "eosc-{{ site_name }}"
+                      annotations:
+                        summary: "Host high CPU load ({{ '{{ $labels.kubernetes_node }}' }})"
+                        description: "CPU load {{ '{{ $value | printf \"%.2f\" }}' }}% (limit {{ limit_cpu_warn }}%)"
+                    - alert: OutOfMemory
+                      expr: 100 * (1 - avg by(kubernetes_node) (node_memory_MemAvailable_bytes / node_memory_MemTotal_bytes)) > {{ limit_memory_warn }}
+                      for: 20m
+                      labels:
+                        job: "eosc-{{ site_name }}"
+                      annotations:
+                        summary: "Host out of memory ({{ '{{ $labels.kubernetes_node }}' }})"
+                        description: "Node memory {{ '{{ $value | printf \"%.2f\" }}' }}% (limit {{ limit_memory_warn }}%)"
+                    - alert: OutOfDiskSpace
+                      expr: 100 * (1 - avg by (kubernetes_node, mountpoint) (node_filesystem_avail_bytes{device=~"/dev/.*"} / node_filesystem_size_bytes))
+                        > {{ limit_disk_warn }}
+                      for: 20m
+                      labels:
+                        job: "eosc-{{ site_name }}"
+                      annotations:
+                        summary: "Host out of disk space ({{ '{{ $labels.kubernetes_node }}' }})"
+                        description: "Disk is almost full {{ '{{ $value | printf \"%.2f\" }}' }}% (limit {{ limit_disk_warn }}%)"
+    - name: Prometheus
+      vars:
+        config: >-
+          --version=25.8.2
+          -f /tmp/prometheus.yaml
+      shell: |-
+        helm status --namespace prometheus prometheus
+        if [ $? -ne 0 ]; then
+            kubectl create ns prometheus >/dev/null 2>&1 || true
+            helm install --namespace prometheus {{ config }} prometheus prometheus-community/prometheus
+        else
+            helm upgrade --namespace prometheus {{ config }} prometheus prometheus-community/prometheus
+        fi
+      environment:
+        KUBECONFIG: /etc/kubernetes/admin.conf
+        PATH: /sbin:/bin:/usr/sbin:/usr/bin:/usr/local/bin
+      when: true
+    - name: Grafana configuration
+      copy:
+        dest: /tmp/grafana.yaml
+        mode: 0640
+        content: |
+          ingress:
+            enabled: true
+            annotations:
+              kubernetes.io/ingress.class: "nginx"
+              kubernetes.io/tls-acme: "true"
+            hosts:
+            - "{{ grafana_hostname }}"
+            tls:
+            - hosts:
+              - "{{ grafana_hostname }}"
+              secretName: acme-tls-grafana
+          datasources:
+            datasources.yaml:
+              apiVersion: 1
+              datasources:
+                - name: Prometheus
+                  type: prometheus
+                  access: Server
+                  orgId: 1
+                  url: http://prometheus-server.prometheus.svc.cluster.local
+                  isDefault: true
+                  version: 1
+                  editable: false
+          sidecar:
+            dashboards:
+              enabled: true
+    - name: Grafana
+      vars:
+        config: >-
+          --version=7.0.3
+          -f /tmp/grafana.yaml
+      shell: |-
+        helm status --namespace grafana grafana
+        if [ $? -ne 0 ]; then
+            kubectl create ns grafana >/dev/null 2>&1 || true
+            helm repo add grafana https://grafana.github.io/helm-charts
+            helm repo update
+            helm install --namespace grafana {{ config }} grafana grafana/grafana
+        else
+            helm upgrade --namespace grafana {{ config }} grafana grafana/grafana
+        fi
+      environment:
+        KUBECONFIG: /etc/kubernetes/admin.conf
+        PATH: /sbin:/bin:/usr/sbin:/usr/bin:/usr/local/bin
+      when: true
diff --git a/cesnet-mcc/playbooks/templates/etc/exports b/cesnet-mcc/playbooks/templates/etc/exports
index dfc08fc..d00f3ed 100644
--- a/cesnet-mcc/playbooks/templates/etc/exports
+++ b/cesnet-mcc/playbooks/templates/etc/exports
@@ -1,2 +1,2 @@
 # export the NFS directory to all the cluster members
-/exports {% for host in groups['allnodes'] -%}{{ host }}(rw,async,no_root_squash,no_subtree_check) {% endfor -%}
+/exports {% for host in groups['allnodes'] -%}{{ hostvars[host].ansible_default_ipv4.address }}(rw,async,no_root_squash,no_subtree_check) {{ hostvars[host].ansible_default_ipv6.address }}(rw,async,no_root_squash,no_subtree_check) {% endfor -%}
diff --git a/cesnet-mcc/terraform/.gitignore b/cesnet-mcc/terraform/.gitignore
new file mode 100644
index 0000000..e15cf5f
--- /dev/null
+++ b/cesnet-mcc/terraform/.gitignore
@@ -0,0 +1,8 @@
+/.terraform/
+/.terraform.lock.hcl
+/fip.txt
+/hosts.txt
+/inventory.yaml
+/terraform.tfstate
+/terraform.tfstate.backup
+/*-volume.sh
diff --git a/cesnet-mcc/terraform/cloud-init.yaml b/cesnet-mcc/terraform/cloud-init.yaml
new file mode 100644
index 0000000..481d1f2
--- /dev/null
+++ b/cesnet-mcc/terraform/cloud-init.yaml
@@ -0,0 +1,18 @@
+#cloud-config
+---
+package_update: true
+
+package_upgrade: true
+
+users:
+  - default
+  - name: egi
+    gecos: EGI
+    primary_group: egi
+    groups: users
+    shell: /bin/bash
+    sudo: ALL=(ALL) NOPASSWD:ALL
+    ssh_import_id:
+      - gh:enolfc
+      - gh:andrea-manzi
+      - gh:valtri
diff --git a/cesnet-mcc/terraform/terraform.tfvars b/cesnet-mcc/terraform/terraform.tfvars
new file mode 100644
index 0000000..7c8ffc7
--- /dev/null
+++ b/cesnet-mcc/terraform/terraform.tfvars
@@ -0,0 +1,22 @@
+# These need to be defined for things to work
+ip_pool   = "public-muni-147-251-124-GROUP"
+net_name  = "group-project-network"
+net6_name = "public-muni-v6-432"
+
+# These may need some adjustment for your provider
+master_cpus = 2    # 2 CPUs to match existing flavours
+master_ram  = 4096
+worker_cpus = 4
+worker_ram  = 8192
+
+# Number of extra workers
+extra_workers = 2
+
+# volumes for docker
+docker_volumes_size = 384
+
+# NFS volume
+nfs_volume_size = 256
+
+# squid volume
+squid_volume_size = 128
diff --git a/cesnet-mcc/terraform/vars.tf b/cesnet-mcc/terraform/vars.tf
new file mode 100644
index 0000000..1ca3b86
--- /dev/null
+++ b/cesnet-mcc/terraform/vars.tf
@@ -0,0 +1,54 @@
+variable "ip_pool" {
+  type        = string
+  description = "The name of the public IP pool for the servers"
+}
+
+variable "net_name" {
+  type        = string
+  description = "The name of the IPv4 network"
+}
+
+variable "net6_name" {
+  type        = string
+  description = "The name of the IPv6 network"
+}
+
+variable "master_cpus" {
+  type        = number
+  description = "Number of CPUs for the master"
+}
+
+variable "master_ram" {
+  type        = number
+  description = "RAM for the master"
+}
+
+variable "worker_cpus" {
+  type        = number
+  description = "Number of CPUs for the worker"
+}
+
+variable "worker_ram" {
+  type        = number
+  description = "RAM for the worker"
+}
+
+variable "extra_workers" {
+  type        = number
+  description = "Number of extra workers to create"
+}
+
+variable "docker_volumes_size" {
+  type        = number
+  description = "Size of volumes for docker (GB)"
+}
+
+variable "nfs_volume_size" {
+  type        = number
+  description = "Size of volume for NFS server (GB)"
+}
+
+variable "squid_volume_size" {
+  type        = number
+  description = "Size of volume for squid proxy, CVMFS cache (GB)"
+}
diff --git a/cesnet-mcc/terraform/versions.tf b/cesnet-mcc/terraform/versions.tf
new file mode 100644
index 0000000..43054dd
--- /dev/null
+++ b/cesnet-mcc/terraform/versions.tf
@@ -0,0 +1,10 @@
+terraform {
+  required_providers {
+    local = "~> 2.0"
+    openstack = {
+      source  = "terraform-provider-openstack/openstack"
+      version = ">= 1.38.0"
+    }
+  }
+  required_version = ">= 0.13"
+}
diff --git a/cesnet-mcc/terraform/vms.tf b/cesnet-mcc/terraform/vms.tf
new file mode 100644
index 0000000..9ad44ab
--- /dev/null
+++ b/cesnet-mcc/terraform/vms.tf
@@ -0,0 +1,303 @@
+provider "openstack" {
+}
+
+locals {
+  nodes = concat([
+    openstack_compute_instance_v2.ingress,
+    openstack_compute_instance_v2.nfs,
+  ], openstack_compute_instance_v2.worker[*])
+  master_ip  = replace(openstack_compute_instance_v2.master.network[1].fixed_ip_v6, "/\\[(.*)\\]/", "$1")
+  ingress_ip = replace(openstack_compute_instance_v2.ingress.network[1].fixed_ip_v6, "/\\[(.*)\\]/", "$1")
+  nfs_ip     = replace(openstack_compute_instance_v2.nfs.network[1].fixed_ip_v6, "/\\[(.*)\\]/", "$1")
+  worker_ips = [for s in openstack_compute_instance_v2.worker[*].network[1].fixed_ip_v6 : replace(s, "/\\[(.*)\\]/", "$1")]
+}
+
+# Security groups
+
+resource "openstack_compute_secgroup_v2" "ping" {
+  name        = "ping"
+  description = "ICMP for ping"
+
+  rule {
+    from_port   = 8
+    to_port     = 0
+    ip_protocol = "icmp"
+    cidr        = "0.0.0.0/0"
+  }
+  rule {
+    from_port   = 128
+    to_port     = 0
+    ip_protocol = "ipv6-icmp"
+    cidr        = "::/0"
+  }
+}
+
+resource "openstack_compute_secgroup_v2" "ssh" {
+  name        = "ssh"
+  description = "ssh connection"
+
+  rule {
+    from_port   = 22
+    to_port     = 22
+    ip_protocol = "tcp"
+    cidr        = "0.0.0.0/0"
+  }
+  rule {
+    from_port   = 22
+    to_port     = 22
+    ip_protocol = "tcp"
+    cidr        = "::/0"
+  }
+}
+
+resource "openstack_compute_secgroup_v2" "http" {
+  name        = "http"
+  description = "http/https"
+
+  rule {
+    from_port   = 80
+    to_port     = 80
+    ip_protocol = "tcp"
+    cidr        = "0.0.0.0/0"
+  }
+  rule {
+    from_port   = 80
+    to_port     = 80
+    ip_protocol = "tcp"
+    cidr        = "::/0"
+  }
+  rule {
+    from_port   = 443
+    to_port     = 443
+    ip_protocol = "tcp"
+    cidr        = "0.0.0.0/0"
+  }
+  rule {
+    from_port   = 443
+    to_port     = 443
+    ip_protocol = "tcp"
+    cidr        = "::/0"
+  }
+}
+
+resource "openstack_networking_floatingip_v2" "public_ip" {
+  pool = var.ip_pool
+}
+
+data "openstack_images_image_v2" "ubuntu" {
+  name = "ubuntu-jammy-x86_64"
+}
+
+data "openstack_compute_flavor_v2" "master-flavor" {
+  vcpus = var.master_cpus
+  ram   = var.master_ram
+}
+
+data "openstack_compute_flavor_v2" "worker-flavor" {
+  vcpus = var.worker_cpus
+  ram   = var.worker_ram
+}
+
+resource "openstack_compute_instance_v2" "master" {
+  name     = "k8s-master"
+  image_id = data.openstack_images_image_v2.ubuntu.id
+  # flavor selected by master_cpus/master_ram (see terraform.tfvars)
+  flavor_id       = data.openstack_compute_flavor_v2.master-flavor.id
+  security_groups = ["default", "all"]
+  user_data       = file("cloud-init.yaml")
+  tags            = ["master"]
+  network {
+    name = var.net_name
+  }
+  network {
+    name = var.net6_name
+  }
+}
+
+resource "openstack_compute_instance_v2" "nfs" {
+  name            = "k8s-nfs"
+  image_id        = data.openstack_images_image_v2.ubuntu.id
+  flavor_id       = data.openstack_compute_flavor_v2.worker-flavor.id
+  security_groups = ["default", openstack_compute_secgroup_v2.ping.name, openstack_compute_secgroup_v2.ssh.name]
+  user_data       = file("cloud-init.yaml")
+  tags            = ["worker"]
+  network {
+    name = var.net_name
+  }
+  network {
+    name = var.net6_name
+  }
+}
+
+resource "openstack_compute_instance_v2" "ingress" {
+  name            = "k8s-w-ingress"
+  image_id        = data.openstack_images_image_v2.ubuntu.id
+  flavor_id       = data.openstack_compute_flavor_v2.worker-flavor.id
+  security_groups = ["default", openstack_compute_secgroup_v2.ping.name, openstack_compute_secgroup_v2.ssh.name, openstack_compute_secgroup_v2.http.name]
+  user_data       = file("cloud-init.yaml")
+  tags            = ["worker"]
+  network {
+    name = var.net_name
+  }
+  network {
+    name = var.net6_name
+  }
+}
+
+resource "openstack_compute_instance_v2" "worker" {
+  count           = var.extra_workers
+  name            = "k8s-worker-${count.index}"
+  image_id        = data.openstack_images_image_v2.ubuntu.id
+  flavor_id       = data.openstack_compute_flavor_v2.worker-flavor.id
+  security_groups = ["default", openstack_compute_secgroup_v2.ping.name, openstack_compute_secgroup_v2.ssh.name]
+  user_data       = file("cloud-init.yaml")
+  tags            = ["worker"]
+  network {
+    name = var.net_name
+  }
+  network {
+    name = var.net6_name
+  }
+}
+
+resource "openstack_compute_floatingip_associate_v2" "fip" {
+  floating_ip = openstack_networking_floatingip_v2.public_ip.address
+  instance_id = openstack_compute_instance_v2.ingress.id
+}
+
+resource "openstack_blockstorage_volume_v3" "nfs-volume" {
+  name = "nfs"
+  size = var.nfs_volume_size
+}
+
+resource "openstack_compute_volume_attach_v2" "nfs-volume-attach" {
+  instance_id = openstack_compute_instance_v2.nfs.id
+  volume_id   = openstack_blockstorage_volume_v3.nfs-volume.id
+}
+
+resource "local_file" "volume-script" {
+  filename        = "nfs-volume.sh"
+  file_permission = "0755"
+  content         = <<EOT
+#! /bin/bash -xe
+if ! dpkg-query -s xfsprogs >/dev/null 2>&1; then
+	apt-get update
+	apt-get install -y xfsprogs
+fi
+device="${openstack_compute_volume_attach_v2.nfs-volume-attach.device}"
+mkfs.xfs -L NFS "$device" || true
+grep -q 'LABEL=NFS' /etc/fstab || /bin/echo -e "LABEL=NFS\t/exports\txfs\tdefaults,uquota,pquota\t0\t0" | tee -a /etc/fstab
+mkdir /exports 2>/dev/null || true
+mount -a
+EOT
+}
+
+resource "openstack_blockstorage_volume_v3" "docker-volume" {
+  count = var.extra_workers + 2
+  name  = format("docker-%s", local.nodes[count.index].name)
+  size  = var.docker_volumes_size
+}
+
+resource "openstack_compute_volume_attach_v2" "docker-volume-attach" {
+  count       = var.extra_workers + 2
+  instance_id = local.nodes[count.index].id
+  volume_id   = openstack_blockstorage_volume_v3.docker-volume[count.index].id
+}
+
+resource "local_file" "docker-volume-script" {
+  filename        = "docker-volume.sh"
+  file_permission = "0755"
+  content         = <<EOT
+#! /bin/bash -xe
+volumes="${join("\n", [for n, d in zipmap(tolist(local.nodes[*].name), tolist(openstack_compute_volume_attach_v2.docker-volume-attach[*].device)) : format("%s:%s", n, d)])}"
+volume=$(echo "$volumes" | grep "$(hostname):")
+device=$(echo "$volume" | cut -d: -f2)
+if ! dumpe2fs -h "$device" >/dev/null 2>&1; then
+	mkfs.ext4 -L DOCKER "$device"
+	grep -q 'LABEL=DOCKER' /etc/fstab || /bin/echo -e "LABEL=DOCKER\t/var/lib/docker/overlay2\text4\tdefaults\t0\t0" | tee -a /etc/fstab
+	mkdir -p /var/lib/docker/overlay2 2>/dev/null || true
+	service docker stop >/dev/null 2>&1 || true
+	sleep 10
+	mount "$device" /mnt
+	mv /var/lib/docker/overlay2/* /mnt >/dev/null 2>&1 || true
+	umount /mnt
+	mount -a
+fi
+EOT
+}
+
+resource "openstack_blockstorage_volume_v3" "squid-volume" {
+  name = "squid"
+  size = var.squid_volume_size
+}
+
+resource "openstack_compute_volume_attach_v2" "squid-volume-attach" {
+  instance_id = openstack_compute_instance_v2.ingress.id
+  volume_id   = openstack_blockstorage_volume_v3.squid-volume.id
+}
+
+resource "local_file" "squid-volume-script" {
+  filename        = "squid-volume.sh"
+  file_permission = "0755"
+  content         = <<EOT
+#! /bin/bash -xe
+device="${openstack_compute_volume_attach_v2.squid-volume-attach.device}"
+if ! dumpe2fs -h "$device" >/dev/null 2>&1; then
+	mkfs.ext4 -L SQUID "$device"
+fi
+grep -q 'LABEL=SQUID' /etc/fstab || /bin/echo -e "LABEL=SQUID\t/var/spool/squid\text4\tdefaults\t0\t0" | tee -a /etc/fstab
+mkdir /var/spool/squid 2>/dev/null || true
+mount -a
+EOT
+}
+resource "local_file" "inventory" {
+  filename        = "inventory.yaml"
+  file_permission = "0644"
+  content         = <<EOT
+---
+fip:
+  hosts:
+    ${openstack_networking_floatingip_v2.public_ip.address}:
+
+master:
+  hosts:
+    ${local.master_ip}:
+
+ingress:
+  hosts:
+    ${local.ingress_ip}:
+
+nfs:
+  hosts:
+    ${local.nfs_ip}:
+
+worker:
+  hosts:
+    ${join(":\n    ", local.worker_ips)}:
+
+# using public IP of kube_server for ansible delegate_to
+kube_server:
+  hosts:
+    ${openstack_compute_instance_v2.master.network[0].fixed_ip_v4}:
+      ansible_host: ${local.master_ip}
+EOT
+}
+
+resource "local_file" "fip" {
+  filename        = "fip.txt"
+  file_permission = "0644"
+  content         = <<EOT
+${openstack_networking_floatingip_v2.public_ip.address}
+EOT
+}
+
+resource "local_file" "hosts" {
+  filename        = "hosts.txt"
+  file_permission = "0644"
+  content         = <<EOT
+${local.master_ip}
+${local.ingress_ip}
+${local.nfs_ip}
+${join("\n", local.worker_ips)}
+EOT
+}
-- 
GitLab