From 6d51d8e7c4e35ce7a68e2d414a4d8ccfe4432f1e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Franti=C5=A1ek=20Dvo=C5=99=C3=A1k?= <valtri@civ.zcu.cz> Date: Thu, 18 Jul 2024 15:18:31 +0000 Subject: [PATCH] Staging deployment at SafeSpring + cleanup * refactoring - symlinks to common/ directory * initial SafeSpring terraform setup --- cesnet-central/extra | 1 + cesnet-central/playbooks/public_keys | 1 + cesnet-mcc/deploy.sh | 7 +- cesnet-mcc/extra | 2 +- cesnet-mcc/playbooks/public_keys | 2 +- cesnet-mcc/terraform/vms.tf | 11 +- {cesnet-central => common}/extra/README.md | 0 .../extra/jupyter-dashboard.yaml | 0 .../extra/k8s-dashboard.yaml | 0 .../playbooks/public_keys/andrea-manzi | 0 .../playbooks/public_keys/enolfc | 0 .../playbooks/public_keys/jhradil | 0 .../playbooks/public_keys/pailozian | 0 .../playbooks/public_keys/pospisilp | 0 .../playbooks/public_keys/sustr | 0 .../playbooks/public_keys/valtri | 0 {staging => common}/terraform/.gitignore | 0 {staging => common}/terraform/cloud-init.yaml | 0 {staging => common}/terraform/versions.tf | 0 staging/extra | 1 - staging/playbooks/public_keys | 1 - {staging => staging1}/ansible.cfg | 0 {staging => staging1}/deploy.sh | 0 staging1/extra | 1 + {staging => staging1}/inventory/1-psnc.yaml | 0 {staging => staging1}/inventory/99-all.yaml | 3 +- {staging => staging1}/playbooks/cvmfs.yaml | 0 .../playbooks/files/calico.yaml | 0 {staging => staging1}/playbooks/files/etc | 0 {staging => staging1}/playbooks/files/usr | 0 {staging => staging1}/playbooks/k8s.yaml | 0 staging1/playbooks/public_keys | 1 + {staging => staging1}/playbooks/squid.yaml | 0 .../playbooks/templates/etc/exports | 0 .../playbooks/templates/etc/mailutils.conf | 0 .../etc/squid/conf.d/allcluster.conf | 0 {staging => staging1}/playbooks/upgrade.yaml | 0 staging1/terraform/.gitignore | 1 + staging1/terraform/cloud-init.yaml | 1 + {staging => staging1}/terraform/network.tf | 0 .../terraform/terraform.tfvars | 0 {staging => staging1}/terraform/vars.tf | 0 
staging1/terraform/versions.tf | 1 + {staging => staging1}/terraform/vms.tf | 0 staging2/ansible.cfg | 5 + staging2/deploy.sh | 52 +++ staging2/extra | 1 + staging2/inventory/99-all.yaml | 18 + staging2/terraform/.gitignore | 8 + staging2/terraform/cloud-init.yaml | 1 + staging2/terraform/terraform.tfvars | 30 ++ staging2/terraform/vars.tf | 64 ++++ staging2/terraform/versions.tf | 1 + staging2/terraform/vms.tf | 339 ++++++++++++++++++ testing/extra | 2 +- testing/playbooks/public_keys | 2 +- 56 files changed, 543 insertions(+), 14 deletions(-) create mode 120000 cesnet-central/extra create mode 120000 cesnet-central/playbooks/public_keys rename {cesnet-central => common}/extra/README.md (100%) rename {cesnet-central => common}/extra/jupyter-dashboard.yaml (100%) rename {cesnet-central => common}/extra/k8s-dashboard.yaml (100%) rename {cesnet-central => common}/playbooks/public_keys/andrea-manzi (100%) rename {cesnet-central => common}/playbooks/public_keys/enolfc (100%) rename {cesnet-central => common}/playbooks/public_keys/jhradil (100%) rename {cesnet-central => common}/playbooks/public_keys/pailozian (100%) rename {cesnet-central => common}/playbooks/public_keys/pospisilp (100%) rename {cesnet-central => common}/playbooks/public_keys/sustr (100%) rename {cesnet-central => common}/playbooks/public_keys/valtri (100%) rename {staging => common}/terraform/.gitignore (100%) rename {staging => common}/terraform/cloud-init.yaml (100%) rename {staging => common}/terraform/versions.tf (100%) delete mode 120000 staging/extra delete mode 120000 staging/playbooks/public_keys rename {staging => staging1}/ansible.cfg (100%) rename {staging => staging1}/deploy.sh (100%) create mode 120000 staging1/extra rename {staging => staging1}/inventory/1-psnc.yaml (100%) rename {staging => staging1}/inventory/99-all.yaml (81%) rename {staging => staging1}/playbooks/cvmfs.yaml (100%) rename {staging => staging1}/playbooks/files/calico.yaml (100%) rename {staging => 
staging1}/playbooks/files/etc (100%) rename {staging => staging1}/playbooks/files/usr (100%) rename {staging => staging1}/playbooks/k8s.yaml (100%) create mode 120000 staging1/playbooks/public_keys rename {staging => staging1}/playbooks/squid.yaml (100%) rename {staging => staging1}/playbooks/templates/etc/exports (100%) rename {staging => staging1}/playbooks/templates/etc/mailutils.conf (100%) rename {staging => staging1}/playbooks/templates/etc/squid/conf.d/allcluster.conf (100%) rename {staging => staging1}/playbooks/upgrade.yaml (100%) create mode 120000 staging1/terraform/.gitignore create mode 120000 staging1/terraform/cloud-init.yaml rename {staging => staging1}/terraform/network.tf (100%) rename {staging => staging1}/terraform/terraform.tfvars (100%) rename {staging => staging1}/terraform/vars.tf (100%) create mode 120000 staging1/terraform/versions.tf rename {staging => staging1}/terraform/vms.tf (100%) create mode 100644 staging2/ansible.cfg create mode 100755 staging2/deploy.sh create mode 120000 staging2/extra create mode 100644 staging2/inventory/99-all.yaml create mode 100644 staging2/terraform/.gitignore create mode 120000 staging2/terraform/cloud-init.yaml create mode 100644 staging2/terraform/terraform.tfvars create mode 100644 staging2/terraform/vars.tf create mode 120000 staging2/terraform/versions.tf create mode 100644 staging2/terraform/vms.tf diff --git a/cesnet-central/extra b/cesnet-central/extra new file mode 120000 index 0000000..440decb --- /dev/null +++ b/cesnet-central/extra @@ -0,0 +1 @@ +../common/extra \ No newline at end of file diff --git a/cesnet-central/playbooks/public_keys b/cesnet-central/playbooks/public_keys new file mode 120000 index 0000000..6ef4918 --- /dev/null +++ b/cesnet-central/playbooks/public_keys @@ -0,0 +1 @@ +../../common/playbooks/public_keys \ No newline at end of file diff --git a/cesnet-mcc/deploy.sh b/cesnet-mcc/deploy.sh index 6870568..2d30999 100755 --- a/cesnet-mcc/deploy.sh +++ b/cesnet-mcc/deploy.sh @@ 
-20,6 +20,8 @@ for auth in $FEDCLOUD_DYNAMIC_DNS; do curl -i -X GET -u "$auth" https://nsupdate.fedcloud.eu/nic/update?myip="$ip" done eval "$shellstate" +echo "Terraform finished. Continue? (CTRL-C to quit)" +read -r _ # wait for ping and ssh for ip in $(cat terraform/hosts.txt); do @@ -39,11 +41,12 @@ ansible -m copy -a 'src=terraform/nfs-volume.sh dest=/root/ mode=preserve' nfs ansible -m command -a '/root/nfs-volume.sh' nfs ansible -m copy -a 'src=terraform/squid-volume.sh dest=/root/ mode=preserve' 'ingress[0]' ansible -m command -a '/root/squid-volume.sh' 'ingress[0]' -ansible -m copy -a 'src=terraform/docker-volume.sh dest=/root/ mode=preserve' 'ingress nfs worker' -ansible -m command -a '/root/docker-volume.sh' 'ingress nfs worker' # k8s + notebooks ansible-playbook playbooks/k8s.yaml # ansible-playbook playbooks/squid.yaml # ansible-playbook playbooks/cvmfs.yaml while ansible -i ./inventory -m command -a 'kubectl get pods --all-namespaces' master | tail -n +3 | grep -v ' Running '; do sleep 5; done +# docker runtime directory after Kubernetes deployment (problem with unmounts) +ansible -m copy -a 'src=terraform/docker-volume.sh dest=/root/ mode=preserve' 'ingress nfs worker gpu' +ansible -m command -a '/root/docker-volume.sh' 'ingress nfs worker gpu' diff --git a/cesnet-mcc/extra b/cesnet-mcc/extra index e5f031b..440decb 120000 --- a/cesnet-mcc/extra +++ b/cesnet-mcc/extra @@ -1 +1 @@ -../cesnet-central/extra \ No newline at end of file +../common/extra \ No newline at end of file diff --git a/cesnet-mcc/playbooks/public_keys b/cesnet-mcc/playbooks/public_keys index 3022cbb..6ef4918 120000 --- a/cesnet-mcc/playbooks/public_keys +++ b/cesnet-mcc/playbooks/public_keys @@ -1 +1 @@ -../../cesnet-central/playbooks/public_keys \ No newline at end of file +../../common/playbooks/public_keys \ No newline at end of file diff --git a/cesnet-mcc/terraform/vms.tf b/cesnet-mcc/terraform/vms.tf index 2f8d2e9..deabb55 100644 --- a/cesnet-mcc/terraform/vms.tf +++ 
b/cesnet-mcc/terraform/vms.tf @@ -1,6 +1,3 @@ -provider "openstack" { -} - locals { nodes = concat([ openstack_compute_instance_v2.ingress, @@ -217,12 +214,15 @@ if ! dumpe2fs -h "$device" >/dev/null 2>&1; then mkfs.ext4 -L DOCKER "$device" grep -q 'LABEL=DOCKER' /etc/fstab || /bin/echo -e "LABEL=DOCKER\t/var/lib/docker/overlay2\text4\tdefaults,x-systemd.before=local-fs.target\t0\t0" | tee -a /etc/fstab mkdir -p /var/lib/docker/overlay2 2>/dev/null || true - service docker stop >/dev/null 2>&1 || true + systemctl stop docker kubelet >/dev/null 2>&1 || true sleep 10 + systemctl stop docker kubelet >/dev/null 2>&1 || true + umount /var/lib/docker/overlay2 2>&1 || true mount "$device" /mnt mv /var/lib/docker/overlay2/* /mnt >/dev/null 2>&1 || true umount /mnt mount -a + systemctl start docker kubelet >/dev/null 2>&1 || true fi EOT } @@ -251,6 +251,7 @@ mkdir /var/spool/squid 2>/dev/null || true mount -a EOT } + resource "local_file" "inventory" { filename = "inventory.yaml" file_permission = "0644" @@ -276,7 +277,7 @@ nfs: worker: hosts: - ${join(":\n ", local.worker_ips)}: + ${join("\n ", [for s in local.worker_ips: "${s}:"])} # using public IP of kube_server for ansible delegate_to kube_server: diff --git a/cesnet-central/extra/README.md b/common/extra/README.md similarity index 100% rename from cesnet-central/extra/README.md rename to common/extra/README.md diff --git a/cesnet-central/extra/jupyter-dashboard.yaml b/common/extra/jupyter-dashboard.yaml similarity index 100% rename from cesnet-central/extra/jupyter-dashboard.yaml rename to common/extra/jupyter-dashboard.yaml diff --git a/cesnet-central/extra/k8s-dashboard.yaml b/common/extra/k8s-dashboard.yaml similarity index 100% rename from cesnet-central/extra/k8s-dashboard.yaml rename to common/extra/k8s-dashboard.yaml diff --git a/cesnet-central/playbooks/public_keys/andrea-manzi b/common/playbooks/public_keys/andrea-manzi similarity index 100% rename from cesnet-central/playbooks/public_keys/andrea-manzi rename 
to common/playbooks/public_keys/andrea-manzi diff --git a/cesnet-central/playbooks/public_keys/enolfc b/common/playbooks/public_keys/enolfc similarity index 100% rename from cesnet-central/playbooks/public_keys/enolfc rename to common/playbooks/public_keys/enolfc diff --git a/cesnet-central/playbooks/public_keys/jhradil b/common/playbooks/public_keys/jhradil similarity index 100% rename from cesnet-central/playbooks/public_keys/jhradil rename to common/playbooks/public_keys/jhradil diff --git a/cesnet-central/playbooks/public_keys/pailozian b/common/playbooks/public_keys/pailozian similarity index 100% rename from cesnet-central/playbooks/public_keys/pailozian rename to common/playbooks/public_keys/pailozian diff --git a/cesnet-central/playbooks/public_keys/pospisilp b/common/playbooks/public_keys/pospisilp similarity index 100% rename from cesnet-central/playbooks/public_keys/pospisilp rename to common/playbooks/public_keys/pospisilp diff --git a/cesnet-central/playbooks/public_keys/sustr b/common/playbooks/public_keys/sustr similarity index 100% rename from cesnet-central/playbooks/public_keys/sustr rename to common/playbooks/public_keys/sustr diff --git a/cesnet-central/playbooks/public_keys/valtri b/common/playbooks/public_keys/valtri similarity index 100% rename from cesnet-central/playbooks/public_keys/valtri rename to common/playbooks/public_keys/valtri diff --git a/staging/terraform/.gitignore b/common/terraform/.gitignore similarity index 100% rename from staging/terraform/.gitignore rename to common/terraform/.gitignore diff --git a/staging/terraform/cloud-init.yaml b/common/terraform/cloud-init.yaml similarity index 100% rename from staging/terraform/cloud-init.yaml rename to common/terraform/cloud-init.yaml diff --git a/staging/terraform/versions.tf b/common/terraform/versions.tf similarity index 100% rename from staging/terraform/versions.tf rename to common/terraform/versions.tf diff --git a/staging/extra b/staging/extra deleted file mode 120000 index 
e5f031b..0000000 --- a/staging/extra +++ /dev/null @@ -1 +0,0 @@ -../cesnet-central/extra \ No newline at end of file diff --git a/staging/playbooks/public_keys b/staging/playbooks/public_keys deleted file mode 120000 index 3022cbb..0000000 --- a/staging/playbooks/public_keys +++ /dev/null @@ -1 +0,0 @@ -../../cesnet-central/playbooks/public_keys \ No newline at end of file diff --git a/staging/ansible.cfg b/staging1/ansible.cfg similarity index 100% rename from staging/ansible.cfg rename to staging1/ansible.cfg diff --git a/staging/deploy.sh b/staging1/deploy.sh similarity index 100% rename from staging/deploy.sh rename to staging1/deploy.sh diff --git a/staging1/extra b/staging1/extra new file mode 120000 index 0000000..440decb --- /dev/null +++ b/staging1/extra @@ -0,0 +1 @@ +../common/extra \ No newline at end of file diff --git a/staging/inventory/1-psnc.yaml b/staging1/inventory/1-psnc.yaml similarity index 100% rename from staging/inventory/1-psnc.yaml rename to staging1/inventory/1-psnc.yaml diff --git a/staging/inventory/99-all.yaml b/staging1/inventory/99-all.yaml similarity index 81% rename from staging/inventory/99-all.yaml rename to staging1/inventory/99-all.yaml index 55b2400..835c93c 100644 --- a/staging/inventory/99-all.yaml +++ b/staging1/inventory/99-all.yaml @@ -15,4 +15,5 @@ all: site_name: psnc-staging vault_mount_point: secrets/users/e1662e20-e34b-468c-b0ce-d899bc878364@egi.eu/eosc-staging - grafana_hostname: grafana-stg.cloud.cesnet.cz + notebooks_hostname: notebooks-stg1.cloud.cesnet.cz + grafana_hostname: grafana-stg1.cloud.cesnet.cz diff --git a/staging/playbooks/cvmfs.yaml b/staging1/playbooks/cvmfs.yaml similarity index 100% rename from staging/playbooks/cvmfs.yaml rename to staging1/playbooks/cvmfs.yaml diff --git a/staging/playbooks/files/calico.yaml b/staging1/playbooks/files/calico.yaml similarity index 100% rename from staging/playbooks/files/calico.yaml rename to staging1/playbooks/files/calico.yaml diff --git 
a/staging/playbooks/files/etc b/staging1/playbooks/files/etc similarity index 100% rename from staging/playbooks/files/etc rename to staging1/playbooks/files/etc diff --git a/staging/playbooks/files/usr b/staging1/playbooks/files/usr similarity index 100% rename from staging/playbooks/files/usr rename to staging1/playbooks/files/usr diff --git a/staging/playbooks/k8s.yaml b/staging1/playbooks/k8s.yaml similarity index 100% rename from staging/playbooks/k8s.yaml rename to staging1/playbooks/k8s.yaml diff --git a/staging1/playbooks/public_keys b/staging1/playbooks/public_keys new file mode 120000 index 0000000..6ef4918 --- /dev/null +++ b/staging1/playbooks/public_keys @@ -0,0 +1 @@ +../../common/playbooks/public_keys \ No newline at end of file diff --git a/staging/playbooks/squid.yaml b/staging1/playbooks/squid.yaml similarity index 100% rename from staging/playbooks/squid.yaml rename to staging1/playbooks/squid.yaml diff --git a/staging/playbooks/templates/etc/exports b/staging1/playbooks/templates/etc/exports similarity index 100% rename from staging/playbooks/templates/etc/exports rename to staging1/playbooks/templates/etc/exports diff --git a/staging/playbooks/templates/etc/mailutils.conf b/staging1/playbooks/templates/etc/mailutils.conf similarity index 100% rename from staging/playbooks/templates/etc/mailutils.conf rename to staging1/playbooks/templates/etc/mailutils.conf diff --git a/staging/playbooks/templates/etc/squid/conf.d/allcluster.conf b/staging1/playbooks/templates/etc/squid/conf.d/allcluster.conf similarity index 100% rename from staging/playbooks/templates/etc/squid/conf.d/allcluster.conf rename to staging1/playbooks/templates/etc/squid/conf.d/allcluster.conf diff --git a/staging/playbooks/upgrade.yaml b/staging1/playbooks/upgrade.yaml similarity index 100% rename from staging/playbooks/upgrade.yaml rename to staging1/playbooks/upgrade.yaml diff --git a/staging1/terraform/.gitignore b/staging1/terraform/.gitignore new file mode 120000 index 
0000000..a809396 --- /dev/null +++ b/staging1/terraform/.gitignore @@ -0,0 +1 @@ +../../common/terraform/.gitignore \ No newline at end of file diff --git a/staging1/terraform/cloud-init.yaml b/staging1/terraform/cloud-init.yaml new file mode 120000 index 0000000..f315687 --- /dev/null +++ b/staging1/terraform/cloud-init.yaml @@ -0,0 +1 @@ +../../common/terraform/cloud-init.yaml \ No newline at end of file diff --git a/staging/terraform/network.tf b/staging1/terraform/network.tf similarity index 100% rename from staging/terraform/network.tf rename to staging1/terraform/network.tf diff --git a/staging/terraform/terraform.tfvars b/staging1/terraform/terraform.tfvars similarity index 100% rename from staging/terraform/terraform.tfvars rename to staging1/terraform/terraform.tfvars diff --git a/staging/terraform/vars.tf b/staging1/terraform/vars.tf similarity index 100% rename from staging/terraform/vars.tf rename to staging1/terraform/vars.tf diff --git a/staging1/terraform/versions.tf b/staging1/terraform/versions.tf new file mode 120000 index 0000000..b4eea0e --- /dev/null +++ b/staging1/terraform/versions.tf @@ -0,0 +1 @@ +../../common/terraform/versions.tf \ No newline at end of file diff --git a/staging/terraform/vms.tf b/staging1/terraform/vms.tf similarity index 100% rename from staging/terraform/vms.tf rename to staging1/terraform/vms.tf diff --git a/staging2/ansible.cfg b/staging2/ansible.cfg new file mode 100644 index 0000000..c3a73be --- /dev/null +++ b/staging2/ansible.cfg @@ -0,0 +1,5 @@ +[defaults] +inventory=inventory + +[diff] +always=true diff --git a/staging2/deploy.sh b/staging2/deploy.sh new file mode 100755 index 0000000..b871dc9 --- /dev/null +++ b/staging2/deploy.sh @@ -0,0 +1,52 @@ +#! 
/bin/bash -xe + +# +# Deploy EOSC staging instance +# + +terraform -chdir=terraform init +terraform -chdir=terraform apply +cp -pv terraform/inventory.yaml inventory/1-safespring.yaml + +# dynamic DNS +ip="$(head -n 1 < terraform/fip.txt)" +# shellstate=$(shopt -po xtrace) +# set +o xtrace +# # https://nsupdate.fedcloud.eu +# vault_prefix=secrets/users/e1662e20-e34b-468c-b0ce-d899bc878364@egi.eu/eosc-staging +# FEDCLOUD_DYNAMIC_DNS=$(vault read -field data $vault_prefix/FEDCLOUD_DYNAMIC_DNS | grep ^map | head -n 1 | sed 's/map\[\(.*\)\]/\1/') +# for auth in $FEDCLOUD_DYNAMIC_DNS; do +# echo "curl -i -X GET -u $(echo "$auth" | cut -d: -f1):XXX https://nsupdate.fedcloud.eu/nic/update?myip=$ip" +# curl -i -X GET -u "$auth" https://nsupdate.fedcloud.eu/nic/update?myip="$ip" +# done +# eval "$shellstate" +echo "Terraform finished. Continue? (CTRL-C to quit)" +read -r _ + +# wait for ping and ssh +for ip in $(cat terraform/hosts.txt); do + while ! ping -c 1 "$ip"; do sleep 5; done + ssh-keygen -R "$ip" + while ! ssh egi@"$ip" -o ConnectTimeout=10 -o PreferredAuthentications=publickey -o StrictHostKeyChecking=no :; do sleep 10; done +done + +# check ssh access +ansible -m command -a 'uname -a' allnodes + +# wait cloud-init +ansible -m shell -a 'while ! 
test -f /var/lib/cloud/instance/boot-finished; do sleep 2; done' allnodes + +# setup volumes +ansible -m copy -a 'src=terraform/nfs-volume.sh dest=/root/ mode=preserve' nfs +ansible -m command -a '/root/nfs-volume.sh' nfs +ansible -m copy -a 'src=terraform/squid-volume.sh dest=/root/ mode=preserve' 'ingress[0]' +ansible -m command -a '/root/squid-volume.sh' 'ingress[0]' + +# k8s + notebooks +ansible-playbook playbooks/k8s.yaml +# ansible-playbook playbooks/squid.yaml +# ansible-playbook playbooks/cvmfs.yaml +while ansible -i ./inventory -m command -a 'kubectl get pods --all-namespaces' master | tail -n +3 | grep -v ' Running '; do sleep 5; done +# docker runtime directory after Kubernetes deployment (problem with unmounts) +ansible -m copy -a 'src=terraform/docker-volume.sh dest=/root/ mode=preserve' 'ingress nfs worker gpu' +ansible -m command -a '/root/docker-volume.sh' 'ingress nfs worker gpu' diff --git a/staging2/extra b/staging2/extra new file mode 120000 index 0000000..440decb --- /dev/null +++ b/staging2/extra @@ -0,0 +1 @@ +../common/extra \ No newline at end of file diff --git a/staging2/inventory/99-all.yaml b/staging2/inventory/99-all.yaml new file mode 100644 index 0000000..75f4d04 --- /dev/null +++ b/staging2/inventory/99-all.yaml @@ -0,0 +1,18 @@ +--- +allnodes: + children: + master: + ingress: + nfs: + worker: + +all: + vars: + ansible_become: yes + ansible_user: egi + + site_name: safespring-staging + vault_mount_point: secrets/users/e1662e20-e34b-468c-b0ce-d899bc878364@egi.eu/eosc-staging + + notebooks_hostname: notebooks-stg2.cloud.cesnet.cz + grafana_hostname: grafana-stg2.cloud.cesnet.cz diff --git a/staging2/terraform/.gitignore b/staging2/terraform/.gitignore new file mode 100644 index 0000000..e15cf5f --- /dev/null +++ b/staging2/terraform/.gitignore @@ -0,0 +1,8 @@ +/.terraform/ +/.terraform.lock.hcl +/fip.txt +/hosts.txt +/inventory.yaml +/terraform.tfstate +/terraform.tfstate.backup +/*-volume.sh diff --git 
a/staging2/terraform/cloud-init.yaml b/staging2/terraform/cloud-init.yaml new file mode 120000 index 0000000..f315687 --- /dev/null +++ b/staging2/terraform/cloud-init.yaml @@ -0,0 +1 @@ +../../common/terraform/cloud-init.yaml \ No newline at end of file diff --git a/staging2/terraform/terraform.tfvars b/staging2/terraform/terraform.tfvars new file mode 100644 index 0000000..3462a54 --- /dev/null +++ b/staging2/terraform/terraform.tfvars @@ -0,0 +1,30 @@ +# These need to be defined for things to work +ip_pool = "PSNC-PUB1-EDU" +net_name = "public" +site_name = "staging2" + +# These may need some adjustment for your provider +master_cpus = 4 +master_ram = 8192 +worker_cpus = 4 +worker_ram = 8192 +# XXX: replace this for GPU flavor, once available +gpu_flavor_name = "l2.c2r4.100" + +# Number of extra workers +extra_workers = 1 + +# Number of GPU workers +gpu_workers = 0 + +# volumes for docker +docker_volumes_size = 384 + +# NFS volume +nfs_volume_size = 256 + +# scratch volume +scratch_volumes_size = 128 + +# squid volume +squid_volume_size = 128 diff --git a/staging2/terraform/vars.tf b/staging2/terraform/vars.tf new file mode 100644 index 0000000..797ce3f --- /dev/null +++ b/staging2/terraform/vars.tf @@ -0,0 +1,69 @@ +variable "ip_pool" { + type = string + description = "The name of the public IP pool for the servers" +} + +variable "net_name" { + type = string + description = "The name of the network the servers are attached to" +} + +variable "site_name" { + type = string + description = "Site identifier for internal host names" +} + +variable "gpu_flavor_name" { + type = string + description = "Name of the GPU flavor" +} + +variable "master_cpus" { + type = number + description = "Number of CPUs for the master" +} + +variable "master_ram" { + type = number + description = "RAM for the master" +} + +variable "worker_cpus" { + type = number + description = "Number of CPUs for the worker" +} + +variable "worker_ram" { + type = number + description = "RAM for the worker" +} + +variable "extra_workers" { + type = number + description = "Number of extra workers to create" +} 
+ +variable "gpu_workers" { + type = number + description = "Number of GPU workers to create" +} + +variable "docker_volumes_size" { + type = number + description = "Size of volumes for docker (GB)" +} + +variable "nfs_volume_size" { + type = number + description = "Size of volume for NFS server (GB)" +} + +variable "scratch_volumes_size" { + type = number + description = "Size of volume for ephemeral volumes (GB)" +} + +variable "squid_volume_size" { + type = number + description = "Size of volume for squid proxy, CVMFS cache (GB)" +} diff --git a/staging2/terraform/versions.tf b/staging2/terraform/versions.tf new file mode 120000 index 0000000..b4eea0e --- /dev/null +++ b/staging2/terraform/versions.tf @@ -0,0 +1 @@ +../../common/terraform/versions.tf \ No newline at end of file diff --git a/staging2/terraform/vms.tf b/staging2/terraform/vms.tf new file mode 100644 index 0000000..bed0b9f --- /dev/null +++ b/staging2/terraform/vms.tf @@ -0,0 +1,339 @@ +locals { + nodes = concat([ + openstack_compute_instance_v2.ingress, + openstack_compute_instance_v2.nfs, + ], openstack_compute_instance_v2.worker[*], openstack_compute_instance_v2.gpu[*]) + master_ip = replace(openstack_compute_instance_v2.master.network[0].fixed_ip_v6, "/\\[(.*)\\]/", "$1") + ingress_ip = replace(openstack_compute_instance_v2.ingress.network[0].fixed_ip_v6, "/\\[(.*)\\]/", "$1") + nfs_ip = replace(openstack_compute_instance_v2.nfs.network[0].fixed_ip_v6, "/\\[(.*)\\]/", "$1") + worker_ips = [for s in openstack_compute_instance_v2.worker[*].network[0].fixed_ip_v6 : replace(s, "/\\[(.*)\\]/", "$1")] + gpu_ips = [for s in openstack_compute_instance_v2.gpu[*].network[0].fixed_ip_v6 : replace(s, "/\\[(.*)\\]/", "$1")] +} + +# Security groups + +resource "openstack_networking_secgroup_v2" "ping" { + name = "ping" + description = "ICMP for ping" +} + +resource "openstack_networking_secgroup_v2" "ssh" { + name = "ssh" + description = "ssh connection" +} + +resource "openstack_networking_secgroup_v2" 
"http" { + name = "http" + description = "http/https" +} + +resource "openstack_networking_secgroup_rule_v2" "ping4" { + direction = "ingress" + ethertype = "IPv4" + port_range_min = 8 + port_range_max = 0 + protocol = "icmp" + remote_ip_prefix = "0.0.0.0/0" + security_group_id = openstack_networking_secgroup_v2.ping.id +} + +resource "openstack_networking_secgroup_rule_v2" "ping6" { + direction = "ingress" + ethertype = "IPv6" + port_range_min = 128 + port_range_max = 0 + protocol = "icmp" + remote_ip_prefix = "::/0" + security_group_id = openstack_networking_secgroup_v2.ping.id +} + +resource "openstack_networking_secgroup_rule_v2" "ssh4" { + direction = "ingress" + ethertype = "IPv4" + port_range_min = 22 + port_range_max = 22 + protocol = "tcp" + remote_ip_prefix = "0.0.0.0/0" + security_group_id = openstack_networking_secgroup_v2.ssh.id +} + +resource "openstack_networking_secgroup_rule_v2" "ssh6" { + direction = "ingress" + ethertype = "IPv6" + port_range_min = 22 + port_range_max = 22 + protocol = "tcp" + remote_ip_prefix = "::/0" + security_group_id = openstack_networking_secgroup_v2.ssh.id +} + +resource "openstack_networking_secgroup_rule_v2" "http4" { + direction = "ingress" + ethertype = "IPv4" + port_range_min = 80 + port_range_max = 80 + protocol = "tcp" + remote_ip_prefix = "0.0.0.0/0" + security_group_id = openstack_networking_secgroup_v2.http.id +} + +resource "openstack_networking_secgroup_rule_v2" "http6" { + direction = "ingress" + ethertype = "IPv6" + port_range_min = 80 + port_range_max = 80 + protocol = "tcp" + remote_ip_prefix = "::/0" + security_group_id = openstack_networking_secgroup_v2.http.id +} + +resource "openstack_networking_secgroup_rule_v2" "https4" { + direction = "ingress" + ethertype = "IPv4" + port_range_min = 443 + port_range_max = 443 + protocol = "tcp" + remote_ip_prefix = "0.0.0.0/0" + security_group_id = openstack_networking_secgroup_v2.http.id +} + +resource "openstack_networking_secgroup_rule_v2" "https6" { + direction 
= "ingress" + ethertype = "IPv6" + port_range_min = 443 + port_range_max = 443 + protocol = "tcp" + remote_ip_prefix = "::/0" + security_group_id = openstack_networking_secgroup_v2.http.id +} + +resource "openstack_networking_floatingip_v2" "public_ip" { + pool = var.ip_pool +} + +data "openstack_images_image_v2" "ubuntu" { + name = "ubuntu-22.04" +} + +data "openstack_compute_flavor_v2" "master-flavor" { + vcpus = var.master_cpus + ram = var.master_ram +} + +data "openstack_compute_flavor_v2" "worker-flavor" { + vcpus = var.worker_cpus + ram = var.worker_ram +} + +data "openstack_compute_flavor_v2" "gpu-flavor" { + name = var.gpu_flavor_name +} + +resource "openstack_compute_instance_v2" "master" { + name = "k8s-${var.site_name}-master" + image_id = data.openstack_images_image_v2.ubuntu.id + flavor_id = data.openstack_compute_flavor_v2.master-flavor.id + security_groups = ["default", openstack_networking_secgroup_v2.ping.name, openstack_networking_secgroup_v2.ssh.name] + user_data = file("cloud-init.yaml") + tags = ["master"] + network { + name = var.net_name + } +} + +resource "openstack_compute_instance_v2" "nfs" { + name = "k8s-${var.site_name}-nfs" + image_id = data.openstack_images_image_v2.ubuntu.id + flavor_id = data.openstack_compute_flavor_v2.worker-flavor.id + security_groups = ["default", openstack_networking_secgroup_v2.ping.name, openstack_networking_secgroup_v2.ssh.name] + user_data = file("cloud-init.yaml") + tags = ["worker"] + network { + name = var.net_name + } +} + +resource "openstack_compute_instance_v2" "ingress" { + name = "k8s-${var.site_name}-w-ingress" + image_id = data.openstack_images_image_v2.ubuntu.id + flavor_id = data.openstack_compute_flavor_v2.worker-flavor.id + security_groups = ["default", openstack_networking_secgroup_v2.ping.name, openstack_networking_secgroup_v2.ssh.name, openstack_networking_secgroup_v2.http.name] + user_data = file("cloud-init.yaml") + tags = ["worker"] + network { + name = var.net_name + } +} + +resource 
"openstack_compute_instance_v2" "worker" { + count = var.extra_workers + name = "k8s-${var.site_name}-worker-${count.index}" + image_id = data.openstack_images_image_v2.ubuntu.id + flavor_id = data.openstack_compute_flavor_v2.worker-flavor.id + security_groups = ["default", openstack_networking_secgroup_v2.ping.name, openstack_networking_secgroup_v2.ssh.name] + user_data = file("cloud-init.yaml") + tags = ["worker"] + network { + name = var.net_name + } +} + +resource "openstack_compute_instance_v2" "gpu" { + count = var.gpu_workers + name = "k8s-${var.site_name}-gpu-${count.index}" + image_id = data.openstack_images_image_v2.ubuntu.id + flavor_id = data.openstack_compute_flavor_v2.gpu-flavor.id + security_groups = ["default", openstack_networking_secgroup_v2.ping.name, openstack_networking_secgroup_v2.ssh.name] + user_data = file("cloud-init.yaml") + tags = ["worker"] + network { + name = var.net_name + } +} + +resource "openstack_blockstorage_volume_v3" "nfs-volume" { + name = "nfs" + size = var.nfs_volume_size +} + +resource "openstack_compute_volume_attach_v2" "nfs-volume-attach" { + instance_id = openstack_compute_instance_v2.nfs.id + volume_id = openstack_blockstorage_volume_v3.nfs-volume.id +} + +resource "local_file" "volume-script" { + filename = "nfs-volume.sh" + file_permission = "0755" + content = <<EOT +#! /bin/bash -xe +if ! 
dpkg-query -s xfsprogs >/dev/null 2>&1; then + apt-get update + apt-get install -y xfsprogs +fi +device="${openstack_compute_volume_attach_v2.nfs-volume-attach.device}" +mkfs.xfs -L NFS "$device" || true +grep -q 'LABEL=NFS' /etc/fstab || /bin/echo -e "LABEL=NFS\t/exports\txfs\tdefaults,uquota,pquota\t0\t0" | tee -a /etc/fstab +mkdir /exports 2>/dev/null || true +mount -a +EOT +} + +resource "openstack_blockstorage_volume_v3" "docker-volume" { + count = var.extra_workers + var.gpu_workers + 2 + name = format("docker-%s", local.nodes[count.index].name) + size = var.docker_volumes_size +} + +resource "openstack_compute_volume_attach_v2" "docker-volume-attach" { + count = var.extra_workers + var.gpu_workers + 2 + instance_id = local.nodes[count.index].id + volume_id = openstack_blockstorage_volume_v3.docker-volume[count.index].id +} + +resource "local_file" "docker-volume-script" { + filename = "docker-volume.sh" + file_permission = "0755" + content = <<EOT +#! /bin/bash -xe +volumes="${join("\n", [for n, d in zipmap(tolist(local.nodes[*].name), tolist(openstack_compute_volume_attach_v2.docker-volume-attach[*].device)) : format("%s:%s", n, d)])}" +volume=$(echo "$volumes" | grep "$(hostname):") +device=$(echo "$volume" | cut -d: -f2) +if ! 
dumpe2fs -h "$device" >/dev/null 2>&1; then + mkfs.ext4 -L DOCKER "$device" + grep -q 'LABEL=DOCKER' /etc/fstab || /bin/echo -e "LABEL=DOCKER\t/var/lib/docker/overlay2\text4\tdefaults,x-systemd.before=local-fs.target\t0\t0" | tee -a /etc/fstab + mkdir -p /var/lib/docker/overlay2 2>/dev/null || true + systemctl stop docker kubelet >/dev/null 2>&1 || true + sleep 10 + systemctl stop docker kubelet >/dev/null 2>&1 || true + umount /var/lib/docker/overlay2 2>&1 || true + mount "$device" /mnt + mv /var/lib/docker/overlay2/* /mnt >/dev/null 2>&1 || true + umount /mnt + mount -a + systemctl start docker kubelet >/dev/null 2>&1 || true +fi +EOT +} + +resource "openstack_blockstorage_volume_v3" "squid-volume" { + name = "squid" + size = var.squid_volume_size +} + +resource "openstack_compute_volume_attach_v2" "squid-volume-attach" { + instance_id = openstack_compute_instance_v2.ingress.id + volume_id = openstack_blockstorage_volume_v3.squid-volume.id +} + +resource "local_file" "squid-volume-script" { + filename = "squid-volume.sh" + file_permission = "0755" + content = <<EOT +#! /bin/bash -xe +device="${openstack_compute_volume_attach_v2.squid-volume-attach.device}" +if ! 
dumpe2fs -h "$device" >/dev/null 2>&1; then + mkfs.ext4 -L SQUID "$device" +fi +grep -q 'LABEL=SQUID' /etc/fstab || /bin/echo -e "LABEL=SQUID\t/var/spool/squid\text4\tdefaults,x-systemd.before=local-fs.target\t0\t0" | tee -a /etc/fstab +mkdir /var/spool/squid 2>/dev/null || true +mount -a +EOT +} + +resource "local_file" "inventory" { + filename = "inventory.yaml" + file_permission = "0644" + content = <<EOT +--- +fip: + hosts: + ${openstack_compute_instance_v2.ingress.network[0].fixed_ip_v4} + +master: + hosts: + ${local.master_ip}: + # must be IPv4 address or hostname + kube_server: ${openstack_compute_instance_v2.master.network[0].fixed_ip_v4} + +ingress: + hosts: + ${local.ingress_ip}: + +nfs: + hosts: + ${local.nfs_ip}: + +worker: + hosts: + ${join("\n ", [for s in local.worker_ips: "${s}:"])} + +gpu: + hosts: + ${join("\n ", [for s in local.gpu_ips : "${s}:"])} + +# using public IP of kube_server for ansible delegate_to +kube_server: + hosts: + ${openstack_compute_instance_v2.master.network[0].fixed_ip_v4}: + ansible_host: ${local.master_ip} +EOT +} + +resource "local_file" "fip" { + filename = "fip.txt" + file_permission = "0644" + content = <<EOT +${openstack_compute_instance_v2.ingress.network[0].fixed_ip_v4} +EOT +} + +resource "local_file" "hosts" { + filename = "hosts.txt" + file_permission = "0644" + content = <<EOT +${local.master_ip} +${local.ingress_ip} +${local.nfs_ip} +${join("\n", concat(local.worker_ips, local.gpu_ips))} +EOT +} diff --git a/testing/extra b/testing/extra index e5f031b..440decb 120000 --- a/testing/extra +++ b/testing/extra @@ -1 +1 @@ -../cesnet-central/extra \ No newline at end of file +../common/extra \ No newline at end of file diff --git a/testing/playbooks/public_keys b/testing/playbooks/public_keys index 3022cbb..6ef4918 120000 --- a/testing/playbooks/public_keys +++ b/testing/playbooks/public_keys @@ -1 +1 @@ -../../cesnet-central/playbooks/public_keys \ No newline at end of file +../../common/playbooks/public_keys \ No 
newline at end of file -- GitLab