Add a staging deployment under staging/ (Terraform for the VMs and volumes,
Ansible inventory and templates), reusing the cesnet-central playbooks through
relative symlinks. The cesnet-central squid.yaml facts message now falls back
to an empty string when a default IPv4/IPv6 address is not defined.

Review fixes folded into this patch:
  * staging/deploy.sh: Terraform runs in a subshell so a failing step aborts
    the script.
  * staging/playbooks/upgrade.yaml: symlink target is relative instead of an
    absolute path into a home directory.
  * staging/terraform/versions.tf: explicit source for the "local" provider.
  * staging/inventory/99-all.yaml: canonical boolean for ansible_become.
The blob ids on the "index" lines of those four files predate the fixes;
regenerate the patch with git diff if exact ids matter.

diff --git a/cesnet-central/playbooks/squid.yaml b/cesnet-central/playbooks/squid.yaml
index 753407841965587cca67431e1c8c42bb6722ac08..8b2a996d3e5cd8c0612976036a646395e9fcec4a 100644
--- a/cesnet-central/playbooks/squid.yaml
+++ b/cesnet-central/playbooks/squid.yaml
@@ -5,7 +5,7 @@
   tasks:
     - name: Gather facts on the node
       debug:
-        msg: "IPv4: {{ ansible_default_ipv4.address }}, IPv6: {{ ansible_default_ipv6.address }}"
+        msg: "IPv4: {{ ansible_default_ipv4.address | default('') }}, IPv6: {{ ansible_default_ipv6.address | default('') }}"
 - name: Squid proxy deployment
   hosts: ingress[0]
   become: true
diff --git a/staging/ansible.cfg b/staging/ansible.cfg
new file mode 100644
index 0000000000000000000000000000000000000000..c3a73bec9aa17dbdd27c77947d8813866f7036e6
--- /dev/null
+++ b/staging/ansible.cfg
@@ -0,0 +1,5 @@
+[defaults]
+inventory=inventory
+
+[diff]
+always=true
diff --git a/staging/deploy.sh b/staging/deploy.sh
new file mode 100755
index 0000000000000000000000000000000000000000..00c6352fec2ee15b21aada067cce317a7c72eaac
--- /dev/null
+++ b/staging/deploy.sh
@@ -0,0 +1,54 @@
+#! /bin/bash -xe
+
+#
+# Deploy EOSC staging instance
+#
+
+# subshell: keeps the cwd and makes "set -e" abort when any step fails (a bare && list would not)
+(cd terraform && terraform init && terraform apply)
+cp -pv terraform/inventory.yaml inventory/1-psnc.yaml
+
+# dynamic DNS
+ip="$(head -n 1 < terraform/fip.txt)"
+# shellstate=$(shopt -po xtrace)
+# set +o xtrace
+# # https://nsupdate.fedcloud.eu
+# vault_prefix=secrets/users/e1662e20-e34b-468c-b0ce-d899bc878364@egi.eu/eosc-staging
+# FEDCLOUD_DYNAMIC_DNS=$(vault read -field data $vault_prefix/FEDCLOUD_DYNAMIC_DNS | grep ^map | head -n 1 | sed 's/map\[\(.*\)\]/\1/')
+# for auth in $FEDCLOUD_DYNAMIC_DNS; do
+#   echo "curl -i -X GET -u $(echo "$auth" | cut -d: -f1):XXX https://nsupdate.fedcloud.eu/nic/update?myip=$ip"
+#   curl -i -X GET -u "$auth" https://nsupdate.fedcloud.eu/nic/update?myip="$ip"
+# done
+# eval "$shellstate"
+echo "Terraform finished. Continue? (CTRL-C to quit)"
+read -r _
+
+# wait for ping and ssh
+for ip in $(cat terraform/hosts.txt); do
+  while ! ping -c 1 "$ip"; do sleep 5; done
+  ssh-keygen -R "$ip"
+  while ! ssh egi@"$ip" -o ConnectTimeout=10 -o PreferredAuthentications=publickey -o StrictHostKeyChecking=no :; do sleep 10; done
+done
+
+# check ssh access
+ansible -m command -a 'uname -a' allnodes
+
+# wait cloud-init
+ansible -m shell -a 'while ! test -f /var/lib/cloud/instance/boot-finished; do sleep 2; done' allnodes
+
+# setup volumes
+ansible -m copy -a 'src=terraform/nfs-volume.sh dest=/root/ mode=preserve' nfs
+ansible -m command -a '/root/nfs-volume.sh' nfs
+ansible -m copy -a 'src=terraform/squid-volume.sh dest=/root/ mode=preserve' 'ingress[0]'
+ansible -m command -a '/root/squid-volume.sh' 'ingress[0]'
+
+# k8s + notebooks
+ansible-playbook playbooks/k8s.yaml
+# docker runtime directory after Kubernetes deployment (problem with unmounts)
+ansible -m copy -a 'src=terraform/docker-volume.sh dest=/root/ mode=preserve' 'ingress nfs worker'
+ansible -m command -a '/root/docker-volume.sh' 'ingress nfs worker'
+
+# ansible-playbook playbooks/squid.yaml
+# ansible-playbook playbooks/cvmfs.yaml
+
+while ansible -i ./inventory -m command -a 'kubectl get pods --all-namespaces' master | tail -n +3 | grep -v ' Running '; do sleep 5; done
diff --git a/staging/extra b/staging/extra
new file mode 120000
index 0000000000000000000000000000000000000000..e5f031b315b7487e0c8b5952adb00b4894b15408
--- /dev/null
+++ b/staging/extra
@@ -0,0 +1 @@
+../cesnet-central/extra
\ No newline at end of file
diff --git a/staging/inventory/1-psnc.yaml b/staging/inventory/1-psnc.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..708ff6c9f909d70a68acb3fc6bdd08f92f8707e8
--- /dev/null
+++ b/staging/inventory/1-psnc.yaml
@@ -0,0 +1,31 @@
+---
+fip:
+  hosts:
+    62.3.174.45:
+
+master:
+  hosts:
+    192.168.3.27:
+      # must be IPv4 address or hostname
+      kube_server: 192.168.3.27
+
+ingress:
+  hosts:
+    192.168.1.221:
+
+nfs:
+  hosts:
+    192.168.0.64:
+
+worker:
+  hosts:
+    192.168.1.24:
+
+gpu:
+  hosts:
+
+# using public IP of kube_server for ansible delegate_to
+kube_server:
+  hosts:
+    192.168.3.27:
+      ansible_host: 192.168.3.27
diff --git a/staging/inventory/99-all.yaml b/staging/inventory/99-all.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..55b240016383772b198eac000a27ea65e8130593
--- /dev/null
+++ b/staging/inventory/99-all.yaml
@@ -0,0 +1,18 @@
+---
+allnodes:
+  children:
+    master:
+    ingress:
+    nfs:
+    worker:
+
+all:
+  vars:
+    ansible_become: true
+    ansible_user: egi
+    ansible_ssh_common_args: '-o ProxyCommand="ssh -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null -W %h:%p -q egi@{{ groups["fip"][0] }}" -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null'
+
+    site_name: psnc-staging
+    vault_mount_point: secrets/users/e1662e20-e34b-468c-b0ce-d899bc878364@egi.eu/eosc-staging
+
+    grafana_hostname: grafana-stg.cloud.cesnet.cz
diff --git a/staging/playbooks/cvmfs.yaml b/staging/playbooks/cvmfs.yaml
new file mode 120000
index 0000000000000000000000000000000000000000..b5dcdf012c6c9390bf06efdebe81cd3ec41d1205
--- /dev/null
+++ b/staging/playbooks/cvmfs.yaml
@@ -0,0 +1 @@
+../../cesnet-central/playbooks/cvmfs.yaml
\ No newline at end of file
diff --git a/staging/playbooks/files/calico.yaml b/staging/playbooks/files/calico.yaml
new file mode 120000
index 0000000000000000000000000000000000000000..3d2b7877a37ef6e527475c5b04b7c9ce82fdeef4
--- /dev/null
+++ b/staging/playbooks/files/calico.yaml
@@ -0,0 +1 @@
+../../../cesnet-central/playbooks/files/calico.yaml
\ No newline at end of file
diff --git a/staging/playbooks/files/helm_repos.fact b/staging/playbooks/files/helm_repos.fact
new file mode 120000
index 0000000000000000000000000000000000000000..94d2856ed013223d9684d8d3e5021efe2c9c648d
--- /dev/null
+++ b/staging/playbooks/files/helm_repos.fact
@@ -0,0 +1 @@
+../../../cesnet-central/playbooks/files/helm_repos.fact
\ No newline at end of file
diff --git a/staging/playbooks/files/k8s-cheats.sh b/staging/playbooks/files/k8s-cheats.sh
new file mode 120000
index 0000000000000000000000000000000000000000..d20f90ff8514cfb786c0ab8b494df4886d890e6c
--- /dev/null
+++ b/staging/playbooks/files/k8s-cheats.sh
@@ -0,0 +1 @@
+../../../cesnet-central/playbooks/files/k8s-cheats.sh
\ No newline at end of file
diff --git a/staging/playbooks/files/xfs-quotas.sh b/staging/playbooks/files/xfs-quotas.sh
new file mode 120000
index 0000000000000000000000000000000000000000..5330cad707943f9cedf9e0d1e519b123c7c4ac3e
--- /dev/null
+++ b/staging/playbooks/files/xfs-quotas.sh
@@ -0,0 +1 @@
+../../../cesnet-central/playbooks/files/xfs-quotas.sh
\ No newline at end of file
diff --git a/staging/playbooks/k8s.yaml b/staging/playbooks/k8s.yaml
new file mode 120000
index 0000000000000000000000000000000000000000..5e18112ca53b79e98249019a43b02a8f6b19235a
--- /dev/null
+++ b/staging/playbooks/k8s.yaml
@@ -0,0 +1 @@
+../../cesnet-central/playbooks/k8s.yaml
\ No newline at end of file
diff --git a/staging/playbooks/public_keys b/staging/playbooks/public_keys
new file mode 120000
index 0000000000000000000000000000000000000000..3022cbb37a5a75d91fc46e99679527b5368e5806
--- /dev/null
+++ b/staging/playbooks/public_keys
@@ -0,0 +1 @@
+../../cesnet-central/playbooks/public_keys
\ No newline at end of file
diff --git a/staging/playbooks/squid.yaml b/staging/playbooks/squid.yaml
new file mode 120000
index 0000000000000000000000000000000000000000..408847ec888f5eed343c23e047160d0873dc2416
--- /dev/null
+++ b/staging/playbooks/squid.yaml
@@ -0,0 +1 @@
+../../cesnet-central/playbooks/squid.yaml
\ No newline at end of file
diff --git a/staging/playbooks/templates/etc/exports b/staging/playbooks/templates/etc/exports
new file mode 100644
index 0000000000000000000000000000000000000000..ef7691790da45dd87e7387760e7b9acd25f2cc1d
--- /dev/null
+++ b/staging/playbooks/templates/etc/exports
@@ -0,0 +1,2 @@
+# export the NFS directory to all the cluster members
+/exports {% for host in groups['allnodes'] -%}{{ hostvars[host].ansible_default_ipv4.address }}(rw,async,no_root_squash,no_subtree_check) {% endfor -%}
diff --git a/staging/playbooks/templates/etc/mailutils.conf b/staging/playbooks/templates/etc/mailutils.conf
new file mode 120000
index 0000000000000000000000000000000000000000..c67eb7db4e30f64d71f6ea930f1dbec22ae46230
--- /dev/null
+++ b/staging/playbooks/templates/etc/mailutils.conf
@@ -0,0 +1 @@
+../../../../cesnet-central/playbooks/templates/etc/mailutils.conf
\ No newline at end of file
diff --git a/staging/playbooks/templates/etc/squid/conf.d/allcluster.conf b/staging/playbooks/templates/etc/squid/conf.d/allcluster.conf
new file mode 100644
index 0000000000000000000000000000000000000000..2bd09266fa51feb624492e107907c23ccf79fe1d
--- /dev/null
+++ b/staging/playbooks/templates/etc/squid/conf.d/allcluster.conf
@@ -0,0 +1,4 @@
+{% for host in groups['ingress'] + groups['nfs'] + groups['worker'] + groups['gpu'] -%}
+acl allcluster src {{ hostvars[host].ansible_default_ipv4.address }}
+{% endfor -%}
+http_access allow allcluster
diff --git a/staging/playbooks/upgrade.yaml b/staging/playbooks/upgrade.yaml
new file mode 120000
index 0000000000000000000000000000000000000000..3a004255a11079586ec6ca47467336cfb866c6d0
--- /dev/null
+++ b/staging/playbooks/upgrade.yaml
@@ -0,0 +1 @@
+../../cesnet-central/playbooks/upgrade.yaml
\ No newline at end of file
diff --git a/staging/terraform/.gitignore b/staging/terraform/.gitignore
new file mode 100644
index 0000000000000000000000000000000000000000..e15cf5f0280219a788276bcb4546e5806302376b
--- /dev/null
+++ b/staging/terraform/.gitignore
@@ -0,0 +1,8 @@
+/.terraform/
+/.terraform.lock.hcl
+/fip.txt
+/hosts.txt
+/inventory.yaml
+/terraform.tfstate
+/terraform.tfstate.backup
+/*-volume.sh
diff --git a/staging/terraform/cloud-init.yaml b/staging/terraform/cloud-init.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..481d1f2a6ae647127e69c233c936529b441e13cc
--- /dev/null
+++ b/staging/terraform/cloud-init.yaml
@@ -0,0 +1,18 @@
+#cloud-config
+---
+package_update: true
+
+package_upgrade: true
+
+users:
+  - default
+  - name: egi
+    gecos: EGI
+    primary_group: egi
+    groups: users
+    shell: /bin/bash
+    sudo: ALL=(ALL) NOPASSWD:ALL
+    ssh_import_id:
+      - gh:enolfc
+      - gh:andrea-manzi
+      - gh:valtri
diff --git a/staging/terraform/terraform.tfvars b/staging/terraform/terraform.tfvars
new file mode 100644
index 0000000000000000000000000000000000000000..e7f2d7692e2b2e1b3097122f26037db581e40c86
--- /dev/null
+++ b/staging/terraform/terraform.tfvars
@@ -0,0 +1,30 @@
+# These need to be defined for things to work
+ip_pool   = "PSNC-PUB1-EDU"
+net_name  = "local-network"
+site_name = "staging"
+
+# These may need some adjustment for your provider
+master_cpus = 4
+master_ram  = 8192
+worker_cpus = 4
+worker_ram  = 8192
+# XXX: replace this for GPU flavor, once available
+gpu_flavor_name = "M1-NVME-2vCPU-8R-50D"
+
+# Number of extra workers
+extra_workers = 1
+
+# Number of GPU workers
+gpu_workers = 0
+
+# volumes for docker
+docker_volumes_size = 384
+
+# NFS volume
+nfs_volume_size = 256
+
+# scratch volume
+scratch_volumes_size = 128
+
+# squid volume
+squid_volume_size = 128
diff --git a/staging/terraform/vars.tf b/staging/terraform/vars.tf
new file mode 100644
index 0000000000000000000000000000000000000000..b0891ef0b1d27d96a6a58b447ca1a8b95a9691d3
--- /dev/null
+++ b/staging/terraform/vars.tf
@@ -0,0 +1,69 @@
+variable "ip_pool" {
+  type        = string
+  description = "The name of the public IP pool for the servers"
+}
+
+variable "net_name" {
+  type        = string
+  description = "The name of the IPv4 network"
+}
+
+variable "site_name" {
+  type        = string
+  description = "Site identifier for internal host names"
+}
+
+variable "gpu_flavor_name" {
+  type        = string
+  description = "Name of the GPU flavor"
+}
+
+variable "master_cpus" {
+  type        = number
+  description = "Number of CPUs for the master"
+}
+
+variable "master_ram" {
+  type        = number
+  description = "RAM for the master"
+}
+
+variable "worker_cpus" {
+  type        = number
+  description = "Number of CPUs for the worker"
+}
+
+variable "worker_ram" {
+  type        = number
+  description = "RAM for the worker"
+}
+
+variable "extra_workers" {
+  type        = number
+  description = "Number of extra workers to create"
+}
+
+variable "gpu_workers" {
+  type        = number
+  description = "Number of GPU workers to create"
+}
+
+variable "docker_volumes_size" {
+  type        = number
+  description = "Size of volumes for docker (GB)"
+}
+
+variable "nfs_volume_size" {
+  type        = number
+  description = "Size of volume for NFS server (GB)"
+}
+
+variable "scratch_volumes_size" {
+  type        = number
+  description = "Size of volume for ephemeral volumes (GB)"
+}
+
+variable "squid_volume_size" {
+  type        = number
+  description = "Size of volume for squid proxy, CVMFS cache (GB)"
+}
diff --git a/staging/terraform/versions.tf b/staging/terraform/versions.tf
new file mode 100644
index 0000000000000000000000000000000000000000..43054dd2536754df11f95dd920ca7ed5f4b5c6f3
--- /dev/null
+++ b/staging/terraform/versions.tf
@@ -0,0 +1,13 @@
+terraform {
+  required_providers {
+    local = {
+      source  = "hashicorp/local"
+      version = "~> 2.0"
+    }
+    openstack = {
+      source  = "terraform-provider-openstack/openstack",
+      version = ">= 1.38.0"
+    }
+  }
+  required_version = ">= 0.13"
+}
diff --git a/staging/terraform/vms.tf b/staging/terraform/vms.tf
new file mode 100644
index 0000000000000000000000000000000000000000..e9f8fa18d9ebd0d71b4726880e683c540f296ea0
--- /dev/null
+++ b/staging/terraform/vms.tf
@@ -0,0 +1,364 @@
+provider "openstack" {
+}
+
+locals {
+  nodes = concat([
+    openstack_compute_instance_v2.ingress,
+    openstack_compute_instance_v2.nfs,
+  ], openstack_compute_instance_v2.worker[*], openstack_compute_instance_v2.gpu[*])
+  master_ip  = openstack_compute_instance_v2.master.network[0].fixed_ip_v4
+  ingress_ip = openstack_compute_instance_v2.ingress.network[0].fixed_ip_v4
+  nfs_ip     = openstack_compute_instance_v2.nfs.network[0].fixed_ip_v4
+  worker_ips = [for s in openstack_compute_instance_v2.worker[*].network[0].fixed_ip_v4 : s]
+  gpu_ips    = [for s in openstack_compute_instance_v2.gpu[*].network[0].fixed_ip_v4 : s]
+}
+
+# Security groups
+
+resource "openstack_networking_secgroup_v2" "ping" {
+  name        = "ping"
+  description = "ICMP for ping"
+}
+
+resource "openstack_networking_secgroup_v2" "ssh" {
+  name        = "ssh"
+  description = "ssh connection"
+}
+
+resource "openstack_networking_secgroup_v2" "http" {
+  name        = "http"
+  description = "http/https"
+}
+
+resource "openstack_networking_secgroup_rule_v2" "ping4" {
+  direction         = "ingress"
+  ethertype         = "IPv4"
+  port_range_min    = 8
+  port_range_max    = 0
+  protocol          = "icmp"
+  remote_ip_prefix  = "0.0.0.0/0"
+  security_group_id = openstack_networking_secgroup_v2.ping.id
+}
+
+resource "openstack_networking_secgroup_rule_v2" "ssh4" {
+  direction         = "ingress"
+  ethertype         = "IPv4"
+  port_range_min    = 22
+  port_range_max    = 22
+  protocol          = "tcp"
+  remote_ip_prefix  = "0.0.0.0/0"
+  security_group_id = openstack_networking_secgroup_v2.ssh.id
+}
+
+resource "openstack_networking_secgroup_rule_v2" "ssh6" {
+  direction         = "ingress"
+  ethertype         = "IPv6"
+  port_range_min    = 22
+  port_range_max    = 22
+  protocol          = "tcp"
+  remote_ip_prefix  = "::/0"
+  security_group_id = openstack_networking_secgroup_v2.ssh.id
+}
+
+resource "openstack_networking_secgroup_rule_v2" "http4" {
+  direction         = "ingress"
+  ethertype         = "IPv4"
+  port_range_min    = 80
+  port_range_max    = 80
+  protocol          = "tcp"
+  remote_ip_prefix  = "0.0.0.0/0"
+  security_group_id = openstack_networking_secgroup_v2.http.id
+}
+
+resource "openstack_networking_secgroup_rule_v2" "http6" {
+  direction         = "ingress"
+  ethertype         = "IPv6"
+  port_range_min    = 80
+  port_range_max    = 80
+  protocol          = "tcp"
+  remote_ip_prefix  = "::/0"
+  security_group_id = openstack_networking_secgroup_v2.http.id
+}
+
+resource "openstack_networking_secgroup_rule_v2" "https4" {
+  direction         = "ingress"
+  ethertype         = "IPv4"
+  port_range_min    = 443
+  port_range_max    = 443
+  protocol          = "tcp"
+  remote_ip_prefix  = "0.0.0.0/0"
+  security_group_id = openstack_networking_secgroup_v2.http.id
+}
+
+resource "openstack_networking_secgroup_rule_v2" "https6" {
+  direction         = "ingress"
+  ethertype         = "IPv6"
+  port_range_min    = 443
+  port_range_max    = 443
+  protocol          = "tcp"
+  remote_ip_prefix  = "::/0"
+  security_group_id = openstack_networking_secgroup_v2.http.id
+}
+
+resource "openstack_networking_floatingip_v2" "public_ip" {
+  pool = var.ip_pool
+}
+
+data "openstack_images_image_v2" "ubuntu" {
+  name = "ubuntu-22.04-x86_64-server-cloudimg-20240429"
+}
+
+data "openstack_compute_flavor_v2" "master-flavor" {
+  vcpus = var.master_cpus
+  ram   = var.master_ram
+}
+
+data "openstack_compute_flavor_v2" "worker-flavor" {
+  vcpus = var.worker_cpus
+  ram   = var.worker_ram
+}
+
+data "openstack_compute_flavor_v2" "gpu-flavor" {
+  name = var.gpu_flavor_name
+}
+
+resource "openstack_compute_instance_v2" "master" {
+  name            = "k8s-${var.site_name}-master"
+  image_id        = data.openstack_images_image_v2.ubuntu.id
+  flavor_id       = data.openstack_compute_flavor_v2.master-flavor.id
+  security_groups = ["default", openstack_networking_secgroup_v2.ping.name, openstack_networking_secgroup_v2.ssh.name]
+  user_data       = file("cloud-init.yaml")
+  tags            = ["master"]
+  network {
+    name = var.net_name
+  }
+}
+
+resource "openstack_compute_instance_v2" "nfs" {
+  name            = "k8s-${var.site_name}-nfs"
+  image_id        = data.openstack_images_image_v2.ubuntu.id
+  flavor_id       = data.openstack_compute_flavor_v2.worker-flavor.id
+  security_groups = ["default", openstack_networking_secgroup_v2.ping.name, openstack_networking_secgroup_v2.ssh.name]
+  user_data       = file("cloud-init.yaml")
+  tags            = ["worker"]
+  network {
+    name = var.net_name
+  }
+}
+
+resource "openstack_compute_instance_v2" "ingress" {
+  name            = "k8s-${var.site_name}-w-ingress"
+  image_id        = data.openstack_images_image_v2.ubuntu.id
+  flavor_id       = data.openstack_compute_flavor_v2.worker-flavor.id
+  security_groups = ["default", openstack_networking_secgroup_v2.ping.name, openstack_networking_secgroup_v2.ssh.name, openstack_networking_secgroup_v2.http.name]
+  user_data       = file("cloud-init.yaml")
+  tags            = ["worker"]
+  network {
+    name = var.net_name
+  }
+}
+
+resource "openstack_compute_instance_v2" "worker" {
+  count           = var.extra_workers
+  name            = "k8s-${var.site_name}-worker-${count.index}"
+  image_id        = data.openstack_images_image_v2.ubuntu.id
+  flavor_id       = data.openstack_compute_flavor_v2.worker-flavor.id
+  security_groups = ["default", openstack_networking_secgroup_v2.ping.name, openstack_networking_secgroup_v2.ssh.name]
+  user_data       = file("cloud-init.yaml")
+  tags            = ["worker"]
+  network {
+    name = var.net_name
+  }
+}
+
+resource "openstack_compute_instance_v2" "gpu" {
+  count           = var.gpu_workers
+  name            = "k8s-${var.site_name}-gpu-${count.index}"
+  image_id        = data.openstack_images_image_v2.ubuntu.id
+  flavor_id       = data.openstack_compute_flavor_v2.gpu-flavor.id
+  security_groups = ["default", openstack_networking_secgroup_v2.ping.name, openstack_networking_secgroup_v2.ssh.name]
+  user_data       = file("cloud-init.yaml")
+  tags            = ["worker"]
+  network {
+    name = var.net_name
+  }
+}
+
+resource "openstack_compute_floatingip_associate_v2" "fip" {
+  floating_ip = openstack_networking_floatingip_v2.public_ip.address
+  instance_id = openstack_compute_instance_v2.ingress.id
+}
+
+resource "openstack_blockstorage_volume_v3" "nfs-volume" {
+  name = "nfs"
+  size = var.nfs_volume_size
+}
+
+resource "openstack_compute_volume_attach_v2" "nfs-volume-attach" {
+  instance_id = openstack_compute_instance_v2.nfs.id
+  volume_id   = openstack_blockstorage_volume_v3.nfs-volume.id
+}
+
+resource "local_file" "volume-script" {
+  filename        = "nfs-volume.sh"
+  file_permission = "0755"
+  content         = <<EOT
+#! /bin/bash -xe
+if ! dpkg-query -s xfsprogs >/dev/null 2>&1; then
+  apt-get update
+  apt-get install -y xfsprogs
+fi
+device="${openstack_compute_volume_attach_v2.nfs-volume-attach.device}"
+mkfs.xfs -L NFS "$device" || true
+grep -q 'LABEL=NFS' /etc/fstab || /bin/echo -e "LABEL=NFS\t/exports\txfs\tdefaults,uquota,pquota\t0\t0" | tee -a /etc/fstab
+mkdir /exports 2>/dev/null || true
+mount -a
+EOT
+}
+
+resource "openstack_blockstorage_volume_v3" "docker-volume" {
+  count = var.extra_workers + var.gpu_workers + 2
+  name  = format("docker-%s", local.nodes[count.index].name)
+  size  = var.docker_volumes_size
+}
+
+resource "openstack_compute_volume_attach_v2" "docker-volume-attach" {
+  count       = var.extra_workers + var.gpu_workers + 2
+  instance_id = local.nodes[count.index].id
+  volume_id   = openstack_blockstorage_volume_v3.docker-volume[count.index].id
+}
+
+resource "local_file" "docker-volume-script" {
+  filename        = "docker-volume.sh"
+  file_permission = "0755"
+  content         = <<EOT
+#! /bin/bash -xe
+volumes="${join("\n", [for n, d in zipmap(tolist(local.nodes[*].name), tolist(openstack_compute_volume_attach_v2.docker-volume-attach[*].device)) : format("%s:%s", n, d)])}"
+volume=$(echo "$volumes" | grep "$(hostname):")
+device=$(echo "$volume" | cut -d: -f2)
+if ! dumpe2fs -h "$device" >/dev/null 2>&1; then
+  mkfs.ext4 -L DOCKER "$device"
+  grep -q 'LABEL=DOCKER' /etc/fstab || /bin/echo -e "LABEL=DOCKER\t/var/lib/docker/overlay2\text4\tdefaults,x-systemd.before=local-fs.target\t0\t0" | tee -a /etc/fstab
+  mkdir -p /var/lib/docker/overlay2 2>/dev/null || true
+  service docker stop >/dev/null 2>&1 || true
+  sleep 10
+  umount /var/lib/docker/overlay2 2>&1 || true
+  mount "$device" /mnt
+  mv /var/lib/docker/overlay2/* /mnt >/dev/null 2>&1 || true
+  umount /mnt
+  mount -a
+fi
+EOT
+}
+
+resource "openstack_blockstorage_volume_v3" "scratch-volume" {
+  count = var.extra_workers + var.gpu_workers + 2
+  name  = format("scratch-%s", local.nodes[count.index].name)
+  size  = var.scratch_volumes_size
+}
+
+resource "openstack_compute_volume_attach_v2" "scratch-volume-attach" {
+  count       = var.extra_workers + var.gpu_workers + 2
+  instance_id = local.nodes[count.index].id
+  volume_id   = openstack_blockstorage_volume_v3.scratch-volume[count.index].id
+}
+
+resource "local_file" "scratch-volume-script" {
+  filename        = "scratch-volume.sh"
+  file_permission = "0755"
+  content         = <<EOT
+#! /bin/bash -xe
+volumes="${join("\n", [for n, d in zipmap(tolist(local.nodes[*].name), tolist(openstack_compute_volume_attach_v2.scratch-volume-attach[*].device)) : format("%s:%s", n, d)])}"
+volume=$(echo "$volumes" | grep "$(hostname):")
+device=$(echo "$volume" | cut -d: -f2)
+if ! dumpe2fs -h "$device" >/dev/null 2>&1; then
+  mkfs.ext4 -L SCRATCH "$device"
+  grep -q 'LABEL=SCRATCH' /etc/fstab || /bin/echo -e "LABEL=SCRATCH\t/scratch\text4\tdefaults\t0\t0" | tee -a /etc/fstab
+  mkdir -p /scratch 2>/dev/null || true
+  mount -a
+fi
+EOT
+}
+
+resource "openstack_blockstorage_volume_v3" "squid-volume" {
+  name = "squid"
+  size = var.squid_volume_size
+}
+
+resource "openstack_compute_volume_attach_v2" "squid-volume-attach" {
+  instance_id = openstack_compute_instance_v2.ingress.id
+  volume_id   = openstack_blockstorage_volume_v3.squid-volume.id
+}
+
+resource "local_file" "squid-volume-script" {
+  filename        = "squid-volume.sh"
+  file_permission = "0755"
+  content         = <<EOT
+#! /bin/bash -xe
+device="${openstack_compute_volume_attach_v2.squid-volume-attach.device}"
+if ! dumpe2fs -h "$device" >/dev/null 2>&1; then
+  mkfs.ext4 -L SQUID "$device"
+fi
+grep -q 'LABEL=SQUID' /etc/fstab || /bin/echo -e "LABEL=SQUID\t/var/spool/squid\text4\tdefaults,x-systemd.before=local-fs.target\t0\t0" | tee -a /etc/fstab
+mkdir /var/spool/squid 2>/dev/null || true
+mount -a
+EOT
+}
+
+resource "local_file" "inventory" {
+  filename        = "inventory.yaml"
+  file_permission = "0644"
+  content         = <<EOT
+---
+fip:
+  hosts:
+    ${openstack_networking_floatingip_v2.public_ip.address}:
+
+master:
+  hosts:
+    ${local.master_ip}:
+      # must be IPv4 address or hostname
+      kube_server: ${openstack_compute_instance_v2.master.network[0].fixed_ip_v4}
+
+ingress:
+  hosts:
+    ${local.ingress_ip}:
+
+nfs:
+  hosts:
+    ${local.nfs_ip}:
+
+worker:
+  hosts:
+    ${join("\n    ", [for s in local.worker_ips: "${s}:"])}
+
+gpu:
+  hosts:
+    ${join("\n    ", [for s in local.gpu_ips : "${s}:"])}
+
+# using public IP of kube_server for ansible delegate_to
+kube_server:
+  hosts:
+    ${openstack_compute_instance_v2.master.network[0].fixed_ip_v4}:
+      ansible_host: ${local.master_ip}
+EOT
+}
+
+resource "local_file" "fip" {
+  filename        = "fip.txt"
+  file_permission = "0644"
+  content         = <<EOT
+${openstack_networking_floatingip_v2.public_ip.address}
+EOT
+}
+
+resource "local_file" "hosts" {
+  filename        = "hosts.txt"
+  file_permission = "0644"
+  content         = <<EOT
+${local.master_ip}
+${local.ingress_ip}
+${local.nfs_ip}
+${join("\n", concat(local.worker_ips, local.gpu_ips))}
+EOT
+}