From 113d19ed99eec9477df5b687b925756c43b1eae6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Franti=C5=A1ek=20Dvo=C5=99=C3=A1k?= <valtri@civ.zcu.cz> Date: Thu, 12 Sep 2024 18:55:07 +0000 Subject: [PATCH] New production1 deployment @ PSNC * Tune OpenStack parameters * Enlarge flavours on PSNC production * Disable scratch volumes * Fix production FW for admin machine * Inventory file * Site name --- common/terraform/network.tf | 25 ++ production1/ansible.cfg | 5 + production1/deploy.sh | 59 +++++ production1/deployments/hub.yaml | 1 + production1/extra | 1 + production1/inventory/1-psnc.yaml | 33 +++ production1/inventory/99-all.yaml | 21 ++ production1/playbooks/cvmfs.yaml | 1 + production1/playbooks/files/calico.yaml | 1 + production1/playbooks/files/etc | 1 + .../playbooks/files/jupyterhub-jwt.yaml | 1 + production1/playbooks/files/usr | 1 + production1/playbooks/k8s.yaml | 1 + production1/playbooks/notebooks.yaml | 1 + production1/playbooks/public_keys | 1 + production1/playbooks/security-assets.yaml | 1 + production1/playbooks/security-logs.yaml | 1 + production1/playbooks/security-scanner.yaml | 1 + production1/playbooks/squid.yaml | 1 + .../templates/deepfence-agent.yaml.j2 | 1 + production1/playbooks/templates/etc/exports | 1 + .../playbooks/templates/etc/glpi-agent | 1 + .../playbooks/templates/etc/mailutils.conf | 1 + production1/playbooks/templates/etc/squid | 1 + .../templates/fluent-bit-secrets.yaml.j2 | 1 + .../playbooks/templates/fluent-bit.yaml.j2 | 1 + production1/playbooks/upgrade.yaml | 1 + production1/terraform/.gitignore | 1 + production1/terraform/cloud-init.yaml | 1 + production1/terraform/firewall.tf | 1 + production1/terraform/network.tf | 1 + production1/terraform/terraform.tfvars | 45 ++++ production1/terraform/vars.tf | 1 + production1/terraform/versions.tf | 1 + production1/terraform/vms.tf | 245 ++++++++++++++++++ production2/terraform/terraform.tfvars | 3 +- staging1/terraform/network.tf | 26 +- 37 files changed, 463 insertions(+), 27 
deletions(-) create mode 100644 common/terraform/network.tf create mode 100644 production1/ansible.cfg create mode 100755 production1/deploy.sh create mode 120000 production1/deployments/hub.yaml create mode 120000 production1/extra create mode 100644 production1/inventory/1-psnc.yaml create mode 100644 production1/inventory/99-all.yaml create mode 120000 production1/playbooks/cvmfs.yaml create mode 120000 production1/playbooks/files/calico.yaml create mode 120000 production1/playbooks/files/etc create mode 120000 production1/playbooks/files/jupyterhub-jwt.yaml create mode 120000 production1/playbooks/files/usr create mode 120000 production1/playbooks/k8s.yaml create mode 120000 production1/playbooks/notebooks.yaml create mode 120000 production1/playbooks/public_keys create mode 120000 production1/playbooks/security-assets.yaml create mode 120000 production1/playbooks/security-logs.yaml create mode 120000 production1/playbooks/security-scanner.yaml create mode 120000 production1/playbooks/squid.yaml create mode 120000 production1/playbooks/templates/deepfence-agent.yaml.j2 create mode 120000 production1/playbooks/templates/etc/exports create mode 120000 production1/playbooks/templates/etc/glpi-agent create mode 120000 production1/playbooks/templates/etc/mailutils.conf create mode 120000 production1/playbooks/templates/etc/squid create mode 120000 production1/playbooks/templates/fluent-bit-secrets.yaml.j2 create mode 120000 production1/playbooks/templates/fluent-bit.yaml.j2 create mode 120000 production1/playbooks/upgrade.yaml create mode 120000 production1/terraform/.gitignore create mode 120000 production1/terraform/cloud-init.yaml create mode 120000 production1/terraform/firewall.tf create mode 120000 production1/terraform/network.tf create mode 100644 production1/terraform/terraform.tfvars create mode 120000 production1/terraform/vars.tf create mode 120000 production1/terraform/versions.tf create mode 100644 production1/terraform/vms.tf mode change 100644 => 
120000 staging1/terraform/network.tf diff --git a/common/terraform/network.tf b/common/terraform/network.tf new file mode 100644 index 0000000..c244efe --- /dev/null +++ b/common/terraform/network.tf @@ -0,0 +1,25 @@ +data "openstack_networking_network_v2" "public-network" { + name = var.ip_pool +} + +resource "openstack_networking_network_v2" "local-network" { + name = "local-network" + admin_state_up = "true" +} + +resource "openstack_networking_subnet_v2" "local-network-subnet" { + name = "local-network-subnet" + network_id = openstack_networking_network_v2.local-network.id + cidr = "192.168.0.0/24" +} + +resource "openstack_networking_router_v2" "local-router" { + name = "local-router" + admin_state_up = true + external_network_id = data.openstack_networking_network_v2.public-network.id +} + +resource "openstack_networking_router_interface_v2" "router_interface_1" { + router_id = openstack_networking_router_v2.local-router.id + subnet_id = openstack_networking_subnet_v2.local-network-subnet.id +} diff --git a/production1/ansible.cfg b/production1/ansible.cfg new file mode 100644 index 0000000..c3a73be --- /dev/null +++ b/production1/ansible.cfg @@ -0,0 +1,5 @@ +[defaults] +inventory=inventory + +[diff] +always=true diff --git a/production1/deploy.sh b/production1/deploy.sh new file mode 100755 index 0000000..b1d9f7e --- /dev/null +++ b/production1/deploy.sh @@ -0,0 +1,59 @@ +#! 
/bin/bash -xe + +# +# Deploy EOSC production instance +# + +# subshell: with "set -e" a failing non-final command of a top-level && list would not stop the script +(cd terraform && terraform init && terraform apply) +cp -pv terraform/inventory.yaml inventory/1-psnc.yaml + +# dynamic DNS +ip="$(head -n 1 < terraform/fip.txt)" +# shellstate=$(shopt -po xtrace) +# set +o xtrace +# # https://nsupdate.fedcloud.eu +# vault_prefix=secrets/users/e1662e20-e34b-468c-b0ce-d899bc878364@egi.eu/eosc-production +# FEDCLOUD_DYNAMIC_DNS=$(vault read -field data $vault_prefix/FEDCLOUD_DYNAMIC_DNS | grep ^map | head -n 1 | sed 's/map\[\(.*\)\]/\1/') +# for auth in $FEDCLOUD_DYNAMIC_DNS; do +# echo "curl -i -X GET -u $(echo "$auth" | cut -d: -f1):XXX https://nsupdate.fedcloud.eu/nic/update?myip=$ip" +# curl -i -X GET -u "$auth" https://nsupdate.fedcloud.eu/nic/update?myip="$ip" +# done +# eval "$shellstate" +echo "Terraform finished. Check terraform/docker-volume.sh. Continue? (CTRL-C to quit)" +read -r _ + +# wait for ping and ssh +for ip in $(cat terraform/fip.txt); do + while ! ping -c 1 "$ip"; do sleep 5; done + ssh-keygen -R "$ip" + while ! ssh egi@"$ip" -o ConnectTimeout=10 -o PreferredAuthentications=publickey -o StrictHostKeyChecking=no :; do sleep 10; done +done + +# check ssh access +ansible -m command -a 'uname -a' allnodes + +# wait for cloud-init to finish +ansible -m shell -a 'while ! 
test -f /var/lib/cloud/instance/boot-finished; do sleep 2; done' allnodes + +# setup volumes +ansible -m copy -a 'src=terraform/nfs-volume.sh dest=/root/ mode=preserve' nfs +ansible -m command -a '/root/nfs-volume.sh' nfs +ansible -m copy -a 'src=terraform/squid-volume.sh dest=/root/ mode=preserve' 'ingress[0]' +ansible -m command -a '/root/squid-volume.sh' 'ingress[0]' + +# kubernetes +ansible-playbook playbooks/k8s.yaml +while ansible -m command -a 'kubectl get pods --all-namespaces' master | tail -n +3 | grep -Ev ' (Running|Completed) '; do sleep 5; done +# docker runtime directory after Kubernetes deployment (problem with unmounts) +ansible -m copy -a 'src=terraform/docker-volume.sh dest=/root/ mode=preserve' 'ingress nfs worker gpu' +ansible -m command -a '/root/docker-volume.sh' 'ingress nfs worker gpu' +ansible-playbook playbooks/squid.yaml +ansible-playbook playbooks/cvmfs.yaml + +# wait until all pods are Running or Completed +while ansible -m command -a 'kubectl get pods --all-namespaces' master | tail -n +3 | grep -Ev ' (Running|Completed) '; do sleep 5; done + +ansible-playbook playbooks/security-assets.yaml +ansible-playbook playbooks/security-logs.yaml +ansible-playbook playbooks/security-scanner.yaml diff --git a/production1/deployments/hub.yaml b/production1/deployments/hub.yaml new file mode 120000 index 0000000..637bf5d --- /dev/null +++ b/production1/deployments/hub.yaml @@ -0,0 +1 @@ +../../common/deployments/hub-production.yaml \ No newline at end of file diff --git a/production1/extra b/production1/extra new file mode 120000 index 0000000..440decb --- /dev/null +++ b/production1/extra @@ -0,0 +1 @@ +../common/extra \ No newline at end of file diff --git a/production1/inventory/1-psnc.yaml b/production1/inventory/1-psnc.yaml new file mode 100644 index 0000000..110d1e4 --- /dev/null +++ b/production1/inventory/1-psnc.yaml @@ -0,0 +1,33 @@ +--- +fip: + hosts: + 62.3.174.184: + +master: + hosts: + 192.168.0.80: + # must be IPv4 address or hostname + kube_server: 192.168.0.80 
+ +ingress: + hosts: + 192.168.0.74: + +nfs: + hosts: + 192.168.0.129: + +worker: + hosts: + 192.168.0.98: + 192.168.0.109: + 192.168.0.147: + +gpu: + hosts: + +# using public IP of kube_server for ansible delegate_to +kube_server: + hosts: + 192.168.0.80: + ansible_host: 192.168.0.80 diff --git a/production1/inventory/99-all.yaml b/production1/inventory/99-all.yaml new file mode 100644 index 0000000..7bca22b --- /dev/null +++ b/production1/inventory/99-all.yaml @@ -0,0 +1,21 @@ +--- +allnodes: + children: + master: + ingress: + nfs: + worker: + gpu: + +all: + vars: + ansible_become: yes + ansible_user: egi + ansible_ssh_common_args: '-o ProxyCommand="ssh -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null -W %h:%p -q egi@{{ groups["fip"][0] }}" -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null' + + mail_local: true + site_name: psnc-production1 + vault_mount_point: secrets/users/e1662e20-e34b-468c-b0ce-d899bc878364@egi.eu/eosc-production + + notebooks_hostname: eu-1.notebooks.open-science-cloud.ec.europa.eu + grafana_hostname: grafana.eu-1.notebooks.open-science-cloud.ec.europa.eu diff --git a/production1/playbooks/cvmfs.yaml b/production1/playbooks/cvmfs.yaml new file mode 120000 index 0000000..2e82cca --- /dev/null +++ b/production1/playbooks/cvmfs.yaml @@ -0,0 +1 @@ +../../common/playbooks/cvmfs.yaml \ No newline at end of file diff --git a/production1/playbooks/files/calico.yaml b/production1/playbooks/files/calico.yaml new file mode 120000 index 0000000..732c864 --- /dev/null +++ b/production1/playbooks/files/calico.yaml @@ -0,0 +1 @@ +../../../common/playbooks/files/calico.yaml \ No newline at end of file diff --git a/production1/playbooks/files/etc b/production1/playbooks/files/etc new file mode 120000 index 0000000..ed53b87 --- /dev/null +++ b/production1/playbooks/files/etc @@ -0,0 +1 @@ +../../../common/playbooks/files/etc \ No newline at end of file diff --git a/production1/playbooks/files/jupyterhub-jwt.yaml 
b/production1/playbooks/files/jupyterhub-jwt.yaml new file mode 120000 index 0000000..59f9ac2 --- /dev/null +++ b/production1/playbooks/files/jupyterhub-jwt.yaml @@ -0,0 +1 @@ +../../../common/playbooks/files/jupyterhub-jwt.yaml \ No newline at end of file diff --git a/production1/playbooks/files/usr b/production1/playbooks/files/usr new file mode 120000 index 0000000..b034223 --- /dev/null +++ b/production1/playbooks/files/usr @@ -0,0 +1 @@ +../../../common/playbooks/files/usr \ No newline at end of file diff --git a/production1/playbooks/k8s.yaml b/production1/playbooks/k8s.yaml new file mode 120000 index 0000000..117aed6 --- /dev/null +++ b/production1/playbooks/k8s.yaml @@ -0,0 +1 @@ +../../common/playbooks/k8s.yaml \ No newline at end of file diff --git a/production1/playbooks/notebooks.yaml b/production1/playbooks/notebooks.yaml new file mode 120000 index 0000000..3f1a33f --- /dev/null +++ b/production1/playbooks/notebooks.yaml @@ -0,0 +1 @@ +../../common/playbooks/notebooks.yaml \ No newline at end of file diff --git a/production1/playbooks/public_keys b/production1/playbooks/public_keys new file mode 120000 index 0000000..6ef4918 --- /dev/null +++ b/production1/playbooks/public_keys @@ -0,0 +1 @@ +../../common/playbooks/public_keys \ No newline at end of file diff --git a/production1/playbooks/security-assets.yaml b/production1/playbooks/security-assets.yaml new file mode 120000 index 0000000..6bc8155 --- /dev/null +++ b/production1/playbooks/security-assets.yaml @@ -0,0 +1 @@ +../../common/playbooks/security-assets.yaml \ No newline at end of file diff --git a/production1/playbooks/security-logs.yaml b/production1/playbooks/security-logs.yaml new file mode 120000 index 0000000..0149b19 --- /dev/null +++ b/production1/playbooks/security-logs.yaml @@ -0,0 +1 @@ +../../common/playbooks/security-logs.yaml \ No newline at end of file diff --git a/production1/playbooks/security-scanner.yaml b/production1/playbooks/security-scanner.yaml new file mode 120000 index 
0000000..186bdc1 --- /dev/null +++ b/production1/playbooks/security-scanner.yaml @@ -0,0 +1 @@ +../../common/playbooks/security-scanner.yaml \ No newline at end of file diff --git a/production1/playbooks/squid.yaml b/production1/playbooks/squid.yaml new file mode 120000 index 0000000..114c327 --- /dev/null +++ b/production1/playbooks/squid.yaml @@ -0,0 +1 @@ +../../common/playbooks/squid.yaml \ No newline at end of file diff --git a/production1/playbooks/templates/deepfence-agent.yaml.j2 b/production1/playbooks/templates/deepfence-agent.yaml.j2 new file mode 120000 index 0000000..faf3956 --- /dev/null +++ b/production1/playbooks/templates/deepfence-agent.yaml.j2 @@ -0,0 +1 @@ +../../../common/playbooks/templates/deepfence-agent.yaml.j2 \ No newline at end of file diff --git a/production1/playbooks/templates/etc/exports b/production1/playbooks/templates/etc/exports new file mode 120000 index 0000000..a743a02 --- /dev/null +++ b/production1/playbooks/templates/etc/exports @@ -0,0 +1 @@ +../../../../common/playbooks/templates/etc/exports.ipv4 \ No newline at end of file diff --git a/production1/playbooks/templates/etc/glpi-agent b/production1/playbooks/templates/etc/glpi-agent new file mode 120000 index 0000000..dfa13f5 --- /dev/null +++ b/production1/playbooks/templates/etc/glpi-agent @@ -0,0 +1 @@ +../../../../common/playbooks/templates/etc/glpi-agent \ No newline at end of file diff --git a/production1/playbooks/templates/etc/mailutils.conf b/production1/playbooks/templates/etc/mailutils.conf new file mode 120000 index 0000000..dbd8a1f --- /dev/null +++ b/production1/playbooks/templates/etc/mailutils.conf @@ -0,0 +1 @@ +../../../../common/playbooks/templates/etc/mailutils.conf \ No newline at end of file diff --git a/production1/playbooks/templates/etc/squid b/production1/playbooks/templates/etc/squid new file mode 120000 index 0000000..352b598 --- /dev/null +++ b/production1/playbooks/templates/etc/squid @@ -0,0 +1 @@ 
+../../../../common/playbooks/templates/etc/squid \ No newline at end of file diff --git a/production1/playbooks/templates/fluent-bit-secrets.yaml.j2 b/production1/playbooks/templates/fluent-bit-secrets.yaml.j2 new file mode 120000 index 0000000..c64dcee --- /dev/null +++ b/production1/playbooks/templates/fluent-bit-secrets.yaml.j2 @@ -0,0 +1 @@ +../../../common/playbooks/templates/fluent-bit-secrets.yaml.j2 \ No newline at end of file diff --git a/production1/playbooks/templates/fluent-bit.yaml.j2 b/production1/playbooks/templates/fluent-bit.yaml.j2 new file mode 120000 index 0000000..4ccab19 --- /dev/null +++ b/production1/playbooks/templates/fluent-bit.yaml.j2 @@ -0,0 +1 @@ +../../../common/playbooks/templates/fluent-bit.yaml.j2 \ No newline at end of file diff --git a/production1/playbooks/upgrade.yaml b/production1/playbooks/upgrade.yaml new file mode 120000 index 0000000..0f9e3f4 --- /dev/null +++ b/production1/playbooks/upgrade.yaml @@ -0,0 +1 @@ +../../common/playbooks/upgrade.yaml \ No newline at end of file diff --git a/production1/terraform/.gitignore b/production1/terraform/.gitignore new file mode 120000 index 0000000..a809396 --- /dev/null +++ b/production1/terraform/.gitignore @@ -0,0 +1 @@ +../../common/terraform/.gitignore \ No newline at end of file diff --git a/production1/terraform/cloud-init.yaml b/production1/terraform/cloud-init.yaml new file mode 120000 index 0000000..f315687 --- /dev/null +++ b/production1/terraform/cloud-init.yaml @@ -0,0 +1 @@ +../../common/terraform/cloud-init.yaml \ No newline at end of file diff --git a/production1/terraform/firewall.tf b/production1/terraform/firewall.tf new file mode 120000 index 0000000..0088c12 --- /dev/null +++ b/production1/terraform/firewall.tf @@ -0,0 +1 @@ +../../common/terraform/firewall.tf \ No newline at end of file diff --git a/production1/terraform/network.tf b/production1/terraform/network.tf new file mode 120000 index 0000000..2e79cd9 --- /dev/null +++ b/production1/terraform/network.tf 
@@ -0,0 +1 @@ +../../common/terraform/network.tf \ No newline at end of file diff --git a/production1/terraform/terraform.tfvars b/production1/terraform/terraform.tfvars new file mode 100644 index 0000000..9a5b199 --- /dev/null +++ b/production1/terraform/terraform.tfvars @@ -0,0 +1,45 @@ +# These need to be defined for things to work +ip_pool = "PSNC-EXT-PUB1-EDU" +net_name = "" +net6_name = "" +site_name = "production1" + +# These may need some adjustment for your provider +master_flavor_name = "M1-NET-2vCPU-8R" +worker_flavor_name = "M1-NVME-32vCPU-128R-300D" +# XXX: placeholder (same as the worker flavor) - replace with a real GPU flavor before raising gpu_workers +gpu_flavor_name = "M1-NVME-32vCPU-128R-300D" + +# Number of extra workers +extra_workers = 3 + +# Number of GPU workers +gpu_workers = 0 + +# volumes for docker +docker_volumes_size = 384 + +# NFS volume +nfs_volume_size = 256 + +# scratch volume +scratch_volumes_size = 0 + +# squid volume +squid_volume_size = 128 + +# global firewall rules - public and admin access +security_public_cidr = { + "147.228.0.0/16": "University of West Bohemia in Pilsen", + "2001:718:1801::/48": "University of West Bohemia in Pilsen", + "78.128.246.160/32": "CESNET VPN", + "78.128.247.175/32": "CESNET VPN", + "2001:718:ff05:acb::/64": "CESNET VPN", + "2001:718:ff05:acc::/64": "CESNET VPN", + "147.251.21.79/32": "admin machine - OpenStack project router", + "2001:718:801:432:f816:3eff:feab:fbc8/128": "admin machine", + "145.90.225.224/27": "EGI VPN", + "2001:610:450:80::/64": "EGI VPN", + "78.128.247.55/32": "CESNET monitoring", + "2001:718:ff05:205::55/128": "CESNET monitoring", +} diff --git a/production1/terraform/vars.tf b/production1/terraform/vars.tf new file mode 120000 index 0000000..00c4e3a --- /dev/null +++ b/production1/terraform/vars.tf @@ -0,0 +1 @@ +../../common/terraform/vars.tf \ No newline at end of file diff --git a/production1/terraform/versions.tf b/production1/terraform/versions.tf new file mode 120000 index 0000000..b4eea0e --- /dev/null +++ 
b/production1/terraform/versions.tf @@ -0,0 +1 @@ +../../common/terraform/versions.tf \ No newline at end of file diff --git a/production1/terraform/vms.tf b/production1/terraform/vms.tf new file mode 100644 index 0000000..df3934e --- /dev/null +++ b/production1/terraform/vms.tf @@ -0,0 +1,245 @@ +locals { + nodes = concat([ + openstack_compute_instance_v2.ingress, + openstack_compute_instance_v2.nfs, + ], openstack_compute_instance_v2.worker[*], openstack_compute_instance_v2.gpu[*]) + master_ip = openstack_compute_instance_v2.master.network[0].fixed_ip_v4 + ingress_ip = openstack_compute_instance_v2.ingress.network[0].fixed_ip_v4 + nfs_ip = openstack_compute_instance_v2.nfs.network[0].fixed_ip_v4 + worker_ips = [for s in openstack_compute_instance_v2.worker[*].network[0].fixed_ip_v4 : s] + gpu_ips = [for s in openstack_compute_instance_v2.gpu[*].network[0].fixed_ip_v4 : s] +} + +resource "openstack_networking_floatingip_v2" "public_ip" { + pool = var.ip_pool +} + +data "openstack_images_image_v2" "ubuntu" { + name = "ubuntu-22.04-x86_64-server-cloudimg-20240429" +} + +data "openstack_compute_flavor_v2" "master-flavor" { + name = var.master_flavor_name +} + +data "openstack_compute_flavor_v2" "worker-flavor" { + name = var.worker_flavor_name +} + +data "openstack_compute_flavor_v2" "gpu-flavor" { + name = var.gpu_flavor_name +} + +resource "openstack_compute_instance_v2" "master" { + name = "k8s-${var.site_name}-master" + image_id = data.openstack_images_image_v2.ubuntu.id + flavor_id = data.openstack_compute_flavor_v2.master-flavor.id + security_groups = ["default", openstack_networking_secgroup_v2.ping.name, openstack_networking_secgroup_v2.ssh.name] + user_data = file("cloud-init.yaml") + tags = ["master"] + network { + uuid = openstack_networking_network_v2.local-network.id + } +} + +resource "openstack_compute_instance_v2" "nfs" { + name = "k8s-${var.site_name}-nfs" + image_id = data.openstack_images_image_v2.ubuntu.id + flavor_id = 
data.openstack_compute_flavor_v2.worker-flavor.id + security_groups = ["default", openstack_networking_secgroup_v2.ping.name, openstack_networking_secgroup_v2.ssh.name] + user_data = file("cloud-init.yaml") + tags = ["worker"] + network { + uuid = openstack_networking_network_v2.local-network.id + } +} + +resource "openstack_compute_instance_v2" "ingress" { + name = "k8s-${var.site_name}-w-ingress" + image_id = data.openstack_images_image_v2.ubuntu.id + flavor_id = data.openstack_compute_flavor_v2.worker-flavor.id + security_groups = ["default", openstack_networking_secgroup_v2.ping.name, openstack_networking_secgroup_v2.ssh.name, openstack_networking_secgroup_v2.http.name] + user_data = file("cloud-init.yaml") + tags = ["worker"] + network { + uuid = openstack_networking_network_v2.local-network.id + } +} + +resource "openstack_compute_instance_v2" "worker" { + count = var.extra_workers + name = "k8s-${var.site_name}-worker-${count.index}" + image_id = data.openstack_images_image_v2.ubuntu.id + flavor_id = data.openstack_compute_flavor_v2.worker-flavor.id + security_groups = ["default", openstack_networking_secgroup_v2.ping.name, openstack_networking_secgroup_v2.ssh.name] + user_data = file("cloud-init.yaml") + tags = ["worker"] + network { + uuid = openstack_networking_network_v2.local-network.id + } +} + +resource "openstack_compute_instance_v2" "gpu" { + count = var.gpu_workers + name = "k8s-${var.site_name}-gpu-${count.index}" + image_id = data.openstack_images_image_v2.ubuntu.id + flavor_id = data.openstack_compute_flavor_v2.gpu-flavor.id + security_groups = ["default", openstack_networking_secgroup_v2.ping.name, openstack_networking_secgroup_v2.ssh.name] + user_data = file("cloud-init.yaml") + tags = ["worker"] + network { + uuid = openstack_networking_network_v2.local-network.id + } +} + +resource "openstack_compute_floatingip_associate_v2" "fip" { + floating_ip = openstack_networking_floatingip_v2.public_ip.address + instance_id = 
openstack_compute_instance_v2.ingress.id +} + +resource "openstack_blockstorage_volume_v3" "nfs-volume" { + name = "nfs" + size = var.nfs_volume_size +} + +resource "openstack_compute_volume_attach_v2" "nfs-volume-attach" { + instance_id = openstack_compute_instance_v2.nfs.id + volume_id = openstack_blockstorage_volume_v3.nfs-volume.id +} + +resource "local_file" "volume-script" { + filename = "nfs-volume.sh" + file_permission = "0755" + content = <<EOT +#! /bin/bash -xe +if ! dpkg-query -s xfsprogs >/dev/null 2>&1; then + apt-get update + apt-get install -y xfsprogs +fi +device="${openstack_compute_volume_attach_v2.nfs-volume-attach.device}" +mkfs.xfs -L NFS "$device" || true +grep -q 'LABEL=NFS' /etc/fstab || /bin/echo -e "LABEL=NFS\t/exports\txfs\tdefaults,uquota,pquota\t0\t0" | tee -a /etc/fstab +mkdir /exports 2>/dev/null || true +mount -a +EOT +} + +resource "openstack_blockstorage_volume_v3" "docker-volume" { + count = var.extra_workers + var.gpu_workers + 2 + name = format("docker-%s", local.nodes[count.index].name) + size = var.docker_volumes_size +} + +resource "openstack_compute_volume_attach_v2" "docker-volume-attach" { + count = var.extra_workers + var.gpu_workers + 2 + instance_id = local.nodes[count.index].id + volume_id = openstack_blockstorage_volume_v3.docker-volume[count.index].id +} + +resource "local_file" "docker-volume-script" { + filename = "docker-volume.sh" + file_permission = "0755" + content = <<EOT +#! /bin/bash -xe +volumes="${join("\n", [for n, d in zipmap(tolist(local.nodes[*].name), tolist(openstack_compute_volume_attach_v2.docker-volume-attach[*].device)) : format("%s:%s", n, d)])}" +volume=$(echo "$volumes" | grep "$(hostname):") +device=$(echo "$volume" | cut -d: -f2) +if ! 
dumpe2fs -h "$device" >/dev/null 2>&1; then + mkfs.ext4 -L DOCKER "$device" + grep -q 'LABEL=DOCKER' /etc/fstab || /bin/echo -e "LABEL=DOCKER\t/var/lib/docker/overlay2\text4\tdefaults,x-systemd.before=local-fs.target\t0\t0" | tee -a /etc/fstab + mkdir -p /var/lib/docker/overlay2 2>/dev/null || true + systemctl stop docker kubelet >/dev/null 2>&1 || true + sleep 10 + systemctl stop docker kubelet >/dev/null 2>&1 || true + umount /var/lib/docker/overlay2 2>&1 || true + mount "$device" /mnt + mv /var/lib/docker/overlay2/* /mnt >/dev/null 2>&1 || true + umount /mnt + mount -a + systemctl start docker kubelet >/dev/null 2>&1 || true +fi +EOT +} + +resource "openstack_blockstorage_volume_v3" "squid-volume" { + name = "squid" + size = var.squid_volume_size +} + +resource "openstack_compute_volume_attach_v2" "squid-volume-attach" { + instance_id = openstack_compute_instance_v2.ingress.id + volume_id = openstack_blockstorage_volume_v3.squid-volume.id +} + +resource "local_file" "squid-volume-script" { + filename = "squid-volume.sh" + file_permission = "0755" + content = <<EOT +#! /bin/bash -xe +device="${openstack_compute_volume_attach_v2.squid-volume-attach.device}" +if ! 
dumpe2fs -h "$device" >/dev/null 2>&1; then + mkfs.ext4 -L SQUID "$device" +fi +grep -q 'LABEL=SQUID' /etc/fstab || /bin/echo -e "LABEL=SQUID\t/var/spool/squid\text4\tdefaults,x-systemd.before=local-fs.target\t0\t0" | tee -a /etc/fstab +mkdir /var/spool/squid 2>/dev/null || true +mount -a +EOT +} + +resource "local_file" "inventory" { + filename = "inventory.yaml" + file_permission = "0644" + content = <<EOT +--- +fip: + hosts: + ${openstack_networking_floatingip_v2.public_ip.address}: + +master: + hosts: + ${local.master_ip}: + # must be IPv4 address or hostname + kube_server: ${openstack_compute_instance_v2.master.network[0].fixed_ip_v4} + +ingress: + hosts: + ${local.ingress_ip}: + +nfs: + hosts: + ${local.nfs_ip}: + +worker: + hosts: + ${join("\n ", [for s in local.worker_ips: "${s}:"])} + +gpu: + hosts: + ${join("\n ", [for s in local.gpu_ips : "${s}:"])} + +# using public IP of kube_server for ansible delegate_to +kube_server: + hosts: + ${openstack_compute_instance_v2.master.network[0].fixed_ip_v4}: + ansible_host: ${local.master_ip} +EOT +} + +resource "local_file" "fip" { + filename = "fip.txt" + file_permission = "0644" + content = <<EOT +${openstack_networking_floatingip_v2.public_ip.address} +EOT +} + +resource "local_file" "hosts" { + filename = "hosts.txt" + file_permission = "0644" + content = <<EOT +${local.master_ip} +${local.ingress_ip} +${local.nfs_ip} +${join("\n", concat(local.worker_ips, local.gpu_ips))} +EOT +} diff --git a/production2/terraform/terraform.tfvars b/production2/terraform/terraform.tfvars index 1b58435..31c9623 100644 --- a/production2/terraform/terraform.tfvars +++ b/production2/terraform/terraform.tfvars @@ -39,9 +39,8 @@ security_public_cidr = { "78.128.247.175/32": "", "2001:718:ff05:acb::/64": "", "2001:718:ff05:acc::/64": "", - "147.251.21.79/32": "", # admin machine (OpenStack project router) - "147.228.21.79/32": "", + "147.251.21.79/32": "", # admin machine "2001:718:801:432:f816:3eff:feab:fbc8/128": "", # EGI VPN diff 
--git a/staging1/terraform/network.tf b/staging1/terraform/network.tf deleted file mode 100644 index c244efe..0000000 --- a/staging1/terraform/network.tf +++ /dev/null @@ -1,25 +0,0 @@ -data "openstack_networking_network_v2" "public-network" { - name = var.ip_pool -} - -resource "openstack_networking_network_v2" "local-network" { - name = "local-network" - admin_state_up = "true" -} - -resource "openstack_networking_subnet_v2" "local-network-subnet" { - name = "local-network-subnet" - network_id = openstack_networking_network_v2.local-network.id - cidr = "192.168.0.0/24" -} - -resource "openstack_networking_router_v2" "local-router" { - name = "local-router" - admin_state_up = true - external_network_id = data.openstack_networking_network_v2.public-network.id -} - -resource "openstack_networking_router_interface_v2" "router_interface_1" { - router_id = openstack_networking_router_v2.local-router.id - subnet_id = openstack_networking_subnet_v2.local-network-subnet.id -} diff --git a/staging1/terraform/network.tf b/staging1/terraform/network.tf new file mode 120000 index 0000000..2e79cd9 --- /dev/null +++ b/staging1/terraform/network.tf @@ -0,0 +1 @@ +../../common/terraform/network.tf \ No newline at end of file -- GitLab