Skip to content
Snippets Groups Projects
Commit 1e7a6dae authored by František Dvořák's avatar František Dvořák
Browse files

Rename EOSC deployments directories

parent 406db392
No related branches found
No related tags found
No related merge requests found
Showing
with 0 additions and 547 deletions
#! /bin/bash
#
# Example to setup new site with 3rd party nodes
#
# Bootstraps a site: provisions infrastructure with Terraform, registers
# dynamic DNS, waits for the nodes to come up, and then configures them
# with Ansible (volumes, Kubernetes, notebooks).
#
# Shell options are set here instead of on the shebang line so they also
# apply when the script is started as "bash <script>".
set -o errexit -o xtrace

# Provision the infrastructure and install the generated inventory.
cd terraform && terraform init && terraform apply
cd -
cp -pv terraform/inventory.yaml inventory/1-cesnet.yaml

# dynamic DNS: register the public floating IP at https://nsupdate.fedcloud.eu
ip="$(head -n 1 <terraform/fip.txt)"

# Temporarily disable xtrace so the credentials fetched from Vault are not
# echoed to the terminal; the previous xtrace state is restored afterwards.
shellstate=$(shopt -po xtrace)
set +o xtrace
vault_prefix=secrets/users/e1662e20-e34b-468c-b0ce-d899bc878364@egi.eu/eosc-dev
# Vault renders the value as "map[host1:token1 host2:token2 ...]"; strip the
# "map[...]" wrapper to get whitespace-separated "host:token" pairs.
FEDCLOUD_DYNAMIC_DNS=$(vault read -field data $vault_prefix/FEDCLOUD_DYNAMIC_DNS | grep ^map | head -n 1 | sed 's/map\[\(.*\)\]/\1/')
for auth in $FEDCLOUD_DYNAMIC_DNS; do
	# log the request with the token masked, then perform the real update
	echo "curl -i -X GET -u $(echo "$auth" | cut -d: -f1):XXX https://nsupdate.fedcloud.eu/nic/update?myip=$ip"
	curl -i -X GET -u "$auth" "https://nsupdate.fedcloud.eu/nic/update?myip=$ip"
done
eval "$shellstate"

echo "Terraform finished. Continue? (CTRL-C to quit)"
read -r _

# wait until every provisioned node answers ping and accepts ssh
while read -r ip; do
	while ! ping -c 1 "$ip"; do sleep 5; done
	# drop any stale host key left over from a previous deployment
	ssh-keygen -R "$ip"
	while ! ssh egi@"$ip" -o ConnectTimeout=10 -o PreferredAuthentications=publickey -o StrictHostKeyChecking=no :; do sleep 10; done
done <terraform/hosts.txt

# check ssh access
ansible -m command -a 'uname -a' allnodes
# wait cloud-init to finish on all nodes
ansible -m shell -a 'while ! test -f /var/lib/cloud/instance/boot-finished; do sleep 2; done' allnodes

# setup volumes (the scripts are generated by terraform)
ansible -m copy -a 'src=terraform/nfs-volume.sh dest=/root/ mode=preserve' nfs
ansible -m command -a '/root/nfs-volume.sh' nfs
ansible -m copy -a 'src=terraform/squid-volume.sh dest=/root/ mode=preserve' 'ingress[0]'
ansible -m command -a '/root/squid-volume.sh' 'ingress[0]'

# k8s + notebooks
ansible-playbook playbooks/k8s.yaml
# ansible-playbook playbooks/squid.yaml
# ansible-playbook playbooks/cvmfs.yaml

# wait until all pods are Running or Completed
while ansible -m command -a 'kubectl get pods --all-namespaces' master | tail -n +3 | grep -Ev ' (Running|Completed) '; do sleep 5; done

# docker runtime directory after Kubernetes deployment (problem with unmounts)
ansible -m copy -a 'src=terraform/docker-volume.sh dest=/root/ mode=preserve' 'ingress nfs worker gpu'
ansible -m command -a '/root/docker-volume.sh' 'ingress nfs worker gpu'
---
# Static Ansible inventory (copy of terraform-generated terraform/inventory.yaml).
# Hosts are addressed by their public IPv6 addresses.
fip:
  hosts:
    147.251.124.130:
master:
  hosts:
    2001:718:801:432:f816:3eff:feba:7a74:
      # must be IPv4 address or hostname
      kube_server: 192.168.0.209
ingress:
  hosts:
    2001:718:801:432:f816:3eff:fe39:7b21:
nfs:
  hosts:
    2001:718:801:432:f816:3eff:fee1:90e3:
worker:
  hosts:
    2001:718:801:432:f816:3eff:fe9b:fffe:
    2001:718:801:432:f816:3eff:fe04:2d78:
# using public IP of kube_server for ansible delegate_to
kube_server:
  hosts:
    192.168.0.209:
      ansible_host: 2001:718:801:432:f816:3eff:feba:7a74
---
# Inventory group layout and site-wide variables for the cesnet-mcc deployment.
allnodes:
  children:
    master:
    ingress:
    nfs:
    worker:
    gpu:
all:
  vars:
    # remote tasks run via privilege escalation as the "egi" cloud user
    ansible_become: true
    ansible_user: egi
    site_name: cesnet-mcc
    # Vault path used by the playbooks to look up per-site secrets
    vault_mount_point: secrets/users/e1662e20-e34b-468c-b0ce-d899bc878364@egi.eu/eosc-dev
    gateway_hostname: gateway-cesnet.eosc.zcu.cz
    grafana_hostname: grafana-cesnet.eosc.zcu.cz
---
# Deploy Jupyter Enterprise Gateway into the Kubernetes cluster via Helm.
# The gateway auth token is fetched from Vault; the Helm chart tarball is
# downloaded from the upstream GitHub release of the same version.
- name: Enterprise Gateway Deployment
  hosts: master
  become: true
  vars:
    namespace: gateway
    # Enterprise Gateway release (chart and kernelspecs) to deploy
    version: 3.2.2
  tasks:
    - name: Get Secrets from Vault for gateway
      vars:
        name: cesnet-mcc
      set_fact:
        # reads "<vault_mount_point>/gateway-<name>" from Vault
        secrets: "{{ lookup('community.hashi_vault.hashi_vault', (vault_mount_point, 'gateway-' + name) | join('/'), token_validate=false) }}"
    - name: Enterprise Gateway Configuration
      # Write the Helm values file consumed by the install/upgrade task below.
      # NOTE(review): the port name "http-reponse" inside the values looks like
      # a typo for "http-response", but renaming it would change the rendered
      # Kubernetes Service — left untouched.
      copy:
        dest: /tmp/gateway.yaml
        mode: 0640
        content: |
          authToken: "{{ secrets['authtoken'] }}"
          global:
            rbac: true
          deployment:
            replicas: 1
            # serviceAccountName: 'enterprise-gateway-sa'
            terminationGracePeriodSeconds: 60
          ingress:
            enabled: true
            annotations:
              kubernetes.io/ingress.class: "nginx"
              kubernetes.io/tls-acme: "true"
            hostName: "{{ gateway_hostname }}"
            tls:
              - hosts:
                  - "{{ gateway_hostname }}"
                secretName: acme-tls-gateway
          kernel:
            # disabled: Spark, not installing Spark on gatewy clusters
            allowedKernels:
              - r_kubernetes
              - python_kubernetes
              # missing version 3.2.2
              # - python_tf_kubernetes
              # missing version 3.2.2
              # - python_tf_gpu_kubernetes
              - scala_kubernetes
          kernelspecs:
            image: valtri/eosc-notebooks-kernelspecs:latest
            imagePullPolicy: Always
          kip:
            enabled: true
            # serviceAccountName: 'kernel-image-puller-sa'
            # podSecurityPolicy:
            #   create: true
          service:
            type: "ClusterIP"
            ports:
              # Enterprise Gateway reqursts
              - name: http
                port: 8888
                targetPort: 8888
              # Kernel connection info responses
              - name: http-reponse
                port: 8887
                targetPort: 8887
    - name: Enterprise Gateway Download
      get_url:
        url: "https://github.com/jupyter-server/enterprise_gateway/releases/download/v{{ version }}/jupyter_enterprise_gateway_helm-{{ version }}.tar.gz"
        dest: "/tmp/jupyter_enterprise_gateway_helm-{{ version }}.tar.gz"
        mode: 0644
    - name: Enterprise Gateway Helm
      # Install the chart on first run (helm status fails), upgrade afterwards.
      vars:
        config: >-
          --namespace {{ namespace }}
          --kube-context kubernetes-admin@kubernetes
          -f /tmp/gateway.yaml
          enterprise-gateway
          /tmp/jupyter_enterprise_gateway_helm-{{ version }}.tar.gz
      shell: |-
        helm status --namespace {{ namespace }} enterprise-gateway
        if [ $? -ne 0 ]; then
          kubectl create namespace {{ namespace }} || :
          helm install {{ config }}
        else
          helm upgrade {{ config }}
        fi
      environment:
        KUBECONFIG: /etc/kubernetes/admin.conf
        PATH: /sbin:/bin:/usr/sbin:/usr/bin:/usr/local/bin
      # always reported as changed: helm install/upgrade runs unconditionally
      changed_when: true
      when: true
# Terraform input values for the cesnet-mcc deployment.

# These need to be defined for things to work
ip_pool = "public-muni-147-251-124-GROUP"
net_name = "group-project-network"
net6_name = "public-muni-v6-432"
site_name = "cesnet-mcc"

# These may need some adjustment for your provider
master_flavor_name = "standard.medium"
worker_flavor_name = "standard.large"
gpu_flavor_name = "a3.32core-240ram-1t4"

# Number of extra workers
extra_workers = 2
# Number of GPU workers
gpu_workers = 0

# volumes for docker (size in GB)
docker_volumes_size = 384
# NFS volume (size in GB)
nfs_volume_size = 256
# scratch volume (size in GB)
scratch_volumes_size = 128
# squid volume (size in GB)
squid_volume_size = 128
locals {
  # All non-master nodes, in a fixed order (ingress, nfs, workers, gpus);
  # the docker volumes below are created/attached by index into this list.
  nodes = concat([
    openstack_compute_instance_v2.ingress,
    openstack_compute_instance_v2.nfs,
  ], openstack_compute_instance_v2.worker[*], openstack_compute_instance_v2.gpu[*])

  # The IPv6 address may be reported wrapped in brackets ("[addr]");
  # strip the brackets so the plain address can be used in inventories.
  # network[1] is the IPv6 network (second network block of each instance).
  master_ip = replace(openstack_compute_instance_v2.master.network[1].fixed_ip_v6, "/\\[(.*)\\]/", "$1")
  ingress_ip = replace(openstack_compute_instance_v2.ingress.network[1].fixed_ip_v6, "/\\[(.*)\\]/", "$1")
  nfs_ip = replace(openstack_compute_instance_v2.nfs.network[1].fixed_ip_v6, "/\\[(.*)\\]/", "$1")
  worker_ips = [for s in openstack_compute_instance_v2.worker[*].network[1].fixed_ip_v6 : replace(s, "/\\[(.*)\\]/", "$1")]
  gpu_ips = [for s in openstack_compute_instance_v2.gpu[*].network[1].fixed_ip_v6 : replace(s, "/\\[(.*)\\]/", "$1")]
}
# Security groups
# Allow ICMP echo (ping) over IPv4 and IPv6.
resource "openstack_compute_secgroup_v2" "ping" {
  name = "ping"
  description = "ICMP for ping"
  rule {
    # for ICMP, from_port/to_port carry the ICMP type/code (8 = echo request)
    from_port = 8
    to_port = 0
    ip_protocol = "icmp"
    cidr = "0.0.0.0/0"
  }
  rule {
    # ICMPv6 type 128 = echo request
    from_port = 128
    to_port = 0
    ip_protocol = "ipv6-icmp"
    cidr = "::/0"
    # initial installation (bug in terraform): ip_protocol = "icmp"
  }
}
# Allow inbound ssh (port 22) over IPv4 and IPv6.
resource "openstack_compute_secgroup_v2" "ssh" {
  name = "ssh"
  description = "ssh connection"
  rule {
    from_port = 22
    to_port = 22
    ip_protocol = "tcp"
    cidr = "0.0.0.0/0"
  }
  rule {
    from_port = 22
    to_port = 22
    ip_protocol = "tcp"
    cidr = "::/0"
  }
}
# Allow inbound http (80) and https (443) over IPv4 and IPv6
# (used by the ingress node).
resource "openstack_compute_secgroup_v2" "http" {
  name = "http"
  description = "http/https"
  rule {
    from_port = 80
    to_port = 80
    ip_protocol = "tcp"
    cidr = "0.0.0.0/0"
  }
  rule {
    from_port = 80
    to_port = 80
    ip_protocol = "tcp"
    cidr = "::/0"
  }
  rule {
    from_port = 443
    to_port = 443
    ip_protocol = "tcp"
    cidr = "0.0.0.0/0"
  }
  rule {
    from_port = 443
    to_port = 443
    ip_protocol = "tcp"
    cidr = "::/0"
  }
}
# Public floating IP; associated with the ingress node below.
resource "openstack_networking_floatingip_v2" "public_ip" {
  pool = var.ip_pool
}
# Base image and instance flavors, looked up by name at the provider.
data "openstack_images_image_v2" "ubuntu" {
  name = "ubuntu-jammy-x86_64"
}
data "openstack_compute_flavor_v2" "master-flavor" {
  name = var.master_flavor_name
}
data "openstack_compute_flavor_v2" "worker-flavor" {
  name = var.worker_flavor_name
}
data "openstack_compute_flavor_v2" "gpu-flavor" {
  name = var.gpu_flavor_name
}
# Kubernetes control-plane node (dual network: IPv4 private + IPv6).
resource "openstack_compute_instance_v2" "master" {
  name = "k8s-${var.site_name}-master"
  image_id = data.openstack_images_image_v2.ubuntu.id
  flavor_id = data.openstack_compute_flavor_v2.master-flavor.id
  # NOTE(review): references a pre-existing "all" security group instead of
  # the ping/ssh groups defined in this configuration — confirm intended.
  security_groups = ["default", "all"]
  user_data = file("cloud-init.yaml")
  tags = ["master"]
  network {
    name = var.net_name
  }
  network {
    name = var.net6_name
  }
}
# NFS server node; the NFS data volume is attached separately below.
resource "openstack_compute_instance_v2" "nfs" {
  name = "k8s-${var.site_name}-nfs"
  image_id = data.openstack_images_image_v2.ubuntu.id
  flavor_id = data.openstack_compute_flavor_v2.worker-flavor.id
  security_groups = ["default", openstack_compute_secgroup_v2.ping.name, openstack_compute_secgroup_v2.ssh.name]
  user_data = file("cloud-init.yaml")
  tags = ["worker"]
  network {
    name = var.net_name
  }
  network {
    name = var.net6_name
  }
}
# Worker node that also serves ingress traffic (http/https open,
# floating IP attached below).
resource "openstack_compute_instance_v2" "ingress" {
  name = "k8s-${var.site_name}-w-ingress"
  image_id = data.openstack_images_image_v2.ubuntu.id
  flavor_id = data.openstack_compute_flavor_v2.worker-flavor.id
  security_groups = ["default", openstack_compute_secgroup_v2.ping.name, openstack_compute_secgroup_v2.ssh.name, openstack_compute_secgroup_v2.http.name]
  user_data = file("cloud-init.yaml")
  tags = ["worker"]
  network {
    name = var.net_name
  }
  network {
    name = var.net6_name
  }
}
# Plain worker nodes; count controlled by var.extra_workers.
resource "openstack_compute_instance_v2" "worker" {
  count = var.extra_workers
  name = "k8s-${var.site_name}-worker-${count.index}"
  image_id = data.openstack_images_image_v2.ubuntu.id
  flavor_id = data.openstack_compute_flavor_v2.worker-flavor.id
  security_groups = ["default", openstack_compute_secgroup_v2.ping.name, openstack_compute_secgroup_v2.ssh.name]
  user_data = file("cloud-init.yaml")
  tags = ["worker"]
  network {
    name = var.net_name
  }
  network {
    name = var.net6_name
  }
}
# GPU worker nodes; count controlled by var.gpu_workers (0 disables them).
resource "openstack_compute_instance_v2" "gpu" {
  count = var.gpu_workers
  name = "k8s-${var.site_name}-gpu-${count.index}"
  image_id = data.openstack_images_image_v2.ubuntu.id
  flavor_id = data.openstack_compute_flavor_v2.gpu-flavor.id
  security_groups = ["default", openstack_compute_secgroup_v2.ping.name, openstack_compute_secgroup_v2.ssh.name]
  user_data = file("cloud-init.yaml")
  tags = ["worker"]
  network {
    name = var.net_name
  }
  network {
    name = var.net6_name
  }
}
# Attach the public floating IP to the ingress node.
resource "openstack_compute_floatingip_associate_v2" "fip" {
  floating_ip = openstack_networking_floatingip_v2.public_ip.address
  instance_id = openstack_compute_instance_v2.ingress.id
}
# Block volume for NFS exports, attached to the nfs node.
resource "openstack_blockstorage_volume_v3" "nfs-volume" {
  name = "nfs"
  size = var.nfs_volume_size
}
resource "openstack_compute_volume_attach_v2" "nfs-volume-attach" {
  instance_id = openstack_compute_instance_v2.nfs.id
  volume_id = openstack_blockstorage_volume_v3.nfs-volume.id
}
# Generate nfs-volume.sh: formats the attached volume with XFS (quota
# options enabled) and mounts it at /exports. Copied to the nfs node and
# executed there by the site setup script. The embedded device path comes
# from the volume attachment above.
resource "local_file" "volume-script" {
  filename = "nfs-volume.sh"
  file_permission = "0755"
  content = <<EOT
#! /bin/bash -xe
if ! dpkg-query -s xfsprogs >/dev/null 2>&1; then
apt-get update
apt-get install -y xfsprogs
fi
device="${openstack_compute_volume_attach_v2.nfs-volume-attach.device}"
mkfs.xfs -L NFS "$device" || true
grep -q 'LABEL=NFS' /etc/fstab || /bin/echo -e "LABEL=NFS\t/exports\txfs\tdefaults,uquota,pquota\t0\t0" | tee -a /etc/fstab
mkdir /exports 2>/dev/null || true
mount -a
EOT
}
# One docker volume per non-master node: ingress + nfs (the "+ 2") plus all
# extra and GPU workers; indexed against local.nodes, which uses the same order.
resource "openstack_blockstorage_volume_v3" "docker-volume" {
  count = var.extra_workers + var.gpu_workers + 2
  name = format("docker-%s", local.nodes[count.index].name)
  size = var.docker_volumes_size
}
resource "openstack_compute_volume_attach_v2" "docker-volume-attach" {
  count = var.extra_workers + var.gpu_workers + 2
  instance_id = local.nodes[count.index].id
  volume_id = openstack_blockstorage_volume_v3.docker-volume[count.index].id
}
# Generate docker-volume.sh: moves /var/lib/docker/overlay2 onto the per-node
# docker volume. The script embeds a "hostname:device" map for all nodes and
# each node selects its own attachment device by hostname at run time.
# NOTE(review): "umount ... 2>&1" inside the script looks like it was meant
# to be "2>/dev/null" — harmless, kept byte-identical here.
resource "local_file" "docker-volume-script" {
  filename = "docker-volume.sh"
  file_permission = "0755"
  content = <<EOT
#! /bin/bash -xe
volumes="${join("\n", [for n, d in zipmap(tolist(local.nodes[*].name), tolist(openstack_compute_volume_attach_v2.docker-volume-attach[*].device)) : format("%s:%s", n, d)])}"
volume=$(echo "$volumes" | grep "$(hostname):")
device=$(echo "$volume" | cut -d: -f2)
if ! dumpe2fs -h "$device" >/dev/null 2>&1; then
mkfs.ext4 -L DOCKER "$device"
grep -q 'LABEL=DOCKER' /etc/fstab || /bin/echo -e "LABEL=DOCKER\t/var/lib/docker/overlay2\text4\tdefaults,x-systemd.before=local-fs.target\t0\t0" | tee -a /etc/fstab
mkdir -p /var/lib/docker/overlay2 2>/dev/null || true
systemctl stop docker kubelet >/dev/null 2>&1 || true
sleep 10
systemctl stop docker kubelet >/dev/null 2>&1 || true
umount /var/lib/docker/overlay2 2>&1 || true
mount "$device" /mnt
mv /var/lib/docker/overlay2/* /mnt >/dev/null 2>&1 || true
umount /mnt
mount -a
systemctl start docker kubelet >/dev/null 2>&1 || true
fi
EOT
}
# Block volume for the squid cache, attached to the ingress node.
resource "openstack_blockstorage_volume_v3" "squid-volume" {
  name = "squid"
  size = var.squid_volume_size
}
resource "openstack_compute_volume_attach_v2" "squid-volume-attach" {
  instance_id = openstack_compute_instance_v2.ingress.id
  volume_id = openstack_blockstorage_volume_v3.squid-volume.id
}
# Generate squid-volume.sh: formats the squid volume with ext4 (only if it
# has no filesystem yet) and mounts it at /var/spool/squid. Executed on the
# ingress node by the site setup script.
resource "local_file" "squid-volume-script" {
  filename = "squid-volume.sh"
  file_permission = "0755"
  content = <<EOT
#! /bin/bash -xe
device="${openstack_compute_volume_attach_v2.squid-volume-attach.device}"
if ! dumpe2fs -h "$device" >/dev/null 2>&1; then
mkfs.ext4 -L SQUID "$device"
fi
grep -q 'LABEL=SQUID' /etc/fstab || /bin/echo -e "LABEL=SQUID\t/var/spool/squid\text4\tdefaults,x-systemd.before=local-fs.target\t0\t0" | tee -a /etc/fstab
mkdir /var/spool/squid 2>/dev/null || true
mount -a
EOT
}
# Render the Ansible inventory with the addresses of the provisioned nodes
# (hosts keyed by public IPv6; master also carries its private IPv4 as
# kube_server). Copied into the inventory directory by the setup script.
resource "local_file" "inventory" {
  filename = "inventory.yaml"
  file_permission = "0644"
  content = <<EOT
---
fip:
  hosts:
    ${openstack_networking_floatingip_v2.public_ip.address}:
master:
  hosts:
    ${local.master_ip}:
      # must be IPv4 address or hostname
      kube_server: ${openstack_compute_instance_v2.master.network[0].fixed_ip_v4}
ingress:
  hosts:
    ${local.ingress_ip}:
nfs:
  hosts:
    ${local.nfs_ip}:
worker:
  hosts:
    ${join("\n    ", [for s in local.worker_ips : "${s}:"])}
gpu:
  hosts:
    ${join("\n    ", [for s in local.gpu_ips : "${s}:"])}
# using public IP of kube_server for ansible delegate_to
kube_server:
  hosts:
    ${openstack_compute_instance_v2.master.network[0].fixed_ip_v4}:
      ansible_host: ${local.master_ip}
EOT
}
# Write the floating IP to fip.txt; read by the setup script for dynamic DNS.
resource "local_file" "fip" {
  filename = "fip.txt"
  file_permission = "0644"
  content = <<EOT
${openstack_networking_floatingip_v2.public_ip.address}
EOT
}
# Write all node addresses (one per line) to hosts.txt; the setup script
# iterates over it to wait for ping/ssh availability.
resource "local_file" "hosts" {
  filename = "hosts.txt"
  file_permission = "0644"
  content = <<EOT
${local.master_ip}
${local.ingress_ip}
${local.nfs_ip}
${join("\n", concat(local.worker_ips, local.gpu_ips))}
EOT
}
File moved
File moved
File moved
File moved
File moved
File moved
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment