diff --git a/common/deployments/hub-production.yaml b/common/deployments/hub-production.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..af64deb3ae5f58c06d12769aea8c044dfd7989ec
--- /dev/null
+++ b/common/deployments/hub-production.yaml
@@ -0,0 +1,327 @@
+---
+proxy:
+  service:
+    type: NodePort
+
+ingress:
+  enabled: true
+  annotations:
+    kubernetes.io/ingress.class: "nginx"
+    kubernetes.io/tls-acme: "true"
+  hosts:
+    - "{{ notebooks_hostname }}"
+  tls:
+    - hosts:
+        - "{{ notebooks_hostname }}"
+      secretName: acme-tls-hub
+
+singleuser:
+  # keep resource limits in sync with:
+  # - profileList
+  storage:
+    type: none
+    extraVolumes:
+      - name: cvmfs-host
+        hostPath:
+          path: /cvmfs
+          type: Directory
+      - name: owncloud-home
+        empty_dir:
+      # - name: scratch
+      #   ephemeral:
+      #     volumeClaimTemplate:
+      #       spec:
+      #         accessModes: [ "ReadWriteOnce" ]
+      #         storageClassName: local-path
+      #         resources:
+      #           requests:
+      #             storage: "10Gi"
+    extraVolumeMounts:
+      - name: cvmfs-host
+        mountPath: "/cvmfs:shared"
+      - name: owncloud-home
+        mountPath: '/home/jovyan:shared'
+      # - name: scratch
+      #   mountPath: '/scratch'
+  memory:
+    limit: 4G
+    guarantee: 512M
+  cpu:
+    limit: 2
+    guarantee: .2
+  defaultUrl: "/lab"
+  image:
+    name: eginotebooks/single-user
+    tag: "sha-6d48e61"
+  profileList:
+    - display_name: Small Environment - 2 vCPU / 4 GB RAM
+      description: >
+        The notebook environment includes Python, R, Julia and Octave kernels.
+      default: true
+      kubespawner_override:
+        args:
+          - "--CondaKernelSpecManager.env_filter='/opt/conda$'"
+        extra_annotations:
+          "egi.eu/flavor": "small-environment-2-vcpu-4-gb-ram"
+      vo_claims:
+        - urn:geant:eosc-federation.eu:res:notebooks.open-science-cloud.ec.europa.eu:2-vcpu-4-gb-ram:act:ppa
+    - display_name: Medium Environment - 4 vCPU / 8 GB RAM
+      description: >
+        The notebook environment includes Python, R, Julia and Octave kernels.
+      kubespawner_override:
+        args:
+          - "--CondaKernelSpecManager.env_filter='/opt/conda$'"
+        extra_annotations:
+          "egi.eu/flavor": "medium-environment-4-vcpu-8-gb-ram"
+        cpu_guarantee: 0.4
+        cpu_limit: 4
+        mem_guarantee: 1G
+        mem_limit: 8G
+      vo_claims:
+        - urn:geant:eosc-federation.eu:res:notebooks.open-science-cloud.ec.europa.eu:4-vcpu-8-gb-ram:act:ppa
+    - display_name: Large Environment - 8 vCPU / 16 GB RAM / GPU
+      description: >
+        The notebook environment includes Python, R, Julia and Octave kernels with GPU.
+      kubespawner_override:
+        args:
+          - "--CondaKernelSpecManager.env_filter='/opt/conda$'"
+        cpu_guarantee: 0.8
+        cpu_limit: 8
+        mem_guarantee: 2G
+        mem_limit: 16G
+        extra_annotations:
+          "egi.eu/flavor": "large-environment-8-vcpu-16-gb-ram-gpu"
+        extra_resource_guarantees:
+          nvidia.com/gpu: 1
+        extra_resource_limits:
+          nvidia.com/gpu: 1
+      vo_claims:
+        - urn:geant:eosc-federation.eu:res:notebooks.open-science-cloud.ec.europa.eu:8-vcpu-16-gb-ram-gpu:act:ppa
+  cmd: jupyterhub-singleuser-webdav-wrapper
+  extraFiles:
+    wait-remote-home.sh:
+      mode: 0755
+      mountPath: /usr/local/bin/jupyterhub-wait-remote-home
+      stringData: |-
+        #! /bin/sh
+        i=0
+        while ! grep '^webdav-fs: /home/jovyan ' /proc/mounts && test $i -lt 30; do
+          echo 'Waiting for ownCloud mount...'
+          sleep 0.5
+          i=$((i+1))
+        done
+    singleuser-webdav-wrapper.sh:
+      mode: 0755
+      mountPath: /usr/local/bin/jupyterhub-singleuser-webdav-wrapper
+      stringData: |-
+        #! /bin/sh
+        #
+        # Dirty hack to make the remote mount on the home directory work properly:
+        #
+        # 1) wait for the webdav sidecar to kick in
+        # 2) change directory to the mounted version of itself
+        # 3) launch the notebook server
+        #
+        /usr/local/bin/jupyterhub-wait-remote-home
+
+        cd .
+
+        exec jupyterhub-singleuser \
+          --FileCheckpoints.checkpoint_dir='/home/jovyan/.notebookCheckpoints' \
+          --NotebookNotary.db_file=':memory:' \
+          "$@"
+    disable-labnews:
+      mode: 0644
+      mountPath: /opt/conda/share/jupyter/lab/settings/overrides.json
+      stringData: |-
+        {
+          "@jupyterlab/apputils-extension:notification": {
+            "checkForUpdates": "false",
+            "fetchNews": "false"
+          },
+          "@jupyterlab/application-extension:context-menu": {
+            "contextMenu": [
+              {
+                "command": "filebrowser:share-main",
+                "selector": ".jp-DirListing-item[data-isdir]",
+                "disabled": true
+              }
+            ]
+          }
+        }
+
+hub:
+  services:
+    status:
+      url: "http://status-web/"
+      admin: true
+    jwt:
+      url: "http://jwt/"
+      display: false
+      # recommended to keep in sync with common/playbooks/files/jupyterhub-jwt.yaml
+  image:
+    name: eginotebooks/hub
+    tag: "sha-ae769eb"
+  config:
+    Authenticator:
+      enable_auth_state: true
+      admin_users:
+        # valtri@civ.zcu.cz
+        - 94d3cde7-3121-4b33-b4c2-526c67e8cb38@eosc-federation.eu
+      allowed_groups:
+        - urn:geant:eosc-federation.eu:res:notebooks.open-science-cloud.ec.europa.eu:2-vcpu-4-gb-ram:act:ppa
+        - urn:geant:eosc-federation.eu:res:notebooks.open-science-cloud.ec.europa.eu:4-vcpu-8-gb-ram:act:ppa
+        - urn:geant:eosc-federation.eu:res:notebooks.open-science-cloud.ec.europa.eu:8-vcpu-16-gb-ram-gpu:act:ppa
+      admin_groups:
+        - urn:geant:eosc-federation.eu:group:asg:notebooks.open-science-cloud.ec.europa.eu:role=admin
+      claim_groups_key: "entitlements"
+    EGICheckinAuthenticator:
+      checkin_host: "{{ secret['checkin_host'] }}"
+      authorize_url: "https://{{ secret['checkin_host'] }}/OIDC/authorization"
+      token_url: "https://{{ secret['checkin_host'] }}/OIDC/token"
+      userdata_url: "https://{{ secret['checkin_host'] }}/OIDC/userinfo"
+      client_id: "{{ secret['client_id'] }}"
+      client_secret: "{{ secret['client_secret'] }}"
+      oauth_callback_url: "https://{{ notebooks_hostname }}/hub/oauth_callback"
+      openid_configuration_url: "https://proxy.testing.eosc-federation.eu/.well-known/openid-configuration"
+      scope: ["openid", "profile", "email", "offline_access", "entitlements"]
+      username_claim: "sub"
+      extra_authorize_params:
+        prompt: consent
+    JupyterHub:
+      admin_access: true
+      authenticate_prometheus: false
+      authenticator_class: egi_notebooks_hub.egiauthenticator.EOSCNodeAuthenticator
+      # spawner_class: (in egi-notebooks-b2drop)
+    LabApp:
+      check_for_updates_class: jupyterlab.NeverCheckForUpdate
+  extraConfig:
+    egi-notebooks-welcome: |-
+      from egi_notebooks_hub.welcome import WelcomeHandler
+      c.JupyterHub.default_url = "/welcome"
+      c.JupyterHub.extra_handlers = [(r'/welcome', WelcomeHandler)]
+    egi-notebooks-b2drop: |-
+{%- raw %}
+      import json
+      import os
+      from egi_notebooks_hub.onedata import OnedataSpawner
+      from tornado.httpclient import AsyncHTTPClient, HTTPClientError, HTTPRequest
+
+      class WebDavOIDCSpawner(OnedataSpawner):
+          # ownCloud Infinite Scale parameters
+          # (https://owncloud.dev/apis/http/graph/spaces/#list-my-spaces-get-medrives)
+          OCIS_URL = "https://drive.open-science-cloud.ec.europa.eu"
+          # personal space
+          OCIS_PERSONAL_SPACE = "/graph/v1.0/me/drives?%24filter=driveType+eq+personal"
+          # shared space
+          OCIS_SHARED_WITH_ME = "/graph/v1.0/me/drives?%24filter=driveType+eq+virtual"
+          # other spaces
+          OCIS_SPACES = "/graph/v1.0/me/drives?%24filter=driveType+eq+project"
+
+          async def append_owncloud_sidecar(self, spawner, type, query, fallback_url=None, headers={}):
+              owncloud_url = fallback_url
+              http_client = AsyncHTTPClient()
+              req = HTTPRequest(
+                  self.OCIS_URL + query,
+                  headers=headers,
+                  method="GET",
+              )
+              try:
+                  resp = await http_client.fetch(req)
+                  body = json.loads(resp.body.decode("utf8", "replace"))
+                  self.log.debug("OCIS response: %s", body)
+                  if "value" in body:
+                      ocis_infos = body["value"]
+                      if len(ocis_infos) >= 1 and "root" in ocis_infos[0]:
+                          owncloud_url = ocis_infos[0]["root"].get("webDavUrl", None)
+              except HTTPClientError as e:
+                  self.log.error("can't query ownCloud: %s", e)
+              self.log.info("ownCloud %s URL: %s", type, owncloud_url)
+
+              if owncloud_url is None:
+                  return
+
+              if type == "home":
+                  subpath = ""
+              else:
+                  subpath = "/" + type.capitalize()
+              env = [
+                  {"name": "WEBDAV_URL", "value": owncloud_url},
+                  {"name": "WEBDAV_VENDOR", "value": "owncloud"},
+                  # XXX: strict permissions needed for .local/share/jupyter/runtime/jupyter_cookie_secret
+                  # quicker directory cache and polling
+                  {"name": "MOUNT_OPTS", "value": "--file-perms=0600 --dir-perms=0770 --dir-cache-time=1m0s --poll-interval=0m20s"},
+                  {"name": "MOUNT_PATH", "value": "/owncloud" + subpath},
+                  # default mode is "full"
+                  {"name": "VFS_CACHE_MODE", "value": "full"},
+              ]
+              if type != "home":
+                  env.append({"name": "MOUNT_WAIT_POINT", "value": "webdav-fs: /owncloud fuse.rclone"})
+              volume_mounts = [
+                  {"mountPath": "/owncloud:shared", "name": "owncloud-home"},
+                  {"mountPath": self.token_mount_path, "name": self.token_secret_volume_name, "readOnly": True},
+              ]
+              spawner.extra_containers.append(
+                  {
+                      "name": "owncloud-" + type,
+                      "image": "eginotebooks/webdav-rclone-sidecar:sha-0a62679",
+                      "args": ["bearer_token_command=cat " + self.token_path],
+                      "env": env,
+                      "resources": self.sidecar_resources,
+                      "securityContext": {
+                          "runAsUser": 1000,
+                          "fsUser": 1000,
+                          "fsGroup": 100,
+                          "privileged": True,
+                          "capabilities": {"add": ["SYS_ADMIN"]},
+                      },
+                      "volumeMounts": volume_mounts,
+                  }
+              )
+
+          async def pre_spawn_hook(self, spawner):
+              await super(WebDavOIDCSpawner, self).pre_spawn_hook(spawner)
+              auth_state = await self.user.get_auth_state()
+              # volume name as in EGI spawner
+              self.token_secret_volume_name = self._expand_user_properties(
+                  self.token_secret_volume_name_template
+              )
+              self.token_path = os.path.join(self.token_mount_path, "access_token")
+
+              if auth_state:
+                  access_token = auth_state.get("access_token", None)
+                  headers = {
+                      "Accept": "application/json",
+                      "User-Agent": "JupyterHub",
+                      "Authorization": "Bearer %s" % access_token,
+                  }
+
+                  await self.append_owncloud_sidecar(spawner, "home", self.OCIS_PERSONAL_SPACE, headers=headers)
+                  await self.append_owncloud_sidecar(spawner, "shares", self.OCIS_SHARED_WITH_ME, headers=headers)
+                  await self.append_owncloud_sidecar(spawner, "spaces", self.OCIS_SPACES, headers=headers)
+              else:
+                  self.log.info("No auth state, skipping ownCloud")
+
+      c.JupyterHub.spawner_class = WebDavOIDCSpawner
+      c.WebDavOIDCSpawner.token_mount_path = "/var/run/secrets/oidc/"
+      c.WebDavOIDCSpawner.http_timeout = 90
+{% endraw %}
+  extraFiles:
+    welcome.html:
+      mountPath: /usr/local/share/jupyterhub/templates/welcome.html
+      stringData: |-
+{%- raw %}
+        {% extends "login.html" %}
+{% endraw %}
+    403.html:
+      mountPath: /usr/local/share/jupyterhub/templates/403.html
+      stringData: |-
+{%- raw %}
+        {% extends "error.html" %}
+        {% block main %}
+        <div class="error">
+          <h1>Unauthorized</h1>
+          <p>You don't have the correct entitlements to access this service.</p>
+          <p>If you think you should be granted access, please open an issue!</p>
+        </div>
+        {% endblock %}
+{% endraw %}
diff --git a/production2/ansible.cfg b/production2/ansible.cfg
new file mode 100644
index 0000000000000000000000000000000000000000..c3a73bec9aa17dbdd27c77947d8813866f7036e6
--- /dev/null
+++ b/production2/ansible.cfg
@@ -0,0 +1,5 @@
+[defaults]
+inventory=inventory
+
+[diff]
+always=true
diff --git a/production2/deploy.sh b/production2/deploy.sh
new file mode 100755
index 0000000000000000000000000000000000000000..c64fa1c8c12f32002e0649843a6ffbb6fcb00b62
--- /dev/null
+++ b/production2/deploy.sh
@@ -0,0 +1,55 @@
+#! /bin/bash -xe
+
+#
+# Deploy EOSC production instance
+#
+
+cd terraform && terraform init && terraform apply
+cd -
+cp -pv terraform/inventory.yaml inventory/1-safespring.yaml
+
+# dynamic DNS
+ip="$(head -n 1 < terraform/fip.txt)"
+# shellstate=$(shopt -po xtrace)
+# set +o xtrace
+# # https://nsupdate.fedcloud.eu
+# vault_prefix=secrets/users/e1662e20-e34b-468c-b0ce-d899bc878364@egi.eu/eosc-production
+# FEDCLOUD_DYNAMIC_DNS=$(vault read -field data $vault_prefix/FEDCLOUD_DYNAMIC_DNS | grep ^map | head -n 1 | sed 's/map\[\(.*\)\]/\1/')
+# for auth in $FEDCLOUD_DYNAMIC_DNS; do
+#   echo "curl -i -X GET -u $(echo "$auth" | cut -d: -f1):XXX https://nsupdate.fedcloud.eu/nic/update?myip=$ip"
+#   curl -i -X GET -u "$auth" https://nsupdate.fedcloud.eu/nic/update?myip="$ip"
+# done
+# eval "$shellstate"
+echo "Terraform finished. Check terraform/docker-volume.sh. Continue? (CTRL-C to quit)"
+read -r _
+
+# wait for ping and ssh
+for ip in $(cat terraform/hosts.txt); do
+  while ! ping -c 1 "$ip"; do sleep 5; done
+  ssh-keygen -R "$ip"
+  while ! ssh egi@"$ip" -o ConnectTimeout=10 -o PreferredAuthentications=publickey -o StrictHostKeyChecking=no :; do sleep 10; done
+done
+
+# check ssh access
+ansible -m command -a 'uname -a' allnodes
+
+# wait for cloud-init to finish
+ansible -m shell -a 'while ! test -f /var/lib/cloud/instance/boot-finished; do sleep 2; done' allnodes
+
+# setup volumes
+ansible -m copy -a 'src=terraform/nfs-volume.sh dest=/root/ mode=preserve' nfs
+ansible -m command -a '/root/nfs-volume.sh' nfs
+ansible -m copy -a 'src=terraform/squid-volume.sh dest=/root/ mode=preserve' 'ingress[0]'
+ansible -m command -a '/root/squid-volume.sh' 'ingress[0]'
+
+# kubernetes
+ansible-playbook playbooks/k8s.yaml
+while ansible -i ./inventory -m command -a 'kubectl get pods --all-namespaces' master | tail -n +3 | grep -v ' Running '; do sleep 5; done
+# docker runtime directory after Kubernetes deployment (problem with unmounts)
+ansible -m copy -a 'src=terraform/docker-volume.sh dest=/root/ mode=preserve' 'ingress nfs worker gpu'
+ansible -m command -a '/root/docker-volume.sh' 'ingress nfs worker gpu'
+ansible-playbook playbooks/squid.yaml
+ansible-playbook playbooks/cvmfs.yaml
+
+# wait for finish
+while ansible -m command -a 'kubectl get pods --all-namespaces' master | tail -n +3 | grep -v ' Running '; do sleep 5; done
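deploy.sh only moves on once every pod reports Running, while the volume scripts it copies around (nfs-volume.sh, squid-volume.sh, docker-volume.sh) run fire-and-forget. A few ad-hoc checks in the same style as the script can confirm the result after a run; this is a sketch only, and it assumes findmnt is available on the Ubuntu nodes:

#! /bin/bash -xe
# Sketch: spot-check the volume layout and cluster state after deploy.sh.
ansible -m command -a 'findmnt /exports' nfs
ansible -m command -a 'findmnt /var/spool/squid' 'ingress[0]'
ansible -m command -a 'findmnt /var/lib/docker/overlay2' 'ingress nfs worker gpu'
ansible -m command -a 'kubectl get nodes -o wide' master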
diff --git a/production2/deployments/hub.yaml b/production2/deployments/hub.yaml
new file mode 120000
index 0000000000000000000000000000000000000000..637bf5da5adcb06f961d1fd51c04bc983560bb60
--- /dev/null
+++ b/production2/deployments/hub.yaml
@@ -0,0 +1 @@
+../../common/deployments/hub-production.yaml
\ No newline at end of file
diff --git a/production2/extra b/production2/extra
new file mode 120000
index 0000000000000000000000000000000000000000..440decb513324d2db7f1c9cd642408b22e72fb64
--- /dev/null
+++ b/production2/extra
@@ -0,0 +1 @@
+../common/extra
\ No newline at end of file
diff --git a/production2/inventory/1-safespring.yaml b/production2/inventory/1-safespring.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..c400cbacbc70fa550b2bf0a5befc16d59ea41324
--- /dev/null
+++ b/production2/inventory/1-safespring.yaml
@@ -0,0 +1,38 @@
+---
+fip:
+  hosts:
+    89.47.190.171
+
+master:
+  hosts:
+    2001:6b0:7d:40::5e:
+      # must be IPv4 address or hostname
+      kube_server: 89.47.191.144
+
+ingress:
+  hosts:
+    2001:6b0:7d:40::1bd:
+
+nfs:
+  hosts:
+    2001:6b0:7d:40::274:
+
+worker:
+  hosts:
+    2001:6b0:7d:40::bb:
+    2001:6b0:7d:40::37e:
+    2001:6b0:7d:40::276:
+    2001:6b0:7d:40::273:
+    2001:6b0:7d:40::246:
+    2001:6b0:7d:40::13:
+    2001:6b0:7d:40::1a0:
+    2001:6b0:7d:40::3dc:
+
+gpu:
+  hosts:
+
+# using public IP of kube_server for ansible delegate_to
+kube_server:
+  hosts:
+    89.47.191.144:
+      ansible_host: 2001:6b0:7d:40::5e
diff --git a/production2/inventory/99-all.yaml b/production2/inventory/99-all.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..b32b48d2fd72aa4bc03998991dcd7a372170a256
--- /dev/null
+++ b/production2/inventory/99-all.yaml
@@ -0,0 +1,19 @@
+---
+allnodes:
+  children:
+    master:
+    ingress:
+    nfs:
+    worker:
+
+all:
+  vars:
+    ansible_become: yes
+    ansible_user: egi
+
+    mail_local: true
+    site_name: safespring-production2
+    vault_mount_point: secrets/users/e1662e20-e34b-468c-b0ce-d899bc878364@egi.eu/eosc-production
+
+    notebooks_hostname: eu-2.notebooks.open-science-cloud.ec.europa.eu
+    grafana_hostname: grafana.eu-2.notebooks.open-science-cloud.ec.europa.eu
diff --git a/production2/playbooks/cvmfs.yaml b/production2/playbooks/cvmfs.yaml
new file mode 120000
index 0000000000000000000000000000000000000000..2e82cca6c387556209aad5ff66ea41eba8f28082
--- /dev/null
+++ b/production2/playbooks/cvmfs.yaml
@@ -0,0 +1 @@
+../../common/playbooks/cvmfs.yaml
\ No newline at end of file
diff --git a/production2/playbooks/files/calico.yaml b/production2/playbooks/files/calico.yaml
new file mode 120000
index 0000000000000000000000000000000000000000..732c864b90be9b3f8aaefe227ad0da7a7685b763
--- /dev/null
+++ b/production2/playbooks/files/calico.yaml
@@ -0,0 +1 @@
+../../../common/playbooks/files/calico.yaml
\ No newline at end of file
diff --git a/production2/playbooks/files/etc b/production2/playbooks/files/etc
new file mode 120000
index 0000000000000000000000000000000000000000..ed53b8742792e16bb4bae2ed49d02c79d79de146
--- /dev/null
+++ b/production2/playbooks/files/etc
@@ -0,0 +1 @@
+../../../common/playbooks/files/etc
\ No newline at end of file
diff --git a/production2/playbooks/files/jupyterhub-jwt.yaml b/production2/playbooks/files/jupyterhub-jwt.yaml
new file mode 120000
index 0000000000000000000000000000000000000000..59f9ac23b488d899df25d4ea582405ec6998d793
--- /dev/null
+++ b/production2/playbooks/files/jupyterhub-jwt.yaml
@@ -0,0 +1 @@
+../../../common/playbooks/files/jupyterhub-jwt.yaml
\ No newline at end of file
diff --git a/production2/playbooks/files/usr b/production2/playbooks/files/usr
new file mode 120000
index 0000000000000000000000000000000000000000..b034223ec617fbae8cd736f29cf1c61394fa7ebd
--- /dev/null
+++ b/production2/playbooks/files/usr
@@ -0,0 +1 @@
+../../../common/playbooks/files/usr
\ No newline at end of file
diff --git a/production2/playbooks/k8s.yaml b/production2/playbooks/k8s.yaml
new file mode 120000
index 0000000000000000000000000000000000000000..117aed694c9e6d26908aec62da2dc03cdbbacc7d
--- /dev/null
+++ b/production2/playbooks/k8s.yaml
@@ -0,0 +1 @@
+../../common/playbooks/k8s.yaml
\ No newline at end of file
diff --git a/production2/playbooks/notebooks.yaml b/production2/playbooks/notebooks.yaml
new file mode 120000
index 0000000000000000000000000000000000000000..3f1a33f7bdd6b5a2381afcd25e41f8a051fde965
--- /dev/null
+++ b/production2/playbooks/notebooks.yaml
@@ -0,0 +1 @@
+../../common/playbooks/notebooks.yaml
\ No newline at end of file
diff --git a/production2/playbooks/public_keys b/production2/playbooks/public_keys
new file mode 120000
index 0000000000000000000000000000000000000000..6ef4918a9eb6aba6c6076f8e4d42570f35735d86
--- /dev/null
+++ b/production2/playbooks/public_keys
@@ -0,0 +1 @@
+../../common/playbooks/public_keys
\ No newline at end of file
diff --git a/production2/playbooks/squid.yaml b/production2/playbooks/squid.yaml
new file mode 120000
index 0000000000000000000000000000000000000000..114c327c31c35f2362f124c93f50c31d9e28b589
--- /dev/null
+++ b/production2/playbooks/squid.yaml
@@ -0,0 +1 @@
+../../common/playbooks/squid.yaml
\ No newline at end of file
diff --git a/production2/playbooks/templates/etc/exports b/production2/playbooks/templates/etc/exports
new file mode 120000
index 0000000000000000000000000000000000000000..3ef288e7f957972605d82cccffdb528b3992e2d7
--- /dev/null
+++ b/production2/playbooks/templates/etc/exports
@@ -0,0 +1 @@
+../../../../common/playbooks/templates/etc/exports.ipv46
\ No newline at end of file
diff --git a/production2/playbooks/templates/etc/mailutils.conf b/production2/playbooks/templates/etc/mailutils.conf
new file mode 120000
index 0000000000000000000000000000000000000000..dbd8a1f31599952201aa0ce602fc80b047c07c16
--- /dev/null
+++ b/production2/playbooks/templates/etc/mailutils.conf
@@ -0,0 +1 @@
+../../../../common/playbooks/templates/etc/mailutils.conf
\ No newline at end of file
diff --git a/production2/playbooks/templates/etc/squid b/production2/playbooks/templates/etc/squid
new file mode 120000
index 0000000000000000000000000000000000000000..352b5984beddb28543e7e6e2a84e5d5152a155ec
--- /dev/null
+++ b/production2/playbooks/templates/etc/squid
@@ -0,0 +1 @@
+../../../../common/playbooks/templates/etc/squid
\ No newline at end of file
diff --git a/production2/playbooks/upgrade.yaml b/production2/playbooks/upgrade.yaml
new file mode 120000
index 0000000000000000000000000000000000000000..0f9e3f4182cd474bfd384ba28fb5166eaeed8110
--- /dev/null
+++ b/production2/playbooks/upgrade.yaml
@@ -0,0 +1 @@
+../../common/playbooks/upgrade.yaml
\ No newline at end of file
diff --git a/production2/terraform/.gitignore b/production2/terraform/.gitignore
new file mode 100644
index 0000000000000000000000000000000000000000..e15cf5f0280219a788276bcb4546e5806302376b
--- /dev/null
+++ b/production2/terraform/.gitignore
@@ -0,0 +1,8 @@
+/.terraform/
+/.terraform.lock.hcl
+/fip.txt
+/hosts.txt
+/inventory.yaml
+/terraform.tfstate
+/terraform.tfstate.backup
+/*-volume.sh
diff --git a/production2/terraform/cloud-init.yaml b/production2/terraform/cloud-init.yaml
new file mode 120000
index 0000000000000000000000000000000000000000..58c76c65f9e66caf063015c77cc1dd75aad0583b
--- /dev/null
+++ b/production2/terraform/cloud-init.yaml
@@ -0,0 +1 @@
+../../staging1/terraform/cloud-init.yaml
\ No newline at end of file
diff --git a/production2/terraform/firewall.tf b/production2/terraform/firewall.tf
new file mode 120000
index 0000000000000000000000000000000000000000..0088c1251a5778555091eb81f3bf6840486531ef
--- /dev/null
+++ b/production2/terraform/firewall.tf
@@ -0,0 +1 @@
+../../common/terraform/firewall.tf
\ No newline at end of file
diff --git a/production2/terraform/terraform.tfvars b/production2/terraform/terraform.tfvars
new file mode 100644
index 0000000000000000000000000000000000000000..742013c76dcd5b801195eb71581e5fed8d1e6efd
--- /dev/null
+++ b/production2/terraform/terraform.tfvars
@@ -0,0 +1,47 @@
+# These need to be defined for things to work
+ip_pool = ""
+net_name = "public"
+net6_name = ""
+site_name = "production2"
+
+# These may need some adjustment for your provider
+master_flavor_name = "l2.c4r8.100"
+worker_flavor_name = "l2.c16r32.100"
+# XXX: replace this for GPU flavor, once available
+gpu_flavor_name = "l2.c2r4.100"
+
+# Number of extra workers
+extra_workers = 8
+
+# Number of GPU workers
+gpu_workers = 0
+
+# volumes for docker
+docker_volumes_size = 384
+
+# NFS volume
+nfs_volume_size = 256
+
+# scratch volume
+scratch_volumes_size = 0
+
+# squid volume
+squid_volume_size = 128
+
+# global firewall rules - public and admin access
+# (descriptions not supported @ SafeSpring)
+security_public_cidr = {
+  # University of West Bohemia in Pilsen
+  "147.228.0.0/16": "",
+  "2001:718:1801::/48": "",
+  # CESNET VPN
+  "78.128.246.160/32": "",
+  "78.128.247.175/32": "",
+  "2001:718:ff05:acb::/64": "",
+  "2001:718:ff05:acc::/64": "",
+  "147.251.21.79/32": "",
+  # admin machine (OpenStack project router)
+  "147.228.21.79/32": "",
+  # admin machine
+  "2001:718:801:432:f816:3eff:feab:fbc8/128": "",
+}
diff --git a/production2/terraform/vars.tf b/production2/terraform/vars.tf
new file mode 120000
index 0000000000000000000000000000000000000000..00c4e3a2893853ba74d00429d159cd321d0e7d78
--- /dev/null
+++ b/production2/terraform/vars.tf
@@ -0,0 +1 @@
+../../common/terraform/vars.tf
\ No newline at end of file
diff --git a/production2/terraform/versions.tf b/production2/terraform/versions.tf
new file mode 120000
index 0000000000000000000000000000000000000000..f2cc6c580f7e5a838c570744dda896209ab837f9
--- /dev/null
+++ b/production2/terraform/versions.tf
@@ -0,0 +1 @@
+../../staging1/terraform/versions.tf
\ No newline at end of file
diff --git a/production2/terraform/vms.tf b/production2/terraform/vms.tf
new file mode 100644
index 0000000000000000000000000000000000000000..c6314656be86bbec189412d06b34531e1409eea8
--- /dev/null
+++ b/production2/terraform/vms.tf
@@ -0,0 +1,238 @@
+locals {
+  nodes = concat([
+    openstack_compute_instance_v2.ingress,
+    openstack_compute_instance_v2.nfs,
+  ], openstack_compute_instance_v2.worker[*], openstack_compute_instance_v2.gpu[*])
+  master_ip = replace(openstack_compute_instance_v2.master.network[0].fixed_ip_v6, "/\\[(.*)\\]/", "$1")
+  ingress_ip = replace(openstack_compute_instance_v2.ingress.network[0].fixed_ip_v6, "/\\[(.*)\\]/", "$1")
+  nfs_ip = replace(openstack_compute_instance_v2.nfs.network[0].fixed_ip_v6, "/\\[(.*)\\]/", "$1")
+  worker_ips = [for s in openstack_compute_instance_v2.worker[*].network[0].fixed_ip_v6 : replace(s, "/\\[(.*)\\]/", "$1")]
+  gpu_ips = [for s in openstack_compute_instance_v2.gpu[*].network[0].fixed_ip_v6 : replace(s, "/\\[(.*)\\]/", "$1")]
+}
+
+data "openstack_images_image_v2" "ubuntu" {
+  name = "ubuntu-22.04"
+}
+
+data "openstack_compute_flavor_v2" "master-flavor" {
+  name = var.master_flavor_name
+}
+
+data "openstack_compute_flavor_v2" "worker-flavor" {
+  name = var.worker_flavor_name
+}
+
+data "openstack_compute_flavor_v2" "gpu-flavor" {
+  name = var.gpu_flavor_name
+}
+
+resource "openstack_compute_instance_v2" "master" {
+  name = "k8s-${var.site_name}-master"
+  image_id = data.openstack_images_image_v2.ubuntu.id
+  flavor_id = data.openstack_compute_flavor_v2.master-flavor.id
+  security_groups = ["default", openstack_networking_secgroup_v2.ping.name, openstack_networking_secgroup_v2.ssh.name]
+  user_data = file("cloud-init.yaml")
+  tags = ["master"]
+  network {
+    name = var.net_name
+  }
+}
+
+resource "openstack_compute_instance_v2" "nfs" {
+  name = "k8s-${var.site_name}-nfs"
+  image_id = data.openstack_images_image_v2.ubuntu.id
+  flavor_id = data.openstack_compute_flavor_v2.worker-flavor.id
+  security_groups = ["default", openstack_networking_secgroup_v2.ping.name, openstack_networking_secgroup_v2.ssh.name]
+  user_data = file("cloud-init.yaml")
+  tags = ["worker"]
+  network {
+    name = var.net_name
+  }
+}
+
+resource "openstack_compute_instance_v2" "ingress" {
+  name = "k8s-${var.site_name}-w-ingress"
+  image_id = data.openstack_images_image_v2.ubuntu.id
+  flavor_id = data.openstack_compute_flavor_v2.worker-flavor.id
+  security_groups = ["default", openstack_networking_secgroup_v2.ping.name, openstack_networking_secgroup_v2.ssh.name, openstack_networking_secgroup_v2.http.name]
+  user_data = file("cloud-init.yaml")
+  tags = ["worker"]
+  network {
+    name = var.net_name
+  }
+}
+
+resource "openstack_compute_instance_v2" "worker" {
+  count = var.extra_workers
+  name = "k8s-${var.site_name}-worker-${count.index}"
+  image_id = data.openstack_images_image_v2.ubuntu.id
+  flavor_id = data.openstack_compute_flavor_v2.worker-flavor.id
+  security_groups = ["default", openstack_networking_secgroup_v2.ping.name, openstack_networking_secgroup_v2.ssh.name]
+  user_data = file("cloud-init.yaml")
+  tags = ["worker"]
+  network {
+    name = var.net_name
+  }
+}
+
+resource "openstack_compute_instance_v2" "gpu" {
+  count = var.gpu_workers
+  name = "k8s-${var.site_name}-gpu-${count.index}"
+  image_id = data.openstack_images_image_v2.ubuntu.id
+  flavor_id = data.openstack_compute_flavor_v2.gpu-flavor.id
+  security_groups = ["default", openstack_networking_secgroup_v2.ping.name, openstack_networking_secgroup_v2.ssh.name]
+  user_data = file("cloud-init.yaml")
+  tags = ["worker"]
+  network {
+    name = var.net_name
+  }
+}
+
+resource "openstack_blockstorage_volume_v3" "nfs-volume" {
+  name = "nfs"
+  size = var.nfs_volume_size
+  volume_type = "large"
+}
+
+resource "openstack_compute_volume_attach_v2" "nfs-volume-attach" {
+  instance_id = openstack_compute_instance_v2.nfs.id
+  volume_id = openstack_blockstorage_volume_v3.nfs-volume.id
+}
+
+resource "local_file" "volume-script" {
+  filename = "nfs-volume.sh"
+  file_permission = "0755"
+  content = <<EOT
+#! /bin/bash -xe
+if ! dpkg-query -s xfsprogs >/dev/null 2>&1; then
+  apt-get update
+  apt-get install -y xfsprogs
+fi
+device="${openstack_compute_volume_attach_v2.nfs-volume-attach.device}"
+mkfs.xfs -L NFS "$device" || true
+grep -q 'LABEL=NFS' /etc/fstab || /bin/echo -e "LABEL=NFS\t/exports\txfs\tdefaults,uquota,pquota\t0\t0" | tee -a /etc/fstab
+mkdir /exports 2>/dev/null || true
+mount -a
+EOT
+}
+
+resource "openstack_blockstorage_volume_v3" "docker-volume" {
+  count = var.extra_workers + var.gpu_workers + 2
+  name = format("docker-%s", local.nodes[count.index].name)
+  size = var.docker_volumes_size
+}
+
+resource "openstack_compute_volume_attach_v2" "docker-volume-attach" {
+  count = var.extra_workers + var.gpu_workers + 2
+  instance_id = local.nodes[count.index].id
+  volume_id = openstack_blockstorage_volume_v3.docker-volume[count.index].id
+}
+
+resource "local_file" "docker-volume-script" {
+  filename = "docker-volume.sh"
+  file_permission = "0755"
+  content = <<EOT
+#! /bin/bash -xe
+volumes="${join("\n", [for n, d in zipmap(tolist(local.nodes[*].name), tolist(openstack_compute_volume_attach_v2.docker-volume-attach[*].device)) : format("%s:%s", n, d)])}"
+volume=$(echo "$volumes" | grep "$(hostname):")
+device=$(echo "$volume" | cut -d: -f2)
+if ! dumpe2fs -h "$device" >/dev/null 2>&1; then
+  mkfs.ext4 -L DOCKER "$device"
+  grep -q 'LABEL=DOCKER' /etc/fstab || /bin/echo -e "LABEL=DOCKER\t/var/lib/docker/overlay2\text4\tdefaults,x-systemd.before=local-fs.target\t0\t0" | tee -a /etc/fstab
+  mkdir -p /var/lib/docker/overlay2 2>/dev/null || true
+  systemctl stop docker kubelet >/dev/null 2>&1 || true
+  sleep 10
+  systemctl stop docker kubelet >/dev/null 2>&1 || true
+  umount /var/lib/docker/overlay2 2>&1 || true
+  mount "$device" /mnt
+  mv /var/lib/docker/overlay2/* /mnt >/dev/null 2>&1 || true
+  umount /mnt
+  mount -a
+  systemctl start docker kubelet >/dev/null 2>&1 || true
+fi
+EOT
+}
+
+resource "openstack_blockstorage_volume_v3" "squid-volume" {
+  name = "squid"
+  size = var.squid_volume_size
+  volume_type = "fast"
+}
+
+resource "openstack_compute_volume_attach_v2" "squid-volume-attach" {
+  instance_id = openstack_compute_instance_v2.ingress.id
+  volume_id = openstack_blockstorage_volume_v3.squid-volume.id
+}
+
+resource "local_file" "squid-volume-script" {
+  filename = "squid-volume.sh"
+  file_permission = "0755"
+  content = <<EOT
+#! /bin/bash -xe
+device="${openstack_compute_volume_attach_v2.squid-volume-attach.device}"
+if ! dumpe2fs -h "$device" >/dev/null 2>&1; then
+  mkfs.ext4 -L SQUID "$device"
+fi
+grep -q 'LABEL=SQUID' /etc/fstab || /bin/echo -e "LABEL=SQUID\t/var/spool/squid\text4\tdefaults,x-systemd.before=local-fs.target\t0\t0" | tee -a /etc/fstab
+mkdir /var/spool/squid 2>/dev/null || true
+mount -a
+EOT
+}
+
+resource "local_file" "inventory" {
+  filename = "inventory.yaml"
+  file_permission = "0644"
+  content = <<EOT
+---
+fip:
+  hosts:
+    ${openstack_compute_instance_v2.ingress.network[0].fixed_ip_v4}
+
+master:
+  hosts:
+    ${local.master_ip}:
+      # must be IPv4 address or hostname
+      kube_server: ${openstack_compute_instance_v2.master.network[0].fixed_ip_v4}
+
+ingress:
+  hosts:
+    ${local.ingress_ip}:
+
+nfs:
+  hosts:
+    ${local.nfs_ip}:
+
+worker:
+  hosts:
+    ${join("\n    ", [for s in local.worker_ips: "${s}:"])}
+
+gpu:
+  hosts:
+    ${join("\n    ", [for s in local.gpu_ips : "${s}:"])}
+
+# using public IP of kube_server for ansible delegate_to
+kube_server:
+  hosts:
+    ${openstack_compute_instance_v2.master.network[0].fixed_ip_v4}:
+      ansible_host: ${local.master_ip}
+EOT
+}
+
+resource "local_file" "fip" {
+  filename = "fip.txt"
+  file_permission = "0644"
+  content = <<EOT
+${openstack_compute_instance_v2.ingress.network[0].fixed_ip_v4}
+EOT
+}
+
+resource "local_file" "hosts" {
+  filename = "hosts.txt"
+  file_permission = "0644"
+  content = <<EOT
+${local.master_ip}
+${local.ingress_ip}
+${local.nfs_ip}
+${join("\n", concat(local.worker_ips, local.gpu_ips))}
+EOT
+}