From d1516097ef70a54d5a43a423e59b7ba2b8bc8d86 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Franti=C5=A1ek=20Dvo=C5=99=C3=A1k?= <valtri@civ.zcu.cz>
Date: Sat, 2 Jan 2021 13:48:54 +0100
Subject: [PATCH] Hadoop: create users tuning

* separated non-interactive script (password generated by default), no users from puppet
* image user update in puppet still needed
* explicit image user setup in deployment stage
* no example users
* explain in the documentation
---
 .gitignore                         |  2 +
 README.md                          |  8 ++++
 deployments/hadoop/adduser.sh.tmpl | 65 ++++++++++++++++++++++++++++++
 deployments/hadoop/krb5.conf.tmpl  | 10 +++++
 deployments/hadoop/plugin.py       | 32 ++++++++++++---
 deployments/hadoop/site.pp.tmpl    | 32 ++++-----------
 orchestrate.py                     |  2 +-
 7 files changed, 121 insertions(+), 30 deletions(-)
 create mode 100755 deployments/hadoop/adduser.sh.tmpl
 create mode 100644 deployments/hadoop/krb5.conf.tmpl

diff --git a/.gitignore b/.gitignore
index 7e8eeb4..8738465 100644
--- a/.gitignore
+++ b/.gitignore
@@ -3,7 +3,9 @@
 clouds.yaml
 config.json
 inventory
+hadoop-adduser.sh
 hosts
+krb5.conf.hadoop
 public_hosts
 secrets.auto.tfvars
 site.pp
diff --git a/README.md b/README.md
index 3da0c6a..153f61d 100644
--- a/README.md
+++ b/README.md
@@ -36,3 +36,11 @@ The *launch.sh* script is doing this:
     ./orchestrate.py
 
 The orchestration script has multiple steps and dry-run option. See *./orchestrate.py --help*.
+
+3. look for the generated password of the newly created Hadoop user in the output
+
+You can set a new password on the master server with the following command ('debian' is the user name used here):
+
+    sudo kadmin.local cpw debian
+
+Consider configuring a local Kerberos client by installing the *krb5.conf.hadoop* configuration into /etc/krb5.conf.d/ and then enabling SPNEGO in your web browser.
diff --git a/deployments/hadoop/adduser.sh.tmpl b/deployments/hadoop/adduser.sh.tmpl
new file mode 100755
index 0000000..2ac7a0f
--- /dev/null
+++ b/deployments/hadoop/adduser.sh.tmpl
@@ -0,0 +1,65 @@
+#! /bin/sh
+# Create the user LOGIN for Hadoop; PASSWORD may be preset in the environment.
+MASTER='$master_hostname.$domain'
+REALM='$realm'
+FQDN=`hostname -f`
+KEYTAB_HBASE=$${KEYTAB_HBASE:-'/etc/security/keytab/hbase.service.keytab'}
+KEYTAB_HDFS=$${KEYTAB_HDFS:-'/etc/security/keytab/nn.service.keytab'}
+PRINCIPAL_HBASE=$${PRINCIPAL_HBASE:-"hbase/$${FQDN}@$${REALM}"}
+PRINCIPAL_HDFS=$${PRINCIPAL_HDFS:-"nn/$${FQDN}@$${REALM}"}
+
+if [ -z "$$1" ]; then
+	echo "Usage: $$0 LOGIN" >&2
+	exit 1
+fi
+NAME=$$1
+# Master: local account, Kerberos principal, HDFS home directory and HBase namespace.
+if [ "$$MASTER" = "$$FQDN" ]; then
+	if ! getent passwd "$${NAME}" >/dev/null; then
+		useradd -m -s /bin/bash "$${NAME}" >/dev/null 2>&1
+		echo "$${NAME}@$${REALM}" > "/home/$${NAME}/.k5login"
+		chown "$${NAME}:$${NAME}" "/home/$${NAME}/.k5login"
+	fi
+	# Kerberos principal; a password is generated unless PASSWORD is already set.
+	if [ -n "$$REALM" ]; then
+		if ! kadmin.local getprinc "$$NAME" >/dev/null 2>&1; then
+			if [ -z "$$PASSWORD" ]; then
+				GENPWD="`dd if=/dev/urandom bs=6 count=1 2>/dev/null | base64 -`"
+				PASSWORD="$$GENPWD"
+			fi
+			printf '%s\n%s\n' "$$PASSWORD" "$$PASSWORD" | kadmin.local ank +requires_preauth "$$NAME" >/dev/null
+		fi
+	fi
+	# HDFS home directory (the hdfs user authenticates with its keytab first).
+	if [ -n "$$REALM" ]; then
+		export KRB5CCNAME=FILE:/tmp/krb5cc_admin_hdfs_$$$$
+		su hdfs -p -c "kinit -k -t '$${KEYTAB_HDFS}' '$${PRINCIPAL_HDFS}'"
+	fi
+	su hdfs -p -c "hdfs dfs -mkdir -p /user/$${NAME}"
+	su hdfs -p -c "hdfs dfs -chown $${NAME}:users /user/$${NAME}"
+	su hdfs -p -c "hdfs dfs -chmod 0750 /user/$${NAME}"
+	if [ -n "$$REALM" ]; then
+		kdestroy
+	fi
+	# HBase namespace; permissions are granted only in the Kerberos setup.
+	if [ -n "$$REALM" ]; then
+		export KRB5CCNAME=FILE:/tmp/krb5cc_admin_hbase_$$$$
+		su hbase -s /bin/bash -p -c "kinit -k -t '$${KEYTAB_HBASE}' '$${PRINCIPAL_HBASE}'"
+		(echo "create_namespace '$${NAME}'"
+		 echo "grant '$${NAME}', 'RWXCA', '@$${NAME}'"
+		 echo) | su hbase -s /bin/bash -p -c 'hbase shell -n' >/dev/null
+		kdestroy
+	else
+		echo "create_namespace '$${NAME}'" | su hbase -s /bin/bash -p -c 'hbase shell -n' >/dev/null
+	fi
+# Other nodes: only a local account without a login shell.
+else
+	if ! getent passwd "$${NAME}" >/dev/null; then
+		useradd -m -s /bin/false "$${NAME}" >/dev/null 2>&1
+	fi
+fi
+
+if [ -n "$$GENPWD" ]; then
+	echo "Generated password:"
+	echo "$${GENPWD}"
+fi
diff --git a/deployments/hadoop/krb5.conf.tmpl b/deployments/hadoop/krb5.conf.tmpl
new file mode 100644
index 0000000..234b060
--- /dev/null
+++ b/deployments/hadoop/krb5.conf.tmpl
@@ -0,0 +1,10 @@
+[realms]
+	$realm = {
+		kdc = $master_ip
+		admin_server = $master_ip
+		default_domain = $domain
+	}
+
+[domain_realm]
+	.$domain = $realm
+	$domain = $realm
diff --git a/deployments/hadoop/plugin.py b/deployments/hadoop/plugin.py
index 3a53892..f0cc9cd 100644
--- a/deployments/hadoop/plugin.py
+++ b/deployments/hadoop/plugin.py
@@ -6,7 +6,7 @@ DEFAULT_DISTRIBUTION = 'bigtop'
 
 class ComponentHadoop:
 
-    def __init__(self, args, config, hosts):
+    def __init__(self, args, config, hosts, public_hosts):
         self.args = args
         self.config = config
         self.params = {
@@ -15,6 +15,7 @@ class ComponentHadoop:
             'image_user': config['image_user'],
             'hdfs_deployed': 'false',
             'master_hostname': config['master_hostname'],
+            'master_ip': public_hosts[config['master_hostname']],
             'node_hostname': config['node_hostname'],
             'nodes': list([h for h in hosts.keys() if h != config['master_hostname']]),
             'realm': 'HADOOP',
@@ -40,18 +41,39 @@ class ComponentHadoop:
                     with open('site2.pp', 'w') as f:
                         os.chmod('site2.pp', 0o600)
                         f.write(site)
+                template = None
+                with open('deployments/hadoop/adduser.sh.tmpl', 'r') as f:
+                    template = string.Template(f.read())
+                if template:
+                    print('-> hadoop-adduser.sh')
+                    result = template.substitute(self.params)
+                    with open('hadoop-adduser.sh', 'w') as f:
+                        os.chmod('hadoop-adduser.sh', 0o755)
+                        f.write(result)
+                template = None
+                with open('deployments/hadoop/krb5.conf.tmpl', 'r') as f:
+                    template = string.Template(f.read())
+                if template:
+                    print('-> krb5.conf.hadoop')
+                    result = template.substitute(self.params)
+                    with open('krb5.conf.hadoop', 'w') as f:
+                        f.write(result)
 
     def commands(self, action):
         if action == 'deployment':
             return [
-                ['ansible', '-i', './inventory', '-m', 'copy', '-a', 'src=site.pp dest=/root',
-                 'all'],
-                ['ansible', '-i', './inventory', '-m', 'copy', '-a', 'src=site2.pp dest=/root',
-                 'all'],
+                ['ansible', '-i', './inventory', '-m', 'copy', '-a',
+                 'src=site.pp dest=/root mode=0600', 'all'],
+                ['ansible', '-i', './inventory', '-m', 'copy', '-a',
+                 'src=site2.pp dest=/root mode=0600', 'all'],
+                ['ansible', '-i', './inventory', '-m', 'copy', '-a',
+                 'src=hadoop-adduser.sh dest=/root mode=0755', 'all'],
                 ['ansible', '-i', './inventory', '-m', 'shell', '-a',
                  'puppet apply --test /root/site.pp >> stage1.log 2>&1; echo $?', 'all'],
                 ['ansible', '-i', './inventory', '-m', 'shell', '-a',
                  'puppet apply --test /root/site2.pp >> stage2.log 2>&1; echo $?', 'all'],
+                ['ansible', '-i', './inventory', '-m', 'command', '-a',
+                 '/root/hadoop-adduser.sh %s' % self.config['image_user'], 'all'],
             ]
 
 
diff --git a/deployments/hadoop/site.pp.tmpl b/deployments/hadoop/site.pp.tmpl
index f739d88..f29eb87 100644
--- a/deployments/hadoop/site.pp.tmpl
+++ b/deployments/hadoop/site.pp.tmpl
@@ -164,13 +164,6 @@ class { '::zookeeper':
 class{'site_hadoop':
   distribution        => $$distribution,
   version             => $$version,
-  users               => [
-    'example',
-    'hawking',
-  ],
-  user_realms => [
-    '$$realm',
-  ],
   accounting_enable   => false,
   hbase_enable        => true,
   nfs_frontend_enable => false,
@@ -179,24 +172,15 @@ class{'site_hadoop':
   spark_enable        => true,
 }
 
-# site_hadoop::users hasn't shell on the nodes, we need exception for '${image_user}'
-$$touchfile = 'hdfs-user-${image_user}-created'
-hadoop::user{'${image_user}':
-  shell     => true,
-  hdfs      => $$hadoop::hdfs_hostname == $$::fqdn,
-  groups    => 'users',
-  realms    => $$site_hadoop::user_realms,
-  touchfile => $$touchfile,
+group{'$image_user':  # quoted: the title is filled in by the deployment template
+  ensure => 'present',
 }
-if $$hadoop::hdfs_hostname == $$::fqdn {
-  hadoop::kinit{$$touchfile:
-  }
-  ->
-  Hadoop::User <| touchfile == $$touchfile |>
-  ->
-  hadoop::kdestroy{$$touchfile:
-    touch     => true,
-  }
+->
+user{'$image_user':
+  gid        => '$image_user',
+  groups     => ['users'],
+  managehome => true,
+  shell      => '/bin/bash',
 }
 
 class local_kerberos {
diff --git a/orchestrate.py b/orchestrate.py
index 4f83be6..36cb1ef 100755
--- a/orchestrate.py
+++ b/orchestrate.py
@@ -60,7 +60,7 @@ if t and os.path.exists('deployments/%s/plugin.py' % t):
     print('-> deployments/%s/plugin.py' % t)
     plugin = importlib.import_module('deployments.%s.plugin' % t)
     Component = getattr(plugin, 'Component')
-    component = Component(args, config, hosts)
+    component = Component(args, config, hosts, public_hosts)
 else:
     component = None
 
-- 
GitLab