From ea429dcbc0ec8ae8d93d353850f7977d408fe6dc Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Franti=C5=A1ek=20Dvo=C5=99=C3=A1k?= <valtri@civ.zcu.cz>
Date: Sun, 20 Dec 2020 20:23:05 +0100
Subject: [PATCH] Hadoop image: implement tests

---
 image/hadoop.sh             |  4 ++++
 image/tests/common.sh       | 19 ++++++++++++++++++
 image/tests/hadoop-yarn.sh  |  6 ++++++
 image/tests/hbase.sh        | 40 +++++++++++++++++++++++++++++++++++++
 image/tests/hive-beeline.sh | 11 ++++++++++
 image/tests/hive.sh         | 11 ++++++++++
 image/tests/spark-client.sh | 12 +++++++++++
 image/tests/spark-yarn.sh   | 28 ++++++++++++++++++++++++++
 8 files changed, 131 insertions(+)
 create mode 100644 image/tests/common.sh
 create mode 100755 image/tests/hadoop-yarn.sh
 create mode 100755 image/tests/hbase.sh
 create mode 100755 image/tests/hive-beeline.sh
 create mode 100755 image/tests/hive.sh
 create mode 100755 image/tests/spark-client.sh
 create mode 100755 image/tests/spark-yarn.sh

diff --git a/image/hadoop.sh b/image/hadoop.sh
index dcf4e5a..f817e85 100755
--- a/image/hadoop.sh
+++ b/image/hadoop.sh
@@ -31,3 +31,7 @@ cp -vp single.pp $BODI_CHROOT_PATH/root
 sed 's/\(\$hdfs_deployed\s*=\s*\).*/\1true/' single.pp > $BODI_CHROOT_PATH/root/single2.pp
 chroot $BODI_CHROOT_PATH touch -r /root/single.pp /root/single2.pp
 cp -vp scripts/*.sh $BODI_CHROOT_PATH/usr/local/sbin/
+
+# tests: install the test scripts into the image under /opt/hadoop-tests
+mkdir -p "$BODI_CHROOT_PATH/opt/hadoop-tests"
+cp -vp tests/*.sh "$BODI_CHROOT_PATH/opt/hadoop-tests/"
diff --git a/image/tests/common.sh b/image/tests/common.sh
new file mode 100644
index 0000000..a4203f4
--- /dev/null
+++ b/image/tests/common.sh
@@ -0,0 +1,19 @@
+#! /bin/sh -e
+# Sourced by the Hive tests: sets the connection variables and prepares the test database once.
+HIVE_HOSTNAME=$(hostname -f)
+HIVE_DB=$(id -un)_test
+JDBC_URL="jdbc:hive2://${HIVE_HOSTNAME}:10000/${HIVE_DB}"
+#JDBC_URL="jdbc:hive2://${HIVE_HOSTNAME}:10000/${HIVE_DB};principal=hive/${HIVE_HOSTNAME}@${REALM}"
+
+# The marker file in the home directory means the database was already set up for this host name.
+hive_marker=~/.hadoop-test-${HIVE_HOSTNAME}.txt
+if [ ! -f "${hive_marker}" ]; then
+	hive -e "DROP DATABASE ${HIVE_DB} CASCADE" 2>/dev/null || :
+	hive -e "CREATE DATABASE ${HIVE_DB}"
+	hive --database "${HIVE_DB}" -e "CREATE TABLE pokes (foo INT, bar STRING);
+CREATE TABLE invites (foo INT, bar STRING) PARTITIONED BY (ds STRING);
+"
+	hive --database "${HIVE_DB}" -e "INSERT INTO pokes VALUES (1, 'A'), (2, 'B');
+"
+	touch "${hive_marker}"
+fi
diff --git a/image/tests/hadoop-yarn.sh b/image/tests/hadoop-yarn.sh
new file mode 100755
index 0000000..d9a3ceb
--- /dev/null
+++ b/image/tests/hadoop-yarn.sh
@@ -0,0 +1,6 @@
+#! /bin/sh -e
+# Run the MapReduce "pi" example; the last line of its output must report Pi as 3.x.
+pi_job='hadoop jar /usr/lib/hadoop-mapreduce/hadoop-mapreduce-examples.jar pi 2 10'
+echo $pi_job
+$pi_job | tee hadoop-yarn.log
+tail -n 1 hadoop-yarn.log | grep -q 'Estimated value of Pi is 3\.'
diff --git a/image/tests/hbase.sh b/image/tests/hbase.sh
new file mode 100755
index 0000000..ab46101
--- /dev/null
+++ b/image/tests/hbase.sh
@@ -0,0 +1,40 @@
+#! /bin/sh -e
+# HBase test: per-user namespace, table create/put/scan, then a CopyTable job and a scan of the copy.
+user=$(id -un)
+log="hbase.log"
+if ! echo list_namespace | hbase shell -n > ${log}; then
+	cat ${log}
+	exit 1
+fi
+# create the user's namespace unless the list_namespace output already contains it
+grep -q "^${user}$" ${log} || echo "create_namespace '${user}'" | hbase shell -n
+
+echo "[hbase] deleting previous tables..."
+hbase shell <<EOF >hbase-delete.log 2>&1 || :
+disable '${user}:autotest'
+disable '${user}:autotest-new'
+drop '${user}:autotest'
+drop '${user}:autotest-new'
+EOF
+
+echo "[hbase] creating table and putting test..."
+hbase shell <<EOF | tee ${log}
+create '${user}:autotest', 'autocf'
+put '${user}:autotest', 'row1', 'autocf:a', 'value1'
+put '${user}:autotest', 'row2', 'autocf:a', 'value2'
+put '${user}:autotest', 'row3', 'autocf:a', 'value3'
+put '${user}:autotest', 'row4', 'autocf:a', {'a'=>1, 'b'=>2}
+scan '${user}:autotest'
+EOF
+grep -q 'row1.*value1' ${log}
+
+echo "[hbase] creating target table and yarn copy test..."
+hbase shell <<EOF
+create '${user}:autotest-new', {NAME=>'autocf', COMPRESSION=>'snappy'}
+EOF
+hbase org.apache.hadoop.hbase.mapreduce.CopyTable --new.name="${user}:autotest-new" "${user}:autotest"
+echo "[hbase] check the copy..."
+hbase shell <<EOF | tee ${log}
+scan '${user}:autotest-new'
+EOF
+grep -q 'row1.*value1' ${log}
diff --git a/image/tests/hive-beeline.sh b/image/tests/hive-beeline.sh
new file mode 100755
index 0000000..d15a3f1
--- /dev/null
+++ b/image/tests/hive-beeline.sh
@@ -0,0 +1,11 @@
+#! /bin/sh -e
+# Beeline test: list the tables and run a query over JDBC against the database set up by common.sh.
+. "$(dirname "$0")/common.sh"
+beeline -u "$JDBC_URL" <<EOF | tee beeline.log
+SHOW TABLES;
+SELECT a.foo FROM invites a WHERE a.ds='2008-08-15';
+EOF
+# The log must mention both tables and a literal "a.foo"; the pattern is quoted so grep sees the "\.".
+grep -q invites beeline.log && \
+grep -q pokes beeline.log && \
+grep -q 'a\.foo' beeline.log
diff --git a/image/tests/hive.sh b/image/tests/hive.sh
new file mode 100755
index 0000000..b03e25f
--- /dev/null
+++ b/image/tests/hive.sh
@@ -0,0 +1,11 @@
+#! /bin/sh -e
+# Hive CLI test: pipe two statements into hive and keep a copy of the output in hive.log.
+. $(dirname $0)/common.sh
+
+printf '%s\n' \
+	'SHOW TABLES;' \
+	"SELECT a.foo FROM invites a WHERE a.ds='2008-08-15';" \
+	| hive --database $HIVE_DB | tee hive.log
+
+# hive.log must mention both test tables
+grep -q invites hive.log && grep -q pokes hive.log
diff --git a/image/tests/spark-client.sh b/image/tests/spark-client.sh
new file mode 100755
index 0000000..1ce374b
--- /dev/null
+++ b/image/tests/spark-client.sh
@@ -0,0 +1,12 @@
+#! /bin/sh -e
+# Run the SparkPi example with a local master in client deploy mode and print its result line.
+
+jar_dir=/usr/lib/spark/lib
+if [ -d /usr/lib/spark/examples ]; then
+	jar_dir=/usr/lib/spark/examples/jars
+fi
+jar=$(ls -1 ${jar_dir}/spark-examples*.jar | head -n 1)
+submit="spark-submit --class org.apache.spark.examples.SparkPi --deploy-mode client --master local ${jar} 10"
+echo $submit
+$submit | tee spark-client.log
+grep 'Pi is roughly 3\.' spark-client.log
diff --git a/image/tests/spark-yarn.sh b/image/tests/spark-yarn.sh
new file mode 100755
index 0000000..189af7b
--- /dev/null
+++ b/image/tests/spark-yarn.sh
@@ -0,0 +1,28 @@
+#! /bin/sh -e
+# Submit SparkPi to YARN in cluster deploy mode, then fetch the job output with "yarn logs".
+if test -d /usr/lib/spark/examples; then
+	d=/usr/lib/spark/examples/jars
+else
+	d=/usr/lib/spark/lib
+fi
+cmd="spark-submit --class org.apache.spark.examples.SparkPi --deploy-mode cluster --master yarn $(ls -1 ${d}/spark-examples*.jar | head -n 1) 10"
+echo "$cmd"
+$cmd 2>&1 | tee spark-yarn.log
+# fetch output from yarn: the application id comes from the last "state: FINISHED" report
+id=$(grep 'Application report for .* (state: FINISHED)' spark-yarn.log | tail -n 1 | sed 's/.*Application report for \([^ ]*\).*/\1/')
+# without an id there is nothing to fetch: fail now instead of retrying a malformed command
+test -n "$id" || { echo "no FINISHED application report found in spark-yarn.log" >&2; exit 1; }
+echo " => applicationId $id"
+cmd="yarn logs -applicationId $id"
+echo $cmd
+$cmd > spark-yarn-output.log && ret=0 || ret=$?
+count=0
+# retry up to 20 times while "yarn logs" keeps failing (presumably the logs can lag behind the job)
+while [ "$ret" -ne 0 ] && [ "$count" -lt 20 ]; do
+	sleep 0.5
+	echo $cmd
+	$cmd > spark-yarn-output.log && ret=0 || ret=$?
+	count=$((count+1))
+done
+printf ' => '
+grep 'Pi is roughly 3\.' spark-yarn-output.log
-- 
GitLab