diff --git a/README.md b/README.md index d92b476f69eabfc5d4a38be207c49aaac0fc6ec2..aebc9797a8e707b6e12309ea563ae6242fa4b171 100644 --- a/README.md +++ b/README.md @@ -4,16 +4,15 @@ Experiments with Terraform and Hadoop. Hadoop image is not required. But it can speed things up, because it contains pre-downloaded and pre-installed Hadoop packages. -How the image has been built: - - apt install openstack-debian-images - image/HOWTO.sh - # Requirements * [Terraform](https://www.terraform.io/) * [Ansible](https://www.ansible.com/) +# Image + +Launch */usr/local/sbin/hadoop-single-setup.sh* to set up Hadoop on a single machine. + # Cluster Build cluster: diff --git a/image/HOWTO.sh b/image/HOWTO.sh index 8cb919730158350e68da30e85fe858a070c85cea..50a44bf4f31ed652db9074051a551dd889bb0940 100755 --- a/image/HOWTO.sh +++ b/image/HOWTO.sh @@ -1,6 +1,6 @@ #! /bin/sh -xe -build-openstack-debian-image --release stretch \ - --automatic-resize \ - --extra-packages acl,default-jre-headless,gnupg,qemu-guest-agent,puppet \ - --image-size 5 \ - --hook-script `dirname $0`/hadoop.sh 2>&1 | tee build-image.log +/usr/sbin/build-openstack-debian-image \ + --release stretch \ + --extra-packages acl,default-jre-headless,git,gnupg,librarian-puppet,qemu-guest-agent,puppet \ + --image-size 3 \ + --hook-script ./hadoop.sh 2>&1 | tee build-image.log diff --git a/image/Puppetfile b/image/Puppetfile new file mode 100644 index 0000000000000000000000000000000000000000..4ab90426fcbcb6ab67c85df0bde855b889a8bc27 --- /dev/null +++ b/image/Puppetfile @@ -0,0 +1,34 @@ +#!/usr/bin/env ruby +#^syntax detection + +forge "https://forgeapi.puppetlabs.com" + +mod 'cesnet-site_hadoop', + :git => 'https://github.com/MetaCenterCloudPuppet/cesnet-site_hadoop/' + +mod 'cesnet-hadoop', + :git => 'https://github.com/MetaCenterCloudPuppet/cesnet-hadoop/' + +mod 'cesnet-hadoop_lib', + :git => 'https://github.com/MetaCenterCloudPuppet/cesnet-hadoop_lib/' + +mod 'cesnet-hbase', + :git => 
'https://github.com/MetaCenterCloudPuppet/cesnet-hbase/' + +mod 'cesnet-hue', + :git => 'https://github.com/MetaCenterCloudPuppet/cesnet-hue/' + +mod 'cesnet-hive', + :git => 'https://github.com/MetaCenterCloudPuppet/cesnet-hive/' + +mod 'cesnet-oozie', + :git => 'https://github.com/MetaCenterCloudPuppet/cesnet-oozie/' + +mod 'cesnet-pig', + :git => 'https://github.com/MetaCenterCloudPuppet/cesnet-pig/' + +mod 'cesnet-spark', + :git => 'https://github.com/MetaCenterCloudPuppet/cesnet-spark/' + +mod 'cesnet-zookeeper', + :git => 'https://github.com/MetaCenterCloudPuppet/cesnet-zookeeper/' diff --git a/image/apt/bigtop.pref b/image/apt/bigtop.pref new file mode 100644 index 0000000000000000000000000000000000000000..a773559aa39bcedb6de5f86cac79e91ab5bf1e10 --- /dev/null +++ b/image/apt/bigtop.pref @@ -0,0 +1,5 @@ +# Created by image builder (Hadoop). +Explanation: site_hadoop: bigtop +Package: * +Pin: release o=Bigtop +Pin-Priority: 900 diff --git a/image/apt/cloudera.pref b/image/apt/cloudera.pref new file mode 100644 index 0000000000000000000000000000000000000000..26dac10f251eebb1428dcff50b4cc211dac0655b --- /dev/null +++ b/image/apt/cloudera.pref @@ -0,0 +1,5 @@ +# Created by image builder (Hadoop). +Explanation: site_hadoop: cloudera +Package: * +Pin: release o=Cloudera +Pin-Priority: 900 diff --git a/image/hadoop.sh b/image/hadoop.sh index 40435f5f3d7b2520c5f66f3756c77165048ef25e..e7b1411969859dc9a21f36177cb40e0f1f6d6bb5 100755 --- a/image/hadoop.sh +++ b/image/hadoop.sh @@ -1,11 +1,21 @@ #! 
/bin/sh wget https://dist.apache.org/repos/dist/release/bigtop/KEYS -O - | chroot $BODI_CHROOT_PATH apt-key add - +cp -vp apt/*.pref $BODI_CHROOT_PATH/etc/apt/preferences.d/ cat <<EOF > $BODI_CHROOT_PATH/etc/apt/sources.list.d/bigtop.list # initial setup deb http://repos.bigtop.apache.org/releases/1.4.0/debian/9/amd64 bigtop contrib deb-src http://repos.bigtop.apache.org/releases/1.4.0/debian/9/amd64 bigtop contrib EOF + +# download and pre-install chroot $BODI_CHROOT_PATH apt-get update chroot $BODI_CHROOT_PATH apt-get install -y hadoop hadoop-client hadoop-hdfs hadoop-mapreduce hadoop-yarn hbase hive-jdbc python-scipy zookeeper -chroot $BODI_CHROOT_PATH apt-get install -dy hadoop-doc hadoop-hdfs-namenode hadoop-httpfs hadoop-hdfs-datanode hadoop-mapreduce-historyserver hadoop-yarn-resourcemanager hadoop-yarn-nodemanager hbase-master hbase-regionserver hive hive-hbase hive-hcatalog hive-metastore hive-server2 libmysql-java mariadb-client mariadb-common mariadb-server spark-core spark-history-server spark-python zookeeper-server -chroot $BODI_CHROOT_PATH puppet module install cesnet/site_hadoop +chroot $BODI_CHROOT_PATH apt-get install -dy hadoop-doc hadoop-hdfs-namenode hadoop-httpfs hadoop-hdfs-datanode hadoop-mapreduce-historyserver hadoop-yarn-resourcemanager hadoop-yarn-nodemanager hbase-master hbase-regionserver hive hive-hbase hive-hcatalog hive-metastore hive-server2 libmysql-java maven ant mariadb-client mariadb-common mariadb-server spark-core spark-history-server spark-python zookeeper-server + +# setup +cp -vp Puppetfile $BODI_CHROOT_PATH/etc/puppet/code/ +chroot $BODI_CHROOT_PATH bash -c 'cd /etc/puppet/code; librarian-puppet install' +cp -vp single.pp $BODI_CHROOT_PATH/root +sed 's/\(\$hdfs_deployed\s*=\s*\).*/\1true/' single.pp > $BODI_CHROOT_PATH/root/single2.pp +chroot $BODI_CHROOT_PATH touch -r /root/single.pp /root/single2.pp +cp -vp scripts/*.sh $BODI_CHROOT_PATH/usr/local/sbin/ diff --git a/image/scripts/fix-hostname.sh 
b/image/scripts/fix-hostname.sh new file mode 100755 index 0000000000000000000000000000000000000000..73c22f3fd1a993753cf2d275c31521f2106f406a --- /dev/null +++ b/image/scripts/fix-hostname.sh @@ -0,0 +1,42 @@ +#! /bin/sh + +# +# script to set the hostname properly +# + +if [ -z "$1" ]; then + echo "Usage: [DRY_RUN=1] $0 HOSTNAME [DOMAIN]" + exit 0 +fi + +h="$1" +d="$2" +line="$h" +if [ -n "$d" ]; then + line="$h.$d $h.$d. $h" +fi + +ips=`ip address show scope global up | grep '\<inet6\?\>\s' | awk '{print $2}' | cut -d'/' -f1` + +if [ -n "$DRY_RUN" ]; then + for ip in $ips; do + echo "$ip $line" + done +else + sed -e "s/^\(manage_etc_hosts\):.*/\1: False/" -i /etc/cloud/cloud.cfg + + echo "$h" > /etc/hostname + { + for ip in $ips; do + echo "$ip $line" >> /etc/hosts2 + done + echo + cat /etc/hosts + } >> /etc/hosts2 + mv /etc/hosts2 /etc/hosts + + hostname "$h" + if [ -n "$d" ]; then + domainname "$d" + fi +fi diff --git a/image/scripts/hadoop-single-setup.sh b/image/scripts/hadoop-single-setup.sh new file mode 100755 index 0000000000000000000000000000000000000000..b9be44aba75dea8c72acb4e4e0a9d1be8d68fd36 --- /dev/null +++ b/image/scripts/hadoop-single-setup.sh @@ -0,0 +1,9 @@ +#! /bin/sh +if ! hostname || ! hostname -f || ! ping -c1 `hostname` >/dev/null || ! ping -c1 `hostname -f` >/dev/null; then + echo "Problem with DNS hostname, fixing..." 
+ /usr/local/sbin/fix-hostname.sh master hadoop +fi + +mkdir /data +puppet apply --test /root/single.pp \ + && puppet apply --test /root/single2.pp diff --git a/image/single.pp b/image/single.pp new file mode 100644 index 0000000000000000000000000000000000000000..ad0a8cec80ba8e203d77e70d2c3e00e6ae2719a4 --- /dev/null +++ b/image/single.pp @@ -0,0 +1,147 @@ +$distribution = 'bigtop' +$hdfs_deployed = false +$ssl = false + +$master = $::fqdn +$frontends = [ + $::fqdn, +] +$nodes = [$::fqdn] +$zookeepers = [ + $master, +] +$realm = '' + +if $distribution == 'bigtop' { + $version = '1.4.0' + $hadoop_version = 2 + $hive_schema_file = 'hive-schema-2.3.0.mysql.sql' +} elsif $distribution == 'cloudera' { + $version = '6.3.0' + $hadoop_version = 3 + $hive_schema_file = 'hive-schema-2.1.1.mysql.sql' +} + +class{'hadoop': + acl => true, + hdfs_hostname => $master, + yarn_hostname => $master, + historyserver_hostname => $master, + httpfs_hostnames => [ + $master, + ], + frontends => $frontends, + oozie_hostnames => [ + $master, + ], + slaves => $nodes, + zookeeper_hostnames => $zookeepers, + hdfs_name_dirs => [ + '/data', + ], + hdfs_data_dirs => [ + '/data', + ], + cluster_name => $domain, + https => $ssl, + realm => $realm, + features => { + 'yellowmanager' => true, + 'aggregation' => true, + }, + properties => { + 'dfs.replication' => 2, + 'hadoop.proxyuser.hive.groups' => "*", + 'hadoop.proxyuser.hive.hosts' => "*", + }, + version => $hadoop_version, + hdfs_deployed => $hdfs_deployed, +} + +class{'hbase': + acl => true, + frontends => $frontends, + hdfs_hostname => $master, + master_hostname => $master, + slaves => $nodes, + zookeeper_hostnames => $zookeepers, + features => { + 'hbmanager' => true, + }, + properties => { + 'hbase.master.info.port' => -1, + 'hbase.regionserver.info.port' => -1, + }, + realm => $realm, +} + +class{'hive': + hdfs_hostname => $master, + metastore_hostname => $master, + server2_hostname => $master, + zookeeper_hostnames => $zookeepers, + realm 
=> $realm, + features => { + 'manager' => true, + }, + #db => 'mariadb', + db => 'mysql', + db_password => 'good-password', + schema_file => $hive_schema_file, +} + +#class { 'oozie': +# acl => true, +# #defaultFS => +# hdfs_hostname => $master, +# #db => 'mariadb', +# db => 'mysql', +# db_password => 'good-password', +# oozie_hostname => "$master-disabled", +# realm => $realm, +#} + +class { 'spark': + historyserver_hostname => $master, + environment => { + 'LD_LIBRARY_PATH' => '/usr/lib/hadoop/lib/native:${LD_LIBRARY_PATH}', + 'SPARK_YARN_USER_ENV' => 'LD_LIBRARY_PATH=${LD_LIBRARY_PATH},${SPARK_YARN_USER_ENV}', + }, + #jar_enable => true, + realm => $realm, +} + +class { '::zookeeper': + hostnames => $zookeepers, + realm => $realm, +} + +class{'site_hadoop': + distribution => $distribution, + version => $version, + users => [ + 'hawking', + 'example', + ], + accounting_enable => false, + hbase_enable => true, + nfs_frontend_enable => false, + oozie_enable => false, + pig_enable => false, + spark_enable => true, +} + + +# master_hdfs, master_yarn, frontend, slave +# (with additional internal dependencies) +include ::site_hadoop::role::simple + +include ::hadoop::httpfs +class { 'mysql::bindings': + java_enable => true, + #java_package_name => 'libmariadb-java', +} +class { 'mysql::server': + root_password => 'root', +} +#include ::oozie::client