Skip to content
Snippets Groups Projects
Commit ef3b0a48 authored by František Dvořák's avatar František Dvořák
Browse files

Finalize Hadoop image - the working version based on BigTop 1.4.0

parent 78981c26
No related branches found
No related tags found
No related merge requests found
Pipeline #414 passed
...@@ -4,16 +4,15 @@ Experiments with Terraform and Hadoop. ...@@ -4,16 +4,15 @@ Experiments with Terraform and Hadoop.
Hadoop image is not required. But it can speed things up, because it contains pre-downloaded and pre-installed Hadoop packages. Hadoop image is not required. But it can speed things up, because it contains pre-downloaded and pre-installed Hadoop packages.
How the image has been built:
apt install openstack-debian-images
image/HOWTO.sh
# Requirements # Requirements
* [Terraform](https://www.terraform.io/) * [Terraform](https://www.terraform.io/)
* [Ansible](https://www.ansible.com/) * [Ansible](https://www.ansible.com/)
# Image
Launch */usr/local/sbin/hadoop-setup.sh* to set up Hadoop on a single machine.
# Cluster # Cluster
Build cluster: Build cluster:
......
#! /bin/sh -xe #! /bin/sh -xe
build-openstack-debian-image --release stretch \ /usr/sbin/build-openstack-debian-image \
--automatic-resize \ --release stretch \
--extra-packages acl,default-jre-headless,gnupg,qemu-guest-agent,puppet \ --extra-packages acl,default-jre-headless,git,gnupg,librarian-puppet,qemu-guest-agent,puppet \
--image-size 5 \ --image-size 3 \
--hook-script `dirname $0`/hadoop.sh 2>&1 | tee build-image.log --hook-script ./hadoop.sh 2>&1 | tee build-image.log
#!/usr/bin/env ruby
#^syntax detection

forge "https://forgeapi.puppetlabs.com"

# Every module comes from the MetaCenterCloudPuppet GitHub organization
# and shares the "cesnet-" name prefix, so declare them all in one loop.
%w[
  site_hadoop
  hadoop
  hadoop_lib
  hbase
  hue
  hive
  oozie
  pig
  spark
  zookeeper
].each do |name|
  mod "cesnet-#{name}",
    :git => "https://github.com/MetaCenterCloudPuppet/cesnet-#{name}/"
end
# Created by image builder (Hadoop).
Explanation: site_hadoop: bigtop
Package: *
Pin: release o=Bigtop
Pin-Priority: 900
# Created by image builder (Hadoop).
Explanation: site_hadoop: cloudera
Package: *
Pin: release o=Cloudera
Pin-Priority: 900
#! /bin/sh #! /bin/sh
wget https://dist.apache.org/repos/dist/release/bigtop/KEYS -O - | chroot $BODI_CHROOT_PATH apt-key add - wget https://dist.apache.org/repos/dist/release/bigtop/KEYS -O - | chroot $BODI_CHROOT_PATH apt-key add -
cp -vp apt/*.pref $BODI_CHROOT_PATH/etc/apt/preferences.d/
cat <<EOF > $BODI_CHROOT_PATH/etc/apt/sources.list.d/bigtop.list cat <<EOF > $BODI_CHROOT_PATH/etc/apt/sources.list.d/bigtop.list
# initial setup # initial setup
deb http://repos.bigtop.apache.org/releases/1.4.0/debian/9/amd64 bigtop contrib deb http://repos.bigtop.apache.org/releases/1.4.0/debian/9/amd64 bigtop contrib
deb-src http://repos.bigtop.apache.org/releases/1.4.0/debian/9/amd64 bigtop contrib deb-src http://repos.bigtop.apache.org/releases/1.4.0/debian/9/amd64 bigtop contrib
EOF EOF
# download and pre-install
chroot $BODI_CHROOT_PATH apt-get update chroot $BODI_CHROOT_PATH apt-get update
chroot $BODI_CHROOT_PATH apt-get install -y hadoop hadoop-client hadoop-hdfs hadoop-mapreduce hadoop-yarn hbase hive-jdbc python-scipy zookeeper chroot $BODI_CHROOT_PATH apt-get install -y hadoop hadoop-client hadoop-hdfs hadoop-mapreduce hadoop-yarn hbase hive-jdbc python-scipy zookeeper
chroot $BODI_CHROOT_PATH apt-get install -dy hadoop-doc hadoop-hdfs-namenode hadoop-httpfs hadoop-hdfs-datanode hadoop-mapreduce-historyserver hadoop-yarn-resourcemanager hadoop-yarn-nodemanager hbase-master hbase-regionserver hive hive-hbase hive-hcatalog hive-metastore hive-server2 libmysql-java mariadb-client mariadb-common mariadb-server spark-core spark-history-server spark-python zookeeper-server chroot $BODI_CHROOT_PATH apt-get install -dy hadoop-doc hadoop-hdfs-namenode hadoop-httpfs hadoop-hdfs-datanode hadoop-mapreduce-historyserver hadoop-yarn-resourcemanager hadoop-yarn-nodemanager hbase-master hbase-regionserver hive hive-hbase hive-hcatalog hive-metastore hive-server2 libmysql-java maven ant mariadb-client mariadb-common mariadb-server spark-core spark-history-server spark-python zookeeper-server
chroot $BODI_CHROOT_PATH puppet module install cesnet/site_hadoop
# setup
cp -vp Puppetfile $BODI_CHROOT_PATH/etc/puppet/code/
chroot $BODI_CHROOT_PATH bash -c 'cd /etc/puppet/code; librarian-puppet install'
cp -vp single.pp $BODI_CHROOT_PATH/root
sed 's/\(\$hdfs_deployed\s*=\s*\).*/\1true/' single.pp > $BODI_CHROOT_PATH/root/single2.pp
chroot $BODI_CHROOT_PATH touch -r /root/single.pp /root/single2.pp
cp -vp scripts/*.sh $BODI_CHROOT_PATH/usr/local/sbin/
#! /bin/sh
#
# Set the system hostname and rewrite /etc/hosts so the hostname resolves
# to every global IP address of this machine.
#
# Usage: [DRY_RUN=1] fix-hostname.sh HOSTNAME [DOMAIN]
#
# With DRY_RUN set, only print the /etc/hosts lines that would be written.
#
if [ -z "$1" ]; then
	echo "Usage: [DRY_RUN=1] $0 HOSTNAME [DOMAIN]"
	exit 0
fi

h="$1"
d="$2"

# /etc/hosts aliases: with a domain, list the FQDN (with and without a
# trailing dot) before the short name.
line="$h"
if [ -n "$d" ]; then
	line="$h.$d $h.$d. $h"
fi

# All global-scope IPv4/IPv6 addresses of interfaces that are up.
ips=`ip address show scope global up | grep '\<inet6\?\>\s' | awk '{print $2}' | cut -d'/' -f1`

if [ -n "$DRY_RUN" ]; then
	for ip in $ips; do
		echo "$ip $line"
	done
else
	# Stop cloud-init from overwriting /etc/hosts on the next boot.
	sed -e "s/^\(manage_etc_hosts\):.*/\1: False/" -i /etc/cloud/cloud.cfg
	echo "$h" > /etc/hostname
	# Build the new hosts file: our address lines first, then the old
	# content.  The group redirection captures all of it; the per-line
	# redirect the original had inside the loop was redundant.
	{
		for ip in $ips; do
			echo "$ip $line"
		done
		echo
		cat /etc/hosts
	} >> /etc/hosts2
	mv /etc/hosts2 /etc/hosts
	hostname "$h"
	if [ -n "$d" ]; then
		domainname "$d"
	fi
fi
#! /bin/sh
#
# Single-node Hadoop setup: ensure the hostname resolves, then apply the
# two-phase puppet configuration.

# Puppet needs a resolvable FQDN; fall back to a local fixup when DNS is
# not usable.  The image builder installs the helper scripts into
# /usr/local/sbin (the original called /usr/local/bin, where the script
# is never installed).
if ! hostname || ! hostname -f || ! ping -c1 `hostname` >/dev/null || ! ping -c1 `hostname -f` >/dev/null; then
	echo "Problem with DNS hostname, fixing..."
	/usr/local/sbin/fix-hostname.sh master hadoop
fi

# Directory used for both HDFS name and data dirs (see single.pp);
# -p keeps the script idempotent when re-run.
mkdir -p /data

# Phase 1 (single.pp) deploys HDFS; phase 2 (single2.pp, generated with
# $hdfs_deployed = true) finishes setup on top of the running HDFS.
puppet apply --test /root/single.pp \
  && puppet apply --test /root/single2.pp
# Single-node Hadoop deployment manifest (phase 1).
# The image builder derives single2.pp from this file with
# $hdfs_deployed switched to true for the second puppet run.

# Hadoop distribution to deploy: 'bigtop' or 'cloudera'.
$distribution = 'bigtop'
# false on the first puppet run; the generated single2.pp sets it to true.
$hdfs_deployed = false
$ssl = false
# Everything runs on this one machine, so every role list is just $::fqdn.
$master = $::fqdn
$frontends = [
$::fqdn,
]
$nodes = [$::fqdn]
$zookeepers = [
$master,
]
# Empty realm disables Kerberos security.
$realm = ''
# Distribution-specific versions and the matching Hive metastore schema.
if $distribution == 'bigtop' {
$version = '1.4.0'
$hadoop_version = 2
$hive_schema_file = 'hive-schema-2.3.0.mysql.sql'
} elsif $distribution == 'cloudera' {
$version = '6.3.0'
$hadoop_version = 3
$hive_schema_file = 'hive-schema-2.1.1.mysql.sql'
}
# Core Hadoop: HDFS, YARN, MapReduce history server and HttpFS, all
# hosted on the single $master node.
class{'hadoop':
acl => true,
hdfs_hostname => $master,
yarn_hostname => $master,
historyserver_hostname => $master,
httpfs_hostnames => [
$master,
],
frontends => $frontends,
oozie_hostnames => [
$master,
],
slaves => $nodes,
zookeeper_hostnames => $zookeepers,
# Name and data directories share /data on this single node
# (created by the setup script before puppet runs).
hdfs_name_dirs => [
'/data',
],
hdfs_data_dirs => [
'/data',
],
cluster_name => $domain,
https => $ssl,
realm => $realm,
features => {
'yellowmanager' => true,
'aggregation' => true,
},
properties => {
# Replication of 2 despite a single datanode — TODO confirm intended.
'dfs.replication' => 2,
# Let the hive user impersonate any user from any host (proxyuser).
'hadoop.proxyuser.hive.groups' => "*",
'hadoop.proxyuser.hive.hosts' => "*",
},
version => $hadoop_version,
hdfs_deployed => $hdfs_deployed,
}
# HBase master and regionserver, colocated on $master.
class{'hbase':
acl => true,
frontends => $frontends,
hdfs_hostname => $master,
master_hostname => $master,
slaves => $nodes,
zookeeper_hostnames => $zookeepers,
features => {
'hbmanager' => true,
},
properties => {
# Port -1 disables the HBase master/regionserver info web UIs.
'hbase.master.info.port' => -1,
'hbase.regionserver.info.port' => -1,
},
realm => $realm,
}
# Hive metastore and HiveServer2 on $master, backed by the local
# MySQL/MariaDB server declared below.
class{'hive':
hdfs_hostname => $master,
metastore_hostname => $master,
server2_hostname => $master,
zookeeper_hostnames => $zookeepers,
realm => $realm,
features => {
'manager' => true,
},
#db => 'mariadb',
db => 'mysql',
# NOTE(review): placeholder password baked into the image — change for
# production use.
db_password => 'good-password',
# Schema file matching the distribution, selected in the prelude above.
schema_file => $hive_schema_file,
}
#class { 'oozie':
# acl => true,
# #defaultFS =>
# hdfs_hostname => $master,
# #db => 'mariadb',
# db => 'mysql',
# db_password => 'good-password',
# oozie_hostname => "$master-disabled",
# realm => $realm,
#}
# Spark with YARN; only the history server needs a dedicated host.
class { 'spark':
historyserver_hostname => $master,
environment => {
# Single-quoted on purpose: puppet must not interpolate ${...} here —
# the variables are expanded by the shell at runtime.
'LD_LIBRARY_PATH' => '/usr/lib/hadoop/lib/native:${LD_LIBRARY_PATH}',
'SPARK_YARN_USER_ENV' => 'LD_LIBRARY_PATH=${LD_LIBRARY_PATH},${SPARK_YARN_USER_ENV}',
},
#jar_enable => true,
realm => $realm,
}
# Single-member ZooKeeper ensemble on $master.
class { '::zookeeper':
hostnames => $zookeepers,
realm => $realm,
}
# Site-wide glue: repositories, users, and which components to enable.
class{'site_hadoop':
distribution => $distribution,
version => $version,
# Local user accounts to create on the machine.
users => [
'hawking',
'example',
],
accounting_enable => false,
hbase_enable => true,
nfs_frontend_enable => false,
# Oozie is disabled (see the commented-out oozie class above).
oozie_enable => false,
pig_enable => false,
spark_enable => true,
}
# master_hdfs, master_yarn, frontend, slave
# (with additional internal dependencies)
include ::site_hadoop::role::simple
include ::hadoop::httpfs
# Java bindings provide the JDBC driver used by the Hive metastore.
class { 'mysql::bindings':
java_enable => true,
#java_package_name => 'libmariadb-java',
}
# Local database server for the Hive metastore.
# NOTE(review): hard-coded root password — acceptable for an image
# build, should be changed for production use.
class { 'mysql::server':
root_password => 'root',
}
#include ::oozie::client
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment