diff --git a/hadoop-hdfs/ctx.yaml b/hadoop-hdfs/ctx.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..9edcd625fa1ef0ff03599e146a7211b84cb51a3d
--- /dev/null
+++ b/hadoop-hdfs/ctx.yaml
@@ -0,0 +1,16 @@
+#cloud-config
+
+merge_type:
+  - name: list
+    settings: [append]
+  - name: dict
+    settings: [recurse_array]
+
+packages:
+  - default-jre-headless
+  - gnupg
+  - puppet
+
+runcmd:
+  - puppet module install cesnet/site_hadoop
+  - mkdir /data
diff --git a/hadoop-hdfs/plugin.py b/hadoop-hdfs/plugin.py
new file mode 100644
index 0000000000000000000000000000000000000000..57569012945616c44f9697afbf476fc1a3e6f09f
--- /dev/null
+++ b/hadoop-hdfs/plugin.py
@@ -0,0 +1,5 @@
+import hadoop.plugin
+
+
+class Component(hadoop.plugin.ComponentHadoop):
+    pass
diff --git a/hadoop-hdfs/site.pp.tmpl b/hadoop-hdfs/site.pp.tmpl
new file mode 100644
index 0000000000000000000000000000000000000000..49f199911e7f5a93eb6b632cbc31a91872e92673
--- /dev/null
+++ b/hadoop-hdfs/site.pp.tmpl
@@ -0,0 +1,120 @@
+$$distribution = '${distribution}'
+$$hdfs_deployed = ${hdfs_deployed}
+$$ssl = false
+
+$$master = '${master_hostname}.${domain}'
+$$frontends = [
+  '${master_hostname}.${domain}',
+]
+$$nodes = suffix(${nodes}, '.${domain}')
+$$zookeepers = [
+  $$master,
+]
+$$realm = '${realm}'
+
+if $$distribution == 'bigtop' {
+  $$version = '1.4.0'
+  $$hadoop_version = 2
+  $$hive_schema_file = 'hive-schema-2.3.0.mysql.sql'
+} elsif $$distribution == 'cloudera' {
+  $$version = '6.3.0'
+  $$hadoop_version = 3
+  $$hive_schema_file = 'hive-schema-2.1.1.mysql.sql'
+}
+
+class{'hadoop':
+  acl => true,
+  hdfs_hostname => $$master,
+  httpfs_hostnames => [
+    $$master,
+  ],
+  frontends => $$frontends,
+  oozie_hostnames => [
+    $$master,
+  ],
+  slaves => $$nodes,
+  zookeeper_hostnames => $$zookeepers,
+  hdfs_name_dirs => [
+    '/data',
+  ],
+  hdfs_data_dirs => [
+    '/data',
+  ],
+  cluster_name => '${domain}',
+  https => $$ssl,
+  realm => $$realm,
+  features => {
+    'yellowmanager' => true,
+    'aggregation' => true,
+  },
+  properties => {
+    'dfs.replication' => 2,
+    'hadoop.proxyuser.hive.groups' => "hive,impala,oozie,users",
+    'hadoop.proxyuser.hive.hosts' => "*",
+  },
+  version => $$hadoop_version,
+  hdfs_deployed => $$hdfs_deployed,
+}
+
+class{'hive':
+  hdfs_hostname => $$master,
+  metastore_hostname => $$master,
+  server2_hostname => $$master,
+  zookeeper_hostnames => $$zookeepers,
+  realm => $$realm,
+  features => {
+    'manager' => true,
+  },
+  #db => 'mariadb',
+  db => 'mysql',
+  db_password => 'good-password',
+  schema_file => $$hive_schema_file,
+}
+
+class { 'spark':
+  historyserver_hostname => $$master,
+  environment => {
+    'LD_LIBRARY_PATH' => '/usr/lib/hadoop/lib/native:$${LD_LIBRARY_PATH}',
+    'SPARK_YARN_USER_ENV' => 'LD_LIBRARY_PATH=$${LD_LIBRARY_PATH},$${SPARK_YARN_USER_ENV}',
+  },
+  #jar_enable => true,
+  realm => $$realm,
+}
+
+class { '::zookeeper':
+  hostnames => $$zookeepers,
+  realm => $$realm,
+}
+
+class{'site_hadoop':
+  distribution => $$distribution,
+  version => $$version,
+  users => [
+    'hawking',
+    'example',
+  ],
+  accounting_enable => false,
+  hbase_enable => false,
+  nfs_frontend_enable => false,
+  oozie_enable => false,
+  pig_enable => false,
+  spark_enable => true,
+}
+
+
+node /${master_hostname}\..*/ {
+  include ::site_hadoop::role::master_hdfs
+  include ::site_hadoop::role::frontend
+  include ::hadoop::httpfs
+  class { 'mysql::bindings':
+    java_enable => true,
+    #java_package_name => 'libmariadb-java',
+  }
+  class { 'mysql::server':
+    root_password => 'root',
+  }
+}
+
+node /${node_hostname}\d*\..*/ {
+  include ::site_hadoop::role::slave
+}
diff --git a/hadoop/plugin.py b/hadoop/plugin.py
index a0ce3821f12a10fcda00913cbedcabb8840e69d0..169b41dd715395c67ff322a96e2e1f0a0a64f64d 100644
--- a/hadoop/plugin.py
+++ b/hadoop/plugin.py
@@ -3,10 +3,11 @@ import string
 
 DEFAULT_DISTRIBUTION = 'bigtop'
 
 
-class Component:
+class ComponentHadoop:
     def __init__(self, args, config, hosts):
         self.args = args
+        self.config = config
         self.params = {
             'distribution': DEFAULT_DISTRIBUTION,
             'domain': config['domain'],
@@ -23,7 +24,7 @@ class Component:
         print('-> site.pp')
         if not self.args.dry_run:
             template = None
-            with open('hadoop/site.pp.tmpl', 'r') as f:
+            with open('%s/site.pp.tmpl' % self.config['type'], 'r') as f:
                 template = string.Template(f.read())
             if template:
                 site = template.substitute(self.params)
@@ -34,3 +35,7 @@ class Component:
         return [
             ['ansible', '-i', './inventory', '-m', 'copy', '-a', 'src=site.pp dest=/root', 'all'],
         ]
+
+
+class Component(ComponentHadoop):
+    pass
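
Note on the template syntax in site.pp.tmpl: the file is rendered through Python's standard string.Template (see hadoop/plugin.py above), so ${name} placeholders are substituted from self.params, while each $$ escapes to a literal $ and therefore survives as a Puppet variable. A minimal sketch of that mechanism, with hypothetical parameter values standing in for what the plugin actually passes:

    import string

    # One line in the style of site.pp.tmpl: $$ escapes to a literal $
    # (a Puppet variable), ${...} is filled in from the parameter dict.
    template = string.Template("$$master = '${master_hostname}.${domain}'\n")

    # Hypothetical values; the real plugin builds self.params from its config.
    print(template.substitute({'master_hostname': 'hdfs01',
                               'domain': 'example.com'}), end='')
    # -> $master = 'hdfs01.example.com'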