Skip to content
Snippets Groups Projects
Select Git revision
  • d87ddb0a7ff42bdeefde5f3ec0cb788a4ff62735
  • master default protected
  • ci-bullseye
  • wip/bigtop-3.0.0
  • bio3
  • feature/certificates2
6 results

orchestrate.py

Blame
  • orchestrate.py 7.75 KiB
    #! /usr/bin/python3
    
    import argparse
    import importlib
    import json
    import os
    import subprocess
    import sys
    import time
    
    # Actions run when none are named on the command line.
    DEFAULT_ACTIONS = ['files', 'ping', 'init', 'wait', 'deployment']
    # Total seconds spent in executed commands; accumulated by perform_command().
    elapsed = 0
    # Performance log handle; stays None until 'perf.log' is opened further below.
    fperf = None
    
    
    def escape_cmd(cmd):
        """Return one command word, single-quoted if it contains a space.

        The word is wrapped verbatim: quote characters already present in it
        are left as they are. Words without a space are returned unchanged.
        """
        return '\'%s\'' % cmd if ' ' in cmd else cmd
    
    
    def perform_command(cmd, waitExitCode=None):
        """Run one external command, optionally retrying until it succeeds.

        cmd is an argument list handed to subprocess.call(). Its printable form
        is echoed to stdout first; in dry-run mode nothing else happens.

        When waitExitCode is given, the command is run again every 5 seconds
        until it exits with exactly that code (there is no retry limit). The
        duration, the number of attempts and the command are appended to the
        perf log, and the duration is added to the global elapsed counter.
        """
        global elapsed

        desc = ' '.join(escape_cmd(word) for word in cmd)
        print('-> %s' % desc, flush=True)
        start = time.perf_counter()
        if args.dry_run:
            return

        count = 1
        ret = subprocess.call(cmd)
        # a None result can never equal a requested exit code, so the plain
        # inequality also covers the "no result yet" case
        while waitExitCode is not None and ret != waitExitCode:
            time.sleep(5)
            count += 1
            print('->(%d) %s' % (count, desc), flush=True)
            ret = subprocess.call(cmd)

        duration = time.perf_counter() - start
        fperf.write('[%f] (%d) %s\n' % (duration, count, desc))
        elapsed += duration
    
    
    def perform_action(action, commands, waitExitCode=None):
        """Run every command of one orchestration action and log its duration.

        action is the action name; it is passed to the plugin hooks and written
        to the perf log. commands is a sequence of argument lists, each run
        through perform_command() with the given waitExitCode.

        If a plugin component is loaded, the commands it returns from
        component.commands(action) are run after the given ones, and
        component.action(action) is called once all commands have finished.

        The caller's commands list is not modified.
        """
        start = time.perf_counter()
        # work on a copy: extending `commands` in place would leak the plugin
        # commands into the list owned by the caller
        todo = list(commands)
        if component:
            plugin_commands = component.commands(action)
            if plugin_commands:
                todo.extend(plugin_commands)
        for cmd in todo:
            perform_command(cmd, waitExitCode)
        if component:
            component.action(action)
        end = time.perf_counter()
        fperf.write('[%f] Elapsed time of %s\n' % (end - start, action))
    
    
    # Command-line interface. The options are registered in a fixed order
    # because that order is also the one shown by --help.
    parser = argparse.ArgumentParser(description='terraform cluster orchestrator')
    parser.add_argument(
        '-c', '--config',
        default='config.json',
        help='Terraform output for using by orchestrator (default: config.json)',
    )
    parser.add_argument(
        'actions',
        metavar='ACTIONS',
        nargs='*',
        default=DEFAULT_ACTIONS,
        help='actions (default: %s)' % ' '.join(DEFAULT_ACTIONS),
    )
    parser.add_argument(
        '-e', '--ssh',
        default='ssh',
        help='ssh command to use (local and remote)',
    )
    parser.add_argument(
        '-n', '--dry-run',
        action='store_true',
        help='simulated run',
    )
    parser.add_argument(
        '-o', '--ssh-opts',
        default='-o PreferredAuthentications=publickey',
        help='ssh options to use (local and remote)',
    )
    parser.add_argument(
        '-p', '--parameters',
        help='orchestration parameters',
    )
    args = parser.parse_args()
    
    # Read the Terraform output: '-' selects standard input, anything else is
    # the path of a JSON file.
    if args.config != '-':
        with open(args.config) as f:
            j = json.load(f)
    else:
        j = json.load(sys.stdin)

    # Unpack the values used by the rest of the script. A missing key fails
    # right here, before any action starts.
    config = j['config']['value']
    n = int(config['n'])
    d = config['domain']
    hosts = j['hosts']['value']
    public_hosts = j['public_hosts']['value']
    master_hostname = config['master_hostname']
    # the master is contacted through its address from the 'public_hosts' output
    master_ip = public_hosts[master_hostname]
    user = 'deployadm'
    secrets = config['secrets']
    t = config.get('type')
    # ssh command line as an argument list: the command followed by its options
    ssh = [args.ssh] + args.ssh_opts.split(' ')

    # line-buffered, append mode: every run adds to the same perf.log
    fperf = open('perf.log', 'at', 1)
    
    print('== plugin ==', flush=True)
    # A deployment type may ship a plugin module providing a Component class;
    # without a type or without the plugin file there is no component.
    component = None
    plugin_path = 'deployments/%s/plugin.py' % t if t else None
    if plugin_path and os.path.exists(plugin_path):
        print('-> %s' % plugin_path)
        plugin = importlib.import_module('deployments.%s.plugin' % t)
        Component = plugin.Component
        component = Component(args, config, hosts, public_hosts)
    
    # 'files' action: generate the local files 'hosts', 'public_hosts' and
    # 'inventory' in the current directory. In dry-run mode only the file names
    # are printed and nothing is written.
    if 'files' in args.actions:
        print('== files ==', flush=True)
        # 'hosts': fixed localhost/IPv6 header followed by one line per entry
        # of the 'hosts' output (the 'init' action installs it as /etc/hosts)
        print('-> hosts')
        if not args.dry_run:
            with open('hosts', 'w') as f:
                f.write('''\
    127.0.0.1	localhost
    
    # The following lines are desirable for IPv6 capable hosts
    ::1     localhost ip6-localhost ip6-loopback
    ff02::1 ip6-allnodes
    ff02::2 ip6-allrouters
    
    ''')
                for h, ip in hosts.items():
                    # columns: address, FQDN, FQDN with trailing dot, short name
                    f.write('%s	%s.%s	%s.%s.	%s\n' % (ip, h, d, h, d, h))
    
        # 'public_hosts': the same line format, without the header, for the
        # entries of the 'public_hosts' output
        print('-> public_hosts')
        if not args.dry_run:
            with open('public_hosts', 'w') as f:
                for h, ip in public_hosts.items():
                    f.write('%s	%s.%s	%s.%s.	%s\n' % (ip, h, d, h, d, h))
    
        # 'inventory': ansible inventory. All connections use a ProxyCommand
        # through the master's public address; the master is listed under
        # [masters] with its address from the 'hosts' output.
        print('-> inventory')
        if not args.dry_run:
            with open('inventory', 'w') as f:
                f.write('''\
    [all:vars]
    ansible_become=true
    ansible_user=%s
    ansible_ssh_common_args=\'%s -o ForwardAgent=yes -o ProxyCommand="ssh -W {{ ansible_host }}:22 -q \
    {{ ansible_user }}@%s"\'
    
    [masters]
    %s ansible_host=%s
    
    [nodes]
    ''' % (user, args.ssh_opts, master_ip, master_hostname, hosts[master_hostname]))
                # every host except the master goes to the [nodes] group
                for h, ip in hosts.items():
                    if h == master_hostname:
                        continue
                    f.write('\
    %s ansible_host=%s \n' % (h, ip))
        # plugin hook; called in dry-run mode as well
        if component:
            component.action('files')
    
    # 'ping' action: block until the master and then every host are reachable,
    # first by ICMP and then by ssh. Each check is repeated until it exits 0.
    if 'ping' in args.actions:
        print('== ping ==', flush=True)
        master_login = '%s@%s' % (user, master_ip)
        ping_once = ['ping', '-c', '1', '-i', '2', '-q']

        # the master itself: ping, forget its known host key, ssh login
        perform_command(ping_once + [master_ip], 0)
        perform_command(['ssh-keygen', '-R', master_ip])
        perform_command(
            ssh + ['-o', 'ConnectTimeout=5', '-o', 'StrictHostKeyChecking=no', master_login, ':'],
            0)

        # ping every host from the master
        for ip in hosts.values():
            perform_command(ssh + [master_login] + ping_once + [ip], 0)

        # ssh to every host from the master, with the local agent forwarded
        for ip in hosts.values():
            remote_cmd = ' '.join(ssh) \
                + ' -o ConnectTimeout=5 -o StrictHostKeyChecking=no %s :' % ip
            perform_command(ssh + ['-o', 'ForwardAgent=yes', master_login, remote_cmd], 0)

        if component:
            component.action('ping')
    
    # 'init' action: install the generated hosts file and make every machine
    # log in once to every other one so that the ssh host keys get accepted.
    if 'init' in args.actions:
        print('== init ==', flush=True)

        master_login = '%s@%s' % (user, master_ip)
        ssh_line = ' '.join(ssh)

        # every name a host is reached by: short name, FQDN with and without
        # the trailing dot, and the address
        names = list(hosts)
        names += ['%s.%s.' % (host, d) for host in hosts]
        names += ['%s.%s' % (host, d) for host in hosts]
        names += list(hosts.values())

        # shell loop that opens one ssh session to each of those names
        cycle_all = ('echo -n At:; uname -n; for h in %s; do ' % ' '.join(names)) \
            + ssh_line + ' -o StrictHostKeyChecking=no $h :; done'

        commands = [
            ['scp', '-p', 'hosts', '%s:/tmp/' % master_login],
            ssh + [master_login, 'sudo mv -v /tmp/hosts /etc/hosts'],
            # the frontend to all nodes
            ssh + ['-o', 'ForwardAgent=yes', master_login, cycle_all],
        ]

        # local machine to each node, proxied through the master
        for ip in hosts.values():
            # check the special case if it is not already frontend ('ssh-keygen -R' would be a problem)
            if ip != master_ip:
                proxy = 'ProxyCommand=ssh -W %s:22 -q %s@%s' % (ip, user, master_ip)
                commands.append(['ssh-keygen', '-R', ip])
                commands.append(
                    ssh + ['-o', 'ForwardAgent=yes', '-o', proxy,
                           '-o', 'StrictHostKeyChecking=no', '%s@%s' % (user, ip), ':'])

        commands.append(
            ['ansible', '-i', './inventory', '-m', 'copy', '-a', 'src=hosts dest=/etc/hosts', 'nodes'])

        # node to node ssh keys
        # (after the /etc/hosts is distributed)
        for node in hosts:
            # skip the frontend - already covered
            if node != master_hostname:
                # the node to all nodes
                on_node = ssh_line + (' -o ForwardAgent=yes %s \'%s\''
                                      % ('%s.%s' % (node, d), cycle_all))
                commands.append(ssh + ['-o', 'ForwardAgent=yes', master_login, on_node])

        perform_action('init', commands)
    
    # 'wait' action: check that ansible reaches all hosts, then block until
    # each of them has the cloud-init boot-finished marker file.
    if 'wait' in args.actions:
        print('== wait ==', flush=True)
        boot_finished = 'while ! test -f /var/lib/cloud/instance/boot-finished; do sleep 2; done'
        perform_action('wait', [
            ['ansible', '-i', './inventory', '-m', 'command', '-a', 'uname -a', 'all'],
            ['ansible', '-i', './inventory', '-m', 'shell', '-a', boot_finished, 'all'],
        ])
    
    # 'deployment' action: enable and restart the puppet-master service.
    # NOTE(review): the ansible target is 'master' while the generated inventory
    # declares the group [masters]; presumably this matches a host that is
    # literally named 'master' - confirm.
    if 'deployment' in args.actions:
        print('== deployment ==', flush=True)
        enable_puppet = ['ansible', '-i', './inventory', '-m', 'command',
                         '-a', 'systemctl enable puppet-master', 'master']
        # started with the old /etc/hosts
        restart_puppet = ['ansible', '-i', './inventory', '-m', 'command',
                          '-a', 'service puppet-master restart', 'master']
        perform_action('deployment', [enable_puppet, restart_puppet])
    
    # Finish this run's perf log section: total time spent in executed
    # commands, followed by an empty separator line.
    fperf.writelines(['[%f] Elapsed time\n' % elapsed, '\n'])
    fperf.close()