#!/usr/bin/python3

# Copyright 2020 Acronis
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# This file is a barebones file needed to fill a gap until Ansible 2.0. No
# error checking, no deletions, no updates. Idempotent creation only.

# If you look closely, you will see we aren't _really_ using the shade module;
# we just use it to slightly abstract the authentication model. As patches land
# in upstream shade we will be able to use more of the shade module. Until then,
# if we want to be 'stable' we really need to be using it as a passthrough.

import argparse
import contextlib
import datetime
import functools
import logging
import multiprocessing
import pathlib
import os
import subprocess
import sys
import time
import traceback
import warnings

import docker

from oslo_config import cfg

from keystonemiddleware.auth_token import list_opts

from keystoneauth1 import loading

from keystoneauth1 import identity
from keystoneauth1.session import Session
from keystoneclient.v3.client import Client as KeystoneClient

from gnocchiclient.client import Client as GnocchiClient


# log warnings only once per type
warnings.simplefilter('default')

GNOCCHI_CONF = cfg.CONF
LOG = logging.getLogger()

VSTORAGE_MOUNTPOINT = str(pathlib.Path('/mnt/vstorage').resolve())
GNOCCHI_LOCAL_ENDPOINT = 'http://127.0.0.1:8043/'
GNOCCHI_RELATIVE_DATASTORE = '/vols/datastores/gnocchi'
GNOCCHI_DATASTORE = VSTORAGE_MOUNTPOINT + GNOCCHI_RELATIVE_DATASTORE

GNOCCHI_PLOOP_DATASTORE = GNOCCHI_DATASTORE + '/ploop'
GNOCCHI_PLOOP_DISKDESCRIPTOR = GNOCCHI_PLOOP_DATASTORE + '/DiskDescriptor.xml'
GNOCCHI_PLOOP_RELATIVE_PATH = GNOCCHI_RELATIVE_DATASTORE + '/ploop/ploop'
GNOCCHI_PLOOP_FULL_PATH = GNOCCHI_DATASTORE + '/ploop/ploop'

GNOCCHI_PLOOP_PATHS = (GNOCCHI_PLOOP_RELATIVE_PATH, GNOCCHI_PLOOP_FULL_PATH)

GHOCCNI_PLOOP_MOUNTPOINT = '/mnt/gnocchi'

GNOCCHI_FS_CHECK_DIR = GHOCCNI_PLOOP_MOUNTPOINT + '/ping'
GNOCCHI_FS_CHECK_PING_FILE = GNOCCHI_FS_CHECK_DIR + '/ping'

GNOCCHI_CONTAINERS = ('gnocchi_api', 'gnocchi_metricd')

GNOCCHI_MAX_API_FAILURES = 8
GNOCCHI_MAX_CONTAINER_FAILURES = 3

PLOOP_MOUNT_TIMEOUT = 10 * 60


def _noexcept(error_rv=False):
    def dec(fn):
        @functools.wraps(fn)
        def wrap(*args, **kwargs):
            try:
                return fn(*args, **kwargs)
            except Exception as e:
                LOG.debug('noexcept prevented %s', e)
                return error_rv
        return wrap
    return dec


class Process(multiprocessing.Process):
    def __init__(self, *args, **kwargs):
        super(Process, self).__init__(*args, **kwargs)
        self._chan_parent, self._chan_child = multiprocessing.Pipe()
        self._exception = None

    def run(self):
        # send exception details though pipe
        try:
            super(Process, self).run()
            self._chan_child.send(None)
        except:  # catch all exceptions
            e = sys.exc_info()[1]
            tb = traceback.format_exc()
            self._chan_child.send((e, tb))
            raise

    @property
    def exception(self):
        if self._chan_parent.poll():
            self._exception = self._chan_parent.recv()
        return self._exception


def is_alive_process(p):  # could be in Adaptor
    if isinstance(p, Process):
        return p.is_alive
    return p.poll() is None


class ProcessAdaptor:
    def __init__(self, process):
        self._process = process

    @staticmethod
    def _is_alive_process(process):
        if isinstance(process, Process):
            return process.is_alive()
        return process.poll() is None

    def wait(self, timeout=30):
        for _sec in range(timeout):
            if not self._is_alive_process(self._process):
                break
            time.sleep(1)
        return self

    def is_running(self):
        return self._is_alive_process(self._process)

    def is_succeded(self):
        if isinstance(self._process, Process):
            if self._process.exception:
                LOG.error('%s', self._process.exception)
            return self._process.exception is None

        return self._process.wait() == 0


def _write_date(filepath):
    f = open(filepath, 'w')
    f.write(datetime.datetime.utcnow().isoformat())
    f.flush()
    os.fsync(f.fileno())
    f.close()


def with_docker_client(fn):
    @functools.wraps(fn)
    def wrap(*args, **kwargs):
        with contextlib.closing(docker.from_env()) as client:
            return fn(client, *args, **kwargs)
    return wrap


@with_docker_client
def _stop_docker_containers(client, names):
    containers = [c for c in client.containers.list() if c.name in names]

    errors = []
    if containers:
        LOG.info('stopping docker containers: %s',
                 ', '.join(c.name for c in containers))
    for c in containers:
        try:
            c.stop()
        except Exception as e:
            errors.append(e)
    if errors:
        raise Exception('containers stop errors: {}'.format(errors))


@with_docker_client
def _restart_docker_containers(client, names):
    containers = [c for c in client.containers.list(all=True) if c.name in names]

    if len(containers) < len(names):
        raise Exception('not all expected containers exists')

    errors = []
    LOG.info('restarting docker containers: %s',
             ', '.join(c.name for c in containers))
    for c in containers:
        try:
            c.restart()
        except Exception as e:
            errors.append(e)
    if errors:
        raise Exception('containers start errors: {}'.format(errors))


@with_docker_client
def _check_docker_containers(client, names, running):
    containers = [c for c in client.containers.list(all=not running) if c.name in names]

    if len(containers) < len(names):
        raise Exception('not all exptected containers found')


class ProcessManager:
    def __init__(self):
        self._mount_ploop = None
        self._umount_ploop = None
        self._check_ploop_fs = None
        self._stop_gnocchi_containers = None
        self._start_gnocchi_containers = None
        self._check_gnocchi_containers_exists = None

    def _get_ploop_device(self):
        p = subprocess.Popen(
            ['ploop', 'list'], stdout=subprocess.PIPE,
            universal_newlines=True, bufsize=0)

        # it doesn't touch vstorage, wait blocking
        stdout = p.communicate()[0]
        if p.returncode != 0:
            return None

        for line in stdout.splitlines():
            values = line.split()
            # [device_name, ploop_path]
            if len(values) < 2:
                continue
            device_name = values[0]
            ploop_path = values[1]
            # on different versions or mount options it return
            # either full path or vstorage relative path
            if ploop_path in GNOCCHI_PLOOP_PATHS:
                devmapper_path = '/dev/mapper/' + device_name
                if os.path.exists(devmapper_path):
                    return devmapper_path
                return '/dev/' + device_name

        return None

    @_noexcept()
    def is_ploop_mounted(self):
        return bool(self._get_ploop_device())

    @_noexcept()
    def mount_ploop(self, timeout=30):
        if self._mount_ploop:
            if self._mount_ploop.is_running():
                return False
            self._mount_ploop = None

        device = self._get_ploop_device()
        if device:
            return True

        LOG.info('mounting ploop device')
        process = ProcessAdaptor(subprocess.Popen([
            'ploop', 'mount', '-F',
            '-m', GHOCCNI_PLOOP_MOUNTPOINT,
            GNOCCHI_PLOOP_DISKDESCRIPTOR]))

        if process.wait(timeout).is_running():
            self._mount_ploop = process
            return False

        return process.is_succeded()

    @_noexcept()
    def umount_ploop(self, timeout=30):
        if self._umount_ploop:
            if self._umount_ploop.is_running():
                return False
            self._umount_ploop = None

        device = self._get_ploop_device()
        if device is None:
            return True

        LOG.info('unmounting ploop device')
        process = ProcessAdaptor(subprocess.Popen([
            'ploop', 'umount', '-d', device]))

        if process.wait(timeout).is_running():
            self._umount_ploop = process
            return False

        return process.is_succeded()

    @_noexcept()
    def check_ploop_fs(self, timeout=30):
        device = self._get_ploop_device()
        if device is None:
            return False

        if self._check_ploop_fs:
            if self._check_ploop_fs.is_running():
                return False
            self._check_ploop_fs = None

        LOG.debug('checking ploop fs')
        process = Process(
            target=_write_date, args=(GNOCCHI_FS_CHECK_PING_FILE,))
        process.start()
        process = ProcessAdaptor(process)

        if process.wait(timeout).is_running():
            self._check_ploop_fs = process
            return False

        return process.is_succeded()

    @_noexcept()
    def stop_gnocchi_services(self, timeout=30):
        if self._stop_gnocchi_containers:
            if self._stop_gnocchi_containers.is_running():
                return False
            self._stop_gnocchi_containers = None

        process = Process(
            target=_stop_docker_containers, args=(GNOCCHI_CONTAINERS,))
        process.start()
        process = ProcessAdaptor(process)

        if process.wait(timeout).is_running():
            self._stop_gnocchi_containers = process
            return False

        return process.is_succeded()

    @_noexcept()
    def start_gnocchi_services(self, timeout=30):
        if self._start_gnocchi_containers:
            if self._start_gnocchi_containers.is_running():
                return False
            self._start_gnocchi_containers = None

        process = Process(
            target=_restart_docker_containers, args=(GNOCCHI_CONTAINERS,))
        process.start()
        process = ProcessAdaptor(process)

        if process.wait(timeout).is_running():
            self._start_gnocchi_containers = process
            return False

        return process.is_succeded()

    @_noexcept()
    def check_gnocchi_services(self, timeout=30, running=False):
        if self._check_gnocchi_containers_exists:
            if self._check_gnocchi_containers_exists.is_running():
                return False
            self._check_gnocchi_containers_exists = None

        LOG.debug('checking gnocchi services')
        process = Process(
            target=_check_docker_containers, args=(GNOCCHI_CONTAINERS, running))
        process.start()
        process = ProcessAdaptor(process)

        if process.wait(timeout).is_running():
            self._check_gnocchi_containers_exists = process
            return False

        return process.is_succeded()


class GnocchiManager:
    def __init__(self, conf):
        self._pm = ProcessManager()

        # auth credentials are taken from gnocchi config
        auth = loading.load_auth_from_conf_options(conf, 'keystone_authtoken')
        session = Session(auth=auth, verify=False, timeout=60)
        self._client = GnocchiClient(
            version='1', session=session,
            # we check local API endpoint only, proxy/lb issues are not ours
            adapter_options=dict(endpoint_override=GNOCCHI_LOCAL_ENDPOINT))

        self._api_failures_counter = 0
        self._container_failures_counter = 0
        self._master = False

    def _get_metric_id(self):
        # get or create gnocchi.ping resource with gnocchi.ping metric
        try:
            resource = self._client.resource.get('generic', 'gnocchi.ping')
        except Exception:  # gnocchi 404
            resource = self._client.resource.create(
                'generic',
                {
                    'metrics': {
                        'gnocchi.ping': {'archive_policy_name': 'bool'},
                    },
                    'id': 'gnocchi.ping',
                }
            )
        return resource['metrics']['gnocchi.ping']

    def ping_api(self):
        # set current time value for gnocchi.ping metric
        try:
            metric_id = self._get_metric_id()
            self._client.metric.add_measures(metric_id, [dict(timestamp=0, value=time.time())])
            LOG.info('API ping OK')
        except Exception as e:
            LOG.warning('API ping error "%s"', e)
            return False
        return True

    def _stop_mastering(self):
        self._pm.stop_gnocchi_services()
        self._container_failures_counter = 0
        self._pm.umount_ploop()
        self._master = False

    def _become_master(self):
        rv = self._pm.mount_ploop(timeout=PLOOP_MOUNT_TIMEOUT) and \
            self._pm.check_ploop_fs() and \
            self._pm.start_gnocchi_services()
        self._master = rv
        return rv

    def _check_node_is_master(self):
        # simple check that HA master is on the node
        return 0 == subprocess.call(
            ['/usr/bin/systemctl', 'is-enabled', 'vstorage-ui-backend'], stdout=subprocess.PIPE)

    def loop(self):
        self._master = self._check_node_is_master()
        LOG.info('starting as master' if self._master else 'not master')

        while True:
            if self._master:
                if not self._check_node_is_master():
                    LOG.warning('node is no master')
                    self._stop_mastering()
                    time.sleep(5)
                    continue

                if not self._pm.check_ploop_fs():
                    LOG.warning('ploop filesystem check failed')
                    self._stop_mastering()
                    time.sleep(5)
                    continue

                if not self._pm.check_gnocchi_services(running=True):
                    self._container_failures_counter += 1
                    if self._container_failures_counter > GNOCCHI_MAX_CONTAINER_FAILURES:
                        LOG.warning('gnocchi services are not running')
                        self._stop_mastering()
                        time.sleep(10)
                        continue
                    self._pm.start_gnocchi_services()
                    self._api_failures_counter = 0

                if not self.ping_api():
                    self._api_failures_counter += 1

                    if self._api_failures_counter > GNOCCHI_MAX_API_FAILURES:
                        LOG.warning('too many API failures')
                        self._stop_mastering()
                        continue
                    time.sleep(10)
                    continue

                # ok, everything works
                self._container_failures_counter = 0
                self._api_failures_counter = 0
                time.sleep(20)
                continue

            # not master
            if not self._pm.check_gnocchi_services():
                # docker containers are missing or docker service is down
                # this is unexpected, probably we are in the middle of a
                # manual reconfiguration
                time.sleep(15)
                continue

            if self._check_node_is_master():
                # node is HA master, mount and start services
                if self._become_master():
                    continue

            # definitely not master
            self._stop_mastering()
            LOG.info('not master')

            time.sleep(30)


def _watchdog(_args):
    GNOCCHI_CONF(
        [], project=__name__,
        default_config_files=['/etc/kolla/gnocchi-api/gnocchi.conf'],
    )

    GNOCCHI_CONF.register_opt(cfg.StrOpt('file_basepath'), group='storage')
    if not GNOCCHI_CONF.storage.file_basepath.startswith(GHOCCNI_PLOOP_MOUNTPOINT):
        LOG.error('unexpected storage path in gnocchi config, must be %s',
                  GHOCCNI_PLOOP_MOUNTPOINT)
        sys.exit(1)

    mgr = GnocchiManager(GNOCCHI_CONF)
    try:
        mgr.loop()
    except Exception as e:
        LOG.error('%s', e)
        sys.exit(1)


def _create_ploop(_args):
    if not os.path.exists(GNOCCHI_PLOOP_DATASTORE):
        os.makedirs(GNOCCHI_PLOOP_DATASTORE)

    if os.path.exists(GNOCCHI_PLOOP_DISKDESCRIPTOR):
        return True

    process = subprocess.Popen([
        'ploop', 'init', '-s', '500G', '-t', 'ext4', GNOCCHI_PLOOP_FULL_PATH])

    return process.wait() == 0


def _mount_ploop(args):
    if not ProcessManager().mount_ploop(args.timeout):
        sys.exit(1)


def _umount_ploop(args):
    if not ProcessManager().umount_ploop(args.timeout):
        sys.exit(1)


def _check_ploop_fs(args):
    if not ProcessManager().check_ploop_fs(args.timeout):
        sys.exit(1)


def _main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--timeout', action="store", dest="timeout", default=30, type=int)
    parser.add_argument('--debug', action="store_true", dest="debug", default=False)

    commands = parser.add_subparsers(title='sub-commands')

    subparser = commands.add_parser('watchdog')
    subparser.set_defaults(action=_watchdog)

    ploop_parser = commands.add_parser('ploop')
    ploop_commands = ploop_parser.add_subparsers(title='ploop-commands')

    subparser = ploop_commands.add_parser('create')
    subparser.set_defaults(action=_create_ploop)

    subparser = ploop_commands.add_parser('mount')
    subparser.set_defaults(action=_mount_ploop)
    subparser = ploop_commands.add_parser('umount')
    subparser.set_defaults(action=_umount_ploop)
    subparser = ploop_commands.add_parser('check-fs')
    subparser.set_defaults(action=_check_ploop_fs)
    args = parser.parse_args(sys.argv[1:])

    logging.basicConfig(
        level=logging.DEBUG if args.debug else logging.INFO,
        format="%(asctime)s - %(name)s - %(levelname)s - %(message)s",
        handlers=[
            logging.StreamHandler(),
        ]
    )

    args.action(args)


if __name__ == "__main__":
    _main()
