#!/usr/bin/python -u

#
# Copyright (c) 2017 Parallels International GmbH
#

from sys import exit
import subprocess32
import prlsdkapi
import libvirt
import libvirt_qemu
import re
import os, sys
import json
import time
import yum
import signal
from ctypes import c_uint
from prlsdkapi import consts
from prlsdkapi import errors
from pprint import pprint
from contextlib import contextmanager

# Initialize the Virtuozzo SDK library (must happen before the SDK is used below).
prlsdkapi.init_server_sdk()

# JSON file the updater settings are loaded from and saved to.
config_f = '/var/lib/vz-qemu-engine-updater.json'

# VM states in which update_vms() postpones the VM and retries it later.
flags_transitional = [
	consts.VMS_STARTING,
	consts.VMS_SNAPSHOTING,
	consts.VMS_RESETTING,
	consts.VMS_PAUSING,
	consts.VMS_CONTINUING,
	consts.VMS_RESTORING,
	consts.VMS_COMPACTING,
	consts.VMS_RECONNECTING,
	consts.VMS_RESUMING,
	consts.VMS_MIGRATING,
	consts.VMS_PAUSED,
]

# VM states in which update_vms() skips the VM as "not running".
flags_stopped = [
	consts.VMS_STOPPED,
	consts.VMS_DELETING_STATE,
	consts.VMS_MOUNTED,
	consts.VMS_UNKNOWN,
	consts.VMS_SUSPENDING,
	consts.VMS_STOPPING,
	consts.VMS_SUSPENDED,
	consts.VMS_SUSPENDING_SYNC,
]

def sigint_exit(signum, frame):
	"""
	SIGINT handler: ask for confirmation before quitting.

	While the question is displayed the previously installed handler
	(original_sigint) is put back, because this function is not re-entrant
	and a second CTRL+C inside raw_input must not call it again.
	The script exits with status 255 when the answer starts with "y" or when
	CTRL+C is pressed at the prompt; after any other answer this handler is
	installed again.
	"""
	signal.signal(signal.SIGINT, original_sigint)

	question = "\nAre you sure you want to interrupt execution and quit? (y/n)> "
	try:
		answer = raw_input(question)
		if answer.lower().startswith('y'):
			exit(255)
	except KeyboardInterrupt:
		# A second interrupt while waiting for the answer counts as a "yes"
		print("Exiting on Keyboard Interrupt...")
		exit(255)

	# The user declined: take over SIGINT handling again
	signal.signal(signal.SIGINT, sigint_exit)

@contextmanager
def suppress_stdout():
	"""
	Supplementary context manager that silences stdout (used around yum calls).

	sys.stdout points to os.devnull inside the "with" body and is set back to
	its previous value afterwards, even if the body raises.
	"""
	with open(os.devnull, "w") as sink:
		saved_stream, sys.stdout = sys.stdout, sink
		try:
			yield
		finally:
			sys.stdout = saved_stream

def init_prl():
	"""
	Connect to local Dispatcher and return the logged-in Server handle.

	A login is attempted once per 5 seconds of config["reconnect_timeout"];
	at least one attempt is always made, even if the timeout is below 5.
	Exits with status 3 when every attempt has failed.
	"""
	global config
	timeout = config["reconnect_timeout"]
	# "//" keeps integer semantics; max() guarantees one attempt for tiny timeouts
	attempts = max(1, timeout // 5)
	for tries in range(attempts):
		try:
			server = prlsdkapi.Server()
			server.login_local().wait()
			return server
		# "Exception" (not a bare except) so that CTRL+C / exit() are not
		# mistaken for a connection failure and retried
		except Exception:
			print("Unable to connect to Virtuozzo Dispatcher, retry (%d/%d)" % ((tries+1)*5, timeout))
			time.sleep(5)
	print("Unable to connect to Virtuozzo Dispatcher.")
	exit(3)

def init_libvirt():
	"""
	Connect to local Libvirt engine and return connection handle.

	A connection is attempted once per 5 seconds of config["reconnect_timeout"];
	at least one attempt is always made, even if the timeout is below 5.
	Exits with status 2 when every attempt has failed.
	"""
	global config
	timeout = config["reconnect_timeout"]
	# "//" keeps integer semantics; max() guarantees one attempt for tiny timeouts
	attempts = max(1, timeout // 5)
	for tries in range(attempts):
		try:
			conn = libvirt.open(None)
		# "Exception" (not a bare except) so that CTRL+C / exit() are not
		# mistaken for a connection failure and retried
		except Exception:
			conn = None
		if conn is not None:
			return conn
		# A missing handle is treated like a failed attempt: report and wait
		print("Unable to connect to Libvirt daemon, retry (%d/%d)" % ((tries+1)*5, timeout))
		time.sleep(5)
	print("Unable to connect to Libvirt daemon.")
	exit(2)

def init_yum():
	"""
	Open the YUM database and return the YumBase object.

	The object is used by the other functions to take the yum lock, so that
	KVM/QEMU engine updates do not collide with YUM transactions.
	Exits with status 6 if the object cannot be created.
	"""
	try:
		return yum.YumBase()
	except:
		print("Unable to open connection to YUM database.")
		exit(6)

def check_if_still_connected():
	"""
	Check if we're still connected to Dispatcher and Libvirt.
	Connection could have been dropped while we waited for a retry.
	Reconnect if connection was dropped.

	The global handles prlserver / libvirtcon are replaced on a successful
	reconnect. Exits with status 3 (Dispatcher) or 2 (Libvirt) if a dropped
	connection cannot be restored.
	"""
	global prlserver
	global libvirtcon

	# try Dispatcher: any failing request is taken as a dropped connection
	try:
		prlserver.get_common_prefs().wait()
	except Exception:
		print("Connection to PrlSDK was dropped! Will try to reconnect.")
		try:
			# Log in on a fresh Server object and keep the Server itself
			# (not the job result) as the new handle, as init_prl() does
			server = prlsdkapi.Server()
			server.login_local().wait()
			prlserver = server
		except Exception:
			print("Unable to restore connection to Dispatcher, interrupting.")
			exit(3)

	# try Libvirt: any failing request is taken as a dropped connection
	try:
		libvirtcon.getSysinfo()
	except Exception:
		print("Connection to LibVirt was dropped! Will try to reconnect.")
		try:
			libvirtcon = libvirt.open()
		except Exception:
			print("Unable to restore connection to Libvirt, interrupting.")
			exit(2)

def load_config(cfg_f):
	"""
	Read configuration from a file and validate it.

	cfg_f is the path of the JSON configuration file. If the file cannot be
	read, is not valid JSON or does not contain a JSON object, the defaults
	from init_config() are used. Every invalid or missing setting is reset
	to its default value. When anything had to be corrected, an existing
	file is renamed to "<cfg_f>.save" and the corrected configuration is
	written to cfg_f.

	Returns the validated configuration dictionary.
	"""
	config = {}
	malformed = False
	try:
		# "with" closes the file even if parsing fails
		with open(cfg_f, 'r') as cfg_file:
			config = json.load(cfg_file)
		# Valid JSON that is not an object (list, number, ...) cannot be
		# validated key by key below, so treat it as unreadable
		if not isinstance(config, dict):
			raise ValueError("configuration is not a JSON object")
	except Exception:
		print("Failed to read configuration file %s." % cfg_f)
		print("Will use default values.")
		config = init_config()
		malformed = True

	# Validate config to avoid malfunction
	if "yum_timeout" not in config or not isinstance(config["yum_timeout"], int) or config["yum_timeout"] < 1:
		print("Invalid Yum Timeout in config! Will reset to default value.")
		config["yum_timeout"] = 3600
		malformed = True

	if config["yum_timeout"] < 3600:
		print("Default value for yum_timeout has changed. It is recommended to be at least 1 hour long.")
		config["yum_timeout"] = 3600
		malformed = True

	if "retries" not in config or not isinstance(config["retries"], int):
		print("Invalid Max Retries value in config! Will reset to default value.")
		config["retries"] = 5
		malformed = True

	# A negative delay would later be passed to time.sleep(), which rejects it
	if "retry_delay" not in config or not isinstance(config["retry_delay"], int) or config["retry_delay"] < 0:
		print("Invalid Retry Delay value in config! Will reset to default.")
		config["retry_delay"] = 900
		malformed = True

	# retry_max == 0 is accepted as-is; any other retry_max must not be
	# smaller than retry_delay (validated above)
	if ("retry_step" not in config or "retry_max" not in config) or \
			not isinstance(config["retry_max"], int) or \
			not isinstance(config["retry_step"], int) or \
			(config["retry_max"] < config["retry_delay"] and config["retry_max"] != 0 ) or \
			config["retry_step"] < 0:
		print("Invalid Retry Max and Retry Step values in config! Will reset to default.")
		config["retry_max"] = 3600
		config["retry_step"] = 600
		malformed = True

	if "reconnect_timeout" not in config or not isinstance(config["reconnect_timeout"], int):
		print("Invalid Reconnect Timeout value in config! Will reset to default.")
		config["reconnect_timeout"] = 1800
		malformed = True

	if malformed:
		# Keep the file we could not use as-is, then write the corrected one
		if os.path.isfile(cfg_f):
			print("Saved malformed config as %s.save." % cfg_f)
			os.rename(cfg_f, cfg_f+".save")
		save_config(config, cfg_f)

	return config

def save_config(cfg, cfg_f):
	"""
	Save config to a file as indented JSON.

	cfg is the configuration dictionary, cfg_f the path to write.
	Saving is best-effort: a failure is reported on stdout and not raised.
	"""
	try:
		# Serialize before opening the file: opening with 'w' truncates it,
		# so a serialization error must not wipe the existing configuration
		payload = json.dumps(cfg, indent = 3)
		with open(cfg_f, 'w') as cfg_file:
			cfg_file.write(payload)
	except Exception:
		print("Failed to save configuration file!")

def init_config():
	"""
	Build and return the default configuration.
	Used when the configuration file could not be read.
	"""
	print("Initializing default config file")

	return {
		# Retry failed VMs "retries" times
		"retries": 5,
		# Wait "retry_delay" seconds before retrying failed VMs
		"retry_delay": 900,
		# Wait "yum_timeout" seconds to obtain yum lock
		"yum_timeout": 3600,
		# Increase retry_delay by "retry_step" seconds after each retry
		"retry_step": 600,
		# Increased retry_delay cannot be larger than "retry_max" seconds
		"retry_max": 3600,
		# Retry connection to Dispatcher/Libvirt within X seconds
		"reconnect_timeout": 1800,
	}

def try_lock_yum(yb):
	"""
	Try to lock Yum database - we do not want KVM/QEMU engine update to take
	place while there's a yum update running.

	yb is the YumBase object to lock. The lock is retried every 5 seconds
	for up to config["yum_timeout"] seconds.

	Returns True once the lock is taken, False if the timeout ran out.
	"""
	global config
	remaining = config["yum_timeout"]
	while remaining > 0:
		try:
			with suppress_stdout():
				yb.doLock()
		# "Exception" (not a bare except): CTRL+C and the exit() raised by
		# the SIGINT handler must end the wait, not count as "locked"
		except Exception:
			print("Yum database is locked, waiting.")
			remaining -= 5
			time.sleep(5)
		else:
			return True
	return False

def is_supported_version(package):
	"""
	Check if supplied version supports KVM/QEMU engine update (>=2.6.0).

	package is a version string such as "qemu-kvm-vz-2.6.0-28.3.6.vz7.30".
	Only the major and minor numbers following "qemu-kvm-vz-" are compared.

	Returns True for 2.6 and newer. Returns False for older versions and for
	strings that do not carry a "qemu-kvm-vz-<major>.<minor>" version at all
	(empty value, other package name), instead of raising.
	"""
	if not package:
		return False

	found = re.search(r'qemu-kvm-vz-(\d+)\.(\d+)', package)
	if found is None:
		return False

	# Tuple comparison: major decides first, minor breaks the tie
	return (int(found.group(1)), int(found.group(2))) >= (2, 6)

def get_host_qemu_version():
	"""
	Query current KVM/QEMU engine version installed on the host.

	The rpm database is searched for the qemu-kvm-vz package while the yum
	lock is held. Returns the version as "<name>-<ver>-<release>".

	Exits with status 4 if the yum lock cannot be taken, 7 if the number of
	installed packages with that name is not exactly one, and 5 if the
	installed version does not support live update.
	"""
	package_name = 'qemu-kvm-vz'

	global yb
	if not try_lock_yum(yb):
		print("Unable to lock yum database.")
		exit(4)
	try:
		with suppress_stdout():
			pkgs = yb.rpmdb.searchNames(names=[package_name])
	finally:
		# Release the yum lock even if the query fails
		yb.doUnlock()

	if len(pkgs) != 1:
		print("Found %d packages named %s" % (len(pkgs), package_name))
		print("Cannot detect package version reliably, exiting.")
		exit(7)

	# Exactly one package is left at this point
	pkg = next(iter(pkgs))
	version = "%s-%s-%s" % (pkg.name, pkg.ver, pkg.release)
	print("Current KVM/QEMU Package version: %s" % version)
	if not is_supported_version(version):
		print("Current KVM/QEMU version does not support live update, it should be 2.6.0 or newer.")
		exit(5)
	return version

def get_vm_qemu_version(libvirtcon, ve_name):
	"""
	Query KVM/QEMU Monitor of a running VM to find the version it's running.

	libvirtcon is the libvirt connection, ve_name the name of the VM.
	Returns the "package" field of the monitor's query-version reply with
	surrounding whitespace and parentheses removed, or None if the version
	could not be obtained for any reason.
	"""
	try:
		ve = libvirtcon.lookupByName(ve_name)
		reply = json.loads(libvirt_qemu.qemuMonitorCommand(ve, '{"execute":"query-version"}',0))
		return re.sub('[()]', '', reply["return"]["package"].strip())
	# "Exception" (not a bare except): CTRL+C and exit() must not be
	# reported as "version unknown"
	except Exception:
		return None

def update_vms(ves):
	"""
	Update KVM/QEMU engine for VMs running on an outdated engine.

	ves is the list of VM handles to process. The whole pass runs under the
	yum lock, which is released when the pass ends, also on an error.

	Returns the list of VMs that should be retried later (transitional
	state, unknown engine version, exclusively locked). VMs that failed in a
	way that is not retried are appended to the global failed_no_retry list.
	Exits with status 6 if the yum lock cannot be taken.
	"""
	failed = []

	global host_qemu_ver
	global prlserver
	global libvirtcon
	global yb
	global failed_no_retry

	if not try_lock_yum(yb):
		print("Unable to lock yum database, exiting.")
		exit(6)

	try:
		# Check if connection is still alive, it could have died while we waited for a retry
		check_if_still_connected()

		print("Start updating VMs.")
		for ve in ves:
			# Placeholder for the error message in case the name itself cannot be read
			ve_name = "<unknown>"
			try:
				ve_name = ve.get_config().get_name()

				ve_state = ve.get_config().get_state().wait().get_param().get_state()

				if ve_state in flags_stopped:
					print("- VM %s is not running, skipping." % ve_name)
					continue

				if ve_state in flags_transitional:
					print("- VM %s is in transitional state, will retry later." % ve_name)
					failed.append(ve)
					continue

				vm_qemu_ver = get_vm_qemu_version(libvirtcon, ve_name)
				if not vm_qemu_ver:
					print("- Unable to detect qemu engine version for VM %s, will retry later." % ve_name)
					failed.append(ve)
					continue

				if vm_qemu_ver == host_qemu_ver:
					print("- VM %s is already up-to-date." % ve_name)
					continue

				if not is_supported_version(vm_qemu_ver):
					print("- VM %s (using %s) failed to update, KVM/QEMU engine is too old." % (ve_name, vm_qemu_ver))
					failed_no_retry.append(ve)
					continue

				# 0 - updated, 1 - VM is locked (retry later), anything else - failed
				ret = qemu_update_ve(prlserver, ve)
				if ret == 0:
					print("- VM %s (using %s) has been successfully updated." % (ve_name, vm_qemu_ver))
				elif ret == 1:
					print("- VM %s (using %s) is locked, will retry later." % (ve_name, vm_qemu_ver))
					failed.append(ve)
				else:
					print("- VM %s (using %s) failed to update." % (ve_name, vm_qemu_ver))
					failed_no_retry.append(ve)

			# SDK errors affect only this VM; anything else propagates to the caller
			except prlsdkapi.PrlSDKError as ex:
				print("- VM %s has failed. [%s]:%s" % (ve_name, str(ex.get_result()), ex.args))
				failed_no_retry.append(ve)

		print("Finished updating VMs.")
	finally:
		# Never leave the yum lock behind, whatever ended the pass
		yb.doUnlock()

	return failed

def qemu_update_ve(server, ve):
	"""
	Update KVM/QEMU engine for a specific VM.

	The update is requested as a migration of the VM to the given server
	(the local Dispatcher connection) with the PVM_UPDATE_MODE flag set.

	Returns 0 on success, 1 if the job failed because the VM is exclusively
	locked, 2 on any other failure (including a failure to start the job).
	"""
	job = None
	try:
		job = ve.migrate(server, '', consts.PVMSL_LOW_SECURITY | consts.PVM_UPDATE_MODE, 0, True)
		job.wait()
		return 0
	except Exception:
		# The job could not even be started: there is no return code to inspect
		if job is None:
			return 2
		# c_uint converts the error constant to the unsigned form it is compared in
		if job.get_ret_code() == c_uint(errors.PRL_ERR_VM_IS_EXCLUSIVELY_LOCKED).value:
			return 1
		return 2

#
# Main
#

if __name__ == '__main__':
	# Timer is configured to restart itself each 10 minutes to workaround
	# a bug in systemd when timer gets elapsed, but unit is not started.
	# Once we are actually executed - we will stop the timer.
	st = subprocess32.Popen("/bin/systemctl stop vz-qemu-engine-updater.timer".split())
	st.communicate()

	# Load configuration file
	config = load_config(config_f)

	# Initialize Yum, and wait until it finishes transaction
	yb = init_yum()
	if not try_lock_yum(yb):
		print("Unable to lock yum database.")
		exit(4)
	yb.doUnlock()

	# Connect to services
	prlserver = init_prl()
	libvirtcon = init_libvirt()

	# Get Host QEMU version
	host_qemu_ver = get_host_qemu_version()

	# Install signal handler
	# We must do it after get_host_qemu_version because
	# yum rpm base search resets the sighandle
	original_sigint = signal.signal(signal.SIGINT, sigint_exit)

	# First run
	failed_no_retry = []
	ves = prlserver.get_vm_list_ex(consts.PVTF_VM).wait()
	failed = update_vms(ves)
	failed_names = ""

	# If there are failed VMs we'll iterate over the list of failed VMs
	# pre-configured amount of retry times
	retries = 0
	retry_time = config["retry_delay"]
	while failed and retries < config["retries"]:
		retries += 1
		# Report the delay that is really used: it grows after every retry
		print("Some VMs couldn't update qemu engine, will retry in %d seconds." % retry_time)
		time.sleep(retry_time)

		print("Starting retry %d/%d." % (retries, config["retries"]))
		ves = failed
		failed = update_vms(ves)

		# Increment retry delay; retry_max == 0 means the delay is not capped
		retry_time += config["retry_step"]
		if config["retry_max"] and retry_time > config["retry_max"]:
			retry_time = config["retry_max"]

	# Check if we managed to update all VMs after all retries
	failed += failed_no_retry
	if not failed:
		print("Successfully updated KVM/QEMU engine for all running VMs.")
		ret = 0
	else:
		failed_names = ", ".join([ve.get_config().get_name() for ve in failed])
		print("Some of the VMs couldn't be updated: %s." % failed_names)
		ret = 1

	exit(ret)
