# Balazs Lecz authored
# Signed-off-by: Balazs Lecz <leczb@google.com>
# Reviewed-by: Iustin Pop <iustin@google.com>
# c4d3e57f
# hv_lxc.py 12.56 KiB
#
#
# Copyright (C) 2010 Google Inc.
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
# General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
# 02110-1301, USA.
"""LXC hypervisor
"""
import os
import os.path
import time
import logging
from ganeti import constants
from ganeti import errors # pylint: disable-msg=W0611
from ganeti import utils
from ganeti.hypervisor import hv_base
from ganeti.errors import HypervisorError
class LXCHypervisor(hv_base.BaseHypervisor):
  """LXC-based virtualization.

  Since current (Spring 2010) distributions are not yet ready for
  running under a container, the following changes must be done
  manually:
    - remove udev
    - disable the kernel log component of sysklogd/rsyslog/etc.,
      otherwise they will fail to read the log, and at least rsyslog
      will fill the filesystem with error messages

  TODO:
    - move hardcoded parameters into hypervisor parameters, once we
      have the container-parameter support
    - implement memory limits, but only optionally, depending on host
      kernel support

  Problems/issues:
    - LXC is very temperamental; in daemon mode, it succeeds or fails
      in launching the instance silently, without any error
      indication, and when failing it can leave network interfaces
      around, and future successful startups will list the instance
      twice
    - shutdown sequence of containers leaves the init 'dead', and the
      container effectively stopped, but LXC still believes the
      container to be running; need to investigate using the
      notify_on_release and release_agent feature of cgroups

  """
  # Directory holding the per-instance root dirs, config files and
  # console logs
  _ROOT_DIR = constants.RUN_GANETI_DIR + "/lxc"
  # Global lxc-start log file
  # NOTE(review): plain concatenation assumes constants.LOG_DIR ends with
  # a path separator -- confirm against the constants module
  _LOG_FILE = constants.LOG_DIR + "hv_lxc.log"
  # Device nodes the container is allowed to access (see _CreateConfigFile)
  _DEVS = [
    "c 1:3",   # /dev/null
    "c 1:5",   # /dev/zero
    "c 1:7",   # /dev/full
    "c 1:8",   # /dev/random
    "c 1:9",   # /dev/urandom
    "c 1:10",  # /dev/aio
    "c 5:0",   # /dev/tty
    "c 5:1",   # /dev/console
    "c 5:2",   # /dev/ptmx
    "c 136:*", # first block of Unix98 PTY slaves
    ]
  # Capabilities dropped from the container (see _CreateConfigFile)
  _DENIED_CAPABILITIES = [
    "mac_override",    # Allow MAC configuration or state changes
    # TODO: remove sys_admin too, for safety
    #"sys_admin",      # Perform  a range of system administration operations
    "sys_boot",        # Use reboot(2) and kexec_load(2)
    "sys_module",      # Load and unload kernel modules
    "sys_time",        # Set  system  clock, set real-time (hardware) clock
    ]
  # Mode used for the directories created by this class; written with the
  # "0o" prefix so the literal parses on Python 2.6+ as well as Python 3
  _DIR_MODE = 0o755

  PARAMETERS = {
    }

  def __init__(self):
    """Initialize the hypervisor and make sure the root directory exists.

    """
    hv_base.BaseHypervisor.__init__(self)
    utils.EnsureDirs([(self._ROOT_DIR, self._DIR_MODE)])

  @staticmethod
  def _GetMountSubdirs(path):
    """Return the list of mountpoints under a given path.

    Only mountpoints located strictly below C{path} are returned; C{path}
    itself and sibling directories which merely share its name as a
    prefix (e.g. C{/x/inst10} for C{/x/inst1}) are excluded.

    @type path: string
    @param path: the directory below which to look for mountpoints
    @rtype: list of strings
    @return: the mountpoints, those with the most path components first

    """
    # Compare against "path/" so that the match happens on a path component
    # boundary and not on an arbitrary string prefix
    prefix = path.rstrip("/") + "/"
    result = [mountpoint for _, mountpoint, _, _ in utils.GetMounts()
              if mountpoint.startswith(prefix) and mountpoint != path]

    # Deepest entries first, so nested mounts precede their parents
    result.sort(key=lambda x: x.count("/"), reverse=True)
    return result

  @classmethod
  def _InstanceDir(cls, instance_name):
    """Return the root directory for an instance.

    """
    return utils.PathJoin(cls._ROOT_DIR, instance_name)

  @classmethod
  def _InstanceConfFile(cls, instance_name):
    """Return the configuration file for an instance.

    """
    return utils.PathJoin(cls._ROOT_DIR, instance_name + ".conf")

  @classmethod
  def _GetCgroupMountPoint(cls):
    """Return the mountpoint of the first mounted cgroup filesystem.

    @rtype: string
    @raise HypervisorError: if no filesystem of type "cgroup" is mounted

    """
    for _, mountpoint, fstype, _ in utils.GetMounts():
      if fstype == "cgroup":
        return mountpoint
    raise HypervisorError("The cgroup filesystem is not mounted")

  @classmethod
  def _GetCgroupCpuList(cls, instance_name):
    """Return the list of CPU ids for an instance.

    @type instance_name: string
    @param instance_name: the instance name
    @rtype: list of integers
    @return: the CPU ids listed in the instance's C{cpuset.cpus} file
    @raise HypervisorError: if the cgroup filesystem is not mounted or
        the C{cpuset.cpus} file cannot be read

    """
    cgroup = cls._GetCgroupMountPoint()
    cpus_file = utils.PathJoin(cgroup, instance_name, "cpuset.cpus")
    try:
      cpus = utils.ReadFile(cpus_file)
    except EnvironmentError as err:
      raise HypervisorError("Getting CPU list for instance"
                            " %s failed: %s" % (instance_name, err))

    # cpuset.cpus format: comma-separated list of CPU ids
    #                     or dash-separated id ranges
    # Example: "0-1,3"
    cpu_list = []
    for range_def in cpus.split(","):
      range_def = range_def.strip()
      if not range_def:
        # Empty cpuset or stray separator: nothing to add
        continue
      boundaries = range_def.split("-")
      # A single id ("3") yields one element, so both bounds are the same
      lower = int(boundaries[0])
      higher = int(boundaries[-1])
      cpu_list.extend(range(lower, higher + 1))

    return cpu_list

  def ListInstances(self):
    """Get the list of running instances.

    @rtype: list of strings
    @return: the lines printed by C{lxc-ls}
    @raise HypervisorError: if C{lxc-ls} fails

    """
    result = utils.RunCmd(["lxc-ls"])
    if result.failed:
      raise HypervisorError("Can't run lxc-ls: %s" % result.output)
    return result.stdout.splitlines()

  def GetInstanceInfo(self, instance_name):
    """Get instance properties.

    @type instance_name: string
    @param instance_name: the instance name

    @return: (name, id, memory, vcpus, stat, times) if the instance is
        running, C{None} otherwise
    @raise HypervisorError: if C{lxc-info} fails or prints nothing, or if
        the CPU list of a running instance cannot be read

    """
    # TODO: read container info from the cgroup mountpoint

    result = utils.RunCmd(["lxc-info", "-n", instance_name])
    if result.failed:
      raise HypervisorError("Can't run lxc-info: %s" % result.output)

    # lxc-info output examples:
    # 'ganeti-lxc-test1' is STOPPED
    # 'ganeti-lxc-test1' is RUNNING
    fields = result.stdout.split()
    if not fields:
      raise HypervisorError("Unexpected empty output from lxc-info for"
                            " instance %s" % instance_name)

    # The state is the last word of the output
    if fields[-1] != "RUNNING":
      return None

    # Only query the cgroup once the instance is known to be running; for
    # any other state the lookup would fail instead of returning None
    cpu_list = self._GetCgroupCpuList(instance_name)
    return (instance_name, 0, 0, len(cpu_list), 0, 0)

  def GetAllInstancesInfo(self):
    """Get properties of all instances.

    Unlike L{GetInstanceInfo}, the vcpus field is not looked up here and
    is always reported as 0.

    @return: [(name, id, memory, vcpus, stat, times),...]

    """
    # TODO: read container info from the cgroup mountpoint
    return [(name, 0, 0, 0, 0, 0) for name in self.ListInstances()]

  def _CreateConfigFile(self, instance, root_dir):
    """Create an lxc.conf file for an instance.

    As a side effect, an empty console log file for the instance is
    written below the hypervisor's root directory.

    @param instance: the instance; its C{name} and C{nics} attributes
        are read
    @type root_dir: string
    @param root_dir: the directory used as the container's root filesystem
    @rtype: string
    @return: the contents of the configuration file
    @raise HypervisorError: if the console log file cannot be written or
        if a NIC is not in bridged mode

    """
    out = []
    # hostname
    out.append("lxc.utsname = %s" % instance.name)

    # separate pseudo-TTY instances
    out.append("lxc.pts = 255")
    # standard TTYs
    out.append("lxc.tty = 6")
    # console log file
    console_log = utils.PathJoin(self._ROOT_DIR, instance.name + ".console")
    try:
      utils.WriteFile(console_log, data="", mode=constants.SECURE_FILE_MODE)
    except EnvironmentError as err:
      raise HypervisorError("Creating console log file %s for"
                            " instance %s failed: %s" %
                            (console_log, instance.name, err))
    out.append("lxc.console = %s" % console_log)

    # root FS
    out.append("lxc.rootfs = %s" % root_dir)

    # TODO: additional mounts, if we disable CAP_SYS_ADMIN

    # Device control
    # deny direct device access
    out.append("lxc.cgroup.devices.deny = a")
    for devinfo in self._DEVS:
      out.append("lxc.cgroup.devices.allow = %s rw" % devinfo)

    # Networking
    for idx, nic in enumerate(instance.nics):
      out.append("# NIC %d" % idx)
      mode = nic.nicparams[constants.NIC_MODE]
      link = nic.nicparams[constants.NIC_LINK]
      if mode == constants.NIC_MODE_BRIDGED:
        out.append("lxc.network.type = veth")
        out.append("lxc.network.link = %s" % link)
      else:
        raise HypervisorError("LXC hypervisor only supports"
                              " bridged mode (NIC %d has mode %s)" %
                              (idx, mode))
      out.append("lxc.network.hwaddr = %s" % nic.mac)
      out.append("lxc.network.flags = up")

    # Capabilities
    for cap in self._DENIED_CAPABILITIES:
      out.append("lxc.cap.drop = %s" % cap)

    return "\n".join(out) + "\n"

  def StartInstance(self, instance, block_devices):
    """Start an instance.

    For LXC, we try to mount the block device and execute 'lxc-start
    start' (we use volatile containers).

    @param instance: the instance to start
    @param block_devices: the instance's disks; element C{[0][1]} is used
        as the device to mount on the instance's root directory
    @raise HypervisorError: if the instance directory cannot be created,
        no disk is given, or the mount or C{lxc-start} command fails

    """
    root_dir = self._InstanceDir(instance.name)
    try:
      utils.EnsureDirs([(root_dir, self._DIR_MODE)])
    except errors.GenericError as err:
      raise HypervisorError("Cannot create instance directory: %s" % str(err))

    conf_file = self._InstanceConfFile(instance.name)
    utils.WriteFile(conf_file, data=self._CreateConfigFile(instance, root_dir))

    # Mount the first disk only if the root dir is not a mountpoint already
    if not os.path.ismount(root_dir):
      if not block_devices:
        raise HypervisorError("LXC needs at least one disk")

      sda_dev_path = block_devices[0][1]
      result = utils.RunCmd(["mount", sda_dev_path, root_dir])
      if result.failed:
        raise HypervisorError("Can't mount the chroot dir: %s" % result.output)

    # TODO: replace the global log file with a per-instance log file
    result = utils.RunCmd(["lxc-start", "-n", instance.name,
                           "-o", self._LOG_FILE, "-l", "DEBUG",
                           "-f", conf_file, "-d"])
    if result.failed:
      raise HypervisorError("Running the lxc-start script failed: %s" %
                            result.output)

  def StopInstance(self, instance, force=False, retry=False, name=None):
    """Stop an instance.

    This method has complicated cleanup tests, as we must:
      - try to kill all leftover processes
      - try to unmount any additional sub-mountpoints
      - finally unmount the instance dir

    @param instance: the instance to stop; only used to look up the name
        when C{name} is not given
    @type force: boolean
    @param force: skip the 'poweroff' step, and raise an error if the
        instance directory cannot be unmounted
    @type retry: boolean
    @param retry: skip the 'poweroff' step
    @type name: string or None
    @param name: if given, used instead of C{instance.name}
    @raise HypervisorError: if 'poweroff' fails, or if the final unmount
        fails and C{force} is set

    """
    if name is None:
      name = instance.name

    root_dir = self._InstanceDir(name)
    if not os.path.exists(root_dir):
      return

    if name in self.ListInstances():
      # Signal init to shutdown; this is a hack
      if not retry and not force:
        result = utils.RunCmd(["chroot", root_dir, "poweroff"])
        if result.failed:
          raise HypervisorError("Can't run 'poweroff' for the instance: %s" %
                                result.output)
        time.sleep(2)
      result = utils.RunCmd(["lxc-stop", "-n", name])
      if result.failed:
        logging.warning("Error while doing lxc-stop for %s: %s", name,
                        result.output)

    # Failures on sub-mountpoints are only logged; the final umount of the
    # instance dir decides the outcome
    for mpath in self._GetMountSubdirs(root_dir):
      result = utils.RunCmd(["umount", mpath])
      if result.failed:
        logging.warning("Error while umounting subpath %s for instance %s: %s",
                        mpath, name, result.output)

    result = utils.RunCmd(["umount", root_dir])
    if result.failed and force:
      # Arguments are passed as a list, like the other commands here, so
      # the path is never interpreted by a shell
      msg = ("Processes still alive in the chroot: %s" %
             utils.RunCmd(["fuser", "-vm", root_dir]).output)
      logging.error(msg)
      raise HypervisorError("Can't umount the chroot dir: %s (%s)" %
                            (result.output, msg))

  def RebootInstance(self, instance):
    """Reboot an instance.

    This is not (yet) implemented (in Ganeti) for the LXC hypervisor.

    @raise HypervisorError: always

    """
    # TODO: implement reboot
    raise HypervisorError("The LXC hypervisor doesn't implement the"
                          " reboot functionality")

  def GetNodeInfo(self):
    """Return information about the node.

    This is just a wrapper over the base GetLinuxNodeInfo method.

    @return: a dict with the following keys (values in MiB):
          - memory_total: the total memory size on the node
          - memory_free: the available memory on the node for instances
          - memory_dom0: the memory used by the node itself, if available

    """
    return self.GetLinuxNodeInfo()

  @classmethod
  def GetShellCommandForConsole(cls, instance, hvparams, beparams):
    """Return a command for connecting to the console of an instance.

    @param instance: the instance; only its C{name} is used
    @param hvparams: unused
    @param beparams: unused
    @rtype: string
    @return: the C{lxc-console} command line for the instance

    """
    return "lxc-console -n %s" % instance.name

  def Verify(self):
    """Verify the hypervisor.

    For the LXC hypervisor, it just checks the existence of the base dir.

    @return: an error message if the base directory does not exist,
        C{None} otherwise

    """
    if not os.path.exists(self._ROOT_DIR):
      return "The required directory '%s' does not exist." % self._ROOT_DIR

  @classmethod
  def PowercycleNode(cls):
    """LXC powercycle, just a wrapper over Linux powercycle.

    """
    cls.LinuxPowercycle()

  def MigrateInstance(self, instance, target, live):
    """Migrate an instance.

    @type instance: L{objects.Instance}
    @param instance: the instance to be migrated
    @type target: string
    @param target: hostname (usually ip) of the target node
    @type live: boolean
    @param live: whether to do a live or non-live migration
    @raise HypervisorError: always, as migration is not supported

    """
    raise HypervisorError("Migration not supported by the LXC hypervisor")