Commit 47eca2c6 authored by Luka Blaskovic's avatar Luka Blaskovic Committed by Jose A. Lopes

Add support for KVM multiqueue virtio-net

This patch adds support for multiqueue virtio-net[1] which sets a number of
queues (file descriptors) from virtio_net_queues parameter for tap device
to parallelize packets sending or receiving. Tap devices will be created with
MULTI_QUEUE (IFF_MULTI_QUEUE) support.

KVM paravirtual nics (virtio-net) are only one which supports this feature.
Number of queues are limited by kernel tap implementation (currently to 8).
Instances must manually set number of queues, on Linux using:

ethtool -L ethX combined $queues

Network device options logic is moved to separate method
_GetNetworkDeviceFeatures which is now properly reused in hotplugging part.
This also fixes unreported bug when vhost_net parameter is set to true,
hotplugged network device will be created without "vhost=on" parameter.

[1] http://www.linux-kvm.org/page/MultiqueueSigned-off-by: default avatarLuka Blaskovic <lblasc@znode.net>
Reviewed-by: default avatarJose A. Lopes <jabolopes@google.com>
parent b9ece334
......@@ -109,6 +109,10 @@ _MULTI_CPU_MASK_CHECK = (_IsMultiCpuMaskWellFormed,
_NET_PORT_CHECK = (lambda x: 0 < x < 65535, "invalid port number",
None, None)
# Check if number of queues is in safe range
_VIRTIO_NET_QUEUES_CHECK = (lambda x: 0 < x < 9, "invalid number of queues",
None, None)
# Check that an integer is non negative
_NONNEGATIVE_INT_CHECK = (lambda x: x >= 0, "cannot be negative", None, None)
......@@ -121,6 +125,8 @@ REQ_DIR_CHECK = (True, ) + _DIR_CHECK
OPT_DIR_CHECK = (False, ) + _DIR_CHECK
REQ_NET_PORT_CHECK = (True, ) + _NET_PORT_CHECK
OPT_NET_PORT_CHECK = (False, ) + _NET_PORT_CHECK
REQ_VIRTIO_NET_QUEUES_CHECK = (True, ) + _VIRTIO_NET_QUEUES_CHECK
OPT_VIRTIO_NET_QUEUES_CHECK = (False, ) + _VIRTIO_NET_QUEUES_CHECK
REQ_CPU_MASK_CHECK = (True, ) + _CPU_MASK_CHECK
OPT_CPU_MASK_CHECK = (False, ) + _CPU_MASK_CHECK
REQ_MULTI_CPU_MASK_CHECK = (True, ) + _MULTI_CPU_MASK_CHECK
......
......@@ -339,6 +339,7 @@ class KVMHypervisor(hv_base.BaseHypervisor):
constants.HV_KVM_FLAG:
hv_base.ParamInSet(False, constants.HT_KVM_FLAG_VALUES),
constants.HV_VHOST_NET: hv_base.NO_CHECK,
constants.HV_VIRTIO_NET_QUEUES: hv_base.OPT_VIRTIO_NET_QUEUES_CHECK,
constants.HV_KVM_USE_CHROOT: hv_base.NO_CHECK,
constants.HV_KVM_USER_SHUTDOWN: hv_base.NO_CHECK,
constants.HV_MEM_PATH: hv_base.OPT_DIR_CHECK,
......@@ -385,6 +386,7 @@ class KVMHypervisor(hv_base.BaseHypervisor):
_QMP_RE = re.compile(r"^-qmp\s", re.M)
_SPICE_RE = re.compile(r"^-spice\s", re.M)
_VHOST_RE = re.compile(r"^-net\s.*,vhost=on|off", re.M)
_VIRTIO_NET_QUEUES_RE = re.compile(r"^-net\s.*,fds=x:y:...:z", re.M)
_ENABLE_KVM_RE = re.compile(r"^-enable-kvm\s", re.M)
_DISABLE_KVM_RE = re.compile(r"^-disable-kvm\s", re.M)
_NETDEV_RE = re.compile(r"^-netdev\s", re.M)
......@@ -1466,6 +1468,55 @@ class KVMHypervisor(hv_base.BaseHypervisor):
return hv_base.GenerateTapName()
def _GetNetworkDeviceFeatures(self, up_hvp, devlist, kvmhelp):
"""Get network device options to properly enable supported features.
Return tuple of supported and enabled tap features with nic_model.
This function is called before opening a new tap device.
@return: (nic_model, vnet_hdr, virtio_net_queues, tap_extra, nic_extra)
@rtype: tuple
"""
virtio_net_queues = 1
nic_extra = ""
nic_type = up_hvp[constants.HV_NIC_TYPE]
tap_extra = ""
vnet_hdr = False
if nic_type == constants.HT_NIC_PARAVIRTUAL:
nic_model = self._VIRTIO
try:
if self._VIRTIO_NET_RE.search(devlist):
nic_model = self._VIRTIO_NET_PCI
vnet_hdr = up_hvp[constants.HV_VNET_HDR]
except errors.HypervisorError, _:
# Older versions of kvm don't support DEVICE_LIST, but they don't
# have new virtio syntax either.
pass
if up_hvp[constants.HV_VHOST_NET]:
# Check for vhost_net support.
if self._VHOST_RE.search(kvmhelp):
tap_extra = ",vhost=on"
else:
raise errors.HypervisorError("vhost_net is configured"
" but it is not available")
if up_hvp[constants.HV_VIRTIO_NET_QUEUES] > 1:
# Check for multiqueue virtio-net support.
if self._VIRTIO_NET_QUEUES_RE.search(kvmhelp):
virtio_net_queues = up_hvp[constants.HV_VIRTIO_NET_QUEUES]
# As advised at http://www.linux-kvm.org/page/Multiqueue formula
# for calculating vector size is: vectors=2*N+1 where N is the
# number of queues (HV_VIRTIO_NET_QUEUES).
nic_extra = ",mq=on,vectors=%d" % (2 * virtio_net_queues + 1)
else:
raise errors.HypervisorError("virtio_net_queues is configured"
" but it is not available")
else:
nic_model = nic_type
return (nic_model, vnet_hdr, virtio_net_queues, tap_extra, nic_extra)
# too many local variables
# pylint: disable=R0914
def _ExecuteKVMRuntime(self, instance, kvm_runtime, kvmhelp, incoming=None):
......@@ -1524,37 +1575,18 @@ class KVMHypervisor(hv_base.BaseHypervisor):
if not kvm_nics:
kvm_cmd.extend(["-net", "none"])
else:
vnet_hdr = False
tap_extra = ""
nic_type = up_hvp[constants.HV_NIC_TYPE]
if nic_type == constants.HT_NIC_PARAVIRTUAL:
nic_model = self._VIRTIO
try:
if self._VIRTIO_NET_RE.search(devlist):
nic_model = self._VIRTIO_NET_PCI
vnet_hdr = up_hvp[constants.HV_VNET_HDR]
except errors.HypervisorError, _:
# Older versions of kvm don't support DEVICE_LIST, but they don't
# have new virtio syntax either.
pass
if up_hvp[constants.HV_VHOST_NET]:
# check for vhost_net support
if self._VHOST_RE.search(kvmhelp):
tap_extra = ",vhost=on"
else:
raise errors.HypervisorError("vhost_net is configured"
" but it is not available")
else:
nic_model = nic_type
(nic_model, vnet_hdr,
virtio_net_queues, tap_extra,
nic_extra) = self._GetNetworkDeviceFeatures(up_hvp, devlist, kvmhelp)
kvm_supports_netdev = self._NETDEV_RE.search(kvmhelp)
for nic_seq, nic in enumerate(kvm_nics):
tapname, tapfd = OpenTap(vnet_hdr=vnet_hdr,
name=self._GenerateKvmTapName(nic))
tapfds.append(tapfd)
tapname, nic_tapfds = OpenTap(vnet_hdr=vnet_hdr,
virtio_net_queues=virtio_net_queues,
name=self._GenerateKvmTapName(nic))
tapfds.extend(nic_tapfds)
taps.append(tapname)
tapfd = "%s%s" % ("fds=" if len(nic_tapfds) > 1 else "fd=",
":".join(str(fd) for fd in nic_tapfds))
if kvm_supports_netdev:
nic_val = "%s,mac=%s" % (nic_model, nic.mac)
try:
......@@ -1565,14 +1597,14 @@ class KVMHypervisor(hv_base.BaseHypervisor):
nic_val += (",id=%s,bus=pci.0,addr=%s" % (kvm_devid, hex(nic.pci)))
except errors.HotplugError:
netdev = "netdev%d" % nic_seq
nic_val += (",netdev=%s" % netdev)
tap_val = ("type=tap,id=%s,fd=%d%s" %
nic_val += (",netdev=%s%s" % (netdev, nic_extra))
tap_val = ("type=tap,id=%s,%s%s" %
(netdev, tapfd, tap_extra))
kvm_cmd.extend(["-netdev", tap_val, "-device", nic_val])
else:
nic_val = "nic,vlan=%s,macaddr=%s,model=%s" % (nic_seq,
nic.mac, nic_model)
tap_val = "tap,vlan=%s,fd=%d" % (nic_seq, tapfd)
tap_val = "tap,vlan=%s,%s" % (nic_seq, tapfd)
kvm_cmd.extend(["-net", tap_val, "-net", nic_val])
if incoming:
......@@ -1869,12 +1901,23 @@ class KVMHypervisor(hv_base.BaseHypervisor):
cmds += ["device_add virtio-blk-pci,bus=pci.0,addr=%s,drive=%s,id=%s" %
(hex(device.pci), kvm_devid, kvm_devid)]
elif dev_type == constants.HOTPLUG_TARGET_NIC:
(tap, fd) = OpenTap()
kvmpath = instance.hvparams[constants.HV_KVM_PATH]
kvmhelp = self._GetKVMOutput(kvmpath, self._KVMOPT_HELP)
devlist = self._GetKVMOutput(kvmpath, self._KVMOPT_DEVICELIST)
up_hvp = runtime[2]
(_, vnet_hdr,
virtio_net_queues, tap_extra,
nic_extra) = self._GetNetworkDeviceFeatures(up_hvp, devlist, kvmhelp)
(tap, fds) = OpenTap(vnet_hdr=vnet_hdr,
virtio_net_queues=virtio_net_queues)
# netdev_add don't support "fds=" when multiple fds are
# requested, generate separate "fd=" string for every fd
tapfd = ",".join(["fd=%s" % fd for fd in fds])
self._ConfigureNIC(instance, seq, device, tap)
self._PassTapFd(instance, fd, device)
cmds = ["netdev_add tap,id=%s,fd=%s" % (kvm_devid, kvm_devid)]
args = "virtio-net-pci,bus=pci.0,addr=%s,mac=%s,netdev=%s,id=%s" % \
(hex(device.pci), device.mac, kvm_devid, kvm_devid)
self._PassTapFd(instance, fds, device)
cmds = ["netdev_add tap,id=%s,%s%s" % (kvm_devid, tapfd, tap_extra)]
args = "virtio-net-pci,bus=pci.0,addr=%s,mac=%s,netdev=%s,id=%s%s" % \
(hex(device.pci), device.mac, kvm_devid, kvm_devid, nic_extra)
cmds += ["device_add %s" % args]
utils.WriteFile(self._InstanceNICFile(instance.name, seq), data=tap)
......@@ -1924,7 +1967,7 @@ class KVMHypervisor(hv_base.BaseHypervisor):
device.pci = self.HotDelDevice(instance, dev_type, device, _, seq)
self.HotAddDevice(instance, dev_type, device, _, seq)
def _PassTapFd(self, instance, fd, nic):
def _PassTapFd(self, instance, fds, nic):
"""Pass file descriptor to kvm process via monitor socket using SCM_RIGHTS
"""
......@@ -1932,7 +1975,6 @@ class KVMHypervisor(hv_base.BaseHypervisor):
# squash common parts between monitor and qmp
kvm_devid = _GenerateDeviceKVMId(constants.HOTPLUG_TARGET_NIC, nic)
command = "getfd %s\n" % kvm_devid
fds = [fd]
logging.info("%s", fds)
try:
monsock = MonitorSocket(self._InstanceMonitor(instance.name))
......
......@@ -41,6 +41,7 @@ IFF_TAP = 0x0002
IFF_NO_PI = 0x1000
IFF_ONE_QUEUE = 0x2000
IFF_VNET_HDR = 0x4000
IFF_MULTI_QUEUE = 0x0100
def _GetTunFeatures(fd, _ioctl=fcntl.ioctl):
......@@ -91,42 +92,82 @@ def _ProbeTapVnetHdr(fd, _features_fn=_GetTunFeatures):
return result
def OpenTap(vnet_hdr=True, name=""):
def _ProbeTapMqVirtioNet(fd, _features_fn=_GetTunFeatures):
"""Check whether to enable the IFF_MULTI_QUEUE flag.
This flag was introduced in Linux kernel 3.8.
@type fd: int
@param fd: the file descriptor of /dev/net/tun
"""
flags = _features_fn(fd)
if flags is None:
# Not supported
return False
result = bool(flags & IFF_MULTI_QUEUE)
if not result:
logging.warning("Kernel does not support IFF_MULTI_QUEUE, not enabling")
return result
def OpenTap(vnet_hdr=True, virtio_net_queues=1, name=""):
"""Open a new tap device and return its file descriptor.
This is intended to be used by a qemu-type hypervisor together with the -net
tap,fd=<fd> command line parameter.
tap,fd=<fd> or -net tap,fds=x:y:...:z command line parameter.
@type vnet_hdr: boolean
@param vnet_hdr: Enable the VNET Header
@type virtio_net_queues: int
@param virtio_net_queues: Set number of tap queues but not more than 8,
queues only work with virtio-net device;
disabled by default (one queue).
@type name: string
@param name: name for the TAP interface being created; if an empty
string is passed, the OS will generate a unique name
@return: (ifname, tapfd)
@return: (ifname, [tapfds])
@rtype: tuple
"""
try:
tapfd = os.open("/dev/net/tun", os.O_RDWR)
except EnvironmentError:
raise errors.HypervisorError("Failed to open /dev/net/tun")
tapfds = []
flags = IFF_TAP | IFF_NO_PI | IFF_ONE_QUEUE
for _ in range(virtio_net_queues):
try:
tapfd = os.open("/dev/net/tun", os.O_RDWR)
except EnvironmentError:
raise errors.HypervisorError("Failed to open /dev/net/tun")
if vnet_hdr and _ProbeTapVnetHdr(tapfd):
flags |= IFF_VNET_HDR
flags = IFF_TAP | IFF_NO_PI
# The struct ifreq ioctl request (see netdevice(7))
ifr = struct.pack("16sh", name, flags)
if vnet_hdr and _ProbeTapVnetHdr(tapfd):
flags |= IFF_VNET_HDR
try:
res = fcntl.ioctl(tapfd, TUNSETIFF, ifr)
except EnvironmentError, err:
raise errors.HypervisorError("Failed to allocate a new TAP device: %s" %
err)
# Check if it's ok to enable IFF_MULTI_QUEUE
if virtio_net_queues > 1 and _ProbeTapMqVirtioNet(tapfd):
flags |= IFF_MULTI_QUEUE
else:
flags |= IFF_ONE_QUEUE
# The struct ifreq ioctl request (see netdevice(7))
ifr = struct.pack("16sh", name, flags)
try:
res = fcntl.ioctl(tapfd, TUNSETIFF, ifr)
except EnvironmentError, err:
raise errors.HypervisorError("Failed to allocate a new TAP device: %s" %
err)
tapfds.append(tapfd)
# Get the interface name from the ioctl
ifname = struct.unpack("16sh", res)[0].strip("\x00")
return (ifname, tapfd)
return (ifname, tapfds)
......@@ -842,6 +842,25 @@ vnet\_hdr
It is set to ``true`` by default.
virtio\_net\_queues
Valid for the KVM hypervisor.
Set a number of queues (file descriptors) for tap device to
parallelize packets sending or receiving. Tap devices will be
created with MULTI_QUEUE (IFF_MULTI_QUEUE) support. This only
works with KVM paravirtual nics (virtio-net) and the maximum
number of queues is limited to ``8``. Tehnically this is an
extension of ``vnet_hdr`` which must be enabled for multiqueue
support.
If set to ``1`` queue, it effectively disables multiqueue support
on the tap and virio-net devices.
For instances it is necessary to manually set number of queues (on
Linux using: ``ethtool -L ethX combined $queues``).
It is set to ``1`` by default.
The ``-O (--os-parameters)`` option allows customisation of the OS
parameters. The actual parameter names and values depends on the OS
being used, but the syntax is the same key=value. For example, setting
......
......@@ -1696,6 +1696,9 @@ hvVga = "vga"
hvVhostNet :: String
hvVhostNet = "vhost_net"
hvVirtioNetQueues :: String
hvVirtioNetQueues = "virtio_net_queues"
hvVifScript :: String
hvVifScript = "vif_script"
......@@ -1813,6 +1816,7 @@ hvsParameterTypes = Map.fromList
, (hvUseLocaltime, VTypeBool)
, (hvVga, VTypeString)
, (hvVhostNet, VTypeBool)
, (hvVirtioNetQueues, VTypeInt)
, (hvVifScript, VTypeString)
, (hvVifType, VTypeString)
, (hvViridian, VTypeBool)
......@@ -3830,6 +3834,7 @@ hvcDefaults =
, (hvSecurityDomain, PyValueEx "")
, (hvKvmFlag, PyValueEx "")
, (hvVhostNet, PyValueEx False)
, (hvVirtioNetQueues, PyValueEx (1 :: Int))
, (hvKvmUseChroot, PyValueEx False)
, (hvKvmUserShutdown, PyValueEx False)
, (hvMemPath, PyValueEx "")
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment