Add support for KVM multiqueue virtio-net
author Luka Blaskovic <lblasc@znode.net>
Fri, 6 Jun 2014 10:58:22 +0000 (12:58 +0200)
committer Jose A. Lopes <jabolopes@google.com>
Fri, 6 Jun 2014 15:45:54 +0000 (17:45 +0200)
This patch adds support for multiqueue virtio-net[1], which sets the number
of queues (file descriptors) of a tap device from the virtio_net_queues
parameter in order to parallelize packet sending and receiving. Tap devices
will be created with MULTI_QUEUE (IFF_MULTI_QUEUE) support.

KVM paravirtual nics (virtio-net) are the only ones which support this feature.
The number of queues is limited by the kernel tap implementation (currently
to 8). Instances must manually set the number of queues, on Linux using:

ethtool -L ethX combined $queues

Network device options logic is moved to a separate method,
_GetNetworkDeviceFeatures, which is now properly reused in the hotplugging
part. This also fixes an unreported bug: when the vhost_net parameter is set
to true, a hotplugged network device was created without the "vhost=on"
parameter.

[1] http://www.linux-kvm.org/page/Multiqueue

Signed-off-by: Luka Blaskovic <lblasc@znode.net>
Reviewed-by: Jose A. Lopes <jabolopes@google.com>

lib/hypervisor/hv_base.py
lib/hypervisor/hv_kvm/__init__.py
lib/hypervisor/hv_kvm/netdev.py
man/gnt-instance.rst
src/Ganeti/Constants.hs

index f7317fc..491156c 100644 (file)
@@ -109,6 +109,10 @@ _MULTI_CPU_MASK_CHECK = (_IsMultiCpuMaskWellFormed,
 _NET_PORT_CHECK = (lambda x: 0 < x < 65535, "invalid port number",
                    None, None)
 
+# Check if number of queues is in safe range
+_VIRTIO_NET_QUEUES_CHECK = (lambda x: 0 < x < 9, "invalid number of queues",
+                            None, None)
+
 # Check that an integer is non negative
 _NONNEGATIVE_INT_CHECK = (lambda x: x >= 0, "cannot be negative", None, None)
 
@@ -121,6 +125,8 @@ REQ_DIR_CHECK = (True, ) + _DIR_CHECK
 OPT_DIR_CHECK = (False, ) + _DIR_CHECK
 REQ_NET_PORT_CHECK = (True, ) + _NET_PORT_CHECK
 OPT_NET_PORT_CHECK = (False, ) + _NET_PORT_CHECK
+REQ_VIRTIO_NET_QUEUES_CHECK = (True, ) + _VIRTIO_NET_QUEUES_CHECK
+OPT_VIRTIO_NET_QUEUES_CHECK = (False, ) + _VIRTIO_NET_QUEUES_CHECK
 REQ_CPU_MASK_CHECK = (True, ) + _CPU_MASK_CHECK
 OPT_CPU_MASK_CHECK = (False, ) + _CPU_MASK_CHECK
 REQ_MULTI_CPU_MASK_CHECK = (True, ) + _MULTI_CPU_MASK_CHECK
index 5617ca7..f192c82 100644 (file)
@@ -339,6 +339,7 @@ class KVMHypervisor(hv_base.BaseHypervisor):
     constants.HV_KVM_FLAG:
       hv_base.ParamInSet(False, constants.HT_KVM_FLAG_VALUES),
     constants.HV_VHOST_NET: hv_base.NO_CHECK,
+    constants.HV_VIRTIO_NET_QUEUES: hv_base.OPT_VIRTIO_NET_QUEUES_CHECK,
     constants.HV_KVM_USE_CHROOT: hv_base.NO_CHECK,
     constants.HV_KVM_USER_SHUTDOWN: hv_base.NO_CHECK,
     constants.HV_MEM_PATH: hv_base.OPT_DIR_CHECK,
@@ -385,6 +386,7 @@ class KVMHypervisor(hv_base.BaseHypervisor):
   _QMP_RE = re.compile(r"^-qmp\s", re.M)
   _SPICE_RE = re.compile(r"^-spice\s", re.M)
   _VHOST_RE = re.compile(r"^-net\s.*,vhost=on|off", re.M)
+  _VIRTIO_NET_QUEUES_RE = re.compile(r"^-net\s.*,fds=x:y:...:z", re.M)
   _ENABLE_KVM_RE = re.compile(r"^-enable-kvm\s", re.M)
   _DISABLE_KVM_RE = re.compile(r"^-disable-kvm\s", re.M)
   _NETDEV_RE = re.compile(r"^-netdev\s", re.M)
@@ -1466,6 +1468,55 @@ class KVMHypervisor(hv_base.BaseHypervisor):
 
     return hv_base.GenerateTapName()
 
+  def _GetNetworkDeviceFeatures(self, up_hvp, devlist, kvmhelp):
+    """Get network device options to properly enable supported features.
+
+    Return tuple of supported and enabled tap features with nic_model.
+    This function is called before opening a new tap device.
+
+    @return: (nic_model, vnet_hdr, virtio_net_queues, tap_extra, nic_extra)
+    @rtype: tuple
+
+    """
+    virtio_net_queues = 1
+    nic_extra = ""
+    nic_type = up_hvp[constants.HV_NIC_TYPE]
+    tap_extra = ""
+    vnet_hdr = False
+    if nic_type == constants.HT_NIC_PARAVIRTUAL:
+      nic_model = self._VIRTIO
+      try:
+        if self._VIRTIO_NET_RE.search(devlist):
+          nic_model = self._VIRTIO_NET_PCI
+          vnet_hdr = up_hvp[constants.HV_VNET_HDR]
+      except errors.HypervisorError, _:
+        # Older versions of kvm don't support DEVICE_LIST, but they don't
+        # have new virtio syntax either.
+        pass
+
+      if up_hvp[constants.HV_VHOST_NET]:
+        # Check for vhost_net support.
+        if self._VHOST_RE.search(kvmhelp):
+          tap_extra = ",vhost=on"
+        else:
+          raise errors.HypervisorError("vhost_net is configured"
+                                       " but it is not available")
+        if up_hvp[constants.HV_VIRTIO_NET_QUEUES] > 1:
+          # Check for multiqueue virtio-net support.
+          if self._VIRTIO_NET_QUEUES_RE.search(kvmhelp):
+            virtio_net_queues = up_hvp[constants.HV_VIRTIO_NET_QUEUES]
+            # As advised at http://www.linux-kvm.org/page/Multiqueue formula
+            # for calculating vector size is: vectors=2*N+1 where N is the
+            # number of queues (HV_VIRTIO_NET_QUEUES).
+            nic_extra = ",mq=on,vectors=%d" % (2 * virtio_net_queues + 1)
+          else:
+            raise errors.HypervisorError("virtio_net_queues is configured"
+                                         " but it is not available")
+    else:
+      nic_model = nic_type
+
+    return (nic_model, vnet_hdr, virtio_net_queues, tap_extra, nic_extra)
+
   # too many local variables
   # pylint: disable=R0914
   def _ExecuteKVMRuntime(self, instance, kvm_runtime, kvmhelp, incoming=None):
@@ -1524,37 +1575,18 @@ class KVMHypervisor(hv_base.BaseHypervisor):
     if not kvm_nics:
       kvm_cmd.extend(["-net", "none"])
     else:
-      vnet_hdr = False
-      tap_extra = ""
-      nic_type = up_hvp[constants.HV_NIC_TYPE]
-      if nic_type == constants.HT_NIC_PARAVIRTUAL:
-        nic_model = self._VIRTIO
-        try:
-          if self._VIRTIO_NET_RE.search(devlist):
-            nic_model = self._VIRTIO_NET_PCI
-            vnet_hdr = up_hvp[constants.HV_VNET_HDR]
-        except errors.HypervisorError, _:
-          # Older versions of kvm don't support DEVICE_LIST, but they don't
-          # have new virtio syntax either.
-          pass
-
-        if up_hvp[constants.HV_VHOST_NET]:
-          # check for vhost_net support
-          if self._VHOST_RE.search(kvmhelp):
-            tap_extra = ",vhost=on"
-          else:
-            raise errors.HypervisorError("vhost_net is configured"
-                                         " but it is not available")
-      else:
-        nic_model = nic_type
-
+      (nic_model, vnet_hdr,
+       virtio_net_queues, tap_extra,
+       nic_extra) = self._GetNetworkDeviceFeatures(up_hvp, devlist, kvmhelp)
       kvm_supports_netdev = self._NETDEV_RE.search(kvmhelp)
-
       for nic_seq, nic in enumerate(kvm_nics):
-        tapname, tapfd = OpenTap(vnet_hdr=vnet_hdr,
-                                 name=self._GenerateKvmTapName(nic))
-        tapfds.append(tapfd)
+        tapname, nic_tapfds = OpenTap(vnet_hdr=vnet_hdr,
+                                      virtio_net_queues=virtio_net_queues,
+                                      name=self._GenerateKvmTapName(nic))
+        tapfds.extend(nic_tapfds)
         taps.append(tapname)
+        tapfd = "%s%s" % ("fds=" if len(nic_tapfds) > 1 else "fd=",
+                          ":".join(str(fd) for fd in nic_tapfds))
         if kvm_supports_netdev:
           nic_val = "%s,mac=%s" % (nic_model, nic.mac)
           try:
@@ -1565,14 +1597,14 @@ class KVMHypervisor(hv_base.BaseHypervisor):
             nic_val += (",id=%s,bus=pci.0,addr=%s" % (kvm_devid, hex(nic.pci)))
           except errors.HotplugError:
             netdev = "netdev%d" % nic_seq
-          nic_val += (",netdev=%s" % netdev)
-          tap_val = ("type=tap,id=%s,fd=%d%s" %
+          nic_val += (",netdev=%s%s" % (netdev, nic_extra))
+          tap_val = ("type=tap,id=%s,%s%s" %
                      (netdev, tapfd, tap_extra))
           kvm_cmd.extend(["-netdev", tap_val, "-device", nic_val])
         else:
           nic_val = "nic,vlan=%s,macaddr=%s,model=%s" % (nic_seq,
                                                          nic.mac, nic_model)
-          tap_val = "tap,vlan=%s,fd=%d" % (nic_seq, tapfd)
+          tap_val = "tap,vlan=%s,%s" % (nic_seq, tapfd)
           kvm_cmd.extend(["-net", tap_val, "-net", nic_val])
 
     if incoming:
@@ -1869,12 +1901,23 @@ class KVMHypervisor(hv_base.BaseHypervisor):
       cmds += ["device_add virtio-blk-pci,bus=pci.0,addr=%s,drive=%s,id=%s" %
                 (hex(device.pci), kvm_devid, kvm_devid)]
     elif dev_type == constants.HOTPLUG_TARGET_NIC:
-      (tap, fd) = OpenTap()
+      kvmpath = instance.hvparams[constants.HV_KVM_PATH]
+      kvmhelp = self._GetKVMOutput(kvmpath, self._KVMOPT_HELP)
+      devlist = self._GetKVMOutput(kvmpath, self._KVMOPT_DEVICELIST)
+      up_hvp = runtime[2]
+      (_, vnet_hdr,
+       virtio_net_queues, tap_extra,
+       nic_extra) = self._GetNetworkDeviceFeatures(up_hvp, devlist, kvmhelp)
+      (tap, fds) = OpenTap(vnet_hdr=vnet_hdr,
+                           virtio_net_queues=virtio_net_queues)
+      # netdev_add doesn't support "fds=" when multiple fds are
+      # requested; generate a separate "fd=" string for every fd
+      tapfd = ",".join(["fd=%s" % fd for fd in fds])
       self._ConfigureNIC(instance, seq, device, tap)
-      self._PassTapFd(instance, fd, device)
-      cmds = ["netdev_add tap,id=%s,fd=%s" % (kvm_devid, kvm_devid)]
-      args = "virtio-net-pci,bus=pci.0,addr=%s,mac=%s,netdev=%s,id=%s" % \
-               (hex(device.pci), device.mac, kvm_devid, kvm_devid)
+      self._PassTapFd(instance, fds, device)
+      cmds = ["netdev_add tap,id=%s,%s%s" % (kvm_devid, tapfd, tap_extra)]
+      args = "virtio-net-pci,bus=pci.0,addr=%s,mac=%s,netdev=%s,id=%s%s" % \
+               (hex(device.pci), device.mac, kvm_devid, kvm_devid, nic_extra)
       cmds += ["device_add %s" % args]
       utils.WriteFile(self._InstanceNICFile(instance.name, seq), data=tap)
 
@@ -1924,7 +1967,7 @@ class KVMHypervisor(hv_base.BaseHypervisor):
       device.pci = self.HotDelDevice(instance, dev_type, device, _, seq)
       self.HotAddDevice(instance, dev_type, device, _, seq)
 
-  def _PassTapFd(self, instance, fd, nic):
+  def _PassTapFd(self, instance, fds, nic):
     """Pass file descriptor to kvm process via monitor socket using SCM_RIGHTS
 
     """
@@ -1932,7 +1975,6 @@ class KVMHypervisor(hv_base.BaseHypervisor):
     #       squash common parts between monitor and qmp
     kvm_devid = _GenerateDeviceKVMId(constants.HOTPLUG_TARGET_NIC, nic)
     command = "getfd %s\n" % kvm_devid
-    fds = [fd]
     logging.info("%s", fds)
     try:
       monsock = MonitorSocket(self._InstanceMonitor(instance.name))
index a4f2b5d..f5e8067 100644 (file)
@@ -41,6 +41,7 @@ IFF_TAP = 0x0002
 IFF_NO_PI = 0x1000
 IFF_ONE_QUEUE = 0x2000
 IFF_VNET_HDR = 0x4000
+IFF_MULTI_QUEUE = 0x0100
 
 
 def _GetTunFeatures(fd, _ioctl=fcntl.ioctl):
@@ -91,42 +92,82 @@ def _ProbeTapVnetHdr(fd, _features_fn=_GetTunFeatures):
   return result
 
 
-def OpenTap(vnet_hdr=True, name=""):
+def _ProbeTapMqVirtioNet(fd, _features_fn=_GetTunFeatures):
+  """Check whether to enable the IFF_MULTI_QUEUE flag.
+
+  This flag was introduced in Linux kernel 3.8.
+
+   @type fd: int
+   @param fd: the file descriptor of /dev/net/tun
+
+  """
+  flags = _features_fn(fd)
+
+  if flags is None:
+    # Not supported
+    return False
+
+  result = bool(flags & IFF_MULTI_QUEUE)
+
+  if not result:
+    logging.warning("Kernel does not support IFF_MULTI_QUEUE, not enabling")
+
+  return result
+
+
+def OpenTap(vnet_hdr=True, virtio_net_queues=1, name=""):
   """Open a new tap device and return its file descriptor.
 
   This is intended to be used by a qemu-type hypervisor together with the -net
-  tap,fd=<fd> command line parameter.
+  tap,fd=<fd> or -net tap,fds=x:y:...:z command line parameter.
 
   @type vnet_hdr: boolean
   @param vnet_hdr: Enable the VNET Header
 
+  @type virtio_net_queues: int
+  @param virtio_net_queues: Set number of tap queues but not more than 8,
+                            queues only work with virtio-net device;
+                            disabled by default (one queue).
+
   @type name: string
   @param name: name for the TAP interface being created; if an empty
                string is passed, the OS will generate a unique name
 
-  @return: (ifname, tapfd)
+  @return: (ifname, [tapfds])
   @rtype: tuple
 
   """
-  try:
-    tapfd = os.open("/dev/net/tun", os.O_RDWR)
-  except EnvironmentError:
-    raise errors.HypervisorError("Failed to open /dev/net/tun")
+  tapfds = []
 
-  flags = IFF_TAP | IFF_NO_PI | IFF_ONE_QUEUE
+  for _ in range(virtio_net_queues):
+    try:
+      tapfd = os.open("/dev/net/tun", os.O_RDWR)
+    except EnvironmentError:
+      raise errors.HypervisorError("Failed to open /dev/net/tun")
 
-  if vnet_hdr and _ProbeTapVnetHdr(tapfd):
-    flags |= IFF_VNET_HDR
+    flags = IFF_TAP | IFF_NO_PI
 
-  # The struct ifreq ioctl request (see netdevice(7))
-  ifr = struct.pack("16sh", name, flags)
+    if vnet_hdr and _ProbeTapVnetHdr(tapfd):
+      flags |= IFF_VNET_HDR
 
-  try:
-    res = fcntl.ioctl(tapfd, TUNSETIFF, ifr)
-  except EnvironmentError, err:
-    raise errors.HypervisorError("Failed to allocate a new TAP device: %s" %
-                                 err)
+    # Check if it's ok to enable IFF_MULTI_QUEUE
+    if virtio_net_queues > 1 and _ProbeTapMqVirtioNet(tapfd):
+      flags |= IFF_MULTI_QUEUE
+    else:
+      flags |= IFF_ONE_QUEUE
+
+    # The struct ifreq ioctl request (see netdevice(7))
+    ifr = struct.pack("16sh", name, flags)
+
+    try:
+      res = fcntl.ioctl(tapfd, TUNSETIFF, ifr)
+    except EnvironmentError, err:
+      raise errors.HypervisorError("Failed to allocate a new TAP device: %s" %
+                                   err)
+
+    tapfds.append(tapfd)
 
   # Get the interface name from the ioctl
   ifname = struct.unpack("16sh", res)[0].strip("\x00")
-  return (ifname, tapfd)
+
+  return (ifname, tapfds)
index 989d113..b125ced 100644 (file)
@@ -842,6 +842,25 @@ vnet\_hdr
 
     It is set to ``true`` by default.
 
+virtio\_net\_queues
+    Valid for the KVM hypervisor.
+
+    Set the number of queues (file descriptors) for the tap device to
+    parallelize packet sending or receiving. Tap devices will be
+    created with MULTI_QUEUE (IFF_MULTI_QUEUE) support. This only
+    works with KVM paravirtual nics (virtio-net) and the maximum
+    number of queues is limited to ``8``. Technically this is an
+    extension of ``vnet_hdr``, which must be enabled for multiqueue
+    support.
+
+    If set to ``1`` queue, it effectively disables multiqueue support
+    on the tap and virtio-net devices.
+
+    For instances it is necessary to manually set the number of
+    queues (on Linux using: ``ethtool -L ethX combined $queues``).
+
+    It is set to ``1`` by default.
+
 The ``-O (--os-parameters)`` option allows customisation of the OS
 parameters. The actual parameter names and values depends on the OS
 being used, but the syntax is the same key=value. For example, setting
index 5b1182a..22cb7c9 100644 (file)
@@ -1696,6 +1696,9 @@ hvVga = "vga"
 hvVhostNet :: String
 hvVhostNet = "vhost_net"
 
+hvVirtioNetQueues :: String
+hvVirtioNetQueues = "virtio_net_queues"
+
 hvVifScript :: String
 hvVifScript = "vif_script"
 
@@ -1813,6 +1816,7 @@ hvsParameterTypes = Map.fromList
   , (hvUseLocaltime,                    VTypeBool)
   , (hvVga,                             VTypeString)
   , (hvVhostNet,                        VTypeBool)
+  , (hvVirtioNetQueues,                 VTypeInt)
   , (hvVifScript,                       VTypeString)
   , (hvVifType,                         VTypeString)
   , (hvViridian,                        VTypeBool)
@@ -3830,6 +3834,7 @@ hvcDefaults =
           , (hvSecurityDomain,                  PyValueEx "")
           , (hvKvmFlag,                         PyValueEx "")
           , (hvVhostNet,                        PyValueEx False)
+          , (hvVirtioNetQueues,                 PyValueEx (1 :: Int))
           , (hvKvmUseChroot,                    PyValueEx False)
           , (hvKvmUserShutdown,                 PyValueEx False)
           , (hvMemPath,                         PyValueEx "")