Merge branch 'stable-2.11' into stable-2.12
[ganeti-github.git] / lib / hypervisor / hv_kvm / __init__.py
index 612f318..2f22720 100644 (file)
@@ -2,21 +2,30 @@
 #
 
 # Copyright (C) 2008, 2009, 2010, 2011, 2012, 2013, 2014 Google Inc.
+# All rights reserved.
 #
-# This program is free software; you can redistribute it and/or modify
-# it under the terms of the GNU General Public License as published by
-# the Free Software Foundation; either version 2 of the License, or
-# (at your option) any later version.
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are
+# met:
 #
-# This program is distributed in the hope that it will be useful, but
-# WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-# General Public License for more details.
+# 1. Redistributions of source code must retain the above copyright notice,
+# this list of conditions and the following disclaimer.
 #
-# You should have received a copy of the GNU General Public License
-# along with this program; if not, write to the Free Software
-# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
-# 02110-1301, USA.
+# 2. Redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in the
+# documentation and/or other materials provided with the distribution.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
+# IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+# TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR
+# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+# LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+# NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
 
 """KVM hypervisor
@@ -35,9 +44,9 @@ import shutil
 import urllib2
 from bitarray import bitarray
 try:
-  import affinity   # pylint: disable=F0401
+  import psutil   # pylint: disable=F0401
 except ImportError:
-  affinity = None
+  psutil = None
 try:
   import fdsend   # pylint: disable=F0401
 except ImportError:
@@ -101,12 +110,33 @@ _RUNTIME_DEVICE = {
   }
 _RUNTIME_ENTRY = {
   constants.HOTPLUG_TARGET_NIC: lambda d, e: d,
-  constants.HOTPLUG_TARGET_DISK: lambda d, e: (d, e, None)
+  constants.HOTPLUG_TARGET_DISK: lambda d, e: (d, e[0], e[1])
   }
 
 _MIGRATION_CAPS_DELIM = ":"
 
 
+def _GetDriveURI(disk, link, uri):
+  """Select the path or URI that is handed to the --drive kvm option.
+
+  The drive uri is used only when one is available and the disk's access
+  mode is userspace; in every other case the device link is used.
+
+  @type disk: L{objects.Disk}
+  @param disk: A disk configuration object
+  @type link: string
+  @param link: The device link as returned by _SymlinkBlockDev()
+  @type uri: string
+  @param uri: The drive uri as returned by _CalculateDeviceURI()
+  @rtype: string
+  @return: C{uri} for userspace access with a non-empty uri, else C{link}
+
+  """
+  # Disks that do not specify an access mode count as kernelspace
+  mode = disk.params.get(constants.LDP_ACCESS, constants.DISK_KERNELSPACE)
+  if mode == constants.DISK_USERSPACE and uri:
+    return uri
+  return link
+
+
 def _GenerateDeviceKVMId(dev_type, dev):
   """Helper function to generate a unique device name used by KVM
 
@@ -339,6 +369,7 @@ class KVMHypervisor(hv_base.BaseHypervisor):
     constants.HV_KVM_FLAG:
       hv_base.ParamInSet(False, constants.HT_KVM_FLAG_VALUES),
     constants.HV_VHOST_NET: hv_base.NO_CHECK,
+    constants.HV_VIRTIO_NET_QUEUES: hv_base.OPT_VIRTIO_NET_QUEUES_CHECK,
     constants.HV_KVM_USE_CHROOT: hv_base.NO_CHECK,
     constants.HV_KVM_USER_SHUTDOWN: hv_base.NO_CHECK,
     constants.HV_MEM_PATH: hv_base.OPT_DIR_CHECK,
@@ -385,6 +416,7 @@ class KVMHypervisor(hv_base.BaseHypervisor):
   _QMP_RE = re.compile(r"^-qmp\s", re.M)
   _SPICE_RE = re.compile(r"^-spice\s", re.M)
   _VHOST_RE = re.compile(r"^-net\s.*,vhost=on|off", re.M)
+  _VIRTIO_NET_QUEUES_RE = re.compile(r"^-net\s.*,fds=x:y:...:z", re.M)
   _ENABLE_KVM_RE = re.compile(r"^-enable-kvm\s", re.M)
   _DISABLE_KVM_RE = re.compile(r"^-disable-kvm\s", re.M)
   _NETDEV_RE = re.compile(r"^-netdev\s", re.M)
@@ -553,6 +585,13 @@ class KVMHypervisor(hv_base.BaseHypervisor):
     return utils.PathJoin(cls._CTRL_DIR, "%s.qmp" % instance_name)
 
   @classmethod
+  def _InstanceKvmdMonitor(cls, instance_name):
+    """Returns the instance kvm daemon socket name
+
+    """
+    return utils.PathJoin(cls._CTRL_DIR, "%s.kvmd" % instance_name)
+
+  @classmethod
   def _InstanceShutdownMonitor(cls, instance_name):
     """Returns the instance QMP output filename
 
@@ -680,30 +719,24 @@ class KVMHypervisor(hv_base.BaseHypervisor):
     """
     hv_base.ConfigureNIC([pathutils.KVM_IFUP, tap], instance, seq, nic, tap)
 
-  @staticmethod
-  def _VerifyAffinityPackage():
-    if affinity is None:
-      raise errors.HypervisorError("affinity Python package not"
-                                   " found; cannot use CPU pinning under KVM")
-
-  @staticmethod
-  def _BuildAffinityCpuMask(cpu_list):
-    """Create a CPU mask suitable for sched_setaffinity from a list of
-    CPUs.
-
-    See man taskset for more info on sched_setaffinity masks.
-    For example: [ 0, 2, 5, 6 ] will return 101 (0x65, 0..01100101).
+  @classmethod
+  def _SetProcessAffinity(cls, process_id, cpus):
+    """Sets the affinity of a process to the given CPUs.
 
-    @type cpu_list: list of int
-    @param cpu_list: list of physical CPU numbers to map to vCPUs in order
-    @rtype: int
-    @return: a bit mask of CPU affinities
+    Passing L{constants.CPU_PINNING_OFF} allows the process to run on every
+    CPU reported by psutil.
+
+    @type process_id: int
+    @param process_id: The ID of the process to pin.
+    @type cpus: list of int
+    @param cpus: The list of CPUs the process ID may use.
+    @raise errors.HypervisorError: if the psutil package is not available
 
     """
-    if cpu_list == constants.CPU_PINNING_OFF:
-      return constants.CPU_PINNING_ALL_KVM
+    if psutil is None:
+      raise errors.HypervisorError("psutil Python package not"
+                                   " found; cannot use CPU pinning under KVM")
+
+    target_process = psutil.Process(process_id)
+    # psutil 2.0 renamed Process.set_cpu_affinity() to cpu_affinity() and
+    # NUM_CPUS to cpu_count(), and psutil 3.0 removed the old names.  Pick
+    # whichever spelling the installed release provides instead of mixing
+    # both generations of the API.
+    set_affinity = getattr(target_process, "cpu_affinity", None)
+    if set_affinity is None:
+      set_affinity = target_process.set_cpu_affinity
+    if cpus == constants.CPU_PINNING_OFF:
+      if hasattr(psutil, "cpu_count"):
+        cpu_total = psutil.cpu_count()
+      else:
+        cpu_total = psutil.NUM_CPUS
+      set_affinity(range(cpu_total))
     else:
-      return sum(2 ** cpu for cpu in cpu_list)
+      set_affinity(cpus)
 
   @classmethod
   def _AssignCpuAffinity(cls, cpu_mask, process_id, thread_dict):
@@ -729,20 +762,16 @@ class KVMHypervisor(hv_base.BaseHypervisor):
       else:
         # If CPU pinning has one non-all entry, map the entire VM to
         # one set of physical CPUs
-        cls._VerifyAffinityPackage()
-        affinity.set_process_affinity_mask(
-          process_id, cls._BuildAffinityCpuMask(all_cpu_mapping))
+        cls._SetProcessAffinity(process_id, all_cpu_mapping)
     else:
       # The number of vCPUs mapped should match the number of vCPUs
       # reported by KVM. This was already verified earlier, so
       # here only as a sanity check.
       assert len(thread_dict) == len(cpu_list)
-      cls._VerifyAffinityPackage()
 
       # For each vCPU, map it to the proper list of physical CPUs
-      for vcpu, i in zip(cpu_list, range(len(cpu_list))):
-        affinity.set_process_affinity_mask(thread_dict[i],
-                                           cls._BuildAffinityCpuMask(vcpu))
+      for i, vcpu in enumerate(cpu_list):
+        cls._SetProcessAffinity(thread_dict[i], vcpu)
 
   def _GetVcpuThreadIds(self, instance_name):
     """Get a mapping of vCPU no. to thread IDs for the instance
@@ -789,7 +818,7 @@ class KVMHypervisor(hv_base.BaseHypervisor):
     """
     result = []
     for name in os.listdir(self._PIDS_DIR):
-      if self._InstancePidAlive(name)[2] or self._IsUserShutdown(name):
+      if self._InstancePidAlive(name)[2]:
         result.append(name)
     return result
 
@@ -807,7 +836,7 @@ class KVMHypervisor(hv_base.BaseHypervisor):
     @type instance_name: string
     @param instance_name: the instance name
     @type hvparams: dict of strings
-    @param hvparams: hvparams to be used with this instance
+    @param hvparams: hypervisor parameters to be used with this instance
     @rtype: tuple of strings
     @return: (name, id, memory, vcpus, stat, times)
 
@@ -840,7 +869,7 @@ class KVMHypervisor(hv_base.BaseHypervisor):
     """Get properties of all instances.
 
     @type hvparams: dict of strings
-    @param hvparams: hypervisor parameter
+    @param hvparams: hypervisor parameters
     @return: list of tuples (name, id, memory, vcpus, stat, times)
 
     """
@@ -928,12 +957,7 @@ class KVMHypervisor(hv_base.BaseHypervisor):
         if needs_boot_flag and disk_type != constants.HT_DISK_IDE:
           boot_val = ",boot=on"
 
-      access_mode = cfdev.params.get(constants.LDP_ACCESS,
-                                     constants.DISK_KERNELSPACE)
-      if (uri and access_mode == constants.DISK_USERSPACE):
-        drive_uri = uri
-      else:
-        drive_uri = link_name
+      drive_uri = _GetDriveURI(cfdev, link_name, uri)
 
       drive_val = "file=%s,format=raw%s%s%s%s" % \
                   (drive_uri, if_val, boot_val, cache_val, aio_val)
@@ -1459,6 +1483,55 @@ class KVMHypervisor(hv_base.BaseHypervisor):
 
     return hv_base.GenerateTapName()
 
+  def _GetNetworkDeviceFeatures(self, up_hvp, devlist, kvmhelp):
+    """Work out the NIC model and the tap/NIC options to start kvm with.
+
+    The result describes which optional features (vnet_hdr, vhost_net and
+    multiqueue virtio-net) are both requested in the hypervisor parameters
+    and found in the kvm output. Multiqueue is only considered when
+    vhost_net is enabled. This function is called before opening a new tap
+    device.
+
+    @type up_hvp: dict
+    @param up_hvp: hypervisor parameters of the instance
+    @type devlist: string
+    @param devlist: kvm device list output, searched for virtio-net support
+    @type kvmhelp: string
+    @param kvmhelp: kvm help output, searched for vhost/multiqueue support
+    @return: (nic_model, vnet_hdr, virtio_net_queues, tap_extra, nic_extra)
+    @rtype: tuple
+    @raise errors.HypervisorError: if vhost_net or more than one virtio-net
+      queue is configured but not found in C{kvmhelp}
+
+    """
+    nic_type = up_hvp[constants.HV_NIC_TYPE]
+    if nic_type != constants.HT_NIC_PARAVIRTUAL:
+      # Any other NIC type is used as the model as-is, nothing is enabled
+      return (nic_type, False, 1, "", "")
+
+    nic_model = self._VIRTIO
+    vnet_hdr = False
+    virtio_net_queues = 1
+    tap_extra = ""
+    nic_extra = ""
+    try:
+      if self._VIRTIO_NET_RE.search(devlist):
+        nic_model = self._VIRTIO_NET_PCI
+        vnet_hdr = up_hvp[constants.HV_VNET_HDR]
+    except errors.HypervisorError, _:
+      # Older versions of kvm don't support DEVICE_LIST, but they don't
+      # have new virtio syntax either.
+      pass
+
+    if up_hvp[constants.HV_VHOST_NET]:
+      # Check for vhost_net support.
+      if not self._VHOST_RE.search(kvmhelp):
+        raise errors.HypervisorError("vhost_net is configured"
+                                     " but it is not available")
+      tap_extra = ",vhost=on"
+
+      queues = up_hvp[constants.HV_VIRTIO_NET_QUEUES]
+      if queues > 1:
+        # Check for multiqueue virtio-net support.
+        if not self._VIRTIO_NET_QUEUES_RE.search(kvmhelp):
+          raise errors.HypervisorError("virtio_net_queues is configured"
+                                       " but it is not available")
+        virtio_net_queues = queues
+        # As advised at http://www.linux-kvm.org/page/Multiqueue formula
+        # for calculating vector size is: vectors=2*N+1 where N is the
+        # number of queues (HV_VIRTIO_NET_QUEUES).
+        nic_extra = ",mq=on,vectors=%d" % (2 * queues + 1)
+
+    return (nic_model, vnet_hdr, virtio_net_queues, tap_extra, nic_extra)
+
   # too many local variables
   # pylint: disable=R0914
   def _ExecuteKVMRuntime(self, instance, kvm_runtime, kvmhelp, incoming=None):
@@ -1517,37 +1590,18 @@ class KVMHypervisor(hv_base.BaseHypervisor):
     if not kvm_nics:
       kvm_cmd.extend(["-net", "none"])
     else:
-      vnet_hdr = False
-      tap_extra = ""
-      nic_type = up_hvp[constants.HV_NIC_TYPE]
-      if nic_type == constants.HT_NIC_PARAVIRTUAL:
-        nic_model = self._VIRTIO
-        try:
-          if self._VIRTIO_NET_RE.search(devlist):
-            nic_model = self._VIRTIO_NET_PCI
-            vnet_hdr = up_hvp[constants.HV_VNET_HDR]
-        except errors.HypervisorError, _:
-          # Older versions of kvm don't support DEVICE_LIST, but they don't
-          # have new virtio syntax either.
-          pass
-
-        if up_hvp[constants.HV_VHOST_NET]:
-          # check for vhost_net support
-          if self._VHOST_RE.search(kvmhelp):
-            tap_extra = ",vhost=on"
-          else:
-            raise errors.HypervisorError("vhost_net is configured"
-                                         " but it is not available")
-      else:
-        nic_model = nic_type
-
+      (nic_model, vnet_hdr,
+       virtio_net_queues, tap_extra,
+       nic_extra) = self._GetNetworkDeviceFeatures(up_hvp, devlist, kvmhelp)
       kvm_supports_netdev = self._NETDEV_RE.search(kvmhelp)
-
       for nic_seq, nic in enumerate(kvm_nics):
-        tapname, tapfd = OpenTap(vnet_hdr=vnet_hdr,
-                                 name=self._GenerateKvmTapName(nic))
-        tapfds.append(tapfd)
+        tapname, nic_tapfds = OpenTap(vnet_hdr=vnet_hdr,
+                                      virtio_net_queues=virtio_net_queues,
+                                      name=self._GenerateKvmTapName(nic))
+        tapfds.extend(nic_tapfds)
         taps.append(tapname)
+        tapfd = "%s%s" % ("fds=" if len(nic_tapfds) > 1 else "fd=",
+                          ":".join(str(fd) for fd in nic_tapfds))
         if kvm_supports_netdev:
           nic_val = "%s,mac=%s" % (nic_model, nic.mac)
           try:
@@ -1558,14 +1612,14 @@ class KVMHypervisor(hv_base.BaseHypervisor):
             nic_val += (",id=%s,bus=pci.0,addr=%s" % (kvm_devid, hex(nic.pci)))
           except errors.HotplugError:
             netdev = "netdev%d" % nic_seq
-          nic_val += (",netdev=%s" % netdev)
-          tap_val = ("type=tap,id=%s,fd=%d%s" %
+          nic_val += (",netdev=%s%s" % (netdev, nic_extra))
+          tap_val = ("type=tap,id=%s,%s%s" %
                      (netdev, tapfd, tap_extra))
           kvm_cmd.extend(["-netdev", tap_val, "-device", nic_val])
         else:
           nic_val = "nic,vlan=%s,macaddr=%s,model=%s" % (nic_seq,
                                                          nic.mac, nic_model)
-          tap_val = "tap,vlan=%s,fd=%d" % (nic_seq, tapfd)
+          tap_val = "tap,vlan=%s,%s" % (nic_seq, tapfd)
           kvm_cmd.extend(["-net", tap_val, "-net", nic_val])
 
     if incoming:
@@ -1593,12 +1647,16 @@ class KVMHypervisor(hv_base.BaseHypervisor):
       logging.debug("Enabling QMP")
       kvm_cmd.extend(["-qmp", "unix:%s,server,nowait" %
                       self._InstanceQmpMonitor(instance.name)])
+      # Add a second monitor for kvmd
+      kvm_cmd.extend(["-qmp", "unix:%s,server,nowait" %
+                      self._InstanceKvmdMonitor(instance.name)])
 
-    # Configure the network now for starting instances and bridged interfaces,
-    # during FinalizeMigration for incoming instances' routed interfaces
+    # Configure the network now for starting instances and bridged/OVS
+    # interfaces, during FinalizeMigration for incoming instances' routed
+    # interfaces.
     for nic_seq, nic in enumerate(kvm_nics):
       if (incoming and
-          nic.nicparams[constants.NIC_MODE] != constants.NIC_MODE_BRIDGED):
+          nic.nicparams[constants.NIC_MODE] == constants.NIC_MODE_ROUTED):
         continue
       self._ConfigureNIC(instance, nic_seq, nic, taps[nic_seq])
 
@@ -1693,6 +1751,9 @@ class KVMHypervisor(hv_base.BaseHypervisor):
   def _StartKvmd(hvparams):
     """Ensure that the Kvm daemon is running.
 
+    @type hvparams: dict of strings
+    @param hvparams: hypervisor parameters
+
     """
     if hvparams is None \
           or not hvparams[constants.HV_KVM_USER_SHUTDOWN] \
@@ -1776,12 +1837,13 @@ class KVMHypervisor(hv_base.BaseHypervisor):
       hvp = instance.hvparams
       security_model = hvp[constants.HV_SECURITY_MODEL]
       use_chroot = hvp[constants.HV_KVM_USE_CHROOT]
-      if use_chroot:
-        raise errors.HotplugError("Disk hotplug is not supported"
-                                  " in case of chroot.")
-      if security_model != constants.HT_SM_NONE:
-        raise errors.HotplugError("Disk Hotplug is not supported in case"
-                                  " security models are used.")
+      if action == constants.HOTPLUG_ACTION_ADD:
+        if use_chroot:
+          raise errors.HotplugError("Disk hotplug is not supported"
+                                    " in case of chroot.")
+        if security_model != constants.HT_SM_NONE:
+          raise errors.HotplugError("Disk Hotplug is not supported in case"
+                                    " security models are used.")
 
     if (dev_type == constants.HOTPLUG_TARGET_NIC and
         action == constants.HOTPLUG_ACTION_ADD and not fdsend):
@@ -1854,17 +1916,29 @@ class KVMHypervisor(hv_base.BaseHypervisor):
     kvm_devid = _GenerateDeviceKVMId(dev_type, device)
     runtime = self._LoadKVMRuntime(instance)
     if dev_type == constants.HOTPLUG_TARGET_DISK:
+      drive_uri = _GetDriveURI(device, extra[0], extra[1])
       cmds = ["drive_add dummy file=%s,if=none,id=%s,format=raw" %
-                (extra, kvm_devid)]
+                (drive_uri, kvm_devid)]
       cmds += ["device_add virtio-blk-pci,bus=pci.0,addr=%s,drive=%s,id=%s" %
                 (hex(device.pci), kvm_devid, kvm_devid)]
     elif dev_type == constants.HOTPLUG_TARGET_NIC:
-      (tap, fd) = OpenTap()
+      kvmpath = instance.hvparams[constants.HV_KVM_PATH]
+      kvmhelp = self._GetKVMOutput(kvmpath, self._KVMOPT_HELP)
+      devlist = self._GetKVMOutput(kvmpath, self._KVMOPT_DEVICELIST)
+      up_hvp = runtime[2]
+      (_, vnet_hdr,
+       virtio_net_queues, tap_extra,
+       nic_extra) = self._GetNetworkDeviceFeatures(up_hvp, devlist, kvmhelp)
+      (tap, fds) = OpenTap(vnet_hdr=vnet_hdr,
+                           virtio_net_queues=virtio_net_queues)
+      # netdev_add doesn't support "fds=" when multiple fds are
+      # requested, so generate a separate "fd=" string for every fd
+      tapfd = ",".join(["fd=%s" % fd for fd in fds])
       self._ConfigureNIC(instance, seq, device, tap)
-      self._PassTapFd(instance, fd, device)
-      cmds = ["netdev_add tap,id=%s,fd=%s" % (kvm_devid, kvm_devid)]
-      args = "virtio-net-pci,bus=pci.0,addr=%s,mac=%s,netdev=%s,id=%s" % \
-               (hex(device.pci), device.mac, kvm_devid, kvm_devid)
+      self._PassTapFd(instance, fds, device)
+      cmds = ["netdev_add tap,id=%s,%s%s" % (kvm_devid, tapfd, tap_extra)]
+      args = "virtio-net-pci,bus=pci.0,addr=%s,mac=%s,netdev=%s,id=%s%s" % \
+               (hex(device.pci), device.mac, kvm_devid, kvm_devid, nic_extra)
       cmds += ["device_add %s" % args]
       utils.WriteFile(self._InstanceNICFile(instance.name, seq), data=tap)
 
@@ -1914,7 +1988,7 @@ class KVMHypervisor(hv_base.BaseHypervisor):
       device.pci = self.HotDelDevice(instance, dev_type, device, _, seq)
       self.HotAddDevice(instance, dev_type, device, _, seq)
 
-  def _PassTapFd(self, instance, fd, nic):
+  def _PassTapFd(self, instance, fds, nic):
     """Pass file descriptor to kvm process via monitor socket using SCM_RIGHTS
 
     """
@@ -1922,7 +1996,6 @@ class KVMHypervisor(hv_base.BaseHypervisor):
     #       squash common parts between monitor and qmp
     kvm_devid = _GenerateDeviceKVMId(constants.HOTPLUG_TARGET_NIC, nic)
     command = "getfd %s\n" % kvm_devid
-    fds = [fd]
     logging.info("%s", fds)
     try:
       monsock = MonitorSocket(self._InstanceMonitor(instance.name))
@@ -2103,8 +2176,8 @@ class KVMHypervisor(hv_base.BaseHypervisor):
       kvm_nics = kvm_runtime[1]
 
       for nic_seq, nic in enumerate(kvm_nics):
-        if nic.nicparams[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
-          # Bridged interfaces have already been configured
+        if nic.nicparams[constants.NIC_MODE] != constants.NIC_MODE_ROUTED:
+          # Bridged/OVS interfaces have already been configured
           continue
         try:
           tap = utils.ReadFile(self._InstanceNICFile(instance.name, nic_seq))
@@ -2319,8 +2392,8 @@ class KVMHypervisor(hv_base.BaseHypervisor):
   def CheckParameterSyntax(cls, hvparams):
     """Check the given parameters for validity.
 
-    @type hvparams:  dict
-    @param hvparams: dictionary with parameter names/value
+    @type hvparams: dict of strings
+    @param hvparams: hypervisor parameters
     @raise errors.HypervisorError: when a parameter is not valid
 
     """
@@ -2392,8 +2465,8 @@ class KVMHypervisor(hv_base.BaseHypervisor):
   def ValidateParameters(cls, hvparams):
     """Check the given parameters for validity.
 
-    @type hvparams:  dict
-    @param hvparams: dictionary with parameter names/value
+    @type hvparams: dict of strings
+    @param hvparams: hypervisor parameters
     @raise errors.HypervisorError: when a parameter is not valid
 
     """
@@ -2454,7 +2527,7 @@ class KVMHypervisor(hv_base.BaseHypervisor):
     """KVM powercycle, just a wrapper over Linux powercycle.
 
     @type hvparams: dict of strings
-    @param hvparams: hypervisor params to be used on this node
+    @param hvparams: hypervisor parameters to be used on this node
 
     """
     cls.LinuxPowercycle()