Add disks_active to configuration
author    Thomas Thrainer <thomasth@google.com>
          Wed, 15 May 2013 11:55:30 +0000 (13:55 +0200)
committer Thomas Thrainer <thomasth@google.com>
          Tue, 28 May 2013 13:05:35 +0000 (15:05 +0200)
This flag tracks whether the disks of an instance are supposed to be
active. That's the case when an instance is running, or when its disks
were activated explicitly (and in a couple of other cases).
It will be used by the watcher to re-activate disks after a node reboot.
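
A minimal sketch of the intended watcher-side use (illustrative only;
reactivate_disks and activate_fn are hypothetical stand-ins, not actual
ganeti-watcher code):

# Illustration only: after a node reboot, bring back the disks of every
# instance whose disks_active flag is set. reactivate_disks/activate_fn
# are hypothetical names, not part of the real watcher.
def reactivate_disks(instances, activate_fn):
  for inst in instances:
    # disks_active is True for running instances and for instances whose
    # disks were activated explicitly, even if the instance is down
    if inst.disks_active:
      activate_fn(inst.name)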

Signed-off-by: Thomas Thrainer <thomasth@google.com>
Reviewed-by: Helga Velroyen <helgav@google.com>

12 files changed:
lib/cmdlib/backup.py
lib/cmdlib/cluster.py
lib/cmdlib/group.py
lib/cmdlib/instance.py
lib/cmdlib/instance_migration.py
lib/cmdlib/instance_storage.py
lib/cmdlib/node.py
lib/config.py
lib/masterd/iallocator.py
lib/objects.py
src/Ganeti/Objects.hs
test/hs/Test/Ganeti/Objects.hs

diff --git a/lib/cmdlib/backup.py b/lib/cmdlib/backup.py
index f41ee52..f853a84 100644
@@ -398,7 +398,7 @@ class LUBackupExport(LogicalUnit):
     for disk in instance.disks:
       self.cfg.SetDiskID(disk, src_node)
 
-    activate_disks = (instance.admin_state != constants.ADMINST_UP)
+    activate_disks = not instance.disks_active
 
     if activate_disks:
      # Activate the instance disks if we're exporting a stopped instance
diff --git a/lib/cmdlib/cluster.py b/lib/cmdlib/cluster.py
index aee895b..8756ecd 100644
@@ -1779,12 +1779,12 @@ class LUClusterVerifyGroup(LogicalUnit, _VerifyErrors):
       # node here
       snode = node_image[nname]
       bad_snode = snode.ghost or snode.offline
-      _ErrorIf(inst_config.admin_state == constants.ADMINST_UP and
+      _ErrorIf(inst_config.disks_active and
                not success and not bad_snode,
                constants.CV_EINSTANCEFAULTYDISK, instance,
                "couldn't retrieve status for disk/%s on %s: %s",
                idx, nname, bdev_status)
-      _ErrorIf((inst_config.admin_state == constants.ADMINST_UP and
+      _ErrorIf((inst_config.disks_active and
                 success and bdev_status.ldisk_status == constants.LDS_FAULTY),
                constants.CV_EINSTANCEFAULTYDISK, instance,
                "disk/%s on %s is faulty", idx, nname)
@@ -2062,8 +2062,7 @@ class LUClusterVerifyGroup(LogicalUnit, _VerifyErrors):
         node_drbd[minor] = (instance, False)
       else:
         instance = instanceinfo[instance]
-        node_drbd[minor] = (instance.name,
-                            instance.admin_state == constants.ADMINST_UP)
+        node_drbd[minor] = (instance.name, instance.disks_active)
 
     # and now check them
     used_minors = nresult.get(constants.NV_DRBDLIST, [])
diff --git a/lib/cmdlib/group.py b/lib/cmdlib/group.py
index d505d50..676803c 100644
@@ -910,8 +910,7 @@ class LUGroupVerifyDisks(NoHooksLU):
     res_missing = {}
 
     nv_dict = MapInstanceDisksToNodes(
-      [inst for inst in self.instances.values()
-       if inst.admin_state == constants.ADMINST_UP])
+      [inst for inst in self.instances.values() if inst.disks_active])
 
     if nv_dict:
       nodes = utils.NiceSort(set(self.owned_locks(locking.LEVEL_NODE)) &
diff --git a/lib/cmdlib/instance.py b/lib/cmdlib/instance.py
index 2d4864e..f7d3bb9 100644
@@ -1197,6 +1197,7 @@ class LUInstanceCreate(LogicalUnit):
                             primary_node=pnode_name,
                             nics=self.nics, disks=disks,
                             disk_template=self.op.disk_template,
+                            disks_active=False,
                             admin_state=constants.ADMINST_DOWN,
                             network_port=network_port,
                             beparams=self.op.beparams,
@@ -1276,6 +1277,9 @@ class LUInstanceCreate(LogicalUnit):
       raise errors.OpExecError("There are some degraded disks for"
                                " this instance")
 
+    # instance disks are now active
+    iobj.disks_active = True
+
     # Release all node resource locks
     ReleaseLocks(self, locking.LEVEL_NODE_RES)
 
diff --git a/lib/cmdlib/instance_migration.py b/lib/cmdlib/instance_migration.py
index 201accb..9b715ae 100644
@@ -832,7 +832,7 @@ class TLMigrateInstance(Tasklet):
     source_node = instance.primary_node
     target_node = self.target_node
 
-    if instance.admin_state == constants.ADMINST_UP:
+    if instance.disks_active:
       self.feedback_fn("* checking disk consistency between source and target")
       for (idx, dev) in enumerate(instance.disks):
         # for drbd, these are drbd over lvm
diff --git a/lib/cmdlib/instance_storage.py b/lib/cmdlib/instance_storage.py
index 4ad08d0..f5457fe 100644
@@ -1139,6 +1139,7 @@ def ShutdownInstanceDisks(lu, instance, disks=None, ignore_primary=False):
   ignored.
 
   """
+  lu.cfg.MarkInstanceDisksInactive(instance.name)
   all_result = True
   disks = ExpandCheckDisks(instance, disks)
 
@@ -1205,6 +1206,10 @@ def AssembleInstanceDisks(lu, instance, disks=None, ignore_secondaries=False,
   # into any other network-connected state (Connected, SyncTarget,
   # SyncSource, etc.)
 
+  # mark instance disks as active before doing actual work, so the watcher
+  # does not try to shut them down erroneously
+  lu.cfg.MarkInstanceDisksActive(iname)
+
   # 1st pass, assemble on all nodes in secondary mode
   for idx, inst_disk in enumerate(disks):
     for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
@@ -1256,6 +1261,9 @@ def AssembleInstanceDisks(lu, instance, disks=None, ignore_secondaries=False,
   for disk in disks:
     lu.cfg.SetDiskID(disk, instance.primary_node)
 
+  if not disks_ok:
+    lu.cfg.MarkInstanceDisksInactive(iname)
+
   return disks_ok, device_info
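
The flag handling above is deliberately ordered: MarkInstanceDisksActive runs
before the first per-node RPC so a concurrently running watcher never shuts
down disks that are coming up, and the flag is cleared again if assembly
fails. A condensed sketch of the pattern (the three callables stand in for
cfg.MarkInstanceDisksActive, the per-node assembly work, and
cfg.MarkInstanceDisksInactive):

# Sketch of the activate-first, roll-back-on-failure pattern used above.
def assemble_with_flag(mark_active, assemble, mark_inactive):
  mark_active()           # set the flag before any slow, fallible RPC work
  disks_ok = assemble()   # bring the disks up on the nodes
  if not disks_ok:
    mark_inactive()       # assembly failed, so clear the flag again
  return disks_ok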
 
 
@@ -1460,9 +1468,9 @@ class LUInstanceGrowDisk(LogicalUnit):
       if disk_abort:
         self.LogWarning("Disk syncing has not returned a good status; check"
                         " the instance")
-      if instance.admin_state != constants.ADMINST_UP:
+      if not instance.disks_active:
         _SafeShutdownInstanceDisks(self, instance, disks=[disk])
-    elif instance.admin_state != constants.ADMINST_UP:
+    elif not instance.disks_active:
       self.LogWarning("Not shutting down the disk even if the instance is"
                       " not supposed to be running because no wait for"
                       " sync mode was requested")
@@ -1650,6 +1658,7 @@ class LUInstanceActivateDisks(NoHooksLU):
 
     if self.op.wait_for_sync:
       if not WaitForSync(self, self.instance):
+        self.cfg.MarkInstanceDisksInactive(self.instance.name)
         raise errors.OpExecError("Some disks of the instance are degraded!")
 
     return disks_info
@@ -2035,7 +2044,7 @@ class TLReplaceDisks(Tasklet):
     feedback_fn("Current seconary node: %s" %
                 utils.CommaJoin(self.instance.secondary_nodes))
 
-    activate_disks = (self.instance.admin_state != constants.ADMINST_UP)
+    activate_disks = not self.instance.disks_active
 
     # Activate the instance disks if we're replacing them on a down instance
     if activate_disks:
diff --git a/lib/cmdlib/node.py b/lib/cmdlib/node.py
index adbafb6..2ac2e3a 100644
@@ -1549,7 +1549,7 @@ class LURepairNodeStorage(NoHooksLU):
     """
     # Check whether any instance on this node has faulty disks
     for inst in _GetNodeInstances(self.cfg, self.op.node_name):
-      if inst.admin_state != constants.ADMINST_UP:
+      if not inst.disks_active:
         continue
       check_nodes = set(inst.all_nodes)
       check_nodes.discard(self.op.node_name)
diff --git a/lib/config.py b/lib/config.py
index b968fcf..ed693a7 100644
@@ -1436,19 +1436,27 @@ class ConfigWriter:
       raise errors.ConfigurationError("Cannot add '%s': UUID %s already"
                                       " in use" % (item.name, item.uuid))
 
-  def _SetInstanceStatus(self, instance_name, status):
+  def _SetInstanceStatus(self, instance_name, status, disks_active):
     """Set the instance's status to a given value.
 
     """
-    assert status in constants.ADMINST_ALL, \
-           "Invalid status '%s' passed to SetInstanceStatus" % (status,)
-
     if instance_name not in self._config_data.instances:
       raise errors.ConfigurationError("Unknown instance '%s'" %
                                       instance_name)
     instance = self._config_data.instances[instance_name]
-    if instance.admin_state != status:
+
+    if status is None:
+      status = instance.admin_state
+    if disks_active is None:
+      disks_active = instance.disks_active
+
+    assert status in constants.ADMINST_ALL, \
+           "Invalid status '%s' passed to SetInstanceStatus" % (status,)
+
+    if instance.admin_state != status or \
+       instance.disks_active != disks_active:
       instance.admin_state = status
+      instance.disks_active = disks_active
       instance.serial_no += 1
       instance.mtime = time.time()
       self._WriteConfig()
@@ -1457,15 +1465,19 @@ class ConfigWriter:
   def MarkInstanceUp(self, instance_name):
     """Mark the instance status to up in the config.
 
+    This also sets the instance disks active flag.
+
     """
-    self._SetInstanceStatus(instance_name, constants.ADMINST_UP)
+    self._SetInstanceStatus(instance_name, constants.ADMINST_UP, True)
 
   @locking.ssynchronized(_config_lock)
   def MarkInstanceOffline(self, instance_name):
     """Mark the instance status to down in the config.
 
+    This also clears the instance disks active flag.
+
     """
-    self._SetInstanceStatus(instance_name, constants.ADMINST_OFFLINE)
+    self._SetInstanceStatus(instance_name, constants.ADMINST_OFFLINE, False)
 
   @locking.ssynchronized(_config_lock)
   def RemoveInstance(self, instance_name):
@@ -1531,8 +1543,25 @@ class ConfigWriter:
   def MarkInstanceDown(self, instance_name):
     """Mark the status of an instance to down in the configuration.
 
+    This does not touch the instance disks active flag, as shut down instances
+    can still have active disks.
+
+    """
+    self._SetInstanceStatus(instance_name, constants.ADMINST_DOWN, None)
+
+  @locking.ssynchronized(_config_lock)
+  def MarkInstanceDisksActive(self, instance_name):
+    """Mark the status of instance disks active.
+
+    """
+    self._SetInstanceStatus(instance_name, None, True)
+
+  @locking.ssynchronized(_config_lock)
+  def MarkInstanceDisksInactive(self, instance_name):
+    """Mark the status of instance disks inactive.
+
     """
-    self._SetInstanceStatus(instance_name, constants.ADMINST_DOWN)
+    self._SetInstanceStatus(instance_name, None, False)
 
   def _UnlockedGetInstanceList(self):
     """Get the list of instances.
diff --git a/lib/masterd/iallocator.py b/lib/masterd/iallocator.py
index 7179b0e..b1fd9f1 100644
@@ -605,6 +605,7 @@ class IAllocator(object):
                    constants.IDISK_MODE: dsk.mode}
                   for dsk in iinfo.disks],
         "disk_template": iinfo.disk_template,
+        "disks_active": iinfo.disks_active,
         "hypervisor": iinfo.hypervisor,
         }
       pir["disk_space_total"] = gmi.ComputeDiskSize(iinfo.disk_template,
diff --git a/lib/objects.py b/lib/objects.py
index 8d809c4..f7ce3b6 100644
@@ -1057,6 +1057,7 @@ class Instance(TaggableObject):
     "nics",
     "disks",
     "disk_template",
+    "disks_active",
     "network_port",
     "serial_no",
     ] + _TIMESTAMPS + _UUID
diff --git a/src/Ganeti/Objects.hs b/src/Ganeti/Objects.hs
index 1550309..cd8d024 100644
@@ -472,6 +472,7 @@ $(buildObject "Instance" "inst" $
   , simpleField "nics"           [t| [PartialNic]       |]
   , simpleField "disks"          [t| [Disk]             |]
   , simpleField "disk_template"  [t| DiskTemplate       |]
+  , simpleField "disks_active"   [t| Bool               |]
   , optionalField $ simpleField "network_port" [t| Int  |]
   ]
   ++ timeStampFields
diff --git a/test/hs/Test/Ganeti/Objects.hs b/test/hs/Test/Ganeti/Objects.hs
index 1cb33e1..b27bba5 100644
@@ -131,6 +131,8 @@ instance Arbitrary Instance where
       <*> vectorOf 5 genDisk
       -- disk template
       <*> arbitrary
+      -- disks active
+      <*> arbitrary
       -- network port
       <*> arbitrary
       -- ts