Renew crypto retries for non-master nodes
author: Helga Velroyen <helgav@google.com>
Wed, 4 Mar 2015 22:14:12 +0000 (23:14 +0100)
committer: Helga Velroyen <helgav@google.com>
Wed, 18 Mar 2015 10:20:34 +0000 (11:20 +0100)
If renewing the SSL certificate for non-master nodes fails,
retry up to two more times. Unit tests included.

Signed-off-by: Helga Velroyen <helgav@google.com>
Reviewed-by: Petr Pudlak <pudlak@google.com>

lib/cmdlib/cluster.py
test/py/cmdlib/cluster_unittest.py

index 792ee3b..0f4b2eb 100644 (file)
@@ -164,14 +164,19 @@ class LUClusterRenewCrypto(NoHooksLU):
         feedback_fn("* Skipping offline node %s" % node_info.name)
         continue
       if node_uuid != master_uuid:
-        try:
-          new_digest = CreateNewClientCert(self, node_uuid)
-          if node_info.master_candidate:
-            utils.AddNodeToCandidateCerts(node_uuid,
-                                          new_digest,
-                                          cluster.candidate_certs)
-        except errors.OpExecError as e:
-          node_errors[node_uuid] = e
+        for _ in range(self._MAX_NUM_RETRIES):
+          try:
+            new_digest = CreateNewClientCert(self, node_uuid)
+            if node_info.master_candidate:
+              utils.AddNodeToCandidateCerts(node_uuid,
+                                            new_digest,
+                                            cluster.candidate_certs)
+            break
+          except errors.OpExecError as last_exception:
+            pass
+        else:
+          if last_exception:
+            node_errors[node_uuid] = last_exception
 
     if node_errors:
       msg = ("Some nodes' SSL client certificates could not be renewed."
index 88e448c..073b8de 100644 (file)
@@ -2469,7 +2469,48 @@ class TestLUClusterRenewCrypto(CmdlibTestCase):
     self._AssertCertFiles(pathutils)
 
     cluster = self.cfg.GetClusterInfo()
-    self.assertFalse(cluster.candidate_certs.values)
+    self.assertFalse(cluster.candidate_certs)
+
+  def _RpcSuccessfulAfterRetriesNonMaster(self, node_uuid, _):
+    if self._retries < self._max_retries and node_uuid != self._master_uuid:
+      self._retries += 1
+      return self.RpcResultsBuilder() \
+        .CreateFailedNodeResult(node_uuid)
+    else:
+      return self.RpcResultsBuilder() \
+        .CreateSuccessfulNodeResult(node_uuid,
+          [(constants.CRYPTO_TYPE_SSL_DIGEST, self._GetFakeDigest(node_uuid))])
+
+  def _NonMasterRetries(self, pathutils, max_retries):
+    self._InitPathutils(pathutils)
+
+    self._master_uuid = self.cfg.GetMasterNode()
+    self._max_retries = max_retries
+    self._retries = 0
+    self.rpc.call_node_crypto_tokens = self._RpcSuccessfulAfterRetriesNonMaster
+
+    # Add one non-master node
+    self.cfg.AddNewNode()
+
+    op = opcodes.OpClusterRenewCrypto()
+    self.ExecOpCode(op)
+
+    self._AssertCertFiles(pathutils)
+
+    return self.cfg.GetClusterInfo()
+
+  @patchPathutils("cluster")
+  def testNonMasterRetriesSuccess(self, pathutils):
+    cluster = self._NonMasterRetries(pathutils, 2)
+    self.assertEqual(2, len(cluster.candidate_certs.values()))
+
+  @patchPathutils("cluster")
+  def testNonMasterRetriesFail(self, pathutils):
+    cluster = self._NonMasterRetries(pathutils, 5)
+
+    # Only the master digest should be in the cert list
+    self.assertEqual(1, len(cluster.candidate_certs.values()))
+    self.assertTrue(self._master_uuid in cluster.candidate_certs)
 
 
 if __name__ == "__main__":