Retries for the master's SSL cert renewal
author Helga Velroyen <helgav@google.com>
Wed, 4 Mar 2015 21:55:27 +0000 (22:55 +0100)
committer Helga Velroyen <helgav@google.com>
Wed, 18 Mar 2015 10:20:34 +0000 (11:20 +0100)
If renewing the master's client SSL certificate fails, try
two more times before giving up. Unit test included.

Signed-off-by: Helga Velroyen <helgav@google.com>
Reviewed-by: Petr Pudlak <pudlak@google.com>

lib/cmdlib/cluster.py
test/py/cmdlib/cluster_unittest.py

index 10deda6..792ee3b 100644 (file)
@@ -111,6 +111,8 @@ class LUClusterRenewCrypto(NoHooksLU):
   takes care of the renewal of the client SSL certificates.
 
   """
+  _MAX_NUM_RETRIES = 3
+
   def Exec(self, feedback_fn):
     master_uuid = self.cfg.GetMasterNode()
     cluster = self.cfg.GetClusterInfo()
@@ -129,28 +131,31 @@ class LUClusterRenewCrypto(NoHooksLU):
     except IOError:
       logging.info("No old certificate available.")
 
-    try:
-      # Technically it should not be necessary to set the cert
-      # paths. However, due to a bug in the mock library, we
-      # have to do this to be able to test the function properly.
-      _UpdateMasterClientCert(
-          self, master_uuid, cluster, feedback_fn,
-          client_cert=pathutils.NODED_CLIENT_CERT_FILE,
-          client_cert_tmp=pathutils.NODED_CLIENT_CERT_FILE_TMP)
-    except errors.OpExecError as e:
+    for _ in range(self._MAX_NUM_RETRIES):
+      try:
+        # Technically it should not be necessary to set the cert
+        # paths. However, due to a bug in the mock library, we
+        # have to do this to be able to test the function properly.
+        _UpdateMasterClientCert(
+            self, master_uuid, cluster, feedback_fn,
+            client_cert=pathutils.NODED_CLIENT_CERT_FILE,
+            client_cert_tmp=pathutils.NODED_CLIENT_CERT_FILE_TMP)
+        break
+      except errors.OpExecError as e:
+        pass
+    else:
       feedback_fn("Could not renew the master's client SSL certificate."
-                  " Cleaning up. Error: %s." % e)
+                   " Cleaning up. Error: %s." % e)
       # Cleaning up temporary certificates
       utils.RemoveNodeFromCandidateCerts("%s-SERVER" % master_uuid,
                                          cluster.candidate_certs)
       utils.RemoveNodeFromCandidateCerts("%s-OLDMASTER" % master_uuid,
                                          cluster.candidate_certs)
-      return
-    finally:
       try:
         utils.RemoveFile(pathutils.NODED_CLIENT_CERT_FILE_TMP)
       except IOError:
         pass
+      return
 
     node_errors = {}
     nodes = self.cfg.GetAllNodesInfo()
index 289df4f..88e448c 100644 (file)
@@ -2427,5 +2427,50 @@ class TestLUClusterRenewCrypto(CmdlibTestCase):
         expected_digest = self._GetFakeDigest(node_uuid)
         self.assertEqual(expected_digest, cluster.candidate_certs[node_uuid])
 
+  def _RpcSuccessfulAfterRetries(self, node_uuid, _):
+    if self._retries < self._max_retries:
+      self._retries += 1
+      return self.RpcResultsBuilder() \
+        .CreateFailedNodeResult(node_uuid)
+    else:
+      return self.RpcResultsBuilder() \
+        .CreateSuccessfulNodeResult(node_uuid,
+          [(constants.CRYPTO_TYPE_SSL_DIGEST, self._GetFakeDigest(node_uuid))])
+
+  @patchPathutils("cluster")
+  def testMasterRetriesSuccess(self, pathutils):
+    self._InitPathutils(pathutils)
+
+    self._max_retries = 2
+    self._retries = 0
+    self.rpc.call_node_crypto_tokens = self._RpcSuccessfulAfterRetries
+
+    op = opcodes.OpClusterRenewCrypto()
+    self.ExecOpCode(op)
+
+    self._AssertCertFiles(pathutils)
+
+    cluster = self.cfg.GetClusterInfo()
+    master_uuid = self.cfg.GetMasterNode()
+    self.assertTrue(self._GetFakeDigest(master_uuid)
+                    in cluster.candidate_certs.values())
+
+  @patchPathutils("cluster")
+  def testMasterRetriesFail(self, pathutils):
+    self._InitPathutils(pathutils)
+    # Fail the RPC more often (5) than the LU retries (_MAX_NUM_RETRIES = 3).
+    self._max_retries = 5
+    self._retries = 0
+    self.rpc.call_node_crypto_tokens = self._RpcSuccessfulAfterRetries
+
+    op = opcodes.OpClusterRenewCrypto()
+    self.ExecOpCode(op)
+
+    self._AssertCertFiles(pathutils)
+    # Call values(): the bound method itself is always truthy.
+    cluster = self.cfg.GetClusterInfo()
+    self.assertFalse(cluster.candidate_certs.values())
+
+
 if __name__ == "__main__":
   testutils.GanetiTestProgram()