Error handling on failed SSL cert renewal for master
author Helga Velroyen <helgav@google.com>
Wed, 4 Mar 2015 20:31:40 +0000 (21:31 +0100)
committer Helga Velroyen <helgav@google.com>
Wed, 18 Mar 2015 10:20:27 +0000 (11:20 +0100)
When the recreation of the master's SSL client certificate
fails, LUClusterRenewCrypto did not terminate gracefully.
This patch adds unit tests for this case and improves
the error handling.

Signed-off-by: Helga Velroyen <helgav@google.com>
Reviewed-by: Petr Pudlak <pudlak@google.com>

lib/cmdlib/cluster.py
test/py/cmdlib/cluster_unittest.py

index 6f0103e..4372fc5 100644 (file)
@@ -129,13 +129,28 @@ class LUClusterRenewCrypto(NoHooksLU):
     except IOError:
       logging.info("No old certificate available.")
 
-    # Technically it should not be necessary to set the cert
-    # paths. However, due to a bug in the mock library, we
-    # have to do this to be able to test the function properly.
-    _UpdateMasterClientCert(
-        self, master_uuid, cluster, feedback_fn,
-        client_cert=pathutils.NODED_CLIENT_CERT_FILE,
-        client_cert_tmp=pathutils.NODED_CLIENT_CERT_FILE_TMP)
+    try:
+      # Technically it should not be necessary to set the cert
+      # paths. However, due to a bug in the mock library, we
+      # have to do this to be able to test the function properly.
+      _UpdateMasterClientCert(
+          self, master_uuid, cluster, feedback_fn,
+          client_cert=pathutils.NODED_CLIENT_CERT_FILE,
+          client_cert_tmp=pathutils.NODED_CLIENT_CERT_FILE_TMP)
+    except errors.OpExecError as e:
+      feedback_fn("Could not renew the master's client SSL certificate."
+                  " Cleaning up. Error: %s." % e)
+      # Cleaning up temporary certificates
+      utils.RemoveNodeFromCandidateCerts("%s-SERVER" % master_uuid,
+                                         cluster.candidate_certs)
+      utils.RemoveNodeFromCandidateCerts("%s-OLDMASTER" % master_uuid,
+                                         cluster.candidate_certs)
+      return
+    finally:
+      try:
+        utils.RemoveFile(pathutils.NODED_CLIENT_CERT_FILE_TMP)
+      except IOError:
+        pass
 
     nodes = self.cfg.GetAllNodesInfo()
     for (node_uuid, node_info) in nodes.items():
index e3ea305..4da6ea0 100644 (file)
@@ -2325,6 +2325,32 @@ class TestLUClusterRenewCrypto(CmdlibTestCase):
       expected_digest = self._GetFakeDigest(node_uuid)
       self.assertEqual(expected_digest, cluster.candidate_certs[node_uuid])
 
+  @patchPathutils("cluster")
+  def testMasterFails(self, pathutils):
+
+    # patch pathutils to point to temporary files
+    pathutils.NODED_CERT_FILE = self._node_cert
+    pathutils.NODED_CLIENT_CERT_FILE = self._client_node_cert
+    pathutils.NODED_CLIENT_CERT_FILE_TMP = \
+        self._client_node_cert_tmp
+
+    # make sure the RPC calls are failing for all nodes
+    master_uuid = self.cfg.GetMasterNode()
+    self.rpc.call_node_crypto_tokens.return_value = self.RpcResultsBuilder() \
+        .CreateFailedNodeResult(master_uuid)
+
+    op = opcodes.OpClusterRenewCrypto()
+    self.ExecOpCode(op)
+
+    # Check if the correct certificates exist and don't exist on the master
+    self.assertTrue(os.path.exists(pathutils.NODED_CERT_FILE))
+    self.assertTrue(os.path.exists(pathutils.NODED_CLIENT_CERT_FILE))
+    self.assertFalse(os.path.exists(pathutils.NODED_CLIENT_CERT_FILE_TMP))
+
+    # Check if we correctly have no candidate certificates
+    cluster = self.cfg.GetClusterInfo()
+    self.assertFalse(cluster.candidate_certs)
+
 
 if __name__ == "__main__":
   testutils.GanetiTestProgram()