Add logging to RenewCrypto
author Helga Velroyen <helgav@google.com>
Mon, 13 Apr 2015 09:07:45 +0000 (11:07 +0200)
committer Helga Velroyen <helgav@google.com>
Mon, 13 Apr 2015 13:55:33 +0000 (15:55 +0200)
As LUClusterRenewCrypto is a pain to debug, this patch
adds a lot more logging of events to its Exec method.
Note:
- As renew-crypto is a relatively rarely used operation
  in a normal production cluster, this won't clutter up
  real users' log files.
- Most of the messages are in debug mode, so they would
  only show up in the log files of clusters run in
  debug mode anyway (as, for example, our QA clusters).
- A few log messages are in error mode, which is
  intentional as they log more details about the errors
  than is given in the feedback functions.

Signed-off-by: Helga Velroyen <helgav@google.com>
Reviewed-by: Klaus Aehlig <aehlig@google.com>

lib/cmdlib/cluster.py

index 6cb1401..5cd96b1 100644 (file)
@@ -115,24 +115,37 @@ class LUClusterRenewCrypto(NoHooksLU):
 
   def Exec(self, feedback_fn):
     master_uuid = self.cfg.GetMasterNode()
+    logging.debug("Renewing the master's SSL node certificate."
+                  " Master's UUID: %s.", master_uuid)
     cluster = self.cfg.GetClusterInfo()
 
     server_digest = utils.GetCertificateDigest(
       cert_filename=pathutils.NODED_CERT_FILE)
+    logging.debug("SSL digest of the node certificate: %s.", server_digest)
     utils.AddNodeToCandidateCerts("%s-SERVER" % master_uuid,
                                   server_digest,
                                   cluster.candidate_certs)
+    logging.debug("Added master's digest as *-SERVER entry to configuration."
+                  " Current list of candidate certificates: %s.",
+                  str(cluster.candidate_certs))
+
     try:
       old_master_digest = utils.GetCertificateDigest(
         cert_filename=pathutils.NODED_CLIENT_CERT_FILE)
+      logging.debug("SSL digest of old master's SSL node certificate: %s.",
+                    old_master_digest)
       utils.AddNodeToCandidateCerts("%s-OLDMASTER" % master_uuid,
                                     old_master_digest,
                                     cluster.candidate_certs)
+      logging.debug("Added old master's node certificate digest to config"
+                    " as *-OLDMASTER. Current list of candidate certificates:"
+                    " %s.", str(cluster.candidate_certs))
+
     except IOError:
-      logging.info("No old certificate available.")
+      logging.info("No old master certificate available.")
 
     last_exception = None
-    for _ in range(self._MAX_NUM_RETRIES):
+    for i in range(self._MAX_NUM_RETRIES):
       try:
         # Technically it should not be necessary to set the cert
         # paths. However, due to a bug in the mock library, we
@@ -141,42 +154,61 @@ class LUClusterRenewCrypto(NoHooksLU):
             self, master_uuid, cluster, feedback_fn,
             client_cert=pathutils.NODED_CLIENT_CERT_FILE,
             client_cert_tmp=pathutils.NODED_CLIENT_CERT_FILE_TMP)
+        logging.debug("Successfully renewed the master's node certificate.")
         break
       except errors.OpExecError as e:
+        logging.error("Renewing the master's SSL node certificate failed"
+                      " at attempt no. %s with error '%s'", str(i), e)
         last_exception = e
     else:
       if last_exception:
         feedback_fn("Could not renew the master's client SSL certificate."
-                     " Cleaning up. Error: %s." % last_exception)
+                    " Cleaning up. Error: %s." % last_exception)
       # Cleaning up temporary certificates
       utils.RemoveNodeFromCandidateCerts("%s-SERVER" % master_uuid,
                                          cluster.candidate_certs)
       utils.RemoveNodeFromCandidateCerts("%s-OLDMASTER" % master_uuid,
                                          cluster.candidate_certs)
+      logging.debug("Cleaned up *-SERVER and *-OLDMASTER certificate from"
+                    " master candidate cert list. Current state of the"
+                    " list: %s.", str(cluster.candidate_certs))
       try:
         utils.RemoveFile(pathutils.NODED_CLIENT_CERT_FILE_TMP)
-      except IOError:
-        pass
+      except IOError as e:
+        logging.debug("Could not clean up temporary node certificate of the"
+                      " master node. (Possibly because it was already removed"
+                      " properly.) Error: %s.", e)
       return
 
     node_errors = {}
     nodes = self.cfg.GetAllNodesInfo()
+    logging.debug("Renewing non-master nodes' node certificates.")
     for (node_uuid, node_info) in nodes.items():
       if node_info.offline:
         feedback_fn("* Skipping offline node %s" % node_info.name)
+        logging.debug("Skipping offline node %s (UUID: %s).",
+                      node_info.name, node_uuid)
         continue
       if node_uuid != master_uuid:
+        logging.debug("Renewing node certificate of node '%s'.", node_uuid)
         last_exception = None
-        for _ in range(self._MAX_NUM_RETRIES):
+        for i in range(self._MAX_NUM_RETRIES):
           try:
             new_digest = CreateNewClientCert(self, node_uuid)
             if node_info.master_candidate:
               utils.AddNodeToCandidateCerts(node_uuid,
                                             new_digest,
                                             cluster.candidate_certs)
+              logging.debug("Added the node's certificate to candidate"
+                            " certificate list. Current list: %s.",
+                            str(cluster.candidate_certs))
             break
           except errors.OpExecError as e:
             last_exception = e
+            logging.error("Could not renew a non-master node's SSL node"
+                          " certificate at attempt no. %s. The node's UUID"
+                          " is %s, and the error was: %s.",
+                          str(i), node_uuid, e)
         else:
           if last_exception:
             node_errors[node_uuid] = last_exception
@@ -193,7 +225,12 @@ class LUClusterRenewCrypto(NoHooksLU):
                                        cluster.candidate_certs)
     utils.RemoveNodeFromCandidateCerts("%s-OLDMASTER" % master_uuid,
                                        cluster.candidate_certs)
+    logging.debug("Cleaned up *-SERVER and *-OLDMASTER certificate from"
+                  " master candidate cert list. Current state of the"
+                  " list: %s.", cluster.candidate_certs)
+
     # Trigger another update of the config now with the new master cert
+    logging.debug("Trigger an update of the configuration on all nodes.")
     self.cfg.Update(cluster, feedback_fn)