Merge branch 'stable-2.12' into stable-2.13
[ganeti-github.git] / lib / client / gnt_cluster.py
index 0efe1d5..fa29f9f 100644 (file)
@@ -85,6 +85,10 @@ TO_OPT = cli_option("--to", default=None, type="string",
 RESUME_OPT = cli_option("--resume", default=False, action="store_true",
                         help="Resume any pending Ganeti upgrades")
 
+DATA_COLLECTOR_INTERVAL_OPT = cli_option(
+    "--data-collector-interval", default={}, type="keyval",
+    help="Set collection intervals in seconds of data collectors.")
+
 _EPO_PING_INTERVAL = 30 # 30 seconds between pings
 _EPO_PING_TIMEOUT = 1 # 1 second
 _EPO_REACHABLE_TIMEOUT = 15 * 60 # 15 minutes
@@ -492,6 +496,26 @@ def _FormatGroupedParams(paramsdict, roman=False):
   return ret
 
 
+def _FormatDataCollectors(paramsdict):
+  """Format Grouped parameters (be, nic, disk) by group.
+
+  @type paramsdict: dict of dicts
+  @param paramsdict: response of QueryClusterInfo
+  @rtype: dict of dicts
+  @return: parameter grouped by data collector
+
+  """
+
+  enabled = paramsdict[constants.DATA_COLLECTORS_ENABLED_NAME]
+  interval = paramsdict[constants.DATA_COLLECTORS_INTERVAL_NAME]
+
+  ret = {}
+  for key in enabled:
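+    # intervals are stored in microseconds; display them in seconds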
+    ret[key] = dict(active=enabled[key],
+                    interval="%.3fs" % (interval[key] / 1e6))
+  return ret
+
+
 def ShowClusterConfig(opts, args):
   """Shows cluster information.
 
@@ -604,6 +628,7 @@ def ShowClusterConfig(opts, args):
 
     ("Instance policy - limits for instances",
      FormatPolicyInfo(result["ipolicy"], None, True, opts.roman_integers)),
+    ("Data collectors", _FormatDataCollectors(result)),
     ]
 
   PrintGenericInfo(info)
@@ -718,7 +743,8 @@ def VerifyCluster(opts, args):
                                debug_simulate_errors=opts.simulate_errors,
                                skip_checks=skip_checks,
                                ignore_errors=opts.ignore_errors,
-                               group_name=opts.nodegroup)
+                               group_name=opts.nodegroup,
+                               verify_clutter=opts.verify_clutter)
   result = SubmitOpCode(op, cl=cl, opts=opts)
 
   # Keep track of submitted jobs
@@ -941,7 +967,8 @@ def _ReadAndVerifyCert(cert_filename, verify_private_key=False):
 def _RenewCrypto(new_cluster_cert, new_rapi_cert, # pylint: disable=R0911
                  rapi_cert_filename, new_spice_cert, spice_cert_filename,
                  spice_cacert_filename, new_confd_hmac_key, new_cds,
-                 cds_filename, force, new_node_cert, verbose, debug):
+                 cds_filename, force, new_node_cert, new_ssh_keys,
+                 verbose, debug):
   """Renews cluster certificates, keys and secrets.
 
   @type new_cluster_cert: bool
@@ -965,14 +992,19 @@ def _RenewCrypto(new_cluster_cert, new_rapi_cert, # pylint: disable=R0911
   @param cds_filename: Path to file containing new cluster domain secret
   @type force: bool
   @param force: Whether to ask user for confirmation
-  @type new_node_cert: string
+  @type new_node_cert: bool
   @param new_node_cert: Whether to generate new node certificates
+  @type new_ssh_keys: bool
+  @param new_ssh_keys: Whether to generate new node SSH keys
   @type verbose: boolean
   @param verbose: show verbose output
   @type debug: boolean
   @param debug: show debug output
 
   """
+  ToStdout("Updating certificates now. Running \"gnt-cluster verify\" "
+           " is recommended after this operation.")
+
   if new_rapi_cert and rapi_cert_filename:
     ToStderr("Only one of the --new-rapi-certificate and --rapi-certificate"
              " options can be specified at the same time.")
@@ -1074,17 +1106,17 @@ def _RenewCrypto(new_cluster_cert, new_rapi_cert, # pylint: disable=R0911
         constants.NDS_ACTION: constants.CRYPTO_ACTION_CREATE,
         }
 
-      bootstrap.RunNodeSetupCmd(
+      ssh.RunSshCmdWithStdin(
           cluster_name,
           node_name,
           pathutils.SSL_UPDATE,
-          ctx.debug,
-          ctx.verbose,
-          True, # use cluster key
-          False, # ask key
-          True, # strict host check
           ssh_port,
-          data)
+          data,
+          debug=ctx.debug,
+          verbose=ctx.verbose,
+          use_cluster_key=True,
+          ask_key=False,
+          strict_host_check=True)
 
     # Create a temporary ssconf file using the master's client cert digest
     # and the 'bootstrap' keyword to enable distribution of all nodes' digests.
@@ -1144,21 +1176,73 @@ def _RenewCrypto(new_cluster_cert, new_rapi_cert, # pylint: disable=R0911
                            _RenewServerAndClientCerts, verbose=verbose,
                            debug=debug)
 
+  if new_node_cert or new_cluster_cert or new_ssh_keys:
+    cl = GetClient()
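+    # a new cluster certificate also requires new node (client) certificates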
+    renew_op = opcodes.OpClusterRenewCrypto(
+        node_certificates=new_node_cert or new_cluster_cert,
+        ssh_keys=new_ssh_keys)
+    SubmitOpCode(renew_op, cl=cl)
+
   ToStdout("All requested certificates and keys have been replaced."
            " Running \"gnt-cluster verify\" now is recommended.")
 
-  if new_node_cert or new_cluster_cert:
+  return 0
+
+
+def _BuildGanetiPubKeys(options, pub_key_file=pathutils.SSH_PUB_KEYS, cl=None,
+                        get_online_nodes_fn=GetOnlineNodes,
+                        get_nodes_ssh_ports_fn=GetNodesSshPorts,
+                        get_node_uuids_fn=GetNodeUUIDs,
+                        homedir_fn=None):
+  """Recreates the 'ganeti_pub_key' file by polling all nodes.
+
+  """
+  if os.path.exists(pub_key_file):
+    utils.CreateBackup(pub_key_file)
+    utils.RemoveFile(pub_key_file)
+
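+  # clear any remaining public key entries before rebuilding the file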
+  ssh.ClearPubKeyFile(pub_key_file)
+
+  if not cl:
     cl = GetClient()
-    renew_op = opcodes.OpClusterRenewCrypto()
-    SubmitOpCode(renew_op, cl=cl)
 
-  return 0
+  (cluster_name, master_node) = \
+    cl.QueryConfigValues(["cluster_name", "master_node"])
+
+  online_nodes = get_online_nodes_fn([], cl=cl)
+  ssh_ports = get_nodes_ssh_ports_fn(online_nodes + [master_node], cl)
+  ssh_port_map = dict(zip(online_nodes + [master_node], ssh_ports))
+
+  node_uuids = get_node_uuids_fn(online_nodes + [master_node], cl)
+  node_uuid_map = dict(zip(online_nodes + [master_node], node_uuids))
+
+  nonmaster_nodes = [name for name in online_nodes
+                     if name != master_node]
+
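+  # path of the user's DSA public key file; the same path is later
+  # read from each non-master node over SSH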
+  _, pub_key_filename, _ = \
+    ssh.GetUserFiles(constants.SSH_LOGIN_USER, mkdir=False, dircheck=False,
+                     kind=constants.SSHK_DSA, _homedir_fn=homedir_fn)
+
+  # get the key file of the master node
+  pub_key = utils.ReadFile(pub_key_filename)
+  ssh.AddPublicKey(node_uuid_map[master_node], pub_key,
+                   key_file=pub_key_file)
+
+  # get the key files of all non-master nodes
+  for node in nonmaster_nodes:
+    pub_key = ssh.ReadRemoteSshPubKeys(pub_key_filename, node, cluster_name,
+                                       ssh_port_map[node],
+                                       options.ssh_key_check,
+                                       options.ssh_key_check)
+    ssh.AddPublicKey(node_uuid_map[node], pub_key, key_file=pub_key_file)
 
 
 def RenewCrypto(opts, args):
   """Renews cluster certificates, keys and secrets.
 
   """
+  if opts.new_ssh_keys:
+    _BuildGanetiPubKeys(opts)
   return _RenewCrypto(opts.new_cluster_cert,
                       opts.new_rapi_cert,
                       opts.rapi_cert,
@@ -1170,6 +1254,7 @@ def RenewCrypto(opts, args):
                       opts.cluster_domain_secret,
                       opts.force,
                       opts.new_node_cert,
+                      opts.new_ssh_keys,
                       opts.verbose,
                       opts.debug > 0)
 
@@ -1274,7 +1359,9 @@ def SetClusterParams(opts, args):
           opts.shared_file_storage_dir is not None or
           opts.compression_tools is not None or
           opts.shared_file_storage_dir is not None or
-          opts.enabled_user_shutdown is not None):
+          opts.enabled_user_shutdown is not None or
+          opts.data_collector_interval or
+          opts.enabled_data_collectors):
     ToStderr("Please give at least one of the parameters.")
     return 1
 
@@ -1357,6 +1444,29 @@ def SetClusterParams(opts, args):
 
   compression_tools = _GetCompressionTools(opts)
 
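+  # keyval option values are plain strings; any value starting with
+  # "t"/"T" counts as True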
+  enabled_data_collectors = dict(
+      (k, v.lower().startswith("t"))
+      for k, v in opts.enabled_data_collectors.items())
+
+  unrecognized_data_collectors = [
+      k for k in enabled_data_collectors.keys()
+      if k not in constants.DATA_COLLECTOR_NAMES]
+  if unrecognized_data_collectors:
+    ToStderr("Data collector names not recognized: %s" %
+             ", ".join(unrecognized_data_collectors))
+
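+  # convert the interval values from (fractional) seconds to microseconds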
+  try:
+    data_collector_interval = dict(
+        (k, long(1e6 * float(v)))
+        for (k, v) in opts.data_collector_interval.items())
+  except ValueError:
+    ToStderr("Can't transform all values to integers: {}".format(
+        opts.data_collector_interval))
+    return 1
+  if any(v <= 0 for v in data_collector_interval.values()):
+    ToStderr("Some interval times were not above zero.")
+    return 1
+
   op = opcodes.OpClusterSetParams(
     vg_name=vg_name,
     drbd_helper=drbd_helper,
@@ -1395,6 +1505,8 @@ def SetClusterParams(opts, args):
     shared_file_storage_dir=opts.shared_file_storage_dir,
     compression_tools=compression_tools,
     enabled_user_shutdown=opts.enabled_user_shutdown,
+    enabled_data_collectors=enabled_data_collectors,
+    data_collector_interval=data_collector_interval,
     )
   return base.GetResult(None, opts, SubmitOrSend(op, opts))
 
@@ -2069,36 +2181,41 @@ def _VersionSpecificDowngrade():
   """
   ToStdout("Performing version-specific downgrade tasks.")
 
+  # Determine whether this cluster is set up with SSH handling
+  # (i.e. it was not initialized with --no-ssh-init) by checking
+  # whether the public key file exists.
+  update_keys = os.path.exists(pathutils.SSH_PUB_KEYS)
+
+  if not update_keys:
+    return True
+
+  ToStdout("Replace nodes' SSH keys with the master's keys.")
+  (_, root_keyfiles) = \
+    ssh.GetAllUserFiles(constants.SSH_LOGIN_USER, mkdir=False, dircheck=False)
+
+  (master_private_keyfile, master_public_keyfile) = \
+      root_keyfiles[constants.SSHK_DSA]
+
   nodes = ssconf.SimpleStore().GetOnlineNodeList()
+  master_node = ssconf.SimpleStore().GetMasterNode()
   cluster_name = ssconf.SimpleStore().GetClusterName()
-  ssh_ports = ssconf.SimpleStore().GetSshPortMap()
 
-  for node in nodes:
-    data = {
-      constants.NDS_CLUSTER_NAME: cluster_name,
-      constants.NDS_NODE_DAEMON_CERTIFICATE:
-        utils.ReadFile(pathutils.NODED_CERT_FILE),
-      constants.NDS_NODE_NAME: node,
-      constants.NDS_ACTION: constants.CRYPTO_ACTION_DELETE,
-      }
+  # If master node is in 'nodes', remove it
+  if master_node in nodes:
+    nodes.remove(master_node)
 
-    try:
-      bootstrap.RunNodeSetupCmd(
-          cluster_name,
-          node,
-          pathutils.SSL_UPDATE,
-          True, # debug
-          True, # verbose,
-          True, # use cluster key
-          False, # ask key
-          True, # strict host check
-          ssh_ports[node],
-          data)
-    except Exception as e: # pylint: disable=W0703
-      # As downgrading can fail if a node is temporarily unreachable
-      # only output the error, but do not abort the entire operation.
-      ToStderr("Downgrading SSL setup of node '%s' failed: %s." %
-               (node, e))
+  srun = ssh.SshRunner(cluster_name=cluster_name)
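+  # copy the master's DSA key pair to every other online node via scp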
+  for name in nodes:
+    for key_file in [master_private_keyfile, master_public_keyfile]:
+      command = utils.text.ShellQuoteArgs([
+          "scp", key_file, "%s:%s" % (name, key_file)])
+      result = srun.Run(master_node, constants.SSH_LOGIN_USER, command)
+      if result.exit_code != 0:
+        ToStderr("Overiding SSH key '%s' of node '%s' failed. You might"
+                 " want to clean up manually." % (key_file, name))
 
   return True
 
@@ -2344,7 +2461,8 @@ commands = {
   "verify": (
     VerifyCluster, ARGS_NONE,
     [VERBOSE_OPT, DEBUG_SIMERR_OPT, ERROR_CODES_OPT, NONPLUS1_OPT,
-     DRY_RUN_OPT, PRIORITY_OPT, NODEGROUP_OPT, IGNORE_ERRORS_OPT],
+     DRY_RUN_OPT, PRIORITY_OPT, NODEGROUP_OPT, IGNORE_ERRORS_OPT,
+     VERIFY_CLUTTER_OPT],
     "", "Does a check on the cluster configuration"),
   "verify-disks": (
     VerifyDisks, ARGS_NONE, [PRIORITY_OPT],
@@ -2412,7 +2530,8 @@ commands = {
       ENABLED_USER_SHUTDOWN_OPT] +
      INSTANCE_POLICY_OPTS +
      [GLOBAL_FILEDIR_OPT, GLOBAL_SHARED_FILEDIR_OPT, ZEROING_IMAGE_OPT,
-      COMPRESSION_TOOLS_OPT],
+      COMPRESSION_TOOLS_OPT] +
+     [ENABLED_DATA_COLLECTORS_OPT, DATA_COLLECTOR_INTERVAL_OPT],
     "[opts...]",
     "Alters the parameters of the cluster"),
   "renew-crypto": (
@@ -2421,7 +2540,8 @@ commands = {
      NEW_CONFD_HMAC_KEY_OPT, FORCE_OPT,
      NEW_CLUSTER_DOMAIN_SECRET_OPT, CLUSTER_DOMAIN_SECRET_OPT,
      NEW_SPICE_CERT_OPT, SPICE_CERT_OPT, SPICE_CACERT_OPT,
-     NEW_NODE_CERT_OPT, VERBOSE_OPT],
+     NEW_NODE_CERT_OPT, NEW_SSH_KEY_OPT, NOSSH_KEYCHECK_OPT,
+     VERBOSE_OPT],
     "[opts...]",
     "Renews cluster certificates, keys and secrets"),
   "epo": (