Add the SSH key options
[ganeti-github.git] / lib / client / gnt_cluster.py
1 #
2 #
3
4 # Copyright (C) 2006, 2007, 2010, 2011, 2012, 2013, 2014 Google Inc.
5 # All rights reserved.
6 #
7 # Redistribution and use in source and binary forms, with or without
8 # modification, are permitted provided that the following conditions are
9 # met:
10 #
11 # 1. Redistributions of source code must retain the above copyright notice,
12 # this list of conditions and the following disclaimer.
13 #
14 # 2. Redistributions in binary form must reproduce the above copyright
15 # notice, this list of conditions and the following disclaimer in the
16 # documentation and/or other materials provided with the distribution.
17 #
18 # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
19 # IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
20 # TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
21 # PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR
22 # CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
23 # EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
24 # PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
25 # PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
26 # LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
27 # NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
28 # SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29
30 """Cluster related commands"""
31
32 # pylint: disable=W0401,W0613,W0614,C0103
33 # W0401: Wildcard import ganeti.cli
34 # W0613: Unused argument, since all functions follow the same API
35 # W0614: Unused import %s from wildcard import (since we need cli)
36 # C0103: Invalid name gnt-cluster
37
38 from cStringIO import StringIO
39 import os
40 import time
41 import OpenSSL
42 import tempfile
43 import itertools
44
45 from ganeti.cli import *
46 from ganeti import bootstrap
47 from ganeti import compat
48 from ganeti import constants
49 from ganeti import config
50 from ganeti import errors
51 from ganeti import netutils
52 from ganeti import objects
53 from ganeti import opcodes
54 from ganeti import pathutils
55 from ganeti import qlang
56 from ganeti import serializer
57 from ganeti import ssconf
58 from ganeti import ssh
59 from ganeti import uidpool
60 from ganeti import utils
61 from ganeti.client import base
62
63
64 ON_OPT = cli_option("--on", default=False,
65 action="store_true", dest="on",
66 help="Recover from an EPO")
67
68 GROUPS_OPT = cli_option("--groups", default=False,
69 action="store_true", dest="groups",
70 help="Arguments are node groups instead of nodes")
71
72 FORCE_FAILOVER = cli_option("--yes-do-it", dest="yes_do_it",
73 help="Override interactive check for --no-voting",
74 default=False, action="store_true")
75
76 FORCE_DISTRIBUTION = cli_option("--yes-do-it", dest="yes_do_it",
77 help="Unconditionally distribute the"
78 " configuration, even if the queue"
79 " is drained",
80 default=False, action="store_true")
81
82 TO_OPT = cli_option("--to", default=None, type="string",
83 help="The Ganeti version to upgrade to")
84
85 RESUME_OPT = cli_option("--resume", default=False, action="store_true",
86 help="Resume any pending Ganeti upgrades")
87
88 DATA_COLLECTOR_INTERVAL_OPT = cli_option(
89 "--data-collector-interval", default={}, type="keyval",
90 help="Set collection intervals in seconds of data collectors.")
91
92 _EPO_PING_INTERVAL = 30 # 30 seconds between pings
93 _EPO_PING_TIMEOUT = 1 # 1 second
94 _EPO_REACHABLE_TIMEOUT = 15 * 60 # 15 minutes
95
96
97 def _InitEnabledDiskTemplates(opts):
98 """Initialize the list of enabled disk templates.
99
100 """
101 if opts.enabled_disk_templates:
102 return opts.enabled_disk_templates.split(",")
103 else:
104 return constants.DEFAULT_ENABLED_DISK_TEMPLATES
105
106
107 def _InitVgName(opts, enabled_disk_templates):
108 """Initialize the volume group name.
109
110 @type enabled_disk_templates: list of strings
111 @param enabled_disk_templates: cluster-wide enabled disk templates
112
113 """
114 vg_name = None
115 if opts.vg_name is not None:
116 vg_name = opts.vg_name
117 if vg_name:
118 if not utils.IsLvmEnabled(enabled_disk_templates):
119 ToStdout("You specified a volume group with --vg-name, but you did not"
120 " enable any disk template that uses lvm.")
121 elif utils.IsLvmEnabled(enabled_disk_templates):
122 raise errors.OpPrereqError(
123 "LVM disk templates are enabled, but vg name not set.")
124 elif utils.IsLvmEnabled(enabled_disk_templates):
125 vg_name = constants.DEFAULT_VG
126 return vg_name
127
128
129 def _InitDrbdHelper(opts, enabled_disk_templates):
130 """Initialize the DRBD usermode helper.
131
132 """
133 drbd_enabled = constants.DT_DRBD8 in enabled_disk_templates
134
135 if not drbd_enabled and opts.drbd_helper is not None:
136 ToStdout("Note: You specified a DRBD usermode helper, while DRBD storage"
137 " is not enabled.")
138
139 if drbd_enabled:
140 if opts.drbd_helper is None:
141 return constants.DEFAULT_DRBD_HELPER
142 if opts.drbd_helper == '':
143 raise errors.OpPrereqError(
144 "Unsetting the drbd usermode helper while enabling DRBD is not"
145 " allowed.")
146
147 return opts.drbd_helper
148
149
150 @UsesRPC
151 def InitCluster(opts, args):
152 """Initialize the cluster.
153
154 @param opts: the command line options selected by the user
155 @type args: list
156 @param args: should contain only one element, the desired
157 cluster name
158 @rtype: int
159 @return: the desired exit code
160
161 """
162 enabled_disk_templates = _InitEnabledDiskTemplates(opts)
163
164 try:
165 vg_name = _InitVgName(opts, enabled_disk_templates)
166 drbd_helper = _InitDrbdHelper(opts, enabled_disk_templates)
167 except errors.OpPrereqError, e:
168 ToStderr(str(e))
169 return 1
170
171 master_netdev = opts.master_netdev
172 if master_netdev is None:
173 nic_mode = opts.nicparams.get(constants.NIC_MODE, None)
174 if not nic_mode:
175 # default case, use bridging
176 master_netdev = constants.DEFAULT_BRIDGE
177 elif nic_mode == constants.NIC_MODE_OVS:
178 # default ovs is different from default bridge
179 master_netdev = constants.DEFAULT_OVS
180 opts.nicparams[constants.NIC_LINK] = constants.DEFAULT_OVS
181
182 hvlist = opts.enabled_hypervisors
183 if hvlist is None:
184 hvlist = constants.DEFAULT_ENABLED_HYPERVISOR
185 hvlist = hvlist.split(",")
186
187 hvparams = dict(opts.hvparams)
188 beparams = opts.beparams
189 nicparams = opts.nicparams
190
191 diskparams = dict(opts.diskparams)
192
193 # check the disk template types here, as we cannot rely on the type check done
194 # by the opcode parameter types
195 diskparams_keys = set(diskparams.keys())
196 if not (diskparams_keys <= constants.DISK_TEMPLATES):
197 unknown = utils.NiceSort(diskparams_keys - constants.DISK_TEMPLATES)
198 ToStderr("Disk templates unknown: %s" % utils.CommaJoin(unknown))
199 return 1
200
201 # prepare beparams dict
202 beparams = objects.FillDict(constants.BEC_DEFAULTS, beparams)
203 utils.ForceDictType(beparams, constants.BES_PARAMETER_COMPAT)
204
205 # prepare nicparams dict
206 nicparams = objects.FillDict(constants.NICC_DEFAULTS, nicparams)
207 utils.ForceDictType(nicparams, constants.NICS_PARAMETER_TYPES)
208
209 # prepare ndparams dict
210 if opts.ndparams is None:
211 ndparams = dict(constants.NDC_DEFAULTS)
212 else:
213 ndparams = objects.FillDict(constants.NDC_DEFAULTS, opts.ndparams)
214 utils.ForceDictType(ndparams, constants.NDS_PARAMETER_TYPES)
215
216 # prepare hvparams dict
217 for hv in constants.HYPER_TYPES:
218 if hv not in hvparams:
219 hvparams[hv] = {}
220 hvparams[hv] = objects.FillDict(constants.HVC_DEFAULTS[hv], hvparams[hv])
221 utils.ForceDictType(hvparams[hv], constants.HVS_PARAMETER_TYPES)
222
223 # prepare diskparams dict
224 for templ in constants.DISK_TEMPLATES:
225 if templ not in diskparams:
226 diskparams[templ] = {}
227 diskparams[templ] = objects.FillDict(constants.DISK_DT_DEFAULTS[templ],
228 diskparams[templ])
229 utils.ForceDictType(diskparams[templ], constants.DISK_DT_TYPES)
230
231 # prepare ipolicy dict
232 ipolicy = CreateIPolicyFromOpts(
233 ispecs_mem_size=opts.ispecs_mem_size,
234 ispecs_cpu_count=opts.ispecs_cpu_count,
235 ispecs_disk_count=opts.ispecs_disk_count,
236 ispecs_disk_size=opts.ispecs_disk_size,
237 ispecs_nic_count=opts.ispecs_nic_count,
238 minmax_ispecs=opts.ipolicy_bounds_specs,
239 std_ispecs=opts.ipolicy_std_specs,
240 ipolicy_disk_templates=opts.ipolicy_disk_templates,
241 ipolicy_vcpu_ratio=opts.ipolicy_vcpu_ratio,
242 ipolicy_spindle_ratio=opts.ipolicy_spindle_ratio,
243 fill_all=True)
244
245 if opts.candidate_pool_size is None:
246 opts.candidate_pool_size = constants.MASTER_POOL_SIZE_DEFAULT
247
248 if opts.mac_prefix is None:
249 opts.mac_prefix = constants.DEFAULT_MAC_PREFIX
250
251 uid_pool = opts.uid_pool
252 if uid_pool is not None:
253 uid_pool = uidpool.ParseUidPool(uid_pool)
254
255 if opts.prealloc_wipe_disks is None:
256 opts.prealloc_wipe_disks = False
257
258 external_ip_setup_script = opts.use_external_mip_script
259 if external_ip_setup_script is None:
260 external_ip_setup_script = False
261
262 try:
263 primary_ip_version = int(opts.primary_ip_version)
264 except (ValueError, TypeError), err:
265 ToStderr("Invalid primary ip version value: %s" % str(err))
266 return 1
267
268 master_netmask = opts.master_netmask
269 try:
270 if master_netmask is not None:
271 master_netmask = int(master_netmask)
272 except (ValueError, TypeError), err:
273 ToStderr("Invalid master netmask value: %s" % str(err))
274 return 1
275
276 if opts.disk_state:
277 disk_state = utils.FlatToDict(opts.disk_state)
278 else:
279 disk_state = {}
280
281 hv_state = dict(opts.hv_state)
282
283 if opts.install_image:
284 install_image = opts.install_image
285 else:
286 install_image = ""
287
288 if opts.zeroing_image:
289 zeroing_image = opts.zeroing_image
290 else:
291 zeroing_image = ""
292
293 compression_tools = _GetCompressionTools(opts)
294
295 default_ialloc_params = opts.default_iallocator_params
296
297 if opts.enabled_user_shutdown:
298 enabled_user_shutdown = True
299 else:
300 enabled_user_shutdown = False
301
302 bootstrap.InitCluster(cluster_name=args[0],
303 secondary_ip=opts.secondary_ip,
304 vg_name=vg_name,
305 mac_prefix=opts.mac_prefix,
306 master_netmask=master_netmask,
307 master_netdev=master_netdev,
308 file_storage_dir=opts.file_storage_dir,
309 shared_file_storage_dir=opts.shared_file_storage_dir,
310 gluster_storage_dir=opts.gluster_storage_dir,
311 enabled_hypervisors=hvlist,
312 hvparams=hvparams,
313 beparams=beparams,
314 nicparams=nicparams,
315 ndparams=ndparams,
316 diskparams=diskparams,
317 ipolicy=ipolicy,
318 candidate_pool_size=opts.candidate_pool_size,
319 modify_etc_hosts=opts.modify_etc_hosts,
320 modify_ssh_setup=opts.modify_ssh_setup,
321 maintain_node_health=opts.maintain_node_health,
322 drbd_helper=drbd_helper,
323 uid_pool=uid_pool,
324 default_iallocator=opts.default_iallocator,
325 default_iallocator_params=default_ialloc_params,
326 primary_ip_version=primary_ip_version,
327 prealloc_wipe_disks=opts.prealloc_wipe_disks,
328 use_external_mip_script=external_ip_setup_script,
329 hv_state=hv_state,
330 disk_state=disk_state,
331 enabled_disk_templates=enabled_disk_templates,
332 install_image=install_image,
333 zeroing_image=zeroing_image,
334 compression_tools=compression_tools,
335 enabled_user_shutdown=enabled_user_shutdown,
336 )
337 op = opcodes.OpClusterPostInit()
338 SubmitOpCode(op, opts=opts)
339 return 0
340
341
342 @UsesRPC
343 def DestroyCluster(opts, args):
344 """Destroy the cluster.
345
346 @param opts: the command line options selected by the user
347 @type args: list
348 @param args: should be an empty list
349 @rtype: int
350 @return: the desired exit code
351
352 """
353 if not opts.yes_do_it:
354 ToStderr("Destroying a cluster is irreversible. If you really want"
355 " destroy this cluster, supply the --yes-do-it option.")
356 return 1
357
358 op = opcodes.OpClusterDestroy()
359 master_uuid = SubmitOpCode(op, opts=opts)
360 # if we reached this, the opcode didn't fail; we can proceed to
361 # shutdown all the daemons
362 bootstrap.FinalizeClusterDestroy(master_uuid)
363 return 0
364
365
366 def RenameCluster(opts, args):
367 """Rename the cluster.
368
369 @param opts: the command line options selected by the user
370 @type args: list
371 @param args: should contain only one element, the new cluster name
372 @rtype: int
373 @return: the desired exit code
374
375 """
376 cl = GetClient()
377
378 (cluster_name, ) = cl.QueryConfigValues(["cluster_name"])
379
380 new_name = args[0]
381 if not opts.force:
382 usertext = ("This will rename the cluster from '%s' to '%s'. If you are"
383 " connected over the network to the cluster name, the"
384 " operation is very dangerous as the IP address will be"
385 " removed from the node and the change may not go through."
386 " Continue?") % (cluster_name, new_name)
387 if not AskUser(usertext):
388 return 1
389
390 op = opcodes.OpClusterRename(name=new_name)
391 result = SubmitOpCode(op, opts=opts, cl=cl)
392
393 if result:
394 ToStdout("Cluster renamed from '%s' to '%s'", cluster_name, result)
395
396 return 0
397
398
399 def ActivateMasterIp(opts, args):
400 """Activates the master IP.
401
402 """
403 op = opcodes.OpClusterActivateMasterIp()
404 SubmitOpCode(op)
405 return 0
406
407
408 def DeactivateMasterIp(opts, args):
409 """Deactivates the master IP.
410
411 """
412 if not opts.confirm:
413 usertext = ("This will disable the master IP. All the open connections to"
414 " the master IP will be closed. To reach the master you will"
415 " need to use its node IP."
416 " Continue?")
417 if not AskUser(usertext):
418 return 1
419
420 op = opcodes.OpClusterDeactivateMasterIp()
421 SubmitOpCode(op)
422 return 0
423
424
425 def RedistributeConfig(opts, args):
426 """Forces push of the cluster configuration.
427
428 @param opts: the command line options selected by the user
429 @type args: list
430 @param args: empty list
431 @rtype: int
432 @return: the desired exit code
433
434 """
435 op = opcodes.OpClusterRedistConf()
436 if opts.yes_do_it:
437 SubmitOpCodeToDrainedQueue(op)
438 else:
439 SubmitOrSend(op, opts)
440 return 0
441
442
443 def ShowClusterVersion(opts, args):
444 """Write version of ganeti software to the standard output.
445
446 @param opts: the command line options selected by the user
447 @type args: list
448 @param args: should be an empty list
449 @rtype: int
450 @return: the desired exit code
451
452 """
453 cl = GetClient()
454 result = cl.QueryClusterInfo()
455 ToStdout("Software version: %s", result["software_version"])
456 ToStdout("Internode protocol: %s", result["protocol_version"])
457 ToStdout("Configuration format: %s", result["config_version"])
458 ToStdout("OS api version: %s", result["os_api_version"])
459 ToStdout("Export interface: %s", result["export_version"])
460 ToStdout("VCS version: %s", result["vcs_version"])
461 return 0
462
463
464 def ShowClusterMaster(opts, args):
465 """Write name of master node to the standard output.
466
467 @param opts: the command line options selected by the user
468 @type args: list
469 @param args: should be an empty list
470 @rtype: int
471 @return: the desired exit code
472
473 """
474 master = bootstrap.GetMaster()
475 ToStdout(master)
476 return 0
477
478
479 def _FormatGroupedParams(paramsdict, roman=False):
480 """Format Grouped parameters (be, nic, disk) by group.
481
482 @type paramsdict: dict of dicts
483 @param paramsdict: {group: {param: value, ...}, ...}
484 @rtype: dict of dicts
485 @return: copy of the input dictionaries with strings as values
486
487 """
488 ret = {}
489 for (item, val) in paramsdict.items():
490 if isinstance(val, dict):
491 ret[item] = _FormatGroupedParams(val, roman=roman)
492 elif roman and isinstance(val, int):
493 ret[item] = compat.TryToRoman(val)
494 else:
495 ret[item] = str(val)
496 return ret
497
498
499 def _FormatDataCollectors(paramsdict):
500 """Format Grouped parameters (be, nic, disk) by group.
501
502 @type paramsdict: dict of dicts
503 @param paramsdict: response of QueryClusterInfo
504 @rtype: dict of dicts
505 @return: parameter grouped by data collector
506
507 """
508
509 enabled = paramsdict[constants.DATA_COLLECTORS_ENABLED_NAME]
510 interval = paramsdict[constants.DATA_COLLECTORS_INTERVAL_NAME]
511
512 ret = {}
513 for key in enabled:
514 ret[key] = dict(active=enabled[key],
515 interval="%.3fs" % (interval[key] / 1e6))
516 return ret
517
518
519 def ShowClusterConfig(opts, args):
520 """Shows cluster information.
521
522 @param opts: the command line options selected by the user
523 @type args: list
524 @param args: should be an empty list
525 @rtype: int
526 @return: the desired exit code
527
528 """
529 cl = GetClient()
530 result = cl.QueryClusterInfo()
531
532 if result["tags"]:
533 tags = utils.CommaJoin(utils.NiceSort(result["tags"]))
534 else:
535 tags = "(none)"
536 if result["reserved_lvs"]:
537 reserved_lvs = utils.CommaJoin(result["reserved_lvs"])
538 else:
539 reserved_lvs = "(none)"
540
541 enabled_hv = result["enabled_hypervisors"]
542 hvparams = dict((k, v) for k, v in result["hvparams"].iteritems()
543 if k in enabled_hv)
544
545 info = [
546 ("Cluster name", result["name"]),
547 ("Cluster UUID", result["uuid"]),
548
549 ("Creation time", utils.FormatTime(result["ctime"])),
550 ("Modification time", utils.FormatTime(result["mtime"])),
551
552 ("Master node", result["master"]),
553
554 ("Architecture (this node)",
555 "%s (%s)" % (result["architecture"][0], result["architecture"][1])),
556
557 ("Tags", tags),
558
559 ("Default hypervisor", result["default_hypervisor"]),
560 ("Enabled hypervisors", utils.CommaJoin(enabled_hv)),
561
562 ("Hypervisor parameters", _FormatGroupedParams(hvparams,
563 opts.roman_integers)),
564
565 ("OS-specific hypervisor parameters",
566 _FormatGroupedParams(result["os_hvp"], opts.roman_integers)),
567
568 ("OS parameters", _FormatGroupedParams(result["osparams"],
569 opts.roman_integers)),
570
571 ("Hidden OSes", utils.CommaJoin(result["hidden_os"])),
572 ("Blacklisted OSes", utils.CommaJoin(result["blacklisted_os"])),
573
574 ("Cluster parameters", [
575 ("candidate pool size",
576 compat.TryToRoman(result["candidate_pool_size"],
577 convert=opts.roman_integers)),
578 ("maximal number of jobs running simultaneously",
579 compat.TryToRoman(result["max_running_jobs"],
580 convert=opts.roman_integers)),
581 ("maximal number of jobs simultaneously tracked by the scheduler",
582 compat.TryToRoman(result["max_tracked_jobs"],
583 convert=opts.roman_integers)),
584 ("mac prefix", result["mac_prefix"]),
585 ("master netdev", result["master_netdev"]),
586 ("master netmask", compat.TryToRoman(result["master_netmask"],
587 opts.roman_integers)),
588 ("use external master IP address setup script",
589 result["use_external_mip_script"]),
590 ("lvm volume group", result["volume_group_name"]),
591 ("lvm reserved volumes", reserved_lvs),
592 ("drbd usermode helper", result["drbd_usermode_helper"]),
593 ("file storage path", result["file_storage_dir"]),
594 ("shared file storage path", result["shared_file_storage_dir"]),
595 ("gluster storage path", result["gluster_storage_dir"]),
596 ("maintenance of node health", result["maintain_node_health"]),
597 ("uid pool", uidpool.FormatUidPool(result["uid_pool"])),
598 ("default instance allocator", result["default_iallocator"]),
599 ("default instance allocator parameters",
600 result["default_iallocator_params"]),
601 ("primary ip version", compat.TryToRoman(result["primary_ip_version"],
602 opts.roman_integers)),
603 ("preallocation wipe disks", result["prealloc_wipe_disks"]),
604 ("OS search path", utils.CommaJoin(pathutils.OS_SEARCH_PATH)),
605 ("ExtStorage Providers search path",
606 utils.CommaJoin(pathutils.ES_SEARCH_PATH)),
607 ("enabled disk templates",
608 utils.CommaJoin(result["enabled_disk_templates"])),
609 ("install image", result["install_image"]),
610 ("instance communication network",
611 result["instance_communication_network"]),
612 ("zeroing image", result["zeroing_image"]),
613 ("compression tools", result["compression_tools"]),
614 ("enabled user shutdown", result["enabled_user_shutdown"]),
615 ("modify ssh setup", result["modify_ssh_setup"]),
616 ]),
617
618 ("Default node parameters",
619 _FormatGroupedParams(result["ndparams"], roman=opts.roman_integers)),
620
621 ("Default instance parameters",
622 _FormatGroupedParams(result["beparams"], roman=opts.roman_integers)),
623
624 ("Default nic parameters",
625 _FormatGroupedParams(result["nicparams"], roman=opts.roman_integers)),
626
627 ("Default disk parameters",
628 _FormatGroupedParams(result["diskparams"], roman=opts.roman_integers)),
629
630 ("Instance policy - limits for instances",
631 FormatPolicyInfo(result["ipolicy"], None, True, opts.roman_integers)),
632 ("Data collectors", _FormatDataCollectors(result)),
633 ]
634
635 PrintGenericInfo(info)
636 return 0
637
638
639 def ClusterCopyFile(opts, args):
640 """Copy a file from master to some nodes.
641
642 @param opts: the command line options selected by the user
643 @type args: list
644 @param args: should contain only one element, the path of
645 the file to be copied
646 @rtype: int
647 @return: the desired exit code
648
649 """
650 filename = args[0]
651 filename = os.path.abspath(filename)
652
653 if not os.path.exists(filename):
654 raise errors.OpPrereqError("No such filename '%s'" % filename,
655 errors.ECODE_INVAL)
656
657 cl = GetClient()
658 qcl = GetClient()
659 try:
660 cluster_name = cl.QueryConfigValues(["cluster_name"])[0]
661
662 results = GetOnlineNodes(nodes=opts.nodes, cl=qcl, filter_master=True,
663 secondary_ips=opts.use_replication_network,
664 nodegroup=opts.nodegroup)
665 ports = GetNodesSshPorts(opts.nodes, qcl)
666 finally:
667 cl.Close()
668 qcl.Close()
669
670 srun = ssh.SshRunner(cluster_name)
671 for (node, port) in zip(results, ports):
672 if not srun.CopyFileToNode(node, port, filename):
673 ToStderr("Copy of file %s to node %s:%d failed", filename, node, port)
674
675 return 0
676
677
678 def RunClusterCommand(opts, args):
679 """Run a command on some nodes.
680
681 @param opts: the command line options selected by the user
682 @type args: list
683 @param args: should contain the command to be run and its arguments
684 @rtype: int
685 @return: the desired exit code
686
687 """
688 cl = GetClient()
689 qcl = GetClient()
690
691 command = " ".join(args)
692
693 nodes = GetOnlineNodes(nodes=opts.nodes, cl=qcl, nodegroup=opts.nodegroup)
694 ports = GetNodesSshPorts(nodes, qcl)
695
696 cluster_name, master_node = cl.QueryConfigValues(["cluster_name",
697 "master_node"])
698
699 srun = ssh.SshRunner(cluster_name=cluster_name)
700
701 # Make sure master node is at list end
702 if master_node in nodes:
703 nodes.remove(master_node)
704 nodes.append(master_node)
705
706 for (name, port) in zip(nodes, ports):
707 result = srun.Run(name, constants.SSH_LOGIN_USER, command, port=port)
708
709 if opts.failure_only and result.exit_code == constants.EXIT_SUCCESS:
710 # Do not output anything for successful commands
711 continue
712
713 ToStdout("------------------------------------------------")
714 if opts.show_machine_names:
715 for line in result.output.splitlines():
716 ToStdout("%s: %s", name, line)
717 else:
718 ToStdout("node: %s", name)
719 ToStdout("%s", result.output)
720 ToStdout("return code = %s", result.exit_code)
721
722 return 0
723
724
725 def VerifyCluster(opts, args):
726 """Verify integrity of cluster, performing various test on nodes.
727
728 @param opts: the command line options selected by the user
729 @type args: list
730 @param args: should be an empty list
731 @rtype: int
732 @return: the desired exit code
733
734 """
735 skip_checks = []
736
737 if opts.skip_nplusone_mem:
738 skip_checks.append(constants.VERIFY_NPLUSONE_MEM)
739
740 cl = GetClient()
741
742 op = opcodes.OpClusterVerify(verbose=opts.verbose,
743 error_codes=opts.error_codes,
744 debug_simulate_errors=opts.simulate_errors,
745 skip_checks=skip_checks,
746 ignore_errors=opts.ignore_errors,
747 group_name=opts.nodegroup,
748 verify_clutter=opts.verify_clutter)
749 result = SubmitOpCode(op, cl=cl, opts=opts)
750
751 # Keep track of submitted jobs
752 jex = JobExecutor(cl=cl, opts=opts)
753
754 for (status, job_id) in result[constants.JOB_IDS_KEY]:
755 jex.AddJobId(None, status, job_id)
756
757 results = jex.GetResults()
758
759 (bad_jobs, bad_results) = \
760 map(len,
761 # Convert iterators to lists
762 map(list,
763 # Count errors
764 map(compat.partial(itertools.ifilterfalse, bool),
765 # Convert result to booleans in a tuple
766 zip(*((job_success, len(op_results) == 1 and op_results[0])
767 for (job_success, op_results) in results)))))
768
769 if bad_jobs == 0 and bad_results == 0:
770 rcode = constants.EXIT_SUCCESS
771 else:
772 rcode = constants.EXIT_FAILURE
773 if bad_jobs > 0:
774 ToStdout("%s job(s) failed while verifying the cluster.", bad_jobs)
775
776 return rcode
777
778
779 def VerifyDisks(opts, args):
780 """Verify integrity of cluster disks.
781
782 @param opts: the command line options selected by the user
783 @type args: list
784 @param args: should be an empty list
785 @rtype: int
786 @return: the desired exit code
787
788 """
789 cl = GetClient()
790
791 op = opcodes.OpClusterVerifyDisks(group_name=opts.nodegroup)
792
793 result = SubmitOpCode(op, cl=cl, opts=opts)
794
795 # Keep track of submitted jobs
796 jex = JobExecutor(cl=cl, opts=opts)
797
798 for (status, job_id) in result[constants.JOB_IDS_KEY]:
799 jex.AddJobId(None, status, job_id)
800
801 retcode = constants.EXIT_SUCCESS
802
803 for (status, result) in jex.GetResults():
804 if not status:
805 ToStdout("Job failed: %s", result)
806 continue
807
808 ((bad_nodes, instances, missing), ) = result
809
810 for node, text in bad_nodes.items():
811 ToStdout("Error gathering data on node %s: %s",
812 node, utils.SafeEncode(text[-400:]))
813 retcode = constants.EXIT_FAILURE
814 ToStdout("You need to fix these nodes first before fixing instances")
815
816 for iname in instances:
817 if iname in missing:
818 continue
819 op = opcodes.OpInstanceActivateDisks(instance_name=iname)
820 try:
821 ToStdout("Activating disks for instance '%s'", iname)
822 SubmitOpCode(op, opts=opts, cl=cl)
823 except errors.GenericError, err:
824 nret, msg = FormatError(err)
825 retcode |= nret
826 ToStderr("Error activating disks for instance %s: %s", iname, msg)
827
828 if missing:
829 for iname, ival in missing.iteritems():
830 all_missing = compat.all(x[0] in bad_nodes for x in ival)
831 if all_missing:
832 ToStdout("Instance %s cannot be verified as it lives on"
833 " broken nodes", iname)
834 else:
835 ToStdout("Instance %s has missing logical volumes:", iname)
836 ival.sort()
837 for node, vol in ival:
838 if node in bad_nodes:
839 ToStdout("\tbroken node %s /dev/%s", node, vol)
840 else:
841 ToStdout("\t%s /dev/%s", node, vol)
842
843 ToStdout("You need to replace or recreate disks for all the above"
844 " instances if this message persists after fixing broken nodes.")
845 retcode = constants.EXIT_FAILURE
846 elif not instances:
847 ToStdout("No disks need to be activated.")
848
849 return retcode
850
851
852 def RepairDiskSizes(opts, args):
853 """Verify sizes of cluster disks.
854
855 @param opts: the command line options selected by the user
856 @type args: list
857 @param args: optional list of instances to restrict check to
858 @rtype: int
859 @return: the desired exit code
860
861 """
862 op = opcodes.OpClusterRepairDiskSizes(instances=args)
863 SubmitOpCode(op, opts=opts)
864
865
866 @UsesRPC
867 def MasterFailover(opts, args):
868 """Failover the master node.
869
870 This command, when run on a non-master node, will cause the current
871 master to cease being master, and the non-master to become new
872 master.
873
874 @param opts: the command line options selected by the user
875 @type args: list
876 @param args: should be an empty list
877 @rtype: int
878 @return: the desired exit code
879
880 """
881 if opts.no_voting and not opts.yes_do_it:
882 usertext = ("This will perform the failover even if most other nodes"
883 " are down, or if this node is outdated. This is dangerous"
884 " as it can lead to a non-consistent cluster. Check the"
885 " gnt-cluster(8) man page before proceeding. Continue?")
886 if not AskUser(usertext):
887 return 1
888
889 rvlaue, msgs = bootstrap.MasterFailover(no_voting=opts.no_voting)
890 for msg in msgs:
891 ToStderr(msg)
892 return rvlaue
893
894
895 def MasterPing(opts, args):
896 """Checks if the master is alive.
897
898 @param opts: the command line options selected by the user
899 @type args: list
900 @param args: should be an empty list
901 @rtype: int
902 @return: the desired exit code
903
904 """
905 try:
906 cl = GetClient()
907 cl.QueryClusterInfo()
908 return 0
909 except Exception: # pylint: disable=W0703
910 return 1
911
912
913 def SearchTags(opts, args):
914 """Searches the tags on all the cluster.
915
916 @param opts: the command line options selected by the user
917 @type args: list
918 @param args: should contain only one element, the tag pattern
919 @rtype: int
920 @return: the desired exit code
921
922 """
923 op = opcodes.OpTagsSearch(pattern=args[0])
924 result = SubmitOpCode(op, opts=opts)
925 if not result:
926 return 1
927 result = list(result)
928 result.sort()
929 for path, tag in result:
930 ToStdout("%s %s", path, tag)
931
932
933 def _ReadAndVerifyCert(cert_filename, verify_private_key=False):
934 """Reads and verifies an X509 certificate.
935
936 @type cert_filename: string
937 @param cert_filename: the path of the file containing the certificate to
938 verify encoded in PEM format
939 @type verify_private_key: bool
940 @param verify_private_key: whether to verify the private key in addition to
941 the public certificate
942 @rtype: string
943 @return: a string containing the PEM-encoded certificate.
944
945 """
946 try:
947 pem = utils.ReadFile(cert_filename)
948 except IOError, err:
949 raise errors.X509CertError(cert_filename,
950 "Unable to read certificate: %s" % str(err))
951
952 try:
953 OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM, pem)
954 except Exception, err:
955 raise errors.X509CertError(cert_filename,
956 "Unable to load certificate: %s" % str(err))
957
958 if verify_private_key:
959 try:
960 OpenSSL.crypto.load_privatekey(OpenSSL.crypto.FILETYPE_PEM, pem)
961 except Exception, err:
962 raise errors.X509CertError(cert_filename,
963 "Unable to load private key: %s" % str(err))
964
965 return pem
966
967
968 def _RenewCrypto(new_cluster_cert, new_rapi_cert, # pylint: disable=R0911
969 rapi_cert_filename, new_spice_cert, spice_cert_filename,
970 spice_cacert_filename, new_confd_hmac_key, new_cds,
971 cds_filename, force, new_node_cert, new_ssh_keys,
972 verbose, debug):
973 """Renews cluster certificates, keys and secrets.
974
975 @type new_cluster_cert: bool
976 @param new_cluster_cert: Whether to generate a new cluster certificate
977 @type new_rapi_cert: bool
978 @param new_rapi_cert: Whether to generate a new RAPI certificate
979 @type rapi_cert_filename: string
980 @param rapi_cert_filename: Path to file containing new RAPI certificate
981 @type new_spice_cert: bool
982 @param new_spice_cert: Whether to generate a new SPICE certificate
983 @type spice_cert_filename: string
984 @param spice_cert_filename: Path to file containing new SPICE certificate
985 @type spice_cacert_filename: string
986 @param spice_cacert_filename: Path to file containing the certificate of the
987 CA that signed the SPICE certificate
988 @type new_confd_hmac_key: bool
989 @param new_confd_hmac_key: Whether to generate a new HMAC key
990 @type new_cds: bool
991 @param new_cds: Whether to generate a new cluster domain secret
992 @type cds_filename: string
993 @param cds_filename: Path to file containing new cluster domain secret
994 @type force: bool
995 @param force: Whether to ask user for confirmation
996 @type new_node_cert: bool
997 @param new_node_cert: Whether to generate new node certificates
998 @type new_ssh_keys: bool
999 @param new_ssh_keys: Whether to generate new node SSH keys
1000 @type verbose: boolean
1001 @param verbose: show verbose output
1002 @type debug: boolean
1003 @param debug: show debug output
1004
1005 """
1006 ToStdout("Updating certificates now. Running \"gnt-cluster verify\" "
1007 " is recommended after this operation.")
1008
1009 if new_rapi_cert and rapi_cert_filename:
1010 ToStderr("Only one of the --new-rapi-certificate and --rapi-certificate"
1011 " options can be specified at the same time.")
1012 return 1
1013
1014 if new_cds and cds_filename:
1015 ToStderr("Only one of the --new-cluster-domain-secret and"
1016 " --cluster-domain-secret options can be specified at"
1017 " the same time.")
1018 return 1
1019
1020 if new_spice_cert and (spice_cert_filename or spice_cacert_filename):
1021 ToStderr("When using --new-spice-certificate, the --spice-certificate"
1022 " and --spice-ca-certificate must not be used.")
1023 return 1
1024
1025 if bool(spice_cacert_filename) ^ bool(spice_cert_filename):
1026 ToStderr("Both --spice-certificate and --spice-ca-certificate must be"
1027 " specified.")
1028 return 1
1029
1030 rapi_cert_pem, spice_cert_pem, spice_cacert_pem = (None, None, None)
1031 try:
1032 if rapi_cert_filename:
1033 rapi_cert_pem = _ReadAndVerifyCert(rapi_cert_filename, True)
1034 if spice_cert_filename:
1035 spice_cert_pem = _ReadAndVerifyCert(spice_cert_filename, True)
1036 spice_cacert_pem = _ReadAndVerifyCert(spice_cacert_filename)
1037 except errors.X509CertError, err:
1038 ToStderr("Unable to load X509 certificate from %s: %s", err[0], err[1])
1039 return 1
1040
1041 if cds_filename:
1042 try:
1043 cds = utils.ReadFile(cds_filename)
1044 except Exception, err: # pylint: disable=W0703
1045 ToStderr("Can't load new cluster domain secret from %s: %s" %
1046 (cds_filename, str(err)))
1047 return 1
1048 else:
1049 cds = None
1050
1051 if not force:
1052 usertext = ("This requires all daemons on all nodes to be restarted and"
1053 " may take some time. Continue?")
1054 if not AskUser(usertext):
1055 return 1
1056
1057 def _RenewCryptoInner(ctx):
1058 ctx.feedback_fn("Updating certificates and keys")
1059
1060 bootstrap.GenerateClusterCrypto(False,
1061 new_rapi_cert,
1062 new_spice_cert,
1063 new_confd_hmac_key,
1064 new_cds,
1065 False,
1066 None,
1067 rapi_cert_pem=rapi_cert_pem,
1068 spice_cert_pem=spice_cert_pem,
1069 spice_cacert_pem=spice_cacert_pem,
1070 cds=cds)
1071
1072 files_to_copy = []
1073
1074 if new_rapi_cert or rapi_cert_pem:
1075 files_to_copy.append(pathutils.RAPI_CERT_FILE)
1076
1077 if new_spice_cert or spice_cert_pem:
1078 files_to_copy.append(pathutils.SPICE_CERT_FILE)
1079 files_to_copy.append(pathutils.SPICE_CACERT_FILE)
1080
1081 if new_confd_hmac_key:
1082 files_to_copy.append(pathutils.CONFD_HMAC_KEY)
1083
1084 if new_cds or cds:
1085 files_to_copy.append(pathutils.CLUSTER_DOMAIN_SECRET_FILE)
1086
1087 if files_to_copy:
1088 for node_name in ctx.nonmaster_nodes:
1089 port = ctx.ssh_ports[node_name]
1090 ctx.feedback_fn("Copying %s to %s:%d" %
1091 (", ".join(files_to_copy), node_name, port))
1092 for file_name in files_to_copy:
1093 ctx.ssh.CopyFileToNode(node_name, port, file_name)
1094
1095 def _RenewClientCerts(ctx):
1096 ctx.feedback_fn("Updating client SSL certificates.")
1097
1098 cluster_name = ssconf.SimpleStore().GetClusterName()
1099
1100 for node_name in ctx.nonmaster_nodes + [ctx.master_node]:
1101 ssh_port = ctx.ssh_ports[node_name]
1102 data = {
1103 constants.NDS_CLUSTER_NAME: cluster_name,
1104 constants.NDS_NODE_DAEMON_CERTIFICATE:
1105 utils.ReadFile(pathutils.NODED_CERT_FILE),
1106 constants.NDS_NODE_NAME: node_name,
1107 constants.NDS_ACTION: constants.CRYPTO_ACTION_CREATE,
1108 }
1109
1110 ssh.RunSshCmdWithStdin(
1111 cluster_name,
1112 node_name,
1113 pathutils.SSL_UPDATE,
1114 ssh_port,
1115 data,
1116 debug=ctx.debug,
1117 verbose=ctx.verbose,
1118 use_cluster_key=True,
1119 ask_key=False,
1120 strict_host_check=True)
1121
1122 # Create a temporary ssconf file using the master's client cert digest
1123 # and the 'bootstrap' keyword to enable distribution of all nodes' digests.
1124 master_digest = utils.GetCertificateDigest()
1125 ssconf_master_candidate_certs_filename = os.path.join(
1126 pathutils.DATA_DIR, "%s%s" %
1127 (constants.SSCONF_FILEPREFIX, constants.SS_MASTER_CANDIDATES_CERTS))
1128 utils.WriteFile(
1129 ssconf_master_candidate_certs_filename,
1130 data="%s=%s" % (constants.CRYPTO_BOOTSTRAP, master_digest))
1131 for node_name in ctx.nonmaster_nodes:
1132 port = ctx.ssh_ports[node_name]
1133 ctx.feedback_fn("Copying %s to %s:%d" %
1134 (ssconf_master_candidate_certs_filename, node_name, port))
1135 ctx.ssh.CopyFileToNode(node_name, port,
1136 ssconf_master_candidate_certs_filename)
1137
1138 # Write the boostrap entry to the config using wconfd.
1139 config_live_lock = utils.livelock.LiveLock("renew_crypto")
1140 cfg = config.GetConfig(None, config_live_lock)
1141 cfg.AddNodeToCandidateCerts(constants.CRYPTO_BOOTSTRAP, master_digest)
1142 cfg.Update(cfg.GetClusterInfo(), ctx.feedback_fn)
1143
1144 def _RenewServerAndClientCerts(ctx):
1145 ctx.feedback_fn("Updating the cluster SSL certificate.")
1146
1147 master_name = ssconf.SimpleStore().GetMasterNode()
1148 bootstrap.GenerateClusterCrypto(True, # cluster cert
1149 False, # rapi cert
1150 False, # spice cert
1151 False, # confd hmac key
1152 False, # cds
1153 True, # client cert
1154 master_name)
1155
1156 for node_name in ctx.nonmaster_nodes:
1157 port = ctx.ssh_ports[node_name]
1158 server_cert = pathutils.NODED_CERT_FILE
1159 ctx.feedback_fn("Copying %s to %s:%d" %
1160 (server_cert, node_name, port))
1161 ctx.ssh.CopyFileToNode(node_name, port, server_cert)
1162
1163 _RenewClientCerts(ctx)
1164
1165 if new_rapi_cert or new_spice_cert or new_confd_hmac_key or new_cds:
1166 RunWhileClusterStopped(ToStdout, _RenewCryptoInner)
1167
1168 # If only node certficates are recreated, call _RenewClientCerts only.
1169 if new_node_cert and not new_cluster_cert:
1170 RunWhileDaemonsStopped(ToStdout, [constants.NODED, constants.WCONFD],
1171 _RenewClientCerts, verbose=verbose, debug=debug)
1172
1173 # If the cluster certificate are renewed, the client certificates need
1174 # to be renewed too.
1175 if new_cluster_cert:
1176 RunWhileDaemonsStopped(ToStdout, [constants.NODED, constants.WCONFD],
1177 _RenewServerAndClientCerts, verbose=verbose,
1178 debug=debug)
1179
1180 if new_node_cert or new_cluster_cert or new_ssh_keys:
1181 cl = GetClient()
1182 renew_op = opcodes.OpClusterRenewCrypto(
1183 node_certificates=new_node_cert or new_cluster_cert,
1184 ssh_keys=new_ssh_keys)
1185 SubmitOpCode(renew_op, cl=cl)
1186
1187 ToStdout("All requested certificates and keys have been replaced."
1188 " Running \"gnt-cluster verify\" now is recommended.")
1189
1190 return 0
1191
1192
1193 def _BuildGanetiPubKeys(options, pub_key_file=pathutils.SSH_PUB_KEYS, cl=None,
1194 get_online_nodes_fn=GetOnlineNodes,
1195 get_nodes_ssh_ports_fn=GetNodesSshPorts,
1196 get_node_uuids_fn=GetNodeUUIDs,
1197 homedir_fn=None):
1198 """Recreates the 'ganeti_pub_key' file by polling all nodes.
1199
1200 """
1201 if os.path.exists(pub_key_file):
1202 utils.CreateBackup(pub_key_file)
1203 utils.RemoveFile(pub_key_file)
1204
1205 ssh.ClearPubKeyFile(pub_key_file)
1206
1207 if not cl:
1208 cl = GetClient()
1209
1210 (cluster_name, master_node) = \
1211 cl.QueryConfigValues(["cluster_name", "master_node"])
1212
1213 online_nodes = get_online_nodes_fn([], cl=cl)
1214 ssh_ports = get_nodes_ssh_ports_fn(online_nodes + [master_node], cl)
1215 ssh_port_map = dict(zip(online_nodes + [master_node], ssh_ports))
1216
1217 node_uuids = get_node_uuids_fn(online_nodes + [master_node], cl)
1218 node_uuid_map = dict(zip(online_nodes + [master_node], node_uuids))
1219
1220 nonmaster_nodes = [name for name in online_nodes
1221 if name != master_node]
1222
1223 _, pub_key_filename, _ = \
1224 ssh.GetUserFiles(constants.SSH_LOGIN_USER, mkdir=False, dircheck=False,
1225 kind=constants.SSHK_DSA, _homedir_fn=homedir_fn)
1226
1227 # get the key file of the master node
1228 pub_key = utils.ReadFile(pub_key_filename)
1229 ssh.AddPublicKey(node_uuid_map[master_node], pub_key,
1230 key_file=pub_key_file)
1231
1232 # get the key files of all non-master nodes
1233 for node in nonmaster_nodes:
1234 pub_key = ssh.ReadRemoteSshPubKeys(pub_key_filename, node, cluster_name,
1235 ssh_port_map[node],
1236 options.ssh_key_check,
1237 options.ssh_key_check)
1238 ssh.AddPublicKey(node_uuid_map[node], pub_key, key_file=pub_key_file)
1239
1240
1241 def RenewCrypto(opts, args):
1242 """Renews cluster certificates, keys and secrets.
1243
1244 """
1245 if opts.new_ssh_keys:
1246 _BuildGanetiPubKeys(opts)
1247 return _RenewCrypto(opts.new_cluster_cert,
1248 opts.new_rapi_cert,
1249 opts.rapi_cert,
1250 opts.new_spice_cert,
1251 opts.spice_cert,
1252 opts.spice_cacert,
1253 opts.new_confd_hmac_key,
1254 opts.new_cluster_domain_secret,
1255 opts.cluster_domain_secret,
1256 opts.force,
1257 opts.new_node_cert,
1258 opts.new_ssh_keys,
1259 opts.verbose,
1260 opts.debug > 0)
1261
1262
1263 def _GetEnabledDiskTemplates(opts):
1264 """Determine the list of enabled disk templates.
1265
1266 """
1267 if opts.enabled_disk_templates:
1268 return opts.enabled_disk_templates.split(",")
1269 else:
1270 return None
1271
1272
1273 def _GetVgName(opts, enabled_disk_templates):
1274 """Determine the volume group name.
1275
1276 @type enabled_disk_templates: list of strings
1277 @param enabled_disk_templates: cluster-wide enabled disk-templates
1278
1279 """
1280 # consistency between vg name and enabled disk templates
1281 vg_name = None
1282 if opts.vg_name is not None:
1283 vg_name = opts.vg_name
1284 if enabled_disk_templates:
1285 if vg_name and not utils.IsLvmEnabled(enabled_disk_templates):
1286 ToStdout("You specified a volume group with --vg-name, but you did not"
1287 " enable any of the following lvm-based disk templates: %s" %
1288 utils.CommaJoin(constants.DTS_LVM))
1289 return vg_name
1290
1291
1292 def _GetDrbdHelper(opts, enabled_disk_templates):
1293 """Determine the DRBD usermode helper.
1294
1295 """
1296 drbd_helper = opts.drbd_helper
1297 if enabled_disk_templates:
1298 drbd_enabled = constants.DT_DRBD8 in enabled_disk_templates
1299 if not drbd_enabled and opts.drbd_helper:
1300 ToStdout("You specified a DRBD usermode helper with "
1301 " --drbd-usermode-helper while DRBD is not enabled.")
1302 return drbd_helper
1303
1304
1305 def _GetCompressionTools(opts):
1306 """Determine the list of custom compression tools.
1307
1308 """
1309 if opts.compression_tools:
1310 return opts.compression_tools.split(",")
1311 elif opts.compression_tools is None:
1312 return None # To note the parameter was not provided
1313 else:
1314 return constants.IEC_DEFAULT_TOOLS # Resetting to default
1315
1316
1317 def SetClusterParams(opts, args):
1318 """Modify the cluster.
1319
1320 @param opts: the command line options selected by the user
1321 @type args: list
1322 @param args: should be an empty list
1323 @rtype: int
1324 @return: the desired exit code
1325
1326 """
1327 if not (opts.vg_name is not None or
1328 opts.drbd_helper is not None or
1329 opts.enabled_hypervisors or opts.hvparams or
1330 opts.beparams or opts.nicparams or
1331 opts.ndparams or opts.diskparams or
1332 opts.candidate_pool_size is not None or
1333 opts.max_running_jobs is not None or
1334 opts.max_tracked_jobs is not None or
1335 opts.uid_pool is not None or
1336 opts.maintain_node_health is not None or
1337 opts.add_uids is not None or
1338 opts.remove_uids is not None or
1339 opts.default_iallocator is not None or
1340 opts.default_iallocator_params is not None or
1341 opts.reserved_lvs is not None or
1342 opts.mac_prefix is not None or
1343 opts.master_netdev is not None or
1344 opts.master_netmask is not None or
1345 opts.use_external_mip_script is not None or
1346 opts.prealloc_wipe_disks is not None or
1347 opts.hv_state or
1348 opts.enabled_disk_templates or
1349 opts.disk_state or
1350 opts.ipolicy_bounds_specs is not None or
1351 opts.ipolicy_std_specs is not None or
1352 opts.ipolicy_disk_templates is not None or
1353 opts.ipolicy_vcpu_ratio is not None or
1354 opts.ipolicy_spindle_ratio is not None or
1355 opts.modify_etc_hosts is not None or
1356 opts.file_storage_dir is not None or
1357 opts.install_image is not None or
1358 opts.instance_communication_network is not None or
1359 opts.zeroing_image is not None or
1360 opts.shared_file_storage_dir is not None or
1361 opts.compression_tools is not None or
1362 opts.shared_file_storage_dir is not None or
1363 opts.enabled_user_shutdown is not None or
1364 opts.data_collector_interval or
1365 opts.enabled_data_collectors):
1366 ToStderr("Please give at least one of the parameters.")
1367 return 1
1368
1369 enabled_disk_templates = _GetEnabledDiskTemplates(opts)
1370 vg_name = _GetVgName(opts, enabled_disk_templates)
1371
1372 try:
1373 drbd_helper = _GetDrbdHelper(opts, enabled_disk_templates)
1374 except errors.OpPrereqError, e:
1375 ToStderr(str(e))
1376 return 1
1377
1378 hvlist = opts.enabled_hypervisors
1379 if hvlist is not None:
1380 hvlist = hvlist.split(",")
1381
1382 # a list of (name, dict) we can pass directly to dict() (or [])
1383 hvparams = dict(opts.hvparams)
1384 for hv_params in hvparams.values():
1385 utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
1386
1387 diskparams = dict(opts.diskparams)
1388
1389 for dt_params in diskparams.values():
1390 utils.ForceDictType(dt_params, constants.DISK_DT_TYPES)
1391
1392 beparams = opts.beparams
1393 utils.ForceDictType(beparams, constants.BES_PARAMETER_COMPAT)
1394
1395 nicparams = opts.nicparams
1396 utils.ForceDictType(nicparams, constants.NICS_PARAMETER_TYPES)
1397
1398 ndparams = opts.ndparams
1399 if ndparams is not None:
1400 utils.ForceDictType(ndparams, constants.NDS_PARAMETER_TYPES)
1401
1402 ipolicy = CreateIPolicyFromOpts(
1403 minmax_ispecs=opts.ipolicy_bounds_specs,
1404 std_ispecs=opts.ipolicy_std_specs,
1405 ipolicy_disk_templates=opts.ipolicy_disk_templates,
1406 ipolicy_vcpu_ratio=opts.ipolicy_vcpu_ratio,
1407 ipolicy_spindle_ratio=opts.ipolicy_spindle_ratio,
1408 )
1409
1410 mnh = opts.maintain_node_health
1411
1412 uid_pool = opts.uid_pool
1413 if uid_pool is not None:
1414 uid_pool = uidpool.ParseUidPool(uid_pool)
1415
1416 add_uids = opts.add_uids
1417 if add_uids is not None:
1418 add_uids = uidpool.ParseUidPool(add_uids)
1419
1420 remove_uids = opts.remove_uids
1421 if remove_uids is not None:
1422 remove_uids = uidpool.ParseUidPool(remove_uids)
1423
1424 if opts.reserved_lvs is not None:
1425 if opts.reserved_lvs == "":
1426 opts.reserved_lvs = []
1427 else:
1428 opts.reserved_lvs = utils.UnescapeAndSplit(opts.reserved_lvs, sep=",")
1429
1430 if opts.master_netmask is not None:
1431 try:
1432 opts.master_netmask = int(opts.master_netmask)
1433 except ValueError:
1434 ToStderr("The --master-netmask option expects an int parameter.")
1435 return 1
1436
1437 ext_ip_script = opts.use_external_mip_script
1438
1439 if opts.disk_state:
1440 disk_state = utils.FlatToDict(opts.disk_state)
1441 else:
1442 disk_state = {}
1443
1444 hv_state = dict(opts.hv_state)
1445
1446 compression_tools = _GetCompressionTools(opts)
1447
1448 enabled_data_collectors = dict(
1449 (k, v.lower().startswith("t"))
1450 for k, v in opts.enabled_data_collectors.items())
1451
1452 unrecognized_data_collectors = [
1453 k for k in enabled_data_collectors.keys()
1454 if k not in constants.DATA_COLLECTOR_NAMES]
1455 if unrecognized_data_collectors:
1456 ToStderr("Data collector names not recognized: %s" %
1457 ", ".join(unrecognized_data_collectors))
1458
1459 try:
1460 data_collector_interval = dict(
1461 (k, long(1e6 * float(v)))
1462 for (k, v) in opts.data_collector_interval.items())
1463 except ValueError:
1464 ToStderr("Can't transform all values to integers: {}".format(
1465 opts.data_collector_interval))
1466 return 1
1467 if any(v <= 0 for v in data_collector_interval):
1468 ToStderr("Some interval times where not above zero.")
1469 return 1
1470
1471 op = opcodes.OpClusterSetParams(
1472 vg_name=vg_name,
1473 drbd_helper=drbd_helper,
1474 enabled_hypervisors=hvlist,
1475 hvparams=hvparams,
1476 os_hvp=None,
1477 beparams=beparams,
1478 nicparams=nicparams,
1479 ndparams=ndparams,
1480 diskparams=diskparams,
1481 ipolicy=ipolicy,
1482 candidate_pool_size=opts.candidate_pool_size,
1483 max_running_jobs=opts.max_running_jobs,
1484 max_tracked_jobs=opts.max_tracked_jobs,
1485 maintain_node_health=mnh,
1486 modify_etc_hosts=opts.modify_etc_hosts,
1487 uid_pool=uid_pool,
1488 add_uids=add_uids,
1489 remove_uids=remove_uids,
1490 default_iallocator=opts.default_iallocator,
1491 default_iallocator_params=opts.default_iallocator_params,
1492 prealloc_wipe_disks=opts.prealloc_wipe_disks,
1493 mac_prefix=opts.mac_prefix,
1494 master_netdev=opts.master_netdev,
1495 master_netmask=opts.master_netmask,
1496 reserved_lvs=opts.reserved_lvs,
1497 use_external_mip_script=ext_ip_script,
1498 hv_state=hv_state,
1499 disk_state=disk_state,
1500 enabled_disk_templates=enabled_disk_templates,
1501 force=opts.force,
1502 file_storage_dir=opts.file_storage_dir,
1503 install_image=opts.install_image,
1504 instance_communication_network=opts.instance_communication_network,
1505 zeroing_image=opts.zeroing_image,
1506 shared_file_storage_dir=opts.shared_file_storage_dir,
1507 compression_tools=compression_tools,
1508 enabled_user_shutdown=opts.enabled_user_shutdown,
1509 enabled_data_collectors=enabled_data_collectors,
1510 data_collector_interval=data_collector_interval,
1511 )
1512 return base.GetResult(None, opts, SubmitOrSend(op, opts))
1513
1514
1515 def QueueOps(opts, args):
1516 """Queue operations.
1517
1518 @param opts: the command line options selected by the user
1519 @type args: list
1520 @param args: should contain only one element, the subcommand
1521 @rtype: int
1522 @return: the desired exit code
1523
1524 """
1525 command = args[0]
1526 client = GetClient()
1527 if command in ("drain", "undrain"):
1528 drain_flag = command == "drain"
1529 client.SetQueueDrainFlag(drain_flag)
1530 elif command == "info":
1531 result = client.QueryConfigValues(["drain_flag"])
1532 if result[0]:
1533 val = "set"
1534 else:
1535 val = "unset"
1536 ToStdout("The drain flag is %s" % val)
1537 else:
1538 raise errors.OpPrereqError("Command '%s' is not valid." % command,
1539 errors.ECODE_INVAL)
1540
1541 return 0
1542
1543
1544 def _ShowWatcherPause(until):
1545 if until is None or until < time.time():
1546 ToStdout("The watcher is not paused.")
1547 else:
1548 ToStdout("The watcher is paused until %s.", time.ctime(until))
1549
1550
1551 def WatcherOps(opts, args):
1552 """Watcher operations.
1553
1554 @param opts: the command line options selected by the user
1555 @type args: list
1556 @param args: should contain only one element, the subcommand
1557 @rtype: int
1558 @return: the desired exit code
1559
1560 """
1561 command = args[0]
1562 client = GetClient()
1563
1564 if command == "continue":
1565 client.SetWatcherPause(None)
1566 ToStdout("The watcher is no longer paused.")
1567
1568 elif command == "pause":
1569 if len(args) < 2:
1570 raise errors.OpPrereqError("Missing pause duration", errors.ECODE_INVAL)
1571
1572 result = client.SetWatcherPause(time.time() + ParseTimespec(args[1]))
1573 _ShowWatcherPause(result)
1574
1575 elif command == "info":
1576 result = client.QueryConfigValues(["watcher_pause"])
1577 _ShowWatcherPause(result[0])
1578
1579 else:
1580 raise errors.OpPrereqError("Command '%s' is not valid." % command,
1581 errors.ECODE_INVAL)
1582
1583 return 0
1584
1585
1586 def _OobPower(opts, node_list, power):
1587 """Puts the node in the list to desired power state.
1588
1589 @param opts: The command line options selected by the user
1590 @param node_list: The list of nodes to operate on
1591 @param power: True if they should be powered on, False otherwise
1592 @return: The success of the operation (none failed)
1593
1594 """
1595 if power:
1596 command = constants.OOB_POWER_ON
1597 else:
1598 command = constants.OOB_POWER_OFF
1599
1600 op = opcodes.OpOobCommand(node_names=node_list,
1601 command=command,
1602 ignore_status=True,
1603 timeout=opts.oob_timeout,
1604 power_delay=opts.power_delay)
1605 result = SubmitOpCode(op, opts=opts)
1606 errs = 0
1607 for node_result in result:
1608 (node_tuple, data_tuple) = node_result
1609 (_, node_name) = node_tuple
1610 (data_status, _) = data_tuple
1611 if data_status != constants.RS_NORMAL:
1612 assert data_status != constants.RS_UNAVAIL
1613 errs += 1
1614 ToStderr("There was a problem changing power for %s, please investigate",
1615 node_name)
1616
1617 if errs > 0:
1618 return False
1619
1620 return True
1621
1622
1623 def _InstanceStart(opts, inst_list, start, no_remember=False):
1624 """Puts the instances in the list to desired state.
1625
1626 @param opts: The command line options selected by the user
1627 @param inst_list: The list of instances to operate on
1628 @param start: True if they should be started, False for shutdown
1629 @param no_remember: If the instance state should be remembered
1630 @return: The success of the operation (none failed)
1631
1632 """
1633 if start:
1634 opcls = opcodes.OpInstanceStartup
1635 text_submit, text_success, text_failed = ("startup", "started", "starting")
1636 else:
1637 opcls = compat.partial(opcodes.OpInstanceShutdown,
1638 timeout=opts.shutdown_timeout,
1639 no_remember=no_remember)
1640 text_submit, text_success, text_failed = ("shutdown", "stopped", "stopping")
1641
1642 jex = JobExecutor(opts=opts)
1643
1644 for inst in inst_list:
1645 ToStdout("Submit %s of instance %s", text_submit, inst)
1646 op = opcls(instance_name=inst)
1647 jex.QueueJob(inst, op)
1648
1649 results = jex.GetResults()
1650 bad_cnt = len([1 for (success, _) in results if not success])
1651
1652 if bad_cnt == 0:
1653 ToStdout("All instances have been %s successfully", text_success)
1654 else:
1655 ToStderr("There were errors while %s instances:\n"
1656 "%d error(s) out of %d instance(s)", text_failed, bad_cnt,
1657 len(results))
1658 return False
1659
1660 return True
1661
1662
1663 class _RunWhenNodesReachableHelper(object):
1664 """Helper class to make shared internal state sharing easier.
1665
1666 @ivar success: Indicates if all action_cb calls were successful
1667
1668 """
1669 def __init__(self, node_list, action_cb, node2ip, port, feedback_fn,
1670 _ping_fn=netutils.TcpPing, _sleep_fn=time.sleep):
1671 """Init the object.
1672
1673 @param node_list: The list of nodes to be reachable
1674 @param action_cb: Callback called when a new host is reachable
1675 @type node2ip: dict
1676 @param node2ip: Node to ip mapping
1677 @param port: The port to use for the TCP ping
1678 @param feedback_fn: The function used for feedback
1679 @param _ping_fn: Function to check reachabilty (for unittest use only)
1680 @param _sleep_fn: Function to sleep (for unittest use only)
1681
1682 """
1683 self.down = set(node_list)
1684 self.up = set()
1685 self.node2ip = node2ip
1686 self.success = True
1687 self.action_cb = action_cb
1688 self.port = port
1689 self.feedback_fn = feedback_fn
1690 self._ping_fn = _ping_fn
1691 self._sleep_fn = _sleep_fn
1692
1693 def __call__(self):
1694 """When called we run action_cb.
1695
1696 @raises utils.RetryAgain: When there are still down nodes
1697
1698 """
1699 if not self.action_cb(self.up):
1700 self.success = False
1701
1702 if self.down:
1703 raise utils.RetryAgain()
1704 else:
1705 return self.success
1706
1707 def Wait(self, secs):
1708 """Checks if a host is up or waits remaining seconds.
1709
1710 @param secs: The secs remaining
1711
1712 """
1713 start = time.time()
1714 for node in self.down:
1715 if self._ping_fn(self.node2ip[node], self.port, timeout=_EPO_PING_TIMEOUT,
1716 live_port_needed=True):
1717 self.feedback_fn("Node %s became available" % node)
1718 self.up.add(node)
1719 self.down -= self.up
1720 # If we have a node available there is the possibility to run the
1721 # action callback successfully, therefore we don't wait and return
1722 return
1723
1724 self._sleep_fn(max(0.0, start + secs - time.time()))
1725
1726
1727 def _RunWhenNodesReachable(node_list, action_cb, interval):
1728 """Run action_cb when nodes become reachable.
1729
1730 @param node_list: The list of nodes to be reachable
1731 @param action_cb: Callback called when a new host is reachable
1732 @param interval: The earliest time to retry
1733
1734 """
1735 client = GetClient()
1736 cluster_info = client.QueryClusterInfo()
1737 if cluster_info["primary_ip_version"] == constants.IP4_VERSION:
1738 family = netutils.IPAddress.family
1739 else:
1740 family = netutils.IP6Address.family
1741
1742 node2ip = dict((node, netutils.GetHostname(node, family=family).ip)
1743 for node in node_list)
1744
1745 port = netutils.GetDaemonPort(constants.NODED)
1746 helper = _RunWhenNodesReachableHelper(node_list, action_cb, node2ip, port,
1747 ToStdout)
1748
1749 try:
1750 return utils.Retry(helper, interval, _EPO_REACHABLE_TIMEOUT,
1751 wait_fn=helper.Wait)
1752 except utils.RetryTimeout:
1753 ToStderr("Time exceeded while waiting for nodes to become reachable"
1754 " again:\n - %s", " - ".join(helper.down))
1755 return False
1756
1757
1758 def _MaybeInstanceStartup(opts, inst_map, nodes_online,
1759 _instance_start_fn=_InstanceStart):
1760 """Start the instances conditional based on node_states.
1761
1762 @param opts: The command line options selected by the user
1763 @param inst_map: A dict of inst -> nodes mapping
1764 @param nodes_online: A list of nodes online
1765 @param _instance_start_fn: Callback to start instances (unittest use only)
1766 @return: Success of the operation on all instances
1767
1768 """
1769 start_inst_list = []
1770 for (inst, nodes) in inst_map.items():
1771 if not (nodes - nodes_online):
1772 # All nodes the instance lives on are back online
1773 start_inst_list.append(inst)
1774
1775 for inst in start_inst_list:
1776 del inst_map[inst]
1777
1778 if start_inst_list:
1779 return _instance_start_fn(opts, start_inst_list, True)
1780
1781 return True
1782
1783
1784 def _EpoOn(opts, full_node_list, node_list, inst_map):
1785 """Does the actual power on.
1786
1787 @param opts: The command line options selected by the user
1788 @param full_node_list: All nodes to operate on (includes nodes not supporting
1789 OOB)
1790 @param node_list: The list of nodes to operate on (all need to support OOB)
1791 @param inst_map: A dict of inst -> nodes mapping
1792 @return: The desired exit status
1793
1794 """
1795 if node_list and not _OobPower(opts, node_list, False):
1796 ToStderr("Not all nodes seem to get back up, investigate and start"
1797 " manually if needed")
1798
1799 # Wait for the nodes to be back up
1800 action_cb = compat.partial(_MaybeInstanceStartup, opts, dict(inst_map))
1801
1802 ToStdout("Waiting until all nodes are available again")
1803 if not _RunWhenNodesReachable(full_node_list, action_cb, _EPO_PING_INTERVAL):
1804 ToStderr("Please investigate and start stopped instances manually")
1805 return constants.EXIT_FAILURE
1806
1807 return constants.EXIT_SUCCESS
1808
1809
1810 def _EpoOff(opts, node_list, inst_map):
1811 """Does the actual power off.
1812
1813 @param opts: The command line options selected by the user
1814 @param node_list: The list of nodes to operate on (all need to support OOB)
1815 @param inst_map: A dict of inst -> nodes mapping
1816 @return: The desired exit status
1817
1818 """
1819 if not _InstanceStart(opts, inst_map.keys(), False, no_remember=True):
1820 ToStderr("Please investigate and stop instances manually before continuing")
1821 return constants.EXIT_FAILURE
1822
1823 if not node_list:
1824 return constants.EXIT_SUCCESS
1825
1826 if _OobPower(opts, node_list, False):
1827 return constants.EXIT_SUCCESS
1828 else:
1829 return constants.EXIT_FAILURE
1830
1831
1832 def Epo(opts, args, qcl=None, _on_fn=_EpoOn, _off_fn=_EpoOff,
1833 _confirm_fn=ConfirmOperation,
1834 _stdout_fn=ToStdout, _stderr_fn=ToStderr):
1835 """EPO operations.
1836
1837 @param opts: the command line options selected by the user
1838 @type args: list
1839 @param args: should contain only one element, the subcommand
1840 @rtype: int
1841 @return: the desired exit code
1842
1843 """
1844 if opts.groups and opts.show_all:
1845 _stderr_fn("Only one of --groups or --all are allowed")
1846 return constants.EXIT_FAILURE
1847 elif args and opts.show_all:
1848 _stderr_fn("Arguments in combination with --all are not allowed")
1849 return constants.EXIT_FAILURE
1850
1851 if qcl is None:
1852 # Query client
1853 qcl = GetClient()
1854
1855 if opts.groups:
1856 node_query_list = \
1857 itertools.chain(*qcl.QueryGroups(args, ["node_list"], False))
1858 else:
1859 node_query_list = args
1860
1861 result = qcl.QueryNodes(node_query_list, ["name", "master", "pinst_list",
1862 "sinst_list", "powered", "offline"],
1863 False)
1864
1865 all_nodes = map(compat.fst, result)
1866 node_list = []
1867 inst_map = {}
1868 for (node, master, pinsts, sinsts, powered, offline) in result:
1869 if not offline:
1870 for inst in (pinsts + sinsts):
1871 if inst in inst_map:
1872 if not master:
1873 inst_map[inst].add(node)
1874 elif master:
1875 inst_map[inst] = set()
1876 else:
1877 inst_map[inst] = set([node])
1878
1879 if master and opts.on:
1880 # We ignore the master for turning on the machines, in fact we are
1881 # already operating on the master at this point :)
1882 continue
1883 elif master and not opts.show_all:
1884 _stderr_fn("%s is the master node, please do a master-failover to another"
1885 " node not affected by the EPO or use --all if you intend to"
1886 " shutdown the whole cluster", node)
1887 return constants.EXIT_FAILURE
1888 elif powered is None:
1889 _stdout_fn("Node %s does not support out-of-band handling, it can not be"
1890 " handled in a fully automated manner", node)
1891 elif powered == opts.on:
1892 _stdout_fn("Node %s is already in desired power state, skipping", node)
1893 elif not offline or (offline and powered):
1894 node_list.append(node)
1895
1896 if not (opts.force or _confirm_fn(all_nodes, "nodes", "epo")):
1897 return constants.EXIT_FAILURE
1898
1899 if opts.on:
1900 return _on_fn(opts, all_nodes, node_list, inst_map)
1901 else:
1902 return _off_fn(opts, node_list, inst_map)
1903
1904
1905 def _GetCreateCommand(info):
1906 buf = StringIO()
1907 buf.write("gnt-cluster init")
1908 PrintIPolicyCommand(buf, info["ipolicy"], False)
1909 buf.write(" ")
1910 buf.write(info["name"])
1911 return buf.getvalue()
1912
1913
1914 def ShowCreateCommand(opts, args):
1915 """Shows the command that can be used to re-create the cluster.
1916
1917 Currently it works only for ipolicy specs.
1918
1919 """
1920 cl = GetClient()
1921 result = cl.QueryClusterInfo()
1922 ToStdout(_GetCreateCommand(result))
1923
1924
1925 def _RunCommandAndReport(cmd):
1926 """Run a command and report its output, iff it failed.
1927
1928 @param cmd: the command to execute
1929 @type cmd: list
1930 @rtype: bool
1931 @return: False, if the execution failed.
1932
1933 """
1934 result = utils.RunCmd(cmd)
1935 if result.failed:
1936 ToStderr("Command %s failed: %s; Output %s" %
1937 (cmd, result.fail_reason, result.output))
1938 return False
1939 return True
1940
1941
1942 def _VerifyCommand(cmd):
1943 """Verify that a given command succeeds on all online nodes.
1944
1945 As this function is intended to run during upgrades, it
1946 is implemented in such a way that it still works, if all Ganeti
1947 daemons are down.
1948
1949 @param cmd: the command to execute
1950 @type cmd: list
1951 @rtype: list
1952 @return: the list of node names that are online where
1953 the command failed.
1954
1955 """
1956 command = utils.text.ShellQuoteArgs([str(val) for val in cmd])
1957
1958 nodes = ssconf.SimpleStore().GetOnlineNodeList()
1959 master_node = ssconf.SimpleStore().GetMasterNode()
1960 cluster_name = ssconf.SimpleStore().GetClusterName()
1961
1962 # If master node is in 'nodes', make sure master node is at list end
1963 if master_node in nodes:
1964 nodes.remove(master_node)
1965 nodes.append(master_node)
1966
1967 failed = []
1968
1969 srun = ssh.SshRunner(cluster_name=cluster_name)
1970 for name in nodes:
1971 result = srun.Run(name, constants.SSH_LOGIN_USER, command)
1972 if result.exit_code != 0:
1973 failed.append(name)
1974
1975 return failed
1976
1977
1978 def _VerifyVersionInstalled(versionstring):
1979 """Verify that the given version of ganeti is installed on all online nodes.
1980
1981 Do nothing, if this is the case, otherwise print an appropriate
1982 message to stderr.
1983
1984 @param versionstring: the version to check for
1985 @type versionstring: string
1986 @rtype: bool
1987 @return: True, if the version is installed on all online nodes
1988
1989 """
1990 badnodes = _VerifyCommand(["test", "-d",
1991 os.path.join(pathutils.PKGLIBDIR, versionstring)])
1992 if badnodes:
1993 ToStderr("Ganeti version %s not installed on nodes %s"
1994 % (versionstring, ", ".join(badnodes)))
1995 return False
1996
1997 return True
1998
1999
2000 def _GetRunning():
2001 """Determine the list of running jobs.
2002
2003 @rtype: list
2004 @return: the number of jobs still running
2005
2006 """
2007 cl = GetClient()
2008 qfilter = qlang.MakeSimpleFilter("status",
2009 frozenset([constants.JOB_STATUS_RUNNING]))
2010 return len(cl.Query(constants.QR_JOB, [], qfilter).data)
2011
2012
2013 def _SetGanetiVersion(versionstring):
2014 """Set the active version of ganeti to the given versionstring
2015
2016 @type versionstring: string
2017 @rtype: list
2018 @return: the list of nodes where the version change failed
2019
2020 """
2021 failed = []
2022 if constants.HAS_GNU_LN:
2023 failed.extend(_VerifyCommand(
2024 ["ln", "-s", "-f", "-T",
2025 os.path.join(pathutils.PKGLIBDIR, versionstring),
2026 os.path.join(pathutils.SYSCONFDIR, "ganeti/lib")]))
2027 failed.extend(_VerifyCommand(
2028 ["ln", "-s", "-f", "-T",
2029 os.path.join(pathutils.SHAREDIR, versionstring),
2030 os.path.join(pathutils.SYSCONFDIR, "ganeti/share")]))
2031 else:
2032 failed.extend(_VerifyCommand(
2033 ["rm", "-f", os.path.join(pathutils.SYSCONFDIR, "ganeti/lib")]))
2034 failed.extend(_VerifyCommand(
2035 ["ln", "-s", "-f", os.path.join(pathutils.PKGLIBDIR, versionstring),
2036 os.path.join(pathutils.SYSCONFDIR, "ganeti/lib")]))
2037 failed.extend(_VerifyCommand(
2038 ["rm", "-f", os.path.join(pathutils.SYSCONFDIR, "ganeti/share")]))
2039 failed.extend(_VerifyCommand(
2040 ["ln", "-s", "-f", os.path.join(pathutils.SHAREDIR, versionstring),
2041 os.path.join(pathutils.SYSCONFDIR, "ganeti/share")]))
2042 return list(set(failed))
2043
2044
2045 def _ExecuteCommands(fns):
2046 """Execute a list of functions, in reverse order.
2047
2048 @type fns: list of functions.
2049 @param fns: the functions to be executed.
2050
2051 """
2052 for fn in reversed(fns):
2053 fn()
2054
2055
2056 def _GetConfigVersion():
2057 """Determine the version the configuration file currently has.
2058
2059 @rtype: tuple or None
2060 @return: (major, minor, revision) if the version can be determined,
2061 None otherwise
2062
2063 """
2064 config_data = serializer.LoadJson(utils.ReadFile(pathutils.CLUSTER_CONF_FILE))
2065 try:
2066 config_version = config_data["version"]
2067 except KeyError:
2068 return None
2069 return utils.SplitVersion(config_version)
2070
2071
2072 def _ReadIntentToUpgrade():
2073 """Read the file documenting the intent to upgrade the cluster.
2074
2075 @rtype: (string, string) or (None, None)
2076 @return: (old version, version to upgrade to), if the file exists,
2077 and (None, None) otherwise.
2078
2079 """
2080 if not os.path.isfile(pathutils.INTENT_TO_UPGRADE):
2081 return (None, None)
2082
2083 contentstring = utils.ReadFile(pathutils.INTENT_TO_UPGRADE)
2084 contents = utils.UnescapeAndSplit(contentstring)
2085 if len(contents) != 3:
2086 # file syntactically mal-formed
2087 return (None, None)
2088 return (contents[0], contents[1])
2089
2090
2091 def _WriteIntentToUpgrade(version):
2092 """Write file documenting the intent to upgrade the cluster.
2093
2094 @type version: string
2095 @param version: the version we intent to upgrade to
2096
2097 """
2098 utils.WriteFile(pathutils.INTENT_TO_UPGRADE,
2099 data=utils.EscapeAndJoin([constants.RELEASE_VERSION, version,
2100 "%d" % os.getpid()]))
2101
2102
2103 def _UpgradeBeforeConfigurationChange(versionstring):
2104 """
2105 Carry out all the tasks necessary for an upgrade that happen before
2106 the configuration file, or Ganeti version, changes.
2107
2108 @type versionstring: string
2109 @param versionstring: the version to upgrade to
2110 @rtype: (bool, list)
2111 @return: tuple of a bool indicating success and a list of rollback tasks
2112
2113 """
2114 rollback = []
2115
2116 if not _VerifyVersionInstalled(versionstring):
2117 return (False, rollback)
2118
2119 _WriteIntentToUpgrade(versionstring)
2120 rollback.append(
2121 lambda: utils.RunCmd(["rm", "-f", pathutils.INTENT_TO_UPGRADE]))
2122
2123 ToStdout("Draining queue")
2124 client = GetClient()
2125 client.SetQueueDrainFlag(True)
2126
2127 rollback.append(lambda: GetClient().SetQueueDrainFlag(False))
2128
2129 if utils.SimpleRetry(0, _GetRunning,
2130 constants.UPGRADE_QUEUE_POLL_INTERVAL,
2131 constants.UPGRADE_QUEUE_DRAIN_TIMEOUT):
2132 ToStderr("Failed to completely empty the queue.")
2133 return (False, rollback)
2134
2135 ToStdout("Pausing the watcher for one hour.")
2136 rollback.append(lambda: GetClient().SetWatcherPause(None))
2137 GetClient().SetWatcherPause(time.time() + 60 * 60)
2138
2139 ToStdout("Stopping daemons on master node.")
2140 if not _RunCommandAndReport([pathutils.DAEMON_UTIL, "stop-all"]):
2141 return (False, rollback)
2142
2143 if not _VerifyVersionInstalled(versionstring):
2144 utils.RunCmd([pathutils.DAEMON_UTIL, "start-all"])
2145 return (False, rollback)
2146
2147 ToStdout("Stopping daemons everywhere.")
2148 rollback.append(lambda: _VerifyCommand([pathutils.DAEMON_UTIL, "start-all"]))
2149 badnodes = _VerifyCommand([pathutils.DAEMON_UTIL, "stop-all"])
2150 if badnodes:
2151 ToStderr("Failed to stop daemons on %s." % (", ".join(badnodes),))
2152 return (False, rollback)
2153
2154 backuptar = os.path.join(pathutils.BACKUP_DIR, "ganeti%d.tar" % time.time())
2155 ToStdout("Backing up configuration as %s" % backuptar)
2156 if not _RunCommandAndReport(["mkdir", "-p", pathutils.BACKUP_DIR]):
2157 return (False, rollback)
2158
2159 # Create the archive in a safe manner, as it contains sensitive
2160 # information.
2161 (_, tmp_name) = tempfile.mkstemp(prefix=backuptar, dir=pathutils.BACKUP_DIR)
2162 if not _RunCommandAndReport(["tar", "-cf", tmp_name,
2163 "--exclude=queue/archive",
2164 pathutils.DATA_DIR]):
2165 return (False, rollback)
2166
2167 os.rename(tmp_name, backuptar)
2168 return (True, rollback)
2169
2170
2171 def _VersionSpecificDowngrade():
2172 """
2173 Perform any additional downrade tasks that are version specific
2174 and need to be done just after the configuration downgrade. This
2175 function needs to be idempotent, so that it can be redone if the
2176 downgrade procedure gets interrupted after changing the
2177 configuration.
2178
2179 Note that this function has to be reset with every version bump.
2180
2181 @return: True upon success
2182 """
2183 ToStdout("Performing version-specific downgrade tasks.")
2184
2185 return True
2186
2187
2188 def _SwitchVersionAndConfig(versionstring, downgrade):
2189 """
2190 Switch to the new Ganeti version and change the configuration,
2191 in correct order.
2192
2193 @type versionstring: string
2194 @param versionstring: the version to change to
2195 @type downgrade: bool
2196 @param downgrade: True, if the configuration should be downgraded
2197 @rtype: (bool, list)
2198 @return: tupe of a bool indicating success, and a list of
2199 additional rollback tasks
2200
2201 """
2202 rollback = []
2203 if downgrade:
2204 ToStdout("Downgrading configuration")
2205 if not _RunCommandAndReport([pathutils.CFGUPGRADE, "--downgrade", "-f"]):
2206 return (False, rollback)
2207 # Note: version specific downgrades need to be done before switching
2208 # binaries, so that we still have the knowledgeable binary if the downgrade
2209 # process gets interrupted at this point.
2210 if not _VersionSpecificDowngrade():
2211 return (False, rollback)
2212
2213 # Configuration change is the point of no return. From then onwards, it is
2214 # safer to push through the up/dowgrade than to try to roll it back.
2215
2216 ToStdout("Switching to version %s on all nodes" % versionstring)
2217 rollback.append(lambda: _SetGanetiVersion(constants.DIR_VERSION))
2218 badnodes = _SetGanetiVersion(versionstring)
2219 if badnodes:
2220 ToStderr("Failed to switch to Ganeti version %s on nodes %s"
2221 % (versionstring, ", ".join(badnodes)))
2222 if not downgrade:
2223 return (False, rollback)
2224
2225 # Now that we have changed to the new version of Ganeti we should
2226 # not communicate over luxi any more, as luxi might have changed in
2227 # incompatible ways. Therefore, manually call the corresponding ganeti
2228 # commands using their canonical (version independent) path.
2229
2230 if not downgrade:
2231 ToStdout("Upgrading configuration")
2232 if not _RunCommandAndReport([pathutils.CFGUPGRADE, "-f"]):
2233 return (False, rollback)
2234
2235 return (True, rollback)
2236
2237
2238 def _UpgradeAfterConfigurationChange(oldversion):
2239 """
2240 Carry out the upgrade actions necessary after switching to the new
2241 Ganeti version and updating the configuration.
2242
2243 As this part is run at a time where the new version of Ganeti is already
2244 running, no communication should happen via luxi, as this is not a stable
2245 interface. Also, as the configuration change is the point of no return,
2246 all actions are pushed trough, even if some of them fail.
2247
2248 @param oldversion: the version the upgrade started from
2249 @type oldversion: string
2250 @rtype: int
2251 @return: the intended return value
2252
2253 """
2254 returnvalue = 0
2255
2256 ToStdout("Ensuring directories everywhere.")
2257 badnodes = _VerifyCommand([pathutils.ENSURE_DIRS])
2258 if badnodes:
2259 ToStderr("Warning: failed to ensure directories on %s." %
2260 (", ".join(badnodes)))
2261 returnvalue = 1
2262
2263 ToStdout("Starting daemons everywhere.")
2264 badnodes = _VerifyCommand([pathutils.DAEMON_UTIL, "start-all"])
2265 if badnodes:
2266 ToStderr("Warning: failed to start daemons on %s." % (", ".join(badnodes),))
2267 returnvalue = 1
2268
2269 ToStdout("Redistributing the configuration.")
2270 if not _RunCommandAndReport(["gnt-cluster", "redist-conf", "--yes-do-it"]):
2271 returnvalue = 1
2272
2273 ToStdout("Restarting daemons everywhere.")
2274 badnodes = _VerifyCommand([pathutils.DAEMON_UTIL, "stop-all"])
2275 badnodes.extend(_VerifyCommand([pathutils.DAEMON_UTIL, "start-all"]))
2276 if badnodes:
2277 ToStderr("Warning: failed to start daemons on %s." %
2278 (", ".join(list(set(badnodes))),))
2279 returnvalue = 1
2280
2281 ToStdout("Undraining the queue.")
2282 if not _RunCommandAndReport(["gnt-cluster", "queue", "undrain"]):
2283 returnvalue = 1
2284
2285 _RunCommandAndReport(["rm", "-f", pathutils.INTENT_TO_UPGRADE])
2286
2287 ToStdout("Running post-upgrade hooks")
2288 if not _RunCommandAndReport([pathutils.POST_UPGRADE, oldversion]):
2289 returnvalue = 1
2290
2291 ToStdout("Unpausing the watcher.")
2292 if not _RunCommandAndReport(["gnt-cluster", "watcher", "continue"]):
2293 returnvalue = 1
2294
2295 ToStdout("Verifying cluster.")
2296 if not _RunCommandAndReport(["gnt-cluster", "verify"]):
2297 returnvalue = 1
2298
2299 return returnvalue
2300
2301
2302 def UpgradeGanetiCommand(opts, args):
2303 """Upgrade a cluster to a new ganeti version.
2304
2305 @param opts: the command line options selected by the user
2306 @type args: list
2307 @param args: should be an empty list
2308 @rtype: int
2309 @return: the desired exit code
2310
2311 """
2312 if ((not opts.resume and opts.to is None)
2313 or (opts.resume and opts.to is not None)):
2314 ToStderr("Precisely one of the options --to and --resume"
2315 " has to be given")
2316 return 1
2317
2318 # If we're not told to resume, verify there is no upgrade
2319 # in progress.
2320 if not opts.resume:
2321 oldversion, versionstring = _ReadIntentToUpgrade()
2322 if versionstring is not None:
2323 # An upgrade is going on; verify whether the target matches
2324 if versionstring == opts.to:
2325 ToStderr("An upgrade is already in progress. Target version matches,"
2326 " resuming.")
2327 opts.resume = True
2328 opts.to = None
2329 else:
2330 ToStderr("An upgrade from %s to %s is in progress; use --resume to"
2331 " finish it first" % (oldversion, versionstring))
2332 return 1
2333
2334 oldversion = constants.RELEASE_VERSION
2335
2336 if opts.resume:
2337 ssconf.CheckMaster(False)
2338 oldversion, versionstring = _ReadIntentToUpgrade()
2339 if versionstring is None:
2340 return 0
2341 version = utils.version.ParseVersion(versionstring)
2342 if version is None:
2343 return 1
2344 configversion = _GetConfigVersion()
2345 if configversion is None:
2346 return 1
2347 # If the upgrade we resume was an upgrade between compatible
2348 # versions (like 2.10.0 to 2.10.1), the correct configversion
2349 # does not guarantee that the config has been updated.
2350 # However, in the case of a compatible update with the configuration
2351 # not touched, we are running a different dirversion with the same
2352 # config version.
2353 config_already_modified = \
2354 (utils.IsCorrectConfigVersion(version, configversion) and
2355 not (versionstring != constants.DIR_VERSION and
2356 configversion == (constants.CONFIG_MAJOR, constants.CONFIG_MINOR,
2357 constants.CONFIG_REVISION)))
2358 if not config_already_modified:
2359 # We have to start from the beginning; however, some daemons might have
2360 # already been stopped, so the only way to get into a well-defined state
2361 # is by starting all daemons again.
2362 _VerifyCommand([pathutils.DAEMON_UTIL, "start-all"])
2363 else:
2364 versionstring = opts.to
2365 config_already_modified = False
2366 version = utils.version.ParseVersion(versionstring)
2367 if version is None:
2368 ToStderr("Could not parse version string %s" % versionstring)
2369 return 1
2370
2371 msg = utils.version.UpgradeRange(version)
2372 if msg is not None:
2373 ToStderr("Cannot upgrade to %s: %s" % (versionstring, msg))
2374 return 1
2375
2376 if not config_already_modified:
2377 success, rollback = _UpgradeBeforeConfigurationChange(versionstring)
2378 if not success:
2379 _ExecuteCommands(rollback)
2380 return 1
2381 else:
2382 rollback = []
2383
2384 downgrade = utils.version.ShouldCfgdowngrade(version)
2385
2386 success, additionalrollback = \
2387 _SwitchVersionAndConfig(versionstring, downgrade)
2388 if not success:
2389 rollback.extend(additionalrollback)
2390 _ExecuteCommands(rollback)
2391 return 1
2392
2393 return _UpgradeAfterConfigurationChange(oldversion)
2394
2395
2396 commands = {
2397 "init": (
2398 InitCluster, [ArgHost(min=1, max=1)],
2399 [BACKEND_OPT, CP_SIZE_OPT, ENABLED_HV_OPT, GLOBAL_FILEDIR_OPT,
2400 HVLIST_OPT, MAC_PREFIX_OPT, MASTER_NETDEV_OPT, MASTER_NETMASK_OPT,
2401 NIC_PARAMS_OPT, NOMODIFY_ETCHOSTS_OPT, NOMODIFY_SSH_SETUP_OPT,
2402 SECONDARY_IP_OPT, VG_NAME_OPT, MAINTAIN_NODE_HEALTH_OPT, UIDPOOL_OPT,
2403 DRBD_HELPER_OPT, DEFAULT_IALLOCATOR_OPT, DEFAULT_IALLOCATOR_PARAMS_OPT,
2404 PRIMARY_IP_VERSION_OPT, PREALLOC_WIPE_DISKS_OPT, NODE_PARAMS_OPT,
2405 GLOBAL_SHARED_FILEDIR_OPT, USE_EXTERNAL_MIP_SCRIPT, DISK_PARAMS_OPT,
2406 HV_STATE_OPT, DISK_STATE_OPT, ENABLED_DISK_TEMPLATES_OPT,
2407 IPOLICY_STD_SPECS_OPT, GLOBAL_GLUSTER_FILEDIR_OPT, INSTALL_IMAGE_OPT,
2408 ZEROING_IMAGE_OPT, COMPRESSION_TOOLS_OPT,
2409 ENABLED_USER_SHUTDOWN_OPT, SSH_KEY_BITS_OPT, SSH_KEY_TYPE_OPT,
2410 ]
2411 + INSTANCE_POLICY_OPTS + SPLIT_ISPECS_OPTS,
2412 "[opts...] <cluster_name>", "Initialises a new cluster configuration"),
2413 "destroy": (
2414 DestroyCluster, ARGS_NONE, [YES_DOIT_OPT],
2415 "", "Destroy cluster"),
2416 "rename": (
2417 RenameCluster, [ArgHost(min=1, max=1)],
2418 [FORCE_OPT, DRY_RUN_OPT],
2419 "<new_name>",
2420 "Renames the cluster"),
2421 "redist-conf": (
2422 RedistributeConfig, ARGS_NONE, SUBMIT_OPTS +
2423 [DRY_RUN_OPT, PRIORITY_OPT, FORCE_DISTRIBUTION],
2424 "", "Forces a push of the configuration file and ssconf files"
2425 " to the nodes in the cluster"),
2426 "verify": (
2427 VerifyCluster, ARGS_NONE,
2428 [VERBOSE_OPT, DEBUG_SIMERR_OPT, ERROR_CODES_OPT, NONPLUS1_OPT,
2429 DRY_RUN_OPT, PRIORITY_OPT, NODEGROUP_OPT, IGNORE_ERRORS_OPT,
2430 VERIFY_CLUTTER_OPT],
2431 "", "Does a check on the cluster configuration"),
2432 "verify-disks": (
2433 VerifyDisks, ARGS_NONE, [PRIORITY_OPT, NODEGROUP_OPT],
2434 "", "Does a check on the cluster disk status"),
2435 "repair-disk-sizes": (
2436 RepairDiskSizes, ARGS_MANY_INSTANCES, [DRY_RUN_OPT, PRIORITY_OPT],
2437 "[instance...]", "Updates mismatches in recorded disk sizes"),
2438 "master-failover": (
2439 MasterFailover, ARGS_NONE, [NOVOTING_OPT, FORCE_FAILOVER],
2440 "", "Makes the current node the master"),
2441 "master-ping": (
2442 MasterPing, ARGS_NONE, [],
2443 "", "Checks if the master is alive"),
2444 "version": (
2445 ShowClusterVersion, ARGS_NONE, [],
2446 "", "Shows the cluster version"),
2447 "getmaster": (
2448 ShowClusterMaster, ARGS_NONE, [],
2449 "", "Shows the cluster master"),
2450 "copyfile": (
2451 ClusterCopyFile, [ArgFile(min=1, max=1)],
2452 [NODE_LIST_OPT, USE_REPL_NET_OPT, NODEGROUP_OPT],
2453 "[-n node...] <filename>", "Copies a file to all (or only some) nodes"),
2454 "command": (
2455 RunClusterCommand, [ArgCommand(min=1)],
2456 [NODE_LIST_OPT, NODEGROUP_OPT, SHOW_MACHINE_OPT, FAILURE_ONLY_OPT],
2457 "[-n node...] <command>", "Runs a command on all (or only some) nodes"),
2458 "info": (
2459 ShowClusterConfig, ARGS_NONE, [ROMAN_OPT],
2460 "[--roman]", "Show cluster configuration"),
2461 "list-tags": (
2462 ListTags, ARGS_NONE, [], "", "List the tags of the cluster"),
2463 "add-tags": (
2464 AddTags, [ArgUnknown()], [TAG_SRC_OPT, PRIORITY_OPT] + SUBMIT_OPTS,
2465 "tag...", "Add tags to the cluster"),
2466 "remove-tags": (
2467 RemoveTags, [ArgUnknown()], [TAG_SRC_OPT, PRIORITY_OPT] + SUBMIT_OPTS,
2468 "tag...", "Remove tags from the cluster"),
2469 "search-tags": (
2470 SearchTags, [ArgUnknown(min=1, max=1)], [PRIORITY_OPT], "",
2471 "Searches the tags on all objects on"
2472 " the cluster for a given pattern (regex)"),
2473 "queue": (
2474 QueueOps,
2475 [ArgChoice(min=1, max=1, choices=["drain", "undrain", "info"])],
2476 [], "drain|undrain|info", "Change queue properties"),
2477 "watcher": (
2478 WatcherOps,
2479 [ArgChoice(min=1, max=1, choices=["pause", "continue", "info"]),
2480 ArgSuggest(min=0, max=1, choices=["30m", "1h", "4h"])],
2481 [],
2482 "{pause <timespec>|continue|info}", "Change watcher properties"),
2483 "modify": (
2484 SetClusterParams, ARGS_NONE,
2485 [FORCE_OPT,
2486 BACKEND_OPT, CP_SIZE_OPT, RQL_OPT, MAX_TRACK_OPT, INSTALL_IMAGE_OPT,
2487 INSTANCE_COMMUNICATION_NETWORK_OPT, ENABLED_HV_OPT, HVLIST_OPT,
2488 MAC_PREFIX_OPT, MASTER_NETDEV_OPT, MASTER_NETMASK_OPT, NIC_PARAMS_OPT,
2489 VG_NAME_OPT, MAINTAIN_NODE_HEALTH_OPT, UIDPOOL_OPT, ADD_UIDS_OPT,
2490 REMOVE_UIDS_OPT, DRBD_HELPER_OPT, DEFAULT_IALLOCATOR_OPT,
2491 DEFAULT_IALLOCATOR_PARAMS_OPT, RESERVED_LVS_OPT, DRY_RUN_OPT, PRIORITY_OPT,
2492 PREALLOC_WIPE_DISKS_OPT, NODE_PARAMS_OPT, USE_EXTERNAL_MIP_SCRIPT,
2493 DISK_PARAMS_OPT, HV_STATE_OPT, DISK_STATE_OPT] + SUBMIT_OPTS +
2494 [ENABLED_DISK_TEMPLATES_OPT, IPOLICY_STD_SPECS_OPT, MODIFY_ETCHOSTS_OPT,
2495 ENABLED_USER_SHUTDOWN_OPT] +
2496 INSTANCE_POLICY_OPTS +
2497 [GLOBAL_FILEDIR_OPT, GLOBAL_SHARED_FILEDIR_OPT, ZEROING_IMAGE_OPT,
2498 COMPRESSION_TOOLS_OPT] +
2499 [ENABLED_DATA_COLLECTORS_OPT, DATA_COLLECTOR_INTERVAL_OPT],
2500 "[opts...]",
2501 "Alters the parameters of the cluster"),
2502 "renew-crypto": (
2503 RenewCrypto, ARGS_NONE,
2504 [NEW_CLUSTER_CERT_OPT, NEW_RAPI_CERT_OPT, RAPI_CERT_OPT,
2505 NEW_CONFD_HMAC_KEY_OPT, FORCE_OPT,
2506 NEW_CLUSTER_DOMAIN_SECRET_OPT, CLUSTER_DOMAIN_SECRET_OPT,
2507 NEW_SPICE_CERT_OPT, SPICE_CERT_OPT, SPICE_CACERT_OPT,
2508 NEW_NODE_CERT_OPT, NEW_SSH_KEY_OPT, NOSSH_KEYCHECK_OPT,
2509 VERBOSE_OPT, SSH_KEY_BITS_OPT, SSH_KEY_TYPE_OPT],
2510 "[opts...]",
2511 "Renews cluster certificates, keys and secrets"),
2512 "epo": (
2513 Epo, [ArgUnknown()],
2514 [FORCE_OPT, ON_OPT, GROUPS_OPT, ALL_OPT, OOB_TIMEOUT_OPT,
2515 SHUTDOWN_TIMEOUT_OPT, POWER_DELAY_OPT],
2516 "[opts...] [args]",
2517 "Performs an emergency power-off on given args"),
2518 "activate-master-ip": (
2519 ActivateMasterIp, ARGS_NONE, [], "", "Activates the master IP"),
2520 "deactivate-master-ip": (
2521 DeactivateMasterIp, ARGS_NONE, [CONFIRM_OPT], "",
2522 "Deactivates the master IP"),
2523 "show-ispecs-cmd": (
2524 ShowCreateCommand, ARGS_NONE, [], "",
2525 "Show the command line to re-create the cluster"),
2526 "upgrade": (
2527 UpgradeGanetiCommand, ARGS_NONE, [TO_OPT, RESUME_OPT], "",
2528 "Upgrade (or downgrade) to a new Ganeti version"),
2529 }
2530
2531
2532 #: dictionary with aliases for commands
2533 aliases = {
2534 "masterfailover": "master-failover",
2535 "show": "info",
2536 }
2537
2538
2539 def Main():
2540 return GenericMain(commands, override={"tag_type": constants.TAG_CLUSTER},
2541 aliases=aliases)