Fail early for invalid key type and size combinations
[ganeti-github.git] / lib / client / gnt_cluster.py
1 #
2 #
3
4 # Copyright (C) 2006, 2007, 2010, 2011, 2012, 2013, 2014 Google Inc.
5 # All rights reserved.
6 #
7 # Redistribution and use in source and binary forms, with or without
8 # modification, are permitted provided that the following conditions are
9 # met:
10 #
11 # 1. Redistributions of source code must retain the above copyright notice,
12 # this list of conditions and the following disclaimer.
13 #
14 # 2. Redistributions in binary form must reproduce the above copyright
15 # notice, this list of conditions and the following disclaimer in the
16 # documentation and/or other materials provided with the distribution.
17 #
18 # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
19 # IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
20 # TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
21 # PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR
22 # CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
23 # EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
24 # PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
25 # PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
26 # LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
27 # NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
28 # SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29
30 """Cluster related commands"""
31
32 # pylint: disable=W0401,W0613,W0614,C0103
33 # W0401: Wildcard import ganeti.cli
34 # W0613: Unused argument, since all functions follow the same API
35 # W0614: Unused import %s from wildcard import (since we need cli)
36 # C0103: Invalid name gnt-cluster
37
38 from cStringIO import StringIO
39 import os
40 import time
41 import OpenSSL
42 import tempfile
43 import itertools
44
45 from ganeti.cli import *
46 from ganeti import bootstrap
47 from ganeti import compat
48 from ganeti import constants
49 from ganeti import config
50 from ganeti import errors
51 from ganeti import netutils
52 from ganeti import objects
53 from ganeti import opcodes
54 from ganeti import pathutils
55 from ganeti import qlang
56 from ganeti import serializer
57 from ganeti import ssconf
58 from ganeti import ssh
59 from ganeti import uidpool
60 from ganeti import utils
61 from ganeti.client import base
62
63
# Command-line option definitions shared by the cluster sub-commands below.

ON_OPT = cli_option("--on", default=False,
                    action="store_true", dest="on",
                    help="Recover from an EPO")

GROUPS_OPT = cli_option("--groups", default=False,
                        action="store_true", dest="groups",
                        help="Arguments are node groups instead of nodes")

# Confirmation override for a master failover with --no-voting.
FORCE_FAILOVER = cli_option("--yes-do-it", dest="yes_do_it",
                            help="Override interactive check for --no-voting",
                            default=False, action="store_true")

# Same flag name as FORCE_FAILOVER, but used by "redist-conf" to bypass the
# drained-queue check.
FORCE_DISTRIBUTION = cli_option("--yes-do-it", dest="yes_do_it",
                                help="Unconditionally distribute the"
                                " configuration, even if the queue"
                                " is drained",
                                default=False, action="store_true")

TO_OPT = cli_option("--to", default=None, type="string",
                    help="The Ganeti version to upgrade to")

RESUME_OPT = cli_option("--resume", default=False, action="store_true",
                        help="Resume any pending Ganeti upgrades")

DATA_COLLECTOR_INTERVAL_OPT = cli_option(
  "--data-collector-interval", default={}, type="keyval",
  help="Set collection intervals in seconds of data collectors.")

# Timing constants used by the EPO (emergency power off) handling.
_EPO_PING_INTERVAL = 30 # 30 seconds between pings
_EPO_PING_TIMEOUT = 1 # 1 second
_EPO_REACHABLE_TIMEOUT = 15 * 60 # 15 minutes
95
96
97 def _InitEnabledDiskTemplates(opts):
98 """Initialize the list of enabled disk templates.
99
100 """
101 if opts.enabled_disk_templates:
102 return opts.enabled_disk_templates.split(",")
103 else:
104 return constants.DEFAULT_ENABLED_DISK_TEMPLATES
105
106
def _InitVgName(opts, enabled_disk_templates):
  """Initialize the volume group name.

  @type enabled_disk_templates: list of strings
  @param enabled_disk_templates: cluster-wide enabled disk templates
  @return: the volume group name to use, or None when no lvm-based
      template is enabled and none was requested

  """
  vg_name = None
  if opts.vg_name is not None:
    vg_name = opts.vg_name
    if vg_name:
      # A volume group was named explicitly; warn if it will be unused.
      if not utils.IsLvmEnabled(enabled_disk_templates):
        ToStdout("You specified a volume group with --vg-name, but you did not"
                 " enable any disk template that uses lvm.")
    elif utils.IsLvmEnabled(enabled_disk_templates):
      # --vg-name "" means "unset", which conflicts with enabled lvm templates.
      raise errors.OpPrereqError(
          "LVM disk templates are enabled, but vg name not set.")
  elif utils.IsLvmEnabled(enabled_disk_templates):
    # No --vg-name given at all: fall back to the default group.
    vg_name = constants.DEFAULT_VG
  return vg_name
127
128
def _InitDrbdHelper(opts, enabled_disk_templates):
  """Pick the DRBD usermode helper for cluster initialization.

  @type enabled_disk_templates: list of strings
  @param enabled_disk_templates: cluster-wide enabled disk templates
  @return: the helper to use, or None when DRBD is disabled and no
      helper was requested

  """
  drbd_enabled = constants.DT_DRBD8 in enabled_disk_templates

  if opts.drbd_helper is not None and not drbd_enabled:
    # Harmless, but probably not what the user intended.
    ToStdout("Note: You specified a DRBD usermode helper, while DRBD storage"
             " is not enabled.")

  if drbd_enabled:
    if opts.drbd_helper is None:
      # DRBD enabled without an explicit helper: use the default one.
      return constants.DEFAULT_DRBD_HELPER
    if opts.drbd_helper == '':
      # An empty string means "unset", which contradicts enabling DRBD.
      raise errors.OpPrereqError(
        "Unsetting the drbd usermode helper while enabling DRBD is not"
        " allowed.")

  return opts.drbd_helper
148
149
@UsesRPC
def InitCluster(opts, args):
  """Initialize the cluster.

  @param opts: the command line options selected by the user
  @type args: list
  @param args: should contain only one element, the desired
      cluster name
  @rtype: int
  @return: the desired exit code

  """
  enabled_disk_templates = _InitEnabledDiskTemplates(opts)

  try:
    vg_name = _InitVgName(opts, enabled_disk_templates)
    drbd_helper = _InitDrbdHelper(opts, enabled_disk_templates)
  except errors.OpPrereqError, e:
    ToStderr(str(e))
    return 1

  # Derive the master network device from the NIC mode when it was not
  # given explicitly on the command line.
  master_netdev = opts.master_netdev
  if master_netdev is None:
    nic_mode = opts.nicparams.get(constants.NIC_MODE, None)
    if not nic_mode:
      # default case, use bridging
      master_netdev = constants.DEFAULT_BRIDGE
    elif nic_mode == constants.NIC_MODE_OVS:
      # default ovs is different from default bridge
      master_netdev = constants.DEFAULT_OVS
      opts.nicparams[constants.NIC_LINK] = constants.DEFAULT_OVS

  hvlist = opts.enabled_hypervisors
  if hvlist is None:
    hvlist = constants.DEFAULT_ENABLED_HYPERVISOR
  hvlist = hvlist.split(",")

  hvparams = dict(opts.hvparams)
  beparams = opts.beparams
  nicparams = opts.nicparams

  diskparams = dict(opts.diskparams)

  # check the disk template types here, as we cannot rely on the type check done
  # by the opcode parameter types
  diskparams_keys = set(diskparams.keys())
  if not (diskparams_keys <= constants.DISK_TEMPLATES):
    unknown = utils.NiceSort(diskparams_keys - constants.DISK_TEMPLATES)
    ToStderr("Disk templates unknown: %s" % utils.CommaJoin(unknown))
    return 1

  # prepare beparams dict
  beparams = objects.FillDict(constants.BEC_DEFAULTS, beparams)
  utils.ForceDictType(beparams, constants.BES_PARAMETER_COMPAT)

  # prepare nicparams dict
  nicparams = objects.FillDict(constants.NICC_DEFAULTS, nicparams)
  utils.ForceDictType(nicparams, constants.NICS_PARAMETER_TYPES)

  # prepare ndparams dict
  if opts.ndparams is None:
    ndparams = dict(constants.NDC_DEFAULTS)
  else:
    ndparams = objects.FillDict(constants.NDC_DEFAULTS, opts.ndparams)
    utils.ForceDictType(ndparams, constants.NDS_PARAMETER_TYPES)

  # prepare hvparams dict: fill defaults for every known hypervisor, then
  # type-check the merged result
  for hv in constants.HYPER_TYPES:
    if hv not in hvparams:
      hvparams[hv] = {}
    hvparams[hv] = objects.FillDict(constants.HVC_DEFAULTS[hv], hvparams[hv])
    utils.ForceDictType(hvparams[hv], constants.HVS_PARAMETER_TYPES)

  # prepare diskparams dict
  for templ in constants.DISK_TEMPLATES:
    if templ not in diskparams:
      diskparams[templ] = {}
    diskparams[templ] = objects.FillDict(constants.DISK_DT_DEFAULTS[templ],
                                         diskparams[templ])
    utils.ForceDictType(diskparams[templ], constants.DISK_DT_TYPES)

  # prepare ipolicy dict
  ipolicy = CreateIPolicyFromOpts(
    ispecs_mem_size=opts.ispecs_mem_size,
    ispecs_cpu_count=opts.ispecs_cpu_count,
    ispecs_disk_count=opts.ispecs_disk_count,
    ispecs_disk_size=opts.ispecs_disk_size,
    ispecs_nic_count=opts.ispecs_nic_count,
    minmax_ispecs=opts.ipolicy_bounds_specs,
    std_ispecs=opts.ipolicy_std_specs,
    ipolicy_disk_templates=opts.ipolicy_disk_templates,
    ipolicy_vcpu_ratio=opts.ipolicy_vcpu_ratio,
    ipolicy_spindle_ratio=opts.ipolicy_spindle_ratio,
    fill_all=True)

  if opts.candidate_pool_size is None:
    opts.candidate_pool_size = constants.MASTER_POOL_SIZE_DEFAULT

  if opts.mac_prefix is None:
    opts.mac_prefix = constants.DEFAULT_MAC_PREFIX

  uid_pool = opts.uid_pool
  if uid_pool is not None:
    uid_pool = uidpool.ParseUidPool(uid_pool)

  if opts.prealloc_wipe_disks is None:
    opts.prealloc_wipe_disks = False

  external_ip_setup_script = opts.use_external_mip_script
  if external_ip_setup_script is None:
    external_ip_setup_script = False

  try:
    primary_ip_version = int(opts.primary_ip_version)
  except (ValueError, TypeError), err:
    ToStderr("Invalid primary ip version value: %s" % str(err))
    return 1

  master_netmask = opts.master_netmask
  try:
    if master_netmask is not None:
      master_netmask = int(master_netmask)
  except (ValueError, TypeError), err:
    ToStderr("Invalid master netmask value: %s" % str(err))
    return 1

  if opts.disk_state:
    disk_state = utils.FlatToDict(opts.disk_state)
  else:
    disk_state = {}

  hv_state = dict(opts.hv_state)

  if opts.install_image:
    install_image = opts.install_image
  else:
    install_image = ""

  if opts.zeroing_image:
    zeroing_image = opts.zeroing_image
  else:
    zeroing_image = ""

  compression_tools = _GetCompressionTools(opts)

  default_ialloc_params = opts.default_iallocator_params

  if opts.enabled_user_shutdown:
    enabled_user_shutdown = True
  else:
    enabled_user_shutdown = False

  if opts.ssh_key_type:
    ssh_key_type = opts.ssh_key_type
  else:
    ssh_key_type = constants.SSH_DEFAULT_KEY_TYPE

  # Validate the key type/size combination before doing any real work, so
  # that invalid combinations fail early here instead of midway through
  # cluster initialization (see the change subject); exact validation
  # semantics live in ssh.DetermineKeyBits.
  ssh_key_bits = ssh.DetermineKeyBits(ssh_key_type, opts.ssh_key_bits, None,
                                      None)

  bootstrap.InitCluster(cluster_name=args[0],
                        secondary_ip=opts.secondary_ip,
                        vg_name=vg_name,
                        mac_prefix=opts.mac_prefix,
                        master_netmask=master_netmask,
                        master_netdev=master_netdev,
                        file_storage_dir=opts.file_storage_dir,
                        shared_file_storage_dir=opts.shared_file_storage_dir,
                        gluster_storage_dir=opts.gluster_storage_dir,
                        enabled_hypervisors=hvlist,
                        hvparams=hvparams,
                        beparams=beparams,
                        nicparams=nicparams,
                        ndparams=ndparams,
                        diskparams=diskparams,
                        ipolicy=ipolicy,
                        candidate_pool_size=opts.candidate_pool_size,
                        modify_etc_hosts=opts.modify_etc_hosts,
                        modify_ssh_setup=opts.modify_ssh_setup,
                        maintain_node_health=opts.maintain_node_health,
                        drbd_helper=drbd_helper,
                        uid_pool=uid_pool,
                        default_iallocator=opts.default_iallocator,
                        default_iallocator_params=default_ialloc_params,
                        primary_ip_version=primary_ip_version,
                        prealloc_wipe_disks=opts.prealloc_wipe_disks,
                        use_external_mip_script=external_ip_setup_script,
                        hv_state=hv_state,
                        disk_state=disk_state,
                        enabled_disk_templates=enabled_disk_templates,
                        install_image=install_image,
                        zeroing_image=zeroing_image,
                        compression_tools=compression_tools,
                        enabled_user_shutdown=enabled_user_shutdown,
                        ssh_key_type=ssh_key_type,
                        ssh_key_bits=ssh_key_bits,
                        )
  op = opcodes.OpClusterPostInit()
  SubmitOpCode(op, opts=opts)
  return 0
350
351
@UsesRPC
def DestroyCluster(opts, args):
  """Destroy the cluster.

  @param opts: the command line options selected by the user
  @type args: list
  @param args: should be an empty list
  @rtype: int
  @return: the desired exit code

  """
  if not opts.yes_do_it:
    # Refuse to act without the explicit confirmation flag.
    ToStderr("Destroying a cluster is irreversible. If you really want"
             " destroy this cluster, supply the --yes-do-it option.")
    return 1

  master_uuid = SubmitOpCode(opcodes.OpClusterDestroy(), opts=opts)
  # The opcode succeeded, so it is now safe to shut down all the daemons.
  bootstrap.FinalizeClusterDestroy(master_uuid)
  return 0
374
375
def RenameCluster(opts, args):
  """Rename the cluster.

  @param opts: the command line options selected by the user
  @type args: list
  @param args: should contain only one element, the new cluster name
  @rtype: int
  @return: the desired exit code

  """
  cl = GetClient()
  (cluster_name, ) = cl.QueryConfigValues(["cluster_name"])
  new_name = args[0]

  if not opts.force:
    # Renaming removes the cluster IP from the master node, so ask for
    # confirmation unless --force was given.
    usertext = ("This will rename the cluster from '%s' to '%s'. If you are"
                " connected over the network to the cluster name, the"
                " operation is very dangerous as the IP address will be"
                " removed from the node and the change may not go through."
                " Continue?") % (cluster_name, new_name)
    if not AskUser(usertext):
      return 1

  result = SubmitOpCode(opcodes.OpClusterRename(name=new_name), opts=opts,
                        cl=cl)
  if result:
    ToStdout("Cluster renamed from '%s' to '%s'", cluster_name, result)

  return 0
407
408
def ActivateMasterIp(opts, args):
  """Activates the master IP.

  """
  SubmitOpCode(opcodes.OpClusterActivateMasterIp())
  return 0
416
417
def DeactivateMasterIp(opts, args):
  """Deactivates the master IP.

  """
  if not opts.confirm:
    # Dropping the master IP cuts existing client connections; confirm first.
    usertext = ("This will disable the master IP. All the open connections to"
                " the master IP will be closed. To reach the master you will"
                " need to use its node IP."
                " Continue?")
    if not AskUser(usertext):
      return 1

  SubmitOpCode(opcodes.OpClusterDeactivateMasterIp())
  return 0
433
434
def RedistributeConfig(opts, args):
  """Forces push of the cluster configuration.

  @param opts: the command line options selected by the user
  @type args: list
  @param args: empty list
  @rtype: int
  @return: the desired exit code

  """
  op = opcodes.OpClusterRedistConf()
  if not opts.yes_do_it:
    SubmitOrSend(op, opts)
  else:
    # --yes-do-it pushes the configuration even while the queue is drained.
    SubmitOpCodeToDrainedQueue(op)
  return 0
451
452
def ShowClusterVersion(opts, args):
  """Write version of ganeti software to the standard output.

  @param opts: the command line options selected by the user
  @type args: list
  @param args: should be an empty list
  @rtype: int
  @return: the desired exit code

  """
  info = GetClient().QueryClusterInfo()
  # Render each version field as a "Label: value" line.
  for label, key in [("Software version", "software_version"),
                     ("Internode protocol", "protocol_version"),
                     ("Configuration format", "config_version"),
                     ("OS api version", "os_api_version"),
                     ("Export interface", "export_version"),
                     ("VCS version", "vcs_version")]:
    ToStdout("%s: %s", label, info[key])
  return 0
472
473
def ShowClusterMaster(opts, args):
  """Write name of master node to the standard output.

  @param opts: the command line options selected by the user
  @type args: list
  @param args: should be an empty list
  @rtype: int
  @return: the desired exit code

  """
  ToStdout(bootstrap.GetMaster())
  return 0
487
488
489 def _FormatGroupedParams(paramsdict, roman=False):
490 """Format Grouped parameters (be, nic, disk) by group.
491
492 @type paramsdict: dict of dicts
493 @param paramsdict: {group: {param: value, ...}, ...}
494 @rtype: dict of dicts
495 @return: copy of the input dictionaries with strings as values
496
497 """
498 ret = {}
499 for (item, val) in paramsdict.items():
500 if isinstance(val, dict):
501 ret[item] = _FormatGroupedParams(val, roman=roman)
502 elif roman and isinstance(val, int):
503 ret[item] = compat.TryToRoman(val)
504 else:
505 ret[item] = str(val)
506 return ret
507
508
def _FormatDataCollectors(paramsdict):
  """Summarize the data-collector settings from the cluster info.

  @type paramsdict: dict of dicts
  @param paramsdict: response of QueryClusterInfo
  @rtype: dict of dicts
  @return: per-collector dict with its "active" flag and collection interval

  """
  enabled = paramsdict[constants.DATA_COLLECTORS_ENABLED_NAME]
  interval = paramsdict[constants.DATA_COLLECTORS_INTERVAL_NAME]

  # Intervals are divided by 1e6 for display, i.e. stored values are
  # presumably microseconds rendered here as seconds.
  return dict((name, dict(active=enabled[name],
                          interval="%.3fs" % (interval[name] / 1e6)))
              for name in enabled)
527
528
def ShowClusterConfig(opts, args):
  """Shows cluster information.

  @param opts: the command line options selected by the user
  @type args: list
  @param args: should be an empty list
  @rtype: int
  @return: the desired exit code

  """
  cl = GetClient()
  result = cl.QueryClusterInfo()

  if result["tags"]:
    tags = utils.CommaJoin(utils.NiceSort(result["tags"]))
  else:
    tags = "(none)"
  if result["reserved_lvs"]:
    reserved_lvs = utils.CommaJoin(result["reserved_lvs"])
  else:
    reserved_lvs = "(none)"

  # Only display parameters of hypervisors that are actually enabled.
  enabled_hv = result["enabled_hypervisors"]
  hvparams = dict((k, v) for k, v in result["hvparams"].iteritems()
                  if k in enabled_hv)

  # (label, value) pairs consumed by PrintGenericInfo below; values may be
  # nested dicts/lists which are rendered hierarchically.
  info = [
    ("Cluster name", result["name"]),
    ("Cluster UUID", result["uuid"]),

    ("Creation time", utils.FormatTime(result["ctime"])),
    ("Modification time", utils.FormatTime(result["mtime"])),

    ("Master node", result["master"]),

    ("Architecture (this node)",
     "%s (%s)" % (result["architecture"][0], result["architecture"][1])),

    ("Tags", tags),

    ("Default hypervisor", result["default_hypervisor"]),
    ("Enabled hypervisors", utils.CommaJoin(enabled_hv)),

    ("Hypervisor parameters", _FormatGroupedParams(hvparams,
                                                   opts.roman_integers)),

    ("OS-specific hypervisor parameters",
     _FormatGroupedParams(result["os_hvp"], opts.roman_integers)),

    ("OS parameters", _FormatGroupedParams(result["osparams"],
                                           opts.roman_integers)),

    ("Hidden OSes", utils.CommaJoin(result["hidden_os"])),
    ("Blacklisted OSes", utils.CommaJoin(result["blacklisted_os"])),

    ("Cluster parameters", [
      ("candidate pool size",
       compat.TryToRoman(result["candidate_pool_size"],
                         convert=opts.roman_integers)),
      ("maximal number of jobs running simultaneously",
       compat.TryToRoman(result["max_running_jobs"],
                         convert=opts.roman_integers)),
      ("maximal number of jobs simultaneously tracked by the scheduler",
       compat.TryToRoman(result["max_tracked_jobs"],
                         convert=opts.roman_integers)),
      ("mac prefix", result["mac_prefix"]),
      ("master netdev", result["master_netdev"]),
      ("master netmask", compat.TryToRoman(result["master_netmask"],
                                           opts.roman_integers)),
      ("use external master IP address setup script",
       result["use_external_mip_script"]),
      ("lvm volume group", result["volume_group_name"]),
      ("lvm reserved volumes", reserved_lvs),
      ("drbd usermode helper", result["drbd_usermode_helper"]),
      ("file storage path", result["file_storage_dir"]),
      ("shared file storage path", result["shared_file_storage_dir"]),
      ("gluster storage path", result["gluster_storage_dir"]),
      ("maintenance of node health", result["maintain_node_health"]),
      ("uid pool", uidpool.FormatUidPool(result["uid_pool"])),
      ("default instance allocator", result["default_iallocator"]),
      ("default instance allocator parameters",
       result["default_iallocator_params"]),
      ("primary ip version", compat.TryToRoman(result["primary_ip_version"],
                                               opts.roman_integers)),
      ("preallocation wipe disks", result["prealloc_wipe_disks"]),
      ("OS search path", utils.CommaJoin(pathutils.OS_SEARCH_PATH)),
      ("ExtStorage Providers search path",
       utils.CommaJoin(pathutils.ES_SEARCH_PATH)),
      ("enabled disk templates",
       utils.CommaJoin(result["enabled_disk_templates"])),
      ("install image", result["install_image"]),
      ("instance communication network",
       result["instance_communication_network"]),
      ("zeroing image", result["zeroing_image"]),
      ("compression tools", result["compression_tools"]),
      ("enabled user shutdown", result["enabled_user_shutdown"]),
      ("modify ssh setup", result["modify_ssh_setup"]),
      ("ssh_key_type", result["ssh_key_type"]),
      ("ssh_key_bits", result["ssh_key_bits"]),
      ]),

    ("Default node parameters",
     _FormatGroupedParams(result["ndparams"], roman=opts.roman_integers)),

    ("Default instance parameters",
     _FormatGroupedParams(result["beparams"], roman=opts.roman_integers)),

    ("Default nic parameters",
     _FormatGroupedParams(result["nicparams"], roman=opts.roman_integers)),

    ("Default disk parameters",
     _FormatGroupedParams(result["diskparams"], roman=opts.roman_integers)),

    ("Instance policy - limits for instances",
     FormatPolicyInfo(result["ipolicy"], None, True, opts.roman_integers)),
    ("Data collectors", _FormatDataCollectors(result)),
    ]

  PrintGenericInfo(info)
  return 0
649
650
def ClusterCopyFile(opts, args):
  """Copy a file from master to some nodes.

  @param opts: the command line options selected by the user
  @type args: list
  @param args: should contain only one element, the path of
      the file to be copied
  @rtype: int
  @return: the desired exit code

  """
  filename = args[0]
  filename = os.path.abspath(filename)

  if not os.path.exists(filename):
    raise errors.OpPrereqError("No such filename '%s'" % filename,
                               errors.ECODE_INVAL)

  # Separate clients: one for config values, one for node queries.
  cl = GetClient()
  qcl = GetClient()
  try:
    cluster_name = cl.QueryConfigValues(["cluster_name"])[0]

    results = GetOnlineNodes(nodes=opts.nodes, cl=qcl, filter_master=True,
                             secondary_ips=opts.use_replication_network,
                             nodegroup=opts.nodegroup)
    # NOTE(review): ports are looked up for opts.nodes, while the copy
    # loop below iterates over the resolved "results" list; if those two
    # lists can differ (e.g. no --node given, or the master filtered
    # out), zip() would pair nodes with the wrong ports — confirm
    # against GetNodesSshPorts/GetOnlineNodes semantics.
    ports = GetNodesSshPorts(opts.nodes, qcl)
  finally:
    cl.Close()
    qcl.Close()

  srun = ssh.SshRunner(cluster_name)
  for (node, port) in zip(results, ports):
    if not srun.CopyFileToNode(node, port, filename):
      # Best-effort: report the failure and keep copying to other nodes.
      ToStderr("Copy of file %s to node %s:%d failed", filename, node, port)

  return 0
688
689
def RunClusterCommand(opts, args):
  """Run a command on some nodes.

  @param opts: the command line options selected by the user
  @type args: list
  @param args: should contain the command to be run and its arguments
  @rtype: int
  @return: the desired exit code

  """
  cl = GetClient()
  qcl = GetClient()

  command = " ".join(args)

  nodes = GetOnlineNodes(nodes=opts.nodes, cl=qcl, nodegroup=opts.nodegroup)

  cluster_name, master_node = cl.QueryConfigValues(["cluster_name",
                                                    "master_node"])

  # Make sure master node is at list end
  if master_node in nodes:
    nodes.remove(master_node)
    nodes.append(master_node)

  # Look up the SSH ports only after the final node order is known;
  # querying them before moving the master to the end would make the
  # zip() below pair nodes with the wrong ports whenever the master was
  # part of the list.
  ports = GetNodesSshPorts(nodes, qcl)

  srun = ssh.SshRunner(cluster_name=cluster_name)

  for (name, port) in zip(nodes, ports):
    result = srun.Run(name, constants.SSH_LOGIN_USER, command, port=port)

    if opts.failure_only and result.exit_code == constants.EXIT_SUCCESS:
      # Do not output anything for successful commands
      continue

    ToStdout("------------------------------------------------")
    if opts.show_machine_names:
      for line in result.output.splitlines():
        ToStdout("%s: %s", name, line)
    else:
      ToStdout("node: %s", name)
      ToStdout("%s", result.output)
    ToStdout("return code = %s", result.exit_code)

  return 0
735
736
def VerifyCluster(opts, args):
  """Verify integrity of cluster, performing various test on nodes.

  @param opts: the command line options selected by the user
  @type args: list
  @param args: should be an empty list
  @rtype: int
  @return: the desired exit code

  """
  skip_checks = []

  if opts.skip_nplusone_mem:
    skip_checks.append(constants.VERIFY_NPLUSONE_MEM)

  cl = GetClient()

  op = opcodes.OpClusterVerify(verbose=opts.verbose,
                               error_codes=opts.error_codes,
                               debug_simulate_errors=opts.simulate_errors,
                               skip_checks=skip_checks,
                               ignore_errors=opts.ignore_errors,
                               group_name=opts.nodegroup,
                               verify_clutter=opts.verify_clutter)
  result = SubmitOpCode(op, cl=cl, opts=opts)

  # Keep track of submitted jobs
  jex = JobExecutor(cl=cl, opts=opts)

  for (status, job_id) in result[constants.JOB_IDS_KEY]:
    jex.AddJobId(None, status, job_id)

  results = jex.GetResults()

  # "results" holds (job_success, op_results) pairs.  The zip(*...)
  # transposes them into two sequences; ifilterfalse(bool, ...) keeps only
  # the falsy entries, so bad_jobs counts failed jobs and bad_results
  # counts jobs whose single opcode result was not truthy.
  (bad_jobs, bad_results) = \
    map(len,
        # Convert iterators to lists
        map(list,
            # Count errors
            map(compat.partial(itertools.ifilterfalse, bool),
                # Convert result to booleans in a tuple
                zip(*((job_success, len(op_results) == 1 and op_results[0])
                      for (job_success, op_results) in results)))))

  if bad_jobs == 0 and bad_results == 0:
    rcode = constants.EXIT_SUCCESS
  else:
    rcode = constants.EXIT_FAILURE
    if bad_jobs > 0:
      ToStdout("%s job(s) failed while verifying the cluster.", bad_jobs)

  return rcode
789
790
def VerifyDisks(opts, args):
  """Verify integrity of cluster disks.

  @param opts: the command line options selected by the user
  @type args: list
  @param args: should be an empty list
  @rtype: int
  @return: the desired exit code

  """
  cl = GetClient()

  op = opcodes.OpClusterVerifyDisks(group_name=opts.nodegroup)

  result = SubmitOpCode(op, cl=cl, opts=opts)

  # Keep track of submitted jobs
  jex = JobExecutor(cl=cl, opts=opts)

  for (status, job_id) in result[constants.JOB_IDS_KEY]:
    jex.AddJobId(None, status, job_id)

  retcode = constants.EXIT_SUCCESS

  for (status, result) in jex.GetResults():
    if not status:
      ToStdout("Job failed: %s", result)
      continue

    # Each successful job returns a single tuple (bad_nodes, instances,
    # missing): nodes that could not be queried, instances with inactive
    # disks, and instances with missing logical volumes.
    ((bad_nodes, instances, missing), ) = result

    for node, text in bad_nodes.items():
      # Only the last 400 characters of the error text are shown.
      ToStdout("Error gathering data on node %s: %s",
               node, utils.SafeEncode(text[-400:]))
      retcode = constants.EXIT_FAILURE
      ToStdout("You need to fix these nodes first before fixing instances")

    for iname in instances:
      if iname in missing:
        continue
      # Try to (re-)activate the disks of every affected instance.
      op = opcodes.OpInstanceActivateDisks(instance_name=iname)
      try:
        ToStdout("Activating disks for instance '%s'", iname)
        SubmitOpCode(op, opts=opts, cl=cl)
      except errors.GenericError, err:
        nret, msg = FormatError(err)
        retcode |= nret
        ToStderr("Error activating disks for instance %s: %s", iname, msg)

    if missing:
      for iname, ival in missing.iteritems():
        all_missing = compat.all(x[0] in bad_nodes for x in ival)
        if all_missing:
          ToStdout("Instance %s cannot be verified as it lives on"
                   " broken nodes", iname)
        else:
          ToStdout("Instance %s has missing logical volumes:", iname)
          ival.sort()
          for node, vol in ival:
            if node in bad_nodes:
              ToStdout("\tbroken node %s /dev/%s", node, vol)
            else:
              ToStdout("\t%s /dev/%s", node, vol)

      ToStdout("You need to replace or recreate disks for all the above"
               " instances if this message persists after fixing broken nodes.")
      retcode = constants.EXIT_FAILURE
    elif not instances:
      ToStdout("No disks need to be activated.")

  return retcode
862
863
def RepairDiskSizes(opts, args):
  """Verify sizes of cluster disks.

  @param opts: the command line options selected by the user
  @type args: list
  @param args: optional list of instances to restrict check to
  @rtype: int
  @return: the desired exit code

  """
  op = opcodes.OpClusterRepairDiskSizes(instances=args)
  SubmitOpCode(op, opts=opts)
  # Return an explicit success code; the docstring (and every sibling
  # command here) promises an exit code, but this function previously
  # fell off the end and returned None.
  return 0
876
877
@UsesRPC
def MasterFailover(opts, args):
  """Failover the master node.

  This command, when run on a non-master node, will cause the current
  master to cease being master, and the non-master to become new
  master.

  @param opts: the command line options selected by the user
  @type args: list
  @param args: should be an empty list
  @rtype: int
  @return: the desired exit code

  """
  if opts.no_voting and not opts.yes_do_it:
    # --no-voting can split the cluster; require interactive confirmation
    # unless --yes-do-it was also given.
    usertext = ("This will perform the failover even if most other nodes"
                " are down, or if this node is outdated. This is dangerous"
                " as it can lead to a non-consistent cluster. Check the"
                " gnt-cluster(8) man page before proceeding. Continue?")
    if not AskUser(usertext):
      return 1

  rvalue, messages = bootstrap.MasterFailover(no_voting=opts.no_voting)
  for message in messages:
    ToStderr(message)
  return rvalue
905
906
def MasterPing(opts, args):
  """Checks if the master is alive.

  @param opts: the command line options selected by the user
  @type args: list
  @param args: should be an empty list
  @rtype: int
  @return: the desired exit code

  """
  try:
    GetClient().QueryClusterInfo()
  except Exception: # pylint: disable=W0703
    # Any failure at all means the master is not reachable.
    return 1
  return 0
923
924
def SearchTags(opts, args):
  """Searches the tags on all the cluster.

  @param opts: the command line options selected by the user
  @type args: list
  @param args: should contain only one element, the tag pattern
  @rtype: int
  @return: the desired exit code

  """
  op = opcodes.OpTagsSearch(pattern=args[0])
  result = SubmitOpCode(op, opts=opts)
  if not result:
    return 1
  for path, tag in sorted(result):
    ToStdout("%s %s", path, tag)
  # Return an explicit success code; the docstring promises an exit code,
  # but this function previously fell off the end and returned None.
  return 0
943
944
def _ReadAndVerifyCert(cert_filename, verify_private_key=False):
  """Reads and verifies an X509 certificate.

  @type cert_filename: string
  @param cert_filename: the path of the file containing the certificate to
                        verify encoded in PEM format
  @type verify_private_key: bool
  @param verify_private_key: whether to verify the private key in addition to
                             the public certificate
  @rtype: string
  @return: a string containing the PEM-encoded certificate.
  @raise errors.X509CertError: if the file cannot be read, or its content
      cannot be loaded as a certificate (or private key, when requested)

  """
  try:
    pem = utils.ReadFile(cert_filename)
  except IOError, err:
    raise errors.X509CertError(cert_filename,
                               "Unable to read certificate: %s" % str(err))

  try:
    # Load purely for validation; the parsed object is discarded and the
    # raw PEM text is what gets returned to the caller.
    OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM, pem)
  except Exception, err:
    # Broad catch: pyOpenSSL may raise various exception types here; all
    # of them are re-raised uniformly as X509CertError.
    raise errors.X509CertError(cert_filename,
                               "Unable to load certificate: %s" % str(err))

  if verify_private_key:
    try:
      # The private key is expected to live in the same PEM file.
      OpenSSL.crypto.load_privatekey(OpenSSL.crypto.FILETYPE_PEM, pem)
    except Exception, err:
      raise errors.X509CertError(cert_filename,
                                 "Unable to load private key: %s" % str(err))

  return pem
978
979
980 # pylint: disable=R0913
981 def _RenewCrypto(new_cluster_cert, new_rapi_cert, # pylint: disable=R0911
982 rapi_cert_filename, new_spice_cert, spice_cert_filename,
983 spice_cacert_filename, new_confd_hmac_key, new_cds,
984 cds_filename, force, new_node_cert, new_ssh_keys,
985 ssh_key_type, ssh_key_bits, verbose, debug):
986 """Renews cluster certificates, keys and secrets.
987
988 @type new_cluster_cert: bool
989 @param new_cluster_cert: Whether to generate a new cluster certificate
990 @type new_rapi_cert: bool
991 @param new_rapi_cert: Whether to generate a new RAPI certificate
992 @type rapi_cert_filename: string
993 @param rapi_cert_filename: Path to file containing new RAPI certificate
994 @type new_spice_cert: bool
995 @param new_spice_cert: Whether to generate a new SPICE certificate
996 @type spice_cert_filename: string
997 @param spice_cert_filename: Path to file containing new SPICE certificate
998 @type spice_cacert_filename: string
999 @param spice_cacert_filename: Path to file containing the certificate of the
1000 CA that signed the SPICE certificate
1001 @type new_confd_hmac_key: bool
1002 @param new_confd_hmac_key: Whether to generate a new HMAC key
1003 @type new_cds: bool
1004 @param new_cds: Whether to generate a new cluster domain secret
1005 @type cds_filename: string
1006 @param cds_filename: Path to file containing new cluster domain secret
1007 @type force: bool
1008 @param force: Whether to ask user for confirmation
1009 @type new_node_cert: bool
1010 @param new_node_cert: Whether to generate new node certificates
1011 @type new_ssh_keys: bool
1012 @param new_ssh_keys: Whether to generate new node SSH keys
1013 @type ssh_key_type: One of L{constants.SSHK_ALL}
1014 @param ssh_key_type: The type of SSH key to be generated
1015 @type ssh_key_bits: int
1016 @param ssh_key_bits: The length of the key to be generated
1017 @type verbose: boolean
1018 @param verbose: Show verbose output
1019 @type debug: boolean
1020 @param debug: Show debug output
1021
1022 """
1023 ToStdout("Updating certificates now. Running \"gnt-cluster verify\" "
1024 " is recommended after this operation.")
1025
1026 if new_rapi_cert and rapi_cert_filename:
1027 ToStderr("Only one of the --new-rapi-certificate and --rapi-certificate"
1028 " options can be specified at the same time.")
1029 return 1
1030
1031 if new_cds and cds_filename:
1032 ToStderr("Only one of the --new-cluster-domain-secret and"
1033 " --cluster-domain-secret options can be specified at"
1034 " the same time.")
1035 return 1
1036
1037 if new_spice_cert and (spice_cert_filename or spice_cacert_filename):
1038 ToStderr("When using --new-spice-certificate, the --spice-certificate"
1039 " and --spice-ca-certificate must not be used.")
1040 return 1
1041
1042 if bool(spice_cacert_filename) ^ bool(spice_cert_filename):
1043 ToStderr("Both --spice-certificate and --spice-ca-certificate must be"
1044 " specified.")
1045 return 1
1046
1047 rapi_cert_pem, spice_cert_pem, spice_cacert_pem = (None, None, None)
1048 try:
1049 if rapi_cert_filename:
1050 rapi_cert_pem = _ReadAndVerifyCert(rapi_cert_filename, True)
1051 if spice_cert_filename:
1052 spice_cert_pem = _ReadAndVerifyCert(spice_cert_filename, True)
1053 spice_cacert_pem = _ReadAndVerifyCert(spice_cacert_filename)
1054 except errors.X509CertError, err:
1055 ToStderr("Unable to load X509 certificate from %s: %s", err[0], err[1])
1056 return 1
1057
1058 if cds_filename:
1059 try:
1060 cds = utils.ReadFile(cds_filename)
1061 except Exception, err: # pylint: disable=W0703
1062 ToStderr("Can't load new cluster domain secret from %s: %s" %
1063 (cds_filename, str(err)))
1064 return 1
1065 else:
1066 cds = None
1067
1068 if not force:
1069 usertext = ("This requires all daemons on all nodes to be restarted and"
1070 " may take some time. Continue?")
1071 if not AskUser(usertext):
1072 return 1
1073
1074 def _RenewCryptoInner(ctx):
1075 ctx.feedback_fn("Updating certificates and keys")
1076
1077 bootstrap.GenerateClusterCrypto(False,
1078 new_rapi_cert,
1079 new_spice_cert,
1080 new_confd_hmac_key,
1081 new_cds,
1082 False,
1083 None,
1084 rapi_cert_pem=rapi_cert_pem,
1085 spice_cert_pem=spice_cert_pem,
1086 spice_cacert_pem=spice_cacert_pem,
1087 cds=cds)
1088
1089 files_to_copy = []
1090
1091 if new_rapi_cert or rapi_cert_pem:
1092 files_to_copy.append(pathutils.RAPI_CERT_FILE)
1093
1094 if new_spice_cert or spice_cert_pem:
1095 files_to_copy.append(pathutils.SPICE_CERT_FILE)
1096 files_to_copy.append(pathutils.SPICE_CACERT_FILE)
1097
1098 if new_confd_hmac_key:
1099 files_to_copy.append(pathutils.CONFD_HMAC_KEY)
1100
1101 if new_cds or cds:
1102 files_to_copy.append(pathutils.CLUSTER_DOMAIN_SECRET_FILE)
1103
1104 if files_to_copy:
1105 for node_name in ctx.nonmaster_nodes:
1106 port = ctx.ssh_ports[node_name]
1107 ctx.feedback_fn("Copying %s to %s:%d" %
1108 (", ".join(files_to_copy), node_name, port))
1109 for file_name in files_to_copy:
1110 ctx.ssh.CopyFileToNode(node_name, port, file_name)
1111
1112 def _RenewClientCerts(ctx):
1113 ctx.feedback_fn("Updating client SSL certificates.")
1114
1115 cluster_name = ssconf.SimpleStore().GetClusterName()
1116
1117 for node_name in ctx.nonmaster_nodes + [ctx.master_node]:
1118 ssh_port = ctx.ssh_ports[node_name]
1119 data = {
1120 constants.NDS_CLUSTER_NAME: cluster_name,
1121 constants.NDS_NODE_DAEMON_CERTIFICATE:
1122 utils.ReadFile(pathutils.NODED_CERT_FILE),
1123 constants.NDS_NODE_NAME: node_name,
1124 constants.NDS_ACTION: constants.CRYPTO_ACTION_CREATE,
1125 }
1126
1127 ssh.RunSshCmdWithStdin(
1128 cluster_name,
1129 node_name,
1130 pathutils.SSL_UPDATE,
1131 ssh_port,
1132 data,
1133 debug=ctx.debug,
1134 verbose=ctx.verbose,
1135 use_cluster_key=True,
1136 ask_key=False,
1137 strict_host_check=True)
1138
1139 # Create a temporary ssconf file using the master's client cert digest
1140 # and the 'bootstrap' keyword to enable distribution of all nodes' digests.
1141 master_digest = utils.GetCertificateDigest()
1142 ssconf_master_candidate_certs_filename = os.path.join(
1143 pathutils.DATA_DIR, "%s%s" %
1144 (constants.SSCONF_FILEPREFIX, constants.SS_MASTER_CANDIDATES_CERTS))
1145 utils.WriteFile(
1146 ssconf_master_candidate_certs_filename,
1147 data="%s=%s" % (constants.CRYPTO_BOOTSTRAP, master_digest))
1148 for node_name in ctx.nonmaster_nodes:
1149 port = ctx.ssh_ports[node_name]
1150 ctx.feedback_fn("Copying %s to %s:%d" %
1151 (ssconf_master_candidate_certs_filename, node_name, port))
1152 ctx.ssh.CopyFileToNode(node_name, port,
1153 ssconf_master_candidate_certs_filename)
1154
1155 # Write the boostrap entry to the config using wconfd.
1156 config_live_lock = utils.livelock.LiveLock("renew_crypto")
1157 cfg = config.GetConfig(None, config_live_lock)
1158 cfg.AddNodeToCandidateCerts(constants.CRYPTO_BOOTSTRAP, master_digest)
1159 cfg.Update(cfg.GetClusterInfo(), ctx.feedback_fn)
1160
1161 def _RenewServerAndClientCerts(ctx):
1162 ctx.feedback_fn("Updating the cluster SSL certificate.")
1163
1164 master_name = ssconf.SimpleStore().GetMasterNode()
1165 bootstrap.GenerateClusterCrypto(True, # cluster cert
1166 False, # rapi cert
1167 False, # spice cert
1168 False, # confd hmac key
1169 False, # cds
1170 True, # client cert
1171 master_name)
1172
1173 for node_name in ctx.nonmaster_nodes:
1174 port = ctx.ssh_ports[node_name]
1175 server_cert = pathutils.NODED_CERT_FILE
1176 ctx.feedback_fn("Copying %s to %s:%d" %
1177 (server_cert, node_name, port))
1178 ctx.ssh.CopyFileToNode(node_name, port, server_cert)
1179
1180 _RenewClientCerts(ctx)
1181
1182 if new_rapi_cert or new_spice_cert or new_confd_hmac_key or new_cds:
1183 RunWhileClusterStopped(ToStdout, _RenewCryptoInner)
1184
1185 # If only node certficates are recreated, call _RenewClientCerts only.
1186 if new_node_cert and not new_cluster_cert:
1187 RunWhileDaemonsStopped(ToStdout, [constants.NODED, constants.WCONFD],
1188 _RenewClientCerts, verbose=verbose, debug=debug)
1189
1190 # If the cluster certificate are renewed, the client certificates need
1191 # to be renewed too.
1192 if new_cluster_cert:
1193 RunWhileDaemonsStopped(ToStdout, [constants.NODED, constants.WCONFD],
1194 _RenewServerAndClientCerts, verbose=verbose,
1195 debug=debug)
1196
1197 if new_node_cert or new_cluster_cert or new_ssh_keys:
1198 cl = GetClient()
1199 renew_op = opcodes.OpClusterRenewCrypto(
1200 node_certificates=new_node_cert or new_cluster_cert,
1201 renew_ssh_keys=new_ssh_keys,
1202 ssh_key_type=ssh_key_type,
1203 ssh_key_bits=ssh_key_bits)
1204 SubmitOpCode(renew_op, cl=cl)
1205
1206 ToStdout("All requested certificates and keys have been replaced."
1207 " Running \"gnt-cluster verify\" now is recommended.")
1208
1209 return 0
1210
1211
1212 def _BuildGanetiPubKeys(options, pub_key_file=pathutils.SSH_PUB_KEYS, cl=None,
1213 get_online_nodes_fn=GetOnlineNodes,
1214 get_nodes_ssh_ports_fn=GetNodesSshPorts,
1215 get_node_uuids_fn=GetNodeUUIDs,
1216 homedir_fn=None):
1217 """Recreates the 'ganeti_pub_key' file by polling all nodes.
1218
1219 """
1220
1221 if not cl:
1222 cl = GetClient()
1223
1224 (cluster_name, master_node, modify_ssh_setup, ssh_key_type) = \
1225 cl.QueryConfigValues(["cluster_name", "master_node", "modify_ssh_setup",
1226 "ssh_key_type"])
1227
1228 # In case Ganeti is not supposed to modify the SSH setup, simply exit and do
1229 # not update this file.
1230 if not modify_ssh_setup:
1231 return
1232
1233 if os.path.exists(pub_key_file):
1234 utils.CreateBackup(pub_key_file)
1235 utils.RemoveFile(pub_key_file)
1236
1237 ssh.ClearPubKeyFile(pub_key_file)
1238
1239 online_nodes = get_online_nodes_fn([], cl=cl)
1240 ssh_ports = get_nodes_ssh_ports_fn(online_nodes + [master_node], cl)
1241 ssh_port_map = dict(zip(online_nodes + [master_node], ssh_ports))
1242
1243 node_uuids = get_node_uuids_fn(online_nodes + [master_node], cl)
1244 node_uuid_map = dict(zip(online_nodes + [master_node], node_uuids))
1245
1246 nonmaster_nodes = [name for name in online_nodes
1247 if name != master_node]
1248
1249 _, pub_key_filename, _ = \
1250 ssh.GetUserFiles(constants.SSH_LOGIN_USER, mkdir=False, dircheck=False,
1251 kind=ssh_key_type, _homedir_fn=homedir_fn)
1252
1253 # get the key file of the master node
1254 pub_key = utils.ReadFile(pub_key_filename)
1255 ssh.AddPublicKey(node_uuid_map[master_node], pub_key,
1256 key_file=pub_key_file)
1257
1258 # get the key files of all non-master nodes
1259 for node in nonmaster_nodes:
1260 pub_key = ssh.ReadRemoteSshPubKeys(pub_key_filename, node, cluster_name,
1261 ssh_port_map[node],
1262 options.ssh_key_check,
1263 options.ssh_key_check)
1264 ssh.AddPublicKey(node_uuid_map[node], pub_key, key_file=pub_key_file)
1265
1266
1267 def RenewCrypto(opts, args):
1268 """Renews cluster certificates, keys and secrets.
1269
1270 """
1271 if opts.new_ssh_keys:
1272 _BuildGanetiPubKeys(opts)
1273 return _RenewCrypto(opts.new_cluster_cert,
1274 opts.new_rapi_cert,
1275 opts.rapi_cert,
1276 opts.new_spice_cert,
1277 opts.spice_cert,
1278 opts.spice_cacert,
1279 opts.new_confd_hmac_key,
1280 opts.new_cluster_domain_secret,
1281 opts.cluster_domain_secret,
1282 opts.force,
1283 opts.new_node_cert,
1284 opts.new_ssh_keys,
1285 opts.ssh_key_type,
1286 opts.ssh_key_bits,
1287 opts.verbose,
1288 opts.debug > 0)
1289
1290
1291 def _GetEnabledDiskTemplates(opts):
1292 """Determine the list of enabled disk templates.
1293
1294 """
1295 if opts.enabled_disk_templates:
1296 return opts.enabled_disk_templates.split(",")
1297 else:
1298 return None
1299
1300
1301 def _GetVgName(opts, enabled_disk_templates):
1302 """Determine the volume group name.
1303
1304 @type enabled_disk_templates: list of strings
1305 @param enabled_disk_templates: cluster-wide enabled disk-templates
1306
1307 """
1308 # consistency between vg name and enabled disk templates
1309 vg_name = None
1310 if opts.vg_name is not None:
1311 vg_name = opts.vg_name
1312 if enabled_disk_templates:
1313 if vg_name and not utils.IsLvmEnabled(enabled_disk_templates):
1314 ToStdout("You specified a volume group with --vg-name, but you did not"
1315 " enable any of the following lvm-based disk templates: %s" %
1316 utils.CommaJoin(constants.DTS_LVM))
1317 return vg_name
1318
1319
1320 def _GetDrbdHelper(opts, enabled_disk_templates):
1321 """Determine the DRBD usermode helper.
1322
1323 """
1324 drbd_helper = opts.drbd_helper
1325 if enabled_disk_templates:
1326 drbd_enabled = constants.DT_DRBD8 in enabled_disk_templates
1327 if not drbd_enabled and opts.drbd_helper:
1328 ToStdout("You specified a DRBD usermode helper with "
1329 " --drbd-usermode-helper while DRBD is not enabled.")
1330 return drbd_helper
1331
1332
1333 def _GetCompressionTools(opts):
1334 """Determine the list of custom compression tools.
1335
1336 """
1337 if opts.compression_tools:
1338 return opts.compression_tools.split(",")
1339 elif opts.compression_tools is None:
1340 return None # To note the parameter was not provided
1341 else:
1342 return constants.IEC_DEFAULT_TOOLS # Resetting to default
1343
1344
1345 def SetClusterParams(opts, args):
1346 """Modify the cluster.
1347
1348 @param opts: the command line options selected by the user
1349 @type args: list
1350 @param args: should be an empty list
1351 @rtype: int
1352 @return: the desired exit code
1353
1354 """
1355 if not (opts.vg_name is not None or
1356 opts.drbd_helper is not None or
1357 opts.enabled_hypervisors or opts.hvparams or
1358 opts.beparams or opts.nicparams or
1359 opts.ndparams or opts.diskparams or
1360 opts.candidate_pool_size is not None or
1361 opts.max_running_jobs is not None or
1362 opts.max_tracked_jobs is not None or
1363 opts.uid_pool is not None or
1364 opts.maintain_node_health is not None or
1365 opts.add_uids is not None or
1366 opts.remove_uids is not None or
1367 opts.default_iallocator is not None or
1368 opts.default_iallocator_params is not None or
1369 opts.reserved_lvs is not None or
1370 opts.mac_prefix is not None or
1371 opts.master_netdev is not None or
1372 opts.master_netmask is not None or
1373 opts.use_external_mip_script is not None or
1374 opts.prealloc_wipe_disks is not None or
1375 opts.hv_state or
1376 opts.enabled_disk_templates or
1377 opts.disk_state or
1378 opts.ipolicy_bounds_specs is not None or
1379 opts.ipolicy_std_specs is not None or
1380 opts.ipolicy_disk_templates is not None or
1381 opts.ipolicy_vcpu_ratio is not None or
1382 opts.ipolicy_spindle_ratio is not None or
1383 opts.modify_etc_hosts is not None or
1384 opts.file_storage_dir is not None or
1385 opts.install_image is not None or
1386 opts.instance_communication_network is not None or
1387 opts.zeroing_image is not None or
1388 opts.shared_file_storage_dir is not None or
1389 opts.compression_tools is not None or
1390 opts.shared_file_storage_dir is not None or
1391 opts.enabled_user_shutdown is not None or
1392 opts.data_collector_interval or
1393 opts.enabled_data_collectors):
1394 ToStderr("Please give at least one of the parameters.")
1395 return 1
1396
1397 enabled_disk_templates = _GetEnabledDiskTemplates(opts)
1398 vg_name = _GetVgName(opts, enabled_disk_templates)
1399
1400 try:
1401 drbd_helper = _GetDrbdHelper(opts, enabled_disk_templates)
1402 except errors.OpPrereqError, e:
1403 ToStderr(str(e))
1404 return 1
1405
1406 hvlist = opts.enabled_hypervisors
1407 if hvlist is not None:
1408 hvlist = hvlist.split(",")
1409
1410 # a list of (name, dict) we can pass directly to dict() (or [])
1411 hvparams = dict(opts.hvparams)
1412 for hv_params in hvparams.values():
1413 utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
1414
1415 diskparams = dict(opts.diskparams)
1416
1417 for dt_params in diskparams.values():
1418 utils.ForceDictType(dt_params, constants.DISK_DT_TYPES)
1419
1420 beparams = opts.beparams
1421 utils.ForceDictType(beparams, constants.BES_PARAMETER_COMPAT)
1422
1423 nicparams = opts.nicparams
1424 utils.ForceDictType(nicparams, constants.NICS_PARAMETER_TYPES)
1425
1426 ndparams = opts.ndparams
1427 if ndparams is not None:
1428 utils.ForceDictType(ndparams, constants.NDS_PARAMETER_TYPES)
1429
1430 ipolicy = CreateIPolicyFromOpts(
1431 minmax_ispecs=opts.ipolicy_bounds_specs,
1432 std_ispecs=opts.ipolicy_std_specs,
1433 ipolicy_disk_templates=opts.ipolicy_disk_templates,
1434 ipolicy_vcpu_ratio=opts.ipolicy_vcpu_ratio,
1435 ipolicy_spindle_ratio=opts.ipolicy_spindle_ratio,
1436 )
1437
1438 mnh = opts.maintain_node_health
1439
1440 uid_pool = opts.uid_pool
1441 if uid_pool is not None:
1442 uid_pool = uidpool.ParseUidPool(uid_pool)
1443
1444 add_uids = opts.add_uids
1445 if add_uids is not None:
1446 add_uids = uidpool.ParseUidPool(add_uids)
1447
1448 remove_uids = opts.remove_uids
1449 if remove_uids is not None:
1450 remove_uids = uidpool.ParseUidPool(remove_uids)
1451
1452 if opts.reserved_lvs is not None:
1453 if opts.reserved_lvs == "":
1454 opts.reserved_lvs = []
1455 else:
1456 opts.reserved_lvs = utils.UnescapeAndSplit(opts.reserved_lvs, sep=",")
1457
1458 if opts.master_netmask is not None:
1459 try:
1460 opts.master_netmask = int(opts.master_netmask)
1461 except ValueError:
1462 ToStderr("The --master-netmask option expects an int parameter.")
1463 return 1
1464
1465 ext_ip_script = opts.use_external_mip_script
1466
1467 if opts.disk_state:
1468 disk_state = utils.FlatToDict(opts.disk_state)
1469 else:
1470 disk_state = {}
1471
1472 hv_state = dict(opts.hv_state)
1473
1474 compression_tools = _GetCompressionTools(opts)
1475
1476 enabled_data_collectors = dict(
1477 (k, v.lower().startswith("t"))
1478 for k, v in opts.enabled_data_collectors.items())
1479
1480 unrecognized_data_collectors = [
1481 k for k in enabled_data_collectors.keys()
1482 if k not in constants.DATA_COLLECTOR_NAMES]
1483 if unrecognized_data_collectors:
1484 ToStderr("Data collector names not recognized: %s" %
1485 ", ".join(unrecognized_data_collectors))
1486
1487 try:
1488 data_collector_interval = dict(
1489 (k, long(1e6 * float(v)))
1490 for (k, v) in opts.data_collector_interval.items())
1491 except ValueError:
1492 ToStderr("Can't transform all values to integers: {}".format(
1493 opts.data_collector_interval))
1494 return 1
1495 if any(v <= 0 for v in data_collector_interval):
1496 ToStderr("Some interval times where not above zero.")
1497 return 1
1498
1499 op = opcodes.OpClusterSetParams(
1500 vg_name=vg_name,
1501 drbd_helper=drbd_helper,
1502 enabled_hypervisors=hvlist,
1503 hvparams=hvparams,
1504 os_hvp=None,
1505 beparams=beparams,
1506 nicparams=nicparams,
1507 ndparams=ndparams,
1508 diskparams=diskparams,
1509 ipolicy=ipolicy,
1510 candidate_pool_size=opts.candidate_pool_size,
1511 max_running_jobs=opts.max_running_jobs,
1512 max_tracked_jobs=opts.max_tracked_jobs,
1513 maintain_node_health=mnh,
1514 modify_etc_hosts=opts.modify_etc_hosts,
1515 uid_pool=uid_pool,
1516 add_uids=add_uids,
1517 remove_uids=remove_uids,
1518 default_iallocator=opts.default_iallocator,
1519 default_iallocator_params=opts.default_iallocator_params,
1520 prealloc_wipe_disks=opts.prealloc_wipe_disks,
1521 mac_prefix=opts.mac_prefix,
1522 master_netdev=opts.master_netdev,
1523 master_netmask=opts.master_netmask,
1524 reserved_lvs=opts.reserved_lvs,
1525 use_external_mip_script=ext_ip_script,
1526 hv_state=hv_state,
1527 disk_state=disk_state,
1528 enabled_disk_templates=enabled_disk_templates,
1529 force=opts.force,
1530 file_storage_dir=opts.file_storage_dir,
1531 install_image=opts.install_image,
1532 instance_communication_network=opts.instance_communication_network,
1533 zeroing_image=opts.zeroing_image,
1534 shared_file_storage_dir=opts.shared_file_storage_dir,
1535 compression_tools=compression_tools,
1536 enabled_user_shutdown=opts.enabled_user_shutdown,
1537 enabled_data_collectors=enabled_data_collectors,
1538 data_collector_interval=data_collector_interval,
1539 )
1540 return base.GetResult(None, opts, SubmitOrSend(op, opts))
1541
1542
1543 def QueueOps(opts, args):
1544 """Queue operations.
1545
1546 @param opts: the command line options selected by the user
1547 @type args: list
1548 @param args: should contain only one element, the subcommand
1549 @rtype: int
1550 @return: the desired exit code
1551
1552 """
1553 command = args[0]
1554 client = GetClient()
1555 if command in ("drain", "undrain"):
1556 drain_flag = command == "drain"
1557 client.SetQueueDrainFlag(drain_flag)
1558 elif command == "info":
1559 result = client.QueryConfigValues(["drain_flag"])
1560 if result[0]:
1561 val = "set"
1562 else:
1563 val = "unset"
1564 ToStdout("The drain flag is %s" % val)
1565 else:
1566 raise errors.OpPrereqError("Command '%s' is not valid." % command,
1567 errors.ECODE_INVAL)
1568
1569 return 0
1570
1571
1572 def _ShowWatcherPause(until):
1573 if until is None or until < time.time():
1574 ToStdout("The watcher is not paused.")
1575 else:
1576 ToStdout("The watcher is paused until %s.", time.ctime(until))
1577
1578
1579 def WatcherOps(opts, args):
1580 """Watcher operations.
1581
1582 @param opts: the command line options selected by the user
1583 @type args: list
1584 @param args: should contain only one element, the subcommand
1585 @rtype: int
1586 @return: the desired exit code
1587
1588 """
1589 command = args[0]
1590 client = GetClient()
1591
1592 if command == "continue":
1593 client.SetWatcherPause(None)
1594 ToStdout("The watcher is no longer paused.")
1595
1596 elif command == "pause":
1597 if len(args) < 2:
1598 raise errors.OpPrereqError("Missing pause duration", errors.ECODE_INVAL)
1599
1600 result = client.SetWatcherPause(time.time() + ParseTimespec(args[1]))
1601 _ShowWatcherPause(result)
1602
1603 elif command == "info":
1604 result = client.QueryConfigValues(["watcher_pause"])
1605 _ShowWatcherPause(result[0])
1606
1607 else:
1608 raise errors.OpPrereqError("Command '%s' is not valid." % command,
1609 errors.ECODE_INVAL)
1610
1611 return 0
1612
1613
1614 def _OobPower(opts, node_list, power):
1615 """Puts the node in the list to desired power state.
1616
1617 @param opts: The command line options selected by the user
1618 @param node_list: The list of nodes to operate on
1619 @param power: True if they should be powered on, False otherwise
1620 @return: The success of the operation (none failed)
1621
1622 """
1623 if power:
1624 command = constants.OOB_POWER_ON
1625 else:
1626 command = constants.OOB_POWER_OFF
1627
1628 op = opcodes.OpOobCommand(node_names=node_list,
1629 command=command,
1630 ignore_status=True,
1631 timeout=opts.oob_timeout,
1632 power_delay=opts.power_delay)
1633 result = SubmitOpCode(op, opts=opts)
1634 errs = 0
1635 for node_result in result:
1636 (node_tuple, data_tuple) = node_result
1637 (_, node_name) = node_tuple
1638 (data_status, _) = data_tuple
1639 if data_status != constants.RS_NORMAL:
1640 assert data_status != constants.RS_UNAVAIL
1641 errs += 1
1642 ToStderr("There was a problem changing power for %s, please investigate",
1643 node_name)
1644
1645 if errs > 0:
1646 return False
1647
1648 return True
1649
1650
1651 def _InstanceStart(opts, inst_list, start, no_remember=False):
1652 """Puts the instances in the list to desired state.
1653
1654 @param opts: The command line options selected by the user
1655 @param inst_list: The list of instances to operate on
1656 @param start: True if they should be started, False for shutdown
1657 @param no_remember: If the instance state should be remembered
1658 @return: The success of the operation (none failed)
1659
1660 """
1661 if start:
1662 opcls = opcodes.OpInstanceStartup
1663 text_submit, text_success, text_failed = ("startup", "started", "starting")
1664 else:
1665 opcls = compat.partial(opcodes.OpInstanceShutdown,
1666 timeout=opts.shutdown_timeout,
1667 no_remember=no_remember)
1668 text_submit, text_success, text_failed = ("shutdown", "stopped", "stopping")
1669
1670 jex = JobExecutor(opts=opts)
1671
1672 for inst in inst_list:
1673 ToStdout("Submit %s of instance %s", text_submit, inst)
1674 op = opcls(instance_name=inst)
1675 jex.QueueJob(inst, op)
1676
1677 results = jex.GetResults()
1678 bad_cnt = len([1 for (success, _) in results if not success])
1679
1680 if bad_cnt == 0:
1681 ToStdout("All instances have been %s successfully", text_success)
1682 else:
1683 ToStderr("There were errors while %s instances:\n"
1684 "%d error(s) out of %d instance(s)", text_failed, bad_cnt,
1685 len(results))
1686 return False
1687
1688 return True
1689
1690
1691 class _RunWhenNodesReachableHelper(object):
1692 """Helper class to make shared internal state sharing easier.
1693
1694 @ivar success: Indicates if all action_cb calls were successful
1695
1696 """
1697 def __init__(self, node_list, action_cb, node2ip, port, feedback_fn,
1698 _ping_fn=netutils.TcpPing, _sleep_fn=time.sleep):
1699 """Init the object.
1700
1701 @param node_list: The list of nodes to be reachable
1702 @param action_cb: Callback called when a new host is reachable
1703 @type node2ip: dict
1704 @param node2ip: Node to ip mapping
1705 @param port: The port to use for the TCP ping
1706 @param feedback_fn: The function used for feedback
1707 @param _ping_fn: Function to check reachabilty (for unittest use only)
1708 @param _sleep_fn: Function to sleep (for unittest use only)
1709
1710 """
1711 self.down = set(node_list)
1712 self.up = set()
1713 self.node2ip = node2ip
1714 self.success = True
1715 self.action_cb = action_cb
1716 self.port = port
1717 self.feedback_fn = feedback_fn
1718 self._ping_fn = _ping_fn
1719 self._sleep_fn = _sleep_fn
1720
1721 def __call__(self):
1722 """When called we run action_cb.
1723
1724 @raises utils.RetryAgain: When there are still down nodes
1725
1726 """
1727 if not self.action_cb(self.up):
1728 self.success = False
1729
1730 if self.down:
1731 raise utils.RetryAgain()
1732 else:
1733 return self.success
1734
1735 def Wait(self, secs):
1736 """Checks if a host is up or waits remaining seconds.
1737
1738 @param secs: The secs remaining
1739
1740 """
1741 start = time.time()
1742 for node in self.down:
1743 if self._ping_fn(self.node2ip[node], self.port, timeout=_EPO_PING_TIMEOUT,
1744 live_port_needed=True):
1745 self.feedback_fn("Node %s became available" % node)
1746 self.up.add(node)
1747 self.down -= self.up
1748 # If we have a node available there is the possibility to run the
1749 # action callback successfully, therefore we don't wait and return
1750 return
1751
1752 self._sleep_fn(max(0.0, start + secs - time.time()))
1753
1754
1755 def _RunWhenNodesReachable(node_list, action_cb, interval):
1756 """Run action_cb when nodes become reachable.
1757
1758 @param node_list: The list of nodes to be reachable
1759 @param action_cb: Callback called when a new host is reachable
1760 @param interval: The earliest time to retry
1761
1762 """
1763 client = GetClient()
1764 cluster_info = client.QueryClusterInfo()
1765 if cluster_info["primary_ip_version"] == constants.IP4_VERSION:
1766 family = netutils.IPAddress.family
1767 else:
1768 family = netutils.IP6Address.family
1769
1770 node2ip = dict((node, netutils.GetHostname(node, family=family).ip)
1771 for node in node_list)
1772
1773 port = netutils.GetDaemonPort(constants.NODED)
1774 helper = _RunWhenNodesReachableHelper(node_list, action_cb, node2ip, port,
1775 ToStdout)
1776
1777 try:
1778 return utils.Retry(helper, interval, _EPO_REACHABLE_TIMEOUT,
1779 wait_fn=helper.Wait)
1780 except utils.RetryTimeout:
1781 ToStderr("Time exceeded while waiting for nodes to become reachable"
1782 " again:\n - %s", " - ".join(helper.down))
1783 return False
1784
1785
1786 def _MaybeInstanceStartup(opts, inst_map, nodes_online,
1787 _instance_start_fn=_InstanceStart):
1788 """Start the instances conditional based on node_states.
1789
1790 @param opts: The command line options selected by the user
1791 @param inst_map: A dict of inst -> nodes mapping
1792 @param nodes_online: A list of nodes online
1793 @param _instance_start_fn: Callback to start instances (unittest use only)
1794 @return: Success of the operation on all instances
1795
1796 """
1797 start_inst_list = []
1798 for (inst, nodes) in inst_map.items():
1799 if not (nodes - nodes_online):
1800 # All nodes the instance lives on are back online
1801 start_inst_list.append(inst)
1802
1803 for inst in start_inst_list:
1804 del inst_map[inst]
1805
1806 if start_inst_list:
1807 return _instance_start_fn(opts, start_inst_list, True)
1808
1809 return True
1810
1811
1812 def _EpoOn(opts, full_node_list, node_list, inst_map):
1813 """Does the actual power on.
1814
1815 @param opts: The command line options selected by the user
1816 @param full_node_list: All nodes to operate on (includes nodes not supporting
1817 OOB)
1818 @param node_list: The list of nodes to operate on (all need to support OOB)
1819 @param inst_map: A dict of inst -> nodes mapping
1820 @return: The desired exit status
1821
1822 """
1823 if node_list and not _OobPower(opts, node_list, False):
1824 ToStderr("Not all nodes seem to get back up, investigate and start"
1825 " manually if needed")
1826
1827 # Wait for the nodes to be back up
1828 action_cb = compat.partial(_MaybeInstanceStartup, opts, dict(inst_map))
1829
1830 ToStdout("Waiting until all nodes are available again")
1831 if not _RunWhenNodesReachable(full_node_list, action_cb, _EPO_PING_INTERVAL):
1832 ToStderr("Please investigate and start stopped instances manually")
1833 return constants.EXIT_FAILURE
1834
1835 return constants.EXIT_SUCCESS
1836
1837
1838 def _EpoOff(opts, node_list, inst_map):
1839 """Does the actual power off.
1840
1841 @param opts: The command line options selected by the user
1842 @param node_list: The list of nodes to operate on (all need to support OOB)
1843 @param inst_map: A dict of inst -> nodes mapping
1844 @return: The desired exit status
1845
1846 """
1847 if not _InstanceStart(opts, inst_map.keys(), False, no_remember=True):
1848 ToStderr("Please investigate and stop instances manually before continuing")
1849 return constants.EXIT_FAILURE
1850
1851 if not node_list:
1852 return constants.EXIT_SUCCESS
1853
1854 if _OobPower(opts, node_list, False):
1855 return constants.EXIT_SUCCESS
1856 else:
1857 return constants.EXIT_FAILURE
1858
1859
1860 def Epo(opts, args, qcl=None, _on_fn=_EpoOn, _off_fn=_EpoOff,
1861 _confirm_fn=ConfirmOperation,
1862 _stdout_fn=ToStdout, _stderr_fn=ToStderr):
1863 """EPO operations.
1864
1865 @param opts: the command line options selected by the user
1866 @type args: list
1867 @param args: should contain only one element, the subcommand
1868 @rtype: int
1869 @return: the desired exit code
1870
1871 """
1872 if opts.groups and opts.show_all:
1873 _stderr_fn("Only one of --groups or --all are allowed")
1874 return constants.EXIT_FAILURE
1875 elif args and opts.show_all:
1876 _stderr_fn("Arguments in combination with --all are not allowed")
1877 return constants.EXIT_FAILURE
1878
1879 if qcl is None:
1880 # Query client
1881 qcl = GetClient()
1882
1883 if opts.groups:
1884 node_query_list = \
1885 itertools.chain(*qcl.QueryGroups(args, ["node_list"], False))
1886 else:
1887 node_query_list = args
1888
1889 result = qcl.QueryNodes(node_query_list, ["name", "master", "pinst_list",
1890 "sinst_list", "powered", "offline"],
1891 False)
1892
1893 all_nodes = map(compat.fst, result)
1894 node_list = []
1895 inst_map = {}
1896 for (node, master, pinsts, sinsts, powered, offline) in result:
1897 if not offline:
1898 for inst in (pinsts + sinsts):
1899 if inst in inst_map:
1900 if not master:
1901 inst_map[inst].add(node)
1902 elif master:
1903 inst_map[inst] = set()
1904 else:
1905 inst_map[inst] = set([node])
1906
1907 if master and opts.on:
1908 # We ignore the master for turning on the machines, in fact we are
1909 # already operating on the master at this point :)
1910 continue
1911 elif master and not opts.show_all:
1912 _stderr_fn("%s is the master node, please do a master-failover to another"
1913 " node not affected by the EPO or use --all if you intend to"
1914 " shutdown the whole cluster", node)
1915 return constants.EXIT_FAILURE
1916 elif powered is None:
1917 _stdout_fn("Node %s does not support out-of-band handling, it can not be"
1918 " handled in a fully automated manner", node)
1919 elif powered == opts.on:
1920 _stdout_fn("Node %s is already in desired power state, skipping", node)
1921 elif not offline or (offline and powered):
1922 node_list.append(node)
1923
1924 if not (opts.force or _confirm_fn(all_nodes, "nodes", "epo")):
1925 return constants.EXIT_FAILURE
1926
1927 if opts.on:
1928 return _on_fn(opts, all_nodes, node_list, inst_map)
1929 else:
1930 return _off_fn(opts, node_list, inst_map)
1931
1932
1933 def _GetCreateCommand(info):
1934 buf = StringIO()
1935 buf.write("gnt-cluster init")
1936 PrintIPolicyCommand(buf, info["ipolicy"], False)
1937 buf.write(" ")
1938 buf.write(info["name"])
1939 return buf.getvalue()
1940
1941
def ShowCreateCommand(opts, args):
  """Shows the command that can be used to re-create the cluster.

  Currently it works only for ipolicy specs.

  """
  ToStdout(_GetCreateCommand(GetClient().QueryClusterInfo()))
1951
1952
def _RunCommandAndReport(cmd):
  """Run a command and report its output, iff it failed.

  @param cmd: the command to execute
  @type cmd: list
  @rtype: bool
  @return: False, if the execution failed.

  """
  result = utils.RunCmd(cmd)
  if not result.failed:
    return True
  # Only failures are reported; successful runs stay silent
  ToStderr("Command %s failed: %s; Output %s" %
           (cmd, result.fail_reason, result.output))
  return False
1968
1969
def _VerifyCommand(cmd):
  """Verify that a given command succeeds on all online nodes.

  As this function is intended to run during upgrades, it
  is implemented in such a way that it still works, if all Ganeti
  daemons are down.

  @param cmd: the command to execute
  @type cmd: list
  @rtype: list
  @return: the list of node names that are online where
      the command failed.

  """
  command = utils.text.ShellQuoteArgs([str(val) for val in cmd])

  # Use a single SimpleStore instance instead of creating (and thereby
  # re-reading the ssconf files of) one per queried fact
  ss = ssconf.SimpleStore()
  nodes = ss.GetOnlineNodeList()
  master_node = ss.GetMasterNode()
  cluster_name = ss.GetClusterName()

  # If master node is in 'nodes', make sure master node is at list end
  if master_node in nodes:
    nodes.remove(master_node)
    nodes.append(master_node)

  failed = []

  srun = ssh.SshRunner(cluster_name=cluster_name)
  for name in nodes:
    result = srun.Run(name, constants.SSH_LOGIN_USER, command)
    if result.exit_code != 0:
      failed.append(name)

  return failed
2004
2005
def _VerifyVersionInstalled(versionstring):
  """Verify that the given version of ganeti is installed on all online nodes.

  Do nothing, if this is the case, otherwise print an appropriate
  message to stderr.

  @param versionstring: the version to check for
  @type versionstring: string
  @rtype: bool
  @return: True, if the version is installed on all online nodes

  """
  # The version is considered installed if its directory exists under
  # the package library directory
  version_dir = os.path.join(pathutils.PKGLIBDIR, versionstring)
  badnodes = _VerifyCommand(["test", "-d", version_dir])
  if not badnodes:
    return True

  ToStderr("Ganeti version %s not installed on nodes %s"
           % (versionstring, ", ".join(badnodes)))
  return False
2026
2027
def _GetRunning():
  """Determine the number of jobs still running.

  @rtype: int
  @return: the number of jobs still in the running state

  """
  running = frozenset([constants.JOB_STATUS_RUNNING])
  qfilter = qlang.MakeSimpleFilter("status", running)
  return len(GetClient().Query(constants.QR_JOB, [], qfilter).data)
2039
2040
def _SetGanetiVersion(versionstring):
  """Set the active version of ganeti to the given versionstring

  @type versionstring: string
  @rtype: list
  @return: the list of nodes where the version change failed

  """
  lib_link = os.path.join(pathutils.SYSCONFDIR, "ganeti/lib")
  share_link = os.path.join(pathutils.SYSCONFDIR, "ganeti/share")
  lib_target = os.path.join(pathutils.PKGLIBDIR, versionstring)
  share_target = os.path.join(pathutils.SHAREDIR, versionstring)

  failed = []
  if constants.HAS_GNU_LN:
    # GNU ln supports -T, letting the symlink be replaced in one step
    failed.extend(_VerifyCommand(
      ["ln", "-s", "-f", "-T", lib_target, lib_link]))
    failed.extend(_VerifyCommand(
      ["ln", "-s", "-f", "-T", share_target, share_link]))
  else:
    # Without GNU ln, remove and re-create each symlink in two steps
    failed.extend(_VerifyCommand(["rm", "-f", lib_link]))
    failed.extend(_VerifyCommand(["ln", "-s", "-f", lib_target, lib_link]))
    failed.extend(_VerifyCommand(["rm", "-f", share_link]))
    failed.extend(_VerifyCommand(["ln", "-s", "-f", share_target, share_link]))

  # De-duplicate, as a node may have failed more than one of the commands
  return list(set(failed))
2071
2072
2073 def _ExecuteCommands(fns):
2074 """Execute a list of functions, in reverse order.
2075
2076 @type fns: list of functions.
2077 @param fns: the functions to be executed.
2078
2079 """
2080 for fn in reversed(fns):
2081 fn()
2082
2083
def _GetConfigVersion():
  """Determine the version the configuration file currently has.

  @rtype: tuple or None
  @return: (major, minor, revision) if the version can be determined,
      None otherwise

  """
  config_data = serializer.LoadJson(utils.ReadFile(pathutils.CLUSTER_CONF_FILE))
  if "version" not in config_data:
    return None
  return utils.SplitVersion(config_data["version"])
2098
2099
def _ReadIntentToUpgrade():
  """Read the file documenting the intent to upgrade the cluster.

  @rtype: (string, string) or (None, None)
  @return: (old version, version to upgrade to), if the file exists,
      and (None, None) otherwise.

  """
  if not os.path.isfile(pathutils.INTENT_TO_UPGRADE):
    return (None, None)

  parts = utils.UnescapeAndSplit(utils.ReadFile(pathutils.INTENT_TO_UPGRADE))
  if len(parts) != 3:
    # file syntactically mal-formed; the third field (the writer's pid)
    # is not of interest here
    return (None, None)
  return (parts[0], parts[1])
2117
2118
def _WriteIntentToUpgrade(version):
  """Write file documenting the intent to upgrade the cluster.

  The file records the currently running version, the target version,
  and the pid of the writing process.

  @type version: string
  @param version: the version we intent to upgrade to

  """
  content = utils.EscapeAndJoin(
    [constants.RELEASE_VERSION, version, "%d" % os.getpid()])
  utils.WriteFile(pathutils.INTENT_TO_UPGRADE, data=content)
2129
2130
def _UpgradeBeforeConfigurationChange(versionstring):
  """
  Carry out all the tasks necessary for an upgrade that happen before
  the configuration file, or Ganeti version, changes.

  Every state-changing step pushes an undo action onto the rollback
  list, so a failed upgrade can be unwound by executing the returned
  tasks (in reverse order, see L{_ExecuteCommands}).

  @type versionstring: string
  @param versionstring: the version to upgrade to
  @rtype: (bool, list)
  @return: tuple of a bool indicating success and a list of rollback tasks

  """
  rollback = []

  if not _VerifyVersionInstalled(versionstring):
    return (False, rollback)

  # Record the intended upgrade on disk, so that an interrupted run
  # can later be resumed
  _WriteIntentToUpgrade(versionstring)
  rollback.append(
    lambda: utils.RunCmd(["rm", "-f", pathutils.INTENT_TO_UPGRADE]))

  ToStdout("Draining queue")
  client = GetClient()
  client.SetQueueDrainFlag(True)

  rollback.append(lambda: GetClient().SetQueueDrainFlag(False))

  # Poll until no jobs are running any more; a truthy result here means
  # jobs were still running when the drain timeout expired
  if utils.SimpleRetry(0, _GetRunning,
                       constants.UPGRADE_QUEUE_POLL_INTERVAL,
                       constants.UPGRADE_QUEUE_DRAIN_TIMEOUT):
    ToStderr("Failed to completely empty the queue.")
    return (False, rollback)

  ToStdout("Pausing the watcher for one hour.")
  rollback.append(lambda: GetClient().SetWatcherPause(None))
  GetClient().SetWatcherPause(time.time() + 60 * 60)

  ToStdout("Stopping daemons on master node.")
  if not _RunCommandAndReport([pathutils.DAEMON_UTIL, "stop-all"]):
    return (False, rollback)

  # Re-check the installation; on failure, restart the local daemons
  # that were just stopped above before bailing out
  if not _VerifyVersionInstalled(versionstring):
    utils.RunCmd([pathutils.DAEMON_UTIL, "start-all"])
    return (False, rollback)

  ToStdout("Stopping daemons everywhere.")
  rollback.append(lambda: _VerifyCommand([pathutils.DAEMON_UTIL, "start-all"]))
  badnodes = _VerifyCommand([pathutils.DAEMON_UTIL, "stop-all"])
  if badnodes:
    ToStderr("Failed to stop daemons on %s." % (", ".join(badnodes),))
    return (False, rollback)

  backuptar = os.path.join(pathutils.BACKUP_DIR, "ganeti%d.tar" % time.time())
  ToStdout("Backing up configuration as %s" % backuptar)
  if not _RunCommandAndReport(["mkdir", "-p", pathutils.BACKUP_DIR]):
    return (False, rollback)

  # Create the archive in a safe manner, as it contains sensitive
  # information: write into a mkstemp-created file and rename it into
  # place only once complete.
  (_, tmp_name) = tempfile.mkstemp(prefix=backuptar, dir=pathutils.BACKUP_DIR)
  if not _RunCommandAndReport(["tar", "-cf", tmp_name,
                               "--exclude=queue/archive",
                               pathutils.DATA_DIR]):
    return (False, rollback)

  os.rename(tmp_name, backuptar)
  return (True, rollback)
2197
2198
def _VersionSpecificDowngrade():
  """
  Perform any additional downgrade tasks that are version specific
  and need to be done just after the configuration downgrade. This
  function needs to be idempotent, so that it can be redone if the
  downgrade procedure gets interrupted after changing the
  configuration.

  Note that this function has to be reset with every version bump.

  @return: True upon success
  """
  ToStdout("Performing version-specific downgrade tasks.")

  # Nothing version-specific to do for the current version
  return True
2214
2215
def _SwitchVersionAndConfig(versionstring, downgrade):
  """
  Switch to the new Ganeti version and change the configuration,
  in correct order.

  @type versionstring: string
  @param versionstring: the version to change to
  @type downgrade: bool
  @param downgrade: True, if the configuration should be downgraded
  @rtype: (bool, list)
  @return: tuple of a bool indicating success, and a list of
      additional rollback tasks

  """
  rollback = []
  if downgrade:
    ToStdout("Downgrading configuration")
    if not _RunCommandAndReport([pathutils.CFGUPGRADE, "--downgrade", "-f"]):
      return (False, rollback)
    # Note: version specific downgrades need to be done before switching
    # binaries, so that we still have the knowledgeable binary if the downgrade
    # process gets interrupted at this point.
    if not _VersionSpecificDowngrade():
      return (False, rollback)

  # Configuration change is the point of no return. From then onwards, it is
  # safer to push through the up/downgrade than to try to roll it back.

  ToStdout("Switching to version %s on all nodes" % versionstring)
  rollback.append(lambda: _SetGanetiVersion(constants.DIR_VERSION))
  badnodes = _SetGanetiVersion(versionstring)
  if badnodes:
    ToStderr("Failed to switch to Ganeti version %s on nodes %s"
             % (versionstring, ", ".join(badnodes)))
    # For downgrades we deliberately push on despite failed nodes, as the
    # configuration has already been changed (see note above)
    if not downgrade:
      return (False, rollback)

  # Now that we have changed to the new version of Ganeti we should
  # not communicate over luxi any more, as luxi might have changed in
  # incompatible ways. Therefore, manually call the corresponding ganeti
  # commands using their canonical (version independent) path.

  if not downgrade:
    ToStdout("Upgrading configuration")
    if not _RunCommandAndReport([pathutils.CFGUPGRADE, "-f"]):
      return (False, rollback)

  return (True, rollback)
2264
2265
def _UpgradeAfterConfigurationChange(oldversion):
  """
  Carry out the upgrade actions necessary after switching to the new
  Ganeti version and updating the configuration.

  As this part is run at a time where the new version of Ganeti is already
  running, no communication should happen via luxi, as this is not a stable
  interface. Also, as the configuration change is the point of no return,
  all actions are pushed through, even if some of them fail.

  @param oldversion: the version the upgrade started from
  @type oldversion: string
  @rtype: int
  @return: the intended return value

  """
  # Any failure below only degrades the return value; it never aborts
  # the remaining steps
  returnvalue = 0

  ToStdout("Ensuring directories everywhere.")
  badnodes = _VerifyCommand([pathutils.ENSURE_DIRS])
  if badnodes:
    ToStderr("Warning: failed to ensure directories on %s." %
             (", ".join(badnodes)))
    returnvalue = 1

  ToStdout("Starting daemons everywhere.")
  badnodes = _VerifyCommand([pathutils.DAEMON_UTIL, "start-all"])
  if badnodes:
    ToStderr("Warning: failed to start daemons on %s." % (", ".join(badnodes),))
    returnvalue = 1

  ToStdout("Redistributing the configuration.")
  if not _RunCommandAndReport(["gnt-cluster", "redist-conf", "--yes-do-it"]):
    returnvalue = 1

  ToStdout("Restarting daemons everywhere.")
  badnodes = _VerifyCommand([pathutils.DAEMON_UTIL, "stop-all"])
  badnodes.extend(_VerifyCommand([pathutils.DAEMON_UTIL, "start-all"]))
  if badnodes:
    # De-duplicate, as a node can fail both the stop and the start
    ToStderr("Warning: failed to start daemons on %s." %
             (", ".join(list(set(badnodes))),))
    returnvalue = 1

  ToStdout("Undraining the queue.")
  if not _RunCommandAndReport(["gnt-cluster", "queue", "undrain"]):
    returnvalue = 1

  # Best-effort removal; the file only documents the intent to upgrade
  _RunCommandAndReport(["rm", "-f", pathutils.INTENT_TO_UPGRADE])

  ToStdout("Running post-upgrade hooks")
  if not _RunCommandAndReport([pathutils.POST_UPGRADE, oldversion]):
    returnvalue = 1

  ToStdout("Unpausing the watcher.")
  if not _RunCommandAndReport(["gnt-cluster", "watcher", "continue"]):
    returnvalue = 1

  ToStdout("Verifying cluster.")
  if not _RunCommandAndReport(["gnt-cluster", "verify"]):
    returnvalue = 1

  return returnvalue
2328
2329
def UpgradeGanetiCommand(opts, args):
  """Upgrade a cluster to a new ganeti version.

  @param opts: the command line options selected by the user
  @type args: list
  @param args: should be an empty list
  @rtype: int
  @return: the desired exit code

  """
  # --to and --resume are mutually exclusive, and one is required
  if ((not opts.resume and opts.to is None)
      or (opts.resume and opts.to is not None)):
    ToStderr("Precisely one of the options --to and --resume"
             " has to be given")
    return 1

  # If we're not told to resume, verify there is no upgrade
  # in progress.
  if not opts.resume:
    oldversion, versionstring = _ReadIntentToUpgrade()
    if versionstring is not None:
      # An upgrade is going on; verify whether the target matches
      if versionstring == opts.to:
        ToStderr("An upgrade is already in progress. Target version matches,"
                 " resuming.")
        # Switch over to resume mode for the matching in-progress upgrade
        opts.resume = True
        opts.to = None
      else:
        ToStderr("An upgrade from %s to %s is in progress; use --resume to"
                 " finish it first" % (oldversion, versionstring))
        return 1

  oldversion = constants.RELEASE_VERSION

  if opts.resume:
    ssconf.CheckMaster(False)
    # The intent file records both the version we came from and the one
    # we are heading to; nothing to do if it does not exist
    oldversion, versionstring = _ReadIntentToUpgrade()
    if versionstring is None:
      return 0
    version = utils.version.ParseVersion(versionstring)
    if version is None:
      return 1
    configversion = _GetConfigVersion()
    if configversion is None:
      return 1
    # If the upgrade we resume was an upgrade between compatible
    # versions (like 2.10.0 to 2.10.1), the correct configversion
    # does not guarantee that the config has been updated.
    # However, in the case of a compatible update with the configuration
    # not touched, we are running a different dirversion with the same
    # config version.
    config_already_modified = \
      (utils.IsCorrectConfigVersion(version, configversion) and
       not (versionstring != constants.DIR_VERSION and
            configversion == (constants.CONFIG_MAJOR, constants.CONFIG_MINOR,
                              constants.CONFIG_REVISION)))
    if not config_already_modified:
      # We have to start from the beginning; however, some daemons might have
      # already been stopped, so the only way to get into a well-defined state
      # is by starting all daemons again.
      _VerifyCommand([pathutils.DAEMON_UTIL, "start-all"])
  else:
    versionstring = opts.to
    config_already_modified = False
    version = utils.version.ParseVersion(versionstring)
    if version is None:
      ToStderr("Could not parse version string %s" % versionstring)
      return 1

  # Refuse targets outside the supported upgrade range
  msg = utils.version.UpgradeRange(version)
  if msg is not None:
    ToStderr("Cannot upgrade to %s: %s" % (versionstring, msg))
    return 1

  if not config_already_modified:
    success, rollback = _UpgradeBeforeConfigurationChange(versionstring)
    if not success:
      _ExecuteCommands(rollback)
      return 1
  else:
    rollback = []

  downgrade = utils.version.ShouldCfgdowngrade(version)

  success, additionalrollback = \
    _SwitchVersionAndConfig(versionstring, downgrade)
  if not success:
    rollback.extend(additionalrollback)
    _ExecuteCommands(rollback)
    return 1

  return _UpgradeAfterConfigurationChange(oldversion)
2422
2423
#: Dictionary mapping each gnt-cluster sub-command name to a tuple of
#: (handler function, argument specification, option list, usage synopsis,
#: one-line description), as consumed by GenericMain
commands = {
  "init": (
    InitCluster, [ArgHost(min=1, max=1)],
    [BACKEND_OPT, CP_SIZE_OPT, ENABLED_HV_OPT, GLOBAL_FILEDIR_OPT,
     HVLIST_OPT, MAC_PREFIX_OPT, MASTER_NETDEV_OPT, MASTER_NETMASK_OPT,
     NIC_PARAMS_OPT, NOMODIFY_ETCHOSTS_OPT, NOMODIFY_SSH_SETUP_OPT,
     SECONDARY_IP_OPT, VG_NAME_OPT, MAINTAIN_NODE_HEALTH_OPT, UIDPOOL_OPT,
     DRBD_HELPER_OPT, DEFAULT_IALLOCATOR_OPT, DEFAULT_IALLOCATOR_PARAMS_OPT,
     PRIMARY_IP_VERSION_OPT, PREALLOC_WIPE_DISKS_OPT, NODE_PARAMS_OPT,
     GLOBAL_SHARED_FILEDIR_OPT, USE_EXTERNAL_MIP_SCRIPT, DISK_PARAMS_OPT,
     HV_STATE_OPT, DISK_STATE_OPT, ENABLED_DISK_TEMPLATES_OPT,
     IPOLICY_STD_SPECS_OPT, GLOBAL_GLUSTER_FILEDIR_OPT, INSTALL_IMAGE_OPT,
     ZEROING_IMAGE_OPT, COMPRESSION_TOOLS_OPT,
     ENABLED_USER_SHUTDOWN_OPT, SSH_KEY_BITS_OPT, SSH_KEY_TYPE_OPT,
     ]
    + INSTANCE_POLICY_OPTS + SPLIT_ISPECS_OPTS,
    "[opts...] <cluster_name>", "Initialises a new cluster configuration"),
  "destroy": (
    DestroyCluster, ARGS_NONE, [YES_DOIT_OPT],
    "", "Destroy cluster"),
  "rename": (
    RenameCluster, [ArgHost(min=1, max=1)],
    [FORCE_OPT, DRY_RUN_OPT],
    "<new_name>",
    "Renames the cluster"),
  "redist-conf": (
    RedistributeConfig, ARGS_NONE, SUBMIT_OPTS +
    [DRY_RUN_OPT, PRIORITY_OPT, FORCE_DISTRIBUTION],
    "", "Forces a push of the configuration file and ssconf files"
    " to the nodes in the cluster"),
  "verify": (
    VerifyCluster, ARGS_NONE,
    [VERBOSE_OPT, DEBUG_SIMERR_OPT, ERROR_CODES_OPT, NONPLUS1_OPT,
     DRY_RUN_OPT, PRIORITY_OPT, NODEGROUP_OPT, IGNORE_ERRORS_OPT,
     VERIFY_CLUTTER_OPT],
    "", "Does a check on the cluster configuration"),
  "verify-disks": (
    VerifyDisks, ARGS_NONE, [PRIORITY_OPT, NODEGROUP_OPT],
    "", "Does a check on the cluster disk status"),
  "repair-disk-sizes": (
    RepairDiskSizes, ARGS_MANY_INSTANCES, [DRY_RUN_OPT, PRIORITY_OPT],
    "[instance...]", "Updates mismatches in recorded disk sizes"),
  "master-failover": (
    MasterFailover, ARGS_NONE, [NOVOTING_OPT, FORCE_FAILOVER],
    "", "Makes the current node the master"),
  "master-ping": (
    MasterPing, ARGS_NONE, [],
    "", "Checks if the master is alive"),
  "version": (
    ShowClusterVersion, ARGS_NONE, [],
    "", "Shows the cluster version"),
  "getmaster": (
    ShowClusterMaster, ARGS_NONE, [],
    "", "Shows the cluster master"),
  "copyfile": (
    ClusterCopyFile, [ArgFile(min=1, max=1)],
    [NODE_LIST_OPT, USE_REPL_NET_OPT, NODEGROUP_OPT],
    "[-n node...] <filename>", "Copies a file to all (or only some) nodes"),
  "command": (
    RunClusterCommand, [ArgCommand(min=1)],
    [NODE_LIST_OPT, NODEGROUP_OPT, SHOW_MACHINE_OPT, FAILURE_ONLY_OPT],
    "[-n node...] <command>", "Runs a command on all (or only some) nodes"),
  "info": (
    ShowClusterConfig, ARGS_NONE, [ROMAN_OPT],
    "[--roman]", "Show cluster configuration"),
  "list-tags": (
    ListTags, ARGS_NONE, [], "", "List the tags of the cluster"),
  "add-tags": (
    AddTags, [ArgUnknown()], [TAG_SRC_OPT, PRIORITY_OPT] + SUBMIT_OPTS,
    "tag...", "Add tags to the cluster"),
  "remove-tags": (
    RemoveTags, [ArgUnknown()], [TAG_SRC_OPT, PRIORITY_OPT] + SUBMIT_OPTS,
    "tag...", "Remove tags from the cluster"),
  "search-tags": (
    SearchTags, [ArgUnknown(min=1, max=1)], [PRIORITY_OPT], "",
    "Searches the tags on all objects on"
    " the cluster for a given pattern (regex)"),
  "queue": (
    QueueOps,
    [ArgChoice(min=1, max=1, choices=["drain", "undrain", "info"])],
    [], "drain|undrain|info", "Change queue properties"),
  "watcher": (
    WatcherOps,
    [ArgChoice(min=1, max=1, choices=["pause", "continue", "info"]),
     ArgSuggest(min=0, max=1, choices=["30m", "1h", "4h"])],
    [],
    "{pause <timespec>|continue|info}", "Change watcher properties"),
  "modify": (
    SetClusterParams, ARGS_NONE,
    [FORCE_OPT,
     BACKEND_OPT, CP_SIZE_OPT, RQL_OPT, MAX_TRACK_OPT, INSTALL_IMAGE_OPT,
     INSTANCE_COMMUNICATION_NETWORK_OPT, ENABLED_HV_OPT, HVLIST_OPT,
     MAC_PREFIX_OPT, MASTER_NETDEV_OPT, MASTER_NETMASK_OPT, NIC_PARAMS_OPT,
     VG_NAME_OPT, MAINTAIN_NODE_HEALTH_OPT, UIDPOOL_OPT, ADD_UIDS_OPT,
     REMOVE_UIDS_OPT, DRBD_HELPER_OPT, DEFAULT_IALLOCATOR_OPT,
     DEFAULT_IALLOCATOR_PARAMS_OPT, RESERVED_LVS_OPT, DRY_RUN_OPT, PRIORITY_OPT,
     PREALLOC_WIPE_DISKS_OPT, NODE_PARAMS_OPT, USE_EXTERNAL_MIP_SCRIPT,
     DISK_PARAMS_OPT, HV_STATE_OPT, DISK_STATE_OPT] + SUBMIT_OPTS +
    [ENABLED_DISK_TEMPLATES_OPT, IPOLICY_STD_SPECS_OPT, MODIFY_ETCHOSTS_OPT,
     ENABLED_USER_SHUTDOWN_OPT] +
    INSTANCE_POLICY_OPTS +
    [GLOBAL_FILEDIR_OPT, GLOBAL_SHARED_FILEDIR_OPT, ZEROING_IMAGE_OPT,
     COMPRESSION_TOOLS_OPT] +
    [ENABLED_DATA_COLLECTORS_OPT, DATA_COLLECTOR_INTERVAL_OPT],
    "[opts...]",
    "Alters the parameters of the cluster"),
  "renew-crypto": (
    RenewCrypto, ARGS_NONE,
    [NEW_CLUSTER_CERT_OPT, NEW_RAPI_CERT_OPT, RAPI_CERT_OPT,
     NEW_CONFD_HMAC_KEY_OPT, FORCE_OPT,
     NEW_CLUSTER_DOMAIN_SECRET_OPT, CLUSTER_DOMAIN_SECRET_OPT,
     NEW_SPICE_CERT_OPT, SPICE_CERT_OPT, SPICE_CACERT_OPT,
     NEW_NODE_CERT_OPT, NEW_SSH_KEY_OPT, NOSSH_KEYCHECK_OPT,
     VERBOSE_OPT, SSH_KEY_BITS_OPT, SSH_KEY_TYPE_OPT],
    "[opts...]",
    "Renews cluster certificates, keys and secrets"),
  "epo": (
    Epo, [ArgUnknown()],
    [FORCE_OPT, ON_OPT, GROUPS_OPT, ALL_OPT, OOB_TIMEOUT_OPT,
     SHUTDOWN_TIMEOUT_OPT, POWER_DELAY_OPT],
    "[opts...] [args]",
    "Performs an emergency power-off on given args"),
  "activate-master-ip": (
    ActivateMasterIp, ARGS_NONE, [], "", "Activates the master IP"),
  "deactivate-master-ip": (
    DeactivateMasterIp, ARGS_NONE, [CONFIRM_OPT], "",
    "Deactivates the master IP"),
  "show-ispecs-cmd": (
    ShowCreateCommand, ARGS_NONE, [], "",
    "Show the command line to re-create the cluster"),
  "upgrade": (
    UpgradeGanetiCommand, ARGS_NONE, [TO_OPT, RESUME_OPT], "",
    "Upgrade (or downgrade) to a new Ganeti version"),
  }
2558
2559
2560 #: dictionary with aliases for commands
aliases = {
  "masterfailover": "master-failover",  # alternative spelling without the dash
  "show": "info",  # shorthand for the "info" sub-command
  }
2565
2566
def Main():
  """Entry point of the gnt-cluster tool.

  Dispatches to one of the handlers in L{commands}, after resolving
  L{aliases}; tag operations act on cluster-level tags.

  @return: the result of GenericMain, i.e. the intended exit code

  """
  return GenericMain(commands, override={"tag_type": constants.TAG_CLUSTER},
                     aliases=aliases)