Merge branch 'stable-2.12' into stable-2.13
[ganeti-github.git] / lib / client / gnt_cluster.py
1 #
2 #
3
4 # Copyright (C) 2006, 2007, 2010, 2011, 2012, 2013, 2014 Google Inc.
5 # All rights reserved.
6 #
7 # Redistribution and use in source and binary forms, with or without
8 # modification, are permitted provided that the following conditions are
9 # met:
10 #
11 # 1. Redistributions of source code must retain the above copyright notice,
12 # this list of conditions and the following disclaimer.
13 #
14 # 2. Redistributions in binary form must reproduce the above copyright
15 # notice, this list of conditions and the following disclaimer in the
16 # documentation and/or other materials provided with the distribution.
17 #
18 # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
19 # IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
20 # TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
21 # PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR
22 # CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
23 # EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
24 # PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
25 # PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
26 # LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
27 # NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
28 # SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29
30 """Cluster related commands"""
31
32 # pylint: disable=W0401,W0613,W0614,C0103
33 # W0401: Wildcard import ganeti.cli
34 # W0613: Unused argument, since all functions follow the same API
35 # W0614: Unused import %s from wildcard import (since we need cli)
36 # C0103: Invalid name gnt-cluster
37
38 from cStringIO import StringIO
39 import os
40 import time
41 import OpenSSL
42 import tempfile
43 import itertools
44
45 from ganeti.cli import *
46 from ganeti import bootstrap
47 from ganeti import compat
48 from ganeti import constants
49 from ganeti import config
50 from ganeti import errors
51 from ganeti import netutils
52 from ganeti import objects
53 from ganeti import opcodes
54 from ganeti import pathutils
55 from ganeti import qlang
56 from ganeti import serializer
57 from ganeti import ssconf
58 from ganeti import ssh
59 from ganeti import uidpool
60 from ganeti import utils
61 from ganeti.client import base
62
63
64 ON_OPT = cli_option("--on", default=False,
65 action="store_true", dest="on",
66 help="Recover from an EPO")
67
68 GROUPS_OPT = cli_option("--groups", default=False,
69 action="store_true", dest="groups",
70 help="Arguments are node groups instead of nodes")
71
72 FORCE_FAILOVER = cli_option("--yes-do-it", dest="yes_do_it",
73 help="Override interactive check for --no-voting",
74 default=False, action="store_true")
75
76 FORCE_DISTRIBUTION = cli_option("--yes-do-it", dest="yes_do_it",
77 help="Unconditionally distribute the"
78 " configuration, even if the queue"
79 " is drained",
80 default=False, action="store_true")
81
82 TO_OPT = cli_option("--to", default=None, type="string",
83 help="The Ganeti version to upgrade to")
84
85 RESUME_OPT = cli_option("--resume", default=False, action="store_true",
86 help="Resume any pending Ganeti upgrades")
87
88 DATA_COLLECTOR_INTERVAL_OPT = cli_option(
89 "--data-collector-interval", default={}, type="keyval",
90 help="Set collection intervals in seconds of data collectors.")
91
92 _EPO_PING_INTERVAL = 30 # 30 seconds between pings
93 _EPO_PING_TIMEOUT = 1 # 1 second
94 _EPO_REACHABLE_TIMEOUT = 15 * 60 # 15 minutes
95
96
97 def _InitEnabledDiskTemplates(opts):
98 """Initialize the list of enabled disk templates.
99
100 """
101 if opts.enabled_disk_templates:
102 return opts.enabled_disk_templates.split(",")
103 else:
104 return constants.DEFAULT_ENABLED_DISK_TEMPLATES
105
106
107 def _InitVgName(opts, enabled_disk_templates):
108 """Initialize the volume group name.
109
110 @type enabled_disk_templates: list of strings
111 @param enabled_disk_templates: cluster-wide enabled disk templates
112
113 """
114 vg_name = None
115 if opts.vg_name is not None:
116 vg_name = opts.vg_name
117 if vg_name:
118 if not utils.IsLvmEnabled(enabled_disk_templates):
119 ToStdout("You specified a volume group with --vg-name, but you did not"
120 " enable any disk template that uses lvm.")
121 elif utils.IsLvmEnabled(enabled_disk_templates):
122 raise errors.OpPrereqError(
123 "LVM disk templates are enabled, but vg name not set.")
124 elif utils.IsLvmEnabled(enabled_disk_templates):
125 vg_name = constants.DEFAULT_VG
126 return vg_name
127
128
129 def _InitDrbdHelper(opts, enabled_disk_templates):
130 """Initialize the DRBD usermode helper.
131
132 """
133 drbd_enabled = constants.DT_DRBD8 in enabled_disk_templates
134
135 if not drbd_enabled and opts.drbd_helper is not None:
136 ToStdout("Note: You specified a DRBD usermode helper, while DRBD storage"
137 " is not enabled.")
138
139 if drbd_enabled:
140 if opts.drbd_helper is None:
141 return constants.DEFAULT_DRBD_HELPER
142 if opts.drbd_helper == '':
143 raise errors.OpPrereqError(
144 "Unsetting the drbd usermode helper while enabling DRBD is not"
145 " allowed.")
146
147 return opts.drbd_helper
148
149
150 @UsesRPC
151 def InitCluster(opts, args):
152 """Initialize the cluster.
153
154 @param opts: the command line options selected by the user
155 @type args: list
156 @param args: should contain only one element, the desired
157 cluster name
158 @rtype: int
159 @return: the desired exit code
160
161 """
162 enabled_disk_templates = _InitEnabledDiskTemplates(opts)
163
164 try:
165 vg_name = _InitVgName(opts, enabled_disk_templates)
166 drbd_helper = _InitDrbdHelper(opts, enabled_disk_templates)
167 except errors.OpPrereqError, e:
168 ToStderr(str(e))
169 return 1
170
171 master_netdev = opts.master_netdev
172 if master_netdev is None:
173 nic_mode = opts.nicparams.get(constants.NIC_MODE, None)
174 if not nic_mode:
175 # default case, use bridging
176 master_netdev = constants.DEFAULT_BRIDGE
177 elif nic_mode == constants.NIC_MODE_OVS:
178 # default ovs is different from default bridge
179 master_netdev = constants.DEFAULT_OVS
180 opts.nicparams[constants.NIC_LINK] = constants.DEFAULT_OVS
181
182 hvlist = opts.enabled_hypervisors
183 if hvlist is None:
184 hvlist = constants.DEFAULT_ENABLED_HYPERVISOR
185 hvlist = hvlist.split(",")
186
187 hvparams = dict(opts.hvparams)
188 beparams = opts.beparams
189 nicparams = opts.nicparams
190
191 diskparams = dict(opts.diskparams)
192
193 # check the disk template types here, as we cannot rely on the type check done
194 # by the opcode parameter types
195 diskparams_keys = set(diskparams.keys())
196 if not (diskparams_keys <= constants.DISK_TEMPLATES):
197 unknown = utils.NiceSort(diskparams_keys - constants.DISK_TEMPLATES)
198 ToStderr("Disk templates unknown: %s" % utils.CommaJoin(unknown))
199 return 1
200
201 # prepare beparams dict
202 beparams = objects.FillDict(constants.BEC_DEFAULTS, beparams)
203 utils.ForceDictType(beparams, constants.BES_PARAMETER_COMPAT)
204
205 # prepare nicparams dict
206 nicparams = objects.FillDict(constants.NICC_DEFAULTS, nicparams)
207 utils.ForceDictType(nicparams, constants.NICS_PARAMETER_TYPES)
208
209 # prepare ndparams dict
210 if opts.ndparams is None:
211 ndparams = dict(constants.NDC_DEFAULTS)
212 else:
213 ndparams = objects.FillDict(constants.NDC_DEFAULTS, opts.ndparams)
214 utils.ForceDictType(ndparams, constants.NDS_PARAMETER_TYPES)
215
216 # prepare hvparams dict
217 for hv in constants.HYPER_TYPES:
218 if hv not in hvparams:
219 hvparams[hv] = {}
220 hvparams[hv] = objects.FillDict(constants.HVC_DEFAULTS[hv], hvparams[hv])
221 utils.ForceDictType(hvparams[hv], constants.HVS_PARAMETER_TYPES)
222
223 # prepare diskparams dict
224 for templ in constants.DISK_TEMPLATES:
225 if templ not in diskparams:
226 diskparams[templ] = {}
227 diskparams[templ] = objects.FillDict(constants.DISK_DT_DEFAULTS[templ],
228 diskparams[templ])
229 utils.ForceDictType(diskparams[templ], constants.DISK_DT_TYPES)
230
231 # prepare ipolicy dict
232 ipolicy = CreateIPolicyFromOpts(
233 ispecs_mem_size=opts.ispecs_mem_size,
234 ispecs_cpu_count=opts.ispecs_cpu_count,
235 ispecs_disk_count=opts.ispecs_disk_count,
236 ispecs_disk_size=opts.ispecs_disk_size,
237 ispecs_nic_count=opts.ispecs_nic_count,
238 minmax_ispecs=opts.ipolicy_bounds_specs,
239 std_ispecs=opts.ipolicy_std_specs,
240 ipolicy_disk_templates=opts.ipolicy_disk_templates,
241 ipolicy_vcpu_ratio=opts.ipolicy_vcpu_ratio,
242 ipolicy_spindle_ratio=opts.ipolicy_spindle_ratio,
243 fill_all=True)
244
245 if opts.candidate_pool_size is None:
246 opts.candidate_pool_size = constants.MASTER_POOL_SIZE_DEFAULT
247
248 if opts.mac_prefix is None:
249 opts.mac_prefix = constants.DEFAULT_MAC_PREFIX
250
251 uid_pool = opts.uid_pool
252 if uid_pool is not None:
253 uid_pool = uidpool.ParseUidPool(uid_pool)
254
255 if opts.prealloc_wipe_disks is None:
256 opts.prealloc_wipe_disks = False
257
258 external_ip_setup_script = opts.use_external_mip_script
259 if external_ip_setup_script is None:
260 external_ip_setup_script = False
261
262 try:
263 primary_ip_version = int(opts.primary_ip_version)
264 except (ValueError, TypeError), err:
265 ToStderr("Invalid primary ip version value: %s" % str(err))
266 return 1
267
268 master_netmask = opts.master_netmask
269 try:
270 if master_netmask is not None:
271 master_netmask = int(master_netmask)
272 except (ValueError, TypeError), err:
273 ToStderr("Invalid master netmask value: %s" % str(err))
274 return 1
275
276 if opts.disk_state:
277 disk_state = utils.FlatToDict(opts.disk_state)
278 else:
279 disk_state = {}
280
281 hv_state = dict(opts.hv_state)
282
283 if opts.install_image:
284 install_image = opts.install_image
285 else:
286 install_image = ""
287
288 if opts.zeroing_image:
289 zeroing_image = opts.zeroing_image
290 else:
291 zeroing_image = ""
292
293 compression_tools = _GetCompressionTools(opts)
294
295 default_ialloc_params = opts.default_iallocator_params
296
297 if opts.enabled_user_shutdown:
298 enabled_user_shutdown = True
299 else:
300 enabled_user_shutdown = False
301
302 bootstrap.InitCluster(cluster_name=args[0],
303 secondary_ip=opts.secondary_ip,
304 vg_name=vg_name,
305 mac_prefix=opts.mac_prefix,
306 master_netmask=master_netmask,
307 master_netdev=master_netdev,
308 file_storage_dir=opts.file_storage_dir,
309 shared_file_storage_dir=opts.shared_file_storage_dir,
310 gluster_storage_dir=opts.gluster_storage_dir,
311 enabled_hypervisors=hvlist,
312 hvparams=hvparams,
313 beparams=beparams,
314 nicparams=nicparams,
315 ndparams=ndparams,
316 diskparams=diskparams,
317 ipolicy=ipolicy,
318 candidate_pool_size=opts.candidate_pool_size,
319 modify_etc_hosts=opts.modify_etc_hosts,
320 modify_ssh_setup=opts.modify_ssh_setup,
321 maintain_node_health=opts.maintain_node_health,
322 drbd_helper=drbd_helper,
323 uid_pool=uid_pool,
324 default_iallocator=opts.default_iallocator,
325 default_iallocator_params=default_ialloc_params,
326 primary_ip_version=primary_ip_version,
327 prealloc_wipe_disks=opts.prealloc_wipe_disks,
328 use_external_mip_script=external_ip_setup_script,
329 hv_state=hv_state,
330 disk_state=disk_state,
331 enabled_disk_templates=enabled_disk_templates,
332 install_image=install_image,
333 zeroing_image=zeroing_image,
334 compression_tools=compression_tools,
335 enabled_user_shutdown=enabled_user_shutdown,
336 )
337 op = opcodes.OpClusterPostInit()
338 SubmitOpCode(op, opts=opts)
339 return 0
340
341
342 @UsesRPC
343 def DestroyCluster(opts, args):
344 """Destroy the cluster.
345
346 @param opts: the command line options selected by the user
347 @type args: list
348 @param args: should be an empty list
349 @rtype: int
350 @return: the desired exit code
351
352 """
353 if not opts.yes_do_it:
354 ToStderr("Destroying a cluster is irreversible. If you really want"
355 " destroy this cluster, supply the --yes-do-it option.")
356 return 1
357
358 op = opcodes.OpClusterDestroy()
359 master_uuid = SubmitOpCode(op, opts=opts)
360 # if we reached this, the opcode didn't fail; we can proceed to
361 # shutdown all the daemons
362 bootstrap.FinalizeClusterDestroy(master_uuid)
363 return 0
364
365
366 def RenameCluster(opts, args):
367 """Rename the cluster.
368
369 @param opts: the command line options selected by the user
370 @type args: list
371 @param args: should contain only one element, the new cluster name
372 @rtype: int
373 @return: the desired exit code
374
375 """
376 cl = GetClient()
377
378 (cluster_name, ) = cl.QueryConfigValues(["cluster_name"])
379
380 new_name = args[0]
381 if not opts.force:
382 usertext = ("This will rename the cluster from '%s' to '%s'. If you are"
383 " connected over the network to the cluster name, the"
384 " operation is very dangerous as the IP address will be"
385 " removed from the node and the change may not go through."
386 " Continue?") % (cluster_name, new_name)
387 if not AskUser(usertext):
388 return 1
389
390 op = opcodes.OpClusterRename(name=new_name)
391 result = SubmitOpCode(op, opts=opts, cl=cl)
392
393 if result:
394 ToStdout("Cluster renamed from '%s' to '%s'", cluster_name, result)
395
396 return 0
397
398
399 def ActivateMasterIp(opts, args):
400 """Activates the master IP.
401
402 """
403 op = opcodes.OpClusterActivateMasterIp()
404 SubmitOpCode(op)
405 return 0
406
407
408 def DeactivateMasterIp(opts, args):
409 """Deactivates the master IP.
410
411 """
412 if not opts.confirm:
413 usertext = ("This will disable the master IP. All the open connections to"
414 " the master IP will be closed. To reach the master you will"
415 " need to use its node IP."
416 " Continue?")
417 if not AskUser(usertext):
418 return 1
419
420 op = opcodes.OpClusterDeactivateMasterIp()
421 SubmitOpCode(op)
422 return 0
423
424
425 def RedistributeConfig(opts, args):
426 """Forces push of the cluster configuration.
427
428 @param opts: the command line options selected by the user
429 @type args: list
430 @param args: empty list
431 @rtype: int
432 @return: the desired exit code
433
434 """
435 op = opcodes.OpClusterRedistConf()
436 if opts.yes_do_it:
437 SubmitOpCodeToDrainedQueue(op)
438 else:
439 SubmitOrSend(op, opts)
440 return 0
441
442
443 def ShowClusterVersion(opts, args):
444 """Write version of ganeti software to the standard output.
445
446 @param opts: the command line options selected by the user
447 @type args: list
448 @param args: should be an empty list
449 @rtype: int
450 @return: the desired exit code
451
452 """
453 cl = GetClient()
454 result = cl.QueryClusterInfo()
455 ToStdout("Software version: %s", result["software_version"])
456 ToStdout("Internode protocol: %s", result["protocol_version"])
457 ToStdout("Configuration format: %s", result["config_version"])
458 ToStdout("OS api version: %s", result["os_api_version"])
459 ToStdout("Export interface: %s", result["export_version"])
460 ToStdout("VCS version: %s", result["vcs_version"])
461 return 0
462
463
464 def ShowClusterMaster(opts, args):
465 """Write name of master node to the standard output.
466
467 @param opts: the command line options selected by the user
468 @type args: list
469 @param args: should be an empty list
470 @rtype: int
471 @return: the desired exit code
472
473 """
474 master = bootstrap.GetMaster()
475 ToStdout(master)
476 return 0
477
478
479 def _FormatGroupedParams(paramsdict, roman=False):
480 """Format Grouped parameters (be, nic, disk) by group.
481
482 @type paramsdict: dict of dicts
483 @param paramsdict: {group: {param: value, ...}, ...}
484 @rtype: dict of dicts
485 @return: copy of the input dictionaries with strings as values
486
487 """
488 ret = {}
489 for (item, val) in paramsdict.items():
490 if isinstance(val, dict):
491 ret[item] = _FormatGroupedParams(val, roman=roman)
492 elif roman and isinstance(val, int):
493 ret[item] = compat.TryToRoman(val)
494 else:
495 ret[item] = str(val)
496 return ret
497
498
499 def _FormatDataCollectors(paramsdict):
500 """Format Grouped parameters (be, nic, disk) by group.
501
502 @type paramsdict: dict of dicts
503 @param paramsdict: response of QueryClusterInfo
504 @rtype: dict of dicts
505 @return: parameter grouped by data collector
506
507 """
508
509 enabled = paramsdict[constants.DATA_COLLECTORS_ENABLED_NAME]
510 interval = paramsdict[constants.DATA_COLLECTORS_INTERVAL_NAME]
511
512 ret = {}
513 for key in enabled:
514 ret[key] = dict(active=enabled[key],
515 interval="%.3fs" % (interval[key] / 1e6))
516 return ret
517
518
519 def ShowClusterConfig(opts, args):
520 """Shows cluster information.
521
522 @param opts: the command line options selected by the user
523 @type args: list
524 @param args: should be an empty list
525 @rtype: int
526 @return: the desired exit code
527
528 """
529 cl = GetClient()
530 result = cl.QueryClusterInfo()
531
532 if result["tags"]:
533 tags = utils.CommaJoin(utils.NiceSort(result["tags"]))
534 else:
535 tags = "(none)"
536 if result["reserved_lvs"]:
537 reserved_lvs = utils.CommaJoin(result["reserved_lvs"])
538 else:
539 reserved_lvs = "(none)"
540
541 enabled_hv = result["enabled_hypervisors"]
542 hvparams = dict((k, v) for k, v in result["hvparams"].iteritems()
543 if k in enabled_hv)
544
545 info = [
546 ("Cluster name", result["name"]),
547 ("Cluster UUID", result["uuid"]),
548
549 ("Creation time", utils.FormatTime(result["ctime"])),
550 ("Modification time", utils.FormatTime(result["mtime"])),
551
552 ("Master node", result["master"]),
553
554 ("Architecture (this node)",
555 "%s (%s)" % (result["architecture"][0], result["architecture"][1])),
556
557 ("Tags", tags),
558
559 ("Default hypervisor", result["default_hypervisor"]),
560 ("Enabled hypervisors", utils.CommaJoin(enabled_hv)),
561
562 ("Hypervisor parameters", _FormatGroupedParams(hvparams,
563 opts.roman_integers)),
564
565 ("OS-specific hypervisor parameters",
566 _FormatGroupedParams(result["os_hvp"], opts.roman_integers)),
567
568 ("OS parameters", _FormatGroupedParams(result["osparams"],
569 opts.roman_integers)),
570
571 ("Hidden OSes", utils.CommaJoin(result["hidden_os"])),
572 ("Blacklisted OSes", utils.CommaJoin(result["blacklisted_os"])),
573
574 ("Cluster parameters", [
575 ("candidate pool size",
576 compat.TryToRoman(result["candidate_pool_size"],
577 convert=opts.roman_integers)),
578 ("maximal number of jobs running simultaneously",
579 compat.TryToRoman(result["max_running_jobs"],
580 convert=opts.roman_integers)),
581 ("maximal number of jobs simultaneously tracked by the scheduler",
582 compat.TryToRoman(result["max_tracked_jobs"],
583 convert=opts.roman_integers)),
584 ("mac prefix", result["mac_prefix"]),
585 ("master netdev", result["master_netdev"]),
586 ("master netmask", compat.TryToRoman(result["master_netmask"],
587 opts.roman_integers)),
588 ("use external master IP address setup script",
589 result["use_external_mip_script"]),
590 ("lvm volume group", result["volume_group_name"]),
591 ("lvm reserved volumes", reserved_lvs),
592 ("drbd usermode helper", result["drbd_usermode_helper"]),
593 ("file storage path", result["file_storage_dir"]),
594 ("shared file storage path", result["shared_file_storage_dir"]),
595 ("gluster storage path", result["gluster_storage_dir"]),
596 ("maintenance of node health", result["maintain_node_health"]),
597 ("uid pool", uidpool.FormatUidPool(result["uid_pool"])),
598 ("default instance allocator", result["default_iallocator"]),
599 ("default instance allocator parameters",
600 result["default_iallocator_params"]),
601 ("primary ip version", compat.TryToRoman(result["primary_ip_version"],
602 opts.roman_integers)),
603 ("preallocation wipe disks", result["prealloc_wipe_disks"]),
604 ("OS search path", utils.CommaJoin(pathutils.OS_SEARCH_PATH)),
605 ("ExtStorage Providers search path",
606 utils.CommaJoin(pathutils.ES_SEARCH_PATH)),
607 ("enabled disk templates",
608 utils.CommaJoin(result["enabled_disk_templates"])),
609 ("install image", result["install_image"]),
610 ("instance communication network",
611 result["instance_communication_network"]),
612 ("zeroing image", result["zeroing_image"]),
613 ("compression tools", result["compression_tools"]),
614 ("enabled user shutdown", result["enabled_user_shutdown"]),
615 ]),
616
617 ("Default node parameters",
618 _FormatGroupedParams(result["ndparams"], roman=opts.roman_integers)),
619
620 ("Default instance parameters",
621 _FormatGroupedParams(result["beparams"], roman=opts.roman_integers)),
622
623 ("Default nic parameters",
624 _FormatGroupedParams(result["nicparams"], roman=opts.roman_integers)),
625
626 ("Default disk parameters",
627 _FormatGroupedParams(result["diskparams"], roman=opts.roman_integers)),
628
629 ("Instance policy - limits for instances",
630 FormatPolicyInfo(result["ipolicy"], None, True, opts.roman_integers)),
631 ("Data collectors", _FormatDataCollectors(result)),
632 ]
633
634 PrintGenericInfo(info)
635 return 0
636
637
638 def ClusterCopyFile(opts, args):
639 """Copy a file from master to some nodes.
640
641 @param opts: the command line options selected by the user
642 @type args: list
643 @param args: should contain only one element, the path of
644 the file to be copied
645 @rtype: int
646 @return: the desired exit code
647
648 """
649 filename = args[0]
650 filename = os.path.abspath(filename)
651
652 if not os.path.exists(filename):
653 raise errors.OpPrereqError("No such filename '%s'" % filename,
654 errors.ECODE_INVAL)
655
656 cl = GetClient()
657 qcl = GetClient()
658 try:
659 cluster_name = cl.QueryConfigValues(["cluster_name"])[0]
660
661 results = GetOnlineNodes(nodes=opts.nodes, cl=qcl, filter_master=True,
662 secondary_ips=opts.use_replication_network,
663 nodegroup=opts.nodegroup)
664 ports = GetNodesSshPorts(opts.nodes, qcl)
665 finally:
666 cl.Close()
667 qcl.Close()
668
669 srun = ssh.SshRunner(cluster_name)
670 for (node, port) in zip(results, ports):
671 if not srun.CopyFileToNode(node, port, filename):
672 ToStderr("Copy of file %s to node %s:%d failed", filename, node, port)
673
674 return 0
675
676
677 def RunClusterCommand(opts, args):
678 """Run a command on some nodes.
679
680 @param opts: the command line options selected by the user
681 @type args: list
682 @param args: should contain the command to be run and its arguments
683 @rtype: int
684 @return: the desired exit code
685
686 """
687 cl = GetClient()
688 qcl = GetClient()
689
690 command = " ".join(args)
691
692 nodes = GetOnlineNodes(nodes=opts.nodes, cl=qcl, nodegroup=opts.nodegroup)
693 ports = GetNodesSshPorts(nodes, qcl)
694
695 cluster_name, master_node = cl.QueryConfigValues(["cluster_name",
696 "master_node"])
697
698 srun = ssh.SshRunner(cluster_name=cluster_name)
699
700 # Make sure master node is at list end
701 if master_node in nodes:
702 nodes.remove(master_node)
703 nodes.append(master_node)
704
705 for (name, port) in zip(nodes, ports):
706 result = srun.Run(name, constants.SSH_LOGIN_USER, command, port=port)
707
708 if opts.failure_only and result.exit_code == constants.EXIT_SUCCESS:
709 # Do not output anything for successful commands
710 continue
711
712 ToStdout("------------------------------------------------")
713 if opts.show_machine_names:
714 for line in result.output.splitlines():
715 ToStdout("%s: %s", name, line)
716 else:
717 ToStdout("node: %s", name)
718 ToStdout("%s", result.output)
719 ToStdout("return code = %s", result.exit_code)
720
721 return 0
722
723
724 def VerifyCluster(opts, args):
725 """Verify integrity of cluster, performing various test on nodes.
726
727 @param opts: the command line options selected by the user
728 @type args: list
729 @param args: should be an empty list
730 @rtype: int
731 @return: the desired exit code
732
733 """
734 skip_checks = []
735
736 if opts.skip_nplusone_mem:
737 skip_checks.append(constants.VERIFY_NPLUSONE_MEM)
738
739 cl = GetClient()
740
741 op = opcodes.OpClusterVerify(verbose=opts.verbose,
742 error_codes=opts.error_codes,
743 debug_simulate_errors=opts.simulate_errors,
744 skip_checks=skip_checks,
745 ignore_errors=opts.ignore_errors,
746 group_name=opts.nodegroup,
747 verify_clutter=opts.verify_clutter)
748 result = SubmitOpCode(op, cl=cl, opts=opts)
749
750 # Keep track of submitted jobs
751 jex = JobExecutor(cl=cl, opts=opts)
752
753 for (status, job_id) in result[constants.JOB_IDS_KEY]:
754 jex.AddJobId(None, status, job_id)
755
756 results = jex.GetResults()
757
758 (bad_jobs, bad_results) = \
759 map(len,
760 # Convert iterators to lists
761 map(list,
762 # Count errors
763 map(compat.partial(itertools.ifilterfalse, bool),
764 # Convert result to booleans in a tuple
765 zip(*((job_success, len(op_results) == 1 and op_results[0])
766 for (job_success, op_results) in results)))))
767
768 if bad_jobs == 0 and bad_results == 0:
769 rcode = constants.EXIT_SUCCESS
770 else:
771 rcode = constants.EXIT_FAILURE
772 if bad_jobs > 0:
773 ToStdout("%s job(s) failed while verifying the cluster.", bad_jobs)
774
775 return rcode
776
777
778 def VerifyDisks(opts, args):
779 """Verify integrity of cluster disks.
780
781 @param opts: the command line options selected by the user
782 @type args: list
783 @param args: should be an empty list
784 @rtype: int
785 @return: the desired exit code
786
787 """
788 cl = GetClient()
789
790 op = opcodes.OpClusterVerifyDisks()
791
792 result = SubmitOpCode(op, cl=cl, opts=opts)
793
794 # Keep track of submitted jobs
795 jex = JobExecutor(cl=cl, opts=opts)
796
797 for (status, job_id) in result[constants.JOB_IDS_KEY]:
798 jex.AddJobId(None, status, job_id)
799
800 retcode = constants.EXIT_SUCCESS
801
802 for (status, result) in jex.GetResults():
803 if not status:
804 ToStdout("Job failed: %s", result)
805 continue
806
807 ((bad_nodes, instances, missing), ) = result
808
809 for node, text in bad_nodes.items():
810 ToStdout("Error gathering data on node %s: %s",
811 node, utils.SafeEncode(text[-400:]))
812 retcode = constants.EXIT_FAILURE
813 ToStdout("You need to fix these nodes first before fixing instances")
814
815 for iname in instances:
816 if iname in missing:
817 continue
818 op = opcodes.OpInstanceActivateDisks(instance_name=iname)
819 try:
820 ToStdout("Activating disks for instance '%s'", iname)
821 SubmitOpCode(op, opts=opts, cl=cl)
822 except errors.GenericError, err:
823 nret, msg = FormatError(err)
824 retcode |= nret
825 ToStderr("Error activating disks for instance %s: %s", iname, msg)
826
827 if missing:
828 for iname, ival in missing.iteritems():
829 all_missing = compat.all(x[0] in bad_nodes for x in ival)
830 if all_missing:
831 ToStdout("Instance %s cannot be verified as it lives on"
832 " broken nodes", iname)
833 else:
834 ToStdout("Instance %s has missing logical volumes:", iname)
835 ival.sort()
836 for node, vol in ival:
837 if node in bad_nodes:
838 ToStdout("\tbroken node %s /dev/%s", node, vol)
839 else:
840 ToStdout("\t%s /dev/%s", node, vol)
841
842 ToStdout("You need to replace or recreate disks for all the above"
843 " instances if this message persists after fixing broken nodes.")
844 retcode = constants.EXIT_FAILURE
845 elif not instances:
846 ToStdout("No disks need to be activated.")
847
848 return retcode
849
850
851 def RepairDiskSizes(opts, args):
852 """Verify sizes of cluster disks.
853
854 @param opts: the command line options selected by the user
855 @type args: list
856 @param args: optional list of instances to restrict check to
857 @rtype: int
858 @return: the desired exit code
859
860 """
861 op = opcodes.OpClusterRepairDiskSizes(instances=args)
862 SubmitOpCode(op, opts=opts)
863
864
865 @UsesRPC
866 def MasterFailover(opts, args):
867 """Failover the master node.
868
869 This command, when run on a non-master node, will cause the current
870 master to cease being master, and the non-master to become new
871 master.
872
873 @param opts: the command line options selected by the user
874 @type args: list
875 @param args: should be an empty list
876 @rtype: int
877 @return: the desired exit code
878
879 """
880 if opts.no_voting and not opts.yes_do_it:
881 usertext = ("This will perform the failover even if most other nodes"
882 " are down, or if this node is outdated. This is dangerous"
883 " as it can lead to a non-consistent cluster. Check the"
884 " gnt-cluster(8) man page before proceeding. Continue?")
885 if not AskUser(usertext):
886 return 1
887
888 rvlaue, msgs = bootstrap.MasterFailover(no_voting=opts.no_voting)
889 for msg in msgs:
890 ToStderr(msg)
891 return rvlaue
892
893
894 def MasterPing(opts, args):
895 """Checks if the master is alive.
896
897 @param opts: the command line options selected by the user
898 @type args: list
899 @param args: should be an empty list
900 @rtype: int
901 @return: the desired exit code
902
903 """
904 try:
905 cl = GetClient()
906 cl.QueryClusterInfo()
907 return 0
908 except Exception: # pylint: disable=W0703
909 return 1
910
911
912 def SearchTags(opts, args):
913 """Searches the tags on all the cluster.
914
915 @param opts: the command line options selected by the user
916 @type args: list
917 @param args: should contain only one element, the tag pattern
918 @rtype: int
919 @return: the desired exit code
920
921 """
922 op = opcodes.OpTagsSearch(pattern=args[0])
923 result = SubmitOpCode(op, opts=opts)
924 if not result:
925 return 1
926 result = list(result)
927 result.sort()
928 for path, tag in result:
929 ToStdout("%s %s", path, tag)
930
931
932 def _ReadAndVerifyCert(cert_filename, verify_private_key=False):
933 """Reads and verifies an X509 certificate.
934
935 @type cert_filename: string
936 @param cert_filename: the path of the file containing the certificate to
937 verify encoded in PEM format
938 @type verify_private_key: bool
939 @param verify_private_key: whether to verify the private key in addition to
940 the public certificate
941 @rtype: string
942 @return: a string containing the PEM-encoded certificate.
943
944 """
945 try:
946 pem = utils.ReadFile(cert_filename)
947 except IOError, err:
948 raise errors.X509CertError(cert_filename,
949 "Unable to read certificate: %s" % str(err))
950
951 try:
952 OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM, pem)
953 except Exception, err:
954 raise errors.X509CertError(cert_filename,
955 "Unable to load certificate: %s" % str(err))
956
957 if verify_private_key:
958 try:
959 OpenSSL.crypto.load_privatekey(OpenSSL.crypto.FILETYPE_PEM, pem)
960 except Exception, err:
961 raise errors.X509CertError(cert_filename,
962 "Unable to load private key: %s" % str(err))
963
964 return pem
965
966
967 def _RenewCrypto(new_cluster_cert, new_rapi_cert, # pylint: disable=R0911
968 rapi_cert_filename, new_spice_cert, spice_cert_filename,
969 spice_cacert_filename, new_confd_hmac_key, new_cds,
970 cds_filename, force, new_node_cert, new_ssh_keys,
971 verbose, debug):
972 """Renews cluster certificates, keys and secrets.
973
974 @type new_cluster_cert: bool
975 @param new_cluster_cert: Whether to generate a new cluster certificate
976 @type new_rapi_cert: bool
977 @param new_rapi_cert: Whether to generate a new RAPI certificate
978 @type rapi_cert_filename: string
979 @param rapi_cert_filename: Path to file containing new RAPI certificate
980 @type new_spice_cert: bool
981 @param new_spice_cert: Whether to generate a new SPICE certificate
982 @type spice_cert_filename: string
983 @param spice_cert_filename: Path to file containing new SPICE certificate
984 @type spice_cacert_filename: string
985 @param spice_cacert_filename: Path to file containing the certificate of the
986 CA that signed the SPICE certificate
987 @type new_confd_hmac_key: bool
988 @param new_confd_hmac_key: Whether to generate a new HMAC key
989 @type new_cds: bool
990 @param new_cds: Whether to generate a new cluster domain secret
991 @type cds_filename: string
992 @param cds_filename: Path to file containing new cluster domain secret
993 @type force: bool
994 @param force: Whether to ask user for confirmation
995 @type new_node_cert: bool
996 @param new_node_cert: Whether to generate new node certificates
997 @type new_ssh_keys: bool
998 @param new_ssh_keys: Whether to generate new node SSH keys
999 @type verbose: boolean
1000 @param verbose: show verbose output
1001 @type debug: boolean
1002 @param debug: show debug output
1003
1004 """
1005 ToStdout("Updating certificates now. Running \"gnt-cluster verify\" "
1006 " is recommended after this operation.")
1007
1008 if new_rapi_cert and rapi_cert_filename:
1009 ToStderr("Only one of the --new-rapi-certificate and --rapi-certificate"
1010 " options can be specified at the same time.")
1011 return 1
1012
1013 if new_cds and cds_filename:
1014 ToStderr("Only one of the --new-cluster-domain-secret and"
1015 " --cluster-domain-secret options can be specified at"
1016 " the same time.")
1017 return 1
1018
1019 if new_spice_cert and (spice_cert_filename or spice_cacert_filename):
1020 ToStderr("When using --new-spice-certificate, the --spice-certificate"
1021 " and --spice-ca-certificate must not be used.")
1022 return 1
1023
1024 if bool(spice_cacert_filename) ^ bool(spice_cert_filename):
1025 ToStderr("Both --spice-certificate and --spice-ca-certificate must be"
1026 " specified.")
1027 return 1
1028
1029 rapi_cert_pem, spice_cert_pem, spice_cacert_pem = (None, None, None)
1030 try:
1031 if rapi_cert_filename:
1032 rapi_cert_pem = _ReadAndVerifyCert(rapi_cert_filename, True)
1033 if spice_cert_filename:
1034 spice_cert_pem = _ReadAndVerifyCert(spice_cert_filename, True)
1035 spice_cacert_pem = _ReadAndVerifyCert(spice_cacert_filename)
1036 except errors.X509CertError, err:
1037 ToStderr("Unable to load X509 certificate from %s: %s", err[0], err[1])
1038 return 1
1039
1040 if cds_filename:
1041 try:
1042 cds = utils.ReadFile(cds_filename)
1043 except Exception, err: # pylint: disable=W0703
1044 ToStderr("Can't load new cluster domain secret from %s: %s" %
1045 (cds_filename, str(err)))
1046 return 1
1047 else:
1048 cds = None
1049
1050 if not force:
1051 usertext = ("This requires all daemons on all nodes to be restarted and"
1052 " may take some time. Continue?")
1053 if not AskUser(usertext):
1054 return 1
1055
1056 def _RenewCryptoInner(ctx):
1057 ctx.feedback_fn("Updating certificates and keys")
1058
1059 bootstrap.GenerateClusterCrypto(False,
1060 new_rapi_cert,
1061 new_spice_cert,
1062 new_confd_hmac_key,
1063 new_cds,
1064 False,
1065 None,
1066 rapi_cert_pem=rapi_cert_pem,
1067 spice_cert_pem=spice_cert_pem,
1068 spice_cacert_pem=spice_cacert_pem,
1069 cds=cds)
1070
1071 files_to_copy = []
1072
1073 if new_rapi_cert or rapi_cert_pem:
1074 files_to_copy.append(pathutils.RAPI_CERT_FILE)
1075
1076 if new_spice_cert or spice_cert_pem:
1077 files_to_copy.append(pathutils.SPICE_CERT_FILE)
1078 files_to_copy.append(pathutils.SPICE_CACERT_FILE)
1079
1080 if new_confd_hmac_key:
1081 files_to_copy.append(pathutils.CONFD_HMAC_KEY)
1082
1083 if new_cds or cds:
1084 files_to_copy.append(pathutils.CLUSTER_DOMAIN_SECRET_FILE)
1085
1086 if files_to_copy:
1087 for node_name in ctx.nonmaster_nodes:
1088 port = ctx.ssh_ports[node_name]
1089 ctx.feedback_fn("Copying %s to %s:%d" %
1090 (", ".join(files_to_copy), node_name, port))
1091 for file_name in files_to_copy:
1092 ctx.ssh.CopyFileToNode(node_name, port, file_name)
1093
1094 def _RenewClientCerts(ctx):
1095 ctx.feedback_fn("Updating client SSL certificates.")
1096
1097 cluster_name = ssconf.SimpleStore().GetClusterName()
1098
1099 for node_name in ctx.nonmaster_nodes + [ctx.master_node]:
1100 ssh_port = ctx.ssh_ports[node_name]
1101 data = {
1102 constants.NDS_CLUSTER_NAME: cluster_name,
1103 constants.NDS_NODE_DAEMON_CERTIFICATE:
1104 utils.ReadFile(pathutils.NODED_CERT_FILE),
1105 constants.NDS_NODE_NAME: node_name,
1106 constants.NDS_ACTION: constants.CRYPTO_ACTION_CREATE,
1107 }
1108
1109 ssh.RunSshCmdWithStdin(
1110 cluster_name,
1111 node_name,
1112 pathutils.SSL_UPDATE,
1113 ssh_port,
1114 data,
1115 debug=ctx.debug,
1116 verbose=ctx.verbose,
1117 use_cluster_key=True,
1118 ask_key=False,
1119 strict_host_check=True)
1120
1121 # Create a temporary ssconf file using the master's client cert digest
1122 # and the 'bootstrap' keyword to enable distribution of all nodes' digests.
1123 master_digest = utils.GetCertificateDigest()
1124 ssconf_master_candidate_certs_filename = os.path.join(
1125 pathutils.DATA_DIR, "%s%s" %
1126 (constants.SSCONF_FILEPREFIX, constants.SS_MASTER_CANDIDATES_CERTS))
1127 utils.WriteFile(
1128 ssconf_master_candidate_certs_filename,
1129 data="%s=%s" % (constants.CRYPTO_BOOTSTRAP, master_digest))
1130 for node_name in ctx.nonmaster_nodes:
1131 port = ctx.ssh_ports[node_name]
1132 ctx.feedback_fn("Copying %s to %s:%d" %
1133 (ssconf_master_candidate_certs_filename, node_name, port))
1134 ctx.ssh.CopyFileToNode(node_name, port,
1135 ssconf_master_candidate_certs_filename)
1136
1137 # Write the boostrap entry to the config using wconfd.
1138 config_live_lock = utils.livelock.LiveLock("renew_crypto")
1139 cfg = config.GetConfig(None, config_live_lock)
1140 cfg.AddNodeToCandidateCerts(constants.CRYPTO_BOOTSTRAP, master_digest)
1141 cfg.Update(cfg.GetClusterInfo(), ctx.feedback_fn)
1142
1143 def _RenewServerAndClientCerts(ctx):
1144 ctx.feedback_fn("Updating the cluster SSL certificate.")
1145
1146 master_name = ssconf.SimpleStore().GetMasterNode()
1147 bootstrap.GenerateClusterCrypto(True, # cluster cert
1148 False, # rapi cert
1149 False, # spice cert
1150 False, # confd hmac key
1151 False, # cds
1152 True, # client cert
1153 master_name)
1154
1155 for node_name in ctx.nonmaster_nodes:
1156 port = ctx.ssh_ports[node_name]
1157 server_cert = pathutils.NODED_CERT_FILE
1158 ctx.feedback_fn("Copying %s to %s:%d" %
1159 (server_cert, node_name, port))
1160 ctx.ssh.CopyFileToNode(node_name, port, server_cert)
1161
1162 _RenewClientCerts(ctx)
1163
1164 if new_rapi_cert or new_spice_cert or new_confd_hmac_key or new_cds:
1165 RunWhileClusterStopped(ToStdout, _RenewCryptoInner)
1166
1167 # If only node certficates are recreated, call _RenewClientCerts only.
1168 if new_node_cert and not new_cluster_cert:
1169 RunWhileDaemonsStopped(ToStdout, [constants.NODED, constants.WCONFD],
1170 _RenewClientCerts, verbose=verbose, debug=debug)
1171
1172 # If the cluster certificate are renewed, the client certificates need
1173 # to be renewed too.
1174 if new_cluster_cert:
1175 RunWhileDaemonsStopped(ToStdout, [constants.NODED, constants.WCONFD],
1176 _RenewServerAndClientCerts, verbose=verbose,
1177 debug=debug)
1178
1179 if new_node_cert or new_cluster_cert or new_ssh_keys:
1180 cl = GetClient()
1181 renew_op = opcodes.OpClusterRenewCrypto(
1182 node_certificates=new_node_cert or new_cluster_cert,
1183 ssh_keys=new_ssh_keys)
1184 SubmitOpCode(renew_op, cl=cl)
1185
1186 ToStdout("All requested certificates and keys have been replaced."
1187 " Running \"gnt-cluster verify\" now is recommended.")
1188
1189 return 0
1190
1191
1192 def _BuildGanetiPubKeys(options, pub_key_file=pathutils.SSH_PUB_KEYS, cl=None,
1193 get_online_nodes_fn=GetOnlineNodes,
1194 get_nodes_ssh_ports_fn=GetNodesSshPorts,
1195 get_node_uuids_fn=GetNodeUUIDs,
1196 homedir_fn=None):
1197 """Recreates the 'ganeti_pub_key' file by polling all nodes.
1198
1199 """
1200 if os.path.exists(pub_key_file):
1201 utils.CreateBackup(pub_key_file)
1202 utils.RemoveFile(pub_key_file)
1203
1204 ssh.ClearPubKeyFile(pub_key_file)
1205
1206 if not cl:
1207 cl = GetClient()
1208
1209 (cluster_name, master_node) = \
1210 cl.QueryConfigValues(["cluster_name", "master_node"])
1211
1212 online_nodes = get_online_nodes_fn([], cl=cl)
1213 ssh_ports = get_nodes_ssh_ports_fn(online_nodes + [master_node], cl)
1214 ssh_port_map = dict(zip(online_nodes + [master_node], ssh_ports))
1215
1216 node_uuids = get_node_uuids_fn(online_nodes + [master_node], cl)
1217 node_uuid_map = dict(zip(online_nodes + [master_node], node_uuids))
1218
1219 nonmaster_nodes = [name for name in online_nodes
1220 if name != master_node]
1221
1222 _, pub_key_filename, _ = \
1223 ssh.GetUserFiles(constants.SSH_LOGIN_USER, mkdir=False, dircheck=False,
1224 kind=constants.SSHK_DSA, _homedir_fn=homedir_fn)
1225
1226 # get the key file of the master node
1227 pub_key = utils.ReadFile(pub_key_filename)
1228 ssh.AddPublicKey(node_uuid_map[master_node], pub_key,
1229 key_file=pub_key_file)
1230
1231 # get the key files of all non-master nodes
1232 for node in nonmaster_nodes:
1233 pub_key = ssh.ReadRemoteSshPubKeys(pub_key_filename, node, cluster_name,
1234 ssh_port_map[node],
1235 options.ssh_key_check,
1236 options.ssh_key_check)
1237 ssh.AddPublicKey(node_uuid_map[node], pub_key, key_file=pub_key_file)
1238
1239
1240 def RenewCrypto(opts, args):
1241 """Renews cluster certificates, keys and secrets.
1242
1243 """
1244 if opts.new_ssh_keys:
1245 _BuildGanetiPubKeys(opts)
1246 return _RenewCrypto(opts.new_cluster_cert,
1247 opts.new_rapi_cert,
1248 opts.rapi_cert,
1249 opts.new_spice_cert,
1250 opts.spice_cert,
1251 opts.spice_cacert,
1252 opts.new_confd_hmac_key,
1253 opts.new_cluster_domain_secret,
1254 opts.cluster_domain_secret,
1255 opts.force,
1256 opts.new_node_cert,
1257 opts.new_ssh_keys,
1258 opts.verbose,
1259 opts.debug > 0)
1260
1261
1262 def _GetEnabledDiskTemplates(opts):
1263 """Determine the list of enabled disk templates.
1264
1265 """
1266 if opts.enabled_disk_templates:
1267 return opts.enabled_disk_templates.split(",")
1268 else:
1269 return None
1270
1271
1272 def _GetVgName(opts, enabled_disk_templates):
1273 """Determine the volume group name.
1274
1275 @type enabled_disk_templates: list of strings
1276 @param enabled_disk_templates: cluster-wide enabled disk-templates
1277
1278 """
1279 # consistency between vg name and enabled disk templates
1280 vg_name = None
1281 if opts.vg_name is not None:
1282 vg_name = opts.vg_name
1283 if enabled_disk_templates:
1284 if vg_name and not utils.IsLvmEnabled(enabled_disk_templates):
1285 ToStdout("You specified a volume group with --vg-name, but you did not"
1286 " enable any of the following lvm-based disk templates: %s" %
1287 utils.CommaJoin(constants.DTS_LVM))
1288 return vg_name
1289
1290
1291 def _GetDrbdHelper(opts, enabled_disk_templates):
1292 """Determine the DRBD usermode helper.
1293
1294 """
1295 drbd_helper = opts.drbd_helper
1296 if enabled_disk_templates:
1297 drbd_enabled = constants.DT_DRBD8 in enabled_disk_templates
1298 if not drbd_enabled and opts.drbd_helper:
1299 ToStdout("You specified a DRBD usermode helper with "
1300 " --drbd-usermode-helper while DRBD is not enabled.")
1301 return drbd_helper
1302
1303
1304 def _GetCompressionTools(opts):
1305 """Determine the list of custom compression tools.
1306
1307 """
1308 if opts.compression_tools:
1309 return opts.compression_tools.split(",")
1310 elif opts.compression_tools is None:
1311 return None # To note the parameter was not provided
1312 else:
1313 return constants.IEC_DEFAULT_TOOLS # Resetting to default
1314
1315
1316 def SetClusterParams(opts, args):
1317 """Modify the cluster.
1318
1319 @param opts: the command line options selected by the user
1320 @type args: list
1321 @param args: should be an empty list
1322 @rtype: int
1323 @return: the desired exit code
1324
1325 """
1326 if not (opts.vg_name is not None or
1327 opts.drbd_helper is not None or
1328 opts.enabled_hypervisors or opts.hvparams or
1329 opts.beparams or opts.nicparams or
1330 opts.ndparams or opts.diskparams or
1331 opts.candidate_pool_size is not None or
1332 opts.max_running_jobs is not None or
1333 opts.max_tracked_jobs is not None or
1334 opts.uid_pool is not None or
1335 opts.maintain_node_health is not None or
1336 opts.add_uids is not None or
1337 opts.remove_uids is not None or
1338 opts.default_iallocator is not None or
1339 opts.default_iallocator_params is not None or
1340 opts.reserved_lvs is not None or
1341 opts.mac_prefix is not None or
1342 opts.master_netdev is not None or
1343 opts.master_netmask is not None or
1344 opts.use_external_mip_script is not None or
1345 opts.prealloc_wipe_disks is not None or
1346 opts.hv_state or
1347 opts.enabled_disk_templates or
1348 opts.disk_state or
1349 opts.ipolicy_bounds_specs is not None or
1350 opts.ipolicy_std_specs is not None or
1351 opts.ipolicy_disk_templates is not None or
1352 opts.ipolicy_vcpu_ratio is not None or
1353 opts.ipolicy_spindle_ratio is not None or
1354 opts.modify_etc_hosts is not None or
1355 opts.file_storage_dir is not None or
1356 opts.install_image is not None or
1357 opts.instance_communication_network is not None or
1358 opts.zeroing_image is not None or
1359 opts.shared_file_storage_dir is not None or
1360 opts.compression_tools is not None or
1361 opts.shared_file_storage_dir is not None or
1362 opts.enabled_user_shutdown is not None or
1363 opts.data_collector_interval or
1364 opts.enabled_data_collectors):
1365 ToStderr("Please give at least one of the parameters.")
1366 return 1
1367
1368 enabled_disk_templates = _GetEnabledDiskTemplates(opts)
1369 vg_name = _GetVgName(opts, enabled_disk_templates)
1370
1371 try:
1372 drbd_helper = _GetDrbdHelper(opts, enabled_disk_templates)
1373 except errors.OpPrereqError, e:
1374 ToStderr(str(e))
1375 return 1
1376
1377 hvlist = opts.enabled_hypervisors
1378 if hvlist is not None:
1379 hvlist = hvlist.split(",")
1380
1381 # a list of (name, dict) we can pass directly to dict() (or [])
1382 hvparams = dict(opts.hvparams)
1383 for hv_params in hvparams.values():
1384 utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
1385
1386 diskparams = dict(opts.diskparams)
1387
1388 for dt_params in diskparams.values():
1389 utils.ForceDictType(dt_params, constants.DISK_DT_TYPES)
1390
1391 beparams = opts.beparams
1392 utils.ForceDictType(beparams, constants.BES_PARAMETER_COMPAT)
1393
1394 nicparams = opts.nicparams
1395 utils.ForceDictType(nicparams, constants.NICS_PARAMETER_TYPES)
1396
1397 ndparams = opts.ndparams
1398 if ndparams is not None:
1399 utils.ForceDictType(ndparams, constants.NDS_PARAMETER_TYPES)
1400
1401 ipolicy = CreateIPolicyFromOpts(
1402 minmax_ispecs=opts.ipolicy_bounds_specs,
1403 std_ispecs=opts.ipolicy_std_specs,
1404 ipolicy_disk_templates=opts.ipolicy_disk_templates,
1405 ipolicy_vcpu_ratio=opts.ipolicy_vcpu_ratio,
1406 ipolicy_spindle_ratio=opts.ipolicy_spindle_ratio,
1407 )
1408
1409 mnh = opts.maintain_node_health
1410
1411 uid_pool = opts.uid_pool
1412 if uid_pool is not None:
1413 uid_pool = uidpool.ParseUidPool(uid_pool)
1414
1415 add_uids = opts.add_uids
1416 if add_uids is not None:
1417 add_uids = uidpool.ParseUidPool(add_uids)
1418
1419 remove_uids = opts.remove_uids
1420 if remove_uids is not None:
1421 remove_uids = uidpool.ParseUidPool(remove_uids)
1422
1423 if opts.reserved_lvs is not None:
1424 if opts.reserved_lvs == "":
1425 opts.reserved_lvs = []
1426 else:
1427 opts.reserved_lvs = utils.UnescapeAndSplit(opts.reserved_lvs, sep=",")
1428
1429 if opts.master_netmask is not None:
1430 try:
1431 opts.master_netmask = int(opts.master_netmask)
1432 except ValueError:
1433 ToStderr("The --master-netmask option expects an int parameter.")
1434 return 1
1435
1436 ext_ip_script = opts.use_external_mip_script
1437
1438 if opts.disk_state:
1439 disk_state = utils.FlatToDict(opts.disk_state)
1440 else:
1441 disk_state = {}
1442
1443 hv_state = dict(opts.hv_state)
1444
1445 compression_tools = _GetCompressionTools(opts)
1446
1447 enabled_data_collectors = dict(
1448 (k, v.lower().startswith("t"))
1449 for k, v in opts.enabled_data_collectors.items())
1450
1451 unrecognized_data_collectors = [
1452 k for k in enabled_data_collectors.keys()
1453 if k not in constants.DATA_COLLECTOR_NAMES]
1454 if unrecognized_data_collectors:
1455 ToStderr("Data collector names not recognized: %s" %
1456 ", ".join(unrecognized_data_collectors))
1457
1458 try:
1459 data_collector_interval = dict(
1460 (k, long(1e6 * float(v)))
1461 for (k, v) in opts.data_collector_interval.items())
1462 except ValueError:
1463 ToStderr("Can't transform all values to integers: {}".format(
1464 opts.data_collector_interval))
1465 return 1
1466 if any(v <= 0 for v in data_collector_interval):
1467 ToStderr("Some interval times where not above zero.")
1468 return 1
1469
1470 op = opcodes.OpClusterSetParams(
1471 vg_name=vg_name,
1472 drbd_helper=drbd_helper,
1473 enabled_hypervisors=hvlist,
1474 hvparams=hvparams,
1475 os_hvp=None,
1476 beparams=beparams,
1477 nicparams=nicparams,
1478 ndparams=ndparams,
1479 diskparams=diskparams,
1480 ipolicy=ipolicy,
1481 candidate_pool_size=opts.candidate_pool_size,
1482 max_running_jobs=opts.max_running_jobs,
1483 max_tracked_jobs=opts.max_tracked_jobs,
1484 maintain_node_health=mnh,
1485 modify_etc_hosts=opts.modify_etc_hosts,
1486 uid_pool=uid_pool,
1487 add_uids=add_uids,
1488 remove_uids=remove_uids,
1489 default_iallocator=opts.default_iallocator,
1490 default_iallocator_params=opts.default_iallocator_params,
1491 prealloc_wipe_disks=opts.prealloc_wipe_disks,
1492 mac_prefix=opts.mac_prefix,
1493 master_netdev=opts.master_netdev,
1494 master_netmask=opts.master_netmask,
1495 reserved_lvs=opts.reserved_lvs,
1496 use_external_mip_script=ext_ip_script,
1497 hv_state=hv_state,
1498 disk_state=disk_state,
1499 enabled_disk_templates=enabled_disk_templates,
1500 force=opts.force,
1501 file_storage_dir=opts.file_storage_dir,
1502 install_image=opts.install_image,
1503 instance_communication_network=opts.instance_communication_network,
1504 zeroing_image=opts.zeroing_image,
1505 shared_file_storage_dir=opts.shared_file_storage_dir,
1506 compression_tools=compression_tools,
1507 enabled_user_shutdown=opts.enabled_user_shutdown,
1508 enabled_data_collectors=enabled_data_collectors,
1509 data_collector_interval=data_collector_interval,
1510 )
1511 return base.GetResult(None, opts, SubmitOrSend(op, opts))
1512
1513
1514 def QueueOps(opts, args):
1515 """Queue operations.
1516
1517 @param opts: the command line options selected by the user
1518 @type args: list
1519 @param args: should contain only one element, the subcommand
1520 @rtype: int
1521 @return: the desired exit code
1522
1523 """
1524 command = args[0]
1525 client = GetClient()
1526 if command in ("drain", "undrain"):
1527 drain_flag = command == "drain"
1528 client.SetQueueDrainFlag(drain_flag)
1529 elif command == "info":
1530 result = client.QueryConfigValues(["drain_flag"])
1531 if result[0]:
1532 val = "set"
1533 else:
1534 val = "unset"
1535 ToStdout("The drain flag is %s" % val)
1536 else:
1537 raise errors.OpPrereqError("Command '%s' is not valid." % command,
1538 errors.ECODE_INVAL)
1539
1540 return 0
1541
1542
1543 def _ShowWatcherPause(until):
1544 if until is None or until < time.time():
1545 ToStdout("The watcher is not paused.")
1546 else:
1547 ToStdout("The watcher is paused until %s.", time.ctime(until))
1548
1549
1550 def WatcherOps(opts, args):
1551 """Watcher operations.
1552
1553 @param opts: the command line options selected by the user
1554 @type args: list
1555 @param args: should contain only one element, the subcommand
1556 @rtype: int
1557 @return: the desired exit code
1558
1559 """
1560 command = args[0]
1561 client = GetClient()
1562
1563 if command == "continue":
1564 client.SetWatcherPause(None)
1565 ToStdout("The watcher is no longer paused.")
1566
1567 elif command == "pause":
1568 if len(args) < 2:
1569 raise errors.OpPrereqError("Missing pause duration", errors.ECODE_INVAL)
1570
1571 result = client.SetWatcherPause(time.time() + ParseTimespec(args[1]))
1572 _ShowWatcherPause(result)
1573
1574 elif command == "info":
1575 result = client.QueryConfigValues(["watcher_pause"])
1576 _ShowWatcherPause(result[0])
1577
1578 else:
1579 raise errors.OpPrereqError("Command '%s' is not valid." % command,
1580 errors.ECODE_INVAL)
1581
1582 return 0
1583
1584
1585 def _OobPower(opts, node_list, power):
1586 """Puts the node in the list to desired power state.
1587
1588 @param opts: The command line options selected by the user
1589 @param node_list: The list of nodes to operate on
1590 @param power: True if they should be powered on, False otherwise
1591 @return: The success of the operation (none failed)
1592
1593 """
1594 if power:
1595 command = constants.OOB_POWER_ON
1596 else:
1597 command = constants.OOB_POWER_OFF
1598
1599 op = opcodes.OpOobCommand(node_names=node_list,
1600 command=command,
1601 ignore_status=True,
1602 timeout=opts.oob_timeout,
1603 power_delay=opts.power_delay)
1604 result = SubmitOpCode(op, opts=opts)
1605 errs = 0
1606 for node_result in result:
1607 (node_tuple, data_tuple) = node_result
1608 (_, node_name) = node_tuple
1609 (data_status, _) = data_tuple
1610 if data_status != constants.RS_NORMAL:
1611 assert data_status != constants.RS_UNAVAIL
1612 errs += 1
1613 ToStderr("There was a problem changing power for %s, please investigate",
1614 node_name)
1615
1616 if errs > 0:
1617 return False
1618
1619 return True
1620
1621
1622 def _InstanceStart(opts, inst_list, start, no_remember=False):
1623 """Puts the instances in the list to desired state.
1624
1625 @param opts: The command line options selected by the user
1626 @param inst_list: The list of instances to operate on
1627 @param start: True if they should be started, False for shutdown
1628 @param no_remember: If the instance state should be remembered
1629 @return: The success of the operation (none failed)
1630
1631 """
1632 if start:
1633 opcls = opcodes.OpInstanceStartup
1634 text_submit, text_success, text_failed = ("startup", "started", "starting")
1635 else:
1636 opcls = compat.partial(opcodes.OpInstanceShutdown,
1637 timeout=opts.shutdown_timeout,
1638 no_remember=no_remember)
1639 text_submit, text_success, text_failed = ("shutdown", "stopped", "stopping")
1640
1641 jex = JobExecutor(opts=opts)
1642
1643 for inst in inst_list:
1644 ToStdout("Submit %s of instance %s", text_submit, inst)
1645 op = opcls(instance_name=inst)
1646 jex.QueueJob(inst, op)
1647
1648 results = jex.GetResults()
1649 bad_cnt = len([1 for (success, _) in results if not success])
1650
1651 if bad_cnt == 0:
1652 ToStdout("All instances have been %s successfully", text_success)
1653 else:
1654 ToStderr("There were errors while %s instances:\n"
1655 "%d error(s) out of %d instance(s)", text_failed, bad_cnt,
1656 len(results))
1657 return False
1658
1659 return True
1660
1661
1662 class _RunWhenNodesReachableHelper(object):
1663 """Helper class to make shared internal state sharing easier.
1664
1665 @ivar success: Indicates if all action_cb calls were successful
1666
1667 """
1668 def __init__(self, node_list, action_cb, node2ip, port, feedback_fn,
1669 _ping_fn=netutils.TcpPing, _sleep_fn=time.sleep):
1670 """Init the object.
1671
1672 @param node_list: The list of nodes to be reachable
1673 @param action_cb: Callback called when a new host is reachable
1674 @type node2ip: dict
1675 @param node2ip: Node to ip mapping
1676 @param port: The port to use for the TCP ping
1677 @param feedback_fn: The function used for feedback
1678 @param _ping_fn: Function to check reachabilty (for unittest use only)
1679 @param _sleep_fn: Function to sleep (for unittest use only)
1680
1681 """
1682 self.down = set(node_list)
1683 self.up = set()
1684 self.node2ip = node2ip
1685 self.success = True
1686 self.action_cb = action_cb
1687 self.port = port
1688 self.feedback_fn = feedback_fn
1689 self._ping_fn = _ping_fn
1690 self._sleep_fn = _sleep_fn
1691
1692 def __call__(self):
1693 """When called we run action_cb.
1694
1695 @raises utils.RetryAgain: When there are still down nodes
1696
1697 """
1698 if not self.action_cb(self.up):
1699 self.success = False
1700
1701 if self.down:
1702 raise utils.RetryAgain()
1703 else:
1704 return self.success
1705
1706 def Wait(self, secs):
1707 """Checks if a host is up or waits remaining seconds.
1708
1709 @param secs: The secs remaining
1710
1711 """
1712 start = time.time()
1713 for node in self.down:
1714 if self._ping_fn(self.node2ip[node], self.port, timeout=_EPO_PING_TIMEOUT,
1715 live_port_needed=True):
1716 self.feedback_fn("Node %s became available" % node)
1717 self.up.add(node)
1718 self.down -= self.up
1719 # If we have a node available there is the possibility to run the
1720 # action callback successfully, therefore we don't wait and return
1721 return
1722
1723 self._sleep_fn(max(0.0, start + secs - time.time()))
1724
1725
1726 def _RunWhenNodesReachable(node_list, action_cb, interval):
1727 """Run action_cb when nodes become reachable.
1728
1729 @param node_list: The list of nodes to be reachable
1730 @param action_cb: Callback called when a new host is reachable
1731 @param interval: The earliest time to retry
1732
1733 """
1734 client = GetClient()
1735 cluster_info = client.QueryClusterInfo()
1736 if cluster_info["primary_ip_version"] == constants.IP4_VERSION:
1737 family = netutils.IPAddress.family
1738 else:
1739 family = netutils.IP6Address.family
1740
1741 node2ip = dict((node, netutils.GetHostname(node, family=family).ip)
1742 for node in node_list)
1743
1744 port = netutils.GetDaemonPort(constants.NODED)
1745 helper = _RunWhenNodesReachableHelper(node_list, action_cb, node2ip, port,
1746 ToStdout)
1747
1748 try:
1749 return utils.Retry(helper, interval, _EPO_REACHABLE_TIMEOUT,
1750 wait_fn=helper.Wait)
1751 except utils.RetryTimeout:
1752 ToStderr("Time exceeded while waiting for nodes to become reachable"
1753 " again:\n - %s", " - ".join(helper.down))
1754 return False
1755
1756
1757 def _MaybeInstanceStartup(opts, inst_map, nodes_online,
1758 _instance_start_fn=_InstanceStart):
1759 """Start the instances conditional based on node_states.
1760
1761 @param opts: The command line options selected by the user
1762 @param inst_map: A dict of inst -> nodes mapping
1763 @param nodes_online: A list of nodes online
1764 @param _instance_start_fn: Callback to start instances (unittest use only)
1765 @return: Success of the operation on all instances
1766
1767 """
1768 start_inst_list = []
1769 for (inst, nodes) in inst_map.items():
1770 if not (nodes - nodes_online):
1771 # All nodes the instance lives on are back online
1772 start_inst_list.append(inst)
1773
1774 for inst in start_inst_list:
1775 del inst_map[inst]
1776
1777 if start_inst_list:
1778 return _instance_start_fn(opts, start_inst_list, True)
1779
1780 return True
1781
1782
1783 def _EpoOn(opts, full_node_list, node_list, inst_map):
1784 """Does the actual power on.
1785
1786 @param opts: The command line options selected by the user
1787 @param full_node_list: All nodes to operate on (includes nodes not supporting
1788 OOB)
1789 @param node_list: The list of nodes to operate on (all need to support OOB)
1790 @param inst_map: A dict of inst -> nodes mapping
1791 @return: The desired exit status
1792
1793 """
1794 if node_list and not _OobPower(opts, node_list, False):
1795 ToStderr("Not all nodes seem to get back up, investigate and start"
1796 " manually if needed")
1797
1798 # Wait for the nodes to be back up
1799 action_cb = compat.partial(_MaybeInstanceStartup, opts, dict(inst_map))
1800
1801 ToStdout("Waiting until all nodes are available again")
1802 if not _RunWhenNodesReachable(full_node_list, action_cb, _EPO_PING_INTERVAL):
1803 ToStderr("Please investigate and start stopped instances manually")
1804 return constants.EXIT_FAILURE
1805
1806 return constants.EXIT_SUCCESS
1807
1808
1809 def _EpoOff(opts, node_list, inst_map):
1810 """Does the actual power off.
1811
1812 @param opts: The command line options selected by the user
1813 @param node_list: The list of nodes to operate on (all need to support OOB)
1814 @param inst_map: A dict of inst -> nodes mapping
1815 @return: The desired exit status
1816
1817 """
1818 if not _InstanceStart(opts, inst_map.keys(), False, no_remember=True):
1819 ToStderr("Please investigate and stop instances manually before continuing")
1820 return constants.EXIT_FAILURE
1821
1822 if not node_list:
1823 return constants.EXIT_SUCCESS
1824
1825 if _OobPower(opts, node_list, False):
1826 return constants.EXIT_SUCCESS
1827 else:
1828 return constants.EXIT_FAILURE
1829
1830
1831 def Epo(opts, args, qcl=None, _on_fn=_EpoOn, _off_fn=_EpoOff,
1832 _confirm_fn=ConfirmOperation,
1833 _stdout_fn=ToStdout, _stderr_fn=ToStderr):
1834 """EPO operations.
1835
1836 @param opts: the command line options selected by the user
1837 @type args: list
1838 @param args: should contain only one element, the subcommand
1839 @rtype: int
1840 @return: the desired exit code
1841
1842 """
1843 if opts.groups and opts.show_all:
1844 _stderr_fn("Only one of --groups or --all are allowed")
1845 return constants.EXIT_FAILURE
1846 elif args and opts.show_all:
1847 _stderr_fn("Arguments in combination with --all are not allowed")
1848 return constants.EXIT_FAILURE
1849
1850 if qcl is None:
1851 # Query client
1852 qcl = GetClient()
1853
1854 if opts.groups:
1855 node_query_list = \
1856 itertools.chain(*qcl.QueryGroups(args, ["node_list"], False))
1857 else:
1858 node_query_list = args
1859
1860 result = qcl.QueryNodes(node_query_list, ["name", "master", "pinst_list",
1861 "sinst_list", "powered", "offline"],
1862 False)
1863
1864 all_nodes = map(compat.fst, result)
1865 node_list = []
1866 inst_map = {}
1867 for (node, master, pinsts, sinsts, powered, offline) in result:
1868 if not offline:
1869 for inst in (pinsts + sinsts):
1870 if inst in inst_map:
1871 if not master:
1872 inst_map[inst].add(node)
1873 elif master:
1874 inst_map[inst] = set()
1875 else:
1876 inst_map[inst] = set([node])
1877
1878 if master and opts.on:
1879 # We ignore the master for turning on the machines, in fact we are
1880 # already operating on the master at this point :)
1881 continue
1882 elif master and not opts.show_all:
1883 _stderr_fn("%s is the master node, please do a master-failover to another"
1884 " node not affected by the EPO or use --all if you intend to"
1885 " shutdown the whole cluster", node)
1886 return constants.EXIT_FAILURE
1887 elif powered is None:
1888 _stdout_fn("Node %s does not support out-of-band handling, it can not be"
1889 " handled in a fully automated manner", node)
1890 elif powered == opts.on:
1891 _stdout_fn("Node %s is already in desired power state, skipping", node)
1892 elif not offline or (offline and powered):
1893 node_list.append(node)
1894
1895 if not (opts.force or _confirm_fn(all_nodes, "nodes", "epo")):
1896 return constants.EXIT_FAILURE
1897
1898 if opts.on:
1899 return _on_fn(opts, all_nodes, node_list, inst_map)
1900 else:
1901 return _off_fn(opts, node_list, inst_map)
1902
1903
1904 def _GetCreateCommand(info):
1905 buf = StringIO()
1906 buf.write("gnt-cluster init")
1907 PrintIPolicyCommand(buf, info["ipolicy"], False)
1908 buf.write(" ")
1909 buf.write(info["name"])
1910 return buf.getvalue()
1911
1912
1913 def ShowCreateCommand(opts, args):
1914 """Shows the command that can be used to re-create the cluster.
1915
1916 Currently it works only for ipolicy specs.
1917
1918 """
1919 cl = GetClient()
1920 result = cl.QueryClusterInfo()
1921 ToStdout(_GetCreateCommand(result))
1922
1923
1924 def _RunCommandAndReport(cmd):
1925 """Run a command and report its output, iff it failed.
1926
1927 @param cmd: the command to execute
1928 @type cmd: list
1929 @rtype: bool
1930 @return: False, if the execution failed.
1931
1932 """
1933 result = utils.RunCmd(cmd)
1934 if result.failed:
1935 ToStderr("Command %s failed: %s; Output %s" %
1936 (cmd, result.fail_reason, result.output))
1937 return False
1938 return True
1939
1940
1941 def _VerifyCommand(cmd):
1942 """Verify that a given command succeeds on all online nodes.
1943
1944 As this function is intended to run during upgrades, it
1945 is implemented in such a way that it still works, if all Ganeti
1946 daemons are down.
1947
1948 @param cmd: the command to execute
1949 @type cmd: list
1950 @rtype: list
1951 @return: the list of node names that are online where
1952 the command failed.
1953
1954 """
1955 command = utils.text.ShellQuoteArgs([str(val) for val in cmd])
1956
1957 nodes = ssconf.SimpleStore().GetOnlineNodeList()
1958 master_node = ssconf.SimpleStore().GetMasterNode()
1959 cluster_name = ssconf.SimpleStore().GetClusterName()
1960
1961 # If master node is in 'nodes', make sure master node is at list end
1962 if master_node in nodes:
1963 nodes.remove(master_node)
1964 nodes.append(master_node)
1965
1966 failed = []
1967
1968 srun = ssh.SshRunner(cluster_name=cluster_name)
1969 for name in nodes:
1970 result = srun.Run(name, constants.SSH_LOGIN_USER, command)
1971 if result.exit_code != 0:
1972 failed.append(name)
1973
1974 return failed
1975
1976
1977 def _VerifyVersionInstalled(versionstring):
1978 """Verify that the given version of ganeti is installed on all online nodes.
1979
1980 Do nothing, if this is the case, otherwise print an appropriate
1981 message to stderr.
1982
1983 @param versionstring: the version to check for
1984 @type versionstring: string
1985 @rtype: bool
1986 @return: True, if the version is installed on all online nodes
1987
1988 """
1989 badnodes = _VerifyCommand(["test", "-d",
1990 os.path.join(pathutils.PKGLIBDIR, versionstring)])
1991 if badnodes:
1992 ToStderr("Ganeti version %s not installed on nodes %s"
1993 % (versionstring, ", ".join(badnodes)))
1994 return False
1995
1996 return True
1997
1998
1999 def _GetRunning():
2000 """Determine the list of running jobs.
2001
2002 @rtype: list
2003 @return: the number of jobs still running
2004
2005 """
2006 cl = GetClient()
2007 qfilter = qlang.MakeSimpleFilter("status",
2008 frozenset([constants.JOB_STATUS_RUNNING]))
2009 return len(cl.Query(constants.QR_JOB, [], qfilter).data)
2010
2011
2012 def _SetGanetiVersion(versionstring):
2013 """Set the active version of ganeti to the given versionstring
2014
2015 @type versionstring: string
2016 @rtype: list
2017 @return: the list of nodes where the version change failed
2018
2019 """
2020 failed = []
2021 if constants.HAS_GNU_LN:
2022 failed.extend(_VerifyCommand(
2023 ["ln", "-s", "-f", "-T",
2024 os.path.join(pathutils.PKGLIBDIR, versionstring),
2025 os.path.join(pathutils.SYSCONFDIR, "ganeti/lib")]))
2026 failed.extend(_VerifyCommand(
2027 ["ln", "-s", "-f", "-T",
2028 os.path.join(pathutils.SHAREDIR, versionstring),
2029 os.path.join(pathutils.SYSCONFDIR, "ganeti/share")]))
2030 else:
2031 failed.extend(_VerifyCommand(
2032 ["rm", "-f", os.path.join(pathutils.SYSCONFDIR, "ganeti/lib")]))
2033 failed.extend(_VerifyCommand(
2034 ["ln", "-s", "-f", os.path.join(pathutils.PKGLIBDIR, versionstring),
2035 os.path.join(pathutils.SYSCONFDIR, "ganeti/lib")]))
2036 failed.extend(_VerifyCommand(
2037 ["rm", "-f", os.path.join(pathutils.SYSCONFDIR, "ganeti/share")]))
2038 failed.extend(_VerifyCommand(
2039 ["ln", "-s", "-f", os.path.join(pathutils.SHAREDIR, versionstring),
2040 os.path.join(pathutils.SYSCONFDIR, "ganeti/share")]))
2041 return list(set(failed))
2042
2043
2044 def _ExecuteCommands(fns):
2045 """Execute a list of functions, in reverse order.
2046
2047 @type fns: list of functions.
2048 @param fns: the functions to be executed.
2049
2050 """
2051 for fn in reversed(fns):
2052 fn()
2053
2054
2055 def _GetConfigVersion():
2056 """Determine the version the configuration file currently has.
2057
2058 @rtype: tuple or None
2059 @return: (major, minor, revision) if the version can be determined,
2060 None otherwise
2061
2062 """
2063 config_data = serializer.LoadJson(utils.ReadFile(pathutils.CLUSTER_CONF_FILE))
2064 try:
2065 config_version = config_data["version"]
2066 except KeyError:
2067 return None
2068 return utils.SplitVersion(config_version)
2069
2070
2071 def _ReadIntentToUpgrade():
2072 """Read the file documenting the intent to upgrade the cluster.
2073
2074 @rtype: (string, string) or (None, None)
2075 @return: (old version, version to upgrade to), if the file exists,
2076 and (None, None) otherwise.
2077
2078 """
2079 if not os.path.isfile(pathutils.INTENT_TO_UPGRADE):
2080 return (None, None)
2081
2082 contentstring = utils.ReadFile(pathutils.INTENT_TO_UPGRADE)
2083 contents = utils.UnescapeAndSplit(contentstring)
2084 if len(contents) != 3:
2085 # file syntactically mal-formed
2086 return (None, None)
2087 return (contents[0], contents[1])
2088
2089
2090 def _WriteIntentToUpgrade(version):
2091 """Write file documenting the intent to upgrade the cluster.
2092
2093 @type version: string
2094 @param version: the version we intent to upgrade to
2095
2096 """
2097 utils.WriteFile(pathutils.INTENT_TO_UPGRADE,
2098 data=utils.EscapeAndJoin([constants.RELEASE_VERSION, version,
2099 "%d" % os.getpid()]))
2100
2101
2102 def _UpgradeBeforeConfigurationChange(versionstring):
2103 """
2104 Carry out all the tasks necessary for an upgrade that happen before
2105 the configuration file, or Ganeti version, changes.
2106
2107 @type versionstring: string
2108 @param versionstring: the version to upgrade to
2109 @rtype: (bool, list)
2110 @return: tuple of a bool indicating success and a list of rollback tasks
2111
2112 """
2113 rollback = []
2114
2115 if not _VerifyVersionInstalled(versionstring):
2116 return (False, rollback)
2117
2118 _WriteIntentToUpgrade(versionstring)
2119 rollback.append(
2120 lambda: utils.RunCmd(["rm", "-f", pathutils.INTENT_TO_UPGRADE]))
2121
2122 ToStdout("Draining queue")
2123 client = GetClient()
2124 client.SetQueueDrainFlag(True)
2125
2126 rollback.append(lambda: GetClient().SetQueueDrainFlag(False))
2127
2128 if utils.SimpleRetry(0, _GetRunning,
2129 constants.UPGRADE_QUEUE_POLL_INTERVAL,
2130 constants.UPGRADE_QUEUE_DRAIN_TIMEOUT):
2131 ToStderr("Failed to completely empty the queue.")
2132 return (False, rollback)
2133
2134 ToStdout("Pausing the watcher for one hour.")
2135 rollback.append(lambda: GetClient().SetWatcherPause(None))
2136 GetClient().SetWatcherPause(time.time() + 60 * 60)
2137
2138 ToStdout("Stopping daemons on master node.")
2139 if not _RunCommandAndReport([pathutils.DAEMON_UTIL, "stop-all"]):
2140 return (False, rollback)
2141
2142 if not _VerifyVersionInstalled(versionstring):
2143 utils.RunCmd([pathutils.DAEMON_UTIL, "start-all"])
2144 return (False, rollback)
2145
2146 ToStdout("Stopping daemons everywhere.")
2147 rollback.append(lambda: _VerifyCommand([pathutils.DAEMON_UTIL, "start-all"]))
2148 badnodes = _VerifyCommand([pathutils.DAEMON_UTIL, "stop-all"])
2149 if badnodes:
2150 ToStderr("Failed to stop daemons on %s." % (", ".join(badnodes),))
2151 return (False, rollback)
2152
2153 backuptar = os.path.join(pathutils.BACKUP_DIR, "ganeti%d.tar" % time.time())
2154 ToStdout("Backing up configuration as %s" % backuptar)
2155 if not _RunCommandAndReport(["mkdir", "-p", pathutils.BACKUP_DIR]):
2156 return (False, rollback)
2157
2158 # Create the archive in a safe manner, as it contains sensitive
2159 # information.
2160 (_, tmp_name) = tempfile.mkstemp(prefix=backuptar, dir=pathutils.BACKUP_DIR)
2161 if not _RunCommandAndReport(["tar", "-cf", tmp_name,
2162 "--exclude=queue/archive",
2163 pathutils.DATA_DIR]):
2164 return (False, rollback)
2165
2166 os.rename(tmp_name, backuptar)
2167 return (True, rollback)
2168
2169
2170 def _VersionSpecificDowngrade():
2171 """
2172 Perform any additional downrade tasks that are version specific
2173 and need to be done just after the configuration downgrade. This
2174 function needs to be idempotent, so that it can be redone if the
2175 downgrade procedure gets interrupted after changing the
2176 configuration.
2177
2178 Note that this function has to be reset with every version bump.
2179
2180 @return: True upon success
2181 """
2182 ToStdout("Performing version-specific downgrade tasks.")
2183
2184 # Determine if this cluster is set up with SSH handling
2185 # (aka not using --no-ssh-init), check if the public
2186 # keyfile exists.
2187 update_keys = os.path.exists(pathutils.SSH_PUB_KEYS)
2188
2189 if not update_keys:
2190 return True
2191
2192 ToStdout("Replace nodes' SSH keys with the master's keys.")
2193 (_, root_keyfiles) = \
2194 ssh.GetAllUserFiles(constants.SSH_LOGIN_USER, mkdir=False, dircheck=False)
2195
2196 dsa_root_keyfiles = dict((kind, value) for (kind, value)
2197 in root_keyfiles.items()
2198 if kind == constants.SSHK_DSA)
2199 master_private_keyfile, master_public_keyfile = \
2200 dsa_root_keyfiles[constants.SSHK_DSA]
2201
2202 nodes = ssconf.SimpleStore().GetOnlineNodeList()
2203 master_node = ssconf.SimpleStore().GetMasterNode()
2204 cluster_name = ssconf.SimpleStore().GetClusterName()
2205
2206 # If master node is in 'nodes', remove it
2207 if master_node in nodes:
2208 nodes.remove(master_node)
2209
2210 srun = ssh.SshRunner(cluster_name=cluster_name)
2211 for name in nodes:
2212 for key_file in [master_private_keyfile, master_public_keyfile]:
2213 command = utils.text.ShellQuoteArgs([
2214 "scp", key_file, "%s:%s" % (name, key_file)])
2215 result = srun.Run(master_node, constants.SSH_LOGIN_USER, command)
2216 if result.exit_code != 0:
2217 ToStderr("Overiding SSH key '%s' of node '%s' failed. You might"
2218 " want to clean up manually." % (key_file, name))
2219
2220 return True
2221
2222
2223 def _SwitchVersionAndConfig(versionstring, downgrade):
2224 """
2225 Switch to the new Ganeti version and change the configuration,
2226 in correct order.
2227
2228 @type versionstring: string
2229 @param versionstring: the version to change to
2230 @type downgrade: bool
2231 @param downgrade: True, if the configuration should be downgraded
2232 @rtype: (bool, list)
2233 @return: tupe of a bool indicating success, and a list of
2234 additional rollback tasks
2235
2236 """
2237 rollback = []
2238 if downgrade:
2239 ToStdout("Downgrading configuration")
2240 if not _RunCommandAndReport([pathutils.CFGUPGRADE, "--downgrade", "-f"]):
2241 return (False, rollback)
2242 # Note: version specific downgrades need to be done before switching
2243 # binaries, so that we still have the knowledgeable binary if the downgrade
2244 # process gets interrupted at this point.
2245 if not _VersionSpecificDowngrade():
2246 return (False, rollback)
2247
2248 # Configuration change is the point of no return. From then onwards, it is
2249 # safer to push through the up/dowgrade than to try to roll it back.
2250
2251 ToStdout("Switching to version %s on all nodes" % versionstring)
2252 rollback.append(lambda: _SetGanetiVersion(constants.DIR_VERSION))
2253 badnodes = _SetGanetiVersion(versionstring)
2254 if badnodes:
2255 ToStderr("Failed to switch to Ganeti version %s on nodes %s"
2256 % (versionstring, ", ".join(badnodes)))
2257 if not downgrade:
2258 return (False, rollback)
2259
2260 # Now that we have changed to the new version of Ganeti we should
2261 # not communicate over luxi any more, as luxi might have changed in
2262 # incompatible ways. Therefore, manually call the corresponding ganeti
2263 # commands using their canonical (version independent) path.
2264
2265 if not downgrade:
2266 ToStdout("Upgrading configuration")
2267 if not _RunCommandAndReport([pathutils.CFGUPGRADE, "-f"]):
2268 return (False, rollback)
2269
2270 return (True, rollback)
2271
2272
2273 def _UpgradeAfterConfigurationChange(oldversion):
2274 """
2275 Carry out the upgrade actions necessary after switching to the new
2276 Ganeti version and updating the configuration.
2277
2278 As this part is run at a time where the new version of Ganeti is already
2279 running, no communication should happen via luxi, as this is not a stable
2280 interface. Also, as the configuration change is the point of no return,
2281 all actions are pushed trough, even if some of them fail.
2282
2283 @param oldversion: the version the upgrade started from
2284 @type oldversion: string
2285 @rtype: int
2286 @return: the intended return value
2287
2288 """
2289 returnvalue = 0
2290
2291 ToStdout("Ensuring directories everywhere.")
2292 badnodes = _VerifyCommand([pathutils.ENSURE_DIRS])
2293 if badnodes:
2294 ToStderr("Warning: failed to ensure directories on %s." %
2295 (", ".join(badnodes)))
2296 returnvalue = 1
2297
2298 ToStdout("Starting daemons everywhere.")
2299 badnodes = _VerifyCommand([pathutils.DAEMON_UTIL, "start-all"])
2300 if badnodes:
2301 ToStderr("Warning: failed to start daemons on %s." % (", ".join(badnodes),))
2302 returnvalue = 1
2303
2304 ToStdout("Redistributing the configuration.")
2305 if not _RunCommandAndReport(["gnt-cluster", "redist-conf", "--yes-do-it"]):
2306 returnvalue = 1
2307
2308 ToStdout("Restarting daemons everywhere.")
2309 badnodes = _VerifyCommand([pathutils.DAEMON_UTIL, "stop-all"])
2310 badnodes.extend(_VerifyCommand([pathutils.DAEMON_UTIL, "start-all"]))
2311 if badnodes:
2312 ToStderr("Warning: failed to start daemons on %s." %
2313 (", ".join(list(set(badnodes))),))
2314 returnvalue = 1
2315
2316 ToStdout("Undraining the queue.")
2317 if not _RunCommandAndReport(["gnt-cluster", "queue", "undrain"]):
2318 returnvalue = 1
2319
2320 _RunCommandAndReport(["rm", "-f", pathutils.INTENT_TO_UPGRADE])
2321
2322 ToStdout("Running post-upgrade hooks")
2323 if not _RunCommandAndReport([pathutils.POST_UPGRADE, oldversion]):
2324 returnvalue = 1
2325
2326 ToStdout("Unpausing the watcher.")
2327 if not _RunCommandAndReport(["gnt-cluster", "watcher", "continue"]):
2328 returnvalue = 1
2329
2330 ToStdout("Verifying cluster.")
2331 if not _RunCommandAndReport(["gnt-cluster", "verify"]):
2332 returnvalue = 1
2333
2334 return returnvalue
2335
2336
2337 def UpgradeGanetiCommand(opts, args):
2338 """Upgrade a cluster to a new ganeti version.
2339
2340 @param opts: the command line options selected by the user
2341 @type args: list
2342 @param args: should be an empty list
2343 @rtype: int
2344 @return: the desired exit code
2345
2346 """
2347 if ((not opts.resume and opts.to is None)
2348 or (opts.resume and opts.to is not None)):
2349 ToStderr("Precisely one of the options --to and --resume"
2350 " has to be given")
2351 return 1
2352
2353 # If we're not told to resume, verify there is no upgrade
2354 # in progress.
2355 if not opts.resume:
2356 oldversion, versionstring = _ReadIntentToUpgrade()
2357 if versionstring is not None:
2358 # An upgrade is going on; verify whether the target matches
2359 if versionstring == opts.to:
2360 ToStderr("An upgrade is already in progress. Target version matches,"
2361 " resuming.")
2362 opts.resume = True
2363 opts.to = None
2364 else:
2365 ToStderr("An upgrade from %s to %s is in progress; use --resume to"
2366 " finish it first" % (oldversion, versionstring))
2367 return 1
2368
2369 oldversion = constants.RELEASE_VERSION
2370
2371 if opts.resume:
2372 ssconf.CheckMaster(False)
2373 oldversion, versionstring = _ReadIntentToUpgrade()
2374 if versionstring is None:
2375 return 0
2376 version = utils.version.ParseVersion(versionstring)
2377 if version is None:
2378 return 1
2379 configversion = _GetConfigVersion()
2380 if configversion is None:
2381 return 1
2382 # If the upgrade we resume was an upgrade between compatible
2383 # versions (like 2.10.0 to 2.10.1), the correct configversion
2384 # does not guarantee that the config has been updated.
2385 # However, in the case of a compatible update with the configuration
2386 # not touched, we are running a different dirversion with the same
2387 # config version.
2388 config_already_modified = \
2389 (utils.IsCorrectConfigVersion(version, configversion) and
2390 not (versionstring != constants.DIR_VERSION and
2391 configversion == (constants.CONFIG_MAJOR, constants.CONFIG_MINOR,
2392 constants.CONFIG_REVISION)))
2393 if not config_already_modified:
2394 # We have to start from the beginning; however, some daemons might have
2395 # already been stopped, so the only way to get into a well-defined state
2396 # is by starting all daemons again.
2397 _VerifyCommand([pathutils.DAEMON_UTIL, "start-all"])
2398 else:
2399 versionstring = opts.to
2400 config_already_modified = False
2401 version = utils.version.ParseVersion(versionstring)
2402 if version is None:
2403 ToStderr("Could not parse version string %s" % versionstring)
2404 return 1
2405
2406 msg = utils.version.UpgradeRange(version)
2407 if msg is not None:
2408 ToStderr("Cannot upgrade to %s: %s" % (versionstring, msg))
2409 return 1
2410
2411 if not config_already_modified:
2412 success, rollback = _UpgradeBeforeConfigurationChange(versionstring)
2413 if not success:
2414 _ExecuteCommands(rollback)
2415 return 1
2416 else:
2417 rollback = []
2418
2419 downgrade = utils.version.ShouldCfgdowngrade(version)
2420
2421 success, additionalrollback = \
2422 _SwitchVersionAndConfig(versionstring, downgrade)
2423 if not success:
2424 rollback.extend(additionalrollback)
2425 _ExecuteCommands(rollback)
2426 return 1
2427
2428 return _UpgradeAfterConfigurationChange(oldversion)
2429
2430
2431 commands = {
2432 "init": (
2433 InitCluster, [ArgHost(min=1, max=1)],
2434 [BACKEND_OPT, CP_SIZE_OPT, ENABLED_HV_OPT, GLOBAL_FILEDIR_OPT,
2435 HVLIST_OPT, MAC_PREFIX_OPT, MASTER_NETDEV_OPT, MASTER_NETMASK_OPT,
2436 NIC_PARAMS_OPT, NOMODIFY_ETCHOSTS_OPT, NOMODIFY_SSH_SETUP_OPT,
2437 SECONDARY_IP_OPT, VG_NAME_OPT, MAINTAIN_NODE_HEALTH_OPT, UIDPOOL_OPT,
2438 DRBD_HELPER_OPT, DEFAULT_IALLOCATOR_OPT, DEFAULT_IALLOCATOR_PARAMS_OPT,
2439 PRIMARY_IP_VERSION_OPT, PREALLOC_WIPE_DISKS_OPT, NODE_PARAMS_OPT,
2440 GLOBAL_SHARED_FILEDIR_OPT, USE_EXTERNAL_MIP_SCRIPT, DISK_PARAMS_OPT,
2441 HV_STATE_OPT, DISK_STATE_OPT, ENABLED_DISK_TEMPLATES_OPT,
2442 IPOLICY_STD_SPECS_OPT, GLOBAL_GLUSTER_FILEDIR_OPT, INSTALL_IMAGE_OPT,
2443 ZEROING_IMAGE_OPT, COMPRESSION_TOOLS_OPT,
2444 ENABLED_USER_SHUTDOWN_OPT,
2445 ]
2446 + INSTANCE_POLICY_OPTS + SPLIT_ISPECS_OPTS,
2447 "[opts...] <cluster_name>", "Initialises a new cluster configuration"),
2448 "destroy": (
2449 DestroyCluster, ARGS_NONE, [YES_DOIT_OPT],
2450 "", "Destroy cluster"),
2451 "rename": (
2452 RenameCluster, [ArgHost(min=1, max=1)],
2453 [FORCE_OPT, DRY_RUN_OPT],
2454 "<new_name>",
2455 "Renames the cluster"),
2456 "redist-conf": (
2457 RedistributeConfig, ARGS_NONE, SUBMIT_OPTS +
2458 [DRY_RUN_OPT, PRIORITY_OPT, FORCE_DISTRIBUTION],
2459 "", "Forces a push of the configuration file and ssconf files"
2460 " to the nodes in the cluster"),
2461 "verify": (
2462 VerifyCluster, ARGS_NONE,
2463 [VERBOSE_OPT, DEBUG_SIMERR_OPT, ERROR_CODES_OPT, NONPLUS1_OPT,
2464 DRY_RUN_OPT, PRIORITY_OPT, NODEGROUP_OPT, IGNORE_ERRORS_OPT,
2465 VERIFY_CLUTTER_OPT],
2466 "", "Does a check on the cluster configuration"),
2467 "verify-disks": (
2468 VerifyDisks, ARGS_NONE, [PRIORITY_OPT],
2469 "", "Does a check on the cluster disk status"),
2470 "repair-disk-sizes": (
2471 RepairDiskSizes, ARGS_MANY_INSTANCES, [DRY_RUN_OPT, PRIORITY_OPT],
2472 "[instance...]", "Updates mismatches in recorded disk sizes"),
2473 "master-failover": (
2474 MasterFailover, ARGS_NONE, [NOVOTING_OPT, FORCE_FAILOVER],
2475 "", "Makes the current node the master"),
2476 "master-ping": (
2477 MasterPing, ARGS_NONE, [],
2478 "", "Checks if the master is alive"),
2479 "version": (
2480 ShowClusterVersion, ARGS_NONE, [],
2481 "", "Shows the cluster version"),
2482 "getmaster": (
2483 ShowClusterMaster, ARGS_NONE, [],
2484 "", "Shows the cluster master"),
2485 "copyfile": (
2486 ClusterCopyFile, [ArgFile(min=1, max=1)],
2487 [NODE_LIST_OPT, USE_REPL_NET_OPT, NODEGROUP_OPT],
2488 "[-n node...] <filename>", "Copies a file to all (or only some) nodes"),
2489 "command": (
2490 RunClusterCommand, [ArgCommand(min=1)],
2491 [NODE_LIST_OPT, NODEGROUP_OPT, SHOW_MACHINE_OPT, FAILURE_ONLY_OPT],
2492 "[-n node...] <command>", "Runs a command on all (or only some) nodes"),
2493 "info": (
2494 ShowClusterConfig, ARGS_NONE, [ROMAN_OPT],
2495 "[--roman]", "Show cluster configuration"),
2496 "list-tags": (
2497 ListTags, ARGS_NONE, [], "", "List the tags of the cluster"),
2498 "add-tags": (
2499 AddTags, [ArgUnknown()], [TAG_SRC_OPT, PRIORITY_OPT] + SUBMIT_OPTS,
2500 "tag...", "Add tags to the cluster"),
2501 "remove-tags": (
2502 RemoveTags, [ArgUnknown()], [TAG_SRC_OPT, PRIORITY_OPT] + SUBMIT_OPTS,
2503 "tag...", "Remove tags from the cluster"),
2504 "search-tags": (
2505 SearchTags, [ArgUnknown(min=1, max=1)], [PRIORITY_OPT], "",
2506 "Searches the tags on all objects on"
2507 " the cluster for a given pattern (regex)"),
2508 "queue": (
2509 QueueOps,
2510 [ArgChoice(min=1, max=1, choices=["drain", "undrain", "info"])],
2511 [], "drain|undrain|info", "Change queue properties"),
2512 "watcher": (
2513 WatcherOps,
2514 [ArgChoice(min=1, max=1, choices=["pause", "continue", "info"]),
2515 ArgSuggest(min=0, max=1, choices=["30m", "1h", "4h"])],
2516 [],
2517 "{pause <timespec>|continue|info}", "Change watcher properties"),
2518 "modify": (
2519 SetClusterParams, ARGS_NONE,
2520 [FORCE_OPT,
2521 BACKEND_OPT, CP_SIZE_OPT, RQL_OPT, MAX_TRACK_OPT, INSTALL_IMAGE_OPT,
2522 INSTANCE_COMMUNICATION_NETWORK_OPT, ENABLED_HV_OPT, HVLIST_OPT,
2523 MAC_PREFIX_OPT, MASTER_NETDEV_OPT, MASTER_NETMASK_OPT, NIC_PARAMS_OPT,
2524 VG_NAME_OPT, MAINTAIN_NODE_HEALTH_OPT, UIDPOOL_OPT, ADD_UIDS_OPT,
2525 REMOVE_UIDS_OPT, DRBD_HELPER_OPT, DEFAULT_IALLOCATOR_OPT,
2526 DEFAULT_IALLOCATOR_PARAMS_OPT, RESERVED_LVS_OPT, DRY_RUN_OPT, PRIORITY_OPT,
2527 PREALLOC_WIPE_DISKS_OPT, NODE_PARAMS_OPT, USE_EXTERNAL_MIP_SCRIPT,
2528 DISK_PARAMS_OPT, HV_STATE_OPT, DISK_STATE_OPT] + SUBMIT_OPTS +
2529 [ENABLED_DISK_TEMPLATES_OPT, IPOLICY_STD_SPECS_OPT, MODIFY_ETCHOSTS_OPT,
2530 ENABLED_USER_SHUTDOWN_OPT] +
2531 INSTANCE_POLICY_OPTS +
2532 [GLOBAL_FILEDIR_OPT, GLOBAL_SHARED_FILEDIR_OPT, ZEROING_IMAGE_OPT,
2533 COMPRESSION_TOOLS_OPT] +
2534 [ENABLED_DATA_COLLECTORS_OPT, DATA_COLLECTOR_INTERVAL_OPT],
2535 "[opts...]",
2536 "Alters the parameters of the cluster"),
2537 "renew-crypto": (
2538 RenewCrypto, ARGS_NONE,
2539 [NEW_CLUSTER_CERT_OPT, NEW_RAPI_CERT_OPT, RAPI_CERT_OPT,
2540 NEW_CONFD_HMAC_KEY_OPT, FORCE_OPT,
2541 NEW_CLUSTER_DOMAIN_SECRET_OPT, CLUSTER_DOMAIN_SECRET_OPT,
2542 NEW_SPICE_CERT_OPT, SPICE_CERT_OPT, SPICE_CACERT_OPT,
2543 NEW_NODE_CERT_OPT, NEW_SSH_KEY_OPT, NOSSH_KEYCHECK_OPT,
2544 VERBOSE_OPT],
2545 "[opts...]",
2546 "Renews cluster certificates, keys and secrets"),
2547 "epo": (
2548 Epo, [ArgUnknown()],
2549 [FORCE_OPT, ON_OPT, GROUPS_OPT, ALL_OPT, OOB_TIMEOUT_OPT,
2550 SHUTDOWN_TIMEOUT_OPT, POWER_DELAY_OPT],
2551 "[opts...] [args]",
2552 "Performs an emergency power-off on given args"),
2553 "activate-master-ip": (
2554 ActivateMasterIp, ARGS_NONE, [], "", "Activates the master IP"),
2555 "deactivate-master-ip": (
2556 DeactivateMasterIp, ARGS_NONE, [CONFIRM_OPT], "",
2557 "Deactivates the master IP"),
2558 "show-ispecs-cmd": (
2559 ShowCreateCommand, ARGS_NONE, [], "",
2560 "Show the command line to re-create the cluster"),
2561 "upgrade": (
2562 UpgradeGanetiCommand, ARGS_NONE, [TO_OPT, RESUME_OPT], "",
2563 "Upgrade (or downgrade) to a new Ganeti version"),
2564 }
2565
2566
2567 #: dictionary with aliases for commands
2568 aliases = {
2569 "masterfailover": "master-failover",
2570 "show": "info",
2571 }
2572
2573
2574 def Main():
2575 return GenericMain(commands, override={"tag_type": constants.TAG_CLUSTER},
2576 aliases=aliases)