cbb3af14f5cfe28b5827114e9c707f63fcfe610a
[ganeti-github.git] / lib / client / gnt_cluster.py
1 #
2 #
3
4 # Copyright (C) 2006, 2007, 2010, 2011, 2012, 2013, 2014 Google Inc.
5 # All rights reserved.
6 #
7 # Redistribution and use in source and binary forms, with or without
8 # modification, are permitted provided that the following conditions are
9 # met:
10 #
11 # 1. Redistributions of source code must retain the above copyright notice,
12 # this list of conditions and the following disclaimer.
13 #
14 # 2. Redistributions in binary form must reproduce the above copyright
15 # notice, this list of conditions and the following disclaimer in the
16 # documentation and/or other materials provided with the distribution.
17 #
18 # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
19 # IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
20 # TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
21 # PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR
22 # CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
23 # EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
24 # PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
25 # PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
26 # LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
27 # NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
28 # SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29
30 """Cluster related commands"""
31
32 # pylint: disable=W0401,W0613,W0614,C0103
33 # W0401: Wildcard import ganeti.cli
34 # W0613: Unused argument, since all functions follow the same API
35 # W0614: Unused import %s from wildcard import (since we need cli)
36 # C0103: Invalid name gnt-cluster
37
38 import itertools
39 import os
40 import time
41 import tempfile
42
43 from cStringIO import StringIO
44
45 import OpenSSL
46
47 from ganeti.cli import *
48 from ganeti import bootstrap
49 from ganeti import compat
50 from ganeti import constants
51 from ganeti import config
52 from ganeti import errors
53 from ganeti import netutils
54 from ganeti import objects
55 from ganeti import opcodes
56 from ganeti import pathutils
57 from ganeti import qlang
58 from ganeti.rpc.node import RunWithRPC
59 from ganeti import serializer
60 from ganeti import ssconf
61 from ganeti import ssh
62 from ganeti import uidpool
63 from ganeti import utils
64 from ganeti import wconfd
65 from ganeti.client import base
66
67
# Recover from an EPO (emergency power off) instead of performing one.
ON_OPT = cli_option("--on", default=False,
                    action="store_true", dest="on",
                    help="Recover from an EPO")

# Treat positional arguments as node group names rather than node names.
GROUPS_OPT = cli_option("--groups", default=False,
                        action="store_true", dest="groups",
                        help="Arguments are node groups instead of nodes")

# Skip the interactive confirmation that master-failover normally requires
# when --no-voting is given (see MasterFailover below).
FORCE_FAILOVER = cli_option("--yes-do-it", dest="yes_do_it",
                            help="Override interactive check for --no-voting",
                            default=False, action="store_true")

# Leave offline nodes out of the master-failover voting majority
# (enabled by default; see bootstrap.MajorityHealthy usage below).
IGNORE_OFFLINE_NODES_FAILOVER = cli_option(
    "--ignore-offline-nodes", dest="ignore_offline_nodes",
    help="Ignores offline nodes for master failover voting", default=True)

# Push the configuration even when the job queue is drained
# (see RedistributeConfig below).
FORCE_DISTRIBUTION = cli_option("--yes-do-it", dest="yes_do_it",
                                help="Unconditionally distribute the"
                                " configuration, even if the queue"
                                " is drained",
                                default=False, action="store_true")

# Target Ganeti version for an upgrade.
TO_OPT = cli_option("--to", default=None, type="string",
                    help="The Ganeti version to upgrade to")

# Continue a previously interrupted upgrade.
RESUME_OPT = cli_option("--resume", default=False, action="store_true",
                        help="Resume any pending Ganeti upgrades")

# Per-data-collector polling intervals, given as a key=value map.
DATA_COLLECTOR_INTERVAL_OPT = cli_option(
    "--data-collector-interval", default={}, type="keyval",
    help="Set collection intervals in seconds of data collectors.")

# Relax strict mode for group disk verification (see VerifyDisks below).
STRICT_OPT = cli_option("--no-strict", default=False,
                        dest="no_strict", action="store_true",
                        help="Do not run group verify in strict mode")

# Timing constants used by the EPO machinery.
_EPO_PING_INTERVAL = 30 # 30 seconds between pings
_EPO_PING_TIMEOUT = 1 # 1 second
_EPO_REACHABLE_TIMEOUT = 15 * 60 # 15 minutes
107
108
109 def _InitEnabledDiskTemplates(opts):
110 """Initialize the list of enabled disk templates.
111
112 """
113 if opts.enabled_disk_templates:
114 return opts.enabled_disk_templates.split(",")
115 else:
116 return constants.DEFAULT_ENABLED_DISK_TEMPLATES
117
118
def _InitVgName(opts, enabled_disk_templates):
  """Initialize the volume group name.

  @param opts: the command line options selected by the user
  @type enabled_disk_templates: list of strings
  @param enabled_disk_templates: cluster-wide enabled disk templates
  @rtype: string or None
  @return: the volume group name to use, or C{None} when no LVM-based
      template is enabled and none was requested

  """
  vg_name = None
  if opts.vg_name is not None:
    vg_name = opts.vg_name
    if vg_name:
      # A vg name was given explicitly; warn if it will be unused.
      if not utils.IsLvmEnabled(enabled_disk_templates):
        ToStdout("You specified a volume group with --vg-name, but you did not"
                 " enable any disk template that uses lvm.")
    elif utils.IsLvmEnabled(enabled_disk_templates):
      # --vg-name was given as an empty string, i.e. an explicit unset,
      # which contradicts the enabled LVM templates.
      raise errors.OpPrereqError(
          "LVM disk templates are enabled, but vg name not set.")
  elif utils.IsLvmEnabled(enabled_disk_templates):
    # No --vg-name at all: fall back to the default when LVM is in use.
    vg_name = constants.DEFAULT_VG
  return vg_name
139
140
def _InitDrbdHelper(opts, enabled_disk_templates, feedback_fn=ToStdout):
  """Determine the initial DRBD usermode helper.

  @param opts: the command line options selected by the user
  @type enabled_disk_templates: list of strings
  @param enabled_disk_templates: cluster-wide enabled disk templates
  @param feedback_fn: function used to emit informational messages
  @rtype: string or None
  @return: the helper to configure
  @raise errors.OpPrereqError: if DRBD is enabled and the helper is
      explicitly unset (empty string)

  """
  drbd_enabled = constants.DT_DRBD8 in enabled_disk_templates
  helper = opts.drbd_helper

  if not drbd_enabled:
    # A helper without DRBD is harmless but pointless; just mention it.
    if helper is not None:
      feedback_fn("Note: You specified a DRBD usermode helper, while DRBD"
                  " storage is not enabled.")
    return helper

  if helper is None:
    return constants.DEFAULT_DRBD_HELPER
  if helper == '':
    raise errors.OpPrereqError(
        "Unsetting the drbd usermode helper while enabling DRBD is not"
        " allowed.")
  return helper
160
161
@RunWithRPC
def InitCluster(opts, args):
  """Initialize the cluster.

  @param opts: the command line options selected by the user
  @type args: list
  @param args: should contain only one element, the desired
      cluster name
  @rtype: int
  @return: the desired exit code

  """
  enabled_disk_templates = _InitEnabledDiskTemplates(opts)

  try:
    vg_name = _InitVgName(opts, enabled_disk_templates)
    drbd_helper = _InitDrbdHelper(opts, enabled_disk_templates)
  # use "except ... as ..." (valid since Python 2.6) instead of the
  # removed-in-Python-3 comma form
  except errors.OpPrereqError as e:
    ToStderr(str(e))
    return 1

  master_netdev = opts.master_netdev
  if master_netdev is None:
    nic_mode = opts.nicparams.get(constants.NIC_MODE, None)
    if not nic_mode:
      # default case, use bridging
      master_netdev = constants.DEFAULT_BRIDGE
    elif nic_mode == constants.NIC_MODE_OVS:
      # default ovs is different from default bridge
      master_netdev = constants.DEFAULT_OVS
      opts.nicparams[constants.NIC_LINK] = constants.DEFAULT_OVS

  hvlist = opts.enabled_hypervisors
  if hvlist is None:
    hvlist = constants.DEFAULT_ENABLED_HYPERVISOR
  hvlist = hvlist.split(",")

  hvparams = dict(opts.hvparams)
  beparams = opts.beparams
  nicparams = opts.nicparams

  diskparams = dict(opts.diskparams)

  # check the disk template types here, as we cannot rely on the type check done
  # by the opcode parameter types
  diskparams_keys = set(diskparams.keys())
  if diskparams_keys > constants.DISK_TEMPLATES:
    unknown = utils.NiceSort(diskparams_keys - constants.DISK_TEMPLATES)
    ToStderr("Disk templates unknown: %s" % utils.CommaJoin(unknown))
    return 1

  # prepare beparams dict
  beparams = objects.FillDict(constants.BEC_DEFAULTS, beparams)
  utils.ForceDictType(beparams, constants.BES_PARAMETER_COMPAT)

  # prepare nicparams dict
  nicparams = objects.FillDict(constants.NICC_DEFAULTS, nicparams)
  utils.ForceDictType(nicparams, constants.NICS_PARAMETER_TYPES)

  # prepare ndparams dict
  if opts.ndparams is None:
    ndparams = dict(constants.NDC_DEFAULTS)
  else:
    ndparams = objects.FillDict(constants.NDC_DEFAULTS, opts.ndparams)
    utils.ForceDictType(ndparams, constants.NDS_PARAMETER_TYPES)

  # prepare hvparams dict
  for hv in constants.HYPER_TYPES:
    if hv not in hvparams:
      hvparams[hv] = {}
    hvparams[hv] = objects.FillDict(constants.HVC_DEFAULTS[hv], hvparams[hv])
    utils.ForceDictType(hvparams[hv], constants.HVS_PARAMETER_TYPES)

  # prepare diskparams dict
  for templ in constants.DISK_TEMPLATES:
    if templ not in diskparams:
      diskparams[templ] = {}
    diskparams[templ] = objects.FillDict(constants.DISK_DT_DEFAULTS[templ],
                                         diskparams[templ])
    utils.ForceDictType(diskparams[templ], constants.DISK_DT_TYPES)

  # prepare ipolicy dict
  ipolicy = CreateIPolicyFromOpts(
    ispecs_mem_size=opts.ispecs_mem_size,
    ispecs_cpu_count=opts.ispecs_cpu_count,
    ispecs_disk_count=opts.ispecs_disk_count,
    ispecs_disk_size=opts.ispecs_disk_size,
    ispecs_nic_count=opts.ispecs_nic_count,
    minmax_ispecs=opts.ipolicy_bounds_specs,
    std_ispecs=opts.ipolicy_std_specs,
    ipolicy_disk_templates=opts.ipolicy_disk_templates,
    ipolicy_vcpu_ratio=opts.ipolicy_vcpu_ratio,
    ipolicy_spindle_ratio=opts.ipolicy_spindle_ratio,
    ipolicy_memory_ratio=opts.ipolicy_memory_ratio,
    fill_all=True)

  if opts.candidate_pool_size is None:
    opts.candidate_pool_size = constants.MASTER_POOL_SIZE_DEFAULT

  if opts.mac_prefix is None:
    opts.mac_prefix = constants.DEFAULT_MAC_PREFIX

  uid_pool = opts.uid_pool
  if uid_pool is not None:
    uid_pool = uidpool.ParseUidPool(uid_pool)

  if opts.prealloc_wipe_disks is None:
    opts.prealloc_wipe_disks = False

  external_ip_setup_script = opts.use_external_mip_script
  if external_ip_setup_script is None:
    external_ip_setup_script = False

  try:
    primary_ip_version = int(opts.primary_ip_version)
  except (ValueError, TypeError) as err:
    ToStderr("Invalid primary ip version value: %s" % str(err))
    return 1

  master_netmask = opts.master_netmask
  try:
    if master_netmask is not None:
      master_netmask = int(master_netmask)
  except (ValueError, TypeError) as err:
    ToStderr("Invalid master netmask value: %s" % str(err))
    return 1

  if opts.disk_state:
    disk_state = utils.FlatToDict(opts.disk_state)
  else:
    disk_state = {}

  hv_state = dict(opts.hv_state)

  if opts.install_image:
    install_image = opts.install_image
  else:
    install_image = ""

  if opts.zeroing_image:
    zeroing_image = opts.zeroing_image
  else:
    zeroing_image = ""

  compression_tools = _GetCompressionTools(opts)

  default_ialloc_params = opts.default_iallocator_params

  enabled_user_shutdown = bool(opts.enabled_user_shutdown)

  if opts.ssh_key_type:
    ssh_key_type = opts.ssh_key_type
  else:
    ssh_key_type = constants.SSH_DEFAULT_KEY_TYPE

  ssh_key_bits = ssh.DetermineKeyBits(ssh_key_type, opts.ssh_key_bits, None,
                                      None)

  bootstrap.InitCluster(cluster_name=args[0],
                        secondary_ip=opts.secondary_ip,
                        vg_name=vg_name,
                        mac_prefix=opts.mac_prefix,
                        master_netmask=master_netmask,
                        master_netdev=master_netdev,
                        file_storage_dir=opts.file_storage_dir,
                        shared_file_storage_dir=opts.shared_file_storage_dir,
                        gluster_storage_dir=opts.gluster_storage_dir,
                        enabled_hypervisors=hvlist,
                        hvparams=hvparams,
                        beparams=beparams,
                        nicparams=nicparams,
                        ndparams=ndparams,
                        diskparams=diskparams,
                        ipolicy=ipolicy,
                        candidate_pool_size=opts.candidate_pool_size,
                        modify_etc_hosts=opts.modify_etc_hosts,
                        modify_ssh_setup=opts.modify_ssh_setup,
                        maintain_node_health=opts.maintain_node_health,
                        drbd_helper=drbd_helper,
                        uid_pool=uid_pool,
                        default_iallocator=opts.default_iallocator,
                        default_iallocator_params=default_ialloc_params,
                        primary_ip_version=primary_ip_version,
                        prealloc_wipe_disks=opts.prealloc_wipe_disks,
                        use_external_mip_script=external_ip_setup_script,
                        hv_state=hv_state,
                        disk_state=disk_state,
                        enabled_disk_templates=enabled_disk_templates,
                        install_image=install_image,
                        zeroing_image=zeroing_image,
                        compression_tools=compression_tools,
                        enabled_user_shutdown=enabled_user_shutdown,
                        ssh_key_type=ssh_key_type,
                        ssh_key_bits=ssh_key_bits,
                        )
  op = opcodes.OpClusterPostInit()
  SubmitOpCode(op, opts=opts)
  return 0
360
361
@RunWithRPC
def DestroyCluster(opts, args):
  """Destroy the cluster.

  @param opts: the command line options selected by the user
  @type args: list
  @param args: should be an empty list
  @rtype: int
  @return: the desired exit code

  """
  if not opts.yes_do_it:
    # fixed grammar of the user-facing message ("want destroy" -> "want to
    # destroy")
    ToStderr("Destroying a cluster is irreversible. If you really want to"
             " destroy this cluster, supply the --yes-do-it option.")
    return 1

  op = opcodes.OpClusterDestroy()
  master_uuid = SubmitOpCode(op, opts=opts)
  # if we reached this, the opcode didn't fail; we can proceed to
  # shutdown all the daemons
  bootstrap.FinalizeClusterDestroy(master_uuid)
  return 0
384
385
def RenameCluster(opts, args):
  """Rename the cluster.

  @param opts: the command line options selected by the user
  @type args: list
  @param args: should contain only one element, the new cluster name
  @rtype: int
  @return: the desired exit code

  """
  client = GetClient()

  (old_name, ) = client.QueryConfigValues(["cluster_name"])
  new_name = args[0]

  # Renaming is dangerous over the network; ask first unless --force.
  if not opts.force:
    usertext = ("This will rename the cluster from '%s' to '%s'. If you are"
                " connected over the network to the cluster name, the"
                " operation is very dangerous as the IP address will be"
                " removed from the node and the change may not go through."
                " Continue?") % (old_name, new_name)
    if not AskUser(usertext):
      return 1

  renamed = SubmitOpCode(opcodes.OpClusterRename(name=new_name),
                         opts=opts, cl=client)
  if renamed:
    ToStdout("Cluster renamed from '%s' to '%s'", old_name, renamed)

  return 0
417
418
def ActivateMasterIp(opts, args):
  """Activates the master IP.

  @rtype: int
  @return: the desired exit code

  """
  SubmitOpCode(opcodes.OpClusterActivateMasterIp())
  return 0
426
427
def DeactivateMasterIp(opts, args):
  """Deactivates the master IP.

  @rtype: int
  @return: the desired exit code

  """
  # Dropping the master IP kills existing connections; confirm first
  # unless --confirm was given.
  if not opts.confirm:
    prompt = ("This will disable the master IP. All the open connections to"
              " the master IP will be closed. To reach the master you will"
              " need to use its node IP."
              " Continue?")
    if not AskUser(prompt):
      return 1

  SubmitOpCode(opcodes.OpClusterDeactivateMasterIp())
  return 0
443
444
def RedistributeConfig(opts, args):
  """Forces push of the cluster configuration.

  @param opts: the command line options selected by the user
  @type args: list
  @param args: empty list
  @rtype: int
  @return: the desired exit code

  """
  if opts.yes_do_it:
    # --yes-do-it: distribute even through a drained job queue
    SubmitOpCodeToDrainedQueue(opcodes.OpClusterRedistConf())
  else:
    SubmitOrSend(opcodes.OpClusterRedistConf(), opts)
  return 0
461
462
def ShowClusterVersion(opts, args):
  """Write version of ganeti software to the standard output.

  @param opts: the command line options selected by the user
  @type args: list
  @param args: should be an empty list
  @rtype: int
  @return: the desired exit code

  """
  info = GetClient().QueryClusterInfo()
  # (label, cluster-info key) pairs, printed in this order
  for label, key in [
      ("Software version", "software_version"),
      ("Internode protocol", "protocol_version"),
      ("Configuration format", "config_version"),
      ("OS api version", "os_api_version"),
      ("Export interface", "export_version"),
      ("VCS version", "vcs_version"),
      ]:
    ToStdout("%s: %s" % (label, info[key]))
  return 0
482
483
def ShowClusterMaster(opts, args):
  """Write name of master node to the standard output.

  @param opts: the command line options selected by the user
  @type args: list
  @param args: should be an empty list
  @rtype: int
  @return: the desired exit code

  """
  ToStdout(bootstrap.GetMaster())
  return 0
497
498
499 def _FormatGroupedParams(paramsdict, roman=False):
500 """Format Grouped parameters (be, nic, disk) by group.
501
502 @type paramsdict: dict of dicts
503 @param paramsdict: {group: {param: value, ...}, ...}
504 @rtype: dict of dicts
505 @return: copy of the input dictionaries with strings as values
506
507 """
508 ret = {}
509 for (item, val) in paramsdict.items():
510 if isinstance(val, dict):
511 ret[item] = _FormatGroupedParams(val, roman=roman)
512 elif roman and isinstance(val, int):
513 ret[item] = compat.TryToRoman(val)
514 else:
515 ret[item] = str(val)
516 return ret
517
518
def _FormatDataCollectors(paramsdict):
  """Format the data collector settings for display.

  @type paramsdict: dict of dicts
  @param paramsdict: response of QueryClusterInfo
  @rtype: dict of dicts
  @return: parameters grouped by data collector

  """
  enabled = paramsdict[constants.DATA_COLLECTORS_ENABLED_NAME]
  interval = paramsdict[constants.DATA_COLLECTORS_INTERVAL_NAME]

  # intervals are stored in microseconds; display them in seconds
  return dict((name, dict(active=enabled[name],
                          interval="%.3fs" % (interval[name] / 1e6)))
              for name in enabled)
537
538
def ShowClusterConfig(opts, args):
  """Shows cluster information.

  @param opts: the command line options selected by the user
  @type args: list
  @param args: should be an empty list
  @rtype: int
  @return: the desired exit code

  """
  cl = GetClient()
  result = cl.QueryClusterInfo()

  if result["tags"]:
    tags = utils.CommaJoin(utils.NiceSort(result["tags"]))
  else:
    tags = "(none)"
  if result["reserved_lvs"]:
    reserved_lvs = utils.CommaJoin(result["reserved_lvs"])
  else:
    reserved_lvs = "(none)"

  enabled_hv = result["enabled_hypervisors"]
  # only show parameters of hypervisors that are actually enabled;
  # items() instead of Python-2-only iteritems() for forward compatibility
  hvparams = dict((k, v) for k, v in result["hvparams"].items()
                  if k in enabled_hv)

  info = [
    ("Cluster name", result["name"]),
    ("Cluster UUID", result["uuid"]),

    ("Creation time", utils.FormatTime(result["ctime"])),
    ("Modification time", utils.FormatTime(result["mtime"])),

    ("Master node", result["master"]),

    ("Architecture (this node)",
     "%s (%s)" % (result["architecture"][0], result["architecture"][1])),

    ("Tags", tags),

    ("Default hypervisor", result["default_hypervisor"]),
    ("Enabled hypervisors", utils.CommaJoin(enabled_hv)),

    ("Hypervisor parameters", _FormatGroupedParams(hvparams,
                                                   opts.roman_integers)),

    ("OS-specific hypervisor parameters",
     _FormatGroupedParams(result["os_hvp"], opts.roman_integers)),

    ("OS parameters", _FormatGroupedParams(result["osparams"],
                                           opts.roman_integers)),

    ("Hidden OSes", utils.CommaJoin(result["hidden_os"])),
    ("Blacklisted OSes", utils.CommaJoin(result["blacklisted_os"])),

    ("Cluster parameters", [
      ("candidate pool size",
       compat.TryToRoman(result["candidate_pool_size"],
                         convert=opts.roman_integers)),
      ("maximal number of jobs running simultaneously",
       compat.TryToRoman(result["max_running_jobs"],
                         convert=opts.roman_integers)),
      ("maximal number of jobs simultaneously tracked by the scheduler",
       compat.TryToRoman(result["max_tracked_jobs"],
                         convert=opts.roman_integers)),
      ("mac prefix", result["mac_prefix"]),
      ("master netdev", result["master_netdev"]),
      ("master netmask", compat.TryToRoman(result["master_netmask"],
                                           opts.roman_integers)),
      ("use external master IP address setup script",
       result["use_external_mip_script"]),
      ("lvm volume group", result["volume_group_name"]),
      ("lvm reserved volumes", reserved_lvs),
      ("drbd usermode helper", result["drbd_usermode_helper"]),
      ("file storage path", result["file_storage_dir"]),
      ("shared file storage path", result["shared_file_storage_dir"]),
      ("gluster storage path", result["gluster_storage_dir"]),
      ("maintenance of node health", result["maintain_node_health"]),
      ("uid pool", uidpool.FormatUidPool(result["uid_pool"])),
      ("default instance allocator", result["default_iallocator"]),
      ("default instance allocator parameters",
       result["default_iallocator_params"]),
      ("primary ip version", compat.TryToRoman(result["primary_ip_version"],
                                               opts.roman_integers)),
      ("preallocation wipe disks", result["prealloc_wipe_disks"]),
      ("OS search path", utils.CommaJoin(pathutils.OS_SEARCH_PATH)),
      ("ExtStorage Providers search path",
       utils.CommaJoin(pathutils.ES_SEARCH_PATH)),
      ("enabled disk templates",
       utils.CommaJoin(result["enabled_disk_templates"])),
      ("install image", result["install_image"]),
      ("instance communication network",
       result["instance_communication_network"]),
      ("zeroing image", result["zeroing_image"]),
      ("compression tools", result["compression_tools"]),
      ("enabled user shutdown", result["enabled_user_shutdown"]),
      ("modify ssh setup", result["modify_ssh_setup"]),
      ("ssh_key_type", result["ssh_key_type"]),
      ("ssh_key_bits", result["ssh_key_bits"]),
      ]),

    ("Default node parameters",
     _FormatGroupedParams(result["ndparams"], roman=opts.roman_integers)),

    ("Default instance parameters",
     _FormatGroupedParams(result["beparams"], roman=opts.roman_integers)),

    ("Default nic parameters",
     _FormatGroupedParams(result["nicparams"], roman=opts.roman_integers)),

    ("Default disk parameters",
     _FormatGroupedParams(result["diskparams"], roman=opts.roman_integers)),

    ("Instance policy - limits for instances",
     FormatPolicyInfo(result["ipolicy"], None, True, opts.roman_integers)),
    ("Data collectors", _FormatDataCollectors(result)),
    ]

  PrintGenericInfo(info)
  return 0
659
660
def ClusterCopyFile(opts, args):
  """Copy a file from master to some nodes.

  @param opts: the command line options selected by the user
  @type args: list
  @param args: should contain only one element, the path of
      the file to be copied
  @rtype: int
  @return: the desired exit code

  """
  filename = args[0]
  filename = os.path.abspath(filename)

  if not os.path.exists(filename):
    raise errors.OpPrereqError("No such filename '%s'" % filename,
                               errors.ECODE_INVAL)

  cl = GetClient()
  qcl = GetClient()
  try:
    cluster_name = cl.QueryConfigValues(["cluster_name"])[0]

    results = GetOnlineNodes(nodes=opts.nodes, cl=qcl, filter_master=True,
                             secondary_ips=opts.use_replication_network,
                             nodegroup=opts.nodegroup)
    # Query the SSH ports for the *filtered* node list; previously this
    # used opts.nodes, which can differ from "results" (master and offline
    # nodes are filtered out above), misaligning the node/port pairs below.
    ports = GetNodesSshPorts(results, qcl)
  finally:
    cl.Close()
    qcl.Close()

  srun = ssh.SshRunner(cluster_name)
  for (node, port) in zip(results, ports):
    if not srun.CopyFileToNode(node, port, filename):
      ToStderr("Copy of file %s to node %s:%d failed", filename, node, port)

  return 0
698
699
def RunClusterCommand(opts, args):
  """Run a command on some nodes.

  @param opts: the command line options selected by the user
  @type args: list
  @param args: should contain the command to be run and its arguments
  @rtype: int
  @return: the desired exit code

  """
  cl = GetClient()
  qcl = GetClient()

  command = " ".join(args)

  nodes = GetOnlineNodes(nodes=opts.nodes, cl=qcl, nodegroup=opts.nodegroup)

  cluster_name, master_node = cl.QueryConfigValues(["cluster_name",
                                                    "master_node"])

  srun = ssh.SshRunner(cluster_name=cluster_name)

  # Make sure master node is at list end, so a command that disrupts it
  # runs last. This must happen *before* fetching the SSH ports, otherwise
  # zip(nodes, ports) below would pair nodes with the wrong ports after
  # the reordering.
  if master_node in nodes:
    nodes.remove(master_node)
    nodes.append(master_node)

  ports = GetNodesSshPorts(nodes, qcl)

  for (name, port) in zip(nodes, ports):
    result = srun.Run(name, constants.SSH_LOGIN_USER, command, port=port)

    if opts.failure_only and result.exit_code == constants.EXIT_SUCCESS:
      # Do not output anything for successful commands
      continue

    ToStdout("------------------------------------------------")
    if opts.show_machine_names:
      for line in result.output.splitlines():
        ToStdout("%s: %s", name, line)
    else:
      ToStdout("node: %s", name)
      ToStdout("%s", result.output)
    ToStdout("return code = %s", result.exit_code)

  return 0
745
746
def VerifyCluster(opts, args):
  """Verify integrity of cluster, performing various test on nodes.

  @param opts: the command line options selected by the user
  @type args: list
  @param args: should be an empty list
  @rtype: int
  @return: the desired exit code

  """
  skip_checks = []
  if opts.skip_nplusone_mem:
    skip_checks.append(constants.VERIFY_NPLUSONE_MEM)

  cl = GetClient()

  op = opcodes.OpClusterVerify(verbose=opts.verbose,
                               error_codes=opts.error_codes,
                               debug_simulate_errors=opts.simulate_errors,
                               skip_checks=skip_checks,
                               ignore_errors=opts.ignore_errors,
                               group_name=opts.nodegroup,
                               verify_clutter=opts.verify_clutter)
  result = SubmitOpCode(op, cl=cl, opts=opts)

  # The verify opcode spawns per-group jobs; wait for all of them.
  executor = JobExecutor(cl=cl, opts=opts)
  for (status, job_id) in result[constants.JOB_IDS_KEY]:
    executor.AddJobId(None, status, job_id)
  job_results = executor.GetResults()

  bad_jobs = sum(1 for (job_success, _) in job_results if not job_success)
  bad_results = sum(1 for (_, op_res) in job_results
                    if not (op_res and op_res[0]))

  if bad_jobs > 0:
    ToStdout("%s job(s) failed while verifying the cluster.", bad_jobs)

  if bad_jobs == 0 and bad_results == 0:
    return constants.EXIT_SUCCESS
  return constants.EXIT_FAILURE
792
793
def VerifyDisks(opts, args):
  """Verify integrity of cluster disks.

  @param opts: the command line options selected by the user
  @type args: list
  @param args: should be an empty list
  @rtype: int
  @return: the desired exit code

  """
  cl = GetClient()

  op = opcodes.OpClusterVerifyDisks(group_name=opts.nodegroup,
                                    is_strict=not opts.no_strict)

  result = SubmitOpCode(op, cl=cl, opts=opts)

  # Keep track of submitted jobs
  jex = JobExecutor(cl=cl, opts=opts)

  for (status, job_id) in result[constants.JOB_IDS_KEY]:
    jex.AddJobId(None, status, job_id)

  retcode = constants.EXIT_SUCCESS

  for (status, result) in jex.GetResults():
    if not status:
      ToStdout("Job failed: %s", result)
      continue

    ((bad_nodes, instances, missing), ) = result

    for node, text in bad_nodes.items():
      ToStdout("Error gathering data on node %s: %s",
               node, utils.SafeEncode(text[-400:]))
      retcode = constants.EXIT_FAILURE
      ToStdout("You need to fix these nodes first before fixing instances")

    for iname in instances:
      if iname in missing:
        continue
      op = opcodes.OpInstanceActivateDisks(instance_name=iname)
      try:
        ToStdout("Activating disks for instance '%s'", iname)
        SubmitOpCode(op, opts=opts, cl=cl)
      # "as" form (valid since Python 2.6) instead of the removed comma form
      except errors.GenericError as err:
        nret, msg = FormatError(err)
        retcode |= nret
        ToStderr("Error activating disks for instance %s: %s", iname, msg)

    if missing:
      # items() instead of Python-2-only iteritems() for forward compatibility
      for iname, ival in missing.items():
        all_missing = compat.all(x[0] in bad_nodes for x in ival)
        if all_missing:
          ToStdout("Instance %s cannot be verified as it lives on"
                   " broken nodes", iname)
          continue

        ToStdout("Instance %s has missing logical volumes:", iname)
        ival.sort()
        for node, vol in ival:
          if node in bad_nodes:
            ToStdout("\tbroken node %s /dev/%s", node, vol)
          else:
            ToStdout("\t%s /dev/%s", node, vol)

      ToStdout("You need to replace or recreate disks for all the above"
               " instances if this message persists after fixing broken nodes.")
      retcode = constants.EXIT_FAILURE
    elif not instances:
      ToStdout("No disks need to be activated.")

  return retcode
867
868
def RepairDiskSizes(opts, args):
  """Verify sizes of cluster disks.

  @param opts: the command line options selected by the user
  @type args: list
  @param args: optional list of instances to restrict check to
  @rtype: int
  @return: the desired exit code

  """
  op = opcodes.OpClusterRepairDiskSizes(instances=args)
  SubmitOpCode(op, opts=opts)
  # Return an explicit success code as the docstring promises; previously
  # the function fell off the end and returned None.
  return 0
881
882
@RunWithRPC
def MasterFailover(opts, args):
  """Failover the master node.

  This command, when run on a non-master node, will cause the current
  master to cease being master, and the non-master to become new
  master.

  @param opts: the command line options selected by the user
  @type args: list
  @param args: should be an empty list
  @rtype: int
  @return: the desired exit code

  """
  if opts.no_voting:
    # Don't ask for confirmation if the user provides the confirmation flag.
    if not opts.yes_do_it:
      usertext = ("This will perform the failover even if most other nodes"
                  " are down, or if this node is outdated. This is dangerous"
                  " as it can lead to a non-consistent cluster. Check the"
                  " gnt-cluster(8) man page before proceeding. Continue?")
      if not AskUser(usertext):
        return 1
  else:
    # Verify that a majority of nodes are still healthy
    (majority_healthy, unhealthy_nodes) = bootstrap.MajorityHealthy(
      opts.ignore_offline_nodes)
    if not majority_healthy:
      ToStderr("Master-failover with voting is only possible if the majority"
               " of nodes are still healthy; use the --no-voting option after"
               " ensuring by other means that you won't end up in a dual-master"
               " scenario. Unhealthy nodes: %s" % unhealthy_nodes)
      return 1

  # Perform the actual failover; rvalue becomes this command's exit code
  # and any accumulated messages are reported to the user.
  rvalue, msgs = bootstrap.MasterFailover(no_voting=opts.no_voting)
  for msg in msgs:
    ToStderr(msg)

  return rvalue
923
924
def MasterPing(opts, args):
  """Checks if the master is alive.

  @param opts: the command line options selected by the user
  @type args: list
  @param args: should be an empty list
  @rtype: int
  @return: the desired exit code (0 if the master answered, 1 otherwise)

  """
  try:
    GetClient().QueryClusterInfo()
  except Exception: # pylint: disable=W0703
    # Any failure to reach or query the master counts as "not alive".
    return 1
  return 0
941
942
def SearchTags(opts, args):
  """Searches the tags on all the cluster.

  @param opts: the command line options selected by the user
  @type args: list
  @param args: should contain only one element, the tag pattern
  @rtype: int
  @return: the desired exit code

  """
  matches = SubmitOpCode(opcodes.OpTagsSearch(pattern=args[0]), opts=opts)
  if not matches:
    return 1
  for path, tag in sorted(matches):
    ToStdout("%s %s", path, tag)
961
962
def _ReadAndVerifyCert(cert_filename, verify_private_key=False):
  """Reads and verifies an X509 certificate.

  @type cert_filename: string
  @param cert_filename: the path of the file containing the certificate to
                        verify encoded in PEM format
  @type verify_private_key: bool
  @param verify_private_key: whether to verify the private key in addition to
                             the public certificate
  @rtype: string
  @return: a string containing the PEM-encoded certificate.
  @raise errors.X509CertError: if the file cannot be read or parsed

  """
  try:
    pem = utils.ReadFile(cert_filename)
  # "as" form (valid since Python 2.6) instead of the removed comma form
  except IOError as err:
    raise errors.X509CertError(cert_filename,
                               "Unable to read certificate: %s" % str(err))

  try:
    OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM, pem)
  except Exception as err:
    raise errors.X509CertError(cert_filename,
                               "Unable to load certificate: %s" % str(err))

  if verify_private_key:
    try:
      OpenSSL.crypto.load_privatekey(OpenSSL.crypto.FILETYPE_PEM, pem)
    except Exception as err:
      raise errors.X509CertError(cert_filename,
                                 "Unable to load private key: %s" % str(err))

  return pem
996
997
998 # pylint: disable=R0913
999 def _RenewCrypto(new_cluster_cert, new_rapi_cert, # pylint: disable=R0911
1000 rapi_cert_filename, new_spice_cert, spice_cert_filename,
1001 spice_cacert_filename, new_confd_hmac_key, new_cds,
1002 cds_filename, force, new_node_cert, new_ssh_keys,
1003 ssh_key_type, ssh_key_bits, verbose, debug):
1004 """Renews cluster certificates, keys and secrets.
1005
1006 @type new_cluster_cert: bool
1007 @param new_cluster_cert: Whether to generate a new cluster certificate
1008 @type new_rapi_cert: bool
1009 @param new_rapi_cert: Whether to generate a new RAPI certificate
1010 @type rapi_cert_filename: string
1011 @param rapi_cert_filename: Path to file containing new RAPI certificate
1012 @type new_spice_cert: bool
1013 @param new_spice_cert: Whether to generate a new SPICE certificate
1014 @type spice_cert_filename: string
1015 @param spice_cert_filename: Path to file containing new SPICE certificate
1016 @type spice_cacert_filename: string
1017 @param spice_cacert_filename: Path to file containing the certificate of the
1018 CA that signed the SPICE certificate
1019 @type new_confd_hmac_key: bool
1020 @param new_confd_hmac_key: Whether to generate a new HMAC key
1021 @type new_cds: bool
1022 @param new_cds: Whether to generate a new cluster domain secret
1023 @type cds_filename: string
1024 @param cds_filename: Path to file containing new cluster domain secret
1025 @type force: bool
1026 @param force: Whether to ask user for confirmation
1027 @type new_node_cert: bool
1028 @param new_node_cert: Whether to generate new node certificates
1029 @type new_ssh_keys: bool
1030 @param new_ssh_keys: Whether to generate new node SSH keys
1031 @type ssh_key_type: One of L{constants.SSHK_ALL}
1032 @param ssh_key_type: The type of SSH key to be generated
1033 @type ssh_key_bits: int
1034 @param ssh_key_bits: The length of the key to be generated
1035 @type verbose: boolean
1036 @param verbose: Show verbose output
1037 @type debug: boolean
1038 @param debug: Show debug output
1039
1040 """
1041 ToStdout("Updating certificates now. Running \"gnt-cluster verify\" "
1042 " is recommended after this operation.")
1043
1044 if new_rapi_cert and rapi_cert_filename:
1045 ToStderr("Only one of the --new-rapi-certificate and --rapi-certificate"
1046 " options can be specified at the same time.")
1047 return 1
1048
1049 if new_cds and cds_filename:
1050 ToStderr("Only one of the --new-cluster-domain-secret and"
1051 " --cluster-domain-secret options can be specified at"
1052 " the same time.")
1053 return 1
1054
1055 if new_spice_cert and (spice_cert_filename or spice_cacert_filename):
1056 ToStderr("When using --new-spice-certificate, the --spice-certificate"
1057 " and --spice-ca-certificate must not be used.")
1058 return 1
1059
1060 if bool(spice_cacert_filename) ^ bool(spice_cert_filename):
1061 ToStderr("Both --spice-certificate and --spice-ca-certificate must be"
1062 " specified.")
1063 return 1
1064
1065 rapi_cert_pem, spice_cert_pem, spice_cacert_pem = (None, None, None)
1066 try:
1067 if rapi_cert_filename:
1068 rapi_cert_pem = _ReadAndVerifyCert(rapi_cert_filename, True)
1069 if spice_cert_filename:
1070 spice_cert_pem = _ReadAndVerifyCert(spice_cert_filename, True)
1071 spice_cacert_pem = _ReadAndVerifyCert(spice_cacert_filename)
1072 except errors.X509CertError, err:
1073 ToStderr("Unable to load X509 certificate from %s: %s", err[0], err[1])
1074 return 1
1075
1076 if cds_filename:
1077 try:
1078 cds = utils.ReadFile(cds_filename)
1079 except Exception, err: # pylint: disable=W0703
1080 ToStderr("Can't load new cluster domain secret from %s: %s" %
1081 (cds_filename, str(err)))
1082 return 1
1083 else:
1084 cds = None
1085
1086 if not force:
1087 usertext = ("This requires all daemons on all nodes to be restarted and"
1088 " may take some time. Continue?")
1089 if not AskUser(usertext):
1090 return 1
1091
1092 def _RenewCryptoInner(ctx):
1093 ctx.feedback_fn("Updating certificates and keys")
1094
1095 bootstrap.GenerateClusterCrypto(False,
1096 new_rapi_cert,
1097 new_spice_cert,
1098 new_confd_hmac_key,
1099 new_cds,
1100 False,
1101 None,
1102 rapi_cert_pem=rapi_cert_pem,
1103 spice_cert_pem=spice_cert_pem,
1104 spice_cacert_pem=spice_cacert_pem,
1105 cds=cds)
1106
1107 files_to_copy = []
1108
1109 if new_rapi_cert or rapi_cert_pem:
1110 files_to_copy.append(pathutils.RAPI_CERT_FILE)
1111
1112 if new_spice_cert or spice_cert_pem:
1113 files_to_copy.append(pathutils.SPICE_CERT_FILE)
1114 files_to_copy.append(pathutils.SPICE_CACERT_FILE)
1115
1116 if new_confd_hmac_key:
1117 files_to_copy.append(pathutils.CONFD_HMAC_KEY)
1118
1119 if new_cds or cds:
1120 files_to_copy.append(pathutils.CLUSTER_DOMAIN_SECRET_FILE)
1121
1122 if files_to_copy:
1123 for node_name in ctx.nonmaster_nodes:
1124 port = ctx.ssh_ports[node_name]
1125 ctx.feedback_fn("Copying %s to %s:%d" %
1126 (", ".join(files_to_copy), node_name, port))
1127 for file_name in files_to_copy:
1128 ctx.ssh.CopyFileToNode(node_name, port, file_name)
1129
1130 def _RenewClientCerts(ctx):
1131 ctx.feedback_fn("Updating client SSL certificates.")
1132
1133 cluster_name = ssconf.SimpleStore().GetClusterName()
1134
1135 for node_name in ctx.nonmaster_nodes + [ctx.master_node]:
1136 ssh_port = ctx.ssh_ports[node_name]
1137 data = {
1138 constants.NDS_CLUSTER_NAME: cluster_name,
1139 constants.NDS_NODE_DAEMON_CERTIFICATE:
1140 utils.ReadFile(pathutils.NODED_CERT_FILE),
1141 constants.NDS_NODE_NAME: node_name,
1142 constants.NDS_ACTION: constants.CRYPTO_ACTION_CREATE,
1143 }
1144
1145 ssh.RunSshCmdWithStdin(
1146 cluster_name,
1147 node_name,
1148 pathutils.SSL_UPDATE,
1149 ssh_port,
1150 data,
1151 debug=ctx.debug,
1152 verbose=ctx.verbose,
1153 use_cluster_key=True,
1154 ask_key=False,
1155 strict_host_check=True)
1156
1157 # Create a temporary ssconf file using the master's client cert digest
1158 # and the 'bootstrap' keyword to enable distribution of all nodes' digests.
1159 master_digest = utils.GetCertificateDigest()
1160 ssconf_master_candidate_certs_filename = os.path.join(
1161 pathutils.DATA_DIR, "%s%s" %
1162 (constants.SSCONF_FILEPREFIX, constants.SS_MASTER_CANDIDATES_CERTS))
1163 utils.WriteFile(
1164 ssconf_master_candidate_certs_filename,
1165 data="%s=%s" % (constants.CRYPTO_BOOTSTRAP, master_digest))
1166 for node_name in ctx.nonmaster_nodes:
1167 port = ctx.ssh_ports[node_name]
1168 ctx.feedback_fn("Copying %s to %s:%d" %
1169 (ssconf_master_candidate_certs_filename, node_name, port))
1170 ctx.ssh.CopyFileToNode(node_name, port,
1171 ssconf_master_candidate_certs_filename)
1172
1173 # Write the boostrap entry to the config using wconfd.
1174 config_live_lock = utils.livelock.LiveLock("renew_crypto")
1175 cfg = config.GetConfig(None, config_live_lock)
1176 cfg.AddNodeToCandidateCerts(constants.CRYPTO_BOOTSTRAP, master_digest)
1177 cfg.Update(cfg.GetClusterInfo(), ctx.feedback_fn)
1178
1179 def _RenewServerAndClientCerts(ctx):
1180 ctx.feedback_fn("Updating the cluster SSL certificate.")
1181
1182 master_name = ssconf.SimpleStore().GetMasterNode()
1183 bootstrap.GenerateClusterCrypto(True, # cluster cert
1184 False, # rapi cert
1185 False, # spice cert
1186 False, # confd hmac key
1187 False, # cds
1188 True, # client cert
1189 master_name)
1190
1191 for node_name in ctx.nonmaster_nodes:
1192 port = ctx.ssh_ports[node_name]
1193 server_cert = pathutils.NODED_CERT_FILE
1194 ctx.feedback_fn("Copying %s to %s:%d" %
1195 (server_cert, node_name, port))
1196 ctx.ssh.CopyFileToNode(node_name, port, server_cert)
1197
1198 _RenewClientCerts(ctx)
1199
1200 if new_rapi_cert or new_spice_cert or new_confd_hmac_key or new_cds:
1201 RunWhileClusterStopped(ToStdout, _RenewCryptoInner)
1202
1203 # If only node certficates are recreated, call _RenewClientCerts only.
1204 if new_node_cert and not new_cluster_cert:
1205 RunWhileDaemonsStopped(ToStdout, [constants.NODED, constants.WCONFD],
1206 _RenewClientCerts, verbose=verbose, debug=debug)
1207
1208 # If the cluster certificate are renewed, the client certificates need
1209 # to be renewed too.
1210 if new_cluster_cert:
1211 RunWhileDaemonsStopped(ToStdout, [constants.NODED, constants.WCONFD],
1212 _RenewServerAndClientCerts, verbose=verbose,
1213 debug=debug)
1214
1215 if new_node_cert or new_cluster_cert or new_ssh_keys:
1216 cl = GetClient()
1217 renew_op = opcodes.OpClusterRenewCrypto(
1218 node_certificates=new_node_cert or new_cluster_cert,
1219 renew_ssh_keys=new_ssh_keys,
1220 ssh_key_type=ssh_key_type,
1221 ssh_key_bits=ssh_key_bits,
1222 verbose=verbose,
1223 debug=debug)
1224 SubmitOpCode(renew_op, cl=cl)
1225
1226 ToStdout("All requested certificates and keys have been replaced."
1227 " Running \"gnt-cluster verify\" now is recommended.")
1228
1229 return 0
1230
1231
def _BuildGanetiPubKeys(options, pub_key_file=pathutils.SSH_PUB_KEYS, cl=None,
                        get_online_nodes_fn=GetOnlineNodes,
                        get_nodes_ssh_ports_fn=GetNodesSshPorts,
                        get_node_uuids_fn=GetNodeUUIDs,
                        homedir_fn=None):
  """Recreates the 'ganeti_pub_key' file by polling all nodes.

  """
  if not cl:
    cl = GetClient()

  (cluster_name, master_node, modify_ssh_setup, ssh_key_type) = \
    cl.QueryConfigValues(["cluster_name", "master_node", "modify_ssh_setup",
                          "ssh_key_type"])

  # In case Ganeti is not supposed to modify the SSH setup, simply exit and do
  # not update this file.
  if not modify_ssh_setup:
    return

  # Keep a backup of the previous file before rebuilding it from scratch
  if os.path.exists(pub_key_file):
    utils.CreateBackup(pub_key_file)
    utils.RemoveFile(pub_key_file)

  ssh.ClearPubKeyFile(pub_key_file)

  online_nodes = get_online_nodes_fn([], cl=cl)
  all_nodes = online_nodes + [master_node]
  ssh_port_map = dict(zip(all_nodes, get_nodes_ssh_ports_fn(all_nodes, cl)))
  node_uuid_map = dict(zip(all_nodes, get_node_uuids_fn(all_nodes, cl)))

  nonmaster_nodes = [node for node in online_nodes if node != master_node]

  _, pub_key_filename, _ = \
    ssh.GetUserFiles(constants.SSH_LOGIN_USER, mkdir=False, dircheck=False,
                     kind=ssh_key_type, _homedir_fn=homedir_fn)

  # the master's own key can be read from the local file system
  ssh.AddPublicKey(node_uuid_map[master_node],
                   utils.ReadFile(pub_key_filename),
                   key_file=pub_key_file)

  # all other keys have to be fetched over SSH
  for node in nonmaster_nodes:
    pub_key = ssh.ReadRemoteSshPubKey(pub_key_filename, node, cluster_name,
                                      ssh_port_map[node],
                                      options.ssh_key_check,
                                      options.ssh_key_check)
    ssh.AddPublicKey(node_uuid_map[node], pub_key, key_file=pub_key_file)
1285
1286
def RenewCrypto(opts, args):
  """Renews cluster certificates, keys and secrets.

  @param opts: the command line options selected by the user
  @type args: list
  @param args: should be an empty list
  @rtype: int
  @return: the desired exit code

  """
  # Rebuild the public key file first; it polls the running cluster
  if opts.new_ssh_keys:
    _BuildGanetiPubKeys(opts)

  renew_args = (opts.new_cluster_cert,
                opts.new_rapi_cert,
                opts.rapi_cert,
                opts.new_spice_cert,
                opts.spice_cert,
                opts.spice_cacert,
                opts.new_confd_hmac_key,
                opts.new_cluster_domain_secret,
                opts.cluster_domain_secret,
                opts.force,
                opts.new_node_cert,
                opts.new_ssh_keys,
                opts.ssh_key_type,
                opts.ssh_key_bits,
                opts.verbose,
                opts.debug > 0)
  return _RenewCrypto(*renew_args)
1309
1310
1311 def _GetEnabledDiskTemplates(opts):
1312 """Determine the list of enabled disk templates.
1313
1314 """
1315 if opts.enabled_disk_templates:
1316 return opts.enabled_disk_templates.split(",")
1317 else:
1318 return None
1319
1320
1321 def _GetVgName(opts, enabled_disk_templates):
1322 """Determine the volume group name.
1323
1324 @type enabled_disk_templates: list of strings
1325 @param enabled_disk_templates: cluster-wide enabled disk-templates
1326
1327 """
1328 # consistency between vg name and enabled disk templates
1329 vg_name = None
1330 if opts.vg_name is not None:
1331 vg_name = opts.vg_name
1332 if enabled_disk_templates:
1333 if vg_name and not utils.IsLvmEnabled(enabled_disk_templates):
1334 ToStdout("You specified a volume group with --vg-name, but you did not"
1335 " enable any of the following lvm-based disk templates: %s" %
1336 utils.CommaJoin(constants.DTS_LVM))
1337 return vg_name
1338
1339
1340 def _GetDrbdHelper(opts, enabled_disk_templates):
1341 """Determine the DRBD usermode helper.
1342
1343 """
1344 drbd_helper = opts.drbd_helper
1345 if enabled_disk_templates:
1346 drbd_enabled = constants.DT_DRBD8 in enabled_disk_templates
1347 if not drbd_enabled and opts.drbd_helper:
1348 ToStdout("You specified a DRBD usermode helper with "
1349 " --drbd-usermode-helper while DRBD is not enabled.")
1350 return drbd_helper
1351
1352
1353 def _GetCompressionTools(opts):
1354 """Determine the list of custom compression tools.
1355
1356 """
1357 if opts.compression_tools:
1358 return opts.compression_tools.split(",")
1359 elif opts.compression_tools is None:
1360 return None # To note the parameter was not provided
1361 else:
1362 return constants.IEC_DEFAULT_TOOLS # Resetting to default
1363
1364
1365 def SetClusterParams(opts, args):
1366 """Modify the cluster.
1367
1368 @param opts: the command line options selected by the user
1369 @type args: list
1370 @param args: should be an empty list
1371 @rtype: int
1372 @return: the desired exit code
1373
1374 """
1375 if not (opts.vg_name is not None or
1376 opts.drbd_helper is not None or
1377 opts.enabled_hypervisors or opts.hvparams or
1378 opts.beparams or opts.nicparams or
1379 opts.ndparams or opts.diskparams or
1380 opts.candidate_pool_size is not None or
1381 opts.max_running_jobs is not None or
1382 opts.max_tracked_jobs is not None or
1383 opts.uid_pool is not None or
1384 opts.maintain_node_health is not None or
1385 opts.add_uids is not None or
1386 opts.remove_uids is not None or
1387 opts.default_iallocator is not None or
1388 opts.default_iallocator_params is not None or
1389 opts.reserved_lvs is not None or
1390 opts.mac_prefix is not None or
1391 opts.master_netdev is not None or
1392 opts.master_netmask is not None or
1393 opts.use_external_mip_script is not None or
1394 opts.prealloc_wipe_disks is not None or
1395 opts.hv_state or
1396 opts.enabled_disk_templates or
1397 opts.disk_state or
1398 opts.ipolicy_bounds_specs is not None or
1399 opts.ipolicy_std_specs is not None or
1400 opts.ipolicy_disk_templates is not None or
1401 opts.ipolicy_vcpu_ratio is not None or
1402 opts.ipolicy_spindle_ratio is not None or
1403 opts.ipolicy_memory_ratio is not None or
1404 opts.modify_etc_hosts is not None or
1405 opts.modify_ssh_setup is not None or
1406 opts.file_storage_dir is not None or
1407 opts.install_image is not None or
1408 opts.instance_communication_network is not None or
1409 opts.zeroing_image is not None or
1410 opts.shared_file_storage_dir is not None or
1411 opts.compression_tools is not None or
1412 opts.shared_file_storage_dir is not None or
1413 opts.enabled_user_shutdown is not None or
1414 opts.maint_round_delay is not None or
1415 opts.maint_balance is not None or
1416 opts.maint_balance_threshold is not None or
1417 opts.data_collector_interval or
1418 opts.diagnose_data_collector_filename is not None or
1419 opts.enabled_data_collectors):
1420 ToStderr("Please give at least one of the parameters.")
1421 return 1
1422
1423 enabled_disk_templates = _GetEnabledDiskTemplates(opts)
1424 vg_name = _GetVgName(opts, enabled_disk_templates)
1425
1426 try:
1427 drbd_helper = _GetDrbdHelper(opts, enabled_disk_templates)
1428 except errors.OpPrereqError, e:
1429 ToStderr(str(e))
1430 return 1
1431
1432 hvlist = opts.enabled_hypervisors
1433 if hvlist is not None:
1434 hvlist = hvlist.split(",")
1435
1436 # a list of (name, dict) we can pass directly to dict() (or [])
1437 hvparams = dict(opts.hvparams)
1438 for hv_params in hvparams.values():
1439 utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
1440
1441 diskparams = dict(opts.diskparams)
1442
1443 for dt_params in diskparams.values():
1444 utils.ForceDictType(dt_params, constants.DISK_DT_TYPES)
1445
1446 beparams = opts.beparams
1447 utils.ForceDictType(beparams, constants.BES_PARAMETER_COMPAT)
1448
1449 nicparams = opts.nicparams
1450 utils.ForceDictType(nicparams, constants.NICS_PARAMETER_TYPES)
1451
1452 ndparams = opts.ndparams
1453 if ndparams is not None:
1454 utils.ForceDictType(ndparams, constants.NDS_PARAMETER_TYPES)
1455
1456 ipolicy = CreateIPolicyFromOpts(
1457 minmax_ispecs=opts.ipolicy_bounds_specs,
1458 std_ispecs=opts.ipolicy_std_specs,
1459 ipolicy_disk_templates=opts.ipolicy_disk_templates,
1460 ipolicy_vcpu_ratio=opts.ipolicy_vcpu_ratio,
1461 ipolicy_spindle_ratio=opts.ipolicy_spindle_ratio,
1462 ipolicy_memory_ratio=opts.ipolicy_memory_ratio,
1463 )
1464
1465 mnh = opts.maintain_node_health
1466
1467 uid_pool = opts.uid_pool
1468 if uid_pool is not None:
1469 uid_pool = uidpool.ParseUidPool(uid_pool)
1470
1471 add_uids = opts.add_uids
1472 if add_uids is not None:
1473 add_uids = uidpool.ParseUidPool(add_uids)
1474
1475 remove_uids = opts.remove_uids
1476 if remove_uids is not None:
1477 remove_uids = uidpool.ParseUidPool(remove_uids)
1478
1479 if opts.reserved_lvs is not None:
1480 if opts.reserved_lvs == "":
1481 opts.reserved_lvs = []
1482 else:
1483 opts.reserved_lvs = utils.UnescapeAndSplit(opts.reserved_lvs, sep=",")
1484
1485 if opts.master_netmask is not None:
1486 try:
1487 opts.master_netmask = int(opts.master_netmask)
1488 except ValueError:
1489 ToStderr("The --master-netmask option expects an int parameter.")
1490 return 1
1491
1492 ext_ip_script = opts.use_external_mip_script
1493
1494 if opts.disk_state:
1495 disk_state = utils.FlatToDict(opts.disk_state)
1496 else:
1497 disk_state = {}
1498
1499 hv_state = dict(opts.hv_state)
1500
1501 compression_tools = _GetCompressionTools(opts)
1502
1503 enabled_data_collectors = dict(
1504 (k, v.lower().startswith("t"))
1505 for k, v in opts.enabled_data_collectors.items())
1506
1507 unrecognized_data_collectors = [
1508 k for k in enabled_data_collectors.keys()
1509 if k not in constants.DATA_COLLECTOR_NAMES]
1510 if unrecognized_data_collectors:
1511 ToStderr("Data collector names not recognized: %s" %
1512 ", ".join(unrecognized_data_collectors))
1513
1514 try:
1515 data_collector_interval = dict(
1516 (k, long(1e6 * float(v)))
1517 for (k, v) in opts.data_collector_interval.items())
1518 except ValueError:
1519 ToStderr("Can't transform all values to integers: {}".format(
1520 opts.data_collector_interval))
1521 return 1
1522 if any(v <= 0 for v in data_collector_interval):
1523 ToStderr("Some interval times where not above zero.")
1524 return 1
1525
1526 op = opcodes.OpClusterSetParams(
1527 vg_name=vg_name,
1528 drbd_helper=drbd_helper,
1529 enabled_hypervisors=hvlist,
1530 hvparams=hvparams,
1531 os_hvp=None,
1532 beparams=beparams,
1533 nicparams=nicparams,
1534 ndparams=ndparams,
1535 diskparams=diskparams,
1536 ipolicy=ipolicy,
1537 candidate_pool_size=opts.candidate_pool_size,
1538 max_running_jobs=opts.max_running_jobs,
1539 max_tracked_jobs=opts.max_tracked_jobs,
1540 maintain_node_health=mnh,
1541 modify_etc_hosts=opts.modify_etc_hosts,
1542 modify_ssh_setup=opts.modify_ssh_setup,
1543 uid_pool=uid_pool,
1544 add_uids=add_uids,
1545 remove_uids=remove_uids,
1546 default_iallocator=opts.default_iallocator,
1547 default_iallocator_params=opts.default_iallocator_params,
1548 prealloc_wipe_disks=opts.prealloc_wipe_disks,
1549 mac_prefix=opts.mac_prefix,
1550 master_netdev=opts.master_netdev,
1551 master_netmask=opts.master_netmask,
1552 reserved_lvs=opts.reserved_lvs,
1553 use_external_mip_script=ext_ip_script,
1554 hv_state=hv_state,
1555 disk_state=disk_state,
1556 enabled_disk_templates=enabled_disk_templates,
1557 force=opts.force,
1558 file_storage_dir=opts.file_storage_dir,
1559 install_image=opts.install_image,
1560 instance_communication_network=opts.instance_communication_network,
1561 zeroing_image=opts.zeroing_image,
1562 shared_file_storage_dir=opts.shared_file_storage_dir,
1563 compression_tools=compression_tools,
1564 enabled_user_shutdown=opts.enabled_user_shutdown,
1565 maint_round_delay=opts.maint_round_delay,
1566 maint_balance=opts.maint_balance,
1567 maint_balance_threshold=opts.maint_balance_threshold,
1568 enabled_data_collectors=enabled_data_collectors,
1569 data_collector_interval=data_collector_interval,
1570 diagnose_data_collector_filename=opts.diagnose_data_collector_filename
1571 )
1572 return base.GetResult(None, opts, SubmitOrSend(op, opts))
1573
1574
def QueueOps(opts, args):
  """Queue operations.

  @param opts: the command line options selected by the user
  @type args: list
  @param args: should contain only one element, the subcommand
  @rtype: int
  @return: the desired exit code

  """
  command = args[0]
  client = GetClient()
  if command == "drain":
    client.SetQueueDrainFlag(True)
  elif command == "undrain":
    client.SetQueueDrainFlag(False)
  elif command == "info":
    drained = client.QueryConfigValues(["drain_flag"])[0]
    ToStdout("The drain flag is %s" % ("set" if drained else "unset"))
  else:
    raise errors.OpPrereqError("Command '%s' is not valid." % command,
                               errors.ECODE_INVAL)

  return 0
1602
1603
def _ShowWatcherPause(until):
  """Prints a human readable message about the watcher pause state.

  @param until: timestamp until which the watcher is paused, or None

  """
  # A pause timestamp in the past counts as "not paused"
  if until is not None and until >= time.time():
    ToStdout("The watcher is paused until %s.", time.ctime(until))
  else:
    ToStdout("The watcher is not paused.")
1609
1610
def WatcherOps(opts, args):
  """Watcher operations.

  @param opts: the command line options selected by the user
  @type args: list
  @param args: should contain only one element, the subcommand
  @rtype: int
  @return: the desired exit code

  """
  command = args[0]
  client = GetClient()

  if command == "continue":
    client.SetWatcherPause(None)
    ToStdout("The watcher is no longer paused.")
  elif command == "pause":
    if len(args) < 2:
      raise errors.OpPrereqError("Missing pause duration", errors.ECODE_INVAL)
    # the duration is relative to now
    until = time.time() + ParseTimespec(args[1])
    _ShowWatcherPause(client.SetWatcherPause(until))
  elif command == "info":
    _ShowWatcherPause(client.QueryConfigValues(["watcher_pause"])[0])
  else:
    raise errors.OpPrereqError("Command '%s' is not valid." % command,
                               errors.ECODE_INVAL)

  return 0
1644
1645
def _OobPower(opts, node_list, power):
  """Puts the node in the list to desired power state.

  @param opts: The command line options selected by the user
  @param node_list: The list of nodes to operate on
  @param power: True if they should be powered on, False otherwise
  @return: The success of the operation (none failed)

  """
  oob_command = constants.OOB_POWER_ON if power else constants.OOB_POWER_OFF

  op = opcodes.OpOobCommand(node_names=node_list,
                            command=oob_command,
                            ignore_status=True,
                            timeout=opts.oob_timeout,
                            power_delay=opts.power_delay)
  result = SubmitOpCode(op, opts=opts)

  success = True
  for ((_, node_name), (data_status, _)) in result:
    if data_status != constants.RS_NORMAL:
      assert data_status != constants.RS_UNAVAIL
      success = False
      ToStderr("There was a problem changing power for %s, please investigate",
               node_name)

  return success
1681
1682
def _InstanceStart(opts, inst_list, start, no_remember=False):
  """Puts the instances in the list to desired state.

  @param opts: The command line options selected by the user
  @param inst_list: The list of instances to operate on
  @param start: True if they should be started, False for shutdown
  @param no_remember: If the instance state should be remembered
  @return: The success of the operation (none failed)

  """
  if start:
    opcls = opcodes.OpInstanceStartup
    (text_submit, text_success, text_failed) = \
      ("startup", "started", "starting")
  else:
    opcls = compat.partial(opcodes.OpInstanceShutdown,
                           timeout=opts.shutdown_timeout,
                           no_remember=no_remember)
    (text_submit, text_success, text_failed) = \
      ("shutdown", "stopped", "stopping")

  # Submit one job per instance and collect the results
  jex = JobExecutor(opts=opts)
  for inst in inst_list:
    ToStdout("Submit %s of instance %s", text_submit, inst)
    jex.QueueJob(inst, opcls(instance_name=inst))

  results = jex.GetResults()
  bad_cnt = sum(1 for (success, _) in results if not success)

  if bad_cnt:
    ToStderr("There were errors while %s instances:\n"
             "%d error(s) out of %d instance(s)", text_failed, bad_cnt,
             len(results))
    return False

  ToStdout("All instances have been %s successfully", text_success)
  return True
1721
1722
class _RunWhenNodesReachableHelper(object):
  """Helper class to make shared internal state sharing easier.

  @ivar success: Indicates if all action_cb calls were successful

  """
  def __init__(self, node_list, action_cb, node2ip, port, feedback_fn,
               _ping_fn=netutils.TcpPing, _sleep_fn=time.sleep):
    """Init the object.

    @param node_list: The list of nodes to be reachable
    @param action_cb: Callback called when a new host is reachable
    @type node2ip: dict
    @param node2ip: Node to ip mapping
    @param port: The port to use for the TCP ping
    @param feedback_fn: The function used for feedback
    @param _ping_fn: Function to check reachabilty (for unittest use only)
    @param _sleep_fn: Function to sleep (for unittest use only)

    """
    # nodes start out "down" and migrate to "up" as the ping succeeds
    self.down = set(node_list)
    self.up = set()
    self.node2ip = node2ip
    self.success = True
    self.action_cb = action_cb
    self.port = port
    self.feedback_fn = feedback_fn
    self._ping_fn = _ping_fn
    self._sleep_fn = _sleep_fn

  def __call__(self):
    """When called we run action_cb.

    @raises utils.RetryAgain: When there are still down nodes

    """
    # A single failed callback marks the whole run as unsuccessful
    if not self.action_cb(self.up):
      self.success = False

    if not self.down:
      return self.success

    raise utils.RetryAgain()

  def Wait(self, secs):
    """Checks if a host is up or waits remaining seconds.

    @param secs: The secs remaining

    """
    deadline = time.time() + secs
    for host in self.down:
      reachable = self._ping_fn(self.node2ip[host], self.port,
                                timeout=_EPO_PING_TIMEOUT,
                                live_port_needed=True)
      if reachable:
        self.feedback_fn("Node %s became available" % host)
        self.up.add(host)
        self.down -= self.up
        # If we have a node available there is the possibility to run the
        # action callback successfully, therefore we don't wait and return
        return

    self._sleep_fn(max(0.0, deadline - time.time()))
1785
1786
def _RunWhenNodesReachable(node_list, action_cb, interval):
  """Run action_cb when nodes become reachable.

  @param node_list: The list of nodes to be reachable
  @param action_cb: Callback called when a new host is reachable
  @param interval: The earliest time to retry

  """
  client = GetClient()
  cluster_info = client.QueryClusterInfo()

  # pick the address family matching the cluster's primary IP version
  family = (netutils.IPAddress.family
            if cluster_info["primary_ip_version"] == constants.IP4_VERSION
            else netutils.IP6Address.family)

  node2ip = dict((node, netutils.GetHostname(node, family=family).ip)
                 for node in node_list)

  helper = _RunWhenNodesReachableHelper(
      node_list, action_cb, node2ip,
      netutils.GetDaemonPort(constants.NODED), ToStdout)

  try:
    return utils.Retry(helper, interval, _EPO_REACHABLE_TIMEOUT,
                       wait_fn=helper.Wait)
  except utils.RetryTimeout:
    ToStderr("Time exceeded while waiting for nodes to become reachable"
             " again:\n - %s", " - ".join(helper.down))
    return False
1816
1817
def _MaybeInstanceStartup(opts, inst_map, nodes_online,
                          _instance_start_fn=_InstanceStart):
  """Start the instances conditional based on node_states.

  @param opts: The command line options selected by the user
  @param inst_map: A dict of inst -> nodes mapping
  @param nodes_online: A list of nodes online
  @param _instance_start_fn: Callback to start instances (unittest use only)
  @return: Success of the operation on all instances

  """
  # An instance is startable once every node it lives on is back online
  startable = [inst for (inst, nodes) in inst_map.items()
               if not (nodes - nodes_online)]

  # Remove them from the pending map so they are not submitted again later
  for inst in startable:
    del inst_map[inst]

  if startable:
    return _instance_start_fn(opts, startable, True)

  return True
1842
1843
def _EpoOn(opts, full_node_list, node_list, inst_map):
  """Does the actual power on.

  @param opts: The command line options selected by the user
  @param full_node_list: All nodes to operate on (includes nodes not supporting
                         OOB)
  @param node_list: The list of nodes to operate on (all need to support OOB)
  @param inst_map: A dict of inst -> nodes mapping
  @return: The desired exit status

  """
  # This is the power-*on* path, so request OOB_POWER_ON (power=True);
  # passing False here would power the nodes off instead.
  if node_list and not _OobPower(opts, node_list, True):
    ToStderr("Not all nodes seem to get back up, investigate and start"
             " manually if needed")

  # Wait for the nodes to be back up
  action_cb = compat.partial(_MaybeInstanceStartup, opts, dict(inst_map))

  ToStdout("Waiting until all nodes are available again")
  if not _RunWhenNodesReachable(full_node_list, action_cb, _EPO_PING_INTERVAL):
    ToStderr("Please investigate and start stopped instances manually")
    return constants.EXIT_FAILURE

  return constants.EXIT_SUCCESS
1868
1869
def _EpoOff(opts, node_list, inst_map):
  """Does the actual power off.

  @param opts: The command line options selected by the user
  @param node_list: The list of nodes to operate on (all need to support OOB)
  @param inst_map: A dict of inst -> nodes mapping
  @return: The desired exit status

  """
  # Stop all instances first; no_remember=True so the forced shutdown is not
  # recorded as the instances' desired state
  if not _InstanceStart(opts, inst_map.keys(), False, no_remember=True):
    ToStderr("Please investigate and stop instances manually before continuing")
    return constants.EXIT_FAILURE

  # With no OOB-capable nodes there is nothing left to power off
  if not node_list or _OobPower(opts, node_list, False):
    return constants.EXIT_SUCCESS

  return constants.EXIT_FAILURE
1890
1891
def Epo(opts, args, qcl=None, _on_fn=_EpoOn, _off_fn=_EpoOff,
        _confirm_fn=ConfirmOperation,
        _stdout_fn=ToStdout, _stderr_fn=ToStderr):
  """EPO operations.

  Emergency power on/off for a set of nodes or node groups (dispatches to
  C{_EpoOn}/C{_EpoOff} depending on C{opts.on}).

  @param opts: the command line options selected by the user
  @type args: list
  @param args: names of the nodes or (with --groups) node groups to operate
      on; must be empty when --all is given
  @param qcl: query client to use (for unittest use only; defaults to a
      fresh L{GetClient}())
  @param _on_fn: power-on implementation (for unittest use only)
  @param _off_fn: power-off implementation (for unittest use only)
  @param _confirm_fn: confirmation callback (for unittest use only)
  @rtype: int
  @return: the desired exit code

  """
  # --groups and --all are mutually exclusive; --all also excludes arguments
  if opts.groups and opts.show_all:
    _stderr_fn("Only one of --groups or --all are allowed")
    return constants.EXIT_FAILURE
  elif args and opts.show_all:
    _stderr_fn("Arguments in combination with --all are not allowed")
    return constants.EXIT_FAILURE

  if qcl is None:
    # Query client
    qcl = GetClient()

  # With --groups, expand the given group names into their member nodes
  if opts.groups:
    node_query_list = \
      itertools.chain(*qcl.QueryGroups(args, ["node_list"], False))
  else:
    node_query_list = args

  result = qcl.QueryNodes(node_query_list, ["name", "master", "pinst_list",
                                            "sinst_list", "powered", "offline"],
                          False)

  all_nodes = map(compat.fst, result)
  # node_list: nodes we will actually drive via OOB
  node_list = []
  # inst_map: instance name -> set of (non-master) nodes it lives on; an
  # instance touching the master node gets an empty set, presumably because
  # the master itself is never powered off by EPO -- TODO confirm
  inst_map = {}
  for (node, master, pinsts, sinsts, powered, offline) in result:
    if not offline:
      for inst in (pinsts + sinsts):
        if inst in inst_map:
          if not master:
            inst_map[inst].add(node)
          elif master:
            inst_map[inst] = set()
        else:
          inst_map[inst] = set([node])

    if master and opts.on:
      # We ignore the master for turning on the machines, in fact we are
      # already operating on the master at this point :)
      continue
    elif master and not opts.show_all:
      _stderr_fn("%s is the master node, please do a master-failover to another"
                 " node not affected by the EPO or use --all if you intend to"
                 " shutdown the whole cluster", node)
      return constants.EXIT_FAILURE
    elif powered is None:
      # nodes without OOB support must be handled by the operator by hand
      _stdout_fn("Node %s does not support out-of-band handling, it can not be"
                 " handled in a fully automated manner", node)
    elif powered == opts.on:
      _stdout_fn("Node %s is already in desired power state, skipping", node)
    elif not offline or (offline and powered):
      node_list.append(node)

  # require explicit confirmation unless --force was given
  if not (opts.force or _confirm_fn(all_nodes, "nodes", "epo")):
    return constants.EXIT_FAILURE

  if opts.on:
    return _on_fn(opts, all_nodes, node_list, inst_map)
  else:
    return _off_fn(opts, node_list, inst_map)
1963
1964
def RemoveRepair(opts, args):
  """Unconditionally remove a repair event.

  @param opts: the command line options selected by the user (ignored)
  @type args: list
  @param args: one element, the uuid of the event to remove
  @rtype: int
  @return: the desired exit code

  """
  wconfd.Client().RmMaintdIncident(args[0])
  return 0
1978
1979
def _GetCreateCommand(info):
  """Assemble the C{gnt-cluster init} command line for the given cluster info.

  @param info: cluster information as returned by C{QueryClusterInfo}
  @rtype: string
  @return: the complete command line

  """
  out = StringIO()
  out.write("gnt-cluster init")
  PrintIPolicyCommand(out, info["ipolicy"], False)
  out.write(" %s" % info["name"])
  return out.getvalue()
1987
1988
def ShowCreateCommand(opts, args):
  """Shows the command that can be used to re-create the cluster.

  Currently it works only for ipolicy specs.

  """
  info = GetClient().QueryClusterInfo()
  ToStdout(_GetCreateCommand(info))
1998
1999
def _RunCommandAndReport(cmd):
  """Run a command and report its output, iff it failed.

  @param cmd: the command to execute
  @type cmd: list
  @rtype: bool
  @return: False, if the execution failed.

  """
  res = utils.RunCmd(cmd)
  if not res.failed:
    return True
  # Only failures are worth reporting; include the reason and any output.
  ToStderr("Command %s failed: %s; Output %s" %
           (cmd, res.fail_reason, res.output))
  return False
2015
2016
def _VerifyCommand(cmd):
  """Verify that a given command succeeds on all online nodes.

  As this function is intended to run during upgrades, it
  is implemented in such a way that it still works, if all Ganeti
  daemons are down.

  @param cmd: a list of unquoted shell arguments
  @type cmd: list
  @rtype: list
  @return: the list of node names that are online where
      the command failed.

  """
  quoted = utils.text.ShellQuoteArgs(map(str, cmd))
  return _VerifyCommandRaw(quoted)
2032
2033
def _VerifyCommandRaw(command):
  """Verify that a given command succeeds on all online nodes.

  As this function is intended to run during upgrades, it
  is implemented in such a way that it still works, if all Ganeti
  daemons are down.

  @param command: a bare string to pass to SSH. The caller must do their
      own shell/ssh escaping.
  @type command: string
  @rtype: list
  @return: the list of node names that are online where
      the command failed.

  """
  # Use a single ssconf store for all three lookups instead of
  # instantiating it three times.
  ss = ssconf.SimpleStore()
  nodes = ss.GetOnlineNodeList()
  master_node = ss.GetMasterNode()
  cluster_name = ss.GetClusterName()

  # If master node is in 'nodes', make sure master node is at list end
  if master_node in nodes:
    nodes.remove(master_node)
    nodes.append(master_node)

  failed = []

  srun = ssh.SshRunner(cluster_name=cluster_name)
  for name in nodes:
    result = srun.Run(name, constants.SSH_LOGIN_USER, command)
    if result.exit_code != 0:
      failed.append(name)

  return failed
2067
2068
def _VerifyVersionInstalled(versionstring):
  """Verify that the given version of ganeti is installed on all online nodes.

  Do nothing, if this is the case, otherwise print an appropriate
  message to stderr.

  @param versionstring: the version to check for
  @type versionstring: string
  @rtype: bool
  @return: True, if the version is installed on all online nodes

  """
  version_dir = os.path.join(pathutils.PKGLIBDIR, versionstring)
  badnodes = _VerifyCommand(["test", "-d", version_dir])
  if not badnodes:
    return True
  ToStderr("Ganeti version %s not installed on nodes %s"
           % (versionstring, ", ".join(badnodes)))
  return False
2089
2090
def _GetRunning():
  """Count the jobs that are currently in the running state.

  @rtype: int
  @return: the number of jobs still running

  """
  qfilter = qlang.MakeSimpleFilter(
    "status", frozenset([constants.JOB_STATUS_RUNNING]))
  return len(GetClient().Query(constants.QR_JOB, [], qfilter).data)
2102
2103
def _SetGanetiVersionAndEnsure(versionstring):
  """Symlink the active version of ganeti to the given versionstring,
  and run the ensure-dirs script.

  @type versionstring: string
  @rtype: list
  @return: the list of nodes where the version change failed

  """
  lib_link = os.path.join(pathutils.SYSCONFDIR, "ganeti/lib")
  lib_target = os.path.join(pathutils.PKGLIBDIR, versionstring)
  share_link = os.path.join(pathutils.SYSCONFDIR, "ganeti/share")
  share_target = os.path.join(pathutils.SHAREDIR, versionstring)

  # Update symlinks to point at the new version; without GNU ln's -T
  # option, the old links have to be removed before re-creating them.
  if constants.HAS_GNU_LN:
    cmds = [
      ["ln", "-s", "-f", "-T", lib_target, lib_link],
      ["ln", "-s", "-f", "-T", share_target, share_link],
      ]
  else:
    cmds = [
      ["rm", "-f", lib_link],
      ["ln", "-s", "-f", lib_target, lib_link],
      ["rm", "-f", share_link],
      ["ln", "-s", "-f", share_target, share_link],
      ]

  # Run the ensure-dirs script to verify the new version is OK.
  cmds.append([pathutils.ENSURE_DIRS])

  # Submit all commands to ssh, exiting on the first failure.
  # The command string is a single argument that's given to ssh to submit to
  # the remote shell, so it only needs enough escaping to satisfy the remote
  # shell, rather than the 2 levels of escaping usually required when using
  # ssh from the commandline.
  joined = " && ".join(utils.text.ShellQuoteArgs(c) for c in cmds)
  return list(set(_VerifyCommandRaw(joined)))
2150
2151
2152 def _ExecuteCommands(fns):
2153 """Execute a list of functions, in reverse order.
2154
2155 @type fns: list of functions.
2156 @param fns: the functions to be executed.
2157
2158 """
2159 for fn in reversed(fns):
2160 fn()
2161
2162
def _GetConfigVersion():
  """Determine the version the configuration file currently has.

  @rtype: tuple or None
  @return: (major, minor, revision) if the version can be determined,
      None otherwise

  """
  data = serializer.LoadJson(utils.ReadFile(pathutils.CLUSTER_CONF_FILE))
  if "version" not in data:
    return None
  return utils.SplitVersion(data["version"])
2177
2178
def _ReadIntentToUpgrade():
  """Read the file documenting the intent to upgrade the cluster.

  @rtype: (string, string) or (None, None)
  @return: (old version, version to upgrade to), if the file exists,
      and (None, None) otherwise.

  """
  if not os.path.isfile(pathutils.INTENT_TO_UPGRADE):
    return (None, None)

  fields = utils.UnescapeAndSplit(utils.ReadFile(pathutils.INTENT_TO_UPGRADE))
  if len(fields) != 3:
    # file syntactically mal-formed
    return (None, None)
  # The third field (the writer's PID) is not needed by callers.
  return (fields[0], fields[1])
2196
2197
def _WriteIntentToUpgrade(version):
  """Write file documenting the intent to upgrade the cluster.

  @type version: string
  @param version: the version we intend to upgrade to

  """
  content = utils.EscapeAndJoin(
    [constants.RELEASE_VERSION, version, "%d" % os.getpid()])
  utils.WriteFile(pathutils.INTENT_TO_UPGRADE, data=content)
2208
2209
def _UpgradeBeforeConfigurationChange(versionstring):
  """
  Carry out all the tasks necessary for an upgrade that happen before
  the configuration file, or Ganeti version, changes.

  Every completed step pushes an undo action onto the returned rollback
  list, so that a failed upgrade can be unwound in reverse order.

  @type versionstring: string
  @param versionstring: the version to upgrade to
  @rtype: (bool, list)
  @return: tuple of a bool indicating success and a list of rollback tasks

  """
  rollback = []

  ToStdoutAndLoginfo("Verifying %s present on all nodes", versionstring)
  if not _VerifyVersionInstalled(versionstring):
    return (False, rollback)

  # Record the upgrade intent on disk so an interrupted run can be resumed.
  _WriteIntentToUpgrade(versionstring)
  rollback.append(
    lambda: utils.RunCmd(["rm", "-f", pathutils.INTENT_TO_UPGRADE]))

  ToStdoutAndLoginfo("Draining queue")
  client = GetClient()
  client.SetQueueDrainFlag(True)

  rollback.append(lambda: GetClient().SetQueueDrainFlag(False))

  # Poll until no jobs are running any more; a truthy (non-zero) final
  # count means the queue could not be emptied within the timeout.
  if utils.SimpleRetry(0, _GetRunning,
                       constants.UPGRADE_QUEUE_POLL_INTERVAL,
                       constants.UPGRADE_QUEUE_DRAIN_TIMEOUT):
    ToStderr("Failed to completely empty the queue.")
    return (False, rollback)

  ToStdoutAndLoginfo("Pausing the watcher for one hour.")
  rollback.append(lambda: GetClient().SetWatcherPause(None))
  GetClient().SetWatcherPause(time.time() + 60 * 60)

  ToStdoutAndLoginfo("Stopping daemons on master node.")
  if not _RunCommandAndReport([pathutils.DAEMON_UTIL, "stop-all"]):
    return (False, rollback)

  ToStdoutAndLoginfo("Stopping daemons everywhere.")
  rollback.append(lambda: _VerifyCommand([pathutils.DAEMON_UTIL, "start-all"]))
  badnodes = _VerifyCommand([pathutils.DAEMON_UTIL, "stop-all"])
  if badnodes:
    ToStderr("Failed to stop daemons on %s." % (", ".join(badnodes),))
    return (False, rollback)

  backuptar = os.path.join(pathutils.BACKUP_DIR, "ganeti%d.tar" % time.time())
  ToStdoutAndLoginfo("Backing up configuration as %s", backuptar)
  if not _RunCommandAndReport(["mkdir", "-p", pathutils.BACKUP_DIR]):
    return (False, rollback)

  # Create the archive in a safe manner, as it contains sensitive
  # information.
  (_, tmp_name) = tempfile.mkstemp(prefix=backuptar, dir=pathutils.BACKUP_DIR)
  if not _RunCommandAndReport(["tar", "-cf", tmp_name,
                               "--exclude=queue/archive",
                               pathutils.DATA_DIR]):
    return (False, rollback)

  # Publish the finished archive under its final name.
  os.rename(tmp_name, backuptar)
  return (True, rollback)
2273
2274
def _VersionSpecificDowngrade():
  """
  Perform any additional downgrade tasks that are version specific
  and need to be done just after the configuration downgrade. This
  function needs to be idempotent, so that it can be redone if the
  downgrade procedure gets interrupted after changing the
  configuration.

  Note that this function has to be reset with every version bump.

  @return: True upon success
  """
  ToStdoutAndLoginfo("Performing version-specific downgrade tasks.")

  # Nothing to do for the current version pair.
  return True
2290
2291
def _SwitchVersionAndConfig(versionstring, downgrade):
  """
  Switch to the new Ganeti version and change the configuration,
  in correct order.

  @type versionstring: string
  @param versionstring: the version to change to
  @type downgrade: bool
  @param downgrade: True, if the configuration should be downgraded
  @rtype: (bool, list)
  @return: tuple of a bool indicating success, and a list of
      additional rollback tasks

  """
  rollback = []
  if downgrade:
    ToStdoutAndLoginfo("Downgrading configuration")
    if not _RunCommandAndReport([pathutils.CFGUPGRADE, "--downgrade", "-f"]):
      return (False, rollback)
    # Note: version specific downgrades need to be done before switching
    # binaries, so that we still have the knowledgeable binary if the downgrade
    # process gets interrupted at this point.
    if not _VersionSpecificDowngrade():
      return (False, rollback)

  # Configuration change is the point of no return. From then onwards, it is
  # safer to push through the up/downgrade than to try to roll it back.

  ToStdoutAndLoginfo("Switching to version %s on all nodes", versionstring)
  rollback.append(lambda: _SetGanetiVersionAndEnsure(constants.DIR_VERSION))
  badnodes = _SetGanetiVersionAndEnsure(versionstring)
  if badnodes:
    ToStderr("Failed to switch to Ganeti version %s on nodes %s"
             % (versionstring, ", ".join(badnodes)))
    # When downgrading, the configuration was already changed above, so we
    # push through despite failing nodes instead of aborting.
    if not downgrade:
      return (False, rollback)

  # Now that we have changed to the new version of Ganeti we should
  # not communicate over luxi any more, as luxi might have changed in
  # incompatible ways. Therefore, manually call the corresponding ganeti
  # commands using their canonical (version independent) path.

  if not downgrade:
    ToStdoutAndLoginfo("Upgrading configuration")
    if not _RunCommandAndReport([pathutils.CFGUPGRADE, "-f"]):
      return (False, rollback)

  return (True, rollback)
2340
2341
def _UpgradeAfterConfigurationChange(oldversion):
  """
  Carry out the upgrade actions necessary after switching to the new
  Ganeti version and updating the configuration.

  As this part is run at a time where the new version of Ganeti is already
  running, no communication should happen via luxi, as this is not a stable
  interface. Also, as the configuration change is the point of no return,
  all actions are pushed through, even if some of them fail.

  @param oldversion: the version the upgrade started from
  @type oldversion: string
  @rtype: int
  @return: the intended return value

  """
  # Failures only degrade the return value; they never abort the sequence.
  returnvalue = 0

  ToStdoutAndLoginfo("Starting daemons everywhere.")
  badnodes = _VerifyCommand([pathutils.DAEMON_UTIL, "start-all"])
  if badnodes:
    ToStderr("Warning: failed to start daemons on %s." % (", ".join(badnodes),))
    returnvalue = 1

  ToStdoutAndLoginfo("Redistributing the configuration.")
  if not _RunCommandAndReport(["gnt-cluster", "redist-conf", "--yes-do-it"]):
    returnvalue = 1

  ToStdoutAndLoginfo("Restarting daemons everywhere.")
  badnodes = _VerifyCommand([pathutils.DAEMON_UTIL, "stop-all"])
  badnodes.extend(_VerifyCommand([pathutils.DAEMON_UTIL, "start-all"]))
  if badnodes:
    ToStderr("Warning: failed to start daemons on %s." %
             (", ".join(list(set(badnodes))),))
    returnvalue = 1

  ToStdoutAndLoginfo("Undraining the queue.")
  if not _RunCommandAndReport(["gnt-cluster", "queue", "undrain"]):
    returnvalue = 1

  # The upgrade is done; the intent file is no longer needed. A failure to
  # remove it is deliberately not reflected in the return value.
  _RunCommandAndReport(["rm", "-f", pathutils.INTENT_TO_UPGRADE])

  ToStdoutAndLoginfo("Running post-upgrade hooks")
  if not _RunCommandAndReport([pathutils.POST_UPGRADE, oldversion]):
    returnvalue = 1

  ToStdoutAndLoginfo("Unpausing the watcher.")
  if not _RunCommandAndReport(["gnt-cluster", "watcher", "continue"]):
    returnvalue = 1

  ToStdoutAndLoginfo("Verifying cluster.")
  if not _RunCommandAndReport(["gnt-cluster", "verify"]):
    returnvalue = 1

  return returnvalue
2397
2398
def UpgradeGanetiCommand(opts, args):
  """Upgrade a cluster to a new ganeti version.

  With C{--to}, a fresh upgrade is started; with C{--resume}, a
  previously interrupted upgrade is continued.

  @param opts: the command line options selected by the user
  @type args: list
  @param args: should be an empty list
  @rtype: int
  @return: the desired exit code

  """
  if ((not opts.resume and opts.to is None)
      or (opts.resume and opts.to is not None)):
    ToStderr("Precisely one of the options --to and --resume"
             " has to be given")
    return 1

  # If we're not told to resume, verify there is no upgrade
  # in progress.
  if not opts.resume:
    oldversion, versionstring = _ReadIntentToUpgrade()
    if versionstring is not None:
      # An upgrade is going on; verify whether the target matches
      if versionstring == opts.to:
        ToStderr("An upgrade is already in progress. Target version matches,"
                 " resuming.")
        opts.resume = True
        opts.to = None
      else:
        ToStderr("An upgrade from %s to %s is in progress; use --resume to"
                 " finish it first" % (oldversion, versionstring))
        return 1

  utils.SetupLogging(pathutils.LOG_COMMANDS, 'gnt-cluster upgrade', debug=1)

  oldversion = constants.RELEASE_VERSION

  if opts.resume:
    ssconf.CheckMaster(False)
    # The version to upgrade to is taken from the intent file, not --to.
    oldversion, versionstring = _ReadIntentToUpgrade()
    if versionstring is None:
      # No intent file: nothing to resume.
      return 0
    version = utils.version.ParseVersion(versionstring)
    if version is None:
      return 1
    configversion = _GetConfigVersion()
    if configversion is None:
      return 1
    # If the upgrade we resume was an upgrade between compatible
    # versions (like 2.10.0 to 2.10.1), the correct configversion
    # does not guarantee that the config has been updated.
    # However, in the case of a compatible update with the configuration
    # not touched, we are running a different dirversion with the same
    # config version.
    config_already_modified = \
      (utils.IsCorrectConfigVersion(version, configversion) and
       not (versionstring != constants.DIR_VERSION and
            configversion == (constants.CONFIG_MAJOR, constants.CONFIG_MINOR,
                              constants.CONFIG_REVISION)))
    if not config_already_modified:
      # We have to start from the beginning; however, some daemons might have
      # already been stopped, so the only way to get into a well-defined state
      # is by starting all daemons again.
      _VerifyCommand([pathutils.DAEMON_UTIL, "start-all"])
  else:
    versionstring = opts.to
    config_already_modified = False
    version = utils.version.ParseVersion(versionstring)
    if version is None:
      ToStderr("Could not parse version string %s" % versionstring)
      return 1

  # Refuse target versions outside the supported upgrade range.
  msg = utils.version.UpgradeRange(version)
  if msg is not None:
    ToStderr("Cannot upgrade to %s: %s" % (versionstring, msg))
    return 1

  if not config_already_modified:
    success, rollback = _UpgradeBeforeConfigurationChange(versionstring)
    if not success:
      _ExecuteCommands(rollback)
      return 1
  else:
    rollback = []

  downgrade = utils.version.ShouldCfgdowngrade(version)

  success, additionalrollback =  \
    _SwitchVersionAndConfig(versionstring, downgrade)
  if not success:
    rollback.extend(additionalrollback)
    _ExecuteCommands(rollback)
    return 1

  return _UpgradeAfterConfigurationChange(oldversion)
2493
2494
#: Command table for gnt-cluster: maps each sub-command name to a tuple of
#: (handler function, argument specification, option list, usage synopsis,
#: description), in the format expected by L{GenericMain}.
commands = {
  "init": (
    InitCluster, [ArgHost(min=1, max=1)],
    [BACKEND_OPT, CP_SIZE_OPT, ENABLED_HV_OPT, GLOBAL_FILEDIR_OPT,
     HVLIST_OPT, MAC_PREFIX_OPT, MASTER_NETDEV_OPT, MASTER_NETMASK_OPT,
     NIC_PARAMS_OPT, NOMODIFY_ETCHOSTS_OPT, NOMODIFY_SSH_SETUP_OPT,
     SECONDARY_IP_OPT, VG_NAME_OPT, MAINTAIN_NODE_HEALTH_OPT, UIDPOOL_OPT,
     DRBD_HELPER_OPT, DEFAULT_IALLOCATOR_OPT, DEFAULT_IALLOCATOR_PARAMS_OPT,
     PRIMARY_IP_VERSION_OPT, PREALLOC_WIPE_DISKS_OPT, NODE_PARAMS_OPT,
     GLOBAL_SHARED_FILEDIR_OPT, USE_EXTERNAL_MIP_SCRIPT, DISK_PARAMS_OPT,
     HV_STATE_OPT, DISK_STATE_OPT, ENABLED_DISK_TEMPLATES_OPT,
     IPOLICY_STD_SPECS_OPT, GLOBAL_GLUSTER_FILEDIR_OPT, INSTALL_IMAGE_OPT,
     ZEROING_IMAGE_OPT, COMPRESSION_TOOLS_OPT,
     ENABLED_USER_SHUTDOWN_OPT, SSH_KEY_BITS_OPT, SSH_KEY_TYPE_OPT,
     ]
    + INSTANCE_POLICY_OPTS + SPLIT_ISPECS_OPTS,
    "[opts...] <cluster_name>", "Initialises a new cluster configuration"),
  "destroy": (
    DestroyCluster, ARGS_NONE, [YES_DOIT_OPT],
    "", "Destroy cluster"),
  "rename": (
    RenameCluster, [ArgHost(min=1, max=1)],
    [FORCE_OPT, DRY_RUN_OPT],
    "<new_name>",
    "Renames the cluster"),
  "redist-conf": (
    RedistributeConfig, ARGS_NONE, SUBMIT_OPTS +
    [DRY_RUN_OPT, PRIORITY_OPT, FORCE_DISTRIBUTION],
    "", "Forces a push of the configuration file and ssconf files"
    " to the nodes in the cluster"),
  "verify": (
    VerifyCluster, ARGS_NONE,
    [VERBOSE_OPT, DEBUG_SIMERR_OPT, ERROR_CODES_OPT, NONPLUS1_OPT,
     PRIORITY_OPT, NODEGROUP_OPT, IGNORE_ERRORS_OPT, VERIFY_CLUTTER_OPT],
    "", "Does a check on the cluster configuration"),
  "verify-disks": (
    VerifyDisks, ARGS_NONE, [PRIORITY_OPT, NODEGROUP_OPT, STRICT_OPT],
    "", "Does a check on the cluster disk status"),
  "repair-disk-sizes": (
    RepairDiskSizes, ARGS_MANY_INSTANCES, [DRY_RUN_OPT, PRIORITY_OPT],
    "[instance...]", "Updates mismatches in recorded disk sizes"),
  "master-failover": (
    MasterFailover, ARGS_NONE,
    [NOVOTING_OPT, FORCE_FAILOVER, IGNORE_OFFLINE_NODES_FAILOVER],
    "", "Makes the current node the master"),
  "master-ping": (
    MasterPing, ARGS_NONE, [],
    "", "Checks if the master is alive"),
  "version": (
    ShowClusterVersion, ARGS_NONE, [],
    "", "Shows the cluster version"),
  "getmaster": (
    ShowClusterMaster, ARGS_NONE, [],
    "", "Shows the cluster master"),
  "copyfile": (
    ClusterCopyFile, [ArgFile(min=1, max=1)],
    [NODE_LIST_OPT, USE_REPL_NET_OPT, NODEGROUP_OPT],
    "[-n node...] <filename>", "Copies a file to all (or only some) nodes"),
  "command": (
    RunClusterCommand, [ArgCommand(min=1)],
    [NODE_LIST_OPT, NODEGROUP_OPT, SHOW_MACHINE_OPT, FAILURE_ONLY_OPT],
    "[-n node...] <command>", "Runs a command on all (or only some) nodes"),
  "info": (
    ShowClusterConfig, ARGS_NONE, [ROMAN_OPT],
    "[--roman]", "Show cluster configuration"),
  "list-tags": (
    ListTags, ARGS_NONE, [], "", "List the tags of the cluster"),
  "add-tags": (
    AddTags, [ArgUnknown()], [TAG_SRC_OPT, PRIORITY_OPT] + SUBMIT_OPTS,
    "tag...", "Add tags to the cluster"),
  "remove-tags": (
    RemoveTags, [ArgUnknown()], [TAG_SRC_OPT, PRIORITY_OPT] + SUBMIT_OPTS,
    "tag...", "Remove tags from the cluster"),
  "search-tags": (
    SearchTags, [ArgUnknown(min=1, max=1)], [PRIORITY_OPT], "",
    "Searches the tags on all objects on"
    " the cluster for a given pattern (regex)"),
  "queue": (
    QueueOps,
    [ArgChoice(min=1, max=1, choices=["drain", "undrain", "info"])],
    [], "drain|undrain|info", "Change queue properties"),
  "watcher": (
    WatcherOps,
    [ArgChoice(min=1, max=1, choices=["pause", "continue", "info"]),
     ArgSuggest(min=0, max=1, choices=["30m", "1h", "4h"])],
    [],
    "{pause <timespec>|continue|info}", "Change watcher properties"),
  "modify": (
    SetClusterParams, ARGS_NONE,
    [FORCE_OPT,
     BACKEND_OPT, CP_SIZE_OPT, RQL_OPT, MAX_TRACK_OPT, INSTALL_IMAGE_OPT,
     INSTANCE_COMMUNICATION_NETWORK_OPT, ENABLED_HV_OPT, HVLIST_OPT,
     MAC_PREFIX_OPT, MASTER_NETDEV_OPT, MASTER_NETMASK_OPT, NIC_PARAMS_OPT,
     VG_NAME_OPT, MAINTAIN_NODE_HEALTH_OPT, UIDPOOL_OPT, ADD_UIDS_OPT,
     REMOVE_UIDS_OPT, DRBD_HELPER_OPT, DEFAULT_IALLOCATOR_OPT,
     DEFAULT_IALLOCATOR_PARAMS_OPT, RESERVED_LVS_OPT, DRY_RUN_OPT, PRIORITY_OPT,
     PREALLOC_WIPE_DISKS_OPT, NODE_PARAMS_OPT, USE_EXTERNAL_MIP_SCRIPT,
     DISK_PARAMS_OPT, HV_STATE_OPT, DISK_STATE_OPT] + SUBMIT_OPTS +
    [ENABLED_DISK_TEMPLATES_OPT, IPOLICY_STD_SPECS_OPT, MODIFY_ETCHOSTS_OPT,
     MODIFY_SSH_SETUP_OPT, ENABLED_USER_SHUTDOWN_OPT] +
    INSTANCE_POLICY_OPTS +
    [GLOBAL_FILEDIR_OPT, GLOBAL_SHARED_FILEDIR_OPT, ZEROING_IMAGE_OPT,
     COMPRESSION_TOOLS_OPT] +
    [ENABLED_DATA_COLLECTORS_OPT, DATA_COLLECTOR_INTERVAL_OPT,
     DIAGNOSE_DATA_COLLECTOR_FILENAME_OPT,
     MAINT_INTERVAL_OPT, MAINT_BALANCE_OPT, MAINT_BALANCE_THRESHOLD_OPT],
    "[opts...]",
    "Alters the parameters of the cluster"),
  "renew-crypto": (
    RenewCrypto, ARGS_NONE,
    [NEW_CLUSTER_CERT_OPT, NEW_RAPI_CERT_OPT, RAPI_CERT_OPT,
     NEW_CONFD_HMAC_KEY_OPT, FORCE_OPT,
     NEW_CLUSTER_DOMAIN_SECRET_OPT, CLUSTER_DOMAIN_SECRET_OPT,
     NEW_SPICE_CERT_OPT, SPICE_CERT_OPT, SPICE_CACERT_OPT,
     NEW_NODE_CERT_OPT, NEW_SSH_KEY_OPT, NOSSH_KEYCHECK_OPT,
     VERBOSE_OPT, SSH_KEY_BITS_OPT, SSH_KEY_TYPE_OPT],
    "[opts...]",
    "Renews cluster certificates, keys and secrets"),
  "epo": (
    Epo, [ArgUnknown()],
    [FORCE_OPT, ON_OPT, GROUPS_OPT, ALL_OPT, OOB_TIMEOUT_OPT,
     SHUTDOWN_TIMEOUT_OPT, POWER_DELAY_OPT],
    "[opts...] [args]",
    "Performs an emergency power-off on given args"),
  "activate-master-ip": (
    ActivateMasterIp, ARGS_NONE, [], "", "Activates the master IP"),
  "deactivate-master-ip": (
    DeactivateMasterIp, ARGS_NONE, [CONFIRM_OPT], "",
    "Deactivates the master IP"),
  "show-ispecs-cmd": (
    ShowCreateCommand, ARGS_NONE, [], "",
    "Show the command line to re-create the cluster"),
  "upgrade": (
    UpgradeGanetiCommand, ARGS_NONE, [TO_OPT, RESUME_OPT], "",
    "Upgrade (or downgrade) to a new Ganeti version"),
  "remove-repair": (
    RemoveRepair, [ArgUnknown()], [], "<uuid>",
    "Remove a repair event from the list of pending events"),
  }
2634
2635
#: Dictionary with aliases for commands: each alternative (legacy) name maps
#: to its canonical entry in L{commands} and is resolved by L{GenericMain}.
aliases = {
  "masterfailover": "master-failover",
  "show": "info",
}
2641
2642
def Main():
  """Entry point: dispatch the requested gnt-cluster sub-command."""
  overrides = {"tag_type": constants.TAG_CLUSTER}
  return GenericMain(commands, override=overrides, aliases=aliases)