lib/cmdlib/cluster.py
1 #
2 #
3
4 # Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011, 2012, 2013, 2014 Google Inc.
5 # All rights reserved.
6 #
7 # Redistribution and use in source and binary forms, with or without
8 # modification, are permitted provided that the following conditions are
9 # met:
10 #
11 # 1. Redistributions of source code must retain the above copyright notice,
12 # this list of conditions and the following disclaimer.
13 #
14 # 2. Redistributions in binary form must reproduce the above copyright
15 # notice, this list of conditions and the following disclaimer in the
16 # documentation and/or other materials provided with the distribution.
17 #
18 # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
19 # IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
20 # TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
21 # PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR
22 # CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
23 # EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
24 # PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
25 # PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
26 # LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
27 # NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
28 # SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29
30
31 """Logical units dealing with the cluster."""
32
33 import copy
34 import itertools
35 import logging
36 import operator
37 import os
38 import re
39 import time
40
41 from ganeti import compat
42 from ganeti import constants
43 from ganeti import errors
44 from ganeti import hypervisor
45 from ganeti import locking
46 from ganeti import masterd
47 from ganeti import netutils
48 from ganeti import objects
49 from ganeti import opcodes
50 from ganeti import pathutils
51 from ganeti import query
52 import ganeti.rpc.node as rpc
53 from ganeti import runtime
54 from ganeti import ssh
55 from ganeti import uidpool
56 from ganeti import utils
57 from ganeti import vcluster
58
59 from ganeti.cmdlib.base import NoHooksLU, QueryBase, LogicalUnit, \
60 ResultWithJobs
61 from ganeti.cmdlib.common import ShareAll, RunPostHook, \
62 ComputeAncillaryFiles, RedistributeAncillaryFiles, UploadHelper, \
63 GetWantedInstances, MergeAndVerifyHvState, MergeAndVerifyDiskState, \
64 GetUpdatedIPolicy, ComputeNewInstanceViolations, GetUpdatedParams, \
65 CheckOSParams, CheckHVParams, AdjustCandidatePool, CheckNodePVs, \
66 ComputeIPolicyInstanceViolation, AnnotateDiskParams, SupportsOob, \
67 CheckIpolicyVsDiskTemplates, CheckDiskAccessModeValidity, \
68 CheckDiskAccessModeConsistency, CreateNewClientCert, \
69 AddInstanceCommunicationNetworkOp, ConnectInstanceCommunicationNetworkOp, \
70   CheckImageValidity, \
71   EnsureKvmdOnNodes
72
73 import ganeti.masterd.instance
74
75
76 def _UpdateMasterClientCert(
77 lu, cfg, master_uuid,
78 client_cert=pathutils.NODED_CLIENT_CERT_FILE,
79 client_cert_tmp=pathutils.NODED_CLIENT_CERT_FILE_TMP):
80 """Renews the master's client certificate and propagates the config.
81
82 @type lu: C{LogicalUnit}
83 @param lu: the logical unit holding the config
84 @type cfg: C{config.ConfigWriter}
85 @param cfg: the cluster's configuration
86 @type master_uuid: string
87 @param master_uuid: the master node's UUID
88 @type client_cert: string
89 @param client_cert: the path of the client certificate
90 @type client_cert_tmp: string
91 @param client_cert_tmp: the temporary path of the client certificate
92 @rtype: string
93 @return: the digest of the newly created client certificate
94
95 """
96 client_digest = CreateNewClientCert(lu, master_uuid, filename=client_cert_tmp)
97 cfg.AddNodeToCandidateCerts(master_uuid, client_digest)
98   # This triggers an update of the config and its distribution, which still
99   # happens over the old SSL certificate
100
101 utils.RemoveFile(client_cert)
102 utils.RenameFile(client_cert_tmp, client_cert)
103 return client_digest
104
105
106 class LUClusterRenewCrypto(NoHooksLU):
107 """Renew the cluster's crypto tokens.
108
109 """
110
111 REQ_BGL = False
112
113 def ExpandNames(self):
114 self.needed_locks = {
115 locking.LEVEL_NODE: locking.ALL_SET,
116 locking.LEVEL_NODE_ALLOC: locking.ALL_SET,
117 }
118 self.share_locks = ShareAll()
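    # ShareAll() marks every lock level as shared; the node and
    # node-allocation locks are switched back to exclusive below because
    # renewing certificates and keys modifies state on the nodes.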
119 self.share_locks[locking.LEVEL_NODE] = 0
120 self.share_locks[locking.LEVEL_NODE_ALLOC] = 0
121
122 def CheckPrereq(self):
123 """Check prerequisites.
124
125     This determines whether the requested SSH key renewal is suppressed.
126
127 Any errors are signaled by raising errors.OpPrereqError.
128
129 """
130 self._ssh_renewal_suppressed = \
131 not self.cfg.GetClusterInfo().modify_ssh_setup and self.op.ssh_keys
132
133 def _RenewNodeSslCertificates(self):
134 """Renews the nodes' SSL certificates.
135
136     Note that most of this operation is done in gnt_cluster.py; this LU only
137     takes care of renewing the client SSL certificates.
138
139 """
140 master_uuid = self.cfg.GetMasterNode()
141
142 server_digest = utils.GetCertificateDigest(
143 cert_filename=pathutils.NODED_CERT_FILE)
144 self.cfg.AddNodeToCandidateCerts("%s-SERVER" % master_uuid,
145 server_digest)
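    # The server certificate digest (and, below, the old master client
    # certificate digest) is temporarily tracked under a synthetic
    # "<uuid>-SERVER" / "<uuid>-OLDMASTER" key so that nodes can still verify
    # the master while the new client certificates are rolled out; both
    # entries are removed again at the end of this method.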
146 try:
147 old_master_digest = utils.GetCertificateDigest(
148 cert_filename=pathutils.NODED_CLIENT_CERT_FILE)
149 self.cfg.AddNodeToCandidateCerts("%s-OLDMASTER" % master_uuid,
150 old_master_digest)
151 except IOError:
152 logging.info("No old certificate available.")
153
154 new_master_digest = _UpdateMasterClientCert(self, self.cfg, master_uuid)
155
156 self.cfg.AddNodeToCandidateCerts(master_uuid, new_master_digest)
157 nodes = self.cfg.GetAllNodesInfo()
158 for (node_uuid, node_info) in nodes.items():
159 if node_uuid != master_uuid:
160 new_digest = CreateNewClientCert(self, node_uuid)
161 if node_info.master_candidate:
162 self.cfg.AddNodeToCandidateCerts(node_uuid, new_digest)
163 self.cfg.RemoveNodeFromCandidateCerts("%s-SERVER" % master_uuid)
164 self.cfg.RemoveNodeFromCandidateCerts("%s-OLDMASTER" % master_uuid)
165
166 def _RenewSshKeys(self):
167 """Renew all nodes' SSH keys.
168
169 """
170 master_uuid = self.cfg.GetMasterNode()
171
172 nodes = self.cfg.GetAllNodesInfo()
173 nodes_uuid_names = [(node_uuid, node_info.name) for (node_uuid, node_info)
174 in nodes.items() if not node_info.offline]
175 node_names = [name for (_, name) in nodes_uuid_names]
176 node_uuids = [uuid for (uuid, _) in nodes_uuid_names]
177 port_map = ssh.GetSshPortMap(node_names, self.cfg)
178 potential_master_candidates = self.cfg.GetPotentialMasterCandidates()
179 master_candidate_uuids = self.cfg.GetMasterCandidateUuids()
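    # The actual key renewal is delegated to the node daemon on the master
    # node via a single RPC; only nodes that are not offline are included.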
180 result = self.rpc.call_node_ssh_keys_renew(
181 [master_uuid],
182 node_uuids, node_names, port_map,
183 master_candidate_uuids,
184 potential_master_candidates)
185 result[master_uuid].Raise("Could not renew the SSH keys of all nodes")
186
187 def Exec(self, feedback_fn):
188 if self.op.node_certificates:
189 self._RenewNodeSslCertificates()
190 if self.op.ssh_keys and not self._ssh_renewal_suppressed:
191 self._RenewSshKeys()
192 elif self._ssh_renewal_suppressed:
193 feedback_fn("Cannot renew SSH keys if the cluster is configured to not"
194 " modify the SSH setup.")
195
196
197 class LUClusterActivateMasterIp(NoHooksLU):
198 """Activate the master IP on the master node.
199
200 """
201 def Exec(self, feedback_fn):
202 """Activate the master IP.
203
204 """
205 master_params = self.cfg.GetMasterNetworkParameters()
206 ems = self.cfg.GetUseExternalMipScript()
207 result = self.rpc.call_node_activate_master_ip(master_params.uuid,
208 master_params, ems)
209 result.Raise("Could not activate the master IP")
210
211
212 class LUClusterDeactivateMasterIp(NoHooksLU):
213 """Deactivate the master IP on the master node.
214
215 """
216 def Exec(self, feedback_fn):
217 """Deactivate the master IP.
218
219 """
220 master_params = self.cfg.GetMasterNetworkParameters()
221 ems = self.cfg.GetUseExternalMipScript()
222 result = self.rpc.call_node_deactivate_master_ip(master_params.uuid,
223 master_params, ems)
224 result.Raise("Could not deactivate the master IP")
225
226
227 class LUClusterConfigQuery(NoHooksLU):
228 """Return configuration values.
229
230 """
231 REQ_BGL = False
232
233 def CheckArguments(self):
234 self.cq = ClusterQuery(None, self.op.output_fields, False)
235
236 def ExpandNames(self):
237 self.cq.ExpandNames(self)
238
239 def DeclareLocks(self, level):
240 self.cq.DeclareLocks(self, level)
241
242 def Exec(self, feedback_fn):
243 result = self.cq.OldStyleQuery(self)
244
245 assert len(result) == 1
246
247 return result[0]
248
249
250 class LUClusterDestroy(LogicalUnit):
251 """Logical unit for destroying the cluster.
252
253 """
254 HPATH = "cluster-destroy"
255 HTYPE = constants.HTYPE_CLUSTER
256
257 def BuildHooksEnv(self):
258 """Build hooks env.
259
260 """
261 return {
262 "OP_TARGET": self.cfg.GetClusterName(),
263 }
264
265 def BuildHooksNodes(self):
266 """Build hooks nodes.
267
268 """
269 return ([], [])
270
271 def CheckPrereq(self):
272 """Check prerequisites.
273
274 This checks whether the cluster is empty.
275
276 Any errors are signaled by raising errors.OpPrereqError.
277
278 """
279 master = self.cfg.GetMasterNode()
280
281 nodelist = self.cfg.GetNodeList()
282 if len(nodelist) != 1 or nodelist[0] != master:
283 raise errors.OpPrereqError("There are still %d node(s) in"
284 " this cluster." % (len(nodelist) - 1),
285 errors.ECODE_INVAL)
286 instancelist = self.cfg.GetInstanceList()
287 if instancelist:
288 raise errors.OpPrereqError("There are still %d instance(s) in"
289 " this cluster." % len(instancelist),
290 errors.ECODE_INVAL)
291
292 def Exec(self, feedback_fn):
293 """Destroys the cluster.
294
295 """
296 master_params = self.cfg.GetMasterNetworkParameters()
297
298 # Run post hooks on master node before it's removed
299 RunPostHook(self, self.cfg.GetNodeName(master_params.uuid))
300
301 ems = self.cfg.GetUseExternalMipScript()
302 result = self.rpc.call_node_deactivate_master_ip(master_params.uuid,
303 master_params, ems)
304 result.Warn("Error disabling the master IP address", self.LogWarning)
305 return master_params.uuid
306
307
308 class LUClusterPostInit(LogicalUnit):
309 """Logical unit for running hooks after cluster initialization.
310
311 """
312 HPATH = "cluster-init"
313 HTYPE = constants.HTYPE_CLUSTER
314
315 def CheckArguments(self):
316 self.master_uuid = self.cfg.GetMasterNode()
317 self.master_ndparams = self.cfg.GetNdParams(self.cfg.GetMasterNodeInfo())
318
319 # TODO: When Issue 584 is solved, and None is properly parsed when used
320 # as a default value, ndparams.get(.., None) can be changed to
321 # ndparams[..] to access the values directly
322
323 # OpenvSwitch: Warn user if link is missing
324 if (self.master_ndparams[constants.ND_OVS] and not
325 self.master_ndparams.get(constants.ND_OVS_LINK, None)):
326 self.LogInfo("No physical interface for OpenvSwitch was given."
327 " OpenvSwitch will not have an outside connection. This"
328 " might not be what you want.")
329
330 def BuildHooksEnv(self):
331 """Build hooks env.
332
333 """
334 return {
335 "OP_TARGET": self.cfg.GetClusterName(),
336 }
337
338 def BuildHooksNodes(self):
339 """Build hooks nodes.
340
341 """
342 return ([], [self.cfg.GetMasterNode()])
343
344 def Exec(self, feedback_fn):
345 """Create and configure Open vSwitch
346
347 """
348 if self.master_ndparams[constants.ND_OVS]:
349 result = self.rpc.call_node_configure_ovs(
350 self.master_uuid,
351 self.master_ndparams[constants.ND_OVS_NAME],
352 self.master_ndparams.get(constants.ND_OVS_LINK, None))
353       result.Raise("Could not successfully configure Open vSwitch")
354
355 _UpdateMasterClientCert(self, self.cfg, self.master_uuid)
356
357 return True
358
359
360 class ClusterQuery(QueryBase):
361 FIELDS = query.CLUSTER_FIELDS
362
363 #: Do not sort (there is only one item)
364 SORT_FIELD = None
365
366 def ExpandNames(self, lu):
367 lu.needed_locks = {}
368
369 # The following variables interact with _QueryBase._GetNames
370 self.wanted = locking.ALL_SET
371 self.do_locking = self.use_locking
372
373 if self.do_locking:
374 raise errors.OpPrereqError("Can not use locking for cluster queries",
375 errors.ECODE_INVAL)
376
377 def DeclareLocks(self, lu, level):
378 pass
379
380 def _GetQueryData(self, lu):
381 """Computes the list of nodes and their attributes.
382
383 """
384 if query.CQ_CONFIG in self.requested_data:
385 cluster = lu.cfg.GetClusterInfo()
386 nodes = lu.cfg.GetAllNodesInfo()
387 else:
388 cluster = NotImplemented
389 nodes = NotImplemented
390
391 if query.CQ_QUEUE_DRAINED in self.requested_data:
392 drain_flag = os.path.exists(pathutils.JOB_QUEUE_DRAIN_FILE)
393 else:
394 drain_flag = NotImplemented
395
396 if query.CQ_WATCHER_PAUSE in self.requested_data:
397 master_node_uuid = lu.cfg.GetMasterNode()
398
399 result = lu.rpc.call_get_watcher_pause(master_node_uuid)
400 result.Raise("Can't retrieve watcher pause from master node '%s'" %
401 lu.cfg.GetMasterNodeName())
402
403 watcher_pause = result.payload
404 else:
405 watcher_pause = NotImplemented
406
407 return query.ClusterQueryData(cluster, nodes, drain_flag, watcher_pause)
408
409
410 class LUClusterQuery(NoHooksLU):
411 """Query cluster configuration.
412
413 """
414 REQ_BGL = False
415
416 def ExpandNames(self):
417 self.needed_locks = {}
418
419 def Exec(self, feedback_fn):
420 """Return cluster config.
421
422 """
423 cluster = self.cfg.GetClusterInfo()
424 os_hvp = {}
425
426 # Filter just for enabled hypervisors
427 for os_name, hv_dict in cluster.os_hvp.items():
428 os_hvp[os_name] = {}
429 for hv_name, hv_params in hv_dict.items():
430 if hv_name in cluster.enabled_hypervisors:
431 os_hvp[os_name][hv_name] = hv_params
432
433 # Convert ip_family to ip_version
434 primary_ip_version = constants.IP4_VERSION
435 if cluster.primary_ip_family == netutils.IP6Address.family:
436 primary_ip_version = constants.IP6_VERSION
437
438 result = {
439 "software_version": constants.RELEASE_VERSION,
440 "protocol_version": constants.PROTOCOL_VERSION,
441 "config_version": constants.CONFIG_VERSION,
442 "os_api_version": max(constants.OS_API_VERSIONS),
443 "export_version": constants.EXPORT_VERSION,
444 "vcs_version": constants.VCS_VERSION,
445 "architecture": runtime.GetArchInfo(),
446 "name": cluster.cluster_name,
447 "master": self.cfg.GetMasterNodeName(),
448 "default_hypervisor": cluster.primary_hypervisor,
449 "enabled_hypervisors": cluster.enabled_hypervisors,
450 "hvparams": dict([(hypervisor_name, cluster.hvparams[hypervisor_name])
451 for hypervisor_name in cluster.enabled_hypervisors]),
452 "os_hvp": os_hvp,
453 "beparams": cluster.beparams,
454 "osparams": cluster.osparams,
455 "ipolicy": cluster.ipolicy,
456 "nicparams": cluster.nicparams,
457 "ndparams": cluster.ndparams,
458 "diskparams": cluster.diskparams,
459 "candidate_pool_size": cluster.candidate_pool_size,
460 "max_running_jobs": cluster.max_running_jobs,
461 "max_tracked_jobs": cluster.max_tracked_jobs,
462 "mac_prefix": cluster.mac_prefix,
463 "master_netdev": cluster.master_netdev,
464 "master_netmask": cluster.master_netmask,
465 "use_external_mip_script": cluster.use_external_mip_script,
466 "volume_group_name": cluster.volume_group_name,
467 "drbd_usermode_helper": cluster.drbd_usermode_helper,
468 "file_storage_dir": cluster.file_storage_dir,
469 "shared_file_storage_dir": cluster.shared_file_storage_dir,
470 "maintain_node_health": cluster.maintain_node_health,
471 "ctime": cluster.ctime,
472 "mtime": cluster.mtime,
473 "uuid": cluster.uuid,
474 "tags": list(cluster.GetTags()),
475 "uid_pool": cluster.uid_pool,
476 "default_iallocator": cluster.default_iallocator,
477 "default_iallocator_params": cluster.default_iallocator_params,
478 "reserved_lvs": cluster.reserved_lvs,
479 "primary_ip_version": primary_ip_version,
480 "prealloc_wipe_disks": cluster.prealloc_wipe_disks,
481 "hidden_os": cluster.hidden_os,
482 "blacklisted_os": cluster.blacklisted_os,
483 "enabled_disk_templates": cluster.enabled_disk_templates,
484 "install_image": cluster.install_image,
485 "instance_communication_network": cluster.instance_communication_network,
486 "compression_tools": cluster.compression_tools,
487 "enabled_user_shutdown": cluster.enabled_user_shutdown,
488 }
489
490 return result
491
492
493 class LUClusterRedistConf(NoHooksLU):
494 """Force the redistribution of cluster configuration.
495
496 This is a very simple LU.
497
498 """
499 REQ_BGL = False
500
501 def ExpandNames(self):
502 self.needed_locks = {
503 locking.LEVEL_NODE: locking.ALL_SET,
504 locking.LEVEL_NODE_ALLOC: locking.ALL_SET,
505 }
506 self.share_locks = ShareAll()
507
508 def Exec(self, feedback_fn):
509 """Redistribute the configuration.
510
511 """
512 self.cfg.Update(self.cfg.GetClusterInfo(), feedback_fn)
513 RedistributeAncillaryFiles(self)
514
515
516 class LUClusterRename(LogicalUnit):
517 """Rename the cluster.
518
519 """
520 HPATH = "cluster-rename"
521 HTYPE = constants.HTYPE_CLUSTER
522
523 def BuildHooksEnv(self):
524 """Build hooks env.
525
526 """
527 return {
528 "OP_TARGET": self.cfg.GetClusterName(),
529 "NEW_NAME": self.op.name,
530 }
531
532 def BuildHooksNodes(self):
533 """Build hooks nodes.
534
535 """
536 return ([self.cfg.GetMasterNode()], self.cfg.GetNodeList())
537
538 def CheckPrereq(self):
539 """Verify that the passed name is a valid one.
540
541 """
542 hostname = netutils.GetHostname(name=self.op.name,
543 family=self.cfg.GetPrimaryIPFamily())
544
545 new_name = hostname.name
546 self.ip = new_ip = hostname.ip
547 old_name = self.cfg.GetClusterName()
548 old_ip = self.cfg.GetMasterIP()
549 if new_name == old_name and new_ip == old_ip:
550 raise errors.OpPrereqError("Neither the name nor the IP address of the"
551 " cluster has changed",
552 errors.ECODE_INVAL)
553 if new_ip != old_ip:
554 if netutils.TcpPing(new_ip, constants.DEFAULT_NODED_PORT):
555 raise errors.OpPrereqError("The given cluster IP address (%s) is"
556 " reachable on the network" %
557 new_ip, errors.ECODE_NOTUNIQUE)
558
559 self.op.name = new_name
560
561 def Exec(self, feedback_fn):
562 """Rename the cluster.
563
564 """
565 clustername = self.op.name
566 new_ip = self.ip
567
568 # shutdown the master IP
569 master_params = self.cfg.GetMasterNetworkParameters()
570 ems = self.cfg.GetUseExternalMipScript()
571 result = self.rpc.call_node_deactivate_master_ip(master_params.uuid,
572 master_params, ems)
573 result.Raise("Could not disable the master role")
574
575 try:
576 cluster = self.cfg.GetClusterInfo()
577 cluster.cluster_name = clustername
578 cluster.master_ip = new_ip
579 self.cfg.Update(cluster, feedback_fn)
580
581 # update the known hosts file
582 ssh.WriteKnownHostsFile(self.cfg, pathutils.SSH_KNOWN_HOSTS_FILE)
583 node_list = self.cfg.GetOnlineNodeList()
584 try:
585 node_list.remove(master_params.uuid)
586 except ValueError:
587 pass
588 UploadHelper(self, node_list, pathutils.SSH_KNOWN_HOSTS_FILE)
589 finally:
590 master_params.ip = new_ip
591 result = self.rpc.call_node_activate_master_ip(master_params.uuid,
592 master_params, ems)
593 result.Warn("Could not re-enable the master role on the master,"
594 " please restart manually", self.LogWarning)
595
596 return clustername
597
598
599 class LUClusterRepairDiskSizes(NoHooksLU):
600 """Verifies the cluster disks sizes.
601
602 """
603 REQ_BGL = False
604
605 def ExpandNames(self):
606 if self.op.instances:
607 (_, self.wanted_names) = GetWantedInstances(self, self.op.instances)
608 # Not getting the node allocation lock as only a specific set of
609 # instances (and their nodes) is going to be acquired
610 self.needed_locks = {
611 locking.LEVEL_NODE_RES: [],
612 locking.LEVEL_INSTANCE: self.wanted_names,
613 }
614 self.recalculate_locks[locking.LEVEL_NODE_RES] = constants.LOCKS_REPLACE
615 else:
616 self.wanted_names = None
617 self.needed_locks = {
618 locking.LEVEL_NODE_RES: locking.ALL_SET,
619 locking.LEVEL_INSTANCE: locking.ALL_SET,
620
621         # This opcode acquires the node locks for all instances
622 locking.LEVEL_NODE_ALLOC: locking.ALL_SET,
623 }
624
625 self.share_locks = {
626 locking.LEVEL_NODE_RES: 1,
627 locking.LEVEL_INSTANCE: 0,
628 locking.LEVEL_NODE_ALLOC: 1,
629 }
630
631 def DeclareLocks(self, level):
632 if level == locking.LEVEL_NODE_RES and self.wanted_names is not None:
633 self._LockInstancesNodes(primary_only=True, level=level)
634
635 def CheckPrereq(self):
636 """Check prerequisites.
637
638 This only checks the optional instance list against the existing names.
639
640 """
641 if self.wanted_names is None:
642 self.wanted_names = self.owned_locks(locking.LEVEL_INSTANCE)
643
644 self.wanted_instances = \
645 map(compat.snd, self.cfg.GetMultiInstanceInfoByName(self.wanted_names))
646
647 def _EnsureChildSizes(self, disk):
648 """Ensure children of the disk have the needed disk size.
649
650 This is valid mainly for DRBD8 and fixes an issue where the
651 children have smaller disk size.
652
653 @param disk: an L{ganeti.objects.Disk} object
654
655 """
656 if disk.dev_type == constants.DT_DRBD8:
657 assert disk.children, "Empty children for DRBD8?"
658 fchild = disk.children[0]
659 mismatch = fchild.size < disk.size
660 if mismatch:
661 self.LogInfo("Child disk has size %d, parent %d, fixing",
662 fchild.size, disk.size)
663 fchild.size = disk.size
664
665 # and we recurse on this child only, not on the metadev
666 return self._EnsureChildSizes(fchild) or mismatch
667 else:
668 return False
669
670 def Exec(self, feedback_fn):
671 """Verify the size of cluster disks.
672
673 """
674 # TODO: check child disks too
675 # TODO: check differences in size between primary/secondary nodes
676 per_node_disks = {}
677 for instance in self.wanted_instances:
678 pnode = instance.primary_node
679 if pnode not in per_node_disks:
680 per_node_disks[pnode] = []
681 for idx, disk in enumerate(self.cfg.GetInstanceDisks(instance.uuid)):
682 per_node_disks[pnode].append((instance, idx, disk))
683
684 assert not (frozenset(per_node_disks.keys()) -
685 frozenset(self.owned_locks(locking.LEVEL_NODE_RES))), \
686 "Not owning correct locks"
687 assert not self.owned_locks(locking.LEVEL_NODE)
688
689 es_flags = rpc.GetExclusiveStorageForNodes(self.cfg,
690 per_node_disks.keys())
691
692 changed = []
693 for node_uuid, dskl in per_node_disks.items():
694 if not dskl:
695 # no disks on the node
696 continue
697
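      # Build the ([disk], instance) pairs that are handed to the
      # blockdev_getdimensions RPC; the disks are copied (Copy()) so the RPC
      # payload does not alias the configuration objects.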
698 newl = [([v[2].Copy()], v[0]) for v in dskl]
699 node_name = self.cfg.GetNodeName(node_uuid)
700 result = self.rpc.call_blockdev_getdimensions(node_uuid, newl)
701 if result.fail_msg:
702 self.LogWarning("Failure in blockdev_getdimensions call to node"
703 " %s, ignoring", node_name)
704 continue
705 if len(result.payload) != len(dskl):
706         logging.warning("Invalid result from node %s: len(dskl)=%d,"
707 " result.payload=%s", node_name, len(dskl),
708 result.payload)
709 self.LogWarning("Invalid result from node %s, ignoring node results",
710 node_name)
711 continue
712 for ((instance, idx, disk), dimensions) in zip(dskl, result.payload):
713 if dimensions is None:
714 self.LogWarning("Disk %d of instance %s did not return size"
715 " information, ignoring", idx, instance.name)
716 continue
717 if not isinstance(dimensions, (tuple, list)):
718 self.LogWarning("Disk %d of instance %s did not return valid"
719 " dimension information, ignoring", idx,
720 instance.name)
721 continue
722 (size, spindles) = dimensions
723 if not isinstance(size, (int, long)):
724 self.LogWarning("Disk %d of instance %s did not return valid"
725 " size information, ignoring", idx, instance.name)
726 continue
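          # The RPC reports the size in bytes; the configuration stores disk
          # sizes in MiB, hence the conversion below.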
727 size = size >> 20
728 if size != disk.size:
729 self.LogInfo("Disk %d of instance %s has mismatched size,"
730 " correcting: recorded %d, actual %d", idx,
731 instance.name, disk.size, size)
732 disk.size = size
733 self.cfg.Update(disk, feedback_fn)
734 changed.append((instance.name, idx, "size", size))
735 if es_flags[node_uuid]:
736 if spindles is None:
737 self.LogWarning("Disk %d of instance %s did not return valid"
738 " spindles information, ignoring", idx,
739 instance.name)
740 elif disk.spindles is None or disk.spindles != spindles:
741 self.LogInfo("Disk %d of instance %s has mismatched spindles,"
742 " correcting: recorded %s, actual %s",
743 idx, instance.name, disk.spindles, spindles)
744 disk.spindles = spindles
745 self.cfg.Update(disk, feedback_fn)
746 changed.append((instance.name, idx, "spindles", disk.spindles))
747 if self._EnsureChildSizes(disk):
748 self.cfg.Update(disk, feedback_fn)
749 changed.append((instance.name, idx, "size", disk.size))
750 return changed
751
752
753 def _ValidateNetmask(cfg, netmask):
754 """Checks if a netmask is valid.
755
756 @type cfg: L{config.ConfigWriter}
757 @param cfg: cluster configuration
758 @type netmask: int
759 @param netmask: netmask to be verified
760 @raise errors.OpPrereqError: if the validation fails
761
762 """
763 ip_family = cfg.GetPrimaryIPFamily()
764 try:
765 ipcls = netutils.IPAddress.GetClassFromIpFamily(ip_family)
766 except errors.ProgrammerError:
767 raise errors.OpPrereqError("Invalid primary ip family: %s." %
768 ip_family, errors.ECODE_INVAL)
769 if not ipcls.ValidateNetmask(netmask):
770 raise errors.OpPrereqError("CIDR netmask (%s) not valid" %
771 (netmask), errors.ECODE_INVAL)
772
773
774 def CheckFileBasedStoragePathVsEnabledDiskTemplates(
775 logging_warn_fn, file_storage_dir, enabled_disk_templates,
776 file_disk_template):
777 """Checks whether the given file-based storage directory is acceptable.
778
779 Note: This function is public, because it is also used in bootstrap.py.
780
781 @type logging_warn_fn: function
782 @param logging_warn_fn: function which accepts a string and logs it
783 @type file_storage_dir: string
784 @param file_storage_dir: the directory to be used for file-based instances
785 @type enabled_disk_templates: list of string
786 @param enabled_disk_templates: the list of enabled disk templates
787 @type file_disk_template: string
788 @param file_disk_template: the file-based disk template for which the
789 path should be checked
790
791 """
792 assert (file_disk_template in utils.storage.GetDiskTemplatesOfStorageTypes(
793 constants.ST_FILE, constants.ST_SHARED_FILE, constants.ST_GLUSTER
794 ))
795
796 file_storage_enabled = file_disk_template in enabled_disk_templates
797 if file_storage_dir is not None:
798 if file_storage_dir == "":
799 if file_storage_enabled:
800 raise errors.OpPrereqError(
801 "Unsetting the '%s' storage directory while having '%s' storage"
802 " enabled is not permitted." %
803 (file_disk_template, file_disk_template),
804 errors.ECODE_INVAL)
805 else:
806 if not file_storage_enabled:
807 logging_warn_fn(
808 "Specified a %s storage directory, although %s storage is not"
809 " enabled." % (file_disk_template, file_disk_template))
810 else:
811 raise errors.ProgrammerError("Received %s storage dir with value"
812 " 'None'." % file_disk_template)
813
814
815 def CheckFileStoragePathVsEnabledDiskTemplates(
816 logging_warn_fn, file_storage_dir, enabled_disk_templates):
817 """Checks whether the given file storage directory is acceptable.
818
819 @see: C{CheckFileBasedStoragePathVsEnabledDiskTemplates}
820
821 """
822 CheckFileBasedStoragePathVsEnabledDiskTemplates(
823 logging_warn_fn, file_storage_dir, enabled_disk_templates,
824 constants.DT_FILE)
825
826
827 def CheckSharedFileStoragePathVsEnabledDiskTemplates(
828 logging_warn_fn, file_storage_dir, enabled_disk_templates):
829 """Checks whether the given shared file storage directory is acceptable.
830
831 @see: C{CheckFileBasedStoragePathVsEnabledDiskTemplates}
832
833 """
834 CheckFileBasedStoragePathVsEnabledDiskTemplates(
835 logging_warn_fn, file_storage_dir, enabled_disk_templates,
836 constants.DT_SHARED_FILE)
837
838
839 def CheckGlusterStoragePathVsEnabledDiskTemplates(
840 logging_warn_fn, file_storage_dir, enabled_disk_templates):
841 """Checks whether the given gluster storage directory is acceptable.
842
843 @see: C{CheckFileBasedStoragePathVsEnabledDiskTemplates}
844
845 """
846 CheckFileBasedStoragePathVsEnabledDiskTemplates(
847 logging_warn_fn, file_storage_dir, enabled_disk_templates,
848 constants.DT_GLUSTER)
849
850
851 def CheckCompressionTools(tools):
852 """Check whether the provided compression tools look like executables.
853
854 @type tools: list of string
855 @param tools: The tools provided as opcode input
856
857 """
858 regex = re.compile('^[-_a-zA-Z0-9]+$')
859 illegal_tools = [t for t in tools if not regex.match(t)]
860
861 if illegal_tools:
862 raise errors.OpPrereqError(
863 "The tools '%s' contain illegal characters: only alphanumeric values,"
864 " dashes, and underscores are allowed" % ", ".join(illegal_tools),
865 errors.ECODE_INVAL
866 )
867
868 if constants.IEC_GZIP not in tools:
869 raise errors.OpPrereqError("For compatibility reasons, the %s utility must"
870 " be present among the compression tools" %
871 constants.IEC_GZIP, errors.ECODE_INVAL)
872
873 if constants.IEC_NONE in tools:
874 raise errors.OpPrereqError("%s is a reserved value used for no compression,"
875 " and cannot be used as the name of a tool" %
876 constants.IEC_NONE, errors.ECODE_INVAL)
877
878
879 class LUClusterSetParams(LogicalUnit):
880 """Change the parameters of the cluster.
881
882 """
883 HPATH = "cluster-modify"
884 HTYPE = constants.HTYPE_CLUSTER
885 REQ_BGL = False
886
887 def CheckArguments(self):
888 """Check parameters
889
890 """
891 if self.op.uid_pool:
892 uidpool.CheckUidPool(self.op.uid_pool)
893
894 if self.op.add_uids:
895 uidpool.CheckUidPool(self.op.add_uids)
896
897 if self.op.remove_uids:
898 uidpool.CheckUidPool(self.op.remove_uids)
899
900 if self.op.mac_prefix:
901 self.op.mac_prefix = \
902 utils.NormalizeAndValidateThreeOctetMacPrefix(self.op.mac_prefix)
903
904 if self.op.master_netmask is not None:
905 _ValidateNetmask(self.cfg, self.op.master_netmask)
906
907 if self.op.diskparams:
908 for dt_params in self.op.diskparams.values():
909 utils.ForceDictType(dt_params, constants.DISK_DT_TYPES)
910 try:
911 utils.VerifyDictOptions(self.op.diskparams, constants.DISK_DT_DEFAULTS)
912 CheckDiskAccessModeValidity(self.op.diskparams)
913 except errors.OpPrereqError, err:
914         raise errors.OpPrereqError("While verifying diskparams options: %s" % err,
915 errors.ECODE_INVAL)
916
917 if self.op.install_image is not None:
918 CheckImageValidity(self.op.install_image,
919 "Install image must be an absolute path or a URL")
920
921 def ExpandNames(self):
922 # FIXME: in the future maybe other cluster params won't require checking on
923 # all nodes to be modified.
924 # FIXME: This opcode changes cluster-wide settings. Is acquiring all
925 # resource locks the right thing, shouldn't it be the BGL instead?
926 self.needed_locks = {
927 locking.LEVEL_NODE: locking.ALL_SET,
928 locking.LEVEL_INSTANCE: locking.ALL_SET,
929 locking.LEVEL_NODEGROUP: locking.ALL_SET,
930 locking.LEVEL_NODE_ALLOC: locking.ALL_SET,
931 }
932 self.share_locks = ShareAll()
933
934 def BuildHooksEnv(self):
935 """Build hooks env.
936
937 """
938 return {
939 "OP_TARGET": self.cfg.GetClusterName(),
940 "NEW_VG_NAME": self.op.vg_name,
941 }
942
943 def BuildHooksNodes(self):
944 """Build hooks nodes.
945
946 """
947 mn = self.cfg.GetMasterNode()
948 return ([mn], [mn])
949
950 def _CheckVgName(self, node_uuids, enabled_disk_templates,
951 new_enabled_disk_templates):
952     """Check the consistency of the VG name on all nodes and, in case it
953     gets unset, whether there are instances still using it.
954
955 """
956 lvm_is_enabled = utils.IsLvmEnabled(enabled_disk_templates)
957 lvm_gets_enabled = utils.LvmGetsEnabled(enabled_disk_templates,
958 new_enabled_disk_templates)
959 current_vg_name = self.cfg.GetVGName()
960
961 if self.op.vg_name == '':
962 if lvm_is_enabled:
963 raise errors.OpPrereqError("Cannot unset volume group if lvm-based"
964 " disk templates are or get enabled.",
965 errors.ECODE_INVAL)
966
967 if self.op.vg_name is None:
968 if current_vg_name is None and lvm_is_enabled:
969 raise errors.OpPrereqError("Please specify a volume group when"
970 " enabling lvm-based disk-templates.",
971 errors.ECODE_INVAL)
972
973 if self.op.vg_name is not None and not self.op.vg_name:
974 if self.cfg.HasAnyDiskOfType(constants.DT_PLAIN):
975 raise errors.OpPrereqError("Cannot disable lvm storage while lvm-based"
976 " instances exist", errors.ECODE_INVAL)
977
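    # Only contact the nodes when a volume group will actually be in use:
    # either a VG name is being set while LVM-based templates are enabled, or
    # LVM is being enabled with the currently configured VG name.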
978 if (self.op.vg_name is not None and lvm_is_enabled) or \
979 (self.cfg.GetVGName() is not None and lvm_gets_enabled):
980 self._CheckVgNameOnNodes(node_uuids)
981
982 def _CheckVgNameOnNodes(self, node_uuids):
983 """Check the status of the volume group on each node.
984
985 """
986 vglist = self.rpc.call_vg_list(node_uuids)
987 for node_uuid in node_uuids:
988 msg = vglist[node_uuid].fail_msg
989 if msg:
990 # ignoring down node
991 self.LogWarning("Error while gathering data on node %s"
992 " (ignoring node): %s",
993 self.cfg.GetNodeName(node_uuid), msg)
994 continue
995 vgstatus = utils.CheckVolumeGroupSize(vglist[node_uuid].payload,
996 self.op.vg_name,
997 constants.MIN_VG_SIZE)
998 if vgstatus:
999 raise errors.OpPrereqError("Error on node '%s': %s" %
1000 (self.cfg.GetNodeName(node_uuid), vgstatus),
1001 errors.ECODE_ENVIRON)
1002
1003 @staticmethod
1004 def _GetDiskTemplateSetsInner(op_enabled_disk_templates,
1005 old_enabled_disk_templates):
1006 """Computes three sets of disk templates.
1007
1008 @see: C{_GetDiskTemplateSets} for more details.
1009
1010 """
1011 enabled_disk_templates = None
1012 new_enabled_disk_templates = []
1013 disabled_disk_templates = []
1014 if op_enabled_disk_templates:
1015 enabled_disk_templates = op_enabled_disk_templates
1016 new_enabled_disk_templates = \
1017 list(set(enabled_disk_templates)
1018 - set(old_enabled_disk_templates))
1019 disabled_disk_templates = \
1020 list(set(old_enabled_disk_templates)
1021 - set(enabled_disk_templates))
1022 else:
1023 enabled_disk_templates = old_enabled_disk_templates
1024 return (enabled_disk_templates, new_enabled_disk_templates,
1025 disabled_disk_templates)
1026
1027 def _GetDiskTemplateSets(self, cluster):
1028 """Computes three sets of disk templates.
1029
1030 The three sets are:
1031 - disk templates that will be enabled after this operation (no matter if
1032 they were enabled before or not)
1033 - disk templates that get enabled by this operation (thus haven't been
1034 enabled before.)
1035 - disk templates that get disabled by this operation
1036
1037 """
1038 return self._GetDiskTemplateSetsInner(self.op.enabled_disk_templates,
1039 cluster.enabled_disk_templates)
1040
1041 def _CheckIpolicy(self, cluster, enabled_disk_templates):
1042 """Checks the ipolicy.
1043
1044 @type cluster: C{objects.Cluster}
1045 @param cluster: the cluster's configuration
1046 @type enabled_disk_templates: list of string
1047 @param enabled_disk_templates: list of (possibly newly) enabled disk
1048 templates
1049
1050 """
1051 # FIXME: write unit tests for this
1052 if self.op.ipolicy:
1053 self.new_ipolicy = GetUpdatedIPolicy(cluster.ipolicy, self.op.ipolicy,
1054 group_policy=False)
1055
1056 CheckIpolicyVsDiskTemplates(self.new_ipolicy,
1057 enabled_disk_templates)
1058
1059 all_instances = self.cfg.GetAllInstancesInfo().values()
1060 violations = set()
1061 for group in self.cfg.GetAllNodeGroupsInfo().values():
1062 instances = frozenset(
1063 [inst for inst in all_instances
1064 if compat.any(nuuid in group.members
1065 for nuuid in self.cfg.GetInstanceNodes(inst.uuid))])
1066 new_ipolicy = objects.FillIPolicy(self.new_ipolicy, group.ipolicy)
1067 ipol = masterd.instance.CalculateGroupIPolicy(cluster, group)
1068 new = ComputeNewInstanceViolations(ipol, new_ipolicy, instances,
1069 self.cfg)
1070 if new:
1071 violations.update(new)
1072
1073 if violations:
1074 self.LogWarning("After the ipolicy change the following instances"
1075 " violate them: %s",
1076 utils.CommaJoin(utils.NiceSort(violations)))
1077 else:
1078 CheckIpolicyVsDiskTemplates(cluster.ipolicy,
1079 enabled_disk_templates)
1080
1081 def _CheckDrbdHelperOnNodes(self, drbd_helper, node_uuids):
1082 """Checks whether the set DRBD helper actually exists on the nodes.
1083
1084 @type drbd_helper: string
1085 @param drbd_helper: path of the drbd usermode helper binary
1086 @type node_uuids: list of strings
1087 @param node_uuids: list of node UUIDs to check for the helper
1088
1089 """
1090 # checks given drbd helper on all nodes
1091 helpers = self.rpc.call_drbd_helper(node_uuids)
1092 for (_, ninfo) in self.cfg.GetMultiNodeInfo(node_uuids):
1093 if ninfo.offline:
1094 self.LogInfo("Not checking drbd helper on offline node %s",
1095 ninfo.name)
1096 continue
1097 msg = helpers[ninfo.uuid].fail_msg
1098 if msg:
1099 raise errors.OpPrereqError("Error checking drbd helper on node"
1100 " '%s': %s" % (ninfo.name, msg),
1101 errors.ECODE_ENVIRON)
1102 node_helper = helpers[ninfo.uuid].payload
1103 if node_helper != drbd_helper:
1104 raise errors.OpPrereqError("Error on node '%s': drbd helper is %s" %
1105 (ninfo.name, node_helper),
1106 errors.ECODE_ENVIRON)
1107
1108 def _CheckDrbdHelper(self, node_uuids, drbd_enabled, drbd_gets_enabled):
1109 """Check the DRBD usermode helper.
1110
1111 @type node_uuids: list of strings
1112 @param node_uuids: a list of nodes' UUIDs
1113 @type drbd_enabled: boolean
1114 @param drbd_enabled: whether DRBD will be enabled after this operation
1115 (no matter if it was disabled before or not)
1116     @type drbd_gets_enabled: boolean
1117 @param drbd_gets_enabled: true if DRBD was disabled before this
1118 operation, but will be enabled afterwards
1119
1120 """
1121 if self.op.drbd_helper == '':
1122 if drbd_enabled:
1123 raise errors.OpPrereqError("Cannot disable drbd helper while"
1124 " DRBD is enabled.", errors.ECODE_STATE)
1125 if self.cfg.HasAnyDiskOfType(constants.DT_DRBD8):
1126 raise errors.OpPrereqError("Cannot disable drbd helper while"
1127 " drbd-based instances exist",
1128 errors.ECODE_INVAL)
1129
1130 else:
1131 if self.op.drbd_helper is not None and drbd_enabled:
1132 self._CheckDrbdHelperOnNodes(self.op.drbd_helper, node_uuids)
1133 else:
1134 if drbd_gets_enabled:
1135 current_drbd_helper = self.cfg.GetClusterInfo().drbd_usermode_helper
1136 if current_drbd_helper is not None:
1137 self._CheckDrbdHelperOnNodes(current_drbd_helper, node_uuids)
1138 else:
1139 raise errors.OpPrereqError("Cannot enable DRBD without a"
1140 " DRBD usermode helper set.",
1141 errors.ECODE_STATE)
1142
1143 def _CheckInstancesOfDisabledDiskTemplates(
1144 self, disabled_disk_templates):
1145 """Check whether we try to disable a disk template that is in use.
1146
1147 @type disabled_disk_templates: list of string
1148 @param disabled_disk_templates: list of disk templates that are going to
1149 be disabled by this operation
1150
1151 """
1152 for disk_template in disabled_disk_templates:
1153 if self.cfg.HasAnyDiskOfType(disk_template):
1154 raise errors.OpPrereqError(
1155 "Cannot disable disk template '%s', because there is at least one"
1156 " instance using it." % disk_template,
1157 errors.ECODE_STATE)
1158
1159 @staticmethod
1160 def _CheckInstanceCommunicationNetwork(network, warning_fn):
1161 """Check whether an existing network is configured for instance
1162 communication.
1163
1164 Checks whether an existing network is configured with the
1165 parameters that are advisable for instance communication, and
1166 otherwise issue security warnings.
1167
1168 @type network: L{ganeti.objects.Network}
1169 @param network: L{ganeti.objects.Network} object whose
1170 configuration is being checked
1171 @type warning_fn: function
1172 @param warning_fn: function used to print warnings
1173 @rtype: None
1174 @return: None
1175
1176 """
1177 def _MaybeWarn(err, val, default):
1178 if val != default:
1179 warning_fn("Supplied instance communication network '%s' %s '%s',"
1180 " this might pose a security risk (default is '%s').",
1181 network.name, err, val, default)
1182
1183 if network.network is None:
1184       raise errors.OpPrereqError("Supplied instance communication network '%s'"
1185                                  " must have an IPv4 network address." %
1186                                  network.name, errors.ECODE_INVAL)
1187
1188 _MaybeWarn("has an IPv4 gateway", network.gateway, None)
1189 _MaybeWarn("has a non-standard IPv4 network address", network.network,
1190 constants.INSTANCE_COMMUNICATION_NETWORK4)
1191 _MaybeWarn("has an IPv6 gateway", network.gateway6, None)
1192 _MaybeWarn("has a non-standard IPv6 network address", network.network6,
1193 constants.INSTANCE_COMMUNICATION_NETWORK6)
1194 _MaybeWarn("has a non-standard MAC prefix", network.mac_prefix,
1195 constants.INSTANCE_COMMUNICATION_MAC_PREFIX)
1196
1197 def CheckPrereq(self):
1198 """Check prerequisites.
1199
1200 This checks whether the given params don't conflict and
1201 if the given volume group is valid.
1202
1203 """
1204 node_uuids = self.owned_locks(locking.LEVEL_NODE)
1205 self.cluster = cluster = self.cfg.GetClusterInfo()
1206
1207 vm_capable_node_uuids = [node.uuid
1208 for node in self.cfg.GetAllNodesInfo().values()
1209 if node.uuid in node_uuids and node.vm_capable]
1210
1211 (enabled_disk_templates, new_enabled_disk_templates,
1212 disabled_disk_templates) = self._GetDiskTemplateSets(cluster)
1213 self._CheckInstancesOfDisabledDiskTemplates(disabled_disk_templates)
1214
1215 self._CheckVgName(vm_capable_node_uuids, enabled_disk_templates,
1216 new_enabled_disk_templates)
1217
1218 if self.op.file_storage_dir is not None:
1219 CheckFileStoragePathVsEnabledDiskTemplates(
1220 self.LogWarning, self.op.file_storage_dir, enabled_disk_templates)
1221
1222 if self.op.shared_file_storage_dir is not None:
1223 CheckSharedFileStoragePathVsEnabledDiskTemplates(
1224 self.LogWarning, self.op.shared_file_storage_dir,
1225 enabled_disk_templates)
1226
1227 drbd_enabled = constants.DT_DRBD8 in enabled_disk_templates
1228 drbd_gets_enabled = constants.DT_DRBD8 in new_enabled_disk_templates
1229 self._CheckDrbdHelper(vm_capable_node_uuids,
1230 drbd_enabled, drbd_gets_enabled)
1231
1232 # validate params changes
1233 if self.op.beparams:
1234 objects.UpgradeBeParams(self.op.beparams)
1235 utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
1236 self.new_beparams = cluster.SimpleFillBE(self.op.beparams)
1237
1238 if self.op.ndparams:
1239 utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
1240 self.new_ndparams = cluster.SimpleFillND(self.op.ndparams)
1241
1242 # TODO: we need a more general way to handle resetting
1243 # cluster-level parameters to default values
1244 if self.new_ndparams["oob_program"] == "":
1245 self.new_ndparams["oob_program"] = \
1246 constants.NDC_DEFAULTS[constants.ND_OOB_PROGRAM]
1247
1248 if self.op.hv_state:
1249 new_hv_state = MergeAndVerifyHvState(self.op.hv_state,
1250 self.cluster.hv_state_static)
1251 self.new_hv_state = dict((hv, cluster.SimpleFillHvState(values))
1252 for hv, values in new_hv_state.items())
1253
1254 if self.op.disk_state:
1255 new_disk_state = MergeAndVerifyDiskState(self.op.disk_state,
1256 self.cluster.disk_state_static)
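      # The resulting structure is a nested mapping:
      # storage kind -> device identifier -> filled disk-state parameters.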
1257 self.new_disk_state = \
1258 dict((storage, dict((name, cluster.SimpleFillDiskState(values))
1259 for name, values in svalues.items()))
1260 for storage, svalues in new_disk_state.items())
1261
1262 self._CheckIpolicy(cluster, enabled_disk_templates)
1263
1264 if self.op.nicparams:
1265 utils.ForceDictType(self.op.nicparams, constants.NICS_PARAMETER_TYPES)
1266 self.new_nicparams = cluster.SimpleFillNIC(self.op.nicparams)
1267 objects.NIC.CheckParameterSyntax(self.new_nicparams)
1268 nic_errors = []
1269
1270 # check all instances for consistency
1271 for instance in self.cfg.GetAllInstancesInfo().values():
1272 for nic_idx, nic in enumerate(instance.nics):
1273 params_copy = copy.deepcopy(nic.nicparams)
1274 params_filled = objects.FillDict(self.new_nicparams, params_copy)
1275
1276 # check parameter syntax
1277 try:
1278 objects.NIC.CheckParameterSyntax(params_filled)
1279 except errors.ConfigurationError, err:
1280 nic_errors.append("Instance %s, nic/%d: %s" %
1281 (instance.name, nic_idx, err))
1282
1283 # if we're moving instances to routed, check that they have an ip
1284 target_mode = params_filled[constants.NIC_MODE]
1285 if target_mode == constants.NIC_MODE_ROUTED and not nic.ip:
1286 nic_errors.append("Instance %s, nic/%d: routed NIC with no ip"
1287 " address" % (instance.name, nic_idx))
1288 if nic_errors:
1289 raise errors.OpPrereqError("Cannot apply the change, errors:\n%s" %
1290 "\n".join(nic_errors), errors.ECODE_INVAL)
1291
1292 # hypervisor list/parameters
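    # Start from a copy of the current cluster hvparams and overlay the
    # user-supplied values; defaults for newly enabled hypervisors are filled
    # in further below.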
1293 self.new_hvparams = new_hvp = objects.FillDict(cluster.hvparams, {})
1294 if self.op.hvparams:
1295 for hv_name, hv_dict in self.op.hvparams.items():
1296 if hv_name not in self.new_hvparams:
1297 self.new_hvparams[hv_name] = hv_dict
1298 else:
1299 self.new_hvparams[hv_name].update(hv_dict)
1300
1301 # disk template parameters
1302 self.new_diskparams = objects.FillDict(cluster.diskparams, {})
1303 if self.op.diskparams:
1304 for dt_name, dt_params in self.op.diskparams.items():
1305 if dt_name not in self.new_diskparams:
1306 self.new_diskparams[dt_name] = dt_params
1307 else:
1308 self.new_diskparams[dt_name].update(dt_params)
1309 CheckDiskAccessModeConsistency(self.op.diskparams, self.cfg)
1310
1311 # os hypervisor parameters
1312 self.new_os_hvp = objects.FillDict(cluster.os_hvp, {})
1313 if self.op.os_hvp:
1314 for os_name, hvs in self.op.os_hvp.items():
1315 if os_name not in self.new_os_hvp:
1316 self.new_os_hvp[os_name] = hvs
1317 else:
1318 for hv_name, hv_dict in hvs.items():
1319 if hv_dict is None:
1320 # Delete if it exists
1321 self.new_os_hvp[os_name].pop(hv_name, None)
1322 elif hv_name not in self.new_os_hvp[os_name]:
1323 self.new_os_hvp[os_name][hv_name] = hv_dict
1324 else:
1325 self.new_os_hvp[os_name][hv_name].update(hv_dict)
1326
1327 # os parameters
1328 self._BuildOSParams(cluster)
1329
1330 # changes to the hypervisor list
1331 if self.op.enabled_hypervisors is not None:
1332 for hv in self.op.enabled_hypervisors:
1333 # if the hypervisor doesn't already exist in the cluster
1334 # hvparams, we initialize it to empty, and then (in both
1335 # cases) we make sure to fill the defaults, as we might not
1336 # have a complete defaults list if the hypervisor wasn't
1337 # enabled before
1338 if hv not in new_hvp:
1339 new_hvp[hv] = {}
1340 new_hvp[hv] = objects.FillDict(constants.HVC_DEFAULTS[hv], new_hvp[hv])
1341 utils.ForceDictType(new_hvp[hv], constants.HVS_PARAMETER_TYPES)
1342
1343 if self.op.hvparams or self.op.enabled_hypervisors is not None:
1344 # either the enabled list has changed, or the parameters have, validate
1345 for hv_name, hv_params in self.new_hvparams.items():
1346 if ((self.op.hvparams and hv_name in self.op.hvparams) or
1347 (self.op.enabled_hypervisors and
1348 hv_name in self.op.enabled_hypervisors)):
1349 # either this is a new hypervisor, or its parameters have changed
1350 hv_class = hypervisor.GetHypervisorClass(hv_name)
1351 utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
1352 hv_class.CheckParameterSyntax(hv_params)
1353 CheckHVParams(self, node_uuids, hv_name, hv_params)
1354
1355 self._CheckDiskTemplateConsistency()
1356
1357 if self.op.os_hvp:
1358 # no need to check any newly-enabled hypervisors, since the
1359 # defaults have already been checked in the above code-block
1360 for os_name, os_hvp in self.new_os_hvp.items():
1361 for hv_name, hv_params in os_hvp.items():
1362 utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
1363 # we need to fill in the new os_hvp on top of the actual hv_p
1364 cluster_defaults = self.new_hvparams.get(hv_name, {})
1365 new_osp = objects.FillDict(cluster_defaults, hv_params)
1366 hv_class = hypervisor.GetHypervisorClass(hv_name)
1367 hv_class.CheckParameterSyntax(new_osp)
1368 CheckHVParams(self, node_uuids, hv_name, new_osp)
1369
1370 if self.op.default_iallocator:
1371 alloc_script = utils.FindFile(self.op.default_iallocator,
1372 constants.IALLOCATOR_SEARCH_PATH,
1373 os.path.isfile)
1374 if alloc_script is None:
1375 raise errors.OpPrereqError("Invalid default iallocator script '%s'"
1376 " specified" % self.op.default_iallocator,
1377 errors.ECODE_INVAL)
1378
1379 if self.op.instance_communication_network:
1380 network_name = self.op.instance_communication_network
1381
1382 try:
1383 network_uuid = self.cfg.LookupNetwork(network_name)
1384 except errors.OpPrereqError:
1385 network_uuid = None
1386
1387 if network_uuid is not None:
1388 network = self.cfg.GetNetwork(network_uuid)
1389 self._CheckInstanceCommunicationNetwork(network, self.LogWarning)
1390
1391 if self.op.compression_tools:
1392 CheckCompressionTools(self.op.compression_tools)
1393
1394 def _BuildOSParams(self, cluster):
1395 "Calculate the new OS parameters for this operation."
1396
1397 def _GetNewParams(source, new_params):
1398 "Wrapper around GetUpdatedParams."
1399 if new_params is None:
1400 return source
1401 result = objects.FillDict(source, {}) # deep copy of source
1402 for os_name in new_params:
1403 result[os_name] = GetUpdatedParams(result.get(os_name, {}),
1404 new_params[os_name],
1405 use_none=True)
1406 if not result[os_name]:
1407 del result[os_name] # we removed all parameters
1408 return result
1409
1410 self.new_osp = _GetNewParams(cluster.osparams,
1411 self.op.osparams)
1412 self.new_osp_private = _GetNewParams(cluster.osparams_private_cluster,
1413 self.op.osparams_private_cluster)
1414
1415     # Note: the OS validity check was removed; only the parameters are checked
1416 changed_oses = (set(self.new_osp.keys()) | set(self.new_osp_private.keys()))
1417 for os_name in changed_oses:
1418 os_params = cluster.SimpleFillOS(
1419 os_name,
1420 self.new_osp.get(os_name, {}),
1421 os_params_private=self.new_osp_private.get(os_name, {})
1422 )
1423 # check the parameter validity (remote check)
1424 CheckOSParams(self, False, [self.cfg.GetMasterNode()],
1425 os_name, os_params, False)
1426
1427 def _CheckDiskTemplateConsistency(self):
1428 """Check whether the disk templates that are going to be disabled
1429 are still in use by some instances.
1430
1431 """
1432 if self.op.enabled_disk_templates:
1433 cluster = self.cfg.GetClusterInfo()
1434 instances = self.cfg.GetAllInstancesInfo()
1435
1436 disk_templates_to_remove = set(cluster.enabled_disk_templates) \
1437 - set(self.op.enabled_disk_templates)
1438 for instance in instances.itervalues():
1439 if instance.disk_template in disk_templates_to_remove:
1440 raise errors.OpPrereqError("Cannot disable disk template '%s',"
1441 " because instance '%s' is using it." %
1442 (instance.disk_template, instance.name))
1443
1444 def _SetVgName(self, feedback_fn):
1445 """Determines and sets the new volume group name.
1446
1447 """
1448 if self.op.vg_name is not None:
1449 new_volume = self.op.vg_name
1450 if not new_volume:
1451 new_volume = None
1452 if new_volume != self.cfg.GetVGName():
1453 self.cfg.SetVGName(new_volume)
1454 else:
1455 feedback_fn("Cluster LVM configuration already in desired"
1456 " state, not changing")
1457
1458 def _SetFileStorageDir(self, feedback_fn):
1459 """Set the file storage directory.
1460
1461 """
1462 if self.op.file_storage_dir is not None:
1463 if self.cluster.file_storage_dir == self.op.file_storage_dir:
1464 feedback_fn("Global file storage dir already set to value '%s'"
1465 % self.cluster.file_storage_dir)
1466 else:
1467 self.cluster.file_storage_dir = self.op.file_storage_dir
1468
1469 def _SetSharedFileStorageDir(self, feedback_fn):
1470 """Set the shared file storage directory.
1471
1472 """
1473 if self.op.shared_file_storage_dir is not None:
1474 if self.cluster.shared_file_storage_dir == \
1475 self.op.shared_file_storage_dir:
1476 feedback_fn("Global shared file storage dir already set to value '%s'"
1477 % self.cluster.shared_file_storage_dir)
1478 else:
1479 self.cluster.shared_file_storage_dir = self.op.shared_file_storage_dir
1480
1481 def _SetDrbdHelper(self, feedback_fn):
1482 """Set the DRBD usermode helper.
1483
1484 """
1485 if self.op.drbd_helper is not None:
1486 if not constants.DT_DRBD8 in self.cluster.enabled_disk_templates:
1487 feedback_fn("Note that you specified a drbd user helper, but did not"
1488 " enable the drbd disk template.")
1489 new_helper = self.op.drbd_helper
1490 if not new_helper:
1491 new_helper = None
1492 if new_helper != self.cfg.GetDRBDHelper():
1493 self.cfg.SetDRBDHelper(new_helper)
1494 else:
1495 feedback_fn("Cluster DRBD helper already in desired state,"
1496 " not changing")
1497
1498 @staticmethod
1499 def _EnsureInstanceCommunicationNetwork(cfg, network_name):
1500 """Ensure that the instance communication network exists and is
1501 connected to all groups.
1502
1503     The instance communication network given by L{network_name} is
1504 created, if necessary, via the opcode 'OpNetworkAdd'. Also, the
1505 instance communication network is connected to all existing node
1506 groups, if necessary, via the opcode 'OpNetworkConnect'.
1507
1508 @type cfg: L{config.ConfigWriter}
1509 @param cfg: cluster configuration
1510
1511 @type network_name: string
1512 @param network_name: instance communication network name
1513
1514 @rtype: L{ganeti.cmdlib.ResultWithJobs} or L{None}
1515 @return: L{ganeti.cmdlib.ResultWithJobs} if the instance
1516       communication network needs to be created or connected to
1517       a group, otherwise L{None}
1518
1519 """
1520 jobs = []
1521
1522 try:
1523 network_uuid = cfg.LookupNetwork(network_name)
1524 network_exists = True
1525 except errors.OpPrereqError:
1526 network_exists = False
1527
1528 if not network_exists:
1529 jobs.append(AddInstanceCommunicationNetworkOp(network_name))
1530
1531 for group_uuid in cfg.GetNodeGroupList():
1532 group = cfg.GetNodeGroup(group_uuid)
1533
1534 if network_exists:
1535 network_connected = network_uuid in group.networks
1536 else:
1537 # The network was created asynchronously by the previous
1538 # opcode and, therefore, we don't have access to its
1539 # network_uuid. As a result, we assume that the network is
1540 # not connected to any group yet.
1541 network_connected = False
1542
1543 if not network_connected:
1544 op = ConnectInstanceCommunicationNetworkOp(group_uuid, network_name)
1545 jobs.append(op)
1546
1547 if jobs:
1548 return ResultWithJobs([jobs])
1549 else:
1550 return None
1551
1552 @staticmethod
1553 def _ModifyInstanceCommunicationNetwork(cfg, network_name, feedback_fn):
1554 """Update the instance communication network stored in the cluster
1555 configuration.
1556
1557 Compares the user-supplied instance communication network against
1558 the one stored in the Ganeti cluster configuration. If there is a
1559     change, the instance communication network may be created and
1560     connected to all groups (see
1561 L{LUClusterSetParams._EnsureInstanceCommunicationNetwork}).
1562
1563 @type cfg: L{config.ConfigWriter}
1564 @param cfg: cluster configuration
1565
1566 @type network_name: string
1567 @param network_name: instance communication network name
1568
1569 @type feedback_fn: function
1570     @param feedback_fn: see L{ganeti.cmdlib.base.LogicalUnit}
1571
1572 @rtype: L{LUClusterSetParams._EnsureInstanceCommunicationNetwork} or L{None}
1573 @return: see L{LUClusterSetParams._EnsureInstanceCommunicationNetwork}
1574
1575 """
1576 config_network_name = cfg.GetInstanceCommunicationNetwork()
1577
1578 if network_name == config_network_name:
1579 feedback_fn("Instance communication network already is '%s', nothing to"
1580 " do." % network_name)
1581 else:
1582 try:
1583 cfg.LookupNetwork(config_network_name)
1584 feedback_fn("Previous instance communication network '%s'"
1585 " should be removed manually." % config_network_name)
1586 except errors.OpPrereqError:
1587 pass
1588
1589 if network_name:
1590 feedback_fn("Changing instance communication network to '%s', only new"
1591 " instances will be affected."
1592 % network_name)
1593 else:
1594 feedback_fn("Disabling instance communication network, only new"
1595 " instances will be affected.")
1596
1597 cfg.SetInstanceCommunicationNetwork(network_name)
1598
1599 if network_name:
1600 return LUClusterSetParams._EnsureInstanceCommunicationNetwork(
1601 cfg,
1602 network_name)
1603 else:
1604 return None
1605
1606 def Exec(self, feedback_fn):
1607 """Change the parameters of the cluster.
1608
1609 """
1610 # re-read the fresh configuration
1611 self.cluster = self.cfg.GetClusterInfo()
1612 if self.op.enabled_disk_templates:
1613 self.cluster.enabled_disk_templates = \
1614 list(self.op.enabled_disk_templates)
1615 # save the changes
1616 self.cfg.Update(self.cluster, feedback_fn)
1617
1618 self._SetVgName(feedback_fn)
1619
1620 self.cluster = self.cfg.GetClusterInfo()
1621 self._SetFileStorageDir(feedback_fn)
1622 self.cfg.Update(self.cluster, feedback_fn)
1623 self._SetDrbdHelper(feedback_fn)
1624
1625 # re-read the fresh configuration again
1626 self.cluster = self.cfg.GetClusterInfo()
1627
1628 ensure_kvmd = False
1629
1630 active = constants.DATA_COLLECTOR_STATE_ACTIVE
1631 if self.op.enabled_data_collectors is not None:
1632 for name, val in self.op.enabled_data_collectors.items():
1633 self.cluster.data_collectors[name][active] = val
1634
1635 if self.op.data_collector_interval:
1636 internal = constants.DATA_COLLECTOR_PARAMETER_INTERVAL
1637 for name, val in self.op.data_collector_interval.items():
1638 self.cluster.data_collectors[name][internal] = int(val)
1639
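# Shape of the two opcode fields consumed above (the collector name is a
# purely illustrative assumption): enabled_data_collectors is a {name: bool}
# map and data_collector_interval a {name: interval} map with integer
# values, e.g.
#
#   enabled_data_collectors = {"cpu-avg-load": True}
#   data_collector_interval = {"cpu-avg-load": 30}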
1640 if self.op.hvparams:
1641 self.cluster.hvparams = self.new_hvparams
1642 if self.op.os_hvp:
1643 self.cluster.os_hvp = self.new_os_hvp
1644 if self.op.enabled_hypervisors is not None:
1645 self.cluster.hvparams = self.new_hvparams
1646 self.cluster.enabled_hypervisors = self.op.enabled_hypervisors
1647 ensure_kvmd = True
1648 if self.op.beparams:
1649 self.cluster.beparams[constants.PP_DEFAULT] = self.new_beparams
1650 if self.op.nicparams:
1651 self.cluster.nicparams[constants.PP_DEFAULT] = self.new_nicparams
1652 if self.op.ipolicy:
1653 self.cluster.ipolicy = self.new_ipolicy
1654 if self.op.osparams:
1655 self.cluster.osparams = self.new_osp
1656 if self.op.osparams_private_cluster:
1657 self.cluster.osparams_private_cluster = self.new_osp_private
1658 if self.op.ndparams:
1659 self.cluster.ndparams = self.new_ndparams
1660 if self.op.diskparams:
1661 self.cluster.diskparams = self.new_diskparams
1662 if self.op.hv_state:
1663 self.cluster.hv_state_static = self.new_hv_state
1664 if self.op.disk_state:
1665 self.cluster.disk_state_static = self.new_disk_state
1666
1667 if self.op.candidate_pool_size is not None:
1668 self.cluster.candidate_pool_size = self.op.candidate_pool_size
1669 # we need to update the pool size here, otherwise the save will fail
1670 AdjustCandidatePool(self, [])
1671
1672 if self.op.max_running_jobs is not None:
1673 self.cluster.max_running_jobs = self.op.max_running_jobs
1674
1675 if self.op.max_tracked_jobs is not None:
1676 self.cluster.max_tracked_jobs = self.op.max_tracked_jobs
1677
1678 if self.op.maintain_node_health is not None:
1679 if self.op.maintain_node_health and not constants.ENABLE_CONFD:
1680 feedback_fn("Note: CONFD was disabled at build time, node health"
1681 " maintenance is not useful (still enabling it)")
1682 self.cluster.maintain_node_health = self.op.maintain_node_health
1683
1684 if self.op.modify_etc_hosts is not None:
1685 self.cluster.modify_etc_hosts = self.op.modify_etc_hosts
1686
1687 if self.op.prealloc_wipe_disks is not None:
1688 self.cluster.prealloc_wipe_disks = self.op.prealloc_wipe_disks
1689
1690 if self.op.add_uids is not None:
1691 uidpool.AddToUidPool(self.cluster.uid_pool, self.op.add_uids)
1692
1693 if self.op.remove_uids is not None:
1694 uidpool.RemoveFromUidPool(self.cluster.uid_pool, self.op.remove_uids)
1695
1696 if self.op.uid_pool is not None:
1697 self.cluster.uid_pool = self.op.uid_pool
1698
1699 if self.op.default_iallocator is not None:
1700 self.cluster.default_iallocator = self.op.default_iallocator
1701
1702 if self.op.default_iallocator_params is not None:
1703 self.cluster.default_iallocator_params = self.op.default_iallocator_params
1704
1705 if self.op.reserved_lvs is not None:
1706 self.cluster.reserved_lvs = self.op.reserved_lvs
1707
1708 if self.op.use_external_mip_script is not None:
1709 self.cluster.use_external_mip_script = self.op.use_external_mip_script
1710
1711 if self.op.enabled_user_shutdown is not None and \
1712 self.cluster.enabled_user_shutdown != self.op.enabled_user_shutdown:
1713 self.cluster.enabled_user_shutdown = self.op.enabled_user_shutdown
1714 ensure_kvmd = True
1715
1716 def helper_os(aname, mods, desc):
1717 desc += " OS list"
1718 lst = getattr(self.cluster, aname)
1719 for key, val in mods:
1720 if key == constants.DDM_ADD:
1721 if val in lst:
1722 feedback_fn("OS %s already in %s, ignoring" % (val, desc))
1723 else:
1724 lst.append(val)
1725 elif key == constants.DDM_REMOVE:
1726 if val in lst:
1727 lst.remove(val)
1728 else:
1729 feedback_fn("OS %s not found in %s, ignoring" % (val, desc))
1730 else:
1731 raise errors.ProgrammerError("Invalid modification '%s'" % key)
1732
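# Hedged example of the "mods" argument handled by helper_os (OS names are
# hypothetical): it is a list of (operation, OS name) pairs, e.g.
#
#   [(constants.DDM_ADD, "debian-image"),
#    (constants.DDM_REMOVE, "lenny-image")]
#
# which would add the first OS to, and drop the second from, the given list.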
1733 if self.op.hidden_os:
1734 helper_os("hidden_os", self.op.hidden_os, "hidden")
1735
1736 if self.op.blacklisted_os:
1737 helper_os("blacklisted_os", self.op.blacklisted_os, "blacklisted")
1738
1739 if self.op.mac_prefix:
1740 self.cluster.mac_prefix = self.op.mac_prefix
1741
1742 if self.op.master_netdev:
1743 master_params = self.cfg.GetMasterNetworkParameters()
1744 ems = self.cfg.GetUseExternalMipScript()
1745 feedback_fn("Shutting down master ip on the current netdev (%s)" %
1746 self.cluster.master_netdev)
1747 result = self.rpc.call_node_deactivate_master_ip(master_params.uuid,
1748 master_params, ems)
1749 if not self.op.force:
1750 result.Raise("Could not disable the master ip")
1751 else:
1752 if result.fail_msg:
1753 msg = ("Could not disable the master ip (continuing anyway): %s" %
1754 result.fail_msg)
1755 feedback_fn(msg)
1756 feedback_fn("Changing master_netdev from %s to %s" %
1757 (master_params.netdev, self.op.master_netdev))
1758 self.cluster.master_netdev = self.op.master_netdev
1759
1760 if self.op.master_netmask:
1761 master_params = self.cfg.GetMasterNetworkParameters()
1762 feedback_fn("Changing master IP netmask to %s" % self.op.master_netmask)
1763 result = self.rpc.call_node_change_master_netmask(
1764 master_params.uuid, master_params.netmask,
1765 self.op.master_netmask, master_params.ip,
1766 master_params.netdev)
1767 result.Warn("Could not change the master IP netmask", feedback_fn)
1768 self.cluster.master_netmask = self.op.master_netmask
1769
1770 if self.op.install_image:
1771 self.cluster.install_image = self.op.install_image
1772
1773 if self.op.zeroing_image is not None:
1774 CheckImageValidity(self.op.zeroing_image,
1775 "Zeroing image must be an absolute path or a URL")
1776 self.cluster.zeroing_image = self.op.zeroing_image
1777
1778 self.cfg.Update(self.cluster, feedback_fn)
1779
1780 if self.op.master_netdev:
1781 master_params = self.cfg.GetMasterNetworkParameters()
1782 feedback_fn("Starting the master ip on the new master netdev (%s)" %
1783 self.op.master_netdev)
1784 ems = self.cfg.GetUseExternalMipScript()
1785 result = self.rpc.call_node_activate_master_ip(master_params.uuid,
1786 master_params, ems)
1787 result.Warn("Could not re-enable the master ip on the master,"
1788 " please restart manually", self.LogWarning)
1789
1790 # Even though 'self.op.enabled_user_shutdown' is being tested
1791 # above, the RPCs can only be done after 'self.cfg.Update' because
1792 # this will update the cluster object and sync 'Ssconf', and kvmd
1793 # uses 'Ssconf'.
1794 if ensure_kvmd:
1795 EnsureKvmdOnNodes(self, feedback_fn)
1796
1797 if self.op.compression_tools is not None:
1798 self.cfg.SetCompressionTools(self.op.compression_tools)
1799
1800 network_name = self.op.instance_communication_network
1801 if network_name is not None:
1802 return self._ModifyInstanceCommunicationNetwork(self.cfg,
1803 network_name, feedback_fn)
1804 else:
1805 return None
1806
1807
1808 class LUClusterVerify(NoHooksLU):
1809 """Submits all jobs necessary to verify the cluster.
1810
1811 """
1812 REQ_BGL = False
1813
1814 def ExpandNames(self):
1815 self.needed_locks = {}
1816
1817 def Exec(self, feedback_fn):
1818 jobs = []
1819
1820 if self.op.group_name:
1821 groups = [self.op.group_name]
1822 depends_fn = lambda: None
1823 else:
1824 groups = self.cfg.GetNodeGroupList()
1825
1826 # Verify global configuration
1827 jobs.append([
1828 opcodes.OpClusterVerifyConfig(ignore_errors=self.op.ignore_errors),
1829 ])
1830
1831 # Always depend on global verification
1832 depends_fn = lambda: [(-len(jobs), [])]
1833
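# Note on the dependency value built above (the semantics of the empty status
# list are an assumption): each per-group opcode gets depends=[(-len(jobs), [])],
# i.e. a negative job reference that is relative to the jobs submitted
# together with it and therefore points at the OpClusterVerifyConfig job
# appended just before. This makes every group verification wait for the
# global configuration check.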
1834 jobs.extend(
1835 [opcodes.OpClusterVerifyGroup(group_name=group,
1836 ignore_errors=self.op.ignore_errors,
1837 depends=depends_fn())]
1838 for group in groups)
1839
1840 # Fix up all parameters
1841 for op in itertools.chain(*jobs): # pylint: disable=W0142
1842 op.debug_simulate_errors = self.op.debug_simulate_errors
1843 op.verbose = self.op.verbose
1844 op.error_codes = self.op.error_codes
1845 try:
1846 op.skip_checks = self.op.skip_checks
1847 except AttributeError:
1848 assert not isinstance(op, opcodes.OpClusterVerifyGroup)
1849
1850 return ResultWithJobs(jobs)
1851
1852
1853 class _VerifyErrors(object):
1854 """Mix-in for cluster/group verify LUs.
1855
1856 It provides _Error and _ErrorIf, and updates the self.bad boolean. (Expects
1857 self.op and self._feedback_fn to be available.)
1858
1859 """
1860
1861 ETYPE_FIELD = "code"
1862 ETYPE_ERROR = constants.CV_ERROR
1863 ETYPE_WARNING = constants.CV_WARNING
1864
1865 def _Error(self, ecode, item, msg, *args, **kwargs):
1866 """Format an error message.
1867
1868 Based on the opcode's error_codes parameter, either format a
1869 parseable error code, or a simpler error string.
1870
1871 This must be called only from Exec and functions called from Exec.
1872
1873 """
1874 ltype = kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR)
1875 itype, etxt, _ = ecode
1876 # If the error code is in the list of ignored errors, demote the error to a
1877 # warning
1878 if etxt in self.op.ignore_errors: # pylint: disable=E1101
1879 ltype = self.ETYPE_WARNING
1880 # first complete the msg
1881 if args:
1882 msg = msg % args
1883 # then format the whole message
1884 if self.op.error_codes: # This is a mix-in. pylint: disable=E1101
1885 msg = "%s:%s:%s:%s:%s" % (ltype, etxt, itype, item, msg)
1886 else:
1887 if item:
1888 item = " " + item
1889 else:
1890 item = ""
1891 msg = "%s: %s%s: %s" % (ltype, itype, item, msg)
1892 # and finally report it via the feedback_fn
1893 self._feedback_fn(" - %s" % msg) # Mix-in. pylint: disable=E1101
1894 # do not mark the operation as failed for WARN cases only
1895 if ltype == self.ETYPE_ERROR:
1896 self.bad = True
1897
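# Rough shape of the messages emitted by _Error (values are illustrative):
# with opcode error_codes set, a parseable line such as
#
#   "ERROR:ECLUSTERCERT:cluster::certificate has expired"
#
# is reported (ltype:etxt:itype:item:msg, with item empty here); otherwise
# the human-readable form "ERROR: cluster: certificate has expired" is used.
# Error codes listed in ignore_errors are demoted from ERROR to WARNING.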
1898 def _ErrorIf(self, cond, *args, **kwargs):
1899 """Log an error message if the passed condition is True.
1900
1901 """
1902 if (bool(cond)
1903 or self.op.debug_simulate_errors): # pylint: disable=E1101
1904 self._Error(*args, **kwargs)
1905
1906
1907 def _GetAllHypervisorParameters(cluster, instances):
1908 """Compute the set of all hypervisor parameters.
1909
1910 @type cluster: L{objects.Cluster}
1911 @param cluster: the cluster object
1912 @type instances: list of L{objects.Instance}
1913 @param instances: additional instances from which to obtain parameters
1914 @rtype: list of (origin, hypervisor, parameters)
1915 @return: a list with all parameters found, indicating the hypervisor they
1916 apply to, and the origin (can be "cluster", "os X", or "instance Y")
1917
1918 """
1919 hvp_data = []
1920
1921 for hv_name in cluster.enabled_hypervisors:
1922 hvp_data.append(("cluster", hv_name, cluster.GetHVDefaults(hv_name)))
1923
1924 for os_name, os_hvp in cluster.os_hvp.items():
1925 for hv_name, hv_params in os_hvp.items():
1926 if hv_params:
1927 full_params = cluster.GetHVDefaults(hv_name, os_name=os_name)
1928 hvp_data.append(("os %s" % os_name, hv_name, full_params))
1929
1930 # TODO: collapse identical parameter values in a single one
1931 for instance in instances:
1932 if instance.hvparams:
1933 hvp_data.append(("instance %s" % instance.name, instance.hypervisor,
1934 cluster.FillHV(instance)))
1935
1936 return hvp_data
1937
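# Illustrative shape of the list returned above (names and dicts are made up):
#
#   [("cluster", "kvm", {...cluster-level defaults...}),
#    ("os debian-image", "kvm", {...defaults with per-OS overrides...}),
#    ("instance web1", "kvm", {...parameters fully filled for one instance...})]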
1938
1939 class LUClusterVerifyConfig(NoHooksLU, _VerifyErrors):
1940 """Verifies the cluster config.
1941
1942 """
1943 REQ_BGL = False
1944
1945 def _VerifyHVP(self, hvp_data):
1946 """Verifies locally the syntax of the hypervisor parameters.
1947
1948 """
1949 for item, hv_name, hv_params in hvp_data:
1950 msg = ("hypervisor %s parameters syntax check (source %s): %%s" %
1951 (hv_name, item))
1952 try:
1953 hv_class = hypervisor.GetHypervisorClass(hv_name)
1954 utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
1955 hv_class.CheckParameterSyntax(hv_params)
1956 except errors.GenericError, err:
1957 self._ErrorIf(True, constants.CV_ECLUSTERCFG, None, msg % str(err))
1958
1959 def ExpandNames(self):
1960 self.needed_locks = dict.fromkeys(locking.LEVELS, locking.ALL_SET)
1961 self.share_locks = ShareAll()
1962
1963 def CheckPrereq(self):
1964 """Check prerequisites.
1965
1966 """
1967 # Retrieve all information
1968 self.all_group_info = self.cfg.GetAllNodeGroupsInfo()
1969 self.all_node_info = self.cfg.GetAllNodesInfo()
1970 self.all_inst_info = self.cfg.GetAllInstancesInfo()
1971
1972 def Exec(self, feedback_fn):
1973 """Verify integrity of cluster, performing various test on nodes.
1974
1975 """
1976 self.bad = False
1977 self._feedback_fn = feedback_fn
1978
1979 feedback_fn("* Verifying cluster config")
1980
1981 for msg in self.cfg.VerifyConfig():
1982 self._ErrorIf(True, constants.CV_ECLUSTERCFG, None, msg)
1983
1984 feedback_fn("* Verifying cluster certificate files")
1985
1986 for cert_filename in pathutils.ALL_CERT_FILES:
1987 (errcode, msg) = utils.VerifyCertificate(cert_filename)
1988 self._ErrorIf(errcode, constants.CV_ECLUSTERCERT, None, msg, code=errcode)
1989
1990 self._ErrorIf(not utils.CanRead(constants.LUXID_USER,
1991 pathutils.NODED_CERT_FILE),
1992 constants.CV_ECLUSTERCERT,
1993 None,
1994 pathutils.NODED_CERT_FILE + " must be accessible by the " +
1995 constants.LUXID_USER + " user")
1996
1997 feedback_fn("* Verifying hypervisor parameters")
1998
1999 self._VerifyHVP(_GetAllHypervisorParameters(self.cfg.GetClusterInfo(),
2000 self.all_inst_info.values()))
2001
2002 feedback_fn("* Verifying all nodes belong to an existing group")
2003
2004 # We do this verification here because, should this bogus circumstance
2005 # occur, it would never be caught by VerifyGroup, which only acts on
2006 # nodes/instances reachable from existing node groups.
2007
2008 dangling_nodes = set(node for node in self.all_node_info.values()
2009 if node.group not in self.all_group_info)
2010
2011 dangling_instances = {}
2012 no_node_instances = []
2013
2014 for inst in self.all_inst_info.values():
2015 if inst.primary_node in [node.uuid for node in dangling_nodes]:
2016 dangling_instances.setdefault(inst.primary_node, []).append(inst)
2017 elif inst.primary_node not in self.all_node_info:
2018 no_node_instances.append(inst)
2019
2020 pretty_dangling = [
2021 "%s (%s)" %
2022 (node.name,
2023 utils.CommaJoin(inst.name for
2024 inst in dangling_instances.get(node.uuid, [])))
2025 for node in dangling_nodes]
2026
2027 self._ErrorIf(bool(dangling_nodes), constants.CV_ECLUSTERDANGLINGNODES,
2028 None,
2029 "the following nodes (and their instances) belong to a non"
2030 " existing group: %s", utils.CommaJoin(pretty_dangling))
2031
2032 self._ErrorIf(bool(no_node_instances), constants.CV_ECLUSTERDANGLINGINST,
2033 None,
2034 "the following instances have a non-existing primary-node:"
2035 " %s", utils.CommaJoin(inst.name for
2036 inst in no_node_instances))
2037
2038 return not self.bad
2039
2040
2041 class LUClusterVerifyGroup(LogicalUnit, _VerifyErrors):
2042 """Verifies the status of a node group.
2043
2044 """
2045 HPATH = "cluster-verify"
2046 HTYPE = constants.HTYPE_CLUSTER
2047 REQ_BGL = False
2048
2049 _HOOKS_INDENT_RE = re.compile("^", re.M)
2050
2051 class NodeImage(object):
2052 """A class representing the logical and physical status of a node.
2053
2054 @type uuid: string
2055 @ivar uuid: the node UUID to which this object refers
2056 @ivar volumes: a structure as returned from
2057 L{ganeti.backend.GetVolumeList} (runtime)
2058 @ivar instances: a list of running instances (runtime)
2059 @ivar pinst: list of configured primary instances (config)
2060 @ivar sinst: list of configured secondary instances (config)
2061 @ivar sbp: dictionary of {primary-node: list of instances} for all
2062 instances for which this node is secondary (config)
2063 @ivar mfree: free memory, as reported by hypervisor (runtime)
2064 @ivar dfree: free disk, as reported by the node (runtime)
2065 @ivar offline: the offline status (config)
2066 @type rpc_fail: boolean
2067 @ivar rpc_fail: whether the RPC verify call was successful (overall,
2068 not whether the individual keys were correct) (runtime)
2069 @type lvm_fail: boolean
2070 @ivar lvm_fail: whether the RPC call didn't return valid LVM data
2071 @type hyp_fail: boolean
2072 @ivar hyp_fail: whether the RPC call didn't return the instance list
2073 @type ghost: boolean
2074 @ivar ghost: whether this is a known node or not (config)
2075 @type os_fail: boolean
2076 @ivar os_fail: whether the RPC call didn't return valid OS data
2077 @type oslist: dict
2078 @ivar oslist: OSes as diagnosed by DiagnoseOS, keyed by OS name
2079 @type vm_capable: boolean
2080 @ivar vm_capable: whether the node can host instances
2081 @type pv_min: float
2082 @ivar pv_min: size in MiB of the smallest PVs
2083 @type pv_max: float
2084 @ivar pv_max: size in MiB of the biggest PVs
2085
2086 """
2087 def __init__(self, offline=False, uuid=None, vm_capable=True):
2088 self.uuid = uuid
2089 self.volumes = {}
2090 self.instances = []
2091 self.pinst = []
2092 self.sinst = []
2093 self.sbp = {}
2094 self.mfree = 0
2095 self.dfree = 0
2096 self.offline = offline
2097 self.vm_capable = vm_capable
2098 self.rpc_fail = False
2099 self.lvm_fail = False
2100 self.hyp_fail = False
2101 self.ghost = False
2102 self.os_fail = False
2103 self.oslist = {}
2104 self.pv_min = None
2105 self.pv_max = None
2106
2107 def ExpandNames(self):
2108 # This raises errors.OpPrereqError on its own:
2109 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
2110
2111 # Get instances in node group; this is unsafe and needs verification later
2112 inst_uuids = \
2113 self.cfg.GetNodeGroupInstances(self.group_uuid, primary_only=True)
2114
2115 self.needed_locks = {
2116 locking.LEVEL_INSTANCE: self.cfg.GetInstanceNames(inst_uuids),
2117 locking.LEVEL_NODEGROUP: [self.group_uuid],
2118 locking.LEVEL_NODE: [],
2119
2120 # This opcode is run by watcher every five minutes and acquires all nodes
2121 # for a group. It doesn't run for a long time, so it's better to acquire
2122 # the node allocation lock as well.
2123 locking.LEVEL_NODE_ALLOC: locking.ALL_SET,
2124 }
2125
2126 self.share_locks = ShareAll()
2127
2128 def DeclareLocks(self, level):
2129 if level == locking.LEVEL_NODE:
2130 # Get members of node group; this is unsafe and needs verification later
2131 nodes = set(self.cfg.GetNodeGroup(self.group_uuid).members)
2132
2133 # In Exec(), we warn about mirrored instances that have primary and
2134 # secondary living in separate node groups. To fully verify that
2135 # volumes for these instances are healthy, we will need to do an
2136 # extra call to their secondaries. We ensure here those nodes will
2137 # be locked.
2138 for inst_name in self.owned_locks(locking.LEVEL_INSTANCE):
2139 # Important: access only the instances whose lock is owned
2140 instance = self.cfg.GetInstanceInfoByName(inst_name)
2141 if instance.disk_template in constants.DTS_INT_MIRROR:
2142 nodes.update(self.cfg.GetInstanceSecondaryNodes(instance.uuid))
2143
2144 self.needed_locks[locking.LEVEL_NODE] = nodes
2145
2146 def CheckPrereq(self):
2147 assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
2148 self.group_info = self.cfg.GetNodeGroup(self.group_uuid)
2149
2150 group_node_uuids = set(self.group_info.members)
2151 group_inst_uuids = \
2152 self.cfg.GetNodeGroupInstances(self.group_uuid, primary_only=True)
2153
2154 unlocked_node_uuids = \
2155 group_node_uuids.difference(self.owned_locks(locking.LEVEL_NODE))
2156
2157 unlocked_inst_uuids = \
2158 group_inst_uuids.difference(
2159 [self.cfg.GetInstanceInfoByName(name).uuid
2160 for name in self.owned_locks(locking.LEVEL_INSTANCE)])
2161
2162 if unlocked_node_uuids:
2163 raise errors.OpPrereqError(
2164 "Missing lock for nodes: %s" %
2165 utils.CommaJoin(self.cfg.GetNodeNames(unlocked_node_uuids)),
2166 errors.ECODE_STATE)
2167
2168 if unlocked_inst_uuids:
2169 raise errors.OpPrereqError(
2170 "Missing lock for instances: %s" %
2171 utils.CommaJoin(self.cfg.GetInstanceNames(unlocked_inst_uuids)),
2172 errors.ECODE_STATE)
2173
2174 self.all_node_info = self.cfg.GetAllNodesInfo()
2175 self.all_inst_info = self.cfg.GetAllInstancesInfo()
2176
2177 self.my_node_uuids = group_node_uuids
2178 self.my_node_info = dict((node_uuid, self.all_node_info[node_uuid])
2179 for node_uuid in group_node_uuids)
2180
2181 self.my_inst_uuids = group_inst_uuids
2182 self.my_inst_info = dict((inst_uuid, self.all_inst_info[inst_uuid])
2183 for inst_uuid in group_inst_uuids)
2184
2185 # We detect here the nodes that will need the extra RPC calls for verifying
2186 # split LV volumes; they should be locked.
2187 extra_lv_nodes = set()
2188
2189 for inst in self.my_inst_info.values():
2190 if inst.disk_template in constants.DTS_INT_MIRROR:
2191 inst_nodes = self.cfg.GetInstanceNodes(inst.uuid)
2192 for nuuid in inst_nodes:
2193 if self.all_node_info[nuuid].group != self.group_uuid:
2194 extra_lv_nodes.add(nuuid)
2195
2196 unlocked_lv_nodes = \
2197 extra_lv_nodes.difference(self.owned_locks(locking.LEVEL_NODE))
2198
2199 if unlocked_lv_nodes:
2200 raise errors.OpPrereqError("Missing node locks for LV check: %s" %
2201 utils.CommaJoin(unlocked_lv_nodes),
2202 errors.ECODE_STATE)
2203 self.extra_lv_nodes = list(extra_lv_nodes)
2204
2205 def _VerifyNode(self, ninfo, nresult):
2206 """Perform some basic validation on data returned from a node.
2207
2208 - check the result data structure is well formed and has all the
2209 mandatory fields
2210 - check ganeti version
2211
2212 @type ninfo: L{objects.Node}
2213 @param ninfo: the node to check
2214 @param nresult: the results from the node
2215 @rtype: boolean
2216 @return: whether overall this call was successful (and we can expect
2217 reasonable values in the response)
2218
2219 """
2220 # main result, nresult should be a non-empty dict
2221 test = not nresult or not isinstance(nresult, dict)
2222 self._ErrorIf(test, constants.CV_ENODERPC, ninfo.name,
2223 "unable to verify node: no data returned")
2224 if test:
2225 return False
2226
2227 # compares ganeti version
2228 local_version = constants.PROTOCOL_VERSION
2229 remote_version = nresult.get("version", None)
2230 test = not (remote_version and
2231 isinstance(remote_version, (list, tuple)) and
2232 len(remote_version) == 2)
2233 self._ErrorIf(test, constants.CV_ENODERPC, ninfo.name,
2234 "connection to node returned invalid data")
2235 if test:
2236 return False
2237
2238 test = local_version != remote_version[0]
2239 self._ErrorIf(test, constants.CV_ENODEVERSION, ninfo.name,
2240 "incompatible protocol versions: master %s,"
2241 " node %s", local_version, remote_version[0])
2242 if test:
2243 return False
2244
2245 # node seems compatible, we can actually try to look into its results
2246
2247 # full package version
2248 self._ErrorIf(constants.RELEASE_VERSION != remote_version[1],
2249 constants.CV_ENODEVERSION, ninfo.name,
2250 "software version mismatch: master %s, node %s",
2251 constants.RELEASE_VERSION, remote_version[1],
2252 code=self.ETYPE_WARNING)
2253
2254 hyp_result = nresult.get(constants.NV_HYPERVISOR, None)
2255 if ninfo.vm_capable and isinstance(hyp_result, dict):
2256 for hv_name, hv_result in hyp_result.iteritems():
2257 test = hv_result is not None
2258 self._ErrorIf(test, constants.CV_ENODEHV, ninfo.name,
2259 "hypervisor %s verify failure: '%s'", hv_name, hv_result)
2260
2261 hvp_result = nresult.get(constants.NV_HVPARAMS, None)
2262 if ninfo.vm_capable and isinstance(hvp_result, list):
2263 for item, hv_name, hv_result in hvp_result:
2264 self._ErrorIf(True, constants.CV_ENODEHV, ninfo.name,
2265 "hypervisor %s parameter verify failure (source %s): %s",
2266 hv_name, item, hv_result)
2267
2268 test = nresult.get(constants.NV_NODESETUP,
2269 ["Missing NODESETUP results"])
2270 self._ErrorIf(test, constants.CV_ENODESETUP, ninfo.name,
2271 "node setup error: %s", "; ".join(test))
2272
2273 return True
2274
2275 def _VerifyNodeTime(self, ninfo, nresult,
2276 nvinfo_starttime, nvinfo_endtime):
2277 """Check the node time.
2278
2279 @type ninfo: L{objects.Node}
2280 @param ninfo: the node to check
2281 @param nresult: the remote results for the node
2282 @param nvinfo_starttime: the start time of the RPC call
2283 @param nvinfo_endtime: the end time of the RPC call
2284
2285 """
2286 ntime = nresult.get(constants.NV_TIME, None)
2287 try:
2288 ntime_merged = utils.MergeTime(ntime)
2289 except (ValueError, TypeError):
2290 self._ErrorIf(True, constants.CV_ENODETIME, ninfo.name,
2291 "Node returned invalid time")
2292 return
2293
2294 if ntime_merged < (nvinfo_starttime - constants.NODE_MAX_CLOCK_SKEW):
2295 ntime_diff = "%.01fs" % abs(nvinfo_starttime - ntime_merged)
2296 elif ntime_merged > (nvinfo_endtime + constants.NODE_MAX_CLOCK_SKEW):
2297 ntime_diff = "%.01fs" % abs(ntime_merged - nvinfo_endtime)
2298 else:
2299 ntime_diff = None
2300
2301 self._ErrorIf(ntime_diff is not None, constants.CV_ENODETIME, ninfo.name,
2302 "Node time diverges by at least %s from master node time",
2303 ntime_diff)
2304
2305 def _UpdateVerifyNodeLVM(self, ninfo, nresult, vg_name, nimg):
2306 """Check the node LVM results and update info for cross-node checks.
2307
2308 @type ninfo: L{objects.Node}
2309 @param ninfo: the node to check
2310 @param nresult: the remote results for the node
2311 @param vg_name: the configured VG name
2312 @type nimg: L{NodeImage}
2313 @param nimg: node image
2314
2315 """
2316 if vg_name is None:
2317 return
2318
2319 # checks vg existence and size > 20G
2320 vglist = nresult.get(constants.NV_VGLIST, None)
2321 test = not vglist
2322 self._ErrorIf(test, constants.CV_ENODELVM, ninfo.name,
2323 "unable to check volume groups")
2324 if not test:
2325 vgstatus = utils.CheckVolumeGroupSize(vglist, vg_name,
2326 constants.MIN_VG_SIZE)
2327 self._ErrorIf(vgstatus, constants.CV_ENODELVM, ninfo.name, vgstatus)
2328
2329 # Check PVs
2330 (errmsgs, pvminmax) = CheckNodePVs(nresult, self._exclusive_storage)
2331 for em in errmsgs:
2332 self._Error(constants.CV_ENODELVM, ninfo.name, em)
2333 if pvminmax is not None:
2334 (nimg.pv_min, nimg.pv_max) = pvminmax
2335
2336 def _VerifyGroupDRBDVersion(self, node_verify_infos):
2337 """Check cross-node DRBD version consistency.
2338
2339 @type node_verify_infos: dict
2340 @param node_verify_infos: infos about nodes as returned from the
2341 node_verify call.
2342
2343 """
2344 node_versions = {}
2345 for node_uuid, ndata in node_verify_infos.items():
2346 nresult = ndata.payload
2347 if nresult:
2348 version = nresult.get(constants.NV_DRBDVERSION, "Missing DRBD version")
2349 node_versions[node_uuid] = version
2350
2351 if len(set(node_versions.values())) > 1:
2352 for node_uuid, version in sorted(node_versions.items()):
2353 msg = "DRBD version mismatch: %s" % version
2354 self._Error(constants.CV_ENODEDRBDHELPER, node_uuid, msg,
2355 code=self.ETYPE_WARNING)
2356
2357 def _VerifyGroupLVM(self, node_image, vg_name):
2358 """Check cross-node consistency in LVM.
2359
2360 @type node_image: dict
2361 @param node_image: info about nodes, mapping from node UUIDs to
2362 L{NodeImage} objects
2363 @param vg_name: the configured VG name
2364
2365 """
2366 if vg_name is None:
2367 return
2368
2369 # Only exclusive storage needs this kind of checks
2370 if not self._exclusive_storage:
2371 return
2372
2373 # exclusive_storage wants all PVs to have the same size (approximately),
2374 # if the smallest and the biggest ones are okay, everything is fine.
2375 # pv_min is None iff pv_max is None
2376 vals = filter((lambda ni: ni.pv_min is not None), node_image.values())
2377 if not vals:
2378 return
2379 (pvmin, minnode_uuid) = min((ni.pv_min, ni.uuid) for ni in vals)
2380 (pvmax, maxnode_uuid) = max((ni.pv_max, ni.uuid) for ni in vals)
2381 bad = utils.LvmExclusiveTestBadPvSizes(pvmin, pvmax)
2382 self._ErrorIf(bad, constants.CV_EGROUPDIFFERENTPVSIZE, self.group_info.name,
2383 "PV sizes differ too much in the group; smallest (%s MB) is"
2384 " on %s, biggest (%s MB) is on %s",
2385 pvmin, self.cfg.GetNodeName(minnode_uuid),
2386 pvmax, self.cfg.GetNodeName(maxnode_uuid))
2387
2388 def _VerifyNodeBridges(self, ninfo, nresult, bridges):
2389 """Check the node bridges.
2390
2391 @type ninfo: L{objects.Node}
2392 @param ninfo: the node to check
2393 @param nresult: the remote results for the node
2394 @param bridges: the expected list of bridges
2395
2396 """
2397 if not bridges:
2398 return
2399
2400 missing = nresult.get(constants.NV_BRIDGES, None)
2401 test = not isinstance(missing, list)
2402 self._ErrorIf(test, constants.CV_ENODENET, ninfo.name,
2403 "did not return valid bridge information")
2404 if not test:
2405 self._ErrorIf(bool(missing), constants.CV_ENODENET, ninfo.name,
2406 "missing bridges: %s" % utils.CommaJoin(sorted(missing)))
2407
2408 def _VerifyNodeUserScripts(self, ninfo, nresult):
2409 """Check the results of user scripts presence and executability on the node
2410
2411 @type ninfo: L{objects.Node}
2412 @param ninfo: the node to check
2413 @param nresult: the remote results for the node
2414
2415 """
2416 test = constants.NV_USERSCRIPTS not in nresult
2417 self._ErrorIf(test, constants.CV_ENODEUSERSCRIPTS, ninfo.name,
2418 "did not return user scripts information")
2419
2420 broken_scripts = nresult.get(constants.NV_USERSCRIPTS, None)
2421 if not test:
2422 self._ErrorIf(broken_scripts, constants.CV_ENODEUSERSCRIPTS, ninfo.name,
2423 "user scripts not present or not executable: %s" %
2424 utils.CommaJoin(sorted(broken_scripts)))
2425
2426 def _VerifyNodeNetwork(self, ninfo, nresult):
2427 """Check the node network connectivity results.
2428
2429 @type ninfo: L{objects.Node}
2430 @param ninfo: the node to check
2431 @param nresult: the remote results for the node
2432
2433 """
2434 test = constants.NV_NODELIST not in nresult
2435 self._ErrorIf(test, constants.CV_ENODESSH, ninfo.name,
2436 "node hasn't returned node ssh connectivity data")
2437 if not test:
2438 if nresult[constants.NV_NODELIST]:
2439 for a_node, a_msg in nresult[constants.NV_NODELIST].items():
2440 self._ErrorIf(True, constants.CV_ENODESSH, ninfo.name,
2441 "ssh communication with node '%s': %s", a_node, a_msg)
2442
2443 test = constants.NV_NODENETTEST not in nresult
2444 self._ErrorIf(test, constants.CV_ENODENET, ninfo.name,
2445 "node hasn't returned node tcp connectivity data")
2446 if not test:
2447 if nresult[constants.NV_NODENETTEST]:
2448 nlist = utils.NiceSort(nresult[constants.NV_NODENETTEST].keys())
2449 for anode in nlist:
2450 self._ErrorIf(True, constants.CV_ENODENET, ninfo.name,
2451 "tcp communication with node '%s': %s",
2452 anode, nresult[constants.NV_NODENETTEST][anode])
2453
2454 test = constants.NV_MASTERIP not in nresult
2455 self._ErrorIf(test, constants.CV_ENODENET, ninfo.name,
2456 "node hasn't returned node master IP reachability data")
2457 if not test:
2458 if not nresult[constants.NV_MASTERIP]:
2459 if ninfo.uuid == self.master_node:
2460 msg = "the master node cannot reach the master IP (not configured?)"
2461 else:
2462 msg = "cannot reach the master IP"
2463 self._ErrorIf(True, constants.CV_ENODENET, ninfo.name, msg)
2464
2465 def _VerifyInstance(self, instance, node_image, diskstatus):
2466 """Verify an instance.
2467
2468 This function checks whether the required block devices are
2469 available on the instance's nodes, and that the nodes are in the
2470 correct state.
2471
2472 """
2473 pnode_uuid = instance.primary_node
2474 pnode_img = node_image[pnode_uuid]
2475 groupinfo = self.cfg.GetAllNodeGroupsInfo()
2476
2477 node_vol_should = {}
2478 self.cfg.GetInstanceLVsByNode(instance.uuid, lvmap=node_vol_should)
2479
2480 cluster = self.cfg.GetClusterInfo()
2481 ipolicy = ganeti.masterd.instance.CalculateGroupIPolicy(cluster,
2482 self.group_info)
2483 err = ComputeIPolicyInstanceViolation(ipolicy, instance, self.cfg)
2484 self._ErrorIf(err, constants.CV_EINSTANCEPOLICY, instance.name,
2485 utils.CommaJoin(err), code=self.ETYPE_WARNING)
2486
2487 for node_uuid in node_vol_should:
2488 n_img = node_image[node_uuid]
2489 if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
2490 # ignore missing volumes on offline or broken nodes
2491 continue
2492 for volume in node_vol_should[node_uuid]:
2493 test = volume not in n_img.volumes
2494 self._ErrorIf(test, constants.CV_EINSTANCEMISSINGDISK, instance.name,
2495 "volume %s missing on node %s", volume,
2496 self.cfg.GetNodeName(node_uuid))
2497
2498 if instance.admin_state == constants.ADMINST_UP:
2499 test = instance.uuid not in pnode_img.instances and not pnode_img.offline
2500 self._ErrorIf(test, constants.CV_EINSTANCEDOWN, instance.name,
2501 "instance not running on its primary node %s",
2502 self.cfg.GetNodeName(pnode_uuid))
2503 self._ErrorIf(pnode_img.offline, constants.CV_EINSTANCEBADNODE,
2504 instance.name, "instance is marked as running and lives on"
2505 " offline node %s", self.cfg.GetNodeName(pnode_uuid))
2506
2507 diskdata = [(nname, success, status, idx)
2508 for (nname, disks) in diskstatus.items()
2509 for idx, (success, status) in enumerate(disks)]
2510
2511 for nname, success, bdev_status, idx in diskdata:
2512 # the 'ghost node' construction in Exec() ensures that we have a
2513 # node here
2514 snode = node_image[nname]
2515 bad_snode = snode.ghost or snode.offline
2516 self._ErrorIf(instance.disks_active and
2517 not success and not bad_snode,
2518 constants.CV_EINSTANCEFAULTYDISK, instance.name,
2519 "couldn't retrieve status for disk/%s on %s: %s",
2520 idx, self.cfg.GetNodeName(nname), bdev_status)
2521
2522 if instance.disks_active and success and \
2523 (bdev_status.is_degraded or
2524 bdev_status.ldisk_status != constants.LDS_OKAY):
2525 msg = "disk/%s on %s" % (idx, self.cfg.GetNodeName(nname))
2526 if bdev_status.is_degraded:
2527 msg += " is degraded"
2528 if bdev_status.ldisk_status != constants.LDS_OKAY:
2529 msg += "; state is '%s'" % \
2530 constants.LDS_NAMES[bdev_status.ldisk_status]
2531
2532 self._Error(constants.CV_EINSTANCEFAULTYDISK, instance.name, msg)
2533
2534 self._ErrorIf(pnode_img.rpc_fail and not pnode_img.offline,
2535 constants.CV_ENODERPC, self.cfg.GetNodeName(pnode_uuid),
2536 "instance %s, connection to primary node failed",
2537 instance.name)
2538
2539 secondary_nodes = self.cfg.GetInstanceSecondaryNodes(instance.uuid)
2540 self._ErrorIf(len(secondary_nodes) > 1,
2541 constants.CV_EINSTANCELAYOUT, instance.name,
2542 "instance has multiple secondary nodes: %s",
2543 utils.CommaJoin(secondary_nodes),
2544 code=self.ETYPE_WARNING)
2545
2546 inst_nodes = self.cfg.GetInstanceNodes(instance.uuid)
2547 es_flags = rpc.GetExclusiveStorageForNodes(self.cfg, inst_nodes)
2548 if any(es_flags.values()):
2549 if instance.disk_template not in constants.DTS_EXCL_STORAGE:
2550 # Disk template not compatible with exclusive_storage: no instance
2551 # node should have the flag set
2552 es_nodes = [n
2553 for (n, es) in es_flags.items()
2554 if es]
2555 self._Error(constants.CV_EINSTANCEUNSUITABLENODE, instance.name,
2556 "instance has template %s, which is not supported on nodes"
2557 " that have exclusive storage set: %s",
2558 instance.disk_template,
2559 utils.CommaJoin(self.cfg.GetNodeNames(es_nodes)))
2560 for (idx, disk) in enumerate(self.cfg.GetInstanceDisks(instance.uuid)):
2561 self._ErrorIf(disk.spindles is None,
2562 constants.CV_EINSTANCEMISSINGCFGPARAMETER, instance.name,
2563 "number of spindles not configured for disk %s while"
2564 " exclusive storage is enabled, try running"
2565 " gnt-cluster repair-disk-sizes", idx)
2566
2567 if instance.disk_template in constants.DTS_INT_MIRROR:
2568 instance_nodes = utils.NiceSort(inst_nodes)
2569 instance_groups = {}
2570
2571 for node_uuid in instance_nodes:
2572 instance_groups.setdefault(self.all_node_info[node_uuid].group,
2573 []).append(node_uuid)
2574
2575 pretty_list = [
2576 "%s (group %s)" % (utils.CommaJoin(self.cfg.GetNodeNames(nodes)),
2577 groupinfo[group].name)
2578 # Sort so that we always list the primary node first.
2579 for group, nodes in sorted(instance_groups.items(),
2580 key=lambda (_, nodes): pnode_uuid in nodes,
2581 reverse=True)]
2582
2583 self._ErrorIf(len(instance_groups) > 1,
2584 constants.CV_EINSTANCESPLITGROUPS,
2585 instance.name, "instance has primary and secondary nodes in"
2586 " different groups: %s", utils.CommaJoin(pretty_list),
2587 code=self.ETYPE_WARNING)
2588
2589 inst_nodes_offline = []
2590 for snode in secondary_nodes:
2591 s_img = node_image[snode]
2592 self._ErrorIf(s_img.rpc_fail and not s_img.offline, constants.CV_ENODERPC,
2593 self.cfg.GetNodeName(snode),
2594 "instance %s, connection to secondary node failed",
2595 instance.name)
2596
2597 if s_img.offline:
2598 inst_nodes_offline.append(snode)
2599
2600 # warn that the instance lives on offline nodes
2601 self._ErrorIf(inst_nodes_offline, constants.CV_EINSTANCEBADNODE,
2602 instance.name, "instance has offline secondary node(s) %s",
2603 utils.CommaJoin(self.cfg.GetNodeNames(inst_nodes_offline)))
2604 # ... or ghost/non-vm_capable nodes
2605 for node_uuid in inst_nodes:
2606 self._ErrorIf(node_image[node_uuid].ghost, constants.CV_EINSTANCEBADNODE,
2607 instance.name, "instance lives on ghost node %s",
2608 self.cfg.GetNodeName(node_uuid))
2609 self._ErrorIf(not node_image[node_uuid].vm_capable,
2610 constants.CV_EINSTANCEBADNODE, instance.name,
2611 "instance lives on non-vm_capable node %s",
2612 self.cfg.GetNodeName(node_uuid))
2613
2614 def _VerifyOrphanVolumes(self, node_vol_should, node_image, reserved):
2615 """Verify if there are any unknown volumes in the cluster.
2616
2617 The .os, .swap and backup volumes are ignored. All other volumes are
2618 reported as unknown.
2619
2620 @type reserved: L{ganeti.utils.FieldSet}
2621 @param reserved: a FieldSet of reserved volume names
2622
2623 """
2624 for node_uuid, n_img in node_image.items():
2625 if (n_img.offline or n_img.rpc_fail or n_img.lvm_fail or
2626 self.all_node_info[node_uuid].group != self.group_uuid):
2627 # skip non-healthy nodes
2628 continue
2629 for volume in n_img.volumes:
2630 test = ((node_uuid not in node_vol_should or
2631 volume not in node_vol_should[node_uuid]) and
2632 not reserved.Matches(volume))
2633 self._ErrorIf(test, constants.CV_ENODEORPHANLV,
2634 self.cfg.GetNodeName(node_uuid),
2635 "volume %s is unknown", volume,
2636 code=_VerifyErrors.ETYPE_WARNING)
2637
2638 def _VerifyNPlusOneMemory(self, node_image, all_insts):
2639 """Verify N+1 Memory Resilience.
2640
2641 Check that if one single node dies we can still start all the
2642 instances it was primary for.
2643
2644 """
2645 cluster_info = self.cfg.GetClusterInfo()
2646 for node_uuid, n_img in node_image.items():
2647 # This code checks that every node which is now listed as
2648 # secondary has enough memory to host all instances it is
2649 # supposed to host, should a single other node in the cluster fail.
2650 # FIXME: not ready for failover to an arbitrary node
2651 # FIXME: does not support file-backed instances
2652 # WARNING: we currently take into account down instances as well
2653 # as up ones, considering that even if they're down someone
2654 # might want to start them even in the event of a node failure.
2655 if n_img.offline or \
2656 self.all_node_info[node_uuid].group != self.group_uuid:
2657 # we're skipping nodes marked offline and nodes in other groups from
2658 # the N+1 warning, since most likely we don't have good memory
2659 # information from them; we already list instances living on such
2660 # nodes, and that's enough warning
2661 continue
2662 #TODO(dynmem): also consider ballooning out other instances
2663 for prinode, inst_uuids in n_img.sbp.items():
2664 needed_mem = 0
2665 for inst_uuid in inst_uuids:
2666 bep = cluster_info.FillBE(all_insts[inst_uuid])
2667 if bep[constants.BE_AUTO_BALANCE]:
2668 needed_mem += bep[constants.BE_MINMEM]
2669 test = n_img.mfree < needed_mem
2670 self._ErrorIf(test, constants.CV_ENODEN1,
2671 self.cfg.GetNodeName(node_uuid),
2672 "not enough memory to accomodate instance failovers"
2673 " should node %s fail (%dMiB needed, %dMiB available)",
2674 self.cfg.GetNodeName(prinode), needed_mem, n_img.mfree)
2675
2676 def _VerifyClientCertificates(self, nodes, all_nvinfo):
2677 """Verifies the consistency of the client certificates.
2678
2679 This includes several aspects:
2680 - the individual validation of all nodes' certificates
2681 - the consistency of the master candidate certificate map
2682 - the consistency of the master candidate certificate map with the
2683 certificates that the master candidates are actually using.
2684
2685 @param nodes: the list of nodes to consider in this verification
2686 @param all_nvinfo: the map of results of the verify_node call to
2687 all nodes
2688
2689 """
2690 candidate_certs = self.cfg.GetClusterInfo().candidate_certs
2691 if candidate_certs is None or len(candidate_certs) == 0:
2692 self._ErrorIf(
2693 True, constants.CV_ECLUSTERCLIENTCERT, None,
2694 "The cluster's list of master candidate certificates is empty."
2695 " If you just updated the cluster, please run"
2696 " 'gnt-cluster renew-crypto --new-node-certificates'.")
2697 return
2698
2699 self._ErrorIf(
2700 len(candidate_certs) != len(set(candidate_certs.values())),
2701 constants.CV_ECLUSTERCLIENTCERT, None,
2702 "There are at least two master candidates configured to use the same"
2703 " certificate.")
2704
2705 # collect the client certificate
2706 for node in nodes:
2707 if node.offline:
2708 continue
2709
2710 nresult = all_nvinfo[node.uuid]
2711 if nresult.fail_msg or not nresult.payload:
2712 continue
2713
2714 (errcode, msg) = nresult.payload.get(constants.NV_CLIENT_CERT, None)
2715
2716 self._ErrorIf(
2717 errcode is not None, constants.CV_ECLUSTERCLIENTCERT, None,
2718 "Client certificate of node '%s' failed validation: %s (code '%s')",
2719 node.uuid, msg, errcode)
2720
2721 if not errcode:
2722 digest = msg
2723 if node.master_candidate:
2724 if node.uuid in candidate_certs:
2725 self._ErrorIf(
2726 digest != candidate_certs[node.uuid],
2727 constants.CV_ECLUSTERCLIENTCERT, None,
2728 "Client certificate digest of master candidate '%s' does not"
2729 " match its entry in the cluster's map of master candidate"
2730 " certificates. Expected: %s Got: %s", node.uuid,
2731 digest, candidate_certs[node.uuid])
2732 else:
2733 self._ErrorIf(
2734 True, constants.CV_ECLUSTERCLIENTCERT, None,
2735 "The master candidate '%s' does not have an entry in the"
2736 " map of candidate certificates.", node.uuid)
2737 self._ErrorIf(
2738 digest in candidate_certs.values(),
2739 constants.CV_ECLUSTERCLIENTCERT, None,
2740 "Master candidate '%s' is using a certificate of another node.",
2741 node.uuid)
2742 else:
2743 self._ErrorIf(
2744 node.uuid in candidate_certs,
2745 constants.CV_ECLUSTERCLIENTCERT, None,
2746 "Node '%s' is not a master candidate, but still listed in the"
2747 " map of master candidate certificates.", node.uuid)
2748 self._ErrorIf(
2749 (node.uuid not in candidate_certs) and
2750 (digest in candidate_certs.values()),
2751 constants.CV_ECLUSTERCLIENTCERT, None,
2752 "Node '%s' is not a master candidate and is incorrectly using a"
2753 " certificate of another node which is master candidate.",
2754 node.uuid)
2755
2756 def _VerifySshSetup(self, nodes, all_nvinfo):
2757 """Evaluates the verification results of the SSH setup.
2758
2759 @param nodes: List of L{objects.Node} objects
2760 @param all_nvinfo: RPC results
2761
2762 """
2763 for node in nodes:
2764 if not node.offline:
2765 nresult = all_nvinfo[node.uuid]
2766 if nresult.fail_msg or not nresult.payload:
2767 self._ErrorIf(True, constants.CV_ENODESSH, node.name,
2768 "Could not verify the SSH setup of this node.")
2769 return
2770 result = nresult.payload.get(constants.NV_SSH_SETUP, None)
2771 error_msg = ""
2772 if isinstance(result, list):
2773 error_msg = " ".join(result)
2774 self._ErrorIf(result,
2775 constants.CV_ENODESSH, None, error_msg)
2776
2777 def _VerifyFiles(self, nodes, master_node_uuid, all_nvinfo,
2778 (files_all, files_opt, files_mc, files_vm)):
2779 """Verifies file checksums collected from all nodes.
2780
2781 @param nodes: List of L{objects.Node} objects
2782 @param master_node_uuid: UUID of master node
2783 @param all_nvinfo: RPC results
2784
2785 """
2786 # Define functions determining which nodes to consider for a file
2787 files2nodefn = [
2788 (files_all, None),
2789 (files_mc, lambda node: (node.master_candidate or
2790 node.uuid == master_node_uuid)),
2791 (files_vm, lambda node: node.vm_capable),
2792 ]
2793
2794 # Build mapping from filename to list of nodes which should have the file
2795 nodefiles = {}
2796 for (files, fn) in files2nodefn:
2797 if fn is None:
2798 filenodes = nodes
2799 else:
2800 filenodes = filter(fn, nodes)
2801 nodefiles.update((filename,
2802 frozenset(map(operator.attrgetter("uuid"), filenodes)))
2803 for filename in files)
2804
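# Illustrative content of "nodefiles" after the loop above (paths and UUIDs
# are placeholders): it maps every expected filename to the frozenset of node
# UUIDs that should carry it, e.g.
#
#   {"<file distributed to all nodes>": frozenset(["uuid-a", "uuid-b"]),
#    "<master-candidate-only file>": frozenset(["uuid-a"])}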
2805 assert set(nodefiles) == (files_all | files_mc | files_vm)
2806
2807 fileinfo = dict((filename, {}) for filename in nodefiles)
2808 ignore_nodes = set()
2809
2810 for node in nodes:
2811 if node.offline:
2812 ignore_nodes.add(node.uuid)
2813 continue
2814
2815 nresult = all_nvinfo[node.uuid]
2816
2817 if nresult.fail_msg or not nresult.payload:
2818 node_files = None
2819 else:
2820 fingerprints = nresult.payload.get(constants.NV_FILELIST, {})
2821 node_files = dict((vcluster.LocalizeVirtualPath(key), value)
2822 for (key, value) in fingerprints.items())
2823 del fingerprints
2824
2825 test = not (node_files and isinstance(node_files, dict))
2826 self._ErrorIf(test, constants.CV_ENODEFILECHECK, node.name,
2827 "Node did not return file checksum data")
2828 if test:
2829 ignore_nodes.add(node.uuid)
2830 continue
2831
2832 # Build per-checksum mapping from filename to nodes having it
2833 for (filename, checksum) in node_files.items():
2834 assert filename in nodefiles
2835 fileinfo[filename].setdefault(checksum, set()).add(node.uuid)
2836
2837 for (filename, checksums) in fileinfo.items():
2838 assert compat.all(len(i) > 10 for i in checksums), "Invalid checksum"
2839
2840 # Nodes having the file
2841 with_file = frozenset(node_uuid
2842 for node_uuids in fileinfo[filename].values()
2843 for node_uuid in node_uuids) - ignore_nodes
2844
2845 expected_nodes = nodefiles[filename] - ignore_nodes
2846
2847 # Nodes missing file
2848 missing_file = expected_nodes - with_file
2849
2850 if filename in files_opt:
2851 # All or no nodes
2852 self._ErrorIf(missing_file and missing_file != expected_nodes,
2853 constants.CV_ECLUSTERFILECHECK, None,
2854 "File %s is optional, but it must exist on all or no"
2855 " nodes (not found on %s)",
2856 filename,
2857 utils.CommaJoin(
2858 utils.NiceSort(
2859 map(self.cfg.GetNodeName, missing_file))))
2860 else:
2861 self._ErrorIf(missing_file, constants.CV_ECLUSTERFILECHECK, None,
2862 "File %s is missing from node(s) %s", filename,
2863 utils.CommaJoin(
2864 utils.NiceSort(
2865 map(self.cfg.GetNodeName, missing_file))))
2866
2867 # Warn if a node has a file it shouldn't
2868 unexpected = with_file - expected_nodes
2869 self._ErrorIf(unexpected,
2870 constants.CV_ECLUSTERFILECHECK, None,
2871 "File %s should not exist on node(s) %s",
2872 filename, utils.CommaJoin(
2873 utils.NiceSort(map(self.cfg.GetNodeName, unexpected))))
2874
2875 # See if there are multiple versions of the file
2876 test = len(checksums) > 1
2877 if test:
2878 variants = ["variant %s on %s" %
2879 (idx + 1,
2880 utils.CommaJoin(utils.NiceSort(
2881 map(self.cfg.GetNodeName, node_uuids))))
2882 for (idx, (checksum, node_uuids)) in
2883 enumerate(sorted(checksums.items()))]
2884 else:
2885 variants = []
2886
2887 self._ErrorIf(test, constants.CV_ECLUSTERFILECHECK, None,
2888 "File %s found with %s different checksums (%s)",
2889 filename, len(checksums), "; ".join(variants))
2890
2891 def _VerifyNodeDrbdHelper(self, ninfo, nresult, drbd_helper):
2892 """Verify the drbd helper.
2893
2894 """
2895 if drbd_helper:
2896 helper_result = nresult.get(constants.NV_DRBDHELPER, None)
2897 test = (helper_result is None)
2898 self._ErrorIf(test, constants.CV_ENODEDRBDHELPER, ninfo.name,
2899 "no drbd usermode helper returned")
2900 if helper_result:
2901 status, payload = helper_result
2902 test = not status
2903 self._ErrorIf(test, constants.CV_ENODEDRBDHELPER, ninfo.name,
2904 "drbd usermode helper check unsuccessful: %s", payload)
2905 test = status and (payload != drbd_helper)
2906 self._ErrorIf(test, constants.CV_ENODEDRBDHELPER, ninfo.name,
2907 "wrong drbd usermode helper: %s", payload)
2908
2909 def _VerifyNodeDrbd(self, ninfo, nresult, instanceinfo, drbd_helper,
2910 drbd_map):
2911 """Verifies and the node DRBD status.
2912
2913 @type ninfo: L{objects.Node}
2914 @param ninfo: the node to check
2915 @param nresult: the remote results for the node
2916 @param instanceinfo: the dict of instances
2917 @param drbd_helper: the configured DRBD usermode helper
2918 @param drbd_map: the DRBD map as returned by
2919 L{ganeti.config.ConfigWriter.ComputeDRBDMap}
2920
2921 """
2922 self._VerifyNodeDrbdHelper(ninfo, nresult, drbd_helper)
2923
2924 # compute the DRBD minors
2925 node_drbd = {}
2926 for minor, inst_uuid in drbd_map[ninfo.uuid].items():
2927 test = inst_uuid not in instanceinfo
2928 self._ErrorIf(test, constants.CV_ECLUSTERCFG, None,
2929 "ghost instance '%s' in temporary DRBD map", inst_uuid)
2930 # ghost instance should not be running, but otherwise we
2931 # don't give double warnings (both ghost instance and
2932 # unallocated minor in use)
2933 if test:
2934 node_drbd[minor] = (inst_uuid, False)
2935 else:
2936 instance = instanceinfo[inst_uuid]
2937 node_drbd[minor] = (inst_uuid, instance.disks_active)
2938
2939 # and now check them
2940 used_minors = nresult.get(constants.NV_DRBDLIST, [])
2941 test = not isinstance(used_minors, (tuple, list))
2942 self._ErrorIf(test, constants.CV_ENODEDRBD, ninfo.name,
2943 "cannot parse drbd status file: %s", str(used_minors))
2944 if test:
2945 # we cannot check drbd status
2946 return
2947
2948 for minor, (inst_uuid, must_exist) in node_drbd.items():
2949 test = minor not in used_minors and must_exist
2950 self._ErrorIf(test, constants.CV_ENODEDRBD, ninfo.name,
2951 "drbd minor %d of instance %s is not active", minor,
2952 self.cfg.GetInstanceName(inst_uuid))
2953 for minor in used_minors:
2954 test = minor not in node_drbd
2955 self._ErrorIf(test, constants.CV_ENODEDRBD, ninfo.name,
2956 "unallocated drbd minor %d is in use", minor)
2957
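# Worked example for the checks above (all values illustrative): with
#
#   drbd_map[ninfo.uuid] == {0: "uuid-of-inst-a", 1: "uuid-of-inst-b"}
#   nresult[constants.NV_DRBDLIST] == [0, 2]
#
# and instance b having active disks, minor 1 is reported as "not active"
# and minor 2 as an unallocated minor in use; minor 0 passes both checks.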
2958 def _UpdateNodeOS(self, ninfo, nresult, nimg):
2959 """Builds the node OS structures.
2960
2961 @type ninfo: L{objects.Node}
2962 @param ninfo: the node to check
2963 @param nresult: the remote results for the node
2964 @param nimg: the node image object
2965
2966 """
2967 remote_os = nresult.get(constants.NV_OSLIST, None)
2968 test = (not isinstance(remote_os, list) or
2969 not compat.all(isinstance(v, list) and len(v) == 8
2970 for v in remote_os))
2971
2972 self._ErrorIf(test, constants.CV_ENODEOS, ninfo.name,
2973 "node hasn't returned valid OS data")
2974
2975 nimg.os_fail = test
2976
2977 if test:
2978 return
2979
2980 os_dict = {}
2981
2982 for (name, os_path, status, diagnose,
2983 variants, parameters, api_ver,
2984 trusted) in nresult[constants.NV_OSLIST]:
2985
2986 if name not in os_dict:
2987 os_dict[name] = []
2988
2989 # parameters is a list of lists instead of list of tuples due to
2990 # JSON lacking a real tuple type, fix it:
2991 parameters = [tuple(v) for v in parameters]
2992 os_dict[name].append((os_path, status, diagnose,
2993 set(variants), set(parameters), set(api_ver),
2994 trusted))
2995
2996 nimg.oslist = os_dict
2997
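# Sketch of a single NV_OSLIST entry unpacked in _UpdateNodeOS above (values
# are illustrative): an 8-element list such as
#
#   ["debian-image", "/srv/ganeti/os/debian-image", True, "",
#    ["default"], [["root_size", "size of the root partition"]], [20], True]
#
# i.e. name, path, status, diagnose message, variants, parameters (lists of
# lists because JSON has no tuple type), API versions and the trusted flag.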
2998 def _VerifyNodeOS(self, ninfo, nimg, base):
2999 """Verifies the node OS list.
3000
3001 @type ninfo: L{objects.Node}
3002 @param ninfo: the node to check
3003 @param nimg: the node image object
3004 @param base: the 'template' node we match against (e.g. from the master)
3005
3006 """
3007 assert not nimg.os_fail, "Entered _VerifyNodeOS with failed OS rpc?"
3008
3009 beautify_params = lambda l: ["%s: %s" % (k, v) for (k, v) in l]
3010 for os_name, os_data in nimg.oslist.items():
3011 assert os_data, "Empty OS status for OS %s?!" % os_name
3012 f_path, f_status, f_diag, f_var, f_param, f_api, f_trusted = os_data[0]
3013 self._ErrorIf(not f_status, constants.CV_ENODEOS, ninfo.name,
3014 "Invalid OS %s (located at %s): %s",
3015 os_name, f_path, f_diag)
3016 self._ErrorIf(len(os_data) > 1, constants.CV_ENODEOS, ninfo.name,
3017 "OS '%s' has multiple entries"
3018 " (first one shadows the rest): %s",
3019 os_name, utils.CommaJoin([v[0] for v in os_data]))
3020 # comparisons with the 'base' image
3021 test = os_name not in base.oslist
3022 self._ErrorIf(test, constants.CV_ENODEOS, ninfo.name,
3023 "Extra OS %s not present on reference node (%s)",
3024 os_name, self.cfg.GetNodeName(base.uuid))
3025 if test:
3026 continue
3027 assert base.oslist[os_name], "Base node has empty OS status?"
3028 _, b_status, _, b_var, b_param, b_api, b_trusted = base.oslist[os_name][0]
3029 if not b_status:
3030 # base OS is invalid, skipping
3031 continue
3032 for kind, a, b in [("API version", f_api, b_api),
3033 ("variants list", f_var, b_var),
3034 ("parameters", beautify_params(f_param),
3035 beautify_params(b_param))]:
3036 self._ErrorIf(a != b, constants.CV_ENODEOS, ninfo.name,
3037 "OS %s for %s differs from reference node %s:"
3038 " [%s] vs. [%s]", kind, os_name,
3039 self.cfg.GetNodeName(base.uuid),
3040 utils.CommaJoin(sorted(a)), utils.CommaJoin(sorted(b)))
3041 for kind, a, b in [("trusted", f_trusted, b_trusted)]:
3042 self._ErrorIf(a != b, constants.CV_ENODEOS, ninfo.name,
3043 "OS %s for %s differs from reference node %s:"
3044 " %s vs. %s", kind, os_name,
3045 self.cfg.GetNodeName(base.uuid), a, b)
3046
3047 # check any missing OSes
3048 missing = set(base.oslist.keys()).difference(nimg.oslist.keys())
3049 self._ErrorIf(missing, constants.CV_ENODEOS, ninfo.name,
3050 "OSes present on reference node %s"
3051 " but missing on this node: %s",
3052 self.cfg.GetNodeName(base.uuid), utils.CommaJoin(missing))
3053
3054 def _VerifyAcceptedFileStoragePaths(self, ninfo, nresult, is_master):
3055 """Verifies paths in L{pathutils.FILE_STORAGE_PATHS_FILE}.
3056
3057 @type ninfo: L{objects.Node}
3058 @param ninfo: the node to check
3059 @param nresult: the remote results for the node
3060 @type is_master: bool
3061 @param is_master: Whether node is the master node
3062
3063 """
3064 cluster = self.cfg.GetClusterInfo()
3065 if (is_master and
3066 (cluster.IsFileStorageEnabled() or
3067 cluster.IsSharedFileStorageEnabled())):
3068 try:
3069 fspaths = nresult[constants.NV_ACCEPTED_STORAGE_PATHS]
3070 except KeyError:
3071 # This should never happen
3072 self._ErrorIf(True, constants.CV_ENODEFILESTORAGEPATHS, ninfo.name,
3073 "Node did not return forbidden file storage paths")
3074 else:
3075 self._ErrorIf(fspaths, constants.CV_ENODEFILESTORAGEPATHS, ninfo.name,
3076 "Found forbidden file storage paths: %s",
3077 utils.CommaJoin(fspaths))
3078 else:
3079 self._ErrorIf(constants.NV_ACCEPTED_STORAGE_PATHS in nresult,
3080 constants.CV_ENODEFILESTORAGEPATHS, ninfo.name,
3081 "Node should not have returned forbidden file storage"
3082 " paths")
3083
3084 def _VerifyStoragePaths(self, ninfo, nresult, file_disk_template,
3085 verify_key, error_key):
3086 """Verifies (file) storage paths.
3087
3088 @type ninfo: L{objects.Node}
3089 @param ninfo: the node to check
3090 @param nresult: the remote results for the node
3091 @type file_disk_template: string
3092 @param file_disk_template: file-based disk template, whose directory
3093 is supposed to be verified
3094 @type verify_key: string
3095 @param verify_key: key for the verification map of this file
3096 verification step
3097 @param error_key: error key to be added to the verification results
3098 in case something goes wrong in this verification step
3099
3100 """
3101 assert (file_disk_template in utils.storage.GetDiskTemplatesOfStorageTypes(
3102 constants.ST_FILE, constants.ST_SHARED_FILE, constants.ST_GLUSTER
3103 ))
3104
3105 cluster = self.cfg.GetClusterInfo()
3106 if cluster.IsDiskTemplateEnabled(file_disk_template):
3107 self._ErrorIf(
3108 verify_key in nresult,
3109 error_key, ninfo.name,
3110 "The configured %s storage path is unusabl