#
#

# Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011, 2012, 2013, 2014 Google Inc.
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are
# met:
#
# 1. Redistributions of source code must retain the above copyright notice,
# this list of conditions and the following disclaimer.
#
# 2. Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
# IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
# TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
# LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
# NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.


"""Functions used by the node daemon

@var _ALLOWED_UPLOAD_FILES: denotes which files are accepted in
     the L{UploadFile} function
@var _ALLOWED_CLEAN_DIRS: denotes which directories are accepted
     in the L{_CleanDirectory} function

"""

# pylint: disable=E1103,C0302

# E1103: %s %r has no %r member (but some types could not be
# inferred), because the _TryOSFromDisk returns either (True, os_obj)
# or (False, "string") which confuses pylint

# C0302: This module has become too big and should be split up


import base64
import errno
import logging
import os
import os.path
import pycurl
import random
import re
import shutil
import signal
import stat
import tempfile
import time
import zlib
import contextlib
import collections

from ganeti import errors
from ganeti import http
from ganeti import utils
from ganeti import ssh
from ganeti import hypervisor
from ganeti.hypervisor import hv_base
from ganeti import constants
from ganeti.storage import bdev
from ganeti.storage import drbd
from ganeti.storage import extstorage
from ganeti.storage import filestorage
from ganeti import objects
from ganeti import ssconf
from ganeti import serializer
from ganeti import netutils
from ganeti import runtime
from ganeti import compat
from ganeti import pathutils
from ganeti import vcluster
from ganeti import ht
from ganeti.storage.base import BlockDev
from ganeti.storage.drbd import DRBD8
from ganeti import hooksmaster
import ganeti.metad as metad


_BOOT_ID_PATH = "/proc/sys/kernel/random/boot_id"
_ALLOWED_CLEAN_DIRS = compat.UniqueFrozenset([
  pathutils.DATA_DIR,
  pathutils.JOB_QUEUE_ARCHIVE_DIR,
  pathutils.QUEUE_DIR,
  pathutils.CRYPTO_KEYS_DIR,
  ])
_MAX_SSL_CERT_VALIDITY = 7 * 24 * 60 * 60
_X509_KEY_FILE = "key"
_X509_CERT_FILE = "cert"
_IES_STATUS_FILE = "status"
_IES_PID_FILE = "pid"
_IES_CA_FILE = "ca"

#: Valid LVS output line regex
_LVSLINE_REGEX = re.compile(r"^ *([^|]+)\|([^|]+)\|([0-9.]+)\|([^|]{6,})\|?$")

# Actions for the master setup script
_MASTER_START = "start"
_MASTER_STOP = "stop"

#: Maximum file permissions for restricted command directory and executables
_RCMD_MAX_MODE = (stat.S_IRWXU |
                  stat.S_IRGRP | stat.S_IXGRP |
                  stat.S_IROTH | stat.S_IXOTH)

#: Delay before returning an error for restricted commands
_RCMD_INVALID_DELAY = 10

#: How long to wait to acquire lock for restricted commands (shorter than
#: L{_RCMD_INVALID_DELAY}) to reduce blockage of noded forks when many
#: command requests arrive
_RCMD_LOCK_TIMEOUT = _RCMD_INVALID_DELAY * 0.8


class RPCFail(Exception):
  """Class denoting RPC failure.

  Its argument is the error message.

  """


def _GetInstReasonFilename(instance_name):
  """Path of the file containing the reason of the instance status change.

  @type instance_name: string
  @param instance_name: The name of the instance
  @rtype: string
  @return: The path of the file

  """
  return utils.PathJoin(pathutils.INSTANCE_REASON_DIR, instance_name)


def _StoreInstReasonTrail(instance_name, trail):
  """Serialize a reason trail related to an instance change of state to file.

  The exact location of the file depends on the name of the instance and on
  the configuration of the Ganeti cluster defined at deploy time.

  @type instance_name: string
  @param instance_name: The name of the instance

  @type trail: list of reasons
  @param trail: reason trail

  @rtype: None

  """
  json = serializer.DumpJson(trail)
  filename = _GetInstReasonFilename(instance_name)
  utils.WriteFile(filename, data=json)


def _Fail(msg, *args, **kwargs):
  """Log an error and then raise an RPCFail exception.

  This exception is then handled specially in the ganeti daemon and
  turned into a 'failed' return type. As such, this function is a
  useful shortcut for logging the error and returning it to the master
  daemon.

  @type msg: string
  @param msg: the text of the exception
  @raise RPCFail

  """
  if args:
    msg = msg % args
  if "log" not in kwargs or kwargs["log"]:  # if we should log this error
    if "exc" in kwargs and kwargs["exc"]:
      logging.exception(msg)
    else:
      logging.error(msg)
  raise RPCFail(msg)


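# A minimal usage sketch for _Fail (illustrative only; the file path below is
# hypothetical): log the full exception trace locally and report the message
# back to the master daemon as a 'failed' RPC result.
#
#   try:
#     utils.ReadFile("/etc/ganeti/some-file")
#   except EnvironmentError, err:
#     _Fail("Can't read file: %s", err, exc=True)

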
def _GetConfig():
  """Simple wrapper to return a SimpleStore.

  @rtype: L{ssconf.SimpleStore}
  @return: a SimpleStore instance

  """
  return ssconf.SimpleStore()


def _GetSshRunner(cluster_name):
  """Simple wrapper to return an SshRunner.

  @type cluster_name: str
  @param cluster_name: the cluster name, which is needed
    by the SshRunner constructor
  @rtype: L{ssh.SshRunner}
  @return: an SshRunner instance

  """
  return ssh.SshRunner(cluster_name)


def _Decompress(data):
  """Unpacks data compressed by the RPC client.

  @type data: list or tuple
  @param data: Data sent by RPC client
  @rtype: str
  @return: Decompressed data

  """
  assert isinstance(data, (list, tuple))
  assert len(data) == 2
  (encoding, content) = data
  if encoding == constants.RPC_ENCODING_NONE:
    return content
  elif encoding == constants.RPC_ENCODING_ZLIB_BASE64:
    return zlib.decompress(base64.b64decode(content))
  else:
    raise AssertionError("Unknown data encoding")


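# Sketch of the two encodings _Decompress accepts, mirroring what the RPC
# client sends (illustrative only):
#
#   _Decompress((constants.RPC_ENCODING_NONE, "hello"))  # -> "hello"
#   payload = (constants.RPC_ENCODING_ZLIB_BASE64,
#              base64.b64encode(zlib.compress("hello")))
#   _Decompress(payload)  # -> "hello"

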
def _CleanDirectory(path, exclude=None):
  """Removes all regular files in a directory.

  @type path: str
  @param path: the directory to clean
  @type exclude: list
  @param exclude: list of files to be excluded, defaults
    to the empty list

  """
  if path not in _ALLOWED_CLEAN_DIRS:
    _Fail("Path passed to _CleanDirectory not in allowed clean targets: '%s'",
          path)

  if not os.path.isdir(path):
    return
  if exclude is None:
    exclude = []
  else:
    # Normalize excluded paths
    exclude = [os.path.normpath(i) for i in exclude]

  for rel_name in utils.ListVisibleFiles(path):
    full_name = utils.PathJoin(path, rel_name)
    if full_name in exclude:
      continue
    if os.path.isfile(full_name) and not os.path.islink(full_name):
      utils.RemoveFile(full_name)


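# Illustrative call: clean the job queue directory but keep the queue lock
# file, exactly as JobQueuePurge below does. Paths outside
# _ALLOWED_CLEAN_DIRS cause an RPCFail.
#
#   _CleanDirectory(pathutils.QUEUE_DIR,
#                   exclude=[pathutils.JOB_QUEUE_LOCK_FILE])

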
def _BuildUploadFileList():
  """Build the list of allowed upload files.

  This is abstracted so that it's built only once at module import time.

  """
  allowed_files = set([
    pathutils.CLUSTER_CONF_FILE,
    pathutils.ETC_HOSTS,
    pathutils.SSH_KNOWN_HOSTS_FILE,
    pathutils.VNC_PASSWORD_FILE,
    pathutils.RAPI_CERT_FILE,
    pathutils.SPICE_CERT_FILE,
    pathutils.SPICE_CACERT_FILE,
    pathutils.RAPI_USERS_FILE,
    pathutils.CONFD_HMAC_KEY,
    pathutils.CLUSTER_DOMAIN_SECRET_FILE,
    ])

  for hv_name in constants.HYPER_TYPES:
    hv_class = hypervisor.GetHypervisorClass(hv_name)
    allowed_files.update(hv_class.GetAncillaryFiles()[0])

  assert pathutils.FILE_STORAGE_PATHS_FILE not in allowed_files, \
    "Allowed file storage paths should never be uploaded via RPC"

  return frozenset(allowed_files)


_ALLOWED_UPLOAD_FILES = _BuildUploadFileList()


def JobQueuePurge():
  """Removes job queue files and archived jobs.

  @rtype: tuple
  @return: True, None

  """
  _CleanDirectory(pathutils.QUEUE_DIR, exclude=[pathutils.JOB_QUEUE_LOCK_FILE])
  _CleanDirectory(pathutils.JOB_QUEUE_ARCHIVE_DIR)


def GetMasterNodeName():
  """Returns the master node name.

  @rtype: string
  @return: name of the master node
  @raise RPCFail: in case of errors

  """
  try:
    return _GetConfig().GetMasterNode()
  except errors.ConfigurationError, err:
    _Fail("Cluster configuration incomplete: %s", err, exc=True)


def RunLocalHooks(hook_opcode, hooks_path, env_builder_fn):
  """Decorator that runs hooks before and after the decorated function.

  @type hook_opcode: string
  @param hook_opcode: opcode of the hook
  @type hooks_path: string
  @param hooks_path: path of the hooks
  @type env_builder_fn: function
  @param env_builder_fn: function that returns a dictionary containing the
    environment variables for the hooks. Will get all the parameters of the
    decorated function.
  @raise RPCFail: in case of pre-hook failure

  """
  def decorator(fn):
    def wrapper(*args, **kwargs):
      _, myself = ssconf.GetMasterAndMyself()
      nodes = ([myself], [myself])  # these hooks run locally

      env_fn = compat.partial(env_builder_fn, *args, **kwargs)

      cfg = _GetConfig()
      hr = HooksRunner()
      hm = hooksmaster.HooksMaster(hook_opcode, hooks_path, nodes,
                                   hr.RunLocalHooks, None, env_fn, None,
                                   logging.warning, cfg.GetClusterName(),
                                   cfg.GetMasterNode())
      hm.RunPhase(constants.HOOKS_PHASE_PRE)
      result = fn(*args, **kwargs)
      hm.RunPhase(constants.HOOKS_PHASE_POST)

      return result
    return wrapper
  return decorator


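# Sketch of how the decorator is applied (see ActivateMasterIp below for a
# real use; the function name here is a placeholder):
#
#   @RunLocalHooks(constants.FAKE_OP_MASTER_TURNUP, "master-ip-turnup",
#                  _BuildMasterIpEnv)
#   def SomeMasterIpAction(master_params, use_external_mip_script):
#     ...  # pre-hooks already ran; post-hooks run after this returns

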
def _BuildMasterIpEnv(master_params, use_external_mip_script=None):
  """Builds environment variables for master IP hooks.

  @type master_params: L{objects.MasterNetworkParameters}
  @param master_params: network parameters of the master
  @type use_external_mip_script: boolean
  @param use_external_mip_script: whether to use an external master IP
    address setup script (unused, but necessary per the implementation of the
    _RunLocalHooks decorator)

  """
  # pylint: disable=W0613
  ver = netutils.IPAddress.GetVersionFromAddressFamily(master_params.ip_family)
  env = {
    "MASTER_NETDEV": master_params.netdev,
    "MASTER_IP": master_params.ip,
    "MASTER_NETMASK": str(master_params.netmask),
    "CLUSTER_IP_VERSION": str(ver),
  }

  return env


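# For illustration, the environment a master IP hook or setup script would
# see on an IPv4 cluster (all values invented):
#
#   {
#     "MASTER_NETDEV": "eth0",
#     "MASTER_IP": "192.0.2.10",
#     "MASTER_NETMASK": "24",
#     "CLUSTER_IP_VERSION": "4",
#   }

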
def _RunMasterSetupScript(master_params, action, use_external_mip_script):
  """Execute the master IP address setup script.

  @type master_params: L{objects.MasterNetworkParameters}
  @param master_params: network parameters of the master
  @type action: string
  @param action: action to pass to the script. Must be one of
    L{backend._MASTER_START} or L{backend._MASTER_STOP}
  @type use_external_mip_script: boolean
  @param use_external_mip_script: whether to use an external master IP
    address setup script
  @raise backend.RPCFail: if there are errors during the execution of the
    script

  """
  env = _BuildMasterIpEnv(master_params)

  if use_external_mip_script:
    setup_script = pathutils.EXTERNAL_MASTER_SETUP_SCRIPT
  else:
    setup_script = pathutils.DEFAULT_MASTER_SETUP_SCRIPT

  result = utils.RunCmd([setup_script, action], env=env, reset_env=True)

  if result.failed:
    _Fail("Failed to %s the master IP. Script return value: %s, output: '%s'" %
          (action, result.exit_code, result.output), log=True)


@RunLocalHooks(constants.FAKE_OP_MASTER_TURNUP, "master-ip-turnup",
               _BuildMasterIpEnv)
def ActivateMasterIp(master_params, use_external_mip_script):
  """Activate the IP address of the master daemon.

  @type master_params: L{objects.MasterNetworkParameters}
  @param master_params: network parameters of the master
  @type use_external_mip_script: boolean
  @param use_external_mip_script: whether to use an external master IP
    address setup script
  @raise RPCFail: in case of errors during the IP startup

  """
  _RunMasterSetupScript(master_params, _MASTER_START,
                        use_external_mip_script)


def StartMasterDaemons(no_voting):
  """Activate local node as master node.

  The function will start the master daemons (ganeti-masterd and ganeti-rapi).

  @type no_voting: boolean
  @param no_voting: whether to start ganeti-masterd without a node vote
    but still non-interactively
  @rtype: None

  """

  if no_voting:
    masterd_args = "--no-voting --yes-do-it"
  else:
    masterd_args = ""

  env = {
    "EXTRA_MASTERD_ARGS": masterd_args,
  }

  result = utils.RunCmd([pathutils.DAEMON_UTIL, "start-master"], env=env)
  if result.failed:
    msg = "Can't start Ganeti master: %s" % result.output
    logging.error(msg)
    _Fail(msg)


@RunLocalHooks(constants.FAKE_OP_MASTER_TURNDOWN, "master-ip-turndown",
               _BuildMasterIpEnv)
def DeactivateMasterIp(master_params, use_external_mip_script):
  """Deactivate the master IP on this node.

  @type master_params: L{objects.MasterNetworkParameters}
  @param master_params: network parameters of the master
  @type use_external_mip_script: boolean
  @param use_external_mip_script: whether to use an external master IP
    address setup script
  @raise RPCFail: in case of errors during the IP turndown

  """
  _RunMasterSetupScript(master_params, _MASTER_STOP,
                        use_external_mip_script)


def StopMasterDaemons():
  """Stop the master daemons on this node.

  Stop the master daemons (ganeti-masterd and ganeti-rapi) on this node.

  @rtype: None

  """
  # TODO: log and report back to the caller the error failures; we
  # need to decide in which case we fail the RPC for this

  result = utils.RunCmd([pathutils.DAEMON_UTIL, "stop-master"])
  if result.failed:
    logging.error("Could not stop Ganeti master, command %s had exitcode %s"
                  " and error %s",
                  result.cmd, result.exit_code, result.output)


def ChangeMasterNetmask(old_netmask, netmask, master_ip, master_netdev):
  """Change the netmask of the master IP.

  @param old_netmask: the old value of the netmask
  @param netmask: the new value of the netmask
  @param master_ip: the master IP
  @param master_netdev: the master network device

  """
  if old_netmask == netmask:
    return

  if not netutils.IPAddress.Own(master_ip):
    _Fail("The master IP address is not up, not attempting to change its"
          " netmask")

  result = utils.RunCmd([constants.IP_COMMAND_PATH, "address", "add",
                         "%s/%s" % (master_ip, netmask),
                         "dev", master_netdev, "label",
                         "%s:0" % master_netdev])
  if result.failed:
    _Fail("Could not set the new netmask on the master IP address")

  result = utils.RunCmd([constants.IP_COMMAND_PATH, "address", "del",
                         "%s/%s" % (master_ip, old_netmask),
                         "dev", master_netdev, "label",
                         "%s:0" % master_netdev])
  if result.failed:
    _Fail("Could not bring down the master IP address with the old netmask")


def EtcHostsModify(mode, host, ip):
  """Modify a host entry in /etc/hosts.

  @param mode: The mode to operate in: either add or remove an entry
  @param host: The host to operate on
  @param ip: The IP associated with the entry

  """
  if mode == constants.ETC_HOSTS_ADD:
    if not ip:
      _Fail("Mode 'add' needs 'ip' parameter, but parameter not"
            " present")
    utils.AddHostToEtcHosts(host, ip)
  elif mode == constants.ETC_HOSTS_REMOVE:
    if ip:
      _Fail("Mode 'remove' does not allow 'ip' parameter, but"
            " parameter is present")
    utils.RemoveHostFromEtcHosts(host)
  else:
    _Fail("Mode not supported")


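# Illustrative calls (host name and address are invented); 'add' requires an
# IP, while 'remove' forbids one:
#
#   EtcHostsModify(constants.ETC_HOSTS_ADD, "node3.example.com", "192.0.2.13")
#   EtcHostsModify(constants.ETC_HOSTS_REMOVE, "node3.example.com", None)

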
def LeaveCluster(modify_ssh_setup):
  """Cleans up and removes the current node.

  This function cleans up and prepares the current node to be removed
  from the cluster.

  If processing is successful, then it raises an
  L{errors.QuitGanetiException} which is used as a special case to
  shutdown the node daemon.

  @param modify_ssh_setup: boolean

  """
  _CleanDirectory(pathutils.DATA_DIR)
  _CleanDirectory(pathutils.CRYPTO_KEYS_DIR)
  JobQueuePurge()

  if modify_ssh_setup:
    try:
      priv_key, pub_key, auth_keys = ssh.GetUserFiles(constants.SSH_LOGIN_USER)

      ssh.RemoveAuthorizedKey(auth_keys, utils.ReadFile(pub_key))

      utils.RemoveFile(priv_key)
      utils.RemoveFile(pub_key)
    except errors.OpExecError:
      logging.exception("Error while processing ssh files")

  try:
    utils.RemoveFile(pathutils.CONFD_HMAC_KEY)
    utils.RemoveFile(pathutils.RAPI_CERT_FILE)
    utils.RemoveFile(pathutils.SPICE_CERT_FILE)
    utils.RemoveFile(pathutils.SPICE_CACERT_FILE)
    utils.RemoveFile(pathutils.NODED_CERT_FILE)
  except:  # pylint: disable=W0702
    logging.exception("Error while removing cluster secrets")

  utils.StopDaemon(constants.CONFD)
  utils.StopDaemon(constants.MOND)
  utils.StopDaemon(constants.KVMD)

  # Raise a custom exception (handled in ganeti-noded)
  raise errors.QuitGanetiException(True, "Shutdown scheduled")


def _CheckStorageParams(params, num_params):
  """Performs sanity checks for storage parameters.

  @type params: list
  @param params: list of storage parameters
  @type num_params: int
  @param num_params: expected number of parameters

  """
  if params is None:
    raise errors.ProgrammerError("No storage parameters for storage"
                                 " reporting were provided.")
  if not isinstance(params, list):
    raise errors.ProgrammerError("The storage parameters are not of type"
                                 " list: '%s'" % params)
  if len(params) != num_params:
    raise errors.ProgrammerError("Did not receive the expected number of"
                                 " storage parameters: expected %s,"
                                 " received '%s'" % (num_params, len(params)))


def _CheckLvmStorageParams(params):
  """Performs sanity check for the 'exclusive storage' flag.

  @see: C{_CheckStorageParams}

  """
  _CheckStorageParams(params, 1)
  excl_stor = params[0]
  if not isinstance(excl_stor, bool):
    raise errors.ProgrammerError("Exclusive storage parameter is not"
                                 " boolean: '%s'." % excl_stor)
  return excl_stor


def _GetLvmVgSpaceInfo(name, params):
  """Wrapper around C{_GetVgInfo} which checks the storage parameters.

  @type name: string
  @param name: name of the volume group
  @type params: list
  @param params: list of storage parameters, which in this case should
    contain only one, the exclusive storage flag

  """
  excl_stor = _CheckLvmStorageParams(params)
  return _GetVgInfo(name, excl_stor)


def _GetVgInfo(
    name, excl_stor, info_fn=bdev.LogicalVolume.GetVGInfo):
  """Retrieves information about an LVM volume group.

  """
  # TODO: GetVGInfo supports returning information for multiple VGs at once
  vginfo = info_fn([name], excl_stor)
  if vginfo:
    vg_free = int(round(vginfo[0][0], 0))
    vg_size = int(round(vginfo[0][1], 0))
  else:
    vg_free = None
    vg_size = None

  return {
    "type": constants.ST_LVM_VG,
    "name": name,
    "storage_free": vg_free,
    "storage_size": vg_size,
  }


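# Shape of the dictionary returned by _GetVgInfo for a volume group named
# "xenvg" (numbers invented; sizes are in MiB):
#
#   {
#     "type": constants.ST_LVM_VG,
#     "name": "xenvg",
#     "storage_free": 102400,
#     "storage_size": 204800,
#   }

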
def _GetLvmPvSpaceInfo(name, params):
  """Wrapper around C{_GetVgSpindlesInfo} with sanity checks.

  @see: C{_GetLvmVgSpaceInfo}

  """
  excl_stor = _CheckLvmStorageParams(params)
  return _GetVgSpindlesInfo(name, excl_stor)


def _GetVgSpindlesInfo(
    name, excl_stor, info_fn=bdev.LogicalVolume.GetVgSpindlesInfo):
  """Retrieves information about spindles in an LVM volume group.

  @type name: string
  @param name: VG name
  @type excl_stor: bool
  @param excl_stor: exclusive storage
  @rtype: dict
  @return: dictionary with keys "type", "name", "storage_free" and
    "storage_size", where free and size denote free and total spindles
    respectively; both are zero when exclusive storage is disabled

  """
  if excl_stor:
    (vg_free, vg_size) = info_fn(name)
  else:
    vg_free = 0
    vg_size = 0
  return {
    "type": constants.ST_LVM_PV,
    "name": name,
    "storage_free": vg_free,
    "storage_size": vg_size,
  }


def _GetHvInfo(name, hvparams, get_hv_fn=hypervisor.GetHypervisor):
  """Retrieves node information from a hypervisor.

  The information returned depends on the hypervisor. Common items:

    - vg_size is the size of the configured volume group in MiB
    - vg_free is the free size of the volume group in MiB
    - memory_dom0 is the memory allocated for domain0 in MiB
    - memory_free is the currently available (free) RAM in MiB
    - memory_total is the total amount of RAM in MiB
    - hv_version: the hypervisor version, if available

  @type hvparams: dict of string
  @param hvparams: the hypervisor's hvparams

  """
  return get_hv_fn(name).GetNodeInfo(hvparams=hvparams)


def _GetHvInfoAll(hv_specs, get_hv_fn=hypervisor.GetHypervisor):
  """Retrieves node information for all hypervisors.

  See C{_GetHvInfo} for information on the output.

  @type hv_specs: list of pairs (string, dict of strings)
  @param hv_specs: list of pairs of a hypervisor's name and its hvparams

  """
  if hv_specs is None:
    return None

  result = []
  for hvname, hvparams in hv_specs:
    result.append(_GetHvInfo(hvname, hvparams, get_hv_fn))
  return result


def _GetNamedNodeInfo(names, fn):
  """Calls C{fn} for all names in C{names} and returns a list of results.

  @rtype: None or list

  """
  if names is None:
    return None
  else:
    return map(fn, names)


def GetNodeInfo(storage_units, hv_specs):
  """Gives back a tuple with information about the node.

  @type storage_units: list of tuples (string, string, list)
  @param storage_units: List of tuples (storage unit, identifier, parameters) to
    ask for disk space information. In case of lvm-vg, the identifier is
    the VG name. The parameters can contain additional, storage-type-specific
    parameters, for example exclusive storage for lvm storage.
  @type hv_specs: list of pairs (string, dict of strings)
  @param hv_specs: list of pairs of a hypervisor's name and its hvparams
  @rtype: tuple; (string, None/list, None/list)
  @return: Tuple containing boot ID, storage unit information and hypervisor
    information

  """
  bootid = utils.ReadFile(_BOOT_ID_PATH, size=128).rstrip("\n")
  storage_info = _GetNamedNodeInfo(
    storage_units,
    (lambda (storage_type, storage_key, storage_params):
        _ApplyStorageInfoFunction(storage_type, storage_key, storage_params)))
  hv_info = _GetHvInfoAll(hv_specs)
  return (bootid, storage_info, hv_info)


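# Illustrative request: LVM volume group space (exclusive storage disabled)
# plus information from one hypervisor (names are examples only):
#
#   GetNodeInfo([(constants.ST_LVM_VG, "xenvg", [False])],
#               [(constants.HT_XEN_PVM, {})])

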
def _GetFileStorageSpaceInfo(path, params):
  """Wrapper around filestorage.GetSpaceInfo.

  The purpose of this wrapper is to call filestorage.GetFileStorageSpaceInfo
  and ignore the *args parameter to not leak it into the filestorage
  module's code.

  @see: C{filestorage.GetFileStorageSpaceInfo} for description of the
    parameters.

  """
  _CheckStorageParams(params, 0)
  return filestorage.GetFileStorageSpaceInfo(path)


# FIXME: implement storage reporting for all missing storage types.
_STORAGE_TYPE_INFO_FN = {
  constants.ST_BLOCK: None,
  constants.ST_DISKLESS: None,
  constants.ST_EXT: None,
  constants.ST_FILE: _GetFileStorageSpaceInfo,
  constants.ST_LVM_PV: _GetLvmPvSpaceInfo,
  constants.ST_LVM_VG: _GetLvmVgSpaceInfo,
  constants.ST_SHARED_FILE: None,
  constants.ST_GLUSTER: None,
  constants.ST_RADOS: None,
}


def _ApplyStorageInfoFunction(storage_type, storage_key, *args):
  """Looks up and applies the correct function to calculate free and total
  storage for the given storage type.

  @type storage_type: string
  @param storage_type: the storage type for which the storage shall be reported.
  @type storage_key: string
  @param storage_key: identifier of a storage unit, e.g. the volume group name
    of an LVM storage unit
  @type args: any
  @param args: various parameters that can be used for storage reporting. These
    parameters and their semantics vary from storage type to storage type and
    are just propagated in this function.
  @return: the results of the application of the storage space function (see
    _STORAGE_TYPE_INFO_FN) if storage space reporting is implemented for that
    storage type
  @raises NotImplementedError: for storage types that don't support space
    reporting yet

  """
  fn = _STORAGE_TYPE_INFO_FN[storage_type]
  if fn is not None:
    return fn(storage_key, *args)
  else:
    raise NotImplementedError


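# Dispatch sketch ("xenvg" is an example): LVM volume groups resolve to
# _GetLvmVgSpaceInfo, while storage types mapped to None above raise
# NotImplementedError.
#
#   _ApplyStorageInfoFunction(constants.ST_LVM_VG, "xenvg", [True])
#   _ApplyStorageInfoFunction(constants.ST_RADOS, "rbd", [])  # raises

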
def _CheckExclusivePvs(pvi_list):
  """Check that PVs are not shared among LVs

  @type pvi_list: list of L{objects.LvmPvInfo} objects
  @param pvi_list: information about the PVs

  @rtype: list of tuples (string, list of strings)
  @return: offending volumes, as tuples: (pv_name, [lv1_name, lv2_name...])

  """
  res = []
  for pvi in pvi_list:
    if len(pvi.lv_list) > 1:
      res.append((pvi.name, pvi.lv_list))
  return res


def _VerifyHypervisors(what, vm_capable, result, all_hvparams,
                       get_hv_fn=hypervisor.GetHypervisor):
  """Verifies the hypervisor. Appends the results to the 'result' dict.

  @type what: C{dict}
  @param what: a dictionary of things to check
  @type vm_capable: boolean
  @param vm_capable: whether or not this node is vm capable
  @type result: dict
  @param result: dictionary of verification results; results of the
    verifications in this function will be added here
  @type all_hvparams: dict of dict of string
  @param all_hvparams: dictionary mapping hypervisor names to hvparams
  @type get_hv_fn: function
  @param get_hv_fn: function to retrieve the hypervisor, to improve testability

  """
  if not vm_capable:
    return

  if constants.NV_HYPERVISOR in what:
    result[constants.NV_HYPERVISOR] = {}
    for hv_name in what[constants.NV_HYPERVISOR]:
      hvparams = all_hvparams[hv_name]
      try:
        val = get_hv_fn(hv_name).Verify(hvparams=hvparams)
      except errors.HypervisorError, err:
        val = "Error while checking hypervisor: %s" % str(err)
      result[constants.NV_HYPERVISOR][hv_name] = val


def _VerifyHvparams(what, vm_capable, result,
                    get_hv_fn=hypervisor.GetHypervisor):
  """Verifies the hvparams. Appends the results to the 'result' dict.

  @type what: C{dict}
  @param what: a dictionary of things to check
  @type vm_capable: boolean
  @param vm_capable: whether or not this node is vm capable
  @type result: dict
  @param result: dictionary of verification results; results of the
    verifications in this function will be added here
  @type get_hv_fn: function
  @param get_hv_fn: function to retrieve the hypervisor, to improve testability

  """
  if not vm_capable:
    return

  if constants.NV_HVPARAMS in what:
    result[constants.NV_HVPARAMS] = []
    for source, hv_name, hvparms in what[constants.NV_HVPARAMS]:
      try:
        logging.info("Validating hv %s, %s", hv_name, hvparms)
        get_hv_fn(hv_name).ValidateParameters(hvparms)
      except errors.HypervisorError, err:
        result[constants.NV_HVPARAMS].append((source, hv_name, str(err)))


def _VerifyInstanceList(what, vm_capable, result, all_hvparams):
  """Verifies the instance list.

  @type what: C{dict}
  @param what: a dictionary of things to check
  @type vm_capable: boolean
  @param vm_capable: whether or not this node is vm capable
  @type result: dict
  @param result: dictionary of verification results; results of the
    verifications in this function will be added here
  @type all_hvparams: dict of dict of string
  @param all_hvparams: dictionary mapping hypervisor names to hvparams

  """
  if constants.NV_INSTANCELIST in what and vm_capable:
    # GetInstanceList can fail
    try:
      val = GetInstanceList(what[constants.NV_INSTANCELIST],
                            all_hvparams=all_hvparams)
    except RPCFail, err:
      val = str(err)
    result[constants.NV_INSTANCELIST] = val


def _VerifyNodeInfo(what, vm_capable, result, all_hvparams):
  """Verifies the node info.

  @type what: C{dict}
  @param what: a dictionary of things to check
  @type vm_capable: boolean
  @param vm_capable: whether or not this node is vm capable
  @type result: dict
  @param result: dictionary of verification results; results of the
    verifications in this function will be added here
  @type all_hvparams: dict of dict of string
  @param all_hvparams: dictionary mapping hypervisor names to hvparams

  """
  if constants.NV_HVINFO in what and vm_capable:
    hvname = what[constants.NV_HVINFO]
    hyper = hypervisor.GetHypervisor(hvname)
    hvparams = all_hvparams[hvname]
    result[constants.NV_HVINFO] = hyper.GetNodeInfo(hvparams=hvparams)


def _VerifyClientCertificate(cert_file=pathutils.NODED_CLIENT_CERT_FILE):
  """Verify the existence and validity of the client SSL certificate.

  Also, verify that the client certificate is not self-signed. Self-
  signed client certificates stem from Ganeti versions 2.12.0 - 2.12.4
  and should be replaced by client certificates signed by the server
  certificate. Hence we output a warning when we encounter a self-signed
  one.

  """
  create_cert_cmd = "gnt-cluster renew-crypto --new-node-certificates"
  if not os.path.exists(cert_file):
    return (constants.CV_ERROR,
            "The client certificate does not exist. Run '%s' to create"
            " client certificates for all nodes." % create_cert_cmd)

  (errcode, msg) = utils.VerifyCertificate(cert_file)
  if errcode is not None:
    return (errcode, msg)

  (errcode, msg) = utils.IsCertificateSelfSigned(cert_file)
  if errcode is not None:
    return (errcode, msg)

  # if everything is fine, we return the digest to be compared to the config
  return (None, utils.GetCertificateDigest(cert_filename=cert_file))


def _VerifySshSetup(node_status_list, my_name, ssh_key_type,
                    ganeti_pub_keys_file=pathutils.SSH_PUB_KEYS):
  """Verifies the state of the SSH key files.

  @type node_status_list: list of tuples
  @param node_status_list: list of nodes of the cluster associated with a
    couple of flags: (uuid, name, is_master_candidate,
    is_potential_master_candidate, online)
  @type my_name: str
  @param my_name: name of this node
  @type ssh_key_type: one of L{constants.SSHK_ALL}
  @param ssh_key_type: type of key used on nodes
  @type ganeti_pub_keys_file: str
  @param ganeti_pub_keys_file: filename of the public keys file

  """
  if node_status_list is None:
    return ["No node list to check against the pub_key_file received."]

  my_status_list = [(my_uuid, name, mc, pot_mc, online) for
                    (my_uuid, name, mc, pot_mc, online)
                    in node_status_list if name == my_name]
  if len(my_status_list) == 0:
    return ["Cannot find node information for node '%s'." % my_name]
  (my_uuid, _, _, potential_master_candidate, online) = \
    my_status_list[0]

  result = []

  if not os.path.exists(ganeti_pub_keys_file):
    result.append("The public key file '%s' does not exist. Consider running"
                  " 'gnt-cluster renew-crypto --new-ssh-keys"
                  " [--no-ssh-key-check]' to fix this." % ganeti_pub_keys_file)
    return result

  pot_mc_uuids = [uuid for (uuid, _, _, _, _) in node_status_list]
  offline_nodes = [uuid for (uuid, _, _, _, online) in node_status_list
                   if not online]
  pub_keys = ssh.QueryPubKeyFile(None, key_file=ganeti_pub_keys_file)

  # initialized here so that it is defined for the checks below even when
  # this node is not a potential master candidate
  missing_uuids = set([])

  if potential_master_candidate:
    # Check that the set of potential master candidates matches the
    # public key file
    pub_uuids_set = set(pub_keys.keys()) - set(offline_nodes)
    pot_mc_uuids_set = set(pot_mc_uuids) - set(offline_nodes)
    if pub_uuids_set != pot_mc_uuids_set:
      unknown_uuids = pub_uuids_set - pot_mc_uuids_set
      if unknown_uuids:
        result.append("The following node UUIDs are listed in the public key"
                      " file on node '%s', but are not potential master"
                      " candidates: %s."
                      % (my_name, ", ".join(list(unknown_uuids))))
      missing_uuids = pot_mc_uuids_set - pub_uuids_set
      if missing_uuids:
        result.append("The following node UUIDs of potential master candidates"
                      " are missing in the public key file on node %s: %s."
                      % (my_name, ", ".join(list(missing_uuids))))

    (_, key_files) = \
      ssh.GetAllUserFiles(constants.SSH_LOGIN_USER, mkdir=False, dircheck=False)
    (_, node_pub_key_file) = key_files[ssh_key_type]

    my_keys = pub_keys[my_uuid]

    node_pub_key = utils.ReadFile(node_pub_key_file)
    if node_pub_key.strip() not in my_keys:
      result.append("The SSH key of node %s does not match this node's key"
                    " in the pub key file." % my_name)
    if len(my_keys) != 1:
      result.append("There is more than one key for node %s in the public key"
                    " file." % my_name)
  else:
    if len(pub_keys.keys()) > 0:
      result.append("The public key file of node '%s' is not empty, although"
                    " the node is not a potential master candidate."
                    % my_name)

  # Check that all master candidate keys are in the authorized_keys file
  (auth_key_file, _) = \
    ssh.GetAllUserFiles(constants.SSH_LOGIN_USER, mkdir=False, dircheck=False)
  for (uuid, name, mc, _, online) in node_status_list:
    if not online:
      continue
    if uuid in missing_uuids:
      continue
    if mc:
      for key in pub_keys[uuid]:
        if not ssh.HasAuthorizedKey(auth_key_file, key):
          result.append("A SSH key of master candidate '%s' (UUID: '%s') is"
                        " not in the 'authorized_keys' file of node '%s'."
                        % (name, uuid, my_name))
    else:
      for key in pub_keys[uuid]:
        if name != my_name and ssh.HasAuthorizedKey(auth_key_file, key):
          result.append("A SSH key of normal node '%s' (UUID: '%s') is in the"
                        " 'authorized_keys' file of node '%s'."
                        % (name, uuid, my_name))
        if name == my_name and not ssh.HasAuthorizedKey(auth_key_file, key):
          result.append("A SSH key of normal node '%s' (UUID: '%s') is not"
                        " in the 'authorized_keys' file of itself."
                        % (my_name, uuid))

  return result


def _VerifySshClutter(node_status_list, my_name):
  """Verifies that the 'authorized_keys' files are not cluttered up.

  @type node_status_list: list of tuples
  @param node_status_list: list of nodes of the cluster associated with a
    couple of flags: (uuid, name, is_master_candidate,
    is_potential_master_candidate, online)
  @type my_name: str
  @param my_name: name of this node

  """
  result = []
  (auth_key_file, _) = \
    ssh.GetAllUserFiles(constants.SSH_LOGIN_USER, mkdir=False, dircheck=False)
  node_names = [name for (_, name, _, _, _) in node_status_list]
  multiple_occurrences = ssh.CheckForMultipleKeys(auth_key_file, node_names)
  if multiple_occurrences:
    msg = "There are hosts which have more than one SSH key stored for the" \
          " same user in the 'authorized_keys' file of node %s. This can be" \
          " due to an unsuccessful operation which cluttered up the" \
          " 'authorized_keys' file. We recommend to clean this up manually. " \
          % my_name
    for host, occ in multiple_occurrences.items():
      msg += "Entry for '%s' in lines %s. " % (host, utils.CommaJoin(occ))
    result.append(msg)

  return result


def VerifyNode(what, cluster_name, all_hvparams):
  """Verify the status of the local node.

  Based on the input L{what} parameter, various checks are done on the
  local node.

  If the I{filelist} key is present, this list of
  files is checksummed and the file/checksum pairs are returned.

  If the I{nodelist} key is present, we check that we have
  connectivity via ssh with the target nodes (and check the hostname
  report).

  If the I{node-net-test} key is present, we check that we have
  connectivity to the given nodes via both primary IP and, if
  applicable, secondary IPs.

  @type what: C{dict}
  @param what: a dictionary of things to check:
    - filelist: list of files for which to compute checksums
    - nodelist: list of nodes we should check ssh communication with
    - node-net-test: list of nodes we should check node daemon port
      connectivity with
    - hypervisor: list with hypervisors to run the verify for
  @type cluster_name: string
  @param cluster_name: the cluster's name
  @type all_hvparams: dict of dict of strings
  @param all_hvparams: a dictionary mapping hypervisor names to hvparams
  @rtype: dict
  @return: a dictionary with the same keys as the input dict, and
    values representing the result of the checks

  """
  result = {}
  my_name = netutils.Hostname.GetSysName()
  port = netutils.GetDaemonPort(constants.NODED)
  vm_capable = my_name not in what.get(constants.NV_NONVMNODES, [])

  _VerifyHypervisors(what, vm_capable, result, all_hvparams)
  _VerifyHvparams(what, vm_capable, result)

  if constants.NV_FILELIST in what:
    fingerprints = utils.FingerprintFiles(map(vcluster.LocalizeVirtualPath,
                                              what[constants.NV_FILELIST]))
    result[constants.NV_FILELIST] = \
      dict((vcluster.MakeVirtualPath(key), value)
           for (key, value) in fingerprints.items())

  if constants.NV_CLIENT_CERT in what:
    result[constants.NV_CLIENT_CERT] = _VerifyClientCertificate()

  if constants.NV_SSH_SETUP in what:
    node_status_list, key_type = what[constants.NV_SSH_SETUP]
    result[constants.NV_SSH_SETUP] = \
      _VerifySshSetup(node_status_list, my_name, key_type)
    if constants.NV_SSH_CLUTTER in what:
      result[constants.NV_SSH_CLUTTER] = \
        _VerifySshClutter(node_status_list, my_name)

  if constants.NV_NODELIST in what:
    (nodes, bynode, mcs) = what[constants.NV_NODELIST]

    # Add nodes from other groups (different for each node)
    try:
      nodes.extend(bynode[my_name])
    except KeyError:
      pass

    # Use a random order
    random.shuffle(nodes)

    # Try to contact all nodes
    val = {}
    ssh_port_map = ssconf.SimpleStore().GetSshPortMap()
    for node in nodes:
      # We only test if master candidates can communicate to other nodes.
      # We cannot test if normal nodes cannot communicate with other nodes,
      # because the administrator might have installed additional SSH keys,
      # over which Ganeti has no power.
      if my_name in mcs:
        success, message = _GetSshRunner(cluster_name). \
                              VerifyNodeHostname(node, ssh_port_map[node])
        if not success:
          val[node] = message

    result[constants.NV_NODELIST] = val

  if constants.NV_NODENETTEST in what:
    result[constants.NV_NODENETTEST] = tmp = {}
    my_pip = my_sip = None
    for name, pip, sip in what[constants.NV_NODENETTEST]:
      if name == my_name:
        my_pip = pip
        my_sip = sip
        break
    if not my_pip:
      tmp[my_name] = ("Can't find my own primary/secondary IP"
                      " in the node list")
    else:
      for name, pip, sip in what[constants.NV_NODENETTEST]:
        fail = []
        if not netutils.TcpPing(pip, port, source=my_pip):
          fail.append("primary")
        if sip != pip:
          if not netutils.TcpPing(sip, port, source=my_sip):
            fail.append("secondary")
        if fail:
          tmp[name] = ("failure using the %s interface(s)" %
                       " and ".join(fail))

  if constants.NV_MASTERIP in what:
    # FIXME: add checks on incoming data structures (here and in the
    # rest of the function)
    master_name, master_ip = what[constants.NV_MASTERIP]
    if master_name == my_name:
      source = constants.IP4_ADDRESS_LOCALHOST
    else:
      source = None
    result[constants.NV_MASTERIP] = netutils.TcpPing(master_ip, port,
                                                     source=source)

  if constants.NV_USERSCRIPTS in what:
    result[constants.NV_USERSCRIPTS] = \
      [script for script in what[constants.NV_USERSCRIPTS]
       if not utils.IsExecutable(script)]

  if constants.NV_OOB_PATHS in what:
    result[constants.NV_OOB_PATHS] = tmp = []
    for path in what[constants.NV_OOB_PATHS]:
      try:
        st = os.stat(path)
      except OSError, err:
        tmp.append("error stating out of band helper: %s" % err)
      else:
        if stat.S_ISREG(st.st_mode):
          if stat.S_IMODE(st.st_mode) & stat.S_IXUSR:
            tmp.append(None)
          else:
            tmp.append("out of band helper %s is not executable" % path)
        else:
          tmp.append("out of band helper %s is not a file" % path)

  if constants.NV_LVLIST in what and vm_capable:
    try:
      val = GetVolumeList(utils.ListVolumeGroups().keys())
    except RPCFail, err:
      val = str(err)
    result[constants.NV_LVLIST] = val

  _VerifyInstanceList(what, vm_capable, result, all_hvparams)

  if constants.NV_VGLIST in what and vm_capable:
    result[constants.NV_VGLIST] = utils.ListVolumeGroups()

  if constants.NV_PVLIST in what and vm_capable:
    check_exclusive_pvs = constants.NV_EXCLUSIVEPVS in what
    val = bdev.LogicalVolume.GetPVInfo(what[constants.NV_PVLIST],
                                       filter_allocatable=False,
                                       include_lvs=check_exclusive_pvs)
    if check_exclusive_pvs:
      result[constants.NV_EXCLUSIVEPVS] = _CheckExclusivePvs(val)
      for pvi in val:
        # Avoid sending useless data on the wire
        pvi.lv_list = []
    result[constants.NV_PVLIST] = map(objects.LvmPvInfo.ToDict, val)

  if constants.NV_VERSION in what:
    result[constants.NV_VERSION] = (constants.PROTOCOL_VERSION,
                                    constants.RELEASE_VERSION)

  _VerifyNodeInfo(what, vm_capable, result, all_hvparams)

  if constants.NV_DRBDVERSION in what and vm_capable:
    try:
      drbd_version = DRBD8.GetProcInfo().GetVersionString()
    except errors.BlockDeviceError, err:
      logging.warning("Can't get DRBD version", exc_info=True)
      drbd_version = str(err)
    result[constants.NV_DRBDVERSION] = drbd_version

  if constants.NV_DRBDLIST in what and vm_capable:
    try:
      used_minors = drbd.DRBD8.GetUsedDevs()
    except errors.BlockDeviceError, err:
      logging.warning("Can't get used minors list", exc_info=True)
      used_minors = str(err)
    result[constants.NV_DRBDLIST] = used_minors

  if constants.NV_DRBDHELPER in what and vm_capable:
    status = True
    try:
      payload = drbd.DRBD8.GetUsermodeHelper()
    except errors.BlockDeviceError, err:
      logging.error("Can't get DRBD usermode helper: %s", str(err))
      status = False
      payload = str(err)
    result[constants.NV_DRBDHELPER] = (status, payload)

  if constants.NV_NODESETUP in what:
    result[constants.NV_NODESETUP] = tmpr = []
    if not os.path.isdir("/sys/block") or not os.path.isdir("/sys/class/net"):
      tmpr.append("The sysfs filesystem doesn't seem to be mounted"
                  " under /sys, missing required directories /sys/block"
                  " and /sys/class/net")
    if (not os.path.isdir("/proc/sys") or
        not os.path.isfile("/proc/sysrq-trigger")):
      tmpr.append("The procfs filesystem doesn't seem to be mounted"
                  " under /proc, missing required directory /proc/sys and"
                  " the file /proc/sysrq-trigger")

  if constants.NV_TIME in what:
    result[constants.NV_TIME] = utils.SplitTime(time.time())

  if constants.NV_OSLIST in what and vm_capable:
    result[constants.NV_OSLIST] = DiagnoseOS()

  if constants.NV_BRIDGES in what and vm_capable:
    result[constants.NV_BRIDGES] = [bridge
                                    for bridge in what[constants.NV_BRIDGES]
                                    if not utils.BridgeExists(bridge)]

  if what.get(constants.NV_ACCEPTED_STORAGE_PATHS) == my_name:
    result[constants.NV_ACCEPTED_STORAGE_PATHS] = \
      filestorage.ComputeWrongFileStoragePaths()

  if what.get(constants.NV_FILE_STORAGE_PATH):
    pathresult = filestorage.CheckFileStoragePath(
      what[constants.NV_FILE_STORAGE_PATH])
    if pathresult:
      result[constants.NV_FILE_STORAGE_PATH] = pathresult

  if what.get(constants.NV_SHARED_FILE_STORAGE_PATH):
    pathresult = filestorage.CheckFileStoragePath(
      what[constants.NV_SHARED_FILE_STORAGE_PATH])
    if pathresult:
      result[constants.NV_SHARED_FILE_STORAGE_PATH] = pathresult

  return result


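# Sketch of a minimal 'what' request and its reply (the path is an example;
# real requests are assembled by the master):
#
#   what = {
#     constants.NV_FILELIST: ["/var/lib/ganeti/config.data"],
#     constants.NV_TIME: None,
#   }
#   result = VerifyNode(what, "cluster.example.com", all_hvparams={})
#   # result[constants.NV_TIME] -> (seconds, microseconds)

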
def GetCryptoTokens(token_requests):
  """Perform actions on the node's cryptographic tokens.

  Token types can be 'ssl' or 'ssh'. So far only some actions are implemented
  for 'ssl'. Action 'get' returns the digest of the public client ssl
  certificate. Action 'create' creates a new client certificate and private key
  and also returns the digest of the certificate. The third element of a
  token request is a dictionary of optional parameters for the action; so far
  only the filename is supported.

  @type token_requests: list of tuples of (string, string, dict), where the
    first string is in constants.CRYPTO_TYPES, the second in
    constants.CRYPTO_ACTIONS. The third parameter is a dictionary of string
    to string.
  @param token_requests: list of requests of cryptographic tokens and actions
    to perform on them. The actions come with a dictionary of options.
  @rtype: list of tuples (string, string)
  @return: list of tuples of the token type and the public crypto token

  """
  tokens = []
  for (token_type, action, _) in token_requests:
    if token_type not in constants.CRYPTO_TYPES:
      raise errors.ProgrammerError("Token type '%s' not supported." %
                                   token_type)
    if action not in constants.CRYPTO_ACTIONS:
      raise errors.ProgrammerError("Action '%s' is not supported." %
                                   action)
    if token_type == constants.CRYPTO_TYPE_SSL_DIGEST:
      tokens.append((token_type,
                     utils.GetCertificateDigest()))
  return tokens


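# Illustrative request for the digest of this node's client SSL certificate;
# the empty dict is the (currently unused) options argument:
#
#   GetCryptoTokens([(constants.CRYPTO_TYPE_SSL_DIGEST,
#                     constants.CRYPTO_ACTION_GET, {})])
#   # -> [(constants.CRYPTO_TYPE_SSL_DIGEST, "<digest>")]

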
def EnsureDaemon(daemon_name, run):
  """Ensures the given daemon is running or stopped.

  @type daemon_name: string
  @param daemon_name: name of the daemon (e.g., constants.KVMD)

  @type run: bool
  @param run: whether to start or stop the daemon

  @rtype: bool
  @return: 'True' if daemon successfully started/stopped,
    'False' otherwise

  """
  allowed_daemons = [constants.KVMD]

  if daemon_name not in allowed_daemons:
    fn = lambda _: False
  elif run:
    fn = utils.EnsureDaemon
  else:
    fn = utils.StopDaemon

  return fn(daemon_name)


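# Illustrative calls; only daemons in the allowed list (currently just the
# KVM daemon) are handled, anything else simply returns False:
#
#   EnsureDaemon(constants.KVMD, True)   # start it
#   EnsureDaemon(constants.KVMD, False)  # stop it
#   EnsureDaemon(constants.NODED, True)  # not allowed -> False

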
def _InitSshUpdateData(data, noded_cert_file, ssconf_store):
  (_, noded_cert) = \
    utils.ExtractX509Certificate(utils.ReadFile(noded_cert_file))
  data[constants.SSHS_NODE_DAEMON_CERTIFICATE] = noded_cert

  cluster_name = ssconf_store.GetClusterName()
  data[constants.SSHS_CLUSTER_NAME] = cluster_name


def AddNodeSshKey(node_uuid, node_name,
                  potential_master_candidates,
                  to_authorized_keys=False,
                  to_public_keys=False,
                  get_public_keys=False,
                  pub_key_file=pathutils.SSH_PUB_KEYS,
                  ssconf_store=None,
                  noded_cert_file=pathutils.NODED_CERT_FILE,
                  run_cmd_fn=ssh.RunSshCmdWithStdin):
  """Distributes a node's public SSH key across the cluster.

  Note that this function should only be executed on the master node, which
  then will copy the new node's key to all nodes in the cluster via SSH.

  Also note: at least one of the flags C{to_authorized_keys},
  C{to_public_keys}, and C{get_public_keys} has to be set to C{True} for
  the function to actually perform any actions.

  @type node_uuid: str
  @param node_uuid: the UUID of the node whose key is added
  @type node_name: str
  @param node_name: the name of the node whose key is added
  @type potential_master_candidates: list of str
  @param potential_master_candidates: list of node names of potential master
    candidates; this should match the list of uuids in the public key file
  @type to_authorized_keys: boolean
  @param to_authorized_keys: whether the key should be added to the
    C{authorized_keys} file of all nodes
  @type to_public_keys: boolean
  @param to_public_keys: whether the keys should be added to the public key file
  @type get_public_keys: boolean
  @param get_public_keys: whether the node should add the clusters' public keys
    to its {ganeti_pub_keys} file

  """
  node_list = [SshAddNodeInfo(name=node_name, uuid=node_uuid,
                              to_authorized_keys=to_authorized_keys,
                              to_public_keys=to_public_keys,
                              get_public_keys=get_public_keys)]
  return AddNodeSshKeyBulk(node_list,
                           potential_master_candidates,
                           pub_key_file=pub_key_file,
                           ssconf_store=ssconf_store,
                           noded_cert_file=noded_cert_file,
                           run_cmd_fn=run_cmd_fn)


# Node info named tuple specifically for the use with AddNodeSshKeyBulk
SshAddNodeInfo = collections.namedtuple(
  "SshAddNodeInfo",
  ["uuid",
   "name",
   "to_authorized_keys",
   "to_public_keys",
   "get_public_keys"])


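# Sketch of a bulk invocation for two hypothetical new nodes (all names,
# UUIDs and the candidate list are invented):
#
#   node_list = [
#     SshAddNodeInfo(uuid="uuid-4", name="node4.example.com",
#                    to_authorized_keys=True, to_public_keys=True,
#                    get_public_keys=True),
#     SshAddNodeInfo(uuid="uuid-5", name="node5.example.com",
#                    to_authorized_keys=False, to_public_keys=True,
#                    get_public_keys=True),
#   ]
#   errs = AddNodeSshKeyBulk(node_list,
#                            ["node1.example.com", "node4.example.com",
#                             "node5.example.com"])

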
def AddNodeSshKeyBulk(node_list,
                      potential_master_candidates,
                      pub_key_file=pathutils.SSH_PUB_KEYS,
                      ssconf_store=None,
                      noded_cert_file=pathutils.NODED_CERT_FILE,
                      run_cmd_fn=ssh.RunSshCmdWithStdin):
  """Distributes a node's public SSH key across the cluster.

  Note that this function should only be executed on the master node, which
  then will copy the new node's key to all nodes in the cluster via SSH.

  Also note: at least one of the flags C{to_authorized_keys},
  C{to_public_keys}, and C{get_public_keys} has to be set to C{True} for
  the function to actually perform any actions.

  @type node_list: list of SshAddNodeInfo tuples
  @param node_list: list of tuples containing the necessary node information for
    adding their keys
  @type potential_master_candidates: list of str
  @param potential_master_candidates: list of node names of potential master
    candidates; this should match the list of uuids in the public key file

  """
  # whether there are any keys to be added or retrieved at all
  to_authorized_keys = any([node_info.to_authorized_keys for node_info in
                            node_list])
  to_public_keys = any([node_info.to_public_keys for node_info in
                        node_list])
  get_public_keys = any([node_info.get_public_keys for node_info in
                         node_list])

  # assure that at least one of those flags is true, as the function would
  # not do anything otherwise
  assert (to_authorized_keys or to_public_keys or get_public_keys)

  if not ssconf_store:
    ssconf_store = ssconf.SimpleStore()

  for node_info in node_list:
    # replacement not necessary for keys that are not supposed to be in the
    # list of public keys
    if not node_info.to_public_keys:
      continue
    # Check and fix sanity of key file
    keys_by_name = ssh.QueryPubKeyFile([node_info.name], key_file=pub_key_file)
    keys_by_uuid = ssh.QueryPubKeyFile([node_info.uuid], key_file=pub_key_file)

    if (not keys_by_name or node_info.name not in keys_by_name) \
        and (not keys_by_uuid or node_info.uuid not in keys_by_uuid):
      raise errors.SshUpdateError(
        "No keys found for the new node '%s' (UUID %s) in the list of public"
        " SSH keys, neither for the name nor the UUID" %
        (node_info.name, node_info.uuid))
    else:
      if node_info.name in keys_by_name:
        # Replace the name by UUID in the file as the name should only be used
        # temporarily
        ssh.ReplaceNameByUuid(node_info.uuid, node_info.name,
                              error_fn=errors.SshUpdateError,
                              key_file=pub_key_file)

  # Retrieve updated map of UUIDs to keys
  keys_by_uuid = ssh.QueryPubKeyFile(
    [node_info.uuid for node_info in node_list], key_file=pub_key_file)

  # Update the master node's key files
  (auth_key_file, _) = \
    ssh.GetAllUserFiles(constants.SSH_LOGIN_USER, mkdir=False, dircheck=False)
  for node_info in node_list:
    if node_info.to_authorized_keys:
      ssh.AddAuthorizedKeys(auth_key_file, keys_by_uuid[node_info.uuid])

  base_data = {}
  _InitSshUpdateData(base_data, noded_cert_file, ssconf_store)
  cluster_name = base_data[constants.SSHS_CLUSTER_NAME]

  ssh_port_map = ssconf_store.GetSshPortMap()

  # Update the target nodes themselves
  for node_info in node_list:
    logging.debug("Updating SSH key files of target node '%s'.", node_info.name)
    if node_info.get_public_keys:
      node_data = {}
      _InitSshUpdateData(node_data, noded_cert_file, ssconf_store)
      all_keys = ssh.QueryPubKeyFile(None, key_file=pub_key_file)
      node_data[constants.SSHS_SSH_PUBLIC_KEYS] = \
        (constants.SSHS_OVERRIDE, all_keys)

      try:
        utils.RetryByNumberOfTimes(
          constants.SSHS_MAX_RETRIES,
          errors.SshUpdateError,
          run_cmd_fn, cluster_name, node_info.name, pathutils.SSH_UPDATE,
          ssh_port_map.get(node_info.name), node_data,
          debug=False, verbose=False, use_cluster_key=False,
          ask_key=False, strict_host_check=False)
      except errors.SshUpdateError as e:
        # Clean up the master's public key file if adding key fails
        if node_info.to_public_keys:
          ssh.RemovePublicKey(node_info.uuid)
        raise e

  # Update all nodes except master and the target nodes
  keys_by_uuid_auth = ssh.QueryPubKeyFile(
    [node_info.uuid for node_info in node_list
     if node_info.to_authorized_keys],
    key_file=pub_key_file)
  if to_authorized_keys:
    base_data[constants.SSHS_SSH_AUTHORIZED_KEYS] = \
      (constants.SSHS_ADD, keys_by_uuid_auth)

  pot_mc_data = base_data.copy()
  keys_by_uuid_pub = ssh.QueryPubKeyFile(
    [node_info.uuid for node_info in node_list
     if node_info.to_public_keys],
    key_file=pub_key_file)
  if to_public_keys:
    pot_mc_data[constants.SSHS_SSH_PUBLIC_KEYS] = \
      (constants.SSHS_REPLACE_OR_ADD, keys_by_uuid_pub)

  all_nodes = ssconf_store.GetNodeList()
  master_node = ssconf_store.GetMasterNode()
  online_nodes = ssconf_store.GetOnlineNodeList()

  node_errors = []
  for node in all_nodes:
    if node == master_node:
      logging.debug("Skipping master node '%s'.", master_node)
      continue
    if node not in online_nodes:
      logging.debug("Skipping offline node '%s'.", node)
      continue
    if node in potential_master_candidates:
      logging.debug("Updating SSH key files of node '%s'.", node)
      try:
        utils.RetryByNumberOfTimes(
          constants.SSHS_MAX_RETRIES,
          errors.SshUpdateError,
          run_cmd_fn, cluster_name, node, pathutils.SSH_UPDATE,
          ssh_port_map.get(node), pot_mc_data,
          debug=False, verbose=False, use_cluster_key=False,
          ask_key=False, strict_host_check=False)
      except errors.SshUpdateError as last_exception:
        error_msg = ("When adding the key of node '%s', updating SSH key"
                     " files of node '%s' failed after %s retries."
                     " Not trying again. Last error was: %s." %
                     (node_info.name, node, constants.SSHS_MAX_RETRIES,
                      last_exception))
        node_errors.append((node, error_msg))
        # We only log the error and don't throw an exception, because
        # one unreachable node shall not abort the entire procedure.
        logging.error(error_msg)

    else:
      if to_authorized_keys:
        run_cmd_fn(cluster_name, node, pathutils.SSH_UPDATE,
                   ssh_port_map.get(node), base_data,
                   debug=False, verbose=False, use_cluster_key=False,
                   ask_key=False, strict_host_check=False)

  return node_errors


def RemoveNodeSshKey(node_uuid, node_name,
                     master_candidate_uuids,
                     potential_master_candidates,
                     master_uuid=None,
                     keys_to_remove=None,
                     from_authorized_keys=False,
                     from_public_keys=False,
                     clear_authorized_keys=False,
                     clear_public_keys=False,
                     pub_key_file=pathutils.SSH_PUB_KEYS,
                     ssconf_store=None,
                     noded_cert_file=pathutils.NODED_CERT_FILE,
                     readd=False,
                     run_cmd_fn=ssh.RunSshCmdWithStdin):
  """Removes the node's SSH keys from the key files and distributes those.

  Note that at least one of the flags C{from_authorized_keys},
  C{from_public_keys}, C{clear_authorized_keys}, and C{clear_public_keys}
  has to be set to C{True} for the function to perform any action at all.
  Not doing so will trigger an assertion in the function.

  @type node_uuid: str
  @param node_uuid: UUID of the node whose key is removed
  @type node_name: str
  @param node_name: name of the node whose key is removed
1659 @type master_candidate_uuids: list of str
1660 @param master_candidate_uuids: list of UUIDs of the current master candidates
1661 @type potential_master_candidates: list of str
1662 @param potential_master_candidates: list of names of potential master
1663 candidates
1664 @type keys_to_remove: dict of str to list of str
  @param keys_to_remove: a dictionary mapping node UUIDs to lists of SSH keys
1666 to be removed. This list is supposed to be used only if the keys are not
1667 in the public keys file. This is for example the case when removing a
1668 master node's key.
1669 @type from_authorized_keys: boolean
1670 @param from_authorized_keys: whether or not the key should be removed
1671 from the C{authorized_keys} file
1672 @type from_public_keys: boolean
1673 @param from_public_keys: whether or not the key should be remove from
1674 the C{ganeti_pub_keys} file
1675 @type clear_authorized_keys: boolean
1676 @param clear_authorized_keys: whether or not the C{authorized_keys} file
1677 should be cleared on the node whose keys are removed
1678 @type clear_public_keys: boolean
  @param clear_public_keys: whether to clear the node's C{ganeti_pub_keys} file
1680 @type readd: boolean
1681 @param readd: whether this is called during a readd operation.
  @rtype: list of (string, string) tuples
  @returns: list of (node name, error message) pairs, for non-fatal errors
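
  Example (illustrative only; the UUID and name are made up)::

    RemoveNodeSshKey("1a2b3c4d", "node3.example.com",
                     master_candidate_uuids, potential_master_candidates,
                     from_authorized_keys=True, from_public_keys=True)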
1684
1685 """
1686 node_list = [SshRemoveNodeInfo(uuid=node_uuid,
1687 name=node_name,
1688 from_authorized_keys=from_authorized_keys,
1689 from_public_keys=from_public_keys,
1690 clear_authorized_keys=clear_authorized_keys,
1691 clear_public_keys=clear_public_keys)]
1692 return RemoveNodeSshKeyBulk(node_list,
1693 master_candidate_uuids,
1694 potential_master_candidates,
1695 master_uuid=master_uuid,
1696 keys_to_remove=keys_to_remove,
1697 pub_key_file=pub_key_file,
1698 ssconf_store=ssconf_store,
1699 noded_cert_file=noded_cert_file,
1700 readd=readd,
1701 run_cmd_fn=run_cmd_fn)
1702
1703
1704 # Node info named tuple specifically for the use with RemoveNodeSshKeyBulk
1705 SshRemoveNodeInfo = collections.namedtuple(
1706 "SshRemoveNodeInfo",
1707 ["uuid",
1708 "name",
1709 "from_authorized_keys",
1710 "from_public_keys",
1711 "clear_authorized_keys",
1712 "clear_public_keys"])
1713
1714
1715 def RemoveNodeSshKeyBulk(node_list,
1716 master_candidate_uuids,
1717 potential_master_candidates,
1718 master_uuid=None,
1719 keys_to_remove=None,
1720 pub_key_file=pathutils.SSH_PUB_KEYS,
1721 ssconf_store=None,
1722 noded_cert_file=pathutils.NODED_CERT_FILE,
1723 readd=False,
1724 run_cmd_fn=ssh.RunSshCmdWithStdin):
1725 """Removes the node's SSH keys from the key files and distributes those.
1726
1727 Note that at least one of the flags C{from_authorized_keys},
1728 C{from_public_keys}, C{clear_authorized_keys}, and C{clear_public_keys}
1729 of at least one node has to be set to C{True} for the function to perform any
1730 action at all. Not doing so will trigger an assertion in the function.
1731
1732 @type node_list: list of C{SshRemoveNodeInfo}.
1733 @param node_list: list of information about nodes whose keys are being removed
1734 @type master_candidate_uuids: list of str
1735 @param master_candidate_uuids: list of UUIDs of the current master candidates
1736 @type potential_master_candidates: list of str
1737 @param potential_master_candidates: list of names of potential master
1738 candidates
1739 @type keys_to_remove: dict of str to list of str
  @param keys_to_remove: a dictionary mapping node UUIDs to lists of SSH keys
1741 to be removed. This list is supposed to be used only if the keys are not
1742 in the public keys file. This is for example the case when removing a
1743 master node's key.
1744 @type readd: boolean
1745 @param readd: whether this is called during a readd operation.
  @rtype: list of (string, string) tuples
  @returns: list of (node name, error message) pairs, for non-fatal errors
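
  Example (illustrative sketch; the UUID and name are made up)::

    node_list = [SshRemoveNodeInfo(uuid="1a2b3c4d",
                                   name="node3.example.com",
                                   from_authorized_keys=True,
                                   from_public_keys=True,
                                   clear_authorized_keys=False,
                                   clear_public_keys=False)]
    RemoveNodeSshKeyBulk(node_list, master_candidate_uuids,
                         potential_master_candidates)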
1748
1749 """
1750 # Non-disruptive error messages, list of (node, msg) pairs
1751 result_msgs = []
1752
  # whether there are any keys to be removed or cleared at all
1754 from_authorized_keys = any([node_info.from_authorized_keys for node_info in
1755 node_list])
1756 from_public_keys = any([node_info.from_public_keys for node_info in
1757 node_list])
1758 clear_authorized_keys = any([node_info.clear_authorized_keys for node_info in
1759 node_list])
1760 clear_public_keys = any([node_info.clear_public_keys for node_info in
1761 node_list])
1762
1763 # Make sure at least one of these flags is true.
1764 if not (from_authorized_keys or from_public_keys or clear_authorized_keys
1765 or clear_public_keys):
1766 raise errors.SshUpdateError("No removal from any key file was requested.")
1767
1768 if not ssconf_store:
1769 ssconf_store = ssconf.SimpleStore()
1770
1771 master_node = ssconf_store.GetMasterNode()
1772 ssh_port_map = ssconf_store.GetSshPortMap()
1773
1774 all_keys_to_remove = {}
1775 if from_authorized_keys or from_public_keys:
1776 for node_info in node_list:
1777 # Skip nodes that don't actually need any keys to be removed.
1778 if not (node_info.from_authorized_keys or node_info.from_public_keys):
1779 continue
1780 if node_info.name == master_node and not keys_to_remove:
1781 raise errors.SshUpdateError("Cannot remove the master node's keys.")
1782 if keys_to_remove:
1783 keys = keys_to_remove
1784 else:
1785 keys = ssh.QueryPubKeyFile([node_info.uuid], key_file=pub_key_file)
1786 if (not keys or node_info.uuid not in keys) and not readd:
        raise errors.SshUpdateError("Node '%s' not found in the list of"
                                    " public SSH keys. It seems someone"
                                    " is trying to remove a key from"
                                    " outside the cluster!" % node_info.uuid)
1791 # During an upgrade all nodes have the master key. In this case we
1792 # should not remove it to avoid accidentally shutting down cluster
1793 # SSH communication
1794 master_keys = None
1795 if master_uuid:
1796 master_keys = ssh.QueryPubKeyFile([master_uuid],
1797 key_file=pub_key_file)
1798 for master_key in master_keys:
1799 if master_key in keys[node_info.uuid]:
1800 keys[node_info.uuid].remove(master_key)
1801
1802 all_keys_to_remove.update(keys)
1803
1804 if all_keys_to_remove:
1805 base_data = {}
1806 _InitSshUpdateData(base_data, noded_cert_file, ssconf_store)
1807 cluster_name = base_data[constants.SSHS_CLUSTER_NAME]
1808
1809 if from_authorized_keys:
1810 # UUIDs of nodes that are supposed to be removed from the
1811 # authorized_keys files.
1812 nodes_remove_from_authorized_keys = [
1813 node_info.uuid for node_info in node_list
1814 if node_info.from_authorized_keys]
1815 keys_to_remove_from_authorized_keys = dict([
1816 (uuid, keys) for (uuid, keys) in all_keys_to_remove.items()
1817 if uuid in nodes_remove_from_authorized_keys])
1818 base_data[constants.SSHS_SSH_AUTHORIZED_KEYS] = \
1819 (constants.SSHS_REMOVE, keys_to_remove_from_authorized_keys)
1820 (auth_key_file, _) = \
1821 ssh.GetAllUserFiles(constants.SSH_LOGIN_USER, mkdir=False,
1822 dircheck=False)
1823
1824 for uuid in nodes_remove_from_authorized_keys:
1825 ssh.RemoveAuthorizedKeys(auth_key_file,
1826 keys_to_remove_from_authorized_keys[uuid])
1827
1828 pot_mc_data = base_data.copy()
1829
1830 if from_public_keys:
1831 nodes_remove_from_public_keys = [
1832 node_info.uuid for node_info in node_list
1833 if node_info.from_public_keys]
1834 keys_to_remove_from_public_keys = dict([
1835 (uuid, keys) for (uuid, keys) in all_keys_to_remove.items()
1836 if uuid in nodes_remove_from_public_keys])
1837 pot_mc_data[constants.SSHS_SSH_PUBLIC_KEYS] = \
1838 (constants.SSHS_REMOVE, keys_to_remove_from_public_keys)
1839
1840 all_nodes = ssconf_store.GetNodeList()
1841 online_nodes = ssconf_store.GetOnlineNodeList()
1842 all_nodes_to_remove = [node_info.name for node_info in node_list]
    logging.debug("Removing keys of nodes '%s' from all nodes but"
                  " themselves and the master.",
                  ", ".join(all_nodes_to_remove))
1845 for node in all_nodes:
1846 if node == master_node:
1847 logging.debug("Skipping master node '%s'.", master_node)
1848 continue
1849 if node not in online_nodes:
1850 logging.debug("Skipping offline node '%s'.", node)
1851 continue
1852 if node in all_nodes_to_remove:
        logging.debug("Skipping node '%s', whose own key is being removed.",
                      node)
1854 continue
1855 ssh_port = ssh_port_map.get(node)
1856 if not ssh_port:
1857 raise errors.OpExecError("No SSH port information available for"
1858 " node '%s', map: %s." %
1859 (node, ssh_port_map))
1860 error_msg_final = ("When removing the key of node '%s', updating the"
1861 " SSH key files of node '%s' failed. Last error"
1862 " was: %s.")
1863 if node in potential_master_candidates:
1864 logging.debug("Updating key setup of potential master candidate node"
1865 " %s.", node)
1866 try:
1867 utils.RetryByNumberOfTimes(
1868 constants.SSHS_MAX_RETRIES,
1869 errors.SshUpdateError,
1870 run_cmd_fn, cluster_name, node, pathutils.SSH_UPDATE,
1871 ssh_port, pot_mc_data,
1872 debug=False, verbose=False, use_cluster_key=False,
1873 ask_key=False, strict_host_check=False)
1874 except errors.SshUpdateError as last_exception:
1875 error_msg = error_msg_final % (
1876 node_info.name, node, last_exception)
1877 result_msgs.append((node, error_msg))
1878 logging.error(error_msg)
1879
1880 else:
1881 if from_authorized_keys:
1882 logging.debug("Updating key setup of normal node %s.", node)
1883 try:
1884 utils.RetryByNumberOfTimes(
1885 constants.SSHS_MAX_RETRIES,
1886 errors.SshUpdateError,
1887 run_cmd_fn, cluster_name, node, pathutils.SSH_UPDATE,
1888 ssh_port, base_data,
1889 debug=False, verbose=False, use_cluster_key=False,
1890 ask_key=False, strict_host_check=False)
1891 except errors.SshUpdateError as last_exception:
1892 error_msg = error_msg_final % (
1893 node_info.name, node, last_exception)
1894 result_msgs.append((node, error_msg))
1895 logging.error(error_msg)
1896
1897 for node_info in node_list:
1898 if node_info.clear_authorized_keys or node_info.from_public_keys or \
1899 node_info.clear_public_keys:
1900 data = {}
1901 _InitSshUpdateData(data, noded_cert_file, ssconf_store)
1902 cluster_name = data[constants.SSHS_CLUSTER_NAME]
1903 ssh_port = ssh_port_map.get(node_info.name)
1904 if not ssh_port:
        raise errors.OpExecError("No SSH port information available for"
                                 " node '%s', which is leaving the cluster." %
                                 node_info.name)
1907
1908 if node_info.clear_authorized_keys:
1909 # The 'authorized_keys' file is not solely managed by Ganeti. Therefore,
1910 # we have to specify exactly which keys to clear to leave keys untouched
1911 # that were not added by Ganeti.
1912 other_master_candidate_uuids = [uuid for uuid in master_candidate_uuids
1913 if uuid != node_info.uuid]
1914 candidate_keys = ssh.QueryPubKeyFile(other_master_candidate_uuids,
1915 key_file=pub_key_file)
1916 data[constants.SSHS_SSH_AUTHORIZED_KEYS] = \
1917 (constants.SSHS_REMOVE, candidate_keys)
1918
1919 if node_info.clear_public_keys:
1920 data[constants.SSHS_SSH_PUBLIC_KEYS] = \
1921 (constants.SSHS_CLEAR, {})
1922 elif node_info.from_public_keys:
1923 # Since clearing the public keys subsumes removing just a single key,
1924 # we only do it if clear_public_keys is 'False'.
1925
1926 if all_keys_to_remove:
1927 data[constants.SSHS_SSH_PUBLIC_KEYS] = \
1928 (constants.SSHS_REMOVE, all_keys_to_remove)
1929
      # If there are no changes to this node's key files, just skip it
      if not (constants.SSHS_SSH_PUBLIC_KEYS in data or
              constants.SSHS_SSH_AUTHORIZED_KEYS in data):
        continue
1934
1935 logging.debug("Updating SSH key setup of target node '%s'.",
1936 node_info.name)
1937 try:
1938 utils.RetryByNumberOfTimes(
1939 constants.SSHS_MAX_RETRIES,
1940 errors.SshUpdateError,
1941 run_cmd_fn, cluster_name, node_info.name, pathutils.SSH_UPDATE,
1942 ssh_port, data,
1943 debug=False, verbose=False, use_cluster_key=False,
1944 ask_key=False, strict_host_check=False)
1945 except errors.SshUpdateError as last_exception:
1946 result_msgs.append(
1947 (node_info.name,
1948 ("Removing SSH keys from node '%s' failed."
1949 " This can happen when the node is already unreachable."
1950 " Error: %s" % (node_info.name, last_exception))))
1951
1952 if all_keys_to_remove and from_public_keys:
1953 for node_uuid in nodes_remove_from_public_keys:
1954 ssh.RemovePublicKey(node_uuid, key_file=pub_key_file)
1955
1956 return result_msgs
1957
1958
1959 def _GenerateNodeSshKey(node_uuid, node_name, ssh_port_map, ssh_key_type,
1960 ssh_key_bits, pub_key_file=pathutils.SSH_PUB_KEYS,
1961 ssconf_store=None,
1962 noded_cert_file=pathutils.NODED_CERT_FILE,
1963 run_cmd_fn=ssh.RunSshCmdWithStdin,
1964 suffix=""):
1965 """Generates the root SSH key pair on the node.
1966
  @type node_uuid: str
  @param node_uuid: UUID of the node whose key is generated
  @type node_name: str
  @param node_name: name of the node whose key is generated
1971 @type ssh_port_map: dict of str to int
1972 @param ssh_port_map: mapping of node names to their SSH port
1973 @type ssh_key_type: One of L{constants.SSHK_ALL}
1974 @param ssh_key_type: the type of SSH key to be generated
1975 @type ssh_key_bits: int
1976 @param ssh_key_bits: the length of the key to be generated
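
  Example (illustrative; UUID, name, and port are made up)::

    _GenerateNodeSshKey("1a2b3c4d", "node1.example.com",
                        {"node1.example.com": 22}, constants.SSHK_RSA, 2048)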
1977
1978 """
1979 if not ssconf_store:
1980 ssconf_store = ssconf.SimpleStore()
1981
1982 keys_by_uuid = ssh.QueryPubKeyFile([node_uuid], key_file=pub_key_file)
1983 if not keys_by_uuid or node_uuid not in keys_by_uuid:
1984 raise errors.SshUpdateError("Node %s (UUID: %s) whose key is requested to"
1985 " be regenerated is not registered in the"
1986 " public keys file." % (node_name, node_uuid))
1987
1988 data = {}
1989 _InitSshUpdateData(data, noded_cert_file, ssconf_store)
1990 cluster_name = data[constants.SSHS_CLUSTER_NAME]
1991 data[constants.SSHS_GENERATE] = (ssh_key_type, ssh_key_bits, suffix)
1992
1993 run_cmd_fn(cluster_name, node_name, pathutils.SSH_UPDATE,
1994 ssh_port_map.get(node_name), data,
1995 debug=False, verbose=False, use_cluster_key=False,
1996 ask_key=False, strict_host_check=False)
1997
1998
1999 def _GetMasterNodeUUID(node_uuid_name_map, master_node_name):
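  """Returns the UUID belonging to the given master node name.

  Sketch of the expected input and output (values are made up)::

    _GetMasterNodeUUID([("uuid-1", "node1"), ("uuid-2", "master")], "master")
    # -> "uuid-2"

  @raise errors.SshUpdateError: if there is not exactly one matching UUID

  """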
2000 master_node_uuids = [node_uuid for (node_uuid, node_name)
2001 in node_uuid_name_map
2002 if node_name == master_node_name]
2003 if len(master_node_uuids) != 1:
2004 raise errors.SshUpdateError("No (unique) master UUID found. Master node"
2005 " name: '%s', Master UUID: '%s'" %
2006 (master_node_name, master_node_uuids))
2007 return master_node_uuids[0]
2008
2009
2010 def _GetOldMasterKeys(master_node_uuid, pub_key_file):
2011 old_master_keys_by_uuid = ssh.QueryPubKeyFile([master_node_uuid],
2012 key_file=pub_key_file)
2013 if not old_master_keys_by_uuid:
2014 raise errors.SshUpdateError("No public key of the master node (UUID '%s')"
2015 " found, not generating a new key."
2016 % master_node_uuid)
2017 return old_master_keys_by_uuid
2018
2019
2020 def _GetNewMasterKey(root_keyfiles, master_node_uuid):
2021 new_master_keys = []
2022 for (_, (_, public_key_file)) in root_keyfiles.items():
2023 public_key_dir = os.path.dirname(public_key_file)
2024 public_key_file_tmp_filename = \
2025 os.path.splitext(os.path.basename(public_key_file))[0] \
2026 + constants.SSHS_MASTER_SUFFIX + ".pub"
2027 public_key_path_tmp = os.path.join(public_key_dir,
2028 public_key_file_tmp_filename)
2029 if os.path.exists(public_key_path_tmp):
2030 # for some key types, there might not be any keys
2031 key = utils.ReadFile(public_key_path_tmp)
2032 new_master_keys.append(key)
2033 if not new_master_keys:
2034 raise errors.SshUpdateError("Cannot find any type of temporary SSH key.")
2035 return {master_node_uuid: new_master_keys}
2036
2037
2038 def _ReplaceMasterKeyOnMaster(root_keyfiles):
2039 number_of_moves = 0
2040 for (_, (private_key_file, public_key_file)) in root_keyfiles.items():
2041 key_dir = os.path.dirname(public_key_file)
2042 private_key_file_tmp = \
2043 os.path.basename(private_key_file) + constants.SSHS_MASTER_SUFFIX
2044 public_key_file_tmp = private_key_file_tmp + ".pub"
2045 private_key_path_tmp = os.path.join(key_dir,
2046 private_key_file_tmp)
2047 public_key_path_tmp = os.path.join(key_dir,
2048 public_key_file_tmp)
2049 if os.path.exists(public_key_file):
2050 utils.CreateBackup(public_key_file)
2051 utils.RemoveFile(public_key_file)
2052 if os.path.exists(private_key_file):
2053 utils.CreateBackup(private_key_file)
2054 utils.RemoveFile(private_key_file)
2055 if os.path.exists(public_key_path_tmp) and \
2056 os.path.exists(private_key_path_tmp):
2057 # for some key types, there might not be any keys
2058 shutil.move(public_key_path_tmp, public_key_file)
2059 shutil.move(private_key_path_tmp, private_key_file)
2060 number_of_moves += 1
2061 if not number_of_moves:
2062 raise errors.SshUpdateError("Could not move at least one master SSH key.")
2063
2064
2065 def RenewSshKeys(node_uuids, node_names, master_candidate_uuids,
2066 potential_master_candidates, old_key_type, new_key_type,
2067 new_key_bits,
2068 ganeti_pub_keys_file=pathutils.SSH_PUB_KEYS,
2069 ssconf_store=None,
2070 noded_cert_file=pathutils.NODED_CERT_FILE,
2071 run_cmd_fn=ssh.RunSshCmdWithStdin):
2072 """Renews all SSH keys and updates authorized_keys and ganeti_pub_keys.
2073
2074 @type node_uuids: list of str
2075 @param node_uuids: list of node UUIDs whose keys should be renewed
2076 @type node_names: list of str
2077 @param node_names: list of node names whose keys should be removed. This list
2078 should match the C{node_uuids} parameter
2079 @type master_candidate_uuids: list of str
2080 @param master_candidate_uuids: list of UUIDs of master candidates or
2081 master node
2082 @type old_key_type: One of L{constants.SSHK_ALL}
2083 @param old_key_type: the type of SSH key already present on nodes
2084 @type new_key_type: One of L{constants.SSHK_ALL}
2085 @param new_key_type: the type of SSH key to be generated
2086 @type new_key_bits: int
2087 @param new_key_bits: the length of the key to be generated
2088 @type ganeti_pub_keys_file: str
  @param ganeti_pub_keys_file: file path of the public key file
2090 @type noded_cert_file: str
2091 @param noded_cert_file: path of the noded SSL certificate file
2092 @type run_cmd_fn: function
2093 @param run_cmd_fn: function to run commands on remote nodes via SSH
  @raise errors.ProgrammerError: if node_uuids and node_names don't match
  @raise errors.SshUpdateError: if a node's key is missing from the public
    key file, if a node's new SSH key could not be fetched from it, or if
    there is not exactly one entry in the public key list for the master
    node
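
  Example (illustrative sketch; the UUID and name lists are made up)::

    RenewSshKeys(["uuid-1", "uuid-2"], ["node1", "node2"],
                 master_candidate_uuids, potential_master_candidates,
                 constants.SSHK_DSA, constants.SSHK_RSA, 2048)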
2099
2100 """
2101 if not ssconf_store:
2102 ssconf_store = ssconf.SimpleStore()
2103 cluster_name = ssconf_store.GetClusterName()
2104
  if len(node_uuids) != len(node_names):
    raise errors.ProgrammerError("List of node UUIDs and node names"
                                 " does not match in length.")
2108
2109 (_, root_keyfiles) = \
2110 ssh.GetAllUserFiles(constants.SSH_LOGIN_USER, mkdir=False, dircheck=False)
2111 (_, old_pub_keyfile) = root_keyfiles[old_key_type]
2112 (_, new_pub_keyfile) = root_keyfiles[new_key_type]
2113 old_master_key = utils.ReadFile(old_pub_keyfile)
2114
2115 node_uuid_name_map = zip(node_uuids, node_names)
2116
2117 master_node_name = ssconf_store.GetMasterNode()
2118 master_node_uuid = _GetMasterNodeUUID(node_uuid_name_map, master_node_name)
2119 ssh_port_map = ssconf_store.GetSshPortMap()
2120 # List of all node errors that happened, but which did not abort the
2121 # procedure as a whole. It is important that this is a list to have a
2122 # somewhat chronological history of events.
2123 all_node_errors = []
2124
2125 # process non-master nodes
2126
2127 # keys to add in bulk at the end
2128 node_keys_to_add = []
2129
2130 # list of all nodes
2131 node_list = []
2132
  # node infos whose old keys are to be removed before generating new keys
2134 node_info_to_remove = []
2135
2136 for node_uuid, node_name in node_uuid_name_map:
2137 if node_name == master_node_name:
2138 continue
2139 master_candidate = node_uuid in master_candidate_uuids
2140 potential_master_candidate = node_name in potential_master_candidates
2141 node_list.append((node_uuid, node_name, master_candidate,
2142 potential_master_candidate))
2143
2144 keys_by_uuid = ssh.QueryPubKeyFile([node_uuid],
2145 key_file=ganeti_pub_keys_file)
2146 if not keys_by_uuid:
2147 raise errors.SshUpdateError("No public key of node %s (UUID %s) found,"
2148 " not generating a new key."
2149 % (node_name, node_uuid))
2150
2151 if master_candidate:
2152 logging.debug("Fetching old SSH key from node '%s'.", node_name)
2153 old_pub_key = ssh.ReadRemoteSshPubKeys(old_pub_keyfile,
2154 node_name, cluster_name,
2155 ssh_port_map[node_name],
2156 False, # ask_key
2157 False) # key_check
2158 if old_pub_key != old_master_key:
2159 # If we are already in a multi-key setup (that is past Ganeti 2.12),
2160 # we can safely remove the old key of the node. Otherwise, we cannot
2161 # remove that node's key, because it is also the master node's key
2162 # and that would terminate all communication from the master to the
2163 # node.
2164 node_info_to_remove.append(SshRemoveNodeInfo(
2165 uuid=node_uuid,
2166 name=node_name,
2167 from_authorized_keys=master_candidate,
2168 from_public_keys=False,
2169 clear_authorized_keys=False,
2170 clear_public_keys=False))
2171 else:
2172 logging.debug("Old key of node '%s' is the same as the current master"
2173 " key. Not deleting that key on the node.", node_name)
2174
2175 logging.debug("Removing old SSH keys of all master candidates.")
2176 if node_info_to_remove:
2177 node_errors = RemoveNodeSshKeyBulk(
2178 node_info_to_remove,
2179 master_candidate_uuids,
2180 potential_master_candidates,
2181 master_uuid=master_node_uuid)
2182 if node_errors:
2183 all_node_errors = all_node_errors + node_errors
2184
2185 for (node_uuid, node_name, master_candidate, potential_master_candidate) \
2186 in node_list:
2187
2188 logging.debug("Generating new SSH key for node '%s'.", node_name)
2189 _GenerateNodeSshKey(node_uuid, node_name, ssh_port_map, new_key_type,
2190 new_key_bits, pub_key_file=ganeti_pub_keys_file,
2191 ssconf_store=ssconf_store,
2192 noded_cert_file=noded_cert_file,
2193 run_cmd_fn=run_cmd_fn)
2194
2195 try:
2196 logging.debug("Fetching newly created SSH key from node '%s'.", node_name)
2197 pub_key = ssh.ReadRemoteSshPubKeys(new_pub_keyfile,
2198 node_name, cluster_name,
2199 ssh_port_map[node_name],
2200 False, # ask_key
2201 False) # key_check
    except Exception as err:
      raise errors.SshUpdateError("Could not fetch key of node %s"
                                  " (UUID %s): %s" %
                                  (node_name, node_uuid, err))
2205
2206 if potential_master_candidate:
2207 ssh.RemovePublicKey(node_uuid, key_file=ganeti_pub_keys_file)
2208 ssh.AddPublicKey(node_uuid, pub_key, key_file=ganeti_pub_keys_file)
2209
2210 node_info = SshAddNodeInfo(name=node_name,
2211 uuid=node_uuid,
2212 to_authorized_keys=master_candidate,
2213 to_public_keys=potential_master_candidate,
2214 get_public_keys=True)
2215 node_keys_to_add.append(node_info)
2216
2217 node_errors = AddNodeSshKeyBulk(
2218 node_keys_to_add, potential_master_candidates,
2219 pub_key_file=ganeti_pub_keys_file, ssconf_store=ssconf_store,
2220 noded_cert_file=noded_cert_file,
2221 run_cmd_fn=run_cmd_fn)
2222 if node_errors:
2223 all_node_errors = all_node_errors + node_errors
2224
2225 # Renewing the master node's key
2226
2227 # Preserve the old keys for now
2228 old_master_keys_by_uuid = _GetOldMasterKeys(master_node_uuid,
2229 ganeti_pub_keys_file)
2230
2231 # Generate a new master key with a suffix, don't touch the old one for now
  logging.debug("Generating new SSH key for the master node.")
2233 _GenerateNodeSshKey(master_node_uuid, master_node_name, ssh_port_map,
2234 new_key_type, new_key_bits,
2235 pub_key_file=ganeti_pub_keys_file,
2236 ssconf_store=ssconf_store,
2237 noded_cert_file=noded_cert_file,
2238 run_cmd_fn=run_cmd_fn,
2239 suffix=constants.SSHS_MASTER_SUFFIX)
2240 # Read newly created master key
2241 new_master_key_dict = _GetNewMasterKey(root_keyfiles, master_node_uuid)
2242
2243 # Replace master key in the master nodes' public key file
2244 ssh.RemovePublicKey(master_node_uuid, key_file=ganeti_pub_keys_file)
2245 for pub_key in new_master_key_dict[master_node_uuid]:
2246 ssh.AddPublicKey(master_node_uuid, pub_key, key_file=ganeti_pub_keys_file)
2247
  # Add the new master key to all nodes' public and authorized keys
  logging.debug("Adding new master key to all nodes.")
2250 node_errors = AddNodeSshKey(
2251 master_node_uuid, master_node_name, potential_master_candidates,
2252 to_authorized_keys=True, to_public_keys=True,
2253 get_public_keys=False, pub_key_file=ganeti_pub_keys_file,
2254 ssconf_store=ssconf_store, noded_cert_file=noded_cert_file,
2255 run_cmd_fn=run_cmd_fn)
2256 if node_errors:
2257 all_node_errors = all_node_errors + node_errors
2258
2259 # Remove the old key file and rename the new key to the non-temporary filename
2260 _ReplaceMasterKeyOnMaster(root_keyfiles)
2261
2262 # Remove old key from authorized keys
2263 (auth_key_file, _) = \
2264 ssh.GetAllUserFiles(constants.SSH_LOGIN_USER, mkdir=False, dircheck=False)
2265 ssh.RemoveAuthorizedKeys(auth_key_file,
2266 old_master_keys_by_uuid[master_node_uuid])
2267
  # Remove the old key from all nodes' authorized keys files
  logging.debug("Removing the old master key from all nodes.")
2270 node_errors = RemoveNodeSshKey(
2271 master_node_uuid, master_node_name, master_candidate_uuids,
2272 potential_master_candidates,
2273 keys_to_remove=old_master_keys_by_uuid, from_authorized_keys=True,
2274 from_public_keys=False, clear_authorized_keys=False,
2275 clear_public_keys=False)
2276 if node_errors:
2277 all_node_errors = all_node_errors + node_errors
2278
2279 return all_node_errors
2280
2281
2282 def GetBlockDevSizes(devices):
  """Return the size of the given block devices.
2284
2285 @type devices: list
2286 @param devices: list of block device nodes to query
2287 @rtype: dict
2288 @return:
2289 dictionary of all block devices under /dev (key). The value is their
2290 size in MiB.
2291
2292 {'/dev/disk/by-uuid/123456-12321231-312312-312': 124}
2293
2294 """
2295 DEV_PREFIX = "/dev/"
2296 blockdevs = {}
2297
2298 for devpath in devices:
2299 if not utils.IsBelowDir(DEV_PREFIX, devpath):
2300 continue
2301
2302 try:
2303 st = os.stat(devpath)
2304 except EnvironmentError, err:
2305 logging.warning("Error stat()'ing device %s: %s", devpath, str(err))
2306 continue
2307
2308 if stat.S_ISBLK(st.st_mode):
2309 result = utils.RunCmd(["blockdev", "--getsize64", devpath])
2310 if result.failed:
2311 # We don't want to fail, just do not list this device as available
2312 logging.warning("Cannot get size for block device %s", devpath)
2313 continue
2314
2315 size = int(result.stdout) / (1024 * 1024)
2316 blockdevs[devpath] = size
2317 return blockdevs
2318
2319
2320 def GetVolumeList(vg_names):
2321 """Compute list of logical volumes and their size.
2322
2323 @type vg_names: list
2324 @param vg_names: the volume groups whose LVs we should list, or
2325 empty for all volume groups
2326 @rtype: dict
2327 @return:
    dictionary of all partitions (key) with value being a tuple of
2329 their size (in MiB), inactive and online status::
2330
2331 {'xenvg/test1': ('20.06', True, True)}
2332
    in case of errors, an RPCFail is raised with the error
    details.
2335
2336 """
2337 lvs = {}
2338 sep = "|"
2339 if not vg_names:
2340 vg_names = []
2341 result = utils.RunCmd(["lvs", "--noheadings", "--units=m", "--nosuffix",
2342 "--separator=%s" % sep,
2343 "-ovg_name,lv_name,lv_size,lv_attr"] + vg_names)
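  # With the options above, 'lvs' prints one line per logical volume, for
  # example (illustrative): "  xenvg|test1|20.06|-wi-ao"; _LVSLINE_REGEX
  # splits such a line into (vg_name, lv_name, lv_size, lv_attr).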
2344 if result.failed:
2345 _Fail("Failed to list logical volumes, lvs output: %s", result.output)
2346
2347 for line in result.stdout.splitlines():
2348 line = line.strip()
2349 match = _LVSLINE_REGEX.match(line)
2350 if not match:
2351 logging.error("Invalid line returned from lvs output: '%s'", line)
2352 continue
2353 vg_name, name, size, attr = match.groups()
2354 inactive = attr[4] == "-"
2355 online = attr[5] == "o"
2356 virtual = attr[0] == "v"
2357 if virtual:
2358 # we don't want to report such volumes as existing, since they
2359 # don't really hold data
2360 continue
2361 lvs[vg_name + "/" + name] = (size, inactive, online)
2362
2363 return lvs
2364
2365
2366 def ListVolumeGroups():
2367 """List the volume groups and their size.
2368
2369 @rtype: dict
  @return: dictionary with volume group names as keys and their sizes
      as values
2372
2373 """
2374 return utils.ListVolumeGroups()
2375
2376
2377 def NodeVolumes():
2378 """List all volumes on this node.
2379
2380 @rtype: list
2381 @return:
2382 A list of dictionaries, each having four keys:
2383 - name: the logical volume name,
2384 - size: the size of the logical volume
2385 - dev: the physical device on which the LV lives
2386 - vg: the volume group to which it belongs
2387
    In case of errors, an RPCFail is raised and the error is
    logged.
2390
2391 Note that since a logical volume can live on multiple physical
2392 volumes, the resulting list might include a logical volume
2393 multiple times.
2394
2395 """
2396 result = utils.RunCmd(["lvs", "--noheadings", "--units=m", "--nosuffix",
2397 "--separator=|",
2398 "--options=lv_name,lv_size,devices,vg_name"])
2399 if result.failed:
2400 _Fail("Failed to list logical volumes, lvs output: %s",
2401 result.output)
2402
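  # The 'devices' column of 'lvs' looks like, e.g. (illustrative),
  # "/dev/sda1(0),/dev/sdb1(0)": parse_dev strips the "(<extent>)" suffix
  # and handle_dev splits the list into the individual device paths.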
2403 def parse_dev(dev):
2404 return dev.split("(")[0]
2405
2406 def handle_dev(dev):
2407 return [parse_dev(x) for x in dev.split(",")]
2408
2409 def map_line(line):
2410 line = [v.strip() for v in line]
2411 return [{"name": line[0], "size": line[1],
2412 "dev": dev, "vg": line[3]} for dev in handle_dev(line[2])]
2413
2414 all_devs = []
2415 for line in result.stdout.splitlines():
2416 if line.count("|") >= 3:
2417 all_devs.extend(map_line(line.split("|")))
2418 else:
2419 logging.warning("Strange line in the output from lvs: '%s'", line)
2420 return all_devs
2421
2422
2423 def BridgesExist(bridges_list):
2424 """Check if a list of bridges exist on the current node.
2425
2426 @rtype: boolean
2427 @return: C{True} if all of them exist, C{False} otherwise
2428
2429 """
2430 missing = []
2431 for bridge in bridges_list:
2432 if not utils.BridgeExists(bridge):
2433 missing.append(bridge)
2434
2435 if missing:
2436 _Fail("Missing bridges %s", utils.CommaJoin(missing))
2437
2438
2439 def GetInstanceListForHypervisor(hname, hvparams=None,
2440 get_hv_fn=hypervisor.GetHypervisor):
2441 """Provides a list of instances of the given hypervisor.
2442
2443 @type hname: string
2444 @param hname: name of the hypervisor
2445 @type hvparams: dict of strings
2446 @param hvparams: hypervisor parameters for the given hypervisor
2447 @type get_hv_fn: function
2448 @param get_hv_fn: function that returns a hypervisor for the given hypervisor
2449 name; optional parameter to increase testability
2450
2451 @rtype: list
2452 @return: a list of all running instances on the current node
2453 - instance1.example.com
2454 - instance2.example.com
2455
2456 """
2457 try:
2458 return get_hv_fn(hname).ListInstances(hvparams=hvparams)
2459 except errors.HypervisorError, err:
2460 _Fail("Error enumerating instances (hypervisor %s): %s",
2461 hname, err, exc=True)
2462
2463
2464 def GetInstanceList(hypervisor_list, all_hvparams=None,
2465 get_hv_fn=hypervisor.GetHypervisor):
2466 """Provides a list of instances.
2467
2468 @type hypervisor_list: list
2469 @param hypervisor_list: the list of hypervisors to query information
2470 @type all_hvparams: dict of dict of strings
2471 @param all_hvparams: a dictionary mapping hypervisor types to respective
2472 cluster-wide hypervisor parameters
2473 @type get_hv_fn: function
2474 @param get_hv_fn: function that returns a hypervisor for the given hypervisor
2475 name; optional parameter to increase testability
2476
2477 @rtype: list
2478 @return: a list of all running instances on the current node
2479 - instance1.example.com
2480 - instance2.example.com
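
  Example (illustrative; C{kvm_hvparams} stands for the cluster-wide KVM
  parameters)::

    GetInstanceList([constants.HT_KVM], {constants.HT_KVM: kvm_hvparams})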
2481
2482 """
2483 results = []
2484 for hname in hypervisor_list:
2485 hvparams = all_hvparams[hname]
2486 results.extend(GetInstanceListForHypervisor(hname, hvparams=hvparams,
2487 get_hv_fn=get_hv_fn))
2488 return results
2489
2490
2491 def GetInstanceInfo(instance, hname, hvparams=None):
2492 """Gives back the information about an instance as a dictionary.
2493
2494 @type instance: string
2495 @param instance: the instance name
2496 @type hname: string
2497 @param hname: the hypervisor type of the instance
2498 @type hvparams: dict of strings
2499 @param hvparams: the instance's hvparams
2500
2501 @rtype: dict
2502 @return: dictionary with the following keys:
2503 - memory: memory size of instance (int)
2504 - state: state of instance (HvInstanceState)
2505 - time: cpu time of instance (float)
2506 - vcpus: the number of vcpus (int)
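
  Example return value (illustrative; C{state} would be an
  C{hv_base.HvInstanceState})::

    {"memory": 512, "vcpus": 1, "state": state, "time": 12.7}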
2507
2508 """
2509 output = {}
2510
2511 iinfo = hypervisor.GetHypervisor(hname).GetInstanceInfo(instance,
2512 hvparams=hvparams)
2513 if iinfo is not None:
2514 output["memory"] = iinfo[2]
2515 output["vcpus"] = iinfo[3]
2516 output["state"] = iinfo[4]
2517 output["time"] = iinfo[5]
2518
2519 return output
2520
2521
2522 def GetInstanceMigratable(instance):
2523 """Computes whether an instance can be migrated.
2524
2525 @type instance: L{objects.Instance}
2526 @param instance: object representing the instance to be checked.
2527
  @rtype: None
  @raise RPCFail: if the instance is not running; missing block device
      symlinks are only logged as warnings
2532
2533 """
2534 hyper = hypervisor.GetHypervisor(instance.hypervisor)
2535 iname = instance.name
2536 if iname not in hyper.ListInstances(hvparams=instance.hvparams):
2537 _Fail("Instance %s is not running", iname)
2538
2539 for idx in range(len(instance.disks_info)):
2540 link_name = _GetBlockDevSymlinkPath(iname, idx)
2541 if not os.path.islink(link_name):
2542 logging.warning("Instance %s is missing symlink %s for disk %d",
2543 iname, link_name, idx)
2544
2545
2546 def GetAllInstancesInfo(hypervisor_list, all_hvparams):
2547 """Gather data about all instances.
2548
2549 This is the equivalent of L{GetInstanceInfo}, except that it
2550 computes data for all instances at once, thus being faster if one
2551 needs data about more than one instance.
2552
2553 @type hypervisor_list: list
2554 @param hypervisor_list: list of hypervisors to query for instance data
2555 @type all_hvparams: dict of dict of strings
2556 @param all_hvparams: mapping of hypervisor names to hvparams
2557
2558 @rtype: dict
2559 @return: dictionary of instance: data, with data having the following keys:
2560 - memory: memory size of instance (int)
      - state: state of instance (HvInstanceState)
2562 - time: cpu time of instance (float)
2563 - vcpus: the number of vcpus
2564
2565 """
2566 output = {}
2567 for hname in hypervisor_list:
2568 hvparams = all_hvparams[hname]
2569 iinfo = hypervisor.GetHypervisor(hname).GetAllInstancesInfo(hvparams)
2570 if iinfo:
2571 for name, _, memory, vcpus, state, times in iinfo:
2572 value = {
2573 "memory": memory,
2574 "vcpus": vcpus,
2575 "state": state,
2576 "time": times,
2577 }
2578 if name in output:
2579 # we only check static parameters, like memory and vcpus,
2580 # and not state and time which can change between the
2581 # invocations of the different hypervisors
2582 for key in "memory", "vcpus":
2583 if value[key] != output[name][key]:
2584 _Fail("Instance %s is running twice"
2585 " with different parameters", name)
2586 output[name] = value
2587
2588 return output
2589
2590
2591 def GetInstanceConsoleInfo(instance_param_dict,
2592 get_hv_fn=hypervisor.GetHypervisor):
2593 """Gather data about the console access of a set of instances of this node.
2594
2595 This function assumes that the caller already knows which instances are on
2596 this node, by calling a function such as L{GetAllInstancesInfo} or
2597 L{GetInstanceList}.
2598
2599 For every instance, a large amount of configuration data needs to be
2600 provided to the hypervisor interface in order to receive the console
2601 information. Whether this could or should be cut down can be discussed.
2602 The information is provided in a dictionary indexed by instance name,
2603 allowing any number of instance queries to be done.
2604
  @type instance_param_dict: dict of string to dict, mapping each instance
    name to a dictionary whose entries represent: L{objects.Instance},
    L{objects.Node}, L{objects.NodeGroup}, HvParams, BeParams
2608 @param instance_param_dict: mapping of instance name to parameters necessary
2609 for console information retrieval
2610
2611 @rtype: dict
2612 @return: dictionary of instance: data, with data having the following keys:
2613 - instance: instance name
2614 - kind: console kind
2615 - message: used with kind == CONS_MESSAGE, indicates console to be
2616 unavailable, supplies error message
2617 - host: host to connect to
2618 - port: port to use
2619 - user: user for login
2620 - command: the command, broken into parts as an array
2621 - display: unknown, potentially unused?
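
  Example of one C{instance_param_dict} entry (shape only; the values are
  the respective C{ToDict} results)::

    {"inst1.example.com": {"instance": inst_dict, "node": node_dict,
                           "group": group_dict, "hvParams": hv_dict,
                           "beParams": be_dict}}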
2622
2623 """
2624
2625 output = {}
2626 for inst_name in instance_param_dict:
2627 instance = instance_param_dict[inst_name]["instance"]
2628 pnode = instance_param_dict[inst_name]["node"]
2629 group = instance_param_dict[inst_name]["group"]
2630 hvparams = instance_param_dict[inst_name]["hvParams"]
2631 beparams = instance_param_dict[inst_name]["beParams"]
2632
2633 instance = objects.Instance.FromDict(instance)
2634 pnode = objects.Node.FromDict(pnode)
2635 group = objects.NodeGroup.FromDict(group)
2636
2637 h = get_hv_fn(instance.hypervisor)
2638 output[inst_name] = h.GetInstanceConsole(instance, pnode, group,
2639 hvparams, beparams).ToDict()
2640
2641 return output
2642
2643
2644 def _InstanceLogName(kind, os_name, instance, component):
2645 """Compute the OS log filename for a given instance and operation.
2646
2647 The instance name and os name are passed in as strings since not all
2648 operations have these as part of an instance object.
2649
2650 @type kind: string
2651 @param kind: the operation type (e.g. add, import, etc.)
2652 @type os_name: string
2653 @param os_name: the os name
2654 @type instance: string
2655 @param instance: the name of the instance being imported/added/etc.
2656 @type component: string or None
2657 @param component: the name of the component of the instance being
2658 transferred
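
  The resulting path has the form
  C{<LOG_OS_DIR>/<kind>-<os_name>-<instance>[-<component>]-<timestamp>.log}.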
2659
2660 """
2661 # TODO: Use tempfile.mkstemp to create unique filename
2662 if component:
2663 assert "/" not in component
2664 c_msg = "-%s" % component
2665 else:
2666 c_msg = ""
2667 base = ("%s-%s-%s%s-%s.log" %
2668 (kind, os_name, instance, c_msg, utils.TimestampForFilename()))
2669 return utils.PathJoin(pathutils.LOG_OS_DIR, base)
2670
2671
2672 def InstanceOsAdd(instance, reinstall, debug):
2673 """Add an OS to an instance.
2674
2675 @type instance: L{objects.Instance}
2676 @param instance: Instance whose OS is to be installed
2677 @type reinstall: boolean
2678 @param reinstall: whether this is an instance reinstall
2679 @type debug: integer
2680 @param debug: debug level, passed to the OS scripts
2681 @rtype: None
2682
2683 """
2684 inst_os = OSFromDisk(instance.os)
2685
2686 create_env = OSEnvironment(instance, inst_os, debug)
2687 if reinstall:
2688 create_env["INSTANCE_REINSTALL"] = "1"
2689
2690 logfile = _InstanceLogName("add", instance.os, instance.name, None)
2691
2692 result = utils.RunCmd([inst_os.create_script], env=create_env,
2693 cwd=inst_os.path, output=logfile, reset_env=True)
2694 if result.failed:
2695 logging.error("os create command '%s' returned error: %s, logfile: %s,"
2696 " output: %s", result.cmd, result.fail_reason, logfile,
2697 result.output)
2698 lines = [utils.SafeEncode(val)
2699 for val in utils.TailFile(logfile, lines=20)]
2700 _Fail("OS create script failed (%s), last lines in the"
2701 " log file:\n%s", result.fail_reason, "\n".join(lines), log=False)
2702
2703
2704 def RunRenameInstance(instance, old_name, debug):
2705 """Run the OS rename script for an instance.
2706
2707 @type instance: L{objects.Instance}
2708 @param instance: Instance whose OS is to be installed
2709 @type old_name: string
2710 @param old_name: previous instance name
2711 @type debug: integer
2712 @param debug: debug level, passed to the OS scripts
2713 @rtype: boolean
2714 @return: the success of the operation
2715
2716 """
2717 inst_os = OSFromDisk(instance.os)
2718
2719 rename_env = OSEnvironment(instance, inst_os, debug)
2720 rename_env["OLD_INSTANCE_NAME"] = old_name
2721
2722 logfile = _InstanceLogName("rename", instance.os,
2723 "%s-%s" % (old_name, instance.name), None)
2724
2725 result = utils.RunCmd([inst_os.rename_script], env=rename_env,
2726 cwd=inst_os.path, output=logfile, reset_env=True)
2727
2728 if result.failed:
2729 logging.error("os create command '%s' returned error: %s output: %s",
2730 result.cmd, result.fail_reason, result.output)
2731 lines = [utils.SafeEncode(val)
2732 for val in utils.TailFile(logfile, lines=20)]
2733 _Fail("OS rename script failed (%s), last lines in the"
2734 " log file:\n%s", result.fail_reason, "\n".join(lines), log=False)
2735
2736
2737 def _GetBlockDevSymlinkPath(instance_name, idx, _dir=None):
2738 """Returns symlink path for block device.
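
  For instance C{inst1.example.com} and disk index 0, the result has the
  form C{<DISK_LINKS_DIR>/inst1.example.com<DISK_SEPARATOR>0}.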
2739
2740 """
2741 if _dir is None:
2742 _dir = pathutils.DISK_LINKS_DIR
2743
2744 return utils.PathJoin(_dir,
2745 ("%s%s%s" %
2746 (instance_name, constants.DISK_SEPARATOR, idx)))
2747
2748
2749 def _SymlinkBlockDev(instance_name, device_path, idx):
  """Set up symlinks to an instance's block device.

  This is an auxiliary function run when an instance is started (on the
  primary node) or when an instance is migrated (on the target node).

2756 @param instance_name: the name of the target instance
2757 @param device_path: path of the physical block device, on the node
2758 @param idx: the disk index
2759 @return: absolute path to the disk's symlink
2760
2761 """
2762 # In case we have only a userspace access URI, device_path is None
2763 if not device_path:
2764 return None
2765
2766 link_name = _GetBlockDevSymlinkPath(instance_name, idx)
2767 try:
2768 os.symlink(device_path, link_name)
2769 except OSError, err:
2770 if err.errno == errno.EEXIST:
2771 if (not os.path.islink(link_name) or
2772 os.readlink(link_name) != device_path):
2773 os.remove(link_name)
2774 os.symlink(device_path, link_name)
2775 else:
2776 raise
2777
2778 return link_name
2779
2780
2781 def _RemoveBlockDevLinks(instance_name, disks):
2782 """Remove the block device symlinks belonging to the given instance.
2783
2784 """
2785 for idx, _ in enumerate(disks):
2786 link_name = _GetBlockDevSymlinkPath(instance_name, idx)
2787 if os.path.islink(link_name):
2788 try:
2789 os.remove(link_name)
2790 except OSError:
2791 logging.exception("Can't remove symlink '%s'", link_name)
2792
2793
2794 def _CalculateDeviceURI(instance, disk, device):
2795 """Get the URI for the device.
2796
2797 @type instance: L{objects.Instance}
2798 @param instance: the instance which disk belongs to
2799 @type disk: L{objects.Disk}
2800 @param disk: the target disk object
2801 @type device: L{bdev.BlockDev}
2802 @param device: the corresponding BlockDevice
2803 @rtype: string
2804 @return: the device uri if any else None
2805
2806 """
2807 access_mode = disk.params.get(constants.LDP_ACCESS,
2808 constants.DISK_KERNELSPACE)
2809 if access_mode == constants.DISK_USERSPACE:
2810 # This can raise errors.BlockDeviceError
2811 return device.GetUserspaceAccessUri(instance.hypervisor)
2812 else:
2813 return None
2814
2815
2816 def _GatherAndLinkBlockDevs(instance):
2817 """Set up an instance's block device(s).
2818
2819 This is run on the primary node at instance startup. The block
2820 devices must be already assembled.
2821
2822 @type instance: L{objects.Instance}
2823 @param instance: the instance whose disks we should assemble
2824 @rtype: list
2825 @return: list of (disk_object, link_name, drive_uri)
2826
2827 """
2828 block_devices = []
2829 for idx, disk in enumerate(instance.disks_info):
2830 device = _RecursiveFindBD(disk)
2831 if device is None:
2832 raise errors.BlockDeviceError("Block device '%s' is not set up." %
2833 str(disk))
2834 device.Open()
2835 try:
2836 link_name = _SymlinkBlockDev(instance.name, device.dev_path, idx)
2837 except OSError, e:
2838 raise errors.BlockDeviceError("Cannot create block device symlink: %s" %
2839 e.strerror)
2840 uri = _CalculateDeviceURI(instance, disk, device)
2841
2842 block_devices.append((disk, link_name, uri))
2843
2844 return block_devices
2845
2846
2847 def _IsInstanceUserDown(instance_info):
2848 return instance_info and \
2849 "state" in instance_info and \
2850 hv_base.HvInstanceState.IsShutdown(instance_info["state"])
2851
2852
2853 def _GetInstanceInfo(instance):
  """Helper function for L{GetInstanceInfo}."""
2855 return GetInstanceInfo(instance.name, instance.hypervisor,
2856 hvparams=instance.hvparams)
2857
2858
2859 def StartInstance(instance, startup_paused, reason, store_reason=True):
2860 """Start an instance.
2861
2862 @type instance: L{objects.Instance}
2863 @param instance: the instance object
  @type startup_paused: bool
  @param startup_paused: pause instance at startup?
2866 @type reason: list of reasons
2867 @param reason: the reason trail for this startup
2868 @type store_reason: boolean
2869 @param store_reason: whether to store the shutdown reason trail on file
2870 @rtype: None
2871
2872 """
2873 instance_info = _GetInstanceInfo(instance)
2874
2875 if instance_info and not _IsInstanceUserDown(instance_info):
2876 logging.info("Instance '%s' already running, not starting", instance.name)
2877 return
2878
2879 try:
2880 block_devices = _GatherAndLinkBlockDevs(instance)
2881 hyper = hypervisor.GetHypervisor(instance.hypervisor)
2882 hyper.StartInstance(instance, block_devices, startup_paused)
2883 if store_reason:
2884 _StoreInstReasonTrail(instance.name, reason)
2885 except errors.BlockDeviceError, err:
2886 _Fail("Block device error: %s", err, exc=True)
2887 except errors.HypervisorError, err:
2888 _RemoveBlockDevLinks(instance.name, instance.disks_info)
2889 _Fail("Hypervisor error: %s", err, exc=True)
2890
2891
2892 def InstanceShutdown(instance, timeout, reason, store_reason=True):
2893 """Shut an instance down.
2894
  @note: this function uses polling with a hardcoded retry interval.
2896
2897 @type instance: L{objects.Instance}
2898 @param instance: the instance object
2899 @type timeout: integer
2900 @param timeout: maximum timeout for soft shutdown
2901 @type reason: list of reasons
2902 @param reason: the reason trail for this shutdown
2903 @type store_reason: boolean
2904 @param store_reason: whether to store the shutdown reason trail on file
2905 @rtype: None
2906
2907 """
2908 hyper = hypervisor.GetHypervisor(instance.hypervisor)
2909
2910 if not _GetInstanceInfo(instance):
2911 logging.info("Instance '%s' not running, doing nothing", instance.name)
2912 return
2913
2914 class _TryShutdown(object):
2915 def __init__(self):
2916 self.tried_once = False
2917
2918 def __call__(self):
2919 if not _GetInstanceInfo(instance):
2920 return
2921
2922 try:
2923 hyper.StopInstance(instance, retry=self.tried_once, timeout=timeout)
2924 if store_reason:
2925 _StoreInstReasonTrail(instance.name, reason)
2926 except errors.HypervisorError, err:
2927 # if the instance is no longer existing, consider this a
2928 # success and go to cleanup
2929 if not _GetInstanceInfo(instance):
2930 return
2931
2932 _Fail("Failed to stop instance '%s': %s", instance.name, err)
2933
2934 self.tried_once = True
2935
2936 raise utils.RetryAgain()
2937
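  # Poll _TryShutdown with a 5 second interval until 'timeout' expires; if
  # the soft shutdown does not succeed by then, fall back to forcefully
  # destroying the instance below.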
2938 try:
2939 utils.Retry(_TryShutdown(), 5, timeout)
2940 except utils.RetryTimeout:
2941 # the shutdown did not succeed
2942 logging.error("Shutdown of '%s' unsuccessful, forcing", instance.name)
2943
2944 try:
2945 hyper.StopInstance(instance, force=True)
2946 except errors.HypervisorError, err:
2947 # only raise an error if the instance still exists, otherwise
2948 # the error could simply be "instance ... unknown"!
2949 if _GetInstanceInfo(instance):
2950 _Fail("Failed to force stop instance '%s': %s", instance.name, err)
2951
2952 time.sleep(1)
2953
2954 if _GetInstanceInfo(instance):
      _Fail("Could not shut down instance '%s' even by destroying it",
            instance.name)
2956
2957 try:
2958 hyper.CleanupInstance(instance.name)
2959 except errors.HypervisorError, err:
2960 logging.warning("Failed to execute post-shutdown cleanup step: %s", err)
2961
2962 _RemoveBlockDevLinks(instance.name, instance.disks_info)
2963
2964
2965 def InstanceReboot(instance, reboot_type, shutdown_timeout, reason):
2966 """Reboot an instance.
2967
2968 @type instance: L{objects.Instance}
2969 @param instance: the instance object to reboot
2970 @type reboot_type: str
  @param reboot_type: the type of reboot, one of the following
2972 constants:
2973 - L{constants.INSTANCE_REBOOT_SOFT}: only reboot the
2974 instance OS, do not recreate the VM
2975 - L{constants.INSTANCE_REBOOT_HARD}: tear down and
2976 restart the VM (at the hypervisor level)
2977 - the other reboot type (L{constants.INSTANCE_REBOOT_FULL}) is
2978 not accepted here, since that mode is handled differently, in
2979 cmdlib, and translates into full stop and start of the
2980 instance (instead of a call_instance_reboot RPC)
2981 @type shutdown_timeout: integer
2982 @param shutdown_timeout: maximum timeout for soft shutdown
2983 @type reason: list of reasons
2984 @param reason: the reason trail for this reboot
2985 @rtype: None
2986
2987 """
2988 # TODO: this is inconsistent with 'StartInstance' and 'InstanceShutdown'
2989 # because those functions simply 'return' on error whereas this one
2990 # raises an exception with '_Fail'
2991 if not _GetInstanceInfo(instance):
2992 _Fail("Cannot reboot instance '%s' that is not running", instance.name)
2993
2994 hyper = hypervisor.GetHypervisor(instance.hypervisor)
2995 if reboot_type == constants.INSTANCE_REBOOT_SOFT:
2996 try:
2997 hyper.RebootInstance(instance)
2998 except errors.HypervisorError, err:
2999 _Fail("Failed to soft reboot instance '%s': %s", instance.name, err)
3000 elif reboot_type == constants.INSTANCE_REBOOT_HARD:
3001 try:
3002 InstanceShutdown(instance, shutdown_timeout, reason, store_reason=False)
3003 result = StartInstance(instance, False, reason, store_reason=False)
3004 _StoreInstReasonTrail(instance.name, reason)
3005 return result
3006 except errors.HypervisorError, err:
3007 _Fail("Failed to hard reboot instance '%s': %s", instance.name, err)
3008 else:
3009 _Fail("Invalid reboot_type received: '%s'", reboot_type)
3010
3011
3012 def InstanceBalloonMemory(instance, memory):
3013 """Resize an instance's memory.
3014
3015 @type instance: L{objects.Instance}
3016 @param instance: the instance object
3017 @type memory: int
3018 @param memory: new memory amount in MB
3019 @rtype: None
3020
3021 """
3022 hyper = hypervisor.GetHypervisor(instance.hypervisor)
3023 running = hyper.ListInstances(hvparams=instance.hvparams)
3024 if instance.name not in running:
3025 logging.info("Instance %s is not running, cannot balloon", instance.name)
3026 return
3027 try:
3028 hyper.BalloonInstanceMemory(instance, memory)
3029 except errors.HypervisorError, err:
3030 _Fail("Failed to balloon instance memory: %s", err, exc=True)
3031
3032
3033 def MigrationInfo(instance):
3034 """Gather information about an instance to be migrated.
3035
3036 @type instance: L{objects.Instance}
3037 @param instance: the instance definition
3038
3039 """
3040 hyper = hypervisor.GetHypervisor(instance.hypervisor)
3041 try:
3042 info = hyper.MigrationInfo(instance)
3043 except errors.HypervisorError, err:
3044 _Fail("Failed to fetch migration information: %s", err, exc=True)
3045 return info
3046
3047
3048 def AcceptInstance(instance, info, target):
3049 """Prepare the node to accept an instance.
3050
3051 @type instance: L{objects.Instance}
3052 @param instance: the instance definition
3053 @type info: string/data (opaque)
3054 @param info: migration information, from the source node
3055 @type target: string
3056 @param target: target host (usually ip), on this node
3057
3058 """
3059 hyper = hypervisor.GetHypervisor(instance.hypervisor)
3060 try:
3061 hyper.AcceptInstance(instance, info, target)
3062 except errors.HypervisorError, err:
3063 _Fail("Failed to accept instance: %s", err, exc=True)
3064
3065
3066 def FinalizeMigrationDst(instance, info, success):
3067 """Finalize any preparation to accept an instance.
3068
3069 @type instance: L{objects.Instance}
3070 @param instance: the instance definition
3071 @type info: string/data (opaque)
3072 @param info: migration information, from the source node
3073 @type success: boolean
3074 @param success: whether the migration was a success or a failure
3075
3076 """
3077 hyper = hypervisor.GetHypervisor(instance.hypervisor)
3078 try:
3079 hyper.FinalizeMigrationDst(instance, info, success)
3080 except errors.HypervisorError, err:
3081 _Fail("Failed to finalize migration on the target node: %s", err, exc=True)
3082
3083
3084 def MigrateInstance(cluster_name, instance, target, live):
3085 """Migrates an instance to another node.
3086
3087 @type cluster_name: string
3088 @param cluster_name: name of the cluster
3089 @type instance: L{objects.Instance}
3090 @param instance: the instance definition
3091 @type target: string
3092 @param target: the target node name
3093 @type live: boolean
3094 @param live: whether the migration should be done live or not (the
3095 interpretation of this parameter is left to the hypervisor)
3096 @raise RPCFail: if migration fails for some reason
3097
3098 """
3099 hyper = hypervisor.GetHypervisor(instance.hypervisor)
3100
3101 try:
3102 hyper.MigrateInstance(cluster_name, instance, target, live)
3103 except errors.HypervisorError, err:
3104 _Fail("Failed to migrate instance: %s", err, exc=True)
3105
3106
3107 def FinalizeMigrationSource(instance, success, live):
3108 """Finalize the instance migration on the source node.
3109
3110 @type instance: L{objects.Instance}
3111 @param instance: the instance definition of the migrated instance
3112 @type success: bool
3113 @param success: whether the migration succeeded or not
3114 @type live: bool
3115 @param live: whether the user requested a live migration or not
3116 @raise RPCFail: If the execution fails for some reason
3117
3118 """
3119 hyper = hypervisor.GetHypervisor(instance.hypervisor)
3120
3121 try:
3122 hyper.FinalizeMigrationSource(instance, success, live)
3123 except Exception, err: # pylint: disable=W0703
3124 _Fail("Failed to finalize the migration on the source node: %s", err,
3125 exc=True)
3126
3127
3128 def GetMigrationStatus(instance):
3129 """Get the migration status
3130
3131 @type instance: L{objects.Instance}
3132 @param instance: the instance that is being migrated
3133 @rtype: L{objects.MigrationStatus}
3134 @return: the status of the current migration (one of
3135 L{constants.HV_MIGRATION_VALID_STATUSES}), plus any additional
3136 progress info that can be retrieved from the hypervisor
3137 @raise RPCFail: If the migration status cannot be retrieved
3138
3139 """
3140 hyper = hypervisor.GetHypervisor(instance.hypervisor)
3141 try:
3142 return hyper.GetMigrationStatus(instance)
3143 except Exception, err: # pylint: disable=W0703
3144 _Fail("Failed to get migration status: %s", err, exc=True)
3145
3146
3147 def HotplugDevice(instance, action, dev_type, device, extra, seq):
3148 """Hotplug a device
3149
  Hotplug is currently supported only for the KVM hypervisor.

  @type instance: L{objects.Instance}
3152 @param instance: the instance to which we hotplug a device
3153 @type action: string
3154 @param action: the hotplug action to perform
3155 @type dev_type: string
3156 @param dev_type: the device type to hotplug
3157 @type device: either L{objects.NIC} or L{objects.Disk}
3158 @param device: the device object to hotplug
3159 @type extra: tuple
3160 @param extra: extra info used for disk hotplug (disk link, drive uri)
3161 @type seq: int
3162 @param seq: the index of the device from the master's perspective
3163 @raise RPCFail: in case the instance does not have the KVM hypervisor
3164
3165 """
3166 hyper = hypervisor.GetHypervisor(instance.hypervisor)
3167 try:
3168 hyper.VerifyHotplugSupport(instance, action, dev_type)
3169 except errors.HotplugError, err:
3170 _Fail("Hotplug is not supported: %s", err)
3171
3172 if action == constants.HOTPLUG_ACTION_ADD:
3173 fn = hyper.HotAddDevice
3174 elif action == constants.HOTPLUG_ACTION_REMOVE:
3175 fn = hyper.HotDelDevice
3176 elif action == constants.HOTPLUG_ACTION_MODIFY:
3177 fn = hyper.HotModDevice
3178 else:
3179 assert action in constants.HOTPLUG_ALL_ACTIONS
3180
3181 return fn(instance, dev_type, device, extra, seq)
3182
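# Hedged usage sketch (not part of the original module): a caller adding a
# NIC through the dispatch table above might look like the following,
# assuming "instance" and "nic" objects are at hand and the
# constants.HOTPLUG_* names carry their documented meanings:
#
#   HotplugDevice(instance, constants.HOTPLUG_ACTION_ADD,
#                 constants.HOTPLUG_TARGET_NIC, nic, None, 0)
#
# Note that the assert in the else branch only documents that
# HOTPLUG_ALL_ACTIONS is exhaustive; an action that passed the assert
# without matching a branch would leave "fn" unbound.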
3183
3184 def HotplugSupported(instance):
3185 """Checks if hotplug is generally supported.
3186
3187 """
3188 hyper = hypervisor.GetHypervisor(instance.hypervisor)
3189 try:
3190 hyper.HotplugSupported(instance)
3191 except errors.HotplugError, err:
3192 _Fail("Hotplug is not supported: %s", err)
3193
3194
3195 def ModifyInstanceMetadata(metadata):
3196 """Sends instance data to the metadata daemon.
3197
3198 Uses the Luxi transport layer to communicate with the metadata
3199 daemon configuration server. It starts the metadata daemon if it is
3200 not running.
3201 The daemon must be enabled at configuration time.
3202
3203 @type metadata: dict
3204 @param metadata: instance metadata obtained by calling
3205 L{objects.Instance.ToDict} on an instance object
3206
3207 """
3208 if not constants.ENABLE_METAD:
3209 raise errors.ProgrammerError("The metadata deamon is disabled, yet"
3210 " ModifyInstanceMetadata has been called")
3211
3212 if not utils.IsDaemonAlive(constants.METAD):
3213 result = utils.RunCmd([pathutils.DAEMON_UTIL, "start", constants.METAD])
3214 if result.failed:
3215 raise errors.HypervisorError("Failed to start metadata daemon")
3216
3217 with contextlib.closing(metad.Client()) as client:
3218 client.UpdateConfig(metadata)
3219
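# Hedged usage sketch: the expected caller-side preparation, given an
# L{objects.Instance} named "instance" (hypothetical variable):
#
#   ModifyInstanceMetadata(instance.ToDict())
#
# The daemon is started lazily above, so callers only need the metadata
# daemon to be enabled at configuration time, not already running.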
3220
3221 def BlockdevCreate(disk, size, owner, on_primary, info, excl_stor):
3222 """Creates a block device for an instance.
3223
3224 @type disk: L{objects.Disk}
3225 @param disk: the object describing the disk we should create
3226 @type size: int
3227 @param size: the size of the physical underlying device, in MiB
3228 @type owner: str
3229 @param owner: the name of the instance for which disk is created,
3230 used for device cache data
3231 @type on_primary: boolean
3232 @param on_primary: indicates if it is the primary node or not
3233 @type info: string
3234 @param info: string that will be sent to the physical device
3235 creation, used for example to set (LVM) tags on LVs
3236 @type excl_stor: boolean
3237 @param excl_stor: Whether exclusive_storage is active
3238
3239 @return: the new unique_id of the device (this can sometimes be
3240 computed only after creation), or None. On secondary nodes,
3241 it's not required to return anything.
3242
3243 """
3244 # TODO: remove the obsolete "size" argument
3245 # pylint: disable=W0613
3246 clist = []
3247 if disk.children:
3248 for child in disk.children:
3249 try:
3250 crdev = _RecursiveAssembleBD(child, owner, on_primary)
3251 except errors.BlockDeviceError, err:
3252 _Fail("Can't assemble device %s: %s", child, err)
3253 if on_primary or disk.AssembleOnSecondary():
3254 # we need the children open in case the device itself has to
3255 # be assembled
3256 try:
3257 # pylint: disable=E1103
3258 crdev.Open()
3259 except errors.BlockDeviceError, err:
3260 _Fail("Can't make child '%s' read-write: %s", child, err)
3261 clist.append(crdev)
3262
3263 try:
3264 device = bdev.Create(disk, clist, excl_stor)
3265 except errors.BlockDeviceError, err:
3266 _Fail("Can't create block device: %s", err)
3267
3268 if on_primary or disk.AssembleOnSecondary():
3269 try:
3270 device.Assemble()
3271 except errors.BlockDeviceError, err:
3272 _Fail("Can't assemble device after creation, unusual event: %s", err)
3273 if on_primary or disk.OpenOnSecondary():
3274 try:
3275 device.Open(force=True)
3276 except errors.BlockDeviceError, err:
3277 _Fail("Can't make device r/w after creation, unusual event: %s", err)
3278 DevCacheManager.UpdateCache(device.dev_path, owner,
3279 on_primary, disk.iv_name)
3280
3281 device.SetInfo(info)
3282
3283 return device.unique_id
3284
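# Illustrative walk-through (values are hypothetical): for a DRBD disk
# with two LV children on the primary node, the code above roughly does:
#
#   clist = [lv_data_bdev, lv_meta_bdev]  # children assembled and opened
#   device = bdev.Create(disk, clist, excl_stor)
#   device.Assemble()
#   device.Open(force=True)  # only if on_primary or disk.OpenOnSecondary()
#
# before caching the device path and returning device.unique_id.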
3285
3286 def _DumpDevice(source_path, target_path, offset, size, truncate):
3287 """This function images/wipes the device using a local file.
3288
3289 @type source_path: string
3290 @param source_path: path of the image or data source (e.g., "/dev/zero")
3291
3292 @type target_path: string
3293 @param target_path: path of the device to image/wipe
3294
3295 @type offset: int
3296 @param offset: offset in MiB in the output file
3297
3298 @type size: int
3299 @param size: maximum size in MiB to write (data source might be smaller)
3300
3301 @type truncate: bool
3302 @param truncate: whether the file should be truncated
3303
3304 @return: None
3305 @raise RPCFail: in case of failure
3306
3307 """
3308 # Internal sizes are always in Mebibytes; if the following "dd" command
3309 # should use a different block size the offset and size given to this
3310 # function must be adjusted accordingly before being passed to "dd".
3311 block_size = constants.DD_BLOCK_SIZE
3312
3313 cmd = [constants.DD_CMD, "if=%s" % source_path, "seek=%d" % offset,
3314 "bs=%s" % block_size, "oflag=direct", "of=%s" % target_path,
3315 "count=%d" % size]
3316
3317 if not truncate:
3318 cmd.append("conv=notrunc")
3319
3320 result = utils.RunCmd(cmd)
3321
3322 if result.failed:
3323 _Fail("Dump command '%s' exited with error: %s; output: %s", result.cmd,
3324 result.fail_reason, result.output)
3325
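# Illustrative command line (assuming constants.DD_BLOCK_SIZE is the
# 1 MiB implied by the MiB-based offset/size contract above): for
# source_path="/dev/zero", target_path="/dev/xvdb", offset=0, size=128
# and truncate=False, the executed command is roughly:
#
#   dd if=/dev/zero seek=0 bs=1048576 oflag=direct of=/dev/xvdb \
#       count=128 conv=notrunc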
3326
3327 def _DownloadAndDumpDevice(source_url, target_path, size):
3328 """This function images a device using a downloaded image file.
3329
3330 @type source_url: string
3331 @param source_url: URL of image to dump to disk
3332
3333 @type target_path: string
3334 @param target_path: path of the device to image
3335
3336 @type size: int
3337 @param size: maximum size in MiB to write (data source might be smaller)
3338
3339 @rtype: NoneType
3340 @return: None
3341 @raise RPCFail: in case of download or write failures
3342
3343 """
3344 class DDParams(object):
3345 def __init__(self, current_size, total_size):
3346 self.current_size = current_size
3347 self.total_size = total_size
3348 self.image_size_error = False
3349
3350 def dd_write(ddparams, out):
3351 if ddparams.current_size < ddparams.total_size:
3352 ddparams.current_size += len(out)
3353 target_file.write(out)
3354 else:
3355 ddparams.image_size_error = True
3356 return -1
3357
3358 target_file = open(target_path, "r+")
3359 ddparams = DDParams(0, 1024 * 1024 * size)
3360
3361 curl = pycurl.Curl()
3362 curl.setopt(pycurl.VERBOSE, True)
3363 curl.setopt(pycurl.NOSIGNAL, True)
3364 curl.setopt(pycurl.USERAGENT, http.HTTP_GANETI_VERSION)
3365 curl.setopt(pycurl.URL, source_url)
3366 curl.setopt(pycurl.WRITEFUNCTION, lambda out: dd_write(ddparams, out))
3367
3368 try:
3369 curl.perform()
3370 except pycurl.error:
3371 if ddparams.image_size_error:
3372 _Fail("Disk image larger than the disk")
3373 else:
3374 raise
3375
3376 target_file.close()
3377
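# Sketch of the WRITEFUNCTION contract used above (illustrative values):
# pycurl treats any callback return value that differs from len(out) as a
# write error and aborts the transfer, so:
#
#   ddparams = DDParams(0, 1024 * 1024)  # a 1 MiB budget
#   dd_write(ddparams, chunk)            # None while under budget: written
#   dd_write(ddparams, chunk)            # -1 once over: perform() raises
#
# which is how an oversized image surfaces as the pycurl.error handled
# above and is reported as "Disk image larger than the disk".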
3378
3379 def BlockdevConvert(src_disk, target_disk):
3380 """Copies data from source block device to target.
3381
3382 This function gets the export and import commands from the source and
3383 target devices respectively, then concatenates them into a single
3384 command using a pipe ("|"). Finally, it executes the unified command
3385 that transfers the data between the devices during the disk template
3386 conversion operation.
3387
3388 @type src_disk: L{objects.Disk}
3389 @param src_disk: the disk object we want to copy from
3390 @type target_disk: L{objects.Disk}
3391 @param target_disk: the disk object we want to copy to
3392
3393 @rtype: NoneType
3394 @return: None
3395 @raise RPCFail: in case of failure
3396
3397 """
3398 src_dev = _RecursiveFindBD(src_disk)
3399 if src_dev is None:
3400 _Fail("Cannot copy from device '%s': device not found", src_disk.uuid)
3401
3402 dest_dev = _RecursiveFindBD(target_disk)
3403 if dest_dev is None:
3404 _Fail("Cannot copy to device '%s': device not found", target_disk.uuid)
3405
3406 src_cmd = src_dev.Export()
3407 dest_cmd = dest_dev.Import()
3408 command = "%s | %s" % (utils.ShellQuoteArgs(src_cmd),
3409 utils.ShellQuoteArgs(dest_cmd))
3410
3411 result = utils.RunCmd(command)
3412 if result.failed:
3413 _Fail("Disk conversion command '%s' exited with error: %s; output: %s",
3414 result.cmd, result.fail_reason, result.output)
3415
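# Illustrative pipeline (hypothetical device paths and arguments; the
# real ones come from each device class's Export()/Import()): if the
# source exports via "dd if=/dev/src-lv bs=1M" and the target imports
# via "dd of=/dev/dst-lv bs=1M oflag=direct", the unified command is:
#
#   dd if=/dev/src-lv bs=1M | dd of=/dev/dst-lv bs=1M oflag=direct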
3416
3417 def BlockdevWipe(disk, offset, size):
3418 """Wipes a block device.
3419
3420 @type disk: L{objects.Disk}
3421 @param disk: the disk object we want to wipe
3422 @type offset: int
3423 @param offset: The offset in MiB in the file
3424 @type size: int
3425 @param size: The size in MiB to write
3426
3427 """
3428 try:
3429 rdev = _RecursiveFindBD(disk)
3430 except errors.BlockDeviceError:
3431 rdev = None
3432
3433 if not rdev:
3434 _Fail("Cannot wipe device %s: device not found", disk.iv_name)
3435 if offset < 0:
3436 _Fail("Negative offset")
3437 if size < 0:
3438 _Fail("Negative size")
3439 if offset > rdev.size:
3440 _Fail("Wipe offset is bigger than device size")
3441 if (offset + size) > rdev.size:
3442 _Fail("Wipe offset and size are bigger than device size")
3443
3444 _DumpDevice("/dev/zero", rdev.dev_path, offset, size, True)
3445
3446
3447 def BlockdevImage(disk, image, size):
3448 """Images a block device either by dumping a local file or
3449 downloading a URL.
3450
3451 @type disk: L{objects.Disk}
3452 @param disk: the disk object we want to image
3453
3454 @type image: string
3455 @param image: file path to the disk image to be dumped
3456
3457 @type size: int
3458 @param size: The size in MiB to write
3459
3460 @rtype: NoneType
3461 @return: None
3462 @raise RPCFail: in case of failure
3463
3464 """
3465 if not (utils.IsUrl(image) or os.path.exists(image)):
3466 _Fail("Image '%s' not found", image)
3467
3468 try:
3469 rdev = _RecursiveFindBD(disk)
3470 except errors.BlockDeviceError:
3471 rdev = None
3472
3473 if not rdev:
3474 _Fail("Cannot image device %s: device not found", disk.iv_name)
3475 if size < 0:
3476 _Fail("Negative size")
3477 if size > rdev.size:
3478 _Fail("Image size is bigger than device size")
3479
3480 if utils.IsUrl(image):
3481 _DownloadAndDumpDevice(image, rdev.dev_path, size)
3482 else:
3483 _DumpDevice(image, rdev.dev_path, 0, size, False)
3484
3485
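# Hedged examples of the two branches above (hypothetical image
# locations), writing at most 1024 MiB in either case:
#
#   BlockdevImage(disk, "/srv/images/debian.img", 1024)       # local dump
#   BlockdevImage(disk, "https://images.example.com/debian.img", 1024)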
3486 def BlockdevPauseResumeSync(disks, pause):
3487 """Pause or resume the sync of the block device.
3488
3489 @type disks: list of L{objects.Disk}
3490 @param disks: the disk objects we want to pause/resume
3491 @type pause: bool
3492 @param pause: Whether to pause or resume
3493
3494 """
3495 success = []
3496 for disk in disks:
3497 try:
3498 rdev = _RecursiveFindBD(disk)
3499 except errors.BlockDeviceError:
3500 rdev = None
3501
3502 if not rdev:
3503 success.append((False, ("Cannot change sync for device %s:"
3504 " device not found" % disk.iv_name)))
3505 continue
3506
3507 result = rdev.PauseResumeSync(pause)
3508
3509 if result:
3510 success.append((result, None))
3511 else:
3512 if pause:
3513 msg = "Pause"
3514 else:
3515 msg = "Resume"
3516 success.append((result, "%s for device %s failed" % (msg, disk.iv_name)))
3517
3518 return success
3519
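# Result-shape sketch (illustrative): for two disks where the first is
# paused successfully and the second cannot be found, the returned list
# would be:
#
#   [(True, None),
#    (False, "Cannot change sync for device disk/1: device not found")]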
3520
3521 def BlockdevRemove(disk):
3522 """Remove a block device.
3523
3524 @note: This is intended to be called recursively.
3525
3526 @type disk: L{objects.Disk}
3527 @param disk: the disk object we should remove
3528 @rtype: boolean
3529 @return: the success of the operation
3530
3531 """
3532 msgs = []
3533 try:
3534 rdev = _RecursiveFindBD(disk)
3535 except errors.BlockDeviceError, err:
3536 # probably can't attach
3537 logging.info("Can't attach to device %s in remove", disk)
3538 rdev = None
3539 if rdev is not None:
3540 r_path = rdev.dev_path
3541
3542 def _TryRemove():
3543 try:
3544 rdev.Remove()
3545 return []
3546 except errors.BlockDeviceError, err:
3547 return [str(err)]
3548
3549 msgs.extend(utils.SimpleRetry([], _TryRemove,
3550 constants.DISK_REMOVE_RETRY_INTERVAL,
3551 constants.DISK_REMOVE_RETRY_TIMEOUT))
3552
3553 if not msgs:
3554 DevCacheManager.RemoveCache(r_path)
3555
3556 if disk.children:
3557 for child in disk.children:
3558 try:
3559 BlockdevRemove(child)
3560 except RPCFail, err:
3561 msgs.append(str(err))
3562
3563 if msgs:
3564 _Fail("; ".join(msgs))
3565
3566
3567 def _RecursiveAssembleBD(disk, owner, as_primary):
3568 """Activate a block device for an instance.
3569
3570 This is run on the primary and secondary nodes for an instance.
3571
3572 @note: this function is called recursively.
3573
3574 @type disk: L{objects.Disk}
3575 @param disk: the disk we try to assemble
3576 @type owner: str
3577 @param owner: the name of the instance which owns the disk
3578 @type as_primary: boolean
3579 @param as_primary: if we should make the block device
3580 read/write
3581
3582 @return: the assembled device or None (in case no device
3583 was assembled)
3584 @raise errors.BlockDeviceError: in case there is an error
3585 during the activation of the children or the device
3586 itself
3587
3588 """
3589 children = []
3590 if disk.children:
3591 mcn = disk.ChildrenNeeded()
3592 if mcn == -1:
3593 mcn = 0 # max number of Nones allowed
3594 else:
3595 mcn = len(disk.children) - mcn # max number of Nones
3596 for chld_disk in disk.children:
3597 try:
3598 cdev = _RecursiveAssembleBD(chld_disk, owner, as_primary)
3599 except errors.BlockDeviceError, err:
3600 if children.count(None) >= mcn:
3601 raise
3602 cdev = None
3603 logging.error("Error in child activation (but continuing): %s",
3604 str(err))
3605 children.append(cdev)
3606
3607 if as_primary or disk.AssembleOnSecondary():
3608 r_dev = bdev.Assemble(disk, children)
3609 result = r_dev
3610 if as_primary or disk.OpenOnSecondary():
3611 r_dev.Open()
3612 DevCacheManager.UpdateCache(r_dev.dev_path, owner,
3613 as_primary, disk.iv_name)
3614
3615 else:
3616 result = True
3617 return result
3618
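# Worked example of the "mcn" bookkeeping above (assuming, as in the
# ganeti object model, that ChildrenNeeded() returns 0 for DRBD8 because
# it supports diskless activation): a DRBD8 disk with two children gets
# mcn = 2 - 0 = 2, so both children may fail to assemble (appended as
# None) with only a logged error; for device types returning -1, mcn
# stays 0 and the first child failure is re-raised.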
3619
3620 def BlockdevAssemble(disk, instance, as_primary, idx):
3621 """Activate a block device for an instance.
3622
3623 This is a wrapper over _RecursiveAssembleBD.
3624
3625 @rtype: tuple
3626 @return: a tuple with the C{/dev/...} path, the created symlink and
3627 the device URI for primary nodes, and (C{True}, C{True}) otherwise
3628
3629 """
3630 try:
3631 result = _RecursiveAssembleBD(disk, instance.name, as_primary)
3632 if isinstance(result, BlockDev):
3633 # pylint: disable=E1103
3634 dev_path = result.dev_path
3635 link_name = None
3636 uri = None
3637 if as_primary:
3638 link_name = _SymlinkBlockDev(instance.name, dev_path, idx)
3639 uri = _CalculateDeviceURI(instance, disk, result)
3640 elif result:
3641 return result, result
3642 else:
3643 _Fail("Unexpected result from _RecursiveAssembleBD")
3644 except errors.BlockDeviceError, err:
3645 _Fail("Error while assembling disk: %s", err, exc=True)
3646 except OSError, err:
3647 _Fail("Error while symlinking disk: %s", err, exc=True)
3648
3649 return dev_path, link_name, uri
3650
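# Return-shape sketch (hypothetical paths; the symlink location depends
# on the node's disk-links directory): a primary node yields something
# like
#
#   ("/dev/drbd0", "/<disk-links-dir>/inst1.example.com:0", None)
#
# A secondary node that still assembles the device (e.g. DRBD) yields
# (dev_path, None, None); only a device needing no assembly on
# secondaries yields (True, True).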
3651
3652 def BlockdevShutdown(disk):
3653 """Shut down a block device.
3654
3655 First, if the device is assembled (Attach() is successful), then
3656 the device is shut down. Then the children of the device are
3657 shut down.
3658
3659 This function is called recursively. Note that, as opposed to
3660 assemble, we don't cache the children: shutting down one device
3661 doesn't require that the devices above it be active.
3662
3663 @type disk: L{objects.Disk}
3664 @param disk: the description of the disk we should
3665 shutdown
3666 @rtype: None
3667
3668 """
3669 msgs = []
3670 r_dev = _RecursiveFindBD(disk)
3671 if r_dev is not None:
3672 r_path = r_dev.dev_path
3673 try:
3674 r_dev.Shutdown()
3675 DevCacheManager.RemoveCache(r_path)
3676 except errors.BlockDeviceError, err:
3677 msgs.append(str(err))
3678
3679 if disk.children:
3680 for child in disk.children:
3681 try:
3682 BlockdevShutdown(child)
3683 except RPCFail, err:
3684 msgs.append(str(err))
3685
3686 if msgs:
3687 _Fail("; ".join(msgs))
3688
3689
3690 def BlockdevAddchildren(parent_cdev, new_cdevs):
3691 """Extend a mirrored block device.
3692
3693 @type parent_cdev: L{objects.Disk}
3694 @param parent_cdev: the disk to which we should add children
3695 @type new_cdevs: list of L{objects.Disk}
3696 @param new_cdevs: the list of children which we should add
3697 @rtype: None
3698
3699 """
3700 parent_bdev = _RecursiveFindBD(parent_cdev)
3701 if parent_bdev is None:
3702 _Fail("Can't find parent device '%s' in add children", parent_cdev)
3703 new_bdevs = [_RecursiveFindBD(disk) for disk in new_cdevs]
3704 if new_bdevs.count(None) > 0:
3705 _Fail("Can't find new device(s) to add: %s:%s", new_bdevs, new_cdevs)
3706 parent_bdev.AddChildren(new_bdevs)
3707
3708
3709 def BlockdevRemovechildren(parent_cdev, new_cdevs):
3710 """Shrink a mirrored block device.
3711
3712 @type parent_cdev: L{objects.Disk}
3713 @param parent_cdev: the disk from which we should remove children
3714 @type new_cdevs: list of L{objects.Disk}
3715 @param new_cdevs: the list of children which we should remove
3716 @rtype: None
3717
3718 """
3719 parent_bdev = _RecursiveFindBD(parent_cdev)
3720 if parent_bdev is None:
3721 _Fail("Can't find parent device '%s' in remove children", parent_cdev)
3722 devs = []
3723 for disk in new_cdevs:
3724 rpath = disk.StaticDevPath()
3725 if rpath is None:
3726 bd = _RecursiveFindBD(disk)
3727 if bd is None:
3728 _Fail("Can't find device %s while removing children", disk)
3729 else:
3730 devs.append(bd.dev_path)
3731 else:
3732 if not utils.IsNormAbsPath(rpath):
3733 _Fail("Strange path returned from StaticDevPath: '%s'", rpath)
3734 devs.append(rpath)
3735 parent_bdev.RemoveChildren(devs)
3736
3737
3738 def BlockdevGetmirrorstatus(disks):
3739 """Get the mirroring status of a list of devices.
3740
3741 @type disks: list of L{objects.Disk}
3742 @param disks: the list of disks which we should query
3743 @rtype: list
3744 @return: List of L{objects.BlockDevStatus}, one for each disk
3745 @raise errors.BlockDeviceError: if any of the disks cannot be
3746 found
3747
3748 """
3749 stats = []
3750 for dsk in disks:
3751 rbd = _RecursiveFindBD(dsk)
3752 if rbd is None:
3753 _Fail("Can't find device %s", dsk)
3754
3755 stats.append(rbd.CombinedSyncStatus())
3756
3757 return stats
3758
3759
3760 def BlockdevGetmirrorstatusMulti(disks):
3761 """Get the mirroring status of a list of devices.
3762
3763 @type disks: list of L{objects.Disk}
3764 @param disks: the list of disks which we should query
3765 @rtype: list
3766 @return: List of tuples, (bool, status), one for each disk; bool denotes
3767 success/failure, status is L{objects.BlockDevStatus} on success, string
3768 otherwise
3769
3770 """
3771 result = []
3772 for disk in disks:
3773 try:
3774 rbd = _RecursiveFindBD(disk)
3775 if rbd is None:
3776 result.append((False, "Can't find device %s" % disk))
3777 continue
3778
3779 status = rbd.CombinedSyncStatus()
3780 except errors.BlockDeviceError, err:
3781 logging.exception("Error while getting disk status")
3782 result.append((False, str(err)))
3783 else:
3784 result.append((True, status))
3785
3786 assert len(disks) == len(result)
3787
3788 return result
3789
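# Illustrative result (note the per-disk error isolation, in contrast to
# BlockdevGetmirrorstatus above, which fails the whole call on the first
# missing device):
#
#   [(True, <objects.BlockDevStatus instance>),
#    (False, "Can't find device <disk object>")]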
3790
3791 def _RecursiveFindBD(disk):
3792 """Check if a device is activated.
3793
3794 If so, return information about the real device.
3795
3796 @type disk: L{objects.Disk}
3797 @param disk: the disk object we need to find
3798
3799 @return: None if the device can't be found,
3800 otherwise the device instance
3801
3802 """
3803 children = []
3804 if disk.children:
3805 for chdisk in disk.children:
3806 children.append(_RecursiveFindBD(chdisk))
3807
3808 return bdev.FindDevice(disk, children)
3809
3810
3811 def _OpenRealBD(disk):
3812 """Opens the underlying block device of a disk.
3813
3814 @type disk: L{objects.Disk}
3815 @param disk: the disk object we want to open
3816
3817 """
3818 real_disk = _RecursiveFindBD(disk)
3819 if real_disk is None:
3820 _Fail("Block device '%s' is not set up", disk)
3821
3822 real_disk.Open()
3823
3824 return real_disk
3825
3826
3827 def BlockdevFind(disk):
3828 """Check if a device is activated.
3829
3830 If it is, return information about the real device.
3831
3832 @type disk: L{objects.Disk}
3833 @param disk: the disk to find
3834 @rtype: None or objects.BlockDevStatus
3835 @return: None if the disk cannot be found, otherwise the current
3836 sync status information
3837
3838 """
3839 try:
3840 rbd = _RecursiveFindBD(disk)
3841 except errors.BlockDeviceError, err:
3842 _Fail("Failed to find device: %s", err, exc=True)
3843
3844 if rbd is None:
3845 return None
3846
3847 return rbd.GetSyncStatus()
3848
3849
3850 def BlockdevGetdimensions(disks):
3851 """Computes the size of the given disks.
3852
3853 If a disk is not found, returns None instead.
3854
3855 @type disks: list of L{objects.Disk}
3856 @param disks: the list of disks to compute the size for
3857 @rtype: list
3858 @return: list with elements None if the disk cannot be found,
3859 otherwise the pair (size, spindles), where spindles is None if the
3860 device doesn't support that
3861
3862 """
3863 result = []
3864 for cf in disks:
3865 try:
3866 rbd = _RecursiveFindBD(cf)
3867 except errors.BlockDeviceError:
3868 result.append(None)
3869 continue
3870 if rbd is None:
3871 result.append(None)
3872 else:
3873 result.append(rbd.GetActualDimensions())
3874 return result
3875
3876
3877 def UploadFile(file_name, data, mode, uid, gid, atime, mtime):
3878 """Write a file to the filesystem.
3879
3880 This allows the master to overwrite(!) a file. It will only perform
3881 the operation if the file belongs to a list of configuration files.
3882
3883 @type file_name: str
3884 @param file_name: the target file name
3885 @type data: str
3886 @param data: the new contents of the file
3887 @type mode: int
3888 @param mode: the mode to give the file (can be None)
3889 @type uid: string
3890 @param uid: the owner of the file
3891 @type gid: string
3892 @param gid: the group of the file
3893 @type atime: float
3894 @param atime: the atime to set on the file (can be None)
3895 @type mtime: float
3896 @param mtime: the mtime to set on the file (can be None)
3897 @rtype: None
3898
3899 """
3900 file_name = vcluster.LocalizeVirtualPath(file_name)
3901
3902 if not os.path.isabs(file_name):
3903 _Fail("Filename passed to UploadFile is not absolute: '%s'", file_name)
3904
3905 if file_name not in _ALLOWED_UPLOAD_FILES:
3906 _Fail("Filename passed to UploadFile not in allowed upload targets: '%s'",
3907 file_name)
3908
3909 raw_data = _Decompress(data)
3910
3911 if not (isinstance(uid, basestring) and isinstance(gid, basestring)):
3912 _Fail("Invalid username/groupname type")
3913
3914 getents = runtime.GetEnts()
3915 uid = getents.LookupUser(uid)
3916 gid = getents.LookupGroup(gid)
3917
3918 utils.SafeWriteFile(file_name, None,
3919 data=raw_data, mode=mode, uid=uid, gid=gid,
3920 atime=atime, mtime=mtime)
3921
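# Hedged usage sketch (hypothetical arguments): a master-side update of a
# whitelisted file could look like
#
#   UploadFile("/var/lib/ganeti/config.data", compressed_data,
#              0600, "root", "root", None, None)
#
# where "compressed_data" must be in the wire format understood by
# _Decompress() and the path must be present in _ALLOWED_UPLOAD_FILES,
# which is built elsewhere in this module.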
3922
3923 def RunOob(oob_program, command, node, timeout):
3924 """Executes oob_program with given command on given node.
3925
3926 @param oob_program