Renew SSH keys and upgrade
[ganeti-github.git] / lib / backend.py
1 #
2 #
3
4 # Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011, 2012, 2013, 2014 Google Inc.
5 # All rights reserved.
6 #
7 # Redistribution and use in source and binary forms, with or without
8 # modification, are permitted provided that the following conditions are
9 # met:
10 #
11 # 1. Redistributions of source code must retain the above copyright notice,
12 # this list of conditions and the following disclaimer.
13 #
14 # 2. Redistributions in binary form must reproduce the above copyright
15 # notice, this list of conditions and the following disclaimer in the
16 # documentation and/or other materials provided with the distribution.
17 #
18 # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
19 # IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
20 # TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
21 # PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR
22 # CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
23 # EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
24 # PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
25 # PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
26 # LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
27 # NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
28 # SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29
30
31 """Functions used by the node daemon
32
33 @var _ALLOWED_UPLOAD_FILES: denotes which files are accepted in
34 the L{UploadFile} function
35 @var _ALLOWED_CLEAN_DIRS: denotes which directories are accepted
36 in the L{_CleanDirectory} function
37
38 """
39
40 # pylint: disable=E1103,C0302
41
42 # E1103: %s %r has no %r member (but some types could not be
43 # inferred), because the _TryOSFromDisk returns either (True, os_obj)
44 # or (False, "string") which confuses pylint
45
46 # C0302: This module has become too big and should be split up
47
48
49 import base64
50 import errno
51 import logging
52 import os
53 import os.path
54 import pycurl
55 import random
56 import re
57 import shutil
58 import signal
59 import socket
60 import stat
61 import tempfile
62 import time
63 import zlib
64 import copy
65
66 from ganeti import errors
67 from ganeti import http
68 from ganeti import utils
69 from ganeti import ssh
70 from ganeti import hypervisor
71 from ganeti.hypervisor import hv_base
72 from ganeti import constants
73 from ganeti.storage import bdev
74 from ganeti.storage import drbd
75 from ganeti.storage import extstorage
76 from ganeti.storage import filestorage
77 from ganeti import objects
78 from ganeti import ssconf
79 from ganeti import serializer
80 from ganeti import netutils
81 from ganeti import runtime
82 from ganeti import compat
83 from ganeti import pathutils
84 from ganeti import vcluster
85 from ganeti import ht
86 from ganeti.storage.base import BlockDev
87 from ganeti.storage.drbd import DRBD8
88 from ganeti import hooksmaster
89 from ganeti.rpc import transport
90 from ganeti.rpc.errors import NoMasterError, TimeoutError
91
92
#: File whose content identifies the current boot (read in L{GetNodeInfo})
_BOOT_ID_PATH = "/proc/sys/kernel/random/boot_id"

#: Only these directories may be passed to L{_CleanDirectory}
_ALLOWED_CLEAN_DIRS = compat.UniqueFrozenset([
  pathutils.DATA_DIR,
  pathutils.JOB_QUEUE_ARCHIVE_DIR,
  pathutils.QUEUE_DIR,
  pathutils.CRYPTO_KEYS_DIR,
  ])

#: Maximum SSL certificate validity: 7 days, in seconds
_MAX_SSL_CERT_VALIDITY = 7 * 24 * 60 * 60

# File names of an X509 key/certificate pair
_X509_KEY_FILE = "key"
_X509_CERT_FILE = "cert"

# File names used for import/export status tracking ("IES" presumably
# import/export setup — not used in this part of the module; verify usage)
_IES_STATUS_FILE = "status"
_IES_PID_FILE = "pid"
_IES_CA_FILE = "ca"

#: Valid LVS output line regex
_LVSLINE_REGEX = re.compile(r"^ *([^|]+)\|([^|]+)\|([0-9.]+)\|([^|]{6,})\|?$")

# Actions for the master setup script
_MASTER_START = "start"
_MASTER_STOP = "stop"

#: Maximum file permissions for restricted command directory and executables
_RCMD_MAX_MODE = (stat.S_IRWXU |
                  stat.S_IRGRP | stat.S_IXGRP |
                  stat.S_IROTH | stat.S_IXOTH)

#: Delay before returning an error for restricted commands
_RCMD_INVALID_DELAY = 10

#: How long to wait to acquire lock for restricted commands (shorter than
#: L{_RCMD_INVALID_DELAY}) to reduce blockage of noded forks when many
#: command requests arrive
_RCMD_LOCK_TIMEOUT = _RCMD_INVALID_DELAY * 0.8
126
127
class RPCFail(Exception):
  """Exception signalling an RPC-level failure.

  Its single argument is the error message to be reported back to the
  caller; it is raised via L{_Fail} and handled specially by the node
  daemon, which turns it into a 'failed' RPC result.

  """
134
135
def _GetInstReasonFilename(instance_name):
  """Compute the path of an instance's state-change reason file.

  @type instance_name: string
  @param instance_name: The name of the instance
  @rtype: string
  @return: The path of the file

  """
  # One reason file per instance, named after the instance itself
  reason_path = utils.PathJoin(pathutils.INSTANCE_REASON_DIR, instance_name)
  return reason_path
146
147
def _StoreInstReasonTrail(instance_name, trail):
  """Serialize a reason trail related to an instance change of state to file.

  The exact location of the file depends on the name of the instance and on
  the configuration of the Ganeti cluster defined at deploy time.

  @type instance_name: string
  @param instance_name: The name of the instance

  @type trail: list of reasons
  @param trail: reason trail

  @rtype: None

  """
  # Serialize the trail as JSON and write it to the per-instance file
  utils.WriteFile(_GetInstReasonFilename(instance_name),
                  data=serializer.DumpJson(trail))
166
167
def _Fail(msg, *args, **kwargs):
  """Log an error and then raise an L{RPCFail} exception.

  This exception is then handled specially in the ganeti daemon and
  turned into a 'failed' return type. As such, this function is a
  useful shortcut for logging the error and returning it to the master
  daemon.

  @type msg: string
  @param msg: the text of the exception
  @raise RPCFail

  """
  if args:
    msg = msg % args
  # Logging defaults to on; the "exc" flag additionally records the
  # current exception's traceback
  if kwargs.get("log", True):
    if kwargs.get("exc"):
      logging.exception(msg)
    else:
      logging.error(msg)
  raise RPCFail(msg)
189
190
def _GetConfig():
  """Return a fresh L{ssconf.SimpleStore} wrapper.

  @rtype: L{ssconf.SimpleStore}
  @return: a SimpleStore instance

  """
  store = ssconf.SimpleStore()
  return store
199
200
def _GetSshRunner(cluster_name):
  """Return an L{ssh.SshRunner} for the given cluster.

  @type cluster_name: str
  @param cluster_name: the cluster name, which is needed
      by the SshRunner constructor
  @rtype: L{ssh.SshRunner}
  @return: an SshRunner instance

  """
  runner = ssh.SshRunner(cluster_name)
  return runner
212
213
def _Decompress(data):
  """Unpacks data compressed by the RPC client.

  @type data: list or tuple
  @param data: Data sent by RPC client
  @rtype: str
  @return: Decompressed data

  """
  assert isinstance(data, (list, tuple)) and len(data) == 2
  (encoding, content) = data
  # Only two encodings are defined by the RPC protocol constants
  if encoding == constants.RPC_ENCODING_ZLIB_BASE64:
    return zlib.decompress(base64.b64decode(content))
  if encoding == constants.RPC_ENCODING_NONE:
    return content
  raise AssertionError("Unknown data encoding")
232
233
def _CleanDirectory(path, exclude=None):
  """Removes all regular files in a directory.

  @type path: str
  @param path: the directory to clean
  @type exclude: list
  @param exclude: list of files to be excluded, defaults
      to the empty list

  """
  if path not in _ALLOWED_CLEAN_DIRS:
    _Fail("Path passed to _CleanDirectory not in allowed clean targets: '%s'",
          path)

  if not os.path.isdir(path):
    return

  # Normalize the exclusion list so comparison against joined paths works
  exclude = [os.path.normpath(entry) for entry in (exclude or [])]

  for rel_name in utils.ListVisibleFiles(path):
    full_name = utils.PathJoin(path, rel_name)
    if full_name in exclude:
      continue
    # Only plain files are removed; symlinks and directories are kept
    if os.path.isfile(full_name) and not os.path.islink(full_name):
      utils.RemoveFile(full_name)
262
263
def _BuildUploadFileList():
  """Build the list of files that may be uploaded via L{UploadFile}.

  This is abstracted so that it's built only once at module import time.

  """
  allowed_files = set([
    pathutils.CLUSTER_CONF_FILE,
    pathutils.ETC_HOSTS,
    pathutils.SSH_KNOWN_HOSTS_FILE,
    pathutils.VNC_PASSWORD_FILE,
    pathutils.RAPI_CERT_FILE,
    pathutils.SPICE_CERT_FILE,
    pathutils.SPICE_CACERT_FILE,
    pathutils.RAPI_USERS_FILE,
    pathutils.CONFD_HMAC_KEY,
    pathutils.CLUSTER_DOMAIN_SECRET_FILE,
    ])

  # Each hypervisor contributes its own ancillary files
  for hv_name in constants.HYPER_TYPES:
    ancillary = hypervisor.GetHypervisorClass(hv_name).GetAncillaryFiles()
    allowed_files.update(ancillary[0])

  assert pathutils.FILE_STORAGE_PATHS_FILE not in allowed_files, \
    "Allowed file storage paths should never be uploaded via RPC"

  return frozenset(allowed_files)
291
292
#: Files accepted by L{UploadFile}; computed once at module import time
_ALLOWED_UPLOAD_FILES = _BuildUploadFileList()
294
295
def JobQueuePurge():
  """Removes job queue files and archived jobs.

  The previous docstring claimed a C{(True, None)} return value, but the
  function returns nothing; its effect is purely the directory cleanup.

  @rtype: None

  """
  # Keep the lock file so the queue can still be locked afterwards
  _CleanDirectory(pathutils.QUEUE_DIR, exclude=[pathutils.JOB_QUEUE_LOCK_FILE])
  _CleanDirectory(pathutils.JOB_QUEUE_ARCHIVE_DIR)
305
306
def GetMasterNodeName():
  """Returns the master node name.

  @rtype: string
  @return: name of the master node
  @raise RPCFail: in case of errors

  """
  try:
    return _GetConfig().GetMasterNode()
  except errors.ConfigurationError as err:
    # Re-raise as RPCFail with the traceback logged
    _Fail("Cluster configuration incomplete: %s", err, exc=True)
319
320
def RunLocalHooks(hook_opcode, hooks_path, env_builder_fn):
  """Decorator that runs hooks before and after the decorated function.

  @type hook_opcode: string
  @param hook_opcode: opcode of the hook
  @type hooks_path: string
  @param hooks_path: path of the hooks
  @type env_builder_fn: function
  @param env_builder_fn: function that returns a dictionary containing the
    environment variables for the hooks. Will get all the parameters of the
    decorated function.
  @raise RPCFail: in case of pre-hook failure

  """
  def decorator(fn):
    def wrapper(*args, **kwargs):
      (_, my_node) = ssconf.GetMasterAndMyself()
      # Both hook phases run locally, on this node only
      node_tuple = ([my_node], [my_node])

      env_fn = compat.partial(env_builder_fn, *args, **kwargs)

      cfg = _GetConfig()
      hooks_master = hooksmaster.HooksMaster(hook_opcode, hooks_path,
                                             node_tuple,
                                             HooksRunner().RunLocalHooks,
                                             None, env_fn, None,
                                             logging.warning,
                                             cfg.GetClusterName(),
                                             cfg.GetMasterNode())
      hooks_master.RunPhase(constants.HOOKS_PHASE_PRE)
      result = fn(*args, **kwargs)
      hooks_master.RunPhase(constants.HOOKS_PHASE_POST)
      return result
    return wrapper
  return decorator
355
356
def _BuildMasterIpEnv(master_params, use_external_mip_script=None):
  """Builds environment variables for master IP hooks.

  @type master_params: L{objects.MasterNetworkParameters}
  @param master_params: network parameters of the master
  @type use_external_mip_script: boolean
  @param use_external_mip_script: whether to use an external master IP
    address setup script (unused, but necessary per the implementation of the
    _RunLocalHooks decorator)

  """
  # pylint: disable=W0613
  ip_version = \
    netutils.IPAddress.GetVersionFromAddressFamily(master_params.ip_family)
  return {
    "MASTER_NETDEV": master_params.netdev,
    "MASTER_IP": master_params.ip,
    "MASTER_NETMASK": str(master_params.netmask),
    "CLUSTER_IP_VERSION": str(ip_version),
    }
378
379
def _RunMasterSetupScript(master_params, action, use_external_mip_script):
  """Execute the master IP address setup script.

  @type master_params: L{objects.MasterNetworkParameters}
  @param master_params: network parameters of the master
  @type action: string
  @param action: action to pass to the script. Must be one of
    L{backend._MASTER_START} or L{backend._MASTER_STOP}
  @type use_external_mip_script: boolean
  @param use_external_mip_script: whether to use an external master IP
    address setup script
  @raise backend.RPCFail: if there are errors during the execution of the
    script

  """
  if use_external_mip_script:
    setup_script = pathutils.EXTERNAL_MASTER_SETUP_SCRIPT
  else:
    setup_script = pathutils.DEFAULT_MASTER_SETUP_SCRIPT

  # The script runs with a clean environment containing only the master
  # IP variables
  result = utils.RunCmd([setup_script, action],
                        env=_BuildMasterIpEnv(master_params), reset_env=True)

  if result.failed:
    _Fail("Failed to %s the master IP. Script return value: %s, output: '%s'" %
          (action, result.exit_code, result.output), log=True)
407
408
@RunLocalHooks(constants.FAKE_OP_MASTER_TURNUP, "master-ip-turnup",
               _BuildMasterIpEnv)
def ActivateMasterIp(master_params, use_external_mip_script):
  """Activate the IP address of the master daemon.

  @type master_params: L{objects.MasterNetworkParameters}
  @param master_params: network parameters of the master
  @type use_external_mip_script: boolean
  @param use_external_mip_script: whether to use an external master IP
    address setup script
  @raise RPCFail: in case of errors during the IP startup

  """
  # Delegate to the setup script with the "start" action
  _RunMasterSetupScript(master_params, _MASTER_START, use_external_mip_script)
424
425
def StartMasterDaemons(no_voting):
  """Activate local node as master node.

  The function will start the master daemons (ganeti-masterd and ganeti-rapi).

  @type no_voting: boolean
  @param no_voting: whether to start ganeti-masterd without a node vote
      but still non-interactively
  @rtype: None
  @raise RPCFail: if the master daemons cannot be started

  """
  if no_voting:
    masterd_args = "--no-voting --yes-do-it"
  else:
    masterd_args = ""

  env = {
    "EXTRA_MASTERD_ARGS": masterd_args,
    }

  result = utils.RunCmd([pathutils.DAEMON_UTIL, "start-master"], env=env)
  if result.failed:
    # _Fail logs the message itself; the previous explicit logging.error
    # call caused the same message to be logged twice
    _Fail("Can't start Ganeti master: %s", result.output)
452
453
@RunLocalHooks(constants.FAKE_OP_MASTER_TURNDOWN, "master-ip-turndown",
               _BuildMasterIpEnv)
def DeactivateMasterIp(master_params, use_external_mip_script):
  """Deactivate the master IP on this node.

  @type master_params: L{objects.MasterNetworkParameters}
  @param master_params: network parameters of the master
  @type use_external_mip_script: boolean
  @param use_external_mip_script: whether to use an external master IP
    address setup script
  @raise RPCFail: in case of errors during the IP turndown

  """
  # Delegate to the setup script with the "stop" action
  _RunMasterSetupScript(master_params, _MASTER_STOP, use_external_mip_script)
469
470
def StopMasterDaemons():
  """Stop the master daemons on this node.

  Stop the master daemons (ganeti-masterd and ganeti-rapi) on this node.

  @rtype: None

  """
  # TODO: log and report back to the caller the error failures; we
  # need to decide in which case we fail the RPC for this

  result = utils.RunCmd([pathutils.DAEMON_UTIL, "stop-master"])
  if not result.failed:
    return
  # A failure is only logged, never propagated to the caller
  logging.error("Could not stop Ganeti master, command %s had exitcode %s"
                " and error %s",
                result.cmd, result.exit_code, result.output)
487
488
def ChangeMasterNetmask(old_netmask, netmask, master_ip, master_netdev):
  """Change the netmask of the master IP.

  @param old_netmask: the old value of the netmask
  @param netmask: the new value of the netmask
  @param master_ip: the master IP
  @param master_netdev: the master network device

  """
  if old_netmask == netmask:
    return

  if not netutils.IPAddress.Own(master_ip):
    _Fail("The master IP address is not up, not attempting to change its"
          " netmask")

  def _RunIpCmd(op, mask):
    # Both operations differ only in the ip(8) sub-command and the netmask
    return utils.RunCmd([constants.IP_COMMAND_PATH, "address", op,
                         "%s/%s" % (master_ip, mask),
                         "dev", master_netdev, "label",
                         "%s:0" % master_netdev])

  # Add the address with the new netmask first, then drop the old one
  if _RunIpCmd("add", netmask).failed:
    _Fail("Could not set the new netmask on the master IP address")

  if _RunIpCmd("del", old_netmask).failed:
    _Fail("Could not bring down the master IP address with the old netmask")
518
519
def EtcHostsModify(mode, host, ip):
  """Modify a host entry in /etc/hosts.

  @param mode: The mode to operate. Either add or remove entry
  @param host: The host to operate on
  @param ip: The ip associated with the entry
  @raise RPCFail: if the mode is unknown or the parameters do not match it

  """
  # Bug fix: the original code merely instantiated RPCFail(...) without
  # raising it, so all three validation errors were silently discarded;
  # _Fail logs the message and raises RPCFail as intended.
  if mode == constants.ETC_HOSTS_ADD:
    if not ip:
      _Fail("Mode 'add' needs 'ip' parameter, but parameter not"
            " present")
    utils.AddHostToEtcHosts(host, ip)
  elif mode == constants.ETC_HOSTS_REMOVE:
    if ip:
      _Fail("Mode 'remove' does not allow 'ip' parameter, but"
            " parameter is present")
    utils.RemoveHostFromEtcHosts(host)
  else:
    _Fail("Mode not supported")
540
541
def LeaveCluster(modify_ssh_setup):
  """Cleans up and remove the current node.

  This function cleans up and prepares the current node to be removed
  from the cluster.

  If processing is successful, then it raises an
  L{errors.QuitGanetiException} which is used as a special case to
  shutdown the node daemon.

  @param modify_ssh_setup: boolean

  """
  _CleanDirectory(pathutils.DATA_DIR)
  _CleanDirectory(pathutils.CRYPTO_KEYS_DIR)
  JobQueuePurge()

  if modify_ssh_setup:
    try:
      (priv_key, pub_key, auth_keys) = \
        ssh.GetUserFiles(constants.SSH_LOGIN_USER)

      # Drop our own public key from authorized_keys, then the key pair
      ssh.RemoveAuthorizedKey(auth_keys, utils.ReadFile(pub_key))

      utils.RemoveFile(priv_key)
      utils.RemoveFile(pub_key)
    except errors.OpExecError:
      logging.exception("Error while processing ssh files")

  try:
    # Best-effort removal of the cluster secrets; the first failure
    # aborts the remaining removals (as in the original code)
    for secret_file in (pathutils.CONFD_HMAC_KEY,
                        pathutils.RAPI_CERT_FILE,
                        pathutils.SPICE_CERT_FILE,
                        pathutils.SPICE_CACERT_FILE,
                        pathutils.NODED_CERT_FILE):
      utils.RemoveFile(secret_file)
  except: # pylint: disable=W0702
    logging.exception("Error while removing cluster secrets")

  utils.StopDaemon(constants.CONFD)
  utils.StopDaemon(constants.KVMD)

  # Raise a custom exception (handled in ganeti-noded)
  raise errors.QuitGanetiException(True, "Shutdown scheduled")
584
585
586 def _CheckStorageParams(params, num_params):
587 """Performs sanity checks for storage parameters.
588
589 @type params: list
590 @param params: list of storage parameters
591 @type num_params: int
592 @param num_params: expected number of parameters
593
594 """
595 if params is None:
596 raise errors.ProgrammerError("No storage parameters for storage"
597 " reporting is provided.")
598 if not isinstance(params, list):
599 raise errors.ProgrammerError("The storage parameters are not of type"
600 " list: '%s'" % params)
601 if not len(params) == num_params:
602 raise errors.ProgrammerError("Did not receive the expected number of"
603 "storage parameters: expected %s,"
604 " received '%s'" % (num_params, len(params)))
605
606
def _CheckLvmStorageParams(params):
  """Performs sanity check for the 'exclusive storage' flag.

  @see: C{_CheckStorageParams}

  """
  _CheckStorageParams(params, 1)
  excl_stor = params[0]
  # Validate the extracted value rather than re-indexing the list
  if not isinstance(excl_stor, bool):
    raise errors.ProgrammerError("Exclusive storage parameter is not"
                                 " boolean: '%s'." % excl_stor)
  return excl_stor
619
620
def _GetLvmVgSpaceInfo(name, params):
  """Wrapper around C{_GetVgInfo} which checks the storage parameters.

  @type name: string
  @param name: name of the volume group
  @type params: list
  @param params: list of storage parameters, which in this case should be
    containing only one for exclusive storage

  """
  # Validation yields the exclusive-storage flag consumed by _GetVgInfo
  return _GetVgInfo(name, _CheckLvmStorageParams(params))
633
634
def _GetVgInfo(
    name, excl_stor, info_fn=bdev.LogicalVolume.GetVGInfo):
  """Retrieves information about a LVM volume group.

  """
  # TODO: GetVGInfo supports returning information for multiple VGs at once
  vginfo = info_fn([name], excl_stor)
  if vginfo:
    # Round the reported free/total sizes to whole units
    (vg_free, vg_size) = [int(round(value, 0)) for value in vginfo[0][:2]]
  else:
    vg_free = vg_size = None

  return {
    "type": constants.ST_LVM_VG,
    "name": name,
    "storage_free": vg_free,
    "storage_size": vg_size,
    }
655
656
def _GetLvmPvSpaceInfo(name, params):
  """Wrapper around C{_GetVgSpindlesInfo} with sanity checks.

  @see: C{_GetLvmVgSpaceInfo}

  """
  # Validation yields the exclusive-storage flag for the spindles query
  return _GetVgSpindlesInfo(name, _CheckLvmStorageParams(params))
665
666
def _GetVgSpindlesInfo(
    name, excl_stor, info_fn=bdev.LogicalVolume.GetVgSpindlesInfo):
  """Retrieves information about spindles in an LVM volume group.

  @type name: string
  @param name: VG name
  @type excl_stor: bool
  @param excl_stor: exclusive storage
  @rtype: dict
  @return: dictionary whose keys are "name", "vg_free", "vg_size" for VG name,
      free spindles, total spindles respectively

  """
  if excl_stor:
    (vg_free, vg_size) = info_fn(name)
  else:
    # Spindles are only tracked with exclusive storage enabled
    vg_free = vg_size = 0
  return {
    "type": constants.ST_LVM_PV,
    "name": name,
    "storage_free": vg_free,
    "storage_size": vg_size,
    }
691
692
def _GetHvInfo(name, hvparams, get_hv_fn=hypervisor.GetHypervisor):
  """Retrieves node information from a hypervisor.

  The information returned depends on the hypervisor. Common items:

    - vg_size is the size of the configured volume group in MiB
    - vg_free is the free size of the volume group in MiB
    - memory_dom0 is the memory allocated for domain0 in MiB
    - memory_free is the currently available (free) ram in MiB
    - memory_total is the total number of ram in MiB
    - hv_version: the hypervisor version, if available

  @type hvparams: dict of string
  @param hvparams: the hypervisor's hvparams

  """
  hv = get_hv_fn(name)
  return hv.GetNodeInfo(hvparams=hvparams)
710
711
def _GetHvInfoAll(hv_specs, get_hv_fn=hypervisor.GetHypervisor):
  """Retrieves node information for all hypervisors.

  See C{_GetHvInfo} for information on the output.

  @type hv_specs: list of pairs (string, dict of strings)
  @param hv_specs: list of pairs of a hypervisor's name and its hvparams

  """
  if hv_specs is None:
    return None

  # Query each hypervisor in the order given
  return [_GetHvInfo(hvname, hvparams, get_hv_fn)
          for (hvname, hvparams) in hv_specs]
728
729
730 def _GetNamedNodeInfo(names, fn):
731 """Calls C{fn} for all names in C{names} and returns a dictionary.
732
733 @rtype: None or dict
734
735 """
736 if names is None:
737 return None
738 else:
739 return map(fn, names)
740
741
def GetNodeInfo(storage_units, hv_specs):
  """Gives back a hash with different information about the node.

  @type storage_units: list of tuples (string, string, list)
  @param storage_units: List of tuples (storage unit, identifier, parameters) to
    ask for disk space information. In case of lvm-vg, the identifier is
    the VG name. The parameters can contain additional, storage-type-specific
    parameters, for example exclusive storage for lvm storage.
  @type hv_specs: list of pairs (string, dict of strings)
  @param hv_specs: list of pairs of a hypervisor's name and its hvparams
  @rtype: tuple; (string, None/dict, None/dict)
  @return: Tuple containing boot ID, volume group information and hypervisor
    information

  """
  bootid = utils.ReadFile(_BOOT_ID_PATH, size=128).rstrip("\n")
  # Each storage unit is a (type, key, params) tuple; unpack it into the
  # dispatch function (portable replacement for the tuple-unpacking lambda)
  storage_info = _GetNamedNodeInfo(
    storage_units,
    lambda unit: _ApplyStorageInfoFunction(*unit))
  hv_info = _GetHvInfoAll(hv_specs)
  return (bootid, storage_info, hv_info)
764
765
def _GetFileStorageSpaceInfo(path, params):
  """Wrapper around filestorage.GetSpaceInfo.

  The purpose of this wrapper is to call filestorage.GetFileStorageSpaceInfo
  and ignore the *args parameter to not leak it into the filestorage
  module's code.

  @see: C{filestorage.GetFileStorageSpaceInfo} for description of the
    parameters.

  """
  # File storage takes no extra storage parameters
  _CheckStorageParams(params, 0)
  return filestorage.GetFileStorageSpaceInfo(path)
779
780
# FIXME: implement storage reporting for all missing storage types.
#: Maps each storage type to the function reporting its free/total space;
#: C{None} entries mark types without reporting support — looking one up
#: makes L{_ApplyStorageInfoFunction} raise C{NotImplementedError}
_STORAGE_TYPE_INFO_FN = {
  constants.ST_BLOCK: None,
  constants.ST_DISKLESS: None,
  constants.ST_EXT: None,
  constants.ST_FILE: _GetFileStorageSpaceInfo,
  constants.ST_LVM_PV: _GetLvmPvSpaceInfo,
  constants.ST_LVM_VG: _GetLvmVgSpaceInfo,
  constants.ST_SHARED_FILE: None,
  constants.ST_GLUSTER: None,
  constants.ST_RADOS: None,
  }
793
794
def _ApplyStorageInfoFunction(storage_type, storage_key, *args):
  """Looks up and applies the correct function to calculate free and total
  storage for the given storage type.

  @type storage_type: string
  @param storage_type: the storage type for which the storage shall be reported.
  @type storage_key: string
  @param storage_key: identifier of a storage unit, e.g. the volume group name
    of an LVM storage unit
  @type args: any
  @param args: various parameters that can be used for storage reporting. These
    parameters and their semantics vary from storage type to storage type and
    are just propagated in this function.
  @return: the results of the application of the storage space function (see
    _STORAGE_TYPE_INFO_FN) if storage space reporting is implemented for that
    storage type
  @raises NotImplementedError: for storage types who don't support space
    reporting yet

  """
  fn = _STORAGE_TYPE_INFO_FN[storage_type]
  if fn is None:
    # No reporting function registered for this storage type yet
    raise NotImplementedError
  return fn(storage_key, *args)
819
820
821 def _CheckExclusivePvs(pvi_list):
822 """Check that PVs are not shared among LVs
823
824 @type pvi_list: list of L{objects.LvmPvInfo} objects
825 @param pvi_list: information about the PVs
826
827 @rtype: list of tuples (string, list of strings)
828 @return: offending volumes, as tuples: (pv_name, [lv1_name, lv2_name...])
829
830 """
831 res = []
832 for pvi in pvi_list:
833 if len(pvi.lv_list) > 1:
834 res.append((pvi.name, pvi.lv_list))
835 return res
836
837
def _VerifyHypervisors(what, vm_capable, result, all_hvparams,
                       get_hv_fn=hypervisor.GetHypervisor):
  """Verifies the hypervisor. Appends the results to the 'results' list.

  @type what: C{dict}
  @param what: a dictionary of things to check
  @type vm_capable: boolean
  @param vm_capable: whether or not this node is vm capable
  @type result: dict
  @param result: dictionary of verification results; results of the
    verifications in this function will be added here
  @type all_hvparams: dict of dict of string
  @param all_hvparams: dictionary mapping hypervisor names to hvparams
  @type get_hv_fn: function
  @param get_hv_fn: function to retrieve the hypervisor, to improve testability

  """
  if not vm_capable or constants.NV_HYPERVISOR not in what:
    return

  # Fill the result dict incrementally, one entry per hypervisor
  hv_results = result[constants.NV_HYPERVISOR] = {}
  for hv_name in what[constants.NV_HYPERVISOR]:
    hvparams = all_hvparams[hv_name]
    try:
      verify_result = get_hv_fn(hv_name).Verify(hvparams=hvparams)
    except errors.HypervisorError as err:
      verify_result = "Error while checking hypervisor: %s" % str(err)
    hv_results[hv_name] = verify_result
867
868
def _VerifyHvparams(what, vm_capable, result,
                    get_hv_fn=hypervisor.GetHypervisor):
  """Verifies the hvparams. Appends the results to the 'results' list.

  @type what: C{dict}
  @param what: a dictionary of things to check
  @type vm_capable: boolean
  @param vm_capable: whether or not this node is vm capable
  @type result: dict
  @param result: dictionary of verification results; results of the
    verifications in this function will be added here
  @type get_hv_fn: function
  @param get_hv_fn: function to retrieve the hypervisor, to improve testability

  """
  if not vm_capable or constants.NV_HVPARAMS not in what:
    return

  # Collect one entry per failed validation; successes leave no trace
  hvparams_results = result[constants.NV_HVPARAMS] = []
  for (source, hv_name, hvparms) in what[constants.NV_HVPARAMS]:
    try:
      logging.info("Validating hv %s, %s", hv_name, hvparms)
      get_hv_fn(hv_name).ValidateParameters(hvparms)
    except errors.HypervisorError as err:
      hvparams_results.append((source, hv_name, str(err)))
895
896
def _VerifyInstanceList(what, vm_capable, result, all_hvparams):
  """Verifies the instance list.

  @type what: C{dict}
  @param what: a dictionary of things to check
  @type vm_capable: boolean
  @param vm_capable: whether or not this node is vm capable
  @type result: dict
  @param result: dictionary of verification results; results of the
    verifications in this function will be added here
  @type all_hvparams: dict of dict of string
  @param all_hvparams: dictionary mapping hypervisor names to hvparams

  """
  if constants.NV_INSTANCELIST not in what or not vm_capable:
    return
  # GetInstanceList can fail; report the error text instead of a list
  try:
    val = GetInstanceList(what[constants.NV_INSTANCELIST],
                          all_hvparams=all_hvparams)
  except RPCFail as err:
    val = str(err)
  result[constants.NV_INSTANCELIST] = val
919
920
def _VerifyNodeInfo(what, vm_capable, result, all_hvparams):
  """Verifies the node info.

  @type what: C{dict}
  @param what: a dictionary of things to check
  @type vm_capable: boolean
  @param vm_capable: whether or not this node is vm capable
  @type result: dict
  @param result: dictionary of verification results; results of the
    verifications in this function will be added here
  @type all_hvparams: dict of dict of string
  @param all_hvparams: dictionary mapping hypervisor names to hvparams

  """
  if constants.NV_HVINFO not in what or not vm_capable:
    return
  hvname = what[constants.NV_HVINFO]
  # Query the requested hypervisor with its configured parameters
  hyper = hypervisor.GetHypervisor(hvname)
  result[constants.NV_HVINFO] = hyper.GetNodeInfo(hvparams=all_hvparams[hvname])
940
941
def _VerifyClientCertificate(cert_file=pathutils.NODED_CLIENT_CERT_FILE):
  """Verify the existence and validity of the client SSL certificate.

  """
  create_cert_cmd = "gnt-cluster renew-crypto --new-node-certificates"
  if not os.path.exists(cert_file):
    return (constants.CV_ERROR,
            "The client certificate does not exist. Run '%s' to create"
            " client certificates for all nodes." % create_cert_cmd)

  (errcode, msg) = utils.VerifyCertificate(cert_file)
  if errcode is not None:
    return (errcode, msg)
  # The certificate is valid: return its digest, to be compared against
  # the value stored in the cluster configuration
  return (None, utils.GetCertificateDigest(cert_filename=cert_file))
958
959
def _VerifySshSetup(node_status_list, my_name):
  """Verifies the state of the SSH key files.

  @type node_status_list: list of tuples
  @param node_status_list: list of nodes of the cluster associated with a
    couple of flags: (uuid, name, is_master_candidate,
    is_potential_master_candidate, online)
  @type my_name: str
  @param my_name: name of this node
  @rtype: list of str
  @return: list of error messages; empty if no problems were found

  """
  if node_status_list is None:
    return ["No node list to check against the pub_key_file received."]

  my_status_list = [(my_uuid, name, mc, pot_mc) for (my_uuid, name, mc, pot_mc)
                    in node_status_list if name == my_name]
  if len(my_status_list) == 0:
    return ["Cannot find node information for node '%s'." % my_name]
  (my_uuid, _, _, potential_master_candidate) = \
     my_status_list[0]

  result = []
  pot_mc_uuids = [uuid for (uuid, _, _, _) in node_status_list]
  pub_keys = ssh.QueryPubKeyFile(None)

  # Initialized here because it is read unconditionally in the
  # authorized_keys check below; previously it was only assigned in the
  # potential-master-candidate branch, causing a NameError on other nodes.
  missing_uuids = set([])

  if potential_master_candidate:
    # Check that the set of potential master candidates matches the
    # public key file
    pub_uuids_set = set(pub_keys.keys())
    pot_mc_uuids_set = set(pot_mc_uuids)
    if pub_uuids_set != pot_mc_uuids_set:
      unknown_uuids = pub_uuids_set - pot_mc_uuids_set
      if unknown_uuids:
        result.append("The following node UUIDs are listed in the public key"
                      " file on node '%s', but are not potential master"
                      " candidates: %s."
                      % (my_name, ", ".join(list(unknown_uuids))))
      missing_uuids = pot_mc_uuids_set - pub_uuids_set
      if missing_uuids:
        result.append("The following node UUIDs of potential master candidates"
                      " are missing in the public key file on node %s: %s."
                      % (my_name, ", ".join(list(missing_uuids))))

    (_, key_files) = \
      ssh.GetAllUserFiles(constants.SSH_LOGIN_USER, mkdir=False, dircheck=False)
    my_keys = pub_keys[my_uuid]
    # Count the key files actually readable on disk and compare each of
    # them to this node's entries in the public key file.
    num_keys = 0
    for (key_type, (_, pub_key_file)) in key_files.items():
      try:
        pub_key = utils.ReadFile(pub_key_file)
        if pub_key.strip() not in my_keys:
          result.append("The %s key of node %s does not match this node's keys"
                        " in the pub key file." % (key_type, my_name))
        num_keys += 1
      except IOError:
        # There might not be keys of every type.
        pass
    if num_keys != len(my_keys):
      # Report the number of keys found on the node (num_keys), which is
      # the value actually compared, instead of the number of configured
      # key files.
      result.append("The number of keys for node %s in the public key file"
                    " (%s) does not match the number of keys on the node"
                    " (%s)." % (my_name, len(my_keys), num_keys))
  else:
    if len(pub_keys.keys()) > 0:
      result.append("The public key file of node '%s' is not empty, although"
                    " the node is not a potential master candidate."
                    % my_name)

  # Check that all master candidate keys are in the authorized_keys file
  (auth_key_file, _) = \
    ssh.GetAllUserFiles(constants.SSH_LOGIN_USER, mkdir=False, dircheck=False)
  for (uuid, name, mc, _) in node_status_list:
    if uuid in missing_uuids:
      continue
    # Use .get() to tolerate nodes that have no entry in the public key
    # file (e.g. when this node's copy of the file is empty).
    if mc:
      for key in pub_keys.get(uuid, []):
        if not ssh.HasAuthorizedKey(auth_key_file, key):
          result.append("A SSH key of master candidate '%s' (UUID: '%s') is"
                        " not in the 'authorized_keys' file of node '%s'."
                        % (name, uuid, my_name))
    else:
      for key in pub_keys.get(uuid, []):
        if name != my_name and ssh.HasAuthorizedKey(auth_key_file, key):
          result.append("A SSH key of normal node '%s' (UUID: '%s') is in the"
                        " 'authorized_keys' file of node '%s'."
                        % (name, uuid, my_name))
        if name == my_name and not ssh.HasAuthorizedKey(auth_key_file, key):
          result.append("A SSH key of normal node '%s' (UUID: '%s') is not"
                        " in the 'authorized_keys' file of itself."
                        % (my_name, uuid))

  return result
1052
1053
1054 def VerifyNode(what, cluster_name, all_hvparams, node_groups, groups_cfg):
1055 """Verify the status of the local node.
1056
1057 Based on the input L{what} parameter, various checks are done on the
1058 local node.
1059
1060 If the I{filelist} key is present, this list of
1061 files is checksummed and the file/checksum pairs are returned.
1062
1063 If the I{nodelist} key is present, we check that we have
1064 connectivity via ssh with the target nodes (and check the hostname
1065 report).
1066
1067 If the I{node-net-test} key is present, we check that we have
1068 connectivity to the given nodes via both primary IP and, if
1069 applicable, secondary IPs.
1070
1071 @type what: C{dict}
1072 @param what: a dictionary of things to check:
1073 - filelist: list of files for which to compute checksums
1074 - nodelist: list of nodes we should check ssh communication with
1075 - node-net-test: list of nodes we should check node daemon port
1076 connectivity with
1077 - hypervisor: list with hypervisors to run the verify for
1078 @type cluster_name: string
1079 @param cluster_name: the cluster's name
1080 @type all_hvparams: dict of dict of strings
1081 @param all_hvparams: a dictionary mapping hypervisor names to hvparams
1082 @type node_groups: a dict of strings
1083 @param node_groups: node _names_ mapped to their group uuids (it's enough to
1084 have only those nodes that are in `what["nodelist"]`)
1085 @type groups_cfg: a dict of dict of strings
1086 @param groups_cfg: a dictionary mapping group uuids to their configuration
1087 @rtype: dict
1088 @return: a dictionary with the same keys as the input dict, and
1089 values representing the result of the checks
1090
1091 """
1092 result = {}
1093 my_name = netutils.Hostname.GetSysName()
1094 port = netutils.GetDaemonPort(constants.NODED)
1095 vm_capable = my_name not in what.get(constants.NV_VMNODES, [])
1096
1097 _VerifyHypervisors(what, vm_capable, result, all_hvparams)
1098 _VerifyHvparams(what, vm_capable, result)
1099
1100 if constants.NV_FILELIST in what:
1101 fingerprints = utils.FingerprintFiles(map(vcluster.LocalizeVirtualPath,
1102 what[constants.NV_FILELIST]))
1103 result[constants.NV_FILELIST] = \
1104 dict((vcluster.MakeVirtualPath(key), value)
1105 for (key, value) in fingerprints.items())
1106
1107 if constants.NV_CLIENT_CERT in what:
1108 result[constants.NV_CLIENT_CERT] = _VerifyClientCertificate()
1109
1110 if constants.NV_SSH_SETUP in what:
1111 result[constants.NV_SSH_SETUP] = \
1112 _VerifySshSetup(what[constants.NV_SSH_SETUP], my_name)
1113
1114 if constants.NV_NODELIST in what:
1115 (nodes, bynode, mcs) = what[constants.NV_NODELIST]
1116
1117 # Add nodes from other groups (different for each node)
1118 try:
1119 nodes.extend(bynode[my_name])
1120 except KeyError:
1121 pass
1122
1123 # Use a random order
1124 random.shuffle(nodes)
1125
1126 # Try to contact all nodes
1127 val = {}
1128 for node in nodes:
1129 params = groups_cfg.get(node_groups.get(node))
1130 ssh_port = params["ndparams"].get(constants.ND_SSH_PORT)
1131 logging.debug("Ssh port %s (None = default) for node %s",
1132 str(ssh_port), node)
1133
1134 # We only test if master candidates can communicate to other nodes.
1135 # We cannot test if normal nodes cannot communicate with other nodes,
1136 # because the administrator might have installed additional SSH keys,
1137 # over which Ganeti has no power.
1138 if my_name in mcs:
1139 success, message = _GetSshRunner(cluster_name). \
1140 VerifyNodeHostname(node, ssh_port)
1141 if not success:
1142 val[node] = message
1143
1144 result[constants.NV_NODELIST] = val
1145
1146 if constants.NV_NODENETTEST in what:
1147 result[constants.NV_NODENETTEST] = tmp = {}
1148 my_pip = my_sip = None
1149 for name, pip, sip in what[constants.NV_NODENETTEST]:
1150 if name == my_name:
1151 my_pip = pip
1152 my_sip = sip
1153 break
1154 if not my_pip:
1155 tmp[my_name] = ("Can't find my own primary/secondary IP"
1156 " in the node list")
1157 else:
1158 for name, pip, sip in what[constants.NV_NODENETTEST]:
1159 fail = []
1160 if not netutils.TcpPing(pip, port, source=my_pip):
1161 fail.append("primary")
1162 if sip != pip:
1163 if not netutils.TcpPing(sip, port, source=my_sip):
1164 fail.append("secondary")
1165 if fail:
1166 tmp[name] = ("failure using the %s interface(s)" %
1167 " and ".join(fail))
1168
1169 if constants.NV_MASTERIP in what:
1170 # FIXME: add checks on incoming data structures (here and in the
1171 # rest of the function)
1172 master_name, master_ip = what[constants.NV_MASTERIP]
1173 if master_name == my_name:
1174 source = constants.IP4_ADDRESS_LOCALHOST
1175 else:
1176 source = None
1177 result[constants.NV_MASTERIP] = netutils.TcpPing(master_ip, port,
1178 source=source)
1179
1180 if constants.NV_USERSCRIPTS in what:
1181 result[constants.NV_USERSCRIPTS] = \
1182 [script for script in what[constants.NV_USERSCRIPTS]
1183 if not utils.IsExecutable(script)]
1184
1185 if constants.NV_OOB_PATHS in what:
1186 result[constants.NV_OOB_PATHS] = tmp = []
1187 for path in what[constants.NV_OOB_PATHS]:
1188 try:
1189 st = os.stat(path)
1190 except OSError, err:
1191 tmp.append("error stating out of band helper: %s" % err)
1192 else:
1193 if stat.S_ISREG(st.st_mode):
1194 if stat.S_IMODE(st.st_mode) & stat.S_IXUSR:
1195 tmp.append(None)
1196 else:
1197 tmp.append("out of band helper %s is not executable" % path)
1198 else:
1199 tmp.append("out of band helper %s is not a file" % path)
1200
1201 if constants.NV_LVLIST in what and vm_capable:
1202 try:
1203 val = GetVolumeList([what[constants.NV_LVLIST]])
1204 except RPCFail, err:
1205 val = str(err)
1206 result[constants.NV_LVLIST] = val
1207
1208 _VerifyInstanceList(what, vm_capable, result, all_hvparams)
1209
1210 if constants.NV_VGLIST in what and vm_capable:
1211 result[constants.NV_VGLIST] = utils.ListVolumeGroups()
1212
1213 if constants.NV_PVLIST in what and vm_capable:
1214 check_exclusive_pvs = constants.NV_EXCLUSIVEPVS in what
1215 val = bdev.LogicalVolume.GetPVInfo(what[constants.NV_PVLIST],
1216 filter_allocatable=False,
1217 include_lvs=check_exclusive_pvs)
1218 if check_exclusive_pvs:
1219 result[constants.NV_EXCLUSIVEPVS] = _CheckExclusivePvs(val)
1220 for pvi in val:
1221 # Avoid sending useless data on the wire
1222 pvi.lv_list = []
1223 result[constants.NV_PVLIST] = map(objects.LvmPvInfo.ToDict, val)
1224
1225 if constants.NV_VERSION in what:
1226 result[constants.NV_VERSION] = (constants.PROTOCOL_VERSION,
1227 constants.RELEASE_VERSION)
1228
1229 _VerifyNodeInfo(what, vm_capable, result, all_hvparams)
1230
1231 if constants.NV_DRBDVERSION in what and vm_capable:
1232 try:
1233 drbd_version = DRBD8.GetProcInfo().GetVersionString()
1234 except errors.BlockDeviceError, err:
1235 logging.warning("Can't get DRBD version", exc_info=True)
1236 drbd_version = str(err)
1237 result[constants.NV_DRBDVERSION] = drbd_version
1238
1239 if constants.NV_DRBDLIST in what and vm_capable:
1240 try:
1241 used_minors = drbd.DRBD8.GetUsedDevs()
1242 except errors.BlockDeviceError, err:
1243 logging.warning("Can't get used minors list", exc_info=True)
1244 used_minors = str(err)
1245 result[constants.NV_DRBDLIST] = used_minors
1246
1247 if constants.NV_DRBDHELPER in what and vm_capable:
1248 status = True
1249 try:
1250 payload = drbd.DRBD8.GetUsermodeHelper()
1251 except errors.BlockDeviceError, err:
1252 logging.error("Can't get DRBD usermode helper: %s", str(err))
1253 status = False
1254 payload = str(err)
1255 result[constants.NV_DRBDHELPER] = (status, payload)
1256
1257 if constants.NV_NODESETUP in what:
1258 result[constants.NV_NODESETUP] = tmpr = []
1259 if not os.path.isdir("/sys/block") or not os.path.isdir("/sys/class/net"):
1260 tmpr.append("The sysfs filesytem doesn't seem to be mounted"
1261 " under /sys, missing required directories /sys/block"
1262 " and /sys/class/net")
1263 if (not os.path.isdir("/proc/sys") or
1264 not os.path.isfile("/proc/sysrq-trigger")):
1265 tmpr.append("The procfs filesystem doesn't seem to be mounted"
1266 " under /proc, missing required directory /proc/sys and"
1267 " the file /proc/sysrq-trigger")
1268
1269 if constants.NV_TIME in what:
1270 result[constants.NV_TIME] = utils.SplitTime(time.time())
1271
1272 if constants.NV_OSLIST in what and vm_capable:
1273 result[constants.NV_OSLIST] = DiagnoseOS()
1274
1275 if constants.NV_BRIDGES in what and vm_capable:
1276 result[constants.NV_BRIDGES] = [bridge
1277 for bridge in what[constants.NV_BRIDGES]
1278 if not utils.BridgeExists(bridge)]
1279
1280 if what.get(constants.NV_ACCEPTED_STORAGE_PATHS) == my_name:
1281 result[constants.NV_ACCEPTED_STORAGE_PATHS] = \
1282 filestorage.ComputeWrongFileStoragePaths()
1283
1284 if what.get(constants.NV_FILE_STORAGE_PATH):
1285 pathresult = filestorage.CheckFileStoragePath(
1286 what[constants.NV_FILE_STORAGE_PATH])
1287 if pathresult:
1288 result[constants.NV_FILE_STORAGE_PATH] = pathresult
1289
1290 if what.get(constants.NV_SHARED_FILE_STORAGE_PATH):
1291 pathresult = filestorage.CheckFileStoragePath(
1292 what[constants.NV_SHARED_FILE_STORAGE_PATH])
1293 if pathresult:
1294 result[constants.NV_SHARED_FILE_STORAGE_PATH] = pathresult
1295
1296 return result
1297
1298
def GetCryptoTokens(token_requests):
  """Perform actions on the node's cryptographic tokens.

  Token types can be 'ssl' or 'ssh'. So far only some actions are implemented
  for 'ssl'. Action 'get' returns the digest of the public client ssl
  certificate. Action 'create' creates a new client certificate and private key
  and also returns the digest of the certificate. The third parameter of a
  token request are optional parameters for the actions, so far only the
  filename is supported.

  @type token_requests: list of tuples of (string, string, dict), where the
    first string is in constants.CRYPTO_TYPES, the second in
    constants.CRYPTO_ACTIONS. The third parameter is a dictionary of string
    to string.
  @param token_requests: list of requests of cryptographic tokens and actions
    to perform on them. The actions come with a dictionary of options.
  @rtype: list of tuples (string, string)
  @return: list of tuples of the token type and the public crypto token

  """
  getents = runtime.GetEnts()
  _VALID_CERT_FILES = [pathutils.NODED_CERT_FILE,
                       pathutils.NODED_CLIENT_CERT_FILE,
                       pathutils.NODED_CLIENT_CERT_FILE_TMP]
  _DEFAULT_CERT_FILE = pathutils.NODED_CLIENT_CERT_FILE
  tokens = []
  for (token_type, action, options) in token_requests:
    if token_type not in constants.CRYPTO_TYPES:
      raise errors.ProgrammerError("Token type '%s' not supported." %
                                   token_type)
    if action not in constants.CRYPTO_ACTIONS:
      raise errors.ProgrammerError("Action '%s' is not supported." %
                                   action)
    if token_type == constants.CRYPTO_TYPE_SSL_DIGEST:
      if action == constants.CRYPTO_ACTION_CREATE:

        # extract file name from options
        cert_filename = None
        if options:
          cert_filename = options.get(constants.CRYPTO_OPTION_CERT_FILE)
        if not cert_filename:
          cert_filename = _DEFAULT_CERT_FILE
        # For security reason, we don't allow arbitrary filenames
        if cert_filename not in _VALID_CERT_FILES:
          raise errors.ProgrammerError(
            "The certificate file name path '%s' is not allowed." %
            cert_filename)

        # extract serial number from options
        serial_no = None
        if options:
          try:
            serial_no = int(options[constants.CRYPTO_OPTION_SERIAL_NO])
          except ValueError:
            # Fixed typo in the error message ("intenger")
            raise errors.ProgrammerError(
              "The given serial number is not an integer: %s." %
              options.get(constants.CRYPTO_OPTION_SERIAL_NO))
          except KeyError:
            raise errors.ProgrammerError("No serial number was provided.")

        if not serial_no:
          raise errors.ProgrammerError(
            "Cannot create an SSL certificate without a serial no.")

        utils.GenerateNewSslCert(
          True, cert_filename, serial_no,
          "Create new client SSL certificate in %s." % cert_filename,
          uid=getents.masterd_uid, gid=getents.masterd_gid)
        tokens.append((token_type,
                       utils.GetCertificateDigest(
                         cert_filename=cert_filename)))
      elif action == constants.CRYPTO_ACTION_GET:
        tokens.append((token_type,
                       utils.GetCertificateDigest()))
  return tokens
1374
1375
def EnsureDaemon(daemon_name, run):
  """Ensures the given daemon is running or stopped.

  @type daemon_name: string
  @param daemon_name: name of the daemon (e.g., constants.KVMD)

  @type run: bool
  @param run: whether to start or stop the daemon

  @rtype: bool
  @return: 'True' if daemon successfully started/stopped,
      'False' otherwise

  """
  # Only a fixed set of daemons may be managed through this call; anything
  # else is rejected without touching the system.
  if daemon_name not in [constants.KVMD]:
    return False

  if run:
    return utils.EnsureDaemon(daemon_name)
  return utils.StopDaemon(daemon_name)
1400
1401
def _InitSshUpdateData(data, noded_cert_file, ssconf_store):
  """Fills in the common payload fields for an SSH update request.

  Stores the node daemon certificate and the cluster name in C{data},
  which is the dictionary later sent to the SSH update tool.

  """
  cert_pem = utils.ReadFile(noded_cert_file)
  (_, extracted_cert) = utils.ExtractX509Certificate(cert_pem)
  data[constants.SSHS_NODE_DAEMON_CERTIFICATE] = extracted_cert

  data[constants.SSHS_CLUSTER_NAME] = ssconf_store.GetClusterName()
1409
1410
def AddNodeSshKey(node_uuid, node_name,
                  to_authorized_keys, to_public_keys,
                  get_pub_keys, ssh_port_map,
                  potential_master_candidates,
                  pub_key_file=pathutils.SSH_PUB_KEYS,
                  ssconf_store=None,
                  noded_cert_file=pathutils.NODED_CERT_FILE,
                  run_cmd_fn=ssh.RunSshCmdWithStdin):
  """Distributes a node's public SSH key across the cluster.

  Note that this function should only be executed on the master node, which
  then will copy the new node's key to all nodes in the cluster via SSH.

  @type node_uuid: str
  @param node_uuid: the UUID of the node whose key is added
  @type node_name: str
  @param node_name: the name of the node whose key is added
  @type to_authorized_keys: boolean
  @param to_authorized_keys: whether the key should be added to the
    C{authorized_keys} file of all nodes
  @type to_public_keys: boolean
  @param to_public_keys: whether the keys should be added to the public key file
  @type get_pub_keys: boolean
  @param get_pub_keys: whether the node should add the clusters' public keys
    to its {ganeti_pub_keys} file
  @type ssh_port_map: dict from str to int
  @param ssh_port_map: a mapping from node names to SSH port numbers
  @type potential_master_candidates: list of str
  @param potential_master_candidates: list of node names of potential master
    candidates; this should match the list of uuids in the public key file
  @raise errors.SshUpdateError: if no keys are found for the new node at all

  """
  if not ssconf_store:
    ssconf_store = ssconf.SimpleStore()

  # Check and fix sanity of key file
  keys_by_name = ssh.QueryPubKeyFile([node_name], key_file=pub_key_file)
  keys_by_uuid = ssh.QueryPubKeyFile([node_uuid], key_file=pub_key_file)

  # The new node's key may be filed under its name (temporarily, before the
  # UUID is known) or under its UUID; having neither is an error.
  if (not keys_by_name or node_name not in keys_by_name) \
      and (not keys_by_uuid or node_uuid not in keys_by_uuid):
    raise errors.SshUpdateError(
      "No keys found for the new node '%s' (UUID %s) in the list of public"
      " SSH keys, neither for the name or the UUID" % (node_name, node_uuid))
  else:
    if node_name in keys_by_name:
      keys_by_uuid = {}
      # Replace the name by UUID in the file as the name should only be used
      # temporarily
      ssh.ReplaceNameByUuid(node_uuid, node_name,
                            error_fn=errors.SshUpdateError,
                            key_file=pub_key_file)
      keys_by_uuid[node_uuid] = keys_by_name[node_name]

  # Update the master node's key files
  if to_authorized_keys:
    (auth_key_file, _) = \
      ssh.GetAllUserFiles(constants.SSH_LOGIN_USER, mkdir=False, dircheck=False)
    ssh.AddAuthorizedKeys(auth_key_file, keys_by_uuid[node_uuid])

  # base_data carries the node daemon certificate and cluster name that
  # every SSH update request needs.
  base_data = {}
  _InitSshUpdateData(base_data, noded_cert_file, ssconf_store)
  cluster_name = base_data[constants.SSHS_CLUSTER_NAME]

  # Update all nodes except master and the target node
  if to_authorized_keys:
    base_data[constants.SSHS_SSH_AUTHORIZED_KEYS] = \
      (constants.SSHS_ADD, keys_by_uuid)

  # Potential master candidates additionally get the key added to their
  # public key file, hence the separate payload.
  pot_mc_data = copy.deepcopy(base_data)
  if to_public_keys:
    pot_mc_data[constants.SSHS_SSH_PUBLIC_KEYS] = \
      (constants.SSHS_REPLACE_OR_ADD, keys_by_uuid)

  all_nodes = ssconf_store.GetNodeList()
  master_node = ssconf_store.GetMasterNode()

  for node in all_nodes:
    if node in [master_node, node_name]:
      continue
    # NOTE(review): the five booleans are positional SSH options for
    # run_cmd_fn (ssh.RunSshCmdWithStdin) — see that function's signature.
    if node in potential_master_candidates:
      run_cmd_fn(cluster_name, node, pathutils.SSH_UPDATE,
                 True, True, False, False, False,
                 ssh_port_map.get(node), pot_mc_data, ssconf_store)
    else:
      if to_authorized_keys:
        run_cmd_fn(cluster_name, node, pathutils.SSH_UPDATE,
                   True, True, False, False, False,
                   ssh_port_map.get(node), base_data, ssconf_store)

  # Update the target node itself
  if get_pub_keys:
    node_data = {}
    _InitSshUpdateData(node_data, noded_cert_file, ssconf_store)
    # The new node receives the full public key file contents (override).
    all_keys = ssh.QueryPubKeyFile(None, key_file=pub_key_file)
    node_data[constants.SSHS_SSH_PUBLIC_KEYS] = \
      (constants.SSHS_OVERRIDE, all_keys)
    run_cmd_fn(cluster_name, node_name, pathutils.SSH_UPDATE,
               True, True, False, False, False,
               ssh_port_map.get(node_name), node_data, ssconf_store)
1511
1512
1513 def RemoveNodeSshKey(node_uuid, node_name, from_authorized_keys,
1514 from_public_keys, clear_authorized_keys,
1515 ssh_port_map, master_candidate_uuids,
1516 potential_master_candidates,
1517 pub_key_file=pathutils.SSH_PUB_KEYS,
1518 ssconf_store=None,
1519 noded_cert_file=pathutils.NODED_CERT_FILE,
1520 run_cmd_fn=ssh.RunSshCmdWithStdin):
1521 """Removes the node's SSH keys from the key files and distributes those.
1522
1523 @type node_uuid: str
1524 @param node_uuid: UUID of the node whose key is removed
1525 @type node_name: str
1526 @param node_name: name of the node whose key is remove
1527 @type from_authorized_keys: boolean
1528 @param from_authorized_keys: whether or not the key should be removed
1529 from the C{authorized_keys} file
1530 @type from_public_keys: boolean
1531 @param from_public_keys: whether or not the key should be remove from
1532 the C{ganeti_pub_keys} file
1533 @type clear_authorized_keys: boolean
1534 @param clear_authorized_keys: whether or not the C{authorized_keys} file
1535 should be cleared on the node whose keys are removed
1536 @type ssh_port_map: dict of str to int
1537 @param ssh_port_map: mapping of node names to their SSH port
1538 @type master_candidate_uuids: list of str
1539 @param master_candidate_uuids: list of UUIDs of the current master candidates
1540 @type potential_master_candidates: list of str
1541 @param potential_master_candidates: list of names of potential master
1542 candidates
1543
1544 """
1545 if not ssconf_store:
1546 ssconf_store = ssconf.SimpleStore()
1547
1548 if not (from_authorized_keys or from_public_keys or clear_authorized_keys):
1549 raise errors.SshUpdateError("No removal from any key file was requested.")
1550
1551 master_node = ssconf_store.GetMasterNode()
1552
1553 if from_authorized_keys or from_public_keys:
1554 keys = ssh.QueryPubKeyFile([node_uuid], key_file=pub_key_file)
1555 if not keys or node_uuid not in keys:
1556 raise errors.SshUpdateError("Node '%s' not found in the list of public"
1557 " SSH keys. It seems someone tries to"
1558 " remove a key from outside the cluster!"
1559 % node_uuid)
1560
1561 if node_name == master_node:
1562 raise errors.SshUpdateError("Cannot remove the master node's keys.")
1563
1564 base_data = {}
1565 _InitSshUpdateData(base_data, noded_cert_file, ssconf_store)
1566 cluster_name = base_data[constants.SSHS_CLUSTER_NAME]
1567
1568 if from_authorized_keys:
1569 base_data[constants.SSHS_SSH_AUTHORIZED_KEYS] = \
1570 (constants.SSHS_REMOVE, keys)
1571 (auth_key_file, _) = \
1572 ssh.GetAllUserFiles(constants.SSH_LOGIN_USER, mkdir=False,
1573 dircheck=False)
1574 ssh.RemoveAuthorizedKeys(auth_key_file, keys[node_uuid])
1575
1576 pot_mc_data = copy.deepcopy(base_data)
1577
1578 if from_public_keys:
1579 pot_mc_data[constants.SSHS_SSH_PUBLIC_KEYS] = \
1580 (constants.SSHS_REMOVE, keys)
1581 ssh.RemovePublicKey(node_uuid, key_file=pub_key_file)
1582
1583 all_nodes = ssconf_store.GetNodeList()
1584 for node in all_nodes:
1585 if node in [master_node, node_name]:
1586 continue
1587 ssh_port = ssh_port_map.get(node)
1588 if not ssh_port:
1589 raise errors.OpExecError("No SSH port information available for"
1590 " node '%s', map: %s." % (node, ssh_port_map))
1591 if node in potential_master_candidates:
1592 run_cmd_fn(cluster_name, node, pathutils.SSH_UPDATE,
1593 True, True, False, False, False,
1594 ssh_port, pot_mc_data, ssconf_store)
1595 else:
1596 if from_authorized_keys:
1597 run_cmd_fn(cluster_name, node, pathutils.SSH_UPDATE,
1598 True, True, False, False, False,
1599 ssh_port, base_data, ssconf_store)
1600
1601 if clear_authorized_keys or from_public_keys:
1602 data = {}
1603 _InitSshUpdateData(data, noded_cert_file, ssconf_store)
1604 cluster_name = data[constants.SSHS_CLUSTER_NAME]
1605 ssh_port = ssh_port_map.get(node_name)
1606 if not ssh_port:
1607 raise errors.OpExecError("No SSH port information available for"
1608 " node '%s', which is leaving the cluster.")
1609
1610 if clear_authorized_keys:
1611 other_master_candidate_uuids = [uuid for uuid in master_candidate_uuids
1612 if uuid != node_uuid]
1613 candidate_keys = ssh.QueryPubKeyFile(other_master_candidate_uuids,
1614 key_file=pub_key_file)
1615 data[constants.SSHS_SSH_AUTHORIZED_KEYS] = \
1616 (constants.SSHS_REMOVE, candidate_keys)
1617
1618 if from_public_keys:
1619 data[constants.SSHS_SSH_PUBLIC_KEYS] = \
1620 (constants.SSHS_REMOVE, keys)
1621
1622 try:
1623 run_cmd_fn(cluster_name, node_name, pathutils.SSH_UPDATE,
1624 True, True, False, False, False,
1625 ssh_port, data, ssconf_store)
1626 except errors.OpExecError, e:
1627 logging.info("Removing SSH keys from node '%s' failed. This can happen"
1628 " when the node is already unreachable. Error: %s",
1629 node_name, e)
1630
1631
def _GenerateNodeSshKey(node_uuid, node_name, ssh_port_map,
                        pub_key_file=pathutils.SSH_PUB_KEYS,
                        ssconf_store=None,
                        noded_cert_file=pathutils.NODED_CERT_FILE,
                        run_cmd_fn=ssh.RunSshCmdWithStdin):
  """Generates the root SSH key pair on the node.

  @type node_uuid: str
  @param node_uuid: UUID of the node whose key is regenerated
  @type node_name: str
  @param node_name: name of the node whose key is regenerated
  @type ssh_port_map: dict of str to int
  @param ssh_port_map: mapping of node names to their SSH port
  @raise errors.SshUpdateError: if the node is not registered in the
    public keys file

  """
  if not ssconf_store:
    ssconf_store = ssconf.SimpleStore()

  # Refuse to regenerate keys for nodes unknown to the public key file.
  keys_by_uuid = ssh.QueryPubKeyFile([node_uuid], key_file=pub_key_file)
  if not keys_by_uuid or node_uuid not in keys_by_uuid:
    raise errors.SshUpdateError("Node %s (UUID: %s) whose key is requested to"
                                " be regenerated is not registered in the"
                                " public keys file." % (node_name, node_uuid))

  # Ask the node itself (via the SSH update tool) to generate a new key pair.
  data = {}
  _InitSshUpdateData(data, noded_cert_file, ssconf_store)
  cluster_name = data[constants.SSHS_CLUSTER_NAME]
  data[constants.SSHS_GENERATE] = True

  run_cmd_fn(cluster_name, node_name, pathutils.SSH_UPDATE,
             True, True, False, False, False,
             ssh_port_map.get(node_name), data, ssconf_store)
1664
1665
def RenewSshKeys(node_uuids, node_names, ssh_port_map,
                 master_candidate_uuids,
                 potential_master_candidates,
                 pub_key_file=pathutils.SSH_PUB_KEYS,
                 ssconf_store=None,
                 noded_cert_file=pathutils.NODED_CERT_FILE,
                 run_cmd_fn=ssh.RunSshCmdWithStdin):
  """Renews all SSH keys and updates authorized_keys and ganeti_pub_keys.

  For each non-master node, the old key is removed from authorized_keys
  (if the node is a master candidate), a new key pair is generated on the
  node, the fresh public keys are fetched, and then distributed across the
  cluster.

  @type node_uuids: list of str
  @param node_uuids: UUIDs of the nodes whose keys should be renewed; must
    correspond element-wise to C{node_names}
  @type node_names: list of str
  @param node_names: names of the nodes whose keys should be renewed
  @type ssh_port_map: dict of str to int
  @param ssh_port_map: mapping of node names to their SSH port
  @type master_candidate_uuids: list of str
  @param master_candidate_uuids: list of UUIDs of the current master
    candidates
  @type potential_master_candidates: list of str
  @param potential_master_candidates: list of names of potential master
    candidates
  @raise errors.ProgrammerError: if the UUID and name lists differ in length
  @raise errors.SshUpdateError: if a node has no registered public key or
    its new key could not be fetched

  """
  if not ssconf_store:
    ssconf_store = ssconf.SimpleStore()
  cluster_name = ssconf_store.GetClusterName()

  if not len(node_uuids) == len(node_names):
    raise errors.ProgrammerError("List of nodes UUIDs and node names"
                                 " does not match in length.")

  (_, root_keyfiles) = \
    ssh.GetAllUserFiles(constants.SSH_LOGIN_USER, mkdir=False, dircheck=False)

  node_uuid_name_map = zip(node_uuids, node_names)

  master_node_name = ssconf_store.GetMasterNode()
  # process non-master nodes
  for node_uuid, node_name in node_uuid_name_map:
    if node_name == master_node_name:
      continue
    master_candidate = node_uuid in master_candidate_uuids
    potential_master_candidate = node_name in potential_master_candidates

    # Refuse to renew keys of nodes unknown to the public key file.
    keys_by_uuid = ssh.QueryPubKeyFile([node_uuid], key_file=pub_key_file)
    if not keys_by_uuid:
      raise errors.SshUpdateError("No public key of node %s (UUID %s) found,"
                                  " not generating a new key."
                                  % (node_name, node_uuid))

    # Retire the old key from authorized_keys cluster-wide before
    # regenerating; the public key file entry is replaced further below.
    RemoveNodeSshKey(node_uuid, node_name,
                     master_candidate, # from authorized keys
                     False, # Don't remove (yet) from public keys
                     False, # Don't clear authorized_keys
                     ssh_port_map,
                     master_candidate_uuids,
                     potential_master_candidates)

    _GenerateNodeSshKey(node_uuid, node_name, ssh_port_map,
                        pub_key_file=pub_key_file,
                        ssconf_store=ssconf_store,
                        noded_cert_file=noded_cert_file,
                        run_cmd_fn=run_cmd_fn)

    # Fetch the freshly generated public keys from the node.
    fetched_keys = ssh.ReadRemoteSshPubKeys(root_keyfiles, node_name,
                                            cluster_name,
                                            ssh_port_map[node_name],
                                            False, # ask_key
                                            False) # key_check
    if not fetched_keys:
      raise errors.SshUpdateError("Could not fetch key of node %s"
                                  " (UUID %s)" % (node_name, node_uuid))

    if potential_master_candidate:
      ssh.RemovePublicKey(node_uuid, key_file=pub_key_file)
      for pub_key in fetched_keys.values():
        ssh.AddPublicKey(node_uuid, pub_key, key_file=pub_key_file)

    AddNodeSshKey(node_uuid, node_name,
                  master_candidate, # Add to authorized_keys file
                  potential_master_candidate, # Add to public_keys
                  True, # Get public keys
                  ssh_port_map, potential_master_candidates,
                  pub_key_file=pub_key_file, ssconf_store=ssconf_store,
                  noded_cert_file=noded_cert_file,
                  run_cmd_fn=run_cmd_fn)

  # FIXME: Update master key as well
1741
1742
1743 def GetBlockDevSizes(devices):
1744 """Return the size of the given block devices
1745
1746 @type devices: list
1747 @param devices: list of block device nodes to query
1748 @rtype: dict
1749 @return:
1750 dictionary of all block devices under /dev (key). The value is their
1751 size in MiB.
1752
1753 {'/dev/disk/by-uuid/123456-12321231-312312-312': 124}
1754
1755 """
1756 DEV_PREFIX = "/dev/"
1757 blockdevs = {}
1758
1759 for devpath in devices:
1760 if not utils.IsBelowDir(DEV_PREFIX, devpath):
1761 continue
1762
1763 try:
1764 st = os.stat(devpath)
1765 except EnvironmentError, err:
1766 logging.warning("Error stat()'ing device %s: %s", devpath, str(err))
1767 continue
1768
1769 if stat.S_ISBLK(st.st_mode):
1770 result = utils.RunCmd(["blockdev", "--getsize64", devpath])
1771 if result.failed:
1772 # We don't want to fail, just do not list this device as available
1773 logging.warning("Cannot get size for block device %s", devpath)
1774 continue
1775
1776 size = int(result.stdout) / (1024 * 1024)
1777 blockdevs[devpath] = size
1778 return blockdevs
1779
1780
def GetVolumeList(vg_names):
  """Compute list of logical volumes and their size.

  @type vg_names: list
  @param vg_names: the volume groups whose LVs we should list, or
      empty for all volume groups
  @rtype: dict
  @return:
      dictionary of all partions (key) with value being a tuple of
      their size (in MiB), inactive and online status::

        {'xenvg/test1': ('20.06', True, True)}

      in case of errors, a string is returned with the error
      details.

  """
  sep = "|"
  result = utils.RunCmd(["lvs", "--noheadings", "--units=m", "--nosuffix",
                         "--separator=%s" % sep,
                         "-ovg_name,lv_name,lv_size,lv_attr"]
                        + (vg_names or []))
  if result.failed:
    _Fail("Failed to list logical volumes, lvs output: %s", result.output)

  lvs = {}
  for line in result.stdout.splitlines():
    line = line.strip()
    match = _LVSLINE_REGEX.match(line)
    if not match:
      logging.error("Invalid line returned from lvs output: '%s'", line)
      continue
    (vg_name, lv_name, size, attr) = match.groups()
    if attr[0] == "v":
      # we don't want to report such volumes as existing, since they
      # don't really hold data
      continue
    # attr[4] is the LV state flag, attr[5] the open flag.
    lvs["%s/%s" % (vg_name, lv_name)] = (size, attr[4] == "-", attr[5] == "o")

  return lvs
1825
1826
def ListVolumeGroups():
  """List the volume groups and their size.

  Thin wrapper around L{utils.ListVolumeGroups}.

  @rtype: dict
  @return: dictionary mapping volume group names to their size

  """
  return utils.ListVolumeGroups()
1836
1837
def NodeVolumes():
  """List all volumes on this node.

  @rtype: list
  @return:
    A list of dictionaries, each having four keys:
      - name: the logical volume name,
      - size: the size of the logical volume
      - dev: the physical device on which the LV lives
      - vg: the volume group to which it belongs

    In case of errors, we return an empty list and log the
    error.

    Note that since a logical volume can live on multiple physical
    volumes, the resulting list might include a logical volume
    multiple times.

  """
  result = utils.RunCmd(["lvs", "--noheadings", "--units=m", "--nosuffix",
                         "--separator=|",
                         "--options=lv_name,lv_size,devices,vg_name"])
  if result.failed:
    _Fail("Failed to list logical volumes, lvs output: %s",
          result.output)

  def _SplitDevices(field):
    # The "devices" field looks like "/dev/sda1(0),/dev/sdb1(0)"; drop the
    # extent information in parentheses and keep only the device paths.
    return [entry.split("(")[0] for entry in field.split(",")]

  volumes = []
  for line in result.stdout.splitlines():
    if line.count("|") < 3:
      logging.warning("Strange line in the output from lvs: '%s'", line)
      continue
    fields = [field.strip() for field in line.split("|")]
    # One dictionary per (LV, device) pair, so multi-PV volumes appear
    # once per underlying device.
    for dev in _SplitDevices(fields[2]):
      volumes.append({"name": fields[0], "size": fields[1],
                      "dev": dev, "vg": fields[3]})
  return volumes
1882
1883
def BridgesExist(bridges_list):
  """Check that all the bridges in a list exist on the current node.

  @type bridges_list: list
  @param bridges_list: names of the bridges to check
  @raise RPCFail: if one or more of the bridges do not exist
  @rtype: None

  """
  missing = [bridge for bridge in bridges_list
             if not utils.BridgeExists(bridge)]

  if missing:
    _Fail("Missing bridges %s", utils.CommaJoin(missing))
1898
1899
1900 def GetInstanceListForHypervisor(hname, hvparams=None,
1901 get_hv_fn=hypervisor.GetHypervisor):
1902 """Provides a list of instances of the given hypervisor.
1903
1904 @type hname: string
1905 @param hname: name of the hypervisor
1906 @type hvparams: dict of strings
1907 @param hvparams: hypervisor parameters for the given hypervisor
1908 @type get_hv_fn: function
1909 @param get_hv_fn: function that returns a hypervisor for the given hypervisor
1910 name; optional parameter to increase testability
1911
1912 @rtype: list
1913 @return: a list of all running instances on the current node
1914 - instance1.example.com
1915 - instance2.example.com
1916
1917 """
1918 try:
1919 return get_hv_fn(hname).ListInstances(hvparams=hvparams)
1920 except errors.HypervisorError, err:
1921 _Fail("Error enumerating instances (hypervisor %s): %s",
1922 hname, err, exc=True)
1923
1924
def GetInstanceList(hypervisor_list, all_hvparams=None,
                    get_hv_fn=hypervisor.GetHypervisor):
  """Provides a list of instances.

  @type hypervisor_list: list
  @param hypervisor_list: the list of hypervisors to query information
  @type all_hvparams: dict of dict of strings
  @param all_hvparams: a dictionary mapping hypervisor types to respective
    cluster-wide hypervisor parameters; may be C{None}, in which case no
    hypervisor parameters are passed on
  @type get_hv_fn: function
  @param get_hv_fn: function that returns a hypervisor for the given hypervisor
    name; optional parameter to increase testability

  @rtype: list
  @return: a list of all running instances on the current node
    - instance1.example.com
    - instance2.example.com

  """
  results = []
  for hname in hypervisor_list:
    # 'all_hvparams' is documented as optional (default None); the previous
    # code subscripted it unconditionally, raising TypeError when omitted
    if all_hvparams is None:
      hvparams = None
    else:
      hvparams = all_hvparams[hname]
    results.extend(GetInstanceListForHypervisor(hname, hvparams=hvparams,
                                                get_hv_fn=get_hv_fn))
  return results
1950
1951
def GetInstanceInfo(instance, hname, hvparams=None):
  """Gives back the information about an instance as a dictionary.

  @type instance: string
  @param instance: the instance name
  @type hname: string
  @param hname: the hypervisor type of the instance
  @type hvparams: dict of strings
  @param hvparams: the instance's hvparams

  @rtype: dict
  @return: empty dict if the hypervisor knows nothing about the
      instance, otherwise a dictionary with the following keys:
        - memory: memory size of instance (int)
        - state: state of instance (HvInstanceState)
        - time: cpu time of instance (float)
        - vcpus: the number of vcpus (int)

  """
  iinfo = hypervisor.GetHypervisor(hname).GetInstanceInfo(instance,
                                                          hvparams=hvparams)
  if iinfo is None:
    return {}

  # iinfo is a tuple; fields 2..5 carry memory/vcpus/state/cpu-time
  return {
    "memory": iinfo[2],
    "vcpus": iinfo[3],
    "state": iinfo[4],
    "time": iinfo[5],
    }
1981
1982
def GetInstanceMigratable(instance):
  """Computes whether an instance can be migrated.

  @type instance: L{objects.Instance}
  @param instance: object representing the instance to be checked.
  @raise RPCFail: if the instance is not running on this node
  @rtype: None

  """
  hyper = hypervisor.GetHypervisor(instance.hypervisor)
  if instance.name not in hyper.ListInstances(hvparams=instance.hvparams):
    _Fail("Instance %s is not running", instance.name)

  # a missing disk symlink is only worth a warning, not a failure
  for idx, _ in enumerate(instance.disks_info):
    link_name = _GetBlockDevSymlinkPath(instance.name, idx)
    if not os.path.islink(link_name):
      logging.warning("Instance %s is missing symlink %s for disk %d",
                      instance.name, link_name, idx)
2005
2006
def GetAllInstancesInfo(hypervisor_list, all_hvparams):
  """Gather data about all instances.

  This is the equivalent of L{GetInstanceInfo}, except that it
  computes data for all instances at once, thus being faster if one
  needs data about more than one instance.

  @type hypervisor_list: list
  @param hypervisor_list: list of hypervisors to query for instance data
  @type all_hvparams: dict of dict of strings
  @param all_hvparams: mapping of hypervisor names to hvparams

  @rtype: dict
  @return: dictionary of instance: data, with data having the following keys:
      - memory: memory size of instance (int)
      - state: xen state of instance (string)
      - time: cpu time of instance (float)
      - vcpus: the number of vcpus

  """
  output = {}
  for hname in hypervisor_list:
    iinfo = hypervisor.GetHypervisor(hname).GetAllInstancesInfo(
        all_hvparams[hname])
    if not iinfo:
      continue
    for name, _, memory, vcpus, state, times in iinfo:
      value = {
        "memory": memory,
        "vcpus": vcpus,
        "state": state,
        "time": times,
        }
      previous = output.get(name)
      if previous is not None:
        # the same instance may be reported by several hypervisors; only
        # the static parameters (memory, vcpus) must agree, while state
        # and time can legitimately differ between the invocations
        for key in ("memory", "vcpus"):
          if value[key] != previous[key]:
            _Fail("Instance %s is running twice"
                  " with different parameters", name)
      output[name] = value

  return output
2050
2051
def GetInstanceConsoleInfo(instance_param_dict,
                           get_hv_fn=hypervisor.GetHypervisor):
  """Gather data about the console access of a set of instances of this node.

  This function assumes that the caller already knows which instances are on
  this node, by calling a function such as L{GetAllInstancesInfo} or
  L{GetInstanceList}.

  For every instance, a large amount of configuration data needs to be
  provided to the hypervisor interface in order to receive the console
  information. Whether this could or should be cut down can be discussed.
  The information is provided in a dictionary indexed by instance name,
  allowing any number of instance queries to be done.

  @type instance_param_dict: dict of string to tuple of dictionaries, where the
    dictionaries represent: L{objects.Instance}, L{objects.Node},
    L{objects.NodeGroup}, HvParams, BeParams
  @param instance_param_dict: mapping of instance name to parameters necessary
    for console information retrieval

  @rtype: dict
  @return: dictionary of instance: data, with data having the following keys:
    - instance: instance name
    - kind: console kind
    - message: used with kind == CONS_MESSAGE, indicates console to be
      unavailable, supplies error message
    - host: host to connect to
    - port: port to use
    - user: user for login
    - command: the command, broken into parts as an array
    - display: unknown, potentially unused?

  """
  output = {}
  for inst_name, params in instance_param_dict.items():
    # deserialize the config objects sent over the wire
    instance = objects.Instance.FromDict(params["instance"])
    pnode = objects.Node.FromDict(params["node"])
    group = objects.NodeGroup.FromDict(params["group"])

    hyper = get_hv_fn(instance.hypervisor)
    console = hyper.GetInstanceConsole(instance, pnode, group,
                                       params["hvParams"], params["beParams"])
    output[inst_name] = console.ToDict()

  return output
2103
2104
def _InstanceLogName(kind, os_name, instance, component):
  """Compute the OS log filename for a given instance and operation.

  The instance name and os name are passed in as strings since not all
  operations have these as part of an instance object.

  @type kind: string
  @param kind: the operation type (e.g. add, import, etc.)
  @type os_name: string
  @param os_name: the os name
  @type instance: string
  @param instance: the name of the instance being imported/added/etc.
  @type component: string or None
  @param component: the name of the component of the instance being
      transferred
  @rtype: string
  @return: the full path of the log file under L{pathutils.LOG_OS_DIR}

  """
  # TODO: Use tempfile.mkstemp to create unique filename
  if component:
    # the component becomes part of the filename, so it must not
    # contain a path separator
    assert "/" not in component
    c_msg = "-" + component
  else:
    c_msg = ""
  base = "%s-%s-%s%s-%s.log" % (kind, os_name, instance, c_msg,
                                utils.TimestampForFilename())
  return utils.PathJoin(pathutils.LOG_OS_DIR, base)
2131
2132
def InstanceOsAdd(instance, reinstall, debug):
  """Add an OS to an instance.

  @type instance: L{objects.Instance}
  @param instance: Instance whose OS is to be installed
  @type reinstall: boolean
  @param reinstall: whether this is an instance reinstall
  @type debug: integer
  @param debug: debug level, passed to the OS scripts
  @rtype: None
  @raise RPCFail: if the OS create script fails

  """
  inst_os = OSFromDisk(instance.os)

  create_env = OSEnvironment(instance, inst_os, debug)
  if reinstall:
    create_env["INSTANCE_REINSTALL"] = "1"

  logfile = _InstanceLogName("add", instance.os, instance.name, None)

  result = utils.RunCmd([inst_os.create_script], env=create_env,
                        cwd=inst_os.path, output=logfile, reset_env=True)
  if not result.failed:
    return

  logging.error("os create command '%s' returned error: %s, logfile: %s,"
                " output: %s", result.cmd, result.fail_reason, logfile,
                result.output)
  # include the tail of the log file in the failure message to ease debugging
  lines = [utils.SafeEncode(val)
           for val in utils.TailFile(logfile, lines=20)]
  _Fail("OS create script failed (%s), last lines in the"
        " log file:\n%s", result.fail_reason, "\n".join(lines), log=False)
2163
2164
def RunRenameInstance(instance, old_name, debug):
  """Run the OS rename script for an instance.

  @type instance: L{objects.Instance}
  @param instance: Instance whose OS is to be installed
  @type old_name: string
  @param old_name: previous instance name
  @type debug: integer
  @param debug: debug level, passed to the OS scripts
  @rtype: boolean
  @return: the success of the operation
  @raise RPCFail: if the OS rename script fails

  """
  inst_os = OSFromDisk(instance.os)

  rename_env = OSEnvironment(instance, inst_os, debug)
  rename_env["OLD_INSTANCE_NAME"] = old_name

  logfile = _InstanceLogName("rename", instance.os,
                             "%s-%s" % (old_name, instance.name), None)

  result = utils.RunCmd([inst_os.rename_script], env=rename_env,
                        cwd=inst_os.path, output=logfile, reset_env=True)

  if result.failed:
    # the log message previously said "os create command" — a copy-paste
    # error from InstanceOsAdd; this is the rename script
    logging.error("os rename command '%s' returned error: %s output: %s",
                  result.cmd, result.fail_reason, result.output)
    lines = [utils.SafeEncode(val)
             for val in utils.TailFile(logfile, lines=20)]
    _Fail("OS rename script failed (%s), last lines in the"
          " log file:\n%s", result.fail_reason, "\n".join(lines), log=False)
2196
2197
def _GetBlockDevSymlinkPath(instance_name, idx, _dir=None):
  """Returns the symlink path for an instance's block device.

  @type instance_name: string
  @param instance_name: name of the owning instance
  @type idx: int
  @param idx: the disk index
  @param _dir: base directory override, for testing only

  """
  if _dir is None:
    _dir = pathutils.DISK_LINKS_DIR

  link_base = "%s%s%s" % (instance_name, constants.DISK_SEPARATOR, idx)
  return utils.PathJoin(_dir, link_base)
2208
2209
2210 def _SymlinkBlockDev(instance_name, device_path, idx):
2211 """Set up symlinks to a instance's block device.
2212
2213 This is an auxiliary function run when an instance is start (on the primary
2214 node) or when an instance is migrated (on the target node).
2215
2216
2217 @param instance_name: the name of the target instance
2218 @param device_path: path of the physical block device, on the node
2219 @param idx: the disk index
2220 @return: absolute path to the disk's symlink
2221
2222 """
2223 link_name = _GetBlockDevSymlinkPath(instance_name, idx)
2224 try:
2225 os.symlink(device_path, link_name)
2226 except OSError, err:
2227 if err.errno == errno.EEXIST:
2228 if (not os.path.islink(link_name) or
2229 os.readlink(link_name) != device_path):
2230 os.remove(link_name)
2231 os.symlink(device_path, link_name)
2232 else:
2233 raise
2234
2235 return link_name
2236
2237
def _RemoveBlockDevLinks(instance_name, disks):
  """Remove the block device symlinks belonging to the given instance.

  Removal errors are logged but not propagated (best effort cleanup).

  """
  for idx in range(len(disks)):
    link_name = _GetBlockDevSymlinkPath(instance_name, idx)
    if not os.path.islink(link_name):
      continue
    try:
      os.remove(link_name)
    except OSError:
      logging.exception("Can't remove symlink '%s'", link_name)
2249
2250
def _CalculateDeviceURI(instance, disk, device):
  """Get the URI for the device.

  @type instance: L{objects.Instance}
  @param instance: the instance which disk belongs to
  @type disk: L{objects.Disk}
  @param disk: the target disk object
  @type device: L{bdev.BlockDev}
  @param device: the corresponding BlockDevice
  @rtype: string
  @return: the device uri if any else None

  """
  access_mode = disk.params.get(constants.LDP_ACCESS,
                                constants.DISK_KERNELSPACE)
  if access_mode != constants.DISK_USERSPACE:
    # kernelspace access uses the device node directly, no URI needed
    return None

  # This can raise errors.BlockDeviceError
  return device.GetUserspaceAccessUri(instance.hypervisor)
2271
2272
2273 def _GatherAndLinkBlockDevs(instance):
2274 """Set up an instance's block device(s).
2275
2276 This is run on the primary node at instance startup. The block
2277 devices must be already assembled.
2278
2279 @type instance: L{objects.Instance}
2280 @param instance: the instance whose disks we should assemble
2281 @rtype: list
2282 @return: list of (disk_object, link_name, drive_uri)
2283
2284 """
2285 block_devices = []
2286 for idx, disk in enumerate(instance.disks_info):
2287 device = _RecursiveFindBD(disk)
2288 if device is None:
2289 raise errors.BlockDeviceError("Block device '%s' is not set up." %
2290 str(disk))
2291 device.Open()
2292 try:
2293 link_name = _SymlinkBlockDev(instance.name, device.dev_path, idx)
2294 except OSError, e:
2295 raise errors.BlockDeviceError("Cannot create block device symlink: %s" %
2296 e.strerror)
2297 uri = _CalculateDeviceURI(instance, disk, device)
2298
2299 block_devices.append((disk, link_name, uri))
2300
2301 return block_devices
2302
2303
def _IsInstanceUserDown(instance_info):
  """Check whether the reported instance state is a user shutdown.

  @param instance_info: instance info dict as returned by
      L{GetInstanceInfo}, possibly empty/None

  """
  return (instance_info and
          "state" in instance_info and
          hv_base.HvInstanceState.IsShutdown(instance_info["state"]))
2308
2309
def _GetInstanceInfo(instance):
  """Helper function for L{GetInstanceInfo}, taking an instance object.

  @type instance: L{objects.Instance}
  @param instance: the instance whose runtime info is queried
  @rtype: dict
  @return: see L{GetInstanceInfo}; an empty dict if the hypervisor does
      not know the instance

  """
  return GetInstanceInfo(instance.name, instance.hypervisor,
                         hvparams=instance.hvparams)
2314
2315
def StartInstance(instance, startup_paused, reason, store_reason=True):
  """Start an instance.

  @type instance: L{objects.Instance}
  @param instance: the instance object
  @type startup_paused: bool
  @param startup_paused: pause instance at startup?
  @type reason: list of reasons
  @param reason: the reason trail for this startup
  @type store_reason: boolean
  @param store_reason: whether to store the shutdown reason trail on file
  @rtype: None

  """
  instance_info = _GetInstanceInfo(instance)

  # an instance that the hypervisor reports in a user-shutdown state is
  # not considered "already running" and may be started
  if instance_info and not _IsInstanceUserDown(instance_info):
    logging.info("Instance '%s' already running, not starting", instance.name)
    return

  try:
    block_devices = _GatherAndLinkBlockDevs(instance)
    hyper = hypervisor.GetHypervisor(instance.hypervisor)
    hyper.StartInstance(instance, block_devices, startup_paused)
    if store_reason:
      _StoreInstReasonTrail(instance.name, reason)
  except errors.BlockDeviceError, err:
    _Fail("Block device error: %s", err, exc=True)
  except errors.HypervisorError, err:
    # the symlinks were already created; remove them so no stale link
    # is left behind after the failed start
    _RemoveBlockDevLinks(instance.name, instance.disks_info)
    _Fail("Hypervisor error: %s", err, exc=True)
2347
2348
def InstanceShutdown(instance, timeout, reason, store_reason=True):
  """Shut an instance down.

  First attempts a soft (hypervisor-level) shutdown, retried until
  C{timeout} expires; if that fails, forces the instance off and, as a
  last resort, fails if it is still reported as running.

  @note: this functions uses polling with a hardcoded timeout.

  @type instance: L{objects.Instance}
  @param instance: the instance object
  @type timeout: integer
  @param timeout: maximum timeout for soft shutdown
  @type reason: list of reasons
  @param reason: the reason trail for this shutdown
  @type store_reason: boolean
  @param store_reason: whether to store the shutdown reason trail on file
  @rtype: None

  """
  hyper = hypervisor.GetHypervisor(instance.hypervisor)

  if not _GetInstanceInfo(instance):
    logging.info("Instance '%s' not running, doing nothing", instance.name)
    return

  # callable retried by utils.Retry below; keeps state across attempts
  # so that StopInstance can be told this is a retry
  class _TryShutdown(object):
    def __init__(self):
      self.tried_once = False

    def __call__(self):
      # instance disappeared between attempts: shutdown succeeded
      if not _GetInstanceInfo(instance):
        return

      try:
        hyper.StopInstance(instance, retry=self.tried_once, timeout=timeout)
        if store_reason:
          _StoreInstReasonTrail(instance.name, reason)
      except errors.HypervisorError, err:
        # if the instance is no longer existing, consider this a
        # success and go to cleanup
        if not _GetInstanceInfo(instance):
          return

        _Fail("Failed to stop instance '%s': %s", instance.name, err)

      self.tried_once = True

      # still running: ask utils.Retry for another round
      raise utils.RetryAgain()

  try:
    utils.Retry(_TryShutdown(), 5, timeout)
  except utils.RetryTimeout:
    # the shutdown did not succeed
    logging.error("Shutdown of '%s' unsuccessful, forcing", instance.name)

    try:
      hyper.StopInstance(instance, force=True)
    except errors.HypervisorError, err:
      # only raise an error if the instance still exists, otherwise
      # the error could simply be "instance ... unknown"!
      if _GetInstanceInfo(instance):
        _Fail("Failed to force stop instance '%s': %s", instance.name, err)

    # give the hypervisor a moment before re-checking the instance state
    time.sleep(1)

    if _GetInstanceInfo(instance):
      _Fail("Could not shutdown instance '%s' even by destroy", instance.name)

  # best-effort post-shutdown cleanup; a failure here is only logged
  try:
    hyper.CleanupInstance(instance.name)
  except errors.HypervisorError, err:
    logging.warning("Failed to execute post-shutdown cleanup step: %s", err)

  _RemoveBlockDevLinks(instance.name, instance.disks_info)
2420
2421
2422 def InstanceReboot(instance, reboot_type, shutdown_timeout, reason):
2423 """Reboot an instance.
2424
2425 @type instance: L{objects.Instance}
2426 @param instance: the instance object to reboot
2427 @type reboot_type: str
2428 @param reboot_type: the type of reboot, one the following
2429 constants:
2430 - L{constants.INSTANCE_REBOOT_SOFT}: only reboot the
2431 instance OS, do not recreate the VM
2432 - L{constants.INSTANCE_REBOOT_HARD}: tear down and
2433 restart the VM (at the hypervisor level)
2434 - the other reboot type (L{constants.INSTANCE_REBOOT_FULL}) is
2435 not accepted here, since that mode is handled differently, in
2436 cmdlib, and translates into full stop and start of the
2437 instance (instead of a call_instance_reboot RPC)
2438 @type shutdown_timeout: integer
2439 @param shutdown_timeout: maximum timeout for soft shutdown
2440 @type reason: list of reasons
2441 @param reason: the reason trail for this reboot
2442 @rtype: None
2443
2444 """
2445 # TODO: this is inconsistent with 'StartInstance' and 'InstanceShutdown'
2446 # because those functions simply 'return' on error whereas this one
2447 # raises an exception with '_Fail'
2448 if not _GetInstanceInfo(instance):
2449 _Fail("Cannot reboot instance '%s' that is not running", instance.name)
2450
2451 hyper = hypervisor.GetHypervisor(instance.hypervisor)
2452 if reboot_type == constants.INSTANCE_REBOOT_SOFT:
2453 try:
2454 hyper.RebootInstance(instance)
2455 except errors.HypervisorError, err:
2456 _Fail("Failed to soft reboot instance '%s': %s", instance.name, err)
2457 elif reboot_type == constants.INSTANCE_REBOOT_HARD:
2458 try:
2459 InstanceShutdown(instance, shutdown_timeout, reason, store_reason=False)
2460 result = StartInstance(instance, False, reason, store_reason=False)
2461 _StoreInstReasonTrail(instance.name, reason)
2462 return result
2463 except errors.HypervisorError, err:
2464 _Fail("Failed to hard reboot instance '%s': %s", instance.name, err)
2465 else:
2466 _Fail("Invalid reboot_type received: '%s'", reboot_type)
2467
2468
2469 def InstanceBalloonMemory(instance, memory):
2470 """Resize an instance's memory.
2471
2472 @type instance: L{objects.Instance}
2473 @param instance: the instance object
2474 @type memory: int
2475 @param memory: new memory amount in MB
2476 @rtype: None
2477
2478 """
2479 hyper = hypervisor.GetHypervisor(instance.hypervisor)
2480 running = hyper.ListInstances(hvparams=instance.hvparams)
2481 if instance.name not in running:
2482 logging.info("Instance %s is not running, cannot balloon", instance.name)
2483 return
2484 try:
2485 hyper.BalloonInstanceMemory(instance, memory)
2486 except errors.HypervisorError, err:
2487 _Fail("Failed to balloon instance memory: %s", err, exc=True)
2488
2489
2490 def MigrationInfo(instance):
2491 """Gather information about an instance to be migrated.
2492
2493 @type instance: L{objects.Instance}
2494 @param instance: the instance definition
2495
2496 """
2497 hyper = hypervisor.GetHypervisor(instance.hypervisor)
2498 try:
2499 info = hyper.MigrationInfo(instance)
2500 except errors.HypervisorError, err:
2501 _Fail("Failed to fetch migration information: %s", err, exc=True)
2502 return info
2503
2504
2505 def AcceptInstance(instance, info, target):
2506 """Prepare the node to accept an instance.
2507
2508 @type instance: L{objects.Instance}
2509 @param instance: the instance definition
2510 @type info: string/data (opaque)
2511 @param info: migration information, from the source node
2512 @type target: string
2513 @param target: target host (usually ip), on this node
2514
2515 """
2516 # TODO: why is this required only for DTS_EXT_MIRROR?
2517 if instance.disk_template in constants.DTS_EXT_MIRROR:
2518 # Create the symlinks, as the disks are not active
2519 # in any way
2520 try:
2521 _GatherAndLinkBlockDevs(instance)
2522 except errors.BlockDeviceError, err:
2523 _Fail("Block device error: %s", err, exc=True)
2524
2525 hyper = hypervisor.GetHypervisor(instance.hypervisor)
2526 try:
2527 hyper.AcceptInstance(instance, info, target)
2528 except errors.HypervisorError, err:
2529 if instance.disk_template in constants.DTS_EXT_MIRROR:
2530 _RemoveBlockDevLinks(instance.name, instance.disks_info)
2531 _Fail("Failed to accept instance: %s", err, exc=True)
2532
2533
2534 def FinalizeMigrationDst(instance, info, success):
2535 """Finalize any preparation to accept an instance.
2536
2537 @type instance: L{objects.Instance}
2538 @param instance: the instance definition
2539 @type info: string/data (opaque)
2540 @param info: migration information, from the source node
2541 @type success: boolean
2542 @param success: whether the migration was a success or a failure
2543
2544 """
2545 hyper = hypervisor.GetHypervisor(instance.hypervisor)
2546 try:
2547 hyper.FinalizeMigrationDst(instance, info, success)
2548 except errors.HypervisorError, err:
2549 _Fail("Failed to finalize migration on the target node: %s", err, exc=True)
2550
2551
2552 def MigrateInstance(cluster_name, instance, target, live):
2553 """Migrates an instance to another node.
2554
2555 @type cluster_name: string
2556 @param cluster_name: name of the cluster
2557 @type instance: L{objects.Instance}
2558 @param instance: the instance definition
2559 @type target: string
2560 @param target: the target node name
2561 @type live: boolean
2562 @param live: whether the migration should be done live or not (the
2563 interpretation of this parameter is left to the hypervisor)
2564 @raise RPCFail: if migration fails for some reason
2565
2566 """
2567 hyper = hypervisor.GetHypervisor(instance.hypervisor)
2568
2569 try:
2570 hyper.MigrateInstance(cluster_name, instance, target, live)
2571 except errors.HypervisorError, err:
2572 _Fail("Failed to migrate instance: %s", err, exc=True)
2573
2574
2575 def FinalizeMigrationSource(instance, success, live):
2576 """Finalize the instance migration on the source node.
2577
2578 @type instance: L{objects.Instance}
2579 @param instance: the instance definition of the migrated instance
2580 @type success: bool
2581 @param success: whether the migration succeeded or not
2582 @type live: bool
2583 @param live: whether the user requested a live migration or not
2584 @raise RPCFail: If the execution fails for some reason
2585
2586 """
2587 hyper = hypervisor.GetHypervisor(instance.hypervisor)
2588
2589 try:
2590 hyper.FinalizeMigrationSource(instance, success, live)
2591 except Exception, err: # pylint: disable=W0703
2592 _Fail("Failed to finalize the migration on the source node: %s", err,
2593 exc=True)
2594
2595
2596 def GetMigrationStatus(instance):
2597 """Get the migration status
2598
2599 @type instance: L{objects.Instance}
2600 @param instance: the instance that is being migrated
2601 @rtype: L{objects.MigrationStatus}
2602 @return: the status of the current migration (one of
2603 L{constants.HV_MIGRATION_VALID_STATUSES}), plus any additional
2604 progress info that can be retrieved from the hypervisor
2605 @raise RPCFail: If the migration status cannot be retrieved
2606
2607 """
2608 hyper = hypervisor.GetHypervisor(instance.hypervisor)
2609 try:
2610 return hyper.GetMigrationStatus(instance)
2611 except Exception, err: # pylint: disable=W0703
2612 _Fail("Failed to get migration status: %s", err, exc=True)
2613
2614
2615 def HotplugDevice(instance, action, dev_type, device, extra, seq):
2616 """Hotplug a device
2617
2618 Hotplug is currently supported only for KVM Hypervisor.
2619 @type instance: L{objects.Instance}
2620 @param instance: the instance to which we hotplug a device
2621 @type action: string
2622 @param action: the hotplug action to perform
2623 @type dev_type: string
2624 @param dev_type: the device type to hotplug
2625 @type device: either L{objects.NIC} or L{objects.Disk}
2626 @param device: the device object to hotplug
2627 @type extra: tuple
2628 @param extra: extra info used for disk hotplug (disk link, drive uri)
2629 @type seq: int
2630 @param seq: the index of the device from master perspective
2631 @raise RPCFail: in case instance does not have KVM hypervisor
2632
2633 """
2634 hyper = hypervisor.GetHypervisor(instance.hypervisor)
2635 try:
2636 hyper.VerifyHotplugSupport(instance, action, dev_type)
2637 except errors.HotplugError, err:
2638 _Fail("Hotplug is not supported: %s", err)
2639
2640 if action == constants.HOTPLUG_ACTION_ADD:
2641 fn = hyper.HotAddDevice
2642 elif action == constants.HOTPLUG_ACTION_REMOVE:
2643 fn = hyper.HotDelDevice
2644 elif action == constants.HOTPLUG_ACTION_MODIFY:
2645 fn = hyper.HotModDevice
2646 else:
2647 assert action in constants.HOTPLUG_ALL_ACTIONS
2648
2649 return fn(instance, dev_type, device, extra, seq)
2650
2651
2652 def HotplugSupported(instance):
2653 """Checks if hotplug is generally supported.
2654
2655 """
2656 hyper = hypervisor.GetHypervisor(instance.hypervisor)
2657 try:
2658 hyper.HotplugSupported(instance)
2659 except errors.HotplugError, err:
2660 _Fail("Hotplug is not supported: %s", err)
2661
2662
def ModifyInstanceMetadata(metadata):
  """Sends instance data to the metadata daemon.

  Uses the Luxi transport layer to communicate with the metadata
  daemon configuration server. It starts the metadata daemon if it is
  not running.
  The daemon must be enabled during at configuration time.

  @type metadata: dict
  @param metadata: instance metadata obtained by calling
    L{objects.Instance.ToDict} on an instance object

  @raise errors.ProgrammerError: if the metadata daemon is disabled
  @raise errors.HypervisorError: if the daemon could not be started
  @raise TimeoutError: if connecting to the daemon's socket fails

  """
  if not constants.ENABLE_METAD:
    raise errors.ProgrammerError("The metadata deamon is disabled, yet"
                                 " ModifyInstanceMetadata has been called")

  # start the daemon on demand if it is not already running
  if not utils.IsDaemonAlive(constants.METAD):
    result = utils.RunCmd([pathutils.DAEMON_UTIL, "start", constants.METAD])
    if result.failed:
      raise errors.HypervisorError("Failed to start metadata daemon")

  def _Connect():
    # connect to the daemon's unix socket
    return transport.Transport(pathutils.SOCKET_DIR + "/ganeti-metad")

  retries = 5

  # each utils.Retry call polls _Connect for up to LUXI_DEF_CTMO; on
  # socket-level errors the whole Retry is restarted up to 5 more times
  while True:
    try:
      trans = utils.Retry(_Connect, 1.0, constants.LUXI_DEF_CTMO)
      break
    except utils.RetryTimeout:
      raise TimeoutError("Connection to metadata daemon timed out")
    except (socket.error, NoMasterError), err:
      if retries == 0:
        raise TimeoutError("Failed to connect to metadata daemon: %s" % err)
      else:
        retries -= 1

  # private fields are encoded explicitly so they survive serialization
  data = serializer.DumpJson(metadata,
                             private_encoder=serializer.EncodeWithPrivateFields)

  trans.Send(data)
  trans.Close()
2707
2708
def BlockdevCreate(disk, size, owner, on_primary, info, excl_stor):
  """Creates a block device for an instance.

  @type disk: L{objects.Disk}
  @param disk: the object describing the disk we should create
  @type size: int
  @param size: the size of the physical underlying device, in MiB
  @type owner: str
  @param owner: the name of the instance for which disk is created,
      used for device cache data
  @type on_primary: boolean
  @param on_primary: indicates if it is the primary node or not
  @type info: string
  @param info: string that will be sent to the physical device
      creation, used for example to set (LVM) tags on LVs
  @type excl_stor: boolean
  @param excl_stor: Whether exclusive_storage is active

  @return: the new unique_id of the device (this can sometime be
      computed only after creation), or None. On secondary nodes,
      it's not required to return anything.

  """
  # TODO: remove the obsolete "size" argument
  # pylint: disable=W0613
  clist = []
  if disk.children:
    # child devices must be assembled (and possibly opened) before the
    # parent device can be created on top of them
    for child in disk.children:
      try:
        crdev = _RecursiveAssembleBD(child, owner, on_primary)
      except errors.BlockDeviceError, err:
        _Fail("Can't assemble device %s: %s", child, err)
      if on_primary or disk.AssembleOnSecondary():
        # we need the children open in case the device itself has to
        # be assembled
        try:
          # pylint: disable=E1103
          crdev.Open()
        except errors.BlockDeviceError, err:
          _Fail("Can't make child '%s' read-write: %s", child, err)
      clist.append(crdev)

  try:
    device = bdev.Create(disk, clist, excl_stor)
  except errors.BlockDeviceError, err:
    _Fail("Can't create block device: %s", err)

  if on_primary or disk.AssembleOnSecondary():
    try:
      device.Assemble()
    except errors.BlockDeviceError, err:
      _Fail("Can't assemble device after creation, unusual event: %s", err)
    if on_primary or disk.OpenOnSecondary():
      try:
        device.Open(force=True)
      except errors.BlockDeviceError, err:
        _Fail("Can't make device r/w after creation, unusual event: %s", err)
    # record the new device in the node's device cache
    DevCacheManager.UpdateCache(device.dev_path, owner,
                                on_primary, disk.iv_name)

  device.SetInfo(info)

  return device.unique_id
2772
2773
def _DumpDevice(source_path, target_path, offset, size, truncate):
  """Images/wipes a device by streaming data from a local file with "dd".

  @type source_path: string
  @param source_path: path of the image or data source (e.g., "/dev/zero")

  @type target_path: string
  @param target_path: path of the device to image/wipe

  @type offset: int
  @param offset: offset in MiB in the output file

  @type size: int
  @param size: maximum size in MiB to write (data source might be smaller)

  @type truncate: bool
  @param truncate: whether the file should be truncated

  @return: None
  @raise RPCFail: in case of failure

  """
  # "dd" is invoked with a block size of constants.DD_BLOCK_SIZE; internal
  # sizes are always Mebibytes, so if that block size ever changes, the
  # "offset" and "size" arguments must be rescaled before being passed here.
  cmd = [
    constants.DD_CMD,
    "if=%s" % source_path,
    "seek=%d" % offset,
    "bs=%s" % constants.DD_BLOCK_SIZE,
    "oflag=direct",
    "of=%s" % target_path,
    "count=%d" % size,
    ]
  if not truncate:
    cmd.append("conv=notrunc")

  result = utils.RunCmd(cmd)

  if result.failed:
    _Fail("Dump command '%s' exited with error: %s; output: %s", result.cmd,
          result.fail_reason, result.output)
2813
2814
def _DownloadAndDumpDevice(source_url, target_path, size):
  """This function images a device using a downloaded image file.

  @type source_url: string
  @param source_url: URL of image to dump to disk

  @type target_path: string
  @param target_path: path of the device to image

  @type size: int
  @param size: maximum size in MiB to write (data source might be smaller)

  @rtype: NoneType
  @return: None
  @raise RPCFail: in case of download or write failures

  """
  class DDParams(object):
    # Tracks write progress and whether the image exceeded the device size
    def __init__(self, current_size, total_size):
      self.current_size = current_size
      self.total_size = total_size
      self.image_size_error = False

  def dd_write(ddparams, out):
    # pycurl write callback; returning a value that differs from len(out)
    # (here -1) aborts the transfer with a pycurl.error
    if ddparams.current_size < ddparams.total_size:
      ddparams.current_size += len(out)
      target_file.write(out)
    else:
      ddparams.image_size_error = True
      return -1

  target_file = open(target_path, "r+")
  try:
    ddparams = DDParams(0, 1024 * 1024 * size)

    curl = pycurl.Curl()
    curl.setopt(pycurl.VERBOSE, True)
    curl.setopt(pycurl.NOSIGNAL, True)
    curl.setopt(pycurl.USERAGENT, http.HTTP_GANETI_VERSION)
    curl.setopt(pycurl.URL, source_url)
    curl.setopt(pycurl.WRITEFUNCTION, lambda out: dd_write(ddparams, out))

    try:
      curl.perform()
    except pycurl.error:
      if ddparams.image_size_error:
        _Fail("Disk image larger than the disk")
      else:
        raise
  finally:
    # Close the target device even when the transfer fails; the previous
    # code leaked the file descriptor on any error path
    target_file.close()
2865
2866
def BlockdevConvert(src_disk, target_disk):
  """Copies data from source block device to target.

  The export command of the source device and the import command of the
  target device are joined into a single shell pipeline ("|") which is
  then executed to transfer the data during a disk template conversion.

  @type src_disk: L{objects.Disk}
  @param src_disk: the disk object we want to copy from
  @type target_disk: L{objects.Disk}
  @param target_disk: the disk object we want to copy to

  @rtype: NoneType
  @return: None
  @raise RPCFail: in case of failure

  """
  export_dev = _RecursiveFindBD(src_disk)
  if export_dev is None:
    _Fail("Cannot copy from device '%s': device not found", src_disk.uuid)

  import_dev = _RecursiveFindBD(target_disk)
  if import_dev is None:
    _Fail("Cannot copy to device '%s': device not found", target_disk.uuid)

  # Concatenate the export and import commands into one pipeline
  command = "%s | %s" % (utils.ShellQuoteArgs(export_dev.Export()),
                         utils.ShellQuoteArgs(import_dev.Import()))

  result = utils.RunCmd(command)
  if result.failed:
    _Fail("Disk conversion command '%s' exited with error: %s; output: %s",
          result.cmd, result.fail_reason, result.output)
2903
2904
def BlockdevWipe(disk, offset, size):
  """Wipes a block device by overwriting a range with zeroes.

  @type disk: L{objects.Disk}
  @param disk: the disk object we want to wipe
  @type offset: int
  @param offset: the offset in MiB at which to start wiping
  @type size: int
  @param size: the size in MiB to write
  @raise RPCFail: if the device cannot be found or the range is invalid

  """
  try:
    device = _RecursiveFindBD(disk)
  except errors.BlockDeviceError:
    device = None

  if not device:
    _Fail("Cannot wipe device %s: device not found", disk.iv_name)
  if offset < 0:
    _Fail("Negative offset")
  if size < 0:
    _Fail("Negative size")
  if offset > device.size:
    _Fail("Wipe offset is bigger than device size")
  if (offset + size) > device.size:
    _Fail("Wipe offset and size are bigger than device size")

  # The actual wipe is a "dd" from /dev/zero onto the device
  _DumpDevice("/dev/zero", device.dev_path, offset, size, True)
2933
2934
def BlockdevImage(disk, image, size):
  """Images a block device, either by dumping a local file or by
  downloading a URL.

  @type disk: L{objects.Disk}
  @param disk: the disk object we want to image

  @type image: string
  @param image: URL or local file path of the disk image to be dumped

  @type size: int
  @param size: the size in MiB to write

  @rtype: NoneType
  @return: None
  @raise RPCFail: in case of failure

  """
  if not (utils.IsUrl(image) or os.path.exists(image)):
    _Fail("Image '%s' not found", image)

  try:
    device = _RecursiveFindBD(disk)
  except errors.BlockDeviceError:
    device = None

  if not device:
    _Fail("Cannot image device %s: device not found", disk.iv_name)
  if size < 0:
    _Fail("Negative size")
  if size > device.size:
    _Fail("Image size is bigger than device size")

  if utils.IsUrl(image):
    _DownloadAndDumpDevice(image, device.dev_path, size)
  else:
    _DumpDevice(image, device.dev_path, 0, size, False)
2972
2973
def BlockdevPauseResumeSync(disks, pause):
  """Pause or resume the sync of the block device.

  @type disks: list of L{objects.Disk}
  @param disks: the disks object we want to pause/resume
  @type pause: bool
  @param pause: whether to pause or resume the synchronization

  @return: a list of (success, message) pairs, one per disk, where
      message is None on success

  """
  results = []
  for disk in disks:
    try:
      device = _RecursiveFindBD(disk)
    except errors.BlockDeviceError:
      device = None

    if not device:
      results.append((False, ("Cannot change sync for device %s:"
                              " device not found" % disk.iv_name)))
      continue

    changed = device.PauseResumeSync(pause)

    if changed:
      results.append((changed, None))
    else:
      action = "Pause" if pause else "Resume"
      results.append((changed,
                      "%s for device %s failed" % (action, disk.iv_name)))

  return results
3007
3008
def BlockdevRemove(disk):
  """Remove a block device.

  @note: This is intended to be called recursively.

  @type disk: L{objects.Disk}
  @param disk: the disk object we should remove
  @rtype: boolean
  @return: the success of the operation
  @raise RPCFail: if removing the device or any of its children fails

  """
  msgs = []
  try:
    rdev = _RecursiveFindBD(disk)
  except errors.BlockDeviceError as err:
    # probably can't attach; log the reason instead of discarding it
    # (the exception was previously caught but never reported)
    logging.info("Can't attach to device %s in remove: %s", disk, err)
    rdev = None
  if rdev is not None:
    r_path = rdev.dev_path

    def _TryRemove():
      # Returns [] on success, a list of error messages on failure, so
      # SimpleRetry keeps retrying until the result is empty (falsy)
      try:
        rdev.Remove()
        return []
      except errors.BlockDeviceError as err:
        return [str(err)]

    msgs.extend(utils.SimpleRetry([], _TryRemove,
                                  constants.DISK_REMOVE_RETRY_INTERVAL,
                                  constants.DISK_REMOVE_RETRY_TIMEOUT))

    if not msgs:
      DevCacheManager.RemoveCache(r_path)

  if disk.children:
    for child in disk.children:
      try:
        BlockdevRemove(child)
      except RPCFail as err:
        msgs.append(str(err))

  if msgs:
    _Fail("; ".join(msgs))
3053
3054
def _RecursiveAssembleBD(disk, owner, as_primary):
  """Activate a block device for an instance.

  This is run on the primary and secondary nodes for an instance.

  @note: this function is called recursively.

  @type disk: L{objects.Disk}
  @param disk: the disk we try to assemble
  @type owner: str
  @param owner: the name of the instance which owns the disk
  @type as_primary: boolean
  @param as_primary: if we should make the block device
      read/write

  @return: the assembled device (when the disk is actually assembled
      on this node) or C{True} (on secondary nodes where the disk
      does not need to be assembled)
  @raise errors.BlockDeviceError: in case there is an error
      during the activation of the children or the device
      itself

  """
  children = []
  if disk.children:
    # "mcn" is the maximum number of children that may fail to activate
    # (i.e. be None) before we give up; ChildrenNeeded() == -1 means all
    # children are required
    mcn = disk.ChildrenNeeded()
    if mcn == -1:
      mcn = 0 # max number of Nones allowed
    else:
      mcn = len(disk.children) - mcn # max number of Nones
    for chld_disk in disk.children:
      try:
        cdev = _RecursiveAssembleBD(chld_disk, owner, as_primary)
      except errors.BlockDeviceError, err:
        # re-raise only once the failure budget is exhausted; otherwise
        # record the child as missing (None) and continue
        if children.count(None) >= mcn:
          raise
        cdev = None
        logging.error("Error in child activation (but continuing): %s",
                      str(err))
      children.append(cdev)

  if as_primary or disk.AssembleOnSecondary():
    r_dev = bdev.Assemble(disk, children)
    result = r_dev
    if as_primary or disk.OpenOnSecondary():
      r_dev.Open()
    DevCacheManager.UpdateCache(r_dev.dev_path, owner,
                                as_primary, disk.iv_name)

  else:
    # nothing to assemble on this node; signal success to the caller
    result = True
  return result
3106
3107
def BlockdevAssemble(disk, instance, as_primary, idx):
  """Activate a block device for an instance.

  This is a wrapper over _RecursiveAssembleBD.

  @type disk: L{objects.Disk}
  @param disk: the disk we want to assemble
  @param instance: the instance object owning the disk; its C{name}
      attribute is used for the device cache and the symlink name
  @type as_primary: boolean
  @param as_primary: whether the device should be made read/write
      (primary node)
  @type idx: int
  @param idx: the index of the disk, used to name the symlink
  @return: on primary nodes, the tuple (device path, symlink name,
      device URI); on secondary nodes, the (truthy) result of
      L{_RecursiveAssembleBD}, returned twice
  @raise RPCFail: in case of assembly or symlinking errors

  """
  try:
    result = _RecursiveAssembleBD(disk, instance.name, as_primary)
    if isinstance(result, BlockDev):
      # pylint: disable=E1103
      dev_path = result.dev_path
      link_name = None
      uri = None
      if as_primary:
        link_name = _SymlinkBlockDev(instance.name, dev_path, idx)
        uri = _CalculateDeviceURI(instance, disk, result)
    elif result:
      # secondary node: _RecursiveAssembleBD returned True
      return result, result
    else:
      _Fail("Unexpected result from _RecursiveAssembleBD")
  except errors.BlockDeviceError, err:
    _Fail("Error while assembling disk: %s", err, exc=True)
  except OSError, err:
    _Fail("Error while symlinking disk: %s", err, exc=True)

  return dev_path, link_name, uri
3138
3139
def BlockdevShutdown(disk):
  """Shut down a block device.

  First, if the device is assembled (Attach() is successful), the
  device itself is shut down; afterwards all its children are shut
  down recursively. Unlike assembling, shutting down a child does not
  require the upper device to be active, so no children are cached.

  @type disk: L{objects.Disk}
  @param disk: the description of the disk we should
      shutdown
  @rtype: None
  @raise RPCFail: if shutting down the device or any child fails

  """
  problems = []

  r_dev = _RecursiveFindBD(disk)
  if r_dev is not None:
    dev_path = r_dev.dev_path
    try:
      r_dev.Shutdown()
      DevCacheManager.RemoveCache(dev_path)
    except errors.BlockDeviceError as err:
      problems.append(str(err))

  for child in (disk.children or []):
    try:
      BlockdevShutdown(child)
    except RPCFail as err:
      problems.append(str(err))

  if problems:
    _Fail("; ".join(problems))
3176
3177
def BlockdevAddchildren(parent_cdev, new_cdevs):
  """Extend a mirrored block device.

  @type parent_cdev: L{objects.Disk}
  @param parent_cdev: the disk to which we should add children
  @type new_cdevs: list of L{objects.Disk}
  @param new_cdevs: the list of children which we should add
  @rtype: None
  @raise RPCFail: if the parent or any new child device cannot be found

  """
  parent_bdev = _RecursiveFindBD(parent_cdev)
  if parent_bdev is None:
    _Fail("Can't find parent device '%s' in add children", parent_cdev)
  new_bdevs = [_RecursiveFindBD(cdev) for cdev in new_cdevs]
  if None in new_bdevs:
    _Fail("Can't find new device(s) to add: %s:%s", new_bdevs, new_cdevs)
  parent_bdev.AddChildren(new_bdevs)
3195
3196
def BlockdevRemovechildren(parent_cdev, new_cdevs):
  """Shrink a mirrored block device.

  @type parent_cdev: L{objects.Disk}
  @param parent_cdev: the disk from which we should remove children
  @type new_cdevs: list of L{objects.Disk}
  @param new_cdevs: the list of children which we should remove
  @rtype: None
  @raise RPCFail: if any child device (or its path) cannot be resolved

  """
  parent_bdev = _RecursiveFindBD(parent_cdev)
  if parent_bdev is None:
    _Fail("Can't find parent device '%s' in remove children", parent_cdev)
  paths = []
  for cdev in new_cdevs:
    # prefer the static device path if the disk type provides one
    static_path = cdev.StaticDevPath()
    if static_path is not None:
      if not utils.IsNormAbsPath(static_path):
        _Fail("Strange path returned from StaticDevPath: '%s'", static_path)
      paths.append(static_path)
      continue
    child_bdev = _RecursiveFindBD(cdev)
    if child_bdev is None:
      _Fail("Can't find device %s while removing children", cdev)
    paths.append(child_bdev.dev_path)
  parent_bdev.RemoveChildren(paths)
3224
3225
def BlockdevGetmirrorstatus(disks):
  """Get the mirroring status of a list of devices.

  @type disks: list of L{objects.Disk}
  @param disks: the list of disks which we should query
  @rtype: disk
  @return: List of L{objects.BlockDevStatus}, one for each disk
  @raise errors.BlockDeviceError: if any of the disks cannot be
      found

  """
  def _OneStatus(dsk):
    # fails the whole RPC if even a single disk cannot be found
    found = _RecursiveFindBD(dsk)
    if found is None:
      _Fail("Can't find device %s", dsk)
    return found.CombinedSyncStatus()

  return [_OneStatus(dsk) for dsk in disks]
3246
3247
def BlockdevGetmirrorstatusMulti(disks):
  """Get the mirroring status of a list of devices.

  Unlike L{BlockdevGetmirrorstatus}, a failure on one disk does not
  abort the whole call; each disk gets its own success flag.

  @type disks: list of L{objects.Disk}
  @param disks: the list of disks which we should query
  @rtype: disk
  @return: List of tuples, (bool, status), one for each disk; bool denotes
    success/failure, status is L{objects.BlockDevStatus} on success, string
    otherwise

  """
  statuses = []
  for disk in disks:
    try:
      found = _RecursiveFindBD(disk)
      if found is None:
        statuses.append((False, "Can't find device %s" % disk))
        continue

      sync_status = found.CombinedSyncStatus()
    except errors.BlockDeviceError as err:
      logging.exception("Error while getting disk status")
      statuses.append((False, str(err)))
    else:
      statuses.append((True, sync_status))

  assert len(disks) == len(statuses)

  return statuses
3277
3278
def _RecursiveFindBD(disk):
  """Check if a device is activated.

  If so, return information about the real device.

  @type disk: L{objects.Disk}
  @param disk: the disk object we need to find

  @return: None if the device can't be found,
      otherwise the device instance

  """
  if disk.children:
    children = [_RecursiveFindBD(chdisk) for chdisk in disk.children]
  else:
    children = []

  return bdev.FindDevice(disk, children)
3297
3298
def _OpenRealBD(disk):
  """Opens the underlying block device of a disk.

  @type disk: L{objects.Disk}
  @param disk: the disk object we want to open
  @return: the opened device instance
  @raise RPCFail: if the device is not set up

  """
  device = _RecursiveFindBD(disk)
  if device is None:
    _Fail("Block device '%s' is not set up", disk)

  device.Open()

  return device
3313
3314
def BlockdevFind(disk):
  """Check if a device is activated.

  If it is, return information about the real device.

  @type disk: L{objects.Disk}
  @param disk: the disk to find
  @rtype: None or objects.BlockDevStatus
  @return: None if the disk cannot be found, otherwise its current
      sync status

  """
  try:
    found = _RecursiveFindBD(disk)
  except errors.BlockDeviceError as err:
    _Fail("Failed to find device: %s", err, exc=True)

  if found is None:
    return None
  else:
    return found.GetSyncStatus()
3336
3337
def BlockdevGetdimensions(disks):
  """Computes the size of the given disks.

  If a disk is not found, returns None instead.

  @type disks: list of L{objects.Disk}
  @param disks: the list of disk to compute the size for
  @rtype: list
  @return: list with elements None if the disk cannot be found,
      otherwise the pair (size, spindles), where spindles is None if the
      device doesn't support that

  """
  def _OneDimensions(dsk):
    # None stands for "device not found / not attachable"
    try:
      found = _RecursiveFindBD(dsk)
    except errors.BlockDeviceError:
      return None
    if found is None:
      return None
    return found.GetActualDimensions()

  return [_OneDimensions(dsk) for dsk in disks]
3363
3364
3365 def UploadFile(file_name, data, mode, uid, gid, atime, mtime):
3366 """Write a file to the filesystem.
3367
3368 This allows the master to overwrite(!) a file. It will only perform
3369 the operation if the file belongs to a list of configuration files.
3370
3371 @type file_name: str
3372 @param file_name: the target file name
3373 @type data: str
3374 @param data: the new contents of the file
3375 @type mode: int
3376 @param mode: the mode to give the file (can be None)
3377 @type uid: string
3378 @param uid: the owner of the file
3379 @type gid: string
3380 @param gid: the group of the file
3381 @type atime: float
3382 @param atime: the atime to set on the file (can be None)
3383 @type mtime: float
3384 @param mtime: the mtime to set on the file (can be None)
3385 @rtype: None