#
#

# Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011, 2012, 2013, 2014 Google Inc.
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are
# met:
#
# 1. Redistributions of source code must retain the above copyright notice,
# this list of conditions and the following disclaimer.
#
# 2. Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
# IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
# TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
# LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
# NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.


"""Functions used by the node daemon

@var _ALLOWED_UPLOAD_FILES: denotes which files are accepted in
    the L{UploadFile} function
@var _ALLOWED_CLEAN_DIRS: denotes which directories are accepted
    in the L{_CleanDirectory} function

"""

# pylint: disable=E1103,C0302

# E1103: %s %r has no %r member (but some types could not be
# inferred), because the _TryOSFromDisk returns either (True, os_obj)
# or (False, "string") which confuses pylint

# C0302: This module has become too big and should be split up

import base64
import errno
import logging
import os
import os.path
import pycurl
import random
import re
import shutil
import signal
import socket
import stat
import tempfile
import time
import zlib

from ganeti import errors
from ganeti import http
from ganeti import utils
from ganeti import ssh
from ganeti import hypervisor
from ganeti.hypervisor import hv_base
from ganeti import constants
from ganeti.storage import bdev
from ganeti.storage import drbd
from ganeti.storage import filestorage
from ganeti import objects
from ganeti import ssconf
from ganeti import serializer
from ganeti import netutils
from ganeti import runtime
from ganeti import compat
from ganeti import pathutils
from ganeti import vcluster
from ganeti import ht
from ganeti.storage.base import BlockDev
from ganeti.storage.drbd import DRBD8
from ganeti import hooksmaster
from ganeti.rpc import transport
from ganeti.rpc.errors import NoMasterError, TimeoutError

_BOOT_ID_PATH = "/proc/sys/kernel/random/boot_id"
_ALLOWED_CLEAN_DIRS = compat.UniqueFrozenset([
  pathutils.DATA_DIR,
  pathutils.JOB_QUEUE_ARCHIVE_DIR,
  pathutils.QUEUE_DIR,
  pathutils.CRYPTO_KEYS_DIR,
  ])
_MAX_SSL_CERT_VALIDITY = 7 * 24 * 60 * 60
_X509_KEY_FILE = "key"
_X509_CERT_FILE = "cert"
_IES_STATUS_FILE = "status"
_IES_PID_FILE = "pid"
_IES_CA_FILE = "ca"

#: Valid LVS output line regex
_LVSLINE_REGEX = re.compile(r"^ *([^|]+)\|([^|]+)\|([0-9.]+)\|([^|]{6,})\|?$")

# Actions for the master setup script
_MASTER_START = "start"
_MASTER_STOP = "stop"

#: Maximum file permissions for restricted command directory and executables
_RCMD_MAX_MODE = (stat.S_IRWXU |
                  stat.S_IRGRP | stat.S_IXGRP |
                  stat.S_IROTH | stat.S_IXOTH)

#: Delay before returning an error for restricted commands
_RCMD_INVALID_DELAY = 10

#: How long to wait to acquire lock for restricted commands (shorter than
#: L{_RCMD_INVALID_DELAY}) to reduce blockage of noded forks when many
#: command requests arrive
_RCMD_LOCK_TIMEOUT = _RCMD_INVALID_DELAY * 0.8

class RPCFail(Exception):
  """Class denoting RPC failure.

  Its argument is the error message.

  """


def _GetInstReasonFilename(instance_name):
  """Path of the file containing the reason of the instance status change.

  @type instance_name: string
  @param instance_name: The name of the instance
  @rtype: string
  @return: The path of the file

  """
  return utils.PathJoin(pathutils.INSTANCE_REASON_DIR, instance_name)


def _StoreInstReasonTrail(instance_name, trail):
  """Serialize a reason trail related to an instance change of state to file.

  The exact location of the file depends on the name of the instance and on
  the configuration of the Ganeti cluster defined at deploy time.

  @type instance_name: string
  @param instance_name: The name of the instance

  @type trail: list of reasons
  @param trail: reason trail

  @rtype: None

  """
  json = serializer.DumpJson(trail)
  filename = _GetInstReasonFilename(instance_name)
  utils.WriteFile(filename, data=json)

def _Fail(msg, *args, **kwargs):
  """Log an error and then raise an RPCFail exception.

  This exception is then handled specially in the ganeti daemon and
  turned into a 'failed' return type. As such, this function is a
  useful shortcut for logging the error and returning it to the master
  daemon.

  @type msg: string
  @param msg: the text of the exception
  @raise RPCFail

  """
  if args:
    msg = msg % args
  if "log" not in kwargs or kwargs["log"]:  # if we should log this error
    if "exc" in kwargs and kwargs["exc"]:
      logging.exception(msg)
    else:
      logging.error(msg)
  raise RPCFail(msg)
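
# A minimal usage sketch for _Fail (hypothetical call sites, not code from
# this module): the message is %-formatted with the positional arguments,
# and the "log"/"exc" keywords control logging before RPCFail is raised.
#
#   if result.failed:
#     _Fail("Command %s failed: %s", result.cmd, result.output)
#   try:
#     do_something()
#   except EnvironmentError, err:
#     _Fail("Operation failed: %s", err, exc=True)  # also logs the traceback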


def _GetConfig():
  """Simple wrapper to return a SimpleStore.

  @rtype: L{ssconf.SimpleStore}
  @return: a SimpleStore instance

  """
  return ssconf.SimpleStore()


def _GetSshRunner(cluster_name):
  """Simple wrapper to return an SshRunner.

  @type cluster_name: str
  @param cluster_name: the cluster name, which is needed
      by the SshRunner constructor
  @rtype: L{ssh.SshRunner}
  @return: an SshRunner instance

  """
  return ssh.SshRunner(cluster_name)


def _Decompress(data):
  """Unpacks data compressed by the RPC client.

  @type data: list or tuple
  @param data: Data sent by RPC client
  @rtype: str
  @return: Decompressed data

  """
  assert isinstance(data, (list, tuple))
  assert len(data) == 2
  (encoding, content) = data
  if encoding == constants.RPC_ENCODING_NONE:
    return content
  elif encoding == constants.RPC_ENCODING_ZLIB_BASE64:
    return zlib.decompress(base64.b64decode(content))
  else:
    raise AssertionError("Unknown data encoding")
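
# A small round-trip sketch (hypothetical payload) of the encoding scheme
# _Decompress expects from the RPC client:
#
#   packed = (constants.RPC_ENCODING_ZLIB_BASE64,
#             base64.b64encode(zlib.compress("payload")))
#   assert _Decompress(packed) == "payload"
#   assert _Decompress((constants.RPC_ENCODING_NONE, "raw")) == "raw"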


def _CleanDirectory(path, exclude=None):
  """Removes all regular files in a directory.

  @type path: str
  @param path: the directory to clean
  @type exclude: list
  @param exclude: list of files to be excluded, defaults
      to the empty list

  """
  if path not in _ALLOWED_CLEAN_DIRS:
    _Fail("Path passed to _CleanDirectory not in allowed clean targets: '%s'",
          path)

  if not os.path.isdir(path):
    return
  if exclude is None:
    exclude = []
  else:
    # Normalize excluded paths
    exclude = [os.path.normpath(i) for i in exclude]

  for rel_name in utils.ListVisibleFiles(path):
    full_name = utils.PathJoin(path, rel_name)
    if full_name in exclude:
      continue
    if os.path.isfile(full_name) and not os.path.islink(full_name):
      utils.RemoveFile(full_name)


def _BuildUploadFileList():
  """Build the list of allowed upload files.

  This is abstracted so that it's built only once at module import time.

  """
  allowed_files = set([
    pathutils.CLUSTER_CONF_FILE,
    pathutils.ETC_HOSTS,
    pathutils.SSH_KNOWN_HOSTS_FILE,
    pathutils.VNC_PASSWORD_FILE,
    pathutils.RAPI_CERT_FILE,
    pathutils.SPICE_CERT_FILE,
    pathutils.SPICE_CACERT_FILE,
    pathutils.RAPI_USERS_FILE,
    pathutils.CONFD_HMAC_KEY,
    pathutils.CLUSTER_DOMAIN_SECRET_FILE,
    ])

  for hv_name in constants.HYPER_TYPES:
    hv_class = hypervisor.GetHypervisorClass(hv_name)
    allowed_files.update(hv_class.GetAncillaryFiles()[0])

  assert pathutils.FILE_STORAGE_PATHS_FILE not in allowed_files, \
    "Allowed file storage paths should never be uploaded via RPC"

  return frozenset(allowed_files)


_ALLOWED_UPLOAD_FILES = _BuildUploadFileList()


def JobQueuePurge():
  """Removes job queue files and archived jobs.

  @rtype: None

  """
  _CleanDirectory(pathutils.QUEUE_DIR, exclude=[pathutils.JOB_QUEUE_LOCK_FILE])
  _CleanDirectory(pathutils.JOB_QUEUE_ARCHIVE_DIR)


def GetMasterNodeName():
  """Returns the master node name.

  @rtype: string
  @return: name of the master node
  @raise RPCFail: in case of errors

  """
  try:
    return _GetConfig().GetMasterNode()
  except errors.ConfigurationError, err:
    _Fail("Cluster configuration incomplete: %s", err, exc=True)


def RunLocalHooks(hook_opcode, hooks_path, env_builder_fn):
  """Decorator that runs hooks before and after the decorated function.

  @type hook_opcode: string
  @param hook_opcode: opcode of the hook
  @type hooks_path: string
  @param hooks_path: path of the hooks
  @type env_builder_fn: function
  @param env_builder_fn: function that returns a dictionary containing the
      environment variables for the hooks. Will get all the parameters of the
      decorated function.
  @raise RPCFail: in case of pre-hook failure

  """
  def decorator(fn):
    def wrapper(*args, **kwargs):
      _, myself = ssconf.GetMasterAndMyself()
      nodes = ([myself], [myself])  # these hooks run locally

      env_fn = compat.partial(env_builder_fn, *args, **kwargs)

      cfg = _GetConfig()
      hr = HooksRunner()
      hm = hooksmaster.HooksMaster(hook_opcode, hooks_path, nodes,
                                   hr.RunLocalHooks, None, env_fn, None,
                                   logging.warning, cfg.GetClusterName(),
                                   cfg.GetMasterNode())
      hm.RunPhase(constants.HOOKS_PHASE_PRE)
      result = fn(*args, **kwargs)
      hm.RunPhase(constants.HOOKS_PHASE_POST)

      return result
    return wrapper
  return decorator
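
# A minimal sketch (hypothetical function name) of how this decorator is
# applied; it mirrors the real uses further down (e.g. ActivateMasterIp).
# The env builder receives the decorated function's arguments, and the
# PRE/POST hook phases run around the call.
#
#   @RunLocalHooks(constants.FAKE_OP_MASTER_TURNUP, "master-ip-turnup",
#                  _BuildMasterIpEnv)
#   def MyMasterIpAction(master_params, use_external_mip_script):
#     ...  # runs between the PRE and POST hook phases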


def _BuildMasterIpEnv(master_params, use_external_mip_script=None):
  """Builds environment variables for master IP hooks.

  @type master_params: L{objects.MasterNetworkParameters}
  @param master_params: network parameters of the master
  @type use_external_mip_script: boolean
  @param use_external_mip_script: whether to use an external master IP
      address setup script (unused, but necessary per the implementation of
      the L{RunLocalHooks} decorator)

  """
  # pylint: disable=W0613
  ver = netutils.IPAddress.GetVersionFromAddressFamily(master_params.ip_family)
  env = {
    "MASTER_NETDEV": master_params.netdev,
    "MASTER_IP": master_params.ip,
    "MASTER_NETMASK": str(master_params.netmask),
    "CLUSTER_IP_VERSION": str(ver),
    }

  return env


def _RunMasterSetupScript(master_params, action, use_external_mip_script):
  """Execute the master IP address setup script.

  @type master_params: L{objects.MasterNetworkParameters}
  @param master_params: network parameters of the master
  @type action: string
  @param action: action to pass to the script. Must be one of
      L{backend._MASTER_START} or L{backend._MASTER_STOP}
  @type use_external_mip_script: boolean
  @param use_external_mip_script: whether to use an external master IP
      address setup script
  @raise backend.RPCFail: if there are errors during the execution of the
      script

  """
  env = _BuildMasterIpEnv(master_params)

  if use_external_mip_script:
    setup_script = pathutils.EXTERNAL_MASTER_SETUP_SCRIPT
  else:
    setup_script = pathutils.DEFAULT_MASTER_SETUP_SCRIPT

  result = utils.RunCmd([setup_script, action], env=env, reset_env=True)

  if result.failed:
    _Fail("Failed to %s the master IP. Script return value: %s, output: '%s'" %
          (action, result.exit_code, result.output), log=True)


@RunLocalHooks(constants.FAKE_OP_MASTER_TURNUP, "master-ip-turnup",
               _BuildMasterIpEnv)
def ActivateMasterIp(master_params, use_external_mip_script):
  """Activate the IP address of the master daemon.

  @type master_params: L{objects.MasterNetworkParameters}
  @param master_params: network parameters of the master
  @type use_external_mip_script: boolean
  @param use_external_mip_script: whether to use an external master IP
      address setup script
  @raise RPCFail: in case of errors during the IP startup

  """
  _RunMasterSetupScript(master_params, _MASTER_START,
                        use_external_mip_script)


def StartMasterDaemons(no_voting):
  """Activate local node as master node.

  The function will start the master daemons (ganeti-masterd and ganeti-rapi).

  @type no_voting: boolean
  @param no_voting: whether to start ganeti-masterd without a node vote
      but still non-interactively
  @rtype: None

  """

  if no_voting:
    masterd_args = "--no-voting --yes-do-it"
  else:
    masterd_args = ""

  env = {
    "EXTRA_MASTERD_ARGS": masterd_args,
    }

  result = utils.RunCmd([pathutils.DAEMON_UTIL, "start-master"], env=env)
  if result.failed:
    msg = "Can't start Ganeti master: %s" % result.output
    logging.error(msg)
    _Fail(msg)


@RunLocalHooks(constants.FAKE_OP_MASTER_TURNDOWN, "master-ip-turndown",
               _BuildMasterIpEnv)
def DeactivateMasterIp(master_params, use_external_mip_script):
  """Deactivate the master IP on this node.

  @type master_params: L{objects.MasterNetworkParameters}
  @param master_params: network parameters of the master
  @type use_external_mip_script: boolean
  @param use_external_mip_script: whether to use an external master IP
      address setup script
  @raise RPCFail: in case of errors during the IP turndown

  """
  _RunMasterSetupScript(master_params, _MASTER_STOP,
                        use_external_mip_script)


def StopMasterDaemons():
  """Stop the master daemons on this node.

  Stop the master daemons (ganeti-masterd and ganeti-rapi) on this node.

  @rtype: None

  """
  # TODO: log and report back to the caller the error failures; we
  # need to decide in which case we fail the RPC for this

  result = utils.RunCmd([pathutils.DAEMON_UTIL, "stop-master"])
  if result.failed:
    logging.error("Could not stop Ganeti master, command %s had exitcode %s"
                  " and error %s",
                  result.cmd, result.exit_code, result.output)


def ChangeMasterNetmask(old_netmask, netmask, master_ip, master_netdev):
  """Change the netmask of the master IP.

  @param old_netmask: the old value of the netmask
  @param netmask: the new value of the netmask
  @param master_ip: the master IP
  @param master_netdev: the master network device

  """
  if old_netmask == netmask:
    return

  if not netutils.IPAddress.Own(master_ip):
    _Fail("The master IP address is not up, not attempting to change its"
          " netmask")

  result = utils.RunCmd([constants.IP_COMMAND_PATH, "address", "add",
                         "%s/%s" % (master_ip, netmask),
                         "dev", master_netdev, "label",
                         "%s:0" % master_netdev])
  if result.failed:
    _Fail("Could not set the new netmask on the master IP address")

  result = utils.RunCmd([constants.IP_COMMAND_PATH, "address", "del",
                         "%s/%s" % (master_ip, old_netmask),
                         "dev", master_netdev, "label",
                         "%s:0" % master_netdev])
  if result.failed:
    _Fail("Could not bring down the master IP address with the old netmask")


def EtcHostsModify(mode, host, ip):
  """Modify a host entry in /etc/hosts.

  @param mode: The mode to operate. Either add or remove entry
  @param host: The host to operate on
  @param ip: The ip associated with the entry

  """
  # Note: the original code instantiated RPCFail without raising it, so the
  # checks below were no-ops; _Fail logs and raises the exception properly.
  if mode == constants.ETC_HOSTS_ADD:
    if not ip:
      _Fail("Mode 'add' needs 'ip' parameter, but parameter not present")
    utils.AddHostToEtcHosts(host, ip)
  elif mode == constants.ETC_HOSTS_REMOVE:
    if ip:
      _Fail("Mode 'remove' does not allow 'ip' parameter, but parameter is"
            " present")
    utils.RemoveHostFromEtcHosts(host)
  else:
    _Fail("Mode not supported")
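
# Usage sketch (hypothetical host and IP) of the two supported modes:
#
#   EtcHostsModify(constants.ETC_HOSTS_ADD, "node3.example.com", "192.0.2.30")
#   EtcHostsModify(constants.ETC_HOSTS_REMOVE, "node3.example.com", None)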


def LeaveCluster(modify_ssh_setup):
  """Cleans up and removes the current node.

  This function cleans up and prepares the current node to be removed
  from the cluster.

  If processing is successful, then it raises an
  L{errors.QuitGanetiException} which is used as a special case to
  shut down the node daemon.

  @param modify_ssh_setup: boolean

  """
  _CleanDirectory(pathutils.DATA_DIR)
  _CleanDirectory(pathutils.CRYPTO_KEYS_DIR)
  JobQueuePurge()

  if modify_ssh_setup:
    try:
      priv_key, pub_key, auth_keys = ssh.GetUserFiles(constants.SSH_LOGIN_USER)

      utils.RemoveAuthorizedKey(auth_keys, utils.ReadFile(pub_key))

      utils.RemoveFile(priv_key)
      utils.RemoveFile(pub_key)
    except errors.OpExecError:
      logging.exception("Error while processing ssh files")

  try:
    utils.RemoveFile(pathutils.CONFD_HMAC_KEY)
    utils.RemoveFile(pathutils.RAPI_CERT_FILE)
    utils.RemoveFile(pathutils.SPICE_CERT_FILE)
    utils.RemoveFile(pathutils.SPICE_CACERT_FILE)
    utils.RemoveFile(pathutils.NODED_CERT_FILE)
  except:  # pylint: disable=W0702
    logging.exception("Error while removing cluster secrets")

  utils.StopDaemon(constants.CONFD)
  utils.StopDaemon(constants.MOND)
  utils.StopDaemon(constants.KVMD)

  # Raise a custom exception (handled in ganeti-noded)
  raise errors.QuitGanetiException(True, "Shutdown scheduled")


def _CheckStorageParams(params, num_params):
  """Performs sanity checks for storage parameters.

  @type params: list
  @param params: list of storage parameters
  @type num_params: int
  @param num_params: expected number of parameters

  """
  if params is None:
    raise errors.ProgrammerError("No storage parameters for storage"
                                 " reporting were provided.")
  if not isinstance(params, list):
    raise errors.ProgrammerError("The storage parameters are not of type"
                                 " list: '%s'" % params)
  if not len(params) == num_params:
    raise errors.ProgrammerError("Did not receive the expected number of"
                                 " storage parameters: expected %s,"
                                 " received '%s'" % (num_params, len(params)))


def _CheckLvmStorageParams(params):
  """Performs a sanity check for the 'exclusive storage' flag.

  @see: C{_CheckStorageParams}

  """
  _CheckStorageParams(params, 1)
  excl_stor = params[0]
  if not isinstance(excl_stor, bool):
    raise errors.ProgrammerError("Exclusive storage parameter is not"
                                 " boolean: '%s'." % excl_stor)
  return excl_stor


def _GetLvmVgSpaceInfo(name, params):
  """Wrapper around C{_GetVgInfo} which checks the storage parameters.

  @type name: string
  @param name: name of the volume group
  @type params: list
  @param params: list of storage parameters, which in this case should
      contain only one: the exclusive storage flag

  """
  excl_stor = _CheckLvmStorageParams(params)
  return _GetVgInfo(name, excl_stor)


def _GetVgInfo(
    name, excl_stor, info_fn=bdev.LogicalVolume.GetVGInfo):
  """Retrieves information about an LVM volume group.

  """
  # TODO: GetVGInfo supports returning information for multiple VGs at once
  vginfo = info_fn([name], excl_stor)
  if vginfo:
    vg_free = int(round(vginfo[0][0], 0))
    vg_size = int(round(vginfo[0][1], 0))
  else:
    vg_free = None
    vg_size = None

  return {
    "type": constants.ST_LVM_VG,
    "name": name,
    "storage_free": vg_free,
    "storage_size": vg_size,
    }
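
# Shape of the returned report (illustrative values only):
#
#   {"type": constants.ST_LVM_VG, "name": "xenvg",
#    "storage_free": 81920, "storage_size": 102400}  # sizes in MiB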


def _GetLvmPvSpaceInfo(name, params):
  """Wrapper around C{_GetVgSpindlesInfo} with sanity checks.

  @see: C{_GetLvmVgSpaceInfo}

  """
  excl_stor = _CheckLvmStorageParams(params)
  return _GetVgSpindlesInfo(name, excl_stor)


def _GetVgSpindlesInfo(
    name, excl_stor, info_fn=bdev.LogicalVolume.GetVgSpindlesInfo):
  """Retrieves information about spindles in an LVM volume group.

  @type name: string
  @param name: VG name
  @type excl_stor: bool
  @param excl_stor: exclusive storage
  @rtype: dict
  @return: dictionary whose keys are "type", "name", "storage_free" and
      "storage_size", for the storage type, VG name, free spindles and
      total spindles respectively

  """
  if excl_stor:
    (vg_free, vg_size) = info_fn(name)
  else:
    vg_free = 0
    vg_size = 0
  return {
    "type": constants.ST_LVM_PV,
    "name": name,
    "storage_free": vg_free,
    "storage_size": vg_size,
    }


def _GetHvInfo(name, hvparams, get_hv_fn=hypervisor.GetHypervisor):
  """Retrieves node information from a hypervisor.

  The information returned depends on the hypervisor. Common items:

    - vg_size is the size of the configured volume group in MiB
    - vg_free is the free size of the volume group in MiB
    - memory_dom0 is the memory allocated for domain0 in MiB
    - memory_free is the currently available (free) RAM in MiB
    - memory_total is the total amount of RAM in MiB
    - hv_version: the hypervisor version, if available

  @type hvparams: dict of string
  @param hvparams: the hypervisor's hvparams

  """
  return get_hv_fn(name).GetNodeInfo(hvparams=hvparams)


def _GetHvInfoAll(hv_specs, get_hv_fn=hypervisor.GetHypervisor):
  """Retrieves node information for all hypervisors.

  See C{_GetHvInfo} for information on the output.

  @type hv_specs: list of pairs (string, dict of strings)
  @param hv_specs: list of pairs of a hypervisor's name and its hvparams

  """
  if hv_specs is None:
    return None

  result = []
  for hvname, hvparams in hv_specs:
    result.append(_GetHvInfo(hvname, hvparams, get_hv_fn))
  return result


def _GetNamedNodeInfo(names, fn):
  """Calls C{fn} for all names in C{names} and returns a list.

  @rtype: None or list

  """
  if names is None:
    return None
  else:
    return map(fn, names)


def GetNodeInfo(storage_units, hv_specs):
  """Gives back a hash with different information about the node.

  @type storage_units: list of tuples (string, string, list)
  @param storage_units: List of tuples (storage unit, identifier, parameters)
      to ask for disk space information. In case of lvm-vg, the identifier is
      the VG name. The parameters can contain additional, storage-type-specific
      parameters, for example exclusive storage for lvm storage.
  @type hv_specs: list of pairs (string, dict of strings)
  @param hv_specs: list of pairs of a hypervisor's name and its hvparams
  @rtype: tuple; (string, None/dict, None/dict)
  @return: Tuple containing boot ID, storage unit information and hypervisor
      information

  """
  bootid = utils.ReadFile(_BOOT_ID_PATH, size=128).rstrip("\n")
  storage_info = _GetNamedNodeInfo(
    storage_units,
    (lambda (storage_type, storage_key, storage_params):
        _ApplyStorageInfoFunction(storage_type, storage_key, storage_params)))
  hv_info = _GetHvInfoAll(hv_specs)
  return (bootid, storage_info, hv_info)


def _GetFileStorageSpaceInfo(path, params):
  """Wrapper around filestorage.GetSpaceInfo.

  The purpose of this wrapper is to call filestorage.GetFileStorageSpaceInfo
  and ignore the *args parameter to not leak it into the filestorage
  module's code.

  @see: C{filestorage.GetFileStorageSpaceInfo} for description of the
      parameters.

  """
  _CheckStorageParams(params, 0)
  return filestorage.GetFileStorageSpaceInfo(path)


# FIXME: implement storage reporting for all missing storage types.
_STORAGE_TYPE_INFO_FN = {
  constants.ST_BLOCK: None,
  constants.ST_DISKLESS: None,
  constants.ST_EXT: None,
  constants.ST_FILE: _GetFileStorageSpaceInfo,
  constants.ST_LVM_PV: _GetLvmPvSpaceInfo,
  constants.ST_LVM_VG: _GetLvmVgSpaceInfo,
  constants.ST_SHARED_FILE: None,
  constants.ST_GLUSTER: None,
  constants.ST_RADOS: None,
  }


def _ApplyStorageInfoFunction(storage_type, storage_key, *args):
  """Looks up and applies the correct function to calculate free and total
  storage for the given storage type.

  @type storage_type: string
  @param storage_type: the storage type for which the storage shall be
      reported
  @type storage_key: string
  @param storage_key: identifier of a storage unit, e.g. the volume group name
      of an LVM storage unit
  @type args: any
  @param args: various parameters that can be used for storage reporting.
      These parameters and their semantics vary from storage type to storage
      type and are just propagated in this function.
  @return: the results of the application of the storage space function (see
      _STORAGE_TYPE_INFO_FN) if storage space reporting is implemented for
      that storage type
  @raises NotImplementedError: for storage types that don't support space
      reporting yet

  """
  fn = _STORAGE_TYPE_INFO_FN[storage_type]
  if fn is not None:
    return fn(storage_key, *args)
  else:
    raise NotImplementedError
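
# Dispatch sketch (illustrative arguments): an lvm-vg query is routed to
# _GetLvmVgSpaceInfo, whose single storage parameter is the exclusive
# storage flag, while types without a reporting function raise
# NotImplementedError.
#
#   _ApplyStorageInfoFunction(constants.ST_LVM_VG, "xenvg", [False])
#   _ApplyStorageInfoFunction(constants.ST_BLOCK, "b", [])  # raises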


def _CheckExclusivePvs(pvi_list):
  """Check that PVs are not shared among LVs.

  @type pvi_list: list of L{objects.LvmPvInfo} objects
  @param pvi_list: information about the PVs

  @rtype: list of tuples (string, list of strings)
  @return: offending volumes, as tuples: (pv_name, [lv1_name, lv2_name...])

  """
  res = []
  for pvi in pvi_list:
    if len(pvi.lv_list) > 1:
      res.append((pvi.name, pvi.lv_list))
  return res
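
# Example (hypothetical PV data): a PV carrying two LVs violates exclusive
# storage and is reported together with the offending LV names.
#
#   pvs = [objects.LvmPvInfo(name="/dev/sda2", lv_list=["lv1", "lv2"]),
#          objects.LvmPvInfo(name="/dev/sdb1", lv_list=["lv3"])]
#   _CheckExclusivePvs(pvs)  # -> [("/dev/sda2", ["lv1", "lv2"])]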


def _VerifyHypervisors(what, vm_capable, result, all_hvparams,
                       get_hv_fn=hypervisor.GetHypervisor):
  """Verifies the hypervisor. Adds the results to the 'result' dictionary.

  @type what: C{dict}
  @param what: a dictionary of things to check
  @type vm_capable: boolean
  @param vm_capable: whether or not this node is vm capable
  @type result: dict
  @param result: dictionary of verification results; results of the
      verifications in this function will be added here
  @type all_hvparams: dict of dict of string
  @param all_hvparams: dictionary mapping hypervisor names to hvparams
  @type get_hv_fn: function
  @param get_hv_fn: function to retrieve the hypervisor, to improve
      testability

  """
  if not vm_capable:
    return

  if constants.NV_HYPERVISOR in what:
    result[constants.NV_HYPERVISOR] = {}
    for hv_name in what[constants.NV_HYPERVISOR]:
      hvparams = all_hvparams[hv_name]
      try:
        val = get_hv_fn(hv_name).Verify(hvparams=hvparams)
      except errors.HypervisorError, err:
        val = "Error while checking hypervisor: %s" % str(err)
      result[constants.NV_HYPERVISOR][hv_name] = val


def _VerifyHvparams(what, vm_capable, result,
                    get_hv_fn=hypervisor.GetHypervisor):
  """Verifies the hvparams. Adds the results to the 'result' dictionary.

  @type what: C{dict}
  @param what: a dictionary of things to check
  @type vm_capable: boolean
  @param vm_capable: whether or not this node is vm capable
  @type result: dict
  @param result: dictionary of verification results; results of the
      verifications in this function will be added here
  @type get_hv_fn: function
  @param get_hv_fn: function to retrieve the hypervisor, to improve
      testability

  """
  if not vm_capable:
    return

  if constants.NV_HVPARAMS in what:
    result[constants.NV_HVPARAMS] = []
    for source, hv_name, hvparms in what[constants.NV_HVPARAMS]:
      try:
        logging.info("Validating hv %s, %s", hv_name, hvparms)
        get_hv_fn(hv_name).ValidateParameters(hvparms)
      except errors.HypervisorError, err:
        result[constants.NV_HVPARAMS].append((source, hv_name, str(err)))


def _VerifyInstanceList(what, vm_capable, result, all_hvparams):
  """Verifies the instance list.

  @type what: C{dict}
  @param what: a dictionary of things to check
  @type vm_capable: boolean
  @param vm_capable: whether or not this node is vm capable
  @type result: dict
  @param result: dictionary of verification results; results of the
      verifications in this function will be added here
  @type all_hvparams: dict of dict of string
  @param all_hvparams: dictionary mapping hypervisor names to hvparams

  """
  if constants.NV_INSTANCELIST in what and vm_capable:
    # GetInstanceList can fail
    try:
      val = GetInstanceList(what[constants.NV_INSTANCELIST],
                            all_hvparams=all_hvparams)
    except RPCFail, err:
      val = str(err)
    result[constants.NV_INSTANCELIST] = val


def _VerifyNodeInfo(what, vm_capable, result, all_hvparams):
  """Verifies the node info.

  @type what: C{dict}
  @param what: a dictionary of things to check
  @type vm_capable: boolean
  @param vm_capable: whether or not this node is vm capable
  @type result: dict
  @param result: dictionary of verification results; results of the
      verifications in this function will be added here
  @type all_hvparams: dict of dict of string
  @param all_hvparams: dictionary mapping hypervisor names to hvparams

  """
  if constants.NV_HVINFO in what and vm_capable:
    hvname = what[constants.NV_HVINFO]
    hyper = hypervisor.GetHypervisor(hvname)
    hvparams = all_hvparams[hvname]
    result[constants.NV_HVINFO] = hyper.GetNodeInfo(hvparams=hvparams)


def _VerifyClientCertificate(cert_file=pathutils.NODED_CLIENT_CERT_FILE):
  """Verify the existence and validity of the client SSL certificate.

  Also, verify that the client certificate is not self-signed. Self-
  signed client certificates stem from Ganeti versions 2.12.0 - 2.12.4
  and should be replaced by client certificates signed by the server
  certificate. Hence we output a warning when we encounter a self-signed
  one.

  """
  create_cert_cmd = "gnt-cluster renew-crypto --new-node-certificates"
  if not os.path.exists(cert_file):
    return (constants.CV_ERROR,
            "The client certificate does not exist. Run '%s' to create"
            " client certificates for all nodes." % create_cert_cmd)

  (errcode, msg) = utils.VerifyCertificate(cert_file)
  if errcode is not None:
    return (errcode, msg)

  (errcode, msg) = utils.IsCertificateSelfSigned(cert_file)
  if errcode is not None:
    return (errcode, msg)

  # if everything is fine, we return the digest to be compared to the config
  return (None, utils.GetCertificateDigest(cert_filename=cert_file))


def VerifyNode(what, cluster_name, all_hvparams, node_groups, groups_cfg):
  """Verify the status of the local node.

  Based on the input L{what} parameter, various checks are done on the
  local node.

  If the I{filelist} key is present, this list of
  files is checksummed and the file/checksum pairs are returned.

  If the I{nodelist} key is present, we check that we have
  connectivity via ssh with the target nodes (and check the hostname
  report).

  If the I{node-net-test} key is present, we check that we have
  connectivity to the given nodes via both primary IP and, if
  applicable, secondary IPs.

  @type what: C{dict}
  @param what: a dictionary of things to check:
      - filelist: list of files for which to compute checksums
      - nodelist: list of nodes we should check ssh communication with
      - node-net-test: list of nodes we should check node daemon port
        connectivity with
      - hypervisor: list with hypervisors to run the verify for
  @type cluster_name: string
  @param cluster_name: the cluster's name
  @type all_hvparams: dict of dict of strings
  @param all_hvparams: a dictionary mapping hypervisor names to hvparams
  @type node_groups: a dict of strings
  @param node_groups: node _names_ mapped to their group uuids (it's enough to
      have only those nodes that are in `what["nodelist"]`)
  @type groups_cfg: a dict of dict of strings
  @param groups_cfg: a dictionary mapping group uuids to their configuration
  @rtype: dict
  @return: a dictionary with the same keys as the input dict, and
      values representing the result of the checks

  """
  result = {}
  my_name = netutils.Hostname.GetSysName()
  port = netutils.GetDaemonPort(constants.NODED)
  vm_capable = my_name not in what.get(constants.NV_NONVMNODES, [])

  _VerifyHypervisors(what, vm_capable, result, all_hvparams)
  _VerifyHvparams(what, vm_capable, result)

  if constants.NV_FILELIST in what:
    fingerprints = utils.FingerprintFiles(map(vcluster.LocalizeVirtualPath,
                                              what[constants.NV_FILELIST]))
    result[constants.NV_FILELIST] = \
      dict((vcluster.MakeVirtualPath(key), value)
           for (key, value) in fingerprints.items())

  if constants.NV_CLIENT_CERT in what:
    result[constants.NV_CLIENT_CERT] = _VerifyClientCertificate()

  if constants.NV_NODELIST in what:
    (nodes, bynode) = what[constants.NV_NODELIST]

    # Add nodes from other groups (different for each node)
    try:
      nodes.extend(bynode[my_name])
    except KeyError:
      pass

    # Use a random order
    random.shuffle(nodes)

    # Try to contact all nodes
    val = {}
    for node in nodes:
      params = groups_cfg.get(node_groups.get(node))
      ssh_port = params["ndparams"].get(constants.ND_SSH_PORT)
      logging.debug("Ssh port %s (None = default) for node %s",
                    str(ssh_port), node)
      success, message = _GetSshRunner(cluster_name). \
                            VerifyNodeHostname(node, ssh_port)
      if not success:
        val[node] = message

    result[constants.NV_NODELIST] = val

  if constants.NV_NODENETTEST in what:
    result[constants.NV_NODENETTEST] = tmp = {}
    my_pip = my_sip = None
    for name, pip, sip in what[constants.NV_NODENETTEST]:
      if name == my_name:
        my_pip = pip
        my_sip = sip
        break
    if not my_pip:
      tmp[my_name] = ("Can't find my own primary/secondary IP"
                      " in the node list")
    else:
      for name, pip, sip in what[constants.NV_NODENETTEST]:
        fail = []
        if not netutils.TcpPing(pip, port, source=my_pip):
          fail.append("primary")
        if sip != pip:
          if not netutils.TcpPing(sip, port, source=my_sip):
            fail.append("secondary")
        if fail:
          tmp[name] = ("failure using the %s interface(s)" %
                       " and ".join(fail))

  if constants.NV_MASTERIP in what:
    # FIXME: add checks on incoming data structures (here and in the
    # rest of the function)
    master_name, master_ip = what[constants.NV_MASTERIP]
    if master_name == my_name:
      source = constants.IP4_ADDRESS_LOCALHOST
    else:
      source = None
    result[constants.NV_MASTERIP] = netutils.TcpPing(master_ip, port,
                                                     source=source)

  if constants.NV_USERSCRIPTS in what:
    result[constants.NV_USERSCRIPTS] = \
      [script for script in what[constants.NV_USERSCRIPTS]
       if not utils.IsExecutable(script)]

  if constants.NV_OOB_PATHS in what:
    result[constants.NV_OOB_PATHS] = tmp = []
    for path in what[constants.NV_OOB_PATHS]:
      try:
        st = os.stat(path)
      except OSError, err:
        tmp.append("error stat()'ing out of band helper: %s" % err)
      else:
        if stat.S_ISREG(st.st_mode):
          if stat.S_IMODE(st.st_mode) & stat.S_IXUSR:
            tmp.append(None)
          else:
            tmp.append("out of band helper %s is not executable" % path)
        else:
          tmp.append("out of band helper %s is not a file" % path)

  if constants.NV_LVLIST in what and vm_capable:
    try:
      val = GetVolumeList(utils.ListVolumeGroups().keys())
    except RPCFail, err:
      val = str(err)
    result[constants.NV_LVLIST] = val

  _VerifyInstanceList(what, vm_capable, result, all_hvparams)

  if constants.NV_VGLIST in what and vm_capable:
    result[constants.NV_VGLIST] = utils.ListVolumeGroups()

  if constants.NV_PVLIST in what and vm_capable:
    check_exclusive_pvs = constants.NV_EXCLUSIVEPVS in what
    val = bdev.LogicalVolume.GetPVInfo(what[constants.NV_PVLIST],
                                       filter_allocatable=False,
                                       include_lvs=check_exclusive_pvs)
    if check_exclusive_pvs:
      result[constants.NV_EXCLUSIVEPVS] = _CheckExclusivePvs(val)
      for pvi in val:
        # Avoid sending useless data on the wire
        pvi.lv_list = []
    result[constants.NV_PVLIST] = map(objects.LvmPvInfo.ToDict, val)

  if constants.NV_VERSION in what:
    result[constants.NV_VERSION] = (constants.PROTOCOL_VERSION,
                                    constants.RELEASE_VERSION)

  _VerifyNodeInfo(what, vm_capable, result, all_hvparams)

  if constants.NV_DRBDVERSION in what and vm_capable:
    try:
      drbd_version = DRBD8.GetProcInfo().GetVersionString()
    except errors.BlockDeviceError, err:
      logging.warning("Can't get DRBD version", exc_info=True)
      drbd_version = str(err)
    result[constants.NV_DRBDVERSION] = drbd_version

  if constants.NV_DRBDLIST in what and vm_capable:
    try:
      used_minors = drbd.DRBD8.GetUsedDevs()
    except errors.BlockDeviceError, err:
      logging.warning("Can't get used minors list", exc_info=True)
      used_minors = str(err)
    result[constants.NV_DRBDLIST] = used_minors

  if constants.NV_DRBDHELPER in what and vm_capable:
    status = True
    try:
      payload = drbd.DRBD8.GetUsermodeHelper()
    except errors.BlockDeviceError, err:
      logging.error("Can't get DRBD usermode helper: %s", str(err))
      status = False
      payload = str(err)
    result[constants.NV_DRBDHELPER] = (status, payload)

  if constants.NV_NODESETUP in what:
    result[constants.NV_NODESETUP] = tmpr = []
    if not os.path.isdir("/sys/block") or not os.path.isdir("/sys/class/net"):
      tmpr.append("The sysfs filesystem doesn't seem to be mounted"
                  " under /sys, missing required directories /sys/block"
                  " and /sys/class/net")
    if (not os.path.isdir("/proc/sys") or
        not os.path.isfile("/proc/sysrq-trigger")):
      tmpr.append("The procfs filesystem doesn't seem to be mounted"
                  " under /proc, missing required directory /proc/sys and"
                  " the file /proc/sysrq-trigger")

  if constants.NV_TIME in what:
    result[constants.NV_TIME] = utils.SplitTime(time.time())

  if constants.NV_OSLIST in what and vm_capable:
    result[constants.NV_OSLIST] = DiagnoseOS()

  if constants.NV_BRIDGES in what and vm_capable:
    result[constants.NV_BRIDGES] = [bridge
                                    for bridge in what[constants.NV_BRIDGES]
                                    if not utils.BridgeExists(bridge)]

  if what.get(constants.NV_ACCEPTED_STORAGE_PATHS) == my_name:
    result[constants.NV_ACCEPTED_STORAGE_PATHS] = \
      filestorage.ComputeWrongFileStoragePaths()

  if what.get(constants.NV_FILE_STORAGE_PATH):
    pathresult = filestorage.CheckFileStoragePath(
      what[constants.NV_FILE_STORAGE_PATH])
    if pathresult:
      result[constants.NV_FILE_STORAGE_PATH] = pathresult

  if what.get(constants.NV_SHARED_FILE_STORAGE_PATH):
    pathresult = filestorage.CheckFileStoragePath(
      what[constants.NV_SHARED_FILE_STORAGE_PATH])
    if pathresult:
      result[constants.NV_SHARED_FILE_STORAGE_PATH] = pathresult

  return result


def GetCryptoTokens(token_requests):
  """Perform actions on the node's cryptographic tokens.

  Token types can be 'ssl' or 'ssh'. So far only some actions are implemented
  for 'ssl'. Action 'get' returns the digest of the public client ssl
  certificate. Action 'create' creates a new client certificate and private
  key and also returns the digest of the certificate. The third element of a
  token request is a dictionary of optional parameters for the actions; so
  far only a filename is supported.

  @type token_requests: list of tuples of (string, string, dict), where the
      first string is in constants.CRYPTO_TYPES, the second in
      constants.CRYPTO_ACTIONS. The third parameter is a dictionary of string
      to string.
  @param token_requests: list of requests of cryptographic tokens and actions
      to perform on them. The actions come with a dictionary of options.
  @rtype: list of tuples (string, string)
  @return: list of tuples of the token type and the public crypto token

  """
  tokens = []
  for (token_type, action, _) in token_requests:
    if token_type not in constants.CRYPTO_TYPES:
      raise errors.ProgrammerError("Token type '%s' not supported." %
                                   token_type)
    if action not in constants.CRYPTO_ACTIONS:
      raise errors.ProgrammerError("Action '%s' is not supported." %
                                   action)
    if token_type == constants.CRYPTO_TYPE_SSL_DIGEST:
      tokens.append((token_type,
                     utils.GetCertificateDigest()))
  return tokens
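
# Request/response sketch (illustrative; the action constant name is an
# assumption): asking for the SSL certificate digest yields one
# (type, token) pair per matching request.
#
#   reqs = [(constants.CRYPTO_TYPE_SSL_DIGEST, constants.CRYPTO_ACTION_GET,
#            {})]
#   GetCryptoTokens(reqs)  # -> [(constants.CRYPTO_TYPE_SSL_DIGEST, "<hex>")]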


def EnsureDaemon(daemon_name, run):
  """Ensures the given daemon is running or stopped.

  @type daemon_name: string
  @param daemon_name: name of the daemon (e.g., constants.KVMD)

  @type run: bool
  @param run: whether to start or stop the daemon

  @rtype: bool
  @return: 'True' if daemon successfully started/stopped,
      'False' otherwise

  """
  allowed_daemons = [constants.KVMD]

  if daemon_name not in allowed_daemons:
    fn = lambda _: False
  elif run:
    fn = utils.EnsureDaemon
  else:
    fn = utils.StopDaemon

  return fn(daemon_name)
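
# Usage sketch: only daemons on the allow-list can be toggled; anything else
# simply reports failure.
#
#   EnsureDaemon(constants.KVMD, True)    # start kvmd, True on success
#   EnsureDaemon(constants.KVMD, False)   # stop kvmd
#   EnsureDaemon(constants.NODED, True)   # not on the allow-list -> False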


def GetBlockDevSizes(devices):
  """Return the size of the given block devices.

  @type devices: list
  @param devices: list of block device nodes to query
  @rtype: dict
  @return:
    dictionary of all block devices under /dev (key). The value is their
    size in MiB.

    {'/dev/disk/by-uuid/123456-12321231-312312-312': 124}

  """
  DEV_PREFIX = "/dev/"
  blockdevs = {}

  for devpath in devices:
    if not utils.IsBelowDir(DEV_PREFIX, devpath):
      continue

    try:
      st = os.stat(devpath)
    except EnvironmentError, err:
      logging.warning("Error stat()'ing device %s: %s", devpath, str(err))
      continue

    if stat.S_ISBLK(st.st_mode):
      result = utils.RunCmd(["blockdev", "--getsize64", devpath])
      if result.failed:
        # We don't want to fail, just do not list this device as available
        logging.warning("Cannot get size for block device %s", devpath)
        continue

      size = int(result.stdout) / (1024 * 1024)
      blockdevs[devpath] = size
  return blockdevs


def GetVolumeList(vg_names):
  """Compute list of logical volumes and their size.

  @type vg_names: list
  @param vg_names: the volume groups whose LVs we should list, or
      empty for all volume groups
  @rtype: dict
  @return:
    dictionary of all partitions (key) with value being a tuple of
    their size (in MiB), inactive and online status::

      {'xenvg/test1': ('20.06', True, True)}

    in case of errors, a string is returned with the error
    details.

  """
  lvs = {}
  sep = "|"
  if not vg_names:
    vg_names = []
  result = utils.RunCmd(["lvs", "--noheadings", "--units=m", "--nosuffix",
                         "--separator=%s" % sep,
                         "-ovg_name,lv_name,lv_size,lv_attr"] + vg_names)
  if result.failed:
    _Fail("Failed to list logical volumes, lvs output: %s", result.output)

  for line in result.stdout.splitlines():
    line = line.strip()
    match = _LVSLINE_REGEX.match(line)
    if not match:
      logging.error("Invalid line returned from lvs output: '%s'", line)
      continue
    vg_name, name, size, attr = match.groups()
    inactive = attr[4] == "-"
    online = attr[5] == "o"
    virtual = attr[0] == "v"
    if virtual:
      # we don't want to report such volumes as existing, since they
      # don't really hold data
      continue
    lvs[vg_name + "/" + name] = (size, inactive, online)

  return lvs
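
# Parsing sketch (made-up lvs output line): with the separator "|" and the
# field order vg_name,lv_name,lv_size,lv_attr requested above, a line such as
#
#   xenvg|test1|20.06|-wi-ao----
#
# matches _LVSLINE_REGEX and ends up in the result as
# {"xenvg/test1": ("20.06", False, True)} (size, inactive, online).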


def ListVolumeGroups():
  """List the volume groups and their size.

  @rtype: dict
  @return: dictionary with volume group names as keys and their sizes
      as values

  """
  return utils.ListVolumeGroups()


def NodeVolumes():
  """List all volumes on this node.

  @rtype: list
  @return:
    A list of dictionaries, each having four keys:
      - name: the logical volume name,
      - size: the size of the logical volume
      - dev: the physical device on which the LV lives
      - vg: the volume group to which it belongs

    In case of errors, we return an empty list and log the
    error.

    Note that since a logical volume can live on multiple physical
    volumes, the resulting list might include a logical volume
    multiple times.

  """
  result = utils.RunCmd(["lvs", "--noheadings", "--units=m", "--nosuffix",
                         "--separator=|",
                         "--options=lv_name,lv_size,devices,vg_name"])
  if result.failed:
    _Fail("Failed to list logical volumes, lvs output: %s",
          result.output)

  def parse_dev(dev):
    return dev.split("(")[0]

  def handle_dev(dev):
    return [parse_dev(x) for x in dev.split(",")]

  def map_line(line):
    line = [v.strip() for v in line]
    return [{"name": line[0], "size": line[1],
             "dev": dev, "vg": line[3]} for dev in handle_dev(line[2])]

  all_devs = []
  for line in result.stdout.splitlines():
    if line.count("|") >= 3:
      all_devs.extend(map_line(line.split("|")))
    else:
      logging.warning("Strange line in the output from lvs: '%s'", line)
  return all_devs


def BridgesExist(bridges_list):
  """Check if a list of bridges exist on the current node.

  @raise RPCFail: if any of the bridges is missing; the function does not
      return a boolean, despite what an older docstring claimed

  """
  missing = []
  for bridge in bridges_list:
    if not utils.BridgeExists(bridge):
      missing.append(bridge)

  if missing:
    _Fail("Missing bridges %s", utils.CommaJoin(missing))


def GetInstanceListForHypervisor(hname, hvparams=None,
                                 get_hv_fn=hypervisor.GetHypervisor):
  """Provides a list of instances of the given hypervisor.

  @type hname: string
  @param hname: name of the hypervisor
  @type hvparams: dict of strings
  @param hvparams: hypervisor parameters for the given hypervisor
  @type get_hv_fn: function
  @param get_hv_fn: function that returns a hypervisor for the given hypervisor
      name; optional parameter to increase testability

  @rtype: list
  @return: a list of all running instances on the current node
      - instance1.example.com
      - instance2.example.com

  """
  try:
    return get_hv_fn(hname).ListInstances(hvparams=hvparams)
  except errors.HypervisorError, err:
    _Fail("Error enumerating instances (hypervisor %s): %s",
          hname, err, exc=True)


def GetInstanceList(hypervisor_list, all_hvparams=None,
                    get_hv_fn=hypervisor.GetHypervisor):
  """Provides a list of instances.

  @type hypervisor_list: list
  @param hypervisor_list: the list of hypervisors to query information
  @type all_hvparams: dict of dict of strings
  @param all_hvparams: a dictionary mapping hypervisor types to respective
      cluster-wide hypervisor parameters
  @type get_hv_fn: function
  @param get_hv_fn: function that returns a hypervisor for the given hypervisor
      name; optional parameter to increase testability

  @rtype: list
  @return: a list of all running instances on the current node
      - instance1.example.com
      - instance2.example.com

  """
  results = []
  for hname in hypervisor_list:
    hvparams = all_hvparams[hname]
    results.extend(GetInstanceListForHypervisor(hname, hvparams=hvparams,
                                                get_hv_fn=get_hv_fn))
  return results


def GetInstanceInfo(instance, hname, hvparams=None):
  """Gives back the information about an instance as a dictionary.

  @type instance: string
  @param instance: the instance name
  @type hname: string
  @param hname: the hypervisor type of the instance
  @type hvparams: dict of strings
  @param hvparams: the instance's hvparams

  @rtype: dict
  @return: dictionary with the following keys:
      - memory: memory size of instance (int)
      - state: state of instance (HvInstanceState)
      - time: cpu time of instance (float)
      - vcpus: the number of vcpus (int)

  """
  output = {}

  iinfo = hypervisor.GetHypervisor(hname).GetInstanceInfo(instance,
                                                          hvparams=hvparams)
  if iinfo is not None:
    output["memory"] = iinfo[2]
    output["vcpus"] = iinfo[3]
    output["state"] = iinfo[4]
    output["time"] = iinfo[5]

  return output


def GetInstanceMigratable(instance):
  """Checks whether an instance can be migrated.

  @type instance: L{objects.Instance}
  @param instance: object representing the instance to be checked
  @raise RPCFail: if the instance is not running; missing disk symlinks
      only produce a warning in the logs

  """
  hyper = hypervisor.GetHypervisor(instance.hypervisor)
  iname = instance.name
  if iname not in hyper.ListInstances(hvparams=instance.hvparams):
    _Fail("Instance %s is not running", iname)

  for idx in range(len(instance.disks_info)):
    link_name = _GetBlockDevSymlinkPath(iname, idx)
    if not os.path.islink(link_name):
      logging.warning("Instance %s is missing symlink %s for disk %d",
                      iname, link_name, idx)


def GetAllInstancesInfo(hypervisor_list, all_hvparams):
  """Gather data about all instances.

  This is the equivalent of L{GetInstanceInfo}, except that it
  computes data for all instances at once, thus being faster if one
  needs data about more than one instance.

  @type hypervisor_list: list
  @param hypervisor_list: list of hypervisors to query for instance data
  @type all_hvparams: dict of dict of strings
  @param all_hvparams: mapping of hypervisor names to hvparams

  @rtype: dict
  @return: dictionary of instance: data, with data having the following keys:
      - memory: memory size of instance (int)
      - state: xen state of instance (string)
      - time: cpu time of instance (float)
      - vcpus: the number of vcpus

  """
  output = {}
  for hname in hypervisor_list:
    hvparams = all_hvparams[hname]
    iinfo = hypervisor.GetHypervisor(hname).GetAllInstancesInfo(hvparams)
    if iinfo:
      for name, _, memory, vcpus, state, times in iinfo:
        value = {
          "memory": memory,
          "vcpus": vcpus,
          "state": state,
          "time": times,
          }
        if name in output:
          # we only check static parameters, like memory and vcpus,
          # and not state and time which can change between the
          # invocations of the different hypervisors
          for key in "memory", "vcpus":
            if value[key] != output[name][key]:
              _Fail("Instance %s is running twice"
                    " with different parameters", name)
        output[name] = value

  return output


def GetInstanceConsoleInfo(instance_param_dict,
                           get_hv_fn=hypervisor.GetHypervisor):
  """Gather data about the console access of a set of instances of this node.

  This function assumes that the caller already knows which instances are on
  this node, by calling a function such as L{GetAllInstancesInfo} or
  L{GetInstanceList}.

  For every instance, a large amount of configuration data needs to be
  provided to the hypervisor interface in order to receive the console
  information. Whether this could or should be cut down can be discussed.
  The information is provided in a dictionary indexed by instance name,
  allowing any number of instance queries to be done.

  @type instance_param_dict: dict of string to tuple of dictionaries, where the
      dictionaries represent: L{objects.Instance}, L{objects.Node},
      L{objects.NodeGroup}, HvParams, BeParams
  @param instance_param_dict: mapping of instance name to parameters necessary
      for console information retrieval

  @rtype: dict
  @return: dictionary of instance: data, with data having the following keys:
      - instance: instance name
      - kind: console kind
      - message: used with kind == CONS_MESSAGE, indicates console to be
        unavailable, supplies error message
      - host: host to connect to
      - port: port to use
      - user: user for login
      - command: the command, broken into parts as an array
      - display: unknown, potentially unused?

  """
  output = {}
  for inst_name in instance_param_dict:
    instance = instance_param_dict[inst_name]["instance"]
    pnode = instance_param_dict[inst_name]["node"]
    group = instance_param_dict[inst_name]["group"]
    hvparams = instance_param_dict[inst_name]["hvParams"]
    beparams = instance_param_dict[inst_name]["beParams"]

    instance = objects.Instance.FromDict(instance)
    pnode = objects.Node.FromDict(pnode)
    group = objects.NodeGroup.FromDict(group)

    h = get_hv_fn(instance.hypervisor)
    output[inst_name] = h.GetInstanceConsole(instance, pnode, group,
                                             hvparams, beparams).ToDict()

  return output


def _InstanceLogName(kind, os_name, instance, component):
  """Compute the OS log filename for a given instance and operation.

  The instance name and os name are passed in as strings since not all
  operations have these as part of an instance object.

  @type kind: string
  @param kind: the operation type (e.g. add, import, etc.)
  @type os_name: string
  @param os_name: the os name
  @type instance: string
  @param instance: the name of the instance being imported/added/etc.
  @type component: string or None
  @param component: the name of the component of the instance being
      transferred

  """
  # TODO: Use tempfile.mkstemp to create unique filename
  if component:
    assert "/" not in component
    c_msg = "-%s" % component
  else:
    c_msg = ""
  base = ("%s-%s-%s%s-%s.log" %
          (kind, os_name, instance, c_msg, utils.TimestampForFilename()))
  return utils.PathJoin(pathutils.LOG_OS_DIR, base)


def InstanceOsAdd(instance, reinstall, debug):
  """Add an OS to an instance.

  @type instance: L{objects.Instance}
  @param instance: Instance whose OS is to be installed
  @type reinstall: boolean
  @param reinstall: whether this is an instance reinstall
  @type debug: integer
  @param debug: debug level, passed to the OS scripts
  @rtype: None

  """
  inst_os = OSFromDisk(instance.os)

  create_env = OSEnvironment(instance, inst_os, debug)
  if reinstall:
    create_env["INSTANCE_REINSTALL"] = "1"

  logfile = _InstanceLogName("add", instance.os, instance.name, None)

  result = utils.RunCmd([inst_os.create_script], env=create_env,
                        cwd=inst_os.path, output=logfile, reset_env=True)
  if result.failed:
    logging.error("os create command '%s' returned error: %s, logfile: %s,"
                  " output: %s", result.cmd, result.fail_reason, logfile,
                  result.output)
    lines = [utils.SafeEncode(val)
             for val in utils.TailFile(logfile, lines=20)]
    _Fail("OS create script failed (%s), last lines in the"
          " log file:\n%s", result.fail_reason, "\n".join(lines), log=False)
1684
1685
1686 def RunRenameInstance(instance, old_name, debug):
1687 """Run the OS rename script for an instance.
1688
1689 @type instance: L{objects.Instance}
1690 @param instance: Instance whose OS is to be installed
1691 @type old_name: string
1692 @param old_name: previous instance name
1693 @type debug: integer
1694 @param debug: debug level, passed to the OS scripts
1695 @rtype: boolean
1696 @return: the success of the operation
1697
1698 """
1699 inst_os = OSFromDisk(instance.os)
1700
1701 rename_env = OSEnvironment(instance, inst_os, debug)
1702 rename_env["OLD_INSTANCE_NAME"] = old_name
1703
1704 logfile = _InstanceLogName("rename", instance.os,
1705 "%s-%s" % (old_name, instance.name), None)
1706
1707 result = utils.RunCmd([inst_os.rename_script], env=rename_env,
1708 cwd=inst_os.path, output=logfile, reset_env=True)
1709
1710 if result.failed:
1711 logging.error("os create command '%s' returned error: %s output: %s",
1712 result.cmd, result.fail_reason, result.output)
1713 lines = [utils.SafeEncode(val)
1714 for val in utils.TailFile(logfile, lines=20)]
1715 _Fail("OS rename script failed (%s), last lines in the"
1716 " log file:\n%s", result.fail_reason, "\n".join(lines), log=False)
1717
1718
1719 def _GetBlockDevSymlinkPath(instance_name, idx, _dir=None):
1720 """Returns symlink path for block device.
1721
1722 """
1723 if _dir is None:
1724 _dir = pathutils.DISK_LINKS_DIR
1725
1726 return utils.PathJoin(_dir,
1727 ("%s%s%s" %
1728 (instance_name, constants.DISK_SEPARATOR, idx)))
1729
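# Illustrative sketch (not executed): assuming the default value of
# constants.DISK_SEPARATOR is ":", the symlink for the first disk of a
# hypothetical instance "inst1.example.com" would be
#   _GetBlockDevSymlinkPath("inst1.example.com", 0)
#   == utils.PathJoin(pathutils.DISK_LINKS_DIR, "inst1.example.com:0")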
1730
1731 def _SymlinkBlockDev(instance_name, device_path, idx):
1732 """Set up symlinks to a instance's block device.
1733
1734 This is an auxiliary function run when an instance is started (on the
1735 primary node) or when an instance is migrated (on the target node).
1736
1737
1738 @param instance_name: the name of the target instance
1739 @param device_path: path of the physical block device, on the node
1740 @param idx: the disk index
1741 @return: absolute path to the disk's symlink
1742
1743 """
1744 link_name = _GetBlockDevSymlinkPath(instance_name, idx)
1745 try:
1746 os.symlink(device_path, link_name)
1747 except OSError, err:
1748 if err.errno == errno.EEXIST:
1749 if (not os.path.islink(link_name) or
1750 os.readlink(link_name) != device_path):
1751 os.remove(link_name)
1752 os.symlink(device_path, link_name)
1753 else:
1754 raise
1755
1756 return link_name
1757
1758
1759 def _RemoveBlockDevLinks(instance_name, disks):
1760 """Remove the block device symlinks belonging to the given instance.
1761
1762 """
1763 for idx, _ in enumerate(disks):
1764 link_name = _GetBlockDevSymlinkPath(instance_name, idx)
1765 if os.path.islink(link_name):
1766 try:
1767 os.remove(link_name)
1768 except OSError:
1769 logging.exception("Can't remove symlink '%s'", link_name)
1770
1771
1772 def _CalculateDeviceURI(instance, disk, device):
1773 """Get the URI for the device.
1774
1775 @type instance: L{objects.Instance}
1776 @param instance: the instance which disk belongs to
1777 @type disk: L{objects.Disk}
1778 @param disk: the target disk object
1779 @type device: L{bdev.BlockDev}
1780 @param device: the corresponding BlockDevice
1781 @rtype: string
1782 @return: the device uri if any else None
1783
1784 """
1785 access_mode = disk.params.get(constants.LDP_ACCESS,
1786 constants.DISK_KERNELSPACE)
1787 if access_mode == constants.DISK_USERSPACE:
1788 # This can raise errors.BlockDeviceError
1789 return device.GetUserspaceAccessUri(instance.hypervisor)
1790 else:
1791 return None
1792
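# Illustrative sketch (not executed): kernelspace disks always yield None
# here and are reached via their /dev path; for a disk whose LDP_ACCESS
# parameter is constants.DISK_USERSPACE, the hypervisor-specific URI
# returned by GetUserspaceAccessUri() might look like, e.g. for RBD,
# "rbd:<pool>/<volume>" (the exact format depends on the device class).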
1793
1794 def _GatherAndLinkBlockDevs(instance):
1795 """Set up an instance's block device(s).
1796
1797 This is run on the primary node at instance startup. The block
1798 devices must be already assembled.
1799
1800 @type instance: L{objects.Instance}
1801 @param instance: the instance whose disks we should assemble
1802 @rtype: list
1803 @return: list of (disk_object, link_name, drive_uri)
1804
1805 """
1806 block_devices = []
1807 for idx, disk in enumerate(instance.disks_info):
1808 device = _RecursiveFindBD(disk)
1809 if device is None:
1810 raise errors.BlockDeviceError("Block device '%s' is not set up." %
1811 str(disk))
1812 device.Open()
1813 try:
1814 link_name = _SymlinkBlockDev(instance.name, device.dev_path, idx)
1815 except OSError, e:
1816 raise errors.BlockDeviceError("Cannot create block device symlink: %s" %
1817 e.strerror)
1818 uri = _CalculateDeviceURI(instance, disk, device)
1819
1820 block_devices.append((disk, link_name, uri))
1821
1822 return block_devices
1823
1824
1825 def _IsInstanceUserDown(instance_info):
1826 return instance_info and \
1827 "state" in instance_info and \
1828 hv_base.HvInstanceState.IsShutdown(instance_info["state"])
1829
1830
1831 def _GetInstanceInfo(instance):
1832 """Helper function L{GetInstanceInfo}"""
1833 return GetInstanceInfo(instance.name, instance.hypervisor,
1834 hvparams=instance.hvparams)
1835
1836
1837 def StartInstance(instance, startup_paused, reason, store_reason=True):
1838 """Start an instance.
1839
1840 @type instance: L{objects.Instance}
1841 @param instance: the instance object
1842 @type startup_paused: bool
1843 @param startup_paused: whether to pause the instance at startup
1844 @type reason: list of reasons
1845 @param reason: the reason trail for this startup
1846 @type store_reason: boolean
1847 @param store_reason: whether to store the startup reason trail on file
1848 @rtype: None
1849
1850 """
1851 instance_info = _GetInstanceInfo(instance)
1852
1853 if instance_info and not _IsInstanceUserDown(instance_info):
1854 logging.info("Instance '%s' already running, not starting", instance.name)
1855 return
1856
1857 try:
1858 block_devices = _GatherAndLinkBlockDevs(instance)
1859 hyper = hypervisor.GetHypervisor(instance.hypervisor)
1860 hyper.StartInstance(instance, block_devices, startup_paused)
1861 if store_reason:
1862 _StoreInstReasonTrail(instance.name, reason)
1863 except errors.BlockDeviceError, err:
1864 _Fail("Block device error: %s", err, exc=True)
1865 except errors.HypervisorError, err:
1866 _RemoveBlockDevLinks(instance.name, instance.disks_info)
1867 _Fail("Hypervisor error: %s", err, exc=True)
1868
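# Illustrative sketch (not executed): a typical invocation; the reason
# trail is a list of (source, reason, timestamp) entries, and the values
# shown here are hypothetical:
#   StartInstance(instance, False,
#                 [("gnt:client", "user requested startup", 1234567890)],
#                 store_reason=True)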
1869
1870 def InstanceShutdown(instance, timeout, reason, store_reason=True):
1871 """Shut an instance down.
1872
1873 @note: this function uses polling with a hardcoded retry interval.
1874
1875 @type instance: L{objects.Instance}
1876 @param instance: the instance object
1877 @type timeout: integer
1878 @param timeout: maximum timeout for soft shutdown
1879 @type reason: list of reasons
1880 @param reason: the reason trail for this shutdown
1881 @type store_reason: boolean
1882 @param store_reason: whether to store the shutdown reason trail on file
1883 @rtype: None
1884
1885 """
1886 hyper = hypervisor.GetHypervisor(instance.hypervisor)
1887
1888 if not _GetInstanceInfo(instance):
1889 logging.info("Instance '%s' not running, doing nothing", instance.name)
1890 return
1891
1892 class _TryShutdown(object):
1893 def __init__(self):
1894 self.tried_once = False
1895
1896 def __call__(self):
1897 if not _GetInstanceInfo(instance):
1898 return
1899
1900 try:
1901 hyper.StopInstance(instance, retry=self.tried_once, timeout=timeout)
1902 if store_reason:
1903 _StoreInstReasonTrail(instance.name, reason)
1904 except errors.HypervisorError, err:
1905 # if the instance no longer exists, consider this a
1906 # success and go to cleanup
1907 if not _GetInstanceInfo(instance):
1908 return
1909
1910 _Fail("Failed to stop instance '%s': %s", instance.name, err)
1911
1912 self.tried_once = True
1913
1914 raise utils.RetryAgain()
1915
1916 try:
1917 utils.Retry(_TryShutdown(), 5, timeout)
1918 except utils.RetryTimeout:
1919 # the shutdown did not succeed
1920 logging.error("Shutdown of '%s' unsuccessful, forcing", instance.name)
1921
1922 try:
1923 hyper.StopInstance(instance, force=True)
1924 except errors.HypervisorError, err:
1925 # only raise an error if the instance still exists, otherwise
1926 # the error could simply be "instance ... unknown"!
1927 if _GetInstanceInfo(instance):
1928 _Fail("Failed to force stop instance '%s': %s", instance.name, err)
1929
1930 time.sleep(1)
1931
1932 if _GetInstanceInfo(instance):
1933 _Fail("Could not shutdown instance '%s' even by destroy", instance.name)
1934
1935 try:
1936 hyper.CleanupInstance(instance.name)
1937 except errors.HypervisorError, err:
1938 logging.warning("Failed to execute post-shutdown cleanup step: %s", err)
1939
1940 _RemoveBlockDevLinks(instance.name, instance.disks_info)
1941
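# Illustrative note: the soft-shutdown phase above retries _TryShutdown()
# every 5 seconds until `timeout` expires, so e.g. a timeout of 120
# seconds allows roughly 120 / 5 = 24 polite attempts before the
# hypervisor is asked to force-stop the instance.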
1942
1943 def InstanceReboot(instance, reboot_type, shutdown_timeout, reason):
1944 """Reboot an instance.
1945
1946 @type instance: L{objects.Instance}
1947 @param instance: the instance object to reboot
1948 @type reboot_type: str
1949 @param reboot_type: the type of reboot, one of the following
1950 constants:
1951 - L{constants.INSTANCE_REBOOT_SOFT}: only reboot the
1952 instance OS, do not recreate the VM
1953 - L{constants.INSTANCE_REBOOT_HARD}: tear down and
1954 restart the VM (at the hypervisor level)
1955 - the other reboot type (L{constants.INSTANCE_REBOOT_FULL}) is
1956 not accepted here, since that mode is handled differently, in
1957 cmdlib, and translates into full stop and start of the
1958 instance (instead of a call_instance_reboot RPC)
1959 @type shutdown_timeout: integer
1960 @param shutdown_timeout: maximum timeout for soft shutdown
1961 @type reason: list of reasons
1962 @param reason: the reason trail for this reboot
1963 @rtype: None
1964
1965 """
1966 # TODO: this is inconsistent with 'StartInstance' and 'InstanceShutdown'
1967 # because those functions simply 'return' on error whereas this one
1968 # raises an exception with '_Fail'
1969 if not _GetInstanceInfo(instance):
1970 _Fail("Cannot reboot instance '%s' that is not running", instance.name)
1971
1972 hyper = hypervisor.GetHypervisor(instance.hypervisor)
1973 if reboot_type == constants.INSTANCE_REBOOT_SOFT:
1974 try:
1975 hyper.RebootInstance(instance)
1976 except errors.HypervisorError, err:
1977 _Fail("Failed to soft reboot instance '%s': %s", instance.name, err)
1978 elif reboot_type == constants.INSTANCE_REBOOT_HARD:
1979 try:
1980 InstanceShutdown(instance, shutdown_timeout, reason, store_reason=False)
1981 result = StartInstance(instance, False, reason, store_reason=False)
1982 _StoreInstReasonTrail(instance.name, reason)
1983 return result
1984 except errors.HypervisorError, err:
1985 _Fail("Failed to hard reboot instance '%s': %s", instance.name, err)
1986 else:
1987 _Fail("Invalid reboot_type received: '%s'", reboot_type)
1988
1989
1990 def InstanceBalloonMemory(instance, memory):
1991 """Resize an instance's memory.
1992
1993 @type instance: L{objects.Instance}
1994 @param instance: the instance object
1995 @type memory: int
1996 @param memory: new memory amount in MB
1997 @rtype: None
1998
1999 """
2000 hyper = hypervisor.GetHypervisor(instance.hypervisor)
2001 running = hyper.ListInstances(hvparams=instance.hvparams)
2002 if instance.name not in running:
2003 logging.info("Instance %s is not running, cannot balloon", instance.name)
2004 return
2005 try:
2006 hyper.BalloonInstanceMemory(instance, memory)
2007 except errors.HypervisorError, err:
2008 _Fail("Failed to balloon instance memory: %s", err, exc=True)
2009
2010
2011 def MigrationInfo(instance):
2012 """Gather information about an instance to be migrated.
2013
2014 @type instance: L{objects.Instance}
2015 @param instance: the instance definition
2016
2017 """
2018 hyper = hypervisor.GetHypervisor(instance.hypervisor)
2019 try:
2020 info = hyper.MigrationInfo(instance)
2021 except errors.HypervisorError, err:
2022 _Fail("Failed to fetch migration information: %s", err, exc=True)
2023 return info
2024
2025
2026 def AcceptInstance(instance, info, target):
2027 """Prepare the node to accept an instance.
2028
2029 @type instance: L{objects.Instance}
2030 @param instance: the instance definition
2031 @type info: string/data (opaque)
2032 @param info: migration information, from the source node
2033 @type target: string
2034 @param target: target host (usually ip), on this node
2035
2036 """
2037 # TODO: why is this required only for DTS_EXT_MIRROR?
2038 if instance.disk_template in constants.DTS_EXT_MIRROR:
2039 # Create the symlinks, as the disks are not active
2040 # in any way
2041 try:
2042 _GatherAndLinkBlockDevs(instance)
2043 except errors.BlockDeviceError, err:
2044 _Fail("Block device error: %s", err, exc=True)
2045
2046 hyper = hypervisor.GetHypervisor(instance.hypervisor)
2047 try:
2048 hyper.AcceptInstance(instance, info, target)
2049 except errors.HypervisorError, err:
2050 if instance.disk_template in constants.DTS_EXT_MIRROR:
2051 _RemoveBlockDevLinks(instance.name, instance.disks_info)
2052 _Fail("Failed to accept instance: %s", err, exc=True)
2053
2054
2055 def FinalizeMigrationDst(instance, info, success):
2056 """Finalize any preparation to accept an instance.
2057
2058 @type instance: L{objects.Instance}
2059 @param instance: the instance definition
2060 @type info: string/data (opaque)
2061 @param info: migration information, from the source node
2062 @type success: boolean
2063 @param success: whether the migration was a success or a failure
2064
2065 """
2066 hyper = hypervisor.GetHypervisor(instance.hypervisor)
2067 try:
2068 hyper.FinalizeMigrationDst(instance, info, success)
2069 except errors.HypervisorError, err:
2070 _Fail("Failed to finalize migration on the target node: %s", err, exc=True)
2071
2072
2073 def MigrateInstance(cluster_name, instance, target, live):
2074 """Migrates an instance to another node.
2075
2076 @type cluster_name: string
2077 @param cluster_name: name of the cluster
2078 @type instance: L{objects.Instance}
2079 @param instance: the instance definition
2080 @type target: string
2081 @param target: the target node name
2082 @type live: boolean
2083 @param live: whether the migration should be done live or not (the
2084 interpretation of this parameter is left to the hypervisor)
2085 @raise RPCFail: if migration fails for some reason
2086
2087 """
2088 hyper = hypervisor.GetHypervisor(instance.hypervisor)
2089
2090 try:
2091 hyper.MigrateInstance(cluster_name, instance, target, live)
2092 except errors.HypervisorError, err:
2093 _Fail("Failed to migrate instance: %s", err, exc=True)
2094
2095
2096 def FinalizeMigrationSource(instance, success, live):
2097 """Finalize the instance migration on the source node.
2098
2099 @type instance: L{objects.Instance}
2100 @param instance: the instance definition of the migrated instance
2101 @type success: bool
2102 @param success: whether the migration succeeded or not
2103 @type live: bool
2104 @param live: whether the user requested a live migration or not
2105 @raise RPCFail: If the execution fails for some reason
2106
2107 """
2108 hyper = hypervisor.GetHypervisor(instance.hypervisor)
2109
2110 try:
2111 hyper.FinalizeMigrationSource(instance, success, live)
2112 except Exception, err: # pylint: disable=W0703
2113 _Fail("Failed to finalize the migration on the source node: %s", err,
2114 exc=True)
2115
2116
2117 def GetMigrationStatus(instance):
2118 """Get the migration status
2119
2120 @type instance: L{objects.Instance}
2121 @param instance: the instance that is being migrated
2122 @rtype: L{objects.MigrationStatus}
2123 @return: the status of the current migration (one of
2124 L{constants.HV_MIGRATION_VALID_STATUSES}), plus any additional
2125 progress info that can be retrieved from the hypervisor
2126 @raise RPCFail: If the migration status cannot be retrieved
2127
2128 """
2129 hyper = hypervisor.GetHypervisor(instance.hypervisor)
2130 try:
2131 return hyper.GetMigrationStatus(instance)
2132 except Exception, err: # pylint: disable=W0703
2133 _Fail("Failed to get migration status: %s", err, exc=True)
2134
2135
2136 def HotplugDevice(instance, action, dev_type, device, extra, seq):
2137 """Hotplug a device
2138
2139 Hotplug is currently supported only for the KVM hypervisor.

2140 @type instance: L{objects.Instance}
2141 @param instance: the instance to which we hotplug a device
2142 @type action: string
2143 @param action: the hotplug action to perform
2144 @type dev_type: string
2145 @param dev_type: the device type to hotplug
2146 @type device: either L{objects.NIC} or L{objects.Disk}
2147 @param device: the device object to hotplug
2148 @type extra: tuple
2149 @param extra: extra info used for disk hotplug (disk link, drive uri)
2150 @type seq: int
2151 @param seq: the index of the device from the master's perspective
2152 @raise RPCFail: in case instance does not have KVM hypervisor
2153
2154 """
2155 hyper = hypervisor.GetHypervisor(instance.hypervisor)
2156 try:
2157 hyper.VerifyHotplugSupport(instance, action, dev_type)
2158 except errors.HotplugError, err:
2159 _Fail("Hotplug is not supported: %s", err)
2160
2161 if action == constants.HOTPLUG_ACTION_ADD:
2162 fn = hyper.HotAddDevice
2163 elif action == constants.HOTPLUG_ACTION_REMOVE:
2164 fn = hyper.HotDelDevice
2165 elif action == constants.HOTPLUG_ACTION_MODIFY:
2166 fn = hyper.HotModDevice
2167 else:
2168 assert action in constants.HOTPLUG_ALL_ACTIONS
2169
2170 return fn(instance, dev_type, device, extra, seq)
2171
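# Illustrative sketch (not executed): hotplugging a NIC maps the action
# constant to the matching hypervisor method, e.g.
#   HotplugDevice(instance, constants.HOTPLUG_ACTION_ADD,
#                 constants.HOTPLUG_TARGET_NIC, nic, None, seq)
# dispatches to hyper.HotAddDevice(instance, dev_type, nic, None, seq).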
2172
2173 def HotplugSupported(instance):
2174 """Checks if hotplug is generally supported.
2175
2176 """
2177 hyper = hypervisor.GetHypervisor(instance.hypervisor)
2178 try:
2179 hyper.HotplugSupported(instance)
2180 except errors.HotplugError, err:
2181 _Fail("Hotplug is not supported: %s", err)
2182
2183
2184 def ModifyInstanceMetadata(metadata):
2185 """Sends instance data to the metadata daemon.
2186
2187 Uses the Luxi transport layer to communicate with the metadata
2188 daemon configuration server. It starts the metadata daemon if it is
2189 not running.
2190 The daemon must have been enabled at configuration time.
2191
2192 @type metadata: dict
2193 @param metadata: instance metadata obtained by calling
2194 L{objects.Instance.ToDict} on an instance object
2195
2196 """
2197 if not constants.ENABLE_METAD:
2198 raise errors.ProgrammerError("The metadata deamon is disabled, yet"
2199 " ModifyInstanceMetadata has been called")
2200
2201 if not utils.IsDaemonAlive(constants.METAD):
2202 result = utils.RunCmd([pathutils.DAEMON_UTIL, "start", constants.METAD])
2203 if result.failed:
2204 raise errors.HypervisorError("Failed to start metadata daemon")
2205
2206 def _Connect():
2207 return transport.Transport(pathutils.SOCKET_DIR + "/ganeti-metad",
2208 allow_non_master=True)
2209
2210 retries = 5
2211
2212 while True:
2213 try:
2214 trans = utils.Retry(_Connect, 1.0, constants.LUXI_DEF_CTMO)
2215 break
2216 except utils.RetryTimeout:
2217 raise TimeoutError("Connection to metadata daemon timed out")
2218 except (socket.error, NoMasterError), err:
2219 if retries == 0:
2220 logging.error("Failed to connect to the metadata daemon",
2221 exc_info=True)
2222 raise TimeoutError("Failed to connect to metadata daemon: %s" % err)
2223 else:
2224 retries -= 1
2225
2226 data = serializer.DumpJson(metadata,
2227 private_encoder=serializer.EncodeWithPrivateFields)
2228
2229 trans.Send(data)
2230 trans.Close()
2231
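# Illustrative sketch (not executed): callers serialize an instance and
# push it to the metadata daemon, e.g.
#   ModifyInstanceMetadata(instance.ToDict())
# which (re)starts ganeti-metad if needed and sends the JSON-encoded
# dict, private fields included, over the unix socket used above.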
2232
2233 def BlockdevCreate(disk, size, owner, on_primary, info, excl_stor):
2234 """Creates a block device for an instance.
2235
2236 @type disk: L{objects.Disk}
2237 @param disk: the object describing the disk we should create
2238 @type size: int
2239 @param size: the size of the physical underlying device, in MiB
2240 @type owner: str
2241 @param owner: the name of the instance for which disk is created,
2242 used for device cache data
2243 @type on_primary: boolean
2244 @param on_primary: indicates if it is the primary node or not
2245 @type info: string
2246 @param info: string that will be sent to the physical device
2247 creation, used for example to set (LVM) tags on LVs
2248 @type excl_stor: boolean
2249 @param excl_stor: Whether exclusive_storage is active
2250
2251 @return: the new unique_id of the device (this can sometimes be
2252 computed only after creation), or None. On secondary nodes,
2253 it's not required to return anything.
2254
2255 """
2256 # TODO: remove the obsolete "size" argument
2257 # pylint: disable=W0613
2258 clist = []
2259 if disk.children:
2260 for child in disk.children:
2261 try:
2262 crdev = _RecursiveAssembleBD(child, owner, on_primary)
2263 except errors.BlockDeviceError, err:
2264 _Fail("Can't assemble device %s: %s", child, err)
2265 if on_primary or disk.AssembleOnSecondary():
2266 # we need the children open in case the device itself has to
2267 # be assembled
2268 try:
2269 # pylint: disable=E1103
2270 crdev.Open()
2271 except errors.BlockDeviceError, err:
2272 _Fail("Can't make child '%s' read-write: %s", child, err)
2273 clist.append(crdev)
2274
2275 try:
2276 device = bdev.Create(disk, clist, excl_stor)
2277 except errors.BlockDeviceError, err:
2278 _Fail("Can't create block device: %s", err)
2279
2280 if on_primary or disk.AssembleOnSecondary():
2281 try:
2282 device.Assemble()
2283 except errors.BlockDeviceError, err:
2284 _Fail("Can't assemble device after creation, unusual event: %s", err)
2285 if on_primary or disk.OpenOnSecondary():
2286 try:
2287 device.Open(force=True)
2288 except errors.BlockDeviceError, err:
2289 _Fail("Can't make device r/w after creation, unusual event: %s", err)
2290 DevCacheManager.UpdateCache(device.dev_path, owner,
2291 on_primary, disk.iv_name)
2292
2293 device.SetInfo(info)
2294
2295 return device.unique_id
2296
2297
2298 def _DumpDevice(source_path, target_path, offset, size, truncate):
2299 """This function images/wipes the device using a local file.
2300
2301 @type source_path: string
2302 @param source_path: path of the image or data source (e.g., "/dev/zero")
2303
2304 @type target_path: string
2305 @param target_path: path of the device to image/wipe
2306
2307 @type offset: int
2308 @param offset: offset in MiB in the output file
2309
2310 @type size: int
2311 @param size: maximum size in MiB to write (data source might be smaller)
2312
2313 @type truncate: bool
2314 @param truncate: whether the file should be truncated
2315
2316 @return: None
2317 @raise RPCFail: in case of failure
2318
2319 """
2320 # Internal sizes are always in Mebibytes; if the following "dd" command
2321 # should use a different block size the offset and size given to this
2322 # function must be adjusted accordingly before being passed to "dd".
2323 block_size = 1024 * 1024
2324
2325 cmd = [constants.DD_CMD, "if=%s" % source_path, "seek=%d" % offset,
2326 "bs=%s" % block_size, "oflag=direct", "of=%s" % target_path,
2327 "count=%d" % size]
2328
2329 if not truncate:
2330 cmd.append("conv=notrunc")
2331
2332 result = utils.RunCmd(cmd)
2333
2334 if result.failed:
2335 _Fail("Dump command '%s' exited with error: %s; output: %s", result.cmd,
2336 result.fail_reason, result.output)
2337
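# Illustrative note: wiping the first GiB of a device at offset 0 builds
# a command equivalent to
#   dd if=/dev/zero of=<dev_path> seek=0 bs=1048576 oflag=direct count=1024
# i.e. offset and size are given in MiB and translated via the 1 MiB
# block size above; "conv=notrunc" is appended when truncate is False.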
2338
2339 def _DownloadAndDumpDevice(source_url, target_path, size):
2340 """This function images a device using a downloaded image file.
2341
2342 @type source_url: string
2343 @param source_url: URL of image to dump to disk
2344
2345 @type target_path: string
2346 @param target_path: path of the device to image
2347
2348 @type size: int
2349 @param size: maximum size in MiB to write (data source might be smaller)
2350
2351 @rtype: NoneType
2352 @return: None
2353 @raise RPCFail: in case of download or write failures
2354
2355 """
2356 class DDParams(object):
2357 def __init__(self, current_size, total_size):
2358 self.current_size = current_size
2359 self.total_size = total_size
2360 self.image_size_error = False
2361
2362 def dd_write(ddparams, out):
2363 if ddparams.current_size < ddparams.total_size:
2364 ddparams.current_size += len(out)
2365 target_file.write(out)
2366 else:
2367 ddparams.image_size_error = True
2368 return -1
2369
2370 target_file = open(target_path, "r+")
2371 ddparams = DDParams(0, 1024 * 1024 * size)
2372
2373 curl = pycurl.Curl()
2374 curl.setopt(pycurl.VERBOSE, True)
2375 curl.setopt(pycurl.NOSIGNAL, True)
2376 curl.setopt(pycurl.USERAGENT, http.HTTP_GANETI_VERSION)
2377 curl.setopt(pycurl.URL, source_url)
2378 curl.setopt(pycurl.WRITEFUNCTION, lambda out: dd_write(ddparams, out))
2379
2380 try:
2381 curl.perform()
2382 except pycurl.error:
2383 if ddparams.image_size_error:
2384 _Fail("Disk image larger than the disk")
2385 else:
2386 raise
2387
2388 target_file.close()
2389
2390
2391 def BlockdevWipe(disk, offset, size):
2392 """Wipes a block device.
2393
2394 @type disk: L{objects.Disk}
2395 @param disk: the disk object we want to wipe
2396 @type offset: int
2397 @param offset: The offset in MiB in the file
2398 @type size: int
2399 @param size: The size in MiB to write
2400
2401 """
2402 try:
2403 rdev = _RecursiveFindBD(disk)
2404 except errors.BlockDeviceError:
2405 rdev = None
2406
2407 if not rdev:
2408 _Fail("Cannot wipe device %s: device not found", disk.iv_name)
2409 if offset < 0:
2410 _Fail("Negative offset")
2411 if size < 0:
2412 _Fail("Negative size")
2413 if offset > rdev.size:
2414 _Fail("Wipe offset is bigger than device size")
2415 if (offset + size) > rdev.size:
2416 _Fail("Wipe offset and size are bigger than device size")
2417
2418 _DumpDevice("/dev/zero", rdev.dev_path, offset, size, True)
2419
2420
2421 def BlockdevImage(disk, image, size):
2422 """Images a block device either by dumping a local file or
2423 downloading a URL.
2424
2425 @type disk: L{objects.Disk}
2426 @param disk: the disk object we want to image
2427
2428 @type image: string
2429 @param image: file path or URL of the disk image to be dumped
2430
2431 @type size: int
2432 @param size: The size in MiB to write
2433
2434 @rtype: NoneType
2435 @return: None
2436 @raise RPCFail: in case of failure
2437
2438 """
2439 if not (utils.IsUrl(image) or os.path.exists(image)):
2440 _Fail("Image '%s' not found", image)
2441
2442 try:
2443 rdev = _RecursiveFindBD(disk)
2444 except errors.BlockDeviceError:
2445 rdev = None
2446
2447 if not rdev:
2448 _Fail("Cannot image device %s: device not found", disk.iv_name)
2449 if size < 0:
2450 _Fail("Negative size")
2451 if size > rdev.size:
2452 _Fail("Image size is bigger than device size")
2453
2454 if utils.IsUrl(image):
2455 _DownloadAndDumpDevice(image, rdev.dev_path, size)
2456 else:
2457 _DumpDevice(image, rdev.dev_path, 0, size, False)
2458
2459
2460 def BlockdevPauseResumeSync(disks, pause):
2461 """Pause or resume the sync of the block device.
2462
2463 @type disks: list of L{objects.Disk}
2464 @param disks: the disk objects whose sync we want to pause/resume
2465 @type pause: bool
2466 @param pause: whether to pause or to resume
2467
2468 """
2469 success = []
2470 for disk in disks:
2471 try:
2472 rdev = _RecursiveFindBD(disk)
2473 except errors.BlockDeviceError:
2474 rdev = None
2475
2476 if not rdev:
2477 success.append((False, ("Cannot change sync for device %s:"
2478 " device not found" % disk.iv_name)))
2479 continue
2480
2481 result = rdev.PauseResumeSync(pause)
2482
2483 if result:
2484 success.append((result, None))
2485 else:
2486 if pause:
2487 msg = "Pause"
2488 else:
2489 msg = "Resume"
2490 success.append((result, "%s for device %s failed" % (msg, disk.iv_name)))
2491
2492 return success
2493
2494
2495 def BlockdevRemove(disk):
2496 """Remove a block device.
2497
2498 @note: This is intended to be called recursively.
2499
2500 @type disk: L{objects.Disk}
2501 @param disk: the disk object we should remove
2502 @rtype: boolean
2503 @return: the success of the operation
2504
2505 """
2506 msgs = []
2507 try:
2508 rdev = _RecursiveFindBD(disk)
2509 except errors.BlockDeviceError, err:
2510 # probably can't attach
2511 logging.info("Can't attach to device %s in remove", disk)
2512 rdev = None
2513 if rdev is not None:
2514 r_path = rdev.dev_path
2515
2516 def _TryRemove():
2517 try:
2518 rdev.Remove()
2519 return []
2520 except errors.BlockDeviceError, err:
2521 return [str(err)]
2522
2523 msgs.extend(utils.SimpleRetry([], _TryRemove,
2524 constants.DISK_REMOVE_RETRY_INTERVAL,
2525 constants.DISK_REMOVE_RETRY_TIMEOUT))
2526
2527 if not msgs:
2528 DevCacheManager.RemoveCache(r_path)
2529
2530 if disk.children:
2531 for child in disk.children:
2532 try:
2533 BlockdevRemove(child)
2534 except RPCFail, err:
2535 msgs.append(str(err))
2536
2537 if msgs:
2538 _Fail("; ".join(msgs))
2539
2540
2541 def _RecursiveAssembleBD(disk, owner, as_primary):
2542 """Activate a block device for an instance.
2543
2544 This is run on the primary and secondary nodes for an instance.
2545
2546 @note: this function is called recursively.
2547
2548 @type disk: L{objects.Disk}
2549 @param disk: the disk we try to assemble
2550 @type owner: str
2551 @param owner: the name of the instance which owns the disk
2552 @type as_primary: boolean
2553 @param as_primary: if we should make the block device
2554 read/write
2555
2556 @return: the assembled device or None (in case no device
2557 was assembled)
2558 @raise errors.BlockDeviceError: in case there is an error
2559 during the activation of the children or the device
2560 itself
2561
2562 """
2563 children = []
2564 if disk.children:
2565 mcn = disk.ChildrenNeeded()
2566 if mcn == -1:
2567 mcn = 0 # max number of Nones allowed
2568 else:
2569 mcn = len(disk.children) - mcn # max number of Nones
2570 for chld_disk in disk.children:
2571 try:
2572 cdev = _RecursiveAssembleBD(chld_disk, owner, as_primary)
2573 except errors.BlockDeviceError, err:
2574 if children.count(None) >= mcn:
2575 raise
2576 cdev = None
2577 logging.error("Error in child activation (but continuing): %s",
2578 str(err))
2579 children.append(cdev)
2580
2581 if as_primary or disk.AssembleOnSecondary():
2582 r_dev = bdev.Assemble(disk, children)
2583 result = r_dev
2584 if as_primary or disk.OpenOnSecondary():
2585 r_dev.Open()
2586 DevCacheManager.UpdateCache(r_dev.dev_path, owner,
2587 as_primary, disk.iv_name)
2588
2589 else:
2590 result = True
2591 return result
2592
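# Illustrative note: the `mcn` computation above bounds how many children
# may fail to assemble. For a device with 3 children where
# disk.ChildrenNeeded() == 2, mcn = 3 - 2 = 1, so one failed child is
# recorded as None and tolerated; a second failure re-raises the error.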
2593
2594 def BlockdevAssemble(disk, instance, as_primary, idx):
2595 """Activate a block device for an instance.
2596
2597 This is a wrapper over _RecursiveAssembleBD.
2598
2599 @rtype: tuple
2600 @return: a (device path, symlink name, device URI) tuple on primary
2601 nodes, and (C{True}, C{True}) on secondary nodes
2602
2603 """
2604 try:
2605 result = _RecursiveAssembleBD(disk, instance.name, as_primary)
2606 if isinstance(result, BlockDev):
2607 # pylint: disable=E1103
2608 dev_path = result.dev_path
2609 link_name = None
2610 uri = None
2611 if as_primary:
2612 link_name = _SymlinkBlockDev(instance.name, dev_path, idx)
2613 uri = _CalculateDeviceURI(instance, disk, result)
2614 elif result:
2615 return result, result
2616 else:
2617 _Fail("Unexpected result from _RecursiveAssembleBD")
2618 except errors.BlockDeviceError, err:
2619 _Fail("Error while assembling disk: %s", err, exc=True)
2620 except OSError, err:
2621 _Fail("Error while symlinking disk: %s", err, exc=True)
2622
2623 return dev_path, link_name, uri
2624
2625
2626 def BlockdevShutdown(disk):
2627 """Shut down a block device.
2628
2629 First, if the device is assembled (Attach() is successful), then
2630 the device is shutdown. Then the children of the device are
2631 shutdown.
2632
2633 This function is called recursively. Note that we don't cache the
2634 children or such; as opposed to assemble, the shutdown of lower
2635 devices doesn't require that the upper device was active.
2636
2637 @type disk: L{objects.Disk}
2638 @param disk: the description of the disk we should
2639 shutdown
2640 @rtype: None
2641
2642 """
2643 msgs = []
2644 r_dev = _RecursiveFindBD(disk)
2645 if r_dev is not None:
2646 r_path = r_dev.dev_path
2647 try:
2648 r_dev.Shutdown()
2649 DevCacheManager.RemoveCache(r_path)
2650 except errors.BlockDeviceError, err:
2651 msgs.append(str(err))
2652
2653 if disk.children:
2654 for child in disk.children:
2655 try:
2656 BlockdevShutdown(child)
2657 except RPCFail, err:
2658 msgs.append(str(err))
2659
2660 if msgs:
2661 _Fail("; ".join(msgs))
2662
2663
2664 def BlockdevAddchildren(parent_cdev, new_cdevs):
2665 """Extend a mirrored block device.
2666
2667 @type parent_cdev: L{objects.Disk}
2668 @param parent_cdev: the disk to which we should add children
2669 @type new_cdevs: list of L{objects.Disk}
2670 @param new_cdevs: the list of children which we should add
2671 @rtype: None
2672
2673 """
2674 parent_bdev = _RecursiveFindBD(parent_cdev)
2675 if parent_bdev is None:
2676 _Fail("Can't find parent device '%s' in add children", parent_cdev)
2677 new_bdevs = [_RecursiveFindBD(disk) for disk in new_cdevs]
2678 if new_bdevs.count(None) > 0:
2679 _Fail("Can't find new device(s) to add: %s:%s", new_bdevs, new_cdevs)
2680 parent_bdev.AddChildren(new_bdevs)
2681
2682
2683 def BlockdevRemovechildren(parent_cdev, new_cdevs):
2684 """Shrink a mirrored block device.
2685
2686 @type parent_cdev: L{objects.Disk}
2687 @param parent_cdev: the disk from which we should remove children
2688 @type new_cdevs: list of L{objects.Disk}
2689 @param new_cdevs: the list of children which we should remove
2690 @rtype: None
2691
2692 """
2693 parent_bdev = _RecursiveFindBD(parent_cdev)
2694 if parent_bdev is None:
2695 _Fail("Can't find parent device '%s' in remove children", parent_cdev)
2696 devs = []
2697 for disk in new_cdevs:
2698 rpath = disk.StaticDevPath()
2699 if rpath is None:
2700 bd = _RecursiveFindBD(disk)
2701 if bd is None:
2702 _Fail("Can't find device %s while removing children", disk)
2703 else:
2704 devs.append(bd.dev_path)
2705 else:
2706 if not utils.IsNormAbsPath(rpath):
2707 _Fail("Strange path returned from StaticDevPath: '%s'", rpath)
2708 devs.append(rpath)
2709 parent_bdev.RemoveChildren(devs)
2710
2711
2712 def BlockdevGetmirrorstatus(disks):
2713 """Get the mirroring status of a list of devices.
2714
2715 @type disks: list of L{objects.Disk}
2716 @param disks: the list of disks which we should query
2717 @rtype: list
2718 @return: List of L{objects.BlockDevStatus}, one for each disk
2719 @raise errors.BlockDeviceError: if any of the disks cannot be
2720 found
2721
2722 """
2723 stats = []
2724 for dsk in disks:
2725 rbd = _RecursiveFindBD(dsk)
2726 if rbd is None:
2727 _Fail("Can't find device %s", dsk)
2728
2729 stats.append(rbd.CombinedSyncStatus())
2730
2731 return stats
2732
2733
2734 def BlockdevGetmirrorstatusMulti(disks):
2735 """Get the mirroring status of a list of devices.
2736
2737 @type disks: list of L{objects.Disk}
2738 @param disks: the list of disks which we should query
2739 @rtype: list
2740 @return: List of tuples, (bool, status), one for each disk; bool denotes
2741 success/failure, status is L{objects.BlockDevStatus} on success, string
2742 otherwise
2743
2744 """
2745 result = []
2746 for disk in disks:
2747 try:
2748 rbd = _RecursiveFindBD(disk)
2749 if rbd is None:
2750 result.append((False, "Can't find device %s" % disk))
2751 continue
2752
2753 status = rbd.CombinedSyncStatus()
2754 except errors.BlockDeviceError, err:
2755 logging.exception("Error while getting disk status")
2756 result.append((False, str(err)))
2757 else:
2758 result.append((True, status))
2759
2760 assert len(disks) == len(result)
2761
2762 return result
2763
2764
2765 def _RecursiveFindBD(disk):
2766 """Check if a device is activated.
2767
2768 If so, return information about the real device.
2769
2770 @type disk: L{objects.Disk}
2771 @param disk: the disk object we need to find
2772
2773 @return: None if the device can't be found,
2774 otherwise the device instance
2775
2776 """
2777 children = []
2778 if disk.children:
2779 for chdisk in disk.children:
2780 children.append(_RecursiveFindBD(chdisk))
2781
2782 return bdev.FindDevice(disk, children)
2783
2784
2785 def _OpenRealBD(disk):
2786 """Opens the underlying block device of a disk.
2787
2788 @type disk: L{objects.Disk}
2789 @param disk: the disk object we want to open
2790
2791 """
2792 real_disk = _RecursiveFindBD(disk)
2793 if real_disk is None:
2794 _Fail("Block device '%s' is not set up", disk)
2795
2796 real_disk.Open()
2797
2798 return real_disk
2799
2800
2801 def BlockdevFind(disk):
2802 """Check if a device is activated.
2803
2804 If it is, return information about the real device.
2805
2806 @type disk: L{objects.Disk}
2807 @param disk: the disk to find
2808 @rtype: None or objects.BlockDevStatus
2809 @return: None if the disk cannot be found, otherwise the current
2810 information
2811
2812 """
2813 try:
2814 rbd = _RecursiveFindBD(disk)
2815 except errors.BlockDeviceError, err:
2816 _Fail("Failed to find device: %s", err, exc=True)
2817
2818 if rbd is None:
2819 return None
2820
2821 return rbd.GetSyncStatus()
2822
2823
2824 def BlockdevGetdimensions(disks):
2825 """Computes the size of the given disks.
2826
2827 If a disk is not found, returns None instead.
2828
2829 @type disks: list of L{objects.Disk}
2830 @param disks: the list of disks to compute the size for
2831 @rtype: list
2832 @return: list with elements None if the disk cannot be found,
2833 otherwise the pair (size, spindles), where spindles is None if the
2834 device doesn't support that
2835
2836 """
2837 result = []
2838 for cf in disks:
2839 try:
2840 rbd = _RecursiveFindBD(cf)
2841 except errors.BlockDeviceError:
2842 result.append(None)
2843 continue
2844 if rbd is None:
2845 result.append(None)
2846 else:
2847 result.append(rbd.GetActualDimensions())
2848 return result
2849
2850
2851 def UploadFile(file_name, data, mode, uid, gid, atime, mtime):
2852 """Write a file to the filesystem.
2853
2854 This allows the master to overwrite(!) a file. It will only perform
2855 the operation if the file belongs to a list of configuration files.
2856
2857 @type file_name: str
2858 @param file_name: the target file name
2859 @type data: str
2860 @param data: the new contents of the file
2861 @type mode: int
2862 @param mode: the mode to give the file (can be None)
2863 @type uid: string
2864 @param uid: the owner of the file
2865 @type gid: string
2866 @param gid: the group of the file
2867 @type atime: float
2868 @param atime: the atime to set on the file (can be None)
2869 @type mtime: float
2870 @param mtime: the mtime to set on the file (can be None)
2871 @rtype: None
2872
2873 """
2874 file_name = vcluster.LocalizeVirtualPath(file_name)
2875
2876 if not os.path.isabs(file_name):
2877 _Fail("Filename passed to UploadFile is not absolute: '%s'", file_name)
2878
2879 if file_name not in _ALLOWED_UPLOAD_FILES:
2880 _Fail("Filename passed to UploadFile not in allowed upload targets: '%s'",
2881 file_name)
2882
2883 raw_data = _Decompress(data)
2884
2885 if not (isinstance(uid, basestring) and isinstance(gid, basestring)):
2886 _Fail("Invalid username/groupname type")
2887
2888 getents = runtime.GetEnts()
2889 uid = getents.LookupUser(uid)
2890 gid = getents.LookupGroup(gid)
2891
2892 utils.SafeWriteFile(file_name, None,
2893 data=raw_data, mode=mode, uid=uid, gid=gid,
2894 atime=atime, mtime=mtime)
2895
2896
2897 def RunOob(oob_program, command, node, timeout):
2898 """Executes oob_program with given command on given node.
2899
2900 @param oob_program: The path to the executable oob_program
2901 @param command: The command to invoke on oob_program
2902 @param node: The node given as an argument to the program
2903 @param timeout: Timeout after which we kill the oob program
2904
2905 @return: stdout
2906 @raise RPCFail: If execution fails for some reason
2907
2908 """
2909 result = utils.RunCmd([oob_program, command, node], timeout=timeout)
2910
2911 if result.failed:
2912 _Fail("'%s' failed with reason '%s'; output: %s", result.cmd,
2913 result.fail_reason, result.output)
2914
2915 return result.stdout
2916
2917
2918 def _OSOndiskAPIVersion(os_dir):
2919 """Compute and return the API version of a given OS.
2920
2921 This function will try to read the API version of the OS residing in
2922 the 'os_dir' directory.
2923
2924 @type os_dir: str
2925 @param os_dir: the directory in which we should look for the OS
2926 @rtype: tuple
2927 @return: tuple (status, data) with status denoting the validity and
2928 data holding either the valid versions or an error message
2929
2930 """
2931 api_file = utils.PathJoin(os_dir, constants.OS_API_FILE)
2932
2933 try:
2934 st = os.stat(api_file)
2935 except EnvironmentError, err:
2936 return False, ("Required file '%s' not found under path %s: %s" %
2937 (constants.OS_API_FILE, os_dir, utils.ErrnoOrStr(err)))
2938
2939 if not stat.S_ISREG(stat.S_IFMT(st.st_mode)):
2940 return False, ("File '%s' in %s is not a regular file" %
2941 (constants.OS_API_FILE, os_dir))
2942
2943 try:
2944 api_versions = utils.ReadFile(api_file).splitlines()
2945 except EnvironmentError, err:
2946 return False, ("Error while reading the API version file at %s: %s" %
2947 (api_file, utils.ErrnoOrStr(err)))
2948
2949 try:
2950 api_versions = [int(version.strip()) for version in api_versions]
2951 except (TypeError, ValueError), err:
2952 return False, ("API version(s) can't be converted to integer: %s" %
2953 str(err))
2954
2955 return True, api_versions
2956
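# Illustrative note: the API version file (constants.OS_API_FILE) lists
# one integer version per line, so a file containing the lines "15" and
# "20" yields (True, [15, 20]), while a missing or malformed file yields
# (False, "<error message>").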
2957
2958 def DiagnoseOS(top_dirs=None):
2959 """Compute the validity for all OSes.
2960
2961 @type top_dirs: list
2962 @param top_dirs: the list of directories in which to
2963 search (if not given defaults to
2964 L{pathutils.OS_SEARCH_PATH})
2965 @rtype: list
2966 @return: a list of tuples (name, path, status, diagnose, variants,
2967 parameters, api_version, trusted) for all (potential) OSes under all
2968 search paths, where:
2969 - name is the (potential) OS name
2970 - path is the full path to the OS
2971 - status True/False is the validity of the OS
2972 - diagnose is the error message for an invalid OS, otherwise empty
2973 - variants is a list of supported OS variants, if any
2974 - parameters is a list of (name, help) parameters, if any
2975 - api_version is a list of supported OS API versions
- trusted is a boolean denoting whether the OS has a trusted create script
2976
2977 """
2978 if top_dirs is None:
2979 top_dirs = pathutils.OS_SEARCH_PATH
2980
2981 result = []
2982 for dir_name in top_dirs:
2983 if os.path.isdir(dir_name):
2984 try:
2985 f_names = utils.ListVisibleFiles(dir_name)
2986 except EnvironmentError, err:
2987 logging.exception("Can't list the OS directory %s: %s", dir_name, err)
2988 break
2989 for name in f_names:
2990 os_path = utils.PathJoin(dir_name, name)
2991 status, os_inst = _TryOSFromDisk(name, base_dir=dir_name)
2992 if status:
2993 diagnose = ""
2994 variants = os_inst.supported_variants
2995 parameters = os_inst.supported_parameters
2996 api_versions = os_inst.api_versions
2997 trusted = not os_inst.create_script_untrusted
2998 else:
2999 diagnose = os_inst
3000 variants = parameters = api_versions = []
3001 trusted = True
3002 result.append((name, os_path, status, diagnose, variants,
3003 parameters, api_versions, trusted))
3004
3005 return result
3006
3007
3008 def _TryOSFromDisk(name, base_dir=None):
3009 """Create an OS instance from disk.
3010
3011 This function will return an OS instance if the given name is a
3012 valid OS name.
3013
3014 @type base_dir: string
3015 @keyword base_dir: Base directory containing OS installations.
3016 Defaults to a search in all the OS_SEARCH_PATH dirs.
3017 @rtype: tuple
3018 @return: success and either the OS instance if we find a valid one,
3019 or error message
3020
3021 """
3022 if base_dir is None:
3023 os_dir = utils.FindFile(name, pathutils.OS_SEARCH_PATH, os.path.isdir)
3024 else:
3025 os_dir = utils.FindFile(name, [base_dir], os.path.isdir)
3026
3027 if os_dir is None:
3028 return False, "Directory for OS %s not found in search path" % name
3029
3030 status, api_versions = _OSOndiskAPIVersion(os_dir)
3031 if not status:
3032 # push the error up
3033 return status, api_versions
3034
3035 if not constants.OS_API_VERSIONS.intersection(api_versions):
3036 return False, ("API version mismatch for path '%s': found %s, want %s." %
3037 (os_dir, api_versions, constants.OS_API_VERSIONS))
3038
3039 # OS Files dictionary, we will populate it with the absolute path
3040 # names; if the value is True, then it is a required file, otherwise
3041 # an optional one
3042 os_files = dict.fromkeys(constants.OS_SCRIPTS, True)
3043
3044 os_files[constants.OS_SCRIPT_CREATE] = False
3045 os_files[constants.OS_SCRIPT_CREATE_UNTRUSTED] = False
3046
3047 if max(api_versions) >= constants.OS_API_V15:
3048 os_files[constants.OS_VARIANTS_FILE] = False
3049
3050 if max(api_versions) >= constants.OS_API_V20:
3051 os_files[constants.OS_PARAMETERS_FILE] = True
3052 else:
3053 del os_files[constants.OS_SCRIPT_VERIFY]
3054
3055 for (filename, required) in os_files.items():
3056 os_files[filename] = utils.PathJoin(os_dir, filename)
3057
3058 try:
3059 st = os.stat(os_files[filename])
3060 except EnvironmentError, err:
3061 if err.errno == errno.ENOENT and not required:
3062 del os_files[filename]
3063 continue
3064 return False, ("File '%s' under path '%s' is missing (%s)" %
3065 (filename, os_dir, utils.ErrnoOrStr(err)))
3066
3067 if not stat.S_ISREG(stat.S_IFMT(st.st_mode)):
3068 return False, ("File '%s' under path '%s' is not a regular file" %
3069 (filename, os_dir))
3070
3071 if filename in constants.OS_SCRIPTS:
3072 if stat.S_IMODE(st.st_mode) & stat.S_IXUSR != stat.S_IXUSR:
3073 return False, ("File '%s' under path '%s' is not executable" %
3074 (filename, os_dir))
3075
3076 if constants.OS_SCRIPT_CREATE not in os_files and \
3077 constants.OS_SCRIPT_CREATE_UNTRUSTED not in os_files:
3078 return False, ("A create script (trusted or untrusted) under path '%s'"
3079 " must exist" % os_dir)
3080
3081 create_script = os_files.get(constants.OS_SCRIPT_CREATE, None)
3082 create_script_untrusted = os_files.get(constants.OS_SCRIPT_CREATE_UNTRUSTED,
3083 None)
3084
3085 variants = []
3086 if constants.OS_VARIANTS_FILE in os_files:
3087 variants_file = os_files[constants.OS_VARIANTS_FILE]
3088 try:
3089 variants = \
3090 utils.FilterEmptyLinesAndComments(utils.ReadFile(variants_file))
3091 except EnvironmentError, err:
3092 # we accept missing files, but not other errors
3093 if err.errno != errno.ENOENT:
3094 return False, ("Error while reading the OS variants file at %s: %s" %
3095 (variants_file, utils.ErrnoOrStr(err)))
3096
3097 parameters = []
3098 if constants.OS_PARAMETERS_FILE in os_files:
3099 parameters_file = os_files[constants.OS_PARAMETERS_FILE]
3100 try:
3101 parameters = utils.ReadFile(parameters_file).splitlines()
3102 except EnvironmentError, err:
3103 return False, ("Error while reading the OS parameters file at %s: %s" %
3104 (parameters_file, utils.ErrnoOrStr(err)))
3105 parameters = [v.split(None, 1) for v in parameters]
3106
3107 os_obj = objects.OS(name=name, path=os_dir,
3108 create_script=create_script,
3109 create_script_untrusted=create_script_untrusted,
3110 export_script=os_files[constants.OS_SCRIPT_EXPORT],
3111 import_script=os_files[constants.OS_SCRIPT_IMPORT],
3112 rename_script=os_files[constants.OS_SCRIPT_RENAME],
3113 verify_script=os_files.get(constants.OS_SCRIPT_VERIFY,
3114 None),
3115 supported_variants=variants,
3116 supported_parameters=parameters,
3117 api_versions=api_versions)
3118 return True, os_obj
3119
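# Illustrative sketch: a valid OS definition directory, as checked above,
# might look like this (the "debootstrap" path is hypothetical; file
# names follow the constants used in _TryOSFromDisk):
#   /srv/ganeti/os/debootstrap/
#       ganeti_api_version      (required, parsed by _OSOndiskAPIVersion)
#       create                  (or create_untrusted; one must exist)
#       export, import, rename  (required, executable scripts)
#       verify                  (required for API >= 20)
#       variants.list           (optional for API >= 15)
#       parameters.list         (required for API >= 20)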
3120
3121 def OSFromDisk(name, base_dir=None):
3122 """Create an OS instance from disk.
3123
3124 This function will return an OS instance if the given name is a
3125 valid OS name. Otherwise, it will raise an appropriate
3126 L{RPCFail} exception, detailing why this is not a valid OS.
3127
3128 This is just a wrapper over L{_TryOSFromDisk}, which doesn't raise
3129 an exception but returns true/false status data.
3130
3131 @type base_dir: string
3132 @keyword base_dir: Base directory containing OS installations.
3133 Defaults to a search in all the OS_SEARCH_PATH dirs.
3134 @rtype: L{objects.OS}
3135 @return: the OS instance if we find a valid one
3136 @raise RPCFail: if we don't find a valid OS
3137
3138 """
3139 name_only = objects.OS.GetName(name)
3140 status, payload = _TryOSFromDisk(name_only, base_dir)
3141
3142 if not status:
3143 _Fail(payload)
3144
3145 return payload
3146
3147
3148 def OSCoreEnv(os_name, inst_os, os_params, debug=0):
3149 """Calculate the basic environment for an os script.
3150
3151 @type os_name: str
3152 @param os_name: full operating system name (including variant)
3153 @type inst_os: L{objects.OS}
3154 @param inst_os: operating system for which the environment is being built
3155 @type os_params: dict
3156 @param os_params: the OS parameters
3157 @type debug: integer
3158 @param debug: debug level (0 or 1, for OS API 10)
3159 @rtype: dict
3160 @return: dict of environment variables
3161 @raise errors.BlockDeviceError: if the block device
3162 cannot be found
3163
3164 """
3165 result = {}
3166 api_version = \
3167 max(constants.OS_API_VERSIONS.intersection(inst_os.api_versions))
3168 result["OS_API_VERSION"] = "%d" % api_version
3169 result["OS_NAME"] = inst_os.name
3170 result["DEBUG_LEVEL"] = "%d" % debug
3171
3172 # OS variants
3173 if api_version >= constants.OS_API_V15 and inst_os.supported_variants:
3174 variant = objects.OS.GetVariant(os_name)
3175 if not variant:
3176 variant = inst_os.supported_variants[0]
3177 else:
3178 variant = ""
3179 result["OS_VARIANT"] = variant
3180
3181 # OS params
3182 for pname, pvalue in os_params.items():
3183 result["OSP_%s" % pname.upper().replace("-", "_")] = pvalue
3184
3185 # Set a default path otherwise programs called by OS scripts (or
3186 # even hooks called from OS scripts) might break, and we don't want
3187 # to have each script require setting a PATH variable
3188 result["PATH"] = constants.HOOKS_PATH
3189
3190 return result
3191
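# Illustrative note: for a hypothetical OS name "debootstrap+minimal"
# with os_params {"partition-style": "msdos"}, the resulting environment
# would contain, among others:
#   OS_NAME=debootstrap, OS_VARIANT=minimal, OS_API_VERSION=<max common>,
#   OSP_PARTITION_STYLE=msdos, DEBUG_LEVEL=0, PATH=constants.HOOKS_PATH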
3192
3193 def OSEnvironment(instance, inst_os, debug=0):
3194 """Calculate the environment for an os script.
3195
3196 @type instance: L{objects.Instance}
3197 @param instance: target instance for the os script run
3198 @type inst_os: L{objects.OS}
3199 @param inst_os: operating system for which the environment is being built
3200 @type debug: integer
3201 @param debug: debug level (0 or 1, for OS API 10)
3202 @rtype: dict
3203 @return: dict of environment variables
3204 @raise errors.BlockDeviceError: if the block device
3205 cannot be found
3206
3207 """
3208 result = OSCoreEnv(instance.os, inst_os, objects.FillDict(instance.osparams,
3209 instance.osparams_private.Unprivate()), debug=debug)
3210
3211 for attr in ["name", "os", "uuid", "ctime", "mtime", "primary_node"]:
3212 result["INSTANCE_%s" % attr.upper()] = str(getattr(instance, attr))
3213
3214 result["HYPERVISOR"] = instance.hypervisor
3215 result["DISK_COUNT"] = "%d" % len(instance.disks_info)
3216 result["NIC_COUNT"] = "%d" % len(instance.nics)
3217 result["INSTANCE_SECONDARY_NODES"] = \
3218 ("%s" % " ".join(instance.secondary_nodes))
3219
3220 # Disks
3221 for idx, disk in enumerate(instance.disks_info):
3222 real_disk = _OpenRealBD(disk)
3223 result["DISK_%d_PATH" % idx] = real_disk.dev_path
3224 result["DISK_%d_ACCESS" % idx] = disk.mode
3225 result["DISK_%d_UUID" % idx] = disk.uuid
3226 if disk.name:
3227 result["DISK_%d_NAME" % idx] = disk.name
3228 if constants.HV_DISK_TYPE in instance.hvparams:
3229 result["DISK_%d_FRONTEND_TYPE" % idx] = \
3230 instance.hvparams[constants.HV_DISK_TYPE]
3231 if disk.dev_type in constants.DTS_BLOCK:
3232 result["DISK_%d_BACKEND_TYPE" % idx] = "block"
3233 elif disk.dev_type in constants.DTS_FILEBASED:
3234 result["DISK_%d_BACKEND_TYPE" % idx] = \
3235 "file:%s" % disk.logical_id[0]
3236
3237 # NICs
3238 for idx, nic in enumerate(instance.nics):
3239 result["NIC_%d_MAC" % idx] = nic.mac
3240 result["NIC_%d_UUID" % idx] = nic.uuid
3241 if nic.name:
3242 result["NIC_%d_NAME" % idx] = nic.name
3243 if nic.ip:
3244 result["NIC_%d_IP" % idx] = nic.ip
3245 result["NIC_%d_MODE" % idx] = nic.nicparams[constants.NIC_MODE]
3246 if nic.nicparams[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
3247 result["NIC_%d_BRIDGE" % idx] = nic.nicparams[constants.NIC_LINK]
3248 if nic.nicparams[constants.NIC_LINK]:
3249 result["NIC_%d_LINK" % idx] = nic.nicparams[constants.NIC_LINK]
3250 if nic.netinfo:
3251 nobj = objects.Network.FromDict(nic.netinfo)
3252 result.update(nobj.HooksDict("NIC_%d_" % idx))
3253 if constants.HV_NIC_TYPE in instance.hvparams:
3254 result["NIC_%d_FRONTEND_TYPE" % idx] = \
3255 instance.hvparams[constants.HV_NIC_TYPE]
3256
3257 # HV/BE params
3258 for source, kind in [(instance.beparams, "BE"), (instance.hvparams, "HV")]:
3259 for key, value in source.items():
3260 result["INSTANCE_%s_%s" % (kind, key)] = str(value)
3261
3262 return result
3263
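# Illustrative note: on top of the OSCoreEnv() values, an instance with
# one disk and one NIC would add variables such as
#   INSTANCE_NAME, INSTANCE_PRIMARY_NODE, DISK_COUNT=1, NIC_COUNT=1,
#   DISK_0_PATH=/dev/..., DISK_0_ACCESS=rw, NIC_0_MAC=<mac>,
#   NIC_0_MODE=bridged, INSTANCE_BE_maxmem=..., INSTANCE_HV_...=...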
3264
3265 def DiagnoseExtStorage(top_dirs=None):
3266 """Compute the validity for all ExtStorage Providers.
3267
3268 @type top_dirs: list
3269 @param top_dirs: the list of directories in which to
3270 search (if not given defaults to
3271 L{pathutils.ES_SEARCH_PATH})
3272 @rtype: list of L{objects.ExtStorage}
3273 @return: a list of tuples (name, path, status, diagnose, parameters)
3274 for all (potential) ExtStorage Providers under all
3275 search paths, where:
3276 - name is the (potential) ExtStorage Provider
3277 - path is the full path to the ExtStorage Provider
3278 - status True/False is the validity of the ExtStorage Provider
3279 - diagnose is the error message for an invalid ExtStorage Provider,
3280 otherwise empty
3281 - parameters is a list of (name, help) parameters, if any
3282
3283 """
3284 if top_dirs is None:
3285 top_dirs = pathutils.ES_SEARCH_PATH
3286
3287 result = []
3288 for dir_name in top_dirs:
3289 if os.path.isdir(dir_name):
3290 try:
3291 f_names = utils.ListVisibleFiles(dir_name)
3292 except EnvironmentError, err:
3293 logging.exception("Can't list the ExtStorage directory %s: %s",
3294 dir_name, err)
3295 break
3296 for name in f_names:
3297 es_path = utils.PathJoin(dir_name, name)
3298 status, es_inst = bdev.ExtStorageFromDisk(name, base_dir=dir_name)
3299 if status:
3300 diagnose = ""
3301 parameters = es_inst.supported_parameters
3302 else:
3303 diagnose = es_inst
3304 parameters = []
3305 result.append((name, es_path, status, diagnose, parameters))
3306
3307 return result
3308
3309
3310 def BlockdevGrow(disk, amount, dryrun, backingstore, excl_stor):
3311 """Grow a stack of block devices.
3312
3313 This function is called recursively, with the children being the
3314 first ones resized.
3315
3316 @type disk: L{objects.Disk}
3317 @param disk: the disk to be grown
3318 @type amount: integer
3319 @param amount: the amount (in mebibytes) to grow with
3320 @type dryrun: boolean
3321 @param dryrun: whether to execute the operation in simulation mode
3322 only, without actually increasing the size
3323 @param backingstore: whether to execute the operation on backing storage
3324 only, or on "logical" storage only; e.g. DRBD is logical storage,
3325 whereas LVM, file, RBD are backing storage
3326 @type excl_stor: boolean
3327 @param excl_stor: Whether exclusive_storage is active
3328 @rtype: (status, result)
3329 @return: a tuple with the status of the operation (True/False), and
3330 the error message if status is False
3331
3332 """
3333 r_dev = _RecursiveFindBD(disk)
3334 if r_dev is None:
3335 _Fail("Cannot find block device %s", disk)
3336
3337 try:
3338 r_dev.Grow(amount, dryrun, backingstore, excl_stor)
3339 except errors.BlockDeviceError, err:
3340 _Fail("Failed to grow block device: %s", err, exc=True)
3341
3342
3343 def BlockdevSnapshot(disk):
3344 """Create a snapshot copy of a block device.
3345
3346 This function is called recursively, and the snapshot is actually created
3347 just for the leaf lvm backend device.
3348
3349 @type disk: L{objects.Disk}
3350 @param disk: the disk to be snapshotted
3351 @rtype: string
3352 @return: snapshot disk ID as (vg, lv)
3353
3354 """
3355 if disk.dev_type == constants.DT_DRBD8:
3356 if not disk.children:
3357 _Fail("DRBD device '%s' without backing storage cannot be snapshotted",
3358 disk.unique_id)
3359 return BlockdevSnapshot(disk.children[0])
3360 elif disk.dev_type == constants.DT_PLAIN:
3361 r_dev = _RecursiveFindBD(disk)
3362 if r_dev is not None:
3363 # FIXME: choose a saner value for the snapshot size
3364 # let's stay on the safe side and ask for the full size, for now
3365 return r_dev.Snapshot(disk.size)
3366 else:
3367 _Fail("Cannot find block device %s", disk)
3368 else:
3369 _Fail("Cannot snapshot non-lvm block device '%s' of type '%s'",
3370 disk.logical_id, disk.dev_type)
3371
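# Illustrative note: for a DRBD8 disk the call above recurses into
# disk.children[0] (the data volume), so the snapshot is ultimately taken
# on the underlying LVM device and the returned ID is a (vg, lv) pair
# naming the new snapshot volume.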
3372
3373 def BlockdevSetInfo(disk, info):
3374 """Sets 'metadata' information on block devices.
3375
3376 This function sets 'info' metadata on block devices. Initial
3377 information is set at device creation; this function should be used
3378 for example after renames.
3379
3380 @type disk: L{objects.Disk}
3381 @param disk: the disk whose metadata is to be set
3382 @type info: string
3383 @param info: new 'info' metadata
3384 @rtype: (status, result)
3385 @return: a tuple with the status of the operation (True/False), and
3386 the error message if status is False
3387
3388 """
3389 r_dev = _RecursiveFindBD(disk)
3390 if r_dev is None:
3391 _Fail("Cannot find block device %s", disk)
3392
3393 try:
3394 r_dev.SetInfo(info)
3395 except errors.BlockDeviceError, err:
3396 _Fail("Failed to set information on block device: %s", err, exc=True)
3397
3398
3399 def FinalizeExport(instance, snap_disks):
3400 """Write out the export configuration information.
3401
3402 @type instance: L{objects.Instance}
3403 @param instance: the instance which we export, used for
3404 saving configuration
3405 @type snap_disks: list of L{objects.Disk}
3406 @param snap_disks: list of snapshot block devices, which
3407 will be used to get the actual name of the dump file
3408
3409 @rtype: None
3410
3411 """
3412 destdir = utils.PathJoin(pathutils.EXPORT_DIR, instance.name + ".new")
3413 finaldestdir = utils.PathJoin(pathutils.EXPORT_DIR, instance.name)
3414
3415 config = objects.SerializableConfigParser()
3416
3417 config.add_section(constants.INISECT_EXP)
3418 config.set(constants.INISECT_EXP, "version", "0")
3419 config.set(constants.INISECT_EXP, "timestamp", "%d" % int(time.time()))
3420 config.set(constants.INISECT_EXP, "source", instance.primary_node)
3421 config.set(constants.INISECT_EXP, "os", instance.os)
3422 config.set(constants.INISECT_EXP, "compression", "none")
3423
3424 config.add_section(constants.INISECT_INS)
3425 config.set(constants.INISECT_INS, "name", instance.name)
3426 config.set(constants.INISECT_INS, "maxmem", "%d" %
3427 instance.beparams[constants.BE_MAXMEM])
3428 config.set(constants.INISECT_INS, "minmem", "%d" %
3429 instance.beparams[constants.BE_MINMEM])
3430 # "memory" is deprecated, but useful for exporting to old ganeti versions
3431 config.set(constants.INISECT_INS, "memory", "%d" %
3432 instance.beparams[constants.BE_MAXMEM])
3433 config.set(constants.INISECT_INS, "vcpus", "%d" %
3434 instance.beparams[constants.BE_VCPUS])
3435 config.set(constants.INISECT_INS, "disk_template", instance.disk_template)
3436 config.set(constants.INISECT_INS, "hypervisor", instance.hypervisor)
3437 config.set(constants.INISECT_INS, "tags", " ".join(instance.GetTags()))
3438
3439 nic_total = 0
3440 for nic_count, nic in enumerate(instance.nics):
3441 nic_total += 1
3442 config.set(constants.INISECT_INS, "nic%d_mac" %
3443 nic_count, "%s" % nic.mac)
3444 config.set(constants.INISECT_INS, "nic%d_ip" % nic_count, "%s" % nic.ip)
3445 config.set(constants.INISECT_INS, "nic%d_network" % nic_count,
3446 "%s" % nic.network)
3447 config.set(constants.INISECT_INS, "nic%d_name" % nic_count,
3448 "%s" % nic.name)
3449 for param in constants.NICS_PARAMETER_TYPES:
3450 config.set(constants.INISECT_INS, "nic%d_%s" % (nic_count, param),
3451 "%s" % nic.nicparams.get(param, None))
3452 # TODO: redundant: on load we can read nics until one doesn't exist
3453 config.set(constants.INISECT_INS, "nic_count", "%d" % nic_total)
3454
3455 disk_total = 0
3456 for disk_count, disk in enumerate(snap_disks):
3457 if disk:
3458 disk_total += 1
3459 config.set(constants.INISECT_INS, "disk%d_ivname" % disk_count,
3460 ("%s" % disk.iv_name))
3461 config.set(constants.INISECT_INS, "disk%d_dump" % disk_count,
3462 ("%s" % disk.logical_id[1]))
3463 config.set(constants.INISECT_INS, "disk%d_size" % disk_count,
3464 ("%d" % disk.size))
3465 config.set(constants.INISECT_INS, "disk%d_name" % disk_count,
3466 "%s" % disk.name)
3467
3468 config.set(constants.INISECT_INS, "disk_count", "%d" % disk_total)
3469
3470 # New-style hypervisor/backend parameters
3471
3472 config.add_section(constants.INISECT_HYP)
3473 for name, value in instance.hvparams.items():
3474 if name not in constants.HVC_GLOBALS:
3475 config.set(constants.INISECT_HYP, name, str(value))
3476
3477 config.add_section(constants.INISECT_BEP)
3478 for name, value in instance.beparams.items():
3479 config.set(constants.INISECT_BEP, name, str(value))
3480
3481 config.add_section(constants.INISECT_OSP)
3482 for name, value in instance.osparams.items():
3483 config.set(constants.INISECT_OSP, name, str(value))
3484
3485 config.add_section(constants.INISECT_OSP_PRIVATE)
3486 for name, value in instance.osparams_private.items():
3487 config.set(constants.INISECT_OSP_PRIVATE, name, str(value.Get()))
3488
3489 utils.WriteFile(utils.PathJoin(destdir, constants.EXPORT_CONF_FILE),
3490 data=config.Dumps())
3491 shutil.rmtree(finaldestdir, ignore_errors=True)
3492 shutil.move(destdir, finaldestdir)
3493
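# For reference, the file written above is a plain INI document roughly of
# this shape (values illustrative; section names follow constants.INISECT_*):
#
#   [export]
#   version = 0
#   timestamp = 1430000000
#   source = node1.example.com
#   os = debootstrap+default
#   compression = none
#
#   [instance]
#   name = inst1.example.com
#   maxmem = 1024
#   minmem = 512
#   vcpus = 1
#   nic0_mac = aa:00:00:11:22:33
#   nic_count = 1
#   disk0_ivname = disk/0
#   disk0_size = 10240
#   disk_count = 1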
3494
3495 def ExportInfo(dest):
3496 """Get export configuration information.
3497
3498 @type dest: str
3499 @param dest: directory containing the export
3500
3501 @rtype: string
3502 @return: a serialized form (via L{objects.SerializableConfigParser.Dumps})
3503 of the export info
3504
3505 """
3506 cff = utils.PathJoin(dest, constants.EXPORT_CONF_FILE)
3507
3508 config = objects.SerializableConfigParser()
3509 config.read(cff)
3510
3511 if (not config.has_section(constants.INISECT_EXP) or
3512 not config.has_section(constants.INISECT_INS)):
3513 _Fail("Export info file doesn't have the required fields")
3514
3515 return config.Dumps()
3516
3517
3518 def ListExports():
3519 """Return a list of exports currently available on this machine.
3520
3521 @rtype: list
3522 @return: list of the exports
3523
3524 """
3525 if os.path.isdir(pathutils.EXPORT_DIR):
3526 return sorted(utils.ListVisibleFiles(pathutils.EXPORT_DIR))
3527 else:
3528 _Fail("No exports directory")
3529
3530
3531 def RemoveExport(export):
3532 """Remove an existing export from the node.
3533
3534 @type export: str
3535 @param export: the name of the export to remove
3536 @rtype: None
3537
3538 """
3539 target = utils.PathJoin(pathutils.EXPORT_DIR, export)
3540
3541 try:
3542 shutil.rmtree(target)
3543 except EnvironmentError, err:
3544 _Fail("Error while removing the export: %s", err, exc=True)
3545
3546
3547 def BlockdevRename(devlist):
3548 """Rename a list of block devices.
3549
3550 @type devlist: list of tuples
3551 @param devlist: list of tuples of the form (disk, new_unique_id); disk is
3552 an L{objects.Disk} object describing the current disk, and new
3553 unique_id is the name we rename it to
3554 @rtype: boolean
3555 @return: True if all renames succeeded, False otherwise
3556
3557 """
3558 msgs = []
3559 result = True
3560 for disk, unique_id in devlist:
3561 dev = _RecursiveFindBD(disk)
3562 if dev is None:
3563 msgs.append("Can't find device %s in rename" % str(disk))
3564 result = False
3565 continue
3566 try:
3567 old_rpath = dev.dev_path
3568 dev.Rename(unique_id)
3569 new_rpath = dev.dev_path
3570 if old_rpath != new_rpath:
3571 DevCacheManager.RemoveCache(old_rpath)
3572 # FIXME: we should add the new cache information here, like:
3573 # DevCacheManager.UpdateCache(new_rpath, owner, ...)
3574 # but we don't have the owner here - maybe parse from existing
3575 # cache? for now, we only lose lvm data when we rename, which
3576 # is less critical than DRBD or MD
3577 except errors.BlockDeviceError, err:
3578 msgs.append("Can't rename device '%s' to '%s': %s" %
3579 (dev, unique_id, err))
3580 logging.exception("Can't rename device '%s' to '%s'", dev, unique_id)
3581 result = False
3582 if not result:
3583 _Fail("; ".join(msgs))
3584
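# Illustrative call (hypothetical values): for plain LVM disks the new
# unique_id is a (vg_name, lv_name) pair, so a bulk rename looks like:
#
#   BlockdevRename([
#     (disk0, ("xenvg", "new-name.disk0")),
#     (disk1, ("xenvg", "new-name.disk1")),
#   ])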
3585
3586 def _TransformFileStorageDir(fs_dir):
3587 """Checks whether given file_storage_dir is valid.
3588
3589 Checks whether the given fs_dir is within the cluster-wide default
3590 file_storage_dir or the shared_file_storage_dir, which are stored in
3591 SimpleStore. Only paths under those directories are allowed.
3592
3593 @type fs_dir: str
3594 @param fs_dir: the path to check
3595
3596 @return: the normalized path; an invalid path raises an exception
3597
3598 """
3599 filestorage.CheckFileStoragePath(fs_dir)
3600
3601 return os.path.normpath(fs_dir)
3602
3603
3604 def CreateFileStorageDir(file_storage_dir):
3605 """Create file storage directory.
3606
3607 @type file_storage_dir: str
3608 @param file_storage_dir: directory to create
3609
3610 @rtype: tuple
3611 @return: tuple with first element a boolean indicating whether dir
3612 creation was successful or not
3613
3614 """
3615 file_storage_dir = _TransformFileStorageDir(file_storage_dir)
3616 if os.path.exists(file_storage_dir):
3617 if not os.path.isdir(file_storage_dir):
3618 _Fail("Specified storage dir '%s' is not a directory",
3619 file_storage_dir)
3620 else:
3621 try:
3622 os.makedirs(file_storage_dir, 0750)
3623 except OSError, err:
3624 _Fail("Cannot create file storage directory '%s': %s",
3625 file_storage_dir, err, exc=True)
3626
3627
3628 def RemoveFileStorageDir(file_storage_dir):
3629 """Remove file storage directory.
3630
3631 Remove it only if it's empty; if not, fail the RPC call.
3632
3633 @type file_storage_dir: str
3634 @param file_storage_dir: the directory we should cleanup
3635 @rtype: tuple (success,)
3636 @return: tuple of one element, C{success}, denoting
3637 whether the operation was successful
3638
3639 """
3640 file_storage_dir = _TransformFileStorageDir(file_storage_dir)
3641 if os.path.exists(file_storage_dir):
3642 if not os.path.isdir(file_storage_dir):
3643 _Fail("Specified Storage directory '%s' is not a directory",
3644 file_storage_dir)
3645 # deletes dir only if empty, otherwise we want to fail the rpc call
3646 try:
3647 os.rmdir(file_storage_dir)
3648 except OSError, err:
3649 _Fail("Cannot remove file storage directory '%s': %s",
3650 file_storage_dir, err)
3651
3652
3653 def RenameFileStorageDir(old_file_storage_dir, new_file_storage_dir):
3654 """Rename the file storage directory.
3655
3656 @type old_file_storage_dir: str
3657 @param old_file_storage_dir: the current path
3658 @type new_file_storage_dir: str
3659 @param new_file_storage_dir: the name we should rename to
3660 @rtype: tuple (success,)
3661 @return: tuple of one element, C{success}, denoting
3662 whether the operation was successful
3663
3664 """
3665 old_file_storage_dir = _TransformFileStorageDir(old_file_storage_dir)
3666 new_file_storage_dir = _TransformFileStorageDir(new_file_storage_dir)
3667 if not os.path.exists(new_file_storage_dir):
3668 if os.path.isdir(old_file_storage_dir):
3669 try:
3670 os.rename(old_file_storage_dir, new_file_storage_dir)
3671 except OSError, err:
3672 _Fail("Cannot rename '%s' to '%s': %s",
3673 old_file_storage_dir, new_file_storage_dir, err)
3674 else:
3675 _Fail("Specified storage dir '%s' is not a directory",
3676 old_file_storage_dir)
3677 else:
3678 if os.path.exists(old_file_storage_dir):
3679 _Fail("Cannot rename '%s' to '%s': both locations exist",
3680 old_file_storage_dir, new_file_storage_dir)
3681
3682
3683 def _EnsureJobQueueFile(file_name):
3684 """Checks whether the given filename is in the queue directory.
3685
3686 @type file_name: str
3687 @param file_name: the file name we should check
3688 @rtype: None
3689 @raises RPCFail: if the file is not valid
3690
3691 """
3692 if not utils.IsBelowDir(pathutils.QUEUE_DIR, file_name):
3693 _Fail("Passed job queue file '%s' does not belong to"
3694 " the queue directory '%s'", file_name, pathutils.QUEUE_DIR)
3695
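# Example of the check (paths hypothetical, assuming the default queue
# directory /var/lib/ganeti/queue):
#
#   _EnsureJobQueueFile("/var/lib/ganeti/queue/job-42")  # passes silently
#   _EnsureJobQueueFile("/etc/passwd")                   # raises RPCFail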
3696
3697 def JobQueueUpdate(file_name, content):
3698 """Updates a file in the queue directory.
3699
3700 This is just a wrapper over L{utils.io.WriteFile}, with proper
3701 checking.
3702
3703 @type file_name: str
3704 @param file_name: the job file name
3705 @type content: str
3706 @param content: the new job contents
3707 @rtype: boolean
3708 @return: the success of the operation
3709
3710 """
3711 file_name = vcluster.LocalizeVirtualPath(file_name)
3712
3713 _EnsureJobQueueFile(file_name)
3714 getents = runtime.GetEnts()
3715
3716 # Write and replace the file atomically
3717 utils.WriteFile(file_name, data=_Decompress(content), uid=getents.masterd_uid,
3718 gid=getents.daemons_gid, mode=constants.JOB_QUEUE_FILES_PERMS)
3719
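# A note on `content`: it is the (encoding, payload) pair produced by the
# RPC client; assuming the standard encodings, both of these forms are
# accepted by _Decompress:
#
#   (constants.RPC_ENCODING_NONE, serialized_job)
#   (constants.RPC_ENCODING_ZLIB_BASE64,
#    base64.b64encode(zlib.compress(serialized_job)))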
3720
3721 def JobQueueRename(old, new):
3722 """Renames a job queue file.
3723
3724 This is just a wrapper over os.rename with proper checking.
3725
3726 @type old: str
3727 @param old: the old (actual) file name
3728 @type new: str
3729 @param new: the desired file name
3730 @rtype: tuple
3731 @return: the success of the operation and payload
3732
3733 """
3734 old = vcluster.LocalizeVirtualPath(old)
3735 new = vcluster.LocalizeVirtualPath(new)
3736
3737 _EnsureJobQueueFile(old)
3738 _EnsureJobQueueFile(new)
3739
3740 getents = runtime.GetEnts()
3741
3742 utils.RenameFile(old, new, mkdir=True, mkdir_mode=0750,
3743 dir_uid=getents.masterd_uid, dir_gid=getents.daemons_gid)
3744
3745
3746 def BlockdevClose(instance_name, disks):
3747 """Closes the given block devices.
3748
3749 This means they will be switched to secondary mode (in case of
3750 DRBD).
3751
3752 @param instance_name: if the argument is not empty, the symlinks
3753 of this instance will be removed
3754 @type disks: list of L{objects.Disk}
3755 @param disks: the list of disks to be closed
3756 @rtype: tuple (success, message)
3757 @return: a tuple of success and message, where success
3758 indicates the success of the operation, and message
3759 contains the error details in case we
3760 failed
3761
3762 """
3763 bdevs = []
3764 for cf in disks:
3765 rd = _RecursiveFindBD(cf)
3766 if rd is None:
3767 _Fail("Can't find device %s", cf)
3768 bdevs.append(rd)
3769
3770 msg = []
3771 for rd in bdevs:
3772 try:
3773 rd.Close()
3774 except errors.BlockDeviceError, err:
3775 msg.append(str(err))
3776 if msg:
3777 _Fail("Can't make devices secondary: %s", ",".join(msg))
3778 else:
3779 if instance_name:
3780 _RemoveBlockDevLinks(instance_name, disks)
3781
3782
3783 def ValidateHVParams(hvname, hvparams):
3784 """Validates the given hypervisor parameters.
3785
3786 @type hvname: string
3787 @param hvname: the hypervisor name
3788 @type hvparams: dict
3789 @param hvparams: the hypervisor parameters to be validated
3790 @rtype: None
3791
3792 """
3793 try:
3794 hv_type = hypervisor.GetHypervisor(hvname)
3795 hv_type.ValidateParameters(hvparams)
3796 except errors.HypervisorError, err:
3797 _Fail(str(err), log=False)
3798
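# Illustrative use (values hypothetical; hvparams is normally the fully
# filled parameter dict, abbreviated here):
#
#   ValidateHVParams("kvm", {"kernel_path": "/boot/vmlinuz-3-kvmU"})
#
# Invalid parameters make the hypervisor raise HypervisorError, which the
# wrapper above converts into an RPC failure.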
3799
3800 def _CheckOSPList(os_obj, parameters):
3801 """Check whether a list of parameters is supported by the OS.
3802
3803 @type os_obj: L{objects.OS}
3804 @param os_obj: OS object to check
3805 @type parameters: list
3806 @param parameters: the list of parameters to check
3807
3808 """
3809 supported = [v[0] for v in os_obj.supported_parameters]
3810 delta = frozenset(parameters).difference(supported)
3811 if delta:
3812 _Fail("The following parameters are not supported"
3813 " by the OS %s: %s" % (os_obj.name, utils.CommaJoin(delta)))
3814
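# Example: if os_obj.supported_parameters is [("dhcp", "use DHCP"),
# ("root_size", "root partition size")], then
# _CheckOSPList(os_obj, ["dhcp", "swap"]) fails and the message names the
# unsupported "swap" parameter.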
3815
3816 def _CheckOSVariant(os_obj, name):
3817 """Check whether an OS name conforms to the os variants specification.
3818
3819 @type os_obj: L{objects.OS}
3820 @param os_obj: OS object to check
3821
3822 @type name: string
3823 @param name: OS name passed by the user, to check for validity
3824
3825 @rtype: NoneType
3826 @return: None
3827 @raise RPCFail: if OS variant is not valid
3828
3829 """
3830 variant = objects.OS.GetVariant(name)
3831
3832 if not os_obj.supported_variants:
3833 if variant:
3834 _Fail("OS '%s' does not support variants ('%s' passed)" %
3835 (os_obj.name, variant))
3836 else:
3837 return
3838
3839 if not variant:
3840 _Fail("OS name '%s' must include a variant" % name)
3841
3842 if variant not in os_obj.supported_variants:
3843 _Fail("OS '%s' does not support variant '%s'" % (os_obj.name, variant))
3844
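# Example: objects.OS.GetVariant("debootstrap+default") == "default", so for
# an OS with supported_variants == ["default", "testing"] the names
# "debootstrap+default" and "debootstrap+testing" pass, while a bare
# "debootstrap" (no variant) or "debootstrap+foo" fail.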
3845
3846 def ValidateOS(required, osname, checks, osparams, force_variant):
3847 """Validate the given OS parameters.
3848
3849 @type required: boolean
3850 @param required: whether absence of the OS should translate into
3851 failure or not
3852 @type osname: string
3853 @param osname: the OS to be validated
3854 @type checks: list
3855 @param checks: list of the checks to run (currently only 'parameters')
3856 @type osparams: dict
3857 @param osparams: dictionary with OS parameters, some of which may be
3858 private.
3859 @rtype: boolean
3860 @return: True if the validation passed, or False if the OS was not
3861 found and L{required} was false
3862
3863 """
3864 if not constants.OS_VALIDATE_CALLS.issuperset(checks):
3865 _Fail("Unknown checks required for OS %s: %s", osname,
3866 set(checks).difference(constants.OS_VALIDATE_CALLS))
3867
3868 name_only = objects.OS.GetName(osname)
3869 status, tbv = _TryOSFromDisk(name_only, None)
3870
3871 if not status:
3872 if required:
3873 _Fail(tbv)
3874 else:
3875 return False
3876
3877 if not force_variant:
3878 _CheckOSVariant(tbv, osname)
3879
3880 if max(tbv.api_versions) < constants.OS_API_V20:
3881 return True
3882
3883 if constants.OS_VALIDATE_PARAMETERS in checks:
3884 _CheckOSPList(tbv, osparams.keys())
3885
3886 validate_env = OSCoreEnv(osname, tbv, osparams)
3887 result = utils.RunCmd([tbv.verify_script] + checks, env=validate_env,
3888 cwd=tbv.path, reset_env=True)
3889 if result.failed:
3890 logging.error("os validate command '%s' returned error: %s output: %s",
3891 result.cmd, result.fail_reason, result.output)
3892 _Fail("OS validation script failed (%s), output: %s",
3893 result.fail_reason, result.output, log=False)
3894
3895 return True
3896
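# Illustrative call (hypothetical OS name and parameters): validate that a
# variant-qualified OS exists and supports the given parameters:
#
#   ValidateOS(True, "debootstrap+default",
#              [constants.OS_VALIDATE_PARAMETERS],
#              {"dhcp": "yes"}, False)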
3897
3898 def ExportOS(instance, override_env):
3899 """Creates a GZIPed tarball with an OS definition and environment.
3900
3901 The archive contains a file with the environment variables needed by
3902 the OS scripts.
3903
3904 @type instance: L{objects.Instance}
3905 @param instance: instance for which the OS definition is exported
3906
3907 @type override_env: dict of string to string
3908 @param override_env: if supplied, it overrides, on a key-by-key
3909 basis, the environment that is stored in the archive
3910
3911 @rtype: string
3912 @return: filepath of the archive
3913
3914 """
3915 assert instance
3916 assert instance.os
3917
3918 temp_dir = tempfile.mkdtemp()
3919 inst_os = OSFromDisk(instance.os)
3920
3921 result = utils.RunCmd(["ln", "-s", inst_os.path,
3922 utils.PathJoin(temp_dir, "os")])
3923 if result.failed:
3924 _Fail("Failed to copy OS package '%s' to '%s': %s, output '%s'",
3925 inst_os, temp_dir, result.fail_reason, result.output)
3926
3927 env = OSEnvironment(instance, inst_os)
3928 env.update(override_env)
3929
3930 with open(utils.PathJoin(temp_dir, "environment"), "w") as f:
3931 for var in env:
3932 f.write(var + "=" + env[var] + "\n")
3933
3934 (fd, os_package) = tempfile.mkstemp(suffix=".tgz")
3935 os.close(fd)
3936
3937 result = utils.RunCmd(["tar", "--dereference", "-czv",
3938 "-f", os_package,
3939 "-C", temp_dir,
3940 "."])
3941 if result.failed: