Xen handle domain shutdown
[ganeti-github.git] / lib / hypervisor / hv_base.py
1 #
2 #
3
4 # Copyright (C) 2006, 2007, 2008, 2009, 2010, 2012, 2013 Google Inc.
5 #
6 # This program is free software; you can redistribute it and/or modify
7 # it under the terms of the GNU General Public License as published by
8 # the Free Software Foundation; either version 2 of the License, or
9 # (at your option) any later version.
10 #
11 # This program is distributed in the hope that it will be useful, but
12 # WITHOUT ANY WARRANTY; without even the implied warranty of
13 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 # General Public License for more details.
15 #
16 # You should have received a copy of the GNU General Public License
17 # along with this program; if not, write to the Free Software
18 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
19 # 02110-1301, USA.
20
21
22 """Base class for all hypervisors
23
24 The syntax for the _CHECK variables and the contents of the PARAMETERS
25 dict is the same, see the docstring for L{BaseHypervisor.PARAMETERS}.
26
27 @var _FILE_CHECK: stub for file checks, without the required flag
28 @var _DIR_CHECK: stub for directory checks, without the required flag
29 @var REQ_FILE_CHECK: mandatory file parameter
30 @var OPT_FILE_CHECK: optional file parameter
31 @var REQ_DIR_CHECK: mandatory directory parameter
32 @var OPT_DIR_CHECK: optional directory parameter
33 @var NO_CHECK: parameter without any checks at all
34 @var REQUIRED_CHECK: parameter required to exist (and non-false), but
35 without other checks; beware that this can't be used for boolean
36 parameters, where you should use NO_CHECK or a custom checker
37
38 """
39
40 import os
41 import re
42 import logging
43
44
45 from ganeti import errors
46 from ganeti import utils
47 from ganeti import constants
48
49
def _IsCpuMaskWellFormed(cpu_mask):
  """Checks whether a single CPU mask parses into a non-empty list.

  A single CPU mask has the form "a,b,c,d", where each element is a
  positive number or a range.

  @param cpu_mask: the single CPU mask to check
  @rtype: boolean
  @return: True if L{utils.ParseCpuMask} accepts the mask and returns a
      non-empty list; False if parsing raises L{errors.ParseError} or
      the result is not a non-empty list

  """
  try:
    parsed = utils.ParseCpuMask(cpu_mask)
  except errors.ParseError:
    return False

  if not isinstance(parsed, list):
    return False
  return len(parsed) > 0
62
63
def _IsMultiCpuMaskWellFormed(cpu_mask):
  """Checks whether a multiple CPU mask can be parsed.

  A multiple CPU mask has the form "a:b:c:d", where each element is
  itself a single CPU mask.

  @param cpu_mask: the multiple CPU mask to check
  @rtype: boolean
  @return: False if L{utils.ParseMultiCpuMask} raises
      L{errors.ParseError} for the mask, True otherwise

  """
  try:
    utils.ParseMultiCpuMask(cpu_mask)
  except errors.ParseError:
    well_formed = False
  else:
    well_formed = True

  return well_formed
77
78
# Read the BaseHypervisor.PARAMETERS docstring for the syntax of the
# _CHECK values.
#
# The tuples in this section are "stubs": they hold the last four elements
# of a check (syntax function, syntax error message, validity function,
# validity error message) and do not include the leading "required" flag.

# must be a file
_FILE_CHECK = (utils.IsNormAbsPath, "must be an absolute normalized path",
               os.path.isfile, "not found or not a file")

# must be a directory
_DIR_CHECK = (utils.IsNormAbsPath, "must be an absolute normalized path",
              os.path.isdir, "not found or not a directory")

# CPU mask must be well-formed (syntax check only, no validity function)
# TODO: implement node level check for the CPU mask
_CPU_MASK_CHECK = (_IsCpuMaskWellFormed,
                   "CPU mask definition is not well-formed",
                   None, None)

# Multiple CPU mask must be well-formed (syntax check only)
_MULTI_CPU_MASK_CHECK = (_IsMultiCpuMaskWellFormed,
                         "Multiple CPU mask definition is not well-formed",
                         None, None)
100
# Check for validity of port number: accepted values are 1..65535, both
# ends included (65535 is the highest valid port and must not be rejected).
# Only a syntax check is defined; there is no validity function.
_NET_PORT_CHECK = (lambda x: 0 < x <= 65535, "invalid port number",
                   None, None)

# Check that an integer is non negative (zero is accepted); syntax check only
_NONNEGATIVE_INT_CHECK = (lambda x: x >= 0, "cannot be negative", None, None)
107
# nice wrappers for users: each one prepends the "required" flag (True for
# the REQ_ variants, False for the OPT_ variants) to a four-element stub,
# giving the five-tuple described in the BaseHypervisor.PARAMETERS docstring
REQ_FILE_CHECK = (True, ) + _FILE_CHECK
OPT_FILE_CHECK = (False, ) + _FILE_CHECK
REQ_DIR_CHECK = (True, ) + _DIR_CHECK
OPT_DIR_CHECK = (False, ) + _DIR_CHECK
REQ_NET_PORT_CHECK = (True, ) + _NET_PORT_CHECK
OPT_NET_PORT_CHECK = (False, ) + _NET_PORT_CHECK
REQ_CPU_MASK_CHECK = (True, ) + _CPU_MASK_CHECK
OPT_CPU_MASK_CHECK = (False, ) + _CPU_MASK_CHECK
REQ_MULTI_CPU_MASK_CHECK = (True, ) + _MULTI_CPU_MASK_CHECK
OPT_MULTI_CPU_MASK_CHECK = (False, ) + _MULTI_CPU_MASK_CHECK
REQ_NONNEGATIVE_INT_CHECK = (True, ) + _NONNEGATIVE_INT_CHECK
OPT_NONNEGATIVE_INT_CHECK = (False, ) + _NONNEGATIVE_INT_CHECK

# no checks at all: not required, no syntax and no validity function
NO_CHECK = (False, None, None, None, None)

# required, but no other checks
REQUIRED_CHECK = (True, None, None, None, None)

# migration type: required, and the value must be one of
# constants.HT_MIGRATION_MODES (syntax check only)
MIGRATION_MODE_CHECK = (True, lambda x: x in constants.HT_MIGRATION_MODES,
                        "invalid migration mode", None, None)
131
132
def ParamInSet(required, my_set):
  """Builds a parameter check that tests membership in a set of values.

  The result is a five-tuple in the format used by
  L{BaseHypervisor.PARAMETERS}, with only the syntax check filled in.

  @type required: boolean
  @param required: whether this is a required parameter
  @type my_set: tuple, list or set
  @param my_set: allowed values set
  @rtype: tuple
  @return: (required, membership function, error message, None, None)

  """
  def _IsAllowed(value):
    return value in my_set

  message = "The value must be one of: %s" % utils.CommaJoin(my_set)
  return (required, _IsAllowed, message, None, None)
145
146
class HvInstanceState(object):
  """Instance states, together with helpers to test a state value.

  @cvar RUNNING: value denoting a running instance
  @cvar SHUTDOWN: value denoting an instance that is shut down

  """
  RUNNING = 0
  SHUTDOWN = 1

  @staticmethod
  def IsRunning(s):
    """Tells whether the state C{s} equals L{RUNNING}.

    """
    is_running = (s == HvInstanceState.RUNNING)
    return is_running

  @staticmethod
  def IsShutdown(s):
    """Tells whether the state C{s} equals L{SHUTDOWN}.

    """
    is_shutdown = (s == HvInstanceState.SHUTDOWN)
    return is_shutdown
158
159
class BaseHypervisor(object):
  """Abstract virtualisation technology interface

  The goal is that all aspects of the virtualisation technology are
  abstracted away from the rest of code.

  @cvar PARAMETERS: a dict of parameter name: check type; the check type is
      a five-tuple containing:
          - the required flag (boolean)
          - a function to check for syntax, that will be used in
            L{CheckParameterSyntax}, in the master daemon process
          - an error message for the above function
          - a function to check for parameter validity on the remote node,
            in the L{ValidateParameters} function
          - an error message for the above function
  @cvar ANCILLARY_FILES: the "all files" list returned by
      L{GetAncillaryFiles}
  @cvar ANCILLARY_FILES_OPT: the "optional files" list returned by
      L{GetAncillaryFiles}; must be a subset of C{ANCILLARY_FILES}
  @type CAN_MIGRATE: boolean
  @cvar CAN_MIGRATE: whether this hypervisor can do migration (either
      live or non-live)

  """
  # Empty/False defaults for the class attributes documented above
  PARAMETERS = {}
  ANCILLARY_FILES = []
  ANCILLARY_FILES_OPT = []
  CAN_MIGRATE = False
184
def StartInstance(self, instance, block_devices, startup_paused):
  """Start an instance.

  Not implemented in the base class.

  @raise NotImplementedError: always, in this base class

  """
  raise NotImplementedError()
188
def StopInstance(self, instance, force=False, retry=False, name=None):
  """Stop an instance.

  Not implemented in the base class.

  @type instance: L{objects.Instance}
  @param instance: instance to stop
  @type force: boolean
  @param force: whether to do a "hard" stop (destroy)
  @type retry: boolean
  @param retry: whether this is just a retry call
  @type name: string or None
  @param name: if this parameter is passed, the instance object
               should not be used (will be passed as None), and the
               shutdown must be done by name only
  @raise NotImplementedError: always, in this base class

  """
  raise NotImplementedError()
205
def CleanupInstance(self, instance_name):
  """Clean up after a stopped instance.

  Optional hook for hypervisors that need to clean up once an instance
  has been stopped; the base implementation does nothing.

  @type instance_name: string
  @param instance_name: instance name to cleanup after

  """
217
def RebootInstance(self, instance):
  """Reboot an instance.

  Not implemented in the base class.

  @raise NotImplementedError: always, in this base class

  """
  raise NotImplementedError()
221
def ListInstances(self, hvparams=None):
  """Get the list of running instances.

  Not implemented in the base class.

  @raise NotImplementedError: always, in this base class

  """
  raise NotImplementedError()
225
def GetInstanceInfo(self, instance_name, hvparams=None):
  """Get the properties of a single instance.

  Not implemented in the base class.

  @type instance_name: string
  @param instance_name: the instance name
  @type hvparams: dict of strings
  @param hvparams: hvparams to be used with this instance

  @rtype: (string, string, int, int, HvInstanceState, int)
  @return: tuple (name, id, memory, vcpus, state, times)
  @raise NotImplementedError: always, in this base class

  """
  raise NotImplementedError()
239
def GetAllInstancesInfo(self, hvparams=None):
  """Get the properties of all instances.

  Not implemented in the base class.

  @type hvparams: dict of strings
  @param hvparams: hypervisor parameter

  @rtype: list of (string, string, int, int, HvInstanceState, int)
  @return: list of tuples (name, id, memory, vcpus, state, times)
  @raise NotImplementedError: always, in this base class

  """
  raise NotImplementedError()
251
def GetNodeInfo(self, hvparams=None):
  """Return information about the node.

  Not implemented in the base class.

  @type hvparams: dict of strings
  @param hvparams: hypervisor parameters

  @return: a dict with at least the following keys (memory values in MiB):
        - memory_total: the total memory size on the node
        - memory_free: the available memory on the node for instances
        - memory_dom0: the memory used by the node itself, if available
        - cpu_total: total number of CPUs
        - cpu_dom0: number of CPUs used by the node OS
        - cpu_nodes: number of NUMA domains
        - cpu_sockets: number of physical CPU sockets
  @raise NotImplementedError: always, in this base class

  """
  raise NotImplementedError()
269
@classmethod
def GetInstanceConsole(cls, instance, primary_node, hvparams, beparams):
  """Return information for connecting to the console of an instance.

  Not implemented in the base class.

  @raise NotImplementedError: always, in this base class

  """
  raise NotImplementedError()
276
@classmethod
def GetAncillaryFiles(cls):
  """Return the ancillary configuration files to be copied to all nodes.

  The default implementation returns the C{ANCILLARY_FILES} and
  C{ANCILLARY_FILES_OPT} class attributes, so a hypervisor with a purely
  static list of files does not need to override this method.

  @rtype: (list of absolute paths, list of absolute paths)
  @return: (all files, optional files)

  """
  all_files = cls.ANCILLARY_FILES
  optional_files = cls.ANCILLARY_FILES_OPT

  # Every optional file must also be listed among the ancillary files
  assert set(all_files).issuperset(optional_files), \
    "Optional ancillary files must be a subset of ancillary files"

  return (all_files, optional_files)
292
def Verify(self, hvparams=None):
  """Verify the hypervisor.

  Not implemented in the base class.

  @type hvparams: dict of strings
  @param hvparams: hypervisor parameters to be verified against

  @return: Problem description if something is wrong, C{None} otherwise
  @raise NotImplementedError: always, in this base class

  """
  raise NotImplementedError()
303
def MigrationInfo(self, instance): # pylint: disable=R0201,W0613
  """Get the instance information needed to perform a migration.

  The base implementation assumes that no information is needed and
  returns an empty string.

  @type instance: L{objects.Instance}
  @param instance: instance to be migrated
  @rtype: string/data (opaque)
  @return: instance migration information - serialized form

  """
  no_info = ""
  return no_info
316
def AcceptInstance(self, instance, info, target):
  """Prepare to accept an instance.

  The base implementation assumes that no preparation is needed and does
  nothing.

  @type instance: L{objects.Instance}
  @param instance: instance to be accepted
  @type info: string/data (opaque)
  @param info: migration information, from the source node
  @type target: string
  @param target: target host (usually ip), on this node

  """
331
def BalloonInstanceMemory(self, instance, mem):
  """Balloon an instance memory to a certain value.

  Not implemented in the base class.

  @type instance: L{objects.Instance}
  @param instance: instance whose memory is to be ballooned
  @type mem: int
  @param mem: actual memory size to use for instance runtime
  @raise NotImplementedError: always, in this base class

  """
  raise NotImplementedError()
342
def FinalizeMigrationDst(self, instance, info, success):
  """Finalize the instance migration on the target node.

  Implementations should finalize or revert any preparation done to
  accept the instance. The base class does no preparation, so there is
  nothing to do here either.

  @type instance: L{objects.Instance}
  @param instance: instance whose migration is being finalized
  @type info: string/data (opaque)
  @param info: migration information, from the source node
  @type success: boolean
  @param success: whether the migration was a success or a failure

  """
358
def MigrateInstance(self, cluster_name, instance, target, live):
  """Migrate an instance.

  Not implemented in the base class.

  @type cluster_name: string
  @param cluster_name: name of the cluster
  @type instance: L{objects.Instance}
  @param instance: the instance to be migrated
  @type target: string
  @param target: hostname (usually ip) of the target node
  @type live: boolean
  @param live: whether to do a live or non-live migration
  @raise NotImplementedError: always, in this base class

  """
  raise NotImplementedError()
373
def FinalizeMigrationSource(self, instance, success, live):
  """Finalize the instance migration on the source node.

  The base implementation does nothing.

  @type instance: L{objects.Instance}
  @param instance: the instance that was migrated
  @type success: bool
  @param success: whether the migration succeeded or not
  @type live: bool
  @param live: whether the user requested a live migration or not

  """
386
def GetMigrationStatus(self, instance):
  """Get the migration status.

  Not implemented in the base class.

  @type instance: L{objects.Instance}
  @param instance: the instance that is being migrated
  @rtype: L{objects.MigrationStatus}
  @return: the status of the current migration (one of
           L{constants.HV_MIGRATION_VALID_STATUSES}), plus any additional
           progress info that can be retrieved from the hypervisor
  @raise NotImplementedError: always, in this base class

  """
  raise NotImplementedError()
399
def _InstanceStartupMemory(self, instance, hvparams=None):
  """Compute the amount of memory an instance should be started with.

  The value is the instance's maximum memory, capped by the free memory
  currently reported for the node, but never less than the instance's
  minimum memory.

  @type instance: L{objects.Instance}
  @param instance: the instance that is being started
  @param hvparams: passed through unchanged to L{GetNodeInfo}
  @rtype: integer
  @return: memory the instance should be started with

  """
  available = self.GetNodeInfo(hvparams=hvparams)["memory_free"]
  beparams = instance.beparams

  # Do not go above the maximum memory or what the node has free...
  ceiling = min(beparams[constants.BE_MAXMEM], available)
  # ...but the minimum memory always wins over the free-memory cap
  return max(beparams[constants.BE_MINMEM], ceiling)
417
@classmethod
def CheckParameterSyntax(cls, hvparams):
  """Check the syntax of the given parameters.

  Unknown parameters are rejected, every parameter declared in
  C{PARAMETERS} must be present, required parameters must have a
  non-false value, and non-false values must pass the parameter's
  syntax check function (if one is defined). Classes should extend, not
  replace, this function.

  @type hvparams: dict
  @param hvparams: dictionary with parameter names/value
  @raise errors.HypervisorError: when a parameter is not valid

  """
  known_params = cls.PARAMETERS

  for key in hvparams:
    if key not in known_params:
      raise errors.HypervisorError("Parameter '%s' is not supported" % key)

  # cheap tests that run on the master, should not access the world
  for name, (required, syntax_fn, errmsg, _, _) in known_params.items():
    if name not in hvparams:
      raise errors.HypervisorError("Parameter '%s' is missing" % name)

    value = hvparams[name]
    if not value:
      if required:
        raise errors.HypervisorError("Parameter '%s' is required but"
                                     " is currently not defined" % (name, ))
      # optional and unset: nothing to check
      continue

    if syntax_fn is not None and not syntax_fn(value):
      raise errors.HypervisorError("Parameter '%s' fails syntax"
                                   " check: %s (current value: '%s')" %
                                   (name, errmsg, value))
448
@classmethod
def ValidateParameters(cls, hvparams):
  """Check the validity of the given parameters.

  For every parameter declared in C{PARAMETERS}, the validity check
  function (if one is defined) is applied to the value found in
  C{hvparams}; optional parameters with a false value are skipped.
  Classes should extend, not replace, this function.

  @type hvparams: dict
  @param hvparams: dictionary with parameter names/value; it is indexed
      directly with every name declared in C{PARAMETERS}
  @raise errors.HypervisorError: when a parameter is not valid

  """
  for name, check in cls.PARAMETERS.items():
    (required, _, _, validate_fn, errmsg) = check
    value = hvparams[name]

    if not (required or value):
      # optional and unset: nothing to validate
      continue
    if validate_fn is None or validate_fn(value):
      continue

    raise errors.HypervisorError("Parameter '%s' fails"
                                 " validation: %s (current value: '%s')" %
                                 (name, errmsg, value))
469
@classmethod
def PowercycleNode(cls, hvparams=None):
  """Hard powercycle a node using hypervisor specific methods.

  Implementations should hard powercycle the node with whatever methods
  the hypervisor provides; this implies that all instances running on
  the node must be stopped too. Not implemented in the base class.

  @type hvparams: dict of strings
  @param hvparams: hypervisor params to be used on this node
  @raise NotImplementedError: always, in this base class

  """
  raise NotImplementedError()
483
484 @staticmethod
485 def GetLinuxNodeInfo(meminfo="/proc/meminfo", cpuinfo="/proc/cpuinfo"):
486 """For linux systems, return actual OS information.
487
488 This is an abstraction for all non-hypervisor-based classes, where
489 the node actually sees all the memory and CPUs via the /proc
490 interface and standard commands. The other case if for example
491 xen, where you only see the hardware resources via xen-specific
492 tools.
493
494 @param meminfo: name of the file containing meminfo
495 @type meminfo: string
496 @param cpuinfo: name of the file containing cpuinfo
497 @type cpuinfo: string
498 @return: a dict with the following keys (values in MiB):
499 - memory_total: the total memory size on the node
500 - memory_free: the available memory on the node for instances
501 - memory_dom0: the memory used by the node itself, if available
502 - cpu_total: total number of CPUs
503 - cpu_dom0: number of CPUs used by the node OS
504 - cpu_nodes: number of NUMA domains
505 - cpu_sockets: number of physical CPU sockets
506
507 """
508 try:
509 data = utils.ReadFile(meminfo).splitlines()
510 except EnvironmentError, err:
511 raise errors.HypervisorError("Failed to list node info: %s" % (err,))
512
513 result = {}
514 sum_free = 0
515 try:
516 for line in data:
517 splitfields = line.split(":", 1)
518
519 if len(splitfields) > 1:
520 key = splitfields[0].strip()
521 val = splitfields[1].strip()
522 if key == "MemTotal":
523 result["memory_total"] = int(val.split()[0]) / 1024
524 elif key in ("MemFree", "Buffers", "Cached"):
525 sum_free += int(val.split()[0]) / 1024
526 elif key == "Active":
527 result["memory_dom0"] = int(val.split()[0]) / 1024
528 except (ValueError, TypeError), err:
529 raise errors.HypervisorError("Failed to compute memory usage: %s" %
530 (err,))
531 result["memory_free"] = sum_free
532
533 cpu_total = 0
534 try:
535 fh = open(cpuinfo)
536 try:
537 cpu_total = len(re.findall(r"(?m)^processor\s*:\s*[0-9]+\s*$",
538 fh.read()))
539 finally:
540 fh.close()
541 except EnvironmentError, err:
542 raise errors.HypervisorError("Failed to list node info: %s" % (err,))
543 result["cpu_total"] = cpu_total
544 # We assume that the node OS can access all the CPUs
545 result["cpu_dom0"] = cpu_total
546 # FIXME: export correct data here
547 result["cpu_nodes"] = 1
548 result["cpu_sockets"] = 1
549
550 return result
551
@classmethod
def LinuxPowercycle(cls):
  """Linux-specific powercycle method.

  Writes "b" to C{/proc/sysrq-trigger}. If opening or writing that file
  fails with C{OSError}, the error is logged and C{reboot -n -f} is run
  as a fallback; a failure of that command is logged as well.

  """
  try:
    fd = os.open("/proc/sysrq-trigger", os.O_WRONLY)
    try:
      os.write(fd, "b")
    finally:
      # os.open returns a plain integer descriptor, which has no close()
      # method; it must be released through os.close
      os.close(fd)
  except OSError:
    logging.exception("Can't open the sysrq-trigger file")
    result = utils.RunCmd(["reboot", "-n", "-f"])
    # The result object itself is always true, so test its failure flag
    if result.failed:
      logging.error("Can't run shutdown: %s", result.output)
568
@staticmethod
def _FormatVerifyResults(msgs):
  """Format the verification results, given a list of errors.

  @param msgs: list of errors, possibly empty
  @return: the errors joined with "; " if there are any,
      C{None} otherwise

  """
  if not msgs:
    return None
  return "; ".join(msgs)
582
# pylint: disable=R0201,W0613
def HotAddDevice(self, instance, dev_type, device, extra, seq):
  """Hot-add a device.

  The base class has no hotplug support, so this always fails.

  @raise errors.HotplugError: always, in this base class

  """
  message = "Hotplug is not supported by this hypervisor"
  raise errors.HotplugError(message)
589
# pylint: disable=R0201,W0613
def HotDelDevice(self, instance, dev_type, device, extra, seq):
  """Hot-del a device.

  The base class has no hotplug support, so this always fails.

  @raise errors.HotplugError: always, in this base class

  """
  message = "Hotplug is not supported by this hypervisor"
  raise errors.HotplugError(message)
596
# pylint: disable=R0201,W0613
def HotModDevice(self, instance, dev_type, device, extra, seq):
  """Hot-mod a device.

  The base class has no hotplug support, so this always fails.

  @raise errors.HotplugError: always, in this base class

  """
  message = "Hotplug is not supported by this hypervisor"
  raise errors.HotplugError(message)
603
# pylint: disable=R0201,W0613
def VerifyHotplugSupport(self, instance, action, dev_type):
  """Verify that hotplug is supported.

  Given the target device and hotplug action, checks whether hotplug is
  actually supported. The base implementation rejects every request.

  @type instance: L{objects.Instance}
  @param instance: the instance object
  @type action: string
  @param action: one of the supported hotplug commands
  @type dev_type: string
  @param dev_type: one of the supported device types to hotplug
  @raise errors.HotplugError: if hotplugging is not supported (always,
      in this base class)

  """
  message = "Hotplug is not supported."
  raise errors.HotplugError(message)
621
def HotplugSupported(self, instance):
  """Check whether hotplug is supported.

  By default it is not: the base implementation always raises.
  Currently only the KVM hypervisor supports it.

  @raise errors.HotplugError: always, in this base class

  """
  message = "Hotplug is not supported by this hypervisor"
  raise errors.HotplugError(message)