#
#

# Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011, 2012, 2013 Google Inc.
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
# General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
# 02110-1301, USA.

22 """Utility function mainly, but not only used by instance LU's."""

import logging
import os

from ganeti import constants
from ganeti import errors
from ganeti import locking
from ganeti import network
from ganeti import objects
from ganeti import pathutils
from ganeti import utils

from ganeti.cmdlib.common import _AnnotateDiskParams


def _BuildInstanceHookEnv(name, primary_node, secondary_nodes, os_type, status,
                          minmem, maxmem, vcpus, nics, disk_template, disks,
                          bep, hvp, hypervisor_name, tags):
41 """Builds instance related env variables for hooks
42
43 This builds the hook environment from individual variables.
44
45 @type name: string
46 @param name: the name of the instance
47 @type primary_node: string
48 @param primary_node: the name of the instance's primary node
49 @type secondary_nodes: list
50 @param secondary_nodes: list of secondary nodes as strings
51 @type os_type: string
52 @param os_type: the name of the instance's OS
53 @type status: string
54 @param status: the desired status of the instance
55 @type minmem: string
56 @param minmem: the minimum memory size of the instance
57 @type maxmem: string
58 @param maxmem: the maximum memory size of the instance
59 @type vcpus: string
60 @param vcpus: the count of VCPUs the instance has
61 @type nics: list
62 @param nics: list of tuples (name, uuid, ip, mac, mode, link, net, netinfo)
63 representing the NICs the instance has
64 @type disk_template: string
65 @param disk_template: the disk template of the instance
66 @type disks: list
67 @param disks: list of tuples (name, uuid, size, mode)
68 @type bep: dict
69 @param bep: the backend parameters for the instance
70 @type hvp: dict
71 @param hvp: the hypervisor parameters for the instance
72 @type hypervisor_name: string
73 @param hypervisor_name: the hypervisor for the instance
74 @type tags: list
75 @param tags: list of instance tags as strings
76 @rtype: dict
77 @return: the hook environment for this instance
78
79 """
  env = {
    "OP_TARGET": name,
    "INSTANCE_NAME": name,
    "INSTANCE_PRIMARY": primary_node,
    "INSTANCE_SECONDARIES": " ".join(secondary_nodes),
    "INSTANCE_OS_TYPE": os_type,
    "INSTANCE_STATUS": status,
    "INSTANCE_MINMEM": minmem,
    "INSTANCE_MAXMEM": maxmem,
    # TODO(2.9) remove deprecated "memory" value
    "INSTANCE_MEMORY": maxmem,
    "INSTANCE_VCPUS": vcpus,
    "INSTANCE_DISK_TEMPLATE": disk_template,
    "INSTANCE_HYPERVISOR": hypervisor_name,
  }
  if nics:
    nic_count = len(nics)
    for idx, (name, _, ip, mac, mode, link, net, netinfo) in enumerate(nics):
      if ip is None:
        ip = ""
      env["INSTANCE_NIC%d_NAME" % idx] = name
      env["INSTANCE_NIC%d_IP" % idx] = ip
      env["INSTANCE_NIC%d_MAC" % idx] = mac
      env["INSTANCE_NIC%d_MODE" % idx] = mode
      env["INSTANCE_NIC%d_LINK" % idx] = link
      if netinfo:
        nobj = objects.Network.FromDict(netinfo)
        env.update(nobj.HooksDict("INSTANCE_NIC%d_" % idx))
      elif net:
        # FIXME: broken network reference: the instance NIC specifies a
        # network, but the relevant network entry was not in the config. This
        # should be made impossible.
        env["INSTANCE_NIC%d_NETWORK_NAME" % idx] = net
      if mode == constants.NIC_MODE_BRIDGED:
        env["INSTANCE_NIC%d_BRIDGE" % idx] = link
  else:
    nic_count = 0

  env["INSTANCE_NIC_COUNT"] = nic_count

  if disks:
    disk_count = len(disks)
    for idx, (name, size, mode) in enumerate(disks):
      env["INSTANCE_DISK%d_NAME" % idx] = name
      env["INSTANCE_DISK%d_SIZE" % idx] = size
      env["INSTANCE_DISK%d_MODE" % idx] = mode
  else:
    disk_count = 0

  env["INSTANCE_DISK_COUNT"] = disk_count

  if not tags:
    tags = []

  env["INSTANCE_TAGS"] = " ".join(tags)

  for source, kind in [(bep, "BE"), (hvp, "HV")]:
    for key, value in source.items():
      env["INSTANCE_%s_%s" % (kind, key)] = value

  return env


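# Example of the resulting environment (hypothetical values, shown for a
# single-NIC, single-disk instance; not an exhaustive key list):
#
#   {
#     "OP_TARGET": "inst1.example.com",
#     "INSTANCE_NAME": "inst1.example.com",
#     "INSTANCE_PRIMARY": "node1.example.com",
#     "INSTANCE_NIC_COUNT": 1,
#     "INSTANCE_NIC0_MAC": "aa:00:00:35:d2:77",
#     "INSTANCE_DISK_COUNT": 1,
#     "INSTANCE_DISK0_SIZE": 1024,
#     ...
#   }
#
# Hook scripts receive these as process environment variables.

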
def _BuildInstanceHookEnvByObject(lu, instance, override=None):
  """Builds instance-related env variables for hooks from an object.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instance: L{objects.Instance}
  @param instance: the instance for which we should build the
      environment
  @type override: dict
  @param override: dictionary with key/values that will override
      our values
  @rtype: dict
  @return: the hook environment dictionary

  """
  cluster = lu.cfg.GetClusterInfo()
  bep = cluster.FillBE(instance)
  hvp = cluster.FillHV(instance)
  args = {
    "name": instance.name,
    "primary_node": instance.primary_node,
    "secondary_nodes": instance.secondary_nodes,
    "os_type": instance.os,
    "status": instance.admin_state,
    "maxmem": bep[constants.BE_MAXMEM],
    "minmem": bep[constants.BE_MINMEM],
    "vcpus": bep[constants.BE_VCPUS],
    "nics": _NICListToTuple(lu, instance.nics),
    "disk_template": instance.disk_template,
    "disks": [(disk.name, disk.size, disk.mode)
              for disk in instance.disks],
    "bep": bep,
    "hvp": hvp,
    "hypervisor_name": instance.hypervisor,
    "tags": instance.tags,
  }
  if override:
    args.update(override)
  return _BuildInstanceHookEnv(**args)  # pylint: disable=W0142
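# Sketch of typical usage from a logical unit (hypothetical example in the
# style of the instance LUs in ganeti.cmdlib). Note that "override" replaces
# arguments to _BuildInstanceHookEnv (e.g. "status"), not final env keys:
#
#   def BuildHooksEnv(self):
#     return _BuildInstanceHookEnvByObject(self, self.instance,
#                                          override={"status": "down"})

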
def _GetClusterDomainSecret():
  """Reads the cluster domain secret.

  """
  return utils.ReadOneLineFile(pathutils.CLUSTER_DOMAIN_SECRET_FILE,
                               strict=True)


def _CheckNodeNotDrained(lu, node):
  """Ensure that a given node is not drained.

  @param lu: the LU on behalf of which we make the check
  @param node: the node to check
  @raise errors.OpPrereqError: if the node is drained

  """
  if lu.cfg.GetNodeInfo(node).drained:
    raise errors.OpPrereqError("Can't use drained node %s" % node,
                               errors.ECODE_STATE)


def _StartInstanceDisks(lu, instance, force):
  """Start the disks of an instance.

  """
  disks_ok, _ = _AssembleInstanceDisks(lu, instance,
                                       ignore_secondaries=force)
  if not disks_ok:
    _ShutdownInstanceDisks(lu, instance)
    if force is not None and not force:
      lu.LogWarning("",
                    hint=("If the message above refers to a secondary node,"
                          " you can retry the operation using '--force'"))
    raise errors.OpExecError("Disk consistency error")


def _ShutdownInstanceDisks(lu, instance, disks=None, ignore_primary=False):
  """Shutdown block devices of an instance.

  This does the shutdown on all nodes of the instance.

  If ignore_primary is true, errors on the primary node are ignored;
  otherwise they make the operation fail.

  """
  all_result = True
  disks = _ExpandCheckDisks(instance, disks)

  for disk in disks:
    for node, top_disk in disk.ComputeNodeTree(instance.primary_node):
      lu.cfg.SetDiskID(top_disk, node)
      result = lu.rpc.call_blockdev_shutdown(node, (top_disk, instance))
      msg = result.fail_msg
      if msg:
        lu.LogWarning("Could not shutdown block device %s on node %s: %s",
                      disk.iv_name, node, msg)
        if ((node == instance.primary_node and not ignore_primary) or
            (node != instance.primary_node and not result.offline)):
          all_result = False
  return all_result


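# Illustrative caller (assumed names, not part of this module):
#
#   if not _ShutdownInstanceDisks(lu, instance):
#     raise errors.OpExecError("Can't shut down instance's disks")
#
# Any "disks" argument must be a subset of instance.disks; see
# _ExpandCheckDisks below.

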
def _RemoveInstance(lu, feedback_fn, instance, ignore_failures):
  """Utility function to remove an instance.

  """
  logging.info("Removing block devices for instance %s", instance.name)

  if not _RemoveDisks(lu, instance, ignore_failures=ignore_failures):
    if not ignore_failures:
      raise errors.OpExecError("Can't remove instance's disks")
    feedback_fn("Warning: can't remove instance's disks")

  logging.info("Removing instance %s out of cluster config", instance.name)

  lu.cfg.RemoveInstance(instance.name)

  assert not lu.remove_locks.get(locking.LEVEL_INSTANCE), \
    "Instance lock removal conflict"

  # Remove lock for the instance
  lu.remove_locks[locking.LEVEL_INSTANCE] = instance.name


def _AssembleInstanceDisks(lu, instance, disks=None, ignore_secondaries=False,
                           ignore_size=False):
  """Prepare the block devices for an instance.

  This sets up the block devices on all nodes.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instance: L{objects.Instance}
  @param instance: the instance for whose disks we assemble
  @type disks: list of L{objects.Disk} or None
  @param disks: which disks to assemble (or all, if None)
  @type ignore_secondaries: boolean
  @param ignore_secondaries: if true, errors on secondary nodes
      won't result in an error return from the function
  @type ignore_size: boolean
  @param ignore_size: if true, the current known size of the disk
      will not be used during the disk activation, useful for cases
      when the size is wrong
  @return: a tuple (disks_ok, device_info), where disks_ok is False if the
      operation failed and device_info is a list of
      (host, instance_visible_name, node_visible_name) tuples with the
      mapping from node devices to instance devices

  """
  device_info = []
  disks_ok = True
  iname = instance.name
  disks = _ExpandCheckDisks(instance, disks)

  # With the two-pass mechanism we try to reduce the window of
  # opportunity for the race condition of switching DRBD to primary
  # before the handshake occurred, but we do not eliminate it

  # The proper fix would be to wait (with some limits) until the
  # connection has been made and drbd transitions from WFConnection
  # into any other network-connected state (Connected, SyncTarget,
  # SyncSource, etc.)

  # 1st pass, assemble on all nodes in secondary mode
  for idx, inst_disk in enumerate(disks):
    for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
      if ignore_size:
        node_disk = node_disk.Copy()
        node_disk.UnsetSize()
      lu.cfg.SetDiskID(node_disk, node)
      result = lu.rpc.call_blockdev_assemble(node, (node_disk, instance), iname,
                                             False, idx)
      msg = result.fail_msg
      if msg:
        is_offline_secondary = (node in instance.secondary_nodes and
                                result.offline)
        lu.LogWarning("Could not prepare block device %s on node %s"
                      " (is_primary=False, pass=1): %s",
                      inst_disk.iv_name, node, msg)
        if not (ignore_secondaries or is_offline_secondary):
          disks_ok = False

  # FIXME: race condition on drbd migration to primary

  # 2nd pass, do only the primary node
  for idx, inst_disk in enumerate(disks):
    dev_path = None

    for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
      if node != instance.primary_node:
        continue
      if ignore_size:
        node_disk = node_disk.Copy()
        node_disk.UnsetSize()
      lu.cfg.SetDiskID(node_disk, node)
      result = lu.rpc.call_blockdev_assemble(node, (node_disk, instance), iname,
                                             True, idx)
      msg = result.fail_msg
      if msg:
        lu.LogWarning("Could not prepare block device %s on node %s"
                      " (is_primary=True, pass=2): %s",
                      inst_disk.iv_name, node, msg)
        disks_ok = False
      else:
        dev_path = result.payload

    device_info.append((instance.primary_node, inst_disk.iv_name, dev_path))

  # leave the disks configured for the primary node
  # this is a workaround that would be fixed better by
  # improving the logical/physical id handling
  for disk in disks:
    lu.cfg.SetDiskID(disk, instance.primary_node)

  return disks_ok, device_info


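# Illustrative caller (assumed names): the usual pattern is to check disks_ok
# and roll back on failure, as _StartInstanceDisks above does:
#
#   disks_ok, device_info = _AssembleInstanceDisks(lu, instance)
#   if not disks_ok:
#     _ShutdownInstanceDisks(lu, instance)
#     raise errors.OpExecError("Cannot activate block devices")
#   for node, iv_name, dev_path in device_info:
#     lu.LogInfo("Disk %s on %s is visible as %s", iv_name, node, dev_path)

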
def _RemoveDisks(lu, instance, target_node=None, ignore_failures=False):
  """Remove all disks for an instance.

  This abstracts away some work from `AddInstance()` and
  `RemoveInstance()`. Note that in case some of the devices couldn't
  be removed, the removal will continue with the other ones.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instance: L{objects.Instance}
  @param instance: the instance whose disks we should remove
  @type target_node: string
  @param target_node: used to override the node on which to remove the disks
  @type ignore_failures: boolean
  @param ignore_failures: if true, DRBD ports are released even when some
      disk removals failed
  @rtype: boolean
  @return: the success of the removal

  """
  logging.info("Removing block devices for instance %s", instance.name)

  all_result = True
  ports_to_release = set()
  anno_disks = _AnnotateDiskParams(instance, instance.disks, lu.cfg)
  for (idx, device) in enumerate(anno_disks):
    if target_node:
      edata = [(target_node, device)]
    else:
      edata = device.ComputeNodeTree(instance.primary_node)
    for node, disk in edata:
      lu.cfg.SetDiskID(disk, node)
      result = lu.rpc.call_blockdev_remove(node, disk)
      if result.fail_msg:
        lu.LogWarning("Could not remove disk %s on node %s,"
                      " continuing anyway: %s", idx, node, result.fail_msg)
        if not (result.offline and node != instance.primary_node):
          all_result = False

    # if this is a DRBD disk, return its port to the pool
    if device.dev_type in constants.LDS_DRBD:
      ports_to_release.add(device.logical_id[2])

  if all_result or ignore_failures:
    for port in ports_to_release:
      lu.cfg.AddTcpUdpPort(port)

  if instance.disk_template in constants.DTS_FILEBASED:
    file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
    if target_node:
      tgt = target_node
    else:
      tgt = instance.primary_node
    result = lu.rpc.call_file_storage_dir_remove(tgt, file_storage_dir)
    if result.fail_msg:
      lu.LogWarning("Could not remove directory '%s' on node %s: %s",
                    file_storage_dir, tgt, result.fail_msg)
      all_result = False

  return all_result


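# Note on the port handling above: DRBD ports are collected first and only
# returned to the cluster's TCP/UDP port pool once all removals succeeded (or
# ignore_failures is set), so a partially removed instance does not release
# ports that leftover DRBD devices may still be using.

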
def _ExpandCheckDisks(instance, disks):
  """Return the instance disks selected by the disks list.

  @type disks: list of L{objects.Disk} or None
  @param disks: selected disks
  @rtype: list of L{objects.Disk}
  @return: selected instance disks to act on

  """
  if disks is None:
    return instance.disks
  else:
    if not set(disks).issubset(instance.disks):
      raise errors.ProgrammerError("Can only act on disks belonging to the"
                                   " target instance")
    return disks


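# Example (hypothetical disks d0 and d1 belonging to "instance"):
#
#   _ExpandCheckDisks(instance, None)  => [d0, d1] (all instance disks)
#   _ExpandCheckDisks(instance, [d1])  => [d1]
#   _ExpandCheckDisks(instance, [other_disk])  => errors.ProgrammerError

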
def _NICToTuple(lu, nic):
  """Build a tuple of nic information.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type nic: L{objects.NIC}
  @param nic: nic to convert to hooks tuple

  """
  cluster = lu.cfg.GetClusterInfo()
  filled_params = cluster.SimpleFillNIC(nic.nicparams)
  mode = filled_params[constants.NIC_MODE]
  link = filled_params[constants.NIC_LINK]
  netinfo = None
  if nic.network:
    nobj = lu.cfg.GetNetwork(nic.network)
    netinfo = objects.Network.ToDict(nobj)
  return (nic.name, nic.uuid, nic.ip, nic.mac, mode, link, nic.network, netinfo)


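# The returned tuple has the NIC shape _BuildInstanceHookEnv expects, e.g.
# (hypothetical values):
#
#   ("nic0", "9e027163-ef2d-4d88-a312-3b8a1a8a0d2a", "198.51.100.4",
#    "aa:00:00:35:d2:77", constants.NIC_MODE_BRIDGED, "br0", "net1", netinfo)
#
# where netinfo is either None or the dict form of the L{objects.Network}
# the NIC is connected to.

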
def _NICListToTuple(lu, nics):
  """Build a list of nic information tuples.

  This list is suitable to be passed to _BuildInstanceHookEnv or as a return
  value in LUInstanceQueryData.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type nics: list of L{objects.NIC}
  @param nics: list of nics to convert to hooks tuples

  """
  hooks_nics = []
  for nic in nics:
    hooks_nics.append(_NICToTuple(lu, nic))
  return hooks_nics
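
# Illustrative call (assumed caller), mirroring how
# _BuildInstanceHookEnvByObject above consumes this helper:
#
#   hooks_nics = _NICListToTuple(lu, instance.nics)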