Merge branch 'stable-2.16' into stable-2.17
[ganeti-github.git] / lib / cmdlib / misc.py
1 #
2 #
3
4 # Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011, 2012, 2013 Google Inc.
5 # All rights reserved.
6 #
7 # Redistribution and use in source and binary forms, with or without
8 # modification, are permitted provided that the following conditions are
9 # met:
10 #
11 # 1. Redistributions of source code must retain the above copyright notice,
12 # this list of conditions and the following disclaimer.
13 #
14 # 2. Redistributions in binary form must reproduce the above copyright
15 # notice, this list of conditions and the following disclaimer in the
16 # documentation and/or other materials provided with the distribution.
17 #
18 # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
19 # IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
20 # TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
21 # PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR
22 # CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
23 # EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
24 # PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
25 # PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
26 # LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
27 # NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
28 # SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29
30
31 """Miscellaneous logical units that don't fit into any category."""
32
33 import logging
34 import time
35
36 from ganeti import constants
37 from ganeti import errors
38 from ganeti import locking
39 from ganeti import qlang
40 from ganeti import query
41 from ganeti import utils
42 from ganeti.cmdlib.base import NoHooksLU, QueryBase
43 from ganeti.cmdlib.common import (
44 GetWantedNodes,
45 SupportsOob,
46 ExpandNodeUuidAndName
47 )
48
49
class LUOobCommand(NoHooksLU):
  """Logical unit for OOB handling.

  """
  REQ_BGL = False
  # Commands that must never implicitly include the master node: powering
  # off or power-cycling the master would take down the cluster master role.
  _SKIP_MASTER = (constants.OOB_POWER_OFF, constants.OOB_POWER_CYCLE)

  def ExpandNames(self):
    """Gather locks we need.

    If explicit node names were given, lock exactly those nodes;
    otherwise lock all nodes in the cluster.

    """
    if self.op.node_names:
      (self.op.node_uuids, self.op.node_names) = \
        GetWantedNodes(self, self.op.node_names)
      lock_node_uuids = self.op.node_uuids
    else:
      lock_node_uuids = locking.ALL_SET

    self.needed_locks = {
      locking.LEVEL_NODE: lock_node_uuids,
      }

  def CheckPrereq(self):
    """Check prerequisites.

    This checks:
     - the node exists in the configuration
     - OOB is supported

    Any errors are signaled by raising errors.OpPrereqError.

    """
    self.nodes = []
    self.master_node_uuid = self.cfg.GetMasterNode()
    master_node_obj = self.cfg.GetNodeInfo(self.master_node_uuid)

    assert self.op.power_delay >= 0.0

    if self.op.node_uuids:
      # Explicit node list: refuse destructive commands that include the
      # master node, telling the user how to run them directly if possible.
      if (self.op.command in self._SKIP_MASTER and
          master_node_obj.uuid in self.op.node_uuids):
        master_oob_handler = SupportsOob(self.cfg, master_node_obj)

        if master_oob_handler:
          additional_text = ("run '%s %s %s' if you want to operate on the"
                             " master regardless") % (master_oob_handler,
                                                      self.op.command,
                                                      master_node_obj.name)
        else:
          additional_text = "it does not support out-of-band operations"

        raise errors.OpPrereqError(("Operating on the master node %s is not"
                                    " allowed for %s; %s") %
                                   (master_node_obj.name, self.op.command,
                                    additional_text), errors.ECODE_INVAL)
    else:
      # No node list given: operate on all nodes, silently dropping the
      # master for commands that must not touch it.
      self.op.node_uuids = self.cfg.GetNodeList()
      if self.op.command in self._SKIP_MASTER:
        self.op.node_uuids.remove(master_node_obj.uuid)

    if self.op.command in self._SKIP_MASTER:
      assert master_node_obj.uuid not in self.op.node_uuids

    for node_uuid in self.op.node_uuids:
      node = self.cfg.GetNodeInfo(node_uuid)
      if node is None:
        raise errors.OpPrereqError("Node %s not found" % node_uuid,
                                   errors.ECODE_NOENT)

      self.nodes.append(node)

      # Powering off a node that is still online would lose its instances;
      # require it to be marked offline first unless explicitly overridden.
      if (not self.op.ignore_status and
          (self.op.command == constants.OOB_POWER_OFF and not node.offline)):
        raise errors.OpPrereqError(("Cannot power off node %s because it is"
                                    " not marked offline") % node.name,
                                   errors.ECODE_STATE)

  def Exec(self, feedback_fn):
    """Execute OOB and return result if we expect any.

    @return: per-node list of (status, payload) result entries

    """
    ret = []

    for idx, node in enumerate(utils.NiceSort(self.nodes,
                                              key=lambda node: node.name)):
      node_entry = [(constants.RS_NORMAL, node.name)]
      ret.append(node_entry)

      oob_program = SupportsOob(self.cfg, node)

      if not oob_program:
        node_entry.append((constants.RS_UNAVAIL, None))
        continue

      logging.info("Executing out-of-band command '%s' using '%s' on %s",
                   self.op.command, oob_program, node.name)
      # The OOB helper is always executed on the master node, targeting
      # the node being operated on.
      result = self.rpc.call_run_oob(self.master_node_uuid, oob_program,
                                     self.op.command, node.name,
                                     self.op.timeout)

      if result.fail_msg:
        self.LogWarning("Out-of-band RPC failed on node '%s': %s",
                        node.name, result.fail_msg)
        node_entry.append((constants.RS_NODATA, None))
        continue

      try:
        self._CheckPayload(result)
      # FIX: use "except ... as ..." (Python 2.6+/3 compatible) instead of
      # the Python-2-only "except errors.OpExecError, err" form.
      except errors.OpExecError as err:
        self.LogWarning("Payload returned by node '%s' is not valid: %s",
                        node.name, err)
        node_entry.append((constants.RS_NODATA, None))
      else:
        if self.op.command == constants.OOB_HEALTH:
          # For health we should log important events
          for item, status in result.payload:
            if status in [constants.OOB_STATUS_WARNING,
                          constants.OOB_STATUS_CRITICAL]:
              self.LogWarning("Item '%s' on node '%s' has status '%s'",
                              item, node.name, status)

        if self.op.command == constants.OOB_POWER_ON:
          node.powered = True
        elif self.op.command == constants.OOB_POWER_OFF:
          node.powered = False
        elif self.op.command == constants.OOB_POWER_STATUS:
          powered = result.payload[constants.OOB_POWER_STATUS_POWERED]
          if powered != node.powered:
            logging.warning(("Recorded power state (%s) of node '%s' does not"
                             " match actual power state (%s)"), node.powered,
                            node.name, powered)

        # For configuration changing commands we should update the node
        if self.op.command in (constants.OOB_POWER_ON,
                               constants.OOB_POWER_OFF):
          self.cfg.Update(node, feedback_fn)

        node_entry.append((constants.RS_NORMAL, result.payload))

        # Stagger power-on of consecutive nodes to avoid load spikes;
        # no delay after the last node.
        if (self.op.command == constants.OOB_POWER_ON and
            idx < len(self.nodes) - 1):
          time.sleep(self.op.power_delay)

    return ret

  def _CheckPayload(self, result):
    """Checks if the payload is valid.

    @param result: RPC result
    @raises errors.OpExecError: If payload is not valid

    """
    errs = []
    if self.op.command == constants.OOB_HEALTH:
      if not isinstance(result.payload, list):
        errs.append("command 'health' is expected to return a list but got %s" %
                    type(result.payload))
      else:
        for item, status in result.payload:
          if status not in constants.OOB_STATUSES:
            errs.append("health item '%s' has invalid status '%s'" %
                        (item, status))

    if self.op.command == constants.OOB_POWER_STATUS:
      if not isinstance(result.payload, dict):
        errs.append("power-status is expected to return a dict but got %s" %
                    type(result.payload))

    if self.op.command in [
      constants.OOB_POWER_ON,
      constants.OOB_POWER_OFF,
      constants.OOB_POWER_CYCLE,
      ]:
      if result.payload is not None:
        errs.append("%s is expected to not return payload but got '%s'" %
                    (self.op.command, result.payload))

    if errs:
      raise errors.OpExecError("Check of out-of-band payload failed due to %s" %
                               utils.CommaJoin(errs))
230
231
class ExtStorageQuery(QueryBase):
  """Query runner for ExtStorage providers.

  """
  FIELDS = query.EXTSTORAGE_FIELDS

  def ExpandNames(self, lu):
    # Lock all nodes in shared mode
    # Temporary removal of locks, should be reverted later
    # TODO: reintroduce locks when they are lighter-weight
    lu.needed_locks = {}
    #self.share_locks[locking.LEVEL_NODE] = 1
    #self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET

    # The following variables interact with _QueryBase._GetNames
    if self.names:
      self.wanted = [lu.cfg.GetNodeInfoByName(name).uuid for name in self.names]
    else:
      self.wanted = locking.ALL_SET

    self.do_locking = self.use_locking

  def DeclareLocks(self, lu, level):
    pass

  @staticmethod
  def _DiagnoseByProvider(rlist):
    """Remaps a per-node return list into a per-provider per-node dictionary

    @param rlist: a map with node uuids as keys and ExtStorage objects as values

    @rtype: dict
    @return: a dictionary with extstorage providers as keys and as
        value another map, with node uuids as keys and tuples of
        (path, status, diagnose, parameters) as values, eg::

          {"provider1": {"node_uuid1": [(/usr/lib/..., True, "", [])]
                         "node_uuid2": [(/srv/..., False, "missing file")]
                         "node_uuid3": [(/srv/..., True, "", [])]
          }

    """
    all_es = {}
    # we build here the list of nodes that didn't fail the RPC (at RPC
    # level), so that nodes with a non-responding node daemon don't
    # make all OSes invalid
    good_nodes = [node_uuid for node_uuid in rlist
                  if not rlist[node_uuid].fail_msg]
    for node_uuid, nr in rlist.items():
      if nr.fail_msg or not nr.payload:
        continue
      for (name, path, status, diagnose, params) in nr.payload:
        if name not in all_es:
          # build a list of nodes for this os containing empty lists
          # for each node in node_list
          all_es[name] = {}
          for nuuid in good_nodes:
            all_es[name][nuuid] = []
        # convert params from [name, help] to (name, help)
        params = [tuple(v) for v in params]
        all_es[name][node_uuid].append((path, status, diagnose, params))
    return all_es

  def _GetQueryData(self, lu):
    """Computes the list of nodes and their attributes.

    @return: list of L{query.ExtStorageInfo} objects in requested order

    """
    valid_nodes = [node.uuid
                   for node in lu.cfg.GetAllNodesInfo().values()
                   if not node.offline and node.vm_capable]
    pol = self._DiagnoseByProvider(lu.rpc.call_extstorage_diagnose(valid_nodes))

    data = {}

    nodegroup_list = lu.cfg.GetNodeGroupList()

    for (es_name, es_data) in pol.items():
      # For every provider compute the nodegroup validity.
      # To do this we need to check the validity of each node in es_data
      # and then construct the corresponding nodegroup dict:
      #      { nodegroup1: status
      #        nodegroup2: status
      #      }
      ndgrp_data = {}
      for nodegroup in nodegroup_list:
        ndgrp = lu.cfg.GetNodeGroup(nodegroup)

        nodegroup_nodes = ndgrp.members
        nodegroup_name = ndgrp.name
        node_statuses = []

        for node in nodegroup_nodes:
          if node in valid_nodes:
            # FIX: a node can be valid (online and vm_capable) yet missing
            # from es_data if its diagnose RPC failed; previously this
            # raised KeyError, now such nodes count as invalid.
            node_result = es_data.get(node, [])
            if node_result:
              node_status = node_result[0][1]
              node_statuses.append(node_status)
            else:
              node_statuses.append(False)

        # A nodegroup is valid for a provider only if all of its checked
        # members are valid.
        if False in node_statuses:
          ndgrp_data[nodegroup_name] = False
        else:
          ndgrp_data[nodegroup_name] = True

      # Compute the provider's parameters
      parameters = set()
      for idx, esl in enumerate(es_data.values()):
        valid = bool(esl and esl[0][1])
        if not valid:
          break

        node_params = esl[0][3]
        if idx == 0:
          # First entry
          parameters.update(node_params)
        else:
          # Filter out inconsistent values
          parameters.intersection_update(node_params)

      params = list(parameters)

      # Now fill all the info for this provider
      info = query.ExtStorageInfo(name=es_name, node_status=es_data,
                                  nodegroup_status=ndgrp_data,
                                  parameters=params)

      data[es_name] = info

    # Prepare data in requested order
    return [data[name] for name in self._GetNames(lu, pol.keys(), None)
            if name in data]
360
361
class LUExtStorageDiagnose(NoHooksLU):
  """Logical unit for ExtStorage diagnose/query.

  """
  REQ_BGL = False

  def CheckArguments(self):
    # Build the query helper once; it carries the name filter and the
    # requested output fields for the whole lifetime of this LU.
    name_filter = qlang.MakeSimpleFilter("name", self.op.names)
    self.eq = ExtStorageQuery(name_filter, self.op.output_fields, False)

  def ExpandNames(self):
    # Lock computation is delegated to the query helper.
    self.eq.ExpandNames(self)

  def Exec(self, feedback_fn):
    # Execution is delegated too; results come back in old-style format.
    return self.eq.OldStyleQuery(self)
377
378
class LURestrictedCommand(NoHooksLU):
  """Logical unit for executing restricted commands.

  """
  REQ_BGL = False

  def ExpandNames(self):
    if self.op.nodes:
      (self.op.node_uuids, self.op.nodes) = GetWantedNodes(self, self.op.nodes)

    self.needed_locks = {
      locking.LEVEL_NODE: self.op.node_uuids,
      }
    self.share_locks = {
      locking.LEVEL_NODE: not self.op.use_locking,
      }

  def CheckPrereq(self):
    """Check prerequisites.

    """

  def Exec(self, feedback_fn):
    """Execute restricted command and return output.

    """
    owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))

    # Check if correct locks are held
    assert set(self.op.node_uuids).issubset(owned_nodes)

    rpcres = self.rpc.call_restricted_command(self.op.node_uuids,
                                              self.op.command)

    def _MakeEntry(uuid):
      # Translate one per-node RPC result into a (success, data) pair.
      nres = rpcres[uuid]
      if nres.fail_msg:
        msg = ("Command '%s' on node '%s' failed: %s" %
               (self.op.command, self.cfg.GetNodeName(uuid),
                nres.fail_msg))
        return (False, msg)
      return (True, nres.payload)

    return [_MakeEntry(uuid) for uuid in self.op.node_uuids]
426
427
class LURepairCommand(NoHooksLU):
  """Logical unit for executing repair commands.

  """
  REQ_BGL = False

  def ExpandNames(self):
    (self.node_uuid, _) = ExpandNodeUuidAndName(self.cfg, None,
                                                self.op.node_name)

    # Exclusive lock on the single target node.
    self.needed_locks = {
      locking.LEVEL_NODE: self.node_uuid,
      }
    self.share_locks = {
      locking.LEVEL_NODE: False,
      }

  def CheckPrereq(self):
    """Check prerequisites.

    """

  def Exec(self, feedback_fn):
    """Execute restricted command and return output.

    """
    assert self.node_uuid in frozenset(self.owned_locks(locking.LEVEL_NODE))
    result = self.rpc.call_repair_command(self.op.node_name,
                                          self.op.command,
                                          self.op.input)
    return result.data[1]