Merge branch 'stable-2.12' into stable-2.13
[ganeti-github.git] / qa / ganeti-qa.py
1 #!/usr/bin/python -u
2 #
3
4 # Copyright (C) 2007, 2008, 2009, 2010, 2011, 2012, 2013 Google Inc.
5 # All rights reserved.
6 #
7 # Redistribution and use in source and binary forms, with or without
8 # modification, are permitted provided that the following conditions are
9 # met:
10 #
11 # 1. Redistributions of source code must retain the above copyright notice,
12 # this list of conditions and the following disclaimer.
13 #
14 # 2. Redistributions in binary form must reproduce the above copyright
15 # notice, this list of conditions and the following disclaimer in the
16 # documentation and/or other materials provided with the distribution.
17 #
18 # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
19 # IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
20 # TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
21 # PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR
22 # CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
23 # EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
24 # PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
25 # PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
26 # LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
27 # NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
28 # SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29
30
31 """Script for doing QA on Ganeti.
32
33 """
34
35 # pylint: disable=C0103
36 # due to invalid name
37
38 import copy
39 import datetime
40 import optparse
41 import sys
42
43 import colors
44 import qa_cluster
45 import qa_config
46 import qa_daemon
47 import qa_env
48 import qa_error
49 import qa_filters
50 import qa_group
51 import qa_instance
52 import qa_iptables
53 import qa_monitoring
54 import qa_network
55 import qa_node
56 import qa_os
57 import qa_performance
58 import qa_job
59 import qa_rapi
60 import qa_tags
61 import qa_utils
62
63 from ganeti import utils
64 from ganeti import rapi # pylint: disable=W0611
65 from ganeti import constants
66 from ganeti import netutils
67
68 import ganeti.rapi.client # pylint: disable=W0611
69 from ganeti.rapi.client import UsesRapiClient
70
71
def _FormatHeader(line, end=72, mark="-", color=None):
  """Build a decorated header line padded up to a given column.

  The text is prefixed with four copies of C{mark}, padded on the right with
  dashes up to column C{end}, stripped of trailing whitespace and finally
  passed through C{colors.colorize}.

  @param line: text to embed in the header
  @param end: column up to which the header is padded
  @param mark: character repeated four times in front of the text
  @param color: handed over to C{colors.colorize} as its C{color} argument
  @return: the value returned by C{colors.colorize}

  """
  header = "".join([mark * 4, " ", line, " "])
  # A non-positive count yields no padding at all; in that case the trailing
  # blank is what rstrip() removes
  padding = "-" * (end - len(header))
  return colors.colorize((header + padding).rstrip(), color=color)
81
82
83 def _DescriptionOf(fn):
84 """Computes the description of an item.
85
86 """
87 if fn.__doc__:
88 desc = fn.__doc__.splitlines()[0].strip()
89 desc = desc.rstrip(".")
90 if fn.__name__:
91 desc = "[" + fn.__name__ + "] " + desc
92 else:
93 desc = "%r" % fn
94
95 return desc
96
97
def RunTest(fn, *args, **kwargs):
  """Execute a single test, framing its output with header lines.

  A start header is printed first. Once the test is over either a PASSED or
  a FAILED header is printed, always followed by a header reporting the
  elapsed time.

  @param fn: callable implementing the test; it receives C{args} and
      C{kwargs}
  @return: the value returned by C{fn}
  @raise Exception: errors raised by C{fn} are reported and re-raised

  """
  started = datetime.datetime.now()
  desc = _DescriptionOf(fn)

  # Blank separator line before the start header
  print("")
  start_msg = "%s start %s" % (started, desc)
  print(_FormatHeader(start_msg, color=colors.YELLOW, mark="<"))

  try:
    result = fn(*args, **kwargs)
    print(_FormatHeader("PASSED %s" % (desc, ), color=colors.GREEN))
    return result
  except Exception as err:
    print(_FormatHeader("FAILED %s: %s" % (desc, err), color=colors.RED))
    raise
  finally:
    # The timing footer is printed for passed and failed tests alike
    finished = datetime.datetime.now()
    elapsed = finished - started
    stop_msg = "%s time=%s %s" % (finished, elapsed, desc)
    print(_FormatHeader(stop_msg, color=colors.MAGENTA, mark=">"))
123
124
def ReportTestSkip(desc, testnames):
  """Print a header line stating that something has been skipped.

  @type desc: string
  @param desc: description of what is being skipped
  @type testnames: string or list of string
  @param testnames: either a single test name in the configuration
      file, or a list of testnames (which will be AND-ed together)

  """
  now = datetime.datetime.now()
  # TODO: Formatting test names when non-string names are involved
  message = "%s skipping %s, test(s) %s disabled" % (now, desc, testnames)
  print(_FormatHeader(message, color=colors.BLUE, mark="*"))
140
141
def RunTestIf(testnames, fn, *args, **kwargs):
  """Run a test only if it is enabled, otherwise report it as skipped.

  @param testnames: either a single test name in the configuration
      file, or a list of testnames (which will be AND-ed together)
  @param fn: callable implementing the test; it receives C{args} and
      C{kwargs}

  """
  if not qa_config.TestEnabled(testnames):
    ReportTestSkip(_DescriptionOf(fn), testnames)
    return

  RunTest(fn, *args, **kwargs)
154
155
def RunTestBlock(fn, *args, **kwargs):
  """Execute a group of tests, framing its output with bold header lines.

  Unlike L{RunTest} no header is printed on success; only the start header,
  the failure header (if any) and the timing footer are emitted.

  @param fn: callable running the tests of the block; it receives C{args}
      and C{kwargs}
  @return: the value returned by C{fn}
  @raise Exception: errors raised by C{fn} are reported and re-raised

  """
  started = datetime.datetime.now()
  desc = _DescriptionOf(fn)

  # Blank separator line before the start header
  print("")
  start_msg = "BLOCK %s start %s" % (started, desc)
  print(_FormatHeader(start_msg, color=[colors.YELLOW, colors.BOLD],
                      mark="v"))

  try:
    return fn(*args, **kwargs)
  except Exception as err:
    fail_msg = "BLOCK FAILED %s: %s" % (desc, err)
    print(_FormatHeader(fail_msg, color=[colors.RED, colors.BOLD]))
    raise
  finally:
    finished = datetime.datetime.now()
    elapsed = finished - started
    stop_msg = "BLOCK %s time=%s %s" % (finished, elapsed, desc)
    print(_FormatHeader(stop_msg, color=[colors.MAGENTA, colors.BOLD],
                        mark="^"))
179
180
def RunEnvTests():
  """Run several environment tests.

  All of them depend on the C{env} test being enabled.

  """
  env_tests = (
    qa_env.TestSshConnection,
    qa_env.TestIcmpPing,
    qa_env.TestGanetiCommands,
    )
  for test_fn in env_tests:
    RunTestIf("env", test_fn)
188
189
def SetupCluster():
  """Initializes the cluster.

  Cluster initialization and node addition only happen when the
  C{create-cluster} test is enabled; otherwise the cluster and its nodes are
  taken as already being in place. Several listing and job queue tests are
  run along the way, and the watcher is resumed unconditionally.

  """
  RunTestIf("create-cluster", qa_cluster.TestClusterInit)
  if not qa_config.TestEnabled("create-cluster"):
    # If the cluster is already in place, we assume that exclusive-storage is
    # already set according to the configuration
    configured_es = qa_config.get("exclusive-storage", False)
    qa_config.SetExclusiveStorage(configured_es)

  qa_rapi.SetupRapi()
  qa_group.ConfigureGroups()

  # Test on empty cluster
  empty_cluster_tests = [
    ("node-list", qa_node.TestNodeList),
    ("instance-list", qa_instance.TestInstanceList),
    ("job-list", qa_job.TestJobList),
    ]
  for (test_name, test_fn) in empty_cluster_tests:
    RunTestIf(test_name, test_fn)

  RunTestIf("create-cluster", qa_node.TestNodeAddAll)
  if not qa_config.TestEnabled("create-cluster"):
    # consider the nodes are already there
    qa_node.MarkNodeAddedAll()

  for test_fn in (qa_cluster.TestJobqueue, qa_job.TestJobCancellation):
    RunTestIf("test-jobqueue", test_fn)

  # enable the watcher (unconditionally)
  RunTest(qa_daemon.TestResumeWatcher)

  # Node listing once more, then the listing of fields and the node info
  populated_cluster_tests = [
    ("node-list", qa_node.TestNodeList),
    ("node-list", qa_node.TestNodeListFields),
    ("instance-list", qa_instance.TestInstanceListFields),
    ("job-list", qa_job.TestJobListFields),
    ("instance-export", qa_instance.TestBackupListFields),
    ("node-info", qa_node.TestNodeInfo),
    ]
  for (test_name, test_fn) in populated_cluster_tests:
    RunTestIf(test_name, test_fn)
230
231
def RunClusterTests():
  """Runs tests related to gnt-cluster.

  Each entry of the tables below pairs the condition handed to L{RunTestIf}
  with the test function to run.

  """
  init_tests = [
    ("create-cluster", qa_cluster.TestClusterInitDisk),
    ("cluster-renew-crypto", qa_cluster.TestClusterRenewCrypto),
    ]
  for (condition, test_fn) in init_tests:
    RunTestIf(condition, test_fn)

  no_virtual_copyfile = ["cluster-copyfile", qa_config.NoVirtualCluster]
  no_virtual_oob = ["cluster-oob", qa_config.NoVirtualCluster]

  cluster_tests = [
    ("cluster-verify", qa_cluster.TestClusterVerify),
    ("cluster-reserved-lvs", qa_cluster.TestClusterReservedLvs),
    # TODO: add more cluster modify tests
    ("cluster-modify", qa_cluster.TestClusterModifyEmpty),
    ("cluster-modify", qa_cluster.TestClusterModifyIPolicy),
    ("cluster-modify", qa_cluster.TestClusterModifyISpecs),
    ("cluster-modify", qa_cluster.TestClusterModifyBe),
    ("cluster-modify", qa_cluster.TestClusterModifyDisk),
    ("cluster-modify", qa_cluster.TestClusterModifyDiskTemplates),
    ("cluster-modify", qa_cluster.TestClusterModifyFileStorageDir),
    ("cluster-modify", qa_cluster.TestClusterModifySharedFileStorageDir),
    ("cluster-modify", qa_cluster.TestClusterModifyInstallImage),
    ("cluster-modify", qa_cluster.TestClusterModifyUserShutdown),
    ("cluster-rename", qa_cluster.TestClusterRename),
    ("cluster-info", qa_cluster.TestClusterVersion),
    ("cluster-info", qa_cluster.TestClusterInfo),
    ("cluster-info", qa_cluster.TestClusterGetmaster),
    ("cluster-redist-conf", qa_cluster.TestClusterRedistConf),
    (no_virtual_copyfile, qa_cluster.TestClusterCopyfile),
    ("cluster-command", qa_cluster.TestClusterCommand),
    ("cluster-burnin", qa_cluster.TestClusterBurnin),
    ("cluster-master-failover", qa_cluster.TestClusterMasterFailover),
    ("cluster-master-failover",
     qa_cluster.TestClusterMasterFailoverWithDrainedQueue),
    (no_virtual_oob, qa_cluster.TestClusterOob),
    ("cluster-instance-communication", qa_cluster.TestInstanceCommunication),
    (qa_rapi.Enabled, qa_rapi.TestVersion),
    (qa_rapi.Enabled, qa_rapi.TestEmptyCluster),
    (qa_rapi.Enabled, qa_rapi.TestRapiQuery),
    ]
  for (condition, test_fn) in cluster_tests:
    RunTestIf(condition, test_fn)
276
277
def RunRepairDiskSizes():
  """Run the repair disk-sizes test.

  The test only runs if C{cluster-repair-disk-sizes} is enabled.

  """
  repair_test = qa_cluster.TestClusterRepairDiskSizes
  RunTestIf("cluster-repair-disk-sizes", repair_test)
283
284
def RunOsTests():
  """Runs all tests related to gnt-os.

  Every test is conditional on C{os} and on C{qa_config.NoVirtualCluster}.
  The validity tests additionally receive C{qa_rapi.GetOperatingSystems}, or
  C{None} if RAPI is not enabled.

  """
  os_enabled = ["os", qa_config.NoVirtualCluster]

  rapi_getos = None
  if qa_config.TestEnabled(qa_rapi.Enabled):
    rapi_getos = qa_rapi.GetOperatingSystems

  # (test function, extra positional arguments) in execution order
  plan = [
    (qa_os.TestOsList, ()),
    (qa_os.TestOsDiagnose, ()),
    (qa_os.TestOsValid, (rapi_getos, )),
    (qa_os.TestOsInvalid, (rapi_getos, )),
    (qa_os.TestOsPartiallyValid, (rapi_getos, )),
    (qa_os.TestOsModifyValid, ()),
    (qa_os.TestOsModifyInvalid, ()),
    (qa_os.TestOsStatesNonExisting, ()),
    ]
  for (test_fn, extra_args) in plan:
    RunTestIf(os_enabled, test_fn, *extra_args)
315
316
def RunCommonInstanceTests(instance, inst_nodes):
  """Runs a few tests that are common to all disk types.

  The tests run in a fixed order: shutdown/startup (CLI and RAPI), listing,
  info, modify, console and device names, then the tests requiring a stopped
  instance (reinstall, rename, grow-disk), and finally reboot, tags, cluster
  verification and some listings.

  @param instance: the instance under test; its C{name} and C{disk_template}
      attributes are read here
  @param inst_nodes: nodes of the instance, only passed on to the DRBD-specific
      cluster verify test

  """
  RunTestIf("instance-shutdown", qa_instance.TestInstanceShutdown, instance)
  RunTestIf(["instance-shutdown", "instance-console", qa_rapi.Enabled],
            qa_rapi.TestRapiStoppedInstanceConsole, instance)
  RunTestIf(["instance-shutdown", "instance-modify"],
            qa_instance.TestInstanceStoppedModify, instance)
  RunTestIf("instance-shutdown", qa_instance.TestInstanceStartup, instance)

  # Test shutdown/start via RAPI
  RunTestIf(["instance-shutdown", qa_rapi.Enabled],
            qa_rapi.TestRapiInstanceShutdown, instance)
  RunTestIf(["instance-shutdown", qa_rapi.Enabled],
            qa_rapi.TestRapiInstanceStartup, instance)

  RunTestIf("instance-list", qa_instance.TestInstanceList)

  RunTestIf("instance-info", qa_instance.TestInstanceInfo, instance)

  RunTestIf("instance-modify", qa_instance.TestInstanceModify, instance)
  RunTestIf(["instance-modify", qa_rapi.Enabled],
            qa_rapi.TestRapiInstanceModify, instance)

  RunTestIf("instance-console", qa_instance.TestInstanceConsole, instance)
  RunTestIf(["instance-console", qa_rapi.Enabled],
            qa_rapi.TestRapiInstanceConsole, instance)

  RunTestIf("instance-device-names", qa_instance.TestInstanceDeviceNames,
            instance)
  # Condition built with qa_config.Either from the three tests below; it
  # guards both the shutdown before them and the startup after them
  DOWN_TESTS = qa_config.Either([
    "instance-reinstall",
    "instance-rename",
    "instance-grow-disk",
    ])

  # shutdown instance for any 'down' tests
  RunTestIf(DOWN_TESTS, qa_instance.TestInstanceShutdown, instance)

  # now run the 'down' state tests
  RunTestIf("instance-reinstall", qa_instance.TestInstanceReinstall, instance)
  RunTestIf(["instance-reinstall", qa_rapi.Enabled],
            qa_rapi.TestRapiInstanceReinstall, instance)

  if qa_config.TestEnabled("instance-rename"):
    # A second instance is acquired only to obtain a name to rename to
    tgt_instance = qa_config.AcquireInstance()
    try:
      rename_source = instance.name
      rename_target = tgt_instance.name
      # perform instance rename to the same name
      RunTest(qa_instance.TestInstanceRenameAndBack,
              rename_source, rename_source)
      RunTestIf(qa_rapi.Enabled, qa_rapi.TestRapiInstanceRenameAndBack,
                rename_source, rename_source)
      if rename_target is not None:
        # perform instance rename to a different name, if we have one configured
        RunTest(qa_instance.TestInstanceRenameAndBack,
                rename_source, rename_target)
        RunTestIf(qa_rapi.Enabled, qa_rapi.TestRapiInstanceRenameAndBack,
                  rename_source, rename_target)
    finally:
      tgt_instance.Release()

  RunTestIf(["instance-grow-disk"], qa_instance.TestInstanceGrowDisk, instance)

  # and now start the instance again
  RunTestIf(DOWN_TESTS, qa_instance.TestInstanceStartup, instance)

  RunTestIf("instance-reboot", qa_instance.TestInstanceReboot, instance)

  RunTestIf("tags", qa_tags.TestInstanceTags, instance)

  # The broken-disks verification is only run for DRBD instances
  if instance.disk_template == constants.DT_DRBD8:
    RunTestIf("cluster-verify",
              qa_cluster.TestClusterVerifyDisksBrokenDRBD, instance, inst_nodes)
  RunTestIf("cluster-verify", qa_cluster.TestClusterVerify)

  RunTestIf(qa_rapi.Enabled, qa_rapi.TestInstance, instance)

  # Lists instances, too
  RunTestIf("node-list", qa_node.TestNodeList)

  # Some jobs have been run, let's test listing them
  RunTestIf("job-list", qa_job.TestJobList)
402
403
def RunCommonNodeTests():
  """Run a few common node tests.

  The out-of-band test is additionally conditional on
  C{qa_config.NoVirtualCluster}.

  """
  node_tests = [
    ("node-volumes", qa_node.TestNodeVolumes),
    ("node-storage", qa_node.TestNodeStorage),
    (["node-oob", qa_config.NoVirtualCluster], qa_node.TestOutOfBand),
    ]
  for (condition, test_fn) in node_tests:
    RunTestIf(condition, test_fn)
411
412
def RunGroupListTests():
  """Run tests for listing node groups.

  Both tests depend on C{group-list} being enabled.

  """
  for test_fn in (qa_group.TestGroupList, qa_group.TestGroupListFields):
    RunTestIf("group-list", test_fn)
419
420
def RunNetworkTests():
  """Run tests for network management.

  The tag test requires both C{network} and C{tags} to be enabled.

  """
  network_tests = [
    ("network", qa_network.TestNetworkAddRemove),
    ("network", qa_network.TestNetworkConnect),
    (["network", "tags"], qa_network.TestNetworkTags),
    ]
  for (condition, test_fn) in network_tests:
    RunTestIf(condition, test_fn)
428
429
def RunFilterTests():
  """Run tests for job filter management.

  All tests depend on C{filters} being enabled and run in the order given
  below.

  """
  filter_tests = (
    qa_filters.TestFilterList,
    qa_filters.TestFilterListFields,
    qa_filters.TestFilterAddRemove,
    qa_filters.TestFilterReject,
    qa_filters.TestFilterOpCode,
    qa_filters.TestFilterReasonChain,
    qa_filters.TestFilterContinue,
    qa_filters.TestFilterAcceptPause,
    qa_filters.TestFilterWatermark,
    qa_filters.TestFilterRateLimit,
    qa_filters.TestAdHocReasonRateLimit,
    )
  for test_fn in filter_tests:
    RunTestIf("filters", test_fn)
445
446
def RunGroupRwTests():
  """Run tests for adding/removing/renaming groups.

  The RAPI and tag tests need C{qa_rapi.Enabled} respectively C{tags} in
  addition to C{group-rwops}.

  """
  cli_tests = (
    qa_group.TestGroupAddRemoveRename,
    qa_group.TestGroupAddWithOptions,
    qa_group.TestGroupModify,
    )
  for test_fn in cli_tests:
    RunTestIf("group-rwops", test_fn)

  RunTestIf(["group-rwops", qa_rapi.Enabled], qa_rapi.TestRapiNodeGroups)

  # The default group is looked up even if the tag test ends up being skipped
  default_group = qa_group.GetDefaultGroup()
  RunTestIf(["group-rwops", "tags"], qa_tags.TestGroupTags, default_group)
457
458
def RunExportImportTests(instance, inodes):
  """Tries to export and import the instance.

  The export (and, nested inside it, the import) part runs only when
  C{instance-export} is enabled and the instance is not file-based. The
  inter-cluster move part is independent of it and has its own condition.

  @param instance: the instance to export; its C{disk_template} is checked
      against C{constants.DTS_FILEBASED}
  @type inodes: list of nodes
  @param inodes: current nodes of the instance

  """
  # FIXME: export explicitly bails out on file based storage. other non-lvm
  # based storage types are untested, though. Also note that import could still
  # work, but is deeply embedded into the "export" case.
  if (qa_config.TestEnabled("instance-export") and
      instance.disk_template not in constants.DTS_FILEBASED):
    RunTest(qa_instance.TestInstanceExportNoTarget, instance)

    pnode = inodes[0]
    # The export goes to a node other than the first node of the instance
    expnode = qa_config.AcquireNode(exclude=pnode)
    try:
      # The value returned by the export test is handed to the import test
      name = RunTest(qa_instance.TestInstanceExport, instance, expnode)

      RunTest(qa_instance.TestBackupList, expnode)

      if qa_config.TestEnabled("instance-import"):
        newinst = qa_config.AcquireInstance()
        try:
          RunTest(qa_instance.TestInstanceImport, newinst, pnode,
                  expnode, name)
          # Check if starting the instance works
          RunTest(qa_instance.TestInstanceStartup, newinst)
          RunTest(qa_instance.TestInstanceRemove, newinst)
        finally:
          newinst.Release()
    finally:
      expnode.Release()

  # FIXME: inter-cluster-instance-move crashes on file based instances :/
  # See Issue 414.
  if (qa_config.TestEnabled([qa_rapi.Enabled, "inter-cluster-instance-move"])
      and (instance.disk_template not in constants.DTS_FILEBASED)):
    newinst = qa_config.AcquireInstance()
    try:
      # The target node must not be one of the instance's current nodes
      tnode = qa_config.AcquireNode(exclude=inodes)
      try:
        RunTest(qa_rapi.TestInterClusterInstanceMove, instance, newinst,
                inodes, tnode)
      finally:
        tnode.Release()
    finally:
      newinst.Release()
507
508
def RunDaemonTests(instance):
  """Test the ganeti-watcher script.

  The watcher is paused before and resumed after the conditional tests; both
  steps are run unconditionally.

  @param instance: the instance handed to the restart/failure tests

  """
  RunTest(qa_daemon.TestPauseWatcher)

  watcher_tests = [
    ("instance-automatic-restart", qa_daemon.TestInstanceAutomaticRestart),
    ("instance-consecutive-failures",
     qa_daemon.TestInstanceConsecutiveFailures),
    ]
  for (test_name, test_fn) in watcher_tests:
    RunTestIf(test_name, test_fn, instance)

  RunTest(qa_daemon.TestResumeWatcher)
521
522
def RunHardwareFailureTests(instance, inodes):
  """Test cluster internal hardware failure recovery.

  Runs the failover and migration tests (CLI and RAPI), then the
  replace-disks and recreate-disks tests, which need additional nodes, and
  finally the node-level evacuate/failover/migrate tests if the instance
  has at least two nodes.

  @param instance: the instance under test
  @param inodes: list of the nodes currently used by the instance

  """
  RunTestIf("instance-failover", qa_instance.TestInstanceFailover, instance)
  RunTestIf(["instance-failover", qa_rapi.Enabled],
            qa_rapi.TestRapiInstanceFailover, instance)

  RunTestIf("instance-migrate", qa_instance.TestInstanceMigrate, instance)
  RunTestIf(["instance-migrate", qa_rapi.Enabled],
            qa_rapi.TestRapiInstanceMigrate, instance)

  if qa_config.TestEnabled("instance-replace-disks"):
    # We just need alternative secondary nodes, hence "- 1"
    othernodes = qa_config.AcquireManyNodes(len(inodes) - 1, exclude=inodes)
    try:
      RunTestIf(qa_rapi.Enabled, qa_rapi.TestRapiInstanceReplaceDisks, instance)
      RunTest(qa_instance.TestReplaceDisks,
              instance, inodes, othernodes)
    finally:
      qa_config.ReleaseManyNodes(othernodes)
    del othernodes

  if qa_config.TestEnabled("instance-recreate-disks"):
    # "acquirednodes" is what has to be released afterwards, "othernodes" is
    # what is handed to the test; they only differ in the fallback case
    try:
      acquirednodes = qa_config.AcquireManyNodes(len(inodes), exclude=inodes)
      othernodes = acquirednodes
    except qa_error.OutOfNodesError:
      if len(inodes) > 1:
        # If the cluster is not big enough, let's reuse some of the nodes, but
        # with different roles. In this way, we can test a DRBD instance even on
        # a 3-node cluster.
        acquirednodes = [qa_config.AcquireNode(exclude=inodes)]
        othernodes = acquirednodes + inodes[:-1]
      else:
        raise
    try:
      RunTest(qa_instance.TestRecreateDisks,
              instance, inodes, othernodes)
    finally:
      qa_config.ReleaseManyNodes(acquirednodes)

  # The node-level tests are given the first two nodes of the instance
  if len(inodes) >= 2:
    RunTestIf("node-evacuate", qa_node.TestNodeEvacuate, inodes[0], inodes[1])
    RunTestIf("node-failover", qa_node.TestNodeFailover, inodes[0], inodes[1])
    RunTestIf("node-migrate", qa_node.TestNodeMigrate, inodes[0], inodes[1])
569
570
def RunExclusiveStorageTests():
  """Test exclusive storage.

  Nothing is done unless C{cluster-exclusive-storage} is enabled. The tests
  toggle the cluster-wide exclusive storage setting several times; at the end
  the value returned by the very first toggle is passed back to
  C{qa_cluster.TestSetExclStorCluster}.

  """
  if not qa_config.TestEnabled("cluster-exclusive-storage"):
    return

  node = qa_config.AcquireNode()
  try:
    # presumably the return value is the previous setting (see its use at the
    # end of this block) -- TODO confirm against qa_cluster
    old_es = qa_cluster.TestSetExclStorCluster(False)
    qa_node.TestExclStorSingleNode(node)

    qa_cluster.TestSetExclStorCluster(True)
    qa_cluster.TestExclStorSharedPv(node)

    if qa_config.TestEnabled("instance-add-plain-disk"):
      # Make sure that the cluster doesn't have any pre-existing problem
      qa_cluster.AssertClusterVerify()

      # Create and allocate instances
      instance1 = qa_instance.TestInstanceAddWithPlainDisk([node])
      try:
        instance2 = qa_instance.TestInstanceAddWithPlainDisk([node])
        try:
          # cluster-verify checks that disks are allocated correctly
          qa_cluster.AssertClusterVerify()

          # Remove instances
          qa_instance.TestInstanceRemove(instance2)
          qa_instance.TestInstanceRemove(instance1)
        finally:
          instance2.Release()
      finally:
        instance1.Release()

    if qa_config.TestEnabled("instance-add-drbd-disk"):
      snode = qa_config.AcquireNode()
      try:
        # The DRBD instance is created with exclusive storage disabled; once
        # it is enabled again, cluster verify is expected to fail
        qa_cluster.TestSetExclStorCluster(False)
        instance = qa_instance.TestInstanceAddWithDrbdDisk([node, snode])
        try:
          qa_cluster.TestSetExclStorCluster(True)
          exp_err = [constants.CV_EINSTANCEUNSUITABLENODE]
          qa_cluster.AssertClusterVerify(fail=True, errors=exp_err)
          qa_instance.TestInstanceRemove(instance)
        finally:
          instance.Release()
      finally:
        snode.Release()
    # Hand back the value obtained from the first call above
    qa_cluster.TestSetExclStorCluster(old_es)
  finally:
    node.Release()
621
622
def RunCustomSshPortTests():
  """Test accessing nodes with custom SSH ports.

  This requires removing nodes, adding them to a new group, and then undoing
  the change.

  Nothing is done unless C{group-custom-ssh-port} is enabled. The test is
  also skipped, with a message, if any of the acquired nodes appears to be
  reached through IPv6.

  """
  if not qa_config.TestEnabled("group-custom-ssh-port"):
    return

  std_port = netutils.GetDaemonPort(constants.SSH)
  port = 211
  master = qa_config.GetMasterNode()
  with qa_config.AcquireManyNodesCtx(1, exclude=master) as nodes:
    # Checks if the node(s) could be contacted through IPv6.
    # If yes, better skip the whole test.

    for node in nodes:
      if qa_utils.UsesIPv6Connection(node.primary, std_port):
        # Adjacent literals are concatenated verbatim, hence the explicit
        # blank after the comma
        print ("Node %s is likely to be reached using IPv6, "
               "skipping the test" % (node.primary, ))
        return

    for node in nodes:
      qa_node.NodeRemove(node)
    with qa_iptables.RulesContext() as r:
      with qa_group.NewGroupCtx() as group:
        qa_group.ModifyGroupSshPort(r, group, nodes, port)

        for node in nodes:
          qa_node.NodeAdd(node, group=group)

        # Make sure that the cluster doesn't have any pre-existing problem
        qa_cluster.AssertClusterVerify()

        # Create and allocate instances
        instance1 = qa_instance.TestInstanceAddWithPlainDisk(nodes)
        try:
          instance2 = qa_instance.TestInstanceAddWithPlainDisk(nodes)
          try:
            # cluster-verify checks that disks are allocated correctly
            qa_cluster.AssertClusterVerify()

            # Remove instances
            qa_instance.TestInstanceRemove(instance2)
            qa_instance.TestInstanceRemove(instance1)
          finally:
            instance2.Release()
        finally:
          instance1.Release()

        # The nodes leave the temporary group while it still exists
        for node in nodes:
          qa_node.NodeRemove(node)

    # Undo the change: add the nodes again, this time without a group argument
    for node in nodes:
      qa_node.NodeAdd(node)

    qa_cluster.AssertClusterVerify()
680
681
def _BuildSpecDict(par, mn, st, mx):
  """Build an instance specs dictionary limited to a single parameter.

  @param par: name of the parameter
  @param mn: value stored under C{constants.ISPECS_MIN}
  @param st: value stored under C{constants.ISPECS_STD}
  @param mx: value stored under C{constants.ISPECS_MAX}
  @rtype: dict
  @return: dictionary with one min/max pair and the standard value

  """
  bounds = {
    constants.ISPECS_MIN: {par: mn},
    constants.ISPECS_MAX: {par: mx},
    }
  spec = {constants.ISPECS_MINMAX: [bounds]}
  spec[constants.ISPECS_STD] = {par: st}
  return spec
690
691
def _BuildDoubleSpecDict(index, par, mn, st, mx):
  """Build a specs dictionary with two min/max pairs, one of them filled.

  @param index: position (0 or 1) of the min/max pair to fill; the other
      pair is left as an empty dictionary
  @param par: name of the parameter
  @param mn: value stored under C{constants.ISPECS_MIN}
  @param st: value stored under C{constants.ISPECS_STD}; if C{None}, no
      standard value is included
  @param mx: value stored under C{constants.ISPECS_MAX}
  @rtype: dict
  @return: the specs dictionary

  """
  minmax_pairs = [{}, {}]
  result = {constants.ISPECS_MINMAX: minmax_pairs}
  if st is not None:
    result[constants.ISPECS_STD] = {par: st}
  minmax_pairs[index] = {
    constants.ISPECS_MIN: {par: mn},
    constants.ISPECS_MAX: {par: mx},
    }
  return result
703
704
def TestIPolicyPlainInstance():
  """Test instance policy interaction with instances

  A plain instance is created, then a series of policy changes is applied to
  the cluster; after each change cluster verify is expected to either pass or
  to warn about a policy violation. Once the instance is removed, the same
  changes are replayed and instance creation is expected to fail wherever a
  violation was recorded.

  """
  params = ["memory-size", "cpu-count", "disk-count", "disk-size", "nic-count"]
  if not qa_config.IsTemplateSupported(constants.DT_PLAIN):
    print "Template %s not supported" % constants.DT_PLAIN
    return

  # This test assumes that the group policy is empty
  (_, old_specs) = qa_cluster.TestClusterSetISpecs()
  # We also assume to have only one min/max bound
  assert len(old_specs[constants.ISPECS_MINMAX]) == 1
  node = qa_config.AcquireNode()
  try:
    # Log of policy changes, list of tuples:
    # (full_change, incremental_change, policy_violated)
    history = []
    instance = qa_instance.TestInstanceAddWithPlainDisk([node])
    try:
      policyerror = [constants.CV_EINSTANCEPOLICY]
      for par in params:
        (iminval, imaxval) = qa_instance.GetInstanceSpec(instance.name, par)
        # Some specs must be multiple of 4
        # All bounds are placed above the instance's value
        new_spec = _BuildSpecDict(par, imaxval + 4, imaxval + 4, imaxval + 4)
        history.append((None, new_spec, True))
        if iminval > 0:
          # Some specs must be multiple of 4
          # Upper bound placed below the instance's value
          if iminval >= 4:
            upper = iminval - 4
          else:
            upper = iminval - 1
          new_spec = _BuildSpecDict(par, 0, upper, upper)
          history.append((None, new_spec, True))
        # Go back to the original specs before handling the next parameter
        history.append((old_specs, None, False))

      # Test with two instance specs
      double_specs = copy.deepcopy(old_specs)
      # Duplicate the single min/max pair
      double_specs[constants.ISPECS_MINMAX] = \
          double_specs[constants.ISPECS_MINMAX] * 2
      (par1, par2) = params[0:2]
      (_, imaxval1) = qa_instance.GetInstanceSpec(instance.name, par1)
      (_, imaxval2) = qa_instance.GetInstanceSpec(instance.name, par2)
      old_minmax = old_specs[constants.ISPECS_MINMAX][0]
      history.extend([
        (double_specs, None, False),
        # The first min/max limit is being violated
        (None,
         _BuildDoubleSpecDict(0, par1, imaxval1 + 4, imaxval1 + 4,
                              imaxval1 + 4),
         False),
        # Both min/max limits are being violated
        (None,
         _BuildDoubleSpecDict(1, par2, imaxval2 + 4, None, imaxval2 + 4),
         True),
        # The second min/max limit is being violated
        (None,
         _BuildDoubleSpecDict(0, par1,
                              old_minmax[constants.ISPECS_MIN][par1],
                              old_specs[constants.ISPECS_STD][par1],
                              old_minmax[constants.ISPECS_MAX][par1]),
         False),
        (old_specs, None, False),
        ])

      # Apply the changes, and check policy violations after each change
      qa_cluster.AssertClusterVerify()
      for (new_specs, diff_specs, failed) in history:
        qa_cluster.TestClusterSetISpecs(new_specs=new_specs,
                                        diff_specs=diff_specs)
        if failed:
          qa_cluster.AssertClusterVerify(warnings=policyerror)
        else:
          qa_cluster.AssertClusterVerify()

      qa_instance.TestInstanceRemove(instance)
    finally:
      instance.Release()

    # Now we replay the same policy changes, and we expect that the instance
    # cannot be created for the cases where we had a policy violation above
    for (new_specs, diff_specs, failed) in history:
      qa_cluster.TestClusterSetISpecs(new_specs=new_specs,
                                      diff_specs=diff_specs)
      if failed:
        qa_instance.TestInstanceAddWithPlainDisk([node], fail=True)
      # Instance creation with no policy violation has been tested already
  finally:
    node.Release()
792
793
def IsExclusiveStorageInstanceTestEnabled():
  """Tell whether the instance tests with exclusive storage should run.

  If C{exclusive-storage-instance-tests} is enabled, the number of PVs of the
  configured volume group is queried with C{vgs} on every configured node.

  @rtype: bool
  @return: C{False} if the test is not enabled, C{True} if it is enabled and
      all nodes passed the check
  @raise qa_error.Error: if the number of PVs cannot be determined on a
      node, or if a node has less than two PVs

  """
  test_name = "exclusive-storage-instance-tests"
  if not qa_config.TestEnabled(test_name):
    return False

  vgname = qa_config.get("vg-name", constants.DEFAULT_VG)
  vgscmd = utils.ShellQuoteArgs([
    "vgs", "--noheadings", "-o", "pv_count", vgname,
    ])
  for node in qa_config.GetConfig()["nodes"]:
    try:
      pvnum = int(qa_utils.GetCommandOutput(node.primary, vgscmd))
    except Exception as err:
      raise qa_error.Error("Cannot get the number of PVs on %s,"
                           " needed by '%s': %s" %
                           (node.primary, test_name, err))
    if pvnum < 2:
      raise qa_error.Error("Node %s has not enough PVs (%s) to run '%s'" %
                           (node.primary, pvnum, test_name))
  return True
816
817
def RunInstanceTests():
  """Create and exercise instances.

  For every entry of C{qa_instance.available_instance_tests} whose test is
  enabled and whose disk template is supported, an instance is created, put
  through the instance-level test suites and removed again; afterwards the
  cluster is verified. Entries which don't qualify are reported as skipped.

  """

  requested_conversions = qa_config.get("convert-disk-templates", [])
  supported_conversions = \
      set(requested_conversions).difference(constants.DTS_NOT_CONVERTIBLE_TO)
  for (test_name, templ, create_fun, num_nodes) in \
      qa_instance.available_instance_tests:
    if (qa_config.TestEnabled(test_name) and
        qa_config.IsTemplateSupported(templ)):
      inodes = qa_config.AcquireManyNodes(num_nodes)
      try:
        instance = RunTest(create_fun, inodes)
        try:
          RunTestIf("instance-user-down", qa_instance.TestInstanceUserDown,
                    instance)
          RunTestIf("instance-communication",
                    qa_instance.TestInstanceCommunication,
                    instance,
                    qa_config.GetMasterNode())
          RunTestIf("cluster-epo", qa_cluster.TestClusterEpo)
          RunDaemonTests(instance)
          for node in inodes:
            RunTestIf("haskell-confd", qa_node.TestNodeListDrbd, node,
                      templ == constants.DT_DRBD8)
          if len(inodes) > 1:
            RunTestIf("group-rwops", qa_group.TestAssignNodesIncludingSplit,
                      constants.INITIAL_NODE_GROUP_NAME,
                      inodes[0].primary, inodes[1].primary)
          # This test will run once but it will cover all the supported
          # user-provided disk template conversions
          if qa_config.TestEnabled("instance-convert-disk"):
            if (len(supported_conversions) > 1 and
                instance.disk_template in supported_conversions):
              RunTest(qa_instance.TestInstanceShutdown, instance)
              RunTest(qa_instance.TestInstanceConvertDiskTemplate, instance,
                      supported_conversions)
              RunTest(qa_instance.TestInstanceStartup, instance)
              # At this point we clear the set because the requested conversions
              # has been tested
              supported_conversions.clear()
            else:
              test_desc = "Converting instance of template %s" % templ
              ReportTestSkip(test_desc, "conversion feature")
          RunTestIf("instance-modify-disks",
                    qa_instance.TestInstanceModifyDisks, instance)
          RunCommonInstanceTests(instance, inodes)
          if qa_config.TestEnabled("instance-modify-primary"):
            othernode = qa_config.AcquireNode()
            # Release the extra node even if the test fails, like every other
            # acquisition in this file
            try:
              RunTest(qa_instance.TestInstanceModifyPrimaryAndBack,
                      instance, inodes[0], othernode)
            finally:
              othernode.Release()
          RunGroupListTests()
          RunExportImportTests(instance, inodes)
          RunHardwareFailureTests(instance, inodes)
          RunRepairDiskSizes()
          RunTestIf(["rapi", "instance-data-censorship"],
                    qa_rapi.TestInstanceDataCensorship, instance, inodes)
          RunTest(qa_instance.TestInstanceRemove, instance)
        finally:
          instance.Release()
        del instance
      finally:
        qa_config.ReleaseManyNodes(inodes)
      qa_cluster.AssertClusterVerify()
    else:
      # Tell apart the two reasons for not running the tests
      test_desc = "Creating instances of template %s" % templ
      if not qa_config.TestEnabled(test_name):
        ReportTestSkip(test_desc, test_name)
      else:
        ReportTestSkip(test_desc, "disk template %s" % templ)
889
890
def RunMonitoringTests():
  """Run tests for the monitoring data collectors.

  The instance status collector test only runs if C{mon-collector} is
  enabled.

  """
  RunTestIf("mon-collector", qa_monitoring.TestInstStatusCollector)
893
894
# Maps the names of the parallel tests, as checked through qa_config, to the
# qa_performance functions implementing them. RunPerformanceTests runs each
# enabled entry against a single DRBD instance.
PARALLEL_TEST_DICT = {
  "parallel-failover": qa_performance.TestParallelInstanceFailover,
  "parallel-migration": qa_performance.TestParallelInstanceMigration,
  "parallel-replace-disks": qa_performance.TestParallelInstanceReplaceDisks,
  "parallel-reboot": qa_performance.TestParallelInstanceReboot,
  "parallel-reinstall": qa_performance.TestParallelInstanceReinstall,
  "parallel-rename": qa_performance.TestParallelInstanceRename,
  }
903
904
def RunPerformanceTests():
  """Run performance tests.

  Nothing but a skip report happens unless C{performance} is enabled. The
  job queue tests and the parallel tests are then controlled separately by
  C{jobqueue-performance} and C{parallel-performance}.

  """
  if not qa_config.TestEnabled("performance"):
    ReportTestSkip("performance related tests", "performance")
    return

  if qa_config.TestEnabled("jobqueue-performance"):
    RunTest(qa_performance.TestParallelMaxInstanceCreationPerformance)
    RunTest(qa_performance.TestParallelNodeCountInstanceCreationPerformance)

    # The following tests share one set of instances
    instances = qa_performance.CreateAllInstances()

    RunTest(qa_performance.TestParallelModify, instances)
    RunTest(qa_performance.TestParallelInstanceOSOperations, instances)
    RunTest(qa_performance.TestParallelInstanceQueries, instances)

    qa_performance.RemoveAllInstances(instances)

    RunTest(qa_performance.TestJobQueueSubmissionPerformance)

  if qa_config.TestEnabled("parallel-performance"):
    if qa_config.IsTemplateSupported(constants.DT_DRBD8):
      RunTest(qa_performance.TestParallelDRBDInstanceCreationPerformance)
    if qa_config.IsTemplateSupported(constants.DT_PLAIN):
      RunTest(qa_performance.TestParallelPlainInstanceCreationPerformance)

    # Preparations need to be made only if some of these tests are enabled
    if qa_config.IsTemplateSupported(constants.DT_DRBD8) and \
       qa_config.TestEnabled(qa_config.Either(PARALLEL_TEST_DICT.keys())):
      inodes = qa_config.AcquireManyNodes(2)
      try:
        instance = qa_instance.TestInstanceAddWithDrbdDisk(inodes)
        try:
          for (test_name, test_fn) in PARALLEL_TEST_DICT.items():
            RunTestIf(test_name, test_fn, instance)
        finally:
          instance.Release()
        # NOTE(review): the instance is removed after having been released,
        # unlike elsewhere in this file -- confirm this order is intended
        qa_instance.TestInstanceRemove(instance)
      finally:
        qa_config.ReleaseManyNodes(inodes)
944
945
def RunQa():
  """Main QA body.

  Runs the environment tests, sets up the cluster and then executes the
  remaining test blocks in a fixed order, ending with the "cluster-destroy"
  tests. Nodes and instances acquired along the way are released in
  "finally" clauses, and a cluster setting changed for an instance test run
  is restored even if that run raises.

  """
  RunTestBlock(RunEnvTests)
  SetupCluster()

  RunTestBlock(RunClusterTests)
  RunTestBlock(RunOsTests)

  RunTestIf("tags", qa_tags.TestClusterTags)

  RunTestBlock(RunCommonNodeTests)
  RunTestBlock(RunGroupListTests)
  RunTestBlock(RunGroupRwTests)
  RunTestBlock(RunNetworkTests)
  RunTestBlock(RunFilterTests)

  # The master shouldn't be readded or put offline; "delay" needs a non-master
  # node to test
  pnode = qa_config.AcquireNode(exclude=qa_config.GetMasterNode())
  try:
    RunTestIf("node-readd", qa_node.TestNodeReadd, pnode)
    RunTestIf("node-modify", qa_node.TestNodeModify, pnode)
    RunTestIf("delay", qa_cluster.TestDelay, pnode)
  finally:
    pnode.Release()

  # Make sure the cluster is clean before running instance tests
  qa_cluster.AssertClusterVerify()

  pnode = qa_config.AcquireNode()
  try:
    RunTestIf("tags", qa_tags.TestNodeTags, pnode)

    if qa_rapi.Enabled():
      RunTest(qa_rapi.TestNode, pnode)

      if (qa_config.TestEnabled("instance-add-plain-disk")
          and qa_config.IsTemplateSupported(constants.DT_PLAIN)):
        # Normal instance allocation via RAPI
        for use_client in [True, False]:
          rapi_instance = RunTest(qa_rapi.TestRapiInstanceAdd, pnode,
                                  use_client)
          try:
            if qa_config.TestEnabled("instance-plain-rapi-common-tests"):
              RunCommonInstanceTests(rapi_instance, [pnode])
            RunTest(qa_rapi.TestRapiInstanceRemove, rapi_instance, use_client)
          finally:
            rapi_instance.Release()
          del rapi_instance

        # Multi-instance allocation
        rapi_instance_one, rapi_instance_two = \
          RunTest(qa_rapi.TestRapiInstanceMultiAlloc, pnode)

        try:
          RunTest(qa_rapi.TestRapiInstanceRemove, rapi_instance_one, True)
          RunTest(qa_rapi.TestRapiInstanceRemove, rapi_instance_two, True)
        finally:
          rapi_instance_one.Release()
          rapi_instance_two.Release()
  finally:
    pnode.Release()

  # Each entry is (test name or predicate, function applying a configuration
  # and returning the previous one, function restoring a configuration)
  config_list = [
    ("default-instance-tests", lambda: None, lambda _: None),
    (IsExclusiveStorageInstanceTestEnabled,
     lambda: qa_cluster.TestSetExclStorCluster(True),
     qa_cluster.TestSetExclStorCluster),
    ]
  for (conf_name, setup_conf_f, restore_conf_f) in config_list:
    if qa_config.TestEnabled(conf_name):
      oldconf = setup_conf_f()
      try:
        RunTestBlock(RunInstanceTests)
      finally:
        # Put the previous configuration back even if the instance tests
        # raised, so the changed setting does not outlive this test run
        restore_conf_f(oldconf)

  pnode = qa_config.AcquireNode()
  try:
    if qa_config.TestEnabled(["instance-add-plain-disk", "instance-export"]):
      # Export once from a running and once from a stopped instance
      for shutdown in [False, True]:
        instance = RunTest(qa_instance.TestInstanceAddWithPlainDisk, [pnode])
        try:
          expnode = qa_config.AcquireNode(exclude=pnode)
          try:
            if shutdown:
              # Stop instance before exporting and removing it
              RunTest(qa_instance.TestInstanceShutdown, instance)
            RunTest(qa_instance.TestInstanceExportWithRemove, instance, expnode)
            RunTest(qa_instance.TestBackupList, expnode)
          finally:
            expnode.Release()
        finally:
          instance.Release()
        del expnode
        del instance
      qa_cluster.AssertClusterVerify()

  finally:
    pnode.Release()

  if qa_rapi.Enabled():
    RunTestIf("filters", qa_rapi.TestFilters)

  RunTestIf("cluster-upgrade", qa_cluster.TestUpgrade)

  RunTestBlock(RunExclusiveStorageTests)
  RunTestIf(["cluster-instance-policy", "instance-add-plain-disk"],
            TestIPolicyPlainInstance)

  RunTestBlock(RunCustomSshPortTests)

  RunTestIf(
    "instance-add-restricted-by-disktemplates",
    qa_instance.TestInstanceCreationRestrictedByDiskTemplates)

  # Test removing instance with offline drbd secondary
  if qa_config.TestEnabled(["instance-remove-drbd-offline",
                            "instance-add-drbd-disk"]):
    # Make sure the master is not put offline
    snode = qa_config.AcquireNode(exclude=qa_config.GetMasterNode())
    try:
      pnode = qa_config.AcquireNode(exclude=snode)
      try:
        # NOTE(review): unlike the other instances in this function, this one
        # is not released here; presumably that is handled (or not needed)
        # elsewhere - confirm
        instance = qa_instance.TestInstanceAddWithDrbdDisk([pnode, snode])
        set_offline = lambda node: qa_node.MakeNodeOffline(node, "yes")
        set_online = lambda node: qa_node.MakeNodeOffline(node, "no")
        RunTest(qa_instance.TestRemoveInstanceOfflineNode, instance, snode,
                set_offline, set_online)
      finally:
        pnode.Release()
    finally:
      snode.Release()
    qa_cluster.AssertClusterVerify()

  RunTestBlock(RunMonitoringTests)

  RunPerformanceTests()

  RunTestIf("cluster-destroy", qa_node.TestNodeRemoveAll)

  RunTestIf("cluster-destroy", qa_cluster.TestClusterDestroy)
1088
1089
@UsesRapiClient
def main():
  """Entry point of the QA script.

  Parses the command line, refuses to continue without --yes-do-it, loads
  the given QA configuration file, starts the multiplexer for the master
  node, prints the SSH commands and runs the QA body. The multiplexers are
  closed once the QA body has returned or raised.

  """
  colors.check_for_colors()

  option_parser = optparse.OptionParser(usage="%prog [options] <config-file>")
  option_parser.add_option("--yes-do-it", dest="yes_do_it",
                           action="store_true",
                           help="Really execute the tests")
  options, positional = option_parser.parse_args()

  # Exactly one positional argument, the configuration file, is expected;
  # error() prints the usage message and terminates the program
  if len(positional) != 1:
    option_parser.error("Wrong number of arguments.")
  config_file = positional[0]

  if not options.yes_do_it:
    warning = ("Executing this script irreversibly destroys any Ganeti\n"
               "configuration on all nodes involved. If you really want\n"
               "to start testing, supply the --yes-do-it option.")
    print (warning)
    sys.exit(1)

  qa_config.Load(config_file)

  master_name = qa_config.GetMasterNode().primary
  qa_utils.StartMultiplexer(master_name)

  master_ssh = utils.ShellQuoteArgs(qa_utils.GetSSHCommand(master_name, ""))
  print ("SSH command for primary node: %s" % master_ssh)
  node_ssh = utils.ShellQuoteArgs(qa_utils.GetSSHCommand("NODE", ""))
  print ("SSH command for other nodes: %s" % node_ssh)

  try:
    RunQa()
  finally:
    qa_utils.CloseMultiplexers()
1126
# Start the QA run only when this file is executed as a script, not when it
# is imported as a module
if __name__ == "__main__":
  main()