92ab27f439c0cc916ae6a06bbff66d1496ce7a89
[ganeti-github.git] / qa / ganeti-qa.py
1 #!/usr/bin/python -u
2 #
3
4 # Copyright (C) 2007, 2008, 2009, 2010, 2011, 2012, 2013 Google Inc.
5 # All rights reserved.
6 #
7 # Redistribution and use in source and binary forms, with or without
8 # modification, are permitted provided that the following conditions are
9 # met:
10 #
11 # 1. Redistributions of source code must retain the above copyright notice,
12 # this list of conditions and the following disclaimer.
13 #
14 # 2. Redistributions in binary form must reproduce the above copyright
15 # notice, this list of conditions and the following disclaimer in the
16 # documentation and/or other materials provided with the distribution.
17 #
18 # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
19 # IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
20 # TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
21 # PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR
22 # CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
23 # EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
24 # PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
25 # PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
26 # LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
27 # NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
28 # SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29
30
31 """Script for doing QA on Ganeti.
32
33 """
34
35 # pylint: disable=C0103
36 # due to invalid name
37
38 import copy
39 import datetime
40 import optparse
41 import sys
42
43 import colors
44 import qa_cluster
45 import qa_config
46 import qa_daemon
47 import qa_env
48 import qa_error
49 import qa_group
50 import qa_instance
51 import qa_iptables
52 import qa_monitoring
53 import qa_network
54 import qa_node
55 import qa_os
56 import qa_performance
57 import qa_job
58 import qa_rapi
59 import qa_tags
60 import qa_utils
61
62 from ganeti import utils
63 from ganeti import rapi # pylint: disable=W0611
64 from ganeti import constants
65 from ganeti import netutils
66
67 import ganeti.rapi.client # pylint: disable=W0611
68 from ganeti.rapi.client import UsesRapiClient
69
70
def _FormatHeader(line, end=72, mark="-", color=None):
  """Builds a colorized header line padded with dashes.

  The text is prefixed with four copies of C{mark} and a space, padded with
  "-" characters up to column C{end}, stripped of trailing whitespace and
  finally handed to C{colors.colorize}.

  @param line: the text to show in the header
  @param end: the column up to which the header is padded
  @param mark: the character repeated four times in front of the text
  @param color: passed through to C{colors.colorize}
  @return: the value returned by C{colors.colorize}

  """
  prefixed = "".join([mark * 4, " ", line, " "])
  # A negative count simply yields no padding for overlong headers
  padding = "-" * (end - len(prefixed))
  return colors.colorize((prefixed + padding).rstrip(), color=color)
80
81
def _DescriptionOf(fn):
  """Computes the description of an item.

  The description is the first non-empty line of the item's docstring, with
  trailing dots removed, prefixed by the item's name in square brackets when
  it has one. Items without any usable docstring text are described by their
  C{repr}.

  @param fn: the item (usually a test function) to describe
  @rtype: string
  @return: human-readable description of C{fn}

  """
  summary = ""
  # Docstrings frequently start on the line after the opening quotes, hence
  # look for the first line with actual content instead of taking line zero
  for text in (getattr(fn, "__doc__", None) or "").splitlines():
    text = text.strip()
    if text:
      summary = text.rstrip(".")
      break

  if not summary:
    return "%r" % fn

  # Not every callable has a name (e.g. functools.partial objects)
  name = getattr(fn, "__name__", None)
  if name:
    return "[" + name + "] " + summary

  return summary
95
96
def RunTest(fn, *args, **kwargs):
  """Executes a single test, framing its output with status headers.

  A start header is printed first; afterwards either a PASSED or a FAILED
  header follows, and a header with the elapsed time is always printed last.

  @param fn: the test callable; it is invoked with C{args} and C{kwargs}
  @return: whatever C{fn} returns
  @raise Exception: anything raised by C{fn} is re-raised after reporting

  """
  started = datetime.datetime.now()
  description = _DescriptionOf(fn)

  print("")
  print(_FormatHeader("%s start %s" % (started, description),
                      color=colors.YELLOW, mark="<"))

  try:
    result = fn(*args, **kwargs)
    print(_FormatHeader("PASSED %s" % (description, ), color=colors.GREEN))
    return result
  except Exception as err:
    print(_FormatHeader("FAILED %s: %s" % (description, err),
                        color=colors.RED))
    raise
  finally:
    # Report the timing regardless of the outcome
    finished = datetime.datetime.now()
    print(_FormatHeader("%s time=%s %s" %
                        (finished, finished - started, description),
                        color=colors.MAGENTA, mark=">"))
122
123
def ReportTestSkip(desc, testnames):
  """Prints a header announcing that tests have been skipped.

  @type desc: string
  @param desc: description of what is being skipped
  @type testnames: string or list of string
  @param testnames: either a single test name in the configuration
      file, or a list of testnames (which will be AND-ed together)

  """
  # TODO: Formatting test names when non-string names are involved
  message = ("%s skipping %s, test(s) %s disabled" %
             (datetime.datetime.now(), desc, testnames))
  print(_FormatHeader(message, color=colors.BLUE, mark="*"))
139
140
def RunTestIf(testnames, fn, *args, **kwargs):
  """Runs a test only if it is enabled, reporting a skip otherwise.

  @param testnames: either a single test name in the configuration
      file, or a list of testnames (which will be AND-ed together)
  @param fn: the test callable, invoked with C{args} and C{kwargs} through
      L{RunTest} when C{qa_config.TestEnabled} accepts C{testnames}

  """
  if not qa_config.TestEnabled(testnames):
    ReportTestSkip(_DescriptionOf(fn), testnames)
    return

  RunTest(fn, *args, **kwargs)
153
154
def RunTestBlock(fn, *args, **kwargs):
  """Executes a block of tests, framing its output with BLOCK headers.

  A bold start header is printed before C{fn} is called; a BLOCK FAILED
  header is printed if it raises, and a header with the elapsed time is
  always printed at the end.

  @param fn: the callable running the block; invoked with C{args} and
      C{kwargs}
  @return: whatever C{fn} returns
  @raise Exception: anything raised by C{fn} is re-raised after reporting

  """
  started = datetime.datetime.now()
  description = _DescriptionOf(fn)

  print("")
  print(_FormatHeader("BLOCK %s start %s" % (started, description),
                      color=[colors.YELLOW, colors.BOLD], mark="v"))

  try:
    return fn(*args, **kwargs)
  except Exception as err:
    print(_FormatHeader("BLOCK FAILED %s: %s" % (description, err),
                        color=[colors.RED, colors.BOLD]))
    raise
  finally:
    # Report the timing regardless of the outcome
    finished = datetime.datetime.now()
    print(_FormatHeader("BLOCK %s time=%s %s" %
                        (finished, finished - started, description),
                        color=[colors.MAGENTA, colors.BOLD], mark="^"))
178
179
def RunEnvTests():
  """Runs the environment tests guarded by the "env" test name.

  """
  env_tests = (
    qa_env.TestSshConnection,
    qa_env.TestIcmpPing,
    qa_env.TestGanetiCommands,
    )
  for test_fn in env_tests:
    RunTestIf("env", test_fn)
187
188
def SetupCluster():
  """Initializes the cluster.

  Runs the cluster initialization test if "create-cluster" is enabled,
  sets up RAPI and the node groups, adds (or marks as added) all nodes and
  runs the listing tests both before and after the nodes are added.

  """

  RunTestIf("create-cluster", qa_cluster.TestClusterInit)
  if not qa_config.TestEnabled("create-cluster"):
    # If the cluster is already in place, we assume that exclusive-storage is
    # already set according to the configuration
    qa_config.SetExclusiveStorage(qa_config.get("exclusive-storage", False))

  qa_rapi.SetupRapi()

  qa_group.ConfigureGroups()

  # Test on empty cluster
  RunTestIf("node-list", qa_node.TestNodeList)
  RunTestIf("instance-list", qa_instance.TestInstanceList)
  RunTestIf("job-list", qa_job.TestJobList)

  RunTestIf("create-cluster", qa_node.TestNodeAddAll)
  if not qa_config.TestEnabled("create-cluster"):
    # consider the nodes are already there
    qa_node.MarkNodeAddedAll()

  RunTestIf("test-jobqueue", qa_cluster.TestJobqueue)
  RunTestIf("test-jobqueue", qa_job.TestJobCancellation)

  # enable the watcher (unconditionally)
  RunTest(qa_daemon.TestResumeWatcher)

  # Listing nodes again, this time after the nodes have been added
  RunTestIf("node-list", qa_node.TestNodeList)

  # Test listing fields
  RunTestIf("node-list", qa_node.TestNodeListFields)
  RunTestIf("instance-list", qa_instance.TestInstanceListFields)
  RunTestIf("job-list", qa_job.TestJobListFields)
  RunTestIf("instance-export", qa_instance.TestBackupListFields)

  RunTestIf("node-info", qa_node.TestNodeInfo)
229
230
def RunClusterTests():
  """Runs tests related to gnt-cluster.

  The tests are organized as two tables of (condition, function) pairs, each
  pair being handed to L{RunTestIf} in order. Between the two tables the RAPI
  certificates are reloaded if RAPI is enabled.

  """
  for test, fn in [
    ("create-cluster", qa_cluster.TestClusterInitDisk),
    ("cluster-renew-crypto", qa_cluster.TestClusterRenewCrypto)
    ]:
    RunTestIf(test, fn)

  # Since renew-crypto replaces the RAPI cert, reload it.
  if qa_rapi.Enabled():
    qa_rapi.ReloadCertificates()

  # The condition is either a test name, a list of conditions, or a callable
  # such as qa_rapi.Enabled
  for test, fn in [
    ("cluster-verify", qa_cluster.TestClusterVerify),
    ("cluster-reserved-lvs", qa_cluster.TestClusterReservedLvs),
    # TODO: add more cluster modify tests
    ("cluster-modify", qa_cluster.TestClusterModifyEmpty),
    ("cluster-modify", qa_cluster.TestClusterModifyIPolicy),
    ("cluster-modify", qa_cluster.TestClusterModifyISpecs),
    ("cluster-modify", qa_cluster.TestClusterModifyBe),
    ("cluster-modify", qa_cluster.TestClusterModifyDisk),
    ("cluster-modify", qa_cluster.TestClusterModifyDiskTemplates),
    ("cluster-modify", qa_cluster.TestClusterModifyFileStorageDir),
    ("cluster-modify", qa_cluster.TestClusterModifySharedFileStorageDir),
    ("cluster-modify", qa_cluster.TestClusterModifyInstallImage),
    ("cluster-modify", qa_cluster.TestClusterModifyUserShutdown),
    ("cluster-rename", qa_cluster.TestClusterRename),
    ("cluster-info", qa_cluster.TestClusterVersion),
    ("cluster-info", qa_cluster.TestClusterInfo),
    ("cluster-info", qa_cluster.TestClusterGetmaster),
    ("cluster-redist-conf", qa_cluster.TestClusterRedistConf),
    (["cluster-copyfile", qa_config.NoVirtualCluster],
     qa_cluster.TestClusterCopyfile),
    ("cluster-command", qa_cluster.TestClusterCommand),
    ("cluster-burnin", qa_cluster.TestClusterBurnin),
    ("cluster-master-failover", qa_cluster.TestClusterMasterFailover),
    ("cluster-master-failover",
     qa_cluster.TestClusterMasterFailoverWithDrainedQueue),
    (["cluster-oob", qa_config.NoVirtualCluster],
     qa_cluster.TestClusterOob),
    ("cluster-instance-communication", qa_cluster.TestInstanceCommunication),
    (qa_rapi.Enabled, qa_rapi.TestVersion),
    (qa_rapi.Enabled, qa_rapi.TestEmptyCluster),
    (qa_rapi.Enabled, qa_rapi.TestRapiQuery),
    ]:
    RunTestIf(test, fn)
279
280
def RunRepairDiskSizes():
  """Run the repair disk-sizes test.

  The test only runs if "cluster-repair-disk-sizes" is enabled; otherwise a
  skip is reported by L{RunTestIf}.

  """
  RunTestIf("cluster-repair-disk-sizes", qa_cluster.TestClusterRepairDiskSizes)
286
287
def RunOsTests():
  """Runs all tests related to gnt-os.

  Every test is guarded by the "os" test name combined with
  C{qa_config.NoVirtualCluster}. The validity tests additionally receive
  C{qa_rapi.GetOperatingSystems}, or C{None} when RAPI is not enabled.

  """
  conditions = ["os", qa_config.NoVirtualCluster]

  rapi_getos = None
  if qa_config.TestEnabled(qa_rapi.Enabled):
    rapi_getos = qa_rapi.GetOperatingSystems

  # (test function, extra positional arguments), in execution order
  plan = [
    (qa_os.TestOsList, ()),
    (qa_os.TestOsDiagnose, ()),
    (qa_os.TestOsValid, (rapi_getos, )),
    (qa_os.TestOsInvalid, (rapi_getos, )),
    (qa_os.TestOsPartiallyValid, (rapi_getos, )),
    (qa_os.TestOsModifyValid, ()),
    (qa_os.TestOsModifyInvalid, ()),
    (qa_os.TestOsStatesNonExisting, ()),
    ]
  for (test_fn, extra_args) in plan:
    RunTestIf(conditions, test_fn, *extra_args)
318
319
def RunCommonInstanceTests(instance, inst_nodes):
  """Runs a few tests that are common to all disk types.

  The tests first exercise the running instance, then shut it down for the
  tests that need a stopped instance (reinstall, rename, grow-disk), start it
  again and finish with reboot, tags, verification and listing tests.

  @param instance: the instance to test; its C{name} and C{disk_template}
      attributes are read here
  @param inst_nodes: the nodes of the instance, only passed on to the broken
      DRBD verification test

  """
  RunTestIf("instance-shutdown", qa_instance.TestInstanceShutdown, instance)
  RunTestIf(["instance-shutdown", "instance-console", qa_rapi.Enabled],
            qa_rapi.TestRapiStoppedInstanceConsole, instance)
  RunTestIf(["instance-shutdown", "instance-modify"],
            qa_instance.TestInstanceStoppedModify, instance)
  RunTestIf("instance-shutdown", qa_instance.TestInstanceStartup, instance)

  # Test shutdown/start via RAPI
  RunTestIf(["instance-shutdown", qa_rapi.Enabled],
            qa_rapi.TestRapiInstanceShutdown, instance)
  RunTestIf(["instance-shutdown", qa_rapi.Enabled],
            qa_rapi.TestRapiInstanceStartup, instance)

  RunTestIf("instance-list", qa_instance.TestInstanceList)

  RunTestIf("instance-info", qa_instance.TestInstanceInfo, instance)

  RunTestIf("instance-modify", qa_instance.TestInstanceModify, instance)
  RunTestIf(["instance-modify", qa_rapi.Enabled],
            qa_rapi.TestRapiInstanceModify, instance)

  RunTestIf("instance-console", qa_instance.TestInstanceConsole, instance)
  RunTestIf(["instance-console", qa_rapi.Enabled],
            qa_rapi.TestRapiInstanceConsole, instance)

  RunTestIf("instance-device-names", qa_instance.TestInstanceDeviceNames,
            instance)
  # Condition that holds if at least one of the 'down' tests is enabled
  DOWN_TESTS = qa_config.Either([
    "instance-reinstall",
    "instance-rename",
    "instance-grow-disk",
    ])

  # shutdown instance for any 'down' tests
  RunTestIf(DOWN_TESTS, qa_instance.TestInstanceShutdown, instance)

  # now run the 'down' state tests
  RunTestIf("instance-reinstall", qa_instance.TestInstanceReinstall, instance)
  RunTestIf(["instance-reinstall", qa_rapi.Enabled],
            qa_rapi.TestRapiInstanceReinstall, instance)

  if qa_config.TestEnabled("instance-rename"):
    # A second instance is acquired only to borrow its name as rename target
    tgt_instance = qa_config.AcquireInstance()
    try:
      rename_source = instance.name
      rename_target = tgt_instance.name
      # perform instance rename to the same name
      RunTest(qa_instance.TestInstanceRenameAndBack,
              rename_source, rename_source)
      RunTestIf(qa_rapi.Enabled, qa_rapi.TestRapiInstanceRenameAndBack,
                rename_source, rename_source)
      if rename_target is not None:
        # perform instance rename to a different name, if we have one
        # configured
        RunTest(qa_instance.TestInstanceRenameAndBack,
                rename_source, rename_target)
        RunTestIf(qa_rapi.Enabled, qa_rapi.TestRapiInstanceRenameAndBack,
                  rename_source, rename_target)
    finally:
      tgt_instance.Release()

  RunTestIf(["instance-grow-disk"], qa_instance.TestInstanceGrowDisk, instance)

  # and now start the instance again
  RunTestIf(DOWN_TESTS, qa_instance.TestInstanceStartup, instance)

  RunTestIf("instance-reboot", qa_instance.TestInstanceReboot, instance)

  RunTestIf("tags", qa_tags.TestInstanceTags, instance)

  # The broken-disks verification is only run for DRBD instances
  if instance.disk_template == constants.DT_DRBD8:
    RunTestIf("cluster-verify",
              qa_cluster.TestClusterVerifyDisksBrokenDRBD, instance, inst_nodes)
  RunTestIf("cluster-verify", qa_cluster.TestClusterVerify)

  RunTestIf(qa_rapi.Enabled, qa_rapi.TestInstance, instance)

  # Lists instances, too
  RunTestIf("node-list", qa_node.TestNodeList)

  # Some jobs have been run, let's test listing them
  RunTestIf("job-list", qa_job.TestJobList)
405
406
def RunCommonNodeTests():
  """Runs the volume, storage and out-of-band node tests.

  Each test is run through L{RunTestIf} with its own condition.

  """
  node_tests = [
    ("node-volumes", qa_node.TestNodeVolumes),
    ("node-storage", qa_node.TestNodeStorage),
    (["node-oob", qa_config.NoVirtualCluster], qa_node.TestOutOfBand),
    ]
  for (condition, test_fn) in node_tests:
    RunTestIf(condition, test_fn)
414
415
def RunGroupListTests():
  """Runs the node group listing tests guarded by "group-list".

  """
  for test_fn in (qa_group.TestGroupList, qa_group.TestGroupListFields):
    RunTestIf("group-list", test_fn)
422
423
def RunNetworkTests():
  """Runs the network management tests.

  The tags test additionally requires the "tags" test name to be enabled.

  """
  network_tests = [
    ("network", qa_network.TestNetworkAddRemove),
    ("network", qa_network.TestNetworkConnect),
    (["network", "tags"], qa_network.TestNetworkTags),
    ]
  for (condition, test_fn) in network_tests:
    RunTestIf(condition, test_fn)
431
432
def RunGroupRwTests():
  """Runs the tests which add, remove, rename and modify node groups.

  All tests are guarded by "group-rwops"; the RAPI and tags tests have an
  additional condition each.

  """
  basic_tests = (
    qa_group.TestGroupAddRemoveRename,
    qa_group.TestGroupAddWithOptions,
    qa_group.TestGroupModify,
    )
  for test_fn in basic_tests:
    RunTestIf("group-rwops", test_fn)

  RunTestIf(["group-rwops", qa_rapi.Enabled], qa_rapi.TestRapiNodeGroups)

  # The default group is looked up only after the tests above have run
  default_group = qa_group.GetDefaultGroup()
  RunTestIf(["group-rwops", "tags"], qa_tags.TestGroupTags, default_group)
443
444
def RunExportImportTests(instance, inodes):
  """Tries to export and import the instance.

  Both the export/import tests and the inter-cluster move test are skipped
  for instances whose disk template is file based.

  @param instance: the instance to export; its C{disk_template} attribute is
      read here
  @type inodes: list of nodes
  @param inodes: current nodes of the instance

  """
  # FIXME: export explicitly bails out on file based storage. other non-lvm
  # based storage types are untested, though. Also note that import could still
  # work, but is deeply embedded into the "export" case.
  if (qa_config.TestEnabled("instance-export") and
      instance.disk_template not in constants.DTS_FILEBASED):
    RunTest(qa_instance.TestInstanceExportNoTarget, instance)

    # The export goes to a node different from the first instance node
    pnode = inodes[0]
    expnode = qa_config.AcquireNode(exclude=pnode)
    try:
      # The export test's return value is later passed to the import test
      name = RunTest(qa_instance.TestInstanceExport, instance, expnode)

      RunTest(qa_instance.TestBackupList, expnode)

      if qa_config.TestEnabled("instance-import"):
        newinst = qa_config.AcquireInstance()
        try:
          RunTest(qa_instance.TestInstanceImport, newinst, pnode,
                  expnode, name)
          # Check if starting the instance works
          RunTest(qa_instance.TestInstanceStartup, newinst)
          RunTest(qa_instance.TestInstanceRemove, newinst)
        finally:
          newinst.Release()
    finally:
      expnode.Release()

  # FIXME: inter-cluster-instance-move crashes on file based instances :/
  # See Issue 414.
  if (qa_config.TestEnabled([qa_rapi.Enabled, "inter-cluster-instance-move"])
      and (instance.disk_template not in constants.DTS_FILEBASED)):
    newinst = qa_config.AcquireInstance()
    try:
      # The target node must not be one of the instance's current nodes
      tnode = qa_config.AcquireNode(exclude=inodes)
      try:
        RunTest(qa_rapi.TestInterClusterInstanceMove, instance, newinst,
                inodes, tnode)
      finally:
        tnode.Release()
    finally:
      newinst.Release()
493
494
def RunDaemonTests(instance):
  """Test the ganeti-watcher script.

  The watcher is paused before and resumed after the conditional tests.

  @param instance: the instance handed to the conditional tests

  """
  RunTest(qa_daemon.TestPauseWatcher)

  watcher_tests = [
    ("instance-automatic-restart", qa_daemon.TestInstanceAutomaticRestart),
    ("instance-consecutive-failures",
     qa_daemon.TestInstanceConsecutiveFailures),
    ]
  for (condition, test_fn) in watcher_tests:
    RunTestIf(condition, test_fn, instance)

  RunTest(qa_daemon.TestResumeWatcher)
507
508
def RunHardwareFailureTests(instance, inodes):
  """Test cluster internal hardware failure recovery.

  Runs the failover, migration, replace-disks and recreate-disks tests on the
  instance and, if the instance has at least two nodes, the node evacuate,
  failover and migrate tests on its first two nodes.

  @param instance: the instance to test
  @type inodes: list of nodes
  @param inodes: current nodes of the instance

  """
  RunTestIf("instance-failover", qa_instance.TestInstanceFailover, instance)
  RunTestIf(["instance-failover", qa_rapi.Enabled],
            qa_rapi.TestRapiInstanceFailover, instance)

  RunTestIf("instance-migrate", qa_instance.TestInstanceMigrate, instance)
  RunTestIf(["instance-migrate", qa_rapi.Enabled],
            qa_rapi.TestRapiInstanceMigrate, instance)

  if qa_config.TestEnabled("instance-replace-disks"):
    # We just need alternative secondary nodes, hence "- 1"
    othernodes = qa_config.AcquireManyNodes(len(inodes) - 1, exclude=inodes)
    try:
      RunTestIf(qa_rapi.Enabled, qa_rapi.TestRapiInstanceReplaceDisks, instance)
      RunTest(qa_instance.TestReplaceDisks,
              instance, inodes, othernodes)
    finally:
      qa_config.ReleaseManyNodes(othernodes)
    # Drop the name so the released nodes cannot be reused by accident below
    del othernodes

  if qa_config.TestEnabled("instance-recreate-disks"):
    # "acquirednodes" is what must be released later; "othernodes" is what
    # the test uses, which may also contain nodes of the instance itself
    try:
      acquirednodes = qa_config.AcquireManyNodes(len(inodes), exclude=inodes)
      othernodes = acquirednodes
    except qa_error.OutOfNodesError:
      if len(inodes) > 1:
        # If the cluster is not big enough, let's reuse some of the nodes, but
        # with different roles. In this way, we can test a DRBD instance even on
        # a 3-node cluster.
        acquirednodes = [qa_config.AcquireNode(exclude=inodes)]
        othernodes = acquirednodes + inodes[:-1]
      else:
        raise
    try:
      RunTest(qa_instance.TestRecreateDisks,
              instance, inodes, othernodes)
    finally:
      qa_config.ReleaseManyNodes(acquirednodes)

  # The node-level tests need two nodes of the instance
  if len(inodes) >= 2:
    RunTestIf("node-evacuate", qa_node.TestNodeEvacuate, inodes[0], inodes[1])
    RunTestIf("node-failover", qa_node.TestNodeFailover, inodes[0], inodes[1])
    RunTestIf("node-migrate", qa_node.TestNodeMigrate, inodes[0], inodes[1])
555
556
def RunExclusiveStorageTests():
  """Test exclusive storage.

  Does nothing unless "cluster-exclusive-storage" is enabled. The value
  returned by the first L{qa_cluster.TestSetExclStorCluster} call is passed
  back to that function once the tests have completed.

  """
  if not qa_config.TestEnabled("cluster-exclusive-storage"):
    return

  node = qa_config.AcquireNode()
  try:
    # Remember the returned value so that it can be handed back at the end
    old_es = qa_cluster.TestSetExclStorCluster(False)
    qa_node.TestExclStorSingleNode(node)

    qa_cluster.TestSetExclStorCluster(True)
    qa_cluster.TestExclStorSharedPv(node)

    if qa_config.TestEnabled("instance-add-plain-disk"):
      # Make sure that the cluster doesn't have any pre-existing problem
      qa_cluster.AssertClusterVerify()

      # Create and allocate instances
      instance1 = qa_instance.TestInstanceAddWithPlainDisk([node])
      try:
        instance2 = qa_instance.TestInstanceAddWithPlainDisk([node])
        try:
          # cluster-verify checks that disks are allocated correctly
          qa_cluster.AssertClusterVerify()

          # Remove instances
          qa_instance.TestInstanceRemove(instance2)
          qa_instance.TestInstanceRemove(instance1)
        finally:
          instance2.Release()
      finally:
        instance1.Release()

    if qa_config.TestEnabled("instance-add-drbd-disk"):
      snode = qa_config.AcquireNode()
      try:
        # The DRBD instance is added after calling the setter with False;
        # after calling it with True, cluster-verify is expected to fail
        qa_cluster.TestSetExclStorCluster(False)
        instance = qa_instance.TestInstanceAddWithDrbdDisk([node, snode])
        try:
          qa_cluster.TestSetExclStorCluster(True)
          exp_err = [constants.CV_EINSTANCEUNSUITABLENODE]
          qa_cluster.AssertClusterVerify(fail=True, errors=exp_err)
          qa_instance.TestInstanceRemove(instance)
        finally:
          instance.Release()
      finally:
        snode.Release()
    qa_cluster.TestSetExclStorCluster(old_es)
  finally:
    node.Release()
607
608
def RunCustomSshPortTests():
  """Test accessing nodes with custom SSH ports.

  This requires removing nodes, adding them to a new group, and then undoing
  the change.

  Does nothing unless "group-custom-ssh-port" is enabled. The test is also
  skipped, with a message, if C{qa_utils.UsesIPv6Connection} reports an IPv6
  connection for one of the acquired nodes.

  """
  if not qa_config.TestEnabled("group-custom-ssh-port"):
    return

  std_port = netutils.GetDaemonPort(constants.SSH)
  # Custom SSH port configured on the temporary node group
  port = 211
  master = qa_config.GetMasterNode()
  with qa_config.AcquireManyNodesCtx(1, exclude=master) as nodes:
    # Checks if the node(s) could be contacted through IPv6.
    # If yes, better skip the whole test.

    for node in nodes:
      if qa_utils.UsesIPv6Connection(node.primary, std_port):
        print("Node %s is likely to be reached using IPv6,"
              " skipping the test" % (node.primary, ))
        return

    for node in nodes:
      qa_node.NodeRemove(node)
    with qa_iptables.RulesContext(nodes) as r:
      with qa_group.NewGroupCtx() as group:
        qa_group.ModifyGroupSshPort(r, group, nodes, port)

        for node in nodes:
          qa_node.NodeAdd(node, group=group)

        # Make sure that the cluster doesn't have any pre-existing problem
        qa_cluster.AssertClusterVerify()

        # Create and allocate instances
        instance1 = qa_instance.TestInstanceAddWithPlainDisk(nodes)
        try:
          instance2 = qa_instance.TestInstanceAddWithPlainDisk(nodes)
          try:
            # cluster-verify checks that disks are allocated correctly
            qa_cluster.AssertClusterVerify()

            # Remove instances
            qa_instance.TestInstanceRemove(instance2)
            qa_instance.TestInstanceRemove(instance1)
          finally:
            instance2.Release()
        finally:
          instance1.Release()

        # Take the nodes out again while the temporary group still exists
        for node in nodes:
          qa_node.NodeRemove(node)

    # Undo the change: re-add the nodes without the temporary group
    for node in nodes:
      qa_node.NodeAdd(node)

    qa_cluster.AssertClusterVerify()
666
667
def _BuildSpecDict(par, mn, st, mx):
  """Builds an instance spec dictionary for a single parameter.

  @param par: name of the parameter
  @param mn: value stored for the parameter under C{constants.ISPECS_MIN}
  @param st: value stored for the parameter under C{constants.ISPECS_STD}
  @param mx: value stored for the parameter under C{constants.ISPECS_MAX}
  @rtype: dict
  @return: dictionary with one min/max pair and the standard value

  """
  bounds = {
    constants.ISPECS_MIN: {par: mn},
    constants.ISPECS_MAX: {par: mx},
    }
  spec = {}
  spec[constants.ISPECS_MINMAX] = [bounds]
  spec[constants.ISPECS_STD] = {par: st}
  return spec
676
677
def _BuildDoubleSpecDict(index, par, mn, st, mx):
  """Builds a spec dictionary with two min/max pairs, one of them filled.

  @param index: which of the two min/max pairs (0 or 1) receives the values;
      the other one is left as an empty dictionary
  @param par: name of the parameter
  @param mn: value stored for the parameter under C{constants.ISPECS_MIN}
  @param st: value stored for the parameter under C{constants.ISPECS_STD};
      the entry is omitted if this is C{None}
  @param mx: value stored for the parameter under C{constants.ISPECS_MAX}
  @rtype: dict
  @return: the newly built dictionary

  """
  minmax_pairs = [{}, {}]
  minmax_pairs[index] = {
    constants.ISPECS_MIN: {par: mn},
    constants.ISPECS_MAX: {par: mx},
    }

  result = {constants.ISPECS_MINMAX: minmax_pairs}
  if st is not None:
    result[constants.ISPECS_STD] = {par: st}
  return result
689
690
def TestIPolicyPlainInstance():
  """Test instance policy interaction with instances.

  A list of policy changes is built from the specs of a freshly created plain
  instance. The changes are applied one by one with the instance present,
  checking cluster-verify after each of them, and then replayed without the
  instance, expecting instance creation to fail for every change that was
  recorded as a policy violation.

  """
  params = ["memory-size", "cpu-count", "disk-count", "disk-size", "nic-count"]
  if not qa_config.IsTemplateSupported(constants.DT_PLAIN):
    print "Template %s not supported" % constants.DT_PLAIN
    return

  # This test assumes that the group policy is empty
  (_, old_specs) = qa_cluster.TestClusterSetISpecs()
  # We also assume to have only one min/max bound
  assert len(old_specs[constants.ISPECS_MINMAX]) == 1
  node = qa_config.AcquireNode()
  try:
    # Log of policy changes, list of tuples:
    # (full_change, incremental_change, policy_violated)
    history = []
    instance = qa_instance.TestInstanceAddWithPlainDisk([node])
    try:
      policyerror = [constants.CV_EINSTANCEPOLICY]
      for par in params:
        (iminval, imaxval) = qa_instance.GetInstanceSpec(instance.name, par)
        # Raise all bounds above the instance's current maximum value
        # Some specs must be multiple of 4
        new_spec = _BuildSpecDict(par, imaxval + 4, imaxval + 4, imaxval + 4)
        history.append((None, new_spec, True))
        if iminval > 0:
          # Lower the upper bound below the instance's current minimum value
          # Some specs must be multiple of 4
          if iminval >= 4:
            upper = iminval - 4
          else:
            upper = iminval - 1
          new_spec = _BuildSpecDict(par, 0, upper, upper)
          history.append((None, new_spec, True))
        # Go back to the original specs before handling the next parameter
        history.append((old_specs, None, False))

      # Test with two instance specs
      double_specs = copy.deepcopy(old_specs)
      double_specs[constants.ISPECS_MINMAX] = \
          double_specs[constants.ISPECS_MINMAX] * 2
      (par1, par2) = params[0:2]
      (_, imaxval1) = qa_instance.GetInstanceSpec(instance.name, par1)
      (_, imaxval2) = qa_instance.GetInstanceSpec(instance.name, par2)
      old_minmax = old_specs[constants.ISPECS_MINMAX][0]
      history.extend([
        (double_specs, None, False),
        # The first min/max limit is being violated
        (None,
         _BuildDoubleSpecDict(0, par1, imaxval1 + 4, imaxval1 + 4,
                              imaxval1 + 4),
         False),
        # Both min/max limits are being violated
        (None,
         _BuildDoubleSpecDict(1, par2, imaxval2 + 4, None, imaxval2 + 4),
         True),
        # The second min/max limit is being violated
        (None,
         _BuildDoubleSpecDict(0, par1,
                              old_minmax[constants.ISPECS_MIN][par1],
                              old_specs[constants.ISPECS_STD][par1],
                              old_minmax[constants.ISPECS_MAX][par1]),
         False),
        (old_specs, None, False),
        ])

      # Apply the changes, and check policy violations after each change
      qa_cluster.AssertClusterVerify()
      for (new_specs, diff_specs, failed) in history:
        qa_cluster.TestClusterSetISpecs(new_specs=new_specs,
                                        diff_specs=diff_specs)
        if failed:
          qa_cluster.AssertClusterVerify(warnings=policyerror)
        else:
          qa_cluster.AssertClusterVerify()

      qa_instance.TestInstanceRemove(instance)
    finally:
      instance.Release()

    # Now we replay the same policy changes, and we expect that the instance
    # cannot be created for the cases where we had a policy violation above
    for (new_specs, diff_specs, failed) in history:
      qa_cluster.TestClusterSetISpecs(new_specs=new_specs,
                                      diff_specs=diff_specs)
      if failed:
        qa_instance.TestInstanceAddWithPlainDisk([node], fail=True)
      # Instance creation with no policy violation has been tested already
  finally:
    node.Release()
778
779
def IsExclusiveStorageInstanceTestEnabled():
  """Tells whether the exclusive-storage instance tests can be run.

  If the "exclusive-storage-instance-tests" test is enabled, the number of
  PVs of the configured volume group is queried on every configured node.

  @rtype: bool
  @return: C{False} if the test is disabled, C{True} if it is enabled and
      every node reports at least two PVs
  @raise qa_error.Error: if the number of PVs cannot be determined on a node,
      or if a node reports fewer than two PVs

  """
  test_name = "exclusive-storage-instance-tests"
  if not qa_config.TestEnabled(test_name):
    return False

  vgname = qa_config.get("vg-name", constants.DEFAULT_VG)
  vgscmd = utils.ShellQuoteArgs([
    "vgs", "--noheadings", "-o", "pv_count", vgname,
    ])

  for node in qa_config.GetConfig()["nodes"]:
    try:
      pv_count = int(qa_utils.GetCommandOutput(node.primary, vgscmd))
    except Exception as err:
      raise qa_error.Error("Cannot get the number of PVs on %s, needed by"
                           " '%s': %s" % (node.primary, test_name, err))

    if pv_count < 2:
      raise qa_error.Error("Node %s has not enough PVs (%s) to run '%s'" %
                           (node.primary, pv_count, test_name))

  return True
802
803
def RunInstanceTests():
  """Create and exercise instances.

  For every entry of C{qa_instance.available_instance_tests} whose test is
  enabled and whose disk template is supported, an instance is created on
  freshly acquired nodes, put through the instance test suites and removed
  again. Entries which cannot be run are reported as skipped.

  """
  for (test_name, templ, create_fun, num_nodes) in \
      qa_instance.available_instance_tests:
    if (qa_config.TestEnabled(test_name) and
        qa_config.IsTemplateSupported(templ)):
      inodes = qa_config.AcquireManyNodes(num_nodes)
      try:
        instance = RunTest(create_fun, inodes)
        try:
          RunTestIf("instance-user-down", qa_instance.TestInstanceUserDown,
                    instance)
          RunTestIf("instance-communication",
                    qa_instance.TestInstanceCommunication,
                    instance,
                    qa_config.GetMasterNode())
          RunTestIf("cluster-epo", qa_cluster.TestClusterEpo)
          RunDaemonTests(instance)
          for node in inodes:
            RunTestIf("haskell-confd", qa_node.TestNodeListDrbd, node,
                      templ == constants.DT_DRBD8)
          if len(inodes) > 1:
            RunTestIf("group-rwops", qa_group.TestAssignNodesIncludingSplit,
                      constants.INITIAL_NODE_GROUP_NAME,
                      inodes[0].primary, inodes[1].primary)
          if qa_config.TestEnabled("instance-convert-disk"):
            # The conversion is run on a stopped instance
            RunTest(qa_instance.TestInstanceShutdown, instance)
            RunTest(qa_instance.TestInstanceConvertDiskToPlain,
                    instance, inodes)
            RunTest(qa_instance.TestInstanceStartup, instance)
          RunTestIf("instance-modify-disks",
                    qa_instance.TestInstanceModifyDisks, instance)
          RunCommonInstanceTests(instance, inodes)
          if qa_config.TestEnabled("instance-modify-primary"):
            othernode = qa_config.AcquireNode()
            try:
              RunTest(qa_instance.TestInstanceModifyPrimaryAndBack,
                      instance, inodes[0], othernode)
            finally:
              # Give the node back even if the test fails
              othernode.Release()
          RunGroupListTests()
          RunExportImportTests(instance, inodes)
          RunHardwareFailureTests(instance, inodes)
          RunRepairDiskSizes()
          RunTest(qa_instance.TestInstanceRemove, instance)
        finally:
          instance.Release()
        del instance
      finally:
        qa_config.ReleaseManyNodes(inodes)
      qa_cluster.AssertClusterVerify()
    else:
      # Tell apart a disabled test from an unsupported disk template
      test_desc = "Creating instances of template %s" % templ
      if not qa_config.TestEnabled(test_name):
        ReportTestSkip(test_desc, test_name)
      else:
        ReportTestSkip(test_desc, "disk template %s" % templ)
860
861
def RunMonitoringTests():
  """Runs the instance status collector test if "mon-collector" is enabled.

  """
  RunTestIf("mon-collector", qa_monitoring.TestInstStatusCollector)
864
865
# Maps the configuration name of each parallel performance test to the
# function implementing it
PARALLEL_TEST_DICT = {
  "parallel-failover": qa_performance.TestParallelInstanceFailover,
  "parallel-migration": qa_performance.TestParallelInstanceMigration,
  "parallel-replace-disks": qa_performance.TestParallelInstanceReplaceDisks,
  "parallel-reboot": qa_performance.TestParallelInstanceReboot,
  "parallel-reinstall": qa_performance.TestParallelInstanceReinstall,
  "parallel-rename": qa_performance.TestParallelInstanceRename,
  }
874
875
def RunPerformanceTests():
  """Runs the performance related tests.

  Reports a skip and returns immediately unless "performance" is enabled.
  The job queue, parallel creation and parallel operation tests each have
  their own additional condition.

  """
  if not qa_config.TestEnabled("performance"):
    ReportTestSkip("performance related tests", "performance")
    return

  if qa_config.TestEnabled("jobqueue-performance"):
    RunTest(qa_performance.TestParallelMaxInstanceCreationPerformance)
    RunTest(qa_performance.TestParallelNodeCountInstanceCreationPerformance)

    # The same set of instances is shared by the three tests below
    instances = qa_performance.CreateAllInstances()

    RunTest(qa_performance.TestParallelModify, instances)
    RunTest(qa_performance.TestParallelInstanceOSOperations, instances)
    RunTest(qa_performance.TestParallelInstanceQueries, instances)

    qa_performance.RemoveAllInstances(instances)

    RunTest(qa_performance.TestJobQueueSubmissionPerformance)

  if qa_config.TestEnabled("parallel-performance"):
    if qa_config.IsTemplateSupported(constants.DT_DRBD8):
      RunTest(qa_performance.TestParallelDRBDInstanceCreationPerformance)
    if qa_config.IsTemplateSupported(constants.DT_PLAIN):
      RunTest(qa_performance.TestParallelPlainInstanceCreationPerformance)

  # Preparations need to be made only if some of these tests are enabled
  if qa_config.IsTemplateSupported(constants.DT_DRBD8) and \
     qa_config.TestEnabled(qa_config.Either(PARALLEL_TEST_DICT.keys())):
    inodes = qa_config.AcquireManyNodes(2)
    try:
      # A single DRBD instance is used by all enabled parallel tests
      instance = qa_instance.TestInstanceAddWithDrbdDisk(inodes)
      try:
        for (test_name, test_fn) in PARALLEL_TEST_DICT.items():
          RunTestIf(test_name, test_fn, instance)
      finally:
        instance.Release()
      qa_instance.TestInstanceRemove(instance)
    finally:
      qa_config.ReleaseManyNodes(inodes)
915
916
def RunQa():
  """Run every QA stage, in order, against the configured cluster.

  The stages are executed strictly sequentially: environment checks and
  cluster setup first, then cluster-, OS-, tag-, node-, group- and
  network-level tests, then the instance-related tests, and finally the
  monitoring and performance tests followed by node removal and cluster
  destruction. Stages started through C{RunTestIf} are handed the name(s)
  of the QA test(s) they belong to.

  Every node or instance object acquired here is released in a C{finally}
  clause, so it is given back even if a test raises.

  """
  RunTestBlock(RunEnvTests)
  SetupCluster()

  RunTestBlock(RunClusterTests)
  RunTestBlock(RunOsTests)

  RunTestIf("tags", qa_tags.TestClusterTags)

  for block_fn in (RunCommonNodeTests, RunGroupListTests,
                   RunGroupRwTests, RunNetworkTests):
    RunTestBlock(block_fn)

  # These tests must not touch the master: it shouldn't be readded or put
  # offline, and "delay" needs a non-master node to test against
  nonmaster = qa_config.AcquireNode(exclude=qa_config.GetMasterNode())
  try:
    nonmaster_tests = (
      ("node-readd", qa_node.TestNodeReadd),
      ("node-modify", qa_node.TestNodeModify),
      ("delay", qa_cluster.TestDelay),
      )
    for (test_name, test_fn) in nonmaster_tests:
      RunTestIf(test_name, test_fn, nonmaster)
  finally:
    nonmaster.Release()

  # The cluster has to verify cleanly before any instance test is started
  qa_cluster.AssertClusterVerify()

  node = qa_config.AcquireNode()
  try:
    RunTestIf("tags", qa_tags.TestNodeTags, node)

    if qa_rapi.Enabled():
      RunTest(qa_rapi.TestNode, node)

      plain_via_rapi = (qa_config.TestEnabled("instance-add-plain-disk") and
                        qa_config.IsTemplateSupported(constants.DT_PLAIN))
      if plain_via_rapi:
        # Single-instance allocation through RAPI, once per client mode
        for use_client in (True, False):
          added = RunTest(qa_rapi.TestRapiInstanceAdd, node, use_client)
          try:
            if qa_config.TestEnabled("instance-plain-rapi-common-tests"):
              RunCommonInstanceTests(added, [node])
            RunTest(qa_rapi.TestRapiInstanceRemove, added, use_client)
          finally:
            added.Release()
          del added

        # Allocation of two instances in one go
        (first_inst, second_inst) = RunTest(qa_rapi.TestRapiInstanceMultiAlloc,
                                            node)
        try:
          for allocated in (first_inst, second_inst):
            RunTest(qa_rapi.TestRapiInstanceRemove, allocated, True)
        finally:
          first_inst.Release()
          second_inst.Release()
  finally:
    node.Release()

  # Each entry is (test condition, setup function, restore function); the
  # restore function is called with whatever the setup function returned
  instance_test_configs = (
    ("default-instance-tests", lambda: None, lambda _: None),
    (IsExclusiveStorageInstanceTestEnabled,
     lambda: qa_cluster.TestSetExclStorCluster(True),
     qa_cluster.TestSetExclStorCluster),
    )
  for (enabled_check, apply_conf, revert_conf) in instance_test_configs:
    if not qa_config.TestEnabled(enabled_check):
      continue
    saved_conf = apply_conf()
    RunTestBlock(RunInstanceTests)
    revert_conf(saved_conf)

  node = qa_config.AcquireNode()
  try:
    if qa_config.TestEnabled(["instance-add-plain-disk", "instance-export"]):
      # Export-with-remove is tried on a running and on a stopped instance
      for stop_first in (False, True):
        exported = RunTest(qa_instance.TestInstanceAddWithPlainDisk, [node])
        try:
          export_node = qa_config.AcquireNode(exclude=node)
          try:
            if stop_first:
              # Shut the instance down before it is exported and removed
              RunTest(qa_instance.TestInstanceShutdown, exported)
            RunTest(qa_instance.TestInstanceExportWithRemove, exported,
                    export_node)
            RunTest(qa_instance.TestBackupList, export_node)
          finally:
            export_node.Release()
        finally:
          exported.Release()
        del export_node
        del exported
      qa_cluster.AssertClusterVerify()
  finally:
    node.Release()

  RunTestIf("cluster-upgrade", qa_cluster.TestUpgrade)

  RunTestBlock(RunExclusiveStorageTests)
  RunTestIf(["cluster-instance-policy", "instance-add-plain-disk"],
            TestIPolicyPlainInstance)

  RunTestBlock(RunCustomSshPortTests)

  RunTestIf("instance-add-restricted-by-disktemplates",
            qa_instance.TestInstanceCreationRestrictedByDiskTemplates)

  # Removal of an instance whose DRBD secondary node is offline
  if qa_config.TestEnabled(["instance-remove-drbd-offline",
                            "instance-add-drbd-disk"]):
    # Callbacks handed to the test for switching a node offline and back
    set_offline = lambda node: qa_node.MakeNodeOffline(node, "yes")
    set_online = lambda node: qa_node.MakeNodeOffline(node, "no")

    # The secondary is the node handed to the offline test, hence it must
    # not be the master
    secondary = qa_config.AcquireNode(exclude=qa_config.GetMasterNode())
    try:
      primary = qa_config.AcquireNode(exclude=secondary)
      try:
        drbd_inst = qa_instance.TestInstanceAddWithDrbdDisk([primary,
                                                             secondary])
        RunTest(qa_instance.TestRemoveInstanceOfflineNode, drbd_inst,
                secondary, set_offline, set_online)
      finally:
        primary.Release()
    finally:
      secondary.Release()
    qa_cluster.AssertClusterVerify()

  RunTestBlock(RunMonitoringTests)

  RunPerformanceTests()

  RunTestIf("create-cluster", qa_node.TestNodeRemoveAll)

  RunTestIf("cluster-destroy", qa_cluster.TestClusterDestroy)
1055
1056
@UsesRapiClient
def main():
  """Parse the command line, load the QA configuration and run the QA.

  Exactly one positional argument, the configuration file, is accepted;
  any other count is reported through the option parser's error handler.
  Unless C{--yes-do-it} was given, a warning is printed and the script
  exits with status 1 before the configuration is loaded.

  The multiplexers are closed once L{RunQa} has finished, whether it
  returned normally or raised.

  """
  colors.check_for_colors()

  parser = optparse.OptionParser(usage="%prog [options] <config-file>")
  parser.add_option("--yes-do-it", dest="yes_do_it", action="store_true",
                    help="Really execute the tests")
  (opts, args) = parser.parse_args()

  # parser.error() terminates the program, so the unpacking below is only
  # reached with exactly one argument
  if len(args) != 1:
    parser.error("Wrong number of arguments.")
  (config_file, ) = args

  if not opts.yes_do_it:
    warning = ("Executing this script irreversibly destroys any Ganeti\n"
               "configuration on all nodes involved. If you really want\n"
               "to start testing, supply the --yes-do-it option.")
    print(warning)
    sys.exit(1)

  qa_config.Load(config_file)

  master = qa_config.GetMasterNode().primary
  qa_utils.StartMultiplexer(master)

  master_ssh = utils.ShellQuoteArgs(qa_utils.GetSSHCommand(master, ""))
  print("SSH command for primary node: %s" % master_ssh)
  others_ssh = utils.ShellQuoteArgs(qa_utils.GetSSHCommand("NODE", ""))
  print("SSH command for other nodes: %s" % others_ssh)

  try:
    RunQa()
  finally:
    qa_utils.CloseMultiplexers()
1093
# Start the QA only when this file is executed as a script; importing it as
# a module must not trigger a run.
if __name__ == "__main__":
  main()