Merge branch 'stable-2.14' into stable-2.15
[ganeti-github.git] / qa / ganeti-qa.py
1 #!/usr/bin/python -u
2 #
3
4 # Copyright (C) 2007, 2008, 2009, 2010, 2011, 2012, 2013 Google Inc.
5 # All rights reserved.
6 #
7 # Redistribution and use in source and binary forms, with or without
8 # modification, are permitted provided that the following conditions are
9 # met:
10 #
11 # 1. Redistributions of source code must retain the above copyright notice,
12 # this list of conditions and the following disclaimer.
13 #
14 # 2. Redistributions in binary form must reproduce the above copyright
15 # notice, this list of conditions and the following disclaimer in the
16 # documentation and/or other materials provided with the distribution.
17 #
18 # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
19 # IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
20 # TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
21 # PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR
22 # CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
23 # EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
24 # PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
25 # PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
26 # LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
27 # NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
28 # SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29
30
31 """Script for doing QA on Ganeti.
32
33 """
34
35 # pylint: disable=C0103
36 # due to invalid name
37
38 import copy
39 import datetime
40 import optparse
41 import sys
42
43 import colors
44 import qa_cluster
45 import qa_config
46 import qa_daemon
47 import qa_env
48 import qa_error
49 import qa_filters
50 import qa_group
51 import qa_instance
52 import qa_iptables
53 import qa_monitoring
54 import qa_network
55 import qa_node
56 import qa_os
57 import qa_performance
58 import qa_job
59 import qa_rapi
60 import qa_tags
61 import qa_utils
62
63 from ganeti import utils
64 from ganeti import rapi # pylint: disable=W0611
65 from ganeti import constants
66 from ganeti import netutils
67
68 import ganeti.rapi.client # pylint: disable=W0611
69 from ganeti.rapi.client import UsesRapiClient
70
71
def _FormatHeader(line, end=72, mark="-", color=None):
  """Builds a header line padded with dashes up to a given column.

  @type line: string
  @param line: text of the header
  @type end: int
  @param end: column up to which the header is padded
  @type mark: string
  @param mark: string repeated four times in front of the text
  @param color: handed unchanged to C{colors.colorize}
  @return: result of C{colors.colorize} for the padded header

  """
  header = " ".join([mark * 4, line, ""])
  # The filler is always made of dashes, whatever the mark is; a header
  # already longer than "end" gets no filler at all
  filler = "-" * (end - len(header))
  return colors.colorize((header + filler).rstrip(), color=color)
81
82
83 def _DescriptionOf(fn):
84 """Computes the description of an item.
85
86 """
87 if fn.__doc__:
88 desc = fn.__doc__.splitlines()[0].strip()
89 desc = desc.rstrip(".")
90 if fn.__name__:
91 desc = "[" + fn.__name__ + "] " + desc
92 else:
93 desc = "%r" % fn
94
95 return desc
96
97
def RunTest(fn, *args, **kwargs):
  """Runs a test after printing a header.

  A PASSED or FAILED header is printed once the test is over, followed in
  both cases by a header showing the elapsed time.

  @param fn: test function, called with C{args} and C{kwargs}
  @return: whatever C{fn} returns
  @raise Exception: whatever C{fn} raises, after reporting the failure

  """
  started = datetime.datetime.now()
  desc = _DescriptionOf(fn)

  print("")
  print(_FormatHeader("%s start %s" % (started, desc),
                      color=colors.YELLOW, mark="<"))

  try:
    result = fn(*args, **kwargs)
    print(_FormatHeader("PASSED %s" % (desc, ), color=colors.GREEN))
    return result
  except Exception as err:
    print(_FormatHeader("FAILED %s: %s" % (desc, err), color=colors.RED))
    raise
  finally:
    # Printed for passed and failed tests alike
    finished = datetime.datetime.now()
    elapsed = finished - started
    print(_FormatHeader("%s time=%s %s" % (finished, elapsed, desc),
                        color=colors.MAGENTA, mark=">"))
123
124
def ReportTestSkip(desc, testnames):
  """Reports that tests have been skipped.

  @type desc: string
  @param desc: description of what is being skipped
  @type testnames: string or list of string
  @param testnames: either a single test name in the configuration
      file, or a list of testnames (which will be AND-ed together)

  """
  now = datetime.datetime.now()
  # TODO: Formatting test names when non-string names are involved
  message = "%s skipping %s, test(s) %s disabled" % (now, desc, testnames)
  print(_FormatHeader(message, color=colors.BLUE, mark="*"))
140
141
def RunTestIf(testnames, fn, *args, **kwargs):
  """Runs a test conditionally.

  The test is run through L{RunTest} if C{testnames} is enabled according
  to C{qa_config.TestEnabled}, otherwise it is reported as skipped.

  @param testnames: either a single test name in the configuration
      file, or a list of testnames (which will be AND-ed together)
  @param fn: test function, called with C{args} and C{kwargs}

  """
  if not qa_config.TestEnabled(testnames):
    ReportTestSkip(_DescriptionOf(fn), testnames)
    return

  # The result of the test is deliberately not handed back to the caller
  RunTest(fn, *args, **kwargs)
154
155
def RunTestBlock(fn, *args, **kwargs):
  """Runs a block of tests after printing a header.

  A header with the elapsed time is printed when the block is over, whether
  it failed or not.

  @param fn: function running the block, called with C{args} and C{kwargs}
  @return: whatever C{fn} returns
  @raise Exception: whatever C{fn} raises, after reporting the failure

  """
  started = datetime.datetime.now()
  desc = _DescriptionOf(fn)

  print("")
  print(_FormatHeader("BLOCK %s start %s" % (started, desc),
                      color=[colors.YELLOW, colors.BOLD], mark="v"))

  try:
    return fn(*args, **kwargs)
  except Exception as err:
    print(_FormatHeader("BLOCK FAILED %s: %s" % (desc, err),
                        color=[colors.RED, colors.BOLD]))
    raise
  finally:
    finished = datetime.datetime.now()
    elapsed = finished - started
    print(_FormatHeader("BLOCK %s time=%s %s" % (finished, elapsed, desc),
                        color=[colors.MAGENTA, colors.BOLD], mark="^"))
179
180
def RunEnvTests():
  """Run several environment tests.

  All of them are controlled by the "env" test name.

  """
  env_tests = [
    qa_env.TestSshConnection,
    qa_env.TestIcmpPing,
    qa_env.TestGanetiCommands,
    ]
  for test_fn in env_tests:
    RunTestIf("env", test_fn)
188
189
def SetupCluster():
  """Initializes the cluster.

  Besides the (optional) cluster creation this sets up RAPI and the node
  groups, adds the nodes, resumes the watcher and runs the listing tests.

  """
  RunTestIf("create-cluster", qa_cluster.TestClusterInit)
  if not qa_config.TestEnabled("create-cluster"):
    # A cluster which is already in place is assumed to have
    # exclusive-storage set according to the configuration
    exclusive_storage = qa_config.get("exclusive-storage", False)
    qa_config.SetExclusiveStorage(exclusive_storage)

  qa_rapi.SetupRapi()

  qa_group.ConfigureGroups()

  # Test on empty cluster
  for (name, test_fn) in [
    ("node-list", qa_node.TestNodeList),
    ("instance-list", qa_instance.TestInstanceList),
    ("job-list", qa_job.TestJobList),
    ]:
    RunTestIf(name, test_fn)

  RunTestIf("create-cluster", qa_node.TestNodeAddAll)
  if not qa_config.TestEnabled("create-cluster"):
    # The nodes are considered to be there already
    qa_node.MarkNodeAddedAll()

  for test_fn in [qa_cluster.TestJobqueue, qa_job.TestJobCancellation]:
    RunTestIf("test-jobqueue", test_fn)

  # The watcher is enabled unconditionally
  RunTest(qa_daemon.TestResumeWatcher)

  RunTestIf("node-list", qa_node.TestNodeList)

  # Test listing fields
  for (name, test_fn) in [
    ("node-list", qa_node.TestNodeListFields),
    ("instance-list", qa_instance.TestInstanceListFields),
    ("job-list", qa_job.TestJobListFields),
    ("instance-export", qa_instance.TestBackupListFields),
    ]:
    RunTestIf(name, test_fn)

  RunTestIf("node-info", qa_node.TestNodeInfo)
230
231
def RunClusterTests():
  """Runs tests related to gnt-cluster.

  Every test is paired with the test name(s) enabling it and run through
  L{RunTestIf}, in the order in which the tests are listed.

  """
  no_virtual = qa_config.NoVirtualCluster
  rapi = qa_rapi.Enabled

  first_tests = [
    ("create-cluster", qa_cluster.TestClusterInitDisk),
    ("cluster-renew-crypto", qa_cluster.TestClusterRenewCrypto),
    ]
  for (name, test_fn) in first_tests:
    RunTestIf(name, test_fn)

  cluster_tests = [
    ("cluster-verify", qa_cluster.TestClusterVerify),
    ("cluster-reserved-lvs", qa_cluster.TestClusterReservedLvs),
    # TODO: add more cluster modify tests
    ("cluster-modify", qa_cluster.TestClusterModifyEmpty),
    ("cluster-modify", qa_cluster.TestClusterModifyIPolicy),
    ("cluster-modify", qa_cluster.TestClusterModifyISpecs),
    ("cluster-modify", qa_cluster.TestClusterModifyBe),
    ("cluster-modify", qa_cluster.TestClusterModifyDisk),
    ("cluster-modify", qa_cluster.TestClusterModifyDiskTemplates),
    ("cluster-modify", qa_cluster.TestClusterModifyFileStorageDir),
    ("cluster-modify", qa_cluster.TestClusterModifySharedFileStorageDir),
    ("cluster-modify", qa_cluster.TestClusterModifyInstallImage),
    ("cluster-modify", qa_cluster.TestClusterModifyUserShutdown),
    ("cluster-rename", qa_cluster.TestClusterRename),
    ("cluster-info", qa_cluster.TestClusterVersion),
    ("cluster-info", qa_cluster.TestClusterInfo),
    ("cluster-info", qa_cluster.TestClusterGetmaster),
    ("cluster-redist-conf", qa_cluster.TestClusterRedistConf),
    (["cluster-copyfile", no_virtual], qa_cluster.TestClusterCopyfile),
    ("cluster-command", qa_cluster.TestClusterCommand),
    ("cluster-burnin", qa_cluster.TestClusterBurnin),
    ("cluster-master-failover", qa_cluster.TestClusterMasterFailover),
    ("cluster-master-failover",
     qa_cluster.TestClusterMasterFailoverWithDrainedQueue),
    (["cluster-oob", no_virtual], qa_cluster.TestClusterOob),
    ("cluster-instance-communication", qa_cluster.TestInstanceCommunication),
    (rapi, qa_rapi.TestVersion),
    (rapi, qa_rapi.TestEmptyCluster),
    (rapi, qa_rapi.TestRapiQuery),
    ]
  for (name, test_fn) in cluster_tests:
    RunTestIf(name, test_fn)
276
277
def RunRepairDiskSizes():
  """Run the repair disk-sizes test.

  The test is enabled by the "cluster-repair-disk-sizes" test name.

  """
  repair_test = qa_cluster.TestClusterRepairDiskSizes
  RunTestIf("cluster-repair-disk-sizes", repair_test)
283
284
def RunOsTests():
  """Runs all tests related to gnt-os.

  The tests need both the "os" test name and C{qa_config.NoVirtualCluster}
  to be enabled.

  """
  os_enabled = ["os", qa_config.NoVirtualCluster]

  # Function handed to the validity tests, None when RAPI is disabled
  rapi_getos = None
  if qa_config.TestEnabled(qa_rapi.Enabled):
    rapi_getos = qa_rapi.GetOperatingSystems

  # Each test comes with the positional arguments it is called with
  os_tests = [
    (qa_os.TestOsList, ()),
    (qa_os.TestOsDiagnose, ()),
    (qa_os.TestOsValid, (rapi_getos, )),
    (qa_os.TestOsInvalid, (rapi_getos, )),
    (qa_os.TestOsPartiallyValid, (rapi_getos, )),
    (qa_os.TestOsModifyValid, ()),
    (qa_os.TestOsModifyInvalid, ()),
    (qa_os.TestOsStatesNonExisting, ()),
    ]
  for (test_fn, test_args) in os_tests:
    RunTestIf(os_enabled, test_fn, *test_args)
315
316
def RunCommonInstanceTests(instance, inst_nodes):
  """Runs a few tests that are common to all disk types.

  @param instance: instance the tests are run on
  @param inst_nodes: handed to the cluster-verify test with broken DRBD
      disks, which is only run for DRBD instances

  """
  rapi = qa_rapi.Enabled

  RunTestIf("instance-shutdown", qa_instance.TestInstanceShutdown, instance)
  RunTestIf(["instance-shutdown", "instance-console", rapi],
            qa_rapi.TestRapiStoppedInstanceConsole, instance)
  RunTestIf(["instance-shutdown", "instance-modify"],
            qa_instance.TestInstanceStoppedModify, instance)
  RunTestIf("instance-shutdown", qa_instance.TestInstanceStartup, instance)

  # Test shutdown/start via RAPI
  for rapi_test in [
    qa_rapi.TestRapiInstanceShutdown,
    qa_rapi.TestRapiInstanceStartup,
    ]:
    RunTestIf(["instance-shutdown", rapi], rapi_test, instance)

  RunTestIf("instance-list", qa_instance.TestInstanceList)

  RunTestIf("instance-info", qa_instance.TestInstanceInfo, instance)

  RunTestIf("instance-modify", qa_instance.TestInstanceModify, instance)
  RunTestIf(["instance-modify", rapi],
            qa_rapi.TestRapiInstanceModify, instance)

  RunTestIf("instance-console", qa_instance.TestInstanceConsole, instance)
  RunTestIf(["instance-console", rapi],
            qa_rapi.TestRapiInstanceConsole, instance)

  RunTestIf("instance-device-names", qa_instance.TestInstanceDeviceNames,
            instance)

  # Tests which are run with the instance shut down
  down_tests = qa_config.Either([
    "instance-reinstall",
    "instance-rename",
    "instance-grow-disk",
    ])

  # The instance is stopped if any of the 'down' tests is going to run
  RunTestIf(down_tests, qa_instance.TestInstanceShutdown, instance)

  # now run the 'down' state tests
  RunTestIf("instance-reinstall", qa_instance.TestInstanceReinstall, instance)
  RunTestIf(["instance-reinstall", rapi],
            qa_rapi.TestRapiInstanceReinstall, instance)

  if qa_config.TestEnabled("instance-rename"):
    tgt_instance = qa_config.AcquireInstance()
    try:
      rename_source = instance.name
      # The instance is always renamed to its own name, and additionally to
      # a different name if one is configured
      rename_targets = [rename_source]
      if tgt_instance.name is not None:
        rename_targets.append(tgt_instance.name)

      for rename_target in rename_targets:
        RunTest(qa_instance.TestInstanceRenameAndBack,
                rename_source, rename_target)
        RunTestIf(rapi, qa_rapi.TestRapiInstanceRenameAndBack,
                  rename_source, rename_target)
    finally:
      tgt_instance.Release()

  RunTestIf(["instance-grow-disk"], qa_instance.TestInstanceGrowDisk, instance)

  # and now start the instance again
  RunTestIf(down_tests, qa_instance.TestInstanceStartup, instance)

  RunTestIf("instance-reboot", qa_instance.TestInstanceReboot, instance)

  RunTestIf("tags", qa_tags.TestInstanceTags, instance)

  if instance.disk_template == constants.DT_DRBD8:
    RunTestIf("cluster-verify",
              qa_cluster.TestClusterVerifyDisksBrokenDRBD,
              instance, inst_nodes)
  RunTestIf("cluster-verify", qa_cluster.TestClusterVerify)

  RunTestIf(rapi, qa_rapi.TestInstance, instance)

  # Lists instances, too
  RunTestIf("node-list", qa_node.TestNodeList)

  # Some jobs have been run, let's test listing them
  RunTestIf("job-list", qa_job.TestJobList)
402
403
def RunCommonNodeTests():
  """Run a few common node tests.

  """
  node_tests = [
    ("node-volumes", qa_node.TestNodeVolumes),
    ("node-storage", qa_node.TestNodeStorage),
    (["node-oob", qa_config.NoVirtualCluster], qa_node.TestOutOfBand),
    ]
  for (name, test_fn) in node_tests:
    RunTestIf(name, test_fn)
411
412
def RunGroupListTests():
  """Run tests for listing node groups.

  Both tests are controlled by the "group-list" test name.

  """
  for test_fn in [qa_group.TestGroupList, qa_group.TestGroupListFields]:
    RunTestIf("group-list", test_fn)
419
420
def RunNetworkTests():
  """Run tests for network management.

  """
  network_tests = [
    ("network", qa_network.TestNetworkAddRemove),
    ("network", qa_network.TestNetworkConnect),
    # Needs the "tags" tests to be enabled as well
    (["network", "tags"], qa_network.TestNetworkTags),
    ]
  for (name, test_fn) in network_tests:
    RunTestIf(name, test_fn)
428
429
def RunFilterTests():
  """Run tests for job filter management.

  All the tests are controlled by the "filters" test name and are run in
  the order in which they are listed.

  """
  filter_tests = [
    qa_filters.TestFilterList,
    qa_filters.TestFilterListFields,
    qa_filters.TestFilterAddRemove,
    qa_filters.TestFilterReject,
    qa_filters.TestFilterOpCode,
    qa_filters.TestFilterReasonChain,
    qa_filters.TestFilterContinue,
    qa_filters.TestFilterAcceptPause,
    qa_filters.TestFilterWatermark,
    qa_filters.TestFilterRateLimit,
    qa_filters.TestAdHocReasonRateLimit,
    ]
  for test_fn in filter_tests:
    RunTestIf("filters", test_fn)
445
446
def RunGroupRwTests():
  """Run tests for adding/removing/renaming groups.

  """
  for test_fn in [
    qa_group.TestGroupAddRemoveRename,
    qa_group.TestGroupAddWithOptions,
    qa_group.TestGroupModify,
    ]:
    RunTestIf("group-rwops", test_fn)

  RunTestIf(["group-rwops", qa_rapi.Enabled], qa_rapi.TestRapiNodeGroups)

  # The default group is looked up even if the tags test ends up skipped
  default_group = qa_group.GetDefaultGroup()
  RunTestIf(["group-rwops", "tags"], qa_tags.TestGroupTags, default_group)
457
458
def RunExportImportTests(instance, inodes):
  """Tries to export and import the instance.

  @param instance: instance to export
  @type inodes: list of nodes
  @param inodes: current nodes of the instance

  """
  # FIXME: export explicitly bails out on file based storage. other non-lvm
  # based storage types are untested, though. Also note that import could still
  # work, but is deeply embedded into the "export" case.
  if qa_config.TestEnabled("instance-export"):
    RunTest(qa_instance.TestInstanceExportNoTarget, instance)

    primary = inodes[0]
    export_node = qa_config.AcquireNode(exclude=primary)
    try:
      export_name = RunTest(qa_instance.TestInstanceExport, instance,
                            export_node)

      RunTest(qa_instance.TestBackupList, export_node)

      if qa_config.TestEnabled("instance-import"):
        imported = qa_config.AcquireInstance()
        try:
          RunTest(qa_instance.TestInstanceImport, imported, primary,
                  export_node, export_name)
          # Check if starting the instance works
          RunTest(qa_instance.TestInstanceStartup, imported)
          RunTest(qa_instance.TestInstanceRemove, imported)
        finally:
          imported.Release()
    finally:
      export_node.Release()

  # FIXME: inter-cluster-instance-move crashes on file based instances :/
  # See Issue 414.
  move_enabled = qa_config.TestEnabled([qa_rapi.Enabled,
                                        "inter-cluster-instance-move"])
  if move_enabled:
    moved = qa_config.AcquireInstance()
    try:
      target_node = qa_config.AcquireNode(exclude=inodes)
      try:
        RunTest(qa_rapi.TestInterClusterInstanceMove, instance, moved,
                inodes, target_node)
      finally:
        target_node.Release()
    finally:
      moved.Release()
505
506
def RunDaemonTests(instance):
  """Test the ganeti-watcher script.

  The watcher is paused before the tests and resumed after them.

  @param instance: instance handed to the watcher tests

  """
  RunTest(qa_daemon.TestPauseWatcher)

  watcher_tests = [
    ("instance-automatic-restart", qa_daemon.TestInstanceAutomaticRestart),
    ("instance-consecutive-failures",
     qa_daemon.TestInstanceConsecutiveFailures),
    ]
  for (name, test_fn) in watcher_tests:
    RunTestIf(name, test_fn, instance)

  RunTest(qa_daemon.TestResumeWatcher)
519
520
def RunHardwareFailureTests(instance, inodes):
  """Test cluster internal hardware failure recovery.

  @param instance: instance the tests are run on
  @type inodes: list of nodes
  @param inodes: nodes of the instance

  """
  rapi = qa_rapi.Enabled

  RunTestIf("instance-failover", qa_instance.TestInstanceFailover, instance)
  RunTestIf(["instance-failover", rapi],
            qa_rapi.TestRapiInstanceFailover, instance)

  RunTestIf("instance-migrate", qa_instance.TestInstanceMigrate, instance)
  RunTestIf(["instance-migrate", rapi],
            qa_rapi.TestRapiInstanceMigrate, instance)

  if qa_config.TestEnabled("instance-replace-disks"):
    # We just need alternative secondary nodes, hence "- 1"
    spare_count = len(inodes) - 1
    othernodes = qa_config.AcquireManyNodes(spare_count, exclude=inodes)
    try:
      RunTestIf(rapi, qa_rapi.TestRapiInstanceReplaceDisks, instance)
      RunTest(qa_instance.TestReplaceDisks, instance, inodes, othernodes)
    finally:
      qa_config.ReleaseManyNodes(othernodes)
    del othernodes

  if qa_config.TestEnabled("instance-recreate-disks"):
    try:
      acquirednodes = qa_config.AcquireManyNodes(len(inodes), exclude=inodes)
      othernodes = acquirednodes
    except qa_error.OutOfNodesError:
      if len(inodes) <= 1:
        raise
      # If the cluster is not big enough, let's reuse some of the nodes, but
      # with different roles. In this way, we can test a DRBD instance even on
      # a 3-node cluster.
      acquirednodes = [qa_config.AcquireNode(exclude=inodes)]
      othernodes = acquirednodes + inodes[:-1]
    try:
      RunTest(qa_instance.TestRecreateDisks, instance, inodes, othernodes)
    finally:
      # Only the nodes acquired here are released, not the reused ones
      qa_config.ReleaseManyNodes(acquirednodes)

  if len(inodes) >= 2:
    node_tests = [
      ("node-evacuate", qa_node.TestNodeEvacuate),
      ("node-failover", qa_node.TestNodeFailover),
      ("node-migrate", qa_node.TestNodeMigrate),
      ]
    for (name, test_fn) in node_tests:
      RunTestIf(name, test_fn, inodes[0], inodes[1])
567
568
def RunExclusiveStorageTests():
  """Test exclusive storage.

  Nothing is done unless the "cluster-exclusive-storage" test is enabled.
  The value returned by the first call switching exclusive storage off is
  set again once the tests are over.

  """
  if not qa_config.TestEnabled("cluster-exclusive-storage"):
    return

  node = qa_config.AcquireNode()
  try:
    # Value handed back to the cluster after the tests
    old_es = qa_cluster.TestSetExclStorCluster(False)
    qa_node.TestExclStorSingleNode(node)

    qa_cluster.TestSetExclStorCluster(True)
    qa_cluster.TestExclStorSharedPv(node)

    if qa_config.TestEnabled("instance-add-plain-disk"):
      # Make sure that the cluster doesn't have any pre-existing problem
      qa_cluster.AssertClusterVerify()

      # Create and allocate instances
      first_inst = qa_instance.TestInstanceAddWithPlainDisk([node])
      try:
        second_inst = qa_instance.TestInstanceAddWithPlainDisk([node])
        try:
          # cluster-verify checks that disks are allocated correctly
          qa_cluster.AssertClusterVerify()

          # Remove instances
          qa_instance.TestInstanceRemove(second_inst)
          qa_instance.TestInstanceRemove(first_inst)
        finally:
          second_inst.Release()
      finally:
        first_inst.Release()

    if qa_config.TestEnabled("instance-add-drbd-disk"):
      snode = qa_config.AcquireNode()
      try:
        # The instance is created with exclusive storage switched off, and
        # cluster-verify is expected to fail once it is switched on again
        qa_cluster.TestSetExclStorCluster(False)
        drbd_inst = qa_instance.TestInstanceAddWithDrbdDisk([node, snode])
        try:
          qa_cluster.TestSetExclStorCluster(True)
          expected = [constants.CV_EINSTANCEUNSUITABLENODE]
          qa_cluster.AssertClusterVerify(fail=True, errors=expected)
          qa_instance.TestInstanceRemove(drbd_inst)
        finally:
          drbd_inst.Release()
      finally:
        snode.Release()
    qa_cluster.TestSetExclStorCluster(old_es)
  finally:
    node.Release()
619
620
def RunCustomSshPortTests():
  """Test accessing nodes with custom SSH ports.

  This requires removing nodes, adding them to a new group, and then undoing
  the change.

  Nothing is done unless the "group-custom-ssh-port" test is enabled; the
  test is also skipped if a node is likely to be reached using IPv6.

  """
  if not qa_config.TestEnabled("group-custom-ssh-port"):
    return

  std_port = netutils.GetDaemonPort(constants.SSH)
  # Custom SSH port set on the new group
  port = 211
  master = qa_config.GetMasterNode()
  with qa_config.AcquireManyNodesCtx(1, exclude=master) as nodes:
    # Checks if the node(s) could be contacted through IPv6.
    # If yes, better skip the whole test.
    for node in nodes:
      if qa_utils.UsesIPv6Connection(node.primary, std_port):
        # The space after the comma used to be missing from the message
        print("Node %s is likely to be reached using IPv6,"
              " skipping the test" % (node.primary, ))
        return

    for node in nodes:
      qa_node.NodeRemove(node)
    with qa_iptables.RulesContext() as r:
      with qa_group.NewGroupCtx() as group:
        qa_group.ModifyGroupSshPort(r, group, nodes, port)

        for node in nodes:
          qa_node.NodeAdd(node, group=group)

        # Make sure that the cluster doesn't have any pre-existing problem
        qa_cluster.AssertClusterVerify()

        # Create and allocate instances
        instance1 = qa_instance.TestInstanceAddWithPlainDisk(nodes)
        try:
          instance2 = qa_instance.TestInstanceAddWithPlainDisk(nodes)
          try:
            # cluster-verify checks that disks are allocated correctly
            qa_cluster.AssertClusterVerify()

            # Remove instances
            qa_instance.TestInstanceRemove(instance2)
            qa_instance.TestInstanceRemove(instance1)
          finally:
            instance2.Release()
        finally:
          instance1.Release()

        # The nodes leave the temporary group before its context is exited
        for node in nodes:
          qa_node.NodeRemove(node)

    # Undo the change: the nodes are added again without the custom group
    for node in nodes:
      qa_node.NodeAdd(node)

    qa_cluster.AssertClusterVerify()
678
679
def _BuildSpecDict(par, mn, st, mx):
  """Builds instance specs with a single min/max pair for one parameter.

  @param par: name of the parameter
  @param mn: value stored under C{constants.ISPECS_MIN}
  @param st: value stored under C{constants.ISPECS_STD}
  @param mx: value stored under C{constants.ISPECS_MAX}
  @rtype: dict
  @return: specs with C{constants.ISPECS_MINMAX} and C{constants.ISPECS_STD}

  """
  bounds = {
    constants.ISPECS_MIN: {par: mn},
    constants.ISPECS_MAX: {par: mx},
    }
  return {
    constants.ISPECS_MINMAX: [bounds],
    constants.ISPECS_STD: {par: st},
    }
688
689
def _BuildDoubleSpecDict(index, par, mn, st, mx):
  """Builds instance specs with two min/max pairs, one of which is filled.

  @param index: position (0 or 1) of the min/max pair to fill; the other
      pair is left empty
  @param par: name of the parameter
  @param mn: value stored under C{constants.ISPECS_MIN}
  @param st: value stored under C{constants.ISPECS_STD}; no standard specs
      are added if it is None
  @param mx: value stored under C{constants.ISPECS_MAX}
  @rtype: dict
  @return: the specs

  """
  minmax = [{}, {}]
  minmax[index] = {
    constants.ISPECS_MIN: {par: mn},
    constants.ISPECS_MAX: {par: mx},
    }

  spec = {constants.ISPECS_MINMAX: minmax}
  if st is not None:
    spec[constants.ISPECS_STD] = {par: st}
  return spec
701
702
def TestIPolicyPlainInstance():
  """Test instance policy interaction with instances

  A list of policy changes is applied while a plain instance exists, with
  cluster-verify run after each of them. The same changes are then replayed
  without the instance, and creating one is expected to fail for every
  change recorded as a policy violation.

  """
  if not qa_config.IsTemplateSupported(constants.DT_PLAIN):
    print("Template %s not supported" % constants.DT_PLAIN)
    return

  params = ["memory-size", "cpu-count", "disk-count", "disk-size", "nic-count"]

  # This test assumes that the group policy is empty
  (_, old_specs) = qa_cluster.TestClusterSetISpecs()
  # We also assume to have only one min/max bound
  assert len(old_specs[constants.ISPECS_MINMAX]) == 1
  node = qa_config.AcquireNode()
  try:
    # Log of policy changes, list of tuples:
    # (full_change, incremental_change, policy_violated)
    history = []
    instance = qa_instance.TestInstanceAddWithPlainDisk([node])
    try:
      policyerror = [constants.CV_EINSTANCEPOLICY]
      for par in params:
        (iminval, imaxval) = qa_instance.GetInstanceSpec(instance.name, par)
        # Some specs must be multiple of 4
        above = imaxval + 4
        history.append((None, _BuildSpecDict(par, above, above, above), True))
        if iminval > 0:
          # Some specs must be multiple of 4
          upper = iminval - 4 if iminval >= 4 else iminval - 1
          history.append((None, _BuildSpecDict(par, 0, upper, upper), True))
        # The original specs are set again before the next parameter
        history.append((old_specs, None, False))

      # Test with two instance specs
      double_specs = copy.deepcopy(old_specs)
      double_specs[constants.ISPECS_MINMAX] = \
        double_specs[constants.ISPECS_MINMAX] * 2
      (par1, par2) = params[0:2]
      (_, imaxval1) = qa_instance.GetInstanceSpec(instance.name, par1)
      (_, imaxval2) = qa_instance.GetInstanceSpec(instance.name, par2)
      old_minmax = old_specs[constants.ISPECS_MINMAX][0]
      above1 = imaxval1 + 4
      above2 = imaxval2 + 4
      history.extend([
        (double_specs, None, False),
        # The first min/max limit is being violated
        (None,
         _BuildDoubleSpecDict(0, par1, above1, above1, above1),
         False),
        # Both min/max limits are being violated
        (None,
         _BuildDoubleSpecDict(1, par2, above2, None, above2),
         True),
        # The second min/max limit is being violated
        (None,
         _BuildDoubleSpecDict(0, par1,
                              old_minmax[constants.ISPECS_MIN][par1],
                              old_specs[constants.ISPECS_STD][par1],
                              old_minmax[constants.ISPECS_MAX][par1]),
         False),
        (old_specs, None, False),
        ])

      # Apply the changes, and check policy violations after each change
      qa_cluster.AssertClusterVerify()
      for (full_change, incr_change, violated) in history:
        qa_cluster.TestClusterSetISpecs(new_specs=full_change,
                                        diff_specs=incr_change)
        if violated:
          qa_cluster.AssertClusterVerify(warnings=policyerror)
        else:
          qa_cluster.AssertClusterVerify()

      qa_instance.TestInstanceRemove(instance)
    finally:
      instance.Release()

    # Now we replay the same policy changes, and we expect that the instance
    # cannot be created for the cases where we had a policy violation above
    for (full_change, incr_change, violated) in history:
      qa_cluster.TestClusterSetISpecs(new_specs=full_change,
                                      diff_specs=incr_change)
      if violated:
        qa_instance.TestInstanceAddWithPlainDisk([node], fail=True)
      # Instance creation with no policy violation has been tested already
  finally:
    node.Release()
790
791
def IsExclusiveStorageInstanceTestEnabled():
  # Returns whether the "exclusive-storage-instance-tests" test is enabled.
  # When it is, every configured node must report at least two PVs for the
  # configured volume group, otherwise qa_error.Error is raised.
  test_name = "exclusive-storage-instance-tests"
  if not qa_config.TestEnabled(test_name):
    return False

  vgname = qa_config.get("vg-name", constants.DEFAULT_VG)
  vgscmd = utils.ShellQuoteArgs(
    ["vgs", "--noheadings", "-o", "pv_count", vgname])
  for node in qa_config.GetConfig()["nodes"]:
    try:
      pvnum = int(qa_utils.GetCommandOutput(node.primary, vgscmd))
    except Exception as err:
      # Any failure, including unparsable output, is reported as a QA error
      raise qa_error.Error("Cannot get the number of PVs on %s, needed by"
                           " '%s': %s" % (node.primary, test_name, err))
    if pvnum < 2:
      raise qa_error.Error("Node %s has not enough PVs (%s) to run '%s'" %
                           (node.primary, pvnum, test_name))
  return True
814
815
def RunInstanceTests():
  """Create and exercise instances.

  For each entry of C{qa_instance.available_instance_tests} whose test is
  enabled and whose disk template is supported, an instance is created, the
  instance tests are run on it and the instance is removed again. The other
  entries are reported as skipped.

  """
  requested_conversions = qa_config.get("convert-disk-templates", [])
  supported_conversions = \
    set(requested_conversions).difference(constants.DTS_NOT_CONVERTIBLE_TO)
  for (test_name, templ, create_fun, num_nodes) in \
      qa_instance.available_instance_tests:
    if (qa_config.TestEnabled(test_name) and
        qa_config.IsTemplateSupported(templ)):
      inodes = qa_config.AcquireManyNodes(num_nodes)
      try:
        instance = RunTest(create_fun, inodes)
        try:
          RunTestIf("instance-user-down", qa_instance.TestInstanceUserDown,
                    instance)
          RunTestIf("instance-communication",
                    qa_instance.TestInstanceCommunication,
                    instance,
                    qa_config.GetMasterNode())
          RunTestIf("cluster-epo", qa_cluster.TestClusterEpo)
          RunDaemonTests(instance)
          for node in inodes:
            RunTestIf("haskell-confd", qa_node.TestNodeListDrbd, node,
                      templ == constants.DT_DRBD8)
          if len(inodes) > 1:
            RunTestIf("group-rwops", qa_group.TestAssignNodesIncludingSplit,
                      constants.INITIAL_NODE_GROUP_NAME,
                      inodes[0].primary, inodes[1].primary)
          # This test will run once but it will cover all the supported
          # user-provided disk template conversions
          if qa_config.TestEnabled("instance-convert-disk"):
            if (len(supported_conversions) > 1 and
                instance.disk_template in supported_conversions):
              RunTest(qa_instance.TestInstanceShutdown, instance)
              RunTest(qa_instance.TestInstanceConvertDiskTemplate, instance,
                      supported_conversions)
              RunTest(qa_instance.TestInstanceStartup, instance)
              # At this point we clear the set because the requested
              # conversions have been tested
              supported_conversions.clear()
            else:
              test_desc = "Converting instance of template %s" % templ
              ReportTestSkip(test_desc, "conversion feature")
          RunTestIf("instance-modify-disks",
                    qa_instance.TestInstanceModifyDisks, instance)
          RunCommonInstanceTests(instance, inodes)
          if qa_config.TestEnabled("instance-modify-primary"):
            othernode = qa_config.AcquireNode()
            # Release the node even if the test fails, like every other
            # acquired resource in this file
            try:
              RunTest(qa_instance.TestInstanceModifyPrimaryAndBack,
                      instance, inodes[0], othernode)
            finally:
              othernode.Release()
          RunGroupListTests()
          RunExportImportTests(instance, inodes)
          RunHardwareFailureTests(instance, inodes)
          RunRepairDiskSizes()
          RunTestIf(["rapi", "instance-data-censorship"],
                    qa_rapi.TestInstanceDataCensorship, instance, inodes)
          RunTest(qa_instance.TestInstanceRemove, instance)
        finally:
          instance.Release()
        del instance
      finally:
        qa_config.ReleaseManyNodes(inodes)
      qa_cluster.AssertClusterVerify()
    else:
      # Report which of the two conditions caused the skip
      test_desc = "Creating instances of template %s" % templ
      if not qa_config.TestEnabled(test_name):
        ReportTestSkip(test_desc, test_name)
      else:
        ReportTestSkip(test_desc, "disk template %s" % templ)
887
888
def RunMonitoringTests():
  """Run tests for the monitoring collectors.

  The instance status collector test is enabled by the "mon-collector" test
  name.

  """
  RunTestIf("mon-collector", qa_monitoring.TestInstStatusCollector)
891
892
# Maps the name of each parallel performance test to the function
# implementing it; RunPerformanceTests runs the enabled ones on a DRBD
# instance
PARALLEL_TEST_DICT = {
  "parallel-failover":
    qa_performance.TestParallelInstanceFailover,
  "parallel-migration":
    qa_performance.TestParallelInstanceMigration,
  "parallel-replace-disks":
    qa_performance.TestParallelInstanceReplaceDisks,
  "parallel-reboot":
    qa_performance.TestParallelInstanceReboot,
  "parallel-reinstall":
    qa_performance.TestParallelInstanceReinstall,
  "parallel-rename":
    qa_performance.TestParallelInstanceRename,
  }
901
902
def RunPerformanceTests():
  """Run performance tests.

  Nothing is run unless the "performance" test is enabled. The watcher is
  paused while the tests are running and continued afterwards.

  """
  if not qa_config.TestEnabled("performance"):
    ReportTestSkip("performance related tests", "performance")
    return

  # For reproducible performance, run performance tests with the watcher
  # paused.
  qa_utils.AssertCommand(["gnt-cluster", "watcher", "pause", "4h"])

  if qa_config.TestEnabled("jobqueue-performance"):
    RunTest(qa_performance.TestParallelMaxInstanceCreationPerformance)
    RunTest(qa_performance.TestParallelNodeCountInstanceCreationPerformance)

    instances = qa_performance.CreateAllInstances()

    RunTest(qa_performance.TestParallelModify, instances)
    RunTest(qa_performance.TestParallelInstanceOSOperations, instances)
    RunTest(qa_performance.TestParallelInstanceQueries, instances)

    qa_performance.RemoveAllInstances(instances)

    RunTest(qa_performance.TestJobQueueSubmissionPerformance)

  if qa_config.TestEnabled("parallel-performance"):
    if qa_config.IsTemplateSupported(constants.DT_DRBD8):
      RunTest(qa_performance.TestParallelDRBDInstanceCreationPerformance)
    if qa_config.IsTemplateSupported(constants.DT_PLAIN):
      RunTest(qa_performance.TestParallelPlainInstanceCreationPerformance)

  # Preparations need to be made only if some of these tests are enabled
  if (qa_config.IsTemplateSupported(constants.DT_DRBD8) and
      qa_config.TestEnabled(qa_config.Either(PARALLEL_TEST_DICT.keys()))):
    inodes = qa_config.AcquireManyNodes(2)
    try:
      instance = qa_instance.TestInstanceAddWithDrbdDisk(inodes)
      try:
        for (test_name, test_fn) in PARALLEL_TEST_DICT.items():
          RunTestIf(test_name, test_fn, instance)
      finally:
        instance.Release()
      qa_instance.TestInstanceRemove(instance)
    finally:
      qa_config.ReleaseManyNodes(inodes)

  qa_utils.AssertCommand(["gnt-cluster", "watcher", "continue"])
948
949
def RunQa():
  """Main QA body.

  Runs the individual test blocks strictly one after the other: environment
  tests and cluster setup, cluster/OS/tag/node/group/network/filter tests,
  node and RAPI instance tests, the instance tests under each enabled
  configuration, export tests, upgrade, exclusive storage, instance policy,
  custom SSH port and restricted disk template tests, removal of an instance
  with an offline DRBD secondary, monitoring and performance tests and, last
  of all, the "cluster-destroy" tests.

  Nodes acquired from qa_config are released again in "finally" clauses.

  """
  RunTestBlock(RunEnvTests)
  SetupCluster()

  RunTestBlock(RunClusterTests)
  RunTestBlock(RunOsTests)

  RunTestIf("tags", qa_tags.TestClusterTags)

  RunTestBlock(RunCommonNodeTests)
  RunTestBlock(RunGroupListTests)
  RunTestBlock(RunGroupRwTests)
  RunTestBlock(RunNetworkTests)
  RunTestBlock(RunFilterTests)

  # The master shouldn't be readded or put offline; "delay" needs a non-master
  # node to test
  pnode = qa_config.AcquireNode(exclude=qa_config.GetMasterNode())
  try:
    RunTestIf("node-readd", qa_node.TestNodeReadd, pnode)
    RunTestIf("node-modify", qa_node.TestNodeModify, pnode)
    RunTestIf("delay", qa_cluster.TestDelay, pnode)
  finally:
    pnode.Release()

  # Make sure the cluster is clean before running instance tests
  qa_cluster.AssertClusterVerify()

  # Node tag tests and RAPI tests; no node is excluded here
  pnode = qa_config.AcquireNode()
  try:
    RunTestIf("tags", qa_tags.TestNodeTags, pnode)

    if qa_rapi.Enabled():
      RunTest(qa_rapi.TestNode, pnode)

      if (qa_config.TestEnabled("instance-add-plain-disk")
          and qa_config.IsTemplateSupported(constants.DT_PLAIN)):
        # Normal instance allocation via RAPI
        # Done once per value of use_client, which is handed to both the add
        # and the remove test
        for use_client in [True, False]:
          rapi_instance = RunTest(qa_rapi.TestRapiInstanceAdd, pnode,
                                  use_client)
          try:
            if qa_config.TestEnabled("instance-plain-rapi-common-tests"):
              RunCommonInstanceTests(rapi_instance, [pnode])
            RunTest(qa_rapi.TestRapiInstanceRemove, rapi_instance, use_client)
          finally:
            rapi_instance.Release()
          # Unbind the name; presumably to prevent accidental reuse of the
          # released instance (TODO confirm)
          del rapi_instance

        # Multi-instance allocation
        rapi_instance_one, rapi_instance_two = \
          RunTest(qa_rapi.TestRapiInstanceMultiAlloc, pnode)

        try:
          RunTest(qa_rapi.TestRapiInstanceRemove, rapi_instance_one, True)
          RunTest(qa_rapi.TestRapiInstanceRemove, rapi_instance_two, True)
        finally:
          rapi_instance_one.Release()
          rapi_instance_two.Release()
  finally:
    pnode.Release()

  # Configurations under which the instance tests are run. Each entry is a
  # tuple of: the value passed to qa_config.TestEnabled, a setup function
  # taking no arguments, and a restore function which is called with whatever
  # the setup function returned.
  config_list = [
    ("default-instance-tests", lambda: None, lambda _: None),
    (IsExclusiveStorageInstanceTestEnabled,
     lambda: qa_cluster.TestSetExclStorCluster(True),
     qa_cluster.TestSetExclStorCluster),
  ]
  for (conf_name, setup_conf_f, restore_conf_f) in config_list:
    if qa_config.TestEnabled(conf_name):
      oldconf = setup_conf_f()
      RunTestBlock(RunInstanceTests)
      # NOTE(review): not reached if the test block above raises
      restore_conf_f(oldconf)

  # Export tests, run once without and once with shutting the instance down
  # before it is exported
  pnode = qa_config.AcquireNode()
  try:
    if qa_config.TestEnabled(["instance-add-plain-disk", "instance-export"]):
      for shutdown in [False, True]:
        instance = RunTest(qa_instance.TestInstanceAddWithPlainDisk, [pnode])
        try:
          # A second node, different from the instance's node, is passed to
          # the export and backup list tests
          expnode = qa_config.AcquireNode(exclude=pnode)
          try:
            if shutdown:
              # Stop instance before exporting and removing it
              RunTest(qa_instance.TestInstanceShutdown, instance)
            RunTest(qa_instance.TestInstanceExportWithRemove, instance, expnode)
            RunTest(qa_instance.TestBackupList, expnode)
          finally:
            expnode.Release()
        finally:
          instance.Release()
        del expnode
        del instance
      qa_cluster.AssertClusterVerify()

  finally:
    pnode.Release()

  if qa_rapi.Enabled():
    RunTestIf("filters", qa_rapi.TestFilters)

  RunTestIf("cluster-upgrade", qa_cluster.TestUpgrade)

  RunTestBlock(RunExclusiveStorageTests)
  RunTestIf(["cluster-instance-policy", "instance-add-plain-disk"],
            TestIPolicyPlainInstance)

  RunTestBlock(RunCustomSshPortTests)

  RunTestIf(
    "instance-add-restricted-by-disktemplates",
    qa_instance.TestInstanceCreationRestrictedByDiskTemplates)

  # Test removing instance with offline drbd secondary
  if qa_config.TestEnabled(["instance-remove-drbd-offline",
                            "instance-add-drbd-disk"]):
    # Make sure the master is not put offline
    snode = qa_config.AcquireNode(exclude=qa_config.GetMasterNode())
    try:
      pnode = qa_config.AcquireNode(exclude=snode)
      try:
        instance = qa_instance.TestInstanceAddWithDrbdDisk([pnode, snode])
        # Callbacks handed to the test; they call qa_node.MakeNodeOffline
        # with "yes" and "no" respectively
        set_offline = lambda node: qa_node.MakeNodeOffline(node, "yes")
        set_online = lambda node: qa_node.MakeNodeOffline(node, "no")
        RunTest(qa_instance.TestRemoveInstanceOfflineNode, instance, snode,
                set_offline, set_online)
      finally:
        pnode.Release()
    finally:
      snode.Release()
    qa_cluster.AssertClusterVerify()

  RunTestBlock(RunMonitoringTests)

  RunPerformanceTests()

  # Both tests below are controlled by the same "cluster-destroy" test name;
  # the nodes are removed first, then the cluster is destroyed
  RunTestIf("cluster-destroy", qa_node.TestNodeRemoveAll)

  RunTestIf("cluster-destroy", qa_cluster.TestClusterDestroy)
1092
1093
@UsesRapiClient
def main():
  """Main program.

  Parses the command line, loads the QA configuration file given as the only
  positional argument and runs the QA against it.

  Nothing is executed unless the --yes-do-it option is given; without it a
  warning is printed and the program exits with status 1. A wrong number of
  positional arguments is reported through the option parser.

  """
  colors.check_for_colors()

  parser = optparse.OptionParser(usage="%prog [options] <config-file>")
  parser.add_option("--yes-do-it", dest="yes_do_it",
                    action="store_true",
                    help="Really execute the tests")
  (opts, args) = parser.parse_args()

  if len(args) != 1:
    # parser.error() prints the usage message and terminates the program
    parser.error("Wrong number of arguments.")
  (config_file, ) = args

  if not opts.yes_do_it:
    print ("Executing this script irreversibly destroys any Ganeti\n"
           "configuration on all nodes involved. If you really want\n"
           "to start testing, supply the --yes-do-it option.")
    sys.exit(1)

  qa_config.Load(config_file)

  primary = qa_config.GetMasterNode().primary
  qa_utils.StartMultiplexer(primary)
  # A multiplexer has been started at this point, so everything that follows,
  # including the informational output, is guarded to make sure the
  # multiplexers are closed again even if something raises
  try:
    print ("SSH command for primary node: %s" %
           utils.ShellQuoteArgs(qa_utils.GetSSHCommand(primary, "")))
    print ("SSH command for other nodes: %s" %
           utils.ShellQuoteArgs(qa_utils.GetSSHCommand("NODE", "")))
    RunQa()
  finally:
    qa_utils.CloseMultiplexers()
# Only run the QA when this file is executed as a script, not when it is
# imported as a module
if __name__ == "__main__":
  main()