f5da39e2e5e7061efa3f414538f6aabc6aeb380d
[ganeti-github.git] / qa / ganeti-qa.py
1 #!/usr/bin/python -u
2 #
3
4 # Copyright (C) 2007, 2008, 2009, 2010, 2011, 2012, 2013 Google Inc.
5 # All rights reserved.
6 #
7 # Redistribution and use in source and binary forms, with or without
8 # modification, are permitted provided that the following conditions are
9 # met:
10 #
11 # 1. Redistributions of source code must retain the above copyright notice,
12 # this list of conditions and the following disclaimer.
13 #
14 # 2. Redistributions in binary form must reproduce the above copyright
15 # notice, this list of conditions and the following disclaimer in the
16 # documentation and/or other materials provided with the distribution.
17 #
18 # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
19 # IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
20 # TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
21 # PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR
22 # CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
23 # EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
24 # PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
25 # PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
26 # LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
27 # NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
28 # SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29
30
31 """Script for doing QA on Ganeti.
32
33 """
34
35 # pylint: disable=C0103
36 # due to invalid name
37
38 import copy
39 import datetime
40 import optparse
41 import sys
42
43 import colors
44 import qa_cluster
45 import qa_config
46 import qa_daemon
47 import qa_env
48 import qa_error
49 import qa_group
50 import qa_instance
51 import qa_iptables
52 import qa_monitoring
53 import qa_network
54 import qa_node
55 import qa_os
56 import qa_performance
57 import qa_job
58 import qa_rapi
59 import qa_tags
60 import qa_utils
61
62 from ganeti import utils
63 from ganeti import rapi # pylint: disable=W0611
64 from ganeti import constants
65 from ganeti import netutils
66
67 import ganeti.rapi.client # pylint: disable=W0611
68 from ganeti.rapi.client import UsesRapiClient
69
70
def _FormatHeader(line, end=72, mark="-", color=None):
  """Build a header line padded with dashes up to a given column.

  @param line: text to embed in the header
  @param end: column up to which the header is padded
  @param mark: string repeated four times as the header prefix
  @param color: color specification handed to C{colors.colorize}
  @return: the formatted header as returned by C{colors.colorize}

  """
  header = (mark * 4) + " " + line + " "
  # A non-positive repeat count adds nothing, so long headers stay as they are
  padded = header + "-" * (end - len(header))
  return colors.colorize(padded.rstrip(), color=color)
80
81
82 def _DescriptionOf(fn):
83 """Computes the description of an item.
84
85 """
86 if fn.__doc__:
87 desc = fn.__doc__.splitlines()[0].strip()
88 desc = desc.rstrip(".")
89 if fn.__name__:
90 desc = "[" + fn.__name__ + "] " + desc
91 else:
92 desc = "%r" % fn
93
94 return desc
95
96
def RunTest(fn, *args, **kwargs):
  """Runs a test after printing a header.

  A start header is printed first, then the test is called. Afterwards a
  PASSED or FAILED line is printed and, in either case, a footer showing the
  elapsed time.

  @param fn: the test function; all further arguments are passed on to it
  @return: whatever C{fn} returns
  @raise Exception: anything raised by C{fn} is re-raised after reporting

  """
  started = datetime.datetime.now()
  desc = _DescriptionOf(fn)

  print("")
  print(_FormatHeader("%s start %s" % (started, desc),
                      color=colors.YELLOW, mark="<"))

  try:
    result = fn(*args, **kwargs)
    print(_FormatHeader("PASSED %s" % (desc, ), color=colors.GREEN))
    return result
  except Exception as err:
    print(_FormatHeader("FAILED %s: %s" % (desc, err), color=colors.RED))
    raise
  finally:
    # Printed for passed and failed tests alike
    stopped = datetime.datetime.now()
    elapsed = stopped - started
    print(_FormatHeader("%s time=%s %s" % (stopped, elapsed, desc),
                        color=colors.MAGENTA, mark=">"))
122
123
def ReportTestSkip(desc, testnames):
  """Reports that tests have been skipped.

  @type desc: string
  @param desc: description of what is being skipped
  @type testnames: string or list of string
  @param testnames: either a single test name in the configuration
      file, or a list of testnames (which will be AND-ed together)

  """
  now = datetime.datetime.now()
  # TODO: Formatting test names when non-string names are involved
  message = "%s skipping %s, test(s) %s disabled" % (now, desc, testnames)
  print(_FormatHeader(message, color=colors.BLUE, mark="*"))
139
140
def RunTestIf(testnames, fn, *args, **kwargs):
  """Runs a test conditionally.

  If the test is disabled, a skip notice is printed instead of running it.

  @param testnames: either a single test name in the configuration
      file, or a list of testnames (which will be AND-ed together)
  @param fn: the test function; all further arguments are passed on to it

  """
  if not qa_config.TestEnabled(testnames):
    ReportTestSkip(_DescriptionOf(fn), testnames)
    return

  RunTest(fn, *args, **kwargs)
153
154
def RunTestBlock(fn, *args, **kwargs):
  """Runs a block of tests after printing a header.

  A bold start header is printed first and a bold footer with the elapsed
  time last; a failure line is printed in between if the block raises.

  @param fn: function running the block; all further arguments are passed on
  @return: whatever C{fn} returns
  @raise Exception: anything raised by C{fn} is re-raised after reporting

  """
  began = datetime.datetime.now()
  desc = _DescriptionOf(fn)

  print("")
  print(_FormatHeader("BLOCK %s start %s" % (began, desc),
                      color=[colors.YELLOW, colors.BOLD], mark="v"))

  try:
    return fn(*args, **kwargs)
  except Exception as err:
    print(_FormatHeader("BLOCK FAILED %s: %s" % (desc, err),
                        color=[colors.RED, colors.BOLD]))
    raise
  finally:
    ended = datetime.datetime.now()
    elapsed = ended - began
    print(_FormatHeader("BLOCK %s time=%s %s" % (ended, elapsed, desc),
                        color=[colors.MAGENTA, colors.BOLD], mark="^"))
178
179
def RunEnvTests():
  """Run several environment tests.

  All of them are controlled by the "env" test name.

  """
  for env_test in [
    qa_env.TestSshConnection,
    qa_env.TestIcmpPing,
    qa_env.TestGanetiCommands,
    ]:
    RunTestIf("env", env_test)
187
188
def SetupCluster(rapi_user):
  """Initializes the cluster.

  Besides creating the cluster (when enabled), this runs the listing tests on
  the empty cluster, adds the nodes and resumes the watcher.

  @param rapi_user: Login user for RAPI
  @return: Login secret for RAPI

  """
  secret = utils.GenerateSecret()
  RunTestIf("create-cluster", qa_cluster.TestClusterInit, rapi_user, secret)
  if not qa_config.TestEnabled("create-cluster"):
    # The cluster is already in place, so exclusive-storage is assumed to be
    # set according to the configuration
    exclusive = qa_config.get("exclusive-storage", False)
    qa_config.SetExclusiveStorage(exclusive)
    if qa_rapi.Enabled():
      # Supporting RAPI on an existing cluster requires finding out the secret
      secret = qa_rapi.LookupRapiSecret(rapi_user)

  qa_group.ConfigureGroups()

  # Test on empty cluster, then add the nodes
  for (name, test) in [
    ("node-list", qa_node.TestNodeList),
    ("instance-list", qa_instance.TestInstanceList),
    ("job-list", qa_job.TestJobList),
    ("create-cluster", qa_node.TestNodeAddAll),
    ]:
    RunTestIf(name, test)

  if not qa_config.TestEnabled("create-cluster"):
    # consider the nodes are already there
    qa_node.MarkNodeAddedAll()

  for (name, test) in [
    ("test-jobqueue", qa_cluster.TestJobqueue),
    ("test-jobqueue", qa_job.TestJobCancellation),
    ]:
    RunTestIf(name, test)

  # enable the watcher (unconditionally)
  RunTest(qa_daemon.TestResumeWatcher)

  for (name, test) in [
    ("node-list", qa_node.TestNodeList),
    # Test listing fields
    ("node-list", qa_node.TestNodeListFields),
    ("instance-list", qa_instance.TestInstanceListFields),
    ("job-list", qa_job.TestJobListFields),
    ("instance-export", qa_instance.TestBackupListFields),
    ("node-info", qa_node.TestNodeInfo),
    ]:
    RunTestIf(name, test)

  return secret
236
237
def RunClusterTests():
  """Runs tests related to gnt-cluster.

  Each entry of the table below pairs the condition controlling a test with
  the test function; the tests are run in table order.

  """
  no_virtual = qa_config.NoVirtualCluster
  rapi_enabled = qa_rapi.Enabled

  cluster_tests = [
    ("create-cluster", qa_cluster.TestClusterInitDisk),
    ("cluster-renew-crypto", qa_cluster.TestClusterRenewCrypto),
    ("cluster-verify", qa_cluster.TestClusterVerify),
    ("cluster-reserved-lvs", qa_cluster.TestClusterReservedLvs),
    # TODO: add more cluster modify tests
    ("cluster-modify", qa_cluster.TestClusterModifyEmpty),
    ("cluster-modify", qa_cluster.TestClusterModifyIPolicy),
    ("cluster-modify", qa_cluster.TestClusterModifyISpecs),
    ("cluster-modify", qa_cluster.TestClusterModifyBe),
    ("cluster-modify", qa_cluster.TestClusterModifyDisk),
    ("cluster-modify", qa_cluster.TestClusterModifyDiskTemplates),
    ("cluster-modify", qa_cluster.TestClusterModifyFileStorageDir),
    ("cluster-modify", qa_cluster.TestClusterModifySharedFileStorageDir),
    ("cluster-modify", qa_cluster.TestClusterModifyInstallImage),
    ("cluster-modify", qa_cluster.TestClusterModifyUserShutdown),
    ("cluster-rename", qa_cluster.TestClusterRename),
    ("cluster-info", qa_cluster.TestClusterVersion),
    ("cluster-info", qa_cluster.TestClusterInfo),
    ("cluster-info", qa_cluster.TestClusterGetmaster),
    ("cluster-redist-conf", qa_cluster.TestClusterRedistConf),
    (["cluster-copyfile", no_virtual], qa_cluster.TestClusterCopyfile),
    ("cluster-command", qa_cluster.TestClusterCommand),
    ("cluster-burnin", qa_cluster.TestClusterBurnin),
    ("cluster-master-failover", qa_cluster.TestClusterMasterFailover),
    ("cluster-master-failover",
     qa_cluster.TestClusterMasterFailoverWithDrainedQueue),
    (["cluster-oob", no_virtual], qa_cluster.TestClusterOob),
    ("cluster-instance-communication", qa_cluster.TestInstanceCommunication),
    (rapi_enabled, qa_rapi.TestVersion),
    (rapi_enabled, qa_rapi.TestEmptyCluster),
    (rapi_enabled, qa_rapi.TestRapiQuery),
    ]

  for (condition, test_fn) in cluster_tests:
    RunTestIf(condition, test_fn)
278
279
def RunRepairDiskSizes():
  """Run the repair disk-sizes test.

  The test is controlled by the "cluster-repair-disk-sizes" test name.

  """
  test_name = "cluster-repair-disk-sizes"
  RunTestIf(test_name, qa_cluster.TestClusterRepairDiskSizes)
285
286
def RunOsTests():
  """Runs all tests related to gnt-os.

  Every test is subject to the "os" test name and to
  C{qa_config.NoVirtualCluster}.

  """
  os_enabled = ["os", qa_config.NoVirtualCluster]

  # Only some of the tests take the RAPI function for listing OSes
  rapi_getos = None
  if qa_config.TestEnabled(qa_rapi.Enabled):
    rapi_getos = qa_rapi.GetOperatingSystems

  for (test_fn, extra_args) in [
    (qa_os.TestOsList, ()),
    (qa_os.TestOsDiagnose, ()),
    (qa_os.TestOsValid, (rapi_getos, )),
    (qa_os.TestOsInvalid, (rapi_getos, )),
    (qa_os.TestOsPartiallyValid, (rapi_getos, )),
    (qa_os.TestOsModifyValid, ()),
    (qa_os.TestOsModifyInvalid, ()),
    (qa_os.TestOsStatesNonExisting, ()),
    ]:
    RunTestIf(os_enabled, test_fn, *extra_args)
317
318
def RunCommonInstanceTests(instance, inst_nodes):
  """Runs a few tests that are common to all disk types.

  @param instance: the instance the tests are run on
  @param inst_nodes: the nodes of the instance, handed to the broken DRBD
    disks verification test

  """
  rapi_on = qa_rapi.Enabled

  RunTestIf("instance-shutdown", qa_instance.TestInstanceShutdown, instance)
  RunTestIf(["instance-shutdown", "instance-console", rapi_on],
            qa_rapi.TestRapiStoppedInstanceConsole, instance)
  RunTestIf(["instance-shutdown", "instance-modify"],
            qa_instance.TestInstanceStoppedModify, instance)
  RunTestIf("instance-shutdown", qa_instance.TestInstanceStartup, instance)

  # Test shutdown/start via RAPI
  for rapi_test in [
    qa_rapi.TestRapiInstanceShutdown,
    qa_rapi.TestRapiInstanceStartup,
    ]:
    RunTestIf(["instance-shutdown", rapi_on], rapi_test, instance)

  RunTestIf("instance-list", qa_instance.TestInstanceList)

  RunTestIf("instance-info", qa_instance.TestInstanceInfo, instance)

  RunTestIf("instance-modify", qa_instance.TestInstanceModify, instance)
  RunTestIf(["instance-modify", rapi_on],
            qa_rapi.TestRapiInstanceModify, instance)

  RunTestIf("instance-console", qa_instance.TestInstanceConsole, instance)
  RunTestIf(["instance-console", rapi_on],
            qa_rapi.TestRapiInstanceConsole, instance)

  RunTestIf("instance-device-names", qa_instance.TestInstanceDeviceNames,
            instance)

  # Tests requiring the instance to be down
  down_tests = qa_config.Either([
    "instance-reinstall",
    "instance-rename",
    "instance-grow-disk",
    ])

  # shutdown instance for any 'down' tests
  RunTestIf(down_tests, qa_instance.TestInstanceShutdown, instance)

  # now run the 'down' state tests
  RunTestIf("instance-reinstall", qa_instance.TestInstanceReinstall, instance)
  RunTestIf(["instance-reinstall", rapi_on],
            qa_rapi.TestRapiInstanceReinstall, instance)

  if qa_config.TestEnabled("instance-rename"):
    spare_instance = qa_config.AcquireInstance()
    try:
      source = instance.name
      # First rename the instance to its own name, then to a different name
      # if one is configured
      targets = [source]
      if spare_instance.name is not None:
        targets.append(spare_instance.name)
      for target in targets:
        RunTest(qa_instance.TestInstanceRenameAndBack, source, target)
        RunTestIf(rapi_on, qa_rapi.TestRapiInstanceRenameAndBack,
                  source, target)
    finally:
      spare_instance.Release()

  RunTestIf(["instance-grow-disk"], qa_instance.TestInstanceGrowDisk, instance)

  # and now start the instance again
  RunTestIf(down_tests, qa_instance.TestInstanceStartup, instance)

  RunTestIf("instance-reboot", qa_instance.TestInstanceReboot, instance)

  RunTestIf("tags", qa_tags.TestInstanceTags, instance)

  if instance.disk_template == constants.DT_DRBD8:
    RunTestIf("cluster-verify",
              qa_cluster.TestClusterVerifyDisksBrokenDRBD, instance, inst_nodes)
  RunTestIf("cluster-verify", qa_cluster.TestClusterVerify)

  RunTestIf(rapi_on, qa_rapi.TestInstance, instance)

  # Lists instances, too
  RunTestIf("node-list", qa_node.TestNodeList)

  # Some jobs have been run, let's test listing them
  RunTestIf("job-list", qa_job.TestJobList)
404
405
def RunCommonNodeTests():
  """Run a few common node tests.

  The out-of-band test additionally depends on
  C{qa_config.NoVirtualCluster}.

  """
  for (condition, node_test) in [
    ("node-volumes", qa_node.TestNodeVolumes),
    ("node-storage", qa_node.TestNodeStorage),
    (["node-oob", qa_config.NoVirtualCluster], qa_node.TestOutOfBand),
    ]:
    RunTestIf(condition, node_test)
413
414
def RunGroupListTests():
  """Run tests for listing node groups.

  Both tests are controlled by the "group-list" test name.

  """
  for list_test in [qa_group.TestGroupList, qa_group.TestGroupListFields]:
    RunTestIf("group-list", list_test)
421
422
def RunNetworkTests():
  """Run tests for network management.

  The network tags test requires both "network" and "tags" to be enabled.

  """
  for (condition, net_test) in [
    ("network", qa_network.TestNetworkAddRemove),
    ("network", qa_network.TestNetworkConnect),
    (["network", "tags"], qa_network.TestNetworkTags),
    ]:
    RunTestIf(condition, net_test)
430
431
def RunGroupRwTests():
  """Run tests for adding/removing/renaming groups.

  All tests require "group-rwops"; some have additional conditions.

  """
  for (condition, group_test) in [
    ("group-rwops", qa_group.TestGroupAddRemoveRename),
    ("group-rwops", qa_group.TestGroupAddWithOptions),
    ("group-rwops", qa_group.TestGroupModify),
    (["group-rwops", qa_rapi.Enabled], qa_rapi.TestRapiNodeGroups),
    ]:
    RunTestIf(condition, group_test)

  # The default group is looked up only after the tests above have run
  default_group = qa_group.GetDefaultGroup()
  RunTestIf(["group-rwops", "tags"], qa_tags.TestGroupTags, default_group)
442
443
def RunExportImportTests(instance, inodes):
  """Tries to export and import the instance.

  @param instance: the instance to export
  @type inodes: list of nodes
  @param inodes: current nodes of the instance

  """
  # FIXME: export explicitly bails out on file based storage. other non-lvm
  # based storage types are untested, though. Also note that import could still
  # work, but is deeply embedded into the "export" case.
  if (qa_config.TestEnabled("instance-export") and
      instance.disk_template not in constants.DTS_FILEBASED):
    RunTest(qa_instance.TestInstanceExportNoTarget, instance)

    primary = inodes[0]
    export_node = qa_config.AcquireNode(exclude=primary)
    try:
      export_name = RunTest(qa_instance.TestInstanceExport, instance,
                            export_node)

      RunTest(qa_instance.TestBackupList, export_node)

      if qa_config.TestEnabled("instance-import"):
        imported = qa_config.AcquireInstance()
        try:
          RunTest(qa_instance.TestInstanceImport, imported, primary,
                  export_node, export_name)
          # Check if starting the instance works
          RunTest(qa_instance.TestInstanceStartup, imported)
          RunTest(qa_instance.TestInstanceRemove, imported)
        finally:
          imported.Release()
    finally:
      export_node.Release()

  # FIXME: inter-cluster-instance-move crashes on file based instances :/
  # See Issue 414.
  if (qa_config.TestEnabled([qa_rapi.Enabled, "inter-cluster-instance-move"])
      and (instance.disk_template not in constants.DTS_FILEBASED)):
    moved = qa_config.AcquireInstance()
    try:
      target_node = qa_config.AcquireNode(exclude=inodes)
      try:
        RunTest(qa_rapi.TestInterClusterInstanceMove, instance, moved,
                inodes, target_node)
      finally:
        target_node.Release()
    finally:
      moved.Release()
492
493
def RunDaemonTests(instance):
  """Test the ganeti-watcher script.

  The watcher is paused before the tests and resumed after them.

  @param instance: the instance used by the restart tests

  """
  RunTest(qa_daemon.TestPauseWatcher)

  for (name, watcher_test) in [
    ("instance-automatic-restart", qa_daemon.TestInstanceAutomaticRestart),
    ("instance-consecutive-failures",
     qa_daemon.TestInstanceConsecutiveFailures),
    ]:
    RunTestIf(name, watcher_test, instance)

  RunTest(qa_daemon.TestResumeWatcher)
506
507
def RunHardwareFailureTests(instance, inodes):
  """Test cluster internal hardware failure recovery.

  @param instance: the instance to run the tests on
  @param inodes: current nodes of the instance

  """
  for (name, cli_test, rapi_test) in [
    ("instance-failover", qa_instance.TestInstanceFailover,
     qa_rapi.TestRapiInstanceFailover),
    ("instance-migrate", qa_instance.TestInstanceMigrate,
     qa_rapi.TestRapiInstanceMigrate),
    ]:
    RunTestIf(name, cli_test, instance)
    RunTestIf([name, qa_rapi.Enabled], rapi_test, instance)

  if qa_config.TestEnabled("instance-replace-disks"):
    # We just need alternative secondary nodes, hence "- 1"
    spare_nodes = qa_config.AcquireManyNodes(len(inodes) - 1, exclude=inodes)
    try:
      RunTestIf(qa_rapi.Enabled, qa_rapi.TestRapiInstanceReplaceDisks, instance)
      RunTest(qa_instance.TestReplaceDisks,
              instance, inodes, spare_nodes)
    finally:
      qa_config.ReleaseManyNodes(spare_nodes)

  if qa_config.TestEnabled("instance-recreate-disks"):
    try:
      acquired = qa_config.AcquireManyNodes(len(inodes), exclude=inodes)
    except qa_error.OutOfNodesError:
      if len(inodes) <= 1:
        raise
      # If the cluster is not big enough, let's reuse some of the nodes, but
      # with different roles. In this way, we can test a DRBD instance even on
      # a 3-node cluster.
      acquired = [qa_config.AcquireNode(exclude=inodes)]
      new_nodes = acquired + inodes[:-1]
    else:
      new_nodes = acquired
    try:
      RunTest(qa_instance.TestRecreateDisks,
              instance, inodes, new_nodes)
    finally:
      # Only the nodes acquired here are released, not the reused ones
      qa_config.ReleaseManyNodes(acquired)

  if len(inodes) >= 2:
    for (name, node_test) in [
      ("node-evacuate", qa_node.TestNodeEvacuate),
      ("node-failover", qa_node.TestNodeFailover),
      ("node-migrate", qa_node.TestNodeMigrate),
      ]:
      RunTestIf(name, node_test, inodes[0], inodes[1])
554
555
def RunExclusiveStorageTests():
  """Test exclusive storage.

  Nothing is done unless the "cluster-exclusive-storage" test is enabled.
  The exclusive-storage setting found at the start is restored at the end.

  """
  if not qa_config.TestEnabled("cluster-exclusive-storage"):
    return

  node = qa_config.AcquireNode()
  try:
    previous_es = qa_cluster.TestSetExclStorCluster(False)
    qa_node.TestExclStorSingleNode(node)

    qa_cluster.TestSetExclStorCluster(True)
    qa_cluster.TestExclStorSharedPv(node)

    if qa_config.TestEnabled("instance-add-plain-disk"):
      # Make sure that the cluster doesn't have any pre-existing problem
      qa_cluster.AssertClusterVerify()

      # Create and allocate instances
      first = qa_instance.TestInstanceAddWithPlainDisk([node])
      try:
        second = qa_instance.TestInstanceAddWithPlainDisk([node])
        try:
          # cluster-verify checks that disks are allocated correctly
          qa_cluster.AssertClusterVerify()

          # Remove instances
          qa_instance.TestInstanceRemove(second)
          qa_instance.TestInstanceRemove(first)
        finally:
          second.Release()
      finally:
        first.Release()

    if qa_config.TestEnabled("instance-add-drbd-disk"):
      secondary = qa_config.AcquireNode()
      try:
        # The DRBD instance is created with exclusive storage disabled;
        # verification is then expected to fail once it is enabled again
        qa_cluster.TestSetExclStorCluster(False)
        drbd_instance = qa_instance.TestInstanceAddWithDrbdDisk(
          [node, secondary])
        try:
          qa_cluster.TestSetExclStorCluster(True)
          expected_errors = [constants.CV_EINSTANCEUNSUITABLENODE]
          qa_cluster.AssertClusterVerify(fail=True, errors=expected_errors)
          qa_instance.TestInstanceRemove(drbd_instance)
        finally:
          drbd_instance.Release()
      finally:
        secondary.Release()
    qa_cluster.TestSetExclStorCluster(previous_es)
  finally:
    node.Release()
606
607
def RunCustomSshPortTests():
  """Test accessing nodes with custom SSH ports.

  This requires removing nodes, adding them to a new group, and then undoing
  the change. Nothing is done unless the "group-custom-ssh-port" test is
  enabled, and the test is skipped if a node is likely to be reached using
  IPv6.

  """
  if not qa_config.TestEnabled("group-custom-ssh-port"):
    return

  std_port = netutils.GetDaemonPort(constants.SSH)
  port = 211
  master = qa_config.GetMasterNode()
  with qa_config.AcquireManyNodesCtx(1, exclude=master) as nodes:
    # Checks if the node(s) could be contacted through IPv6.
    # If yes, better skip the whole test.

    for node in nodes:
      if qa_utils.UsesIPv6Connection(node.primary, std_port):
        # The two literals are concatenated, hence the leading space
        print("Node %s is likely to be reached using IPv6,"
              " skipping the test" % (node.primary, ))
        return

    for node in nodes:
      qa_node.NodeRemove(node)
    with qa_iptables.RulesContext(nodes) as r:
      with qa_group.NewGroupCtx() as group:
        qa_group.ModifyGroupSshPort(r, group, nodes, port)

        for node in nodes:
          qa_node.NodeAdd(node, group=group)

        # Make sure that the cluster doesn't have any pre-existing problem
        qa_cluster.AssertClusterVerify()

        # Create and allocate instances
        instance1 = qa_instance.TestInstanceAddWithPlainDisk(nodes)
        try:
          instance2 = qa_instance.TestInstanceAddWithPlainDisk(nodes)
          try:
            # cluster-verify checks that disks are allocated correctly
            qa_cluster.AssertClusterVerify()

            # Remove instances
            qa_instance.TestInstanceRemove(instance2)
            qa_instance.TestInstanceRemove(instance1)
          finally:
            instance2.Release()
        finally:
          instance1.Release()

        # The nodes have to leave the temporary group again
        for node in nodes:
          qa_node.NodeRemove(node)

    # Undo the change by adding the nodes back without a custom group
    for node in nodes:
      qa_node.NodeAdd(node)

    qa_cluster.AssertClusterVerify()
665
666
def _BuildSpecDict(par, mn, st, mx):
  """Builds an instance specs dictionary limiting a single parameter.

  @param par: name of the parameter
  @param mn: minimum value
  @param st: standard value
  @param mx: maximum value
  @return: dictionary with a single min/max pair and the standard value

  """
  bounds = {
    constants.ISPECS_MIN: {par: mn},
    constants.ISPECS_MAX: {par: mx},
    }
  return {
    constants.ISPECS_MINMAX: [bounds],
    constants.ISPECS_STD: {par: st},
    }
675
676
def _BuildDoubleSpecDict(index, par, mn, st, mx):
  """Builds an instance specs dictionary with two min/max pairs.

  Only the pair selected by C{index} is filled in; the other one is left
  empty.

  @param index: position (0 or 1) of the min/max pair to fill in
  @param par: name of the parameter
  @param mn: minimum value
  @param st: standard value; no standard specs are included if it is None
  @param mx: maximum value
  @return: the specs dictionary

  """
  minmax = [{}, {}]
  minmax[index] = {
    constants.ISPECS_MIN: {par: mn},
    constants.ISPECS_MAX: {par: mx},
    }
  result = {constants.ISPECS_MINMAX: minmax}
  if st is not None:
    result[constants.ISPECS_STD] = {par: st}
  return result
688
689
def TestIPolicyPlainInstance():
  """Test instance policy interaction with instances

  A list of policy changes is built from the specs of a plain instance and
  applied while the instance exists, checking cluster verification after each
  change. The same changes are then replayed without the instance, expecting
  instance creation to fail wherever the policy was violated before.

  """
  if not qa_config.IsTemplateSupported(constants.DT_PLAIN):
    print("Template %s not supported" % constants.DT_PLAIN)
    return

  params = ["memory-size", "cpu-count", "disk-count", "disk-size", "nic-count"]
  minmax_key = constants.ISPECS_MINMAX

  # This test assumes that the group policy is empty
  (_, orig_specs) = qa_cluster.TestClusterSetISpecs()
  # We also assume to have only one min/max bound
  assert len(orig_specs[minmax_key]) == 1
  node = qa_config.AcquireNode()
  try:
    # Log of policy changes, list of tuples:
    # (full_change, incremental_change, policy_violated)
    steps = []
    instance = qa_instance.TestInstanceAddWithPlainDisk([node])
    try:
      policy_warning = [constants.CV_EINSTANCEPOLICY]
      for par in params:
        (cur_min, cur_max) = qa_instance.GetInstanceSpec(instance.name, par)
        # Some specs must be multiple of 4
        above = cur_max + 4
        steps.append((None, _BuildSpecDict(par, above, above, above), True))
        if cur_min > 0:
          # Some specs must be multiple of 4
          if cur_min >= 4:
            below = cur_min - 4
          else:
            below = cur_min - 1
          steps.append((None, _BuildSpecDict(par, 0, below, below), True))
        # Go back to the original policy before the next parameter
        steps.append((orig_specs, None, False))

      # Test with two instance specs
      two_specs = copy.deepcopy(orig_specs)
      two_specs[minmax_key] = two_specs[minmax_key] * 2
      par1, par2 = params[:2]
      (_, max1) = qa_instance.GetInstanceSpec(instance.name, par1)
      (_, max2) = qa_instance.GetInstanceSpec(instance.name, par2)
      orig_minmax = orig_specs[minmax_key][0]
      steps.append((two_specs, None, False))
      # The first min/max limit is being violated
      steps.append((None,
                    _BuildDoubleSpecDict(0, par1, max1 + 4, max1 + 4,
                                         max1 + 4),
                    False))
      # Both min/max limits are being violated
      steps.append((None,
                    _BuildDoubleSpecDict(1, par2, max2 + 4, None, max2 + 4),
                    True))
      # The second min/max limit is being violated
      steps.append((None,
                    _BuildDoubleSpecDict(0, par1,
                                         orig_minmax[constants.ISPECS_MIN][par1],
                                         orig_specs[constants.ISPECS_STD][par1],
                                         orig_minmax[constants.ISPECS_MAX][par1]),
                    False))
      steps.append((orig_specs, None, False))

      # Apply the changes, and check policy violations after each change
      qa_cluster.AssertClusterVerify()
      for (full_change, diff_change, violated) in steps:
        qa_cluster.TestClusterSetISpecs(new_specs=full_change,
                                        diff_specs=diff_change)
        if violated:
          qa_cluster.AssertClusterVerify(warnings=policy_warning)
        else:
          qa_cluster.AssertClusterVerify()

      qa_instance.TestInstanceRemove(instance)
    finally:
      instance.Release()

    # Now we replay the same policy changes, and we expect that the instance
    # cannot be created for the cases where we had a policy violation above
    for (full_change, diff_change, violated) in steps:
      qa_cluster.TestClusterSetISpecs(new_specs=full_change,
                                      diff_specs=diff_change)
      if violated:
        qa_instance.TestInstanceAddWithPlainDisk([node], fail=True)
      # Instance creation with no policy violation has been tested already
  finally:
    node.Release()
777
778
def IsExclusiveStorageInstanceTestEnabled():
  """Checks whether the exclusive-storage instance tests are to be run.

  @return: False if the "exclusive-storage-instance-tests" test is disabled,
    True if it is enabled and all configured nodes have enough PVs
  @raise qa_error.Error: if the test is enabled, but the number of PVs of a
    node cannot be determined or is lower than two

  """
  test_name = "exclusive-storage-instance-tests"
  if not qa_config.TestEnabled(test_name):
    return False

  vgname = qa_config.get("vg-name", constants.DEFAULT_VG)
  # Prints the number of PVs making up the volume group
  pv_count_cmd = utils.ShellQuoteArgs([
    "vgs", "--noheadings", "-o", "pv_count", vgname,
    ])
  for node in qa_config.GetConfig()["nodes"]:
    try:
      pv_count = int(qa_utils.GetCommandOutput(node.primary, pv_count_cmd))
    except Exception as err:
      raise qa_error.Error("Cannot get the number of PVs on %s, needed by"
                           " '%s': %s" % (node.primary, test_name, err))
    if pv_count < 2:
      raise qa_error.Error("Node %s has not enough PVs (%s) to run '%s'" %
                           (node.primary, pv_count, test_name))
  return True
801
802
def RunInstanceTests():
  """Create and exercise instances.

  For every entry of C{qa_instance.available_instance_tests} whose test is
  enabled and whose disk template is supported, an instance is created, put
  through the instance-related tests and removed again. A skip notice is
  printed for all other entries.

  """

  for (test_name, templ, create_fun, num_nodes) in \
      qa_instance.available_instance_tests:
    if (qa_config.TestEnabled(test_name) and
        qa_config.IsTemplateSupported(templ)):
      inodes = qa_config.AcquireManyNodes(num_nodes)
      try:
        instance = RunTest(create_fun, inodes)
        try:
          RunTestIf("instance-user-down", qa_instance.TestInstanceUserDown,
                    instance)
          RunTestIf("instance-communication",
                    qa_instance.TestInstanceCommunication,
                    instance,
                    qa_config.GetMasterNode())
          RunTestIf("cluster-epo", qa_cluster.TestClusterEpo)
          RunDaemonTests(instance)
          for node in inodes:
            RunTestIf("haskell-confd", qa_node.TestNodeListDrbd, node,
                      templ == constants.DT_DRBD8)
          if len(inodes) > 1:
            RunTestIf("group-rwops", qa_group.TestAssignNodesIncludingSplit,
                      constants.INITIAL_NODE_GROUP_NAME,
                      inodes[0].primary, inodes[1].primary)
          if qa_config.TestEnabled("instance-convert-disk"):
            RunTest(qa_instance.TestInstanceShutdown, instance)
            RunTest(qa_instance.TestInstanceConvertDiskToPlain,
                    instance, inodes)
            RunTest(qa_instance.TestInstanceStartup, instance)
          RunTestIf("instance-modify-disks",
                    qa_instance.TestInstanceModifyDisks, instance)
          RunCommonInstanceTests(instance, inodes)
          if qa_config.TestEnabled("instance-modify-primary"):
            othernode = qa_config.AcquireNode()
            try:
              RunTest(qa_instance.TestInstanceModifyPrimaryAndBack,
                      instance, inodes[0], othernode)
            finally:
              # Release the node even if the test fails, as done for all
              # other acquired nodes
              othernode.Release()
          RunGroupListTests()
          RunExportImportTests(instance, inodes)
          RunHardwareFailureTests(instance, inodes)
          RunRepairDiskSizes()
          RunTest(qa_instance.TestInstanceRemove, instance)
        finally:
          instance.Release()
        del instance
      finally:
        qa_config.ReleaseManyNodes(inodes)
      qa_cluster.AssertClusterVerify()
    else:
      test_desc = "Creating instances of template %s" % templ
      # Tell apart a disabled test from an unsupported disk template
      if not qa_config.TestEnabled(test_name):
        ReportTestSkip(test_desc, test_name)
      else:
        ReportTestSkip(test_desc, "disk template %s" % templ)
859
860
def RunMonitoringTests():
  """Run the instance status collector test, if "mon-collector" is enabled.

  """
  collector_test = qa_monitoring.TestInstStatusCollector
  RunTestIf("mon-collector", collector_test)
863
864
# Maps the name of each parallel performance test to the function implementing
# it; RunPerformanceTests runs the enabled ones, passing them an instance
# created with a DRBD disk
PARALLEL_TEST_DICT = {
  "parallel-failover": qa_performance.TestParallelInstanceFailover,
  "parallel-migration": qa_performance.TestParallelInstanceMigration,
  "parallel-replace-disks": qa_performance.TestParallelInstanceReplaceDisks,
  "parallel-reboot": qa_performance.TestParallelInstanceReboot,
  "parallel-reinstall": qa_performance.TestParallelInstanceReinstall,
  "parallel-rename": qa_performance.TestParallelInstanceRename,
  }
873
874
def RunPerformanceTests():
  """Run the performance related tests.

  A skip notice is printed and nothing else is done unless the "performance"
  test is enabled.

  """
  if not qa_config.TestEnabled("performance"):
    ReportTestSkip("performance related tests", "performance")
    return

  if qa_config.TestEnabled("jobqueue-performance"):
    for perf_test in [
      qa_performance.TestParallelMaxInstanceCreationPerformance,
      qa_performance.TestParallelNodeCountInstanceCreationPerformance,
      ]:
      RunTest(perf_test)

    instances = qa_performance.CreateAllInstances()

    for perf_test in [
      qa_performance.TestParallelModify,
      qa_performance.TestParallelInstanceOSOperations,
      qa_performance.TestParallelInstanceQueries,
      ]:
      RunTest(perf_test, instances)

    qa_performance.RemoveAllInstances(instances)

    RunTest(qa_performance.TestJobQueueSubmissionPerformance)

  if qa_config.TestEnabled("parallel-performance"):
    for (templ, perf_test) in [
      (constants.DT_DRBD8,
       qa_performance.TestParallelDRBDInstanceCreationPerformance),
      (constants.DT_PLAIN,
       qa_performance.TestParallelPlainInstanceCreationPerformance),
      ]:
      if qa_config.IsTemplateSupported(templ):
        RunTest(perf_test)

  # Preparations need to be made only if some of these tests are enabled
  if not qa_config.IsTemplateSupported(constants.DT_DRBD8):
    return
  if not qa_config.TestEnabled(qa_config.Either(PARALLEL_TEST_DICT.keys())):
    return

  drbd_nodes = qa_config.AcquireManyNodes(2)
  try:
    drbd_instance = qa_instance.TestInstanceAddWithDrbdDisk(drbd_nodes)
    try:
      for (name, parallel_test) in PARALLEL_TEST_DICT.items():
        RunTestIf(name, parallel_test, drbd_instance)
    finally:
      drbd_instance.Release()
    qa_instance.TestInstanceRemove(drbd_instance)
  finally:
    qa_config.ReleaseManyNodes(drbd_nodes)
914
915
def _RunQaNonMasterNodeTests():
  """Runs the node tests that must not be executed against the master.

  A node other than the master is acquired for the duration of the tests and
  released again afterwards, even if one of the tests fails.

  """
  # The master shouldn't be readded or put offline; "delay" needs a non-master
  # node to test
  node = qa_config.AcquireNode(exclude=qa_config.GetMasterNode())
  try:
    RunTestIf("node-readd", qa_node.TestNodeReadd, node)
    RunTestIf("node-modify", qa_node.TestNodeModify, node)
    RunTestIf("delay", qa_cluster.TestDelay, node)
  finally:
    node.Release()


def _RunQaRapiPlainInstanceTests(pnode):
  """Creates and removes plain-disk instances through RAPI.

  @param pnode: already acquired node the instances are created on

  """
  # Normal instance allocation via RAPI
  for use_client in [True, False]:
    rapi_instance = RunTest(qa_rapi.TestRapiInstanceAdd, pnode, use_client)
    try:
      if qa_config.TestEnabled("instance-plain-rapi-common-tests"):
        RunCommonInstanceTests(rapi_instance, [pnode])
      RunTest(qa_rapi.TestRapiInstanceRemove, rapi_instance, use_client)
    finally:
      rapi_instance.Release()

  # Multi-instance allocation
  (first_instance, second_instance) = \
    RunTest(qa_rapi.TestRapiInstanceMultiAlloc, pnode)
  try:
    RunTest(qa_rapi.TestRapiInstanceRemove, first_instance, True)
    RunTest(qa_rapi.TestRapiInstanceRemove, second_instance, True)
  finally:
    first_instance.Release()
    second_instance.Release()


def _RunQaNodeTagAndRapiTests():
  """Runs the node tag tests and, if RAPI is enabled, the RAPI node tests.

  The node acquired for these tests is released afterwards, even if one of
  the tests fails.

  """
  pnode = qa_config.AcquireNode()
  try:
    RunTestIf("tags", qa_tags.TestNodeTags, pnode)

    if qa_rapi.Enabled():
      RunTest(qa_rapi.TestNode, pnode)

      # NOTE(review): the RAPI instance tests are taken to be nested under the
      # RAPI check above; confirm against the original indentation
      if (qa_config.TestEnabled("instance-add-plain-disk")
          and qa_config.IsTemplateSupported(constants.DT_PLAIN)):
        _RunQaRapiPlainInstanceTests(pnode)
  finally:
    pnode.Release()


def _RunQaInstanceTestsPerConfig():
  """Runs the instance test block once for every enabled configuration.

  Each entry of the configuration list consists of the test condition passed
  to C{qa_config.TestEnabled}, a setup function whose return value is kept,
  and a restore function which is called with that value once the instance
  tests have finished.

  """
  config_list = [
    ("default-instance-tests", lambda: None, lambda _: None),
    (IsExclusiveStorageInstanceTestEnabled,
     lambda: qa_cluster.TestSetExclStorCluster(True),
     qa_cluster.TestSetExclStorCluster),
    ]
  for (conf_name, setup_conf_f, restore_conf_f) in config_list:
    if qa_config.TestEnabled(conf_name):
      oldconf = setup_conf_f()
      RunTestBlock(RunInstanceTests)
      restore_conf_f(oldconf)


def _RunQaExportTests():
  """Runs the instance export tests, once running and once shut down.

  For each round a plain-disk instance is added on the primary node and
  exported (with removal) to a second node; both the instance and the nodes
  are released again even if a test fails.

  """
  pnode = qa_config.AcquireNode()
  try:
    if qa_config.TestEnabled(["instance-add-plain-disk", "instance-export"]):
      for shutdown in [False, True]:
        instance = RunTest(qa_instance.TestInstanceAddWithPlainDisk, [pnode])
        try:
          expnode = qa_config.AcquireNode(exclude=pnode)
          try:
            if shutdown:
              # Stop instance before exporting and removing it
              RunTest(qa_instance.TestInstanceShutdown, instance)
            RunTest(qa_instance.TestInstanceExportWithRemove, instance,
                    expnode)
            RunTest(qa_instance.TestBackupList, expnode)
          finally:
            expnode.Release()
        finally:
          instance.Release()
      qa_cluster.AssertClusterVerify()
  finally:
    pnode.Release()


def _RunQaRemoveWithOfflineSecondaryTest():
  """Tests removing a DRBD instance whose secondary node is offline.

  Both nodes are released again even if the test fails; the cluster is
  verified once they have been released.

  """
  def _SetOffline(node):
    return qa_node.MakeNodeOffline(node, "yes")

  def _SetOnline(node):
    return qa_node.MakeNodeOffline(node, "no")

  # Make sure the master is not put offline
  snode = qa_config.AcquireNode(exclude=qa_config.GetMasterNode())
  try:
    pnode = qa_config.AcquireNode(exclude=snode)
    try:
      instance = qa_instance.TestInstanceAddWithDrbdDisk([pnode, snode])
      RunTest(qa_instance.TestRemoveInstanceOfflineNode, instance, snode,
              _SetOffline, _SetOnline)
    finally:
      pnode.Release()
  finally:
    snode.Release()
  qa_cluster.AssertClusterVerify()


def RunQa():
  """Main QA body.

  Runs the individual test blocks in a fixed order: environment and cluster
  setup first, then cluster, OS, node, group and network tests, the instance
  related tests, and finally the monitoring and performance tests followed
  by node removal and cluster destruction.

  """
  rapi_user = "ganeti-qa"

  RunTestBlock(RunEnvTests)
  rapi_secret = SetupCluster(rapi_user)

  if qa_rapi.Enabled():
    # Load RAPI certificate
    qa_rapi.Setup(rapi_user, rapi_secret)

  RunTestBlock(RunClusterTests)
  RunTestBlock(RunOsTests)

  RunTestIf("tags", qa_tags.TestClusterTags)

  RunTestBlock(RunCommonNodeTests)
  RunTestBlock(RunGroupListTests)
  RunTestBlock(RunGroupRwTests)
  RunTestBlock(RunNetworkTests)

  _RunQaNonMasterNodeTests()

  # Make sure the cluster is clean before running instance tests
  qa_cluster.AssertClusterVerify()

  _RunQaNodeTagAndRapiTests()
  _RunQaInstanceTestsPerConfig()
  _RunQaExportTests()

  RunTestIf("cluster-upgrade", qa_cluster.TestUpgrade)

  RunTestBlock(RunExclusiveStorageTests)
  RunTestIf(["cluster-instance-policy", "instance-add-plain-disk"],
            TestIPolicyPlainInstance)

  RunTestBlock(RunCustomSshPortTests)

  RunTestIf(
    "instance-add-restricted-by-disktemplates",
    qa_instance.TestInstanceCreationRestrictedByDiskTemplates)

  # Test removing instance with offline drbd secondary
  if qa_config.TestEnabled(["instance-remove-drbd-offline",
                            "instance-add-drbd-disk"]):
    _RunQaRemoveWithOfflineSecondaryTest()

  RunTestBlock(RunMonitoringTests)

  RunPerformanceTests()

  RunTestIf("create-cluster", qa_node.TestNodeRemoveAll)

  RunTestIf("cluster-destroy", qa_cluster.TestClusterDestroy)
1060
1061
@UsesRapiClient
def main():
  """Main program.

  Expects exactly one positional argument, the QA configuration file, and
  refuses to do anything unless C{--yes-do-it} was given. After loading the
  configuration, an SSH multiplexer to the master node is started, the QA
  suite is run and all multiplexers are closed again.

  """
  colors.check_for_colors()

  parser = optparse.OptionParser(usage="%prog [options] <config-file>")
  parser.add_option("--yes-do-it", dest="yes_do_it",
                    action="store_true",
                    help="Really execute the tests")
  (opts, args) = parser.parse_args()

  if len(args) != 1:
    # parser.error() prints the usage message and exits the program
    parser.error("Wrong number of arguments.")
  (config_file, ) = args

  if not opts.yes_do_it:
    print ("Executing this script irreversibly destroys any Ganeti\n"
           "configuration on all nodes involved. If you really want\n"
           "to start testing, supply the --yes-do-it option.")
    sys.exit(1)

  qa_config.Load(config_file)

  primary = qa_config.GetMasterNode().primary
  qa_utils.StartMultiplexer(primary)
  # Everything after starting the multiplexer is covered by the cleanup
  # below, so a failure while printing the SSH hints doesn't leave it running
  try:
    print ("SSH command for primary node: %s" %
           utils.ShellQuoteArgs(qa_utils.GetSSHCommand(primary, "")))
    print ("SSH command for other nodes: %s" %
           utils.ShellQuoteArgs(qa_utils.GetSSHCommand("NODE", "")))
    RunQa()
  finally:
    qa_utils.CloseMultiplexers()
1098
# Run the QA suite only when this file is executed as a script, not when it
# is imported as a module
if __name__ == "__main__":
  main()