Merge branch 'stable-2.10' into stable-2.11
[ganeti-github.git] / qa / ganeti-qa.py
1 #!/usr/bin/python -u
2 #
3
4 # Copyright (C) 2007, 2008, 2009, 2010, 2011, 2012, 2013 Google Inc.
5 # All rights reserved.
6 #
7 # Redistribution and use in source and binary forms, with or without
8 # modification, are permitted provided that the following conditions are
9 # met:
10 #
11 # 1. Redistributions of source code must retain the above copyright notice,
12 # this list of conditions and the following disclaimer.
13 #
14 # 2. Redistributions in binary form must reproduce the above copyright
15 # notice, this list of conditions and the following disclaimer in the
16 # documentation and/or other materials provided with the distribution.
17 #
18 # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
19 # IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
20 # TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
21 # PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR
22 # CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
23 # EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
24 # PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
25 # PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
26 # LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
27 # NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
28 # SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29
30
31 """Script for doing QA on Ganeti.
32
33 """
34
35 # pylint: disable=C0103
36 # due to invalid name
37
38 import copy
39 import datetime
40 import optparse
41 import sys
42
43 import colors
44 import qa_cluster
45 import qa_config
46 import qa_daemon
47 import qa_env
48 import qa_error
49 import qa_group
50 import qa_instance
51 import qa_iptables
52 import qa_monitoring
53 import qa_network
54 import qa_node
55 import qa_os
56 import qa_performance
57 import qa_job
58 import qa_rapi
59 import qa_tags
60 import qa_utils
61
62 from ganeti import utils
63 from ganeti import rapi # pylint: disable=W0611
64 from ganeti import constants
65 from ganeti import netutils
66
67 import ganeti.rapi.client # pylint: disable=W0611
68 from ganeti.rapi.client import UsesRapiClient
69
70
def _FormatHeader(line, end=72, mark="-", color=None):
  """Fill a line up to the end column.

  The text is prefixed with four copies of C{mark} and one space, followed by
  a space and padded on the right with dashes up to column C{end}. Trailing
  whitespace is stripped before the result is passed to C{colors.colorize}.

  @param line: text to place in the header
  @param end: column up to which the header is padded with dashes
  @param mark: character repeated four times at the start of the header
  @param color: forwarded as the C{color} keyword of C{colors.colorize}
  @return: whatever C{colors.colorize} returns for the padded header

  """
  prefixed = "".join([mark * 4, " ", line, " "])
  # A negative count yields an empty string, so over-long text is not cut
  filler = "-" * (end - len(prefixed))
  return colors.colorize((prefixed + filler).rstrip(), color=color)
80
81
82 def _DescriptionOf(fn):
83 """Computes the description of an item.
84
85 """
86 if fn.__doc__:
87 desc = fn.__doc__.splitlines()[0].strip()
88 desc = desc.rstrip(".")
89 if fn.__name__:
90 desc = "[" + fn.__name__ + "] " + desc
91 else:
92 desc = "%r" % fn
93
94 return desc
95
96
def RunTest(fn, *args, **kwargs):
  """Runs a test after printing a header.

  A start header is printed, then C{fn} is called with the given arguments.
  On success a PASSED header is printed and the result of C{fn} is returned;
  if an exception is raised a FAILED header is printed and the exception is
  re-raised. A header with the elapsed time is printed in both cases.

  @param fn: test function to run
  @return: the return value of C{fn}

  """
  started = datetime.datetime.now()
  description = _DescriptionOf(fn)

  print("")
  opening = "%s start %s" % (started, description)
  print(_FormatHeader(opening, color=colors.YELLOW, mark="<"))

  try:
    result = fn(*args, **kwargs)
    print(_FormatHeader("PASSED %s" % (description, ), color=colors.GREEN))
    return result
  except Exception as err:
    failure = "FAILED %s: %s" % (description, err)
    print(_FormatHeader(failure, color=colors.RED))
    raise
  finally:
    finished = datetime.datetime.now()
    closing = "%s time=%s %s" % (finished, finished - started, description)
    print(_FormatHeader(closing, color=colors.MAGENTA, mark=">"))
122
123
def ReportTestSkip(desc, testnames):
  """Reports that tests have been skipped.

  @type desc: string
  @param desc: description of what is being skipped
  @type testnames: string or list of string
  @param testnames: either a single test name in the configuration
      file, or a list of testnames (which will be AND-ed together)

  """
  now = datetime.datetime.now()
  # TODO: Formatting test names when non-string names are involved
  message = "%s skipping %s, test(s) %s disabled" % (now, desc, testnames)
  print(_FormatHeader(message, color=colors.BLUE, mark="*"))
139
140
def RunTestIf(testnames, fn, *args, **kwargs):
  """Runs a test conditionally.

  The test is run through L{RunTest} when C{qa_config.TestEnabled} accepts
  C{testnames}; otherwise the skip is reported. The result of the test is
  not returned.

  @param testnames: either a single test name in the configuration
      file, or a list of testnames (which will be AND-ed together)
  @param fn: test function, called with the remaining arguments

  """
  if not qa_config.TestEnabled(testnames):
    ReportTestSkip(_DescriptionOf(fn), testnames)
    return

  RunTest(fn, *args, **kwargs)
153
154
def RunTestBlock(fn, *args, **kwargs):
  """Runs a block of tests after printing a header.

  Works like L{RunTest}, but with bold "BLOCK" headers and without a header
  for the successful case.

  @param fn: function running the block of tests
  @return: the return value of C{fn}

  """
  started = datetime.datetime.now()
  description = _DescriptionOf(fn)

  print("")
  opening = "BLOCK %s start %s" % (started, description)
  print(_FormatHeader(opening, color=[colors.YELLOW, colors.BOLD], mark="v"))

  try:
    return fn(*args, **kwargs)
  except Exception as err:
    failure = "BLOCK FAILED %s: %s" % (description, err)
    print(_FormatHeader(failure, color=[colors.RED, colors.BOLD]))
    raise
  finally:
    finished = datetime.datetime.now()
    closing = ("BLOCK %s time=%s %s" %
               (finished, finished - started, description))
    print(_FormatHeader(closing, color=[colors.MAGENTA, colors.BOLD],
                        mark="^"))
178
179
def RunEnvTests():
  """Run several environment tests.

  All of them are controlled by the "env" test name.

  """
  env_tests = (
    qa_env.TestSshConnection,
    qa_env.TestIcmpPing,
    qa_env.TestGanetiCommands,
    )
  for env_test in env_tests:
    RunTestIf("env", env_test)
187
188
def SetupCluster(rapi_user):
  """Initializes the cluster.

  Runs the cluster initialization (or adopts an existing cluster when
  "create-cluster" is disabled), configures the node groups, adds the nodes
  and runs the listing tests.

  @param rapi_user: Login user for RAPI
  @return: Login secret for RAPI

  """
  secret = utils.GenerateSecret()
  RunTestIf("create-cluster", qa_cluster.TestClusterInit, rapi_user, secret)
  if not qa_config.TestEnabled("create-cluster"):
    # If the cluster is already in place, we assume that exclusive-storage is
    # already set according to the configuration
    exclusive_storage = qa_config.get("exclusive-storage", False)
    qa_config.SetExclusiveStorage(exclusive_storage)
    if qa_rapi.Enabled():
      # To support RAPI on an existing cluster we have to find out the secret
      secret = qa_rapi.LookupRapiSecret(rapi_user)

  qa_group.ConfigureGroups()

  # Test on empty cluster
  for (name, test) in [
    ("node-list", qa_node.TestNodeList),
    ("instance-list", qa_instance.TestInstanceList),
    ("job-list", qa_job.TestJobList),
    ]:
    RunTestIf(name, test)

  RunTestIf("create-cluster", qa_node.TestNodeAddAll)
  if not qa_config.TestEnabled("create-cluster"):
    # consider the nodes are already there
    qa_node.MarkNodeAddedAll()

  for jobqueue_test in (qa_cluster.TestJobqueue, qa_job.TestJobCancellation):
    RunTestIf("test-jobqueue", jobqueue_test)

  # enable the watcher (unconditionally)
  RunTest(qa_daemon.TestResumeWatcher)

  for (name, test) in [
    ("node-list", qa_node.TestNodeList),
    # Test listing fields
    ("node-list", qa_node.TestNodeListFields),
    ("instance-list", qa_instance.TestInstanceListFields),
    ("job-list", qa_job.TestJobListFields),
    ("instance-export", qa_instance.TestBackupListFields),
    ("node-info", qa_node.TestNodeInfo),
    ]:
    RunTestIf(name, test)

  return secret
236
237
def RunClusterTests():
  """Runs tests related to gnt-cluster.

  Each entry of the table below pairs the condition handed to L{RunTestIf}
  with the test function; the entries are run in order.

  """
  cluster_only = qa_config.NoVirtualCluster
  with_rapi = qa_rapi.Enabled

  cluster_tests = [
    ("create-cluster", qa_cluster.TestClusterInitDisk),
    ("cluster-renew-crypto", qa_cluster.TestClusterRenewCrypto),
    ("cluster-verify", qa_cluster.TestClusterVerify),
    ("cluster-reserved-lvs", qa_cluster.TestClusterReservedLvs),
    # TODO: add more cluster modify tests
    ("cluster-modify", qa_cluster.TestClusterModifyEmpty),
    ("cluster-modify", qa_cluster.TestClusterModifyIPolicy),
    ("cluster-modify", qa_cluster.TestClusterModifyISpecs),
    ("cluster-modify", qa_cluster.TestClusterModifyBe),
    ("cluster-modify", qa_cluster.TestClusterModifyDisk),
    ("cluster-modify", qa_cluster.TestClusterModifyDiskTemplates),
    ("cluster-modify", qa_cluster.TestClusterModifyFileStorageDir),
    ("cluster-modify", qa_cluster.TestClusterModifySharedFileStorageDir),
    ("cluster-modify", qa_cluster.TestClusterModifyUserShutdown),
    ("cluster-rename", qa_cluster.TestClusterRename),
    ("cluster-info", qa_cluster.TestClusterVersion),
    ("cluster-info", qa_cluster.TestClusterInfo),
    ("cluster-info", qa_cluster.TestClusterGetmaster),
    ("cluster-redist-conf", qa_cluster.TestClusterRedistConf),
    (["cluster-copyfile", cluster_only], qa_cluster.TestClusterCopyfile),
    ("cluster-command", qa_cluster.TestClusterCommand),
    ("cluster-burnin", qa_cluster.TestClusterBurnin),
    ("cluster-master-failover", qa_cluster.TestClusterMasterFailover),
    ("cluster-master-failover",
     qa_cluster.TestClusterMasterFailoverWithDrainedQueue),
    (["cluster-oob", cluster_only], qa_cluster.TestClusterOob),
    (with_rapi, qa_rapi.TestVersion),
    (with_rapi, qa_rapi.TestEmptyCluster),
    (with_rapi, qa_rapi.TestRapiQuery),
    ]

  for (condition, cluster_test) in cluster_tests:
    RunTestIf(condition, cluster_test)
276
277
def RunRepairDiskSizes():
  """Run the repair disk-sizes test.

  The test is controlled by the "cluster-repair-disk-sizes" test name.

  """
  repair_test = qa_cluster.TestClusterRepairDiskSizes
  RunTestIf("cluster-repair-disk-sizes", repair_test)
283
284
def RunOsTests():
  """Runs all tests related to gnt-os.

  All tests need the "os" test to be enabled together with
  C{qa_config.NoVirtualCluster}. The validity tests additionally receive
  C{qa_rapi.GetOperatingSystems} when RAPI is enabled, C{None} otherwise.

  """
  condition = ["os", qa_config.NoVirtualCluster]

  if qa_config.TestEnabled(qa_rapi.Enabled):
    rapi_args = (qa_rapi.GetOperatingSystems, )
  else:
    rapi_args = (None, )

  # Pairs of (test function, extra positional arguments), run in order
  os_tests = [
    (qa_os.TestOsList, ()),
    (qa_os.TestOsDiagnose, ()),
    (qa_os.TestOsValid, rapi_args),
    (qa_os.TestOsInvalid, rapi_args),
    (qa_os.TestOsPartiallyValid, rapi_args),
    (qa_os.TestOsModifyValid, ()),
    (qa_os.TestOsModifyInvalid, ()),
    (qa_os.TestOsStatesNonExisting, ()),
    ]

  for (os_test, extra_args) in os_tests:
    RunTestIf(condition, os_test, *extra_args)
315
316
def RunCommonInstanceTests(instance, inst_nodes):
  """Runs a few tests that are common to all disk types.

  @param instance: instance the tests operate on
  @param inst_nodes: nodes of the instance, passed to the broken-DRBD
      cluster-verify test

  """
  with_rapi = qa_rapi.Enabled

  RunTestIf("instance-shutdown", qa_instance.TestInstanceShutdown, instance)
  RunTestIf(["instance-shutdown", "instance-console", with_rapi],
            qa_rapi.TestRapiStoppedInstanceConsole, instance)
  RunTestIf(["instance-shutdown", "instance-modify"],
            qa_instance.TestInstanceStoppedModify, instance)
  RunTestIf("instance-shutdown", qa_instance.TestInstanceStartup, instance)

  # Test shutdown/start via RAPI
  for rapi_test in (qa_rapi.TestRapiInstanceShutdown,
                    qa_rapi.TestRapiInstanceStartup):
    RunTestIf(["instance-shutdown", with_rapi], rapi_test, instance)

  RunTestIf("instance-list", qa_instance.TestInstanceList)

  RunTestIf("instance-info", qa_instance.TestInstanceInfo, instance)

  RunTestIf("instance-modify", qa_instance.TestInstanceModify, instance)
  RunTestIf(["instance-modify", with_rapi],
            qa_rapi.TestRapiInstanceModify, instance)

  RunTestIf("instance-console", qa_instance.TestInstanceConsole, instance)
  RunTestIf(["instance-console", with_rapi],
            qa_rapi.TestRapiInstanceConsole, instance)

  RunTestIf("instance-device-names", qa_instance.TestInstanceDeviceNames,
            instance)

  # Tests requiring the instance to be stopped; any of them being enabled
  # triggers the shutdown below and the startup afterwards
  down_tests = qa_config.Either([
    "instance-reinstall",
    "instance-rename",
    "instance-grow-disk",
    ])

  # shutdown instance for any 'down' tests
  RunTestIf(down_tests, qa_instance.TestInstanceShutdown, instance)

  # now run the 'down' state tests
  RunTestIf("instance-reinstall", qa_instance.TestInstanceReinstall, instance)
  RunTestIf(["instance-reinstall", with_rapi],
            qa_rapi.TestRapiInstanceReinstall, instance)

  if qa_config.TestEnabled("instance-rename"):
    spare = qa_config.AcquireInstance()
    try:
      current_name = instance.name
      spare_name = spare.name

      # First rename to the same name, then to a different name, if we have
      # one configured
      new_names = [current_name]
      if spare_name is not None:
        new_names.append(spare_name)

      for new_name in new_names:
        RunTest(qa_instance.TestInstanceRenameAndBack,
                current_name, new_name)
        RunTestIf(with_rapi, qa_rapi.TestRapiInstanceRenameAndBack,
                  current_name, new_name)
    finally:
      spare.Release()

  RunTestIf(["instance-grow-disk"], qa_instance.TestInstanceGrowDisk, instance)

  # and now start the instance again
  RunTestIf(down_tests, qa_instance.TestInstanceStartup, instance)

  RunTestIf("instance-reboot", qa_instance.TestInstanceReboot, instance)

  RunTestIf("tags", qa_tags.TestInstanceTags, instance)

  if instance.disk_template == constants.DT_DRBD8:
    RunTestIf("cluster-verify",
              qa_cluster.TestClusterVerifyDisksBrokenDRBD, instance, inst_nodes)
  RunTestIf("cluster-verify", qa_cluster.TestClusterVerify)

  RunTestIf(with_rapi, qa_rapi.TestInstance, instance)

  # Lists instances, too
  RunTestIf("node-list", qa_node.TestNodeList)

  # Some jobs have been run, let's test listing them
  RunTestIf("job-list", qa_job.TestJobList)
402
403
def RunCommonNodeTests():
  """Run a few common node tests.

  The out-of-band test additionally requires C{qa_config.NoVirtualCluster}.

  """
  node_tests = [
    ("node-volumes", qa_node.TestNodeVolumes),
    ("node-storage", qa_node.TestNodeStorage),
    (["node-oob", qa_config.NoVirtualCluster], qa_node.TestOutOfBand),
    ]
  for (condition, node_test) in node_tests:
    RunTestIf(condition, node_test)
411
412
def RunGroupListTests():
  """Run tests for listing node groups.

  Both tests are controlled by the "group-list" test name.

  """
  for list_test in (qa_group.TestGroupList, qa_group.TestGroupListFields):
    RunTestIf("group-list", list_test)
419
420
def RunNetworkTests():
  """Run tests for network management.

  The tags test needs both the "network" and the "tags" tests enabled.

  """
  network_tests = [
    ("network", qa_network.TestNetworkAddRemove),
    ("network", qa_network.TestNetworkConnect),
    (["network", "tags"], qa_network.TestNetworkTags),
    ]
  for (condition, network_test) in network_tests:
    RunTestIf(condition, network_test)
428
429
def RunGroupRwTests():
  """Run tests for adding/removing/renaming groups.

  All tests need "group-rwops"; the RAPI and tags tests have an additional
  condition each.

  """
  for group_test in (qa_group.TestGroupAddRemoveRename,
                     qa_group.TestGroupAddWithOptions,
                     qa_group.TestGroupModify):
    RunTestIf("group-rwops", group_test)

  RunTestIf(["group-rwops", qa_rapi.Enabled], qa_rapi.TestRapiNodeGroups)

  # The default group is looked up only now, after the tests above have run
  default_group = qa_group.GetDefaultGroup()
  RunTestIf(["group-rwops", "tags"], qa_tags.TestGroupTags, default_group)
440
441
def RunExportImportTests(instance, inodes):
  """Tries to export and import the instance.

  The export is written to a node other than the instance's primary node. If
  "instance-import" is enabled, the export is imported as a new instance,
  which is started and removed again. Afterwards the inter-cluster instance
  move is tested, if enabled together with RAPI.

  @param instance: instance to export
  @type inodes: list of nodes
  @param inodes: current nodes of the instance

  """
  # FIXME: export explicitly bails out on file based storage. other non-lvm
  # based storage types are untested, though. Also note that import could still
  # work, but is deeply embedded into the "export" case.
  if (qa_config.TestEnabled("instance-export") and
      instance.disk_template not in constants.DTS_FILEBASED):
    RunTest(qa_instance.TestInstanceExportNoTarget, instance)

    primary = inodes[0]
    export_node = qa_config.AcquireNode(exclude=primary)
    try:
      export_name = RunTest(qa_instance.TestInstanceExport, instance,
                            export_node)

      RunTest(qa_instance.TestBackupList, export_node)

      if qa_config.TestEnabled("instance-import"):
        imported = qa_config.AcquireInstance()
        try:
          RunTest(qa_instance.TestInstanceImport, imported, primary,
                  export_node, export_name)
          # Check if starting the instance works
          RunTest(qa_instance.TestInstanceStartup, imported)
          RunTest(qa_instance.TestInstanceRemove, imported)
        finally:
          imported.Release()
    finally:
      export_node.Release()

  # FIXME: inter-cluster-instance-move crashes on file based instances :/
  # See Issue 414.
  move_condition = [qa_rapi.Enabled, "inter-cluster-instance-move"]
  if (qa_config.TestEnabled(move_condition) and
      instance.disk_template not in constants.DTS_FILEBASED):
    moved = qa_config.AcquireInstance()
    try:
      target_node = qa_config.AcquireNode(exclude=inodes)
      try:
        RunTest(qa_rapi.TestInterClusterInstanceMove, instance, moved,
                inodes, target_node)
      finally:
        target_node.Release()
    finally:
      moved.Release()
490
491
def RunDaemonTests(instance):
  """Test the ganeti-watcher script.

  The watcher is paused before and resumed after the tests; it is not
  resumed if one of the tests raises an exception.

  @param instance: instance passed to the restart tests

  """
  RunTest(qa_daemon.TestPauseWatcher)

  watcher_tests = [
    ("instance-automatic-restart", qa_daemon.TestInstanceAutomaticRestart),
    ("instance-consecutive-failures",
     qa_daemon.TestInstanceConsecutiveFailures),
    ]
  for (name, watcher_test) in watcher_tests:
    RunTestIf(name, watcher_test, instance)

  RunTest(qa_daemon.TestResumeWatcher)
504
505
def RunHardwareFailureTests(instance, inodes):
  """Test cluster internal hardware failure recovery.

  Covers instance failover and migration, replacing and recreating disks
  and, for instances with at least two nodes, the node-level evacuate,
  failover and migrate operations.

  @param instance: instance to run the tests on
  @param inodes: current nodes of the instance

  """
  for (name, cli_test, rapi_test) in [
    ("instance-failover", qa_instance.TestInstanceFailover,
     qa_rapi.TestRapiInstanceFailover),
    ("instance-migrate", qa_instance.TestInstanceMigrate,
     qa_rapi.TestRapiInstanceMigrate),
    ]:
    RunTestIf(name, cli_test, instance)
    RunTestIf([name, qa_rapi.Enabled], rapi_test, instance)

  if qa_config.TestEnabled("instance-replace-disks"):
    # We just need alternative secondary nodes, hence "- 1"
    replacements = qa_config.AcquireManyNodes(len(inodes) - 1, exclude=inodes)
    try:
      RunTestIf(qa_rapi.Enabled, qa_rapi.TestRapiInstanceReplaceDisks, instance)
      RunTest(qa_instance.TestReplaceDisks, instance, inodes, replacements)
    finally:
      qa_config.ReleaseManyNodes(replacements)
    del replacements

  if qa_config.TestEnabled("instance-recreate-disks"):
    # "acquired" is what has to be released afterwards, "targets" is what the
    # disks are recreated on; they differ only in the fallback case
    try:
      acquired = qa_config.AcquireManyNodes(len(inodes), exclude=inodes)
      targets = acquired
    except qa_error.OutOfNodesError:
      if len(inodes) <= 1:
        raise
      # If the cluster is not big enough, let's reuse some of the nodes, but
      # with different roles. In this way, we can test a DRBD instance even on
      # a 3-node cluster.
      acquired = [qa_config.AcquireNode(exclude=inodes)]
      targets = acquired + inodes[:-1]
    try:
      RunTest(qa_instance.TestRecreateDisks, instance, inodes, targets)
    finally:
      qa_config.ReleaseManyNodes(acquired)

  if len(inodes) >= 2:
    for (name, node_test) in [
      ("node-evacuate", qa_node.TestNodeEvacuate),
      ("node-failover", qa_node.TestNodeFailover),
      ("node-migrate", qa_node.TestNodeMigrate),
      ]:
      RunTestIf(name, node_test, inodes[0], inodes[1])
552
553
def RunExclusiveStorageTests():
  """Test exclusive storage.

  Does nothing unless "cluster-exclusive-storage" is enabled. The value
  returned by the first C{TestSetExclStorCluster} call is passed back to
  that function once the tests are done.

  """
  if not qa_config.TestEnabled("cluster-exclusive-storage"):
    return

  pnode = qa_config.AcquireNode()
  try:
    saved_setting = qa_cluster.TestSetExclStorCluster(False)
    qa_node.TestExclStorSingleNode(pnode)

    qa_cluster.TestSetExclStorCluster(True)
    qa_cluster.TestExclStorSharedPv(pnode)

    if qa_config.TestEnabled("instance-add-plain-disk"):
      # Make sure that the cluster doesn't have any pre-existing problem
      qa_cluster.AssertClusterVerify()

      # Create and allocate instances
      first = qa_instance.TestInstanceAddWithPlainDisk([pnode])
      try:
        second = qa_instance.TestInstanceAddWithPlainDisk([pnode])
        try:
          # cluster-verify checks that disks are allocated correctly
          qa_cluster.AssertClusterVerify()

          # Remove instances
          qa_instance.TestInstanceRemove(second)
          qa_instance.TestInstanceRemove(first)
        finally:
          second.Release()
      finally:
        first.Release()

    if qa_config.TestEnabled("instance-add-drbd-disk"):
      snode = qa_config.AcquireNode()
      try:
        qa_cluster.TestSetExclStorCluster(False)
        drbd_instance = qa_instance.TestInstanceAddWithDrbdDisk([pnode, snode])
        try:
          qa_cluster.TestSetExclStorCluster(True)
          # With exclusive storage switched on, cluster-verify is expected to
          # fail with this error for the instance created above
          qa_cluster.AssertClusterVerify(
            fail=True, errors=[constants.CV_EINSTANCEUNSUITABLENODE])
          qa_instance.TestInstanceRemove(drbd_instance)
        finally:
          drbd_instance.Release()
      finally:
        snode.Release()

    qa_cluster.TestSetExclStorCluster(saved_setting)
  finally:
    pnode.Release()
604
605
def RunCustomSshPortTests():
  """Test accessing nodes with custom SSH ports.

  This requires removing nodes, adding them to a new group, and then undoing
  the change.

  Does nothing unless "group-custom-ssh-port" is enabled. The test is also
  skipped when C{qa_utils.UsesIPv6Connection} reports an IPv6 connection for
  one of the acquired nodes.

  """
  if not qa_config.TestEnabled("group-custom-ssh-port"):
    return

  std_port = netutils.GetDaemonPort(constants.SSH)
  port = 211
  master = qa_config.GetMasterNode()
  with qa_config.AcquireManyNodesCtx(1, exclude=master) as nodes:
    # Checks if the node(s) could be contacted through IPv6.
    # If yes, better skip the whole test.
    for node in nodes:
      if qa_utils.UsesIPv6Connection(node.primary, std_port):
        # The two literals are concatenated as they are, hence the space
        # after the comma has to be part of the first one
        print("Node %s is likely to be reached using IPv6, "
              "skipping the test" % (node.primary, ))
        return

    for node in nodes:
      qa_node.NodeRemove(node)

    with qa_iptables.RulesContext(nodes) as r:
      with qa_group.NewGroupCtx() as group:
        qa_group.ModifyGroupSshPort(r, group, nodes, port)

        for node in nodes:
          qa_node.NodeAdd(node, group=group)

        # Make sure that the cluster doesn't have any pre-existing problem
        qa_cluster.AssertClusterVerify()

        # Create and allocate instances
        instance1 = qa_instance.TestInstanceAddWithPlainDisk(nodes)
        try:
          instance2 = qa_instance.TestInstanceAddWithPlainDisk(nodes)
          try:
            # cluster-verify checks that disks are allocated correctly
            qa_cluster.AssertClusterVerify()

            # Remove instances
            qa_instance.TestInstanceRemove(instance2)
            qa_instance.TestInstanceRemove(instance1)
          finally:
            instance2.Release()
        finally:
          instance1.Release()

        # Take the nodes out of the temporary group again while it still
        # exists
        for node in nodes:
          qa_node.NodeRemove(node)

    # Undo the change: add the nodes back without the custom group
    for node in nodes:
      qa_node.NodeAdd(node)

    qa_cluster.AssertClusterVerify()
663
664
def _BuildSpecDict(par, mn, st, mx):
  """Builds instance specs with a single min/max pair for one parameter.

  @param par: name of the parameter
  @param mn: value stored under C{constants.ISPECS_MIN}
  @param st: value stored under C{constants.ISPECS_STD}
  @param mx: value stored under C{constants.ISPECS_MAX}
  @rtype: dict
  @return: dictionary with the C{constants.ISPECS_MINMAX} list (one element)
      and the C{constants.ISPECS_STD} dictionary

  """
  bounds = {
    constants.ISPECS_MIN: {par: mn},
    constants.ISPECS_MAX: {par: mx},
    }

  spec = {}
  spec[constants.ISPECS_MINMAX] = [bounds]
  spec[constants.ISPECS_STD] = {par: st}
  return spec
673
674
def _BuildDoubleSpecDict(index, par, mn, st, mx):
  """Builds instance specs with two min/max pairs, one of them filled in.

  @param index: position (0 or 1) of the min/max pair receiving the values;
      the other pair is left as an empty dictionary
  @param par: name of the parameter
  @param mn: value stored under C{constants.ISPECS_MIN}
  @param st: value stored under C{constants.ISPECS_STD}; if C{None}, no
      C{constants.ISPECS_STD} entry is created
  @param mx: value stored under C{constants.ISPECS_MAX}
  @rtype: dict
  @return: the specs dictionary

  """
  minmax = [{}, {}]
  minmax[index] = {
    constants.ISPECS_MIN: {par: mn},
    constants.ISPECS_MAX: {par: mx},
    }

  spec = {constants.ISPECS_MINMAX: minmax}
  if st is not None:
    spec[constants.ISPECS_STD] = {par: st}
  return spec
686
687
def TestIPolicyPlainInstance():
  """Test instance policy interaction with instances

  A list of policy changes is built from the specs of a freshly created
  plain instance. The changes are first applied while the instance exists,
  checking cluster-verify after each one. They are then replayed without the
  instance, checking that instance creation fails for the changes marked as
  violating the policy.

  """
  spec_params = ["memory-size", "cpu-count", "disk-count", "disk-size",
                 "nic-count"]
  if not qa_config.IsTemplateSupported(constants.DT_PLAIN):
    print("Template %s not supported" % constants.DT_PLAIN)
    return

  # This test assumes that the group policy is empty
  (_, old_specs) = qa_cluster.TestClusterSetISpecs()
  # We also assume to have only one min/max bound
  assert len(old_specs[constants.ISPECS_MINMAX]) == 1
  node = qa_config.AcquireNode()
  try:
    # Log of policy changes, list of tuples:
    # (full_change, incremental_change, policy_violated)
    history = []
    instance = qa_instance.TestInstanceAddWithPlainDisk([node])
    try:
      policy_warnings = [constants.CV_EINSTANCEPOLICY]
      for par in spec_params:
        (iminval, imaxval) = qa_instance.GetInstanceSpec(instance.name, par)
        # Some specs must be multiple of 4
        above = imaxval + 4
        history.append((None, _BuildSpecDict(par, above, above, above), True))
        if iminval > 0:
          # Some specs must be multiple of 4
          below = iminval - 4 if iminval >= 4 else iminval - 1
          history.append((None, _BuildSpecDict(par, 0, below, below), True))
        history.append((old_specs, None, False))

      # Test with two instance specs
      double_specs = copy.deepcopy(old_specs)
      double_specs[constants.ISPECS_MINMAX] = \
        double_specs[constants.ISPECS_MINMAX] * 2
      (par1, par2) = spec_params[0:2]
      (_, imaxval1) = qa_instance.GetInstanceSpec(instance.name, par1)
      (_, imaxval2) = qa_instance.GetInstanceSpec(instance.name, par2)
      old_minmax = old_specs[constants.ISPECS_MINMAX][0]

      # The first min/max limit is being violated
      first_violated = _BuildDoubleSpecDict(0, par1, imaxval1 + 4,
                                            imaxval1 + 4, imaxval1 + 4)
      # Both min/max limits are being violated
      both_violated = _BuildDoubleSpecDict(1, par2, imaxval2 + 4, None,
                                           imaxval2 + 4)
      # The second min/max limit is being violated
      second_violated = \
        _BuildDoubleSpecDict(0, par1,
                             old_minmax[constants.ISPECS_MIN][par1],
                             old_specs[constants.ISPECS_STD][par1],
                             old_minmax[constants.ISPECS_MAX][par1])
      history.extend([
        (double_specs, None, False),
        (None, first_violated, False),
        (None, both_violated, True),
        (None, second_violated, False),
        (old_specs, None, False),
        ])

      # Apply the changes, and check policy violations after each change
      qa_cluster.AssertClusterVerify()
      for (new_specs, diff_specs, violated) in history:
        qa_cluster.TestClusterSetISpecs(new_specs=new_specs,
                                        diff_specs=diff_specs)
        if violated:
          qa_cluster.AssertClusterVerify(warnings=policy_warnings)
        else:
          qa_cluster.AssertClusterVerify()

      qa_instance.TestInstanceRemove(instance)
    finally:
      instance.Release()

    # Now we replay the same policy changes, and we expect that the instance
    # cannot be created for the cases where we had a policy violation above
    for (new_specs, diff_specs, violated) in history:
      qa_cluster.TestClusterSetISpecs(new_specs=new_specs,
                                      diff_specs=diff_specs)
      if violated:
        qa_instance.TestInstanceAddWithPlainDisk([node], fail=True)
      # Instance creation with no policy violation has been tested already
  finally:
    node.Release()
775
776
def IsExclusiveStorageInstanceTestEnabled():
  """Checks whether the exclusive-storage instance tests can be run.

  If the "exclusive-storage-instance-tests" test is enabled, the number of
  PVs in the volume group is queried on every configured node using C{vgs}.

  @rtype: bool
  @return: False if the test is disabled, True if it is enabled and every
      node has at least two PVs
  @raise qa_error.Error: if the test is enabled and the number of PVs of a
      node cannot be determined or is lower than two

  """
  test_name = "exclusive-storage-instance-tests"
  if not qa_config.TestEnabled(test_name):
    return False

  vg_name = qa_config.get("vg-name", constants.DEFAULT_VG)
  count_pvs_cmd = utils.ShellQuoteArgs([
    "vgs", "--noheadings", "-o", "pv_count", vg_name,
    ])

  for node in qa_config.GetConfig()["nodes"]:
    try:
      pv_count = int(qa_utils.GetCommandOutput(node.primary, count_pvs_cmd))
    except Exception as err:
      raise qa_error.Error("Cannot get the number of PVs on %s, needed by"
                           " '%s': %s" % (node.primary, test_name, err))
    if pv_count < 2:
      raise qa_error.Error("Node %s has not enough PVs (%s) to run '%s'" %
                           (node.primary, pv_count, test_name))

  return True
799
800
def RunInstanceTests():
  """Create and exercise instances.

  For every entry of C{qa_instance.available_instance_tests} whose test is
  enabled and whose disk template is supported, an instance is created on
  freshly acquired nodes, put through the instance test suites and removed
  again. Entries not fulfilling both conditions are reported as skipped.

  """
  for (test_name, templ, create_fun, num_nodes) in \
      qa_instance.available_instance_tests:
    if not (qa_config.TestEnabled(test_name) and
            qa_config.IsTemplateSupported(templ)):
      test_desc = "Creating instances of template %s" % templ
      if not qa_config.TestEnabled(test_name):
        ReportTestSkip(test_desc, test_name)
      else:
        ReportTestSkip(test_desc, "disk template %s" % templ)
      continue

    inodes = qa_config.AcquireManyNodes(num_nodes)
    try:
      instance = RunTest(create_fun, inodes)
      try:
        RunTestIf("instance-user-down", qa_instance.TestInstanceUserDown,
                  instance)
        RunTestIf("cluster-epo", qa_cluster.TestClusterEpo)
        RunDaemonTests(instance)
        for node in inodes:
          RunTestIf("haskell-confd", qa_node.TestNodeListDrbd, node,
                    templ == constants.DT_DRBD8)
        if len(inodes) > 1:
          RunTestIf("group-rwops", qa_group.TestAssignNodesIncludingSplit,
                    constants.INITIAL_NODE_GROUP_NAME,
                    inodes[0].primary, inodes[1].primary)
        if qa_config.TestEnabled("instance-convert-disk"):
          RunTest(qa_instance.TestInstanceShutdown, instance)
          RunTest(qa_instance.TestInstanceConvertDiskToPlain,
                  instance, inodes)
          RunTest(qa_instance.TestInstanceStartup, instance)
        RunTestIf("instance-modify-disks",
                  qa_instance.TestInstanceModifyDisks, instance)
        RunCommonInstanceTests(instance, inodes)
        if qa_config.TestEnabled("instance-modify-primary"):
          othernode = qa_config.AcquireNode()
          # Release the node even if the test fails, like it is done for all
          # other acquired nodes and instances
          try:
            RunTest(qa_instance.TestInstanceModifyPrimaryAndBack,
                    instance, inodes[0], othernode)
          finally:
            othernode.Release()
        RunGroupListTests()
        RunExportImportTests(instance, inodes)
        RunHardwareFailureTests(instance, inodes)
        RunRepairDiskSizes()
        RunTestIf(["rapi", "instance-data-censorship"],
                  qa_rapi.TestInstanceDataCensorship, instance, inodes)
        RunTest(qa_instance.TestInstanceRemove, instance)
      finally:
        instance.Release()
      del instance
    finally:
      qa_config.ReleaseManyNodes(inodes)
    qa_cluster.AssertClusterVerify()
855
856
def RunMonitoringTests():
  # Runs the instance status collector test if "mon-collector" is enabled;
  # unlike RunTestIf, nothing is reported when the test is disabled.
  # This is deliberately a comment and not a docstring: _DescriptionOf builds
  # the printed test headers from docstrings, so adding one would change the
  # header of any caller passing this function to RunTest or RunTestBlock.
  if not qa_config.TestEnabled("mon-collector"):
    return

  RunTest(qa_monitoring.TestInstStatusCollector)
860
861
def RunPerformanceTests():
  # Runs the performance tests: nothing but a skip report unless
  # "performance" is enabled, then the job queue tests and the parallel tests
  # depending on "jobqueue-performance" and "parallel-performance".
  # This is deliberately a comment and not a docstring: _DescriptionOf builds
  # the printed test headers from docstrings, so adding one would change the
  # header of any caller passing this function to RunTest or RunTestBlock.
  if not qa_config.TestEnabled("performance"):
    ReportTestSkip("performance related tests", "performance")
    return

  if qa_config.TestEnabled("jobqueue-performance"):
    RunTest(qa_performance.TestParallelMaxInstanceCreationPerformance)
    RunTest(qa_performance.TestParallelNodeCountInstanceCreationPerformance)

    instances = qa_performance.CreateAllInstances()

    RunTest(qa_performance.TestParallelModify, instances)
    RunTest(qa_performance.TestParallelInstanceOSOperations, instances)
    RunTest(qa_performance.TestParallelInstanceQueries, instances)

    qa_performance.RemoveAllInstances(instances)

    RunTest(qa_performance.TestJobQueueSubmissionPerformance)

  if qa_config.TestEnabled("parallel-performance"):
    if qa_config.IsTemplateSupported(constants.DT_DRBD8):
      RunTest(qa_performance.TestParallelDRBDInstanceCreationPerformance)
    if qa_config.IsTemplateSupported(constants.DT_PLAIN):
      RunTest(qa_performance.TestParallelPlainInstanceCreationPerformance)

    if qa_config.IsTemplateSupported(constants.DT_DRBD8):
      inodes = qa_config.AcquireManyNodes(2)
      try:
        instance = qa_instance.TestInstanceAddWithDrbdDisk(inodes)
        try:
          RunTest(qa_performance.TestParallelInstanceFailover, instance)
          RunTest(qa_performance.TestParallelInstanceMigration, instance)
          RunTest(qa_performance.TestParallelInstanceReplaceDisks, instance)
          RunTest(qa_performance.TestParallelInstanceReboot, instance)
          RunTest(qa_performance.TestParallelInstanceReinstall, instance)
          RunTest(qa_performance.TestParallelInstanceRename, instance)
        finally:
          # The instance has to be released even if removing it fails
          try:
            qa_instance.TestInstanceRemove(instance)
          finally:
            instance.Release()
      finally:
        qa_config.ReleaseManyNodes(inodes)
903
904
def RunQa():
  """Run the complete QA sequence, from cluster setup to cluster teardown.

  The stages are executed strictly in the order in which they are written
  here. Optional stages are guarded through C{RunTestIf},
  C{qa_config.TestEnabled} or C{qa_rapi.Enabled}. Every node or instance
  object acquired in this function is released again in a C{finally}
  clause, with the exception of the DRBD instance created for the
  offline-secondary removal test, which has no C{Release} call here.

  """
  rapi_user = "ganeti-qa"

  RunTestBlock(RunEnvTests)
  rapi_secret = SetupCluster(rapi_user)

  if qa_rapi.Enabled():
    # Load RAPI certificate
    qa_rapi.Setup(rapi_user, rapi_secret)

  RunTestBlock(RunClusterTests)
  RunTestBlock(RunOsTests)

  RunTestIf("tags", qa_tags.TestClusterTags)

  for test_block in (RunCommonNodeTests, RunGroupListTests,
                     RunGroupRwTests, RunNetworkTests):
    RunTestBlock(test_block)

  # The master shouldn't be readded or put offline; "delay" needs a non-master
  # node to test
  non_master = qa_config.AcquireNode(exclude=qa_config.GetMasterNode())
  try:
    node_tests = (
      ("node-readd", qa_node.TestNodeReadd),
      ("node-modify", qa_node.TestNodeModify),
      ("delay", qa_cluster.TestDelay),
      )
    for (test_name, test_fn) in node_tests:
      RunTestIf(test_name, test_fn, non_master)
  finally:
    non_master.Release()

  # Make sure the cluster is clean before running instance tests
  qa_cluster.AssertClusterVerify()

  rapi_node = qa_config.AcquireNode()
  try:
    RunTestIf("tags", qa_tags.TestNodeTags, rapi_node)

    if qa_rapi.Enabled():
      RunTest(qa_rapi.TestNode, rapi_node)

      # The template check is only evaluated when the test itself is enabled
      plain_via_rapi = (qa_config.TestEnabled("instance-add-plain-disk") and
                        qa_config.IsTemplateSupported(constants.DT_PLAIN))
      if plain_via_rapi:
        # Normal instance allocation via RAPI
        for use_client in (True, False):
          rapi_instance = RunTest(qa_rapi.TestRapiInstanceAdd, rapi_node,
                                  use_client)
          try:
            if qa_config.TestEnabled("instance-plain-rapi-common-tests"):
              RunCommonInstanceTests(rapi_instance, [rapi_node])
            RunTest(qa_rapi.TestRapiInstanceRemove, rapi_instance, use_client)
          finally:
            rapi_instance.Release()
          del rapi_instance

        # Multi-instance allocation
        (first_instance, second_instance) = \
          RunTest(qa_rapi.TestRapiInstanceMultiAlloc, rapi_node)

        try:
          RunTest(qa_rapi.TestRapiInstanceRemove, first_instance, True)
          RunTest(qa_rapi.TestRapiInstanceRemove, second_instance, True)
        finally:
          first_instance.Release()
          second_instance.Release()
  finally:
    rapi_node.Release()

  # Each entry: (test name or predicate handed to TestEnabled, function
  # applying the configuration, function receiving the setup's return value
  # to undo it)
  instance_test_configs = [
    ("default-instance-tests", lambda: None, lambda _: None),
    (IsExclusiveStorageInstanceTestEnabled,
     lambda: qa_cluster.TestSetExclStorCluster(True),
     qa_cluster.TestSetExclStorCluster),
    ]
  for (enabled_key, apply_conf, undo_conf) in instance_test_configs:
    if not qa_config.TestEnabled(enabled_key):
      continue
    previous_conf = apply_conf()
    RunTestBlock(RunInstanceTests)
    undo_conf(previous_conf)

  export_pnode = qa_config.AcquireNode()
  try:
    if qa_config.TestEnabled(["instance-add-plain-disk", "instance-export"]):
      # Export once from a running and once from a stopped instance
      for stop_first in (False, True):
        instance = RunTest(qa_instance.TestInstanceAddWithPlainDisk,
                           [export_pnode])
        try:
          expnode = qa_config.AcquireNode(exclude=export_pnode)
          try:
            if stop_first:
              # Stop instance before exporting and removing it
              RunTest(qa_instance.TestInstanceShutdown, instance)
            RunTest(qa_instance.TestInstanceExportWithRemove, instance, expnode)
            RunTest(qa_instance.TestBackupList, expnode)
          finally:
            expnode.Release()
        finally:
          instance.Release()
        del expnode
        del instance
      qa_cluster.AssertClusterVerify()
  finally:
    export_pnode.Release()

  RunTestIf("cluster-upgrade", qa_cluster.TestUpgrade)

  RunTestBlock(RunExclusiveStorageTests)
  RunTestIf(["cluster-instance-policy", "instance-add-plain-disk"],
            TestIPolicyPlainInstance)

  RunTestBlock(RunCustomSshPortTests)

  RunTestIf("instance-add-restricted-by-disktemplates",
            qa_instance.TestInstanceCreationRestrictedByDiskTemplates)

  # Test removing instance with offline drbd secondary
  if qa_config.TestEnabled(["instance-remove-drbd-offline",
                            "instance-add-drbd-disk"]):
    # Make sure the master is not put offline
    secondary = qa_config.AcquireNode(exclude=qa_config.GetMasterNode())
    try:
      primary = qa_config.AcquireNode(exclude=secondary)
      try:
        drbd_instance = \
          qa_instance.TestInstanceAddWithDrbdDisk([primary, secondary])
        set_offline = lambda node: qa_node.MakeNodeOffline(node, "yes")
        set_online = lambda node: qa_node.MakeNodeOffline(node, "no")
        RunTest(qa_instance.TestRemoveInstanceOfflineNode, drbd_instance,
                secondary, set_offline, set_online)
      finally:
        primary.Release()
    finally:
      secondary.Release()
    qa_cluster.AssertClusterVerify()

  RunTestBlock(RunMonitoringTests)

  RunPerformanceTests()

  # Teardown: remove the nodes first, then destroy the cluster
  RunTestIf("create-cluster", qa_node.TestNodeRemoveAll)
  RunTestIf("cluster-destroy", qa_cluster.TestClusterDestroy)
1049
1050
@UsesRapiClient
def main():
  """Entry point of the QA script.

  Exactly one positional argument, the QA configuration file, is expected;
  anything else is reported through the option parser's error handling.
  Unless C{--yes-do-it} was passed, a warning is printed and the script
  exits with status 1 without touching anything. Otherwise the
  configuration is loaded, an SSH multiplexer to the master node is started
  and L{RunQa} is executed; the multiplexers are closed afterwards even if
  the QA run raised an exception.

  """
  colors.check_for_colors()

  parser = optparse.OptionParser(usage="%prog [options] <config-file>")
  parser.add_option("--yes-do-it", dest="yes_do_it",
                    action="store_true",
                    help="Really execute the tests")
  (opts, args) = parser.parse_args()

  # parser.error() prints the message and terminates the program, hence the
  # assignment below is only reached with exactly one argument
  if len(args) != 1:
    parser.error("Wrong number of arguments.")
  config_file = args[0]

  if not opts.yes_do_it:
    warning = ("Executing this script irreversibly destroys any Ganeti\n"
               "configuration on all nodes involved. If you really want\n"
               "to start testing, supply the --yes-do-it option.")
    print(warning)
    sys.exit(1)

  qa_config.Load(config_file)

  master_primary = qa_config.GetMasterNode().primary
  qa_utils.StartMultiplexer(master_primary)

  # Show the SSH invocations used by the QA helpers
  master_ssh = utils.ShellQuoteArgs(qa_utils.GetSSHCommand(master_primary, ""))
  print("SSH command for primary node: %s" % master_ssh)
  other_ssh = utils.ShellQuoteArgs(qa_utils.GetSSHCommand("NODE", ""))
  print("SSH command for other nodes: %s" % other_ssh)

  try:
    RunQa()
  finally:
    qa_utils.CloseMultiplexers()


if __name__ == "__main__":
  main()