a0aa650128b073bc44ab913376b743f9c4de882c
[ganeti-github.git] / test / hs / Test / Ganeti / JQScheduler.hs
1 {-# LANGUAGE TemplateHaskell, ScopedTypeVariables, NamedFieldPuns #-}
2 {-# OPTIONS_GHC -fno-warn-orphans #-}
3
4 {-| Unittests for the job scheduler.
5
6 -}
7
8 {-
9
10 Copyright (C) 2014 Google Inc.
11 All rights reserved.
12
13 Redistribution and use in source and binary forms, with or without
14 modification, are permitted provided that the following conditions are
15 met:
16
17 1. Redistributions of source code must retain the above copyright notice,
18 this list of conditions and the following disclaimer.
19
20 2. Redistributions in binary form must reproduce the above copyright
21 notice, this list of conditions and the following disclaimer in the
22 documentation and/or other materials provided with the distribution.
23
24 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
25 IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
26 TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
27 PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR
28 CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
29 EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
30 PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
31 PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
32 LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
33 NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
34 SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
35
36 -}
37
38 module Test.Ganeti.JQScheduler (testJQScheduler) where
39
40 import Control.Applicative
41 import Control.Lens ((&), (.~), _2)
42 import Data.List (inits)
43 import Data.Maybe
44 import qualified Data.Map as Map
45 import Data.Set (Set, difference)
46 import qualified Data.Set as Set
47 import Data.Traversable (traverse)
48 import Text.JSON (JSValue(..))
49 import Test.HUnit
50 import Test.QuickCheck
51
52 import Test.Ganeti.JQueue.Objects (genQueuedOpCode, genJobId, justNoTs)
53 import Test.Ganeti.SlotMap (genTestKey, overfullKeys)
54 import Test.Ganeti.TestCommon
55 import Test.Ganeti.TestHelper
56 import Test.Ganeti.Types ()
57
58 import Ganeti.JQScheduler.Filtering
59 import Ganeti.JQScheduler.ReasonRateLimiting
60 import Ganeti.JQScheduler.Types
61 import Ganeti.JQueue.Lens
62 import Ganeti.JQueue.Objects
63 import Ganeti.Objects (FilterRule(..), FilterPredicate(..), FilterAction(..),
64 filterRuleOrder)
65 import Ganeti.OpCodes
66 import Ganeti.OpCodes.Lens
67 import Ganeti.Query.Language (Filter(..), FilterValue(..))
68 import Ganeti.SlotMap
69 import Ganeti.Types
70 import Ganeti.Utils (isSubsequenceOf, newUUID)
71
72 {-# ANN module "HLint: ignore Use camelCase" #-}
73
74
75 genRateLimitReason :: Gen String
76 genRateLimitReason = do
77 Slot{ slotLimit = n } <- arbitrary
78 l <- genTestKey
79 return $ "rate-limit:" ++ show n ++ ":" ++ l
80
81
82 instance Arbitrary QueuedJob where
83 arbitrary = do
84 -- For our scheduler testing purposes here, we only care about
85 -- opcodes, job ID and reason rate limits.
86 jid <- genJobId
87
88 ops <- resize 5 . listOf1 $ do
89 o <- genQueuedOpCode
90 -- Put some rate limits into the OpCode.
91 limitString <- genRateLimitReason
92 return $
93 o & qoInputL . validOpCodeL . metaParamsL . opReasonL . traverse . _2
94 .~ limitString
95
96 return $ QueuedJob jid ops justNoTs justNoTs justNoTs Nothing Nothing
97
98
99 instance Arbitrary JobWithStat where
100 arbitrary = nullJobWithStat <$> arbitrary
101 shrink job = [ job { jJob = x } | x <- shrink (jJob job) ]
102
103
104 instance Arbitrary Queue where
105 arbitrary = do
106
107 let genJobsUniqueJIDs :: [JobWithStat] -> Gen [JobWithStat]
108 genJobsUniqueJIDs = listOfUniqueBy arbitrary (qjId . jJob)
109
110 queued <- genJobsUniqueJIDs []
111 running <- genJobsUniqueJIDs queued
112 manip <- genJobsUniqueJIDs (queued ++ running)
113
114 return $ Queue queued running manip
115 shrink q =
116 [ q { qEnqueued = x } | x <- shrink (qEnqueued q) ] ++
117 [ q { qRunning = x } | x <- shrink (qRunning q) ] ++
118 [ q { qManipulated = x } | x <- shrink (qManipulated q) ]
119
120
121 -- * Test cases
122
123 -- | Tests rate limit reason trail parsing.
124 case_parseReasonRateLimit :: Assertion
125 case_parseReasonRateLimit = do
126
127 assertBool "default case" $
128 let a = parseReasonRateLimit "rate-limit:20:my label"
129 b = parseReasonRateLimit "rate-limit:21:my label"
130 in and
131 [ a == Just ("20:my label", 20)
132 , b == Just ("21:my label", 21)
133 ]
134
135 assertEqual "be picky about whitespace"
136 Nothing
137 (parseReasonRateLimit " rate-limit:20:my label")
138
139
140 -- | Tests that "rateLimit:n:..." and "rateLimit:m:..." become different
141 -- rate limiting buckets.
142 prop_slotMapFromJob_conflicting_buckets :: Property
143 prop_slotMapFromJob_conflicting_buckets = do
144
145 let sameBucketReasonStringGen :: Gen (String, String)
146 sameBucketReasonStringGen = do
147 (Positive (n :: Int), Positive (m :: Int)) <- arbitrary
148 l <- genPrintableAsciiString
149 return ( "rate-limit:" ++ show n ++ ":" ++ l
150 , "rate-limit:" ++ show m ++ ":" ++ l )
151
152 forAll sameBucketReasonStringGen $ \(s1, s2) ->
153 (s1 /= s2) ==> do
154 (lab1, lim1) <- parseReasonRateLimit s1
155 (lab2, _ ) <- parseReasonRateLimit s2
156 let sm = Map.fromList [(lab1, Slot 1 lim1)]
157 cm = Map.fromList [(lab2, 1)]
158 in return $
159 (sm `occupySlots` cm) ==? Map.fromList [ (lab1, Slot 1 lim1)
160 , (lab2, Slot 1 0)
161 ] :: Gen Property
162
163
164 -- | Tests some basic cases for reason rate limiting.
165 case_reasonRateLimit :: Assertion
166 case_reasonRateLimit = do
167
168 let mkJobWithReason jobNum reasonTrail = do
169 opc <- genSample genQueuedOpCode
170 jid <- makeJobId jobNum
171 let opc' = opc & (qoInputL . validOpCodeL . metaParamsL . opReasonL)
172 .~ reasonTrail
173 return . nullJobWithStat
174 $ QueuedJob
175 { qjId = jid
176 , qjOps = [opc']
177 , qjReceivedTimestamp = Nothing
178 , qjStartTimestamp = Nothing
179 , qjEndTimestamp = Nothing
180 , qjLivelock = Nothing
181 , qjProcessId = Nothing
182 }
183
184 -- 3 jobs, limited to 2 of them running.
185 j1 <- mkJobWithReason 1 [("source1", "rate-limit:2:hello", 0)]
186 j2 <- mkJobWithReason 2 [("source1", "rate-limit:2:hello", 0)]
187 j3 <- mkJobWithReason 3 [("source1", "rate-limit:2:hello", 0)]
188
189 assertEqual "[j1] should not be rate-limited"
190 [j1]
191 (reasonRateLimit (Queue [j1] [] []) [j1])
192
193 assertEqual "[j1, j2] should not be rate-limited"
194 [j1, j2]
195 (reasonRateLimit (Queue [j1, j2] [] []) [j1, j2])
196
197 assertEqual "j3 should be rate-limited 1"
198 [j1, j2]
199 (reasonRateLimit (Queue [j1, j2, j3] [] []) [j1, j2, j3])
200
201 assertEqual "j3 should be rate-limited 2"
202 [j2]
203 (reasonRateLimit (Queue [j2, j3] [j1] []) [j2, j3])
204
205 assertEqual "j3 should be rate-limited 3"
206 []
207 (reasonRateLimit (Queue [j3] [j1] [j2]) [j3])
208
209
210 -- | Tests the specified properties of `reasonRateLimit`, as defined in
211 -- `doc/design-optables.rst`.
212 prop_reasonRateLimit :: Property
213 prop_reasonRateLimit =
214 forAllShrink arbitrary shrink $ \q ->
215
216 let slotMapFromJobWithStat = slotMapFromJobs . map jJob
217
218 enqueued = qEnqueued q
219
220 toRun = reasonRateLimit q enqueued
221
222 oldSlots = slotMapFromJobWithStat (qRunning q)
223 newSlots = slotMapFromJobWithStat (qRunning q ++ toRun)
224 -- What would happen without rate limiting.
225 newSlotsNoLimits = slotMapFromJobWithStat (qRunning q ++ enqueued)
226
227 in -- Ensure it's unlikely that jobs are all in different buckets.
228 cover
229 (any ((> 1) . slotOccupied) . Map.elems $ newSlotsNoLimits)
230 50
231 "some jobs have the same rate-limit bucket"
232
233 -- Ensure it's likely that rate limiting has any effect.
234 . cover
235 (overfullKeys newSlotsNoLimits
236 `difference` overfullKeys oldSlots /= Set.empty)
237 50
238 "queued jobs cannot be started because of rate limiting"
239
240 $ conjoin
241 [ counterexample "scheduled jobs must be subsequence" $
242 toRun `isSubsequenceOf` enqueued
243
244 -- This is the key property:
245 , counterexample "no job may exceed its bucket limits, except from\
246 \ jobs that were already running with exceeded\
247 \ limits; those must not increase" $
248 conjoin
249 [ if occup <= limit
250 -- Within limits, all fine.
251 then passTest
252 -- Bucket exceeds limits - it must have exceeded them
253 -- in the initial running list already, with the same
254 -- slot count.
255 else Map.lookup k oldSlots ==? Just slot
256 | (k, slot@(Slot occup limit)) <- Map.toList newSlots ]
257 ]
258
259 -- | Tests that filter rule ordering is determined (solely) by priority,
260 -- watermark and UUID, as defined in `doc/design-optables.rst`.
261 prop_filterRuleOrder :: Property
262 prop_filterRuleOrder = property $ do
263 a <- arbitrary
264 b <- arbitrary `suchThat` ((frUuid a /=) . frUuid)
265 return $ filterRuleOrder a b ==? (frPriority a, frWatermark a, frUuid a)
266 `compare`
267 (frPriority b, frWatermark b, frUuid b)
268
269
270 -- | Tests common inputs for `matchPredicate`, especially the predicates
271 -- and fields available to them as defined in the spec.
272 case_matchPredicate :: Assertion
273 case_matchPredicate = do
274
275 jid1 <- makeJobId 1
276 clusterName <- mkNonEmpty "cluster1"
277
278 let job =
279 QueuedJob
280 { qjId = jid1
281 , qjOps =
282 [ QueuedOpCode
283 { qoInput = ValidOpCode MetaOpCode
284 { metaParams = CommonOpParams
285 { opDryRun = Nothing
286 , opDebugLevel = Nothing
287 , opPriority = OpPrioHigh
288 , opDepends = Just []
289 , opComment = Nothing
290 , opReason = [("source1", "reason1", 1234)]
291 }
292 , metaOpCode = OpClusterRename
293 { opName = clusterName
294 }
295 }
296 , qoStatus = OP_STATUS_QUEUED
297 , qoResult = JSNull
298 , qoLog = []
299 , qoPriority = -1
300 , qoStartTimestamp = Nothing
301 , qoExecTimestamp = Nothing
302 , qoEndTimestamp = Nothing
303 }
304 ]
305 , qjReceivedTimestamp = Nothing
306 , qjStartTimestamp = Nothing
307 , qjEndTimestamp = Nothing
308 , qjLivelock = Nothing
309 , qjProcessId = Nothing
310 }
311
312 let watermark = jid1
313
314 check = matchPredicate job watermark
315
316 -- jobid filters
317
318 assertEqual "matching jobid filter"
319 True
320 . check $ FPJobId (EQFilter "id" (NumericValue 1))
321
322 assertEqual "non-matching jobid filter"
323 False
324 . check $ FPJobId (EQFilter "id" (NumericValue 2))
325
326 assertEqual "non-matching jobid filter (string passed)"
327 False
328 . check $ FPJobId (EQFilter "id" (QuotedString "1"))
329
330 -- jobid filters: watermarks
331
332 assertEqual "matching jobid watermark filter"
333 True
334 . check $ FPJobId (EQFilter "id" (QuotedString "watermark"))
335
336 -- opcode filters
337
338 assertEqual "matching opcode filter (type of opcode)"
339 True
340 . check $ FPOpCode (EQFilter "OP_ID" (QuotedString "OP_CLUSTER_RENAME"))
341
342 assertEqual "non-matching opcode filter (type of opcode)"
343 False
344 . check $ FPOpCode (EQFilter "OP_ID" (QuotedString "OP_INSTANCE_CREATE"))
345
346 assertEqual "matching opcode filter (nested access)"
347 True
348 . check $ FPOpCode (EQFilter "name" (QuotedString "cluster1"))
349
350 assertEqual "non-matching opcode filter (nonexistent nested access)"
351 False
352 . check $ FPOpCode (EQFilter "something" (QuotedString "cluster1"))
353
354 -- reason filters
355
356 assertEqual "matching reason filter (reason field)"
357 True
358 . check $ FPReason (EQFilter "reason" (QuotedString "reason1"))
359
360 assertEqual "non-matching reason filter (reason field)"
361 False
362 . check $ FPReason (EQFilter "reason" (QuotedString "reasonGarbage"))
363
364 assertEqual "matching reason filter (source field)"
365 True
366 . check $ FPReason (EQFilter "source" (QuotedString "source1"))
367
368 assertEqual "matching reason filter (timestamp field)"
369 True
370 . check $ FPReason (EQFilter "timestamp" (NumericValue 1234))
371
372 assertEqual "non-matching reason filter (nonexistent field)"
373 False
374 . check $ FPReason (EQFilter "something" (QuotedString ""))
375
376
377 -- | Tests that jobs selected by `applyingFilter` actually match
378 -- and have an effect (are not CONTINUE filters).
379 prop_applyingFilter :: Property
380 prop_applyingFilter =
381 forAllShrink arbitrary shrink $ \(job, filters) ->
382
383 let applying = applyingFilter (Set.fromList filters) job
384
385 in isJust applying ==> case applying of
386 Just f -> job `matches` f && frAction f /= Continue
387 Nothing -> True
388
389
390 case_jobFiltering :: Assertion
391 case_jobFiltering = do
392
393 clusterName <- mkNonEmpty "cluster1"
394 jid1 <- makeJobId 1
395 jid2 <- makeJobId 2
396 jid3 <- makeJobId 3
397 jid4 <- makeJobId 4
398 unsetPrio <- mkNonNegative 1234
399 uuid1 <- newUUID
400
401 let j1 =
402 nullJobWithStat QueuedJob
403 { qjId = jid1
404 , qjOps =
405 [ QueuedOpCode
406 { qoInput = ValidOpCode MetaOpCode
407 { metaParams = CommonOpParams
408 { opDryRun = Nothing
409 , opDebugLevel = Nothing
410 , opPriority = OpPrioHigh
411 , opDepends = Just []
412 , opComment = Nothing
413 , opReason = [("source1", "reason1", 1234)]}
414 , metaOpCode = OpClusterRename
415 { opName = clusterName
416 }
417 }
418 , qoStatus = OP_STATUS_QUEUED
419 , qoResult = JSNull
420 , qoLog = []
421 , qoPriority = -1
422 , qoStartTimestamp = Nothing
423 , qoExecTimestamp = Nothing
424 , qoEndTimestamp = Nothing
425 }
426 ]
427 , qjReceivedTimestamp = Nothing
428 , qjStartTimestamp = Nothing
429 , qjEndTimestamp = Nothing
430 , qjLivelock = Nothing
431 , qjProcessId = Nothing
432 }
433
434 j2 = j1 & jJobL . qjIdL .~ jid2
435 j3 = j1 & jJobL . qjIdL .~ jid3
436 j4 = j1 & jJobL . qjIdL .~ jid4
437
438
439 fr1 =
440 FilterRule
441 { frWatermark = jid1
442 , frPriority = unsetPrio
443 , frPredicates = [FPJobId (EQFilter "id" (NumericValue 1))]
444 , frAction = Reject
445 , frReasonTrail = []
446 , frUuid = uuid1
447 }
448
449 -- Gives the rule a new UUID.
450 rule fr = do
451 uuid <- newUUID
452 return fr{ frUuid = uuid }
453
454 -- Helper to create filter chains: assigns the filters in the list
455 -- increasing priorities, so that filters listed first are processed
456 -- first.
457 chain :: [FilterRule] -> Set FilterRule
458 chain frs
459 | any ((/= unsetPrio) . frPriority) frs =
460 error "Filter was passed to `chain` that already had a priority."
461 | otherwise =
462 Set.fromList
463 [ fr{ frPriority = prio }
464 | (fr, Just prio) <- zip frs (map mkNonNegative [1..]) ]
465
466 fr2 <- rule fr1{ frAction = Accept }
467 fr3 <- rule fr1{ frAction = Pause }
468
469 fr4 <- rule fr1{ frPredicates =
470 [FPJobId (GTFilter "id" (QuotedString "watermark"))]
471 }
472
473 fr5 <- rule fr1{ frPredicates = [] }
474
475 fr6 <- rule fr5{ frAction = Continue }
476 fr7 <- rule fr6{ frAction = RateLimit 2 }
477
478 fr8 <- rule fr4{ frAction = Continue, frWatermark = jid1 }
479 fr9 <- rule fr8{ frAction = RateLimit 2 }
480
481 assertEqual "j1 should be rejected (by fr1)"
482 []
483 (jobFiltering (Queue [j1] [] []) (chain [fr1]) [j1])
484
485 assertEqual "j1 should be rejected (by fr1, it has priority)"
486 []
487 (jobFiltering (Queue [j1] [] []) (chain [fr1, fr2]) [j1])
488
489 assertEqual "j1 should be accepted (by fr2, it has priority)"
490 [j1]
491 (jobFiltering (Queue [j1] [] []) (chain [fr2, fr1]) [j1])
492
493 assertEqual "j1 should be paused (by fr3)"
494 []
495 (jobFiltering (Queue [j1] [] []) (chain [fr3]) [j1])
496
497 assertEqual "j2 should be rejected (over watermark1)"
498 [j1]
499 (jobFiltering (Queue [j1, j2] [] []) (chain [fr4]) [j1, j2])
500
501 assertEqual "all jobs should be rejected (since no predicates)"
502 []
503 (jobFiltering (Queue [j1, j2] [] []) (chain [fr5]) [j1, j2])
504
505 assertEqual "j3 should be rate-limited"
506 [j1, j2]
507 (jobFiltering (Queue [j1, j2, j3] [] []) (chain [fr6, fr7]) [j1, j2, j3])
508
509 assertEqual "j4 should be rate-limited"
510 -- j1 doesn't apply to fr8/fr9 (since they match only watermark > jid1)
511 -- so j1 gets scheduled
512 [j1, j2, j3]
513 (jobFiltering (Queue [j1, j2, j3, j4] [] []) (chain [fr8, fr9])
514 [j1, j2, j3, j4])
515
516
517 -- | Tests the specified properties of `jobFiltering`, as defined in
518 -- `doc/design-optables.rst`.
519 prop_jobFiltering :: Property
520 prop_jobFiltering =
521 forAllShrink arbitrary shrink $ \q ->
522 forAllShrink (resize 4 arbitrary) shrink $ \(NonEmpty filterList) ->
523
524 let running = qRunning q ++ qManipulated q
525 enqueued = qEnqueued q
526
527 filters = Set.fromList filterList
528
529 toRun = jobFiltering q filters enqueued -- do the filtering
530
531 -- Helpers
532
533 -- Whether `fr` applies to more than `n` of the `jobs`
534 -- (that is, more than allowed).
535 exceeds :: Int -> FilterRule -> [JobWithStat] -> Bool
536 exceeds n fr jobs =
537 n < (length
538 . filter ((frUuid fr ==) . frUuid)
539 . mapMaybe (applyingFilter filters)
540 $ map jJob jobs)
541
542 -- Helpers for ensuring sensible coverage.
543
544 -- Makes sure that each action appears with some probability.
545 actionName = head . words . show
546 allActions = map actionName [ Accept, Continue, Pause, Reject
547 , RateLimit 0 ]
548 applyingActions = map (actionName . frAction)
549 . mapMaybe (applyingFilter filters)
550 $ map jJob enqueued
551 perc = 4 -- percent; low because it's per action
552 actionCovers =
553 foldr (.) id
554 [ stableCover (a `elem` applyingActions) perc ("is " ++ a)
555 | a <- allActions ]
556
557 -- `covers` should be after `==>` and before `conjoin` (see QuickCheck
558 -- bugs 25 and 27).
559 in (enqueued /= []) ==> actionCovers $ conjoin
560
561 [ counterexample "scheduled jobs must be subsequence" $
562 toRun `isSubsequenceOf` enqueued
563
564 , counterexample "a reason for each job (not) being scheduled" .
565
566 -- All enqueued jobs must have a reason why they were (not)
567 -- scheduled, determined by the filter that applies.
568 flip all enqueued $ \job ->
569 case applyingFilter filters (jJob job) of
570 -- If no filter matches, the job must run.
571 Nothing -> job `elem` toRun
572 Just fr@FilterRule{ frAction } -> case frAction of
573 -- ACCEPT filter permit the job immediately,
574 -- PAUSE/REJECT forbid running, CONTINUE filters cannot
575 -- be the output of `applyingFilter`, and
576 -- RATE_LIMIT filters have a more more complex property.
577 Accept -> job `elem` toRun
578 Continue -> error "must not happen"
579 Pause -> job `notElem` toRun
580 Reject -> job `notElem` toRun
581 RateLimit n ->
582
583 let -- Jobs in queue before our job.
584 jobsBefore = takeWhile (/= job) enqueued
585
586 in if job `elem` toRun
587 -- If it got scheduled, the job and any job
588 -- before it doesn't overfill the rate limit.
589 then not . exceeds n fr $ running
590 ++ jobsBefore ++ [job]
591 -- If didn't get scheduled, then the rate limit
592 -- was already full before scheduling or the job
593 -- or one of the jobs before made it full.
594 else any (exceeds n fr . (running ++))
595 (inits $ jobsBefore ++ [job])
596 -- The `inits` bit includes the [] and [...job]
597 -- cases.
598
599 ]
600
601
602 testSuite "JQScheduler"
603 [ 'case_parseReasonRateLimit
604 , 'prop_slotMapFromJob_conflicting_buckets
605 , 'case_reasonRateLimit
606 , 'prop_reasonRateLimit
607 , 'prop_filterRuleOrder
608 , 'case_matchPredicate
609 , 'prop_applyingFilter
610 , 'case_jobFiltering
611 , 'prop_jobFiltering
612 ]