Encode UUIDs as ByteStrings
[ganeti-github.git] / test / hs / Test / Ganeti / JQScheduler.hs
1 {-# LANGUAGE TemplateHaskell, ScopedTypeVariables, NamedFieldPuns #-}
2 {-# OPTIONS_GHC -fno-warn-orphans #-}
3
4 {-| Unittests for the job scheduler.
5
6 -}
7
8 {-
9
10 Copyright (C) 2014 Google Inc.
11 All rights reserved.
12
13 Redistribution and use in source and binary forms, with or without
14 modification, are permitted provided that the following conditions are
15 met:
16
17 1. Redistributions of source code must retain the above copyright notice,
18 this list of conditions and the following disclaimer.
19
20 2. Redistributions in binary form must reproduce the above copyright
21 notice, this list of conditions and the following disclaimer in the
22 documentation and/or other materials provided with the distribution.
23
24 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
25 IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
26 TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
27 PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR
28 CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
29 EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
30 PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
31 PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
32 LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
33 NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
34 SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
35
36 -}
37
38 module Test.Ganeti.JQScheduler (testJQScheduler) where
39
40 import Control.Applicative
41 import Control.Lens ((&), (.~), _2)
42 import qualified Data.ByteString.UTF8 as UTF8
43 import Data.List (inits)
44 import Data.Maybe
45 import qualified Data.Map as Map
46 import Data.Set (Set, difference)
47 import qualified Data.Set as Set
48 import Data.Traversable (traverse)
49 import Text.JSON (JSValue(..))
50 import Test.HUnit
51 import Test.QuickCheck
52
53 import Test.Ganeti.JQueue.Objects (genQueuedOpCode, genJobId, justNoTs)
54 import Test.Ganeti.SlotMap (genTestKey, overfullKeys)
55 import Test.Ganeti.TestCommon
56 import Test.Ganeti.TestHelper
57 import Test.Ganeti.Types ()
58
59 import Ganeti.JQScheduler.Filtering
60 import Ganeti.JQScheduler.ReasonRateLimiting
61 import Ganeti.JQScheduler.Types
62 import Ganeti.JQueue.Lens
63 import Ganeti.JQueue.Objects
64 import Ganeti.Objects (FilterRule(..), FilterPredicate(..), FilterAction(..),
65 filterRuleOrder)
66 import Ganeti.OpCodes
67 import Ganeti.OpCodes.Lens
68 import Ganeti.Query.Language (Filter(..), FilterValue(..))
69 import Ganeti.SlotMap
70 import Ganeti.Types
71 import Ganeti.Utils (isSubsequenceOf, newUUID)
72
73 {-# ANN module "HLint: ignore Use camelCase" #-}
74
75
76 genRateLimitReason :: Gen String
77 genRateLimitReason = do
78 Slot{ slotLimit = n } <- arbitrary
79 l <- genTestKey
80 return $ "rate-limit:" ++ show n ++ ":" ++ l
81
82
83 instance Arbitrary QueuedJob where
84 arbitrary = do
85 -- For our scheduler testing purposes here, we only care about
86 -- opcodes, job ID and reason rate limits.
87 jid <- genJobId
88
89 ops <- resize 5 . listOf1 $ do
90 o <- genQueuedOpCode
91 -- Put some rate limits into the OpCode.
92 limitString <- genRateLimitReason
93 return $
94 o & qoInputL . validOpCodeL . metaParamsL . opReasonL . traverse . _2
95 .~ limitString
96
97 return $ QueuedJob jid ops justNoTs justNoTs justNoTs Nothing Nothing
98
99
100 instance Arbitrary JobWithStat where
101 arbitrary = nullJobWithStat <$> arbitrary
102 shrink job = [ job { jJob = x } | x <- shrink (jJob job) ]
103
104
105 instance Arbitrary Queue where
106 arbitrary = do
107
108 let genJobsUniqueJIDs :: [JobWithStat] -> Gen [JobWithStat]
109 genJobsUniqueJIDs = listOfUniqueBy arbitrary (qjId . jJob)
110
111 queued <- genJobsUniqueJIDs []
112 running <- genJobsUniqueJIDs queued
113 manip <- genJobsUniqueJIDs (queued ++ running)
114
115 return $ Queue queued running manip
116 shrink q =
117 [ q { qEnqueued = x } | x <- shrink (qEnqueued q) ] ++
118 [ q { qRunning = x } | x <- shrink (qRunning q) ] ++
119 [ q { qManipulated = x } | x <- shrink (qManipulated q) ]
120
121
122 -- * Test cases
123
124 -- | Tests rate limit reason trail parsing.
125 case_parseReasonRateLimit :: Assertion
126 case_parseReasonRateLimit = do
127
128 assertBool "default case" $
129 let a = parseReasonRateLimit "rate-limit:20:my label"
130 b = parseReasonRateLimit "rate-limit:21:my label"
131 in and
132 [ a == Just ("20:my label", 20)
133 , b == Just ("21:my label", 21)
134 ]
135
136 assertEqual "be picky about whitespace"
137 Nothing
138 (parseReasonRateLimit " rate-limit:20:my label")
139
140
141 -- | Tests that "rateLimit:n:..." and "rateLimit:m:..." become different
142 -- rate limiting buckets.
143 prop_slotMapFromJob_conflicting_buckets :: Property
144 prop_slotMapFromJob_conflicting_buckets = do
145
146 let sameBucketReasonStringGen :: Gen (String, String)
147 sameBucketReasonStringGen = do
148 (Positive (n :: Int), Positive (m :: Int)) <- arbitrary
149 l <- genPrintableAsciiString
150 return ( "rate-limit:" ++ show n ++ ":" ++ l
151 , "rate-limit:" ++ show m ++ ":" ++ l )
152
153 forAll sameBucketReasonStringGen $ \(s1, s2) ->
154 (s1 /= s2) ==> do
155 (lab1, lim1) <- parseReasonRateLimit s1
156 (lab2, _ ) <- parseReasonRateLimit s2
157 let sm = Map.fromList [(lab1, Slot 1 lim1)]
158 cm = Map.fromList [(lab2, 1)]
159 in return $
160 (sm `occupySlots` cm) ==? Map.fromList [ (lab1, Slot 1 lim1)
161 , (lab2, Slot 1 0)
162 ] :: Gen Property
163
164
165 -- | Tests some basic cases for reason rate limiting.
166 case_reasonRateLimit :: Assertion
167 case_reasonRateLimit = do
168
169 let mkJobWithReason jobNum reasonTrail = do
170 opc <- genSample genQueuedOpCode
171 jid <- makeJobId jobNum
172 let opc' = opc & (qoInputL . validOpCodeL . metaParamsL . opReasonL)
173 .~ reasonTrail
174 return . nullJobWithStat
175 $ QueuedJob
176 { qjId = jid
177 , qjOps = [opc']
178 , qjReceivedTimestamp = Nothing
179 , qjStartTimestamp = Nothing
180 , qjEndTimestamp = Nothing
181 , qjLivelock = Nothing
182 , qjProcessId = Nothing
183 }
184
185 -- 3 jobs, limited to 2 of them running.
186 j1 <- mkJobWithReason 1 [("source1", "rate-limit:2:hello", 0)]
187 j2 <- mkJobWithReason 2 [("source1", "rate-limit:2:hello", 0)]
188 j3 <- mkJobWithReason 3 [("source1", "rate-limit:2:hello", 0)]
189
190 assertEqual "[j1] should not be rate-limited"
191 [j1]
192 (reasonRateLimit (Queue [j1] [] []) [j1])
193
194 assertEqual "[j1, j2] should not be rate-limited"
195 [j1, j2]
196 (reasonRateLimit (Queue [j1, j2] [] []) [j1, j2])
197
198 assertEqual "j3 should be rate-limited 1"
199 [j1, j2]
200 (reasonRateLimit (Queue [j1, j2, j3] [] []) [j1, j2, j3])
201
202 assertEqual "j3 should be rate-limited 2"
203 [j2]
204 (reasonRateLimit (Queue [j2, j3] [j1] []) [j2, j3])
205
206 assertEqual "j3 should be rate-limited 3"
207 []
208 (reasonRateLimit (Queue [j3] [j1] [j2]) [j3])
209
210
211 -- | Tests the specified properties of `reasonRateLimit`, as defined in
212 -- `doc/design-optables.rst`.
213 prop_reasonRateLimit :: Property
214 prop_reasonRateLimit =
215 forAllShrink arbitrary shrink $ \q ->
216
217 let slotMapFromJobWithStat = slotMapFromJobs . map jJob
218
219 enqueued = qEnqueued q
220
221 toRun = reasonRateLimit q enqueued
222
223 oldSlots = slotMapFromJobWithStat (qRunning q)
224 newSlots = slotMapFromJobWithStat (qRunning q ++ toRun)
225 -- What would happen without rate limiting.
226 newSlotsNoLimits = slotMapFromJobWithStat (qRunning q ++ enqueued)
227
228 in -- Ensure it's unlikely that jobs are all in different buckets.
229 cover
230 (any ((> 1) . slotOccupied) . Map.elems $ newSlotsNoLimits)
231 50
232 "some jobs have the same rate-limit bucket"
233
234 -- Ensure it's likely that rate limiting has any effect.
235 . cover
236 (overfullKeys newSlotsNoLimits
237 `difference` overfullKeys oldSlots /= Set.empty)
238 50
239 "queued jobs cannot be started because of rate limiting"
240
241 $ conjoin
242 [ counterexample "scheduled jobs must be subsequence" $
243 toRun `isSubsequenceOf` enqueued
244
245 -- This is the key property:
246 , counterexample "no job may exceed its bucket limits, except from\
247 \ jobs that were already running with exceeded\
248 \ limits; those must not increase" $
249 conjoin
250 [ if occup <= limit
251 -- Within limits, all fine.
252 then passTest
253 -- Bucket exceeds limits - it must have exceeded them
254 -- in the initial running list already, with the same
255 -- slot count.
256 else Map.lookup k oldSlots ==? Just slot
257 | (k, slot@(Slot occup limit)) <- Map.toList newSlots ]
258 ]
259
260 -- | Tests that filter rule ordering is determined (solely) by priority,
261 -- watermark and UUID, as defined in `doc/design-optables.rst`.
262 prop_filterRuleOrder :: Property
263 prop_filterRuleOrder = property $ do
264 a <- arbitrary
265 b <- arbitrary `suchThat` ((frUuid a /=) . frUuid)
266 return $ filterRuleOrder a b ==? (frPriority a, frWatermark a, frUuid a)
267 `compare`
268 (frPriority b, frWatermark b, frUuid b)
269
270
271 -- | Tests common inputs for `matchPredicate`, especially the predicates
272 -- and fields available to them as defined in the spec.
273 case_matchPredicate :: Assertion
274 case_matchPredicate = do
275
276 jid1 <- makeJobId 1
277 clusterName <- mkNonEmpty "cluster1"
278
279 let job =
280 QueuedJob
281 { qjId = jid1
282 , qjOps =
283 [ QueuedOpCode
284 { qoInput = ValidOpCode MetaOpCode
285 { metaParams = CommonOpParams
286 { opDryRun = Nothing
287 , opDebugLevel = Nothing
288 , opPriority = OpPrioHigh
289 , opDepends = Just []
290 , opComment = Nothing
291 , opReason = [("source1", "reason1", 1234)]
292 }
293 , metaOpCode = OpClusterRename
294 { opName = clusterName
295 }
296 }
297 , qoStatus = OP_STATUS_QUEUED
298 , qoResult = JSNull
299 , qoLog = []
300 , qoPriority = -1
301 , qoStartTimestamp = Nothing
302 , qoExecTimestamp = Nothing
303 , qoEndTimestamp = Nothing
304 }
305 ]
306 , qjReceivedTimestamp = Nothing
307 , qjStartTimestamp = Nothing
308 , qjEndTimestamp = Nothing
309 , qjLivelock = Nothing
310 , qjProcessId = Nothing
311 }
312
313 let watermark = jid1
314
315 check = matchPredicate job watermark
316
317 -- jobid filters
318
319 assertEqual "matching jobid filter"
320 True
321 . check $ FPJobId (EQFilter "id" (NumericValue 1))
322
323 assertEqual "non-matching jobid filter"
324 False
325 . check $ FPJobId (EQFilter "id" (NumericValue 2))
326
327 assertEqual "non-matching jobid filter (string passed)"
328 False
329 . check $ FPJobId (EQFilter "id" (QuotedString "1"))
330
331 -- jobid filters: watermarks
332
333 assertEqual "matching jobid watermark filter"
334 True
335 . check $ FPJobId (EQFilter "id" (QuotedString "watermark"))
336
337 -- opcode filters
338
339 assertEqual "matching opcode filter (type of opcode)"
340 True
341 . check $ FPOpCode (EQFilter "OP_ID" (QuotedString "OP_CLUSTER_RENAME"))
342
343 assertEqual "non-matching opcode filter (type of opcode)"
344 False
345 . check $ FPOpCode (EQFilter "OP_ID" (QuotedString "OP_INSTANCE_CREATE"))
346
347 assertEqual "matching opcode filter (nested access)"
348 True
349 . check $ FPOpCode (EQFilter "name" (QuotedString "cluster1"))
350
351 assertEqual "non-matching opcode filter (nonexistent nested access)"
352 False
353 . check $ FPOpCode (EQFilter "something" (QuotedString "cluster1"))
354
355 -- reason filters
356
357 assertEqual "matching reason filter (reason field)"
358 True
359 . check $ FPReason (EQFilter "reason" (QuotedString "reason1"))
360
361 assertEqual "non-matching reason filter (reason field)"
362 False
363 . check $ FPReason (EQFilter "reason" (QuotedString "reasonGarbage"))
364
365 assertEqual "matching reason filter (source field)"
366 True
367 . check $ FPReason (EQFilter "source" (QuotedString "source1"))
368
369 assertEqual "matching reason filter (timestamp field)"
370 True
371 . check $ FPReason (EQFilter "timestamp" (NumericValue 1234))
372
373 assertEqual "non-matching reason filter (nonexistent field)"
374 False
375 . check $ FPReason (EQFilter "something" (QuotedString ""))
376
377
378 -- | Tests that jobs selected by `applyingFilter` actually match
379 -- and have an effect (are not CONTINUE filters).
380 prop_applyingFilter :: Property
381 prop_applyingFilter =
382 forAllShrink arbitrary shrink $ \(job, filters) ->
383
384 let applying = applyingFilter (Set.fromList filters) job
385
386 in isJust applying ==> case applying of
387 Just f -> job `matches` f && frAction f /= Continue
388 Nothing -> True
389
390
391 case_jobFiltering :: Assertion
392 case_jobFiltering = do
393
394 clusterName <- mkNonEmpty "cluster1"
395 jid1 <- makeJobId 1
396 jid2 <- makeJobId 2
397 jid3 <- makeJobId 3
398 jid4 <- makeJobId 4
399 unsetPrio <- mkNonNegative 1234
400 uuid1 <- fmap UTF8.fromString newUUID
401
402 let j1 =
403 nullJobWithStat QueuedJob
404 { qjId = jid1
405 , qjOps =
406 [ QueuedOpCode
407 { qoInput = ValidOpCode MetaOpCode
408 { metaParams = CommonOpParams
409 { opDryRun = Nothing
410 , opDebugLevel = Nothing
411 , opPriority = OpPrioHigh
412 , opDepends = Just []
413 , opComment = Nothing
414 , opReason = [("source1", "reason1", 1234)]}
415 , metaOpCode = OpClusterRename
416 { opName = clusterName
417 }
418 }
419 , qoStatus = OP_STATUS_QUEUED
420 , qoResult = JSNull
421 , qoLog = []
422 , qoPriority = -1
423 , qoStartTimestamp = Nothing
424 , qoExecTimestamp = Nothing
425 , qoEndTimestamp = Nothing
426 }
427 ]
428 , qjReceivedTimestamp = Nothing
429 , qjStartTimestamp = Nothing
430 , qjEndTimestamp = Nothing
431 , qjLivelock = Nothing
432 , qjProcessId = Nothing
433 }
434
435 j2 = j1 & jJobL . qjIdL .~ jid2
436 j3 = j1 & jJobL . qjIdL .~ jid3
437 j4 = j1 & jJobL . qjIdL .~ jid4
438
439
440 fr1 =
441 FilterRule
442 { frWatermark = jid1
443 , frPriority = unsetPrio
444 , frPredicates = [FPJobId (EQFilter "id" (NumericValue 1))]
445 , frAction = Reject
446 , frReasonTrail = []
447 , frUuid = uuid1
448 }
449
450 -- Gives the rule a new UUID.
451 rule fr = do
452 uuid <- fmap UTF8.fromString newUUID
453 return fr{ frUuid = uuid }
454
455 -- Helper to create filter chains: assigns the filters in the list
456 -- increasing priorities, so that filters listed first are processed
457 -- first.
458 chain :: [FilterRule] -> Set FilterRule
459 chain frs
460 | any ((/= unsetPrio) . frPriority) frs =
461 error "Filter was passed to `chain` that already had a priority."
462 | otherwise =
463 Set.fromList
464 [ fr{ frPriority = prio }
465 | (fr, Just prio) <- zip frs (map mkNonNegative [1..]) ]
466
467 fr2 <- rule fr1{ frAction = Accept }
468 fr3 <- rule fr1{ frAction = Pause }
469
470 fr4 <- rule fr1{ frPredicates =
471 [FPJobId (GTFilter "id" (QuotedString "watermark"))]
472 }
473
474 fr5 <- rule fr1{ frPredicates = [] }
475
476 fr6 <- rule fr5{ frAction = Continue }
477 fr7 <- rule fr6{ frAction = RateLimit 2 }
478
479 fr8 <- rule fr4{ frAction = Continue, frWatermark = jid1 }
480 fr9 <- rule fr8{ frAction = RateLimit 2 }
481
482 assertEqual "j1 should be rejected (by fr1)"
483 []
484 (jobFiltering (Queue [j1] [] []) (chain [fr1]) [j1])
485
486 assertEqual "j1 should be rejected (by fr1, it has priority)"
487 []
488 (jobFiltering (Queue [j1] [] []) (chain [fr1, fr2]) [j1])
489
490 assertEqual "j1 should be accepted (by fr2, it has priority)"
491 [j1]
492 (jobFiltering (Queue [j1] [] []) (chain [fr2, fr1]) [j1])
493
494 assertEqual "j1 should be paused (by fr3)"
495 []
496 (jobFiltering (Queue [j1] [] []) (chain [fr3]) [j1])
497
498 assertEqual "j2 should be rejected (over watermark1)"
499 [j1]
500 (jobFiltering (Queue [j1, j2] [] []) (chain [fr4]) [j1, j2])
501
502 assertEqual "all jobs should be rejected (since no predicates)"
503 []
504 (jobFiltering (Queue [j1, j2] [] []) (chain [fr5]) [j1, j2])
505
506 assertEqual "j3 should be rate-limited"
507 [j1, j2]
508 (jobFiltering (Queue [j1, j2, j3] [] []) (chain [fr6, fr7]) [j1, j2, j3])
509
510 assertEqual "j4 should be rate-limited"
511 -- j1 doesn't apply to fr8/fr9 (since they match only watermark > jid1)
512 -- so j1 gets scheduled
513 [j1, j2, j3]
514 (jobFiltering (Queue [j1, j2, j3, j4] [] []) (chain [fr8, fr9])
515 [j1, j2, j3, j4])
516
517
518 -- | Tests the specified properties of `jobFiltering`, as defined in
519 -- `doc/design-optables.rst`.
520 prop_jobFiltering :: Property
521 prop_jobFiltering =
522 forAllShrink arbitrary shrink $ \q ->
523 forAllShrink (resize 4 arbitrary) shrink $ \(NonEmpty filterList) ->
524
525 let running = qRunning q ++ qManipulated q
526 enqueued = qEnqueued q
527
528 filters = Set.fromList filterList
529
530 toRun = jobFiltering q filters enqueued -- do the filtering
531
532 -- Helpers
533
534 -- Whether `fr` applies to more than `n` of the `jobs`
535 -- (that is, more than allowed).
536 exceeds :: Int -> FilterRule -> [JobWithStat] -> Bool
537 exceeds n fr jobs =
538 n < (length
539 . filter ((frUuid fr ==) . frUuid)
540 . mapMaybe (applyingFilter filters)
541 $ map jJob jobs)
542
543 -- Helpers for ensuring sensible coverage.
544
545 -- Makes sure that each action appears with some probability.
546 actionName = head . words . show
547 allActions = map actionName [ Accept, Continue, Pause, Reject
548 , RateLimit 0 ]
549 applyingActions = map (actionName . frAction)
550 . mapMaybe (applyingFilter filters)
551 $ map jJob enqueued
552 perc = 4 -- percent; low because it's per action
553 actionCovers =
554 foldr (.) id
555 [ stableCover (a `elem` applyingActions) perc ("is " ++ a)
556 | a <- allActions ]
557
558 -- `covers` should be after `==>` and before `conjoin` (see QuickCheck
559 -- bugs 25 and 27).
560 in (enqueued /= []) ==> actionCovers $ conjoin
561
562 [ counterexample "scheduled jobs must be subsequence" $
563 toRun `isSubsequenceOf` enqueued
564
565 , counterexample "a reason for each job (not) being scheduled" .
566
567 -- All enqueued jobs must have a reason why they were (not)
568 -- scheduled, determined by the filter that applies.
569 flip all enqueued $ \job ->
570 case applyingFilter filters (jJob job) of
571 -- If no filter matches, the job must run.
572 Nothing -> job `elem` toRun
573 Just fr@FilterRule{ frAction } -> case frAction of
574 -- ACCEPT filter permit the job immediately,
575 -- PAUSE/REJECT forbid running, CONTINUE filters cannot
576 -- be the output of `applyingFilter`, and
577 -- RATE_LIMIT filters have a more more complex property.
578 Accept -> job `elem` toRun
579 Continue -> error "must not happen"
580 Pause -> job `notElem` toRun
581 Reject -> job `notElem` toRun
582 RateLimit n ->
583
584 let -- Jobs in queue before our job.
585 jobsBefore = takeWhile (/= job) enqueued
586
587 in if job `elem` toRun
588 -- If it got scheduled, the job and any job
589 -- before it doesn't overfill the rate limit.
590 then not . exceeds n fr $ running
591 ++ jobsBefore ++ [job]
592 -- If didn't get scheduled, then the rate limit
593 -- was already full before scheduling or the job
594 -- or one of the jobs before made it full.
595 else any (exceeds n fr . (running ++))
596 (inits $ jobsBefore ++ [job])
597 -- The `inits` bit includes the [] and [...job]
598 -- cases.
599
600 ]
601
602
603 testSuite "JQScheduler"
604 [ 'case_parseReasonRateLimit
605 , 'prop_slotMapFromJob_conflicting_buckets
606 , 'case_reasonRateLimit
607 , 'prop_reasonRateLimit
608 , 'prop_filterRuleOrder
609 , 'case_matchPredicate
610 , 'prop_applyingFilter
611 , 'case_jobFiltering
612 , 'prop_jobFiltering
613 ]