Merge branch 'stable-2.16' into stable-2.17
[ganeti-github.git] / lib / utils / retry.py
1 #
2 #
3
4 # Copyright (C) 2006, 2007, 2010, 2011 Google Inc.
5 # All rights reserved.
6 #
7 # Redistribution and use in source and binary forms, with or without
8 # modification, are permitted provided that the following conditions are
9 # met:
10 #
11 # 1. Redistributions of source code must retain the above copyright notice,
12 # this list of conditions and the following disclaimer.
13 #
14 # 2. Redistributions in binary form must reproduce the above copyright
15 # notice, this list of conditions and the following disclaimer in the
16 # documentation and/or other materials provided with the distribution.
17 #
18 # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
19 # IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
20 # TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
21 # PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR
22 # CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
23 # EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
24 # PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
25 # PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
26 # LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
27 # NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
28 # SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29
30 """Utility functions for retrying function calls with a timeout.
31
32 """
33
34
35 import logging
36 import time
37
38 from ganeti import errors
39
40
#: Special delay to specify whole remaining timeout. L{Retry} recognizes this
#: sentinel by identity (C{delay is RETRY_REMAINING_TIME}) and then passes the
#: time left until the timeout to the wait function instead of a computed
#: delay.
RETRY_REMAINING_TIME = object()
43
44
class RetryTimeout(Exception):
  """Raised when a retry loop runs out of time.

  The arguments the retried function handed to L{RetryAgain} on its last
  attempt are kept as the arguments of this exception. When the first of
  those arguments is itself an exception, L{RaiseInner} re-raises it.

  """
  def RaiseInner(self):
    """Raises the wrapped exception, or a new L{RetryTimeout} otherwise.

    If the first argument is an C{Exception} instance it is raised as-is;
    in every other case (including no arguments at all) a new
    L{RetryTimeout} carrying the same arguments is raised.

    """
    inner = self.args[0] if self.args else None

    if isinstance(inner, Exception):
      raise inner

    raise RetryTimeout(*self.args)
58
59
class RetryAgain(Exception):
  """Raised by a retried function to request another attempt.

  Whatever arguments are given to this exception are kept; should the retry
  loop time out, they become the arguments of the resulting L{RetryTimeout}.
  If an exception is passed as the first argument, the RaiseInner() method
  of the L{RetryTimeout} exception can be used to re-raise it.

  """
68
69
70 class _RetryDelayCalculator(object):
71 """Calculator for increasing delays.
72
73 """
74 __slots__ = [
75 "_factor",
76 "_limit",
77 "_next",
78 "_start",
79 ]
80
81 def __init__(self, start, factor, limit):
82 """Initializes this class.
83
84 @type start: float
85 @param start: Initial delay
86 @type factor: float
87 @param factor: Factor for delay increase
88 @type limit: float or None
89 @param limit: Upper limit for delay or None for no limit
90
91 """
92 assert start > 0.0
93 assert factor >= 1.0
94 assert limit is None or limit >= 0.0
95
96 self._start = start
97 self._factor = factor
98 self._limit = limit
99
100 self._next = start
101
102 def __call__(self):
103 """Returns current delay and calculates the next one.
104
105 """
106 current = self._next
107
108 # Update for next run
109 if self._limit is None or self._next < self._limit:
110 self._next = min(self._limit, self._next * self._factor)
111
112 return current
113
114
def Retry(fn, delay, timeout, args=None, wait_fn=time.sleep,
          _time_fn=time.time):
  """Call a function repeatedly until it succeeds.

  The function C{fn} is called repeatedly until it doesn't throw L{RetryAgain}
  anymore. Between calls a delay, specified by C{delay}, is inserted. After a
  total of C{timeout} seconds, this function throws L{RetryTimeout}.

  C{delay} can be one of the following:
    - callable returning the delay length as a float
    - Tuple of (start, factor, limit)
    - L{RETRY_REMAINING_TIME} to sleep until the timeout expires (this is
      useful when overriding L{wait_fn} to wait for an external event)
    - A static delay as a number (int or float)

  The timeout is only checked after a failed attempt, so C{fn} is always
  called at least once. A computed delay is not capped to the remaining time.

  @type fn: callable
  @param fn: Function to be called
  @param delay: Either a callable (returning the delay), a tuple of (start,
                factor, limit) (see L{_RetryDelayCalculator}),
                L{RETRY_REMAINING_TIME} or a number (int or float)
  @type timeout: float
  @param timeout: Total timeout
  @type args: sequence or None
  @param args: Positional arguments passed to C{fn} on every call
  @type wait_fn: callable
  @param wait_fn: Waiting function
  @type _time_fn: callable
  @param _time_fn: Function returning the current time
  @return: Return value of function
  @raise RetryTimeout: If the timeout expired; carries the arguments of the
                       last L{RetryAgain} raised by C{fn}
  @raise errors.ProgrammerError: If C{fn} lets a L{RetryTimeout} escape

  """
  assert callable(fn)
  assert callable(wait_fn)
  assert callable(_time_fn)

  if args is None:
    args = []

  end_time = _time_fn() + timeout

  if callable(delay):
    # External function to calculate delay
    calc_delay = delay

  elif isinstance(delay, (tuple, list)):
    # Increasing delay with optional upper boundary
    (start, factor, limit) = delay
    calc_delay = _RetryDelayCalculator(start, factor, limit)

  elif delay is RETRY_REMAINING_TIME:
    # Always use the remaining time
    calc_delay = None

  else:
    # Static delay
    calc_delay = lambda: delay

  assert calc_delay is None or callable(calc_delay)

  while True:
    retry_args = []
    try:
      return fn(*args)
    except RetryAgain as err:
      # Remember the arguments so they can be forwarded on timeout
      retry_args = err.args
    except RetryTimeout:
      raise errors.ProgrammerError("Nested retry loop detected that didn't"
                                   " handle RetryTimeout")

    remaining_time = end_time - _time_fn()

    if remaining_time <= 0.0:
      raise RetryTimeout(*retry_args)

    assert remaining_time > 0.0

    if calc_delay is None:
      wait_fn(remaining_time)
    else:
      current_delay = calc_delay()
      if current_delay > 0.0:
        wait_fn(current_delay)
193
194
def SimpleRetry(expected, fn, delay, timeout, args=None, wait_fn=time.sleep,
                _time_fn=time.time):
  """A wrapper over L{Retry} implementing a simpler interface.

  Takes the same parameters as L{Retry} plus a leading C{expected}, which is
  either a plain value (compared with the function's result) or a callable
  (called with the result and returning a boolean). As long as that test
  fails the call is retried, until either the test succeeds or the timeout
  has passed. Either way, the result of the last call is returned.

  This function is not expected to raise any retry-related exceptions; it
  always simply returns values. That makes it suitable for wrapping code
  that doesn't use retry at all (e.g. "if fn(args)" replaced with
  "if SimpleRetry(True, fn, ...)").

  @see: L{Retry}

  """
  last = {}

  def _Attempt(*call_args):
    value = fn(*call_args)
    # Record every result so the most recent one survives a timeout
    last["result"] = value

    if callable(expected) and expected(value):
      return value

    if value == expected:
      return value

    raise RetryAgain()

  try:
    return Retry(_Attempt, delay, timeout, args=args,
                 wait_fn=wait_fn, _time_fn=_time_fn)
  except RetryTimeout:
    assert "result" in last
    return last["result"]
231
232
def CountRetry(expected, fn, count, args=None):
  """A wrapper over L{SimpleRetry} implementing a count down.

  Instead of a wall-clock timeout this variant limits the number of tries.
  It does so by handing L{SimpleRetry} a counter in place of the clock: the
  "time" starts at zero, every wait advances it by the requested delay
  (always 1 here), and C{count} is used as the timeout.

  NOTE(review): the first attempt happens before any tick, so for an integer
  C{count} the function appears to be called up to C{count + 1} times --
  confirm this is intended.

  @see: L{Retry}
  """
  counter = {"tries": 0}

  def _Now():
    return counter["tries"]

  def _Tick(step):
    counter["tries"] += step

  return SimpleRetry(expected, fn, 1, count, args=args,
                     wait_fn=_Tick, _time_fn=_Now)
251
252
def RetryByNumberOfTimes(max_retries, backoff, exception_class, fn, *args,
                         **kwargs):
  """Retries calling a function up to the specified number of times.

  Only L{errors.OpExecError} raised by C{fn} triggers a retry; any other
  exception propagates immediately.

  @type max_retries: integer
  @param max_retries: Maximum number of retries.
  @type exception_class: class
  @param exception_class: Exception class which is used for throwing the
                          final exception.
  @type fn: callable
  @param fn: Function to be called (up to the specified maximum number of
             retries).
  @type backoff: int
  @param backoff: this enables and configures the back off behavior after
     failed tries. If value is '0', there will be no delay between failed
     tries. If the value is a positive integer, it is interpreted as the
     base length of the back off delay (in seconds). That means there will be
     a delay between failed tries of the length specified in this parameter.
     With each next retry, the delay is increased by the factor of two. For
     example, if the value is '2', the first delay is 2 seconds, the second 4
     seconds, the third 8 seconds (until the max_retries are hit or the
     function call succeeds).
  @return: Return value of the first successful call to C{fn}; None if
           C{max_retries} is not positive (C{fn} is never called then)
  @raise exception_class: If C{backoff} is negative, or if all tries failed

  """
  if backoff < 0:
    raise exception_class("Backoff must be a non-negative integer.")

  last_exception = None
  delay = backoff
  for i in range(max_retries):
    try:
      return fn(*args, **kwargs)
    except errors.OpExecError as e:
      logging.error("Error after retry no. %s: %s.", i, e)
      last_exception = e

    # Back off only between tries; waiting after the final failure would
    # merely postpone the exception raised below.
    if i < max_retries - 1:
      if delay > 0:
        time.sleep(delay)
      delay *= 2

  if last_exception is not None:
    raise exception_class("Error after %s retries. Last exception: %s."
                          % (max_retries, last_exception))

  return None