Merge branch 'stable-2.16' into stable-2.17
[ganeti-github.git] / lib / serializer.py
1 #
2 #
3
4 # Copyright (C) 2007, 2008, 2014 Google Inc.
5 # All rights reserved.
6 #
7 # Redistribution and use in source and binary forms, with or without
8 # modification, are permitted provided that the following conditions are
9 # met:
10 #
11 # 1. Redistributions of source code must retain the above copyright notice,
12 # this list of conditions and the following disclaimer.
13 #
14 # 2. Redistributions in binary form must reproduce the above copyright
15 # notice, this list of conditions and the following disclaimer in the
16 # documentation and/or other materials provided with the distribution.
17 #
18 # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
19 # IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
20 # TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
21 # PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR
22 # CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
23 # EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
24 # PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
25 # PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
26 # LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
27 # NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
28 # SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29
30 """Serializer abstraction module
31
32 This module introduces a simple abstraction over the serialization
33 backend (currently json).
34
35 """
36 # pylint: disable=C0103
37
38 # C0103: Invalid name, since pylint doesn't see that Dump points to a
39 # function and not a constant
40
41 import re
42
43 # Python 2.6 and above contain a JSON module based on simplejson. Unfortunately
44 # the standard library version is significantly slower than the external
45 # module. While it should be better from at least Python 3.2 on (see Python
46 # issue 7451), for now Ganeti needs to work well with older Python versions
47 # too.
48 import simplejson
49
50 from ganeti import errors
51 from ganeti import utils
52 from ganeti import constants
53
# Matches a run of spaces/tabs at the end of any line (re.MULTILINE makes "$"
# match before every newline); DumpJson uses it to strip trailing whitespace.
_RE_EOLSP = re.compile("[ \t]+$", re.MULTILINE)
55
56
def DumpJson(data, private_encoder=None):
  """Serialize a given object to JSON text.

  Trailing spaces and tabs are removed from every line of the encoded
  output, and the result always ends with a newline.

  @param data: the data to serialize
  @param private_encoder: fallback encoder handed to simplejson as
      C{default}; pass L{serializer.EncodeWithPrivateFields} if the produced
      JSON must contain private parameters. When omitted,
      L{EncodeWithoutPrivateFields} is used and private values encode to
      null.
  @return: the string representation of data

  """
  # Do not leak private fields by default.
  encoder = private_encoder
  if encoder is None:
    encoder = EncodeWithoutPrivateFields

  serialized = _RE_EOLSP.sub("", simplejson.dumps(data, default=encoder))

  if serialized.endswith("\n"):
    return serialized
  return serialized + "\n"
77
78
def LoadJson(txt):
  """Unserialize data from a string.

  After decoding, the structure is passed through L{WrapPrivateValues} so
  that private fields are wrapped before the data is handed to the caller.

  @param txt: the json-encoded form
  @return: the original data
  @raise JSONDecodeError: if L{txt} is not a valid JSON document

  """
  decoded = simplejson.loads(txt)
  # Hunt and seek for Private fields and wrap them (done in place).
  WrapPrivateValues(decoded)
  return decoded
93
94
def WrapPrivateValues(json):
  """Crawl a JSON decoded structure for private values and wrap them.

  Lists and dictionaries are walked iteratively; dictionaries are modified
  in place. Nothing is returned.

  @param json: the json-decoded value to protect.

  """
  # An explicit work list replaces recursion, since this is a very
  # high-traffic area.
  pending = [json]

  while pending:
    node = pending.pop()

    if isinstance(node, list): # Array
      pending.extend(node)
      continue

    if not isinstance(node, dict): # Plain values need no treatment
      continue

    # Object.
    # This is kind of a kludge, but the only place where we know what should
    # be protected is in ganeti.opcodes, and not in a way that is helpful to
    # us, especially in such a high traffic method; on the other hand, the
    # Haskell `py_compat_fields` test should complain whenever this check
    # does not protect fields properly.
    for name in node:
      content = node[name]
      if name not in constants.PRIVATE_PARAMETERS_BLACKLIST:
        # Not a private field itself, but it may contain some further down.
        pending.append(content)
      elif not name.endswith("_cluster"):
        node[name] = PrivateDict(content)
      elif content is not None:
        # "*_cluster" fields hold one mapping per entry (presumably keyed by
        # OS name); each entry is wrapped on its own.
        for entry in content:
          content[entry] = PrivateDict(content[entry])
130
131
def DumpSignedJson(data, key, salt=None, key_selector=None,
                   private_encoder=None):
  """Serialize a given object and authenticate it.

  The data is serialized with L{DumpJson}, and the resulting text is placed
  in an envelope together with the salt, the optional key selector and an
  HMAC computed over the text.

  @param data: the data to serialize
  @param key: shared hmac key
  @param salt: salt stored in the envelope and mixed into the hmac; C{None}
      is treated as the empty string
  @param key_selector: name/id that identifies the key (in case there are
      multiple keys in use, e.g. in a multi-cluster environment)
  @param private_encoder: see L{DumpJson}
  @return: the string representation of data signed by the hmac key

  """
  payload = DumpJson(data, private_encoder=private_encoder)
  used_salt = "" if salt is None else salt

  envelope = {
    "msg": payload,
    "salt": used_salt,
    }

  # The selector is only stored when one was given; the hmac salt always
  # gets a (possibly empty) selector appended.
  if key_selector:
    envelope["key_selector"] = key_selector
  selector_part = key_selector or ""

  envelope["hmac"] = utils.Sha1Hmac(key, payload,
                                    salt=used_salt + selector_part)

  return DumpJson(envelope)
160
161
def LoadSignedJson(txt, key):
  """Verify that a given message was signed with the given key, and load it.

  @param txt: json-encoded hmac-signed message
  @param key: the shared hmac key or a callable taking one argument (the key
      selector), which returns the hmac key belonging to the key selector.
      Typical usage is to pass a reference to the get method of a dict.
  @rtype: tuple of original data, string
  @return: original data, salt
  @raise errors.SignatureError: if the envelope is malformed, no key is
      found for the key selector, or the message signature doesn't verify

  """
  # LoadJson already wraps private values, so no second pass is needed here.
  signed_dict = LoadJson(txt)

  if not isinstance(signed_dict, dict):
    raise errors.SignatureError("Invalid external message")
  try:
    msg = signed_dict["msg"]
    salt = signed_dict["salt"]
    hmac_sign = signed_dict["hmac"]
  except KeyError:
    raise errors.SignatureError("Invalid external message")

  if callable(key):
    # pylint: disable=E1103
    key_selector = signed_dict.get("key_selector", None)
    hmac_key = key(key_selector)
    if not hmac_key:
      raise errors.SignatureError("No key with key selector '%s' found" %
                                  key_selector)
    if key_selector is None:
      # DumpSignedJson omits an empty selector from the envelope and signs
      # with "" in its place; mirror that instead of concatenating None.
      key_selector = ""
  else:
    key_selector = ""
    hmac_key = key

  try:
    hmac_salt = salt + key_selector
  except TypeError:
    # Salt or selector of an unexpected type in an external message
    raise errors.SignatureError("Invalid external message")

  if not utils.VerifySha1Hmac(hmac_key, msg, hmac_sign, salt=hmac_salt):
    raise errors.SignatureError("Invalid Signature")

  return LoadJson(msg), salt
203
204
def LoadAndVerifyJson(raw, verify_fn):
  """Parses and verifies JSON data.

  @type raw: string
  @param raw: Input data in JSON format
  @type verify_fn: callable
  @param verify_fn: Verification function, usually from L{ht}
  @return: De-serialized data
  @raise errors.ParseError: if the input cannot be parsed or if
      C{verify_fn} rejects the parsed data

  """
  try:
    data = LoadJson(raw)
  except Exception as err: # "as" form is valid on Python 2.6+ and Python 3
    raise errors.ParseError("Can't parse input data: %s" % err)

  if not verify_fn(data):
    raise errors.ParseError("Data does not match expected format: %s" %
                            verify_fn)

  return data
225
226
# Short aliases for the JSON-based implementations defined in this module.
Dump = DumpJson
Load = LoadJson
DumpSigned = DumpSignedJson
LoadSigned = LoadSignedJson
231
232
class Private(object):
  """Wrap a value so it is hard to leak it accidentally.

  String conversion and formatting only show a description, never the
  wrapped value. Attribute access and calls are forwarded to the wrapped
  value, and their results are wrapped in a new L{Private}. Use L{Get} to
  obtain the real value.

  >>> x = Private("foo")
  >>> print("Value: %s" % x)
  Value: <redacted>
  >>> print("Value: {0}".format(x))
  Value: <redacted>
  >>> x.upper() == "FOO"
  True

  """
  def __init__(self, item, descr="redacted"):
    """Wrap C{item}.

    @param item: the value to protect
    @param descr: text shown (in angle brackets) instead of the value
    @raise ValueError: if C{item} is already a L{Private}

    """
    if isinstance(item, Private):
      raise ValueError("Attempted to nest Private values.")
    self._item = item
    self._descr = descr

  def Get(self):
    "Return the wrapped value."
    return self._item

  def __str__(self):
    return "<%s>" % (self._descr, )

  def __repr__(self):
    return "Private(?, descr=%r)" % (self._descr, )

  # pylint: disable=W0212
  # If it doesn't access _item directly, the call will go through __getattr__
  # because this class defines __slots__ and "item" is not in it.
  # OTOH, if we do add it there, we'd risk shadowing an "item" attribute.
  def __eq__(self, other):
    if isinstance(other, Private):
      return self._item == other._item
    else:
      return self._item == other

  # Python 2 does not derive "!=" from __eq__; without this, two wrappers
  # around equal values would compare as different.
  def __ne__(self, other):
    return not self.__eq__(other)

  def __hash__(self):
    return hash(self._item)

  def __format__(self, *_1, **_2):
    return self.__str__()

  def __getattr__(self, attr):
    # Only reached for the slot names when the slot has not been filled yet
    # (e.g. while copy.deepcopy is rebuilding an instance). Forwarding would
    # read self._item again and recurse without bound.
    if attr in ("_item", "_descr"):
      raise AttributeError(attr)
    return Private(getattr(self._item, attr),
                   descr="%s.%s" % (self._descr, attr))

  def __call__(self, *args, **kwargs):
    return Private(self._item(*args, **kwargs),
                   descr="%s()" % (self._descr, ))

  # pylint: disable=R0201
  # While this could get away with being a function, it needs to be a method.
  # Required by the copy.deepcopy function used by FillDict.
  def __getnewargs__(self):
    return tuple()

  def __nonzero__(self):
    return bool(self._item)

  # Python 3 spells the truth-value hook __bool__; without the alias every
  # instance would be considered true there.
  __bool__ = __nonzero__

  # Get in the way of Pickle by implementing __slots__ but not __getstate__
  # ...and get a performance boost, too.
  __slots__ = ["_item", "_descr"]
297
298
class PrivateDict(dict):
  """A dictionary that turns its values to private fields.

  Values stored through item assignment, L{update} or the constructor are
  wrapped in L{Private} unless they already are.

  >>> PrivateDict()
  {}
  >>> supersekkrit = PrivateDict({"password": "foobar"})
  >>> print(supersekkrit["password"])
  <password>
  >>> supersekkrit["password"].Get()
  'foobar'
  >>> supersekkrit.GetPrivate("password")
  'foobar'
  >>> supersekkrit["user"] = "eggspam"
  >>> supersekkrit.Unprivate()
  {'password': 'foobar', 'user': 'eggspam'}

  """
  def __init__(self, data=None):
    """Create the dictionary, optionally filling it from C{data}.

    @param data: anything accepted by L{update}, or C{None}

    """
    dict.__init__(self)
    self.update(data)

  def __setitem__(self, item, value):
    """Store C{value} under C{item}, wrapping it if it is not private yet."""
    if isinstance(value, Private):
      wrapped = value
    elif isinstance(item, dict):
      # NOTE(review): this tests the key, not the value, so it looks
      # unreachable for hashable keys -- confirm the intent before changing.
      wrapped = PrivateDict(value)
    else:
      wrapped = Private(value, descr=item)
    dict.__setitem__(self, item, wrapped)

  # The actual conversion to Private containers is done by __setitem__

  # Derived from cpython/Lib/UserDict.py
  # Copyright (c) 2001-2014 Python Software Foundation; All Rights Reserved
  def update(self, other=None, **kwargs):
    """Add entries from C{other} and C{kwargs}, going through __setitem__.

    @param other: a mapping, an object with C{keys()} and item access, an
        iterable of key/value pairs, or C{None}

    """
    # Make progressively weaker assumptions about "other"
    if other is not None:
      if hasattr(other, 'iteritems'):  # iteritems saves memory and lookups
        pairs = other.iteritems()
      elif hasattr(other, 'keys'):
        pairs = ((k, other[k]) for k in other.keys())
      else:
        pairs = other
      for k, v in pairs:
        self[k] = v
    if kwargs:
      self.update(kwargs)

  def GetPrivate(self, *args):
    """Like dict.get, but extracting the value in the process.

    Takes a key and an optional default. With only a key, a missing key
    raises C{KeyError}; with a default, the default is returned as is.

    >>> PrivateDict({"foo": "bar"}).GetPrivate("foo")
    'bar'
    >>> PrivateDict({"foo": "bar"}).GetPrivate("baz", "spam")
    'spam'

    @raise TypeError: if called with anything but one or two arguments

    """
    argc = len(args)
    if argc not in (1, 2):
      raise TypeError("GetPrivate() takes 2 arguments (%d given)" % argc)

    key = args[0]
    if argc == 2 and key not in self:
      return args[1]
    return self[key].Get()

  def Unprivate(self):
    """Turn this dict of Private() values to a dict of values.

    >>> PrivateDict({"foo": "bar"}).Unprivate()
    {'foo': 'bar'}

    @rtype: dict

    """
    return dict((key, self[key].Get()) for key in self)
384
385
def EncodeWithoutPrivateFields(obj):
  """Fallback JSON encoder that hides private values.

  @param obj: an object the JSON encoder could not serialize by itself
  @return: C{None} for L{Private} instances, so they encode to null
  @raise TypeError: for any other kind of object

  """
  if not isinstance(obj, Private):
    raise TypeError(repr(obj) + " is not JSON serializable")
  return None
390
391
def EncodeWithPrivateFields(obj):
  """Fallback JSON encoder that exposes private values.

  @param obj: an object the JSON encoder could not serialize by itself
  @return: the wrapped value for L{Private} instances
  @raise TypeError: for any other kind of object

  """
  if not isinstance(obj, Private):
    raise TypeError(repr(obj) + " is not JSON serializable")
  return obj.Get()