#
#

# Copyright (C) 2007, 2008, 2014 Google Inc.
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are
# met:
#
# 1. Redistributions of source code must retain the above copyright notice,
# this list of conditions and the following disclaimer.
#
# 2. Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
# IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
# TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
# LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
# NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

"""Serializer abstraction module

This module introduces a simple abstraction over the serialization
backend (currently json).

"""

# pylint: disable=C0103

# C0103: Invalid name, since pylint doesn't see that Dump points to a
# function and not a constant

import re
42

43 44 45 46 47 48 49
# Python 2.6 and above contain a JSON module based on simplejson. Unfortunately
# the standard library version is significantly slower than the external
# module. While it should be better from at least Python 3.2 on (see Python
# issue 7451), for now Ganeti needs to work well with older Python versions
# too.
import simplejson

50
from ganeti import errors
51
from ganeti import utils
52
from ganeti import constants
Michael Hanselmann's avatar
Michael Hanselmann committed
53

Iustin Pop's avatar
Iustin Pop committed
54
# Matches a run of spaces/tabs at the end of a line; with re.MULTILINE the
# "$" anchor matches before every newline, not only at the end of the string.
_RE_EOLSP = re.compile("[ \t]+$", re.MULTILINE)
55 56


57
def DumpJson(data, private_encoder=None):
  """Serialize a given object.

  @param data: the data to serialize
  @param private_encoder: specify L{serializer.EncodeWithPrivateFields} if you
                          require the produced JSON to also contain private
                          parameters. Otherwise, they will encode to null.
  @return: the string representation of data

  """
  if private_encoder is None:
    # Do not leak private fields by default.
    private_encoder = EncodeWithoutPrivateFields
  encoded = simplejson.dumps(data, default=private_encoder)

  # Drop trailing blanks on every line and make sure the text ends with
  # exactly the newline we add (if it does not already end with one).
  txt = _RE_EOLSP.sub("", encoded)
  if not txt.endswith("\n"):
    txt += "\n"

  return txt


79
def LoadJson(txt):
  """Unserialize data from a string.

  Private fields found in the decoded structure are wrapped in place before
  the structure is returned.

  @param txt: the json-encoded form
  @return: the original data
  @raise JSONDecodeError: if L{txt} is not a valid JSON document

  """
  values = simplejson.loads(txt)

  # Hunt and seek for Private fields and wrap them.
  WrapPrivateValues(values)

  return values

94

95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122
def WrapPrivateValues(json):
  """Crawl a JSON decoded structure for private values and wrap them.

  The structure is modified in place; nothing is returned.

  @param json: the json-decoded value to protect.

  """
  # This function used to be recursive. I use this list to avoid actual
  # recursion, however, since this is a very high-traffic area.
  todo = [json]

  while todo:
    data = todo.pop()

    if isinstance(data, list): # Array
      todo.extend(data)
    elif isinstance(data, dict): # Object

      # This is kind of a kludge, but the only place where we know what should
      # be protected is in ganeti.opcodes, and not in a way that is helpful to
      # us, especially in such a high traffic method; on the other hand, the
      # Haskell `py_compat_fields` test should complain whenever this check
      # does not protect fields properly.
      for field in data:
        value = data[field]
        if field in constants.PRIVATE_PARAMETERS_BLACKLIST:
          if not field.endswith("_cluster"):
            data[field] = PrivateDict(value)
          elif value is not None:
            # "*_cluster" fields hold one dict per key; wrap each of those
            # inner dicts rather than the outer container.
            for os_name in value:
              value[os_name] = PrivateDict(value[os_name])
        else:
          todo.append(value)
    # Anything else is a plain value and needs no wrapping.
130

131 132 133

def DumpSignedJson(data, key, salt=None, key_selector=None,
                   private_encoder=None):
  """Serialize a given object and authenticate it.

  @param data: the data to serialize
  @param key: shared hmac key
  @param salt: optional salt; C{None} is treated as the empty string. It is
    stored in the message and, with the key selector appended, passed as the
    salt to L{utils.Sha1Hmac}
  @param key_selector: name/id that identifies the key (in case there are
    multiple keys in use, e.g. in a multi-cluster environment)
  @param private_encoder: see L{DumpJson}
  @return: the string representation of data signed by the hmac key

  """
  txt = DumpJson(data, private_encoder=private_encoder)
  if salt is None:
    salt = ""
  signed_dict = {
    "msg": txt,
    "salt": salt,
    }

  # The selector is only stored in the message when one was given; an absent
  # selector contributes nothing to the HMAC salt.
  if key_selector:
    signed_dict["key_selector"] = key_selector
  else:
    key_selector = ""

  signed_dict["hmac"] = utils.Sha1Hmac(key, txt, salt=salt + key_selector)

  return DumpJson(signed_dict)
160 161


162
def LoadSignedJson(txt, key):
  """Verify that a given message was signed with the given key, and load it.

  @param txt: json-encoded hmac-signed message
  @param key: the shared hmac key or a callable taking one argument (the key
    selector), which returns the hmac key belonging to the key selector.
    Typical usage is to pass a reference to the get method of a dict.
  @rtype: tuple of original data, string
  @return: original data, salt
  @raises errors.SignatureError: if the message signature doesn't verify

  """
  signed_dict = LoadJson(txt)

  WrapPrivateValues(signed_dict)

  if not isinstance(signed_dict, dict):
    raise errors.SignatureError("Invalid external message")
  try:
    msg = signed_dict["msg"]
    salt = signed_dict["salt"]
    hmac_sign = signed_dict["hmac"]
  except KeyError:
    raise errors.SignatureError("Invalid external message")

  if callable(key):
    # pylint: disable=E1103
    key_selector = signed_dict.get("key_selector", None)
    hmac_key = key(key_selector)
    if not hmac_key:
      raise errors.SignatureError("No key with key selector '%s' found" %
                                  key_selector)
    if key_selector is None:
      # DumpSignedJson leaves "key_selector" out of the message when none was
      # given and signs with an empty selector; mirror that here instead of
      # failing on "salt + None" below.
      key_selector = ""
  else:
    key_selector = ""
    hmac_key = key

  if not utils.VerifySha1Hmac(hmac_key, msg, hmac_sign,
                              salt=salt + key_selector):
    raise errors.SignatureError("Invalid Signature")

  return LoadJson(msg), salt
203 204


205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226
def LoadAndVerifyJson(raw, verify_fn):
  """Parses and verifies JSON data.

  @type raw: string
  @param raw: Input data in JSON format
  @type verify_fn: callable
  @param verify_fn: Verification function, usually from L{ht}
  @return: De-serialized data
  @raise errors.ParseError: if the input cannot be parsed or if C{verify_fn}
    returns a false value for the parsed data

  """
  try:
    data = LoadJson(raw)
  except Exception as err:
    # Any failure while loading is reported uniformly as a parse error.
    raise errors.ParseError("Can't parse input data: %s" % err)

  if not verify_fn(data):
    raise errors.ParseError("Data does not match expected format: %s" %
                            verify_fn)

  return data


227 228
# Short aliases for the JSON-backed implementations defined in this module.
Dump = DumpJson
Load = LoadJson
DumpSigned = DumpSignedJson
LoadSigned = LoadSignedJson
231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255


class Private(object):
  """Wrap a value so it is hard to leak it accidentally.

  String conversion, formatting and C{repr} only ever show the description,
  never the wrapped value. Attribute access and calls are forwarded to the
  wrapped value and their results are wrapped again; use L{Get} to obtain
  the real value.

  >>> x = Private("foo")
  >>> print("Value: %s" % x)
  Value: <redacted>
  >>> print("Value: {0}".format(x))
  Value: <redacted>
  >>> x.upper() == "FOO"
  True

  """
  # Get in the way of Pickle by implementing __slots__ but not __getstate__
  # ...and get a performance boost, too.
  __slots__ = ["_item", "_descr"]

  def __init__(self, item, descr="redacted"):
    """Wrap C{item}.

    @param item: the value to protect
    @param descr: text shown instead of the value when it is printed
    @raise ValueError: if C{item} is already a L{Private}

    """
    if isinstance(item, Private):
      raise ValueError("Attempted to nest Private values.")
    self._item = item
    self._descr = descr

  def Get(self):
    "Return the wrapped value."
    return self._item

  def __str__(self):
    return "<%s>" % (self._descr, )

  def __repr__(self):
    return "Private(?, descr=%r)" % (self._descr, )

  # pylint: disable=W0212
  # If it doesn't access _item directly, the call will go through __getattr__
  # because this class defines __slots__ and "item" is not in it.
  # OTOH, if we do add it there, we'd risk shadowing an "item" attribute.
  def __eq__(self, other):
    if isinstance(other, Private):
      return self._item == other._item
    else:
      return self._item == other

  def __ne__(self, other):
    # Python 2 does not derive "!=" from __eq__, so spell it out to keep the
    # two operators consistent.
    return not self.__eq__(other)

  def __hash__(self):
    return hash(self._item)

  def __format__(self, *_1, **_2):
    # Ignore any format specification; never render the wrapped value.
    return self.__str__()

  def __getattr__(self, attr):
    # __getattr__ only runs when normal lookup failed. For our own slots that
    # means the slot is not set yet (e.g. on an instance created through
    # __new__ without __init__); reading self._item below would then re-enter
    # this method without end, so fail cleanly instead.
    if attr in ("_item", "_descr"):
      raise AttributeError(attr)
    return Private(getattr(self._item, attr),
                   descr="%s.%s" % (self._descr, attr))

  def __call__(self, *args, **kwargs):
    return Private(self._item(*args, **kwargs),
                   descr="%s()" % (self._descr, ))

  # pylint: disable=R0201
  # While this could get away with being a function, it needs to be a method.
  # Required by the copy.deepcopy function used by FillDict.
  def __getnewargs__(self):
    return tuple()

  def __nonzero__(self):
    return bool(self._item)

  # Python 3 spells the truth-value hook __bool__.
  __bool__ = __nonzero__


class PrivateDict(dict):
  """Dictionary that stores every value wrapped as a private field.

  Values are wrapped on assignment; use L{GetPrivate} or L{Unprivate} to get
  the plain values back.

  >>> PrivateDict()
  {}
  >>> supersekkrit = PrivateDict({"password": "foobar"})
  >>> print supersekkrit["password"]
  <password>
  >>> supersekkrit["password"].Get()
  'foobar'
  >>> supersekkrit.GetPrivate("password")
  'foobar'
  >>> supersekkrit["user"] = "eggspam"
  >>> supersekkrit.Unprivate()
  {'password': 'foobar', 'user': 'eggspam'}

  """
  def __init__(self, data=None):
    """Create the dictionary, optionally filling it from C{data}.

    @param data: anything accepted by L{update}, or C{None} for an empty dict

    """
    dict.__init__(self)
    self.update(data)

  def __setitem__(self, item, value):
    """Store C{value} under C{item}, wrapping it unless already L{Private}.

    """
    # NOTE(review): the dict test below looks at the key ("item"), not at the
    # value, so the PrivateDict branch appears unreachable for ordinary
    # (hashable) keys -- confirm whether "value" was intended.
    if isinstance(value, Private):
      wrapped = value
    elif isinstance(item, dict):
      wrapped = PrivateDict(value)
    else:
      wrapped = Private(value, descr=item)
    dict.__setitem__(self, item, wrapped)

  # Wrapping is performed by __setitem__; update() only has to route every
  # key/value pair through it.

  # Logic follows cpython/Lib/UserDict.py
  # Copyright (c) 2001-2014 Python Software Foundation; All Rights Reserved
  def update(self, other=None, **kwargs):
    """Add the entries of C{other} and C{kwargs}, wrapping each value.

    @param other: C{None}, a mapping, or an iterable of key/value pairs

    """
    if other is not None:
      # Make progressively weaker assumptions about "other"
      if hasattr(other, 'iteritems'):  # iteritems saves memory and lookups
        pairs = other.iteritems()
      elif hasattr(other, 'keys'):
        pairs = ((name, other[name]) for name in other.keys())
      else:
        pairs = other
      for name, val in pairs:
        self[name] = val
    if kwargs:
      self.update(kwargs)

  def GetPrivate(self, *args):
    """Like dict.get, but extracting the value in the process.

    Takes a key and, optionally, a default that is returned as is when the
    key is missing. Without a default, a missing key raises C{KeyError}.

    >>> PrivateDict({"foo": "bar"}).GetPrivate("foo")
    'bar'
    >>> PrivateDict({"foo": "bar"}).GetPrivate("baz", "spam")
    'spam'

    """
    argc = len(args)
    if argc not in (1, 2):
      raise TypeError("GetPrivate() takes 2 arguments (%d given)" % argc)

    key = args[0]
    if argc == 2 and key not in self:
      return args[1]
    return self[key].Get()

  def Unprivate(self):
    """Turn this dict of Private() values to a dict of values.

    >>> PrivateDict({"foo": "bar"}).Unprivate()
    {'foo': 'bar'}

    @rtype: dict

    """
    return dict((name, self[name].Get()) for name in self)
384 385 386 387 388 389 390 391 392 393 394 395


def EncodeWithoutPrivateFields(obj):
  """Encoder fallback that hides private values.

  @param obj: object the JSON encoder could not serialize by itself
  @return: C{None} for L{Private} instances
  @raise TypeError: for any other object

  """
  if not isinstance(obj, Private):
    raise TypeError(repr(obj) + " is not JSON serializable")
  return None


def EncodeWithPrivateFields(obj):
  """Encoder fallback that reveals private values.

  @param obj: object the JSON encoder could not serialize by itself
  @return: the wrapped value for L{Private} instances
  @raise TypeError: for any other object

  """
  if not isinstance(obj, Private):
    raise TypeError(repr(obj) + " is not JSON serializable")
  return obj.Get()