#
#

# Copyright (C) 2007, 2008 Google Inc.
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
# General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
# 02110-1301, USA.

"""HTTP module.

"""

25
26
import logging
import mimetools
27
import OpenSSL
28
29
import select
import socket
Michael Hanselmann's avatar
Michael Hanselmann committed
30
31
32
import errno

from cStringIO import StringIO
33

34
from ganeti import constants
35
from ganeti import serializer
36
from ganeti import utils
37
38


39
40
# "Ganeti <release version>"; presumably used as the Server/User-Agent
# product token -- TODO confirm against the server and client modules
HTTP_GANETI_VERSION = "Ganeti %s" % constants.RELEASE_VERSION

41
42
43
44
45
46
47
48
49
50
# Status codes referenced by name in this package
HTTP_OK = 200
HTTP_NO_CONTENT = 204
HTTP_NOT_MODIFIED = 304

# Protocol version strings as they appear in the start line
HTTP_0_9 = "HTTP/0.9"
HTTP_1_0 = "HTTP/1.0"
HTTP_1_1 = "HTTP/1.1"

# Request methods
HTTP_GET = "GET"
HTTP_HEAD = "HEAD"
HTTP_POST = "POST"
HTTP_PUT = "PUT"
HTTP_DELETE = "DELETE"

# Header field names
HTTP_ETAG = "ETag"
HTTP_HOST = "Host"
HTTP_SERVER = "Server"
HTTP_DATE = "Date"
HTTP_USER_AGENT = "User-Agent"
HTTP_CONTENT_TYPE = "Content-Type"
HTTP_CONTENT_LENGTH = "Content-Length"
HTTP_CONNECTION = "Connection"
HTTP_KEEP_ALIVE = "Keep-Alive"
HTTP_WWW_AUTHENTICATE = "WWW-Authenticate"
HTTP_AUTHORIZATION = "Authorization"
HTTP_AUTHENTICATION_INFO = "Authentication-Info"
HTTP_ALLOW = "Allow"

# Second element of the SysCallError arguments PyOpenSSL reports when the
# peer closed the connection unexpectedly
_SSL_UNEXPECTED_EOF = "Unexpected EOF"

# Operation selectors understood by SocketOperation
SOCKOP_SEND, SOCKOP_RECV, SOCKOP_SHUTDOWN, SOCKOP_HANDSHAKE = range(4)

# Maximum number of bytes sent or received per socket call
SOCK_BUF_SIZE = 32 * 1024
79

80

81
82
83
84
85
86
87
88
class HttpError(Exception):
  """Generic failure raised from within the HTTP code.

  Intended solely for reporting errors between the internals of the
  HTTP implementation.

  """


89
90
91
92
93
94
95
96
97
class HttpConnectionClosed(Exception):
  """Signals that the connection has been closed.

  Intended solely for internal error reporting, and only as a last
  resort when the condition can't be reported any other way.

  """


98
99
100
101
102
103
104
105
class HttpSessionHandshakeUnexpectedEOF(HttpError):
  """Signals an unexpected EOF while the SSL handshake was in progress.

  Intended solely for internal error reporting.

  """


106
class HttpSocketTimeout(Exception):
  """Signals that a socket operation timed out.

  Intended solely for internal error reporting.

  """
Michael Hanselmann's avatar
Michael Hanselmann committed
112
113


114
class HttpException(Exception):
  """Base class for errors which stand for an HTTP status code.

  Subclasses set C{code} to the status code they represent.

  @cvar code: HTTP status code; C{None} on this base class
  @ivar message: Message passed to the constructor
  @ivar headers: Headers passed to the constructor

  """
  code = None
  message = None

  def __init__(self, message=None, headers=None):
    """Remembers message and headers on the instance.

    Nothing is forwarded to C{Exception.__init__}, hence C{args} stays
    empty.

    @param message: Value stored as C{self.message}
    @param headers: Value stored as C{self.headers}

    """
    Exception.__init__(self)

    self.headers = headers
    self.message = message
122
123


124
class HttpBadRequest(HttpException):
  """Status 400, Bad Request.

  See RFC2616, section 10.4.1: malformed syntax kept the server from
  understanding the request; clients SHOULD NOT send it again unless
  they modify it.

  """
  code = 400


135
136
137
class HttpUnauthorized(HttpException):
  """Status 401, Unauthorized.

  See RFC2616, section 10.4.2: user authentication is required for the
  request. Responses MUST carry a WWW-Authenticate header field
  (section 14.47) with a challenge applicable to the requested
  resource.

  """
  code = 401


147
class HttpForbidden(HttpException):
  """Status 403, Forbidden.

  See RFC2616, section 10.4.4: the request was understood, yet the
  server refuses to fulfill it. Authorization makes no difference and
  the request SHOULD NOT be repeated.

  """
  code = 403


158
class HttpNotFound(HttpException):
  """Status 404, Not Found.

  See RFC2616, section 10.4.5: nothing matching the Request-URI was
  found by the server. Whether this is a temporary or a permanent
  condition is left open.

  """
  code = 404


169
170
171
class HttpMethodNotAllowed(HttpException):
  """Status 405, Method Not Allowed.

  See RFC2616, section 10.4.6: the resource identified by the
  Request-URI does not allow the method given in the Request-Line.
  Responses MUST carry an Allow header listing the methods valid for
  the requested resource.

  """
  code = 405


class HttpRequestTimeout(HttpException):
  """Status 408, Request Timeout.

  See RFC2616, section 10.4.9: no request was produced by the client
  within the time the server was prepared to wait. Clients MAY repeat
  the unmodified request at any later time.

  """
  code = 408


class HttpConflict(HttpException):
  """Status 409, Conflict.

  See RFC2616, section 10.4.10: a conflict with the current state of
  the resource kept the request from being completed. The code is only
  allowed where the user can be expected to resolve the conflict and
  resubmit the request.

  """
  code = 409


204
class HttpGone(HttpException):
  """Status 410, Gone.

  See RFC2616, section 10.4.11: the server no longer has the requested
  resource and knows of no forwarding address. The condition is
  expected to be considered permanent.

  """
  code = 410


215
class HttpLengthRequired(HttpException):
  """Status 411, Length Required.

  See RFC2616, section 10.4.12: requests lacking a defined
  Content-Length are refused by the server. Clients MAY repeat the
  request after adding a valid Content-Length header field giving the
  length of the message-body.

  """
  code = 411


227
228
229
class HttpPreconditionFailed(HttpException):
  """Status 412, Precondition Failed.

  See RFC2616, section 10.4.13: a precondition from one or more of the
  request-header fields evaluated to false when tested on the server.

  """
  code = 412


238
class HttpInternalServerError(HttpException):
  """Status 500, Internal Server Error.

  See RFC2616, section 10.5.1: an unexpected condition kept the server
  from fulfilling the request.

  """
  code = 500


248
class HttpNotImplemented(HttpException):
  """Status 501, Not Implemented.

  See RFC2616, section 10.5.2: the functionality needed to fulfill the
  request is not supported by the server.

  """
  code = 501


258
259
260
261
262
263
264
265
266
267
268
class HttpBadGateway(HttpException):
  """Status 502, Bad Gateway.

  See RFC2616, section 10.5.3: acting as a gateway or proxy, the
  server got an invalid response from the upstream server it accessed
  while trying to fulfill the request.

  """
  code = 502


269
class HttpServiceUnavailable(HttpException):
  """Status 503, Service Unavailable.

  See RFC2616, section 10.5.4: temporary overloading or maintenance
  currently keeps the server from handling the request.

  """
  code = 503


279
280
281
282
283
284
285
286
287
288
289
290
291
class HttpGatewayTimeout(HttpException):
  """Status 504, Gateway Timeout.

  See RFC2616, section 10.5.5: acting as a gateway or proxy, the
  server got no timely response from the upstream server specified by
  the URI (e.g. HTTP, FTP, LDAP) or from some other auxiliary server
  (e.g. DNS) it had to access to complete the request.

  """
  code = 504


292
class HttpVersionNotSupported(HttpException):
  """Status 505, HTTP Version Not Supported.

  See RFC2616, section 10.5.6: the HTTP protocol version used in the
  request message is not supported by the server, or the server
  refuses to support it.

  """
  code = 505


Iustin Pop's avatar
Iustin Pop committed
302
class HttpJsonConverter: # pylint: disable-msg=W0232
  """Converts message bodies to and from JSON.

  Both directions are delegated to the C{serializer} module.

  """
  CONTENT_TYPE = "application/json"

  @staticmethod
  def Encode(data):
    """Returns C{data} serialized by C{serializer.DumpJson}.

    """
    encoded = serializer.DumpJson(data)
    return encoded

  @staticmethod
  def Decode(data):
    """Returns the result of C{serializer.LoadJson} for C{data}.

    """
    decoded = serializer.LoadJson(data)
    return decoded


314
def WaitForSocketCondition(sock, event, timeout):
  """Blocks until a condition of interest is reported for the socket.

  Apart from the requested condition(s), priority data, hang-ups,
  errors and invalid descriptors also end the wait.

  @type sock: socket
  @param sock: Wait for events on this socket
  @type event: int
  @param event: ORed condition (see select module)
  @type timeout: float or None
  @param timeout: Timeout in seconds
  @rtype: int or None
  @return: None for timeout, otherwise occurred conditions

  """
  interesting = (select.POLLERR | select.POLLHUP | select.POLLNVAL |
                 select.POLLPRI | event)

  # The poller object wants milliseconds, callers pass seconds
  if timeout is None:
    poll_timeout = None
  else:
    poll_timeout = timeout * 1000

  poller = select.poll()
  poller.register(sock, event)
  try:
    while True:
      # TODO: If the main thread receives a signal and we have no timeout, we
      # could wait forever. This should check a global "quit" flag or
      # something every so often.
      ready = poller.poll(poll_timeout)
      if not ready:
        # Nothing was reported, i.e. the timeout expired
        return None

      for (_, occurred) in ready:
        if occurred & interesting:
          return occurred
  finally:
    poller.unregister(sock)


352
def SocketOperation(sock, op, arg1, timeout):
353
354
355
356
357
358
  """Wrapper around socket functions.

  This function abstracts error handling for socket operations, especially
  for the complicated interaction with OpenSSL.

  @type sock: socket
359
  @param sock: Socket for the operation
360
361
362
363
364
365
  @type op: int
  @param op: Operation to execute (SOCKOP_* constants)
  @type arg1: any
  @param arg1: Parameter for function (if needed)
  @type timeout: None or float
  @param timeout: Timeout in seconds or None
366
  @return: Return value of socket function
367
368
369

  """
  # TODO: event_poll/event_check/override
370
  if op in (SOCKOP_SEND, SOCKOP_HANDSHAKE):
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
    event_poll = select.POLLOUT

  elif op == SOCKOP_RECV:
    event_poll = select.POLLIN

  elif op == SOCKOP_SHUTDOWN:
    event_poll = None

    # The timeout is only used when OpenSSL requests polling for a condition.
    # It is not advisable to have no timeout for shutdown.
    assert timeout

  else:
    raise AssertionError("Invalid socket operation")

386
387
388
389
390
  # Handshake is only supported by SSL sockets
  if (op == SOCKOP_HANDSHAKE and
      not isinstance(sock, OpenSSL.SSL.ConnectionType)):
    return

391
392
393
394
395
  # No override by default
  event_override = 0

  while True:
    # Poll only for certain operations and when asked for by an override
396
    if event_override or op in (SOCKOP_SEND, SOCKOP_RECV, SOCKOP_HANDSHAKE):
397
398
399
400
401
      if event_override:
        wait_for_event = event_override
      else:
        wait_for_event = event_poll

402
      event = WaitForSocketCondition(sock, wait_for_event, timeout)
403
      if event is None:
404
        raise HttpSocketTimeout()
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430

      if (op == SOCKOP_RECV and
          event & (select.POLLNVAL | select.POLLHUP | select.POLLERR)):
        return ""

      if not event & wait_for_event:
        continue

    # Reset override
    event_override = 0

    try:
      try:
        if op == SOCKOP_SEND:
          return sock.send(arg1)

        elif op == SOCKOP_RECV:
          return sock.recv(arg1)

        elif op == SOCKOP_SHUTDOWN:
          if isinstance(sock, OpenSSL.SSL.ConnectionType):
            # PyOpenSSL's shutdown() doesn't take arguments
            return sock.shutdown()
          else:
            return sock.shutdown(arg1)

431
432
433
        elif op == SOCKOP_HANDSHAKE:
          return sock.do_handshake()

434
435
436
437
438
439
440
441
442
443
444
445
446
      except OpenSSL.SSL.WantWriteError:
        # OpenSSL wants to write, poll for POLLOUT
        event_override = select.POLLOUT
        continue

      except OpenSSL.SSL.WantReadError:
        # OpenSSL wants to read, poll for POLLIN
        event_override = select.POLLIN | select.POLLPRI
        continue

      except OpenSSL.SSL.WantX509LookupError:
        continue

447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
      except OpenSSL.SSL.ZeroReturnError, err:
        # SSL Connection has been closed. In SSL 3.0 and TLS 1.0, this only
        # occurs if a closure alert has occurred in the protocol, i.e. the
        # connection has been closed cleanly. Note that this does not
        # necessarily mean that the transport layer (e.g. a socket) has been
        # closed.
        if op == SOCKOP_SEND:
          # Can happen during a renegotiation
          raise HttpConnectionClosed(err.args)
        elif op == SOCKOP_RECV:
          return ""

        # SSL_shutdown shouldn't return SSL_ERROR_ZERO_RETURN
        raise socket.error(err.args)

462
463
464
465
466
467
468
469
      except OpenSSL.SSL.SysCallError, err:
        if op == SOCKOP_SEND:
          # arg1 is the data when writing
          if err.args and err.args[0] == -1 and arg1 == "":
            # errors when writing empty strings are expected
            # and can be ignored
            return 0

470
471
        if err.args == (-1, _SSL_UNEXPECTED_EOF):
          if op == SOCKOP_RECV:
472
            return ""
473
474
475
476
          elif op == SOCKOP_HANDSHAKE:
            # Can happen if peer disconnects directly after the connection is
            # opened.
            raise HttpSessionHandshakeUnexpectedEOF(err.args)
477
478
479
480
481
482
483
484
485
486
487
488
489
490

        raise socket.error(err.args)

      except OpenSSL.SSL.Error, err:
        raise socket.error(err.args)

    except socket.error, err:
      if err.args and err.args[0] == errno.EAGAIN:
        # Ignore EAGAIN
        continue

      raise


491
def ShutdownConnection(sock, close_timeout, write_timeout, msgreader, force):
492
493
  """Closes the connection.

494
495
496
  @type sock: socket
  @param sock: Socket to be shut down
  @type close_timeout: float
Iustin Pop's avatar
Iustin Pop committed
497
498
  @param close_timeout: How long to wait for the peer to close
      the connection
499
500
501
  @type write_timeout: float
  @param write_timeout: Write timeout for shutdown
  @type msgreader: http.HttpMessageReader
Iustin Pop's avatar
Iustin Pop committed
502
503
  @param msgreader: Request message reader, used to determine whether
      peer should close connection
504
  @type force: bool
Iustin Pop's avatar
Iustin Pop committed
505
506
  @param force: Whether to forcibly close the connection without
      waiting for peer
507

508
509
510
511
512
513
  """
  #print msgreader.peer_will_close, force
  if msgreader and msgreader.peer_will_close and not force:
    # Wait for peer to close
    try:
      # Check whether it's actually closed
514
      if not SocketOperation(sock, SOCKOP_RECV, 1, close_timeout):
515
516
517
518
519
520
521
        return
    except (socket.error, HttpError, HttpSocketTimeout):
      # Ignore errors at this stage
      pass

  # Close the connection from our side
  try:
522
    # We don't care about the return value, see NOTES in SSL_shutdown(3).
523
    SocketOperation(sock, SOCKOP_SHUTDOWN, socket.SHUT_RDWR,
524
525
526
527
                    write_timeout)
  except HttpSocketTimeout:
    raise HttpError("Timeout while shutting down connection")
  except socket.error, err:
528
529
530
    # Ignore ENOTCONN
    if not (err.args and err.args[0] == errno.ENOTCONN):
      raise HttpError("Error while shutting down connection: %s" % err)
531
532


533
def Handshake(sock, write_timeout):
534
535
536
537
538
539
540
541
542
  """Shakes peer's hands.

  @type sock: socket
  @param sock: Socket to be shut down
  @type write_timeout: float
  @param write_timeout: Write timeout for handshake

  """
  try:
543
    return SocketOperation(sock, SOCKOP_HANDSHAKE, None, write_timeout)
544
545
546
547
548
549
  except HttpSocketTimeout:
    raise HttpError("Timeout during SSL handshake")
  except socket.error, err:
    raise HttpError("Error in SSL handshake: %s" % err)


550
551
552
553
554
555
556
557
558
559
560
561
562
def InitSsl():
  """Checks that OpenSSL is ready for use.

  Calling this function more than once is safe (it is idempotent).

  @raise EnvironmentError: OpenSSL's PRNG reports that it hasn't been
      seeded with enough data

  """
  prng_ready = OpenSSL.rand.status()
  if not prng_ready:
    raise EnvironmentError("OpenSSL could not collect enough entropy"
                           " for the PRNG")

  # TODO: Maybe add some additional seeding for OpenSSL's PRNG


563
564
565
566
567
568
569
570
571
572
class HttpSslParams(object):
  """Holds the PEM data of an SSL key and certificate.

  Both files are read once by the constructor; the parsed OpenSSL
  objects are created anew on every C{Get*} call.

  """
  def __init__(self, ssl_key_path, ssl_cert_path):
    """Reads key and certificate from disk.

    @type ssl_key_path: string
    @param ssl_key_path: Path to file containing SSL key in PEM format
    @type ssl_cert_path: string
    @param ssl_cert_path: Path to file containing SSL certificate
        in PEM format

    """
    self.ssl_key_pem = utils.ReadFile(ssl_key_path)
    self.ssl_cert_pem = utils.ReadFile(ssl_cert_path)

  def GetKey(self):
    """Returns the private key loaded from the stored PEM data.

    """
    crypto = OpenSSL.crypto
    return crypto.load_privatekey(crypto.FILETYPE_PEM, self.ssl_key_pem)

  def GetCertificate(self):
    """Returns the certificate loaded from the stored PEM data.

    """
    crypto = OpenSSL.crypto
    return crypto.load_certificate(crypto.FILETYPE_PEM, self.ssl_cert_pem)
587
588


589
class HttpBase(object):
  """Common functionality of HTTP server and client.

  """
  def __init__(self):
    """Initializes the SSL-related state to "unknown"/unset.

    """
    self.using_ssl = None
    self._ssl_params = None
    self._ssl_key = None
    self._ssl_cert = None

  def _CreateSocket(self, ssl_params, ssl_verify_peer):
    """Creates a TCP socket, wrapped in an SSL connection if requested.

    @type ssl_params: HttpSslParams
    @param ssl_params: SSL key and certificate; C{None} disables SSL
    @type ssl_verify_peer: bool
    @param ssl_verify_peer: Whether to require client certificate
        and compare it with our certificate
    @return: Plain socket if SSL is not used, otherwise an
        C{OpenSSL.SSL.Connection} wrapping it

    """
    self._ssl_params = ssl_params

    plain_sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)

    # SSL is enabled by passing parameters for it
    self.using_ssl = ssl_params is not None

    if not self.using_ssl:
      return plain_sock

    self._ssl_key = ssl_params.GetKey()
    self._ssl_cert = ssl_params.GetCertificate()

    # Accept whatever protocol version both sides support, except SSLv2
    context = OpenSSL.SSL.Context(OpenSSL.SSL.SSLv23_METHOD)
    context.set_options(OpenSSL.SSL.OP_NO_SSLv2)

    context.use_privatekey(self._ssl_key)
    context.use_certificate(self._ssl_cert)
    context.check_privatekey()

    if ssl_verify_peer:
      verify_mode = (OpenSSL.SSL.VERIFY_PEER |
                     OpenSSL.SSL.VERIFY_FAIL_IF_NO_PEER_CERT)
      context.set_verify(verify_mode, self._SSLVerifyCallback)

    return OpenSSL.SSL.Connection(context, plain_sock)

  def _SSLVerifyCallback(self, conn, cert, errnum, errdepth, ok):
    """Checks the certificate presented by the peer.

    Only fingerprints are compared; the peer has to use the very same
    certificate as this side does.

    @param cert: Certificate presented by the peer
    @rtype: bool
    @return: Whether both the SHA1 and the MD5 digest of the peer's
        certificate equal those of our certificate

    """
    # some parameters are unused, but this is the API
    # pylint: disable-msg=W0613
    assert self._ssl_params, "SSL not initialized"

    own_cert = self._ssl_cert

    for algo in ("sha1", "md5"):
      if own_cert.digest(algo) != cert.digest(algo):
        return False

    return True
649
650


651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
class HttpMessage(object):
  """Container for the parts of an HTTP message.

  @ivar start_line: Start line of the message
  @ivar headers: Message headers
  @ivar body: Message body
  @ivar decoded_body: Decoded form of the message body

  """
  def __init__(self):
    """Creates an empty message, all parts are C{None}.

    """
    self.start_line = None
    self.headers = None
    self.body = self.decoded_body = None


class HttpClientToServerStartLine(object):
  """Start line of an HTTP request (client to server).

  """
  def __init__(self, method, path, version):
    """Stores the three parts of the request line.

    @param method: Request method
    @param path: Request path
    @param version: HTTP version string

    """
    self.method = method
    self.path = path
    self.version = version

  def __str__(self):
    """Formats the line as "<method> <path> <version>".

    """
    parts = (self.method, self.path, self.version)
    return "%s %s %s" % parts


class HttpServerToClientStartLine(object):
  """Start line of an HTTP response (server to client).

  """
  def __init__(self, version, code, reason):
    """Stores the three parts of the status line.

    @param version: HTTP version string
    @param code: Status code
    @param reason: Reason phrase

    """
    self.version = version
    self.code = code
    self.reason = reason

  def __str__(self):
    """Formats the line as "<version> <code> <reason>".

    """
    parts = (self.version, self.code, self.reason)
    return "%s %s %s" % parts


class HttpMessageWriter(object):
  """Serializes an HTTP message and sends it over a socket.

  All the work is done by the constructor.

  """
  def __init__(self, sock, msg, write_timeout):
    """Initializes this class and writes an HTTP message to a socket.

    @type sock: socket
    @param sock: Socket to be written to
    @type msg: http.HttpMessage
    @param msg: HTTP message to be written
    @type write_timeout: float
    @param write_timeout: Write timeout for socket

    """
    self._msg = msg

    self._PrepareMessage()

    payload = self._FormatMessage()
    total = len(payload)

    offset = 0
    while offset < total:
      # Hand at most SOCK_BUF_SIZE bytes to the socket per call
      chunk = payload[offset:(offset + SOCK_BUF_SIZE)]

      # Continue after whatever the socket actually accepted
      offset += SocketOperation(sock, SOCKOP_SEND, chunk, write_timeout)

    assert offset == total, "Message wasn't sent completely"

  def _PrepareMessage(self):
    """Sets mandatory headers before the message is serialized.

    A non-empty body gets a Content-Length header with its length.

    """
    # RFC2616, section 4.3: "The presence of a message-body in a request is
    # signaled by the inclusion of a Content-Length or Transfer-Encoding header
    # field in the request's message-headers."
    body = self._msg.body
    if body:
      self._msg.headers[HTTP_CONTENT_LENGTH] = len(body)

  def _FormatMessage(self):
    """Builds the string to be sent for the message.

    Headers are left out for HTTP/0.9 messages. A body which is present
    although L{HasMessageBody} denies it is dropped with a warning.

    @rtype: string
    @return: Serialized message

    """
    msg = self._msg
    out = StringIO()

    # Start line
    out.write(str(msg.start_line))
    out.write("\r\n")

    # Headers
    if msg.start_line.version != HTTP_0_9:
      for (name, value) in msg.headers.iteritems():
        out.write("%s: %s\r\n" % (name, value))

    out.write("\r\n")

    # Message body, if there should be one
    if self.HasMessageBody():
      out.write(msg.body)

    elif msg.body:
      logging.warning("Ignoring message body")

    return out.getvalue()

  def HasMessageBody(self):
    """Tells whether the message has a body to be sent.

    Can be overridden by subclasses.

    @rtype: bool
    @return: Whether the message's body is non-empty

    """
    if self._msg.body:
      return True

    return False


class HttpMessageReader(object):
  """Reads HTTP message from socket.

  The constructor reads and parses a complete message and stores the
  result in the message object passed to it. Subclasses have to provide
  L{ParseStartLine} and can set the C{*_LENGTH_MAX} limits.

  @cvar START_LINE_LENGTH_MAX: Maximum length of the start line, None
      for no limit
  @cvar HEADER_LENGTH_MAX: Maximum length of the headers, None for no
      limit
  @ivar peer_will_close: Whether the peer is expected to close the
      connection after this message; set while parsing the headers

  """
  # Length limits
  START_LINE_LENGTH_MAX = None
  HEADER_LENGTH_MAX = None

  # Parser state machine
  PS_START_LINE = "start-line"
  PS_HEADERS = "headers"
  PS_BODY = "entity-body"
  PS_COMPLETE = "complete"

  def __init__(self, sock, msg, read_timeout):
    """Reads an HTTP message from a socket.

    @type sock: socket
    @param sock: Socket to be read from
    @type msg: http.HttpMessage
    @param msg: Object for the read message
    @type read_timeout: float
    @param read_timeout: Read timeout for socket
    @raise HttpError: Connection was closed before start line and
        headers were complete, or a length limit was exceeded

    """
    self.sock = sock
    self.msg = msg

    self.start_line_buffer = None
    self.header_buffer = StringIO()
    self.body_buffer = StringIO()
    self.parser_status = self.PS_START_LINE
    self.content_length = None
    self.peer_will_close = None

    buf = ""
    eof = False
    while self.parser_status != self.PS_COMPLETE:
      # TODO: Don't read more than necessary (Content-Length), otherwise
      # data might be lost and/or an error could occur
      data = SocketOperation(sock, SOCKOP_RECV, SOCK_BUF_SIZE, read_timeout)

      # An empty read means the peer has closed the connection
      if data:
        buf += data
      else:
        eof = True

      # Do some parsing and error checking while more data arrives
      buf = self._ContinueParsing(buf, eof)

      # Must be done only after the buffer has been evaluated
      # TODO: Connection-length < len(data read) and connection closed
      if (eof and
          self.parser_status in (self.PS_START_LINE,
                                 self.PS_HEADERS)):
        raise HttpError("Connection closed prematurely")

    # Parse rest
    buf = self._ContinueParsing(buf, True)

    assert self.parser_status == self.PS_COMPLETE
    assert not buf, "Parser didn't read full response"

    msg.body = self.body_buffer.getvalue()

    # TODO: Content-type, error handling
    if msg.body:
      msg.decoded_body = HttpJsonConverter().Decode(msg.body)
    else:
      msg.decoded_body = None

    if msg.decoded_body:
      logging.debug("Message body: %s", msg.decoded_body)

  def _ContinueParsing(self, buf, eof):
    """Main function for HTTP message state machine.

    Consumes as much of the buffer as the current parser state allows
    and advances the state (start line, headers, body, complete).

    @type buf: string
    @param buf: Receive buffer
    @type eof: bool
    @param eof: Whether we've reached EOF on the socket
    @rtype: string
    @return: Updated receive buffer
    @raise HttpError: Start line or headers exceed the configured limit

    """
    # TODO: Use offset instead of slicing when possible
    if self.parser_status == self.PS_START_LINE:
      # Expect start line
      while True:
        idx = buf.find("\r\n")

        # RFC2616, section 4.1: "In the interest of robustness, servers SHOULD
        # ignore any empty line(s) received where a Request-Line is expected.
        # In other words, if the server is reading the protocol stream at the
        # beginning of a message and receives a CRLF first, it should ignore
        # the CRLF."
        if idx == 0:
          # TODO: Limit number of CRLFs/empty lines for safety?
          # Drop the leading CRLF and look at what follows it. (Keeping only
          # the first two characters instead would find the same CRLF over
          # and over again and never terminate.)
          buf = buf[2:]
          continue

        if idx > 0:
          self.start_line_buffer = buf[:idx]

          self._CheckStartLineLength(len(self.start_line_buffer))

          # Remove status line, including CRLF
          buf = buf[idx + 2:]

          self.msg.start_line = self.ParseStartLine(self.start_line_buffer)

          self.parser_status = self.PS_HEADERS
        else:
          # Check whether incoming data is getting too large, otherwise we just
          # fill our read buffer.
          self._CheckStartLineLength(len(buf))

        break

    # TODO: Handle messages without headers
    if self.parser_status == self.PS_HEADERS:
      # Wait for header end
      idx = buf.find("\r\n\r\n")
      if idx >= 0:
        # Keep the CRLF terminating the last header line
        self.header_buffer.write(buf[:idx + 2])

        self._CheckHeaderLength(self.header_buffer.tell())

        # Remove headers, including CRLF
        buf = buf[idx + 4:]

        self._ParseHeaders()

        self.parser_status = self.PS_BODY
      else:
        # Check whether incoming data is getting too large, otherwise we just
        # fill our read buffer.
        self._CheckHeaderLength(len(buf))

    if self.parser_status == self.PS_BODY:
      # TODO: Implement max size for body_buffer
      self.body_buffer.write(buf)
      buf = ""

      # Check whether we've read everything
      #
      # RFC2616, section 4.4: "When a message-body is included with a message,
      # the transfer-length of that body is determined by one of the following
      # [...] 5. By the server closing the connection. (Closing the connection
      # cannot be used to indicate the end of a request body, since that would
      # leave no possibility for the server to send back a response.)"
      #
      # TODO: Error when buffer length > Content-Length header
      if (eof or
          self.content_length is None or
          (self.content_length is not None and
           self.body_buffer.tell() >= self.content_length)):
        self.parser_status = self.PS_COMPLETE

    return buf

  def _CheckStartLineLength(self, length):
    """Limits the start line buffer size.

    @type length: int
    @param length: Buffer size
    @raise HttpError: C{length} exceeds L{START_LINE_LENGTH_MAX}

    """
    if (self.START_LINE_LENGTH_MAX is not None and
        length > self.START_LINE_LENGTH_MAX):
      raise HttpError("Start line longer than %d chars" %
                       self.START_LINE_LENGTH_MAX)

  def _CheckHeaderLength(self, length):
    """Limits the header buffer size.

    @type length: int
    @param length: Buffer size
    @raise HttpError: C{length} exceeds L{HEADER_LENGTH_MAX}

    """
    if (self.HEADER_LENGTH_MAX is not None and
        length > self.HEADER_LENGTH_MAX):
      raise HttpError("Headers longer than %d chars" % self.HEADER_LENGTH_MAX)

  def ParseStartLine(self, start_line):
    """Parses the start line of a message.

    Must be overridden by subclass.

    @type start_line: string
    @param start_line: Start line string
    @return: Object stored as the message's C{start_line}

    """
    raise NotImplementedError()

  def _WillPeerCloseConnection(self):
    """Evaluate whether peer will close the connection.

    @rtype: bool
    @return: Whether peer will close the connection

    """
    # RFC2616, section 14.10: "HTTP/1.1 defines the "close" connection option
    # for the sender to signal that the connection will be closed after
    # completion of the response. For example,
    #
    #        Connection: close
    #
    # in either the request or the response header fields indicates that the
    # connection SHOULD NOT be considered `persistent' (section 8.1) after the
    # current request/response is complete."

    hdr_connection = self.msg.headers.get(HTTP_CONNECTION, None)
    if hdr_connection:
      hdr_connection = hdr_connection.lower()

    # An HTTP/1.1 server is assumed to stay open unless explicitly closed.
    if self.msg.start_line.version == HTTP_1_1:
      # bool() so that a missing or empty header gives False instead of
      # leaking None or an empty string to the caller
      return bool(hdr_connection and "close" in hdr_connection)

    # Some HTTP/1.0 implementations have support for persistent connections,
    # using rules different than HTTP/1.1.

    # For older HTTP, Keep-Alive indicates persistent connection.
    if self.msg.headers.get(HTTP_KEEP_ALIVE):
      return False

    # At least Akamai returns a "Connection: Keep-Alive" header, which was
    # supposed to be sent by the client.
    if hdr_connection and "keep-alive" in hdr_connection:
      return False

    return True

  def _ParseHeaders(self):
    """Parses the headers.

    This function also adjusts internal variables based on header values.

    RFC2616, section 4.3: The presence of a message-body in a request is
    signaled by the inclusion of a Content-Length or Transfer-Encoding header
    field in the request's message-headers.

    """
    # Parse headers
    self.header_buffer.seek(0, 0)
    self.msg.headers = mimetools.Message(self.header_buffer, 0)

    self.peer_will_close = self._WillPeerCloseConnection()

    # Do we have a Content-Length header?
    hdr_content_length = self.msg.headers.get(HTTP_CONTENT_LENGTH, None)
    if hdr_content_length:
      # Values which aren't non-negative integers are treated like a
      # missing header
      try:
        self.content_length = int(hdr_content_length)
      except ValueError:
        self.content_length = None
      if self.content_length is not None and self.content_length < 0:
        self.content_length = None

    # if the connection remains open and a content-length was not provided,
    # then assume that the connection WILL close.
    if self.content_length is None:
      self.peer_will_close = True