#
#

# Copyright (C) 2007, 2008 Google Inc.
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
# General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
# 02110-1301, USA.

21
"""HTTP module.
22 23 24

"""

25 26
import logging
import mimetools
27
import OpenSSL
28 29
import select
import socket
Michael Hanselmann's avatar
Michael Hanselmann committed
30 31 32
import errno

from cStringIO import StringIO
33

34
from ganeti import constants
35
from ganeti import serializer
36
from ganeti import utils
37 38


39 40
# Identification string containing the Ganeti release version
HTTP_GANETI_VERSION = "Ganeti %s" % constants.RELEASE_VERSION

# HTTP status codes
HTTP_OK = 200
HTTP_NO_CONTENT = 204
HTTP_NOT_MODIFIED = 304

# HTTP protocol version strings as they appear in start lines
HTTP_0_9 = "HTTP/0.9"
HTTP_1_0 = "HTTP/1.0"
HTTP_1_1 = "HTTP/1.1"

# HTTP request methods
HTTP_GET = "GET"
HTTP_HEAD = "HEAD"
HTTP_POST = "POST"
HTTP_PUT = "PUT"
HTTP_DELETE = "DELETE"

# HTTP header field names
HTTP_ETAG = "ETag"
HTTP_HOST = "Host"
HTTP_SERVER = "Server"
HTTP_DATE = "Date"
HTTP_USER_AGENT = "User-Agent"
HTTP_CONTENT_TYPE = "Content-Type"
HTTP_CONTENT_LENGTH = "Content-Length"
HTTP_CONNECTION = "Connection"
HTTP_KEEP_ALIVE = "Keep-Alive"
HTTP_WWW_AUTHENTICATE = "WWW-Authenticate"
HTTP_AUTHORIZATION = "Authorization"
HTTP_AUTHENTICATION_INFO = "Authentication-Info"
HTTP_ALLOW = "Allow"

# Second element of the OpenSSL.SSL.SysCallError arguments that
# SocketOperation compares against to recognize an unexpected EOF
_SSL_UNEXPECTED_EOF = "Unexpected EOF"

# Socket operations
(SOCKOP_SEND,
 SOCKOP_RECV,
 SOCKOP_SHUTDOWN,
 SOCKOP_HANDSHAKE) = range(4)

# send/receive quantum
SOCK_BUF_SIZE = 32768
79

80

81 82 83 84 85 86 87 88
class HttpError(Exception):
  """Internal exception for HTTP errors.

  This should only be used for internal error reporting.

  Within this module it is raised by L{ShutdownConnection}, L{Handshake}
  and L{HttpMessageReader}; L{HttpSessionHandshakeUnexpectedEOF} is derived
  from it.

  """


89 90 91 92 93 94 95 96 97
class HttpConnectionClosed(Exception):
  """Internal exception for a closed connection.

  This should only be used for internal error reporting. Only use
  it if there's no other way to report this condition.

  Within this module it is raised by L{SocketOperation} when a send
  operation hits C{OpenSSL.SSL.ZeroReturnError}. Note that it is not
  derived from L{HttpError}.

  """


98 99 100 101 102 103 104 105
class HttpSessionHandshakeUnexpectedEOF(HttpError):
  """Internal exception for errors during SSL handshake.

  This should only be used for internal error reporting.

  Within this module it is raised by L{SocketOperation} when the handshake
  operation fails with C{OpenSSL.SSL.SysCallError} carrying the arguments
  C{(-1, "Unexpected EOF")}.

  """


106
class HttpSocketTimeout(Exception):
  """Internal exception for socket timeouts.

  This should only be used for internal error reporting.

  Within this module it is raised by L{SocketOperation} when waiting for a
  socket condition returns C{None}. Note that it is not derived from
  L{HttpError}.

  """
Michael Hanselmann's avatar
Michael Hanselmann committed
112 113


114
class HttpException(Exception):
  """Base class for exceptions which stand for an HTTP error status.

  Subclasses set L{code} to the numeric HTTP status code they represent.

  @ivar message: Error message given by the raiser, or None
  @ivar headers: Value of the C{headers} argument, or None (presumably
      additional response headers; the consumer is not part of this module)

  """
  # Numeric HTTP status code, overridden by subclasses
  code = None
  # Default message, overridden per instance by the constructor
  message = None

  def __init__(self, message=None, headers=None):
    """Initializes this class.

    @type message: string or None
    @param message: Error message
    @type headers: any
    @param headers: Stored unchanged in the C{headers} attribute

    """
    # Hand the message to the base class so that str(), repr() and "args"
    # carry it; without this a logged exception has no text at all. When
    # there is no message the arguments stay empty instead of "None".
    if message is None:
      Exception.__init__(self)
    else:
      Exception.__init__(self, message)

    self.message = message
    self.headers = headers
122 123


124
class HttpBadRequest(HttpException):
  """400 Bad Request

  RFC2616, 10.4.1: The request could not be understood by the server
  due to malformed syntax. The client SHOULD NOT repeat the request
  without modifications.

  """
  # Numeric HTTP status code represented by this exception
  code = 400


135 136 137
class HttpUnauthorized(HttpException):
  """401 Unauthorized

  RFC2616, 10.4.2: The request requires user authentication. The
  response MUST include a WWW-Authenticate header field (section
  14.47) containing a challenge applicable to the requested resource.

  """
  # Numeric HTTP status code represented by this exception
  code = 401


147
class HttpForbidden(HttpException):
  """403 Forbidden

  RFC2616, 10.4.4: The server understood the request, but is refusing
  to fulfill it.  Authorization will not help and the request SHOULD
  NOT be repeated.

  """
  # Numeric HTTP status code represented by this exception
  code = 403


158
class HttpNotFound(HttpException):
  """404 Not Found

  RFC2616, 10.4.5: The server has not found anything matching the
  Request-URI.  No indication is given of whether the condition is
  temporary or permanent.

  """
  # Numeric HTTP status code represented by this exception
  code = 404


169 170 171
class HttpMethodNotAllowed(HttpException):
  """405 Method Not Allowed

  RFC2616, 10.4.6: The method specified in the Request-Line is not
  allowed for the resource identified by the Request-URI. The response
  MUST include an Allow header containing a list of valid methods for
  the requested resource.

  """
  # Numeric HTTP status code represented by this exception
  code = 405


class HttpRequestTimeout(HttpException):
  """408 Request Timeout

  RFC2616, 10.4.9: The client did not produce a request within the
  time that the server was prepared to wait. The client MAY repeat the
  request without modifications at any later time.

  """
  # Numeric HTTP status code represented by this exception
  code = 408


class HttpConflict(HttpException):
  """409 Conflict

  RFC2616, 10.4.10: The request could not be completed due to a
  conflict with the current state of the resource. This code is only
  allowed in situations where it is expected that the user might be
  able to resolve the conflict and resubmit the request.

  """
  # Numeric HTTP status code represented by this exception
  code = 409


204
class HttpGone(HttpException):
  """410 Gone

  RFC2616, 10.4.11: The requested resource is no longer available at
  the server and no forwarding address is known. This condition is
  expected to be considered permanent.

  """
  # Numeric HTTP status code represented by this exception
  code = 410


215
class HttpLengthRequired(HttpException):
  """411 Length Required

  RFC2616, 10.4.12: The server refuses to accept the request without a
  defined Content-Length. The client MAY repeat the request if it adds
  a valid Content-Length header field containing the length of the
  message-body in the request message.

  """
  # Numeric HTTP status code represented by this exception
  code = 411


227 228 229
class HttpPreconditionFailed(HttpException):
  """412 Precondition Failed

  RFC2616, 10.4.13: The precondition given in one or more of the
  request-header fields evaluated to false when it was tested on the
  server.

  """
  # Numeric HTTP status code represented by this exception
  code = 412


238
class HttpInternalServerError(HttpException):
  """500 Internal Server Error

  RFC2616, 10.5.1: The server encountered an unexpected condition
  which prevented it from fulfilling the request.

  """
  # Numeric HTTP status code represented by this exception
  code = 500


248
class HttpNotImplemented(HttpException):
  """501 Not Implemented

  RFC2616, 10.5.2: The server does not support the functionality
  required to fulfill the request.

  """
  # Numeric HTTP status code represented by this exception
  code = 501


258 259 260 261 262 263 264 265 266 267 268
class HttpBadGateway(HttpException):
  """502 Bad Gateway

  RFC2616, 10.5.3: The server, while acting as a gateway or proxy,
  received an invalid response from the upstream server it accessed in
  attempting to fulfill the request.

  """
  # Numeric HTTP status code represented by this exception
  code = 502


269
class HttpServiceUnavailable(HttpException):
  """503 Service Unavailable

  RFC2616, 10.5.4: The server is currently unable to handle the
  request due to a temporary overloading or maintenance of the server.

  """
  # Numeric HTTP status code represented by this exception
  code = 503


279 280 281 282 283 284 285 286 287 288 289 290 291
class HttpGatewayTimeout(HttpException):
  """504 Gateway Timeout

  RFC2616, 10.5.5: The server, while acting as a gateway or proxy, did
  not receive a timely response from the upstream server specified by
  the URI (e.g.  HTTP, FTP, LDAP) or some other auxiliary server
  (e.g. DNS) it needed to access in attempting to complete the
  request.

  """
  # Numeric HTTP status code represented by this exception
  code = 504


292
class HttpVersionNotSupported(HttpException):
  """505 HTTP Version Not Supported

  RFC2616, 10.5.6: The server does not support, or refuses to support,
  the HTTP protocol version that was used in the request message.

  """
  # Numeric HTTP status code represented by this exception
  code = 505


Iustin Pop's avatar
Iustin Pop committed
302
class HttpJsonConverter: # pylint: disable-msg=W0232
  """Converts message bodies to and from JSON.

  Both directions are delegated to the C{serializer} module.

  """
  # Content type matching the encoding produced by this converter
  CONTENT_TYPE = "application/json"

  @staticmethod
  def Encode(data):
    """Encodes data by passing it to C{serializer.DumpJson}.

    @param data: Data to be encoded
    @return: Return value of C{serializer.DumpJson}

    """
    return serializer.DumpJson(data)

  @staticmethod
  def Decode(data):
    """Decodes data by passing it to C{serializer.LoadJson}.

    @param data: Encoded data
    @return: Return value of C{serializer.LoadJson}

    """
    return serializer.LoadJson(data)


314
def SocketOperation(sock, op, arg1, timeout):
  """Wrapper around socket functions.

  This function abstracts error handling for socket operations, especially
  for the complicated interaction with OpenSSL.

  For send, receive and handshake operations the socket is first polled for
  the matching condition; the socket function is then called and, if OpenSSL
  asks for a read or write first, the poll is repeated for that condition.
  The timeout applies to each individual poll, not to the whole call.

  @type sock: socket
  @param sock: Socket for the operation
  @type op: int
  @param op: Operation to execute (SOCKOP_* constants)
  @type arg1: any
  @param arg1: Parameter for function (if needed)
  @type timeout: None or float
  @param timeout: Timeout in seconds or None
  @return: Return value of socket function; C{None} for a handshake on a
      non-SSL socket and when polling reported POLLNVAL, POLLHUP or POLLERR;
      an empty string for a receive on an SSL connection which was closed
  @raise HttpSocketTimeout: If waiting for the socket condition timed out
  @raise HttpConnectionClosed: If an SSL connection was closed while sending
  @raise HttpSessionHandshakeUnexpectedEOF: If the peer closed the connection
      during the SSL handshake
  @raise socket.error: For all other socket and OpenSSL errors

  """
  # TODO: event_poll/event_check/override
  if op in (SOCKOP_SEND, SOCKOP_HANDSHAKE):
    event_poll = select.POLLOUT

  elif op == SOCKOP_RECV:
    event_poll = select.POLLIN

  elif op == SOCKOP_SHUTDOWN:
    event_poll = None

    # The timeout is only used when OpenSSL requests polling for a condition.
    # It is not advisable to have no timeout for shutdown.
    assert timeout

  else:
    raise AssertionError("Invalid socket operation")

  # Handshake is only supported by SSL sockets
  if (op == SOCKOP_HANDSHAKE and
      not isinstance(sock, OpenSSL.SSL.ConnectionType)):
    return

  # No override by default
  event_override = 0

  while True:
    # Poll only for certain operations and when asked for by an override
    if event_override or op in (SOCKOP_SEND, SOCKOP_RECV, SOCKOP_HANDSHAKE):
      if event_override:
        wait_for_event = event_override
      else:
        wait_for_event = event_poll

      event = utils.WaitForSocketCondition(sock, wait_for_event, timeout)
      if event is None:
        raise HttpSocketTimeout()

      if event & (select.POLLNVAL | select.POLLHUP | select.POLLERR):
        # Let the socket functions handle these
        # NOTE(review): "break" leaves the polling loop, so the function
        # returns None here without calling the socket function, which does
        # not match the comment above. HttpMessageWriter adds the return
        # value of a send operation to an integer -- confirm this is
        # intended.
        break

      if not event & wait_for_event:
        # Woken up for another event than the one waited for, poll again
        continue

    # Reset override
    event_override = 0

    # The inner "try" translates OpenSSL errors into return values, retries
    # or socket.error; the outer one filters EAGAIN from all socket errors,
    # including those raised by the inner handlers.
    try:
      try:
        if op == SOCKOP_SEND:
          return sock.send(arg1)

        elif op == SOCKOP_RECV:
          return sock.recv(arg1)

        elif op == SOCKOP_SHUTDOWN:
          if isinstance(sock, OpenSSL.SSL.ConnectionType):
            # PyOpenSSL's shutdown() doesn't take arguments
            return sock.shutdown()
          else:
            return sock.shutdown(arg1)

        elif op == SOCKOP_HANDSHAKE:
          return sock.do_handshake()

      except OpenSSL.SSL.WantWriteError:
        # OpenSSL wants to write, poll for POLLOUT
        event_override = select.POLLOUT
        continue

      except OpenSSL.SSL.WantReadError:
        # OpenSSL wants to read, poll for POLLIN
        event_override = select.POLLIN | select.POLLPRI
        continue

      except OpenSSL.SSL.WantX509LookupError:
        # Retry without an override, i.e. with the operation's default poll
        continue

      except OpenSSL.SSL.ZeroReturnError, err:
        # SSL Connection has been closed. In SSL 3.0 and TLS 1.0, this only
        # occurs if a closure alert has occurred in the protocol, i.e. the
        # connection has been closed cleanly. Note that this does not
        # necessarily mean that the transport layer (e.g. a socket) has been
        # closed.
        if op == SOCKOP_SEND:
          # Can happen during a renegotiation
          raise HttpConnectionClosed(err.args)
        elif op == SOCKOP_RECV:
          # Reported like EOF on a plain socket
          return ""

        # SSL_shutdown shouldn't return SSL_ERROR_ZERO_RETURN
        raise socket.error(err.args)

      except OpenSSL.SSL.SysCallError, err:
        if op == SOCKOP_SEND:
          # arg1 is the data when writing
          if err.args and err.args[0] == -1 and arg1 == "":
            # errors when writing empty strings are expected
            # and can be ignored
            return 0

        if err.args == (-1, _SSL_UNEXPECTED_EOF):
          if op == SOCKOP_RECV:
            # Reported like EOF on a plain socket
            return ""
          elif op == SOCKOP_HANDSHAKE:
            # Can happen if peer disconnects directly after the connection is
            # opened.
            raise HttpSessionHandshakeUnexpectedEOF(err.args)

        raise socket.error(err.args)

      except OpenSSL.SSL.Error, err:
        # Any other OpenSSL error is reported as a socket error
        raise socket.error(err.args)

    except socket.error, err:
      if err.args and err.args[0] == errno.EAGAIN:
        # Ignore EAGAIN
        continue

      raise


453
def ShutdownConnection(sock, close_timeout, write_timeout, msgreader, force):
  """Shuts down a connection, optionally giving the peer time to close first.

  @type sock: socket
  @param sock: Socket to be shut down
  @type close_timeout: float
  @param close_timeout: How long to wait for the peer to close
      the connection
  @type write_timeout: float
  @param write_timeout: Write timeout for shutdown
  @type msgreader: http.HttpMessageReader
  @param msgreader: Request message reader, used to determine whether
      peer should close connection
  @type force: bool
  @param force: Whether to forcibly close the connection without
      waiting for peer
  @raise HttpError: If shutting down the socket timed out or failed with
      an error other than ENOTCONN

  """
  if not force and msgreader and msgreader.peer_will_close:
    # The peer announced that it will close the connection. Try to read
    # a single byte; an empty result means it has actually done so.
    try:
      peer_closed = not SocketOperation(sock, SOCKOP_RECV, 1, close_timeout)
    except (socket.error, HttpError, HttpSocketTimeout):
      # Errors at this stage are not interesting, our side is closed below
      peer_closed = False

    if peer_closed:
      return

  # Close the connection from our side. The return value is of no interest,
  # see NOTES in SSL_shutdown(3).
  try:
    SocketOperation(sock, SOCKOP_SHUTDOWN, socket.SHUT_RDWR, write_timeout)
  except HttpSocketTimeout:
    raise HttpError("Timeout while shutting down connection")
  except socket.error as err:
    # A socket which isn't connected (anymore) is not an error here
    not_connected = err.args and err.args[0] == errno.ENOTCONN
    if not not_connected:
      raise HttpError("Error while shutting down connection: %s" % err)
493 494


495
def Handshake(sock, write_timeout):
  """Runs the SSL handshake with the peer.

  @type sock: socket
  @param sock: Socket on which the handshake is done
  @type write_timeout: float
  @param write_timeout: Write timeout for handshake
  @return: Return value of the handshake socket operation
  @raise HttpError: If the handshake timed out or failed with a socket error

  """
  try:
    result = SocketOperation(sock, SOCKOP_HANDSHAKE, None, write_timeout)
  except HttpSocketTimeout:
    raise HttpError("Timeout during SSL handshake")
  except socket.error as err:
    raise HttpError("Error in SSL handshake: %s" % err)
  else:
    return result


512 513 514 515 516 517 518 519 520 521 522 523 524
def InitSsl():
  """Checks that OpenSSL is ready to be used.

  Calling this function more than once is harmless.

  @raise EnvironmentError: If OpenSSL's PRNG reports that it hasn't been
      seeded with enough data

  """
  # TODO: Maybe add some additional seeding for OpenSSL's PRNG
  if OpenSSL.rand.status():
    return

  raise EnvironmentError("OpenSSL could not collect enough entropy"
                         " for the PRNG")


525 526 527 528 529 530 531 532 533 534
class HttpSslParams(object):
  """Holds the PEM data of an SSL key and certificate.

  The files are read once, when the object is created; the OpenSSL objects
  are created from the stored data on every call of the getters.

  """
  def __init__(self, ssl_key_path, ssl_cert_path):
    """Reads key and certificate from disk.

    @type ssl_key_path: string
    @param ssl_key_path: Path to file containing SSL key in PEM format
    @type ssl_cert_path: string
    @param ssl_cert_path: Path to file containing SSL certificate
        in PEM format

    """
    key_pem = utils.ReadFile(ssl_key_path)
    cert_pem = utils.ReadFile(ssl_cert_path)

    self.ssl_key_pem = key_pem
    self.ssl_cert_pem = cert_pem

  def GetKey(self):
    """Returns the private key loaded from the stored PEM data.

    """
    crypto = OpenSSL.crypto
    return crypto.load_privatekey(crypto.FILETYPE_PEM, self.ssl_key_pem)

  def GetCertificate(self):
    """Returns the certificate loaded from the stored PEM data.

    """
    crypto = OpenSSL.crypto
    return crypto.load_certificate(crypto.FILETYPE_PEM, self.ssl_cert_pem)
549 550


551
class HttpBase(object):
  """Common functionality of HTTP server and client.

  """
  def __init__(self):
    # Nothing is known about SSL until _CreateSocket has been called
    self.using_ssl = None
    self._ssl_params = None
    self._ssl_key = self._ssl_cert = None

  def _CreateSocket(self, ssl_params, ssl_verify_peer):
    """Creates a TCP socket, wrapped into an SSL connection if requested.

    @type ssl_params: HttpSslParams
    @param ssl_params: SSL key and certificate
    @type ssl_verify_peer: bool
    @param ssl_verify_peer: Whether to require client certificate
        and compare it with our certificate
    @return: Plain socket if C{ssl_params} is None, otherwise an
        C{OpenSSL.SSL.Connection} around it

    """
    self._ssl_params = ssl_params

    plain_sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)

    # SSL is enabled by passing key and certificate
    self.using_ssl = ssl_params is not None
    if not self.using_ssl:
      return plain_sock

    ssl = OpenSSL.SSL

    self._ssl_key = ssl_params.GetKey()
    self._ssl_cert = ssl_params.GetCertificate()

    context = ssl.Context(ssl.SSLv23_METHOD)
    context.set_options(ssl.OP_NO_SSLv2)

    context.use_privatekey(self._ssl_key)
    context.use_certificate(self._ssl_cert)
    context.check_privatekey()

    if ssl_verify_peer:
      verify_mode = ssl.VERIFY_PEER | ssl.VERIFY_FAIL_IF_NO_PEER_CERT
      context.set_verify(verify_mode, self._SSLVerifyCallback)

    return ssl.Connection(context, plain_sock)

  def _SSLVerifyCallback(self, conn, cert, errnum, errdepth, ok):
    """Checks the peer's certificate against our own.

    Only the SHA1 and MD5 digests are compared, hence the peer has to use
    the very same certificate as we do.

    @return: Whether both digests of C{cert} match those of our certificate

    """
    # some parameters are unused, but this is the API
    # pylint: disable-msg=W0613
    assert self._ssl_params, "SSL not initialized"

    for algorithm in ("sha1", "md5"):
      if self._ssl_cert.digest(algorithm) != cert.digest(algorithm):
        return False

    return True
611 612


613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654
class HttpMessage(object):
  """Container for the parts of an HTTP message.

  All attributes start out as None.

  """
  def __init__(self):
    # Start line and headers
    self.start_line = self.headers = None
    # Raw body and its decoded form
    self.body = self.decoded_body = None


class HttpClientToServerStartLine(object):
  """Start line of an HTTP request (method, path and protocol version).

  """
  def __init__(self, method, path, version):
    self.method, self.path, self.version = method, path, version

  def __str__(self):
    # Format is "<method> <path> <version>"
    fields = (self.method, self.path, self.version)
    return "%s %s %s" % fields


class HttpServerToClientStartLine(object):
  """Start line of an HTTP response (protocol version, code and reason).

  """
  def __init__(self, version, code, reason):
    self.version, self.code, self.reason = version, code, reason

  def __str__(self):
    # Format is "<version> <code> <reason>"
    fields = (self.version, self.code, self.reason)
    return "%s %s %s" % fields


class HttpMessageWriter(object):
  """Serializes an HTTP message and sends it over a socket.

  All the work is done by the constructor.

  """
  def __init__(self, sock, msg, write_timeout):
    """Prepares, formats and sends an HTTP message.

    @type sock: socket
    @param sock: Socket to be written to
    @type msg: http.HttpMessage
    @param msg: HTTP message to be written
    @type write_timeout: float
    @param write_timeout: Write timeout for socket

    """
    self._msg = msg

    self._PrepareMessage()

    payload = self._FormatMessage()
    total = len(payload)

    offset = 0
    while offset < total:
      # Hand over at most SOCK_BUF_SIZE bytes per send operation and go on
      # after whatever part of them was actually sent
      chunk = payload[offset:offset + SOCK_BUF_SIZE]
      offset += SocketOperation(sock, SOCKOP_SEND, chunk, write_timeout)

    assert offset == total, "Message wasn't sent completely"

  def _PrepareMessage(self):
    """Sets the headers which are mandatory for the message.

    """
    # RFC2616, section 4.3: "The presence of a message-body in a request is
    # signaled by the inclusion of a Content-Length or Transfer-Encoding header
    # field in the request's message-headers."
    body = self._msg.body
    if body:
      self._msg.headers[HTTP_CONTENT_LENGTH] = len(body)

  def _FormatMessage(self):
    """Builds the string representation of the message.

    @rtype: string
    @return: Start line, headers (unless the version is HTTP/0.9), an empty
        line and, if L{HasMessageBody} says so, the body

    """
    msg = self._msg
    out = StringIO()

    # Start line
    out.write(str(msg.start_line))
    out.write("\r\n")

    # Headers
    if msg.start_line.version != HTTP_0_9:
      for (name, value) in msg.headers.iteritems():
        out.write("%s: %s\r\n" % (name, value))

    # Empty line, written even if no headers were written
    out.write("\r\n")

    # Body, if it is to be sent
    if self.HasMessageBody():
      out.write(msg.body)
    elif msg.body:
      logging.warning("Ignoring message body")

    return out.getvalue()

  def HasMessageBody(self):
    """Tells whether the message body is to be written.

    Can be overridden by subclasses.

    """
    if self._msg.body:
      return True
    return False


class HttpMessageReader(object):
  """Reads HTTP message from socket.

  The message is read and parsed by the constructor, which fills in the
  message object passed to it. Subclasses have to implement
  L{ParseStartLine} and can set the length limits.

  @ivar peer_will_close: Whether the peer is expected to close the
      connection after this message (set while parsing the headers)
  @ivar content_length: Value of the Content-Length header or None

  """
  # Length limits
  START_LINE_LENGTH_MAX = None
  HEADER_LENGTH_MAX = None

  # Parser state machine
  PS_START_LINE = "start-line"
  PS_HEADERS = "headers"
  PS_BODY = "entity-body"
  PS_COMPLETE = "complete"

  def __init__(self, sock, msg, read_timeout):
    """Reads an HTTP message from a socket.

    @type sock: socket
    @param sock: Socket to be read from
    @type msg: http.HttpMessage
    @param msg: Object for the read message
    @type read_timeout: float
    @param read_timeout: Read timeout for socket
    @raise HttpError: If the connection is closed before start line and
        headers were received or if a length limit is exceeded

    """
    self.sock = sock
    self.msg = msg

    self.start_line_buffer = None
    self.header_buffer = StringIO()
    self.body_buffer = StringIO()
    self.parser_status = self.PS_START_LINE
    self.content_length = None
    self.peer_will_close = None

    buf = ""
    eof = False
    while self.parser_status != self.PS_COMPLETE:
      # TODO: Don't read more than necessary (Content-Length), otherwise
      # data might be lost and/or an error could occur
      data = SocketOperation(sock, SOCKOP_RECV, SOCK_BUF_SIZE, read_timeout)

      # Anything false (empty string, None) is treated as end of file
      if data:
        buf += data
      else:
        eof = True

      # Do some parsing and error checking while more data arrives
      buf = self._ContinueParsing(buf, eof)

      # Must be done only after the buffer has been evaluated
      # TODO: Connection-length < len(data read) and connection closed
      if (eof and
          self.parser_status in (self.PS_START_LINE,
                                 self.PS_HEADERS)):
        raise HttpError("Connection closed prematurely")

    # Parse rest
    buf = self._ContinueParsing(buf, True)

    assert self.parser_status == self.PS_COMPLETE
    assert not buf, "Parser didn't read full response"

    msg.body = self.body_buffer.getvalue()

    # TODO: Content-type, error handling
    if msg.body:
      msg.decoded_body = HttpJsonConverter().Decode(msg.body)
    else:
      msg.decoded_body = None

    if msg.decoded_body:
      logging.debug("Message body: %s", msg.decoded_body)

  def _ContinueParsing(self, buf, eof):
    """Main function for HTTP message state machine.

    Consumes as much of the buffer as the current parser state allows and
    advances the state accordingly.

    @type buf: string
    @param buf: Receive buffer
    @type eof: bool
    @param eof: Whether we've reached EOF on the socket
    @rtype: string
    @return: Updated receive buffer

    """
    # TODO: Use offset instead of slicing when possible
    if self.parser_status == self.PS_START_LINE:
      # Expect start line
      while True:
        idx = buf.find("\r\n")

        # RFC2616, section 4.1: "In the interest of robustness, servers SHOULD
        # ignore any empty line(s) received where a Request-Line is expected.
        # In other words, if the server is reading the protocol stream at the
        # beginning of a message and receives a CRLF first, it should ignore
        # the CRLF."
        if idx == 0:
          # TODO: Limit number of CRLFs/empty lines for safety?
          buf = buf[2:]
          continue

        if idx > 0:
          self.start_line_buffer = buf[:idx]

          self._CheckStartLineLength(len(self.start_line_buffer))

          # Remove status line, including CRLF
          buf = buf[idx + 2:]

          self.msg.start_line = self.ParseStartLine(self.start_line_buffer)

          self.parser_status = self.PS_HEADERS
        else:
          # Check whether incoming data is getting too large, otherwise we just
          # fill our read buffer.
          self._CheckStartLineLength(len(buf))

        break

    if self.parser_status == self.PS_HEADERS:
      if buf.startswith("\r\n"):
        # The start line is directly followed by the empty line which ends
        # the header section, i.e. the message has no headers at all.
        # Searching for CRLFCRLF would never succeed for such a message and
        # the parser would wait until the connection times out or is closed.
        buf = buf[2:]

        # Nothing was written to the header buffer, hence the parsed
        # headers are empty
        self._ParseHeaders()

        self.parser_status = self.PS_BODY
      else:
        # Wait for header end
        idx = buf.find("\r\n\r\n")
        if idx >= 0:
          # Keep the CRLF terminating the last header line
          self.header_buffer.write(buf[:idx + 2])

          self._CheckHeaderLength(self.header_buffer.tell())

          # Remove headers, including CRLF
          buf = buf[idx + 4:]

          self._ParseHeaders()

          self.parser_status = self.PS_BODY
        else:
          # Check whether incoming data is getting too large, otherwise we
          # just fill our read buffer.
          self._CheckHeaderLength(len(buf))

    if self.parser_status == self.PS_BODY:
      # TODO: Implement max size for body_buffer
      self.body_buffer.write(buf)
      buf = ""

      # Check whether we've read everything
      #
      # RFC2616, section 4.4: "When a message-body is included with a message,
      # the transfer-length of that body is determined by one of the following
      # [...] 5. By the server closing the connection. (Closing the connection
      # cannot be used to indicate the end of a request body, since that would
      # leave no possibility for the server to send back a response.)"
      #
      # TODO: Error when buffer length > Content-Length header
      if (eof or
          self.content_length is None or
          (self.content_length is not None and
           self.body_buffer.tell() >= self.content_length)):
        self.parser_status = self.PS_COMPLETE

    return buf

  def _CheckStartLineLength(self, length):
    """Limits the start line buffer size.

    @type length: int
    @param length: Buffer size
    @raise HttpError: If a limit is set and the length exceeds it

    """
    if (self.START_LINE_LENGTH_MAX is not None and
        length > self.START_LINE_LENGTH_MAX):
      raise HttpError("Start line longer than %d chars" %
                       self.START_LINE_LENGTH_MAX)

  def _CheckHeaderLength(self, length):
    """Limits the header buffer size.

    @type length: int
    @param length: Buffer size
    @raise HttpError: If a limit is set and the length exceeds it

    """
    if (self.HEADER_LENGTH_MAX is not None and
        length > self.HEADER_LENGTH_MAX):
      raise HttpError("Headers longer than %d chars" % self.HEADER_LENGTH_MAX)

  def ParseStartLine(self, start_line):
    """Parses the start line of a message.

    Must be overridden by subclass.

    @type start_line: string
    @param start_line: Start line string
    @return: Object stored as the message's start line; it needs a
        C{version} attribute for L{_WillPeerCloseConnection}

    """
    raise NotImplementedError()

  def _WillPeerCloseConnection(self):
    """Evaluate whether peer will close the connection.

    @rtype: bool
    @return: Whether peer will close the connection

    """
    # RFC2616, section 14.10: "HTTP/1.1 defines the "close" connection option
    # for the sender to signal that the connection will be closed after
    # completion of the response. For example,
    #
    #        Connection: close
    #
    # in either the request or the response header fields indicates that the
    # connection SHOULD NOT be considered `persistent' (section 8.1) after the
    # current request/response is complete."

    hdr_connection = self.msg.headers.get(HTTP_CONNECTION, None)
    if hdr_connection:
      hdr_connection = hdr_connection.lower()

    # An HTTP/1.1 server is assumed to stay open unless explicitly closed.
    if self.msg.start_line.version == HTTP_1_1:
      # Without a Connection header the "and" expression evaluates to None
      # or an empty string; convert to get the documented boolean
      return bool(hdr_connection and "close" in hdr_connection)

    # Some HTTP/1.0 implementations have support for persistent connections,
    # using rules different than HTTP/1.1.

    # For older HTTP, Keep-Alive indicates persistent connection.
    if self.msg.headers.get(HTTP_KEEP_ALIVE):
      return False

    # At least Akamai returns a "Connection: Keep-Alive" header, which was
    # supposed to be sent by the client.
    if hdr_connection and "keep-alive" in hdr_connection:
      return False

    return True

  def _ParseHeaders(self):
    """Parses the headers.

    This function also adjusts internal variables based on header values.

    RFC2616, section 4.3: The presence of a message-body in a request is
    signaled by the inclusion of a Content-Length or Transfer-Encoding header
    field in the request's message-headers.

    """
    # Parse headers
    self.header_buffer.seek(0, 0)
    self.msg.headers = mimetools.Message(self.header_buffer, 0)

    self.peer_will_close = self._WillPeerCloseConnection()

    # Do we have a Content-Length header?
    hdr_content_length = self.msg.headers.get(HTTP_CONTENT_LENGTH, None)
    if hdr_content_length:
      try:
        self.content_length = int(hdr_content_length)
      except (TypeError, ValueError):
        self.content_length = None
      # Negative lengths are treated like a missing header
      if self.content_length is not None and self.content_length < 0:
        self.content_length = None

    # if the connection remains open and a content-length was not provided,
    # then assume that the connection WILL close.
    if self.content_length is None:
      self.peer_will_close = True