__init__.py 27.7 KB
Newer Older
1
2
#
#
3
4
5

# Copyright (C) 2007, 2008 Google Inc.
#
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
# General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
# 02110-1301, USA.

21
"""HTTP module.
22
23
24

"""

25
26
import logging
import mimetools
27
import OpenSSL
28
29
import select
import socket
Michael Hanselmann's avatar
Michael Hanselmann committed
30
31
32
import errno

from cStringIO import StringIO
33

34
from ganeti import constants
35
from ganeti import serializer
36
from ganeti import utils
37
38


39
40
# Identification string built from the Ganeti release version
HTTP_GANETI_VERSION = "Ganeti %s" % constants.RELEASE_VERSION

41
42
43
44
45
46
47
48
49
50
# Status codes referenced by name
HTTP_OK = 200
HTTP_NO_CONTENT = 204
HTTP_NOT_MODIFIED = 304

# Protocol version strings
HTTP_0_9 = "HTTP/0.9"
HTTP_1_0 = "HTTP/1.0"
HTTP_1_1 = "HTTP/1.1"

# Request methods
HTTP_GET = "GET"
HTTP_HEAD = "HEAD"
HTTP_POST = "POST"
HTTP_PUT = "PUT"
HTTP_DELETE = "DELETE"

# Header field names
HTTP_ETAG = "ETag"
HTTP_HOST = "Host"
HTTP_SERVER = "Server"
HTTP_DATE = "Date"
HTTP_USER_AGENT = "User-Agent"
HTTP_CONTENT_TYPE = "Content-Type"
HTTP_CONTENT_LENGTH = "Content-Length"
HTTP_CONNECTION = "Connection"
HTTP_KEEP_ALIVE = "Keep-Alive"
HTTP_WWW_AUTHENTICATE = "WWW-Authenticate"
HTTP_AUTHORIZATION = "Authorization"
HTTP_AUTHENTICATION_INFO = "Authentication-Info"
HTTP_ALLOW = "Allow"

# Second element of the OpenSSL SysCallError arguments that SocketOperation
# compares against to recognize an unexpected EOF
_SSL_UNEXPECTED_EOF = "Unexpected EOF"

# Socket operations understood by SocketOperation
SOCKOP_SEND, SOCKOP_RECV, SOCKOP_SHUTDOWN, SOCKOP_HANDSHAKE = range(4)

# send/receive quantum (bytes per socket call)
SOCK_BUF_SIZE = 32768
79

80

81
82
83
84
85
86
87
88
class HttpError(Exception):
  """Generic failure inside the HTTP code.

  Intended for internal error reporting only.

  """


89
90
91
92
93
94
95
96
97
class HttpConnectionClosed(Exception):
  """Signals that the connection has been closed.

  Intended for internal error reporting only, and only as a last resort
  when the condition can't be reported in any other way.

  """


98
99
100
101
102
103
104
105
class HttpSessionHandshakeUnexpectedEOF(HttpError):
  """Signals an unexpected EOF while performing the SSL handshake.

  Intended for internal error reporting only.

  """


106
class HttpSocketTimeout(Exception):
  """Signals that a socket operation timed out.

  Intended for internal error reporting only.

  """
Michael Hanselmann's avatar
Michael Hanselmann committed
112
113


114
class HttpException(Exception):
  """Base class for exceptions carrying an HTTP status code.

  Subclasses set C{code}; C{message} and C{headers} are stored on the
  instance exactly as passed to the constructor.

  """
  code = None
  message = None

  def __init__(self, message=None, headers=None):
    """Initializes this class.

    @param message: Optional message, stored as C{self.message}
    @param headers: Optional headers, stored as C{self.headers}

    """
    # Note: the message is deliberately not forwarded, "args" stays empty
    Exception.__init__(self)
    self.headers = headers
    self.message = message
122
123


124
class HttpBadRequest(HttpException):
  """400 Bad Request

  RFC 2616, section 10.4.1: The server could not understand the request
  because its syntax is malformed. The client SHOULD NOT send the same
  request again without changing it.

  """
  code = 400


135
136
137
class HttpUnauthorized(HttpException):
  """401 Unauthorized

  RFC 2616, section 10.4.2: User authentication is required for the
  request. The response MUST carry a WWW-Authenticate header field
  (section 14.47) with a challenge applicable to the requested resource.

  """
  code = 401


147
class HttpForbidden(HttpException):
  """403 Forbidden

  RFC 2616, section 10.4.4: The request was understood, but the server
  refuses to fulfill it. Authorization will not help and the request
  SHOULD NOT be repeated.

  """
  code = 403


158
class HttpNotFound(HttpException):
  """404 Not Found

  RFC 2616, section 10.4.5: Nothing matching the Request-URI was found by
  the server. Whether the condition is temporary or permanent is not
  indicated.

  """
  code = 404


169
170
171
class HttpMethodNotAllowed(HttpException):
  """405 Method Not Allowed

  RFC 2616, section 10.4.6: The resource identified by the Request-URI
  does not allow the method given in the Request-Line. The response MUST
  include an Allow header listing the valid methods for the resource.

  """
  code = 405


class HttpRequestTimeout(HttpException):
  """408 Request Timeout

  RFC 2616, section 10.4.9: No request was produced by the client within
  the time the server was prepared to wait. The client MAY repeat the
  request unmodified at any later time.

  """
  code = 408


class HttpConflict(HttpException):
  """409 Conflict

  RFC 2616, section 10.4.10: A conflict with the current state of the
  resource prevented the request from completing. The code is only
  allowed where the user is expected to be able to resolve the conflict
  and resubmit the request.

  """
  code = 409


204
class HttpGone(HttpException):
  """410 Gone

  RFC 2616, section 10.4.11: The server no longer has the requested
  resource and knows no forwarding address. The condition is expected to
  be considered permanent.

  """
  code = 410


215
class HttpLengthRequired(HttpException):
  """411 Length Required

  RFC 2616, section 10.4.12: Without a defined Content-Length the server
  refuses to accept the request. The client MAY repeat it after adding a
  valid Content-Length header field holding the length of the
  message-body of the request message.

  """
  code = 411


227
228
229
class HttpPreconditionFailed(HttpException):
  """412 Precondition Failed

  RFC 2616, section 10.4.13: A precondition from one or more of the
  request-header fields evaluated to false when tested on the server.

  """
  code = 412


238
class HttpInternalServerError(HttpException):
  """500 Internal Server Error

  RFC 2616, section 10.5.1: An unexpected condition was encountered by
  the server and prevented it from fulfilling the request.

  """
  code = 500


248
class HttpNotImplemented(HttpException):
  """501 Not Implemented

  RFC 2616, section 10.5.2: The functionality needed to fulfill the
  request is not supported by the server.

  """
  code = 501


258
259
260
261
262
263
264
265
266
267
268
class HttpBadGateway(HttpException):
  """502 Bad Gateway

  RFC 2616, section 10.5.3: Acting as a gateway or proxy, the server got
  an invalid response from the upstream server it accessed while trying
  to fulfill the request.

  """
  code = 502


269
class HttpServiceUnavailable(HttpException):
  """503 Service Unavailable

  RFC 2616, section 10.5.4: Temporary overloading or maintenance of the
  server currently prevents it from handling the request.

  """
  code = 503


279
280
281
282
283
284
285
286
287
288
289
290
291
class HttpGatewayTimeout(HttpException):
  """504 Gateway Timeout

  RFC 2616, section 10.5.5: Acting as a gateway or proxy, the server got
  no timely response from the upstream server specified by the URI
  (e.g. HTTP, FTP, LDAP) or from another auxiliary server (e.g. DNS) it
  had to access while trying to complete the request.

  """
  code = 504


292
class HttpVersionNotSupported(HttpException):
  """505 HTTP Version Not Supported

  RFC 2616, section 10.5.6: The HTTP protocol version used in the request
  message is not supported by the server, or the server refuses to
  support it.

  """
  code = 505


Iustin Pop's avatar
Iustin Pop committed
302
class HttpJsonConverter: # pylint: disable-msg=W0232
  """Converts message bodies to and from JSON using the serializer module.

  """
  CONTENT_TYPE = "application/json"

  def Encode(self, data):
    """Returns the result of C{serializer.DumpJson} for the given data.

    """
    encoded = serializer.DumpJson(data)
    return encoded

  def Decode(self, data):
    """Returns the result of C{serializer.LoadJson} for the given data.

    """
    decoded = serializer.LoadJson(data)
    return decoded


312
def WaitForSocketCondition(sock, event, timeout):
  """Waits for a condition to occur on the socket.

  The socket is registered with a poller for C{event} only; the first
  reported condition matching C{event} or one of POLLPRI, POLLNVAL,
  POLLHUP and POLLERR ends the wait.

  @type sock: socket
  @param sock: Wait for events on this socket
  @type event: int
  @param event: ORed condition (see select module)
  @type timeout: float or None
  @param timeout: Timeout in seconds
  @rtype: int or None
  @return: None for timeout, otherwise occurred conditions

  """
  interesting = (select.POLLERR | select.POLLHUP | select.POLLNVAL |
                 select.POLLPRI | event)

  # Poller object expects milliseconds
  if timeout is None:
    poll_timeout = None
  else:
    poll_timeout = timeout * 1000

  poller = select.poll()
  poller.register(sock, event)
  try:
    while True:
      # TODO: If the main thread receives a signal and we have no timeout, we
      # could wait forever. This should check a global "quit" flag or
      # something every so often.
      ready = poller.poll(poll_timeout)
      if not ready:
        # Nothing reported, i.e. the timeout expired
        return None

      for entry in ready:
        condition = entry[1]
        if condition & interesting:
          return condition
  finally:
    poller.unregister(sock)


350
def SocketOperation(sock, op, arg1, timeout):
351
352
353
354
355
356
  """Wrapper around socket functions.

  This function abstracts error handling for socket operations, especially
  for the complicated interaction with OpenSSL.

  @type sock: socket
357
  @param sock: Socket for the operation
358
359
360
361
362
363
  @type op: int
  @param op: Operation to execute (SOCKOP_* constants)
  @type arg1: any
  @param arg1: Parameter for function (if needed)
  @type timeout: None or float
  @param timeout: Timeout in seconds or None
364
  @return: Return value of socket function
365
366
367

  """
  # TODO: event_poll/event_check/override
368
  if op in (SOCKOP_SEND, SOCKOP_HANDSHAKE):
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
    event_poll = select.POLLOUT

  elif op == SOCKOP_RECV:
    event_poll = select.POLLIN

  elif op == SOCKOP_SHUTDOWN:
    event_poll = None

    # The timeout is only used when OpenSSL requests polling for a condition.
    # It is not advisable to have no timeout for shutdown.
    assert timeout

  else:
    raise AssertionError("Invalid socket operation")

384
385
386
387
388
  # Handshake is only supported by SSL sockets
  if (op == SOCKOP_HANDSHAKE and
      not isinstance(sock, OpenSSL.SSL.ConnectionType)):
    return

389
390
391
392
393
  # No override by default
  event_override = 0

  while True:
    # Poll only for certain operations and when asked for by an override
394
    if event_override or op in (SOCKOP_SEND, SOCKOP_RECV, SOCKOP_HANDSHAKE):
395
396
397
398
399
      if event_override:
        wait_for_event = event_override
      else:
        wait_for_event = event_poll

400
      event = WaitForSocketCondition(sock, wait_for_event, timeout)
401
      if event is None:
402
        raise HttpSocketTimeout()
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428

      if (op == SOCKOP_RECV and
          event & (select.POLLNVAL | select.POLLHUP | select.POLLERR)):
        return ""

      if not event & wait_for_event:
        continue

    # Reset override
    event_override = 0

    try:
      try:
        if op == SOCKOP_SEND:
          return sock.send(arg1)

        elif op == SOCKOP_RECV:
          return sock.recv(arg1)

        elif op == SOCKOP_SHUTDOWN:
          if isinstance(sock, OpenSSL.SSL.ConnectionType):
            # PyOpenSSL's shutdown() doesn't take arguments
            return sock.shutdown()
          else:
            return sock.shutdown(arg1)

429
430
431
        elif op == SOCKOP_HANDSHAKE:
          return sock.do_handshake()

432
433
434
435
436
437
438
439
440
441
442
443
444
      except OpenSSL.SSL.WantWriteError:
        # OpenSSL wants to write, poll for POLLOUT
        event_override = select.POLLOUT
        continue

      except OpenSSL.SSL.WantReadError:
        # OpenSSL wants to read, poll for POLLIN
        event_override = select.POLLIN | select.POLLPRI
        continue

      except OpenSSL.SSL.WantX509LookupError:
        continue

445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
      except OpenSSL.SSL.ZeroReturnError, err:
        # SSL Connection has been closed. In SSL 3.0 and TLS 1.0, this only
        # occurs if a closure alert has occurred in the protocol, i.e. the
        # connection has been closed cleanly. Note that this does not
        # necessarily mean that the transport layer (e.g. a socket) has been
        # closed.
        if op == SOCKOP_SEND:
          # Can happen during a renegotiation
          raise HttpConnectionClosed(err.args)
        elif op == SOCKOP_RECV:
          return ""

        # SSL_shutdown shouldn't return SSL_ERROR_ZERO_RETURN
        raise socket.error(err.args)

460
461
462
463
464
465
466
467
      except OpenSSL.SSL.SysCallError, err:
        if op == SOCKOP_SEND:
          # arg1 is the data when writing
          if err.args and err.args[0] == -1 and arg1 == "":
            # errors when writing empty strings are expected
            # and can be ignored
            return 0

468
469
        if err.args == (-1, _SSL_UNEXPECTED_EOF):
          if op == SOCKOP_RECV:
470
            return ""
471
472
473
474
          elif op == SOCKOP_HANDSHAKE:
            # Can happen if peer disconnects directly after the connection is
            # opened.
            raise HttpSessionHandshakeUnexpectedEOF(err.args)
475
476
477
478
479
480
481
482
483
484
485
486
487
488

        raise socket.error(err.args)

      except OpenSSL.SSL.Error, err:
        raise socket.error(err.args)

    except socket.error, err:
      if err.args and err.args[0] == errno.EAGAIN:
        # Ignore EAGAIN
        continue

      raise


489
def ShutdownConnection(sock, close_timeout, write_timeout, msgreader, force):
490
491
  """Closes the connection.

492
493
494
  @type sock: socket
  @param sock: Socket to be shut down
  @type close_timeout: float
Iustin Pop's avatar
Iustin Pop committed
495
496
  @param close_timeout: How long to wait for the peer to close
      the connection
497
498
499
  @type write_timeout: float
  @param write_timeout: Write timeout for shutdown
  @type msgreader: http.HttpMessageReader
Iustin Pop's avatar
Iustin Pop committed
500
501
  @param msgreader: Request message reader, used to determine whether
      peer should close connection
502
  @type force: bool
Iustin Pop's avatar
Iustin Pop committed
503
504
  @param force: Whether to forcibly close the connection without
      waiting for peer
505

506
507
508
509
510
511
  """
  #print msgreader.peer_will_close, force
  if msgreader and msgreader.peer_will_close and not force:
    # Wait for peer to close
    try:
      # Check whether it's actually closed
512
      if not SocketOperation(sock, SOCKOP_RECV, 1, close_timeout):
513
514
515
516
517
518
519
        return
    except (socket.error, HttpError, HttpSocketTimeout):
      # Ignore errors at this stage
      pass

  # Close the connection from our side
  try:
520
    # We don't care about the return value, see NOTES in SSL_shutdown(3).
521
    SocketOperation(sock, SOCKOP_SHUTDOWN, socket.SHUT_RDWR,
522
523
524
525
                    write_timeout)
  except HttpSocketTimeout:
    raise HttpError("Timeout while shutting down connection")
  except socket.error, err:
526
527
528
    # Ignore ENOTCONN
    if not (err.args and err.args[0] == errno.ENOTCONN):
      raise HttpError("Error while shutting down connection: %s" % err)
529
530


531
def Handshake(sock, write_timeout):
532
533
534
535
536
537
538
539
540
  """Shakes peer's hands.

  @type sock: socket
  @param sock: Socket to be shut down
  @type write_timeout: float
  @param write_timeout: Write timeout for handshake

  """
  try:
541
    return SocketOperation(sock, SOCKOP_HANDSHAKE, None, write_timeout)
542
543
544
545
546
547
  except HttpSocketTimeout:
    raise HttpError("Timeout during SSL handshake")
  except socket.error, err:
    raise HttpError("Error in SSL handshake: %s" % err)


548
549
550
551
552
553
554
555
556
557
558
559
560
def InitSsl():
  """Initializes the SSL infrastructure.

  This function is idempotent.

  @raise EnvironmentError: if OpenSSL's PRNG reports it has not been
      seeded with enough entropy

  """
  # TODO: Maybe add some additional seeding for OpenSSL's PRNG
  if OpenSSL.rand.status():
    return

  raise EnvironmentError("OpenSSL could not collect enough entropy"
                         " for the PRNG")


561
562
563
564
565
566
567
568
569
570
class HttpSslParams(object):
  """Data class for SSL key and certificate.

  Both files are read once at construction time; the PEM data is parsed
  anew on every call to L{GetKey} or L{GetCertificate}.

  """
  def __init__(self, ssl_key_path, ssl_cert_path):
    """Initializes this class.

    @type ssl_key_path: string
    @param ssl_key_path: Path to file containing SSL key in PEM format
    @type ssl_cert_path: string
    @param ssl_cert_path: Path to file containing SSL certificate
        in PEM format

    """
    self.ssl_key_pem = utils.ReadFile(ssl_key_path)
    self.ssl_cert_pem = utils.ReadFile(ssl_cert_path)

  def GetKey(self):
    """Loads the private key from the PEM data read at construction.

    """
    crypto = OpenSSL.crypto
    return crypto.load_privatekey(crypto.FILETYPE_PEM, self.ssl_key_pem)

  def GetCertificate(self):
    """Loads the certificate from the PEM data read at construction.

    """
    crypto = OpenSSL.crypto
    return crypto.load_certificate(crypto.FILETYPE_PEM, self.ssl_cert_pem)
585
586


587
class HttpBase(object):
  """Base class for HTTP server and client.

  """
  def __init__(self):
    """Initializes this class; all SSL state starts out unset.

    """
    self._ssl_params = None
    self._ssl_key = None
    self._ssl_cert = None
    self.using_ssl = None

  def _CreateSocket(self, ssl_params, ssl_verify_peer):
    """Creates a TCP socket and initializes SSL if needed.

    @type ssl_params: HttpSslParams
    @param ssl_params: SSL key and certificate
    @type ssl_verify_peer: bool
    @param ssl_verify_peer: Whether to require client certificate
        and compare it with our certificate
    @return: A plain socket if C{ssl_params} is None, otherwise an
        OpenSSL connection wrapping the socket

    """
    self._ssl_params = ssl_params

    plain_sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)

    # Should we enable SSL?
    if ssl_params is None:
      self.using_ssl = False
      return plain_sock

    self.using_ssl = True

    self._ssl_key = ssl_params.GetKey()
    self._ssl_cert = ssl_params.GetCertificate()

    ssl = OpenSSL.SSL

    ctx = ssl.Context(ssl.SSLv23_METHOD)
    ctx.set_options(ssl.OP_NO_SSLv2)

    ctx.use_privatekey(self._ssl_key)
    ctx.use_certificate(self._ssl_cert)
    ctx.check_privatekey()

    if ssl_verify_peer:
      verify_flags = ssl.VERIFY_PEER | ssl.VERIFY_FAIL_IF_NO_PEER_CERT
      ctx.set_verify(verify_flags, self._SSLVerifyCallback)

    return ssl.Connection(ctx, plain_sock)

  def _SSLVerifyCallback(self, conn, cert, errnum, errdepth, ok):
    """Verify the certificate provided by the peer

    We only compare fingerprints. The client must use the same certificate as
    we do on our side.

    @rtype: bool
    @return: Whether both the SHA1 and the MD5 digest of the peer's
        certificate equal those of our own certificate

    """
    assert self._ssl_params, "SSL not initialized"

    for algorithm in ("sha1", "md5"):
      if self._ssl_cert.digest(algorithm) != cert.digest(algorithm):
        return False

    return True
645
646


647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
class HttpMessage(object):
  """Data structure for HTTP message.

  All fields start out as None.

  """
  def __init__(self):
    """Initializes an empty message.

    """
    self.start_line = self.headers = None
    self.body = self.decoded_body = None


class HttpClientToServerStartLine(object):
  """Data structure for HTTP request start line.

  """
  def __init__(self, method, path, version):
    """Stores the three components of the request line.

    """
    (self.method, self.path, self.version) = (method, path, version)

  def __str__(self):
    """Formats the line as "<method> <path> <version>".

    """
    fields = (self.method, self.path, self.version)
    return "%s %s %s" % fields


class HttpServerToClientStartLine(object):
  """Data structure for HTTP response start line.

  """
  def __init__(self, version, code, reason):
    """Stores the three components of the status line.

    """
    (self.version, self.code, self.reason) = (version, code, reason)

  def __str__(self):
    """Formats the line as "<version> <code> <reason>".

    """
    fields = (self.version, self.code, self.reason)
    return "%s %s %s" % fields


class HttpMessageWriter(object):
  """Writes an HTTP message to a socket.

  """
  def __init__(self, sock, msg, write_timeout):
    """Initializes this class and writes an HTTP message to a socket.

    @type sock: socket
    @param sock: Socket to be written to
    @type msg: http.HttpMessage
    @param msg: HTTP message to be written
    @type write_timeout: float
    @param write_timeout: Write timeout for socket

    """
    self._msg = msg

    self._PrepareMessage()

    payload = self._FormatMessage()
    total = len(payload)

    offset = 0
    while offset < total:
      # Send only SOCK_BUF_SIZE bytes at a time
      chunk = payload[offset:offset + SOCK_BUF_SIZE]

      # Advance by however many bytes were actually sent
      offset += SocketOperation(sock, SOCKOP_SEND, chunk, write_timeout)

    assert offset == total, "Message wasn't sent completely"

  def _PrepareMessage(self):
    """Prepares the HTTP message by setting mandatory headers.

    """
    # RFC2616, section 4.3: "The presence of a message-body in a request is
    # signaled by the inclusion of a Content-Length or Transfer-Encoding header
    # field in the request's message-headers."
    body = self._msg.body
    if body:
      self._msg.headers[HTTP_CONTENT_LENGTH] = len(body)

  def _FormatMessage(self):
    """Serializes the HTTP message into a string.

    @rtype: string
    @return: Start line, headers (not for HTTP/0.9), an empty line and the
        body, if the message has one

    """
    msg = self._msg
    out = StringIO()

    # Start line
    out.write(str(msg.start_line))
    out.write("\r\n")

    # Headers
    if msg.start_line.version != HTTP_0_9:
      for (name, value) in msg.headers.iteritems():
        out.write("%s: %s\r\n" % (name, value))

    # Empty line terminating the header section
    out.write("\r\n")

    # Message body, if needed
    if self.HasMessageBody():
      out.write(msg.body)
    elif msg.body:
      logging.warning("Ignoring message body")

    return out.getvalue()

  def HasMessageBody(self):
    """Checks whether the HTTP message contains a body.

    Can be overridden by subclasses.

    @rtype: bool
    @return: Whether the message body is non-empty

    """
    if self._msg.body:
      return True

    return False


class HttpMessageReader(object):
  """Reads HTTP message from socket.

  The message is read and parsed in the constructor. Parsing is done by a
  small state machine (see the C{PS_*} constants) which is fed with whatever
  data has been received so far. Subclasses must implement
  L{ParseStartLine}.

  """
  # Length limits, None means unlimited
  START_LINE_LENGTH_MAX = None
  HEADER_LENGTH_MAX = None

  # Parser state machine
  PS_START_LINE = "start-line"
  PS_HEADERS = "headers"
  PS_BODY = "entity-body"
  PS_COMPLETE = "complete"

  def __init__(self, sock, msg, read_timeout):
    """Reads an HTTP message from a socket.

    @type sock: socket
    @param sock: Socket to be read from
    @type msg: http.HttpMessage
    @param msg: Object for the read message
    @type read_timeout: float
    @param read_timeout: Read timeout for socket
    @raise HttpError: if the connection is closed before start line and
        headers have been received completely

    """
    self.sock = sock
    self.msg = msg

    self.start_line_buffer = None
    self.header_buffer = StringIO()
    self.body_buffer = StringIO()
    self.parser_status = self.PS_START_LINE
    self.content_length = None
    self.peer_will_close = None

    buf = ""
    eof = False
    while self.parser_status != self.PS_COMPLETE:
      # TODO: Don't read more than necessary (Content-Length), otherwise
      # data might be lost and/or an error could occur
      data = SocketOperation(sock, SOCKOP_RECV, SOCK_BUF_SIZE, read_timeout)

      # An empty read means the peer closed the connection
      if data:
        buf += data
      else:
        eof = True

      # Do some parsing and error checking while more data arrives
      buf = self._ContinueParsing(buf, eof)

      # Must be done only after the buffer has been evaluated
      # TODO: Connection-length < len(data read) and connection closed
      if (eof and
          self.parser_status in (self.PS_START_LINE,
                                 self.PS_HEADERS)):
        raise HttpError("Connection closed prematurely")

    # Parse rest
    buf = self._ContinueParsing(buf, True)

    assert self.parser_status == self.PS_COMPLETE
    assert not buf, "Parser didn't read full response"

    msg.body = self.body_buffer.getvalue()

    # TODO: Content-type, error handling
    if msg.body:
      msg.decoded_body = HttpJsonConverter().Decode(msg.body)
    else:
      msg.decoded_body = None

    if msg.decoded_body:
      logging.debug("Message body: %s", msg.decoded_body)

  def _ContinueParsing(self, buf, eof):
    """Main function for HTTP message state machine.

    @type buf: string
    @param buf: Receive buffer
    @type eof: bool
    @param eof: Whether we've reached EOF on the socket
    @rtype: string
    @return: Updated receive buffer
    @raise HttpError: if start line or headers exceed the configured limits

    """
    # TODO: Use offset instead of slicing when possible
    if self.parser_status == self.PS_START_LINE:
      # Expect start line
      while True:
        idx = buf.find("\r\n")

        # RFC2616, section 4.1: "In the interest of robustness, servers SHOULD
        # ignore any empty line(s) received where a Request-Line is expected.
        # In other words, if the server is reading the protocol stream at the
        # beginning of a message and receives a CRLF first, it should ignore
        # the CRLF."
        if idx == 0:
          # TODO: Limit number of CRLFs/empty lines for safety?
          # Drop the leading CRLF and keep everything after it
          buf = buf[2:]
          continue

        if idx > 0:
          self.start_line_buffer = buf[:idx]

          self._CheckStartLineLength(len(self.start_line_buffer))

          # Remove status line, including CRLF
          buf = buf[idx + 2:]

          self.msg.start_line = self.ParseStartLine(self.start_line_buffer)

          self.parser_status = self.PS_HEADERS
        else:
          # Check whether incoming data is getting too large, otherwise we just
          # fill our read buffer.
          self._CheckStartLineLength(len(buf))

        break

    # TODO: Handle messages without headers
    if self.parser_status == self.PS_HEADERS:
      # Wait for header end
      idx = buf.find("\r\n\r\n")
      if idx >= 0:
        # Keep the CRLF terminating the last header line
        self.header_buffer.write(buf[:idx + 2])

        self._CheckHeaderLength(self.header_buffer.tell())

        # Remove headers, including CRLF
        buf = buf[idx + 4:]

        self._ParseHeaders()

        self.parser_status = self.PS_BODY
      else:
        # Check whether incoming data is getting too large, otherwise we just
        # fill our read buffer.
        self._CheckHeaderLength(len(buf))

    if self.parser_status == self.PS_BODY:
      # TODO: Implement max size for body_buffer
      self.body_buffer.write(buf)
      buf = ""

      # Check whether we've read everything
      #
      # RFC2616, section 4.4: "When a message-body is included with a message,
      # the transfer-length of that body is determined by one of the following
      # [...] 5. By the server closing the connection. (Closing the connection
      # cannot be used to indicate the end of a request body, since that would
      # leave no possibility for the server to send back a response.)"
      #
      # TODO: Error when buffer length > Content-Length header
      if (eof or
          self.content_length is None or
          (self.content_length is not None and
           self.body_buffer.tell() >= self.content_length)):
        self.parser_status = self.PS_COMPLETE

    return buf

  def _CheckStartLineLength(self, length):
    """Limits the start line buffer size.

    @type length: int
    @param length: Buffer size
    @raise HttpError: if a limit is set and C{length} exceeds it

    """
    if (self.START_LINE_LENGTH_MAX is not None and
        length > self.START_LINE_LENGTH_MAX):
      raise HttpError("Start line longer than %d chars" %
                       self.START_LINE_LENGTH_MAX)

  def _CheckHeaderLength(self, length):
    """Limits the header buffer size.

    @type length: int
    @param length: Buffer size
    @raise HttpError: if a limit is set and C{length} exceeds it

    """
    if (self.HEADER_LENGTH_MAX is not None and
        length > self.HEADER_LENGTH_MAX):
      raise HttpError("Headers longer than %d chars" % self.HEADER_LENGTH_MAX)

  def ParseStartLine(self, start_line):
    """Parses the start line of a message.

    Must be overridden by subclass.

    @type start_line: string
    @param start_line: Start line string

    """
    raise NotImplementedError()

  def _WillPeerCloseConnection(self):
    """Evaluate whether peer will close the connection.

    @rtype: bool
    @return: Whether peer will close the connection

    """
    # RFC2616, section 14.10: "HTTP/1.1 defines the "close" connection option
    # for the sender to signal that the connection will be closed after
    # completion of the response. For example,
    #
    #        Connection: close
    #
    # in either the request or the response header fields indicates that the
    # connection SHOULD NOT be considered `persistent' (section 8.1) after the
    # current request/response is complete."

    hdr_connection = self.msg.headers.get(HTTP_CONNECTION, None)
    if hdr_connection:
      hdr_connection = hdr_connection.lower()

    # An HTTP/1.1 server is assumed to stay open unless explicitly closed.
    if self.msg.start_line.version == HTTP_1_1:
      # bool() as the header can be missing (None) or empty
      return bool(hdr_connection and "close" in hdr_connection)

    # Some HTTP/1.0 implementations have support for persistent connections,
    # using rules different than HTTP/1.1.

    # For older HTTP, Keep-Alive indicates persistent connection.
    if self.msg.headers.get(HTTP_KEEP_ALIVE):
      return False

    # At least Akamai returns a "Connection: Keep-Alive" header, which was
    # supposed to be sent by the client.
    if hdr_connection and "keep-alive" in hdr_connection:
      return False

    return True

  def _ParseHeaders(self):
    """Parses the headers.

    This function also adjusts internal variables based on header values.

    RFC2616, section 4.3: The presence of a message-body in a request is
    signaled by the inclusion of a Content-Length or Transfer-Encoding header
    field in the request's message-headers.

    """
    # Parse headers
    self.header_buffer.seek(0, 0)
    self.msg.headers = mimetools.Message(self.header_buffer, 0)

    self.peer_will_close = self._WillPeerCloseConnection()

    # Do we have a Content-Length header?
    hdr_content_length = self.msg.headers.get(HTTP_CONTENT_LENGTH, None)
    if hdr_content_length:
      try:
        self.content_length = int(hdr_content_length)
      except ValueError:
        # Not a number, treat as if the header was missing
        self.content_length = None
      if self.content_length is not None and self.content_length < 0:
        # Negative lengths are treated as missing, too
        self.content_length = None

    # if the connection remains open and a content-length was not provided,
    # then assume that the connection WILL close.
    if self.content_length is None:
      self.peer_will_close = True