Noded: log the certificate and digest on noded startup
[ganeti-github.git] / lib / http / __init__.py
1 #
2 #
3
4 # Copyright (C) 2007, 2008, 2010, 2012 Google Inc.
5 # All rights reserved.
6 #
7 # Redistribution and use in source and binary forms, with or without
8 # modification, are permitted provided that the following conditions are
9 # met:
10 #
11 # 1. Redistributions of source code must retain the above copyright notice,
12 # this list of conditions and the following disclaimer.
13 #
14 # 2. Redistributions in binary form must reproduce the above copyright
15 # notice, this list of conditions and the following disclaimer in the
16 # documentation and/or other materials provided with the distribution.
17 #
18 # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
19 # IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
20 # TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
21 # PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR
22 # CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
23 # EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
24 # PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
25 # PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
26 # LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
27 # NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
28 # SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29
30 """HTTP module.
31
32 """
33
34 import logging
35 import mimetools
36 import OpenSSL
37 import select
38 import socket
39 import errno
40
41 from cStringIO import StringIO
42
43 from ganeti import constants
44 from ganeti import utils
45
46
# Identification string embedding the Ganeti release version
HTTP_GANETI_VERSION = "Ganeti %s" % constants.RELEASE_VERSION
48
# Status codes referenced by name
HTTP_OK = 200
HTTP_NO_CONTENT = 204
HTTP_NOT_MODIFIED = 304

# Protocol version strings
HTTP_0_9 = "HTTP/0.9"
HTTP_1_0 = "HTTP/1.0"
HTTP_1_1 = "HTTP/1.1"

# Request methods
HTTP_GET = "GET"
HTTP_HEAD = "HEAD"
HTTP_POST = "POST"
HTTP_PUT = "PUT"
HTTP_DELETE = "DELETE"

# Header field names
HTTP_ETAG = "ETag"
HTTP_HOST = "Host"
HTTP_SERVER = "Server"
HTTP_DATE = "Date"
HTTP_USER_AGENT = "User-Agent"
HTTP_CONTENT_TYPE = "Content-Type"
HTTP_CONTENT_LENGTH = "Content-Length"
HTTP_CONNECTION = "Connection"
HTTP_KEEP_ALIVE = "Keep-Alive"
HTTP_WWW_AUTHENTICATE = "WWW-Authenticate"
HTTP_AUTHORIZATION = "Authorization"
HTTP_AUTHENTICATION_INFO = "Authentication-Info"
HTTP_ALLOW = "Allow"

# Content types
HTTP_APP_OCTET_STREAM = "application/octet-stream"
HTTP_APP_JSON = "application/json"

# Error text compared against the arguments of OpenSSL.SSL.SysCallError
_SSL_UNEXPECTED_EOF = "Unexpected EOF"

# Socket operations
SOCKOP_SEND, SOCKOP_RECV, SOCKOP_SHUTDOWN, SOCKOP_HANDSHAKE = range(4)

# send/receive quantum
SOCK_BUF_SIZE = 32768
90
91
class HttpError(Exception):
  """Generic HTTP failure, for internal use.

  Intended solely for error reporting inside the HTTP code.

  """
98
99
class HttpConnectionClosed(Exception):
  """Signals a closed connection, for internal use.

  Intended solely for internal error reporting, and only as a last resort
  when the condition cannot be reported in any other way.

  """
107
108
class HttpSessionHandshakeUnexpectedEOF(HttpError):
  """Signals a failure during the SSL handshake, for internal use.

  Intended solely for error reporting inside the HTTP code.

  """
115
116
class HttpSocketTimeout(Exception):
  """Signals a socket timeout, for internal use.

  Intended solely for error reporting inside the HTTP code.

  """
123
124
class HttpException(Exception):
  """Base class for exceptions carrying an HTTP status code.

  Subclasses set C{code} to the status code they stand for.

  @cvar code: HTTP status code, C{None} in this base class
  @ivar message: Message given to the constructor, if any
  @ivar headers: Headers given to the constructor, if any

  """
  code = None
  message = None

  def __init__(self, message=None, headers=None):
    """Initializes this class.

    @param message: Optional error message
    @param headers: Optional headers associated with the error

    """
    # Hand the message to the base class so that str(), repr() and
    # tracebacks show it; without a message keep the arguments empty instead
    # of rendering "None".
    if message is None:
      Exception.__init__(self)
    else:
      Exception.__init__(self, message)

    self.message = message
    self.headers = headers
133
134
class HttpBadRequest(HttpException):
  """HTTP status 400 (Bad Request).

  RFC2616, 10.4.1: malformed syntax kept the server from understanding the
  request. The client SHOULD NOT send it again without modifications.

  """
  code = 400
144
145
class HttpUnauthorized(HttpException):
  """HTTP status 401 (Unauthorized).

  RFC2616, section 10.4.2: user authentication is required for the
  request. The response MUST carry a WWW-Authenticate header field
  (section 14.47) with a challenge applicable to the requested resource.

  """
  code = 401
156
157
class HttpForbidden(HttpException):
  """HTTP status 403 (Forbidden).

  RFC2616, 10.4.4: the request was understood, but the server refuses to
  fulfill it. Authorization will not help and the request SHOULD NOT be
  repeated.

  """
  code = 403
167
168
class HttpNotFound(HttpException):
  """HTTP status 404 (Not Found).

  RFC2616, 10.4.5: nothing matching the Request-URI was found by the
  server. Whether the condition is temporary or permanent is not
  indicated.

  """
  code = 404
178
179
class HttpMethodNotAllowed(HttpException):
  """HTTP status 405 (Method Not Allowed).

  RFC2616, 10.4.6: the resource identified by the Request-URI does not
  allow the method given in the Request-Line. The response MUST contain an
  Allow header listing the valid methods for the requested resource.

  """
  code = 405
190
191
class HttpNotAcceptable(HttpException):
  """HTTP status 406 (Not Acceptable).

  RFC2616, 10.4.7: the identified resource can only generate response
  entities whose content characteristics are not acceptable according to
  the accept headers sent in the request.

  """
  code = 406
201
202
class HttpRequestTimeout(HttpException):
  """HTTP status 408 (Request Timeout).

  RFC2616, 10.4.9: no request was produced by the client within the time
  the server was prepared to wait. The client MAY repeat the request
  without modifications at any later time.

  """
  code = 408
212
213
class HttpConflict(HttpException):
  """HTTP status 409 (Conflict).

  RFC2616, 10.4.10: a conflict with the current state of the resource
  prevented the request from being completed. The code is only allowed
  where the user can be expected to resolve the conflict and resubmit the
  request.

  """
  code = 409
224
225
class HttpGone(HttpException):
  """HTTP status 410 (Gone).

  RFC2616, 10.4.11: the requested resource is no longer available at the
  server and no forwarding address is known. The condition is expected to
  be considered permanent.

  """
  code = 410
235
236
class HttpLengthRequired(HttpException):
  """HTTP status 411 (Length Required).

  RFC2616, 10.4.12: the server refuses the request because no
  Content-Length is defined. The client MAY repeat the request after
  adding a valid Content-Length header field that contains the length of
  the message-body in the request message.

  """
  code = 411
247
248
class HttpPreconditionFailed(HttpException):
  """HTTP status 412 (Precondition Failed).

  RFC2616, 10.4.13: a precondition given in one or more request-header
  fields evaluated to false when tested on the server.

  """
  code = 412
258
259
class HttpUnsupportedMediaType(HttpException):
  """HTTP status 415 (Unsupported Media Type).

  RFC2616, 10.4.16: the server refuses to service the request because the
  request entity is in a format the requested resource does not support
  for the requested method.

  """
  code = 415
269
270
class HttpInternalServerError(HttpException):
  """HTTP status 500 (Internal Server Error).

  RFC2616, 10.5.1: an unexpected condition encountered by the server
  prevented it from fulfilling the request.

  """
  code = 500
279
280
class HttpNotImplemented(HttpException):
  """HTTP status 501 (Not Implemented).

  RFC2616, 10.5.2: the functionality required to fulfill the request is
  not supported by the server.

  """
  code = 501
289
290
class HttpBadGateway(HttpException):
  """HTTP status 502 (Bad Gateway).

  RFC2616, 10.5.3: while acting as a gateway or proxy, the server received
  an invalid response from the upstream server it accessed in attempting
  to fulfill the request.

  """
  code = 502
300
301
class HttpServiceUnavailable(HttpException):
  """HTTP status 503 (Service Unavailable).

  RFC2616, 10.5.4: temporary overloading or maintenance of the server
  currently keeps it from handling the request.

  """
  code = 503
310
311
class HttpGatewayTimeout(HttpException):
  """HTTP status 504 (Gateway Timeout).

  RFC2616, 10.5.5: while acting as a gateway or proxy, the server did not
  get a timely response from the upstream server specified by the URI
  (e.g. HTTP, FTP, LDAP) or from some other auxiliary server (e.g. DNS)
  that it needed to access in attempting to complete the request.

  """
  code = 504
323
324
class HttpVersionNotSupported(HttpException):
  """HTTP status 505 (HTTP Version Not Supported).

  RFC2616, 10.5.6: the HTTP protocol version used in the request message
  is not supported by the server, or the server refuses to support it.

  """
  code = 505
333
334
335 def ParseHeaders(buf):
336 """Parses HTTP headers.
337
338 @note: This is just a trivial wrapper around C{mimetools.Message}
339
340 """
341 return mimetools.Message(buf, 0)
342
343
344 def SocketOperation(sock, op, arg1, timeout):
345 """Wrapper around socket functions.
346
347 This function abstracts error handling for socket operations, especially
348 for the complicated interaction with OpenSSL.
349
350 @type sock: socket
351 @param sock: Socket for the operation
352 @type op: int
353 @param op: Operation to execute (SOCKOP_* constants)
354 @type arg1: any
355 @param arg1: Parameter for function (if needed)
356 @type timeout: None or float
357 @param timeout: Timeout in seconds or None
358 @return: Return value of socket function
359
360 """
361 # TODO: event_poll/event_check/override
362 if op in (SOCKOP_SEND, SOCKOP_HANDSHAKE):
363 event_poll = select.POLLOUT
364
365 elif op == SOCKOP_RECV:
366 event_poll = select.POLLIN
367
368 elif op == SOCKOP_SHUTDOWN:
369 event_poll = None
370
371 # The timeout is only used when OpenSSL requests polling for a condition.
372 # It is not advisable to have no timeout for shutdown.
373 assert timeout
374
375 else:
376 raise AssertionError("Invalid socket operation")
377
378 # Handshake is only supported by SSL sockets
379 if (op == SOCKOP_HANDSHAKE and
380 not isinstance(sock, OpenSSL.SSL.ConnectionType)):
381 return
382
383 # No override by default
384 event_override = 0
385
386 while True:
387 # Poll only for certain operations and when asked for by an override
388 if event_override or op in (SOCKOP_SEND, SOCKOP_RECV, SOCKOP_HANDSHAKE):
389 if event_override:
390 wait_for_event = event_override
391 else:
392 wait_for_event = event_poll
393
394 event = utils.WaitForFdCondition(sock, wait_for_event, timeout)
395 if event is None:
396 raise HttpSocketTimeout()
397
398 if event & (select.POLLNVAL | select.POLLHUP | select.POLLERR):
399 # Let the socket functions handle these
400 break
401
402 if not event & wait_for_event:
403 continue
404
405 # Reset override
406 event_override = 0
407
408 try:
409 try:
410 if op == SOCKOP_SEND:
411 return sock.send(arg1)
412
413 elif op == SOCKOP_RECV:
414 return sock.recv(arg1)
415
416 elif op == SOCKOP_SHUTDOWN:
417 if isinstance(sock, OpenSSL.SSL.ConnectionType):
418 # PyOpenSSL's shutdown() doesn't take arguments
419 return sock.shutdown()
420 else:
421 return sock.shutdown(arg1)
422
423 elif op == SOCKOP_HANDSHAKE:
424 return sock.do_handshake()
425
426 except OpenSSL.SSL.WantWriteError:
427 # OpenSSL wants to write, poll for POLLOUT
428 event_override = select.POLLOUT
429 continue
430
431 except OpenSSL.SSL.WantReadError:
432 # OpenSSL wants to read, poll for POLLIN
433 event_override = select.POLLIN | select.POLLPRI
434 continue
435
436 except OpenSSL.SSL.WantX509LookupError:
437 continue
438
439 except OpenSSL.SSL.ZeroReturnError, err:
440 # SSL Connection has been closed. In SSL 3.0 and TLS 1.0, this only
441 # occurs if a closure alert has occurred in the protocol, i.e. the
442 # connection has been closed cleanly. Note that this does not
443 # necessarily mean that the transport layer (e.g. a socket) has been
444 # closed.
445 if op == SOCKOP_SEND:
446 # Can happen during a renegotiation
447 raise HttpConnectionClosed(err.args)
448 elif op == SOCKOP_RECV:
449 return ""
450
451 # SSL_shutdown shouldn't return SSL_ERROR_ZERO_RETURN
452 raise socket.error(err.args)
453
454 except OpenSSL.SSL.SysCallError, err:
455 if op == SOCKOP_SEND:
456 # arg1 is the data when writing
457 if err.args and err.args[0] == -1 and arg1 == "":
458 # errors when writing empty strings are expected
459 # and can be ignored
460 return 0
461
462 if err.args == (-1, _SSL_UNEXPECTED_EOF):
463 if op == SOCKOP_RECV:
464 return ""
465 elif op == SOCKOP_HANDSHAKE:
466 # Can happen if peer disconnects directly after the connection is
467 # opened.
468 raise HttpSessionHandshakeUnexpectedEOF(err.args)
469
470 raise socket.error(err.args)
471
472 except OpenSSL.SSL.Error, err:
473 raise socket.error(err.args)
474
475 except socket.error, err:
476 if err.args and err.args[0] == errno.EAGAIN:
477 # Ignore EAGAIN
478 continue
479
480 raise
481
482
483 def ShutdownConnection(sock, close_timeout, write_timeout, msgreader, force):
484 """Closes the connection.
485
486 @type sock: socket
487 @param sock: Socket to be shut down
488 @type close_timeout: float
489 @param close_timeout: How long to wait for the peer to close
490 the connection
491 @type write_timeout: float
492 @param write_timeout: Write timeout for shutdown
493 @type msgreader: http.HttpMessageReader
494 @param msgreader: Request message reader, used to determine whether
495 peer should close connection
496 @type force: bool
497 @param force: Whether to forcibly close the connection without
498 waiting for peer
499
500 """
501 #print msgreader.peer_will_close, force
502 if msgreader and msgreader.peer_will_close and not force:
503 # Wait for peer to close
504 try:
505 # Check whether it's actually closed
506 if not SocketOperation(sock, SOCKOP_RECV, 1, close_timeout):
507 return
508 except (socket.error, HttpError, HttpSocketTimeout):
509 # Ignore errors at this stage
510 pass
511
512 # Close the connection from our side
513 try:
514 # We don't care about the return value, see NOTES in SSL_shutdown(3).
515 SocketOperation(sock, SOCKOP_SHUTDOWN, socket.SHUT_RDWR,
516 write_timeout)
517 except HttpSocketTimeout:
518 raise HttpError("Timeout while shutting down connection")
519 except socket.error, err:
520 # Ignore ENOTCONN
521 if not (err.args and err.args[0] == errno.ENOTCONN):
522 raise HttpError("Error while shutting down connection: %s" % err)
523
524
525 def Handshake(sock, write_timeout):
526 """Shakes peer's hands.
527
528 @type sock: socket
529 @param sock: Socket to be shut down
530 @type write_timeout: float
531 @param write_timeout: Write timeout for handshake
532
533 """
534 try:
535 return SocketOperation(sock, SOCKOP_HANDSHAKE, None, write_timeout)
536 except HttpSocketTimeout:
537 raise HttpError("Timeout during SSL handshake")
538 except socket.error, err:
539 raise HttpError("Error in SSL handshake: %s" % err)
540
541
class HttpSslParams(object):
  """Holds the SSL key and certificate read from PEM files.

  """
  def __init__(self, ssl_key_path, ssl_cert_path):
    """Reads the key and the certificate file into memory.

    @type ssl_key_path: string
    @param ssl_key_path: Path to file containing SSL key in PEM format
    @type ssl_cert_path: string
    @param ssl_cert_path: Path to file containing SSL certificate
        in PEM format

    """
    key_pem = utils.ReadFile(ssl_key_path)
    cert_pem = utils.ReadFile(ssl_cert_path)

    self.ssl_key_pem = key_pem
    self.ssl_cert_pem = cert_pem
    self.ssl_cert_path = ssl_cert_path

  def GetCertificateDigest(self):
    """Returns what C{utils.GetCertificateDigest} yields for the certificate
    file.

    """
    digest = utils.GetCertificateDigest(cert_filename=self.ssl_cert_path)
    return digest

  def GetCertificateFilename(self):
    """Returns the path of the certificate file given to the constructor.

    """
    return self.ssl_cert_path

  def GetKey(self):
    """Loads the private key from the PEM data read by the constructor.

    """
    pem_format = OpenSSL.crypto.FILETYPE_PEM
    return OpenSSL.crypto.load_privatekey(pem_format, self.ssl_key_pem)

  def GetCertificate(self):
    """Loads the certificate from the PEM data read by the constructor.

    """
    pem_format = OpenSSL.crypto.FILETYPE_PEM
    return OpenSSL.crypto.load_certificate(pem_format, self.ssl_cert_pem)
573
574
class HttpBase(object):
  """Base class for HTTP server and client.

  """
  def __init__(self):
    """Initializes this class with SSL not yet set up.

    """
    self.using_ssl = None
    self._ssl_params = None
    self._ssl_key = None
    self._ssl_cert = None

  def _CreateSocket(self, ssl_params, ssl_verify_peer, family,
                    ssl_verify_callback):
    """Creates a TCP socket and initializes SSL if needed.

    @type ssl_params: HttpSslParams
    @param ssl_params: SSL key and certificate
    @type ssl_verify_peer: bool
    @param ssl_verify_peer: Whether to require client certificate
        and compare it with our certificate
    @type family: int
    @param family: socket.AF_INET | socket.AF_INET6
    @type ssl_verify_callback: callable
    @param ssl_verify_callback: Verification callback handed to OpenSSL,
        must not be None if C{ssl_verify_peer} is set
    @return: The plain socket if C{ssl_params} is None, otherwise an
        C{OpenSSL.SSL.Connection} wrapping it

    """
    assert family in (socket.AF_INET, socket.AF_INET6)
    if ssl_verify_peer:
      assert ssl_verify_callback is not None

    self._ssl_params = ssl_params
    sock = socket.socket(family, socket.SOCK_STREAM)

    # Should we enable SSL?
    self.using_ssl = ssl_params is not None

    if not self.using_ssl:
      return sock

    # The socket is already open, hence it must not be left behind if any
    # part of the SSL setup fails.
    try:
      self._ssl_key = ssl_params.GetKey()
      self._ssl_cert = ssl_params.GetCertificate()

      ctx = OpenSSL.SSL.Context(OpenSSL.SSL.SSLv23_METHOD)
      ctx.set_options(OpenSSL.SSL.OP_NO_SSLv2)

      ciphers = self.GetSslCiphers()
      logging.debug("Setting SSL cipher string %s", ciphers)
      ctx.set_cipher_list(ciphers)

      ctx.use_privatekey(self._ssl_key)
      ctx.use_certificate(self._ssl_cert)
      ctx.check_privatekey()
      logging.debug("Certificate digest: %s.",
                    ssl_params.GetCertificateDigest())
      logging.debug("Certificate filename: %s.",
                    ssl_params.GetCertificateFilename())

      if ssl_verify_peer:
        ctx.set_verify(OpenSSL.SSL.VERIFY_PEER |
                       OpenSSL.SSL.VERIFY_FAIL_IF_NO_PEER_CERT,
                       ssl_verify_callback)

        # Also add our certificate as a trusted CA to be sent to the client.
        # This is required at least for GnuTLS clients to work.
        try:
          # This will fail for PyOpenssl versions before 0.10
          ctx.add_client_ca(self._ssl_cert)
        except AttributeError:
          # Fall back to letting OpenSSL read the certificate file directly.
          ctx.load_client_ca(ssl_params.ssl_cert_path)

      return OpenSSL.SSL.Connection(ctx, sock)
    except: # pylint: disable=W0702
      # Cleanup only, the original exception is re-raised unchanged
      sock.close()
      raise

  def GetSslCiphers(self): # pylint: disable=R0201
    """Returns the ciphers string for SSL.

    """
    return constants.OPENSSL_CIPHERS

  def _SSLVerifyCallback(self, conn, cert, errnum, errdepth, ok):
    """Verify the certificate provided by the peer

    We only compare fingerprints. The client must use the same certificate as
    we do on our side.

    @param cert: Certificate presented by the peer
    @rtype: bool
    @return: Whether both the SHA1 and the MD5 digest of the peer's
        certificate equal those of our own certificate

    """
    # some parameters are unused, but this is the API
    # pylint: disable=W0613
    assert self._ssl_params, "SSL not initialized"

    return (self._ssl_cert.digest("sha1") == cert.digest("sha1") and
            self._ssl_cert.digest("md5") == cert.digest("md5"))
663
664
class HttpMessage(object):
  """Container for the parts of an HTTP message.

  """
  def __init__(self):
    """Creates a message with start line, headers and body all unset.

    """
    self.start_line = self.headers = self.body = None
673
674
class HttpClientToServerStartLine(object):
  """Container for the start line of an HTTP request.

  """
  def __init__(self, method, path, version):
    """Stores method, path and protocol version of the request.

    """
    self.method = method
    self.path = path
    self.version = version

  def __str__(self):
    """Formats the start line as "method path version".

    """
    parts = (self.method, self.path, self.version)
    return "%s %s %s" % parts
686
687
class HttpServerToClientStartLine(object):
  """Container for the start line of an HTTP response.

  """
  def __init__(self, version, code, reason):
    """Stores protocol version, status code and reason of the response.

    """
    self.version = version
    self.code = code
    self.reason = reason

  def __str__(self):
    """Formats the start line as "version code reason".

    """
    parts = (self.version, self.code, self.reason)
    return "%s %s %s" % parts
699
700
class HttpMessageWriter(object):
  """Serializes an HTTP message and sends it over a socket.

  """
  def __init__(self, sock, msg, write_timeout):
    """Prepares, formats and sends the message; all of it happens here.

    @type sock: socket
    @param sock: Socket to be written to
    @type msg: http.HttpMessage
    @param msg: HTTP message to be written
    @type write_timeout: float
    @param write_timeout: Write timeout for socket

    """
    self._msg = msg

    self._PrepareMessage()

    payload = self._FormatMessage()
    total = len(payload)
    offset = 0

    while offset < total:
      # Hand over at most SOCK_BUF_SIZE bytes per call
      chunk = payload[offset:offset + SOCK_BUF_SIZE]

      # Continue after whatever was actually sent
      offset += SocketOperation(sock, SOCKOP_SEND, chunk, write_timeout)

    assert offset == total, "Message wasn't sent completely"

  def _PrepareMessage(self):
    """Sets the Content-Length header if the message has a body.

    """
    # RFC2616, section 4.3: "The presence of a message-body in a request is
    # signaled by the inclusion of a Content-Length or Transfer-Encoding header
    # field in the request's message-headers."
    body = self._msg.body
    if body:
      self._msg.headers[HTTP_CONTENT_LENGTH] = len(body)

  def _FormatMessage(self):
    """Builds the string representation of the HTTP message.

    @rtype: string
    @return: Start line, headers (not for HTTP/0.9), an empty line and, if
        L{HasMessageBody} agrees, the body

    """
    msg = self._msg
    out = StringIO()

    # Start line
    out.write(str(msg.start_line))
    out.write("\r\n")

    # Headers
    if msg.start_line.version != HTTP_0_9:
      for (name, value) in msg.headers.iteritems():
        out.write("%s: %s\r\n" % (name, value))

    out.write("\r\n")

    # Body, unless HasMessageBody tells us to leave it out
    if self.HasMessageBody():
      out.write(msg.body)

    elif msg.body:
      logging.warning("Ignoring message body")

    return out.getvalue()

  def HasMessageBody(self):
    """Tells whether the body of the HTTP message is non-empty.

    Can be overridden by subclasses.

    """
    return bool(self._msg.body)
778
779
class HttpMessageReader(object):
  """Reads HTTP message from socket.

  Reading and parsing is done by the constructor. Subclasses must provide
  L{ParseStartLine} and can limit start line and header sizes through
  C{START_LINE_LENGTH_MAX} and C{HEADER_LENGTH_MAX}.

  """
  # Length limits
  START_LINE_LENGTH_MAX = None
  HEADER_LENGTH_MAX = None

  # Parser state machine
  PS_START_LINE = "start-line"
  PS_HEADERS = "headers"
  PS_BODY = "entity-body"
  PS_COMPLETE = "complete"

  def __init__(self, sock, msg, read_timeout):
    """Reads an HTTP message from a socket.

    @type sock: socket
    @param sock: Socket to be read from
    @type msg: http.HttpMessage
    @param msg: Object for the read message
    @type read_timeout: float
    @param read_timeout: Read timeout for socket
    @raise HttpError: If the connection is closed before start line and
        headers were received completely

    """
    self.sock = sock
    self.msg = msg

    self.start_line_buffer = None
    self.header_buffer = StringIO()
    self.body_buffer = StringIO()
    self.parser_status = self.PS_START_LINE
    self.content_length = None
    self.peer_will_close = None

    buf = ""
    eof = False
    while self.parser_status != self.PS_COMPLETE:
      # TODO: Don't read more than necessary (Content-Length), otherwise
      # data might be lost and/or an error could occur
      data = SocketOperation(sock, SOCKOP_RECV, SOCK_BUF_SIZE, read_timeout)

      if data:
        buf += data
      else:
        eof = True

      # Do some parsing and error checking while more data arrives
      buf = self._ContinueParsing(buf, eof)

      # Must be done only after the buffer has been evaluated
      # TODO: Content-Length < len(data read) and connection closed
      if (eof and
          self.parser_status in (self.PS_START_LINE,
                                 self.PS_HEADERS)):
        raise HttpError("Connection closed prematurely")

    # Parse rest
    buf = self._ContinueParsing(buf, True)

    assert self.parser_status == self.PS_COMPLETE
    assert not buf, "Parser didn't read full response"

    # Body is complete
    msg.body = self.body_buffer.getvalue()

  def _ContinueParsing(self, buf, eof):
    """Main function for HTTP message state machine.

    Consumes as much of the buffer as the current state allows and moves on
    to the following states within the same call where possible.

    @type buf: string
    @param buf: Receive buffer
    @type eof: bool
    @param eof: Whether we've reached EOF on the socket
    @rtype: string
    @return: Updated receive buffer

    """
    # TODO: Use offset instead of slicing when possible
    if self.parser_status == self.PS_START_LINE:
      # Expect start line
      while True:
        idx = buf.find("\r\n")

        # RFC2616, section 4.1: "In the interest of robustness, servers SHOULD
        # ignore any empty line(s) received where a Request-Line is expected.
        # In other words, if the server is reading the protocol stream at the
        # beginning of a message and receives a CRLF first, it should ignore
        # the CRLF."
        if idx == 0:
          # TODO: Limit number of CRLFs/empty lines for safety?
          buf = buf[2:]
          continue

        if idx > 0:
          self.start_line_buffer = buf[:idx]

          self._CheckStartLineLength(len(self.start_line_buffer))

          # Remove status line, including CRLF
          buf = buf[idx + 2:]

          self.msg.start_line = self.ParseStartLine(self.start_line_buffer)

          self.parser_status = self.PS_HEADERS
        else:
          # Check whether incoming data is getting too large, otherwise we just
          # fill our read buffer.
          self._CheckStartLineLength(len(buf))

        break

    if self.parser_status == self.PS_HEADERS:
      if buf.startswith("\r\n"):
        # The start line is directly followed by the empty line ending the
        # header section, i.e. the message has no header fields at all
        # (RFC2616, section 4.1 allows zero of them). The header buffer
        # stays empty in this case.
        buf = buf[2:]

        self._ParseHeaders()

        self.parser_status = self.PS_BODY
      else:
        # Wait for header end
        idx = buf.find("\r\n\r\n")
        if idx >= 0:
          # Keep the CRLF terminating the last header line
          self.header_buffer.write(buf[:idx + 2])

          self._CheckHeaderLength(self.header_buffer.tell())

          # Remove headers, including CRLF
          buf = buf[idx + 4:]

          self._ParseHeaders()

          self.parser_status = self.PS_BODY
        else:
          # Check whether incoming data is getting too large, otherwise we just
          # fill our read buffer.
          self._CheckHeaderLength(len(buf))

    if self.parser_status == self.PS_BODY:
      # TODO: Implement max size for body_buffer
      self.body_buffer.write(buf)
      buf = ""

      # Check whether we've read everything
      #
      # RFC2616, section 4.4: "When a message-body is included with a message,
      # the transfer-length of that body is determined by one of the following
      # [...] 5. By the server closing the connection. (Closing the connection
      # cannot be used to indicate the end of a request body, since that would
      # leave no possibility for the server to send back a response.)"
      #
      # TODO: Error when buffer length > Content-Length header
      if (eof or
          self.content_length is None or
          self.body_buffer.tell() >= self.content_length):
        self.parser_status = self.PS_COMPLETE

    return buf

  def _CheckStartLineLength(self, length):
    """Limits the start line buffer size.

    @type length: int
    @param length: Buffer size
    @raise HttpError: If C{START_LINE_LENGTH_MAX} is set and exceeded

    """
    if (self.START_LINE_LENGTH_MAX is not None and
        length > self.START_LINE_LENGTH_MAX):
      raise HttpError("Start line longer than %d chars" %
                      self.START_LINE_LENGTH_MAX)

  def _CheckHeaderLength(self, length):
    """Limits the header buffer size.

    @type length: int
    @param length: Buffer size
    @raise HttpError: If C{HEADER_LENGTH_MAX} is set and exceeded

    """
    if (self.HEADER_LENGTH_MAX is not None and
        length > self.HEADER_LENGTH_MAX):
      raise HttpError("Headers longer than %d chars" % self.HEADER_LENGTH_MAX)

  def ParseStartLine(self, start_line):
    """Parses the start line of a message.

    Must be overridden by subclass.

    @type start_line: string
    @param start_line: Start line string

    """
    raise NotImplementedError()

  def _WillPeerCloseConnection(self):
    """Evaluate whether peer will close the connection.

    @rtype: bool
    @return: Whether peer will close the connection

    """
    # RFC2616, section 14.10: "HTTP/1.1 defines the "close" connection option
    # for the sender to signal that the connection will be closed after
    # completion of the response. For example,
    #
    #        Connection: close
    #
    # in either the request or the response header fields indicates that the
    # connection SHOULD NOT be considered `persistent' (section 8.1) after the
    # current request/response is complete."

    hdr_connection = self.msg.headers.get(HTTP_CONNECTION, None)
    if hdr_connection:
      hdr_connection = hdr_connection.lower()

    # An HTTP/1.1 server is assumed to stay open unless explicitly closed.
    if self.msg.start_line.version == HTTP_1_1:
      # Convert explicitly, the expression alone would evaluate to None or
      # an empty string if there's no Connection header
      return bool(hdr_connection and "close" in hdr_connection)

    # Some HTTP/1.0 implementations have support for persistent connections,
    # using rules different than HTTP/1.1.

    # For older HTTP, Keep-Alive indicates persistent connection.
    if self.msg.headers.get(HTTP_KEEP_ALIVE):
      return False

    # At least Akamai returns a "Connection: Keep-Alive" header, which was
    # supposed to be sent by the client.
    if hdr_connection and "keep-alive" in hdr_connection:
      return False

    return True

  def _ParseHeaders(self):
    """Parses the headers.

    This function also adjusts internal variables based on header values.

    RFC2616, section 4.3: The presence of a message-body in a request is
    signaled by the inclusion of a Content-Length or Transfer-Encoding header
    field in the request's message-headers.

    """
    # Parse headers
    self.header_buffer.seek(0, 0)
    self.msg.headers = ParseHeaders(self.header_buffer)

    self.peer_will_close = self._WillPeerCloseConnection()

    # Do we have a Content-Length header?
    hdr_content_length = self.msg.headers.get(HTTP_CONTENT_LENGTH, None)
    if hdr_content_length:
      try:
        self.content_length = int(hdr_content_length)
      except (TypeError, ValueError):
        self.content_length = None
      # Negative lengths are treated like a missing header
      if self.content_length is not None and self.content_length < 0:
        self.content_length = None

    # if the connection remains open and a content-length was not provided,
    # then assume that the connection WILL close.
    if self.content_length is None:
      self.peer_will_close = True
1034 if self.content_length is None:
1035 self.peer_will_close = True