b01d6fb9a6a889bf8cdf630269d2f67c7e85605d
[ganeti-github.git] / lib / http / __init__.py
1 #
2 #
3
4 # Copyright (C) 2007, 2008, 2010, 2012 Google Inc.
5 # All rights reserved.
6 #
7 # Redistribution and use in source and binary forms, with or without
8 # modification, are permitted provided that the following conditions are
9 # met:
10 #
11 # 1. Redistributions of source code must retain the above copyright notice,
12 # this list of conditions and the following disclaimer.
13 #
14 # 2. Redistributions in binary form must reproduce the above copyright
15 # notice, this list of conditions and the following disclaimer in the
16 # documentation and/or other materials provided with the distribution.
17 #
18 # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
19 # IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
20 # TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
21 # PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR
22 # CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
23 # EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
24 # PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
25 # PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
26 # LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
27 # NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
28 # SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29
30 """HTTP module.
31
32 """
33
34 import logging
35 import mimetools
36 import OpenSSL
37 import select
38 import socket
39 import errno
40
41 from cStringIO import StringIO
42
43 from ganeti import constants
44 from ganeti import utils
45
46
# Identification string of the form "Ganeti <release version>"
# NOTE(review): presumably sent in Server/User-Agent headers; confirm at the
# call sites, which are outside this module
HTTP_GANETI_VERSION = "Ganeti %s" % constants.RELEASE_VERSION
48
# Status codes
HTTP_OK = 200
HTTP_NO_CONTENT = 204
HTTP_NOT_MODIFIED = 304

# Protocol version strings as they appear in start lines
HTTP_0_9 = "HTTP/0.9"
HTTP_1_0 = "HTTP/1.0"
HTTP_1_1 = "HTTP/1.1"

# Request methods
HTTP_GET = "GET"
HTTP_HEAD = "HEAD"
HTTP_POST = "POST"
HTTP_PUT = "PUT"
HTTP_DELETE = "DELETE"

# Header field names
HTTP_ETAG = "ETag"
HTTP_HOST = "Host"
HTTP_SERVER = "Server"
HTTP_DATE = "Date"
HTTP_USER_AGENT = "User-Agent"
HTTP_CONTENT_TYPE = "Content-Type"
HTTP_CONTENT_LENGTH = "Content-Length"
HTTP_CONNECTION = "Connection"
HTTP_KEEP_ALIVE = "Keep-Alive"
HTTP_WWW_AUTHENTICATE = "WWW-Authenticate"
HTTP_AUTHORIZATION = "Authorization"
HTTP_AUTHENTICATION_INFO = "Authentication-Info"
HTTP_ALLOW = "Allow"

# Media types
HTTP_APP_OCTET_STREAM = "application/octet-stream"
HTTP_APP_JSON = "application/json"

# Error text compared against the arguments of OpenSSL.SSL.SysCallError in
# SocketOperation to recognize a connection that ended unexpectedly
_SSL_UNEXPECTED_EOF = "Unexpected EOF"

# Socket operations understood by SocketOperation
SOCKOP_SEND, SOCKOP_RECV, SOCKOP_SHUTDOWN, SOCKOP_HANDSHAKE = range(4)

# Send/receive quantum: upper bound for the data passed to a single send or
# requested from a single recv
SOCK_BUF_SIZE = 32768
90
91
class HttpError(Exception):
  """Generic error raised by the HTTP helpers in this module.

  Meant for internal error reporting only.

  """
98
99
class HttpConnectionClosed(Exception):
  """Signals that the connection has been closed.

  Meant for internal error reporting only, and only as a last resort when
  the condition can't be reported in any other way.

  """
107
108
class HttpSessionHandshakeUnexpectedEOF(HttpError):
  """Signals an unexpected end of the connection during the SSL handshake.

  Meant for internal error reporting only.

  """
115
116
class HttpSocketTimeout(Exception):
  """Signals that a socket operation timed out.

  Meant for internal error reporting only.

  """
123
124
class HttpException(Exception):
  """Base class for exceptions that map to an HTTP status code.

  Subclasses set C{code} to the numeric status code they stand for.

  @ivar message: Optional message describing the error
  @ivar headers: Optional headers associated with the error

  """
  code = None
  message = None

  def __init__(self, message=None, headers=None):
    """Initializes this class.

    @type message: any
    @param message: Optional message describing the error
    @type headers: any
    @param headers: Optional headers associated with the error

    """
    # Hand the message to the base class so that str(), repr() and tracebacks
    # show it; without a message the exception keeps empty arguments
    if message is None:
      Exception.__init__(self)
    else:
      Exception.__init__(self, message)

    self.message = message
    self.headers = headers
133
134
class HttpBadRequest(HttpException):
  """400 Bad Request

  Per RFC2616, section 10.4.1, the server could not understand the request
  because of malformed syntax; the client should not repeat it without
  modifications.

  """
  code = 400
144
145
class HttpUnauthorized(HttpException):
  """401 Unauthorized

  Per RFC2616, section 10.4.2, the request requires user authentication.
  The response must carry a WWW-Authenticate header field (section 14.47)
  with a challenge applicable to the requested resource.

  """
  code = 401
156
157
class HttpForbidden(HttpException):
  """403 Forbidden

  Per RFC2616, section 10.4.4, the server understood the request but
  refuses to fulfill it. Authorization will not help and the request
  should not be repeated.

  """
  code = 403
167
168
class HttpNotFound(HttpException):
  """404 Not Found

  Per RFC2616, section 10.4.5, the server found nothing matching the
  Request-URI. Nothing is said about whether the condition is temporary or
  permanent.

  """
  code = 404
178
179
class HttpMethodNotAllowed(HttpException):
  """405 Method Not Allowed

  Per RFC2616, section 10.4.6, the method given in the Request-Line is not
  allowed for the resource identified by the Request-URI. The response
  must include an Allow header listing the valid methods for that
  resource.

  """
  code = 405
190
191
class HttpNotAcceptable(HttpException):
  """406 Not Acceptable

  Per RFC2616, section 10.4.7, the requested resource can only generate
  response entities whose content characteristics are not acceptable
  according to the accept headers sent in the request.

  """
  code = 406
201
202
class HttpRequestTimeout(HttpException):
  """408 Request Timeout

  Per RFC2616, section 10.4.9, the client did not produce a request within
  the time the server was prepared to wait. The client may repeat the
  request without modifications at any later time.

  """
  code = 408
212
213
class HttpConflict(HttpException):
  """409 Conflict

  Per RFC2616, section 10.4.10, the request could not be completed because
  it conflicts with the current state of the resource. The code is only
  allowed where the user can be expected to resolve the conflict and
  resubmit the request.

  """
  code = 409
224
225
class HttpGone(HttpException):
  """410 Gone

  Per RFC2616, section 10.4.11, the requested resource is no longer
  available at the server and no forwarding address is known. The
  condition is expected to be considered permanent.

  """
  code = 410
235
236
class HttpLengthRequired(HttpException):
  """411 Length Required

  Per RFC2616, section 10.4.12, the server refuses to accept the request
  without a defined Content-Length. The client may repeat the request
  after adding a valid Content-Length header field that contains the
  length of the message-body.

  """
  code = 411
247
248
class HttpPreconditionFailed(HttpException):
  """412 Precondition Failed

  Per RFC2616, section 10.4.13, a precondition given in one or more of the
  request-header fields evaluated to false when tested on the server.

  """
  code = 412
258
259
class HttpUnsupportedMediaType(HttpException):
  """415 Unsupported Media Type

  Per RFC2616, section 10.4.16, the server refuses to service the request
  because the request entity is in a format the requested resource does
  not support for the requested method.

  """
  code = 415
269
270
class HttpInternalServerError(HttpException):
  """500 Internal Server Error

  Per RFC2616, section 10.5.1, the server hit an unexpected condition that
  prevented it from fulfilling the request.

  """
  code = 500
279
280
class HttpNotImplemented(HttpException):
  """501 Not Implemented

  Per RFC2616, section 10.5.2, the server lacks the functionality needed
  to fulfill the request.

  """
  code = 501
289
290
class HttpBadGateway(HttpException):
  """502 Bad Gateway

  Per RFC2616, section 10.5.3, the server acted as a gateway or proxy and
  got an invalid response from the upstream server it accessed while
  trying to fulfill the request.

  """
  code = 502
300
301
class HttpServiceUnavailable(HttpException):
  """503 Service Unavailable

  Per RFC2616, section 10.5.4, the server currently can't handle the
  request because of temporary overloading or maintenance.

  """
  code = 503
310
311
class HttpGatewayTimeout(HttpException):
  """504 Gateway Timeout

  Per RFC2616, section 10.5.5, the server acted as a gateway or proxy and
  did not get a timely response from the upstream server specified by the
  URI (e.g. HTTP, FTP, LDAP) or from another auxiliary server (e.g. DNS)
  it needed in order to complete the request.

  """
  code = 504
323
324
class HttpVersionNotSupported(HttpException):
  """505 HTTP Version Not Supported

  Per RFC2616, section 10.5.6, the server does not support, or refuses to
  support, the HTTP protocol version used in the request message.

  """
  code = 505
333
334
335 def ParseHeaders(buf):
336 """Parses HTTP headers.
337
338 @note: This is just a trivial wrapper around C{mimetools.Message}
339
340 """
341 return mimetools.Message(buf, 0)
342
343
def SocketOperation(sock, op, arg1, timeout):
  """Runs a single socket operation with unified error handling.

  Sending, receiving and handshaking first wait (using
  C{utils.WaitForFdCondition}) until the socket reports the matching poll
  event. A shutdown is attempted right away and only polls when OpenSSL
  asks to wait for a condition.

  @type sock: socket
  @param sock: Socket for the operation
  @type op: int
  @param op: Operation to execute (SOCKOP_* constants)
  @type arg1: any
  @param arg1: Parameter for the socket function (data for sending, number
      of bytes for receiving, shutdown mode for plain sockets)
  @type timeout: None or float
  @param timeout: Timeout in seconds or None
  @return: Return value of socket function; C{None} when a handshake is
      requested on a non-SSL socket or when polling reports POLLNVAL,
      POLLHUP or POLLERR
  @raise HttpSocketTimeout: If waiting for the socket timed out
  @raise HttpConnectionClosed: If the SSL connection was closed while sending
  @raise HttpSessionHandshakeUnexpectedEOF: If the peer disconnected during
      the SSL handshake
  @raise socket.error: For all other socket and OpenSSL errors

  """
  # TODO: event_poll/event_check/override
  if op == SOCKOP_SEND or op == SOCKOP_HANDSHAKE:
    default_event = select.POLLOUT
  elif op == SOCKOP_RECV:
    default_event = select.POLLIN
  elif op == SOCKOP_SHUTDOWN:
    default_event = None

    # The timeout is only used when OpenSSL requests polling for a condition.
    # It is not advisable to have no timeout for shutdown.
    assert timeout
  else:
    raise AssertionError("Invalid socket operation")

  # Plain sockets have nothing to negotiate
  if (op == SOCKOP_HANDSHAKE and
      not isinstance(sock, OpenSSL.SSL.ConnectionType)):
    return

  error_events = select.POLLNVAL | select.POLLHUP | select.POLLERR
  always_polled = (SOCKOP_SEND, SOCKOP_RECV, SOCKOP_HANDSHAKE)

  # Event requested by OpenSSL for the next round, 0 means "none"
  requested_event = 0

  while True:
    # Poll only for certain operations and when OpenSSL asked for it
    if requested_event or op in always_polled:
      wanted = requested_event or default_event

      seen = utils.WaitForFdCondition(sock, wanted, timeout)
      if seen is None:
        raise HttpSocketTimeout()

      if seen & error_events:
        # NOTE: this leaves the loop, hence the function returns None without
        # calling the socket function
        break

      if not (seen & wanted):
        continue

    # The request by OpenSSL, if any, has been served
    requested_event = 0

    try:
      try:
        if op == SOCKOP_SEND:
          return sock.send(arg1)

        elif op == SOCKOP_RECV:
          return sock.recv(arg1)

        elif op == SOCKOP_SHUTDOWN:
          if isinstance(sock, OpenSSL.SSL.ConnectionType):
            # PyOpenSSL's shutdown() doesn't take arguments
            return sock.shutdown()
          else:
            return sock.shutdown(arg1)

        elif op == SOCKOP_HANDSHAKE:
          return sock.do_handshake()

      except OpenSSL.SSL.WantWriteError:
        # OpenSSL needs to write first, wait for POLLOUT
        requested_event = select.POLLOUT
        continue

      except OpenSSL.SSL.WantReadError:
        # OpenSSL needs to read first, wait for POLLIN
        requested_event = select.POLLIN | select.POLLPRI
        continue

      except OpenSSL.SSL.WantX509LookupError:
        continue

      except OpenSSL.SSL.ZeroReturnError as err:
        # SSL Connection has been closed. In SSL 3.0 and TLS 1.0, this only
        # occurs if a closure alert has occurred in the protocol, i.e. the
        # connection has been closed cleanly. Note that this does not
        # necessarily mean that the transport layer (e.g. a socket) has been
        # closed.
        if op == SOCKOP_SEND:
          # Can happen during a renegotiation
          raise HttpConnectionClosed(err.args)
        elif op == SOCKOP_RECV:
          return ""

        # SSL_shutdown shouldn't return SSL_ERROR_ZERO_RETURN
        raise socket.error(err.args)

      except OpenSSL.SSL.SysCallError as err:
        # arg1 is the data when writing; errors when writing empty strings
        # are expected and can be ignored
        if (op == SOCKOP_SEND and
            err.args and err.args[0] == -1 and arg1 == ""):
          return 0

        if err.args == (-1, _SSL_UNEXPECTED_EOF):
          if op == SOCKOP_RECV:
            return ""
          elif op == SOCKOP_HANDSHAKE:
            # Can happen if peer disconnects directly after the connection is
            # opened.
            raise HttpSessionHandshakeUnexpectedEOF(err.args)

        raise socket.error(err.args)

      except OpenSSL.SSL.Error as err:
        raise socket.error(err.args)

    except socket.error as err:
      # EAGAIN is ignored and the operation retried, everything else is
      # passed on to the caller
      if not (err.args and err.args[0] == errno.EAGAIN):
        raise
481
482
def ShutdownConnection(sock, close_timeout, write_timeout, msgreader, force):
  """Shuts down a connection, optionally waiting for the peer to close first.

  @type sock: socket
  @param sock: Socket to be shut down
  @type close_timeout: float
  @param close_timeout: How long to wait for the peer to close
      the connection
  @type write_timeout: float
  @param write_timeout: Write timeout for shutdown
  @type msgreader: http.HttpMessageReader
  @param msgreader: Request message reader, used to determine whether
      peer should close connection
  @type force: bool
  @param force: Whether to forcibly close the connection without
      waiting for peer
  @raise HttpError: If the shutdown times out or fails with anything but
      ENOTCONN

  """
  wait_for_peer = msgreader and msgreader.peer_will_close and not force

  if wait_for_peer:
    try:
      # Nothing received means the peer has closed the connection already
      if not SocketOperation(sock, SOCKOP_RECV, 1, close_timeout):
        return
    except (socket.error, HttpError, HttpSocketTimeout):
      # Errors don't matter at this stage, the connection is closed from our
      # side below
      pass

  try:
    # We don't care about the return value, see NOTES in SSL_shutdown(3).
    SocketOperation(sock, SOCKOP_SHUTDOWN, socket.SHUT_RDWR, write_timeout)
  except HttpSocketTimeout:
    raise HttpError("Timeout while shutting down connection")
  except socket.error as err:
    # A socket which isn't connected anymore is fine
    if err.args and err.args[0] == errno.ENOTCONN:
      return

    raise HttpError("Error while shutting down connection: %s" % err)
523
524
def Handshake(sock, write_timeout):
  """Performs the SSL handshake on a socket.

  @type sock: socket
  @param sock: Socket on which to do the handshake
  @type write_timeout: float
  @param write_timeout: Write timeout for handshake
  @return: Return value of L{SocketOperation} for the handshake
  @raise HttpError: If the handshake times out or fails with a socket error

  """
  try:
    result = SocketOperation(sock, SOCKOP_HANDSHAKE, None, write_timeout)
  except HttpSocketTimeout:
    raise HttpError("Timeout during SSL handshake")
  except socket.error as err:
    raise HttpError("Error in SSL handshake: %s" % err)

  return result
540
541
class HttpSslParams(object):
  """Container for an SSL private key and certificate in PEM format.

  """
  def __init__(self, ssl_key_path, ssl_cert_path):
    """Reads the key and the certificate.

    @type ssl_key_path: string
    @param ssl_key_path: Path to file containing SSL key in PEM format
    @type ssl_cert_path: string
    @param ssl_cert_path: Path to file containing SSL certificate
        in PEM format

    """
    self.ssl_key_pem = utils.ReadFile(ssl_key_path)
    self.ssl_cert_pem = utils.ReadFile(ssl_cert_path)

    # The path is kept as well, HttpBase falls back to it for loading the
    # client CA
    self.ssl_cert_path = ssl_cert_path

  def GetKey(self):
    """Returns the private key loaded from the stored PEM data.

    """
    crypto = OpenSSL.crypto
    return crypto.load_privatekey(crypto.FILETYPE_PEM, self.ssl_key_pem)

  def GetCertificate(self):
    """Returns the certificate loaded from the stored PEM data.

    """
    crypto = OpenSSL.crypto
    return crypto.load_certificate(crypto.FILETYPE_PEM, self.ssl_cert_pem)
567
568
class HttpBase(object):
  """Socket and SSL setup shared by the HTTP server and client.

  """
  def __init__(self):
    """Initializes this class; all values are filled in by L{_CreateSocket}.

    """
    self.using_ssl = None
    self._ssl_params = None
    self._ssl_key = None
    self._ssl_cert = None

  def _CreateSocket(self, ssl_params, ssl_verify_peer, family,
                    ssl_verify_callback):
    """Creates a TCP socket and wraps it in an SSL connection if requested.

    @type ssl_params: HttpSslParams
    @param ssl_params: SSL key and certificate, C{None} to disable SSL
    @type ssl_verify_peer: bool
    @param ssl_verify_peer: Whether to require client certificate
        and compare it with our certificate
    @type family: int
    @param family: socket.AF_INET | socket.AF_INET6
    @type ssl_verify_callback: callable
    @param ssl_verify_callback: Verification callback passed to OpenSSL,
        required when C{ssl_verify_peer} is set
    @return: Plain socket without SSL, C{OpenSSL.SSL.Connection} otherwise

    """
    assert family in (socket.AF_INET, socket.AF_INET6)
    assert not ssl_verify_peer or ssl_verify_callback is not None

    self._ssl_params = ssl_params
    plain_sock = socket.socket(family, socket.SOCK_STREAM)

    # SSL is enabled by passing SSL parameters
    self.using_ssl = ssl_params is not None
    if not self.using_ssl:
      return plain_sock

    self._ssl_key = ssl_params.GetKey()
    self._ssl_cert = ssl_params.GetCertificate()

    ssl_mod = OpenSSL.SSL

    # Only SSLv2 is explicitly disabled here
    context = ssl_mod.Context(ssl_mod.SSLv23_METHOD)
    context.set_options(ssl_mod.OP_NO_SSLv2)

    cipher_string = self.GetSslCiphers()
    logging.debug("Setting SSL cipher string %s", cipher_string)
    context.set_cipher_list(cipher_string)

    context.use_privatekey(self._ssl_key)
    context.use_certificate(self._ssl_cert)
    context.check_privatekey()

    if ssl_verify_peer:
      verify_mode = (ssl_mod.VERIFY_PEER |
                     ssl_mod.VERIFY_FAIL_IF_NO_PEER_CERT)
      context.set_verify(verify_mode, ssl_verify_callback)

      # Also add our certificate as a trusted CA to be sent to the client.
      # This is required at least for GnuTLS clients to work.
      try:
        # This will fail for PyOpenssl versions before 0.10
        context.add_client_ca(self._ssl_cert)
      except AttributeError:
        # Fall back to letting OpenSSL read the certificate file directly.
        context.load_client_ca(ssl_params.ssl_cert_path)

    return ssl_mod.Connection(context, plain_sock)

  def GetSslCiphers(self): # pylint: disable=R0201
    """Returns the cipher string to be set on the SSL context.

    """
    return constants.OPENSSL_CIPHERS

  def _SSLVerifyCallback(self, conn, cert, errnum, errdepth, ok):
    """Verifies the certificate presented by the peer.

    Only fingerprints are compared: both the SHA1 and the MD5 digest of the
    peer's certificate must be equal to those of our own certificate, i.e.
    the peer has to use the same certificate as we do.

    @return: Whether both digests match

    """
    # some parameters are unused, but this is the API
    # pylint: disable=W0613
    assert self._ssl_params, "SSL not initialized"

    own_cert = self._ssl_cert
    return all(own_cert.digest(algo) == cert.digest(algo)
               for algo in ("sha1", "md5"))
654
655
class HttpMessage(object):
  """Plain container for the parts of an HTTP message.

  """
  def __init__(self):
    """Creates an empty message without start line, headers and body.

    """
    self.start_line = self.headers = self.body = None
664
665
class HttpClientToServerStartLine(object):
  """Start line of an HTTP request (method, path and protocol version).

  """
  def __init__(self, method, path, version):
    """Stores the three components of the request line.

    """
    self.method = method
    self.path = path
    self.version = version

  def __str__(self):
    """Formats the start line as "method path version".

    """
    parts = (self.method, self.path, self.version)
    return "%s %s %s" % parts
677
678
class HttpServerToClientStartLine(object):
  """Start line of an HTTP response (protocol version, code and reason).

  """
  def __init__(self, version, code, reason):
    """Stores the three components of the status line.

    """
    self.version = version
    self.code = code
    self.reason = reason

  def __str__(self):
    """Formats the start line as "version code reason".

    """
    parts = (self.version, self.code, self.reason)
    return "%s %s %s" % parts
690
691
class HttpMessageWriter(object):
  """Serializes an HTTP message and sends it over a socket.

  """
  def __init__(self, sock, msg, write_timeout):
    """Prepares, formats and writes an HTTP message to a socket.

    The whole message is sent from within the constructor.

    @type sock: socket
    @param sock: Socket to be written to
    @type msg: http.HttpMessage
    @param msg: HTTP message to be written
    @type write_timeout: float
    @param write_timeout: Write timeout for socket

    """
    self._msg = msg

    self._PrepareMessage()

    payload = self._FormatMessage()
    total = len(payload)

    offset = 0
    while offset < total:
      # At most SOCK_BUF_SIZE bytes are handed to the socket at a time; the
      # offset advances by what was actually sent
      chunk = payload[offset:offset + SOCK_BUF_SIZE]
      offset += SocketOperation(sock, SOCKOP_SEND, chunk, write_timeout)

    assert offset == total, "Message wasn't sent completely"

  def _PrepareMessage(self):
    """Sets mandatory headers before the message is formatted.

    """
    # RFC2616, section 4.3: "The presence of a message-body in a request is
    # signaled by the inclusion of a Content-Length or Transfer-Encoding header
    # field in the request's message-headers."
    body = self._msg.body
    if body:
      self._msg.headers[HTTP_CONTENT_LENGTH] = len(body)

  def _FormatMessage(self):
    """Builds the string representation of the HTTP message.

    @rtype: string
    @return: Start line, headers (left out for HTTP/0.9) and, if there is
        one, the message body

    """
    msg = self._msg
    out = StringIO()

    # Start line
    out.write(str(msg.start_line))
    out.write("\r\n")

    # Headers
    if msg.start_line.version != HTTP_0_9:
      for name, value in msg.headers.iteritems():
        out.write("%s: %s\r\n" % (name, value))

    out.write("\r\n")

    # Message body, if it is to be sent
    if self.HasMessageBody():
      out.write(msg.body)
    elif msg.body:
      logging.warning("Ignoring message body")

    return out.getvalue()

  def HasMessageBody(self):
    """Tells whether the message body is to be sent.

    Can be overridden by subclasses.

    @rtype: bool

    """
    if self._msg.body:
      return True

    return False
769
770
771 class HttpMessageReader(object):
772 """Reads HTTP message from socket.
773
774 """
775 # Length limits
776 START_LINE_LENGTH_MAX = None
777 HEADER_LENGTH_MAX = None
778
779 # Parser state machine
780 PS_START_LINE = "start-line"
781 PS_HEADERS = "headers"
782 PS_BODY = "entity-body"
783 PS_COMPLETE = "complete"
784
785 def __init__(self, sock, msg, read_timeout):
786 """Reads an HTTP message from a socket.
787
788 @type sock: socket
789 @param sock: Socket to be read from
790 @type msg: http.HttpMessage
791 @param msg: Object for the read message
792 @type read_timeout: float
793 @param read_timeout: Read timeout for socket
794
795 """
796 self.sock = sock
797 self.msg = msg
798
799 self.start_line_buffer = None
800 self.header_buffer = StringIO()
801 self.body_buffer = StringIO()
802 self.parser_status = self.PS_START_LINE
803 self.content_length = None
804 self.peer_will_close = None
805
806 buf = ""
807 eof = False
808 while self.parser_status != self.PS_COMPLETE:
809 # TODO: Don't read more than necessary (Content-Length), otherwise
810 # data might be lost and/or an error could occur
811 data = SocketOperation(sock, SOCKOP_RECV, SOCK_BUF_SIZE, read_timeout)
812
813 if data:
814 buf += data
815 else:
816 eof = True
817
818 # Do some parsing and error checking while more data arrives
819 buf = self._ContinueParsing(buf, eof)
820
821 # Must be done only after the buffer has been evaluated
822 # TODO: Content-Length < len(data read) and connection closed
823 if (eof and
824 self.parser_status in (self.PS_START_LINE,
825 self.PS_HEADERS)):
826 raise HttpError("Connection closed prematurely")
827
828 # Parse rest
829 buf = self._ContinueParsing(buf, True)
830
831 assert self.parser_status == self.PS_COMPLETE
832 assert not buf, "Parser didn't read full response"
833
834 # Body is complete
835 msg.body = self.body_buffer.getvalue()
836
837 def _ContinueParsing(self, buf, eof):
838 """Main function for HTTP message state machine.
839
840 @type buf: string
841 @param buf: Receive buffer
842 @type eof: bool
843 @param eof: Whether we've reached EOF on the socket
844 @rtype: string
845 @return: Updated receive buffer
846
847 """
848 # TODO: Use offset instead of slicing when possible
849 if self.parser_status == self.PS_START_LINE:
850 # Expect start line
851 while True:
852 idx = buf.find("\r\n")
853
854 # RFC2616, section 4.1: "In the interest of robustness, servers SHOULD
855 # ignore any empty line(s) received where a Request-Line is expected.
856 # In other words, if the server is reading the protocol stream at the
857 # beginning of a message and receives a CRLF first, it should ignore
858 # the CRLF."
859 if idx == 0:
860 # TODO: Limit number of CRLFs/empty lines for safety?
861 buf = buf[2:]
862 continue
863
864 if idx > 0:
865 self.start_line_buffer = buf[:idx]
866
867 self._CheckStartLineLength(len(self.start_line_buffer))
868
869 # Remove status line, including CRLF
870 buf = buf[idx + 2:]
871
872 self.msg.start_line = self.ParseStartLine(self.start_line_buffer)
873
874 self.parser_status = self.PS_HEADERS
875 else:
876 # Check whether incoming data is getting too large, otherwise we just
877 # fill our read buffer.
878 self._CheckStartLineLength(len(buf))
879
880 break
881
882 # TODO: Handle messages without headers
883 if self.parser_status == self.PS_HEADERS:
884 # Wait for header end
885 idx = buf.find("\r\n\r\n")
886 if idx >= 0:
887 self.header_buffer.write(buf[:idx + 2])
888
889 self._CheckHeaderLength(self.header_buffer.tell())
890
891 # Remove headers, including CRLF
892 buf = buf[idx + 4:]
893
894 self._ParseHeaders()
895
896 self.parser_status = self.PS_BODY
897 else:
898 # Check whether incoming data is getting too large, otherwise we just
899 # fill our read buffer.
900 self._CheckHeaderLength(len(buf))
901
902 if self.parser_status == self.PS_BODY:
903 # TODO: Implement max size for body_buffer
904 self.body_buffer.write(buf)
905 buf = ""
906
907 # Check whether we've read everything
908 #
909 # RFC2616, section 4.4: "When a message-body is included with a message,
910 # the transfer-length of that body is determined by one of the following
911 # [...] 5. By the server closing the connection. (Closing the connection
912 # cannot be used to indicate the end of a request body, since that would
913 # leave no possibility for the server to send back a response.)"
914 #
915 # TODO: Error when buffer length > Content-Length header
916 if (eof or
917 self.content_length is None or
918 (self.content_length is not None and
919 self.body_buffer.tell() >= self.content_length)):
920 self.parser_status = self.PS_COMPLETE
921
922 return buf
923
924 def _CheckStartLineLength(self, length):
925 """Limits the start line buffer size.
926
927 @type length: int
928 @param length: Buffer size
929
930 """
931 if (self.START_LINE_LENGTH_MAX is not None and
932 length > self.START_LINE_LENGTH_MAX):
933 raise HttpError("Start line longer than %d chars" %
934 self.START_LINE_LENGTH_MAX)
935
936 def _CheckHeaderLength(self, length):
937 """Limits the header buffer size.
938
939 @type length: int
940 @param length: Buffer size
941
942 """
943 if (self.HEADER_LENGTH_MAX is not None and
944 length > self.HEADER_LENGTH_MAX):
945 raise HttpError("Headers longer than %d chars" % self.HEADER_LENGTH_MAX)
946
947 def ParseStartLine(self, start_line):
948 """Parses the start line of a message.
949
950 Must be overridden by subclass.
951
952 @type start_line: string
953 @param start_line: Start line string
954
955 """
956 raise NotImplementedError()
957
958 def _WillPeerCloseConnection(self):
959 """Evaluate whether peer will close the connection.
960
961 @rtype: bool
962 @return: Whether peer will close the connection
963
964 """
965 # RFC2616, section 14.10: "HTTP/1.1 defines the "close" connection option
966 # for the sender to signal that the connection will be closed after
967 # completion of the response. For example,
968 #
969 # Connection: close
970 #
971 # in either the request or the response header fields indicates that the
972 # connection SHOULD NOT be considered `persistent' (section 8.1) after the
973 # current request/response is complete."
974
975 hdr_connection = self.msg.headers.get(HTTP_CONNECTION, None)
976 if hdr_connection:
977 hdr_connection = hdr_connection.lower()
978
979 # An HTTP/1.1 server is assumed to stay open unless explicitly closed.
980 if self.msg.start_line.version == HTTP_1_1:
981 return (hdr_connection and "close" in hdr_connection)
982
983 # Some HTTP/1.0 implementations have support for persistent connections,
984 # using rules different than HTTP/1.1.
985
986 # For older HTTP, Keep-Alive indicates persistent connection.
987 if self.msg.headers.get(HTTP_KEEP_ALIVE):
988 return False
989
990 # At least Akamai returns a "Connection: Keep-Alive" header, which was
991 # supposed to be sent by the client.
992 if hdr_connection and "keep-alive" in hdr_connection:
993 return False
994
995 return True
996
997 def _ParseHeaders(self):
998 """Parses the headers.
999
1000 This function also adjusts internal variables based on header values.
1001
1002 RFC2616, section 4.3: The presence of a message-body in a request is
1003 signaled by the inclusion of a Content-Length or Transfer-Encoding header
1004 field in the request's message-headers.
1005
1006 """
1007 # Parse headers
1008 self.header_buffer.seek(0, 0)
1009 self.msg.headers = ParseHeaders(self.header_buffer)
1010
1011 self.peer_will_close = self._WillPeerCloseConnection()
1012
1013 # Do we have a Content-Length header?
1014 hdr_content_length = self.msg.headers.get(HTTP_CONTENT_LENGTH, None)
1015 if hdr_content_length:
1016 try:
1017 self.content_length = int(hdr_content_length)
1018 except (TypeError, ValueError):
1019 self.content_length = None
1020 if self.content_length is not None and self.content_length < 0:
1021 self.content_length = None
1022
1023 # if the connection remains open and a content-length was not provided,
1024 # then assume that the connection WILL close.
1025 if self.content_length is None:
1026 self.peer_will_close = True