12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
370470570670770870971071171271371471571671771871972072172272372472572672772872973073173273373473573673773873974074174274374474574674774874975075175275375475575675775875976076176276376476576676776876977077177277377477577677777877978078178278378478578678778878979079179279379479579679779879980080180280380480580680780880981081181281381481581681781881982082182282382482582682782882983083183283383483583683783883984084184284384484584684784884985085185285385485585685785885986086186286386486586686786886987087187287387487587687787887988088188288388488588688788888989089189289389489589689789889990090190290390490590690790890991091191291391491591691791891992092192292392492592692792892993093193293393493593693793893994094194294394494594694794894995095195295395495595695795895996096196296396496596696796896997097197297397497597697797897998098198298398498598698798898999099199299399499599699799899910001001100210031004100510061007100810091010101110121013101410151016101710181019102010211022102310241025102610271028102910301031103210331034103510361037103810391040104110421043104410451046104710481049105010511052105310541055105610571058105910601061106210631064106510661067106810691070107110721073107410751076107710781079108010811082108310841085108610871088108910901091109210931094109510961097109810991100110111021103110411051106110711081109111011111112111311141115111611171118111911201121112211231124112511261127112811291130113111321133113411351136113711381139114011411142114311441145114611471148114911501151115211531154115511561157115811591160116111621163116411651166116711681169117011711172117311741175117611771178117911801181118211831184118511861187118811891190119111921193119411951196119711981199120012011202120312041205120612071208120912101211121212131214121512161217121812191220122112221223122412251226 |
- """HTTP server classes.
- From Python 3.3
- Note: BaseHTTPRequestHandler doesn't implement any HTTP request; see
- SimpleHTTPRequestHandler for simple implementations of GET, HEAD and POST,
- and CGIHTTPRequestHandler for CGI scripts.
- It does, however, optionally implement HTTP/1.1 persistent connections,
- as of version 0.3.
- Notes on CGIHTTPRequestHandler
- ------------------------------
- This class implements GET and POST requests to cgi-bin scripts.
- If the os.fork() function is not present (e.g. on Windows),
- subprocess.Popen() is used as a fallback, with slightly altered semantics.
- In all cases, the implementation is intentionally naive -- all
- requests are executed synchronously.
- SECURITY WARNING: DON'T USE THIS CODE UNLESS YOU ARE INSIDE A FIREWALL
- -- it may execute arbitrary Python code or external programs.
- Note that status code 200 is sent prior to execution of a CGI script, so
- scripts cannot send other status codes such as 302 (redirect).
- XXX To do:
- - log requests even later (to capture byte count)
- - log user-agent header and other interesting goodies
- - send error log to separate file
- """
- from __future__ import (absolute_import, division,
- print_function, unicode_literals)
- from future import utils
- from future.builtins import *
- # See also:
- #
- # HTTP Working Group T. Berners-Lee
- # INTERNET-DRAFT R. T. Fielding
- # <draft-ietf-http-v10-spec-00.txt> H. Frystyk Nielsen
- # Expires September 8, 1995 March 8, 1995
- #
- # URL: http://www.ics.uci.edu/pub/ietf/http/draft-ietf-http-v10-spec-00.txt
- #
- # and
- #
- # Network Working Group R. Fielding
- # Request for Comments: 2616 et al
- # Obsoletes: 2068 June 1999
- # Category: Standards Track
- #
- # URL: http://www.faqs.org/rfcs/rfc2616.html
- # Log files
- # ---------
- #
- # Here's a quote from the NCSA httpd docs about log file format.
- #
- # | The logfile format is as follows. Each line consists of:
- # |
- # | host rfc931 authuser [DD/Mon/YYYY:hh:mm:ss] "request" ddd bbbb
- # |
- # | host: Either the DNS name or the IP number of the remote client
- # | rfc931: Any information returned by identd for this person,
- # | - otherwise.
- # | authuser: If user sent a userid for authentication, the user name,
- # | - otherwise.
- # | DD: Day
- # | Mon: Month (calendar name)
- # | YYYY: Year
- # | hh: hour (24-hour format, the machine's timezone)
- # | mm: minutes
- # | ss: seconds
- # | request: The first line of the HTTP request as sent by the client.
- # | ddd: the status code returned by the server, - if not available.
- # | bbbb: the total number of bytes sent,
- # | *not including the HTTP/1.0 header*, - if not available
- # |
- # | You can determine the name of the file accessed through request.
- #
- # (Actually, the latter is only true if you know the server configuration
- # at the time the request was made!)
# Version of this server implementation.  It is appended to the
# "BaseHTTP/" and "SimpleHTTP/" prefixes to build the handlers'
# server_version strings.
__version__ = "0.6"

# Names exported by a star-import of this module.
__all__ = ["HTTPServer", "BaseHTTPRequestHandler"]
- from future.backports import html
- from future.backports.http import client as http_client
- from future.backports.urllib import parse as urllib_parse
- from future.backports import socketserver
- import io
- import mimetypes
- import os
- import posixpath
- import select
- import shutil
- import socket # For gethostbyaddr()
- import sys
- import time
- import copy
- import argparse
# Default error message template.  It is filled in with %-formatting from
# a mapping that supplies the keys 'code', 'message' and 'explain'.
DEFAULT_ERROR_MESSAGE = """\
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN"
"http://www.w3.org/TR/html4/strict.dtd">
<html>
<head>
<meta http-equiv="Content-Type" content="text/html;charset=utf-8">
<title>Error response</title>
</head>
<body>
<h1>Error response</h1>
<p>Error code: %(code)d</p>
<p>Message: %(message)s.</p>
<p>Error code explanation: %(code)s - %(explain)s.</p>
</body>
</html>
"""

# Default Content-Type header value sent with the error page.
DEFAULT_ERROR_CONTENT_TYPE = "text/html;charset=utf-8"
- def _quote_html(html):
- return html.replace("&", "&").replace("<", "<").replace(">", ">")
class HTTPServer(socketserver.TCPServer):
    """TCP server that records its own host name and port once bound."""

    # Seems to make sense in testing environment
    allow_reuse_address = 1

    def server_bind(self):
        """Bind the socket, then store the server name and port.

        After the base class has bound the socket, the fully qualified
        host name is stored in ``self.server_name`` and the port number
        in ``self.server_port``.
        """
        socketserver.TCPServer.server_bind(self)
        bound_address = self.socket.getsockname()[:2]
        bound_host, bound_port = bound_address
        self.server_name = socket.getfqdn(bound_host)
        self.server_port = bound_port
class BaseHTTPRequestHandler(socketserver.StreamRequestHandler):

    """HTTP request handler base class.

    The following explanation of HTTP serves to guide you through the
    code as well as to expose any misunderstandings I may have about
    HTTP (so you don't need to read the code to figure out I'm wrong
    :-).

    HTTP (HyperText Transfer Protocol) is an extensible protocol on
    top of a reliable stream transport (e.g. TCP/IP).  The protocol
    recognizes three parts to a request:

    1. One line identifying the request type and path
    2. An optional set of RFC-822-style headers
    3. An optional data part

    The headers and data are separated by a blank line.

    The first line of the request has the form

    <command> <path> <version>

    where <command> is a (case-sensitive) keyword such as GET or POST,
    <path> is a string containing path information for the request,
    and <version> should be the string "HTTP/1.0" or "HTTP/1.1".
    <path> is encoded using the URL encoding scheme (using %xx to signify
    the ASCII character with hex code xx).

    The specification specifies that lines are separated by CRLF but
    for compatibility with the widest range of clients recommends
    servers also handle LF.  Similarly, whitespace in the request line
    is treated sensibly (allowing multiple spaces between components
    and allowing trailing whitespace).

    Similarly, for output, lines ought to be separated by CRLF pairs
    but most clients grok LF characters just fine.

    If the first line of the request has the form

    <command> <path>

    (i.e. <version> is left out) then this is assumed to be an HTTP
    0.9 request; this form has no optional headers and data part and
    the reply consists of just the data.

    The reply form of the HTTP 1.x protocol again has three parts:

    1. One line giving the response code
    2. An optional set of RFC-822-style headers
    3. The data

    Again, the headers and data are separated by a blank line.

    The response code line has the form

    <version> <responsecode> <responsestring>

    where <version> is the protocol version ("HTTP/1.0" or "HTTP/1.1"),
    <responsecode> is a 3-digit response code indicating success or
    failure of the request, and <responsestring> is an optional
    human-readable string explaining what the response code means.

    This server parses the request and the headers, and then calls a
    function specific to the request type (<command>).  Specifically,
    a request SPAM will be handled by a method do_SPAM().  If no
    such method exists the server sends an error response to the
    client.  If it exists, it is called with no arguments:

    do_SPAM()

    Note that the request name is case sensitive (i.e. SPAM and spam
    are different requests).

    The various request details are stored in instance variables:

    - client_address is the client IP address in the form (host,
    port);

    - command, path and version are the broken-down request line;

    - headers is an instance of email.message.Message (or a derived
    class) containing the header information;

    - rfile is a file object open for reading positioned at the
    start of the optional input data part;

    - wfile is a file object open for writing.

    IT IS IMPORTANT TO ADHERE TO THE PROTOCOL FOR WRITING!

    The first thing to be written must be the response line.  Then
    follow 0 or more header lines, then a blank line, and then the
    actual data (if any).  The meaning of the header lines depends on
    the command executed by the server; in most cases, when data is
    returned, there should be at least one header line of the form

    Content-type: <type>/<subtype>

    where <type> and <subtype> should be registered MIME types,
    e.g. "text/html" or "text/plain".

    """

    # The Python system version, truncated to its first component.
    sys_version = "Python/" + sys.version.split()[0]

    # The server software version.  You may want to override this.
    # The format is multiple whitespace-separated strings,
    # where each string is of the form name[/version].
    server_version = "BaseHTTP/" + __version__

    error_message_format = DEFAULT_ERROR_MESSAGE
    error_content_type = DEFAULT_ERROR_CONTENT_TYPE

    # The default request version.  This only affects responses up until
    # the point where the request line is parsed, so it mainly decides what
    # the client gets back when sending a malformed request line.
    # Most web servers default to HTTP 0.9, i.e. don't send a status line.
    default_request_version = "HTTP/0.9"

    def parse_request(self):
        """Parse a request (internal).

        The request should be stored in self.raw_requestline; the results
        are in self.command, self.path, self.request_version and
        self.headers.

        Return True for success, False for failure; on failure, an
        error is sent back (except for an entirely blank request line,
        which is rejected silently).

        """
        self.command = None  # set in case of error on the first line
        self.request_version = version = self.default_request_version
        self.close_connection = 1
        requestline = str(self.raw_requestline, 'iso-8859-1')
        requestline = requestline.rstrip('\r\n')
        self.requestline = requestline
        words = requestline.split()
        if len(words) == 3:
            command, path, version = words
            if version[:5] != 'HTTP/':
                self.send_error(400, "Bad request version (%r)" % version)
                return False
            try:
                base_version_number = version.split('/', 1)[1]
                version_number = base_version_number.split(".")
                # RFC 2145 section 3.1 says there can be only one "." and
                #   - major and minor numbers MUST be treated as
                #      separate integers;
                #   - HTTP/2.4 is a lower version than HTTP/2.13, which in
                #      turn is lower than HTTP/12.3;
                #   - Leading zeros MUST be ignored by recipients.
                if len(version_number) != 2:
                    raise ValueError
                version_number = (int(version_number[0]),
                                  int(version_number[1]))
            except (ValueError, IndexError):
                self.send_error(400, "Bad request version (%r)" % version)
                return False
            if (version_number >= (1, 1) and
                    self.protocol_version >= "HTTP/1.1"):
                self.close_connection = 0
            if version_number >= (2, 0):
                self.send_error(
                    505, "Invalid HTTP Version (%s)" % base_version_number)
                return False
        elif len(words) == 2:
            # Two words: an HTTP/0.9 request, for which only GET exists.
            command, path = words
            self.close_connection = 1
            if command != 'GET':
                self.send_error(400,
                                "Bad HTTP/0.9 request type (%r)" % command)
                return False
        elif not words:
            return False
        else:
            self.send_error(400, "Bad request syntax (%r)" % requestline)
            return False
        self.command, self.path, self.request_version = command, path, version

        # Examine the headers and look for a Connection directive.
        try:
            self.headers = http_client.parse_headers(self.rfile,
                                                     _class=self.MessageClass)
        except http_client.LineTooLong:
            self.send_error(400, "Line too long")
            return False

        conntype = self.headers.get('Connection', "")
        if conntype.lower() == 'close':
            self.close_connection = 1
        elif (conntype.lower() == 'keep-alive' and
              self.protocol_version >= "HTTP/1.1"):
            self.close_connection = 0
        # Examine the headers and look for an Expect directive
        expect = self.headers.get('Expect', "")
        if (expect.lower() == "100-continue" and
                self.protocol_version >= "HTTP/1.1" and
                self.request_version >= "HTTP/1.1"):
            if not self.handle_expect_100():
                return False
        return True

    def handle_expect_100(self):
        """Decide what to do with an "Expect: 100-continue" header.

        If the client is expecting a 100 Continue response, we must
        respond with either a 100 Continue or a final response before
        waiting for the request body.  The default is to always respond
        with a 100 Continue.  You can behave differently (for example,
        reject unauthorized requests) by overriding this method.

        This method should either return True (possibly after sending
        a 100 Continue response) or send an error response and return
        False.

        """
        self.send_response_only(100)
        self.flush_headers()
        return True

    def handle_one_request(self):
        """Handle a single HTTP request.

        You normally don't need to override this method; see the class
        __doc__ string for information on how to handle specific HTTP
        commands such as GET and POST.

        """
        try:
            # Read one byte more than the limit so that an over-long
            # request line can be told apart from one exactly at the limit.
            self.raw_requestline = self.rfile.readline(65537)
            if len(self.raw_requestline) > 65536:
                self.requestline = ''
                self.request_version = ''
                self.command = ''
                self.send_error(414)
                return
            if not self.raw_requestline:
                self.close_connection = 1
                return
            if not self.parse_request():
                # An error code has been sent, just exit
                return
            mname = 'do_' + self.command
            if not hasattr(self, mname):
                self.send_error(501, "Unsupported method (%r)" % self.command)
                return
            method = getattr(self, mname)
            method()
            # actually send the response if not already done.
            self.wfile.flush()
        except socket.timeout as e:
            # a read or a write timed out.  Discard this connection
            self.log_error("Request timed out: %r", e)
            self.close_connection = 1
            return

    def handle(self):
        """Handle multiple requests if necessary."""
        self.close_connection = 1

        self.handle_one_request()
        while not self.close_connection:
            self.handle_one_request()

    def send_error(self, code, message=None):
        """Send and log an error reply.

        Arguments are the error code, and a detailed message.
        The detailed message defaults to the short entry matching the
        response code.

        This sends an error response (so it must be called before any
        output has been generated), logs the error, and finally sends
        a piece of HTML explaining the error to the user.  The message
        is HTML-escaped before it is placed in the page; it is sent
        unescaped in the status line.

        """
        try:
            shortmsg, longmsg = self.responses[code]
        except KeyError:
            shortmsg, longmsg = '???', '???'
        if message is None:
            message = shortmsg
        explain = longmsg
        self.log_error("code %d, message %s", code, message)
        # Escape the message to prevent Cross Site Scripting attacks
        # (see bug #1100201); it may echo parts of the client's request.
        content = (self.error_message_format %
                   {'code': code,
                    'message': html.escape(message, quote=False),
                    'explain': explain})
        self.send_response(code, message)
        self.send_header("Content-Type", self.error_content_type)
        self.send_header('Connection', 'close')
        self.end_headers()
        # No body for HEAD requests or for status codes that forbid one.
        if self.command != 'HEAD' and code >= 200 and code not in (204, 304):
            self.wfile.write(content.encode('UTF-8', 'replace'))

    def send_response(self, code, message=None):
        """Add the response header to the headers buffer and log the
        response code.

        Also send two standard headers with the server software
        version and the current date.

        """
        self.log_request(code)
        self.send_response_only(code, message)
        self.send_header('Server', self.version_string())
        self.send_header('Date', self.date_time_string())

    def send_response_only(self, code, message=None):
        """Send the response header only.

        The status line is appended to the headers buffer; nothing is
        buffered for HTTP/0.9 requests.  The message defaults to the
        short entry for the code in self.responses, or '' if unknown.
        """
        if message is None:
            if code in self.responses:
                message = self.responses[code][0]
            else:
                message = ''
        if self.request_version != 'HTTP/0.9':
            if not hasattr(self, '_headers_buffer'):
                self._headers_buffer = []
            self._headers_buffer.append(("%s %d %s\r\n" %
                    (self.protocol_version, code, message)).encode(
                        'latin-1', 'strict'))

    def send_header(self, keyword, value):
        """Send a MIME header to the headers buffer.

        A 'Connection' header additionally updates self.close_connection
        ('close' sets it, 'keep-alive' clears it).
        """
        if self.request_version != 'HTTP/0.9':
            if not hasattr(self, '_headers_buffer'):
                self._headers_buffer = []
            self._headers_buffer.append(
                ("%s: %s\r\n" % (keyword, value)).encode('latin-1', 'strict'))

        if keyword.lower() == 'connection':
            if value.lower() == 'close':
                self.close_connection = 1
            elif value.lower() == 'keep-alive':
                self.close_connection = 0

    def end_headers(self):
        """Send the blank line ending the MIME headers."""
        if self.request_version != 'HTTP/0.9':
            # Create the buffer on demand, exactly as send_header() does,
            # so ending an empty header block does not raise.
            if not hasattr(self, '_headers_buffer'):
                self._headers_buffer = []
            self._headers_buffer.append(b"\r\n")
            self.flush_headers()

    def flush_headers(self):
        """Write any buffered header lines to wfile and empty the buffer."""
        if hasattr(self, '_headers_buffer'):
            self.wfile.write(b"".join(self._headers_buffer))
            self._headers_buffer = []

    def log_request(self, code='-', size='-'):
        """Log an accepted request.

        This is called by send_response().

        """
        self.log_message('"%s" %s %s',
                         self.requestline, str(code), str(size))

    def log_error(self, format, *args):
        """Log an error.

        This is called when a request cannot be fulfilled.  By
        default it passes the message on to log_message().

        Arguments are the same as for log_message().

        XXX This should go to the separate error log.

        """
        self.log_message(format, *args)

    def log_message(self, format, *args):
        """Log an arbitrary message.

        This is used by all other logging functions.  Override
        it if you have specific logging wishes.

        The first argument, FORMAT, is a format string for the
        message to be logged.  If the format string contains
        any % escapes requiring parameters, they should be
        specified as subsequent arguments (it's just like
        printf!).

        The client ip and current date/time are prefixed to
        every message.

        """
        sys.stderr.write("%s - - [%s] %s\n" %
                         (self.address_string(),
                          self.log_date_time_string(),
                          format % args))

    def version_string(self):
        """Return the server software version string."""
        return self.server_version + ' ' + self.sys_version

    def date_time_string(self, timestamp=None):
        """Return the current date and time formatted for a message header.

        If *timestamp* is given it is formatted instead of the current
        time.  The result is always expressed in GMT.
        """
        if timestamp is None:
            timestamp = time.time()
        year, month, day, hh, mm, ss, wd, y, z = time.gmtime(timestamp)
        s = "%s, %02d %3s %4d %02d:%02d:%02d GMT" % (
                self.weekdayname[wd],
                day, self.monthname[month], year,
                hh, mm, ss)
        return s

    def log_date_time_string(self):
        """Return the current time formatted for logging (local time)."""
        now = time.time()
        year, month, day, hh, mm, ss, x, y, z = time.localtime(now)
        s = "%02d/%3s/%04d %02d:%02d:%02d" % (
                day, self.monthname[month], year, hh, mm, ss)
        return s

    weekdayname = ['Mon', 'Tue', 'Wed', 'Thu', 'Fri', 'Sat', 'Sun']

    # Index 0 is a placeholder so that month numbers 1..12 index directly.
    monthname = [None,
                 'Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun',
                 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec']

    def address_string(self):
        """Return the client address."""
        return self.client_address[0]

    # Essentially static class variables

    # The version of the HTTP protocol we support.
    # Set this to HTTP/1.1 to enable automatic keepalive
    protocol_version = "HTTP/1.0"

    # MessageClass used to parse headers
    MessageClass = http_client.HTTPMessage

    # Table mapping response codes to messages; entries have the
    # form {code: (shortmessage, longmessage)}.
    # See RFC 2616 and 6585.
    responses = {
        100: ('Continue', 'Request received, please continue'),
        101: ('Switching Protocols',
              'Switching to new protocol; obey Upgrade header'),

        200: ('OK', 'Request fulfilled, document follows'),
        201: ('Created', 'Document created, URL follows'),
        202: ('Accepted',
              'Request accepted, processing continues off-line'),
        203: ('Non-Authoritative Information', 'Request fulfilled from cache'),
        204: ('No Content', 'Request fulfilled, nothing follows'),
        205: ('Reset Content', 'Clear input form for further input.'),
        206: ('Partial Content', 'Partial content follows.'),

        300: ('Multiple Choices',
              'Object has several resources -- see URI list'),
        301: ('Moved Permanently', 'Object moved permanently -- see URI list'),
        302: ('Found', 'Object moved temporarily -- see URI list'),
        303: ('See Other', 'Object moved -- see Method and URL list'),
        304: ('Not Modified',
              'Document has not changed since given time'),
        305: ('Use Proxy',
              'You must use proxy specified in Location to access this '
              'resource.'),
        307: ('Temporary Redirect',
              'Object moved temporarily -- see URI list'),

        400: ('Bad Request',
              'Bad request syntax or unsupported method'),
        401: ('Unauthorized',
              'No permission -- see authorization schemes'),
        402: ('Payment Required',
              'No payment -- see charging schemes'),
        403: ('Forbidden',
              'Request forbidden -- authorization will not help'),
        404: ('Not Found', 'Nothing matches the given URI'),
        405: ('Method Not Allowed',
              'Specified method is invalid for this resource.'),
        406: ('Not Acceptable', 'URI not available in preferred format.'),
        407: ('Proxy Authentication Required', 'You must authenticate with '
              'this proxy before proceeding.'),
        408: ('Request Timeout', 'Request timed out; try again later.'),
        409: ('Conflict', 'Request conflict.'),
        410: ('Gone',
              'URI no longer exists and has been permanently removed.'),
        411: ('Length Required', 'Client must specify Content-Length.'),
        412: ('Precondition Failed', 'Precondition in headers is false.'),
        413: ('Request Entity Too Large', 'Entity is too large.'),
        414: ('Request-URI Too Long', 'URI is too long.'),
        415: ('Unsupported Media Type', 'Entity body in unsupported format.'),
        416: ('Requested Range Not Satisfiable',
              'Cannot satisfy request range.'),
        417: ('Expectation Failed',
              'Expect condition could not be satisfied.'),
        428: ('Precondition Required',
              'The origin server requires the request to be conditional.'),
        429: ('Too Many Requests', 'The user has sent too many requests '
              'in a given amount of time ("rate limiting").'),
        431: ('Request Header Fields Too Large', 'The server is unwilling to '
              'process the request because its header fields are too large.'),

        500: ('Internal Server Error', 'Server got itself in trouble'),
        501: ('Not Implemented',
              'Server does not support this operation'),
        502: ('Bad Gateway', 'Invalid responses from another server/proxy.'),
        503: ('Service Unavailable',
              'The server cannot process the request due to a high load'),
        504: ('Gateway Timeout',
              'The gateway server did not receive a timely response'),
        505: ('HTTP Version Not Supported', 'Cannot fulfill request.'),
        511: ('Network Authentication Required',
              'The client needs to authenticate to gain network access.'),
        }
class SimpleHTTPRequestHandler(BaseHTTPRequestHandler):

    """Simple HTTP request handler with GET and HEAD commands.

    This serves files from the current directory and any of its
    subdirectories.  The MIME type for files is determined by
    calling the .guess_type() method.

    The GET and HEAD requests are identical except that the HEAD
    request omits the actual contents of the file.

    """

    server_version = "SimpleHTTP/" + __version__

    def do_GET(self):
        """Serve a GET request."""
        f = self.send_head()
        if f:
            # Close the file even when the copy fails part-way, e.g.
            # because the client dropped the connection.
            try:
                self.copyfile(f, self.wfile)
            finally:
                f.close()

    def do_HEAD(self):
        """Serve a HEAD request."""
        f = self.send_head()
        if f:
            f.close()

    def send_head(self):
        """Common code for GET and HEAD commands.

        This sends the response code and MIME headers.

        Return value is either a file object (which has to be copied
        to the outputfile by the caller unless the command was HEAD,
        and must be closed by the caller under all circumstances), or
        None, in which case the caller has nothing further to do.

        """
        path = self.translate_path(self.path)
        f = None
        if os.path.isdir(path):
            if not self.path.endswith('/'):
                # redirect browser - doing basically what apache does
                self.send_response(301)
                self.send_header("Location", self.path + "/")
                self.end_headers()
                return None
            for index in "index.html", "index.htm":
                index = os.path.join(path, index)
                if os.path.exists(index):
                    path = index
                    break
            else:
                # No index file present: fall back to a generated listing.
                return self.list_directory(path)
        ctype = self.guess_type(path)
        try:
            f = open(path, 'rb')
        except IOError:
            self.send_error(404, "File not found")
            return None
        try:
            self.send_response(200)
            self.send_header("Content-type", ctype)
            fs = os.fstat(f.fileno())
            self.send_header("Content-Length", str(fs.st_size))
            self.send_header("Last-Modified",
                             self.date_time_string(fs.st_mtime))
            self.end_headers()
        except BaseException:
            # The caller never receives the file object on failure, so it
            # must be closed here before the error propagates.
            f.close()
            raise
        return f

    def list_directory(self, path):
        """Helper to produce a directory listing (absent index.html).

        Return value is either a file object, or None (indicating an
        error).  In either case, the headers are sent, making the
        interface the same as for send_head().

        """
        try:
            entries = os.listdir(path)
        except os.error:
            self.send_error(404, "No permission to list directory")
            return None
        entries.sort(key=lambda a: a.lower())
        r = []
        displaypath = html.escape(urllib_parse.unquote(self.path))
        enc = sys.getfilesystemencoding()
        title = 'Directory listing for %s' % displaypath
        r.append('<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN" '
                 '"http://www.w3.org/TR/html4/strict.dtd">')
        r.append('<html>\n<head>')
        r.append('<meta http-equiv="Content-Type" '
                 'content="text/html; charset=%s">' % enc)
        r.append('<title>%s</title>\n</head>' % title)
        r.append('<body>\n<h1>%s</h1>' % title)
        r.append('<hr>\n<ul>')
        for name in entries:
            fullname = os.path.join(path, name)
            displayname = linkname = name
            # Append / for directories or @ for symbolic links
            if os.path.isdir(fullname):
                displayname = name + "/"
                linkname = name + "/"
            if os.path.islink(fullname):
                displayname = name + "@"
                # Note: a link to a directory displays with @ and links with /
            r.append('<li><a href="%s">%s</a></li>'
                     % (urllib_parse.quote(linkname),
                        html.escape(displayname)))
        r.append('</ul>\n<hr>\n</body>\n</html>\n')
        encoded = '\n'.join(r).encode(enc)
        f = io.BytesIO()
        f.write(encoded)
        f.seek(0)
        self.send_response(200)
        self.send_header("Content-type", "text/html; charset=%s" % enc)
        self.send_header("Content-Length", str(len(encoded)))
        self.end_headers()
        return f

    def translate_path(self, path):
        """Translate a /-separated PATH to the local filename syntax.

        Components that mean special things to the local file system
        (e.g. drive or directory names) are ignored.  (XXX They should
        probably be diagnosed.)

        The result is always located below the current working directory.

        """
        # abandon query parameters
        path = path.split('?', 1)[0]
        path = path.split('#', 1)[0]
        path = posixpath.normpath(urllib_parse.unquote(path))
        words = path.split('/')
        words = filter(None, words)
        path = os.getcwd()
        for word in words:
            # Strip any drive or directory part so that a single URL
            # component can never step outside the serving directory.
            drive, word = os.path.splitdrive(word)
            head, word = os.path.split(word)
            if word in (os.curdir, os.pardir):
                continue
            path = os.path.join(path, word)
        return path

    def copyfile(self, source, outputfile):
        """Copy all data between two file objects.

        The SOURCE argument is a file object open for reading
        (or anything with a read() method) and the DESTINATION
        argument is a file object open for writing (or
        anything with a write() method).

        The only reason for overriding this would be to change
        the block size or perhaps to replace newlines by CRLF
        -- note however that the default server uses this
        to copy binary data as well.

        """
        shutil.copyfileobj(source, outputfile)

    def guess_type(self, path):
        """Guess the type of a file.

        Argument is a PATH (a filename).

        Return value is a string of the form type/subtype,
        usable for a MIME Content-type header.

        The default implementation looks the file's extension
        up in the table self.extensions_map, using application/octet-stream
        as a default; however it would be permissible (if
        slow) to look inside the data to make a better guess.

        """
        base, ext = posixpath.splitext(path)
        if ext in self.extensions_map:
            return self.extensions_map[ext]
        # Retry case-insensitively before falling back to the default.
        ext = ext.lower()
        if ext in self.extensions_map:
            return self.extensions_map[ext]
        else:
            return self.extensions_map['']

    if not mimetypes.inited:
        mimetypes.init()  # try to read system mime.types
    extensions_map = mimetypes.types_map.copy()
    extensions_map.update({
        '': 'application/octet-stream',  # Default
        '.py': 'text/plain',
        '.c': 'text/plain',
        '.h': 'text/plain',
        })
- # Utilities for CGIHTTPRequestHandler
- def _url_collapse_path(path):
- """
- Given a URL path, remove extra '/'s and '.' path elements and collapse
- any '..' references and returns a colllapsed path.
- Implements something akin to RFC-2396 5.2 step 6 to parse relative paths.
- The utility of this function is limited to is_cgi method and helps
- preventing some security attacks.
- Returns: A tuple of (head, tail) where tail is everything after the final /
- and head is everything before it. Head will always start with a '/' and,
- if it contains anything else, never have a trailing '/'.
- Raises: IndexError if too many '..' occur within the path.
- """
- # Similar to os.path.split(os.path.normpath(path)) but specific to URL
- # path semantics rather than local operating system semantics.
- path_parts = path.split('/')
- head_parts = []
- for part in path_parts[:-1]:
- if part == '..':
- head_parts.pop() # IndexError if more '..' than prior parts
- elif part and part != '.':
- head_parts.append( part )
- if path_parts:
- tail_part = path_parts.pop()
- if tail_part:
- if tail_part == '..':
- head_parts.pop()
- tail_part = ''
- elif tail_part == '.':
- tail_part = ''
- else:
- tail_part = ''
- splitpath = ('/' + '/'.join(head_parts), tail_part)
- collapsed_path = "/".join(splitpath)
- return collapsed_path
# Cached uid for the 'nobody' user; filled in lazily by nobody_uid().
nobody = None

def nobody_uid():
    """Internal routine to get nobody's uid.

    The value is computed once and remembered in the module-level
    ``nobody`` variable.  Returns -1 (without caching anything) when the
    pwd module cannot be imported.  If the password database has no
    'nobody' entry, one more than the largest uid found in it is used.

    """
    global nobody
    if not nobody:
        try:
            import pwd
        except ImportError:
            return -1
        try:
            nobody = pwd.getpwnam('nobody')[2]
        except KeyError:
            nobody = 1 + max(entry[2] for entry in pwd.getpwall())
    return nobody
def executable(path):
    """Return whether os.access() reports execute permission for *path*."""
    mode = os.X_OK
    return os.access(path, mode)
class CGIHTTPRequestHandler(SimpleHTTPRequestHandler):

    """Complete HTTP server with GET, HEAD and POST commands.

    GET and HEAD also support running CGI scripts.

    The POST command is *only* implemented for CGI scripts.

    """

    # Determine platform specifics
    have_fork = hasattr(os, 'fork')

    # Make rfile unbuffered -- we need to read one line and then pass
    # the rest to a subprocess, so we can't use buffered input.
    rbufsize = 0

    # Collapsed URL directories whose contents are treated as CGI scripts.
    cgi_directories = ['/cgi-bin', '/htbin']

    def do_POST(self):
        """Serve a POST request.

        This is only implemented for CGI scripts; any other target is
        answered with a 501 error.

        """
        if self.is_cgi():
            self.run_cgi()
        else:
            self.send_error(501, "Can only POST to CGI scripts")

    def send_head(self):
        """Version of send_head that supports CGI scripts.

        CGI requests are handed to run_cgi(); everything else falls back
        to SimpleHTTPRequestHandler.send_head().

        """
        if self.is_cgi():
            return self.run_cgi()
        else:
            return SimpleHTTPRequestHandler.send_head(self)

    def is_cgi(self):
        """Test whether self.path corresponds to a CGI script.

        Returns True and updates the cgi_info attribute to the tuple
        (dir, rest) if self.path requires running a CGI script.
        Returns False otherwise.

        If any exception is raised, the caller should assume that
        self.path was rejected as invalid and act accordingly.

        The default implementation tests whether the normalized url
        path begins with one of the strings in self.cgi_directories
        (and the next character is a '/' or the end of the string).

        """
        collapsed_path = _url_collapse_path(self.path)
        # Split at the first '/' after the leading one: head is the top
        # level directory, tail is everything below it.
        dir_sep = collapsed_path.find('/', 1)
        head, tail = collapsed_path[:dir_sep], collapsed_path[dir_sep+1:]
        if head in self.cgi_directories:
            self.cgi_info = head, tail
            return True
        return False

    def is_executable(self, path):
        """Test whether argument path is an executable file."""
        return executable(path)

    def is_python(self, path):
        """Test whether argument path is a Python script."""
        head, tail = os.path.splitext(path)
        return tail.lower() in (".py", ".pyw")

    def run_cgi(self):
        """Execute a CGI script.

        Must only be called after is_cgi() returned True, because it
        relies on the (dir, rest) tuple stored in self.cgi_info.

        The script is located below the CGI directory, the CGI
        environment is built from the request, a 200 status line is sent,
        and the script is then run either in a forked child (when
        os.fork exists) or through subprocess.  Problems locating the
        script are reported to the client with a 404 or 403 error.

        """
        dir, rest = self.cgi_info
        # Walk the *collapsed* path recorded by is_cgi(), never the raw
        # self.path: the raw path may still contain '..' segments or runs
        # of '/' that would let the loop below settle on a directory
        # outside the CGI directories (directory traversal).
        path = dir + '/' + rest
        i = path.find('/', len(dir) + 1)
        while i >= 0:
            # Descend as long as the next component is a real directory.
            nextdir = path[:i]
            nextrest = path[i+1:]

            scriptdir = self.translate_path(nextdir)
            if os.path.isdir(scriptdir):
                dir, rest = nextdir, nextrest
                i = path.find('/', len(dir) + 1)
            else:
                break

        # find an explicit query string, if present.
        i = rest.rfind('?')
        if i >= 0:
            rest, query = rest[:i], rest[i+1:]
        else:
            query = ''

        # dissect the part after the directory name into a script name &
        # a possible additional path, to be stored in PATH_INFO.
        i = rest.find('/')
        if i >= 0:
            script, rest = rest[:i], rest[i:]
        else:
            script, rest = rest, ''

        scriptname = dir + '/' + script
        scriptfile = self.translate_path(scriptname)
        if not os.path.exists(scriptfile):
            self.send_error(404, "No such CGI script (%r)" % scriptname)
            return
        if not os.path.isfile(scriptfile):
            self.send_error(403, "CGI script is not a plain file (%r)" %
                            scriptname)
            return
        ispy = self.is_python(scriptname)
        # Without fork, Python scripts are started through the
        # interpreter (see below), so only then is the check skipped.
        if self.have_fork or not ispy:
            if not self.is_executable(scriptfile):
                self.send_error(403, "CGI script is not executable (%r)" %
                                scriptname)
                return

        # Reference: http://hoohoo.ncsa.uiuc.edu/cgi/env.html
        # XXX Much of the following could be prepared ahead of time!
        env = copy.deepcopy(os.environ)
        env['SERVER_SOFTWARE'] = self.version_string()
        env['SERVER_NAME'] = self.server.server_name
        env['GATEWAY_INTERFACE'] = 'CGI/1.1'
        env['SERVER_PROTOCOL'] = self.protocol_version
        env['SERVER_PORT'] = str(self.server.server_port)
        env['REQUEST_METHOD'] = self.command
        uqrest = urllib_parse.unquote(rest)
        env['PATH_INFO'] = uqrest
        env['PATH_TRANSLATED'] = self.translate_path(uqrest)
        env['SCRIPT_NAME'] = scriptname
        if query:
            env['QUERY_STRING'] = query
        env['REMOTE_ADDR'] = self.client_address[0]
        authorization = self.headers.get("authorization")
        if authorization:
            authorization = authorization.split()
            if len(authorization) == 2:
                import base64, binascii
                env['AUTH_TYPE'] = authorization[0]
                if authorization[0].lower() == "basic":
                    try:
                        authorization = authorization[1].encode('ascii')
                        if utils.PY3:
                            # In Py3.3, was:
                            authorization = base64.decodebytes(authorization).\
                                            decode('ascii')
                        else:
                            # Backport to Py2.7:
                            authorization = base64.decodestring(authorization).\
                                            decode('ascii')
                    except (binascii.Error, UnicodeError):
                        # Undecodable credentials: leave REMOTE_USER unset.
                        pass
                    else:
                        authorization = authorization.split(':')
                        if len(authorization) == 2:
                            env['REMOTE_USER'] = authorization[0]
        # XXX REMOTE_IDENT
        if self.headers.get('content-type') is None:
            env['CONTENT_TYPE'] = self.headers.get_content_type()
        else:
            env['CONTENT_TYPE'] = self.headers['content-type']
        length = self.headers.get('content-length')
        if length:
            env['CONTENT_LENGTH'] = length
        referer = self.headers.get('referer')
        if referer:
            env['HTTP_REFERER'] = referer
        accept = []
        for line in self.headers.getallmatchingheaders('accept'):
            if line[:1] in "\t\n\r ":
                # Continuation line of a folded header.
                accept.append(line.strip())
            else:
                # Skip the 7 characters of the 'Accept:' field name.
                accept = accept + line[7:].split(',')
        env['HTTP_ACCEPT'] = ','.join(accept)
        ua = self.headers.get('user-agent')
        if ua:
            env['HTTP_USER_AGENT'] = ua
        co = filter(None, self.headers.get_all('cookie', []))
        cookie_str = ', '.join(co)
        if cookie_str:
            env['HTTP_COOKIE'] = cookie_str
        # XXX Other HTTP_* headers
        # Since we're setting the env in the parent, provide empty
        # values to override previously set values
        for k in ('QUERY_STRING', 'REMOTE_HOST', 'CONTENT_LENGTH',
                  'HTTP_USER_AGENT', 'HTTP_COOKIE', 'HTTP_REFERER'):
            env.setdefault(k, "")

        self.send_response(200, "Script output follows")
        self.flush_headers()

        decoded_query = query.replace('+', ' ')

        if self.have_fork:
            # Unix -- fork as we should
            args = [script]
            # A query without '=' is passed to the script as an argument.
            if '=' not in decoded_query:
                args.append(decoded_query)
            nobody = nobody_uid()
            self.wfile.flush() # Always flush before forking
            pid = os.fork()
            if pid != 0:
                # Parent
                pid, sts = os.waitpid(pid, 0)
                # throw away additional data [see bug #427345]
                while select.select([self.rfile], [], [], 0)[0]:
                    if not self.rfile.read(1):
                        break
                if sts:
                    self.log_error("CGI script exit status %#x", sts)
                return
            # Child
            try:
                try:
                    os.setuid(nobody)
                except os.error:
                    # Best effort: keep the current uid if setuid fails.
                    pass
                os.dup2(self.rfile.fileno(), 0)
                os.dup2(self.wfile.fileno(), 1)
                os.execve(scriptfile, args, env)
            except:
                # Deliberately catch everything: the child must report
                # the failure and exit, never continue as a second copy
                # of the server.
                self.server.handle_error(self.request, self.client_address)
                os._exit(127)

        else:
            # Non-Unix -- use subprocess
            import subprocess
            cmdline = [scriptfile]
            if self.is_python(scriptfile):
                interp = sys.executable
                if interp.lower().endswith("w.exe"):
                    # On Windows, use python.exe, not pythonw.exe
                    interp = interp[:-5] + interp[-4:]
                cmdline = [interp, '-u'] + cmdline
            # NOTE(review): this branch appends the raw query, whereas the
            # fork branch appends decoded_query -- confirm this is intended.
            if '=' not in query:
                cmdline.append(query)
            self.log_message("command: %s", subprocess.list2cmdline(cmdline))
            try:
                nbytes = int(length)
            except (TypeError, ValueError):
                nbytes = 0
            p = subprocess.Popen(cmdline,
                                 stdin=subprocess.PIPE,
                                 stdout=subprocess.PIPE,
                                 stderr=subprocess.PIPE,
                                 env = env
                                 )
            if self.command.lower() == "post" and nbytes > 0:
                data = self.rfile.read(nbytes)
            else:
                data = None
            # throw away additional data [see bug #427345]
            while select.select([self.rfile._sock], [], [], 0)[0]:
                if not self.rfile._sock.recv(1):
                    break
            stdout, stderr = p.communicate(data)
            self.wfile.write(stdout)
            if stderr:
                self.log_error('%s', stderr)
            p.stderr.close()
            p.stdout.close()
            status = p.returncode
            if status:
                self.log_error("CGI script exit status %#x", status)
            else:
                self.log_message("CGI script exited OK")
def test(HandlerClass = BaseHTTPRequestHandler,
         ServerClass = HTTPServer, protocol="HTTP/1.0", port=8000):
    """Run an HTTP server with the given handler until interrupted.

    HandlerClass gets its protocol_version set to *protocol*, and a
    ServerClass instance is bound to all interfaces ('') on *port*
    (8000 by default).  The server then serves forever; on
    KeyboardInterrupt it is closed and the process exits with status 0.

    """
    HandlerClass.protocol_version = protocol
    httpd = ServerClass(('', port), HandlerClass)

    # Report the address actually bound by the listening socket.
    bound = httpd.socket.getsockname()
    print("Serving HTTP on", bound[0], "port", bound[1], "...")
    try:
        httpd.serve_forever()
    except KeyboardInterrupt:
        print("\nKeyboard interrupt received, exiting.")
        httpd.server_close()
        sys.exit(0)
if __name__ == '__main__':
    # Command line: [--cgi] [port]
    parser = argparse.ArgumentParser()
    parser.add_argument('--cgi', action='store_true',
                        help='Run as CGI Server')
    parser.add_argument('port', action='store',
                        default=8000, type=int,
                        nargs='?',
                        help='Specify alternate port [default: 8000]')
    args = parser.parse_args()
    # --cgi selects the CGI-capable handler; otherwise serve plain files.
    test(HandlerClass=(CGIHTTPRequestHandler if args.cgi
                       else SimpleHTTPRequestHandler),
         port=args.port)
|