# client.py
  """HTTP/1.1 client library

A backport of the Python 3.3 http/client.py module for python-future.

<intro stuff goes here>
<other stuff, too>

HTTPConnection goes through a number of "states", which define when a client
may legally make another request or fetch the response for a particular
request. This diagram details these state transitions:

    (null)
      |
      | HTTPConnection()
      v
    Idle
      |
      | putrequest()
      v
    Request-started
      |
      | ( putheader() )*  endheaders()
      v
    Request-sent
      |
      | response = getresponse()
      v
    Unread-response   [Response-headers-read]
      |\____________________
      |                     |
      | response.read()     | putrequest()
      v                     v
    Idle                  Req-started-unread-response
                     ______/|
                   /        |
   response.read() |        | ( putheader() )*  endheaders()
                   v        v
       Request-started    Req-sent-unread-response
                            |
                            | response.read()
                            v
                          Request-sent

This diagram presents the following rules:
  -- a second request may not be started until {response-headers-read}
  -- a response [object] cannot be retrieved until {request-sent}
  -- there is no differentiation between an unread response body and a
     partially read response body

Note: this enforcement is applied by the HTTPConnection class. The
      HTTPResponse class does not enforce this state machine, which
      implies sophisticated clients may accelerate the request/response
      pipeline. Caution should be taken, though: accelerating the states
      beyond the above pattern may imply knowledge of the server's
      connection-close behavior for certain requests. For example, it
      is impossible to tell whether the server will close the connection
      UNTIL the response headers have been read; this means that further
      requests cannot be placed into the pipeline until it is known that
      the server will NOT be closing the connection.

Logical State                  __state            __response
-------------                  -------            ----------
Idle                           _CS_IDLE           None
Request-started                _CS_REQ_STARTED    None
Request-sent                   _CS_REQ_SENT       None
Unread-response                _CS_IDLE           <response_class>
Req-started-unread-response    _CS_REQ_STARTED    <response_class>
Req-sent-unread-response       _CS_REQ_SENT       <response_class>
"""
  63. from __future__ import (absolute_import, division,
  64. print_function, unicode_literals)
  65. from future.builtins import bytes, int, str, super
  66. from future.utils import PY2
  67. from future.backports.email import parser as email_parser
  68. from future.backports.email import message as email_message
  69. from future.backports.misc import create_connection as socket_create_connection
  70. import io
  71. import os
  72. import socket
  73. from future.backports.urllib.parse import urlsplit
  74. import warnings
  75. from array import array
  76. if PY2:
  77. from collections import Iterable
  78. else:
  79. from collections.abc import Iterable
  # Names exported by ``from <module> import *``.
  __all__ = [
      "HTTPResponse",
      "HTTPConnection",
      "HTTPException",
      "NotConnected",
      "UnknownProtocol",
      "UnknownTransferEncoding",
      "UnimplementedFileMode",
      "IncompleteRead",
      "InvalidURL",
      "ImproperConnectionState",
      "CannotSendRequest",
      "CannotSendHeader",
      "ResponseNotReady",
      "BadStatusLine",
      "error",
      "responses",
  ]
  # Default TCP ports for plain and TLS-wrapped HTTP.
  HTTP_PORT = 80
  HTTPS_PORT = 443

  # Placeholder stored in response attributes before they are parsed.
  _UNKNOWN = 'UNKNOWN'

  # connection states
  _CS_IDLE = 'Idle'
  _CS_REQ_STARTED = 'Request-started'
  _CS_REQ_SENT = 'Request-sent'

  # status codes
  # informational (1xx)
  CONTINUE = 100
  SWITCHING_PROTOCOLS = 101
  PROCESSING = 102

  # successful (2xx)
  OK = 200
  CREATED = 201
  ACCEPTED = 202
  NON_AUTHORITATIVE_INFORMATION = 203
  NO_CONTENT = 204
  RESET_CONTENT = 205
  PARTIAL_CONTENT = 206
  MULTI_STATUS = 207
  IM_USED = 226

  # redirection (3xx)
  MULTIPLE_CHOICES = 300
  MOVED_PERMANENTLY = 301
  FOUND = 302
  SEE_OTHER = 303
  NOT_MODIFIED = 304
  USE_PROXY = 305
  TEMPORARY_REDIRECT = 307

  # client error (4xx)
  BAD_REQUEST = 400
  UNAUTHORIZED = 401
  PAYMENT_REQUIRED = 402
  FORBIDDEN = 403
  NOT_FOUND = 404
  METHOD_NOT_ALLOWED = 405
  NOT_ACCEPTABLE = 406
  PROXY_AUTHENTICATION_REQUIRED = 407
  REQUEST_TIMEOUT = 408
  CONFLICT = 409
  GONE = 410
  LENGTH_REQUIRED = 411
  PRECONDITION_FAILED = 412
  REQUEST_ENTITY_TOO_LARGE = 413
  REQUEST_URI_TOO_LONG = 414
  UNSUPPORTED_MEDIA_TYPE = 415
  REQUESTED_RANGE_NOT_SATISFIABLE = 416
  EXPECTATION_FAILED = 417
  UNPROCESSABLE_ENTITY = 422
  LOCKED = 423
  FAILED_DEPENDENCY = 424
  UPGRADE_REQUIRED = 426
  PRECONDITION_REQUIRED = 428
  TOO_MANY_REQUESTS = 429
  REQUEST_HEADER_FIELDS_TOO_LARGE = 431

  # server error (5xx)
  INTERNAL_SERVER_ERROR = 500
  NOT_IMPLEMENTED = 501
  BAD_GATEWAY = 502
  SERVICE_UNAVAILABLE = 503
  GATEWAY_TIMEOUT = 504
  HTTP_VERSION_NOT_SUPPORTED = 505
  INSUFFICIENT_STORAGE = 507
  NOT_EXTENDED = 510
  NETWORK_AUTHENTICATION_REQUIRED = 511

  # Mapping status codes to official W3C names.  Keys are spelled with the
  # constants above where one exists; 306 has no constant.  Not every
  # constant has an entry here.
  responses = {
      CONTINUE: 'Continue',
      SWITCHING_PROTOCOLS: 'Switching Protocols',

      OK: 'OK',
      CREATED: 'Created',
      ACCEPTED: 'Accepted',
      NON_AUTHORITATIVE_INFORMATION: 'Non-Authoritative Information',
      NO_CONTENT: 'No Content',
      RESET_CONTENT: 'Reset Content',
      PARTIAL_CONTENT: 'Partial Content',

      MULTIPLE_CHOICES: 'Multiple Choices',
      MOVED_PERMANENTLY: 'Moved Permanently',
      FOUND: 'Found',
      SEE_OTHER: 'See Other',
      NOT_MODIFIED: 'Not Modified',
      USE_PROXY: 'Use Proxy',
      306: '(Unused)',
      TEMPORARY_REDIRECT: 'Temporary Redirect',

      BAD_REQUEST: 'Bad Request',
      UNAUTHORIZED: 'Unauthorized',
      PAYMENT_REQUIRED: 'Payment Required',
      FORBIDDEN: 'Forbidden',
      NOT_FOUND: 'Not Found',
      METHOD_NOT_ALLOWED: 'Method Not Allowed',
      NOT_ACCEPTABLE: 'Not Acceptable',
      PROXY_AUTHENTICATION_REQUIRED: 'Proxy Authentication Required',
      REQUEST_TIMEOUT: 'Request Timeout',
      CONFLICT: 'Conflict',
      GONE: 'Gone',
      LENGTH_REQUIRED: 'Length Required',
      PRECONDITION_FAILED: 'Precondition Failed',
      REQUEST_ENTITY_TOO_LARGE: 'Request Entity Too Large',
      REQUEST_URI_TOO_LONG: 'Request-URI Too Long',
      UNSUPPORTED_MEDIA_TYPE: 'Unsupported Media Type',
      REQUESTED_RANGE_NOT_SATISFIABLE: 'Requested Range Not Satisfiable',
      EXPECTATION_FAILED: 'Expectation Failed',
      PRECONDITION_REQUIRED: 'Precondition Required',
      TOO_MANY_REQUESTS: 'Too Many Requests',
      REQUEST_HEADER_FIELDS_TOO_LARGE: 'Request Header Fields Too Large',

      INTERNAL_SERVER_ERROR: 'Internal Server Error',
      NOT_IMPLEMENTED: 'Not Implemented',
      BAD_GATEWAY: 'Bad Gateway',
      SERVICE_UNAVAILABLE: 'Service Unavailable',
      GATEWAY_TIMEOUT: 'Gateway Timeout',
      HTTP_VERSION_NOT_SUPPORTED: 'HTTP Version Not Supported',
      NETWORK_AUTHENTICATION_REQUIRED: 'Network Authentication Required',
  }

  # maximal amount of data to read at one time in _safe_read (1 MiB)
  MAXAMOUNT = 1 << 20

  # maximal line length when calling readline() (64 KiB)
  _MAXLINE = 1 << 16

  # maximal number of header lines accepted by parse_headers()
  _MAXHEADERS = 100
  class HTTPMessage(email_message.Message):
      """Header container for HTTP messages, built on the email Message class."""

      # XXX The only usage of this method is in
      # http.server.CGIHTTPRequestHandler.  Maybe move the code there so
      # that it doesn't need to be part of the public API.  The API has
      # never been defined so this could cause backwards compatibility
      # issues.

      def getallmatchingheaders(self, name):
          """Find all header lines matching a given header name.

          Returns a list with one ``"Name: value"`` string per occurrence of
          the header, in message order, using the header name as stored in
          the message.  If the header does not occur, an empty list is
          returned.  Case is not important in the header name.

          The previous implementation compared each entry of ``self.keys()``
          against ``name + ':'``.  The keys are bare header names without a
          trailing colon, so nothing ever matched and the result was always
          an empty list.
          """
          wanted = name.lower()
          return ["%s: %s" % (key, value)
                  for key, value in self.items()
                  if key.lower() == wanted]
  def parse_headers(fp, _class=HTTPMessage):
      """Parses only RFC2822 headers from a file pointer.

      The email Parser wants strings rather than bytes, but wrapping the
      stream in a TextIOWrapper would buffer bytes that must later be read
      as bytes.  So the header lines are read here as bytes, up to and
      including the blank line (or EOF), decoded as iso-8859-1 and handed
      to the email Parser.

      Raises LineTooLong if a line exceeds _MAXLINE bytes and HTTPException
      if more than _MAXHEADERS lines are read.
      """
      raw_lines = []
      while True:
          raw = fp.readline(_MAXLINE + 1)
          if len(raw) > _MAXLINE:
              raise LineTooLong("header line")
          raw_lines.append(raw)
          if len(raw_lines) > _MAXHEADERS:
              raise HTTPException("got more than %d headers" % _MAXHEADERS)
          # A bare line terminator ends the header block; b'' means EOF.
          if raw in (b'\r\n', b'\n', b''):
              break
      text = bytes(b'').join(raw_lines).decode('iso-8859-1')
      parser = email_parser.Parser(_class=_class)
      return parser.parsestr(text)
  # Unique default for the deprecated 'strict' argument, so that an explicit
  # value (including None) can be told apart from "not passed".
  _strict_sentinel = object()


  class HTTPResponse(io.RawIOBase):
      """Raw reader for a single HTTP/1.x response.

      The object wraps the file returned by ``sock.makefile("rb")``.
      begin() parses the status line and headers; read() and readinto()
      then deliver the body, honouring Content-Length and chunked
      transfer encoding.
      """

      # See RFC 2616 sec 19.6 and RFC 1945 sec 6 for details.

      # The bytes from the socket object are iso-8859-1 strings.
      # See RFC 2616 sec 2.2 which notes an exception for MIME-encoded
      # text following RFC 2047.  The basic status line parsing only
      # accepts iso-8859-1.

      def __init__(self, sock, debuglevel=0, strict=_strict_sentinel, method=None, url=None):
          """Wrap *sock* for reading a response.

          debuglevel > 0 prints the status line and headers as they are
          read.  *strict* is deprecated and only triggers a warning.
          *method* is the request method; "HEAD" responses have no body.
          *url* is stored for geturl().
          """
          # If the response includes a content-length header, we need to
          # make sure that the client doesn't read more than the
          # specified number of bytes.  If it does, it will block until
          # the server times out and closes the connection.  This will
          # happen if a self.fp.read() is done (without a size) whether
          # self.fp is buffered or not.  So, no self.fp.read() by
          # clients unless they know what they are doing.
          self.fp = sock.makefile("rb")
          self.debuglevel = debuglevel
          if strict is not _strict_sentinel:
              warnings.warn("the 'strict' argument isn't supported anymore; "
                  "http.client now always assumes HTTP/1.x compliant servers.",
                  DeprecationWarning, 2)
          self._method = method
          # Store the URL so that geturl() does not raise AttributeError
          # when nobody assigns response.url afterwards.
          self.url = url

          # The HTTPResponse object is returned via urllib.  The clients
          # of http and urllib expect different attributes for the
          # headers.  headers is used here and supports urllib.  msg is
          # provided as a backwards compatibility layer for http
          # clients.
          self.headers = self.msg = None

          # from the Status-Line of the response
          self.version = _UNKNOWN     # HTTP-Version
          self.status = _UNKNOWN      # Status-Code
          self.reason = _UNKNOWN      # Reason-Phrase

          self.chunked = _UNKNOWN     # is "chunked" being used?
          self.chunk_left = _UNKNOWN  # bytes left to read in current chunk
          self.length = _UNKNOWN      # number of bytes left in response
          self.will_close = _UNKNOWN  # conn will close at end of response

      def _read_status(self):
          """Read and parse the status line.

          Returns (version, status, reason) with status as an int.
          Raises LineTooLong for an over-long line and BadStatusLine when
          the line is empty, does not start with "HTTP/" or has a status
          code outside 100..999.
          """
          line = str(self.fp.readline(_MAXLINE + 1), "iso-8859-1")
          if len(line) > _MAXLINE:
              raise LineTooLong("status line")
          if self.debuglevel > 0:
              print("reply:", repr(line))
          if not line:
              # Presumably, the server closed the connection before
              # sending a valid response.
              raise BadStatusLine(line)
          try:
              version, status, reason = line.split(None, 2)
          except ValueError:
              try:
                  version, status = line.split(None, 1)
                  reason = ""
              except ValueError:
                  # empty version will cause next test to fail.
                  version = ""
          if not version.startswith("HTTP/"):
              self._close_conn()
              raise BadStatusLine(line)

          # The status code is a three-digit number
          try:
              status = int(status)
              if status < 100 or status > 999:
                  raise BadStatusLine(line)
          except ValueError:
              raise BadStatusLine(line)
          return version, status, reason

      def begin(self):
          """Read the status line and headers and set up body framing.

          Does nothing if the headers were already read.  100 (Continue)
          responses are skipped.  Sets status, reason, version, headers,
          chunked, length and will_close.
          """
          if self.headers is not None:
              # we've already started reading the response
              return

          # read until we get a non-100 response
          while True:
              version, status, reason = self._read_status()
              if status != CONTINUE:
                  break
              # skip the header from the 100 response
              while True:
                  skip = self.fp.readline(_MAXLINE + 1)
                  if len(skip) > _MAXLINE:
                      raise LineTooLong("header line")
                  skip = skip.strip()
                  if not skip:
                      break
                  if self.debuglevel > 0:
                      print("header:", skip)

          self.code = self.status = status
          self.reason = reason.strip()
          if version in ("HTTP/1.0", "HTTP/0.9"):
              # Some servers might still return "0.9", treat it as 1.0 anyway
              self.version = 10
          elif version.startswith("HTTP/1."):
              self.version = 11   # use HTTP/1.1 code for HTTP/1.x where x>=1
          else:
              raise UnknownProtocol(version)

          self.headers = self.msg = parse_headers(self.fp)

          if self.debuglevel > 0:
              for hdr in self.headers:
                  print("header:", hdr, end=" ")

          # are we using the chunked-style of transfer encoding?
          tr_enc = self.headers.get("transfer-encoding")
          if tr_enc and tr_enc.lower() == "chunked":
              self.chunked = True
              self.chunk_left = None
          else:
              self.chunked = False

          # will the connection close at the end of the response?
          self.will_close = self._check_close()

          # do we have a Content-Length?
          # NOTE: RFC 2616, S4.4, #3 says we ignore this if tr_enc is "chunked"
          self.length = None
          length = self.headers.get("content-length")
          if length and not self.chunked:
              try:
                  self.length = int(length)
              except ValueError:
                  self.length = None
              else:
                  if self.length < 0:  # ignore nonsensical negative lengths
                      self.length = None
          else:
              self.length = None

          # does the body have a fixed length? (of zero)
          if (status == NO_CONTENT or status == NOT_MODIFIED or
              100 <= status < 200 or      # 1xx codes
              self._method == "HEAD"):
              self.length = 0

          # if the connection remains open, and we aren't using chunked, and
          # a content-length was not provided, then assume that the connection
          # WILL close.
          if (not self.will_close and
              not self.chunked and
              self.length is None):
              self.will_close = True

      def _check_close(self):
          """Return True if the connection will close after this response."""
          conn = self.headers.get("connection")
          if self.version == 11:
              # An HTTP/1.1 proxy is assumed to stay open unless
              # explicitly closed.
              if conn and "close" in conn.lower():
                  return True
              return False

          # Some HTTP/1.0 implementations have support for persistent
          # connections, using rules different than HTTP/1.1.

          # For older HTTP, Keep-Alive indicates persistent connection.
          if self.headers.get("keep-alive"):
              return False

          # At least Akamai returns a "Connection: Keep-Alive" header,
          # which was supposed to be sent by the client.
          if conn and "keep-alive" in conn.lower():
              return False

          # Proxy-Connection is a netscape hack.
          pconn = self.headers.get("proxy-connection")
          if pconn and "keep-alive" in pconn.lower():
              return False

          # otherwise, assume it will close
          return True

      def _close_conn(self):
          """Close the underlying file and mark the response as finished."""
          fp = self.fp
          self.fp = None
          fp.close()

      def close(self):
          """Close the response and, if still open, the underlying file."""
          super().close()  # set "closed" flag
          if self.fp:
              self._close_conn()

      # These implementations are for the benefit of io.BufferedReader.

      # XXX This class should probably be revised to act more like
      # the "raw stream" that BufferedReader expects.

      def flush(self):
          """Flush this object and the underlying file, if any."""
          super().flush()
          if self.fp:
              self.fp.flush()

      def readable(self):
          """Always True: a response can be read from."""
          return True

      # End of "raw stream" methods

      def isclosed(self):
          """True if the connection is closed."""
          # NOTE: it is possible that we will not ever call self.close(). This
          #       case occurs when will_close is TRUE, length is None, and we
          #       read up to the last byte, but NOT past it.
          #
          # IMPLIES: if will_close is FALSE, then self.close() will ALWAYS be
          #          called, meaning self.isclosed() is meaningful.
          return self.fp is None

      def read(self, amt=None):
          """Read and return up to *amt* body bytes, or the whole body.

          Returns empty bytes once the response is finished or for HEAD
          requests.  An unbounded read closes the underlying file when
          done; IncompleteRead propagates if the declared length cannot
          be satisfied.
          """
          if self.fp is None:
              return bytes(b"")

          if self._method == "HEAD":
              self._close_conn()
              return bytes(b"")

          if amt is not None:
              # Amount is given, so call base class version
              # (which is implemented in terms of self.readinto)
              return bytes(super(HTTPResponse, self).read(amt))

          # Amount is not given (unbounded read) so we must check
          # self.length and self.chunked
          if self.chunked:
              return self._readall_chunked()

          if self.length is None:
              s = self.fp.read()
          else:
              try:
                  s = self._safe_read(self.length)
              except IncompleteRead:
                  self._close_conn()
                  raise
              self.length = 0
          self._close_conn()        # we read everything
          return bytes(s)

      def readinto(self, b):
          """Read body bytes into buffer *b*; return the number read."""
          if self.fp is None:
              return 0

          if self._method == "HEAD":
              self._close_conn()
              return 0

          if self.chunked:
              return self._readinto_chunked(b)

          if self.length is not None:
              if len(b) > self.length:
                  # clip the read to the "end of response"
                  b = memoryview(b)[0:self.length]

          # we do not use _safe_read() here because this may be a .will_close
          # connection, and the user is reading more bytes than will be provided
          # (for example, reading in 1k chunks)
          if PY2:
              data = self.fp.read(len(b))
              n = len(data)
              b[:n] = data
          else:
              n = self.fp.readinto(b)

          if not n and b:
              # Ideally, we would raise IncompleteRead if the content-length
              # wasn't satisfied, but it might break compatibility.
              self._close_conn()
          elif self.length is not None:
              self.length -= n
              if not self.length:
                  self._close_conn()
          return n

      def _read_next_chunk_size(self):
          """Read a chunk-size line and return the size as an int.

          Chunk extensions after ';' are ignored.  On an unparsable size
          the connection is closed and ValueError propagates.
          """
          # Read the next chunk size from the file
          line = self.fp.readline(_MAXLINE + 1)
          if len(line) > _MAXLINE:
              raise LineTooLong("chunk size")
          i = line.find(b";")
          if i >= 0:
              line = line[:i]  # strip chunk-extensions
          try:
              return int(line, 16)
          except ValueError:
              # close the connection as protocol synchronisation is
              # probably lost
              self._close_conn()
              raise

      def _read_and_discard_trailer(self):
          """Consume trailer lines up to the terminating blank line or EOF."""
          # read and discard trailer up to the CRLF terminator
          ### note: we shouldn't have any trailers!
          while True:
              line = self.fp.readline(_MAXLINE + 1)
              if len(line) > _MAXLINE:
                  raise LineTooLong("trailer line")
              # b'' covers the vanishingly small number of sites that EOF
              # without sending the trailer
              if line in (b'\r\n', b'\n', b''):
                  break

      def _readall_chunked(self):
          """Read all remaining chunks and return the joined body bytes."""
          assert self.chunked != _UNKNOWN
          chunk_left = self.chunk_left
          value = []
          while True:
              if chunk_left is None:
                  try:
                      chunk_left = self._read_next_chunk_size()
                      if chunk_left == 0:
                          break
                  except ValueError:
                      raise IncompleteRead(bytes(b'').join(value))
              value.append(self._safe_read(chunk_left))

              # we read the whole chunk, get another
              self._safe_read(2)      # toss the CRLF at the end of the chunk
              chunk_left = None

          self._read_and_discard_trailer()

          # we read everything; close the "file"
          self._close_conn()

          return bytes(b'').join(value)

      def _readinto_chunked(self, b):
          """Fill buffer *b* from the chunked body; return the byte count."""
          assert self.chunked != _UNKNOWN
          chunk_left = self.chunk_left

          total_bytes = 0
          mvb = memoryview(b)
          while True:
              if chunk_left is None:
                  try:
                      chunk_left = self._read_next_chunk_size()
                      if chunk_left == 0:
                          break
                  except ValueError:
                      raise IncompleteRead(bytes(b[0:total_bytes]))

              if len(mvb) < chunk_left:
                  # buffer ends inside this chunk; remember what is left
                  n = self._safe_readinto(mvb)
                  self.chunk_left = chunk_left - n
                  return total_bytes + n
              elif len(mvb) == chunk_left:
                  n = self._safe_readinto(mvb)
                  self._safe_read(2)  # toss the CRLF at the end of the chunk
                  self.chunk_left = None
                  return total_bytes + n
              else:
                  temp_mvb = mvb[0:chunk_left]
                  n = self._safe_readinto(temp_mvb)
                  mvb = mvb[n:]
                  total_bytes += n

              # we read the whole chunk, get another
              self._safe_read(2)      # toss the CRLF at the end of the chunk
              chunk_left = None

          self._read_and_discard_trailer()

          # we read everything; close the "file"
          self._close_conn()

          return total_bytes

      def _safe_read(self, amt):
          """Read the number of bytes requested, compensating for partial reads.

          Normally, we have a blocking socket, but a read() can be interrupted
          by a signal (resulting in a partial read).

          Note that we cannot distinguish between EOF and an interrupt when zero
          bytes have been read. IncompleteRead() will be raised in this
          situation.

          This function should be used when <amt> bytes "should" be present for
          reading. If the bytes are truly not available (due to EOF), then the
          IncompleteRead exception can be used to detect the problem.
          """
          s = []
          while amt > 0:
              chunk = self.fp.read(min(amt, MAXAMOUNT))
              if not chunk:
                  raise IncompleteRead(bytes(b'').join(s), amt)
              s.append(chunk)
              amt -= len(chunk)
          return bytes(b"").join(s)

      def _safe_readinto(self, b):
          """Same as _safe_read, but for reading into a buffer."""
          total_bytes = 0
          mvb = memoryview(b)
          while total_bytes < len(b):
              # never ask the file for more than MAXAMOUNT bytes at once
              window = mvb[0:MAXAMOUNT]
              if PY2:
                  data = self.fp.read(len(window))
                  n = len(data)
                  window[:n] = data
              else:
                  n = self.fp.readinto(window)
              if not n:
                  # mvb has been advanced past the bytes already read, so
                  # the partial data is sliced from the start of b.
                  partial = bytes(memoryview(b)[0:total_bytes])
                  raise IncompleteRead(partial, len(b))
              mvb = mvb[n:]
              total_bytes += n
          return total_bytes

      def fileno(self):
          """Return the file descriptor of the underlying file."""
          return self.fp.fileno()

      def getheader(self, name, default=None):
          """Return header *name*, joining repeated values with ', '.

          Returns *default* if the header is absent.  Raises
          ResponseNotReady if the headers have not been read yet.
          """
          if self.headers is None:
              raise ResponseNotReady()
          headers = self.headers.get_all(name) or default
          if isinstance(headers, str) or not hasattr(headers, '__iter__'):
              return headers
          else:
              return ', '.join(headers)

      def getheaders(self):
          """Return list of (header, value) tuples."""
          if self.headers is None:
              raise ResponseNotReady()
          return list(self.headers.items())

      # We override IOBase.__iter__ so that it doesn't check for closed-ness

      def __iter__(self):
          return self

      # For compatibility with old-style urllib responses.

      def info(self):
          """Return the parsed headers object."""
          return self.headers

      def geturl(self):
          """Return the URL associated with this response."""
          return self.url

      def getcode(self):
          """Return the HTTP status code."""
          return self.status
  644. class HTTPConnection(object):
  645. _http_vsn = 11
  646. _http_vsn_str = 'HTTP/1.1'
  647. response_class = HTTPResponse
  648. default_port = HTTP_PORT
  649. auto_open = 1
  650. debuglevel = 0
  def __init__(self, host, port=None, strict=_strict_sentinel,
               timeout=socket._GLOBAL_DEFAULT_TIMEOUT, source_address=None):
      """Record connection parameters; no socket is opened here.

      *host* may carry a ":port" suffix when *port* is None.  *strict* is
      deprecated and only triggers a DeprecationWarning.
      """
      if strict is not _strict_sentinel:
          warnings.warn("the 'strict' argument isn't supported anymore; "
              "http.client now always assumes HTTP/1.x compliant servers.",
              DeprecationWarning, 2)

      # socket parameters, used later by connect()
      self.timeout = timeout
      self.source_address = source_address
      self.sock = None

      # request/response bookkeeping
      self._buffer = []
      self.__response = None
      self.__state = _CS_IDLE
      self._method = None

      # CONNECT tunnel configuration (see set_tunnel)
      self._tunnel_host = None
      self._tunnel_port = None
      self._tunnel_headers = {}

      # Parsed last: sets self.host and self.port.
      self._set_hostport(host, port)
  668. def set_tunnel(self, host, port=None, headers=None):
  669. """ Sets up the host and the port for the HTTP CONNECT Tunnelling.
  670. The headers argument should be a mapping of extra HTTP headers
  671. to send with the CONNECT request.
  672. """
  673. self._tunnel_host = host
  674. self._tunnel_port = port
  675. if headers:
  676. self._tunnel_headers = headers
  677. else:
  678. self._tunnel_headers.clear()
  679. def _set_hostport(self, host, port):
  680. if port is None:
  681. i = host.rfind(':')
  682. j = host.rfind(']') # ipv6 addresses have [...]
  683. if i > j:
  684. try:
  685. port = int(host[i+1:])
  686. except ValueError:
  687. if host[i+1:] == "": # http://foo.com:/ == http://foo.com/
  688. port = self.default_port
  689. else:
  690. raise InvalidURL("nonnumeric port: '%s'" % host[i+1:])
  691. host = host[:i]
  692. else:
  693. port = self.default_port
  694. if host and host[0] == '[' and host[-1] == ']':
  695. host = host[1:-1]
  696. self.host = host
  697. self.port = port
  698. def set_debuglevel(self, level):
  699. self.debuglevel = level
  def _tunnel(self):
      """Send a CONNECT request for the tunnel target and check the reply.

      Replaces self.host / self.port with the tunnel target, sends the
      CONNECT line and any tunnel headers, then reads the status line and
      discards the response headers.

      Raises socket.error (after closing the connection) if the status is
      not 200, and LineTooLong for an over-long header line.  The temporary
      response object is always closed so its file wrapper around the
      socket does not leak.
      """
      self._set_hostport(self._tunnel_host, self._tunnel_port)
      connect_str = "CONNECT %s:%d HTTP/1.0\r\n" % (self.host, self.port)
      self.send(connect_str.encode("ascii"))
      for header, value in self._tunnel_headers.items():
          header_str = "%s: %s\r\n" % (header, value)
          self.send(header_str.encode("latin-1"))
      self.send(bytes(b'\r\n'))

      response = self.response_class(self.sock, method=self._method)
      try:
          (version, code, message) = response._read_status()

          if code != 200:
              self.close()
              raise socket.error("Tunnel connection failed: %d %s" % (code,
                                                                      message.strip()))
          while True:
              line = response.fp.readline(_MAXLINE + 1)
              if len(line) > _MAXLINE:
                  raise LineTooLong("header line")
              # b'' is for sites which EOF without sending a trailer
              if line in (b'\r\n', b'\n', b''):
                  break
      finally:
          # Closes the makefile() reader; self.sock itself stays open.
          response.close()
  def connect(self):
      """Connect to the host and port specified in __init__."""
      address = (self.host, self.port)
      self.sock = socket_create_connection(address, self.timeout,
                                           self.source_address)
      # A configured tunnel is established right after connecting.
      if self._tunnel_host:
          self._tunnel()
  def close(self):
      """Close the connection to the HTTP server."""
      sock = self.sock
      if sock:
          sock.close()   # close it manually... there may be other refs
          self.sock = None
      pending = self.__response
      if pending:
          pending.close()
          self.__response = None
      # Whatever was in flight is gone; start over from Idle.
      self.__state = _CS_IDLE
  def send(self, data):
      """Write `data' to the server socket.

      ``data`` can be a bytes-like object, an array object, a file-like
      object that supports a .read() method, or an iterable of chunks.

      If no socket is open, one is opened first when ``auto_open`` is set;
      otherwise NotConnected is raised.  A TypeError is raised when ``data``
      is rejected by the socket and is not iterable either.
      """
      if self.sock is None:
          if not self.auto_open:
              raise NotConnected()
          self.connect()

      verbose = self.debuglevel > 0
      if verbose:
          print("send:", repr(data))
      chunk_size = 8192

      # Python 2.7 array objects have a read method which is incompatible
      # with the 2-arg calling syntax below.
      if hasattr(data, "read") and not isinstance(data, array):
          if verbose:
              print("sendIng a read()able")
          # io.BytesIO and other file-like objects don't have a `mode`
          # attribute; only objects that do have one, without "b" in it,
          # get their blocks encoded before sending.
          no_mode = object()
          mode = getattr(data, "mode", no_mode)
          encode = mode is not no_mode and "b" not in mode
          if encode and verbose:
              print("encoding file using iso-8859-1")
          while True:
              chunk = data.read(chunk_size)
              if not chunk:
                  return
              if encode:
                  chunk = chunk.encode("iso-8859-1")
              self.sock.sendall(chunk)

      try:
          self.sock.sendall(data)
      except TypeError:
          # The socket refused the object as a whole: fall back to sending
          # it piece by piece if it can be iterated.
          if not isinstance(data, Iterable):
              raise TypeError("data should be a bytes-like object "
                              "or an iterable, got %r" % type(data))
          for piece in data:
              self.sock.sendall(piece)
  787. def _output(self, s):
  788. """Add a line of output to the current request buffer.
  789. Assumes that the line does *not* end with \\r\\n.
  790. """
  791. self._buffer.append(s)
  792. def _send_output(self, message_body=None):
  793. """Send the currently buffered request and clear the buffer.
  794. Appends an extra \\r\\n to the buffer.
  795. A message_body may be specified, to be appended to the request.
  796. """
  797. self._buffer.extend((bytes(b""), bytes(b"")))
  798. msg = bytes(b"\r\n").join(self._buffer)
  799. del self._buffer[:]
  800. # If msg and message_body are sent in a single send() call,
  801. # it will avoid performance problems caused by the interaction
  802. # between delayed ack and the Nagle algorithm.
  803. if isinstance(message_body, bytes):
  804. msg += message_body
  805. message_body = None
  806. self.send(msg)
  807. if message_body is not None:
  808. # message_body was not a string (i.e. it is a file), and
  809. # we must run the risk of Nagle.
  810. self.send(message_body)
  def putrequest(self, method, url, skip_host=0, skip_accept_encoding=0):
      """Start a new request: buffer the request line and default headers.

      `method' specifies an HTTP request method, e.g. 'GET'.
      `url' specifies the object being requested, e.g. '/index.html';
      an empty value is replaced by '/'.
      `skip_host' if True does not add automatically a 'Host:' header
      `skip_accept_encoding' if True does not add automatically an
         'Accept-Encoding:' header

      Raises CannotSendRequest unless the connection is idle.
      """
      # if a prior response has been completed, then forget about it.
      if self.__response and self.__response.isclosed():
          self.__response = None

      # in certain cases, we cannot issue another request on this connection.
      # this occurs when:
      #   1) we are in the process of sending a request.   (_CS_REQ_STARTED)
      #   2) a response to a previous request has signalled that it is going
      #      to close the connection upon completion.
      #   3) the headers for the previous response have not been read, thus
      #      we cannot determine whether point (2) is true.   (_CS_REQ_SENT)
      #
      # if there is no prior response, then we can request at will.
      #
      # if point (2) is true, then we will have passed the socket to the
      # response (effectively meaning, "there is no prior response"), and
      # will open a new one when a new request is made.
      #
      # Note: if a prior response exists, then we *can* start a new request.
      #       We are not allowed to begin fetching the response to this new
      #       request, however, until that prior response is complete.
      #
      if self.__state != _CS_IDLE:
          raise CannotSendRequest(self.__state)
      self.__state = _CS_REQ_STARTED

      # Save the method we use, we need it later in the response phase
      self._method = method
      url = url or '/'
      request_line = '%s %s %s' % (method, url, self._http_vsn_str)

      # Non-ASCII characters should have been eliminated earlier
      self._output(request_line.encode('ascii'))

      if self._http_vsn != 11:
          # For HTTP/1.0, the server will assume "not chunked"
          return

      def encode_host(name):
          # Plain ASCII when possible, IDNA otherwise.
          try:
              return name.encode("ascii")
          except UnicodeEncodeError:
              return name.encode("idna")

      # Issue some standard headers for better HTTP/1.1 compliance
      if not skip_host:
          # this header is issued *only* for HTTP/1.1
          # connections. more specifically, this means it is
          # only issued when the client uses the new
          # HTTPConnection() class. backwards-compat clients
          # will be using HTTP/1.0 and those clients may be
          # issuing this header themselves. we should NOT issue
          # it twice; some web servers (such as Apache) barf
          # when they see two Host: headers

          # If we need a non-standard port, include it in the
          # header.  If the request is going through a proxy,
          # use the host of the actual URL, not the host of the
          # proxy.
          netloc = ''
          if url.startswith('http'):
              netloc = urlsplit(url)[1]

          if netloc:
              self.putheader('Host', encode_host(netloc))
          else:
              host_enc = encode_host(self.host)

              # As per RFC 2732, IPv6 address should be wrapped with []
              # when used as Host header
              if ':' in self.host:
                  host_enc = bytes(b'[' + host_enc + b']')

              if self.port == self.default_port:
                  self.putheader('Host', host_enc)
              else:
                  host_enc = host_enc.decode("ascii")
                  self.putheader('Host', "%s:%s" % (host_enc, self.port))

      # note: we are assuming that clients will not attempt to set these
      #       headers since *this* library must deal with the
      #       consequences. this also means that when the supporting
      #       libraries are updated to recognize other forms, then this
      #       code should be changed (removed or updated).

      # we only want a Content-Encoding of "identity" since we don't
      # support encodings such as x-gzip or x-deflate.
      if not skip_accept_encoding:
          self.putheader('Accept-Encoding', 'identity')

      # we can accept "chunked" Transfer-Encodings, but no others
      # NOTE: no TE header implies *only* "chunked"
      #self.putheader('TE', 'chunked')

      # if TE is supplied in the header, then it must appear in a
      # Connection header.
      #self.putheader('Connection', 'TE')
  def putheader(self, header, *values):
      """Buffer one request header line for the server.

      For example: h.putheader('Accept', 'text/html')

      A header name that has an ``encode`` method is encoded as ASCII.
      Values that have an ``encode`` method are encoded as Latin-1, ints
      are rendered in decimal, and anything else is used unchanged.  The
      values are joined with '\\r\\n\\t' after the 'name: ' prefix.

      Raises CannotSendHeader unless a request has been started.
      """
      if self.__state != _CS_REQ_STARTED:
          raise CannotSendHeader()

      if hasattr(header, 'encode'):
          header = header.encode('ascii')

      def to_wire(item):
          # Convert a single header value to its on-the-wire form.
          if hasattr(item, 'encode'):
              return item.encode('latin-1')
          if isinstance(item, int):
              return str(item).encode('ascii')
          return item

      joined = bytes(b'\r\n\t').join([to_wire(item) for item in values])
      self._output(header + bytes(b': ') + joined)
  def endheaders(self, message_body=None):
      """Mark the header section as finished and send the request.

      The buffered request is handed to _send_output() together with the
      optional message_body, the body associated with the request.

      Raises CannotSendHeader unless a request has been started and its
      headers are still being written.
      """
      if self.__state != _CS_REQ_STARTED:
          raise CannotSendHeader()
      self.__state = _CS_REQ_SENT
      self._send_output(message_body)
  def request(self, method, url, body=None, headers=None):
      """Send a complete request to the server.

      `method' and `url' are used for the request line, `body' is the
      optional message body and `headers' is a mapping of extra header
      names to values.  Omitting `headers' (or passing None) is the same as
      passing an empty mapping; a fresh dict is created per call so that no
      mutable default object is shared between calls.
      """
      if headers is None:
          headers = {}
      self._send_request(method, url, body, headers)
  940. def _set_content_length(self, body):
  941. # Set the content-length based on the body.
  942. thelen = None
  943. try:
  944. thelen = str(len(body))
  945. except TypeError as te:
  946. # If this is a file-like object, try to
  947. # fstat its file descriptor
  948. try:
  949. thelen = str(os.fstat(body.fileno()).st_size)
  950. except (AttributeError, OSError):
  951. # Don't send a length if this failed
  952. if self.debuglevel > 0: print("Cannot stat!!")
  953. if thelen is not None:
  954. self.putheader('Content-Length', thelen)
  955. def _send_request(self, method, url, body, headers):
  956. # Honor explicitly requested Host: and Accept-Encoding: headers.
  957. header_names = dict.fromkeys([k.lower() for k in headers])
  958. skips = {}
  959. if 'host' in header_names:
  960. skips['skip_host'] = 1
  961. if 'accept-encoding' in header_names:
  962. skips['skip_accept_encoding'] = 1
  963. self.putrequest(method, url, **skips)
  964. if body is not None and ('content-length' not in header_names):
  965. self._set_content_length(body)
  966. for hdr, value in headers.items():
  967. self.putheader(hdr, value)
  968. if isinstance(body, str):
  969. # RFC 2616 Section 3.7.1 says that text default has a
  970. # default charset of iso-8859-1.
  971. body = body.encode('iso-8859-1')
  972. self.endheaders(body)
  def getresponse(self):
      """Get the response from the server.

      If the HTTPConnection is in the correct state, returns an
      instance of HTTPResponse or of whatever object is returned by
      the response_class variable.

      If a request has not been sent or if a previous response has
      not been handled, ResponseNotReady is raised.  If the HTTP
      response indicates that the connection should be closed, then
      it will be closed before the response is returned.  When the
      connection is closed, the underlying socket is closed.

      If reading the response headers (or any later step here) fails, the
      newly created response object is closed before the exception is
      re-raised, so it is not leaked.
      """
      # if a prior response has been completed, then forget about it.
      if self.__response and self.__response.isclosed():
          self.__response = None

      # if a prior response exists, then it must be completed (otherwise, we
      # cannot read this response's header to determine the connection-close
      # behavior)
      #
      # note: if a prior response existed, but was connection-close, then the
      # socket and response were made independent of this HTTPConnection
      # object since a new request requires that we open a whole new
      # connection
      #
      # this means the prior response had one of two states:
      #   1) will_close: this connection was reset and the prior socket and
      #                  response operate independently
      #   2) persistent: the response was retained and we await its
      #                  isclosed() status to become true.
      #
      if self.__state != _CS_REQ_SENT or self.__response:
          raise ResponseNotReady(self.__state)

      if self.debuglevel > 0:
          response = self.response_class(self.sock, self.debuglevel,
                                         method=self._method)
      else:
          response = self.response_class(self.sock, method=self._method)

      try:
          response.begin()
          assert response.will_close != _UNKNOWN
          self.__state = _CS_IDLE

          if response.will_close:
              # this effectively passes the connection to the response
              self.close()
          else:
              # remember this, so we can tell when it is complete
              self.__response = response

          return response
      except BaseException:
          # Do not leak the half-initialised response; the original
          # exception is propagated unchanged.
          response.close()
          raise
  try:
      import ssl
      from ssl import SSLContext
  except ImportError:
      pass
  else:
      class HTTPSConnection(HTTPConnection):
          "This class allows communication via SSL."

          default_port = HTTPS_PORT

          # XXX Should key_file and cert_file be deprecated in favour of context?

          def __init__(self, host, port=None, key_file=None, cert_file=None,
                       strict=_strict_sentinel, timeout=socket._GLOBAL_DEFAULT_TIMEOUT,
                       source_address=None, **_3to2kwargs):
              # `check_hostname' and `context' are accepted as keyword
              # arguments only; both default to None.
              check_hostname = _3to2kwargs.pop('check_hostname', None)
              context = _3to2kwargs.pop('context', None)
              super(HTTPSConnection, self).__init__(host, port, strict, timeout,
                                                    source_address)
              self.key_file = key_file
              self.cert_file = cert_file

              if context is None:
                  # Some reasonable defaults
                  context = ssl.SSLContext(ssl.PROTOCOL_SSLv23)
                  context.options |= ssl.OP_NO_SSLv2

              # Hostname checking defaults to "on" exactly when the context
              # verifies certificates; asking for it explicitly without
              # verification is an error.
              will_verify = context.verify_mode != ssl.CERT_NONE
              if check_hostname is None:
                  check_hostname = will_verify
              elif check_hostname and not will_verify:
                  raise ValueError("check_hostname needs a SSL context with "
                                   "either CERT_OPTIONAL or CERT_REQUIRED")

              if key_file or cert_file:
                  context.load_cert_chain(cert_file, key_file)
              self._context = context
              self._check_hostname = check_hostname

          def connect(self):
              "Connect to a host on a given (SSL) port."
              raw_sock = socket_create_connection((self.host, self.port),
                                                  self.timeout,
                                                  self.source_address)
              if self._tunnel_host:
                  # _tunnel() talks over self.sock, so expose the plain
                  # socket before wrapping it.
                  self.sock = raw_sock
                  self._tunnel()

              sni_name = self.host if ssl.HAS_SNI else None
              self.sock = self._context.wrap_socket(raw_sock,
                                                    server_hostname=sni_name)
              try:
                  if self._check_hostname:
                      ssl.match_hostname(self.sock.getpeercert(), self.host)
              except Exception:
                  # Tear the wrapped socket down before propagating the error.
                  self.sock.shutdown(socket.SHUT_RDWR)
                  self.sock.close()
                  raise

      __all__.append("HTTPSConnection")
  1072. # ######################################
  1073. # # We use the old HTTPSConnection class from Py2.7, because ssl.SSLContext
  1074. # # doesn't exist in the Py2.7 stdlib
  1075. # class HTTPSConnection(HTTPConnection):
  1076. # "This class allows communication via SSL."
  1077. # default_port = HTTPS_PORT
  1078. # def __init__(self, host, port=None, key_file=None, cert_file=None,
  1079. # strict=None, timeout=socket._GLOBAL_DEFAULT_TIMEOUT,
  1080. # source_address=None):
  1081. # HTTPConnection.__init__(self, host, port, strict, timeout,
  1082. # source_address)
  1083. # self.key_file = key_file
  1084. # self.cert_file = cert_file
  1085. # def connect(self):
  1086. # "Connect to a host on a given (SSL) port."
  1087. # sock = socket_create_connection((self.host, self.port),
  1088. # self.timeout, self.source_address)
  1089. # if self._tunnel_host:
  1090. # self.sock = sock
  1091. # self._tunnel()
  1092. # self.sock = ssl.wrap_socket(sock, self.key_file, self.cert_file)
  1093. # __all__.append("HTTPSConnection")
  1094. # ######################################
  class HTTPException(Exception):
      """Common base class of the HTTP exceptions defined in this module.

      Subclasses that define an __init__ must call Exception.__init__
      or define self.args.  Otherwise, str() will fail.
      """
  class NotConnected(HTTPException):
      """Raised by send() when no socket is open and auto_open is false."""
  class InvalidURL(HTTPException):
      """HTTP exception type named for invalid URLs.

      NOTE(review): no raise site is visible in this part of the file;
      confirm the exact condition where it is raised.
      """
  class UnknownProtocol(HTTPException):
      """HTTP exception that records a protocol version.

      The value passed in is available as ``version`` and as the only
      element of ``args``.
      """

      def __init__(self, version):
          self.version = version
          self.args = (version,)
  class UnknownTransferEncoding(HTTPException):
      """HTTP exception type named for unknown transfer encodings.

      NOTE(review): no raise site is visible in this part of the file;
      confirm the exact condition where it is raised.
      """
  class UnimplementedFileMode(HTTPException):
      """HTTP exception type named for unimplemented file modes.

      NOTE(review): no raise site is visible in this part of the file;
      confirm the exact condition where it is raised.
      """
  class IncompleteRead(HTTPException):
      """HTTP exception carrying the data read before a read stopped short.

      ``partial`` holds the data that was read and ``expected`` the number
      of further bytes that were expected, or None when not given.
      """

      def __init__(self, partial, expected=None):
          self.args = (partial,)
          self.partial = partial
          self.expected = expected

      def __repr__(self):
          if self.expected is None:
              suffix = ''
          else:
              suffix = ', %i more expected' % self.expected
          return 'IncompleteRead(%i bytes read%s)' % (len(self.partial),
                                                      suffix)

      def __str__(self):
          # The string form is deliberately the same as the repr.
          return repr(self)
  class ImproperConnectionState(HTTPException):
      """Base class of the errors raised when a connection method is called
      while the connection is in the wrong state for it."""
  class CannotSendRequest(ImproperConnectionState):
      """Raised by putrequest() when the connection is not idle."""
  class CannotSendHeader(ImproperConnectionState):
      """Raised by putheader() and endheaders() when no request has been
      started or its headers have already been sent."""
  class ResponseNotReady(ImproperConnectionState):
      """Raised by getresponse() when no request has been sent or a previous
      response is still pending."""
  class BadStatusLine(HTTPException):
      """HTTP exception that records a status line.

      The line is stored in ``line`` and ``args``; a falsy line (such as an
      empty one) is stored as its repr() instead.
      """

      def __init__(self, line):
          shown = line if line else repr(line)
          self.args = (shown,)
          self.line = shown
  class LineTooLong(HTTPException):
      """Raised when a line being read is longer than _MAXLINE bytes.

      `line_type' names the kind of line that was being read and is
      included in the error message.
      """

      def __init__(self, line_type):
          message = ("got more than %d bytes when reading %s"
                     % (_MAXLINE, line_type))
          HTTPException.__init__(self, message)
  # for backwards compatibility: ``error`` is an alias of HTTPException,
  # so code that catches ``error`` catches every HTTPException.
  error = HTTPException