urls.py 1.8 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465
  1. import os
  2. import string
  3. import urllib.parse
  4. import urllib.request
  5. from typing import Optional
  6. from .compat import WINDOWS
  7. def get_url_scheme(url):
  8. # type: (str) -> Optional[str]
  9. if ":" not in url:
  10. return None
  11. return url.split(":", 1)[0].lower()
  12. def path_to_url(path):
  13. # type: (str) -> str
  14. """
  15. Convert a path to a file: URL. The path will be made absolute and have
  16. quoted path parts.
  17. """
  18. path = os.path.normpath(os.path.abspath(path))
  19. url = urllib.parse.urljoin("file:", urllib.request.pathname2url(path))
  20. return url
  21. def url_to_path(url):
  22. # type: (str) -> str
  23. """
  24. Convert a file: URL to a path.
  25. """
  26. assert url.startswith(
  27. "file:"
  28. ), f"You can only turn file: urls into filenames (not {url!r})"
  29. _, netloc, path, _, _ = urllib.parse.urlsplit(url)
  30. if not netloc or netloc == "localhost":
  31. # According to RFC 8089, same as empty authority.
  32. netloc = ""
  33. elif WINDOWS:
  34. # If we have a UNC path, prepend UNC share notation.
  35. netloc = "\\\\" + netloc
  36. else:
  37. raise ValueError(
  38. f"non-local file URIs are not supported on this platform: {url!r}"
  39. )
  40. path = urllib.request.url2pathname(netloc + path)
  41. # On Windows, urlsplit parses the path as something like "/C:/Users/foo".
  42. # This creates issues for path-related functions like io.open(), so we try
  43. # to detect and strip the leading slash.
  44. if (
  45. WINDOWS
  46. and not netloc # Not UNC.
  47. and len(path) >= 3
  48. and path[0] == "/" # Leading slash to strip.
  49. and path[1] in string.ascii_letters # Drive letter.
  50. and path[2:4] in (":", ":/") # Colon + end of string, or colon + absolute path.
  51. ):
  52. path = path[1:]
  53. return path