PyFontify.py 4.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161
  1. #Copyright ReportLab Europe Ltd. 2000-2017
  2. #see license.txt for license details
  3. __version__='3.3.0'
  4. __doc__="""
  5. Module to analyze Python source code; for syntax coloring tools.
  6. Interface::
  7. tags = fontify(pytext, searchfrom, searchto)
  8. - The 'pytext' argument is a string containing Python source code.
  9. - The (optional) arguments 'searchfrom' and 'searchto' may contain a slice in pytext.
  10. - The returned value is a list of tuples, formatted like this::
  11. [('keyword', 0, 6, None), ('keyword', 11, 17, None), ('comment', 23, 53, None), etc. ]
  12. - The tuple contents are always like this::
  13. (tag, startindex, endindex, sublist)
  14. - tag is one of 'keyword', 'string', 'comment' or 'identifier'
  15. - sublist is not used, hence always None.
  16. """
  17. # Based on FontText.py by Mitchell S. Chapman,
  18. # which was modified by Zachary Roadhouse,
  19. # then un-Tk'd by Just van Rossum.
  20. # Many thanks for regular expression debugging & authoring are due to:
# Tim (the-incredib-ly y'rs) Peters and Christian Tismer
  22. # So, who owns the copyright? ;-) How about this:
  23. # Copyright 1996-2001:
  24. # Mitchell S. Chapman,
  25. # Zachary Roadhouse,
  26. # Tim Peters,
  27. # Just van Rossum
  28. __version__ = "0.4"
  29. import re
  30. # First a little helper, since I don't like to repeat things. (Tismer speaking)
  31. def replace(src, sep, rep):
  32. return rep.join(src.split(sep))
  33. # This list of keywords is taken from ref/node13.html of the
  34. # Python 1.3 HTML documentation. ("access" is intentionally omitted.)
  35. keywordsList = [
  36. "as", "assert", "exec",
  37. "del", "from", "lambda", "return",
  38. "and", "elif", "global", "not", "try",
  39. "break", "else", "if", "or", "while",
  40. "class", "except", "import", "pass",
  41. "continue", "finally", "in", "print",
  42. "def", "for", "is", "raise", "yield",
  43. "with"]
  44. # Build up a regular expression which will match anything
  45. # interesting, including multi-line triple-quoted strings.
  46. commentPat = r"#[^\n]*"
  47. pat = r"q[^\\q\n]*(\\[\000-\377][^\\q\n]*)*q"
  48. quotePat = replace(pat, "q", "'") + "|" + replace(pat, 'q', '"')
  49. # Way to go, Tim!
  50. pat = r"""
  51. qqq
  52. [^\\q]*
  53. (
  54. ( \\[\000-\377]
  55. | q
  56. ( \\[\000-\377]
  57. | [^\q]
  58. | q
  59. ( \\[\000-\377]
  60. | [^\\q]
  61. )
  62. )
  63. )
  64. [^\\q]*
  65. )*
  66. qqq
  67. """
  68. pat = ''.join(pat.split()) # get rid of whitespace
  69. tripleQuotePat = replace(pat, "q", "'") + "|" + replace(pat, 'q', '"')
  70. # Build up a regular expression which matches all and only
  71. # Python keywords. This will let us skip the uninteresting
  72. # identifier references.
  73. # nonKeyPat identifies characters which may legally precede
  74. # a keyword pattern.
  75. nonKeyPat = r"(^|[^a-zA-Z0-9_.\"'])"
  76. keyPat = nonKeyPat + "(" + "|".join(keywordsList) + ")" + nonKeyPat
  77. matchPat = commentPat + "|" + keyPat + "|" + tripleQuotePat + "|" + quotePat
  78. matchRE = re.compile(matchPat)
  79. idKeyPat = "[ \t]*[A-Za-z_][A-Za-z_0-9.]*" # Ident w. leading whitespace.
  80. idRE = re.compile(idKeyPat)
  81. def fontify(pytext, searchfrom = 0, searchto = None):
  82. if searchto is None:
  83. searchto = len(pytext)
  84. # Cache a few attributes for quicker reference.
  85. search = matchRE.search
  86. idSearch = idRE.search
  87. tags = []
  88. tags_append = tags.append
  89. commentTag = 'comment'
  90. stringTag = 'string'
  91. keywordTag = 'keyword'
  92. identifierTag = 'identifier'
  93. start = 0
  94. end = searchfrom
  95. while 1:
  96. m = search(pytext, end)
  97. if m is None:
  98. break # EXIT LOOP
  99. start = m.start()
  100. if start >= searchto:
  101. break # EXIT LOOP
  102. match = m.group(0)
  103. end = start + len(match)
  104. c = match[0]
  105. if c not in "#'\"":
  106. # Must have matched a keyword.
  107. if start != searchfrom:
  108. # there's still a redundant char before and after it, strip!
  109. match = match[1:-1]
  110. start = start + 1
  111. else:
  112. # this is the first keyword in the text.
  113. # Only a space at the end.
  114. match = match[:-1]
  115. end = end - 1
  116. tags_append((keywordTag, start, end, None))
  117. # If this was a defining keyword, look ahead to the
  118. # following identifier.
  119. if match in ["def", "class"]:
  120. m = idSearch(pytext, end)
  121. if m is not None:
  122. start = m.start()
  123. if start == end:
  124. match = m.group(0)
  125. end = start + len(match)
  126. tags_append((identifierTag, start, end, None))
  127. elif c == "#":
  128. tags_append((commentTag, start, end, None))
  129. else:
  130. tags_append((stringTag, start, end, None))
  131. return tags
  132. def test(path):
  133. f = open(path)
  134. text = f.read()
  135. f.close()
  136. tags = fontify(text)
  137. for tag, start, end, sublist in tags:
  138. print(tag, repr(text[start:end]))