Logo Search packages:      
Sourcecode: viewcvs version File versions  Download package

PyFontify.py

00001 """Module to analyze Python source code; for syntax coloring tools.

Interface:
      tags = fontify(pytext, searchfrom, searchto)

The 'pytext' argument is a string containing Python source code.
The (optional) arguments 'searchfrom' and 'searchto' may contain a slice in pytext. 
The returned value is a list of tuples, formatted like this:
      [('keyword', 0, 6, None), ('keyword', 11, 17, None), ('comment', 23, 53, None), etc. ]
The tuple contents are always like this:
      (tag, startindex, endindex, sublist)
tag is one of 'keyword', 'string', 'comment' or 'identifier'
sublist is not used, hence always None. 
"""

# Based on FontText.py by Mitchell S. Chapman,
# which was modified by Zachary Roadhouse,
# then un-Tk'd by Just van Rossum.
# Many thanks for regular expression debugging & authoring are due to:
#     Tim (the-incredib-ly y'rs) Peters and Cristian Tismer
# So, who owns the copyright? ;-) How about this:
# Copyright 1996-1997: 
#     Mitchell S. Chapman,
#     Zachary Roadhouse,
#     Tim Peters,
#     Just van Rossum

__version__ = "0.3.1"

import string, re

# First a little helper, since I don't like to repeat things. (Tismer speaking)
def replace(where, what, with):
      return string.join(string.split(where, what), with)

# This list of keywords is taken from ref/node13.html of the
# Python 1.3 HTML documentation. ("access" is intentionally omitted.)
keywordsList = [
      "del", "from", "lambda", "return",
      "and", "elif", "global", "not", "try",
      "break", "else", "if", "or", "while",
      "class", "except", "import", "pass",
      "continue", "finally", "in", "print",
      "def", "for", "is", "raise"]

# Build up a regular expression which will match anything
# interesting, including multi-line triple-quoted strings.
commentPat = "#.*"

pat = "q[^\q\n]*\(\\\\[\000-\377][^\q\n]*\)*q"
quotePat = replace(pat, "q", "'") + "\|" + replace(pat, 'q', '"')

# Way to go, Tim!
pat = """
      qqq
      [^\\q]*
      \(
            \(    \\\\[\000-\377]
            \|    q
                  \(    \\\\[\000-\377]
                  \|    [^\\q]
                  \|    q
                        \(    \\\\[\000-\377]
                        \|    [^\\q]
                        \)
                  \)
            \)
            [^\\q]*
      \)*
      qqq
"""
pat = string.join(string.split(pat), '')  # get rid of whitespace
tripleQuotePat = replace(pat, "q", "'") + "\|" + replace(pat, 'q', '"')

# Build up a regular expression which matches all and only
# Python keywords. This will let us skip the uninteresting
# identifier references.
# nonKeyPat identifies characters which may legally precede
# a keyword pattern.
nonKeyPat = "\(^\|[^a-zA-Z0-9_.\"']\)"

keyPat = nonKeyPat + "\("
for keyword in keywordsList:
      keyPat = keyPat + keyword + "\|"
keyPat = keyPat[:-2] + "\)" + nonKeyPat

matchPat = keyPat + "\|" + commentPat + "\|" + tripleQuotePat + "\|" + quotePat
matchRE = re.compile(matchPat)

idKeyPat = "[ \t]*[A-Za-z_][A-Za-z_0-9.]*"      # Ident w. leading whitespace.
idRE = re.compile(idKeyPat)


def fontify(pytext, searchfrom = 0, searchto = None):
      if searchto is None:
            searchto = len(pytext)
      # Cache a few attributes for quicker reference.
      search = matchRE.search
      group = matchRE.group
      idSearch = idRE.search
      idGroup = idRE.group
      
      tags = []
      tags_append = tags.append
      commentTag = 'comment'
      stringTag = 'string'
      keywordTag = 'keyword'
      identifierTag = 'identifier'
      
      start = 0
      end = searchfrom
      while 1:
            start = search(pytext, end)
            if start < 0 or start >= searchto:
                  break # EXIT LOOP
            match = group(0)
            end = start + len(match)
            c = match[0]
            if c not in "#'\"":
                  # Must have matched a keyword.
                  if start <> searchfrom:
                        # there's still a redundant char before and after it, strip!
                        match = match[1:-1]
                        start = start + 1
                  else:
                        # this is the first keyword in the text.
                        # Only a space at the end.
                        match = match[:-1]
                  end = end - 1
                  tags_append((keywordTag, start, end, None))
                  # If this was a defining keyword, look ahead to the
                  # following identifier.
                  if match in ["def", "class"]:
                        start = idSearch(pytext, end)
                        if start == end:
                              match = idGroup(0)
                              end = start + len(match)
                              tags_append((identifierTag, start, end, None))
            elif c == "#":
                  tags_append((commentTag, start, end, None))
            else:
                  tags_append((stringTag, start, end, None))
      return tags


def test(path):
      f = open(path)
      text = f.read()
      f.close()
      tags = fontify(text)
      for tag, start, end, sublist in tags:
            print tag, `text[start:end]`

Generated by  Doxygen 1.6.0   Back to index