[svn r37594] Fixed unicode issues in apigen/htmlgen.py and apigen/source: moved some methods
out of HTMLDocument and added support for detecting the encoding of a Python
file in order to accomplish that (both in source/html.py). Also fixed some minor
issues in htmlgen.py: items whose names start with _ are now hidden from the
nav, and an indentation bug in the nav was fixed.

--HG--
branch : trunk
guido 2007-01-30 14:24:27 +01:00
parent c2e1510f86
commit 45868fe506
5 changed files with 149 additions and 81 deletions
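The core of the change is PEP 263 style encoding detection: the module source is scanned for a coding cookie, falling back to ISO-8859-1, and the raw source is decoded to unicode before it is handed to the HTML builders. A minimal Python 2 sketch of that idea (the guess_encoding helper and the inline example string are illustrative only; the regex and the fallback encoding are the ones used by get_module_encoding in the source/html.py diff below):

import re

# coding-cookie pattern and ISO-8859-1 fallback mirror get_module_encoding below
_coding_cookie = re.compile(r'coding[:=]\s*([-\w.]+)')

def guess_encoding(source):
    # illustrative stand-in for get_module_encoding: the real function takes a
    # path, maps .pyc/.pyo back to the .py file and reads it before searching
    match = _coding_cookie.search(source)
    if match:
        return match.group(1)
    return 'ISO-8859-1'

# Python 2 byte string with a coding cookie and UTF-8 encoded non-ascii data
source = "# -*- coding: UTF-8 -*-\nname = 'f\xc3\xb6\xc3\xb6'\n"
decoded = unicode(source, guess_encoding(source))  # now safe to embed in the html

Falling back to ISO-8859-1 rather than raising matches the XXX note in the new encoding tests, which assume latin-1 when no cookie is present.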

View File

@@ -361,12 +361,12 @@ class ApiPageBuilder(AbstractPageBuilder):
H.a('source: %s' % (sourcefile,),
href=self.linker.get_lazyhref(sourcefile)),
H.br(),
H.SourceDef(H.pre(callable_source)))
H.SourceDef(H.pre(unicode(callable_source, 'UTF-8'))))
elif not is_in_pkg and sourcefile and callable_source:
csource = H.div(H.br(),
H.em('source: %s' % (sourcefile,)),
H.br(),
H.SourceDef(H.pre(callable_source)))
H.SourceDef(H.pre(unicode(callable_source, 'UTF-8'))))
else:
csource = H.SourceDef('could not get source file')
@@ -460,6 +460,8 @@ class ApiPageBuilder(AbstractPageBuilder):
H.Docstring(docstring or '*no docstring available*')
)
for dotted_name in sorted(item_dotted_names):
if dotted_name.startswith('_'):
continue
itemname = dotted_name.split('.')[-1]
if is_private(itemname):
continue
@@ -586,7 +588,7 @@ class ApiPageBuilder(AbstractPageBuilder):
elif lastlevel and build_children:
# XXX hack
navitems += build_nav_level('%s.' % (dotted_name,),
depth+2)
depth+1)
return navitems
@@ -698,9 +700,9 @@ class ApiPageBuilder(AbstractPageBuilder):
mangled = []
for i, sline in enumerate(str(source).split('\n')):
if i == lineno:
l = '-> %s' % (sline,)
l = '-> %s' % (unicode(sline, 'UTF-8'),)
else:
l = ' %s' % (sline,)
l = ' %s' % (unicode(sline, 'UTF-8'),)
mangled.append(l)
if sourcefile:
linktext = '%s - line %s' % (sourcefile, line.lineno + 1)

View File

@@ -2,14 +2,13 @@
""" html - generating ad-hoc html out of source browser
"""
import py
from py.xml import html, raw
from compiler import ast
import time
from py.__.apigen.source.color import Tokenizer, PythonSchema
class HtmlEnchanter(object):
reserved_words = ['if', 'for', 'return', 'yield']
def __init__(self, mod):
self.mod = mod
self.create_caches()
@@ -37,8 +36,30 @@ class HtmlEnchanter(object):
except KeyError:
return [row] # no more info
def prepare_line(text, tokenizer, encoding):
""" adds html formatting to text items (list)
only processes items if they're of a string type (or unicode)
"""
ret = []
for item in text:
if type(item) in [str, unicode]:
tokens = tokenizer.tokenize(item)
for t in tokens:
data = unicode(t.data, encoding)
if t.type in ['keyword', 'alt_keyword', 'number',
'string', 'comment']:
ret.append(html.span(data, class_=t.type))
else:
ret.append(data)
else:
ret.append(item)
return ret
class HTMLDocument(object):
def __init__(self, tokenizer=None):
def __init__(self, encoding, tokenizer=None):
self.encoding = encoding
self.html = root = html.html()
self.head = head = self.create_head()
root.append(head)
@@ -119,30 +140,11 @@ class HTMLDocument(object):
table.append(tbody)
return table, tbody
def prepare_line(self, text):
""" adds html formatting to text items (list)
only processes items if they're of a string type (or unicode)
"""
ret = []
for item in text:
if type(item) in [str, unicode]:
tokens = self.tokenizer.tokenize(item)
for t in tokens:
if t.type in ['keyword', 'alt_keyword', 'number',
'string', 'comment']:
ret.append(html.span(t.data, class_=t.type))
else:
ret.append(t.data)
else:
ret.append(item)
return ret
def add_row(self, lineno, text):
if text == ['']:
text = [raw('&nbsp;')]
else:
text = self.prepare_line(text)
text = prepare_line(text, self.tokenizer, self.encoding)
self.tbody.append(html.tr(html.td(str(lineno), class_='lineno'),
html.td(class_='code', *text)))
@@ -157,7 +159,8 @@ def create_html(mod):
lines = mod.path.open().readlines()
enchanter = HtmlEnchanter(mod)
doc = HTMLDocument()
enc = get_module_encoding(mod.path)
doc = HTMLDocument(enc)
for i, row in enumerate(lines):
row = enchanter.enchant_row(i + 1, row)
doc.add_row(i + 1, row)
@@ -248,3 +251,16 @@ def create_unknown_html(path):
)
return h.unicode()
_reg_enc = py.std.re.compile(r'coding[:=]\s*([-\w.]+)')
def get_module_encoding(path):
if hasattr(path, 'strpath'):
path = path.strpath
if path[-1] in ['c', 'o']:
path = path[:-1]
fpath = py.path.local(path)
code = fpath.read()
match = _reg_enc.search(code)
if match:
return match.group(1)
return 'ISO-8859-1'

View File

@@ -1,9 +1,12 @@
# -*- coding: UTF-8 -*-
""" test of html generation
"""
from py.__.apigen.source.html import create_html, HTMLDocument
from py.__.apigen.source.html import prepare_line, create_html, HTMLDocument, \
get_module_encoding
from py.__.apigen.source.browser import parse_path
from py.__.apigen.source.color import Tokenizer, PythonSchema
from py.xml import html
import py
@@ -49,7 +52,7 @@ def test_basic():
class _HTMLDocument(HTMLDocument):
def __init__(self):
pass
self.encoding = 'ascii'
class TestHTMLDocument(object):
def test_head(self):
@@ -73,51 +76,8 @@ class TestHTMLDocument(object):
assert isinstance(tbody, html.tbody)
assert tbody == table[0]
def prepare_line(self, line, doc=None):
if doc is None:
doc = HTMLDocument()
l = doc.prepare_line(line)
return ''.join([unicode(i) for i in l])
def test_prepare_line_basic(self):
result = self.prepare_line(['see if this works'])
assert result == 'see <span class="keyword">if</span> this works'
result = self.prepare_line(['see if this ',
html.a('works', name='works'),' too'])
assert result == ('see <span class="keyword">if</span> this '
'<a name="works">works</a> too')
result = self.prepare_line(['see if something else works'])
assert result == ('see <span class="keyword">if</span> something '
'<span class="keyword">else</span> works')
result = self.prepare_line(['see if something ',
html.a('else', name='else'), ' works too'])
assert result == ('see <span class="keyword">if</span> something '
'<a name="else">else</a> works too')
def test_prepare_line_strings(self):
result = self.prepare_line(['foo = "bar"'])
assert result == 'foo = <span class="string">&quot;bar&quot;</span>'
result = self.prepare_line(['"spam"'])
assert result == '<span class="string">&quot;spam&quot;</span>'
# test multiline strings
doc = HTMLDocument()
result = self.prepare_line(['"""start of multiline'], doc)
assert result == ('<span class="string">&quot;&quot;&quot;start of '
'multiline</span>')
# doc should now be in 'string mode'
result = self.prepare_line(['see if it doesn\'t touch this'], doc)
assert result == ('<span class="string">see if it doesn&apos;t touch '
'this</span>')
result = self.prepare_line(['"""'], doc)
assert result == '<span class="string">&quot;&quot;&quot;</span>'
result = self.prepare_line(['see if it colours this again'], doc)
assert result == ('see <span class="keyword">if</span> it colours '
'this again')
def test_add_row(self):
doc = HTMLDocument()
doc = HTMLDocument('ascii')
doc.add_row(1, ['""" this is a foo implementation """'])
doc.add_row(2, [''])
doc.add_row(3, ['class ', html.a('Foo', name='Foo'), ':'])
@@ -141,9 +101,79 @@ class TestHTMLDocument(object):
'</span></td>')
def test_unicode(self):
doc = HTMLDocument()
doc = HTMLDocument('ascii')
h = unicode(doc)
print h
assert py.std.re.match(r'<html>\s*<head>\s*<title>[^<]+</title>'
'.*</body>\w*</html>$', h, py.std.re.S)
def prepare_line_helper(line, tokenizer=None, encoding='ascii'):
if tokenizer is None:
tokenizer = Tokenizer(PythonSchema)
l = prepare_line(line, tokenizer, encoding)
return ''.join([unicode(i) for i in l])
def test_prepare_line_basic():
result = prepare_line_helper(['see if this works'])
assert result == 'see <span class="keyword">if</span> this works'
result = prepare_line_helper(['see if this ',
html.a('works', name='works'),' too'])
assert result == ('see <span class="keyword">if</span> this '
'<a name="works">works</a> too')
result = prepare_line_helper(['see if something else works'])
assert result == ('see <span class="keyword">if</span> something '
'<span class="keyword">else</span> works')
result = prepare_line_helper(['see if something ',
html.a('else', name='else'), ' works too'])
assert result == ('see <span class="keyword">if</span> something '
'<a name="else">else</a> works too')
def test_prepare_line_strings():
result = prepare_line_helper(['foo = "bar"'])
assert result == 'foo = <span class="string">&quot;bar&quot;</span>'
result = prepare_line_helper(['"spam"'])
assert result == '<span class="string">&quot;spam&quot;</span>'
def test_prepare_line_multiline_strings():
# test multiline strings
t = Tokenizer(PythonSchema)
result = prepare_line_helper(['"""start of multiline'], t)
assert result == ('<span class="string">&quot;&quot;&quot;start of '
'multiline</span>')
result = prepare_line_helper(['see if it doesn\'t touch this'], t)
assert result == ('<span class="string">see if it doesn&apos;t touch '
'this</span>')
result = prepare_line_helper(['"""'], t)
assert result == '<span class="string">&quot;&quot;&quot;</span>'
result = prepare_line_helper(['see if it colours this again'], t)
assert result == ('see <span class="keyword">if</span> it colours '
'this again')
def test_prepare_line_nonascii():
result = prepare_line_helper(['"föö"'], encoding='UTF-8')
assert (result ==
unicode('<span class="string">&quot;föö&quot;</span>', 'UTF-8'))
def test_get_encoding_ascii():
temp = py.test.ensuretemp('test_get_encoding')
fpath = temp.join('ascii.py')
fpath.write(str(py.code.Source("""\
def foo():
return 'foo'
""")))
# XXX I think the specs say we have to assume latin-1 here...
assert get_module_encoding(fpath.strpath) == 'ISO-8859-1'
def test_get_encoding_for_real():
temp = py.test.ensuretemp('test_get_encoding')
fpath = temp.join('utf-8.py')
fpath.write(str(py.code.Source("""\
#!/usr/bin/env python
# -*- coding: UTF-8 -*-
def foo():
return 'föö'
""")))
assert get_module_encoding(fpath.strpath) == 'UTF-8'

View File

@@ -38,6 +38,8 @@ def setup_fs_project(name):
return 'bar'
def baz(qux):
return qux
def _hidden():
return 'quux'
"""))
temp.ensure("pak/__init__.py").write(py.code.Source("""\
from py.initpkg import initpkg
@@ -77,6 +79,8 @@ def setup_fs_project(name):
''')
c = compile(str(source), '<test>', 'exec')
exec c in globals()
assert pak.somenamespace._hidden() == 'quux'
"""))
return temp, 'pak'

View File

@@ -1,5 +1,5 @@
* format docstrings more nicely (with tests)
* format docstrings more nicely (with tests) - DONE I guess
* have the API function view be as informative as possible
without having to go to the "single method" view
@@ -10,6 +10,8 @@
viewed. method views (when navigating there through
the class view) should also have the source there
DONE I guess (todo: add syntax coloring)
* have class-level attributes be displayed
* use "inherited" doc strings, i.e. for
@@ -30,11 +32,11 @@
be separately tested and the caller should not need
to guess what it will get, i think)
DONE
* look out for and streamline all apigen/source-viewer
documentation into one document
* consider automating dependencies:
e.g. something like: queue_render(page, fspath, linker, ...)
@@ -61,8 +63,22 @@
...
raise ...
NOT SURE if this is still required
* also we might have a support function for tests that
fills the linker with "dummy hrefs" for certain types
like source links
* XXX list more here
KIND OF DONE, the tests now use a linker that just doesn't
barf on non-existing linkids anymore, which seems to be
good enough (we may want to add more sophisticated debugging
later, but for now this works)
* add syntax coloring for Python source snippets
* remove py.test/apigen cruft from stack traces
* fix non-ascii source encoding support
* XXX