[svn r37594] Fixed unicode issues in apigen/htmlgen.py and apigen/source, moved some methods

out of HTMLDocument and added support for finding out the encoding of a Python
file in order to accomplish that (both in source/html.py), and fixed some minor
issues in htmlgen.py (items with a name starting with _ are now hidden from the
nav; fixed an indentation issue in the nav).

--HG--
branch : trunk
This commit is contained in:
guido 2007-01-30 14:24:27 +01:00
parent c2e1510f86
commit 45868fe506
5 changed files with 149 additions and 81 deletions

View File

@ -361,12 +361,12 @@ class ApiPageBuilder(AbstractPageBuilder):
H.a('source: %s' % (sourcefile,), H.a('source: %s' % (sourcefile,),
href=self.linker.get_lazyhref(sourcefile)), href=self.linker.get_lazyhref(sourcefile)),
H.br(), H.br(),
H.SourceDef(H.pre(callable_source))) H.SourceDef(H.pre(unicode(callable_source, 'UTF-8'))))
elif not is_in_pkg and sourcefile and callable_source: elif not is_in_pkg and sourcefile and callable_source:
csource = H.div(H.br(), csource = H.div(H.br(),
H.em('source: %s' % (sourcefile,)), H.em('source: %s' % (sourcefile,)),
H.br(), H.br(),
H.SourceDef(H.pre(callable_source))) H.SourceDef(H.pre(unicode(callable_source, 'UTF-8'))))
else: else:
csource = H.SourceDef('could not get source file') csource = H.SourceDef('could not get source file')
@ -460,6 +460,8 @@ class ApiPageBuilder(AbstractPageBuilder):
H.Docstring(docstring or '*no docstring available*') H.Docstring(docstring or '*no docstring available*')
) )
for dotted_name in sorted(item_dotted_names): for dotted_name in sorted(item_dotted_names):
if dotted_name.startswith('_'):
continue
itemname = dotted_name.split('.')[-1] itemname = dotted_name.split('.')[-1]
if is_private(itemname): if is_private(itemname):
continue continue
@ -586,7 +588,7 @@ class ApiPageBuilder(AbstractPageBuilder):
elif lastlevel and build_children: elif lastlevel and build_children:
# XXX hack # XXX hack
navitems += build_nav_level('%s.' % (dotted_name,), navitems += build_nav_level('%s.' % (dotted_name,),
depth+2) depth+1)
return navitems return navitems
@ -698,9 +700,9 @@ class ApiPageBuilder(AbstractPageBuilder):
mangled = [] mangled = []
for i, sline in enumerate(str(source).split('\n')): for i, sline in enumerate(str(source).split('\n')):
if i == lineno: if i == lineno:
l = '-> %s' % (sline,) l = '-> %s' % (unicode(sline, 'UTF-8'),)
else: else:
l = ' %s' % (sline,) l = ' %s' % (unicode(sline, 'UTF-8'),)
mangled.append(l) mangled.append(l)
if sourcefile: if sourcefile:
linktext = '%s - line %s' % (sourcefile, line.lineno + 1) linktext = '%s - line %s' % (sourcefile, line.lineno + 1)

View File

@ -2,14 +2,13 @@
""" html - generating ad-hoc html out of source browser """ html - generating ad-hoc html out of source browser
""" """
import py
from py.xml import html, raw from py.xml import html, raw
from compiler import ast from compiler import ast
import time import time
from py.__.apigen.source.color import Tokenizer, PythonSchema from py.__.apigen.source.color import Tokenizer, PythonSchema
class HtmlEnchanter(object): class HtmlEnchanter(object):
reserved_words = ['if', 'for', 'return', 'yield']
def __init__(self, mod): def __init__(self, mod):
self.mod = mod self.mod = mod
self.create_caches() self.create_caches()
@ -37,8 +36,30 @@ class HtmlEnchanter(object):
except KeyError: except KeyError:
return [row] # no more info return [row] # no more info
def prepare_line(text, tokenizer, encoding):
""" adds html formatting to text items (list)
only processes items if they're of a string type (or unicode)
"""
ret = []
for item in text:
if type(item) in [str, unicode]:
tokens = tokenizer.tokenize(item)
for t in tokens:
data = unicode(t.data, encoding)
if t.type in ['keyword', 'alt_keyword', 'number',
'string', 'comment']:
ret.append(html.span(data, class_=t.type))
else:
ret.append(data)
else:
ret.append(item)
return ret
class HTMLDocument(object): class HTMLDocument(object):
def __init__(self, tokenizer=None): def __init__(self, encoding, tokenizer=None):
self.encoding = encoding
self.html = root = html.html() self.html = root = html.html()
self.head = head = self.create_head() self.head = head = self.create_head()
root.append(head) root.append(head)
@ -119,30 +140,11 @@ class HTMLDocument(object):
table.append(tbody) table.append(tbody)
return table, tbody return table, tbody
def prepare_line(self, text):
""" adds html formatting to text items (list)
only processes items if they're of a string type (or unicode)
"""
ret = []
for item in text:
if type(item) in [str, unicode]:
tokens = self.tokenizer.tokenize(item)
for t in tokens:
if t.type in ['keyword', 'alt_keyword', 'number',
'string', 'comment']:
ret.append(html.span(t.data, class_=t.type))
else:
ret.append(t.data)
else:
ret.append(item)
return ret
def add_row(self, lineno, text): def add_row(self, lineno, text):
if text == ['']: if text == ['']:
text = [raw(' ')] text = [raw(' ')]
else: else:
text = self.prepare_line(text) text = prepare_line(text, self.tokenizer, self.encoding)
self.tbody.append(html.tr(html.td(str(lineno), class_='lineno'), self.tbody.append(html.tr(html.td(str(lineno), class_='lineno'),
html.td(class_='code', *text))) html.td(class_='code', *text)))
@ -157,7 +159,8 @@ def create_html(mod):
lines = mod.path.open().readlines() lines = mod.path.open().readlines()
enchanter = HtmlEnchanter(mod) enchanter = HtmlEnchanter(mod)
doc = HTMLDocument() enc = get_module_encoding(mod.path)
doc = HTMLDocument(enc)
for i, row in enumerate(lines): for i, row in enumerate(lines):
row = enchanter.enchant_row(i + 1, row) row = enchanter.enchant_row(i + 1, row)
doc.add_row(i + 1, row) doc.add_row(i + 1, row)
@ -248,3 +251,16 @@ def create_unknown_html(path):
) )
return h.unicode() return h.unicode()
_reg_enc = py.std.re.compile(r'coding[:=]\s*([-\w.]+)')
def get_module_encoding(path):
if hasattr(path, 'strpath'):
path = path.strpath
if path[-1] in ['c', 'o']:
path = path[:-1]
fpath = py.path.local(path)
code = fpath.read()
match = _reg_enc.search(code)
if match:
return match.group(1)
return 'ISO-8859-1'

View File

@ -1,9 +1,12 @@
# -*- coding: UTF-8 -*-
""" test of html generation """ test of html generation
""" """
from py.__.apigen.source.html import create_html, HTMLDocument from py.__.apigen.source.html import prepare_line, create_html, HTMLDocument, \
get_module_encoding
from py.__.apigen.source.browser import parse_path from py.__.apigen.source.browser import parse_path
from py.__.apigen.source.color import Tokenizer, PythonSchema
from py.xml import html from py.xml import html
import py import py
@ -49,7 +52,7 @@ def test_basic():
class _HTMLDocument(HTMLDocument): class _HTMLDocument(HTMLDocument):
def __init__(self): def __init__(self):
pass self.encoding = 'ascii'
class TestHTMLDocument(object): class TestHTMLDocument(object):
def test_head(self): def test_head(self):
@ -73,51 +76,8 @@ class TestHTMLDocument(object):
assert isinstance(tbody, html.tbody) assert isinstance(tbody, html.tbody)
assert tbody == table[0] assert tbody == table[0]
def prepare_line(self, line, doc=None):
if doc is None:
doc = HTMLDocument()
l = doc.prepare_line(line)
return ''.join([unicode(i) for i in l])
def test_prepare_line_basic(self):
result = self.prepare_line(['see if this works'])
assert result == 'see <span class="keyword">if</span> this works'
result = self.prepare_line(['see if this ',
html.a('works', name='works'),' too'])
assert result == ('see <span class="keyword">if</span> this '
'<a name="works">works</a> too')
result = self.prepare_line(['see if something else works'])
assert result == ('see <span class="keyword">if</span> something '
'<span class="keyword">else</span> works')
result = self.prepare_line(['see if something ',
html.a('else', name='else'), ' works too'])
assert result == ('see <span class="keyword">if</span> something '
'<a name="else">else</a> works too')
def test_prepare_line_strings(self):
result = self.prepare_line(['foo = "bar"'])
assert result == 'foo = <span class="string">&quot;bar&quot;</span>'
result = self.prepare_line(['"spam"'])
assert result == '<span class="string">&quot;spam&quot;</span>'
# test multiline strings
doc = HTMLDocument()
result = self.prepare_line(['"""start of multiline'], doc)
assert result == ('<span class="string">&quot;&quot;&quot;start of '
'multiline</span>')
# doc should now be in 'string mode'
result = self.prepare_line(['see if it doesn\'t touch this'], doc)
assert result == ('<span class="string">see if it doesn&apos;t touch '
'this</span>')
result = self.prepare_line(['"""'], doc)
assert result == '<span class="string">&quot;&quot;&quot;</span>'
result = self.prepare_line(['see if it colours this again'], doc)
assert result == ('see <span class="keyword">if</span> it colours '
'this again')
def test_add_row(self): def test_add_row(self):
doc = HTMLDocument() doc = HTMLDocument('ascii')
doc.add_row(1, ['""" this is a foo implementation """']) doc.add_row(1, ['""" this is a foo implementation """'])
doc.add_row(2, ['']) doc.add_row(2, [''])
doc.add_row(3, ['class ', html.a('Foo', name='Foo'), ':']) doc.add_row(3, ['class ', html.a('Foo', name='Foo'), ':'])
@ -141,9 +101,79 @@ class TestHTMLDocument(object):
'</span></td>') '</span></td>')
def test_unicode(self): def test_unicode(self):
doc = HTMLDocument() doc = HTMLDocument('ascii')
h = unicode(doc) h = unicode(doc)
print h print h
assert py.std.re.match(r'<html>\s*<head>\s*<title>[^<]+</title>' assert py.std.re.match(r'<html>\s*<head>\s*<title>[^<]+</title>'
'.*</body>\w*</html>$', h, py.std.re.S) '.*</body>\w*</html>$', h, py.std.re.S)
def prepare_line_helper(line, tokenizer=None, encoding='ascii'):
if tokenizer is None:
tokenizer = Tokenizer(PythonSchema)
l = prepare_line(line, tokenizer, encoding)
return ''.join([unicode(i) for i in l])
def test_prepare_line_basic():
result = prepare_line_helper(['see if this works'])
assert result == 'see <span class="keyword">if</span> this works'
result = prepare_line_helper(['see if this ',
html.a('works', name='works'),' too'])
assert result == ('see <span class="keyword">if</span> this '
'<a name="works">works</a> too')
result = prepare_line_helper(['see if something else works'])
assert result == ('see <span class="keyword">if</span> something '
'<span class="keyword">else</span> works')
result = prepare_line_helper(['see if something ',
html.a('else', name='else'), ' works too'])
assert result == ('see <span class="keyword">if</span> something '
'<a name="else">else</a> works too')
def test_prepare_line_strings():
result = prepare_line_helper(['foo = "bar"'])
assert result == 'foo = <span class="string">&quot;bar&quot;</span>'
result = prepare_line_helper(['"spam"'])
assert result == '<span class="string">&quot;spam&quot;</span>'
def test_prepare_line_multiline_strings():
# test multiline strings
t = Tokenizer(PythonSchema)
result = prepare_line_helper(['"""start of multiline'], t)
assert result == ('<span class="string">&quot;&quot;&quot;start of '
'multiline</span>')
result = prepare_line_helper(['see if it doesn\'t touch this'], t)
assert result == ('<span class="string">see if it doesn&apos;t touch '
'this</span>')
result = prepare_line_helper(['"""'], t)
assert result == '<span class="string">&quot;&quot;&quot;</span>'
result = prepare_line_helper(['see if it colours this again'], t)
assert result == ('see <span class="keyword">if</span> it colours '
'this again')
def test_prepare_line_nonascii():
result = prepare_line_helper(['"föö"'], encoding='UTF-8')
assert (result ==
unicode('<span class="string">&quot;föö&quot;</span>', 'UTF-8'))
def test_get_encoding_ascii():
temp = py.test.ensuretemp('test_get_encoding')
fpath = temp.join('ascii.py')
fpath.write(str(py.code.Source("""\
def foo():
return 'foo'
""")))
# XXX I think the specs say we have to assume latin-1 here...
assert get_module_encoding(fpath.strpath) == 'ISO-8859-1'
def test_get_encoding_for_real():
temp = py.test.ensuretemp('test_get_encoding')
fpath = temp.join('utf-8.py')
fpath.write(str(py.code.Source("""\
#!/usr/bin/env python
# -*- coding: UTF-8 -*-
def foo():
return 'föö'
""")))
assert get_module_encoding(fpath.strpath) == 'UTF-8'

View File

@ -38,6 +38,8 @@ def setup_fs_project(name):
return 'bar' return 'bar'
def baz(qux): def baz(qux):
return qux return qux
def _hidden():
return 'quux'
""")) """))
temp.ensure("pak/__init__.py").write(py.code.Source("""\ temp.ensure("pak/__init__.py").write(py.code.Source("""\
from py.initpkg import initpkg from py.initpkg import initpkg
@ -77,6 +79,8 @@ def setup_fs_project(name):
''') ''')
c = compile(str(source), '<test>', 'exec') c = compile(str(source), '<test>', 'exec')
exec c in globals() exec c in globals()
assert pak.somenamespace._hidden() == 'quux'
""")) """))
return temp, 'pak' return temp, 'pak'

View File

@ -1,5 +1,5 @@
* format docstrings more nicely (with tests) * format docstrings more nicely (with tests) - DONE I guess
* have the API function view be as informative as possible * have the API function view be as informative as possible
without having to go to the "single method" view without having to go to the "single method" view
@ -10,7 +10,9 @@
viewed. method views (when navigating there through viewed. method views (when navigating there through
the class view) should also have the source there the class view) should also have the source there
* have class-level attributes be displayed DONE I guess (todo: add syntax coloring)
* have class-level attributes be displayed
* use "inherited" doc strings, i.e. for * use "inherited" doc strings, i.e. for
class A: class A:
@ -30,11 +32,11 @@
be separately tested and the caller should not need be separately tested and the caller should not need
to guess what it will get, i think) to guess what it will get, i think)
DONE
* look out for and streamline all apigen/source-viewer * look out for and streamline all apigen/source-viewer
documentation into one document documentation into one document
* consider automating dependencies: * consider automating dependencies:
e.g. something like: queue_render(page, fspath, linker, ...) e.g. something like: queue_render(page, fspath, linker, ...)
@ -61,8 +63,22 @@
... ...
raise ... raise ...
NOT SURE if this is still required
* also we might have a support function for tests that * also we might have a support function for tests that
fills the linker with "dummy hrefs" for certain types fills the linker with "dummy hrefs" for certain types
like source links like source links
KIND OF DONE, the tests now use a linker that just doesn't
barf on non-existing linkids anymore, which seems to be
good enough (we may want to add more sophisticated debugging
later, but for now this works)
* XXX list more here * add syntax coloring for Python source snippets
* remove py.test/apigen cruft from stack traces
* fix non-ascii source encoding support
* XXX