Review, refactor, and port the xml/html object/tree generation to work with Python 3 (py3k)

--HG--
branch : trunk
This commit is contained in:
holger krekel 2009-09-04 18:16:10 +02:00
parent 6823fa634b
commit a0d7ab2244
8 changed files with 302 additions and 322 deletions

View File

@ -186,11 +186,11 @@ initpkg(__name__,
# small and mean xml/html generation
'xml.__doc__' : ('./xmlobj/__init__.py', '__doc__'),
'xml.html' : ('./xmlobj/html.py', 'html'),
'xml.Tag' : ('./xmlobj/xml.py', 'Tag'),
'xml.raw' : ('./xmlobj/xml.py', 'raw'),
'xml.Namespace' : ('./xmlobj/xml.py', 'Namespace'),
'xml.escape' : ('./xmlobj/misc.py', 'escape'),
'xml.html' : ('./xmlobj/xmlgen.py', 'html'),
'xml.Tag' : ('./xmlobj/xmlgen.py', 'Tag'),
'xml.raw' : ('./xmlobj/xmlgen.py', 'raw'),
'xml.Namespace' : ('./xmlobj/xmlgen.py', 'Namespace'),
'xml.escape' : ('./xmlobj/xmlgen.py', 'escape'),
# logging API ('producers' and 'consumers' connected via keywords)
'log.__doc__' : ('./log/__init__.py', '__doc__'),

View File

@ -1,65 +0,0 @@
"""
"""
import sys
from py.xml import Namespace, Tag
from py.__.xmlobj.visit import SimpleUnicodeVisitor
class HtmlVisitor(SimpleUnicodeVisitor):
    """ visitor serializing tags according to html conventions. """

    # tag names that are written as <name/> when they have no children
    single = dict([(x, 1) for x in
                   ('br,img,area,param,col,hr,meta,link,base,'
                    'input,frame').split(',')])

    # tag names that are never moved to a fresh, indented line
    inline = dict([(x, 1) for x in
                   ('a abbr acronym b basefont bdo big br cite code dfn em font '
                    'i img input kbd label q s samp select small span strike '
                    'strong sub sup textarea tt u var'.split(' '))])

    def repr_attribute(self, attrs, name):
        """ like the base implementation, but skip a 'class_' that is None. """
        if name == 'class_' and getattr(attrs, name) is None:
            return None
        return super(HtmlVisitor, self).repr_attribute(attrs, name)

    def _issingleton(self, tagname):
        """ tell whether an empty tag of this name is written as <name/>. """
        return tagname in self.single

    def _isinline(self, tagname):
        """ tell whether the tag name is listed as an inline element. """
        return tagname in self.inline
if sys.version_info[0] < 3:
    def u(s):
        """ return the given string converted to a unicode object. """
        return unicode(s)
else:
    def u(s):
        """ return the given string unchanged. """
        return s
class HtmlTag(Tag):
    """ tag that is serialized through HtmlVisitor. """

    def unicode(self, indent=2):
        """ return the markup of this tag and its children as one string.

        'indent' is handed to the visitor; empty tags are not shortened
        by default (shortempty=False).
        """
        chunks = []
        visitor = HtmlVisitor(chunks.append, indent, shortempty=False)
        visitor.visit(self)
        return u("").join(chunks)
# exported plain html namespace
class html(Namespace):
    """ namespace handing out HtmlTag classes for the names in __tagspec__. """
    __tagclass__ = HtmlTag
    __stickyname__ = True
    # the tag names that may be requested from this namespace
    __tagspec__ = dict([(x, 1) for x in (
        'a,abbr,acronym,address,applet,area,b,bdo,big,blink,'
        'blockquote,body,br,button,caption,center,cite,code,col,'
        'colgroup,comment,dd,del,dfn,dir,div,dl,dt,em,embed,'
        'fieldset,font,form,frameset,h1,h2,h3,h4,h5,h6,head,html,'
        'i,iframe,img,input,ins,kbd,label,legend,li,link,listing,'
        'map,marquee,menu,meta,multicol,nobr,noembed,noframes,'
        'noscript,object,ol,optgroup,option,p,pre,q,s,script,'
        'select,small,span,strike,strong,style,sub,sup,table,'
        'tbody,td,textarea,tfoot,th,thead,title,tr,tt,u,ul,xmp,'
        'base,basefont,frame,hr,isindex,param,samp,var'
    ).split(',') if x])

    class Style(object):
        """ holds style properties given as keyword arguments. """
        def __init__(self, **kw):
            # underscores in keyword names become dashes: font_size -> font-size
            for name, value in kw.items():
                setattr(self, name.replace('_', '-'), value)

View File

@ -1,26 +0,0 @@
import re
import sys
if sys.version_info[0] < 3:
    def u(s):
        """ return the given string converted to a unicode object. """
        return unicode(s)
else:
    def u(s):
        """ return the given string unchanged. """
        return s
class _escape:
    """ callable replacing the characters ", <, >, & and ' by entity references. """

    def __init__(self):
        # maps each special character to the reference that replaces it
        self.escape = dict([
            (u('"'), u('&quot;')),
            (u('<'), u('&lt;')),
            (u('>'), u('&gt;')),
            (u('&'), u('&amp;')),
            (u("'"), u('&apos;')),
        ])
        alternatives = u("|").join(self.escape.keys())
        self.charef_rex = re.compile(alternatives)

    def _replacer(self, match):
        """ return the replacement for the character matched by the regex. """
        found = match.group(0)
        return self.escape[found]

    def __call__(self, ustring):
        """ xml-escape the given unicode string. """
        substitute = self.charef_rex.sub
        return substitute(self._replacer, ustring)

escape = _escape()

View File

@ -1,54 +0,0 @@
from py.xml import html
def test_html_name_stickyness():
    """ a subclass of html.p is still rendered with the tag name 'p'. """
    class my(html.p):
        pass
    rendered = unicode(my("hello"))
    assert rendered == '<p>hello</p>'
def test_stylenames():
    """ underscores in Style keyword names show up as dashes in the output. """
    class my:
        class body(html.body):
            style = html.Style(font_size = "12pt")
    rendered = unicode(my.body())
    assert rendered == '<body style="font-size: 12pt"></body>'
def test_class_None():
    """ a class_ attribute set to None is left out of the markup. """
    tag = html.body(class_=None)
    assert unicode(tag) == '<body></body>'
def test_alternating_style():
    """ a style property can depend on the tag's position in its parent. """
    white = html.Style(background="white")
    grey = html.Style(background="grey")
    alternating = (white, grey)

    class my(html):
        class li(html.li):
            @property
            def style(self):
                position = self.parent.index(self)
                return alternating[position % 2]

    items = [my.li(text) for text in ("hello", "world", "42")]
    rendered = unicode(my.ul(*items))
    expected = ('<ul><li style="background: white">hello</li>'
                '<li style="background: grey">world</li>'
                '<li style="background: white">42</li>'
                '</ul>')
    assert rendered == expected
def test_singleton():
    """ an empty link is written as <link/>, an empty script keeps its end tag. """
    with_link = html.head(html.link(href="foo"))
    assert unicode(with_link) == '<head><link href="foo"/></head>'
    with_script = html.head(html.script(src="foo"))
    assert unicode(with_script) == '<head><script src="foo"></script></head>'
def test_inline():
    """ span children are not put on separate lines even with indent=2. """
    div = html.div(html.span('foo'), html.span('bar'))
    markup = div.unicode(indent=2)
    assert markup == '<div><span>foo</span><span>bar</span></div>'

View File

@ -1,5 +1,6 @@
import py
from py.__.xmlobj.xmlgen import unicode, html
class ns(py.xml.Namespace):
    """ plain namespace used by the tests in this module. """
@ -56,3 +57,56 @@ def test_raw():
u = unicode(x)
assert u == "<some><p>literal</p></some>"
def test_html_name_stickyness():
    """ a subclass of html.p is still rendered with the tag name 'p'. """
    class my(html.p):
        pass
    rendered = unicode(my("hello"))
    assert rendered == '<p>hello</p>'
def test_stylenames():
    """ underscores in Style keyword names show up as dashes in the output. """
    class my:
        class body(html.body):
            style = html.Style(font_size = "12pt")
    rendered = unicode(my.body())
    assert rendered == '<body style="font-size: 12pt"></body>'
def test_class_None():
    """ a class_ attribute set to None is left out of the markup. """
    tag = html.body(class_=None)
    assert unicode(tag) == '<body></body>'
def test_alternating_style():
    """ a style property can depend on the tag's position in its parent. """
    white = html.Style(background="white")
    grey = html.Style(background="grey")
    alternating = (white, grey)

    class my(html):
        class li(html.li):
            @property
            def style(self):
                position = self.parent.index(self)
                return alternating[position % 2]

    items = [my.li(text) for text in ("hello", "world", "42")]
    rendered = unicode(my.ul(*items))
    expected = ('<ul><li style="background: white">hello</li>'
                '<li style="background: grey">world</li>'
                '<li style="background: white">42</li>'
                '</ul>')
    assert rendered == expected
def test_singleton():
    """ an empty link is written as <link/>, an empty script keeps its end tag. """
    with_link = html.head(html.link(href="foo"))
    assert unicode(with_link) == '<head><link href="foo"/></head>'
    with_script = html.head(html.script(src="foo"))
    assert unicode(with_script) == '<head><script src="foo"></script></head>'
def test_inline():
    """ span children are not put on separate lines even with indent=2. """
    div = html.div(html.span('foo'), html.span('bar'))
    markup = div.unicode(indent=2)
    assert markup == '<div><span>foo</span><span>bar</span></div>'

View File

@ -1,114 +0,0 @@
#
# a generic conversion serializer
#
import sys
from py.xml import escape
if sys.version_info[0] < 3:
    def u(s):
        """ return the given string converted to a unicode object. """
        return unicode(s)
else:
    def u(s):
        """ return the given string unchanged. """
        return s
class SimpleUnicodeVisitor(object):
    """ recursive visitor to write unicode.

    Each visited node is handed to the visitor method that is named like
    the first class in the node's MRO for which such a method exists
    (``Tag``, ``list``, ``raw``, ... with ``object`` as the catch-all).
    All output is passed piecewise to the ``write`` callable.
    """

    def __init__(self, write, indent=0, curindent=0, shortempty=True):
        """
        write      -- callable receiving every piece of output
        indent     -- amount by which the indentation grows per nesting level
        curindent  -- indentation to start with
        shortempty -- default answer of _issingleton(), i.e. whether empty
                      tags are written as <name/>
        """
        self.write = write
        self.cache = {}      # node class -> visit method, filled by visit()
        self.visited = {}    # for detection of recursion
        self.indent = indent
        self.curindent = curindent
        self.parents = []    # stack of the tags currently being serialized
        self.shortempty = shortempty   # short empty tags or not

    def visit(self, node):
        """ dispatcher on node's class/bases name. """
        cls = node.__class__
        try:
            visitmethod = self.cache[cls]
        except KeyError:
            for subclass in cls.__mro__:
                visitmethod = getattr(self, subclass.__name__, None)
                if visitmethod is not None:
                    break
            else:
                visitmethod = self.object
            self.cache[cls] = visitmethod
        visitmethod(node)

    def object(self, obj):
        """ write the escaped unicode representation of an arbitrary object. """
        self.write(escape(unicode(obj)))

    def raw(self, obj):
        """ write the content of a raw object without escaping it. """
        self.write(obj.uniobj)

    def list(self, obj):
        """ visit all items of a plain list. """
        assert id(obj) not in self.visited
        self.visited[id(obj)] = 1
        # an explicit loop: map() is lazy on Python 3 and would visit nothing
        for item in obj:
            self.visit(item)

    def Tag(self, tag):
        """ write a tag with its attributes and, recursively, its children.

        As a side effect ``tag.parent`` is set to the tag that is being
        serialized around it, or to None for the outermost tag.
        """
        assert id(tag) not in self.visited
        try:
            tag.parent = self.parents[-1]
        except IndexError:
            tag.parent = None
        self.visited[id(tag)] = 1
        tagname = getattr(tag, 'xmlname', tag.__class__.__name__)
        if self.curindent and not self._isinline(tagname):
            self.write("\n" + u(' ') * self.curindent)
        if tag:
            self.curindent += self.indent
            self.write(u('<%s%s>') % (tagname, self.attributes(tag)))
            self.parents.append(tag)
            # an explicit loop: map() is lazy on Python 3 and would visit nothing
            for child in tag:
                self.visit(child)
            self.parents.pop()
            self.write(u('</%s>') % tagname)
            self.curindent -= self.indent
        else:
            nameattr = tagname + self.attributes(tag)
            if self._issingleton(tagname):
                self.write(u('<%s/>') % (nameattr,))
            else:
                self.write(u('<%s></%s>') % (nameattr, tagname))

    def attributes(self, tag):
        """ return the serialized attributes of the tag, style included. """
        attrlist = dir(tag.attr)
        attrlist.sort()
        l = []
        for name in attrlist:
            res = self.repr_attribute(tag.attr, name)
            if res is not None:
                l.append(res)
        l.extend(self.getstyle(tag))
        return u("").join(l)

    def repr_attribute(self, attrs, name):
        """ return ' name="value"' for one attribute.

        Names starting with two underscores yield None; one trailing
        underscore is stripped from the name (class_ -> class).
        """
        if name[:2] != '__':
            value = getattr(attrs, name)
            if name.endswith('_'):
                name = name[:-1]
            return u(' %s="%s"') % (name, escape(unicode(value)))

    def getstyle(self, tag):
        """ return attribute list suitable for styling. """
        try:
            styledict = tag.style.__dict__
        except AttributeError:
            return []
        else:
            # %-formatting so that non-string values (e.g. numbers) work as well
            stylelist = ['%s: %s' % (x, y) for x, y in styledict.items()]
            return [u(' style="%s"') % u('; ').join(stylelist)]

    def _issingleton(self, tagname):
        """can (and will) be overridden in subclasses"""
        return self.shortempty

    def _isinline(self, tagname):
        """can (and will) be overridden in subclasses"""
        return False

View File

@ -1,58 +0,0 @@
"""
generic (and pythonic :-) xml tag and namespace objects
"""
class Tag(list):
    """ a tag: a list of its children plus attributes kept in self.attr. """

    class Attr(object):
        """ holder exposing the keyword arguments as instance attributes. """
        def __init__(self, **kwargs):
            vars(self).update(kwargs)

    def __init__(self, *args, **kwargs):
        """ positional arguments become children, keyword arguments attributes. """
        super(Tag, self).__init__(args)
        self.attr = self.Attr(**kwargs)

    def __unicode__(self):
        return self.unicode(indent=0)

    def unicode(self, indent=2):
        """ return the markup of this tag and its children as one string. """
        from py.__.xmlobj.visit import SimpleUnicodeVisitor
        chunks = []
        visitor = SimpleUnicodeVisitor(chunks.append, indent)
        visitor.visit(self)
        return "".join(chunks)

    def __repr__(self):
        return "<%r tag object %d>" % (self.__class__.__name__, id(self))
class raw(object):
    """ box around a unicode string that is meant to be included
    directly in the output; the string is kept as self.uniobj. """

    def __init__(self, uniobj):
        self.uniobj = uniobj
# the generic xml namespace
# provides Tag classes on the fly optionally checking for
# a tagspecification
class NamespaceMetaclass(type):
    """ metaclass creating tag classes when an unknown attribute is requested. """

    def __getattr__(self, name):
        """ create, store and return the tag class called 'name'.

        Raises AttributeError for names starting with an underscore and for
        names missing from a given __tagspec__, ValueError when invoked on
        Namespace itself.
        """
        if name.startswith('_'):
            raise AttributeError(name)
        if self is Namespace:
            raise ValueError("Namespace class is abstract")
        spec = self.__tagspec__
        if not (spec is None or name in spec):
            raise AttributeError(name)
        members = {}
        if self.__stickyname__:
            members['xmlname'] = name
        tagclass = type(name, (self.__tagclass__,), members)
        # stored on the namespace, so __getattr__ is not consulted again
        setattr(self, name, tagclass)
        return tagclass
# The abstract base namespace.  It is created by calling the metaclass
# directly: a ``__metaclass__`` attribute in a class body is only honoured
# by Python 2 and silently ignored by Python 3, while this call works on both.
#   __tagspec__    -- container of allowed tag names, None allows every name
#   __tagclass__   -- base class of the generated tag classes
#   __stickyname__ -- whether generated classes remember their tag name
#                     in an 'xmlname' attribute
Namespace = NamespaceMetaclass('Namespace', (object,), {
    '__tagspec__': None,
    '__tagclass__': Tag,
    '__stickyname__': False,
})

243
py/xmlobj/xmlgen.py Normal file
View File

@ -0,0 +1,243 @@
"""
module for generating and serializing xml and html structures
by using simple python objects.
(c) holger krekel, holger at merlinux eu. 2009
"""
import py
import sys, re
if sys.version_info < (3, 0):
    def u(s):
        """ return the given string converted to a unicode object. """
        return unicode(s)
    # make the builtin available as a module-level name
    unicode = unicode
else:
    def u(s):
        """ return the given string unchanged. """
        return s

    def unicode(x):
        """ return x.__unicode__() if x offers that method, else str(x). """
        try:
            convert = x.__unicode__
        except AttributeError:
            return str(x)
        return convert()
class NamespaceMetaclass(type):
    """ metaclass creating tag classes when an unknown attribute is requested. """

    def __getattr__(self, name):
        """ create, store and return the tag class called 'name'.

        Raises AttributeError for names starting with an underscore and for
        names missing from a given __tagspec__, ValueError when invoked on
        Namespace itself.
        """
        if name.startswith('_'):
            raise AttributeError(name)
        if self is Namespace:
            raise ValueError("Namespace class is abstract")
        spec = self.__tagspec__
        if not (spec is None or name in spec):
            raise AttributeError(name)
        members = {}
        if self.__stickyname__:
            members['xmlname'] = name
        tagclass = type(name, (self.__tagclass__,), members)
        # stored on the namespace, so __getattr__ is not consulted again
        setattr(self, name, tagclass)
        return tagclass
class Tag(list):
    """ a tag: a list of its children plus attributes kept in self.attr. """

    class Attr(object):
        """ holder exposing the keyword arguments as instance attributes. """
        def __init__(self, **kwargs):
            vars(self).update(kwargs)

    def __init__(self, *args, **kwargs):
        """ positional arguments become children, keyword arguments attributes. """
        super(Tag, self).__init__(args)
        self.attr = self.Attr(**kwargs)

    def __unicode__(self):
        return self.unicode(indent=0)

    # str(tag) yields the same, unindented markup
    __str__ = __unicode__

    def unicode(self, indent=2):
        """ return the markup of this tag and its children as one string. """
        chunks = []
        visitor = SimpleUnicodeVisitor(chunks.append, indent)
        visitor.visit(self)
        return "".join(chunks)

    def __repr__(self):
        return "<%r tag object %d>" % (self.__class__.__name__, id(self))
# the abstract base namespace, built by calling the metaclass directly:
# any tag name is allowed, tags derive from Tag and names are not sticky
Namespace = NamespaceMetaclass(
    'Namespace',
    (object,),
    dict(__tagspec__=None, __tagclass__=Tag, __stickyname__=False),
)
class HtmlTag(Tag):
    """ tag that is serialized through HtmlVisitor. """

    def unicode(self, indent=2):
        """ return the markup of this tag and its children as one string.

        'indent' is handed to the visitor; empty tags are not shortened
        by default (shortempty=False).
        """
        chunks = []
        visitor = HtmlVisitor(chunks.append, indent, shortempty=False)
        visitor.visit(self)
        return u("").join(chunks)
# exported plain html namespace
class html(Namespace):
    """ namespace handing out HtmlTag classes for the names in __tagspec__. """
    __tagclass__ = HtmlTag
    __stickyname__ = True
    # the tag names that may be requested from this namespace
    __tagspec__ = dict([(x, 1) for x in (
        'a,abbr,acronym,address,applet,area,b,bdo,big,blink,'
        'blockquote,body,br,button,caption,center,cite,code,col,'
        'colgroup,comment,dd,del,dfn,dir,div,dl,dt,em,embed,'
        'fieldset,font,form,frameset,h1,h2,h3,h4,h5,h6,head,html,'
        'i,iframe,img,input,ins,kbd,label,legend,li,link,listing,'
        'map,marquee,menu,meta,multicol,nobr,noembed,noframes,'
        'noscript,object,ol,optgroup,option,p,pre,q,s,script,'
        'select,small,span,strike,strong,style,sub,sup,table,'
        'tbody,td,textarea,tfoot,th,thead,title,tr,tt,u,ul,xmp,'
        'base,basefont,frame,hr,isindex,param,samp,var'
    ).split(',') if x])

    class Style(object):
        """ holds style properties given as keyword arguments. """
        def __init__(self, **kw):
            # underscores in keyword names become dashes: font_size -> font-size
            for name, value in kw.items():
                setattr(self, name.replace('_', '-'), value)
class raw(object):
    """ box around a unicode string that is meant to be included
    directly in the output; the string is kept as self.uniobj. """

    def __init__(self, uniobj):
        self.uniobj = uniobj
class SimpleUnicodeVisitor(object):
    """ recursive visitor to write unicode.

    Each visited node is handed to the visitor method that is named like
    the first class in the node's MRO for which such a method exists
    (``Tag``, ``list``, ``raw``, ... with ``object`` as the catch-all).
    All output is passed piecewise to the ``write`` callable.
    """

    def __init__(self, write, indent=0, curindent=0, shortempty=True):
        """
        write      -- callable receiving every piece of output
        indent     -- amount by which the indentation grows per nesting level
        curindent  -- indentation to start with
        shortempty -- default answer of _issingleton(), i.e. whether empty
                      tags are written as <name/>
        """
        self.write = write
        self.cache = {}      # node class -> visit method, filled by visit()
        self.visited = {}    # for detection of recursion
        self.indent = indent
        self.curindent = curindent
        self.parents = []    # stack of the tags currently being serialized
        self.shortempty = shortempty   # short empty tags or not

    def visit(self, node):
        """ dispatcher on node's class/bases name. """
        cls = node.__class__
        try:
            visitmethod = self.cache[cls]
        except KeyError:
            for subclass in cls.__mro__:
                visitmethod = getattr(self, subclass.__name__, None)
                if visitmethod is not None:
                    break
            else:
                visitmethod = self.object
            self.cache[cls] = visitmethod
        visitmethod(node)

    def object(self, obj):
        """ write the escaped unicode representation of an arbitrary object. """
        self.write(escape(unicode(obj)))

    def raw(self, obj):
        """ write the content of a raw object without escaping it. """
        self.write(obj.uniobj)

    def list(self, obj):
        """ visit all items of a plain list. """
        assert id(obj) not in self.visited
        self.visited[id(obj)] = 1
        # an explicit loop: map() is lazy on Python 3 and would visit nothing
        for item in obj:
            self.visit(item)

    def Tag(self, tag):
        """ write a tag with its attributes and, recursively, its children.

        As a side effect ``tag.parent`` is set to the tag that is being
        serialized around it, or to None for the outermost tag.
        """
        assert id(tag) not in self.visited
        try:
            tag.parent = self.parents[-1]
        except IndexError:
            tag.parent = None
        self.visited[id(tag)] = 1
        tagname = getattr(tag, 'xmlname', tag.__class__.__name__)
        if self.curindent and not self._isinline(tagname):
            self.write("\n" + u(' ') * self.curindent)
        if tag:
            self.curindent += self.indent
            self.write(u('<%s%s>') % (tagname, self.attributes(tag)))
            self.parents.append(tag)
            for x in tag:
                self.visit(x)
            self.parents.pop()
            self.write(u('</%s>') % tagname)
            self.curindent -= self.indent
        else:
            nameattr = tagname + self.attributes(tag)
            if self._issingleton(tagname):
                self.write(u('<%s/>') % (nameattr,))
            else:
                self.write(u('<%s></%s>') % (nameattr, tagname))

    def attributes(self, tag):
        """ return the serialized attributes of the tag, style included. """
        attrlist = dir(tag.attr)
        attrlist.sort()
        l = []
        for name in attrlist:
            res = self.repr_attribute(tag.attr, name)
            if res is not None:
                l.append(res)
        l.extend(self.getstyle(tag))
        return u("").join(l)

    def repr_attribute(self, attrs, name):
        """ return ' name="value"' for one attribute.

        Names starting with two underscores yield None; one trailing
        underscore is stripped from the name (class_ -> class).
        """
        if name[:2] != '__':
            value = getattr(attrs, name)
            if name.endswith('_'):
                name = name[:-1]
            return ' %s="%s"' % (name, escape(unicode(value)))

    def getstyle(self, tag):
        """ return attribute list suitable for styling. """
        try:
            styledict = tag.style.__dict__
        except AttributeError:
            return []
        else:
            # %-formatting so that non-string values (e.g. numbers) work as well
            stylelist = ['%s: %s' % (x, y) for x, y in styledict.items()]
            return [u(' style="%s"') % u('; ').join(stylelist)]

    def _issingleton(self, tagname):
        """can (and will) be overridden in subclasses"""
        return self.shortempty

    def _isinline(self, tagname):
        """can (and will) be overridden in subclasses"""
        return False
class HtmlVisitor(SimpleUnicodeVisitor):
    """ visitor serializing tags according to html conventions. """

    # tag names that are written as <name/> when they have no children
    single = dict([(x, 1) for x in
                   ('br,img,area,param,col,hr,meta,link,base,'
                    'input,frame').split(',')])

    # tag names that are never moved to a fresh, indented line
    inline = dict([(x, 1) for x in
                   ('a abbr acronym b basefont bdo big br cite code dfn em font '
                    'i img input kbd label q s samp select small span strike '
                    'strong sub sup textarea tt u var'.split(' '))])

    def repr_attribute(self, attrs, name):
        """ like the base implementation, but skip a 'class_' that is None. """
        if name == 'class_' and getattr(attrs, name) is None:
            return None
        return super(HtmlVisitor, self).repr_attribute(attrs, name)

    def _issingleton(self, tagname):
        """ tell whether an empty tag of this name is written as <name/>. """
        return tagname in self.single

    def _isinline(self, tagname):
        """ tell whether the tag name is listed as an inline element. """
        return tagname in self.inline
class _escape:
    """ callable replacing the characters ", <, >, & and ' by entity references. """

    def __init__(self):
        # maps each special character to the reference that replaces it
        self.escape = dict([
            (u('"'), u('&quot;')),
            (u('<'), u('&lt;')),
            (u('>'), u('&gt;')),
            (u('&'), u('&amp;')),
            (u("'"), u('&apos;')),
        ])
        alternatives = u("|").join(self.escape.keys())
        self.charef_rex = re.compile(alternatives)

    def _replacer(self, match):
        """ return the replacement for the character matched by the regex. """
        found = match.group(0)
        return self.escape[found]

    def __call__(self, ustring):
        """ xml-escape the given unicode string. """
        substitute = self.charef_rex.sub
        return substitute(self._replacer, ustring)

escape = _escape()