From a0d7ab2244bc5503cc3972993bf931dcf6f05953 Mon Sep 17 00:00:00 2001
From: holger krekel <holger@merlinux.eu>
Date: Fri, 4 Sep 2009 18:16:10 +0200
Subject: [PATCH] reviewing, refactoring, porting xml/html object/tree
 generation to work with 3k

--HG--
branch : trunk
---
 py/__init__.py                 |  10 +-
 py/xmlobj/html.py              |  65 ---------
 py/xmlobj/misc.py              |  26 ----
 py/xmlobj/testing/test_html.py |  54 --------
 py/xmlobj/testing/test_xml.py  |  54 ++++++++
 py/xmlobj/visit.py             | 114 ----------------
 py/xmlobj/xml.py               |  58 --------
 py/xmlobj/xmlgen.py            | 243 +++++++++++++++++++++++++++++++++
 8 files changed, 302 insertions(+), 322 deletions(-)
 delete mode 100644 py/xmlobj/html.py
 delete mode 100644 py/xmlobj/misc.py
 delete mode 100644 py/xmlobj/testing/test_html.py
 delete mode 100644 py/xmlobj/visit.py
 delete mode 100644 py/xmlobj/xml.py
 create mode 100644 py/xmlobj/xmlgen.py

diff --git a/py/__init__.py b/py/__init__.py
index afd45843f..4a1cd9e2f 100644
--- a/py/__init__.py
+++ b/py/__init__.py
@@ -186,11 +186,11 @@ initpkg(__name__,
 
     # small and mean xml/html generation
     'xml.__doc__'            : ('./xmlobj/__init__.py', '__doc__'),
-    'xml.html'               : ('./xmlobj/html.py', 'html'),
-    'xml.Tag'                : ('./xmlobj/xml.py', 'Tag'),
-    'xml.raw'                : ('./xmlobj/xml.py', 'raw'),
-    'xml.Namespace'          : ('./xmlobj/xml.py', 'Namespace'),
-    'xml.escape'             : ('./xmlobj/misc.py', 'escape'),
+    'xml.html'               : ('./xmlobj/xmlgen.py', 'html'),
+    'xml.Tag'                : ('./xmlobj/xmlgen.py', 'Tag'),
+    'xml.raw'                : ('./xmlobj/xmlgen.py', 'raw'),
+    'xml.Namespace'          : ('./xmlobj/xmlgen.py', 'Namespace'),
+    'xml.escape'             : ('./xmlobj/xmlgen.py', 'escape'),
 
     # logging API ('producers' and 'consumers' connected via keywords)
     'log.__doc__'            : ('./log/__init__.py', '__doc__'),
diff --git a/py/xmlobj/html.py b/py/xmlobj/html.py
deleted file mode 100644
index 32a26d470..000000000
--- a/py/xmlobj/html.py
+++ /dev/null
@@ -1,65 +0,0 @@
-"""
-
-
-""" 
-import sys
-from py.xml import Namespace, Tag
-from py.__.xmlobj.visit import SimpleUnicodeVisitor 
-
-class HtmlVisitor(SimpleUnicodeVisitor): 
-    
-    single = dict([(x, 1) for x in 
-                ('br,img,area,param,col,hr,meta,link,base,'
-                    'input,frame').split(',')])
-    inline = dict([(x, 1) for x in
-                ('a abbr acronym b basefont bdo big br cite code dfn em font '
-                 'i img input kbd label q s samp select small span strike '
-                 'strong sub sup textarea tt u var'.split(' '))])
-
-    def repr_attribute(self, attrs, name): 
-        if name == 'class_':
-            value = getattr(attrs, name) 
-            if value is None: 
-                return
-        return super(HtmlVisitor, self).repr_attribute(attrs, name) 
-
-    def _issingleton(self, tagname):
-        return tagname in self.single
-
-    def _isinline(self, tagname):
-        return tagname in self.inline
-
-if sys.version_info > (3, 0):
-    def u(s): return s
-else:
-    def u(s): return unicode(s)
-
-class HtmlTag(Tag): 
-    def unicode(self, indent=2):
-        l = []
-        HtmlVisitor(l.append, indent, shortempty=False).visit(self) 
-        return u("").join(l) 
-
-# exported plain html namespace 
-class html(Namespace):
-    __tagclass__ = HtmlTag
-    __stickyname__ = True 
-    __tagspec__ = dict([(x,1) for x in ( 
-        'a,abbr,acronym,address,applet,area,b,bdo,big,blink,'
-        'blockquote,body,br,button,caption,center,cite,code,col,'
-        'colgroup,comment,dd,del,dfn,dir,div,dl,dt,em,embed,'
-        'fieldset,font,form,frameset,h1,h2,h3,h4,h5,h6,head,html,'
-        'i,iframe,img,input,ins,kbd,label,legend,li,link,listing,'
-        'map,marquee,menu,meta,multicol,nobr,noembed,noframes,'
-        'noscript,object,ol,optgroup,option,p,pre,q,s,script,'
-        'select,small,span,strike,strong,style,sub,sup,table,'
-        'tbody,td,textarea,tfoot,th,thead,title,tr,tt,u,ul,xmp,'
-        'base,basefont,frame,hr,isindex,param,samp,var'
-    ).split(',') if x])
-
-    class Style(object): 
-        def __init__(self, **kw): 
-            for x, y in kw.items():
-                x = x.replace('_', '-')
-                setattr(self, x, y) 
-
diff --git a/py/xmlobj/misc.py b/py/xmlobj/misc.py
deleted file mode 100644
index 358910a0f..000000000
--- a/py/xmlobj/misc.py
+++ /dev/null
@@ -1,26 +0,0 @@
-import re
-import sys
-
-if sys.version_info > (3, 0):
-    def u(s):
-        return s
-else:
-    def u(s):
-        return unicode(s)
-
-class _escape:
-    def __init__(self):
-        self.escape = {
-            u('"') : u('&quot;'), u('<') : u('&lt;'), u('>') : u('&gt;'), 
-            u('&') : u('&amp;'), u("'") : u('&apos;'),
-            }
-        self.charef_rex = re.compile(u("|").join(self.escape.keys()))
-
-    def _replacer(self, match):
-        return self.escape[match.group(0)]
-
-    def __call__(self, ustring):
-        """ xml-escape the given unicode string. """
-        return self.charef_rex.sub(self._replacer, ustring)
-
-escape = _escape()
diff --git a/py/xmlobj/testing/test_html.py b/py/xmlobj/testing/test_html.py
deleted file mode 100644
index 2bf59c0f9..000000000
--- a/py/xmlobj/testing/test_html.py
+++ /dev/null
@@ -1,54 +0,0 @@
-from py.xml import html 
-
-def test_html_name_stickyness(): 
-    class my(html.p): 
-        pass 
-    x = my("hello") 
-    assert unicode(x) == '<p>hello</p>' 
-
-def test_stylenames(): 
-    class my: 
-        class body(html.body): 
-            style = html.Style(font_size = "12pt")
-    u = unicode(my.body())
-    assert u == '<body style="font-size: 12pt"></body>' 
-
-def test_class_None(): 
-    t = html.body(class_=None)
-    u = unicode(t) 
-    assert u == '<body></body>'
-
-def test_alternating_style(): 
-    alternating = (
-        html.Style(background="white"), 
-        html.Style(background="grey"),
-    )
-    class my(html): 
-        class li(html.li): 
-            def style(self): 
-                i = self.parent.index(self) 
-                return alternating[i%2]
-            style = property(style) 
-    
-    x = my.ul(
-            my.li("hello"), 
-            my.li("world"), 
-            my.li("42"))
-    u = unicode(x) 
-    assert u == ('<ul><li style="background: white">hello</li>'
-                     '<li style="background: grey">world</li>'
-                     '<li style="background: white">42</li>'
-                 '</ul>')
-
-def test_singleton():
-    h = html.head(html.link(href="foo"))
-    assert unicode(h) == '<head><link href="foo"/></head>'
-    
-    h = html.head(html.script(src="foo"))
-    assert unicode(h) == '<head><script src="foo"></script></head>'
-
-def test_inline():
-    h = html.div(html.span('foo'), html.span('bar'))
-    assert (h.unicode(indent=2) ==
-            '<div><span>foo</span><span>bar</span></div>')
-
diff --git a/py/xmlobj/testing/test_xml.py b/py/xmlobj/testing/test_xml.py
index 031aa649a..b4acaf7b3 100644
--- a/py/xmlobj/testing/test_xml.py
+++ b/py/xmlobj/testing/test_xml.py
@@ -1,5 +1,6 @@
 
 import py
+from py.__.xmlobj.xmlgen import unicode, html
 
 class ns(py.xml.Namespace): 
     pass 
@@ -56,3 +57,56 @@ def test_raw():
     u = unicode(x)
     assert u == "<some><p>literal</p></some>"
 
+
+def test_html_name_stickyness():
+    # a subclass of an html tag keeps rendering under the tag's name
+    class my(html.p):
+        pass
+    assert unicode(my("hello")) == '<p>hello</p>'
+
+def test_stylenames():
+    # underscores in Style keywords are rendered as dashes
+    class my:
+        class body(html.body):
+            style = html.Style(font_size="12pt")
+    assert unicode(my.body()) == '<body style="font-size: 12pt"></body>'
+
+def test_class_None():
+    # a class_ attribute that is None must not be rendered at all
+    body = html.body(class_=None)
+    assert unicode(body) == '<body></body>'
+
+def test_alternating_style():
+    # a tag may compute its style from its position within tag.parent
+    styles = (
+        html.Style(background="white"),
+        html.Style(background="grey"),
+    )
+    class my(html):
+        class li(html.li):
+            def style(self):
+                position = self.parent.index(self)
+                return styles[position % 2]
+            style = property(style)
+
+    items = my.ul(
+            my.li("hello"),
+            my.li("world"),
+            my.li("42"))
+    assert unicode(items) == ('<ul><li style="background: white">hello</li>'
+                     '<li style="background: grey">world</li>'
+                     '<li style="background: white">42</li>'
+                 '</ul>')
+
+def test_singleton():
+    # an empty <link> uses the short form, an empty <script> does not
+    link = html.head(html.link(href="foo"))
+    assert unicode(link) == '<head><link href="foo"/></head>'
+    script = html.head(html.script(src="foo"))
+    assert unicode(script) == '<head><script src="foo"></script></head>'
+
+def test_inline():
+    # inline tags are not broken onto indented lines of their own
+    markup = html.div(html.span('foo'), html.span('bar')).unicode(indent=2)
+    assert markup == '<div><span>foo</span><span>bar</span></div>'
+
diff --git a/py/xmlobj/visit.py b/py/xmlobj/visit.py
deleted file mode 100644
index bff64bdac..000000000
--- a/py/xmlobj/visit.py
+++ /dev/null
@@ -1,114 +0,0 @@
-# 
-# a generic conversion serializer 
-#
-
-import sys
-from py.xml import escape 
-
-if sys.version_info > (3, 0):
-    def u(s):
-        return s
-else:
-    def u(s):
-        return unicode(s)
-
-class SimpleUnicodeVisitor(object):
-    """ recursive visitor to write unicode. """
-    def __init__(self, write, indent=0, curindent=0, shortempty=True): 
-        self.write = write
-        self.cache = {}
-        self.visited = {} # for detection of recursion
-        self.indent = indent 
-        self.curindent = curindent
-        self.parents = []
-        self.shortempty = shortempty  # short empty tags or not 
-
-    def visit(self, node): 
-        """ dispatcher on node's class/bases name. """
-        cls = node.__class__
-        try:
-            visitmethod = self.cache[cls]   
-        except KeyError:
-            for subclass in cls.__mro__: 
-                visitmethod = getattr(self, subclass.__name__, None)
-                if visitmethod is not None:
-                    break
-            else:
-                visitmethod = self.object 
-            self.cache[cls] = visitmethod
-        visitmethod(node) 
-
-    def object(self, obj):
-        #self.write(obj) 
-        self.write(escape(unicode(obj)))
-
-    def raw(self, obj):
-        self.write(obj.uniobj) 
-
-    def list(self, obj):  
-        assert id(obj) not in self.visited
-        self.visited[id(obj)] = 1
-        map(self.visit, obj) 
-
-    def Tag(self, tag):
-        assert id(tag) not in self.visited
-        try: 
-            tag.parent = self.parents[-1]
-        except IndexError: 
-            tag.parent = None 
-        self.visited[id(tag)] = 1
-        tagname = getattr(tag, 'xmlname', tag.__class__.__name__)
-        if self.curindent and not self._isinline(tagname):
-            self.write("\n" + u(' ') * self.curindent) 
-        if tag:
-            self.curindent += self.indent 
-            self.write(u('<%s%s>') % (tagname, self.attributes(tag)))
-            self.parents.append(tag) 
-            map(self.visit, tag)
-            self.parents.pop() 
-            self.write(u('</%s>') % tagname) 
-            self.curindent -= self.indent 
-        else:
-            nameattr = tagname+self.attributes(tag) 
-            if self._issingleton(tagname): 
-                self.write(u('<%s/>') % (nameattr,))
-            else: 
-                self.write(u('<%s></%s>') % (nameattr, tagname))
-
-    def attributes(self, tag):
-        # serialize attributes
-        attrlist = dir(tag.attr) 
-        attrlist.sort() 
-        l = []
-        for name in attrlist: 
-            res = self.repr_attribute(tag.attr, name)
-            if res is not None: 
-                l.append(res) 
-        l.extend(self.getstyle(tag))
-        return u("").join(l)
-
-    def repr_attribute(self, attrs, name): 
-        if name[:2] != '__': 
-            value = getattr(attrs, name) 
-            if name.endswith('_'): 
-                name = name[:-1]
-            return u(' %s="%s"') % (name, escape(unicode(value)))
-
-    def getstyle(self, tag): 
-        """ return attribute list suitable for styling. """ 
-        try: 
-            styledict = tag.style.__dict__
-        except AttributeError: 
-            return [] 
-        else: 
-            stylelist = [x+': ' + y for x,y in styledict.items()]
-            return [u(' style="%s"') % u('; ').join(stylelist)]
-
-    def _issingleton(self, tagname):
-        """can (and will) be overridden in subclasses"""
-        return self.shortempty
-
-    def _isinline(self, tagname):
-        """can (and will) be overridden in subclasses"""
-        return False
-
diff --git a/py/xmlobj/xml.py b/py/xmlobj/xml.py
deleted file mode 100644
index 1aa970538..000000000
--- a/py/xmlobj/xml.py
+++ /dev/null
@@ -1,58 +0,0 @@
-"""
-generic (and pythonic :-) xml tag and namespace objects 
-""" 
-
-class Tag(list):
-    class Attr(object): 
-        def __init__(self, **kwargs): 
-            self.__dict__.update(kwargs) 
-
-    def __init__(self, *args, **kwargs):
-        super(Tag, self).__init__(args)
-        self.attr = self.Attr(**kwargs) 
-
-    def __unicode__(self):
-        return self.unicode(indent=0) 
-
-    def unicode(self, indent=2):
-        from py.__.xmlobj.visit import SimpleUnicodeVisitor 
-        l = []
-        SimpleUnicodeVisitor(l.append, indent).visit(self) 
-        return "".join(l) 
-
-    def __repr__(self):
-        name = self.__class__.__name__ 
-        return "<%r tag object %d>" % (name, id(self))
-
-class raw(object):
-    """just a box that can contain a unicode string that will be
-    included directly in the output"""
-    def __init__(self, uniobj):
-        self.uniobj = uniobj
-
-# the generic xml namespace 
-# provides Tag classes on the fly optionally checking for
-# a tagspecification 
-
-class NamespaceMetaclass(type): 
-    def __getattr__(self, name): 
-        if name[:1] == '_': 
-            raise AttributeError(name) 
-        if self == Namespace: 
-            raise ValueError("Namespace class is abstract") 
-        tagspec = self.__tagspec__
-        if tagspec is not None and name not in tagspec: 
-            raise AttributeError(name) 
-        classattr = {}
-        if self.__stickyname__: 
-            classattr['xmlname'] = name 
-        cls = type(name, (self.__tagclass__,), classattr) 
-        setattr(self, name, cls) 
-        return cls 
-        
-class Namespace(object):
-    __tagspec__ = None 
-    __tagclass__ = Tag
-    __metaclass__ = NamespaceMetaclass
-    __stickyname__ = False 
-       
diff --git a/py/xmlobj/xmlgen.py b/py/xmlobj/xmlgen.py
new file mode 100644
index 000000000..2d1f63350
--- /dev/null
+++ b/py/xmlobj/xmlgen.py
@@ -0,0 +1,243 @@
+"""
+module for generating and serializing xml and html structures
+by using simple python objects. 
+
+(c) holger krekel, holger at merlinux eu. 2009
+""" 
+import py
+import sys, re
+
+if sys.version_info < (3, 0):
+    unicode = unicode  # expose the builtin under this module's name
+    def u(s):
+        return unicode(s)
+else:
+    def u(s):
+        return s  # string literals are unicode already
+    def unicode(x):
+        if hasattr(x, '__unicode__'):
+            return x.__unicode__()
+        return str(x)
+    
+
+class NamespaceMetaclass(type):
+    """ metaclass creating tag classes on first attribute access. """
+    def __getattr__(self, name):
+        if name.startswith('_'):
+            raise AttributeError(name)
+        if self == Namespace:
+            raise ValueError("Namespace class is abstract")
+        if not (self.__tagspec__ is None or name in self.__tagspec__):
+            raise AttributeError(name)
+        members = {}
+        if self.__stickyname__:
+            members['xmlname'] = name  # rendered name survives subclassing
+        tagclass = type(name, (self.__tagclass__,), members)
+        setattr(self, name, tagclass)  # cache: next access skips __getattr__
+        return tagclass
+
+class Tag(list):
+    """ a list of child nodes plus xml attributes given as keywords. """
+    class Attr(object):
+        def __init__(self, **kwargs):
+            vars(self).update(kwargs)
+
+    def __init__(self, *args, **kwargs):
+        super(Tag, self).__init__(args)
+        self.attr = self.Attr(**kwargs)
+
+    def __unicode__(self):
+        return self.unicode(indent=0)
+    __str__ = __unicode__
+
+    def unicode(self, indent=2):
+        chunks = []
+        SimpleUnicodeVisitor(chunks.append, indent).visit(self)
+        return "".join(chunks)
+
+    def __repr__(self):
+        return "<%r tag object %d>" % (self.__class__.__name__, id(self))
+    
+# the abstract base namespace; it is built by calling the metaclass
+# directly, a spelling that is valid on python2 and python3 alike
+Namespace = NamespaceMetaclass('Namespace', (object,), dict(
+    __tagspec__=None, __tagclass__=Tag, __stickyname__=False,
+))
+
+class HtmlTag(Tag):
+    def unicode(self, indent=2):  # as Tag.unicode, but via HtmlVisitor
+        parts = []
+        HtmlVisitor(parts.append, indent, shortempty=False).visit(self)
+        return u("").join(parts)
+
+# exported plain html namespace 
+class html(Namespace):
+    __stickyname__ = True
+    __tagclass__ = HtmlTag
+    __tagspec__ = dict([(x, 1) for x in (
+        'a,abbr,acronym,address,applet,area,b,bdo,big,blink,'
+        'blockquote,body,br,button,caption,center,cite,code,col,'
+        'colgroup,comment,dd,del,dfn,dir,div,dl,dt,em,embed,'
+        'fieldset,font,form,frameset,h1,h2,h3,h4,h5,h6,head,html,'
+        'i,iframe,img,input,ins,kbd,label,legend,li,link,listing,'
+        'map,marquee,menu,meta,multicol,nobr,noembed,noframes,'
+        'noscript,object,ol,optgroup,option,p,pre,q,s,script,'
+        'select,small,span,strike,strong,style,sub,sup,table,'
+        'tbody,td,textarea,tfoot,th,thead,title,tr,tt,u,ul,xmp,'
+        'base,basefont,frame,hr,isindex,param,samp,var'
+    ).split(',') if x])
+
+    class Style(object):
+        """ css declarations; '_' in a keyword is stored as '-'. """
+        def __init__(self, **kw):
+            for prop, value in kw.items():
+                setattr(self, prop.replace('_', '-'), value)
+
+
+class raw(object):
+    """ box for a unicode string holding ready-made markup: the
+    visitor writes it to the output as-is, without escaping. """
+    def __init__(self, uniobj):
+        self.uniobj = uniobj
+
+class SimpleUnicodeVisitor(object):
+    """ recursive visitor serializing a node tree via write(). """
+    def __init__(self, write, indent=0, curindent=0, shortempty=True):
+        self.write = write  # callable receiving each output fragment
+        self.cache = {}  # maps node class -> visit method
+        self.visited = {}  # for detection of recursion
+        self.indent = indent  # indentation added per nesting level
+        self.curindent = curindent  # indentation of the current level
+        self.parents = []  # stack of the tags currently being written
+        self.shortempty = shortempty  # short empty tags or not
+
+    def visit(self, node):
+        """ dispatch node to the method named like its class or a base. """
+        cls = node.__class__
+        try:
+            visitmethod = self.cache[cls]
+        except KeyError:
+            for subclass in cls.__mro__:
+                visitmethod = getattr(self, subclass.__name__, None)
+                if visitmethod is not None:
+                    break
+            else:
+                visitmethod = self.object
+            self.cache[cls] = visitmethod
+        visitmethod(node)
+
+    def object(self, obj):
+        """ fallback: write the xml-escaped unicode value of obj. """
+        self.write(escape(unicode(obj)))
+
+    def raw(self, obj):  # raw boxes are written as-is, without escaping
+        self.write(obj.uniobj)
+
+    def list(self, obj):
+        """ visit the items of a plain list in order. """
+        assert id(obj) not in self.visited
+        self.visited[id(obj)] = 1
+        for item in obj:  # not map(): it is lazy on python3
+            self.visit(item)
+
+    def Tag(self, tag):
+        """ write tag, its attributes and recursively its children. """
+        assert id(tag) not in self.visited
+        tag.parent = None
+        if self.parents:
+            tag.parent = self.parents[-1]
+        self.visited[id(tag)] = 1
+        tagname = getattr(tag, 'xmlname', tag.__class__.__name__)
+        if self.curindent and not self._isinline(tagname):
+            self.write("\n" + u(' ') * self.curindent)
+        if tag:
+            self.curindent += self.indent
+            self.write(u('<%s%s>') % (tagname, self.attributes(tag)))
+            self.parents.append(tag)
+            for x in tag:
+                self.visit(x)
+            self.parents.pop()
+            self.write(u('</%s>') % tagname)
+            self.curindent -= self.indent
+        else:
+            nameattr = tagname+self.attributes(tag)
+            if self._issingleton(tagname):
+                self.write(u('<%s/>') % (nameattr,))
+            else:
+                self.write(u('<%s></%s>') % (nameattr, tagname))
+
+    def attributes(self, tag):
+        """ return the serialized attributes (and style) of tag. """
+        l = []
+        for name in dir(tag.attr):  # dir() returns the names sorted
+            res = self.repr_attribute(tag.attr, name)
+            if res is not None:
+                l.append(res)
+        l.extend(self.getstyle(tag))
+        return u("").join(l)
+
+    def repr_attribute(self, attrs, name):
+        """ render one attribute; None for names starting with '__'. """
+        if name[:2] != '__':
+            value = getattr(attrs, name)
+            if name.endswith('_'):
+                name = name[:-1]
+            return ' %s="%s"' % (name, escape(unicode(value)))
+
+    def getstyle(self, tag):
+        """ return attribute list suitable for styling. """
+        try:
+            styledict = tag.style.__dict__
+        except AttributeError:
+            return []
+        stylelist = [x+': ' + y for x,y in styledict.items()]
+        return [u(' style="%s"') % u('; ').join(stylelist)]
+
+    def _issingleton(self, tagname):
+        """can (and will) be overridden in subclasses"""
+        return self.shortempty
+
+    def _isinline(self, tagname):
+        """can (and will) be overridden in subclasses"""
+        return False
+
+class HtmlVisitor(SimpleUnicodeVisitor):
+    """ visitor using html tag lists for empty and inline tags. """
+    single = dict([(x, 1) for x in
+                ('br,img,area,param,col,hr,meta,link,base,'
+                    'input,frame').split(',')])
+    inline = dict([(x, 1) for x in
+                ('a abbr acronym b basefont bdo big br cite code dfn em font '
+                 'i img input kbd label q s samp select small span strike '
+                 'strong sub sup textarea tt u var'.split(' '))])
+
+    def repr_attribute(self, attrs, name):
+        """ like the base method, but omit a class_ that is None. """
+        if name == 'class_' and getattr(attrs, name) is None:
+            return None
+        return super(HtmlVisitor, self).repr_attribute(attrs, name)
+
+    def _issingleton(self, tagname):
+        """ true for the tags listed in `single`. """
+        return tagname in self.single
+
+    def _isinline(self, tagname):
+        return tagname in self.inline
+
+       
+class _escape:
+    """ callable replacing xml special characters by entities. """
+    def __init__(self):
+        self.escape = {u('"'): u('&quot;'), u('<'): u('&lt;'),
+                       u('>'): u('&gt;'), u('&'): u('&amp;'),
+                       u("'"): u('&apos;')}
+        self.charef_rex = re.compile(u("|").join(self.escape))
+
+    def _replacer(self, match):
+        return self.escape[match.group(0)]
+
+    def __call__(self, ustring):
+        """ xml-escape the given unicode string. """
+        return self.charef_rex.sub(self._replacer, ustring)
+
+escape = _escape()