From a0d7ab2244bc5503cc3972993bf931dcf6f05953 Mon Sep 17 00:00:00 2001 From: holger krekel Date: Fri, 4 Sep 2009 18:16:10 +0200 Subject: [PATCH] reviewing, refactoring, porting xml/html object/tree generation to work with 3k --HG-- branch : trunk --- py/__init__.py | 10 +- py/xmlobj/html.py | 65 --------- py/xmlobj/misc.py | 26 ---- py/xmlobj/testing/test_html.py | 54 -------- py/xmlobj/testing/test_xml.py | 54 ++++++++ py/xmlobj/visit.py | 114 ---------------- py/xmlobj/xml.py | 58 -------- py/xmlobj/xmlgen.py | 243 +++++++++++++++++++++++++++++++++ 8 files changed, 302 insertions(+), 322 deletions(-) delete mode 100644 py/xmlobj/html.py delete mode 100644 py/xmlobj/misc.py delete mode 100644 py/xmlobj/testing/test_html.py delete mode 100644 py/xmlobj/visit.py delete mode 100644 py/xmlobj/xml.py create mode 100644 py/xmlobj/xmlgen.py diff --git a/py/__init__.py b/py/__init__.py index afd45843f..4a1cd9e2f 100644 --- a/py/__init__.py +++ b/py/__init__.py @@ -186,11 +186,11 @@ initpkg(__name__, # small and mean xml/html generation 'xml.__doc__' : ('./xmlobj/__init__.py', '__doc__'), - 'xml.html' : ('./xmlobj/html.py', 'html'), - 'xml.Tag' : ('./xmlobj/xml.py', 'Tag'), - 'xml.raw' : ('./xmlobj/xml.py', 'raw'), - 'xml.Namespace' : ('./xmlobj/xml.py', 'Namespace'), - 'xml.escape' : ('./xmlobj/misc.py', 'escape'), + 'xml.html' : ('./xmlobj/xmlgen.py', 'html'), + 'xml.Tag' : ('./xmlobj/xmlgen.py', 'Tag'), + 'xml.raw' : ('./xmlobj/xmlgen.py', 'raw'), + 'xml.Namespace' : ('./xmlobj/xmlgen.py', 'Namespace'), + 'xml.escape' : ('./xmlobj/xmlgen.py', 'escape'), # logging API ('producers' and 'consumers' connected via keywords) 'log.__doc__' : ('./log/__init__.py', '__doc__'), diff --git a/py/xmlobj/html.py b/py/xmlobj/html.py deleted file mode 100644 index 32a26d470..000000000 --- a/py/xmlobj/html.py +++ /dev/null @@ -1,65 +0,0 @@ -""" - - -""" -import sys -from py.xml import Namespace, Tag -from py.__.xmlobj.visit import SimpleUnicodeVisitor - -class 
HtmlVisitor(SimpleUnicodeVisitor): - - single = dict([(x, 1) for x in - ('br,img,area,param,col,hr,meta,link,base,' - 'input,frame').split(',')]) - inline = dict([(x, 1) for x in - ('a abbr acronym b basefont bdo big br cite code dfn em font ' - 'i img input kbd label q s samp select small span strike ' - 'strong sub sup textarea tt u var'.split(' '))]) - - def repr_attribute(self, attrs, name): - if name == 'class_': - value = getattr(attrs, name) - if value is None: - return - return super(HtmlVisitor, self).repr_attribute(attrs, name) - - def _issingleton(self, tagname): - return tagname in self.single - - def _isinline(self, tagname): - return tagname in self.inline - -if sys.version_info > (3, 0): - def u(s): return s -else: - def u(s): return unicode(s) - -class HtmlTag(Tag): - def unicode(self, indent=2): - l = [] - HtmlVisitor(l.append, indent, shortempty=False).visit(self) - return u("").join(l) - -# exported plain html namespace -class html(Namespace): - __tagclass__ = HtmlTag - __stickyname__ = True - __tagspec__ = dict([(x,1) for x in ( - 'a,abbr,acronym,address,applet,area,b,bdo,big,blink,' - 'blockquote,body,br,button,caption,center,cite,code,col,' - 'colgroup,comment,dd,del,dfn,dir,div,dl,dt,em,embed,' - 'fieldset,font,form,frameset,h1,h2,h3,h4,h5,h6,head,html,' - 'i,iframe,img,input,ins,kbd,label,legend,li,link,listing,' - 'map,marquee,menu,meta,multicol,nobr,noembed,noframes,' - 'noscript,object,ol,optgroup,option,p,pre,q,s,script,' - 'select,small,span,strike,strong,style,sub,sup,table,' - 'tbody,td,textarea,tfoot,th,thead,title,tr,tt,u,ul,xmp,' - 'base,basefont,frame,hr,isindex,param,samp,var' - ).split(',') if x]) - - class Style(object): - def __init__(self, **kw): - for x, y in kw.items(): - x = x.replace('_', '-') - setattr(self, x, y) - diff --git a/py/xmlobj/misc.py b/py/xmlobj/misc.py deleted file mode 100644 index 358910a0f..000000000 --- a/py/xmlobj/misc.py +++ /dev/null @@ -1,26 +0,0 @@ -import re -import sys - -if sys.version_info > 
(3, 0): - def u(s): - return s -else: - def u(s): - return unicode(s) - -class _escape: - def __init__(self): - self.escape = { - u('"') : u('&quot;'), u('<') : u('&lt;'), u('>') : u('&gt;'), - u('&') : u('&amp;'), u("'") : u('&apos;'), - } - self.charef_rex = re.compile(u("|").join(self.escape.keys())) - - def _replacer(self, match): - return self.escape[match.group(0)] - - def __call__(self, ustring): - """ xml-escape the given unicode string. """ - return self.charef_rex.sub(self._replacer, ustring) - -escape = _escape() diff --git a/py/xmlobj/testing/test_html.py b/py/xmlobj/testing/test_html.py deleted file mode 100644 index 2bf59c0f9..000000000 --- a/py/xmlobj/testing/test_html.py +++ /dev/null @@ -1,54 +0,0 @@ -from py.xml import html - -def test_html_name_stickyness(): - class my(html.p): - pass - x = my("hello") - assert unicode(x) == '

hello

' - -def test_stylenames(): - class my: - class body(html.body): - style = html.Style(font_size = "12pt") - u = unicode(my.body()) - assert u == '' - -def test_class_None(): - t = html.body(class_=None) - u = unicode(t) - assert u == '' - -def test_alternating_style(): - alternating = ( - html.Style(background="white"), - html.Style(background="grey"), - ) - class my(html): - class li(html.li): - def style(self): - i = self.parent.index(self) - return alternating[i%2] - style = property(style) - - x = my.ul( - my.li("hello"), - my.li("world"), - my.li("42")) - u = unicode(x) - assert u == ('') - -def test_singleton(): - h = html.head(html.link(href="foo")) - assert unicode(h) == '' - - h = html.head(html.script(src="foo")) - assert unicode(h) == '' - -def test_inline(): - h = html.div(html.span('foo'), html.span('bar')) - assert (h.unicode(indent=2) == - '
foobar
') - diff --git a/py/xmlobj/testing/test_xml.py b/py/xmlobj/testing/test_xml.py index 031aa649a..b4acaf7b3 100644 --- a/py/xmlobj/testing/test_xml.py +++ b/py/xmlobj/testing/test_xml.py @@ -1,5 +1,6 @@ import py +from py.__.xmlobj.xmlgen import unicode, html class ns(py.xml.Namespace): pass @@ -56,3 +57,56 @@ def test_raw(): u = unicode(x) assert u == "

literal

" + +def test_html_name_stickyness(): + class my(html.p): + pass + x = my("hello") + assert unicode(x) == '

hello

' + +def test_stylenames(): + class my: + class body(html.body): + style = html.Style(font_size = "12pt") + u = unicode(my.body()) + assert u == '' + +def test_class_None(): + t = html.body(class_=None) + u = unicode(t) + assert u == '' + +def test_alternating_style(): + alternating = ( + html.Style(background="white"), + html.Style(background="grey"), + ) + class my(html): + class li(html.li): + def style(self): + i = self.parent.index(self) + return alternating[i%2] + style = property(style) + + x = my.ul( + my.li("hello"), + my.li("world"), + my.li("42")) + u = unicode(x) + assert u == ('') + +def test_singleton(): + h = html.head(html.link(href="foo")) + assert unicode(h) == '' + + h = html.head(html.script(src="foo")) + assert unicode(h) == '' + +def test_inline(): + h = html.div(html.span('foo'), html.span('bar')) + assert (h.unicode(indent=2) == + '
foobar
') + diff --git a/py/xmlobj/visit.py b/py/xmlobj/visit.py deleted file mode 100644 index bff64bdac..000000000 --- a/py/xmlobj/visit.py +++ /dev/null @@ -1,114 +0,0 @@ -# -# a generic conversion serializer -# - -import sys -from py.xml import escape - -if sys.version_info > (3, 0): - def u(s): - return s -else: - def u(s): - return unicode(s) - -class SimpleUnicodeVisitor(object): - """ recursive visitor to write unicode. """ - def __init__(self, write, indent=0, curindent=0, shortempty=True): - self.write = write - self.cache = {} - self.visited = {} # for detection of recursion - self.indent = indent - self.curindent = curindent - self.parents = [] - self.shortempty = shortempty # short empty tags or not - - def visit(self, node): - """ dispatcher on node's class/bases name. """ - cls = node.__class__ - try: - visitmethod = self.cache[cls] - except KeyError: - for subclass in cls.__mro__: - visitmethod = getattr(self, subclass.__name__, None) - if visitmethod is not None: - break - else: - visitmethod = self.object - self.cache[cls] = visitmethod - visitmethod(node) - - def object(self, obj): - #self.write(obj) - self.write(escape(unicode(obj))) - - def raw(self, obj): - self.write(obj.uniobj) - - def list(self, obj): - assert id(obj) not in self.visited - self.visited[id(obj)] = 1 - map(self.visit, obj) - - def Tag(self, tag): - assert id(tag) not in self.visited - try: - tag.parent = self.parents[-1] - except IndexError: - tag.parent = None - self.visited[id(tag)] = 1 - tagname = getattr(tag, 'xmlname', tag.__class__.__name__) - if self.curindent and not self._isinline(tagname): - self.write("\n" + u(' ') * self.curindent) - if tag: - self.curindent += self.indent - self.write(u('<%s%s>') % (tagname, self.attributes(tag))) - self.parents.append(tag) - map(self.visit, tag) - self.parents.pop() - self.write(u('</%s>') % tagname) - self.curindent -= self.indent - else: - nameattr = tagname+self.attributes(tag) - if self._issingleton(tagname): - self.write(u('<%s/>') % 
(nameattr,)) - else: - self.write(u('<%s></%s>') % (nameattr, tagname)) - - def attributes(self, tag): - # serialize attributes - attrlist = dir(tag.attr) - attrlist.sort() - l = [] - for name in attrlist: - res = self.repr_attribute(tag.attr, name) - if res is not None: - l.append(res) - l.extend(self.getstyle(tag)) - return u("").join(l) - - def repr_attribute(self, attrs, name): - if name[:2] != '__': - value = getattr(attrs, name) - if name.endswith('_'): - name = name[:-1] - return u(' %s="%s"') % (name, escape(unicode(value))) - - def getstyle(self, tag): - """ return attribute list suitable for styling. """ - try: - styledict = tag.style.__dict__ - except AttributeError: - return [] - else: - stylelist = [x+': ' + y for x,y in styledict.items()] - return [u(' style="%s"') % u('; ').join(stylelist)] - - def _issingleton(self, tagname): - """can (and will) be overridden in subclasses""" - return self.shortempty - - def _isinline(self, tagname): - """can (and will) be overridden in subclasses""" - return False - diff --git a/py/xmlobj/xml.py b/py/xmlobj/xml.py deleted file mode 100644 index 1aa970538..000000000 --- a/py/xmlobj/xml.py +++ /dev/null @@ -1,58 +0,0 @@ -""" -generic (and pythonic :-) xml tag and namespace objects -""" - -class Tag(list): - class Attr(object): - def __init__(self, **kwargs): - self.__dict__.update(kwargs) - - def __init__(self, *args, **kwargs): - super(Tag, self).__init__(args) - self.attr = self.Attr(**kwargs) - - def __unicode__(self): - return self.unicode(indent=0) - - def unicode(self, indent=2): - from py.__.xmlobj.visit import SimpleUnicodeVisitor - l = [] - SimpleUnicodeVisitor(l.append, indent).visit(self) - return "".join(l) - - def __repr__(self): - name = self.__class__.__name__ - return "<%r tag object %d>" % (name, id(self)) - -class raw(object): - """just a box that can contain a unicode string that will be - included directly in the output""" - def __init__(self, uniobj): - self.uniobj = uniobj - -# the generic xml 
namespace -# provides Tag classes on the fly optionally checking for -# a tagspecification - -class NamespaceMetaclass(type): - def __getattr__(self, name): - if name[:1] == '_': - raise AttributeError(name) - if self == Namespace: - raise ValueError("Namespace class is abstract") - tagspec = self.__tagspec__ - if tagspec is not None and name not in tagspec: - raise AttributeError(name) - classattr = {} - if self.__stickyname__: - classattr['xmlname'] = name - cls = type(name, (self.__tagclass__,), classattr) - setattr(self, name, cls) - return cls - -class Namespace(object): - __tagspec__ = None - __tagclass__ = Tag - __metaclass__ = NamespaceMetaclass - __stickyname__ = False - diff --git a/py/xmlobj/xmlgen.py b/py/xmlobj/xmlgen.py new file mode 100644 index 000000000..2d1f63350 --- /dev/null +++ b/py/xmlobj/xmlgen.py @@ -0,0 +1,243 @@ +""" +module for generating and serializing xml and html structures +by using simple python objects. + +(c) holger krekel, holger at merlinux eu. 2009 +""" +import py +import sys, re + +if sys.version_info >= (3,0): + def u(s): + return s + def unicode(x): + if hasattr(x, '__unicode__'): + return x.__unicode__() + return str(x) +else: + def u(s): + return unicode(s) + unicode = unicode + + +class NamespaceMetaclass(type): + def __getattr__(self, name): + if name[:1] == '_': + raise AttributeError(name) + if self == Namespace: + raise ValueError("Namespace class is abstract") + tagspec = self.__tagspec__ + if tagspec is not None and name not in tagspec: + raise AttributeError(name) + classattr = {} + if self.__stickyname__: + classattr['xmlname'] = name + cls = type(name, (self.__tagclass__,), classattr) + setattr(self, name, cls) + return cls + +class Tag(list): + class Attr(object): + def __init__(self, **kwargs): + self.__dict__.update(kwargs) + + def __init__(self, *args, **kwargs): + super(Tag, self).__init__(args) + self.attr = self.Attr(**kwargs) + + def __unicode__(self): + return self.unicode(indent=0) + __str__ = 
__unicode__ + + def unicode(self, indent=2): + l = [] + SimpleUnicodeVisitor(l.append, indent).visit(self) + return "".join(l) + + def __repr__(self): + name = self.__class__.__name__ + return "<%r tag object %d>" % (name, id(self)) + +Namespace = NamespaceMetaclass('Namespace', (object, ), { + '__tagspec__': None, + '__tagclass__': Tag, + '__stickyname__': False, +}) + +class HtmlTag(Tag): + def unicode(self, indent=2): + l = [] + HtmlVisitor(l.append, indent, shortempty=False).visit(self) + return u("").join(l) + +# exported plain html namespace +class html(Namespace): + __tagclass__ = HtmlTag + __stickyname__ = True + __tagspec__ = dict([(x,1) for x in ( + 'a,abbr,acronym,address,applet,area,b,bdo,big,blink,' + 'blockquote,body,br,button,caption,center,cite,code,col,' + 'colgroup,comment,dd,del,dfn,dir,div,dl,dt,em,embed,' + 'fieldset,font,form,frameset,h1,h2,h3,h4,h5,h6,head,html,' + 'i,iframe,img,input,ins,kbd,label,legend,li,link,listing,' + 'map,marquee,menu,meta,multicol,nobr,noembed,noframes,' + 'noscript,object,ol,optgroup,option,p,pre,q,s,script,' + 'select,small,span,strike,strong,style,sub,sup,table,' + 'tbody,td,textarea,tfoot,th,thead,title,tr,tt,u,ul,xmp,' + 'base,basefont,frame,hr,isindex,param,samp,var' + ).split(',') if x]) + + class Style(object): + def __init__(self, **kw): + for x, y in kw.items(): + x = x.replace('_', '-') + setattr(self, x, y) + + +class raw(object): + """just a box that can contain a unicode string that will be + included directly in the output""" + def __init__(self, uniobj): + self.uniobj = uniobj + +class SimpleUnicodeVisitor(object): + """ recursive visitor to write unicode. 
""" + def __init__(self, write, indent=0, curindent=0, shortempty=True): + self.write = write + self.cache = {} + self.visited = {} # for detection of recursion + self.indent = indent + self.curindent = curindent + self.parents = [] + self.shortempty = shortempty # short empty tags or not + + def visit(self, node): + """ dispatcher on node's class/bases name. """ + cls = node.__class__ + try: + visitmethod = self.cache[cls] + except KeyError: + for subclass in cls.__mro__: + visitmethod = getattr(self, subclass.__name__, None) + if visitmethod is not None: + break + else: + visitmethod = self.object + self.cache[cls] = visitmethod + visitmethod(node) + + def object(self, obj): + #self.write(obj) + self.write(escape(unicode(obj))) + + def raw(self, obj): + self.write(obj.uniobj) + + def list(self, obj): + assert id(obj) not in self.visited + self.visited[id(obj)] = 1 + map(self.visit, obj) + + def Tag(self, tag): + assert id(tag) not in self.visited + try: + tag.parent = self.parents[-1] + except IndexError: + tag.parent = None + self.visited[id(tag)] = 1 + tagname = getattr(tag, 'xmlname', tag.__class__.__name__) + if self.curindent and not self._isinline(tagname): + self.write("\n" + u(' ') * self.curindent) + if tag: + self.curindent += self.indent + self.write(u('<%s%s>') % (tagname, self.attributes(tag))) + self.parents.append(tag) + for x in tag: + self.visit(x) + self.parents.pop() + self.write(u('</%s>') % tagname) + self.curindent -= self.indent + else: + nameattr = tagname+self.attributes(tag) + if self._issingleton(tagname): + self.write(u('<%s/>') % (nameattr,)) + else: + self.write(u('<%s></%s>') % (nameattr, tagname)) + + def attributes(self, tag): + # serialize attributes + attrlist = dir(tag.attr) + attrlist.sort() + l = [] + for name in attrlist: + res = self.repr_attribute(tag.attr, name) + if res is not None: + l.append(res) + l.extend(self.getstyle(tag)) + return u("").join(l) + + def repr_attribute(self, attrs, name): + if name[:2] != '__': + value = 
getattr(attrs, name) + if name.endswith('_'): + name = name[:-1] + return ' %s="%s"' % (name, escape(unicode(value))) + + def getstyle(self, tag): + """ return attribute list suitable for styling. """ + try: + styledict = tag.style.__dict__ + except AttributeError: + return [] + else: + stylelist = [x+': ' + y for x,y in styledict.items()] + return [u(' style="%s"') % u('; ').join(stylelist)] + + def _issingleton(self, tagname): + """can (and will) be overridden in subclasses""" + return self.shortempty + + def _isinline(self, tagname): + """can (and will) be overridden in subclasses""" + return False + +class HtmlVisitor(SimpleUnicodeVisitor): + + single = dict([(x, 1) for x in + ('br,img,area,param,col,hr,meta,link,base,' + 'input,frame').split(',')]) + inline = dict([(x, 1) for x in + ('a abbr acronym b basefont bdo big br cite code dfn em font ' + 'i img input kbd label q s samp select small span strike ' + 'strong sub sup textarea tt u var'.split(' '))]) + + def repr_attribute(self, attrs, name): + if name == 'class_': + value = getattr(attrs, name) + if value is None: + return + return super(HtmlVisitor, self).repr_attribute(attrs, name) + + def _issingleton(self, tagname): + return tagname in self.single + + def _isinline(self, tagname): + return tagname in self.inline + + +class _escape: + def __init__(self): + self.escape = { + u('"') : u('&quot;'), u('<') : u('&lt;'), u('>') : u('&gt;'), + u('&') : u('&amp;'), u("'") : u('&apos;'), + } + self.charef_rex = re.compile(u("|").join(self.escape.keys())) + + def _replacer(self, match): + return self.escape[match.group(0)] + + def __call__(self, ustring): + """ xml-escape the given unicode string. """ + return self.charef_rex.sub(self._replacer, ustring) + +escape = _escape()