test_ok2/py/misc/rest.py

74 lines
2.3 KiB
Python
Raw Normal View History

import py
import sys, os, traceback
import re
if hasattr(sys.stdout, 'fileno') and os.isatty(sys.stdout.fileno()):
def log(msg):
print msg
else:
def log(msg):
pass
def convert_rest_html(source, source_path, stylesheet=None, encoding='latin1'):
from py.__.rest import directive
""" return html latin1-encoded document for the given input.
source a ReST-string
sourcepath where to look for includes (basically)
stylesheet path (to be used if any)
"""
from docutils.core import publish_string
directive.set_backend_and_register_directives("html")
kwargs = {
'stylesheet' : stylesheet,
'stylesheet_path': None,
'traceback' : 1,
'embed_stylesheet': 0,
'output_encoding' : encoding,
#'halt' : 0, # 'info',
'halt_level' : 2,
}
# docutils uses os.getcwd() :-(
source_path = os.path.abspath(str(source_path))
prevdir = os.getcwd()
try:
os.chdir(os.path.dirname(source_path))
return publish_string(source, source_path, writer_name='html',
settings_overrides=kwargs)
finally:
os.chdir(prevdir)
def process(txtpath, encoding='latin1'):
""" process a textfile """
log("processing %s" % txtpath)
assert txtpath.check(ext='.txt')
if isinstance(txtpath, py.path.svnwc):
txtpath = txtpath.localpath
htmlpath = txtpath.new(ext='.html')
#svninfopath = txtpath.localpath.new(ext='.svninfo')
style = txtpath.dirpath('style.css')
if style.check():
stylesheet = style.basename
else:
stylesheet = None
content = unicode(txtpath.read(), encoding)
doc = convert_rest_html(content, txtpath, stylesheet=stylesheet, encoding=encoding)
htmlpath.write(doc)
#log("wrote %r" % htmlpath)
#if txtpath.check(svnwc=1, versioned=1):
# info = txtpath.info()
# svninfopath.dump(info)
rex1 = re.compile(ur'.*<body>(.*)</body>.*', re.MULTILINE | re.DOTALL)
rex2 = re.compile(ur'.*<div class="document">(.*)</div>.*', re.MULTILINE | re.DOTALL)
def strip_html_header(string, encoding='utf8'):
""" return the content of the body-tag """
uni = unicode(string, encoding)
for rex in rex1,rex2:
match = rex.search(uni)
if not match:
break
uni = match.group(1)
return uni