django1/django/utils/text.py

import re

from django.conf import settings

def capfirst(x):
    "Capitalizes the first letter of a string; falsy values are returned as-is."
    if not x:
        # Empty strings, None, etc. are handed back untouched.
        return x
    return x[0].upper() + x[1:]

def wrap(text, width):
    """
    Word-wraps text, keeping existing line breaks and most spaces intact.
    Existing line breaks are expected to be posix newlines (LF).

    The text is split on single spaces. Each following word is joined to the
    output with a newline when the length of the current output line plus the
    length of the word (up to its first embedded newline) is at least width,
    and with a space otherwise.
    See http://aspn.activestate.com/ASPN/Cookbook/Python/Recipe/148061
    """
    words = text.split(' ')
    # split() always yields at least one element, which seeds the output.
    wrapped = words[0]
    for word in words[1:]:
        # Only the text after the last newline counts towards the width.
        current_line = wrapped[wrapped.rfind('\n') + 1:]
        head = word.split('\n', 1)[0]
        if len(current_line) + len(head) >= width:
            separator = '\n'
        else:
            separator = ' '
        wrapped = '%s%s%s' % (wrapped, separator, word)
    return wrapped

def truncate_words(s, num):
    """
    Truncates a string after a certain number of words.

    num is coerced with int(); zero or negative counts keep no words. When
    words are dropped, '...' is appended unless the last kept word already
    ends with '...'. Words are re-joined with single spaces, so runs of
    whitespace in the input are collapsed.
    """
    # Clamp so a negative count cannot slice from the end of the word list.
    length = max(int(num), 0)
    words = s.split()
    if len(words) > length:
        words = words[:length]
        # words is empty when length is 0, so guard before reading words[-1].
        if not words or not words[-1].endswith('...'):
            words.append('...')
    return ' '.join(words)

def get_valid_filename(s):
    """
    Converts the given string into one that can be used as a clean filename.
    Surrounding whitespace is stripped, remaining spaces become underscores,
    and every character other than ASCII letters, digits, '-', '_' and '.'
    is dropped.
    >>> get_valid_filename("john's portrait in 2004.jpg")
    'johns_portrait_in_2004.jpg'
    """
    trimmed = s.strip()
    underscored = trimmed.replace(' ', '_')
    return re.sub(r'[^-A-Za-z0-9_.]', '', underscored)

def get_text_list(list_, last_word='or'):
    """
    Joins the items of list_ into a human-readable enumeration: items are
    separated by commas, with last_word placed before the final item. An
    empty list gives '' and a single-item list gives that item unchanged.
    >>> get_text_list(['a', 'b', 'c', 'd'])
    'a, b, c or d'
    >>> get_text_list(['a', 'b', 'c'], 'and')
    'a, b and c'
    >>> get_text_list(['a', 'b'], 'and')
    'a and b'
    >>> get_text_list(['a'])
    'a'
    >>> get_text_list([])
    ''
    """
    count = len(list_)
    if count == 0:
        return ''
    if count == 1:
        return list_[0]
    # Every item is stringified before the final one is sliced off.
    leading = [str(item) for item in list_][:-1]
    return '%s %s %s' % (', '.join(leading), last_word, list_[-1])

def normalize_newlines(text):
    "Converts CRLF and lone CR line endings in text to posix newlines."
    newline = re.compile(r'\r\n|\r|\n')
    return newline.sub('\n', text)

def recapitalize(text):
    """
    Lowercases text, then capitalizes the first letter of the string and any
    letter that follows '.', '?' or '!' plus a single space.
    """
    def _upper(match):
        return match.group(1).upper()

    sentence_start = re.compile(r'(?:^|(?<=[\.\?\!] ))([a-z])')
    return sentence_start.sub(_upper, text.lower())

def phone2numeric(phone):
    """
    Converts a phone number with letters into its numeric equivalent.

    Each ASCII letter, in either case, is replaced by the digit it shares a
    key with on a standard telephone keypad, including Q (7) and Z (9).
    All other characters are left untouched.
    """
    # Built once per call rather than once per matched letter.
    char2number = {
        'a': '2', 'b': '2', 'c': '2',
        'd': '3', 'e': '3', 'f': '3',
        'g': '4', 'h': '4', 'i': '4',
        'j': '5', 'k': '5', 'l': '5',
        'm': '6', 'n': '6', 'o': '6',
        'p': '7', 'q': '7', 'r': '7', 's': '7',
        't': '8', 'u': '8', 'v': '8',
        'w': '9', 'x': '9', 'y': '9', 'z': '9',
    }

    def to_digit(match):
        return char2number[match.group(0).lower()]

    # An explicit ASCII class (no re.I) guarantees every match is in the table.
    return re.sub(r'[A-Za-z]', to_digit, phone)

# From http://www.xhaus.com/alan/python/httpcomp.html#gzip
# Used with permission.
def compress_string(s):
    """
    Returns s compressed in gzip format (compression level 6), built
    entirely in memory.
    """
    import gzip
    try:
        from cStringIO import StringIO as _Buffer
    except ImportError:
        # No cStringIO on this interpreter; io.BytesIO is the in-memory
        # byte buffer that takes its place.
        from io import BytesIO as _Buffer
    zbuf = _Buffer()
    zfile = gzip.GzipFile(mode='wb', compresslevel=6, fileobj=zbuf)
    try:
        zfile.write(s)
    finally:
        # Closing writes the gzip trailer; do it even if write() fails so
        # the GzipFile is never left open.
        zfile.close()
    return zbuf.getvalue()

ustring_re = re.compile(u"([\u0080-\uffff])")
def javascript_quote(s):
    """
    Escapes a string so it can be placed inside a single-quoted JavaScript
    string literal, and returns the result converted with str().

    Byte strings are decoded using settings.DEFAULT_CHARSET and unicode
    strings are used as-is; any other type raises TypeError. Backslashes,
    carriage returns, newlines, tabs and single quotes are backslash-escaped,
    and each character in the range U+0080 to U+FFFF is replaced by a
    four-hex-digit JavaScript unicode escape.
    """

    def fix(match):
        # Rewrite one non-ASCII character as a JavaScript unicode escape.
        return r"\u%04x" % ord(match.group(1))

    # isinstance() also accepts subclasses of str/unicode.
    if isinstance(s, str):
        s = s.decode(settings.DEFAULT_CHARSET)
    elif not isinstance(s, unicode):
        raise TypeError(s)
    # Backslashes go first so the escapes added below are not doubled.
    s = s.replace('\\', '\\\\')
    # A raw carriage return is a line terminator in JavaScript and would
    # end the string literal, just like a raw newline.
    s = s.replace('\r', '\\r')
    s = s.replace('\n', '\\n')
    s = s.replace('\t', '\\t')
    s = s.replace("'", "\\'")
    return str(ustring_re.sub(fix, s))

smart_split_re = re.compile('("(?:[^"\\\\]*(?:\\\\.[^"\\\\]*)*)"|\'(?:[^\'\\\\]*(?:\\\\.[^\'\\\\]*)*)\'|[^\\s]+)')
def smart_split(text):
    """
    Generator that splits a string by spaces, leaving quoted phrases together.
    Supports both single and double quotes, and supports escaping quotes with
    backslashes. In the output, quoted phrases keep their initial and trailing
    quote marks, with backslash-escaped quotes and backslashes inside them
    unescaped. A token that merely starts with a quote mark but is not closed
    by the same mark is yielded unchanged.
    >>> list(smart_split('This is "a person\'s" test.'))
    ['This', 'is', '"a person\'s"', 'test.']
    """
    for match in smart_split_re.finditer(text):
        bit = match.group(0)
        quote = bit[0]
        # Require a matching closing quote: an unterminated token such as
        # '"abc' falls through to the bare-word branch instead of having its
        # last character overwritten with a quote mark.
        if quote in ('"', "'") and len(bit) > 1 and bit[-1] == quote:
            inner = bit[1:-1].replace('\\' + quote, quote).replace('\\\\', '\\')
            yield quote + inner + quote
        else:
            yield bit
Imported Django from private SVN repository (created from r. 8825) git-svn-id: http://code.djangoproject.com/svn/django/trunk@3 bcc190cf-cafb-0310-a4f2-bffc1f526a37 2005-07-13 09:25:57 +08:00			`import re`

MERGED MAGIC-REMOVAL BRANCH TO TRUNK. This change is highly backwards-incompatible. Please read http://code.djangoproject.com/wiki/RemovingTheMagic for upgrade instructions. git-svn-id: http://code.djangoproject.com/svn/django/trunk@2809 bcc190cf-cafb-0310-a4f2-bffc1f526a37 2006-05-02 09:31:56 +08:00			`from django.conf import settings`
added infrastructure code for later javascript translating (currently not active) git-svn-id: http://code.djangoproject.com/svn/django/trunk@1529 bcc190cf-cafb-0310-a4f2-bffc1f526a37 2005-12-04 20:06:16 +08:00
Refactored meta.py -- created a django.core.meta package, with init.py and fields.py git-svn-id: http://code.djangoproject.com/svn/django/trunk@378 bcc190cf-cafb-0310-a4f2-bffc1f526a37 2005-08-02 05:29:52 +08:00			`# Capitalizes the first letter of a string.`
			`capfirst = lambda x: x and x[0].upper() + x[1:]`

Imported Django from private SVN repository (created from r. 8825) git-svn-id: http://code.djangoproject.com/svn/django/trunk@3 bcc190cf-cafb-0310-a4f2-bffc1f526a37 2005-07-13 09:25:57 +08:00			`def wrap(text, width):`
			`"""`
			`A word-wrap function that preserves existing line breaks and most spaces in`
			`the text. Expects that existing line breaks are posix newlines (\n).`
			`See http://aspn.activestate.com/ASPN/Cookbook/Python/Recipe/148061`
			`"""`
			`return reduce(lambda line, word, width=width: '%s%s%s' %`
			`(line,`
			`' \n'[(len(line[line.rfind('\n')+1:])`
			`+ len(word.split('\n',1)[0]`
			`) >= width)],`
			`word),`
			`text.split(' ')`
			`)`

			`def truncate_words(s, num):`
			`"Truncates a string after a certain number of words."`
			`length = int(num)`
			`words = s.split()`
			`if len(words) > length:`
			`words = words[:length]`
			`if not words[-1].endswith('...'):`
			`words.append('...')`
			`return ' '.join(words)`

			`def get_valid_filename(s):`
			`"""`
			`Returns the given string converted to a string that can be used for a clean`
			`filename. Specifically, leading and trailing spaces are removed; other`
			`spaces are converted to underscores; and all non-filename-safe characters`
			`are removed.`
			`>>> get_valid_filename("john's portrait in 2004.jpg")`
			`'johns_portrait_in_2004.jpg'`
			`"""`
			`s = s.strip().replace(' ', '_')`
			`return re.sub(r'[^-A-Za-z0-9_.]', '', s)`

			`def get_text_list(list_, last_word='or'):`
			`"""`
			`>>> get_text_list(['a', 'b', 'c', 'd'])`
			`'a, b, c or d'`
			`>>> get_text_list(['a', 'b', 'c'], 'and')`
			`'a, b and c'`
			`>>> get_text_list(['a', 'b'], 'and')`
			`'a and b'`
			`>>> get_text_list(['a'])`
			`'a'`
			`>>> get_text_list([])`
			`''`
			`"""`
			`if len(list_) == 0: return ''`
			`if len(list_) == 1: return list_[0]`
Merged i18n branch into the trunk! Fixes #65, and perhaps some others. NB: this means that the i18n branch is now obsolete and will be made read-only. git-svn-id: http://code.djangoproject.com/svn/django/trunk@1068 bcc190cf-cafb-0310-a4f2-bffc1f526a37 2005-11-04 12:59:46 +08:00			`return '%s %s %s' % (', '.join([str(i) for i in list_][:-1]), last_word, list_[-1])`
Imported Django from private SVN repository (created from r. 8825) git-svn-id: http://code.djangoproject.com/svn/django/trunk@3 bcc190cf-cafb-0310-a4f2-bffc1f526a37 2005-07-13 09:25:57 +08:00
			`def normalize_newlines(text):`
			`return re.sub(r'\r\n\|\r\|\n', '\n', text)`

			`def recapitalize(text):`
			`"Recapitalizes text, placing caps after end-of-sentence punctuation."`
Removed Lawrence-specific stuff in utils.text.recapitalize. Refs #467 git-svn-id: http://code.djangoproject.com/svn/django/trunk@638 bcc190cf-cafb-0310-a4f2-bffc1f526a37 2005-09-13 11:31:34 +08:00			`# capwords = ()`
Imported Django from private SVN repository (created from r. 8825) git-svn-id: http://code.djangoproject.com/svn/django/trunk@3 bcc190cf-cafb-0310-a4f2-bffc1f526a37 2005-07-13 09:25:57 +08:00			`text = text.lower()`
			`capsRE = re.compile(r'(?:^\|(?<=[\.\?\!] ))([a-z])')`
			`text = capsRE.sub(lambda x: x.group(1).upper(), text)`
Removed Lawrence-specific stuff in utils.text.recapitalize. Refs #467 git-svn-id: http://code.djangoproject.com/svn/django/trunk@638 bcc190cf-cafb-0310-a4f2-bffc1f526a37 2005-09-13 11:31:34 +08:00			`# for capword in capwords:`
			`# capwordRE = re.compile(r'\b%s\b' % capword, re.I)`
			`# text = capwordRE.sub(capword, text)`
Imported Django from private SVN repository (created from r. 8825) git-svn-id: http://code.djangoproject.com/svn/django/trunk@3 bcc190cf-cafb-0310-a4f2-bffc1f526a37 2005-07-13 09:25:57 +08:00			`return text`

			`def phone2numeric(phone):`
			`"Converts a phone number with letters into its numeric equivalent."`
			`letters = re.compile(r'[A-PR-Y]', re.I)`
			`char2number = lambda m: {'a': '2', 'c': '2', 'b': '2', 'e': '3',`
			`'d': '3', 'g': '4', 'f': '3', 'i': '4', 'h': '4', 'k': '5',`
			`'j': '5', 'm': '6', 'l': '5', 'o': '6', 'n': '6', 'p': '7',`
			`'s': '7', 'r': '7', 'u': '8', 't': '8', 'w': '9', 'v': '8',`
			`'y': '9', 'x': '9'}.get(m.group(0).lower())`
			`return letters.sub(char2number, phone)`
Moved django.views.decorators.cache.compress_string into django.utils.text git-svn-id: http://code.djangoproject.com/svn/django/trunk@175 bcc190cf-cafb-0310-a4f2-bffc1f526a37 2005-07-19 01:23:04 +08:00
			`# From http://www.xhaus.com/alan/python/httpcomp.html#gzip`
			`# Used with permission.`
			`def compress_string(s):`
			`import cStringIO, gzip`
			`zbuf = cStringIO.StringIO()`
			`zfile = gzip.GzipFile(mode='wb', compresslevel=6, fileobj=zbuf)`
			`zfile.write(s)`
			`zfile.close()`
			`return zbuf.getvalue()`
added infrastructure code for later javascript translating (currently not active) git-svn-id: http://code.djangoproject.com/svn/django/trunk@1529 bcc190cf-cafb-0310-a4f2-bffc1f526a37 2005-12-04 20:06:16 +08:00
			`ustring_re = re.compile(u"([\u0080-\uffff])")`
			`def javascript_quote(s):`

			`def fix(match):`
			`return r"\u%04x" % ord(match.group(1))`

			`if type(s) == str:`
MERGED MAGIC-REMOVAL BRANCH TO TRUNK. This change is highly backwards-incompatible. Please read http://code.djangoproject.com/wiki/RemovingTheMagic for upgrade instructions. git-svn-id: http://code.djangoproject.com/svn/django/trunk@2809 bcc190cf-cafb-0310-a4f2-bffc1f526a37 2006-05-02 09:31:56 +08:00			`s = s.decode(settings.DEFAULT_CHARSET)`
added infrastructure code for later javascript translating (currently not active) git-svn-id: http://code.djangoproject.com/svn/django/trunk@1529 bcc190cf-cafb-0310-a4f2-bffc1f526a37 2005-12-04 20:06:16 +08:00			`elif type(s) != unicode:`
			`raise TypeError, s`
			`s = s.replace('\\', '\\\\')`
			`s = s.replace('\n', '\\n')`
			`s = s.replace('\t', '\\t')`
			`s = s.replace("'", "\\'")`
			`return str(ustring_re.sub(fix, s))`

Added django.utils.text.smart_split. Thanks, ckknight git-svn-id: http://code.djangoproject.com/svn/django/trunk@3101 bcc190cf-cafb-0310-a4f2-bffc1f526a37 2006-06-07 14:08:23 +08:00			`smart_split_re = re.compile('("(?:[^"\\\\](?:\\\\.[^"\\\\]))"\|\'(?:[^\'\\\\](?:\\\\.[^\'\\\\]))\'\|[^\\s]+)')`
			`def smart_split(text):`
Changed django.utils.text.smart_split to return strings, not tuples git-svn-id: http://code.djangoproject.com/svn/django/trunk@3111 bcc190cf-cafb-0310-a4f2-bffc1f526a37 2006-06-08 12:26:23 +08:00			`"""`
			`Generator that splits a string by spaces, leaving quoted phrases together.`
			`Supports both single and double quotes, and supports escaping quotes with`
			`backslashes. In the output, strings will keep their initial and trailing`
			`quote marks.`
			`>>> list(smart_split('This is "a person\'s" test.'))`
			`['This', 'is', '"a person\'s"', 'test.']`
			`"""`
Added django.utils.text.smart_split. Thanks, ckknight git-svn-id: http://code.djangoproject.com/svn/django/trunk@3101 bcc190cf-cafb-0310-a4f2-bffc1f526a37 2006-06-07 14:08:23 +08:00			`for bit in smart_split_re.finditer(text):`
			`bit = bit.group(0)`
			`if bit[0] == '"':`
Changed django.utils.text.smart_split to return strings, not tuples git-svn-id: http://code.djangoproject.com/svn/django/trunk@3111 bcc190cf-cafb-0310-a4f2-bffc1f526a37 2006-06-08 12:26:23 +08:00			`yield '"' + bit[1:-1].replace('\\"', '"').replace('\\\\', '\\') + '"'`
Added django.utils.text.smart_split. Thanks, ckknight git-svn-id: http://code.djangoproject.com/svn/django/trunk@3101 bcc190cf-cafb-0310-a4f2-bffc1f526a37 2006-06-07 14:08:23 +08:00			`elif bit[0] == "'":`
Changed django.utils.text.smart_split to return strings, not tuples git-svn-id: http://code.djangoproject.com/svn/django/trunk@3111 bcc190cf-cafb-0310-a4f2-bffc1f526a37 2006-06-08 12:26:23 +08:00			`yield "'" + bit[1:-1].replace("\\'", "'").replace("\\\\", "\\") + "'"`
Added django.utils.text.smart_split. Thanks, ckknight git-svn-id: http://code.djangoproject.com/svn/django/trunk@3101 bcc190cf-cafb-0310-a4f2-bffc1f526a37 2006-06-07 14:08:23 +08:00			`else:`
Changed django.utils.text.smart_split to return strings, not tuples git-svn-id: http://code.djangoproject.com/svn/django/trunk@3111 bcc190cf-cafb-0310-a4f2-bffc1f526a37 2006-06-08 12:26:23 +08:00			`yield bit`