2016-08-08 21:46:52 +08:00
|
|
|
import warnings
|
2017-01-07 19:11:46 +08:00
|
|
|
from io import StringIO
|
2016-08-08 21:46:52 +08:00
|
|
|
|
2021-08-05 11:20:04 +08:00
|
|
|
from django.template.base import Lexer, TokenType
|
2019-10-26 22:42:32 +08:00
|
|
|
from django.utils.regex_helper import _lazy_re_compile
|
2016-08-08 21:46:52 +08:00
|
|
|
|
|
|
|
from . import TranslatorCommentWarning, trim_whitespace
|
|
|
|
|
2021-08-05 11:20:04 +08:00
|
|
|
# Text that a template comment must start with (after leading whitespace) to
# be treated as a comment addressed to translators.
TRANSLATOR_COMMENT_MARK = "Translators"

# Matches one non-whitespace character.
dot_re = _lazy_re_compile(r"\S")


def blankout(src, char):
    """
    Return ``src`` with each non-whitespace character substituted by ``char``.

    Whitespace characters are not matched by the pattern and are therefore
    kept as they are. Used in the templatize function.
    """
    masked = dot_re.sub(char, src)
    return masked
|
|
|
|
|
|
|
|
|
2019-10-26 22:42:32 +08:00
|
|
|
# A context clause: leading whitespace, anything up to the ``context``
# keyword, then a quoted string (group 1) and optional trailing whitespace.
context_re = _lazy_re_compile(
    r"""^\s+.*context\s+"""
    r"""((?:"[^"]*?")|(?:'[^']*?'))"""
    r"""\s*"""
)

# Contents of an inline trans/translate tag. Group 1 is the quoted message,
# group 2 is the whole optional context clause.
inline_re = _lazy_re_compile(
    # Match the trans/translate 'some text' part.
    r"""^\s*trans(?:late)?\s+"""
    r"""((?:"[^"]*?")|(?:'[^']*?'))"""
    # Match and ignore optional filters
    r"""(?:\s*\|\s*[^\s:]+(?::(?:[^\s'":]+|(?:"[^"]*?")|(?:'[^']*?')))?)*"""
    # Match the optional context part
    r"""(\s+.*context\s+((?:"[^"]*?")|(?:'[^']*?')))?"""
    r"""\s*"""
)

# Contents of an opening blocktrans/blocktranslate tag. Group 1 is the whole
# optional context clause.
block_re = _lazy_re_compile(
    r"""^\s*blocktrans(?:late)?"""
    r"""(\s+.*context\s+((?:"[^"]*?")|(?:'[^']*?')))?"""
    r"""(?:\s+|$)"""
)

# Contents of a closing endblocktrans/endblocktranslate tag.
endblock_re = _lazy_re_compile(r"^\s*endblocktrans(?:late)?$")

# Contents of the plural tag that separates singular and plural forms.
plural_re = _lazy_re_compile(r"^\s*plural$")

# A ``_("...")`` or ``_('...')`` constant; group 1 is the quoted string.
constant_re = _lazy_re_compile(
    r"""_\("""
    r"""((?:".*?")|(?:'.*?'))"""
    r"""\)"""
)
|
2016-08-08 21:46:52 +08:00
|
|
|
|
|
|
|
|
2017-02-06 16:15:25 +08:00
|
|
|
def templatize(src, origin=None):
    """
    Rewrite Django template source as text that xgettext can scan.

    The template is tokenized with Lexer and the result is assembled token by
    token:

    * inline trans/translate tags and blocktrans/blocktranslate blocks become
      gettext(), pgettext(), ngettext() or npgettext() calls;
    * ``_("...")`` constants found in block tags and variables become
      ``_(...)`` calls;
    * comment blocks and one-line comments starting with "Translators" are
      emitted as ``#`` comments;
    * other block tags, filter expressions and remaining tokens are passed
      through blankout().

    ``origin``, when truthy, is interpolated into error and warning messages
    as the file name.

    Raise SyntaxError when a block tag other than ``plural`` or the closing
    tag is found inside a translation block. Emit TranslatorCommentWarning
    for a translator comment that is followed by another non-comment token
    ending on the same line.
    """
    out = StringIO("")
    # Adding the u prefix allows gettext to recognize the string (#26093).
    raw_prefix = "u"

    # State of the translation block currently being collected.
    message_context = None
    intrans = False
    inplural = False
    trimmed = False
    singular = []
    plural = []
    # State of the {% comment %} block currently being collected.
    incomment = False
    comment = []
    # One-line translator comments, keyed by the line they were found on, and
    # the line of the most recent one that has not been dealt with yet.
    lineno_comment_map = {}
    comment_lineno_cache = None

    def join_tokens(tokens, trim=False):
        joined = "".join(tokens)
        return trim_whitespace(joined) if trim else joined

    def unquote(literal):
        # Strip the quote character that opens `literal` from both ends.
        opening = literal[0]
        if opening == '"' or opening == "'":
            return literal.strip(opening)
        return literal

    def file_prefix():
        # File name fragment used in error and warning messages.
        return "file %s, " % origin if origin else ""

    for token in Lexer(src).tokenize():
        kind = token.token_type
        text = token.contents

        if incomment:
            if kind == TokenType.BLOCK and text == "endcomment":
                comment_lines = "".join(comment).splitlines(True)
                # Index of the last line that opens with the translator mark.
                first_kept = None
                for index, line in enumerate(comment_lines):
                    if line.lstrip().startswith(TRANSLATOR_COMMENT_MARK):
                        first_kept = index
                # Lines before that index are blanked; the rest are kept.
                for index, line in enumerate(comment_lines):
                    if first_kept is None or index < first_kept:
                        out.write(" #\n")
                    else:
                        out.write(" # %s" % line)
                incomment = False
                comment = []
            else:
                comment.append(text)
            continue

        if intrans:
            if kind == TokenType.BLOCK:
                closes_block = endblock_re.match(text)
                starts_plural = plural_re.match(text)
                if closes_block:
                    singular_msg = join_tokens(singular, trimmed)
                    if inplural:
                        plural_msg = join_tokens(plural, trimmed)
                        if message_context:
                            template = " npgettext({p}{!r}, {p}{!r}, {p}{!r},count) "
                            args = (message_context, singular_msg, plural_msg)
                        else:
                            template = " ngettext({p}{!r}, {p}{!r}, count) "
                            args = (singular_msg, plural_msg)
                    elif message_context:
                        template = " pgettext({p}{!r}, {p}{!r}) "
                        args = (message_context, singular_msg)
                    else:
                        template = " gettext({p}{!r}) "
                        args = (singular_msg,)
                    out.write(template.format(*args, p=raw_prefix))
                    # Follow the call with the masked original pieces.
                    for part in singular:
                        out.write(blankout(part, "S"))
                    if inplural:
                        for part in plural:
                            out.write(blankout(part, "P"))
                    # The block is finished: reset its state.
                    message_context = None
                    intrans = False
                    inplural = False
                    singular = []
                    plural = []
                elif starts_plural:
                    inplural = True
                else:
                    raise SyntaxError(
                        "Translation blocks must not include other block tags: "
                        "%s (%sline %d)" % (text, file_prefix(), token.lineno)
                    )
            elif kind == TokenType.VAR:
                # Variables become named %-format placeholders.
                placeholder = "%%(%s)s" % text
                (plural if inplural else singular).append(placeholder)
            elif kind == TokenType.TEXT:
                # Literal percent signs are doubled.
                escaped = text.replace("%", "%%")
                (plural if inplural else singular).append(escaped)
            continue

        # Handle comment tokens (`{# ... #}`) plus other constructs on
        # the same line:
        if comment_lineno_cache is not None:
            end_lineno = token.lineno + text.count("\n")
            if comment_lineno_cache != end_lineno:
                out.write(
                    "# %s" % " | ".join(lineno_comment_map[comment_lineno_cache])
                )
            elif kind != TokenType.COMMENT:
                for ignored in lineno_comment_map[comment_lineno_cache]:
                    warn_msg = (
                        "The translator-targeted comment '%s' "
                        "(%sline %d) was ignored, because it wasn't "
                        "the last item on the line."
                    ) % (ignored, file_prefix(), comment_lineno_cache)
                    warnings.warn(warn_msg, TranslatorCommentWarning)
                lineno_comment_map[comment_lineno_cache] = []
            comment_lineno_cache = None

        if kind == TokenType.BLOCK:
            inline_match = inline_re.match(text)
            block_match = block_re.match(text)
            constants = constant_re.findall(text)
            if inline_match:
                message = unquote(inline_match[1]).replace("%", "%%")
                if inline_match[2]:
                    # A context is provided
                    context_match = context_re.match(inline_match[2])
                    message_context = unquote(context_match[1])
                    out.write(
                        " pgettext({p}{!r}, {p}{!r}) ".format(
                            message_context, message, p=raw_prefix
                        )
                    )
                    message_context = None
                else:
                    out.write(" gettext({p}{!r}) ".format(message, p=raw_prefix))
            elif block_match:
                for constant in constants:
                    out.write(" _(%s) " % constant)
                if block_match[1]:
                    # A context is provided
                    context_match = context_re.match(block_match[1])
                    message_context = unquote(context_match[1])
                intrans = True
                inplural = False
                trimmed = "trimmed" in token.split_contents()
                singular = []
                plural = []
            elif constants:
                for constant in constants:
                    out.write(" _(%s) " % constant)
            elif text == "comment":
                incomment = True
            else:
                out.write(blankout(text, "B"))
        elif kind == TokenType.VAR:
            head, *filters = text.split("|")
            leading_constant = constant_re.match(head)
            if leading_constant:
                out.write(" _(%s) " % leading_constant[1])
            for filter_expr in filters:
                if ":_(" in filter_expr:
                    # Keep the translatable argument of the filter.
                    out.write(" %s " % filter_expr.split(":", 1)[1])
                else:
                    out.write(blankout(filter_expr, "F"))
        elif kind == TokenType.COMMENT:
            if text.lstrip().startswith(TRANSLATOR_COMMENT_MARK):
                lineno_comment_map.setdefault(token.lineno, []).append(text)
                comment_lineno_cache = token.lineno
        else:
            out.write(blankout(text, "X"))

    return out.getvalue()
|