Fixed #7704, #14045 and #15495 -- Introduce a lexer for Javascript to fix multiple problems of the translation of Javascript files with xgettext. Many thanks to Ned Batchelder for his contribution of the JsLex library.
git-svn-id: http://code.djangoproject.com/svn/django/trunk@16333 bcc190cf-cafb-0310-a4f2-bffc1f526a37
This commit is contained in:
parent
d14eb13992
commit
64e19ffb4e
|
@ -9,8 +9,8 @@ from subprocess import PIPE, Popen
|
||||||
|
|
||||||
from django.core.management.base import CommandError, NoArgsCommand
|
from django.core.management.base import CommandError, NoArgsCommand
|
||||||
from django.utils.text import get_text_list
|
from django.utils.text import get_text_list
|
||||||
|
from django.utils.jslex import prepare_js_for_gettext
|
||||||
|
|
||||||
pythonize_re = re.compile(r'(?:^|\n)\s*//')
|
|
||||||
plural_forms_re = re.compile(r'^(?P<value>"Plural-Forms.+?\\n")\s*$', re.MULTILINE | re.DOTALL)
|
plural_forms_re = re.compile(r'^(?P<value>"Plural-Forms.+?\\n")\s*$', re.MULTILINE | re.DOTALL)
|
||||||
|
|
||||||
def handle_extensions(extensions=('html',)):
|
def handle_extensions(extensions=('html',)):
|
||||||
|
@ -184,15 +184,15 @@ def make_messages(locale=None, domain='django', verbosity='1', all=False,
|
||||||
if verbosity > 1:
|
if verbosity > 1:
|
||||||
sys.stdout.write('processing file %s in %s\n' % (file, dirpath))
|
sys.stdout.write('processing file %s in %s\n' % (file, dirpath))
|
||||||
src = open(os.path.join(dirpath, file), "rU").read()
|
src = open(os.path.join(dirpath, file), "rU").read()
|
||||||
src = pythonize_re.sub('\n#', src)
|
src = prepare_js_for_gettext(src)
|
||||||
thefile = '%s.py' % file
|
thefile = '%s.c' % file
|
||||||
f = open(os.path.join(dirpath, thefile), "w")
|
f = open(os.path.join(dirpath, thefile), "w")
|
||||||
try:
|
try:
|
||||||
f.write(src)
|
f.write(src)
|
||||||
finally:
|
finally:
|
||||||
f.close()
|
f.close()
|
||||||
cmd = (
|
cmd = (
|
||||||
'xgettext -d %s -L Perl %s --keyword=gettext_noop '
|
'xgettext -d %s -L C %s --keyword=gettext_noop '
|
||||||
'--keyword=gettext_lazy --keyword=ngettext_lazy:1,2 '
|
'--keyword=gettext_lazy --keyword=ngettext_lazy:1,2 '
|
||||||
'--keyword=pgettext:1c,2 --keyword=npgettext:1c,2,3 '
|
'--keyword=pgettext:1c,2 --keyword=npgettext:1c,2,3 '
|
||||||
'--from-code UTF-8 --add-comments=Translators -o - "%s"' % (
|
'--from-code UTF-8 --add-comments=Translators -o - "%s"' % (
|
||||||
|
|
|
@ -0,0 +1,213 @@
|
||||||
|
"""JsLex: a lexer for Javascript"""
|
||||||
|
# Originally from https://bitbucket.org/ned/jslex
|
||||||
|
import re
|
||||||
|
|
||||||
|
class Tok(object):
    """
    A specification for a token class.

    Each instance carries the token's `name`, the `regex` that matches it,
    and an optional `next` lexer state to switch to after the token is
    matched.  A class-level counter hands every instance a globally unique
    `id`, which the Lexer uses to build unique named regex groups.
    """
    # Class-wide counter: source of unique per-instance ids.
    num = 0

    def __init__(self, name, regex, next=None):
        self.id = Tok.num
        Tok.num += 1
        self.name = name
        self.regex = regex
        self.next = next
|
||||||
|
|
||||||
|
def literals(choices, prefix="", suffix=""):
    """
    Create a regex from a space-separated list of literal `choices`.

    If provided, `prefix` and `suffix` will be attached to each choice
    individually.  Each choice is passed through `re.escape`, so the
    resulting alternation matches the choices literally.
    """
    return "|".join(prefix + re.escape(c) + suffix for c in choices.split())
|
||||||
|
|
||||||
|
|
||||||
|
class Lexer(object):
    """
    A generic multi-state regex-based lexer.

    `states` maps a state name to a list of token rules (objects with
    `id`, `name`, `regex` and `next` attributes, e.g. `Tok` instances);
    `first` is the state the lexer starts in.  Each state's rules are
    compiled into a single alternation regex whose named groups identify
    which rule matched.
    """

    def __init__(self, states, first):
        self.regexes = {}
        self.toks = {}

        for state, rules in states.items():
            parts = []
            for tok in rules:
                # One uniquely named group per rule; tok.id is globally
                # unique, so group names never collide.
                groupid = "t%d" % tok.id
                self.toks[groupid] = tok
                parts.append("(?P<%s>%s)" % (groupid, tok.regex))
            self.regexes[state] = re.compile("|".join(parts), re.MULTILINE | re.VERBOSE)

        self.state = first

    def lex(self, text):
        """
        Lexically analyze `text`.

        Yields pairs (`name`, `tokentext`).
        """
        while text:
            eaten = 0
            for match in self.regexes[self.state].finditer(text):
                # Exactly one named group can participate in a match of the
                # alternation, and `lastgroup` names it.  (Replaces the
                # Python 2-only groupdict().iteritems() scan.)
                name = match.lastgroup
                tok = self.toks[name]
                toktext = match.group(name)
                eaten += len(toktext)
                yield (tok.name, toktext)
                if tok.next:
                    # The token switches lexer state: abandon this scan and
                    # rescan the remaining text with the new state's regex.
                    self.state = tok.next
                    break
            text = text[eaten:]
|
||||||
|
|
||||||
|
|
||||||
|
class JsLexer(Lexer):
    """
    A Javascript lexer

    >>> lexer = JsLexer()
    >>> list(lexer.lex("a = 1"))
    [('id', 'a'), ('ws', ' '), ('punct', '='), ('ws', ' '), ('dnum', '1')]

    This doesn't properly handle non-Ascii characters in the Javascript source.
    """

    # Because these tokens are matched as alternatives in a regex, longer
    # possibilities must appear in the list before shorter ones, for example,
    # '>>' before '>'.
    #
    # Note that we don't have to detect malformed Javascript, only properly
    # lex correct Javascript, so much of this is simplified.

    # Details of Javascript lexical structure are taken from
    # http://www.ecma-international.org/publications/files/ECMA-ST/ECMA-262.pdf

    # A useful explanation of automatic semicolon insertion is at
    # http://inimino.org/~inimino/blog/javascript_semicolons

    both_before = [
        Tok("comment", r"/\*(.|\n)*?\*/"),
        Tok("linecomment", r"//.*?$"),
        Tok("ws", r"\s+"),
        Tok("keyword", literals("""
                                break case catch class const continue debugger
                                default delete do else enum export extends
                                finally for function if import in instanceof
                                new return super switch this throw try typeof
                                var void while with
                                """, suffix=r"\b"), next='reg'),
        Tok("reserved", literals("null true false", suffix=r"\b"), next='div'),
        # NOTE: the escape class is [0-9a-fA-F] in both alternatives; the
        # source's "fA-Z" in the first-char line was wrong (Z is not a hex
        # digit), and a stray space inside the first-char class would be
        # significant even under re.VERBOSE.
        Tok("id", r"""
                  ([a-zA-Z_$]|\\u[0-9a-fA-F]{4})        # first char
                  ([a-zA-Z_$0-9]|\\u[0-9a-fA-F]{4})*    # rest chars
                  """, next='div'),
        Tok("hnum", r"0[xX][0-9a-fA-F]+", next='div'),
        Tok("onum", r"0[0-7]+"),
        Tok("dnum", r"""
                    (   (0|[1-9][0-9]*)     # DecimalIntegerLiteral
                        \.                  # dot
                        [0-9]*              # DecimalDigits-opt
                        ([eE][-+]?[0-9]+)?  # ExponentPart-opt
                    |
                        \.                  # dot
                        [0-9]+              # DecimalDigits
                        ([eE][-+]?[0-9]+)?  # ExponentPart-opt
                    |
                        (0|[1-9][0-9]*)     # DecimalIntegerLiteral
                        ([eE][-+]?[0-9]+)?  # ExponentPart-opt
                    )
                    """, next='div'),
        Tok("punct", literals("""
                              >>>= === !== >>> <<= >>= <= >= == != << >> &&
                              || += -= *= %= &= |= ^=
                              """), next="reg"),
        Tok("punct", literals("++ -- ) ]"), next='div'),
        Tok("punct", literals("{ } ( [ . ; , < > + - * % & | ^ ! ~ ? : ="), next='reg'),
        Tok("string", r'"([^"\\]|(\\(.|\n)))*?"', next='div'),
        Tok("string", r"'([^'\\]|(\\(.|\n)))*?'", next='div'),
    ]

    both_after = [
        Tok("other", r"."),
    ]

    states = {
        'div':  # slash will mean division
            both_before + [
            Tok("punct", literals("/= /"), next='reg'),
            ] + both_after,

        'reg':  # slash will mean regex
            both_before + [
            Tok("regex",
                r"""
                    /                       # opening slash
                    # First character is..
                    (   [^*\\/[]            # anything but * \ / or [
                    |   \\.                 # or an escape sequence
                    |   \[                  # or a class, which has
                            (   [^\]\\]     #   anything but \ or ]
                            |   \\.         #   or an escape sequence
                            )*              #   many times
                        \]
                    )
                    # Following characters are same, except for excluding a star
                    (   [^\\/[]             # anything but \ / or [
                    |   \\.                 # or an escape sequence
                    |   \[                  # or a class, which has
                            (   [^\]\\]     #   anything but \ or ]
                            |   \\.         #   or an escape sequence
                            )*              #   many times
                        \]
                    )*                      # many times
                    /                       # closing slash
                    [a-zA-Z0-9]*            # trailing flags
                """, next='div'),
            ] + both_after,
        }

    def __init__(self):
        # 'reg' is the correct start state: at the beginning of a program a
        # slash introduces a regex literal, not division.
        super(JsLexer, self).__init__(self.states, 'reg')
|
||||||
|
|
||||||
|
|
||||||
|
def prepare_js_for_gettext(js):
    """
    Convert the Javascript source `js` into something resembling C for
    xgettext.

    What actually happens is that all the regex literals are replaced with
    "REGEX", single-quoted strings become double-quoted, and Unicode
    escapes in identifiers are neutralized.
    """
    def escape_quotes(m):
        """Used in a regex to properly escape double quotes."""
        s = m.group(0)
        if s == '"':
            return r'\"'
        else:
            return s

    lexer = JsLexer()
    c = []  # accumulated C-ish output chunks
    for name, tok in lexer.lex(js):
        if name == 'regex':
            # C doesn't grok regexes, and they aren't needed for gettext,
            # so just output a string instead.
            tok = '"REGEX"'
        elif name == 'string':
            # C doesn't have single-quoted strings, so make all strings
            # double-quoted.
            if tok.startswith("'"):
                # Walk escape-pair-or-char so already-escaped quotes pass
                # through untouched; bare double quotes get escaped.
                guts = re.sub(r"\\.|.", escape_quotes, tok[1:-1])
                tok = '"' + guts + '"'
        elif name == 'id':
            # C can't deal with Unicode escapes in identifiers.  We don't
            # need them for gettext anyway, so replace them with something
            # innocuous.
            tok = tok.replace("\\", "U")
        c.append(tok)
    return ''.join(c)
|
|
@ -31,11 +31,13 @@ class ExtractorTests(TestCase):
|
||||||
def assertMsgId(self, msgid, s, use_quotes=True):
|
def assertMsgId(self, msgid, s, use_quotes=True):
|
||||||
if use_quotes:
|
if use_quotes:
|
||||||
msgid = '"%s"' % msgid
|
msgid = '"%s"' % msgid
|
||||||
|
msgid = re.escape(msgid)
|
||||||
return self.assertTrue(re.search('^msgid %s' % msgid, s, re.MULTILINE))
|
return self.assertTrue(re.search('^msgid %s' % msgid, s, re.MULTILINE))
|
||||||
|
|
||||||
def assertNotMsgId(self, msgid, s, use_quotes=True):
|
def assertNotMsgId(self, msgid, s, use_quotes=True):
|
||||||
if use_quotes:
|
if use_quotes:
|
||||||
msgid = '"%s"' % msgid
|
msgid = '"%s"' % msgid
|
||||||
|
msgid = re.escape(msgid)
|
||||||
return self.assertTrue(not re.search('^msgid %s' % msgid, s, re.MULTILINE))
|
return self.assertTrue(not re.search('^msgid %s' % msgid, s, re.MULTILINE))
|
||||||
|
|
||||||
|
|
||||||
|
@ -73,7 +75,7 @@ class BasicExtractorTests(ExtractorTests):
|
||||||
self.assertTrue(os.path.exists(self.PO_FILE))
|
self.assertTrue(os.path.exists(self.PO_FILE))
|
||||||
po_contents = open(self.PO_FILE, 'r').read()
|
po_contents = open(self.PO_FILE, 'r').read()
|
||||||
self.assertMsgId('I think that 100%% is more that 50%% of anything.', po_contents)
|
self.assertMsgId('I think that 100%% is more that 50%% of anything.', po_contents)
|
||||||
self.assertMsgId('I think that 100%% is more that 50%% of %\(obj\)s.', po_contents)
|
self.assertMsgId('I think that 100%% is more that 50%% of %(obj)s.', po_contents)
|
||||||
|
|
||||||
def test_extraction_error(self):
|
def test_extraction_error(self):
|
||||||
os.chdir(self.test_dir)
|
os.chdir(self.test_dir)
|
||||||
|
@ -102,7 +104,17 @@ class JavascriptExtractorTests(ExtractorTests):
|
||||||
po_contents = open(self.PO_FILE, 'r').read()
|
po_contents = open(self.PO_FILE, 'r').read()
|
||||||
self.assertMsgId('This literal should be included.', po_contents)
|
self.assertMsgId('This literal should be included.', po_contents)
|
||||||
self.assertMsgId('This one as well.', po_contents)
|
self.assertMsgId('This one as well.', po_contents)
|
||||||
|
self.assertMsgId(r'He said, \"hello\".', po_contents)
|
||||||
|
self.assertMsgId("okkkk", po_contents)
|
||||||
|
self.assertMsgId("TEXT", po_contents)
|
||||||
|
self.assertMsgId("It's at http://example.com", po_contents)
|
||||||
|
self.assertMsgId("String", po_contents)
|
||||||
|
self.assertMsgId("/* but this one will be too */ 'cause there is no way of telling...", po_contents)
|
||||||
|
self.assertMsgId("foo", po_contents)
|
||||||
|
self.assertMsgId("bar", po_contents)
|
||||||
|
self.assertMsgId("baz", po_contents)
|
||||||
|
self.assertMsgId("quz", po_contents)
|
||||||
|
self.assertMsgId("foobar", po_contents)
|
||||||
|
|
||||||
class IgnoredExtractorTests(ExtractorTests):
|
class IgnoredExtractorTests(ExtractorTests):
|
||||||
|
|
||||||
|
|
|
@ -1,4 +1,47 @@
|
||||||
// '
|
// '
|
||||||
gettext('This literal should be included.')
|
gettext('This literal should be included.')
|
||||||
// '
|
x = y; // '
|
||||||
gettext('This one as well.')
|
gettext("This one as well.")
|
||||||
|
|
||||||
|
/** (from ticket 7704)
|
||||||
|
* *****************************
|
||||||
|
* AddModule main / window
|
||||||
|
* @constructor
|
||||||
|
* @class MyDesktop.AddModule
|
||||||
|
* *****************************
|
||||||
|
*/
|
||||||
|
|
||||||
|
gettext('He said, \"hello".')
|
||||||
|
|
||||||
|
// from ticket 14045
|
||||||
|
function mfunc() {
|
||||||
|
var val = 0;
|
||||||
|
return val ? 1 : 0;
|
||||||
|
}
|
||||||
|
gettext('okkkk');
|
||||||
|
print mysub();
|
||||||
|
|
||||||
|
// from ticket 15495
|
||||||
|
/* / ' */ gettext("TEXT");
|
||||||
|
|
||||||
|
gettext("It's at http://example.com")
|
||||||
|
|
||||||
|
// also from ticket 15495
|
||||||
|
gettext("String"); // This comment won't be caught by pythonize_re and it contains "'" which is a string start in Perl
|
||||||
|
/*
|
||||||
|
* This one will be removed by the patch
|
||||||
|
*/
|
||||||
|
gettext("/* but this one will be too */ 'cause there is no way of telling...");
|
||||||
|
f(/* ... if it's different from this one */);
|
||||||
|
|
||||||
|
// from ticket 15331
|
||||||
|
gettext("foo");
|
||||||
|
true ? true : false;
|
||||||
|
gettext("bar");
|
||||||
|
true ? true : false;
|
||||||
|
gettext("baz");
|
||||||
|
true ? true : false; // ?
|
||||||
|
gettext("quz");
|
||||||
|
"?";
|
||||||
|
gettext("foobar");
|
||||||
|
|
||||||
|
|
|
@ -0,0 +1,217 @@
|
||||||
|
"""Tests for jslex."""
|
||||||
|
# encoding: utf-8
|
||||||
|
# originally from https://bitbucket.org/ned/jslex
|
||||||
|
|
||||||
|
import difflib
|
||||||
|
from django.test import TestCase
|
||||||
|
from django.utils.jslex import JsLexer, prepare_js_for_gettext
|
||||||
|
|
||||||
|
class JsTokensTest(TestCase):
|
||||||
|
LEX_CASES = [
|
||||||
|
# ids
|
||||||
|
("a ABC $ _ a123", ["id a", "id ABC", "id $", "id _", "id a123"]),
|
||||||
|
(r"\u1234 abc\u0020 \u0065_\u0067", [r"id \u1234", r"id abc\u0020", r"id \u0065_\u0067"]),
|
||||||
|
# numbers
|
||||||
|
("123 1.234 0.123e-3 0 1E+40 1e1 .123", ["dnum 123", "dnum 1.234", "dnum 0.123e-3", "dnum 0", "dnum 1E+40", "dnum 1e1", "dnum .123"]),
|
||||||
|
("0x1 0xabCD 0XABcd", ["hnum 0x1", "hnum 0xabCD", "hnum 0XABcd"]),
|
||||||
|
("010 0377 090", ["onum 010", "onum 0377", "dnum 0", "dnum 90"]),
|
||||||
|
("0xa123ghi", ["hnum 0xa123", "id ghi"]),
|
||||||
|
# keywords
|
||||||
|
("function Function FUNCTION", ["keyword function", "id Function", "id FUNCTION"]),
|
||||||
|
("const constructor in inherits", ["keyword const", "id constructor", "keyword in", "id inherits"]),
|
||||||
|
("true true_enough", ["reserved true", "id true_enough"]),
|
||||||
|
# strings
|
||||||
|
(''' 'hello' "hello" ''', ["string 'hello'", 'string "hello"']),
|
||||||
|
(r""" 'don\'t' "don\"t" '"' "'" '\'' "\"" """,
|
||||||
|
[r"""string 'don\'t'""", r'''string "don\"t"''', r"""string '"'""", r'''string "'"''', r"""string '\''""", r'''string "\""''']),
|
||||||
|
(ur'"ƃuıxǝ⅂ ʇdıɹɔsɐʌɐſ\""', [ur'string "ƃuıxǝ⅂ ʇdıɹɔsɐʌɐſ\""']),
|
||||||
|
# comments
|
||||||
|
("a//b", ["id a", "linecomment //b"]),
|
||||||
|
("/****/a/=2//hello", ["comment /****/", "id a", "punct /=", "dnum 2", "linecomment //hello"]),
|
||||||
|
("/*\n * Header\n */\na=1;", ["comment /*\n * Header\n */", "id a", "punct =", "dnum 1", "punct ;"]),
|
||||||
|
# punctuation
|
||||||
|
("a+++b", ["id a", "punct ++", "punct +", "id b"]),
|
||||||
|
# regex
|
||||||
|
(r"a=/a*/,1", ["id a", "punct =", "regex /a*/", "punct ,", "dnum 1"]),
|
||||||
|
(r"a=/a*[^/]+/,1", ["id a", "punct =", "regex /a*[^/]+/", "punct ,", "dnum 1"]),
|
||||||
|
(r"a=/a*\[^/,1", ["id a", "punct =", r"regex /a*\[^/", "punct ,", "dnum 1"]),
|
||||||
|
(r"a=/\//,1", ["id a", "punct =", r"regex /\//", "punct ,", "dnum 1"]),
|
||||||
|
|
||||||
|
# next two are from http://www.mozilla.org/js/language/js20-2002-04/rationale/syntax.html#regular-expressions
|
||||||
|
("""for (var x = a in foo && "</x>" || mot ? z:/x:3;x<5;y</g/i) {xyz(x++);}""",
|
||||||
|
["keyword for", "punct (", "keyword var", "id x", "punct =", "id a", "keyword in",
|
||||||
|
"id foo", "punct &&", 'string "</x>"', "punct ||", "id mot", "punct ?", "id z",
|
||||||
|
"punct :", "regex /x:3;x<5;y</g", "punct /", "id i", "punct )", "punct {",
|
||||||
|
"id xyz", "punct (", "id x", "punct ++", "punct )", "punct ;", "punct }"]),
|
||||||
|
("""for (var x = a in foo && "</x>" || mot ? z/x:3;x<5;y</g/i) {xyz(x++);}""",
|
||||||
|
["keyword for", "punct (", "keyword var", "id x", "punct =", "id a", "keyword in",
|
||||||
|
"id foo", "punct &&", 'string "</x>"', "punct ||", "id mot", "punct ?", "id z",
|
||||||
|
"punct /", "id x", "punct :", "dnum 3", "punct ;", "id x", "punct <", "dnum 5",
|
||||||
|
"punct ;", "id y", "punct <", "regex /g/i", "punct )", "punct {",
|
||||||
|
"id xyz", "punct (", "id x", "punct ++", "punct )", "punct ;", "punct }"]),
|
||||||
|
|
||||||
|
# Various "illegal" regexes that are valid according to the std.
|
||||||
|
(r"""/????/, /++++/, /[----]/ """, ["regex /????/", "punct ,", "regex /++++/", "punct ,", "regex /[----]/"]),
|
||||||
|
|
||||||
|
# Stress cases from http://stackoverflow.com/questions/5533925/what-javascript-constructs-does-jslex-incorrectly-lex/5573409#5573409
|
||||||
|
(r"""/\[/""", [r"""regex /\[/"""]),
|
||||||
|
(r"""/[i]/""", [r"""regex /[i]/"""]),
|
||||||
|
(r"""/[\]]/""", [r"""regex /[\]]/"""]),
|
||||||
|
(r"""/a[\]]/""", [r"""regex /a[\]]/"""]),
|
||||||
|
(r"""/a[\]]b/""", [r"""regex /a[\]]b/"""]),
|
||||||
|
(r"""/[\]/]/gi""", [r"""regex /[\]/]/gi"""]),
|
||||||
|
(r"""/\[[^\]]+\]/gi""", [r"""regex /\[[^\]]+\]/gi"""]),
|
||||||
|
("""
|
||||||
|
rexl.re = {
|
||||||
|
NAME: /^(?!\d)(?:\w)+|^"(?:[^"]|"")+"/,
|
||||||
|
UNQUOTED_LITERAL: /^@(?:(?!\d)(?:\w|\:)+|^"(?:[^"]|"")+")\[[^\]]+\]/,
|
||||||
|
QUOTED_LITERAL: /^'(?:[^']|'')*'/,
|
||||||
|
NUMERIC_LITERAL: /^[0-9]+(?:\.[0-9]*(?:[eE][-+][0-9]+)?)?/,
|
||||||
|
SYMBOL: /^(?:==|=|<>|<=|<|>=|>|!~~|!~|~~|~|!==|!=|!~=|!~|!|&|\||\.|\:|,|\(|\)|\[|\]|\{|\}|\?|\:|;|@|\^|\/\+|\/|\*|\+|-)/
|
||||||
|
};
|
||||||
|
""",
|
||||||
|
["id rexl", "punct .", "id re", "punct =", "punct {",
|
||||||
|
"id NAME", "punct :", r"""regex /^(?!\d)(?:\w)+|^"(?:[^"]|"")+"/""", "punct ,",
|
||||||
|
"id UNQUOTED_LITERAL", "punct :", r"""regex /^@(?:(?!\d)(?:\w|\:)+|^"(?:[^"]|"")+")\[[^\]]+\]/""", "punct ,",
|
||||||
|
"id QUOTED_LITERAL", "punct :", r"""regex /^'(?:[^']|'')*'/""", "punct ,",
|
||||||
|
"id NUMERIC_LITERAL", "punct :", r"""regex /^[0-9]+(?:\.[0-9]*(?:[eE][-+][0-9]+)?)?/""", "punct ,",
|
||||||
|
"id SYMBOL", "punct :", r"""regex /^(?:==|=|<>|<=|<|>=|>|!~~|!~|~~|~|!==|!=|!~=|!~|!|&|\||\.|\:|,|\(|\)|\[|\]|\{|\}|\?|\:|;|@|\^|\/\+|\/|\*|\+|-)/""",
|
||||||
|
"punct }", "punct ;"
|
||||||
|
]),
|
||||||
|
|
||||||
|
("""
|
||||||
|
rexl.re = {
|
||||||
|
NAME: /^(?!\d)(?:\w)+|^"(?:[^"]|"")+"/,
|
||||||
|
UNQUOTED_LITERAL: /^@(?:(?!\d)(?:\w|\:)+|^"(?:[^"]|"")+")\[[^\]]+\]/,
|
||||||
|
QUOTED_LITERAL: /^'(?:[^']|'')*'/,
|
||||||
|
NUMERIC_LITERAL: /^[0-9]+(?:\.[0-9]*(?:[eE][-+][0-9]+)?)?/,
|
||||||
|
SYMBOL: /^(?:==|=|<>|<=|<|>=|>|!~~|!~|~~|~|!==|!=|!~=|!~|!|&|\||\.|\:|,|\(|\)|\[|\]|\{|\}|\?|\:|;|@|\^|\/\+|\/|\*|\+|-)/
|
||||||
|
};
|
||||||
|
str = '"';
|
||||||
|
""",
|
||||||
|
["id rexl", "punct .", "id re", "punct =", "punct {",
|
||||||
|
"id NAME", "punct :", r"""regex /^(?!\d)(?:\w)+|^"(?:[^"]|"")+"/""", "punct ,",
|
||||||
|
"id UNQUOTED_LITERAL", "punct :", r"""regex /^@(?:(?!\d)(?:\w|\:)+|^"(?:[^"]|"")+")\[[^\]]+\]/""", "punct ,",
|
||||||
|
"id QUOTED_LITERAL", "punct :", r"""regex /^'(?:[^']|'')*'/""", "punct ,",
|
||||||
|
"id NUMERIC_LITERAL", "punct :", r"""regex /^[0-9]+(?:\.[0-9]*(?:[eE][-+][0-9]+)?)?/""", "punct ,",
|
||||||
|
"id SYMBOL", "punct :", r"""regex /^(?:==|=|<>|<=|<|>=|>|!~~|!~|~~|~|!==|!=|!~=|!~|!|&|\||\.|\:|,|\(|\)|\[|\]|\{|\}|\?|\:|;|@|\^|\/\+|\/|\*|\+|-)/""",
|
||||||
|
"punct }", "punct ;",
|
||||||
|
"id str", "punct =", """string '"'""", "punct ;",
|
||||||
|
]),
|
||||||
|
|
||||||
|
(r""" this._js = "e.str(\"" + this.value.replace(/\\/g, "\\\\").replace(/"/g, "\\\"") + "\")"; """,
|
||||||
|
["keyword this", "punct .", "id _js", "punct =", r'''string "e.str(\""''', "punct +", "keyword this", "punct .",
|
||||||
|
"id value", "punct .", "id replace", "punct (", r"regex /\\/g", "punct ,", r'string "\\\\"', "punct )",
|
||||||
|
"punct .", "id replace", "punct (", r'regex /"/g', "punct ,", r'string "\\\""', "punct )", "punct +",
|
||||||
|
r'string "\")"', "punct ;"]),
|
||||||
|
]
|
||||||
|
|
||||||
|
def make_function(input, toks):
|
||||||
|
def test_func(self):
|
||||||
|
lexer = JsLexer()
|
||||||
|
result = ["%s %s" % (name, tok) for name, tok in lexer.lex(input) if name != 'ws']
|
||||||
|
self.assertListEqual(result, toks)
|
||||||
|
return test_func
|
||||||
|
|
||||||
|
for i, (input, toks) in enumerate(JsTokensTest.LEX_CASES):
|
||||||
|
setattr(JsTokensTest, "test_case_%d" % i, make_function(input, toks))
|
||||||
|
|
||||||
|
|
||||||
|
GETTEXT_CASES = (
|
||||||
|
(
|
||||||
|
r"""
|
||||||
|
a = 1; /* /[0-9]+/ */
|
||||||
|
b = 0x2a0b / 1; // /[0-9]+/
|
||||||
|
c = 3;
|
||||||
|
""",
|
||||||
|
r"""
|
||||||
|
a = 1; /* /[0-9]+/ */
|
||||||
|
b = 0x2a0b / 1; // /[0-9]+/
|
||||||
|
c = 3;
|
||||||
|
"""
|
||||||
|
), (
|
||||||
|
r"""
|
||||||
|
a = 1.234e-5;
|
||||||
|
/*
|
||||||
|
* /[0-9+/
|
||||||
|
*/
|
||||||
|
b = .0123;
|
||||||
|
""",
|
||||||
|
r"""
|
||||||
|
a = 1.234e-5;
|
||||||
|
/*
|
||||||
|
* /[0-9+/
|
||||||
|
*/
|
||||||
|
b = .0123;
|
||||||
|
"""
|
||||||
|
), (
|
||||||
|
r"""
|
||||||
|
x = y / z;
|
||||||
|
alert(gettext("hello"));
|
||||||
|
x /= 3;
|
||||||
|
""",
|
||||||
|
r"""
|
||||||
|
x = y / z;
|
||||||
|
alert(gettext("hello"));
|
||||||
|
x /= 3;
|
||||||
|
"""
|
||||||
|
), (
|
||||||
|
r"""
|
||||||
|
s = "Hello \"th/foo/ere\"";
|
||||||
|
s = 'He\x23llo \'th/foo/ere\'';
|
||||||
|
s = 'slash quote \", just quote "';
|
||||||
|
""",
|
||||||
|
r"""
|
||||||
|
s = "Hello \"th/foo/ere\"";
|
||||||
|
s = "He\x23llo \'th/foo/ere\'";
|
||||||
|
s = "slash quote \", just quote \"";
|
||||||
|
"""
|
||||||
|
), (
|
||||||
|
r"""
|
||||||
|
s = "Line continuation\
|
||||||
|
continued /hello/ still the string";/hello/;
|
||||||
|
""",
|
||||||
|
r"""
|
||||||
|
s = "Line continuation\
|
||||||
|
continued /hello/ still the string";"REGEX";
|
||||||
|
"""
|
||||||
|
), (
|
||||||
|
r"""
|
||||||
|
var regex = /pattern/;
|
||||||
|
var regex2 = /matter/gm;
|
||||||
|
var regex3 = /[*/]+/gm.foo("hey");
|
||||||
|
""",
|
||||||
|
r"""
|
||||||
|
var regex = "REGEX";
|
||||||
|
var regex2 = "REGEX";
|
||||||
|
var regex3 = "REGEX".foo("hey");
|
||||||
|
"""
|
||||||
|
), (
|
||||||
|
r"""
|
||||||
|
for (var x = a in foo && "</x>" || mot ? z:/x:3;x<5;y</g/i) {xyz(x++);}
|
||||||
|
for (var x = a in foo && "</x>" || mot ? z/x:3;x<5;y</g/i) {xyz(x++);}
|
||||||
|
""",
|
||||||
|
r"""
|
||||||
|
for (var x = a in foo && "</x>" || mot ? z:"REGEX"/i) {xyz(x++);}
|
||||||
|
for (var x = a in foo && "</x>" || mot ? z/x:3;x<5;y<"REGEX") {xyz(x++);}
|
||||||
|
"""
|
||||||
|
), (
|
||||||
|
r"""
|
||||||
|
\u1234xyz = gettext('Hello there');
|
||||||
|
""", r"""
|
||||||
|
Uu1234xyz = gettext("Hello there");
|
||||||
|
"""
|
||||||
|
)
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
class JsToCForGettextTest(TestCase):
|
||||||
|
pass
|
||||||
|
|
||||||
|
def make_function(js, c):
|
||||||
|
def test_func(self):
|
||||||
|
self.assertMultiLineEqual(prepare_js_for_gettext(js), c)
|
||||||
|
return test_func
|
||||||
|
|
||||||
|
for i, pair in enumerate(GETTEXT_CASES):
|
||||||
|
setattr(JsToCForGettextTest, "test_case_%d" % i, make_function(*pair))
|
|
@ -18,3 +18,4 @@ from datastructures import *
|
||||||
from tzinfo import *
|
from tzinfo import *
|
||||||
from datetime_safe import *
|
from datetime_safe import *
|
||||||
from baseconv import *
|
from baseconv import *
|
||||||
|
from jslex import *
|
||||||
|
|
Loading…
Reference in New Issue