From 64e19ffb4ee32767861d25c874f0d2dfc75618b7 Mon Sep 17 00:00:00 2001
From: Jannis Leidel <jannis@leidel.info>
Date: Tue, 7 Jun 2011 16:11:25 +0000
Subject: [PATCH] Fixed #7704, #14045 and #15495 -- Introduce a lexer for
 Javascript to fix multiple problems of the translation of Javascript files
 with xgettext. Many thanks to Ned Batchelder for his contribution of the
 JsLex library.

git-svn-id: http://code.djangoproject.com/svn/django/trunk@16333 bcc190cf-cafb-0310-a4f2-bffc1f526a37
---
 .../core/management/commands/makemessages.py  |   8 +-
 django/utils/jslex.py                         | 213 +++++++++++++++++
 .../i18n/commands/extraction.py               |  16 +-
 .../i18n/commands/javascript.js               |  47 +++-
 tests/regressiontests/utils/jslex.py          | 217 ++++++++++++++++++
 tests/regressiontests/utils/tests.py          |   1 +
 6 files changed, 494 insertions(+), 8 deletions(-)
 create mode 100644 django/utils/jslex.py
 create mode 100644 tests/regressiontests/utils/jslex.py
diff --git a/django/core/management/commands/makemessages.py b/django/core/management/commands/makemessages.py
index a244a60de5..899775d020 100644
--- a/django/core/management/commands/makemessages.py
+++ b/django/core/management/commands/makemessages.py
@@ -9,8 +9,8 @@ from subprocess import PIPE, Popen
 
 from django.core.management.base import CommandError, NoArgsCommand
 from django.utils.text import get_text_list
+from django.utils.jslex import prepare_js_for_gettext
 
-pythonize_re = re.compile(r'(?:^|\n)\s*//')
 plural_forms_re = re.compile(r'^(?P<value>"Plural-Forms.+?\\n")\s*$', re.MULTILINE | re.DOTALL)
 
 def handle_extensions(extensions=('html',)):
@@ -184,15 +184,15 @@ def make_messages(locale=None, domain='django', verbosity='1', all=False,
                 if verbosity > 1:
                     sys.stdout.write('processing file %s in %s\n' % (file, dirpath))
                 src = open(os.path.join(dirpath, file), "rU").read()
-                src = pythonize_re.sub('\n#', src)
-                thefile = '%s.py' % file
+                src = prepare_js_for_gettext(src)
+                thefile = '%s.c' % file
                 f = open(os.path.join(dirpath, thefile), "w")
                 try:
                     f.write(src)
                 finally:
                     f.close()
                 cmd = (
-                    'xgettext -d %s -L Perl %s --keyword=gettext_noop '
+                    'xgettext -d %s -L C %s --keyword=gettext_noop '
                     '--keyword=gettext_lazy --keyword=ngettext_lazy:1,2 '
                     '--keyword=pgettext:1c,2 --keyword=npgettext:1c,2,3 '
                     '--from-code UTF-8 --add-comments=Translators -o - "%s"' % (
diff --git a/django/utils/jslex.py b/django/utils/jslex.py
new file mode 100644
index 0000000000..88a22ec67d
--- /dev/null
+++ b/django/utils/jslex.py
@@ -0,0 +1,213 @@
+"""JsLex: a lexer for Javascript"""
+# Originally from https://bitbucket.org/ned/jslex
+import re
+
+class Tok(object):
+    """
+    Describe one token class: a name, a regex and an optional next state.
+    """
+    num = 0  # count of Tok instances created so far; source of unique ids
+
+    def __init__(self, name, regex, next=None):
+        # Claim the next free id and advance the shared counter.
+        self.id, Tok.num = Tok.num, Tok.num + 1
+        self.name = name
+        self.regex = regex
+        self.next = next
+
+def literals(choices, prefix="", suffix=""):
+    """
+    Build a regex alternation from whitespace-separated literal `choices`.
+
+    Each choice is regex-escaped, then wrapped in `prefix` and `suffix`
+    (both empty by default), and the results are joined with "|".
+
+    """
+    return "|".join([prefix + re.escape(w) + suffix for w in choices.split()])
+
+
+class Lexer(object):
+    """
+    A generic multi-state regex-based lexer (`states`: name -> rule list).
+    """
+
+    def __init__(self, states, first):
+        self.regexes = {}
+        self.toks = {}
+
+        for state, rules in states.items():
+            parts = []
+            for tok in rules:
+                groupid = "t%d" % tok.id  # one uniquely named group per rule
+                self.toks[groupid] = tok
+                parts.append("(?P<%s>%s)" % (groupid, tok.regex))
+            self.regexes[state] = re.compile("|".join(parts), re.MULTILINE|re.VERBOSE)
+
+        self.state = first
+
+    def lex(self, text):
+        """
+        Lexically analyze `text`, scanning in place from the current state.
+
+        Yields pairs (`name`, `tokentext`); `self.state` follows `tok.next`.
+        """
+        pos, end = 0, len(text)
+        while pos < end:
+            for match in self.regexes[self.state].finditer(text, pos):
+                groupid = match.lastgroup  # the rule that matched
+                tok = self.toks[groupid]
+                pos = match.end()
+                yield (tok.name, match.group(groupid))
+                if tok.next:
+                    self.state = tok.next
+                    break
+            else:
+                # Nothing (more) matches in this state: stop, don't spin.
+                break
+
+
+class JsLexer(Lexer):
+    """
+    A Javascript lexer that tracks whether "/" starts a regex or divides.
+
+    >>> lexer = JsLexer()
+    >>> list(lexer.lex("a = 1"))
+    [('id', 'a'), ('ws', ' '), ('punct', '='), ('ws', ' '), ('dnum', '1')]
+
+    Non-ASCII characters in the Javascript source are not handled properly.
+    """
+
+    # Because these tokens are matched as alternatives in a regex, longer
+    # possibilities must appear in the list before shorter ones, for example,
+    # '>>' before '>'.
+    #
+    # Note that we don't have to detect malformed Javascript, only properly
+    # lex correct Javascript, so much of this is simplified.
+
+    # Details of Javascript lexical structure are taken from
+    # http://www.ecma-international.org/publications/files/ECMA-ST/ECMA-262.pdf
+
+    # A useful explanation of automatic semicolon insertion is at
+    # http://inimino.org/~inimino/blog/javascript_semicolons
+
+    both_before = [  # rules shared by both states, tried before the "/" rule
+        Tok("comment",      r"/\*(.|\n)*?\*/"),
+        Tok("linecomment",  r"//.*?$"),
+        Tok("ws",           r"\s+"),
+        Tok("keyword",      literals("""
+                                break case catch class const continue debugger
+                                default delete do else enum export extends
+                                finally for function if import in instanceof
+                                new return super switch this throw try typeof
+                                var void while with
+                                """, suffix=r"\b"), next='reg'),
+        Tok("reserved",     literals("null true false", suffix=r"\b"), next='div'),
+        Tok("id",           r"""
+                            ([a-zA-Z_$   ]|\\u[0-9a-fA-F]{4})   # first char
+                            ([a-zA-Z_$0-9]|\\u[0-9a-fA-F]{4})*  # rest chars
+                            """, next='div'),
+        Tok("hnum",         r"0[xX][0-9a-fA-F]+", next='div'),
+        Tok("onum",         r"0[0-7]+", next='div'),  # "/" after it divides
+        Tok("dnum",         r"""
+                            (   (0|[1-9][0-9]*)     # DecimalIntegerLiteral
+                                \.                  # dot
+                                [0-9]*              # DecimalDigits-opt
+                                ([eE][-+]?[0-9]+)?  # ExponentPart-opt
+                            |
+                                \.                  # dot
+                                [0-9]+              # DecimalDigits
+                                ([eE][-+]?[0-9]+)?  # ExponentPart-opt
+                            |
+                                (0|[1-9][0-9]*)     # DecimalIntegerLiteral
+                                ([eE][-+]?[0-9]+)?  # ExponentPart-opt
+                            )
+                            """, next='div'),
+        Tok("punct",        literals("""
+                                >>>= === !== >>> <<= >>= <= >= == != << >> &&
+                                || += -= *= %= &= |= ^=
+                                """), next="reg"),
+        Tok("punct",        literals("++ -- ) ]"), next='div'),
+        Tok("punct",        literals("{ } ( [ . ; , < > + - * % & | ^ ! ~ ? : ="), next='reg'),
+        Tok("string",       r'"([^"\\]|(\\(.|\n)))*?"', next='div'),
+        Tok("string",       r"'([^'\\]|(\\(.|\n)))*?'", next='div'),
+        ]
+
+    both_after = [  # catch-all, tried after the state-specific "/" rule
+        Tok("other",        r"."),
+    ]
+
+    states = {
+        'div': # slash will mean division
+            both_before + [
+            Tok("punct", literals("/= /"), next='reg'),
+            ] + both_after,
+
+        'reg':  # slash will mean regex
+            both_before + [
+            Tok("regex",
+                r"""
+                    /                       # opening slash
+                    # First character is..
+                    (   [^*\\/[]            # anything but * \ / or [
+                    |   \\.                 # or an escape sequence
+                    |   \[                  # or a class, which has
+                            (   [^\]\\]     #   anything but \ or ]
+                            |   \\.         #   or an escape sequence
+                            )*              #   many times
+                        \]
+                    )
+                    # Following characters are same, except for excluding a star
+                    (   [^\\/[]             # anything but \ / or [
+                    |   \\.                 # or an escape sequence
+                    |   \[                  # or a class, which has
+                            (   [^\]\\]     #   anything but \ or ]
+                            |   \\.         #   or an escape sequence
+                            )*              #   many times
+                        \]
+                    )*                      # many times
+                    /                       # closing slash
+                    [a-zA-Z0-9]*            # trailing flags
+                """, next='div'),
+            ] + both_after,
+        }
+
+    def __init__(self):
+        super(JsLexer, self).__init__(self.states, 'reg')
+
+
+def prepare_js_for_gettext(js):
+    """
+    Convert the Javascript source `js` into something resembling C for
+    xgettext.
+
+    Regex literals are replaced with the string "REGEX", single-quoted
+    strings are re-quoted with double quotes, and each backslash in an
+    identifier (a Unicode escape) is replaced with "U".  All other
+    tokens are copied through unchanged.
+    """
+    def escape_quotes(m):
+        """Used in a regex to backslash-escape bare double quotes."""
+        piece = m.group(0)
+        if piece != '"':
+            # An escape sequence or an ordinary character: keep it.
+            return piece
+        return r'\"'
+
+    out = []
+    for name, tok in JsLexer().lex(js):
+        if name == 'regex':
+            # C doesn't grok regexes, and they aren't needed for gettext,
+            # so just output a string instead.
+            tok = '"REGEX"'
+        elif name == 'string' and tok.startswith("'"):
+            # C doesn't have single-quoted strings, so re-quote with double
+            # quotes, escaping any bare double quote inside.
+            inner = re.sub(r"\\.|.", escape_quotes, tok[1:-1])
+            tok = '"' + inner + '"'
+        elif name == 'id':
+            # C can't deal with Unicode escapes in identifiers.  We don't
+            # need them for gettext anyway, so replace the backslash with
+            # something innocuous.
+            tok = tok.replace("\\", "U")
+        out.append(tok)
+    return ''.join(out)
diff --git a/tests/regressiontests/i18n/commands/extraction.py b/tests/regressiontests/i18n/commands/extraction.py
index a5b01f3617..7341c4f7cb 100644
--- a/tests/regressiontests/i18n/commands/extraction.py
+++ b/tests/regressiontests/i18n/commands/extraction.py
@@ -31,11 +31,13 @@ class ExtractorTests(TestCase):
     def assertMsgId(self, msgid, s, use_quotes=True):
         if use_quotes:
             msgid = '"%s"' % msgid
+        msgid = re.escape(msgid)
         return self.assertTrue(re.search('^msgid %s' % msgid, s, re.MULTILINE))
 
     def assertNotMsgId(self, msgid, s, use_quotes=True):
         if use_quotes:
             msgid = '"%s"' % msgid
+        msgid = re.escape(msgid)
         return self.assertTrue(not re.search('^msgid %s' % msgid, s, re.MULTILINE))
 
 
@@ -73,7 +75,7 @@ class BasicExtractorTests(ExtractorTests):
         self.assertTrue(os.path.exists(self.PO_FILE))
         po_contents = open(self.PO_FILE, 'r').read()
         self.assertMsgId('I think that 100%% is more that 50%% of anything.', po_contents)
-        self.assertMsgId('I think that 100%% is more that 50%% of %\(obj\)s.', po_contents)
+        self.assertMsgId('I think that 100%% is more that 50%% of %(obj)s.', po_contents)
 
     def test_extraction_error(self):
         os.chdir(self.test_dir)
@@ -102,7 +104,17 @@ class JavascriptExtractorTests(ExtractorTests):
         po_contents = open(self.PO_FILE, 'r').read()
         self.assertMsgId('This literal should be included.', po_contents)
         self.assertMsgId('This one as well.', po_contents)
-
+        self.assertMsgId(r'He said, \"hello\".', po_contents)
+        self.assertMsgId("okkkk", po_contents)
+        self.assertMsgId("TEXT", po_contents)
+        self.assertMsgId("It's at http://example.com", po_contents)
+        self.assertMsgId("String", po_contents)
+        self.assertMsgId("/* but this one will be too */ 'cause there is no way of telling...", po_contents)
+        self.assertMsgId("foo", po_contents)
+        self.assertMsgId("bar", po_contents)
+        self.assertMsgId("baz", po_contents)
+        self.assertMsgId("quz", po_contents)
+        self.assertMsgId("foobar", po_contents)
 
 class IgnoredExtractorTests(ExtractorTests):
 
diff --git a/tests/regressiontests/i18n/commands/javascript.js b/tests/regressiontests/i18n/commands/javascript.js
index bc5ec87957..fa059d70f4 100644
--- a/tests/regressiontests/i18n/commands/javascript.js
+++ b/tests/regressiontests/i18n/commands/javascript.js
@@ -1,4 +1,47 @@
 // '
 gettext('This literal should be included.')
-// '
-gettext('This one as well.')
+x = y; // '
+gettext("This one as well.")
+
+/** (from ticket 7704)
+ * *****************************
+ * AddModule main / window
+ * @constructor
+ * @class MyDesktop.AddModule
+ * *****************************
+ */
+
+gettext('He said, \"hello".')
+
+// from ticket 14045
+function mfunc() {
+    var val = 0;
+    return val ? 1 : 0;
+}
+gettext('okkkk');
+print mysub();
+
+// from ticket 15495
+/* / ' */ gettext("TEXT");
+
+gettext("It's at http://example.com")
+
+// also from ticket 15495
+gettext("String"); // This comment won't be caught by pythonize_re and it contains "'" which is a string start in Perl
+/*
+ * This one will be removed by the patch
+ */
+gettext("/* but this one will be too */ 'cause there is no way of telling...");
+f(/* ... if it's different from this one */);
+
+// from ticket 15331
+gettext("foo");
+true ? true : false;
+gettext("bar");
+true ? true : false;
+gettext("baz");
+true ? true : false; // ?
+gettext("quz");
+"?";
+gettext("foobar");
+
diff --git a/tests/regressiontests/utils/jslex.py b/tests/regressiontests/utils/jslex.py
new file mode 100644
index 0000000000..08e8386831
--- /dev/null
+++ b/tests/regressiontests/utils/jslex.py
@@ -0,0 +1,217 @@
+"""Tests for jslex."""
+# encoding: utf-8
+# originally from https://bitbucket.org/ned/jslex
+
+import difflib
+from django.test import TestCase
+from django.utils.jslex import JsLexer, prepare_js_for_gettext
+
+class JsTokensTest(TestCase):
+    LEX_CASES = [
+        # ids
+        ("a ABC $ _ a123", ["id a", "id ABC", "id $", "id _", "id a123"]),
+        (r"\u1234 abc\u0020 \u0065_\u0067", [r"id \u1234", r"id abc\u0020", r"id \u0065_\u0067"]),
+        # numbers
+        ("123 1.234 0.123e-3 0 1E+40 1e1 .123", ["dnum 123", "dnum 1.234", "dnum 0.123e-3", "dnum 0", "dnum 1E+40", "dnum 1e1", "dnum .123"]),
+        ("0x1 0xabCD 0XABcd", ["hnum 0x1", "hnum 0xabCD", "hnum 0XABcd"]),
+        ("010 0377 090", ["onum 010", "onum 0377", "dnum 0", "dnum 90"]),
+        ("0xa123ghi", ["hnum 0xa123", "id ghi"]),
+        # keywords
+        ("function Function FUNCTION", ["keyword function", "id Function", "id FUNCTION"]),
+        ("const constructor in inherits", ["keyword const", "id constructor", "keyword in", "id inherits"]),
+        ("true true_enough", ["reserved true", "id true_enough"]),
+        # strings
+        (''' 'hello' "hello" ''', ["string 'hello'", 'string "hello"']),
+        (r""" 'don\'t' "don\"t" '"' "'" '\'' "\"" """,
+         [r"""string 'don\'t'""", r'''string "don\"t"''', r"""string '"'""", r'''string "'"''', r"""string '\''""", r'''string "\""''']),
+        (ur'"ƃuıxǝ⅂ ʇdıɹɔsɐʌɐſ\""', [ur'string "ƃuıxǝ⅂ ʇdıɹɔsɐʌɐſ\""']),
+        # comments
+        ("a//b", ["id a", "linecomment //b"]),
+        ("/****/a/=2//hello", ["comment /****/", "id a", "punct /=", "dnum 2", "linecomment //hello"]),
+        ("/*\n * Header\n */\na=1;", ["comment /*\n * Header\n */", "id a", "punct =", "dnum 1", "punct ;"]),
+        # punctuation
+        ("a+++b", ["id a", "punct ++", "punct +", "id b"]),
+        # regex
+        (r"a=/a*/,1", ["id a", "punct =", "regex /a*/", "punct ,", "dnum 1"]),
+        (r"a=/a*[^/]+/,1", ["id a", "punct =", "regex /a*[^/]+/", "punct ,", "dnum 1"]),
+        (r"a=/a*\[^/,1", ["id a", "punct =", r"regex /a*\[^/", "punct ,", "dnum 1"]),
+        (r"a=/\//,1", ["id a", "punct =", r"regex /\//", "punct ,", "dnum 1"]),
+
+        # next two are from http://www.mozilla.org/js/language/js20-2002-04/rationale/syntax.html#regular-expressions
+        ("""for (var x = a in foo && "</x>" || mot ? z:/x:3;x<5;y</g/i) {xyz(x++);}""",
+            ["keyword for", "punct (", "keyword var", "id x", "punct =", "id a", "keyword in",
+            "id foo", "punct &&", 'string "</x>"', "punct ||", "id mot", "punct ?", "id z",
+            "punct :", "regex /x:3;x<5;y</g", "punct /", "id i", "punct )", "punct {",
+            "id xyz", "punct (", "id x", "punct ++", "punct )", "punct ;", "punct }"]),
+        ("""for (var x = a in foo && "</x>" || mot ? z/x:3;x<5;y</g/i) {xyz(x++);}""",
+            ["keyword for", "punct (", "keyword var", "id x", "punct =", "id a", "keyword in",
+            "id foo", "punct &&", 'string "</x>"', "punct ||", "id mot", "punct ?", "id z",
+            "punct /", "id x", "punct :", "dnum 3", "punct ;", "id x", "punct <", "dnum 5",
+            "punct ;", "id y", "punct <", "regex /g/i", "punct )", "punct {",
+            "id xyz", "punct (", "id x", "punct ++", "punct )", "punct ;", "punct }"]),
+
+        # Various "illegal" regexes that are valid according to the std.
+        (r"""/????/, /++++/, /[----]/ """, ["regex /????/", "punct ,", "regex /++++/", "punct ,", "regex /[----]/"]),
+
+        # Stress cases from http://stackoverflow.com/questions/5533925/what-javascript-constructs-does-jslex-incorrectly-lex/5573409#5573409
+        (r"""/\[/""", [r"""regex /\[/"""]),
+        (r"""/[i]/""", [r"""regex /[i]/"""]),
+        (r"""/[\]]/""", [r"""regex /[\]]/"""]),
+        (r"""/a[\]]/""", [r"""regex /a[\]]/"""]),
+        (r"""/a[\]]b/""", [r"""regex /a[\]]b/"""]),
+        (r"""/[\]/]/gi""", [r"""regex /[\]/]/gi"""]),
+        (r"""/\[[^\]]+\]/gi""", [r"""regex /\[[^\]]+\]/gi"""]),
+        ("""
+            rexl.re = {
+            NAME: /^(?!\d)(?:\w)+|^"(?:[^"]|"")+"/,
+            UNQUOTED_LITERAL: /^@(?:(?!\d)(?:\w|\:)+|^"(?:[^"]|"")+")\[[^\]]+\]/,
+            QUOTED_LITERAL: /^'(?:[^']|'')*'/,
+            NUMERIC_LITERAL: /^[0-9]+(?:\.[0-9]*(?:[eE][-+][0-9]+)?)?/,
+            SYMBOL: /^(?:==|=|<>|<=|<|>=|>|!~~|!~|~~|~|!==|!=|!~=|!~|!|&|\||\.|\:|,|\(|\)|\[|\]|\{|\}|\?|\:|;|@|\^|\/\+|\/|\*|\+|-)/
+            };
+        """,
+        ["id rexl", "punct .", "id re", "punct =", "punct {",
+         "id NAME", "punct :", r"""regex /^(?!\d)(?:\w)+|^"(?:[^"]|"")+"/""", "punct ,",
+         "id UNQUOTED_LITERAL", "punct :", r"""regex /^@(?:(?!\d)(?:\w|\:)+|^"(?:[^"]|"")+")\[[^\]]+\]/""", "punct ,",
+         "id QUOTED_LITERAL", "punct :", r"""regex /^'(?:[^']|'')*'/""", "punct ,",
+         "id NUMERIC_LITERAL", "punct :", r"""regex /^[0-9]+(?:\.[0-9]*(?:[eE][-+][0-9]+)?)?/""", "punct ,",
+         "id SYMBOL", "punct :", r"""regex /^(?:==|=|<>|<=|<|>=|>|!~~|!~|~~|~|!==|!=|!~=|!~|!|&|\||\.|\:|,|\(|\)|\[|\]|\{|\}|\?|\:|;|@|\^|\/\+|\/|\*|\+|-)/""",
+         "punct }", "punct ;"
+         ]),
+
+        ("""
+            rexl.re = {
+            NAME: /^(?!\d)(?:\w)+|^"(?:[^"]|"")+"/,
+            UNQUOTED_LITERAL: /^@(?:(?!\d)(?:\w|\:)+|^"(?:[^"]|"")+")\[[^\]]+\]/,
+            QUOTED_LITERAL: /^'(?:[^']|'')*'/,
+            NUMERIC_LITERAL: /^[0-9]+(?:\.[0-9]*(?:[eE][-+][0-9]+)?)?/,
+            SYMBOL: /^(?:==|=|<>|<=|<|>=|>|!~~|!~|~~|~|!==|!=|!~=|!~|!|&|\||\.|\:|,|\(|\)|\[|\]|\{|\}|\?|\:|;|@|\^|\/\+|\/|\*|\+|-)/
+            };
+            str = '"';
+        """,
+        ["id rexl", "punct .", "id re", "punct =", "punct {",
+         "id NAME", "punct :", r"""regex /^(?!\d)(?:\w)+|^"(?:[^"]|"")+"/""", "punct ,",
+         "id UNQUOTED_LITERAL", "punct :", r"""regex /^@(?:(?!\d)(?:\w|\:)+|^"(?:[^"]|"")+")\[[^\]]+\]/""", "punct ,",
+         "id QUOTED_LITERAL", "punct :", r"""regex /^'(?:[^']|'')*'/""", "punct ,",
+         "id NUMERIC_LITERAL", "punct :", r"""regex /^[0-9]+(?:\.[0-9]*(?:[eE][-+][0-9]+)?)?/""", "punct ,",
+         "id SYMBOL", "punct :", r"""regex /^(?:==|=|<>|<=|<|>=|>|!~~|!~|~~|~|!==|!=|!~=|!~|!|&|\||\.|\:|,|\(|\)|\[|\]|\{|\}|\?|\:|;|@|\^|\/\+|\/|\*|\+|-)/""",
+         "punct }", "punct ;",
+         "id str", "punct =", """string '"'""", "punct ;",
+         ]),
+
+        (r""" this._js = "e.str(\"" + this.value.replace(/\\/g, "\\\\").replace(/"/g, "\\\"") + "\")"; """,
+         ["keyword this", "punct .", "id _js", "punct =", r'''string "e.str(\""''', "punct +", "keyword this", "punct .",
+          "id value", "punct .", "id replace", "punct (", r"regex /\\/g", "punct ,", r'string "\\\\"', "punct )",
+          "punct .", "id replace", "punct (", r'regex /"/g', "punct ,", r'string "\\\""', "punct )", "punct +",
+          r'string "\")"', "punct ;"]),
+        ]
+
+def make_function(input, toks):
+    def test_func(self):
+        # Lex `input`, drop whitespace, compare "name text" against `toks`.
+        pairs = [pair for pair in JsLexer().lex(input) if pair[0] != 'ws']
+        self.assertListEqual(["%s %s" % pair for pair in pairs], toks)
+    return test_func
+
+for i, (input, toks) in enumerate(JsTokensTest.LEX_CASES):
+    setattr(JsTokensTest, "test_case_%d" % i, make_function(input, toks))
+
+
+GETTEXT_CASES = (
+    (
+        r"""
+            a = 1; /* /[0-9]+/ */
+            b = 0x2a0b / 1; // /[0-9]+/
+            c = 3;
+        """,
+        r"""
+            a = 1; /* /[0-9]+/ */
+            b = 0x2a0b / 1; // /[0-9]+/
+            c = 3;
+        """
+    ), (
+        r"""
+            a = 1.234e-5;
+            /*
+             * /[0-9+/
+             */
+            b = .0123;
+        """,
+        r"""
+            a = 1.234e-5;
+            /*
+             * /[0-9+/
+             */
+            b = .0123;
+        """
+    ), (
+        r"""
+            x = y / z;
+            alert(gettext("hello"));
+            x /= 3;
+        """,
+        r"""
+            x = y / z;
+            alert(gettext("hello"));
+            x /= 3;
+        """
+    ), (
+        r"""
+            s = "Hello \"th/foo/ere\"";
+            s = 'He\x23llo \'th/foo/ere\'';
+            s = 'slash quote \", just quote "';
+        """,
+        r"""
+            s = "Hello \"th/foo/ere\"";
+            s = "He\x23llo \'th/foo/ere\'";
+            s = "slash quote \", just quote \"";
+        """
+    ), (
+        r"""
+            s = "Line continuation\
+            continued /hello/ still the string";/hello/;
+        """,
+        r"""
+            s = "Line continuation\
+            continued /hello/ still the string";"REGEX";
+        """
+    ), (
+        r"""
+            var regex = /pattern/;
+            var regex2 = /matter/gm;
+            var regex3 = /[*/]+/gm.foo("hey");
+        """,
+        r"""
+            var regex = "REGEX";
+            var regex2 = "REGEX";
+            var regex3 = "REGEX".foo("hey");
+        """
+    ), (
+        r"""
+            for (var x = a in foo && "</x>" || mot ? z:/x:3;x<5;y</g/i) {xyz(x++);}
+            for (var x = a in foo && "</x>" || mot ? z/x:3;x<5;y</g/i) {xyz(x++);}
+        """,
+        r"""
+            for (var x = a in foo && "</x>" || mot ? z:"REGEX"/i) {xyz(x++);}
+            for (var x = a in foo && "</x>" || mot ? z/x:3;x<5;y<"REGEX") {xyz(x++);}
+        """
+    ), (
+        r"""
+            \u1234xyz = gettext('Hello there');
+        """, r"""
+            Uu1234xyz = gettext("Hello there");
+        """
+    )
+)
+
+
+class JsToCForGettextTest(TestCase):
+    pass
+
+def make_function(js, c):  # rebinds the name; the lexer tests are already built
+    def test_func(self):
+        self.assertMultiLineEqual(prepare_js_for_gettext(js), c)
+    return test_func
+
+for i, pair in enumerate(GETTEXT_CASES):
+    setattr(JsToCForGettextTest, "test_case_%d" % i, make_function(*pair))
diff --git a/tests/regressiontests/utils/tests.py b/tests/regressiontests/utils/tests.py
index 2b61627c48..384d394879 100644
--- a/tests/regressiontests/utils/tests.py
+++ b/tests/regressiontests/utils/tests.py
@@ -18,3 +18,4 @@ from datastructures import *
 from tzinfo import *
 from datetime_safe import *
 from baseconv import *
+from jslex import *