django1/django/core/management/commands/makemessages.py

784 lines
29 KiB
Python

import glob
import os
import re
import sys
from functools import total_ordering
from itertools import dropwhile
from pathlib import Path
import django
from django.conf import settings
from django.core.exceptions import ImproperlyConfigured
from django.core.files.temp import NamedTemporaryFile
from django.core.management.base import BaseCommand, CommandError
from django.core.management.utils import (
find_command,
handle_extensions,
is_ignored_path,
popen_wrapper,
)
from django.utils.encoding import DEFAULT_LOCALE_ENCODING
from django.utils.functional import cached_property
from django.utils.jslex import prepare_js_for_gettext
from django.utils.regex_helper import _lazy_re_compile
from django.utils.text import get_text_list
from django.utils.translation import templatize
plural_forms_re = _lazy_re_compile(
r'^(?P<value>"Plural-Forms.+?\\n")\s*$', re.MULTILINE | re.DOTALL
)
STATUS_OK = 0
NO_LOCALE_DIR = object()
def check_programs(*programs):
for program in programs:
if find_command(program) is None:
raise CommandError(
"Can't find %s. Make sure you have GNU gettext tools 0.15 or "
"newer installed." % program
)
def is_valid_locale(locale):
return re.match(r"^[a-z]+$", locale) or re.match(r"^[a-z]+_[A-Z].*$", locale)
@total_ordering
class TranslatableFile:
def __init__(self, dirpath, file_name, locale_dir):
self.file = file_name
self.dirpath = dirpath
self.locale_dir = locale_dir
def __repr__(self):
return "<%s: %s>" % (
self.__class__.__name__,
os.sep.join([self.dirpath, self.file]),
)
def __eq__(self, other):
return self.path == other.path
def __lt__(self, other):
return self.path < other.path
@property
def path(self):
return os.path.join(self.dirpath, self.file)
class BuildFile:
"""
Represent the state of a translatable file during the build process.
"""
def __init__(self, command, domain, translatable):
self.command = command
self.domain = domain
self.translatable = translatable
@cached_property
def is_templatized(self):
if self.domain == "djangojs":
return self.command.gettext_version < (0, 18, 3)
elif self.domain == "django":
file_ext = os.path.splitext(self.translatable.file)[1]
return file_ext != ".py"
return False
@cached_property
def path(self):
return self.translatable.path
@cached_property
def work_path(self):
"""
Path to a file which is being fed into GNU gettext pipeline. This may
be either a translatable or its preprocessed version.
"""
if not self.is_templatized:
return self.path
extension = {
"djangojs": "c",
"django": "py",
}.get(self.domain)
filename = "%s.%s" % (self.translatable.file, extension)
return os.path.join(self.translatable.dirpath, filename)
def preprocess(self):
"""
Preprocess (if necessary) a translatable file before passing it to
xgettext GNU gettext utility.
"""
if not self.is_templatized:
return
with open(self.path, encoding="utf-8") as fp:
src_data = fp.read()
if self.domain == "djangojs":
content = prepare_js_for_gettext(src_data)
elif self.domain == "django":
content = templatize(src_data, origin=self.path[2:])
with open(self.work_path, "w", encoding="utf-8") as fp:
fp.write(content)
def postprocess_messages(self, msgs):
"""
Postprocess messages generated by xgettext GNU gettext utility.
Transform paths as if these messages were generated from original
translatable files rather than from preprocessed versions.
"""
if not self.is_templatized:
return msgs
# Remove '.py' suffix
if os.name == "nt":
# Preserve '.\' prefix on Windows to respect gettext behavior
old_path = self.work_path
new_path = self.path
else:
old_path = self.work_path[2:]
new_path = self.path[2:]
return re.sub(
r"^(#: .*)(" + re.escape(old_path) + r")",
lambda match: match[0].replace(old_path, new_path),
msgs,
flags=re.MULTILINE,
)
def cleanup(self):
"""
Remove a preprocessed copy of a translatable file (if any).
"""
if self.is_templatized:
# This check is needed for the case of a symlinked file and its
# source being processed inside a single group (locale dir);
# removing either of those two removes both.
if os.path.exists(self.work_path):
os.unlink(self.work_path)
def normalize_eols(raw_contents):
"""
Take a block of raw text that will be passed through str.splitlines() to
get universal newlines treatment.
Return the resulting block of text with normalized `\n` EOL sequences ready
to be written to disk using current platform's native EOLs.
"""
lines_list = raw_contents.splitlines()
# Ensure last line has its EOL
if lines_list and lines_list[-1]:
lines_list.append("")
return "\n".join(lines_list)
def write_pot_file(potfile, msgs):
"""
Write the `potfile` with the `msgs` contents, making sure its format is
valid.
"""
pot_lines = msgs.splitlines()
if os.path.exists(potfile):
# Strip the header
lines = dropwhile(len, pot_lines)
else:
lines = []
found, header_read = False, False
for line in pot_lines:
if not found and not header_read:
if "charset=CHARSET" in line:
found = True
line = line.replace("charset=CHARSET", "charset=UTF-8")
if not line and not found:
header_read = True
lines.append(line)
msgs = "\n".join(lines)
# Force newlines of POT files to '\n' to work around
# https://savannah.gnu.org/bugs/index.php?52395
with open(potfile, "a", encoding="utf-8", newline="\n") as fp:
fp.write(msgs)
class Command(BaseCommand):
help = (
"Runs over the entire source tree of the current directory and pulls out all "
"strings marked for translation. It creates (or updates) a message file in the "
"conf/locale (in the django tree) or locale (for projects and applications) "
"directory.\n\nYou must run this command with one of either the --locale, "
"--exclude, or --all options."
)
translatable_file_class = TranslatableFile
build_file_class = BuildFile
requires_system_checks = []
msgmerge_options = ["-q", "--backup=none", "--previous", "--update"]
msguniq_options = ["--to-code=utf-8"]
msgattrib_options = ["--no-obsolete"]
xgettext_options = ["--from-code=UTF-8", "--add-comments=Translators"]
def add_arguments(self, parser):
parser.add_argument(
"--locale",
"-l",
default=[],
action="append",
help=(
"Creates or updates the message files for the given locale(s) (e.g. "
"pt_BR). Can be used multiple times."
),
)
parser.add_argument(
"--exclude",
"-x",
default=[],
action="append",
help="Locales to exclude. Default is none. Can be used multiple times.",
)
parser.add_argument(
"--domain",
"-d",
default="django",
help='The domain of the message files (default: "django").',
)
parser.add_argument(
"--all",
"-a",
action="store_true",
help="Updates the message files for all existing locales.",
)
parser.add_argument(
"--extension",
"-e",
dest="extensions",
action="append",
help='The file extension(s) to examine (default: "html,txt,py", or "js" '
'if the domain is "djangojs"). Separate multiple extensions with '
"commas, or use -e multiple times.",
)
parser.add_argument(
"--symlinks",
"-s",
action="store_true",
help="Follows symlinks to directories when examining source code "
"and templates for translation strings.",
)
parser.add_argument(
"--ignore",
"-i",
action="append",
dest="ignore_patterns",
default=[],
metavar="PATTERN",
help="Ignore files or directories matching this glob-style pattern. "
"Use multiple times to ignore more.",
)
parser.add_argument(
"--no-default-ignore",
action="store_false",
dest="use_default_ignore_patterns",
help=(
"Don't ignore the common glob-style patterns 'CVS', '.*', '*~' and "
"'*.pyc'."
),
)
parser.add_argument(
"--no-wrap",
action="store_true",
help="Don't break long message lines into several lines.",
)
parser.add_argument(
"--no-location",
action="store_true",
help="Don't write '#: filename:line' lines.",
)
parser.add_argument(
"--add-location",
choices=("full", "file", "never"),
const="full",
nargs="?",
help=(
"Controls '#: filename:line' lines. If the option is 'full' "
"(the default if not given), the lines include both file name "
"and line number. If it's 'file', the line number is omitted. If "
"it's 'never', the lines are suppressed (same as --no-location). "
"--add-location requires gettext 0.19 or newer."
),
)
parser.add_argument(
"--no-obsolete",
action="store_true",
help="Remove obsolete message strings.",
)
parser.add_argument(
"--keep-pot",
action="store_true",
help="Keep .pot file after making messages. Useful when debugging.",
)
def handle(self, *args, **options):
locale = options["locale"]
exclude = options["exclude"]
self.domain = options["domain"]
self.verbosity = options["verbosity"]
process_all = options["all"]
extensions = options["extensions"]
self.symlinks = options["symlinks"]
ignore_patterns = options["ignore_patterns"]
if options["use_default_ignore_patterns"]:
ignore_patterns += ["CVS", ".*", "*~", "*.pyc"]
self.ignore_patterns = list(set(ignore_patterns))
# Avoid messing with mutable class variables
if options["no_wrap"]:
self.msgmerge_options = self.msgmerge_options[:] + ["--no-wrap"]
self.msguniq_options = self.msguniq_options[:] + ["--no-wrap"]
self.msgattrib_options = self.msgattrib_options[:] + ["--no-wrap"]
self.xgettext_options = self.xgettext_options[:] + ["--no-wrap"]
if options["no_location"]:
self.msgmerge_options = self.msgmerge_options[:] + ["--no-location"]
self.msguniq_options = self.msguniq_options[:] + ["--no-location"]
self.msgattrib_options = self.msgattrib_options[:] + ["--no-location"]
self.xgettext_options = self.xgettext_options[:] + ["--no-location"]
if options["add_location"]:
if self.gettext_version < (0, 19):
raise CommandError(
"The --add-location option requires gettext 0.19 or later. "
"You have %s." % ".".join(str(x) for x in self.gettext_version)
)
arg_add_location = "--add-location=%s" % options["add_location"]
self.msgmerge_options = self.msgmerge_options[:] + [arg_add_location]
self.msguniq_options = self.msguniq_options[:] + [arg_add_location]
self.msgattrib_options = self.msgattrib_options[:] + [arg_add_location]
self.xgettext_options = self.xgettext_options[:] + [arg_add_location]
self.no_obsolete = options["no_obsolete"]
self.keep_pot = options["keep_pot"]
if self.domain not in ("django", "djangojs"):
raise CommandError(
"currently makemessages only supports domains "
"'django' and 'djangojs'"
)
if self.domain == "djangojs":
exts = extensions or ["js"]
else:
exts = extensions or ["html", "txt", "py"]
self.extensions = handle_extensions(exts)
if (not locale and not exclude and not process_all) or self.domain is None:
raise CommandError(
"Type '%s help %s' for usage information."
% (os.path.basename(sys.argv[0]), sys.argv[1])
)
if self.verbosity > 1:
self.stdout.write(
"examining files with the extensions: %s"
% get_text_list(list(self.extensions), "and")
)
self.invoked_for_django = False
self.locale_paths = []
self.default_locale_path = None
if os.path.isdir(os.path.join("conf", "locale")):
self.locale_paths = [os.path.abspath(os.path.join("conf", "locale"))]
self.default_locale_path = self.locale_paths[0]
self.invoked_for_django = True
else:
if self.settings_available:
self.locale_paths.extend(settings.LOCALE_PATHS)
# Allow to run makemessages inside an app dir
if os.path.isdir("locale"):
self.locale_paths.append(os.path.abspath("locale"))
if self.locale_paths:
self.default_locale_path = self.locale_paths[0]
os.makedirs(self.default_locale_path, exist_ok=True)
# Build locale list
looks_like_locale = re.compile(r"[a-z]{2}")
locale_dirs = filter(
os.path.isdir, glob.glob("%s/*" % self.default_locale_path)
)
all_locales = [
lang_code
for lang_code in map(os.path.basename, locale_dirs)
if looks_like_locale.match(lang_code)
]
# Account for excluded locales
if process_all:
locales = all_locales
else:
locales = locale or all_locales
locales = set(locales).difference(exclude)
if locales:
check_programs("msguniq", "msgmerge", "msgattrib")
check_programs("xgettext")
try:
potfiles = self.build_potfiles()
# Build po files for each selected locale
for locale in locales:
if not is_valid_locale(locale):
# Try to guess what valid locale it could be
# Valid examples are: en_GB, shi_Latn_MA and nl_NL-x-informal
# Search for characters followed by a non character (i.e. separator)
match = re.match(
r"^(?P<language>[a-zA-Z]+)"
r"(?P<separator>[^a-zA-Z])"
r"(?P<territory>.+)$",
locale,
)
if match:
locale_parts = match.groupdict()
language = locale_parts["language"].lower()
territory = (
locale_parts["territory"][:2].upper()
+ locale_parts["territory"][2:]
)
proposed_locale = f"{language}_{territory}"
else:
# It could be a language in uppercase
proposed_locale = locale.lower()
# Recheck if the proposed locale is valid
if is_valid_locale(proposed_locale):
self.stdout.write(
"invalid locale %s, did you mean %s?"
% (
locale,
proposed_locale,
),
)
else:
self.stdout.write("invalid locale %s" % locale)
continue
if self.verbosity > 0:
self.stdout.write("processing locale %s" % locale)
for potfile in potfiles:
self.write_po_file(potfile, locale)
finally:
if not self.keep_pot:
self.remove_potfiles()
@cached_property
def gettext_version(self):
# Gettext tools will output system-encoded bytestrings instead of UTF-8,
# when looking up the version. It's especially a problem on Windows.
out, err, status = popen_wrapper(
["xgettext", "--version"],
stdout_encoding=DEFAULT_LOCALE_ENCODING,
)
m = re.search(r"(\d+)\.(\d+)\.?(\d+)?", out)
if m:
return tuple(int(d) for d in m.groups() if d is not None)
else:
raise CommandError("Unable to get gettext version. Is it installed?")
@cached_property
def settings_available(self):
try:
settings.LOCALE_PATHS
except ImproperlyConfigured:
if self.verbosity > 1:
self.stderr.write("Running without configured settings.")
return False
return True
def build_potfiles(self):
"""
Build pot files and apply msguniq to them.
"""
file_list = self.find_files(".")
self.remove_potfiles()
self.process_files(file_list)
potfiles = []
for path in self.locale_paths:
potfile = os.path.join(path, "%s.pot" % self.domain)
if not os.path.exists(potfile):
continue
args = ["msguniq"] + self.msguniq_options + [potfile]
msgs, errors, status = popen_wrapper(args)
if errors:
if status != STATUS_OK:
raise CommandError(
"errors happened while running msguniq\n%s" % errors
)
elif self.verbosity > 0:
self.stdout.write(errors)
msgs = normalize_eols(msgs)
with open(potfile, "w", encoding="utf-8") as fp:
fp.write(msgs)
potfiles.append(potfile)
return potfiles
def remove_potfiles(self):
for path in self.locale_paths:
pot_path = os.path.join(path, "%s.pot" % self.domain)
if os.path.exists(pot_path):
os.unlink(pot_path)
def find_files(self, root):
"""
Get all files in the given root. Also check that there is a matching
locale dir for each file.
"""
all_files = []
ignored_roots = []
if self.settings_available:
ignored_roots = [
os.path.normpath(p)
for p in (settings.MEDIA_ROOT, settings.STATIC_ROOT)
if p
]
for dirpath, dirnames, filenames in os.walk(
root, topdown=True, followlinks=self.symlinks
):
for dirname in dirnames[:]:
if (
is_ignored_path(
os.path.normpath(os.path.join(dirpath, dirname)),
self.ignore_patterns,
)
or os.path.join(os.path.abspath(dirpath), dirname) in ignored_roots
):
dirnames.remove(dirname)
if self.verbosity > 1:
self.stdout.write("ignoring directory %s" % dirname)
elif dirname == "locale":
dirnames.remove(dirname)
self.locale_paths.insert(
0, os.path.join(os.path.abspath(dirpath), dirname)
)
for filename in filenames:
file_path = os.path.normpath(os.path.join(dirpath, filename))
file_ext = os.path.splitext(filename)[1]
if file_ext not in self.extensions or is_ignored_path(
file_path, self.ignore_patterns
):
if self.verbosity > 1:
self.stdout.write(
"ignoring file %s in %s" % (filename, dirpath)
)
else:
locale_dir = None
for path in self.locale_paths:
if os.path.abspath(dirpath).startswith(os.path.dirname(path)):
locale_dir = path
break
locale_dir = locale_dir or self.default_locale_path or NO_LOCALE_DIR
all_files.append(
self.translatable_file_class(dirpath, filename, locale_dir)
)
return sorted(all_files)
def process_files(self, file_list):
"""
Group translatable files by locale directory and run pot file build
process for each group.
"""
file_groups = {}
for translatable in file_list:
file_group = file_groups.setdefault(translatable.locale_dir, [])
file_group.append(translatable)
for locale_dir, files in file_groups.items():
self.process_locale_dir(locale_dir, files)
def process_locale_dir(self, locale_dir, files):
"""
Extract translatable literals from the specified files, creating or
updating the POT file for a given locale directory.
Use the xgettext GNU gettext utility.
"""
build_files = []
for translatable in files:
if self.verbosity > 1:
self.stdout.write(
"processing file %s in %s"
% (translatable.file, translatable.dirpath)
)
if self.domain not in ("djangojs", "django"):
continue
build_file = self.build_file_class(self, self.domain, translatable)
try:
build_file.preprocess()
except UnicodeDecodeError as e:
self.stdout.write(
"UnicodeDecodeError: skipped file %s in %s (reason: %s)"
% (
translatable.file,
translatable.dirpath,
e,
)
)
continue
except BaseException:
# Cleanup before exit.
for build_file in build_files:
build_file.cleanup()
raise
build_files.append(build_file)
if self.domain == "djangojs":
is_templatized = build_file.is_templatized
args = [
"xgettext",
"-d",
self.domain,
"--language=%s" % ("C" if is_templatized else "JavaScript",),
"--keyword=gettext_noop",
"--keyword=gettext_lazy",
"--keyword=ngettext_lazy:1,2",
"--keyword=pgettext:1c,2",
"--keyword=npgettext:1c,2,3",
"--output=-",
]
elif self.domain == "django":
args = [
"xgettext",
"-d",
self.domain,
"--language=Python",
"--keyword=gettext_noop",
"--keyword=gettext_lazy",
"--keyword=ngettext_lazy:1,2",
"--keyword=pgettext:1c,2",
"--keyword=npgettext:1c,2,3",
"--keyword=pgettext_lazy:1c,2",
"--keyword=npgettext_lazy:1c,2,3",
"--output=-",
]
else:
return
input_files = [bf.work_path for bf in build_files]
with NamedTemporaryFile(mode="w+") as input_files_list:
input_files_list.write("\n".join(input_files))
input_files_list.flush()
args.extend(["--files-from", input_files_list.name])
args.extend(self.xgettext_options)
msgs, errors, status = popen_wrapper(args)
if errors:
if status != STATUS_OK:
for build_file in build_files:
build_file.cleanup()
raise CommandError(
"errors happened while running xgettext on %s\n%s"
% ("\n".join(input_files), errors)
)
elif self.verbosity > 0:
# Print warnings
self.stdout.write(errors)
if msgs:
if locale_dir is NO_LOCALE_DIR:
for build_file in build_files:
build_file.cleanup()
file_path = os.path.normpath(build_files[0].path)
raise CommandError(
"Unable to find a locale path to store translations for "
"file %s. Make sure the 'locale' directory exists in an "
"app or LOCALE_PATHS setting is set." % file_path
)
for build_file in build_files:
msgs = build_file.postprocess_messages(msgs)
potfile = os.path.join(locale_dir, "%s.pot" % self.domain)
write_pot_file(potfile, msgs)
for build_file in build_files:
build_file.cleanup()
def write_po_file(self, potfile, locale):
"""
Create or update the PO file for self.domain and `locale`.
Use contents of the existing `potfile`.
Use msgmerge and msgattrib GNU gettext utilities.
"""
basedir = os.path.join(os.path.dirname(potfile), locale, "LC_MESSAGES")
os.makedirs(basedir, exist_ok=True)
pofile = os.path.join(basedir, "%s.po" % self.domain)
if os.path.exists(pofile):
args = ["msgmerge"] + self.msgmerge_options + [pofile, potfile]
_, errors, status = popen_wrapper(args)
if errors:
if status != STATUS_OK:
raise CommandError(
"errors happened while running msgmerge\n%s" % errors
)
elif self.verbosity > 0:
self.stdout.write(errors)
msgs = Path(pofile).read_text(encoding="utf-8")
else:
with open(potfile, encoding="utf-8") as fp:
msgs = fp.read()
if not self.invoked_for_django:
msgs = self.copy_plural_forms(msgs, locale)
msgs = normalize_eols(msgs)
msgs = msgs.replace(
"#. #-#-#-#-# %s.pot (PACKAGE VERSION) #-#-#-#-#\n" % self.domain, ""
)
with open(pofile, "w", encoding="utf-8") as fp:
fp.write(msgs)
if self.no_obsolete:
args = ["msgattrib"] + self.msgattrib_options + ["-o", pofile, pofile]
msgs, errors, status = popen_wrapper(args)
if errors:
if status != STATUS_OK:
raise CommandError(
"errors happened while running msgattrib\n%s" % errors
)
elif self.verbosity > 0:
self.stdout.write(errors)
def copy_plural_forms(self, msgs, locale):
"""
Copy plural forms header contents from a Django catalog of locale to
the msgs string, inserting it at the right place. msgs should be the
contents of a newly created .po file.
"""
django_dir = os.path.normpath(os.path.join(os.path.dirname(django.__file__)))
if self.domain == "djangojs":
domains = ("djangojs", "django")
else:
domains = ("django",)
for domain in domains:
django_po = os.path.join(
django_dir, "conf", "locale", locale, "LC_MESSAGES", "%s.po" % domain
)
if os.path.exists(django_po):
with open(django_po, encoding="utf-8") as fp:
m = plural_forms_re.search(fp.read())
if m:
plural_form_line = m["value"]
if self.verbosity > 1:
self.stdout.write("copying plural forms: %s" % plural_form_line)
lines = []
found = False
for line in msgs.splitlines():
if not found and (not line or plural_forms_re.search(line)):
line = plural_form_line
found = True
lines.append(line)
msgs = "\n".join(lines)
break
return msgs