diff --git a/Doc/library/argparse.rst b/Doc/library/argparse.rst
index a4695547921faa..c5882c273717c0 100644
--- a/Doc/library/argparse.rst
+++ b/Doc/library/argparse.rst
@@ -192,6 +192,12 @@ arguments it contains. The default message can be overridden with the
The ``%(prog)s`` format specifier is available to fill in the program name in
your usage messages.
+When a custom usage message is specified for the main parser, you may also want to
+consider passing the ``prog`` argument to :meth:`~ArgumentParser.add_subparsers`
+or the ``prog`` and the ``usage`` arguments to
+:meth:`~_SubParsersAction.add_parser`, to ensure consistent command prefixes and
+usage information across subparsers.
+
.. _description:
@@ -1810,6 +1816,10 @@ Sub-commands
.. versionchanged:: 3.7
New *required* keyword-only parameter.
+ .. versionchanged:: 3.14
+ Subparser's *prog* is no longer affected by a custom usage message in
+ the main parser.
+
FileType objects
^^^^^^^^^^^^^^^^
diff --git a/Lib/argparse.py b/Lib/argparse.py
index 5ecfdca17175e3..f5a7342c2fc355 100644
--- a/Lib/argparse.py
+++ b/Lib/argparse.py
@@ -1889,7 +1889,7 @@ def add_subparsers(self, **kwargs):
formatter = self._get_formatter()
positionals = self._get_positional_actions()
groups = self._mutually_exclusive_groups
- formatter.add_usage(self.usage, positionals, groups, '')
+ formatter.add_usage(None, positionals, groups, '')
kwargs['prog'] = formatter.format_help().strip()
# create the parsers action and add it to the positionals list
diff --git a/Lib/test/support/strace_helper.py b/Lib/test/support/strace_helper.py
index 90281b47274299..eab16ea3e2889f 100644
--- a/Lib/test/support/strace_helper.py
+++ b/Lib/test/support/strace_helper.py
@@ -71,6 +71,27 @@ def sections(self):
return sections
+def _filter_memory_call(call):
+ # mmap can operate on a fd or "MAP_ANONYMOUS" which gives a block of memory.
+    # Ignore "MAP_ANONYMOUS" + the "MAP_ANON" alias.
+ if call.syscall == "mmap" and "MAP_ANON" in call.args[3]:
+ return True
+
+ if call.syscall in ("munmap", "mprotect"):
+ return True
+
+ return False
+
+
+def filter_memory(syscalls):
+ """Filter out memory allocation calls from File I/O calls.
+
+ Some calls (mmap, munmap, etc) can be used on files or to just get a block
+ of memory. Use this function to filter out the memory related calls from
+ other calls."""
+
+ return [call for call in syscalls if not _filter_memory_call(call)]
+
@support.requires_subprocess()
def strace_python(code, strace_flags, check=True):
@@ -93,8 +114,6 @@ def _make_error(reason, details):
"-c",
textwrap.dedent(code),
__run_using_command=[_strace_binary] + strace_flags,
- # Don't want to trace our JIT's own mmap and mprotect calls:
- PYTHON_JIT="0",
)
except OSError as err:
return _make_error("Caught OSError", err)
@@ -145,9 +164,14 @@ def get_events(code, strace_flags, prelude, cleanup):
return all_sections['code']
-def get_syscalls(code, strace_flags, prelude="", cleanup=""):
+def get_syscalls(code, strace_flags, prelude="", cleanup="",
+ ignore_memory=True):
"""Get the syscalls which a given chunk of python code generates"""
events = get_events(code, strace_flags, prelude=prelude, cleanup=cleanup)
+
+ if ignore_memory:
+ events = filter_memory(events)
+
return [ev.syscall for ev in events]
@@ -177,5 +201,5 @@ def requires_strace():
return unittest.skipUnless(_can_strace(), "Requires working strace")
-__all__ = ["get_events", "get_syscalls", "requires_strace", "strace_python",
- "StraceEvent", "StraceResult"]
+__all__ = ["filter_memory", "get_events", "get_syscalls", "requires_strace",
+ "strace_python", "StraceEvent", "StraceResult"]
diff --git a/Lib/test/test_argparse.py b/Lib/test/test_argparse.py
index 358cfb1c56aae4..69243fde5f0f98 100644
--- a/Lib/test/test_argparse.py
+++ b/Lib/test/test_argparse.py
@@ -2409,16 +2409,17 @@ def assertArgumentParserError(self, *args, **kwargs):
self.assertRaises(ArgumentParserError, *args, **kwargs)
def _get_parser(self, subparser_help=False, prefix_chars=None,
- aliases=False):
+ aliases=False, usage=None):
# create a parser with a subparsers argument
if prefix_chars:
parser = ErrorRaisingArgumentParser(
- prog='PROG', description='main description', prefix_chars=prefix_chars)
+ prog='PROG', description='main description', usage=usage,
+ prefix_chars=prefix_chars)
parser.add_argument(
prefix_chars[0] * 2 + 'foo', action='store_true', help='foo help')
else:
parser = ErrorRaisingArgumentParser(
- prog='PROG', description='main description')
+ prog='PROG', description='main description', usage=usage)
parser.add_argument(
'--foo', action='store_true', help='foo help')
parser.add_argument(
@@ -2455,7 +2456,8 @@ def _get_parser(self, subparser_help=False, prefix_chars=None,
parser2.add_argument('z', type=complex, nargs='*', help='z help')
# add third sub-parser
- parser3_kwargs = dict(description='3 description')
+ parser3_kwargs = dict(description='3 description',
+ usage='PROG --foo bar 3 t ...')
if subparser_help:
parser3_kwargs['help'] = '3 help'
parser3 = subparsers.add_parser('3', **parser3_kwargs)
@@ -2477,6 +2479,47 @@ def test_parse_args_failures(self):
args = args_str.split()
self.assertArgumentParserError(self.parser.parse_args, args)
+ def test_parse_args_failures_details(self):
+ for args_str, usage_str, error_str in [
+ ('',
+ 'usage: PROG [-h] [--foo] bar {1,2,3} ...',
+ 'PROG: error: the following arguments are required: bar'),
+ ('0.5 1 -y',
+ 'usage: PROG bar 1 [-h] [-w W] {a,b,c}',
+ 'PROG bar 1: error: the following arguments are required: x'),
+ ('0.5 3',
+ 'usage: PROG --foo bar 3 t ...',
+ 'PROG bar 3: error: the following arguments are required: t'),
+ ]:
+ with self.subTest(args_str):
+ args = args_str.split()
+ with self.assertRaises(ArgumentParserError) as cm:
+ self.parser.parse_args(args)
+ self.assertEqual(cm.exception.args[0], 'SystemExit')
+ self.assertEqual(cm.exception.args[2], f'{usage_str}\n{error_str}\n')
+
+ def test_parse_args_failures_details_custom_usage(self):
+ parser = self._get_parser(usage='PROG [--foo] bar 1 [-w W] {a,b,c}\n'
+ ' PROG --foo bar 3 t ...')
+ for args_str, usage_str, error_str in [
+ ('',
+ 'usage: PROG [--foo] bar 1 [-w W] {a,b,c}\n'
+ ' PROG --foo bar 3 t ...',
+ 'PROG: error: the following arguments are required: bar'),
+ ('0.5 1 -y',
+ 'usage: PROG bar 1 [-h] [-w W] {a,b,c}',
+ 'PROG bar 1: error: the following arguments are required: x'),
+ ('0.5 3',
+ 'usage: PROG --foo bar 3 t ...',
+ 'PROG bar 3: error: the following arguments are required: t'),
+ ]:
+ with self.subTest(args_str):
+ args = args_str.split()
+ with self.assertRaises(ArgumentParserError) as cm:
+ parser.parse_args(args)
+ self.assertEqual(cm.exception.args[0], 'SystemExit')
+ self.assertEqual(cm.exception.args[2], f'{usage_str}\n{error_str}\n')
+
def test_parse_args(self):
# check some non-failure cases:
self.assertEqual(
diff --git a/Lib/test/test_fileio.py b/Lib/test/test_fileio.py
index d60aabcdf1ae22..e681417e15d34b 100644
--- a/Lib/test/test_fileio.py
+++ b/Lib/test/test_fileio.py
@@ -364,8 +364,7 @@ def testErrnoOnClosedReadinto(self, f):
@strace_helper.requires_strace()
def test_syscalls_read(self):
- """Check that the set of system calls produced by the I/O stack is what
- is expected for various read cases.
+ """Check set of system calls during common I/O patterns
It's expected as bits of the I/O implementation change, this will need
to change. The goal is to catch changes that unintentionally add
@@ -383,6 +382,11 @@ def check_readall(name, code, prelude="", cleanup="",
prelude=prelude,
cleanup=cleanup)
+ # Some system calls (ex. mmap) can be used for both File I/O and
+ # memory allocation. Filter out the ones used for memory
+ # allocation.
+ syscalls = strace_helper.filter_memory(syscalls)
+
# The first call should be an open that returns a
# file descriptor (fd). Afer that calls may vary. Once the file
# is opened, check calls refer to it by fd as the filename
diff --git a/Lib/test/test_tools/i18n_data/messages.pot b/Lib/test/test_tools/i18n_data/messages.pot
index ddfbd18349ef4f..8d66fbc4f3a937 100644
--- a/Lib/test/test_tools/i18n_data/messages.pot
+++ b/Lib/test/test_tools/i18n_data/messages.pot
@@ -15,53 +15,75 @@ msgstr ""
"Generated-By: pygettext.py 1.5\n"
-#: messages.py:5
+#: messages.py:16
msgid ""
msgstr ""
-#: messages.py:8 messages.py:9
+#: messages.py:19 messages.py:20
msgid "parentheses"
msgstr ""
-#: messages.py:12
+#: messages.py:23
msgid "Hello, world!"
msgstr ""
-#: messages.py:15
+#: messages.py:26
msgid ""
"Hello,\n"
" multiline!\n"
msgstr ""
-#: messages.py:29
+#: messages.py:46 messages.py:89 messages.py:90 messages.py:93 messages.py:94
+#: messages.py:99
+msgid "foo"
+msgid_plural "foos"
+msgstr[0] ""
+msgstr[1] ""
+
+#: messages.py:47
+msgid "something"
+msgstr ""
+
+#: messages.py:50
msgid "Hello, {}!"
msgstr ""
-#: messages.py:33
+#: messages.py:54
msgid "1"
msgstr ""
-#: messages.py:33
+#: messages.py:54
msgid "2"
msgstr ""
-#: messages.py:34 messages.py:35
+#: messages.py:55 messages.py:56
msgid "A"
msgstr ""
-#: messages.py:34 messages.py:35
+#: messages.py:55 messages.py:56
msgid "B"
msgstr ""
-#: messages.py:36
+#: messages.py:57
msgid "set"
msgstr ""
-#: messages.py:42
+#: messages.py:63
msgid "nested string"
msgstr ""
-#: messages.py:47
+#: messages.py:68
msgid "baz"
msgstr ""
+#: messages.py:91 messages.py:92 messages.py:95 messages.py:96
+msgctxt "context"
+msgid "foo"
+msgid_plural "foos"
+msgstr[0] ""
+msgstr[1] ""
+
+#: messages.py:100
+msgid "domain foo"
+msgstr ""
+
diff --git a/Lib/test/test_tools/i18n_data/messages.py b/Lib/test/test_tools/i18n_data/messages.py
index f220294b8d5c67..1e03f4e556830d 100644
--- a/Lib/test/test_tools/i18n_data/messages.py
+++ b/Lib/test/test_tools/i18n_data/messages.py
@@ -1,5 +1,16 @@
# Test message extraction
-from gettext import gettext as _
+from gettext import (
+ gettext,
+ ngettext,
+ pgettext,
+ npgettext,
+ dgettext,
+ dngettext,
+ dpgettext,
+ dnpgettext
+)
+
+_ = gettext
# Empty string
_("")
@@ -21,13 +32,23 @@
_(None)
_(1)
_(False)
-_(x="kwargs are not allowed")
+_(("invalid"))
+_(["invalid"])
+_({"invalid"})
+_("string"[3])
+_("string"[:3])
+_({"string": "foo"})
+
+# pygettext does not allow keyword arguments, but both xgettext and pybabel do
+_(x="kwargs work!")
+
+# Unusual, but valid arguments
_("foo", "bar")
_("something", x="something else")
# .format()
_("Hello, {}!").format("world") # valid
-_("Hello, {}!".format("world")) # invalid
+_("Hello, {}!".format("world")) # invalid, but xgettext and pybabel extract the first string
# Nested structures
_("1"), _("2")
@@ -62,3 +83,28 @@ def _(x):
def _(x="don't extract me"):
pass
+
+
+# Other gettext functions
+gettext("foo")
+ngettext("foo", "foos", 1)
+pgettext("context", "foo")
+npgettext("context", "foo", "foos", 1)
+dgettext("domain", "foo")
+dngettext("domain", "foo", "foos", 1)
+dpgettext("domain", "context", "foo")
+dnpgettext("domain", "context", "foo", "foos", 1)
+
+# Complex arguments
+ngettext("foo", "foos", 42 + (10 - 20))
+dgettext(["some", {"complex"}, ("argument",)], "domain foo")
+
+# Invalid calls which are not extracted
+gettext()
+ngettext('foo')
+pgettext('context')
+npgettext('context', 'foo')
+dgettext('domain')
+dngettext('domain', 'foo')
+dpgettext('domain', 'context')
+dnpgettext('domain', 'context', 'foo')
diff --git a/Lib/test/test_tools/test_i18n.py b/Lib/test/test_tools/test_i18n.py
index 6f71f0976819f1..29c3423e234d20 100644
--- a/Lib/test/test_tools/test_i18n.py
+++ b/Lib/test/test_tools/test_i18n.py
@@ -332,14 +332,14 @@ def test_calls_in_fstring_with_multiple_args(self):
msgids = self.extract_docstrings_from_str(dedent('''\
f"{_('foo', 'bar')}"
'''))
- self.assertNotIn('foo', msgids)
+ self.assertIn('foo', msgids)
self.assertNotIn('bar', msgids)
def test_calls_in_fstring_with_keyword_args(self):
msgids = self.extract_docstrings_from_str(dedent('''\
f"{_('foo', bar='baz')}"
'''))
- self.assertNotIn('foo', msgids)
+ self.assertIn('foo', msgids)
self.assertNotIn('bar', msgids)
self.assertNotIn('baz', msgids)
diff --git a/Lib/test/translationdata/argparse/msgids.txt b/Lib/test/translationdata/argparse/msgids.txt
index 2b012906436e85..ae89ac74726ecf 100644
--- a/Lib/test/translationdata/argparse/msgids.txt
+++ b/Lib/test/translationdata/argparse/msgids.txt
@@ -8,6 +8,8 @@ argument %(argument_name)s: %(message)s
argument '%(argument_name)s' is deprecated
can't open '%(filename)s': %(error)s
command '%(parser_name)s' is deprecated
+conflicting option string: %s
+expected %s argument
expected at least one argument
expected at most one argument
expected one argument
diff --git a/Lib/test/translationdata/optparse/msgids.txt b/Lib/test/translationdata/optparse/msgids.txt
index ac5317c736af8c..8f405a2bf26dbe 100644
--- a/Lib/test/translationdata/optparse/msgids.txt
+++ b/Lib/test/translationdata/optparse/msgids.txt
@@ -1,3 +1,4 @@
+%(option)s option requires %(number)d argument
%prog [options]
%s option does not take a value
Options
diff --git a/Misc/NEWS.d/next/Core_and_Builtins/2024-11-22-07-58-00.gh-issue-127119.p9Yv4U.rst b/Misc/NEWS.d/next/Core_and_Builtins/2024-11-22-07-58-00.gh-issue-127119.p9Yv4U.rst
index 45bcb0ac6fe7c6..68b8b1d37cffc1 100644
--- a/Misc/NEWS.d/next/Core_and_Builtins/2024-11-22-07-58-00.gh-issue-127119.p9Yv4U.rst
+++ b/Misc/NEWS.d/next/Core_and_Builtins/2024-11-22-07-58-00.gh-issue-127119.p9Yv4U.rst
@@ -1 +1 @@
-Remove redundant check on small ints in ``long_dealloc``
+Slightly optimize the :class:`int` deallocator by removing a redundant check.
diff --git a/Misc/NEWS.d/next/Library/2024-10-23-20-05-54.gh-issue-86463.jvFTI_.rst b/Misc/NEWS.d/next/Library/2024-10-23-20-05-54.gh-issue-86463.jvFTI_.rst
new file mode 100644
index 00000000000000..9ac155770e2254
--- /dev/null
+++ b/Misc/NEWS.d/next/Library/2024-10-23-20-05-54.gh-issue-86463.jvFTI_.rst
@@ -0,0 +1,2 @@
+The ``usage`` parameter of :class:`argparse.ArgumentParser` no longer
+affects the default value of the ``prog`` parameter in subparsers.
diff --git a/Misc/NEWS.d/next/Library/2024-11-21-16-23-16.gh-issue-127065.cfL1zd.rst b/Misc/NEWS.d/next/Library/2024-11-21-16-23-16.gh-issue-127065.cfL1zd.rst
new file mode 100644
index 00000000000000..83457da467ffa9
--- /dev/null
+++ b/Misc/NEWS.d/next/Library/2024-11-21-16-23-16.gh-issue-127065.cfL1zd.rst
@@ -0,0 +1,2 @@
+Fix crash when calling a :func:`operator.methodcaller` instance from
+multiple threads in the free threading build.
diff --git a/Misc/NEWS.d/next/Tests/2024-11-20-18-49-01.gh-issue-127076.DHnXxo.rst b/Misc/NEWS.d/next/Tests/2024-11-20-18-49-01.gh-issue-127076.DHnXxo.rst
new file mode 100644
index 00000000000000..39323604bbef56
--- /dev/null
+++ b/Misc/NEWS.d/next/Tests/2024-11-20-18-49-01.gh-issue-127076.DHnXxo.rst
@@ -0,0 +1,2 @@
+Filter out memory-related ``mmap``, ``munmap``, and ``mprotect`` calls from
+file-related ones when testing :mod:`io` behavior using strace.
diff --git a/Misc/NEWS.d/next/Tools-Demos/2024-11-16-20-47-20.gh-issue-126700.ayrHv4.rst b/Misc/NEWS.d/next/Tools-Demos/2024-11-16-20-47-20.gh-issue-126700.ayrHv4.rst
new file mode 100644
index 00000000000000..c08ad9d7059904
--- /dev/null
+++ b/Misc/NEWS.d/next/Tools-Demos/2024-11-16-20-47-20.gh-issue-126700.ayrHv4.rst
@@ -0,0 +1 @@
+Add support for multi-argument :mod:`gettext` functions in :program:`pygettext.py`.
diff --git a/Modules/_operator.c b/Modules/_operator.c
index 7e0d1f3df87e4d..6c1945174ab7cd 100644
--- a/Modules/_operator.c
+++ b/Modules/_operator.c
@@ -1602,6 +1602,7 @@ typedef struct {
vectorcallfunc vectorcall;
} methodcallerobject;
+#ifndef Py_GIL_DISABLED
static int _methodcaller_initialize_vectorcall(methodcallerobject* mc)
{
PyObject* args = mc->xargs;
@@ -1664,6 +1665,7 @@ methodcaller_vectorcall(
(PyTuple_GET_SIZE(mc->xargs)) | PY_VECTORCALL_ARGUMENTS_OFFSET,
mc->vectorcall_kwnames);
}
+#endif
/* AC 3.5: variable number of arguments, not currently support by AC */
@@ -1703,7 +1705,14 @@ methodcaller_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
mc->vectorcall_args = 0;
+#ifdef Py_GIL_DISABLED
+ // gh-127065: The current implementation of methodcaller_vectorcall
+ // is not thread-safe because it modifies the `vectorcall_args` array,
+ // which is shared across calls.
+ mc->vectorcall = NULL;
+#else
mc->vectorcall = (vectorcallfunc)methodcaller_vectorcall;
+#endif
PyObject_GC_Track(mc);
return (PyObject *)mc;
diff --git a/PCbuild/_freeze_module.vcxproj.filters b/PCbuild/_freeze_module.vcxproj.filters
index 09a5f4d30ef490..3842f52e514bb4 100644
--- a/PCbuild/_freeze_module.vcxproj.filters
+++ b/PCbuild/_freeze_module.vcxproj.filters
@@ -239,6 +239,8 @@
Source Files
+ Source Files
+
Source Files
diff --git a/Tools/i18n/pygettext.py b/Tools/i18n/pygettext.py
index 0d16e8f7da0071..f78ff16bff9039 100755
--- a/Tools/i18n/pygettext.py
+++ b/Tools/i18n/pygettext.py
@@ -163,16 +163,13 @@
import time
import getopt
import ast
-import token
import tokenize
+from collections import defaultdict
+from dataclasses import dataclass, field
+from operator import itemgetter
__version__ = '1.5'
-default_keywords = ['_']
-DEFAULTKEYWORDS = ', '.join(default_keywords)
-
-EMPTYSTRING = ''
-
# The normal pot-file header. msgmerge and Emacs's po-mode work better if it's
# there.
@@ -306,12 +303,64 @@ def getFilesForName(name):
return []
+# Key is the function name, value is a dictionary mapping argument positions to the
+# type of the argument. The type is one of 'msgid', 'msgid_plural', or 'msgctxt'.
+DEFAULTKEYWORDS = {
+ '_': {0: 'msgid'},
+ 'gettext': {0: 'msgid'},
+ 'ngettext': {0: 'msgid', 1: 'msgid_plural'},
+ 'pgettext': {0: 'msgctxt', 1: 'msgid'},
+ 'npgettext': {0: 'msgctxt', 1: 'msgid', 2: 'msgid_plural'},
+ 'dgettext': {1: 'msgid'},
+ 'dngettext': {1: 'msgid', 2: 'msgid_plural'},
+ 'dpgettext': {1: 'msgctxt', 2: 'msgid'},
+ 'dnpgettext': {1: 'msgctxt', 2: 'msgid', 3: 'msgid_plural'},
+}
+
+
+def matches_spec(message, spec):
+ """Check if a message has all the keys defined by the keyword spec."""
+ return all(key in message for key in spec.values())
+
+
+@dataclass(frozen=True)
+class Location:
+ filename: str
+ lineno: int
+
+ def __lt__(self, other):
+ return (self.filename, self.lineno) < (other.filename, other.lineno)
+
+
+@dataclass
+class Message:
+ msgid: str
+ msgid_plural: str | None
+ msgctxt: str | None
+ locations: set[Location] = field(default_factory=set)
+ is_docstring: bool = False
+
+ def add_location(self, filename, lineno, msgid_plural=None, *, is_docstring=False):
+ if self.msgid_plural is None:
+ self.msgid_plural = msgid_plural
+ self.locations.add(Location(filename, lineno))
+ self.is_docstring |= is_docstring
+
+
+def key_for(msgid, msgctxt=None):
+ if msgctxt is not None:
+ return (msgctxt, msgid)
+ return msgid
+
+
class TokenEater:
def __init__(self, options):
self.__options = options
self.__messages = {}
self.__state = self.__waiting
- self.__data = []
+ self.__data = defaultdict(str)
+ self.__curr_arg = 0
+ self.__curr_keyword = None
self.__lineno = -1
self.__freshmodule = 1
self.__curfile = None
@@ -331,7 +380,7 @@ def __waiting(self, ttype, tstring, lineno):
# module docstring?
if self.__freshmodule:
if ttype == tokenize.STRING and is_literal_string(tstring):
- self.__addentry(safe_eval(tstring), lineno, isdocstring=1)
+ self.__addentry({'msgid': safe_eval(tstring)}, lineno, is_docstring=True)
self.__freshmodule = 0
return
if ttype in (tokenize.COMMENT, tokenize.NL, tokenize.ENCODING):
@@ -346,6 +395,7 @@ def __waiting(self, ttype, tstring, lineno):
return
if ttype == tokenize.NAME and tstring in opts.keywords:
self.__state = self.__keywordseen
+ self.__curr_keyword = tstring
return
if ttype == tokenize.STRING:
maybe_fstring = ast.parse(tstring, mode='eval').body
@@ -397,7 +447,8 @@ def __waiting(self, ttype, tstring, lineno):
}, file=sys.stderr)
continue
if isinstance(arg.value, str):
- self.__addentry(arg.value, lineno)
+ self.__curr_keyword = func_name
+ self.__addentry({'msgid': arg.value}, lineno)
def __suiteseen(self, ttype, tstring, lineno):
# skip over any enclosure pairs until we see the colon
@@ -413,7 +464,7 @@ def __suiteseen(self, ttype, tstring, lineno):
def __suitedocstring(self, ttype, tstring, lineno):
# ignore any intervening noise
if ttype == tokenize.STRING and is_literal_string(tstring):
- self.__addentry(safe_eval(tstring), lineno, isdocstring=1)
+ self.__addentry({'msgid': safe_eval(tstring)}, lineno, is_docstring=True)
self.__state = self.__waiting
elif ttype not in (tokenize.NEWLINE, tokenize.INDENT,
tokenize.COMMENT):
@@ -422,44 +473,90 @@ def __suitedocstring(self, ttype, tstring, lineno):
def __keywordseen(self, ttype, tstring, lineno):
if ttype == tokenize.OP and tstring == '(':
- self.__data = []
+ self.__data.clear()
+ self.__curr_arg = 0
+ self.__enclosurecount = 0
self.__lineno = lineno
self.__state = self.__openseen
else:
self.__state = self.__waiting
def __openseen(self, ttype, tstring, lineno):
- if ttype == tokenize.OP and tstring == ')':
- # We've seen the last of the translatable strings. Record the
- # line number of the first line of the strings and update the list
- # of messages seen. Reset state for the next batch. If there
- # were no strings inside _(), then just ignore this entry.
- if self.__data:
- self.__addentry(EMPTYSTRING.join(self.__data))
- self.__state = self.__waiting
- elif ttype == tokenize.STRING and is_literal_string(tstring):
- self.__data.append(safe_eval(tstring))
- elif ttype not in [tokenize.COMMENT, token.INDENT, token.DEDENT,
- token.NEWLINE, tokenize.NL]:
- # warn if we see anything else than STRING or whitespace
- print(_(
- '*** %(file)s:%(lineno)s: Seen unexpected token "%(token)s"'
- ) % {
- 'token': tstring,
- 'file': self.__curfile,
- 'lineno': self.__lineno
- }, file=sys.stderr)
- self.__state = self.__waiting
+ spec = self.__options.keywords[self.__curr_keyword]
+ arg_type = spec.get(self.__curr_arg)
+ expect_string_literal = arg_type is not None
+
+ if ttype == tokenize.OP and self.__enclosurecount == 0:
+ if tstring == ')':
+ # We've seen the last of the translatable strings. Record the
+ # line number of the first line of the strings and update the list
+ # of messages seen. Reset state for the next batch. If there
+ # were no strings inside _(), then just ignore this entry.
+ if self.__data:
+ self.__addentry(self.__data)
+ self.__state = self.__waiting
+ return
+ elif tstring == ',':
+ # Advance to the next argument
+ self.__curr_arg += 1
+ return
+
+ if expect_string_literal:
+ if ttype == tokenize.STRING and is_literal_string(tstring):
+ self.__data[arg_type] += safe_eval(tstring)
+ elif ttype not in (tokenize.COMMENT, tokenize.INDENT, tokenize.DEDENT,
+ tokenize.NEWLINE, tokenize.NL):
+ # We are inside an argument which is a translatable string and
+ # we encountered a token that is not a string. This is an error.
+ self.warn_unexpected_token(tstring)
+ self.__enclosurecount = 0
+ self.__state = self.__waiting
+ elif ttype == tokenize.OP:
+ if tstring in '([{':
+ self.__enclosurecount += 1
+ elif tstring in ')]}':
+ self.__enclosurecount -= 1
def __ignorenext(self, ttype, tstring, lineno):
self.__state = self.__waiting
- def __addentry(self, msg, lineno=None, isdocstring=0):
+ def __addentry(self, msg, lineno=None, *, is_docstring=False):
+ msgid = msg.get('msgid')
+ if msgid in self.__options.toexclude:
+ return
+ if not is_docstring:
+ spec = self.__options.keywords[self.__curr_keyword]
+ if not matches_spec(msg, spec):
+ return
if lineno is None:
lineno = self.__lineno
- if not msg in self.__options.toexclude:
- entry = (self.__curfile, lineno)
- self.__messages.setdefault(msg, {})[entry] = isdocstring
+ msgctxt = msg.get('msgctxt')
+ msgid_plural = msg.get('msgid_plural')
+ key = key_for(msgid, msgctxt)
+ if key in self.__messages:
+ self.__messages[key].add_location(
+ self.__curfile,
+ lineno,
+ msgid_plural,
+ is_docstring=is_docstring,
+ )
+ else:
+ self.__messages[key] = Message(
+ msgid=msgid,
+ msgid_plural=msgid_plural,
+ msgctxt=msgctxt,
+ locations={Location(self.__curfile, lineno)},
+ is_docstring=is_docstring,
+ )
+
+ def warn_unexpected_token(self, token):
+ print(_(
+ '*** %(file)s:%(lineno)s: Seen unexpected token "%(token)s"'
+ ) % {
+ 'token': token,
+ 'file': self.__curfile,
+ 'lineno': self.__lineno
+ }, file=sys.stderr)
def set_filename(self, filename):
self.__curfile = filename
@@ -472,55 +569,54 @@ def write(self, fp):
print(pot_header % {'time': timestamp, 'version': __version__,
'charset': encoding,
'encoding': '8bit'}, file=fp)
- # Sort the entries. First sort each particular entry's keys, then
- # sort all the entries by their first item.
- reverse = {}
- for k, v in self.__messages.items():
- keys = sorted(v.keys())
- reverse.setdefault(tuple(keys), []).append((k, v))
- rkeys = sorted(reverse.keys())
- for rkey in rkeys:
- rentries = reverse[rkey]
- rentries.sort()
- for k, v in rentries:
- # If the entry was gleaned out of a docstring, then add a
- # comment stating so. This is to aid translators who may wish
- # to skip translating some unimportant docstrings.
- isdocstring = any(v.values())
- # k is the message string, v is a dictionary-set of (filename,
- # lineno) tuples. We want to sort the entries in v first by
- # file name and then by line number.
- v = sorted(v.keys())
- if not options.writelocations:
- pass
+
+ # Sort locations within each message by filename and lineno
+ sorted_keys = [
+ (key, sorted(msg.locations))
+ for key, msg in self.__messages.items()
+ ]
+ # Sort messages by locations
+ # For example, a message with locations [('test.py', 1), ('test.py', 2)] will
+ # appear before a message with locations [('test.py', 1), ('test.py', 3)]
+ sorted_keys.sort(key=itemgetter(1))
+
+ for key, locations in sorted_keys:
+ msg = self.__messages[key]
+ if options.writelocations:
# location comments are different b/w Solaris and GNU:
- elif options.locationstyle == options.SOLARIS:
- for filename, lineno in v:
- d = {'filename': filename, 'lineno': lineno}
- print(_(
- '# File: %(filename)s, line: %(lineno)d') % d, file=fp)
+ if options.locationstyle == options.SOLARIS:
+ for location in locations:
+ print(f'# File: {location.filename}, line: {location.lineno}', file=fp)
elif options.locationstyle == options.GNU:
# fit as many locations on one line, as long as the
# resulting line length doesn't exceed 'options.width'
locline = '#:'
- for filename, lineno in v:
- d = {'filename': filename, 'lineno': lineno}
- s = _(' %(filename)s:%(lineno)d') % d
+ for location in locations:
+ s = f' {location.filename}:{location.lineno}'
if len(locline) + len(s) <= options.width:
locline = locline + s
else:
print(locline, file=fp)
- locline = "#:" + s
+ locline = f'#:{s}'
if len(locline) > 2:
print(locline, file=fp)
- if isdocstring:
- print('#, docstring', file=fp)
- print('msgid', normalize(k, encoding), file=fp)
+ if msg.is_docstring:
+ # If the entry was gleaned out of a docstring, then add a
+ # comment stating so. This is to aid translators who may wish
+ # to skip translating some unimportant docstrings.
+ print('#, docstring', file=fp)
+ if msg.msgctxt is not None:
+ print('msgctxt', normalize(msg.msgctxt, encoding), file=fp)
+ print('msgid', normalize(msg.msgid, encoding), file=fp)
+ if msg.msgid_plural is not None:
+ print('msgid_plural', normalize(msg.msgid_plural, encoding), file=fp)
+ print('msgstr[0] ""', file=fp)
+ print('msgstr[1] ""\n', file=fp)
+ else:
print('msgstr ""\n', file=fp)
def main():
- global default_keywords
try:
opts, args = getopt.getopt(
sys.argv[1:],
@@ -557,7 +653,7 @@ class Options:
locations = {'gnu' : options.GNU,
'solaris' : options.SOLARIS,
}
-
+ no_default_keywords = False
# parse options
for opt, arg in opts:
if opt in ('-h', '--help'):
@@ -573,7 +669,7 @@ class Options:
elif opt in ('-k', '--keyword'):
options.keywords.append(arg)
elif opt in ('-K', '--no-default-keywords'):
- default_keywords = []
+ no_default_keywords = True
elif opt in ('-n', '--add-location'):
options.writelocations = 1
elif opt in ('--no-location',):
@@ -613,7 +709,9 @@ class Options:
make_escapes(not options.escape)
# calculate all keywords
- options.keywords.extend(default_keywords)
+ options.keywords = {kw: {0: 'msgid'} for kw in options.keywords}
+ if not no_default_keywords:
+ options.keywords |= DEFAULTKEYWORDS
# initialize list of strings to exclude
if options.excludefilename: