diff --git a/Doc/library/argparse.rst b/Doc/library/argparse.rst index a4695547921faa..c5882c273717c0 100644 --- a/Doc/library/argparse.rst +++ b/Doc/library/argparse.rst @@ -192,6 +192,12 @@ arguments it contains. The default message can be overridden with the The ``%(prog)s`` format specifier is available to fill in the program name in your usage messages. +When a custom usage message is specified for the main parser, you may also want to +consider passing the ``prog`` argument to :meth:`~ArgumentParser.add_subparsers` +or the ``prog`` and the ``usage`` arguments to +:meth:`~_SubParsersAction.add_parser`, to ensure consistent command prefixes and +usage information across subparsers. + .. _description: @@ -1810,6 +1816,10 @@ Sub-commands .. versionchanged:: 3.7 New *required* keyword-only parameter. + .. versionchanged:: 3.14 + Subparser's *prog* is no longer affected by a custom usage message in + the main parser. + FileType objects ^^^^^^^^^^^^^^^^ diff --git a/Lib/argparse.py b/Lib/argparse.py index 5ecfdca17175e3..f5a7342c2fc355 100644 --- a/Lib/argparse.py +++ b/Lib/argparse.py @@ -1889,7 +1889,7 @@ def add_subparsers(self, **kwargs): formatter = self._get_formatter() positionals = self._get_positional_actions() groups = self._mutually_exclusive_groups - formatter.add_usage(self.usage, positionals, groups, '') + formatter.add_usage(None, positionals, groups, '') kwargs['prog'] = formatter.format_help().strip() # create the parsers action and add it to the positionals list diff --git a/Lib/test/support/strace_helper.py b/Lib/test/support/strace_helper.py index 90281b47274299..eab16ea3e2889f 100644 --- a/Lib/test/support/strace_helper.py +++ b/Lib/test/support/strace_helper.py @@ -71,6 +71,27 @@ def sections(self): return sections +def _filter_memory_call(call): + # mmap can operate on a fd or "MAP_ANONYMOUS" which gives a block of memory. + # Ignore "MAP_ANONYMOUS" + the "MAP_ANON" alias. 
+ if call.syscall == "mmap" and "MAP_ANON" in call.args[3]: + return True + + if call.syscall in ("munmap", "mprotect"): + return True + + return False + + +def filter_memory(syscalls): + """Filter out memory allocation calls from File I/O calls. + + Some calls (mmap, munmap, etc) can be used on files or to just get a block + of memory. Use this function to filter out the memory related calls from + other calls.""" + + return [call for call in syscalls if not _filter_memory_call(call)] + @support.requires_subprocess() def strace_python(code, strace_flags, check=True): @@ -93,8 +114,6 @@ def _make_error(reason, details): "-c", textwrap.dedent(code), __run_using_command=[_strace_binary] + strace_flags, - # Don't want to trace our JIT's own mmap and mprotect calls: - PYTHON_JIT="0", ) except OSError as err: return _make_error("Caught OSError", err) @@ -145,9 +164,14 @@ def get_events(code, strace_flags, prelude, cleanup): return all_sections['code'] -def get_syscalls(code, strace_flags, prelude="", cleanup=""): +def get_syscalls(code, strace_flags, prelude="", cleanup="", + ignore_memory=True): """Get the syscalls which a given chunk of python code generates""" events = get_events(code, strace_flags, prelude=prelude, cleanup=cleanup) + + if ignore_memory: + events = filter_memory(events) + return [ev.syscall for ev in events] @@ -177,5 +201,5 @@ def requires_strace(): return unittest.skipUnless(_can_strace(), "Requires working strace") -__all__ = ["get_events", "get_syscalls", "requires_strace", "strace_python", - "StraceEvent", "StraceResult"] +__all__ = ["filter_memory", "get_events", "get_syscalls", "requires_strace", + "strace_python", "StraceEvent", "StraceResult"] diff --git a/Lib/test/test_argparse.py b/Lib/test/test_argparse.py index 358cfb1c56aae4..69243fde5f0f98 100644 --- a/Lib/test/test_argparse.py +++ b/Lib/test/test_argparse.py @@ -2409,16 +2409,17 @@ def assertArgumentParserError(self, *args, **kwargs): self.assertRaises(ArgumentParserError, *args, 
**kwargs) def _get_parser(self, subparser_help=False, prefix_chars=None, - aliases=False): + aliases=False, usage=None): # create a parser with a subparsers argument if prefix_chars: parser = ErrorRaisingArgumentParser( - prog='PROG', description='main description', prefix_chars=prefix_chars) + prog='PROG', description='main description', usage=usage, + prefix_chars=prefix_chars) parser.add_argument( prefix_chars[0] * 2 + 'foo', action='store_true', help='foo help') else: parser = ErrorRaisingArgumentParser( - prog='PROG', description='main description') + prog='PROG', description='main description', usage=usage) parser.add_argument( '--foo', action='store_true', help='foo help') parser.add_argument( @@ -2455,7 +2456,8 @@ def _get_parser(self, subparser_help=False, prefix_chars=None, parser2.add_argument('z', type=complex, nargs='*', help='z help') # add third sub-parser - parser3_kwargs = dict(description='3 description') + parser3_kwargs = dict(description='3 description', + usage='PROG --foo bar 3 t ...') if subparser_help: parser3_kwargs['help'] = '3 help' parser3 = subparsers.add_parser('3', **parser3_kwargs) @@ -2477,6 +2479,47 @@ def test_parse_args_failures(self): args = args_str.split() self.assertArgumentParserError(self.parser.parse_args, args) + def test_parse_args_failures_details(self): + for args_str, usage_str, error_str in [ + ('', + 'usage: PROG [-h] [--foo] bar {1,2,3} ...', + 'PROG: error: the following arguments are required: bar'), + ('0.5 1 -y', + 'usage: PROG bar 1 [-h] [-w W] {a,b,c}', + 'PROG bar 1: error: the following arguments are required: x'), + ('0.5 3', + 'usage: PROG --foo bar 3 t ...', + 'PROG bar 3: error: the following arguments are required: t'), + ]: + with self.subTest(args_str): + args = args_str.split() + with self.assertRaises(ArgumentParserError) as cm: + self.parser.parse_args(args) + self.assertEqual(cm.exception.args[0], 'SystemExit') + self.assertEqual(cm.exception.args[2], f'{usage_str}\n{error_str}\n') + + def 
test_parse_args_failures_details_custom_usage(self): + parser = self._get_parser(usage='PROG [--foo] bar 1 [-w W] {a,b,c}\n' + ' PROG --foo bar 3 t ...') + for args_str, usage_str, error_str in [ + ('', + 'usage: PROG [--foo] bar 1 [-w W] {a,b,c}\n' + ' PROG --foo bar 3 t ...', + 'PROG: error: the following arguments are required: bar'), + ('0.5 1 -y', + 'usage: PROG bar 1 [-h] [-w W] {a,b,c}', + 'PROG bar 1: error: the following arguments are required: x'), + ('0.5 3', + 'usage: PROG --foo bar 3 t ...', + 'PROG bar 3: error: the following arguments are required: t'), + ]: + with self.subTest(args_str): + args = args_str.split() + with self.assertRaises(ArgumentParserError) as cm: + parser.parse_args(args) + self.assertEqual(cm.exception.args[0], 'SystemExit') + self.assertEqual(cm.exception.args[2], f'{usage_str}\n{error_str}\n') + def test_parse_args(self): # check some non-failure cases: self.assertEqual( diff --git a/Lib/test/test_fileio.py b/Lib/test/test_fileio.py index d60aabcdf1ae22..e681417e15d34b 100644 --- a/Lib/test/test_fileio.py +++ b/Lib/test/test_fileio.py @@ -364,8 +364,7 @@ def testErrnoOnClosedReadinto(self, f): @strace_helper.requires_strace() def test_syscalls_read(self): - """Check that the set of system calls produced by the I/O stack is what - is expected for various read cases. + """Check set of system calls during common I/O patterns It's expected as bits of the I/O implementation change, this will need to change. The goal is to catch changes that unintentionally add @@ -383,6 +382,11 @@ def check_readall(name, code, prelude="", cleanup="", prelude=prelude, cleanup=cleanup) + # Some system calls (ex. mmap) can be used for both File I/O and + # memory allocation. Filter out the ones used for memory + # allocation. + syscalls = strace_helper.filter_memory(syscalls) + # The first call should be an open that returns a # file descriptor (fd). Afer that calls may vary. 
Once the file # is opened, check calls refer to it by fd as the filename diff --git a/Lib/test/test_tools/i18n_data/messages.pot b/Lib/test/test_tools/i18n_data/messages.pot index ddfbd18349ef4f..8d66fbc4f3a937 100644 --- a/Lib/test/test_tools/i18n_data/messages.pot +++ b/Lib/test/test_tools/i18n_data/messages.pot @@ -15,53 +15,75 @@ msgstr "" "Generated-By: pygettext.py 1.5\n" -#: messages.py:5 +#: messages.py:16 msgid "" msgstr "" -#: messages.py:8 messages.py:9 +#: messages.py:19 messages.py:20 msgid "parentheses" msgstr "" -#: messages.py:12 +#: messages.py:23 msgid "Hello, world!" msgstr "" -#: messages.py:15 +#: messages.py:26 msgid "" "Hello,\n" " multiline!\n" msgstr "" -#: messages.py:29 +#: messages.py:46 messages.py:89 messages.py:90 messages.py:93 messages.py:94 +#: messages.py:99 +msgid "foo" +msgid_plural "foos" +msgstr[0] "" +msgstr[1] "" + +#: messages.py:47 +msgid "something" +msgstr "" + +#: messages.py:50 msgid "Hello, {}!" msgstr "" -#: messages.py:33 +#: messages.py:54 msgid "1" msgstr "" -#: messages.py:33 +#: messages.py:54 msgid "2" msgstr "" -#: messages.py:34 messages.py:35 +#: messages.py:55 messages.py:56 msgid "A" msgstr "" -#: messages.py:34 messages.py:35 +#: messages.py:55 messages.py:56 msgid "B" msgstr "" -#: messages.py:36 +#: messages.py:57 msgid "set" msgstr "" -#: messages.py:42 +#: messages.py:63 msgid "nested string" msgstr "" -#: messages.py:47 +#: messages.py:68 msgid "baz" msgstr "" +#: messages.py:91 messages.py:92 messages.py:95 messages.py:96 +msgctxt "context" +msgid "foo" +msgid_plural "foos" +msgstr[0] "" +msgstr[1] "" + +#: messages.py:100 +msgid "domain foo" +msgstr "" + diff --git a/Lib/test/test_tools/i18n_data/messages.py b/Lib/test/test_tools/i18n_data/messages.py index f220294b8d5c67..1e03f4e556830d 100644 --- a/Lib/test/test_tools/i18n_data/messages.py +++ b/Lib/test/test_tools/i18n_data/messages.py @@ -1,5 +1,16 @@ # Test message extraction -from gettext import gettext as _ +from gettext import ( + gettext, 
+ ngettext, + pgettext, + npgettext, + dgettext, + dngettext, + dpgettext, + dnpgettext +) + +_ = gettext # Empty string _("") @@ -21,13 +32,23 @@ _(None) _(1) _(False) -_(x="kwargs are not allowed") +_(("invalid")) +_(["invalid"]) +_({"invalid"}) +_("string"[3]) +_("string"[:3]) +_({"string": "foo"}) + +# pygettext does not allow keyword arguments, but both xgettext and pybabel do +_(x="kwargs work!") + +# Unusual, but valid arguments _("foo", "bar") _("something", x="something else") # .format() _("Hello, {}!").format("world") # valid -_("Hello, {}!".format("world")) # invalid +_("Hello, {}!".format("world")) # invalid, but xgettext and pybabel extract the first string # Nested structures _("1"), _("2") @@ -62,3 +83,28 @@ def _(x): def _(x="don't extract me"): pass + + +# Other gettext functions +gettext("foo") +ngettext("foo", "foos", 1) +pgettext("context", "foo") +npgettext("context", "foo", "foos", 1) +dgettext("domain", "foo") +dngettext("domain", "foo", "foos", 1) +dpgettext("domain", "context", "foo") +dnpgettext("domain", "context", "foo", "foos", 1) + +# Complex arguments +ngettext("foo", "foos", 42 + (10 - 20)) +dgettext(["some", {"complex"}, ("argument",)], "domain foo") + +# Invalid calls which are not extracted +gettext() +ngettext('foo') +pgettext('context') +npgettext('context', 'foo') +dgettext('domain') +dngettext('domain', 'foo') +dpgettext('domain', 'context') +dnpgettext('domain', 'context', 'foo') diff --git a/Lib/test/test_tools/test_i18n.py b/Lib/test/test_tools/test_i18n.py index 6f71f0976819f1..29c3423e234d20 100644 --- a/Lib/test/test_tools/test_i18n.py +++ b/Lib/test/test_tools/test_i18n.py @@ -332,14 +332,14 @@ def test_calls_in_fstring_with_multiple_args(self): msgids = self.extract_docstrings_from_str(dedent('''\ f"{_('foo', 'bar')}" ''')) - self.assertNotIn('foo', msgids) + self.assertIn('foo', msgids) self.assertNotIn('bar', msgids) def test_calls_in_fstring_with_keyword_args(self): msgids = 
self.extract_docstrings_from_str(dedent('''\ f"{_('foo', bar='baz')}" ''')) - self.assertNotIn('foo', msgids) + self.assertIn('foo', msgids) self.assertNotIn('bar', msgids) self.assertNotIn('baz', msgids) diff --git a/Lib/test/translationdata/argparse/msgids.txt b/Lib/test/translationdata/argparse/msgids.txt index 2b012906436e85..ae89ac74726ecf 100644 --- a/Lib/test/translationdata/argparse/msgids.txt +++ b/Lib/test/translationdata/argparse/msgids.txt @@ -8,6 +8,8 @@ argument %(argument_name)s: %(message)s argument '%(argument_name)s' is deprecated can't open '%(filename)s': %(error)s command '%(parser_name)s' is deprecated +conflicting option string: %s +expected %s argument expected at least one argument expected at most one argument expected one argument diff --git a/Lib/test/translationdata/optparse/msgids.txt b/Lib/test/translationdata/optparse/msgids.txt index ac5317c736af8c..8f405a2bf26dbe 100644 --- a/Lib/test/translationdata/optparse/msgids.txt +++ b/Lib/test/translationdata/optparse/msgids.txt @@ -1,3 +1,4 @@ +%(option)s option requires %(number)d argument %prog [options] %s option does not take a value Options diff --git a/Misc/NEWS.d/next/Core_and_Builtins/2024-11-22-07-58-00.gh-issue-127119.p9Yv4U.rst b/Misc/NEWS.d/next/Core_and_Builtins/2024-11-22-07-58-00.gh-issue-127119.p9Yv4U.rst index 45bcb0ac6fe7c6..68b8b1d37cffc1 100644 --- a/Misc/NEWS.d/next/Core_and_Builtins/2024-11-22-07-58-00.gh-issue-127119.p9Yv4U.rst +++ b/Misc/NEWS.d/next/Core_and_Builtins/2024-11-22-07-58-00.gh-issue-127119.p9Yv4U.rst @@ -1 +1 @@ -Remove redundant check on small ints in ``long_dealloc`` +Slightly optimize the :class:`int` deallocator by removing a redundant check. 
diff --git a/Misc/NEWS.d/next/Library/2024-10-23-20-05-54.gh-issue-86463.jvFTI_.rst b/Misc/NEWS.d/next/Library/2024-10-23-20-05-54.gh-issue-86463.jvFTI_.rst new file mode 100644 index 00000000000000..9ac155770e2254 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2024-10-23-20-05-54.gh-issue-86463.jvFTI_.rst @@ -0,0 +1,2 @@ +The ``usage`` parameter of :class:`argparse.ArgumentParser` no longer +affects the default value of the ``prog`` parameter in subparsers. diff --git a/Misc/NEWS.d/next/Library/2024-11-21-16-23-16.gh-issue-127065.cfL1zd.rst b/Misc/NEWS.d/next/Library/2024-11-21-16-23-16.gh-issue-127065.cfL1zd.rst new file mode 100644 index 00000000000000..83457da467ffa9 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2024-11-21-16-23-16.gh-issue-127065.cfL1zd.rst @@ -0,0 +1,2 @@ +Fix crash when calling a :func:`operator.methodcaller` instance from +multiple threads in the free threading build. diff --git a/Misc/NEWS.d/next/Tests/2024-11-20-18-49-01.gh-issue-127076.DHnXxo.rst b/Misc/NEWS.d/next/Tests/2024-11-20-18-49-01.gh-issue-127076.DHnXxo.rst new file mode 100644 index 00000000000000..39323604bbef56 --- /dev/null +++ b/Misc/NEWS.d/next/Tests/2024-11-20-18-49-01.gh-issue-127076.DHnXxo.rst @@ -0,0 +1,2 @@ +Filter out memory-related ``mmap``, ``munmap``, and ``mprotect`` calls from +file-related ones when testing :mod:`io` behavior using strace. diff --git a/Misc/NEWS.d/next/Tools-Demos/2024-11-16-20-47-20.gh-issue-126700.ayrHv4.rst b/Misc/NEWS.d/next/Tools-Demos/2024-11-16-20-47-20.gh-issue-126700.ayrHv4.rst new file mode 100644 index 00000000000000..c08ad9d7059904 --- /dev/null +++ b/Misc/NEWS.d/next/Tools-Demos/2024-11-16-20-47-20.gh-issue-126700.ayrHv4.rst @@ -0,0 +1 @@ +Add support for multi-argument :mod:`gettext` functions in :program:`pygettext.py`. 
diff --git a/Modules/_operator.c b/Modules/_operator.c index 7e0d1f3df87e4d..6c1945174ab7cd 100644 --- a/Modules/_operator.c +++ b/Modules/_operator.c @@ -1602,6 +1602,7 @@ typedef struct { vectorcallfunc vectorcall; } methodcallerobject; +#ifndef Py_GIL_DISABLED static int _methodcaller_initialize_vectorcall(methodcallerobject* mc) { PyObject* args = mc->xargs; @@ -1664,6 +1665,7 @@ methodcaller_vectorcall( (PyTuple_GET_SIZE(mc->xargs)) | PY_VECTORCALL_ARGUMENTS_OFFSET, mc->vectorcall_kwnames); } +#endif /* AC 3.5: variable number of arguments, not currently support by AC */ @@ -1703,7 +1705,14 @@ methodcaller_new(PyTypeObject *type, PyObject *args, PyObject *kwds) mc->vectorcall_args = 0; +#ifdef Py_GIL_DISABLED + // gh-127065: The current implementation of methodcaller_vectorcall + // is not thread-safe because it modifies the `vectorcall_args` array, + // which is shared across calls. + mc->vectorcall = NULL; +#else mc->vectorcall = (vectorcallfunc)methodcaller_vectorcall; +#endif PyObject_GC_Track(mc); return (PyObject *)mc; diff --git a/PCbuild/_freeze_module.vcxproj.filters b/PCbuild/_freeze_module.vcxproj.filters index 09a5f4d30ef490..3842f52e514bb4 100644 --- a/PCbuild/_freeze_module.vcxproj.filters +++ b/PCbuild/_freeze_module.vcxproj.filters @@ -239,6 +239,8 @@ Source Files + Source Files + Source Files diff --git a/Tools/i18n/pygettext.py b/Tools/i18n/pygettext.py index 0d16e8f7da0071..f78ff16bff9039 100755 --- a/Tools/i18n/pygettext.py +++ b/Tools/i18n/pygettext.py @@ -163,16 +163,13 @@ import time import getopt import ast -import token import tokenize +from collections import defaultdict +from dataclasses import dataclass, field +from operator import itemgetter __version__ = '1.5' -default_keywords = ['_'] -DEFAULTKEYWORDS = ', '.join(default_keywords) - -EMPTYSTRING = '' - # The normal pot-file header. msgmerge and Emacs's po-mode work better if it's # there. 
@@ -306,12 +303,64 @@ def getFilesForName(name): return [] +# Key is the function name, value is a dictionary mapping argument positions to the +# type of the argument. The type is one of 'msgid', 'msgid_plural', or 'msgctxt'. +DEFAULTKEYWORDS = { + '_': {0: 'msgid'}, + 'gettext': {0: 'msgid'}, + 'ngettext': {0: 'msgid', 1: 'msgid_plural'}, + 'pgettext': {0: 'msgctxt', 1: 'msgid'}, + 'npgettext': {0: 'msgctxt', 1: 'msgid', 2: 'msgid_plural'}, + 'dgettext': {1: 'msgid'}, + 'dngettext': {1: 'msgid', 2: 'msgid_plural'}, + 'dpgettext': {1: 'msgctxt', 2: 'msgid'}, + 'dnpgettext': {1: 'msgctxt', 2: 'msgid', 3: 'msgid_plural'}, +} + + +def matches_spec(message, spec): + """Check if a message has all the keys defined by the keyword spec.""" + return all(key in message for key in spec.values()) + + +@dataclass(frozen=True) +class Location: + filename: str + lineno: int + + def __lt__(self, other): + return (self.filename, self.lineno) < (other.filename, other.lineno) + + +@dataclass +class Message: + msgid: str + msgid_plural: str | None + msgctxt: str | None + locations: set[Location] = field(default_factory=set) + is_docstring: bool = False + + def add_location(self, filename, lineno, msgid_plural=None, *, is_docstring=False): + if self.msgid_plural is None: + self.msgid_plural = msgid_plural + self.locations.add(Location(filename, lineno)) + self.is_docstring |= is_docstring + + +def key_for(msgid, msgctxt=None): + if msgctxt is not None: + return (msgctxt, msgid) + return msgid + + class TokenEater: def __init__(self, options): self.__options = options self.__messages = {} self.__state = self.__waiting - self.__data = [] + self.__data = defaultdict(str) + self.__curr_arg = 0 + self.__curr_keyword = None self.__lineno = -1 self.__freshmodule = 1 self.__curfile = None @@ -331,7 +380,7 @@ def __waiting(self, ttype, tstring, lineno): # module docstring? 
if self.__freshmodule: if ttype == tokenize.STRING and is_literal_string(tstring): - self.__addentry(safe_eval(tstring), lineno, isdocstring=1) + self.__addentry({'msgid': safe_eval(tstring)}, lineno, is_docstring=True) self.__freshmodule = 0 return if ttype in (tokenize.COMMENT, tokenize.NL, tokenize.ENCODING): @@ -346,6 +395,7 @@ def __waiting(self, ttype, tstring, lineno): return if ttype == tokenize.NAME and tstring in opts.keywords: self.__state = self.__keywordseen + self.__curr_keyword = tstring return if ttype == tokenize.STRING: maybe_fstring = ast.parse(tstring, mode='eval').body @@ -397,7 +447,8 @@ def __waiting(self, ttype, tstring, lineno): }, file=sys.stderr) continue if isinstance(arg.value, str): - self.__addentry(arg.value, lineno) + self.__curr_keyword = func_name + self.__addentry({'msgid': arg.value}, lineno) def __suiteseen(self, ttype, tstring, lineno): # skip over any enclosure pairs until we see the colon @@ -413,7 +464,7 @@ def __suiteseen(self, ttype, tstring, lineno): def __suitedocstring(self, ttype, tstring, lineno): # ignore any intervening noise if ttype == tokenize.STRING and is_literal_string(tstring): - self.__addentry(safe_eval(tstring), lineno, isdocstring=1) + self.__addentry({'msgid': safe_eval(tstring)}, lineno, is_docstring=True) self.__state = self.__waiting elif ttype not in (tokenize.NEWLINE, tokenize.INDENT, tokenize.COMMENT): @@ -422,44 +473,90 @@ def __suitedocstring(self, ttype, tstring, lineno): def __keywordseen(self, ttype, tstring, lineno): if ttype == tokenize.OP and tstring == '(': - self.__data = [] + self.__data.clear() + self.__curr_arg = 0 + self.__enclosurecount = 0 self.__lineno = lineno self.__state = self.__openseen else: self.__state = self.__waiting def __openseen(self, ttype, tstring, lineno): - if ttype == tokenize.OP and tstring == ')': - # We've seen the last of the translatable strings. Record the - # line number of the first line of the strings and update the list - # of messages seen. 
Reset state for the next batch. If there - # were no strings inside _(), then just ignore this entry. - if self.__data: - self.__addentry(EMPTYSTRING.join(self.__data)) - self.__state = self.__waiting - elif ttype == tokenize.STRING and is_literal_string(tstring): - self.__data.append(safe_eval(tstring)) - elif ttype not in [tokenize.COMMENT, token.INDENT, token.DEDENT, - token.NEWLINE, tokenize.NL]: - # warn if we see anything else than STRING or whitespace - print(_( - '*** %(file)s:%(lineno)s: Seen unexpected token "%(token)s"' - ) % { - 'token': tstring, - 'file': self.__curfile, - 'lineno': self.__lineno - }, file=sys.stderr) - self.__state = self.__waiting + spec = self.__options.keywords[self.__curr_keyword] + arg_type = spec.get(self.__curr_arg) + expect_string_literal = arg_type is not None + + if ttype == tokenize.OP and self.__enclosurecount == 0: + if tstring == ')': + # We've seen the last of the translatable strings. Record the + # line number of the first line of the strings and update the list + # of messages seen. Reset state for the next batch. If there + # were no strings inside _(), then just ignore this entry. + if self.__data: + self.__addentry(self.__data) + self.__state = self.__waiting + return + elif tstring == ',': + # Advance to the next argument + self.__curr_arg += 1 + return + + if expect_string_literal: + if ttype == tokenize.STRING and is_literal_string(tstring): + self.__data[arg_type] += safe_eval(tstring) + elif ttype not in (tokenize.COMMENT, tokenize.INDENT, tokenize.DEDENT, + tokenize.NEWLINE, tokenize.NL): + # We are inside an argument which is a translatable string and + # we encountered a token that is not a string. This is an error. 
+ self.warn_unexpected_token(tstring) + self.__enclosurecount = 0 + self.__state = self.__waiting + elif ttype == tokenize.OP: + if tstring in '([{': + self.__enclosurecount += 1 + elif tstring in ')]}': + self.__enclosurecount -= 1 def __ignorenext(self, ttype, tstring, lineno): self.__state = self.__waiting - def __addentry(self, msg, lineno=None, isdocstring=0): + def __addentry(self, msg, lineno=None, *, is_docstring=False): + msgid = msg.get('msgid') + if msgid in self.__options.toexclude: + return + if not is_docstring: + spec = self.__options.keywords[self.__curr_keyword] + if not matches_spec(msg, spec): + return if lineno is None: lineno = self.__lineno - if not msg in self.__options.toexclude: - entry = (self.__curfile, lineno) - self.__messages.setdefault(msg, {})[entry] = isdocstring + msgctxt = msg.get('msgctxt') + msgid_plural = msg.get('msgid_plural') + key = key_for(msgid, msgctxt) + if key in self.__messages: + self.__messages[key].add_location( + self.__curfile, + lineno, + msgid_plural, + is_docstring=is_docstring, + ) + else: + self.__messages[key] = Message( + msgid=msgid, + msgid_plural=msgid_plural, + msgctxt=msgctxt, + locations={Location(self.__curfile, lineno)}, + is_docstring=is_docstring, + ) + + def warn_unexpected_token(self, token): + print(_( + '*** %(file)s:%(lineno)s: Seen unexpected token "%(token)s"' + ) % { + 'token': token, + 'file': self.__curfile, + 'lineno': self.__lineno + }, file=sys.stderr) def set_filename(self, filename): self.__curfile = filename @@ -472,55 +569,54 @@ def write(self, fp): print(pot_header % {'time': timestamp, 'version': __version__, 'charset': encoding, 'encoding': '8bit'}, file=fp) - # Sort the entries. First sort each particular entry's keys, then - # sort all the entries by their first item. 
- reverse = {} - for k, v in self.__messages.items(): - keys = sorted(v.keys()) - reverse.setdefault(tuple(keys), []).append((k, v)) - rkeys = sorted(reverse.keys()) - for rkey in rkeys: - rentries = reverse[rkey] - rentries.sort() - for k, v in rentries: - # If the entry was gleaned out of a docstring, then add a - # comment stating so. This is to aid translators who may wish - # to skip translating some unimportant docstrings. - isdocstring = any(v.values()) - # k is the message string, v is a dictionary-set of (filename, - # lineno) tuples. We want to sort the entries in v first by - # file name and then by line number. - v = sorted(v.keys()) - if not options.writelocations: - pass + + # Sort locations within each message by filename and lineno + sorted_keys = [ + (key, sorted(msg.locations)) + for key, msg in self.__messages.items() + ] + # Sort messages by locations + # For example, a message with locations [('test.py', 1), ('test.py', 2)] will + # appear before a message with locations [('test.py', 1), ('test.py', 3)] + sorted_keys.sort(key=itemgetter(1)) + + for key, locations in sorted_keys: + msg = self.__messages[key] + if options.writelocations: # location comments are different b/w Solaris and GNU: - elif options.locationstyle == options.SOLARIS: - for filename, lineno in v: - d = {'filename': filename, 'lineno': lineno} - print(_( - '# File: %(filename)s, line: %(lineno)d') % d, file=fp) + if options.locationstyle == options.SOLARIS: + for location in locations: + print(f'# File: {location.filename}, line: {location.lineno}', file=fp) elif options.locationstyle == options.GNU: # fit as many locations on one line, as long as the # resulting line length doesn't exceed 'options.width' locline = '#:' - for filename, lineno in v: - d = {'filename': filename, 'lineno': lineno} - s = _(' %(filename)s:%(lineno)d') % d + for location in locations: + s = f' {location.filename}:{location.lineno}' if len(locline) + len(s) <= options.width: locline = locline + s 
else: print(locline, file=fp) - locline = "#:" + s + locline = f'#:{s}' if len(locline) > 2: print(locline, file=fp) - if isdocstring: - print('#, docstring', file=fp) - print('msgid', normalize(k, encoding), file=fp) + if msg.is_docstring: + # If the entry was gleaned out of a docstring, then add a + # comment stating so. This is to aid translators who may wish + # to skip translating some unimportant docstrings. + print('#, docstring', file=fp) + if msg.msgctxt is not None: + print('msgctxt', normalize(msg.msgctxt, encoding), file=fp) + print('msgid', normalize(msg.msgid, encoding), file=fp) + if msg.msgid_plural is not None: + print('msgid_plural', normalize(msg.msgid_plural, encoding), file=fp) + print('msgstr[0] ""', file=fp) + print('msgstr[1] ""\n', file=fp) + else: print('msgstr ""\n', file=fp) def main(): - global default_keywords try: opts, args = getopt.getopt( sys.argv[1:], @@ -557,7 +653,7 @@ class Options: locations = {'gnu' : options.GNU, 'solaris' : options.SOLARIS, } - + no_default_keywords = False # parse options for opt, arg in opts: if opt in ('-h', '--help'): @@ -573,7 +669,7 @@ class Options: elif opt in ('-k', '--keyword'): options.keywords.append(arg) elif opt in ('-K', '--no-default-keywords'): - default_keywords = [] + no_default_keywords = True elif opt in ('-n', '--add-location'): options.writelocations = 1 elif opt in ('--no-location',): @@ -613,7 +709,9 @@ class Options: make_escapes(not options.escape) # calculate all keywords - options.keywords.extend(default_keywords) + options.keywords = {kw: {0: 'msgid'} for kw in options.keywords} + if not no_default_keywords: + options.keywords |= DEFAULTKEYWORDS # initialize list of strings to exclude if options.excludefilename: