From f83ca6962af973fff6a3124f4bd3d45fea4dd5b8 Mon Sep 17 00:00:00 2001 From: Sam Gross Date: Fri, 22 Nov 2024 14:21:59 +0000 Subject: [PATCH 1/6] gh-127065: Make `methodcaller` thread-safe in free threading build (#127109) The `methodcaller` C vectorcall implementation uses an arguments array that is shared across calls. The first argument is modified on every invocation. This isn't thread-safe in the free threading build. I think it's also not safe in general, but for now just disable it in the free threading build. --- .../2024-11-21-16-23-16.gh-issue-127065.cfL1zd.rst | 2 ++ Modules/_operator.c | 9 +++++++++ 2 files changed, 11 insertions(+) create mode 100644 Misc/NEWS.d/next/Library/2024-11-21-16-23-16.gh-issue-127065.cfL1zd.rst diff --git a/Misc/NEWS.d/next/Library/2024-11-21-16-23-16.gh-issue-127065.cfL1zd.rst b/Misc/NEWS.d/next/Library/2024-11-21-16-23-16.gh-issue-127065.cfL1zd.rst new file mode 100644 index 00000000000000..83457da467ffa9 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2024-11-21-16-23-16.gh-issue-127065.cfL1zd.rst @@ -0,0 +1,2 @@ +Fix crash when calling a :func:`operator.methodcaller` instance from +multiple threads in the free threading build. diff --git a/Modules/_operator.c b/Modules/_operator.c index 7e0d1f3df87e4d..6c1945174ab7cd 100644 --- a/Modules/_operator.c +++ b/Modules/_operator.c @@ -1602,6 +1602,7 @@ typedef struct { vectorcallfunc vectorcall; } methodcallerobject; +#ifndef Py_GIL_DISABLED static int _methodcaller_initialize_vectorcall(methodcallerobject* mc) { PyObject* args = mc->xargs; @@ -1664,6 +1665,7 @@ methodcaller_vectorcall( (PyTuple_GET_SIZE(mc->xargs)) | PY_VECTORCALL_ARGUMENTS_OFFSET, mc->vectorcall_kwnames); } +#endif /* AC 3.5: variable number of arguments, not currently support by AC */ @@ -1703,7 +1705,14 @@ methodcaller_new(PyTypeObject *type, PyObject *args, PyObject *kwds) mc->vectorcall_args = 0; +#ifdef Py_GIL_DISABLED + // gh-127065: The current implementation of methodcaller_vectorcall + // is not thread-safe because it modifies the `vectorcall_args` array, + // which is shared across calls. + mc->vectorcall = NULL; +#else mc->vectorcall = (vectorcallfunc)methodcaller_vectorcall; +#endif PyObject_GC_Track(mc); return (PyObject *)mc; From 0a1944cda8504ba0478a51075eba540576570336 Mon Sep 17 00:00:00 2001 From: "Tomas R." Date: Fri, 22 Nov 2024 15:52:16 +0100 Subject: [PATCH 2/6] gh-126700: pygettext: Support more gettext functions (GH-126912) Support multi-argument gettext functions: ngettext(), pgettext(), dgettext(), etc. --- Lib/test/test_tools/i18n_data/messages.pot | 46 +++- Lib/test/test_tools/i18n_data/messages.py | 52 +++- Lib/test/test_tools/test_i18n.py | 4 +- Lib/test/translationdata/argparse/msgids.txt | 2 + Lib/test/translationdata/optparse/msgids.txt | 1 + ...-11-16-20-47-20.gh-issue-126700.ayrHv4.rst | 1 + Tools/i18n/pygettext.py | 244 ++++++++++++------ 7 files changed, 260 insertions(+), 90 deletions(-) create mode 100644 Misc/NEWS.d/next/Tools-Demos/2024-11-16-20-47-20.gh-issue-126700.ayrHv4.rst diff --git a/Lib/test/test_tools/i18n_data/messages.pot b/Lib/test/test_tools/i18n_data/messages.pot index ddfbd18349ef4f..8d66fbc4f3a937 100644 --- a/Lib/test/test_tools/i18n_data/messages.pot +++ b/Lib/test/test_tools/i18n_data/messages.pot @@ -15,53 +15,75 @@ msgstr "" "Generated-By: pygettext.py 1.5\n" -#: messages.py:5 +#: messages.py:16 msgid "" msgstr "" -#: messages.py:8 messages.py:9 +#: messages.py:19 messages.py:20 msgid "parentheses" msgstr "" -#: messages.py:12 +#: messages.py:23 msgid "Hello, world!" 
msgstr "" -#: messages.py:15 +#: messages.py:26 msgid "" "Hello,\n" " multiline!\n" msgstr "" -#: messages.py:29 +#: messages.py:46 messages.py:89 messages.py:90 messages.py:93 messages.py:94 +#: messages.py:99 +msgid "foo" +msgid_plural "foos" +msgstr[0] "" +msgstr[1] "" + +#: messages.py:47 +msgid "something" +msgstr "" + +#: messages.py:50 msgid "Hello, {}!" msgstr "" -#: messages.py:33 +#: messages.py:54 msgid "1" msgstr "" -#: messages.py:33 +#: messages.py:54 msgid "2" msgstr "" -#: messages.py:34 messages.py:35 +#: messages.py:55 messages.py:56 msgid "A" msgstr "" -#: messages.py:34 messages.py:35 +#: messages.py:55 messages.py:56 msgid "B" msgstr "" -#: messages.py:36 +#: messages.py:57 msgid "set" msgstr "" -#: messages.py:42 +#: messages.py:63 msgid "nested string" msgstr "" -#: messages.py:47 +#: messages.py:68 msgid "baz" msgstr "" +#: messages.py:91 messages.py:92 messages.py:95 messages.py:96 +msgctxt "context" +msgid "foo" +msgid_plural "foos" +msgstr[0] "" +msgstr[1] "" + +#: messages.py:100 +msgid "domain foo" +msgstr "" + diff --git a/Lib/test/test_tools/i18n_data/messages.py b/Lib/test/test_tools/i18n_data/messages.py index f220294b8d5c67..1e03f4e556830d 100644 --- a/Lib/test/test_tools/i18n_data/messages.py +++ b/Lib/test/test_tools/i18n_data/messages.py @@ -1,5 +1,16 @@ # Test message extraction -from gettext import gettext as _ +from gettext import ( + gettext, + ngettext, + pgettext, + npgettext, + dgettext, + dngettext, + dpgettext, + dnpgettext +) + +_ = gettext # Empty string _("") @@ -21,13 +32,23 @@ _(None) _(1) _(False) -_(x="kwargs are not allowed") +_(("invalid")) +_(["invalid"]) +_({"invalid"}) +_("string"[3]) +_("string"[:3]) +_({"string": "foo"}) + +# pygettext does not allow keyword arguments, but both xgettext and pybabel do +_(x="kwargs work!") + +# Unusual, but valid arguments _("foo", "bar") _("something", x="something else") # .format() _("Hello, {}!").format("world") # valid -_("Hello, {}!".format("world")) # invalid +_("Hello, {}!".format("world")) # invalid, but xgettext and pybabel extract the first string # Nested structures _("1"), _("2") @@ -62,3 +83,28 @@ def _(x): def _(x="don't extract me"): pass + + +# Other gettext functions +gettext("foo") +ngettext("foo", "foos", 1) +pgettext("context", "foo") +npgettext("context", "foo", "foos", 1) +dgettext("domain", "foo") +dngettext("domain", "foo", "foos", 1) +dpgettext("domain", "context", "foo") +dnpgettext("domain", "context", "foo", "foos", 1) + +# Complex arguments +ngettext("foo", "foos", 42 + (10 - 20)) +dgettext(["some", {"complex"}, ("argument",)], "domain foo") + +# Invalid calls which are not extracted +gettext() +ngettext('foo') +pgettext('context') +npgettext('context', 'foo') +dgettext('domain') +dngettext('domain', 'foo') +dpgettext('domain', 'context') +dnpgettext('domain', 'context', 'foo') diff --git a/Lib/test/test_tools/test_i18n.py b/Lib/test/test_tools/test_i18n.py index 6f71f0976819f1..29c3423e234d20 100644 --- a/Lib/test/test_tools/test_i18n.py +++ b/Lib/test/test_tools/test_i18n.py @@ -332,14 +332,14 @@ def test_calls_in_fstring_with_multiple_args(self): msgids = self.extract_docstrings_from_str(dedent('''\ f"{_('foo', 'bar')}" ''')) - self.assertNotIn('foo', msgids) + self.assertIn('foo', msgids) self.assertNotIn('bar', msgids) def test_calls_in_fstring_with_keyword_args(self): msgids = self.extract_docstrings_from_str(dedent('''\ f"{_('foo', bar='baz')}" ''')) - self.assertNotIn('foo', msgids) + self.assertIn('foo', msgids) self.assertNotIn('bar', msgids) 
self.assertNotIn('baz', msgids) diff --git a/Lib/test/translationdata/argparse/msgids.txt b/Lib/test/translationdata/argparse/msgids.txt index 2b012906436e85..ae89ac74726ecf 100644 --- a/Lib/test/translationdata/argparse/msgids.txt +++ b/Lib/test/translationdata/argparse/msgids.txt @@ -8,6 +8,8 @@ argument %(argument_name)s: %(message)s argument '%(argument_name)s' is deprecated can't open '%(filename)s': %(error)s command '%(parser_name)s' is deprecated +conflicting option string: %s +expected %s argument expected at least one argument expected at most one argument expected one argument diff --git a/Lib/test/translationdata/optparse/msgids.txt b/Lib/test/translationdata/optparse/msgids.txt index ac5317c736af8c..8f405a2bf26dbe 100644 --- a/Lib/test/translationdata/optparse/msgids.txt +++ b/Lib/test/translationdata/optparse/msgids.txt @@ -1,3 +1,4 @@ +%(option)s option requires %(number)d argument %prog [options] %s option does not take a value Options diff --git a/Misc/NEWS.d/next/Tools-Demos/2024-11-16-20-47-20.gh-issue-126700.ayrHv4.rst b/Misc/NEWS.d/next/Tools-Demos/2024-11-16-20-47-20.gh-issue-126700.ayrHv4.rst new file mode 100644 index 00000000000000..c08ad9d7059904 --- /dev/null +++ b/Misc/NEWS.d/next/Tools-Demos/2024-11-16-20-47-20.gh-issue-126700.ayrHv4.rst @@ -0,0 +1 @@ +Add support for multi-argument :mod:`gettext` functions in :program:`pygettext.py`. diff --git a/Tools/i18n/pygettext.py b/Tools/i18n/pygettext.py index 0d16e8f7da0071..f78ff16bff9039 100755 --- a/Tools/i18n/pygettext.py +++ b/Tools/i18n/pygettext.py @@ -163,16 +163,13 @@ import time import getopt import ast -import token import tokenize +from collections import defaultdict +from dataclasses import dataclass, field +from operator import itemgetter __version__ = '1.5' -default_keywords = ['_'] -DEFAULTKEYWORDS = ', '.join(default_keywords) - -EMPTYSTRING = '' - # The normal pot-file header. msgmerge and Emacs's po-mode work better if it's # there. @@ -306,12 +303,64 @@ def getFilesForName(name): return [] +# Key is the function name, value is a dictionary mapping argument positions to the +# type of the argument. The type is one of 'msgid', 'msgid_plural', or 'msgctxt'. 
+DEFAULTKEYWORDS = { + '_': {0: 'msgid'}, + 'gettext': {0: 'msgid'}, + 'ngettext': {0: 'msgid', 1: 'msgid_plural'}, + 'pgettext': {0: 'msgctxt', 1: 'msgid'}, + 'npgettext': {0: 'msgctxt', 1: 'msgid', 2: 'msgid_plural'}, + 'dgettext': {1: 'msgid'}, + 'dngettext': {1: 'msgid', 2: 'msgid_plural'}, + 'dpgettext': {1: 'msgctxt', 2: 'msgid'}, + 'dnpgettext': {1: 'msgctxt', 2: 'msgid', 3: 'msgid_plural'}, +} + + +def matches_spec(message, spec): + """Check if a message has all the keys defined by the keyword spec.""" + return all(key in message for key in spec.values()) + + +@dataclass(frozen=True) +class Location: + filename: str + lineno: int + + def __lt__(self, other): + return (self.filename, self.lineno) < (other.filename, other.lineno) + + +@dataclass +class Message: + msgid: str + msgid_plural: str | None + msgctxt: str | None + locations: set[Location] = field(default_factory=set) + is_docstring: bool = False + + def add_location(self, filename, lineno, msgid_plural=None, *, is_docstring=False): + if self.msgid_plural is None: + self.msgid_plural = msgid_plural + self.locations.add(Location(filename, lineno)) + self.is_docstring |= is_docstring + + +def key_for(msgid, msgctxt=None): + if msgctxt is not None: + return (msgctxt, msgid) + return msgid + + class TokenEater: def __init__(self, options): self.__options = options self.__messages = {} self.__state = self.__waiting - self.__data = [] + self.__data = defaultdict(str) + self.__curr_arg = 0 + self.__curr_keyword = None self.__lineno = -1 self.__freshmodule = 1 self.__curfile = None @@ -331,7 +380,7 @@ def __waiting(self, ttype, tstring, lineno): # module docstring? if self.__freshmodule: if ttype == tokenize.STRING and is_literal_string(tstring): - self.__addentry(safe_eval(tstring), lineno, isdocstring=1) + self.__addentry({'msgid': safe_eval(tstring)}, lineno, is_docstring=True) self.__freshmodule = 0 return if ttype in (tokenize.COMMENT, tokenize.NL, tokenize.ENCODING): @@ -346,6 +395,7 @@ def __waiting(self, ttype, tstring, lineno): return if ttype == tokenize.NAME and tstring in opts.keywords: self.__state = self.__keywordseen + self.__curr_keyword = tstring return if ttype == tokenize.STRING: maybe_fstring = ast.parse(tstring, mode='eval').body @@ -397,7 +447,8 @@ def __waiting(self, ttype, tstring, lineno): }, file=sys.stderr) continue if isinstance(arg.value, str): - self.__addentry(arg.value, lineno) + self.__curr_keyword = func_name + self.__addentry({'msgid': arg.value}, lineno) def __suiteseen(self, ttype, tstring, lineno): # skip over any enclosure pairs until we see the colon @@ -413,7 +464,7 @@ def __suiteseen(self, ttype, tstring, lineno): def __suitedocstring(self, ttype, tstring, lineno): # ignore any intervening noise if ttype == tokenize.STRING and is_literal_string(tstring): - self.__addentry(safe_eval(tstring), lineno, isdocstring=1) + self.__addentry({'msgid': safe_eval(tstring)}, lineno, is_docstring=True) self.__state = self.__waiting elif ttype not in (tokenize.NEWLINE, tokenize.INDENT, tokenize.COMMENT): @@ -422,44 +473,90 @@ def __suitedocstring(self, ttype, tstring, lineno): def __keywordseen(self, ttype, tstring, lineno): if ttype == tokenize.OP and tstring == '(': - self.__data = [] + self.__data.clear() + self.__curr_arg = 0 + self.__enclosurecount = 0 self.__lineno = lineno self.__state = self.__openseen else: self.__state = self.__waiting def __openseen(self, ttype, tstring, lineno): - if ttype == tokenize.OP and tstring == ')': - # We've seen the last of the translatable strings. 
Record the - # line number of the first line of the strings and update the list - # of messages seen. Reset state for the next batch. If there - # were no strings inside _(), then just ignore this entry. - if self.__data: - self.__addentry(EMPTYSTRING.join(self.__data)) - self.__state = self.__waiting - elif ttype == tokenize.STRING and is_literal_string(tstring): - self.__data.append(safe_eval(tstring)) - elif ttype not in [tokenize.COMMENT, token.INDENT, token.DEDENT, - token.NEWLINE, tokenize.NL]: - # warn if we see anything else than STRING or whitespace - print(_( - '*** %(file)s:%(lineno)s: Seen unexpected token "%(token)s"' - ) % { - 'token': tstring, - 'file': self.__curfile, - 'lineno': self.__lineno - }, file=sys.stderr) - self.__state = self.__waiting + spec = self.__options.keywords[self.__curr_keyword] + arg_type = spec.get(self.__curr_arg) + expect_string_literal = arg_type is not None + + if ttype == tokenize.OP and self.__enclosurecount == 0: + if tstring == ')': + # We've seen the last of the translatable strings. Record the + # line number of the first line of the strings and update the list + # of messages seen. Reset state for the next batch. If there + # were no strings inside _(), then just ignore this entry. + if self.__data: + self.__addentry(self.__data) + self.__state = self.__waiting + return + elif tstring == ',': + # Advance to the next argument + self.__curr_arg += 1 + return + + if expect_string_literal: + if ttype == tokenize.STRING and is_literal_string(tstring): + self.__data[arg_type] += safe_eval(tstring) + elif ttype not in (tokenize.COMMENT, tokenize.INDENT, tokenize.DEDENT, + tokenize.NEWLINE, tokenize.NL): + # We are inside an argument which is a translatable string and + # we encountered a token that is not a string. This is an error. + self.warn_unexpected_token(tstring) + self.__enclosurecount = 0 + self.__state = self.__waiting + elif ttype == tokenize.OP: + if tstring in '([{': + self.__enclosurecount += 1 + elif tstring in ')]}': + self.__enclosurecount -= 1 def __ignorenext(self, ttype, tstring, lineno): self.__state = self.__waiting - def __addentry(self, msg, lineno=None, isdocstring=0): + def __addentry(self, msg, lineno=None, *, is_docstring=False): + msgid = msg.get('msgid') + if msgid in self.__options.toexclude: + return + if not is_docstring: + spec = self.__options.keywords[self.__curr_keyword] + if not matches_spec(msg, spec): + return if lineno is None: lineno = self.__lineno - if not msg in self.__options.toexclude: - entry = (self.__curfile, lineno) - self.__messages.setdefault(msg, {})[entry] = isdocstring + msgctxt = msg.get('msgctxt') + msgid_plural = msg.get('msgid_plural') + key = key_for(msgid, msgctxt) + if key in self.__messages: + self.__messages[key].add_location( + self.__curfile, + lineno, + msgid_plural, + is_docstring=is_docstring, + ) + else: + self.__messages[key] = Message( + msgid=msgid, + msgid_plural=msgid_plural, + msgctxt=msgctxt, + locations={Location(self.__curfile, lineno)}, + is_docstring=is_docstring, + ) + + def warn_unexpected_token(self, token): + print(_( + '*** %(file)s:%(lineno)s: Seen unexpected token "%(token)s"' + ) % { + 'token': token, + 'file': self.__curfile, + 'lineno': self.__lineno + }, file=sys.stderr) def set_filename(self, filename): self.__curfile = filename @@ -472,55 +569,54 @@ def write(self, fp): print(pot_header % {'time': timestamp, 'version': __version__, 'charset': encoding, 'encoding': '8bit'}, file=fp) - # Sort the entries. 
First sort each particular entry's keys, then - # sort all the entries by their first item. - reverse = {} - for k, v in self.__messages.items(): - keys = sorted(v.keys()) - reverse.setdefault(tuple(keys), []).append((k, v)) - rkeys = sorted(reverse.keys()) - for rkey in rkeys: - rentries = reverse[rkey] - rentries.sort() - for k, v in rentries: - # If the entry was gleaned out of a docstring, then add a - # comment stating so. This is to aid translators who may wish - # to skip translating some unimportant docstrings. - isdocstring = any(v.values()) - # k is the message string, v is a dictionary-set of (filename, - # lineno) tuples. We want to sort the entries in v first by - # file name and then by line number. - v = sorted(v.keys()) - if not options.writelocations: - pass + + # Sort locations within each message by filename and lineno + sorted_keys = [ + (key, sorted(msg.locations)) + for key, msg in self.__messages.items() + ] + # Sort messages by locations + # For example, a message with locations [('test.py', 1), ('test.py', 2)] will + # appear before a message with locations [('test.py', 1), ('test.py', 3)] + sorted_keys.sort(key=itemgetter(1)) + + for key, locations in sorted_keys: + msg = self.__messages[key] + if options.writelocations: # location comments are different b/w Solaris and GNU: - elif options.locationstyle == options.SOLARIS: - for filename, lineno in v: - d = {'filename': filename, 'lineno': lineno} - print(_( - '# File: %(filename)s, line: %(lineno)d') % d, file=fp) + if options.locationstyle == options.SOLARIS: + for location in locations: + print(f'# File: {location.filename}, line: {location.lineno}', file=fp) elif options.locationstyle == options.GNU: # fit as many locations on one line, as long as the # resulting line length doesn't exceed 'options.width' locline = '#:' - for filename, lineno in v: - d = {'filename': filename, 'lineno': lineno} - s = _(' %(filename)s:%(lineno)d') % d + for location in locations: + s = f' {location.filename}:{location.lineno}' if len(locline) + len(s) <= options.width: locline = locline + s else: print(locline, file=fp) - locline = "#:" + s + locline = f'#:{s}' if len(locline) > 2: print(locline, file=fp) - if isdocstring: - print('#, docstring', file=fp) - print('msgid', normalize(k, encoding), file=fp) + if msg.is_docstring: + # If the entry was gleaned out of a docstring, then add a + # comment stating so. This is to aid translators who may wish + # to skip translating some unimportant docstrings. 
+ print('#, docstring', file=fp) + if msg.msgctxt is not None: + print('msgctxt', normalize(msg.msgctxt, encoding), file=fp) + print('msgid', normalize(msg.msgid, encoding), file=fp) + if msg.msgid_plural is not None: + print('msgid_plural', normalize(msg.msgid_plural, encoding), file=fp) + print('msgstr[0] ""', file=fp) + print('msgstr[1] ""\n', file=fp) + else: print('msgstr ""\n', file=fp) def main(): - global default_keywords try: opts, args = getopt.getopt( sys.argv[1:], @@ -557,7 +653,7 @@ class Options: locations = {'gnu' : options.GNU, 'solaris' : options.SOLARIS, } - + no_default_keywords = False # parse options for opt, arg in opts: if opt in ('-h', '--help'): @@ -573,7 +669,7 @@ class Options: elif opt in ('-k', '--keyword'): options.keywords.append(arg) elif opt in ('-K', '--no-default-keywords'): - default_keywords = [] + no_default_keywords = True elif opt in ('-n', '--add-location'): options.writelocations = 1 elif opt in ('--no-location',): @@ -613,7 +709,9 @@ class Options: make_escapes(not options.escape) # calculate all keywords - options.keywords.extend(default_keywords) + options.keywords = {kw: {0: 'msgid'} for kw in options.keywords} + if not no_default_keywords: + options.keywords |= DEFAULTKEYWORDS # initialize list of strings to exclude if options.excludefilename: From 46f8a7bbdbb02cafaa00f7bb9478d3d27affc57a Mon Sep 17 00:00:00 2001 From: Cody Maloney Date: Fri, 22 Nov 2024 06:55:32 -0800 Subject: [PATCH 3/6] gh-127076: Ignore memory mmap in FileIO testing (#127088) `mmap`, `munmap`, and `mprotect` are used by CPython for memory management, which may occur in the middle of the FileIO tests. The system calls can also be used with files, so `strace` includes them in its `%file` and `%desc` filters. Filter out the `mmap` system calls related to memory allocation for the file tests. Currently FileIO doesn't do `mmap` at all, so didn't add code to track from `mmap` through `munmap` since it wouldn't be used. For now if an `mmap` on a fd happens, the call will be included (which may cause test to fail), and at that time support for tracking the address throug `munmap` could be added. --- Lib/test/support/strace_helper.py | 34 ++++++++++++++++--- Lib/test/test_fileio.py | 8 +++-- ...-11-20-18-49-01.gh-issue-127076.DHnXxo.rst | 2 ++ 3 files changed, 37 insertions(+), 7 deletions(-) create mode 100644 Misc/NEWS.d/next/Tests/2024-11-20-18-49-01.gh-issue-127076.DHnXxo.rst diff --git a/Lib/test/support/strace_helper.py b/Lib/test/support/strace_helper.py index 90281b47274299..eab16ea3e2889f 100644 --- a/Lib/test/support/strace_helper.py +++ b/Lib/test/support/strace_helper.py @@ -71,6 +71,27 @@ def sections(self): return sections +def _filter_memory_call(call): + # mmap can operate on a fd or "MAP_ANONYMOUS" which gives a block of memory. + # Ignore "MAP_ANONYMOUS + the "MAP_ANON" alias. + if call.syscall == "mmap" and "MAP_ANON" in call.args[3]: + return True + + if call.syscall in ("munmap", "mprotect"): + return True + + return False + + +def filter_memory(syscalls): + """Filter out memory allocation calls from File I/O calls. + + Some calls (mmap, munmap, etc) can be used on files or to just get a block + of memory. 
Use this function to filter out the memory related calls from + other calls.""" + + return [call for call in syscalls if not _filter_memory_call(call)] + @support.requires_subprocess() def strace_python(code, strace_flags, check=True): @@ -93,8 +114,6 @@ def _make_error(reason, details): "-c", textwrap.dedent(code), __run_using_command=[_strace_binary] + strace_flags, - # Don't want to trace our JIT's own mmap and mprotect calls: - PYTHON_JIT="0", ) except OSError as err: return _make_error("Caught OSError", err) @@ -145,9 +164,14 @@ def get_events(code, strace_flags, prelude, cleanup): return all_sections['code'] -def get_syscalls(code, strace_flags, prelude="", cleanup=""): +def get_syscalls(code, strace_flags, prelude="", cleanup="", + ignore_memory=True): """Get the syscalls which a given chunk of python code generates""" events = get_events(code, strace_flags, prelude=prelude, cleanup=cleanup) + + if ignore_memory: + events = filter_memory(events) + return [ev.syscall for ev in events] @@ -177,5 +201,5 @@ def requires_strace(): return unittest.skipUnless(_can_strace(), "Requires working strace") -__all__ = ["get_events", "get_syscalls", "requires_strace", "strace_python", - "StraceEvent", "StraceResult"] +__all__ = ["filter_memory", "get_events", "get_syscalls", "requires_strace", + "strace_python", "StraceEvent", "StraceResult"] diff --git a/Lib/test/test_fileio.py b/Lib/test/test_fileio.py index d60aabcdf1ae22..e681417e15d34b 100644 --- a/Lib/test/test_fileio.py +++ b/Lib/test/test_fileio.py @@ -364,8 +364,7 @@ def testErrnoOnClosedReadinto(self, f): @strace_helper.requires_strace() def test_syscalls_read(self): - """Check that the set of system calls produced by the I/O stack is what - is expected for various read cases. + """Check set of system calls during common I/O patterns It's expected as bits of the I/O implementation change, this will need to change. The goal is to catch changes that unintentionally add @@ -383,6 +382,11 @@ def check_readall(name, code, prelude="", cleanup="", prelude=prelude, cleanup=cleanup) + # Some system calls (ex. mmap) can be used for both File I/O and + # memory allocation. Filter out the ones used for memory + # allocation. + syscalls = strace_helper.filter_memory(syscalls) + # The first call should be an open that returns a # file descriptor (fd). Afer that calls may vary. Once the file # is opened, check calls refer to it by fd as the filename diff --git a/Misc/NEWS.d/next/Tests/2024-11-20-18-49-01.gh-issue-127076.DHnXxo.rst b/Misc/NEWS.d/next/Tests/2024-11-20-18-49-01.gh-issue-127076.DHnXxo.rst new file mode 100644 index 00000000000000..39323604bbef56 --- /dev/null +++ b/Misc/NEWS.d/next/Tests/2024-11-20-18-49-01.gh-issue-127076.DHnXxo.rst @@ -0,0 +1,2 @@ +Filter out memory-related ``mmap``, ``munmap``, and ``mprotect`` calls from +file-related ones when testing :mod:`io` behavior using strace. 
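The syscall filtering introduced in PATCH 3 can be exercised on its own. The sketch below is illustrative only: it assumes the helper is importable as test.support.strace_helper and that a working strace is available on the machine; the traced snippet and the strace flags are placeholder values, not the exact ones used by test_fileio.

    # Illustrative use of the get_syscalls()/filter_memory() API added above.
    # The traced code and the strace flags are placeholders.
    from test.support import strace_helper

    code = "open('example.txt', 'rb').read()"   # hypothetical traced snippet
    flags = ["--trace=%file,%desc"]             # assumed file/descriptor filter

    # ignore_memory=True (the default) drops MAP_ANONYMOUS mmap, munmap and
    # mprotect calls that come from the allocator rather than from file I/O.
    syscalls = strace_helper.get_syscalls(code, flags, ignore_memory=True)

    # The FileIO read tests expect the first remaining call to open the file.
    assert syscalls[0] in ("open", "openat")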
From 0cd1eba8255f2a7d369358c027001b4987d3b96d Mon Sep 17 00:00:00 2001 From: Pieter Eendebak Date: Fri, 22 Nov 2024 16:04:27 +0100 Subject: [PATCH 4/6] Update Misc/NEWS.d/next/Core_and_Builtins/2024-11-22-07-58-00.gh-issue-127119.p9Yv4U.rst Co-authored-by: Peter Bierma --- .../2024-11-22-07-58-00.gh-issue-127119.p9Yv4U.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Misc/NEWS.d/next/Core_and_Builtins/2024-11-22-07-58-00.gh-issue-127119.p9Yv4U.rst b/Misc/NEWS.d/next/Core_and_Builtins/2024-11-22-07-58-00.gh-issue-127119.p9Yv4U.rst index 45bcb0ac6fe7c6..68b8b1d37cffc1 100644 --- a/Misc/NEWS.d/next/Core_and_Builtins/2024-11-22-07-58-00.gh-issue-127119.p9Yv4U.rst +++ b/Misc/NEWS.d/next/Core_and_Builtins/2024-11-22-07-58-00.gh-issue-127119.p9Yv4U.rst @@ -1 +1 @@ -Remove redundant check on small ints in ``long_dealloc`` +Slightly optimize the :class:`int` deallocator by removing a redundant check. From 0cb4d6c6549d2299f7518f083bbe7d10314ecd66 Mon Sep 17 00:00:00 2001 From: Serhiy Storchaka Date: Fri, 22 Nov 2024 17:29:33 +0200 Subject: [PATCH 5/6] gh-86463: Fix default prog in subparsers if usage is used in the main parser (GH-125891) The usage parameter of argparse.ArgumentParser no longer affects the default value of the prog parameter in subparsers. Previously the full custom usage of the main parser was used as the prog prefix in subparsers. --- Doc/library/argparse.rst | 10 ++++ Lib/argparse.py | 2 +- Lib/test/test_argparse.py | 51 +++++++++++++++++-- ...4-10-23-20-05-54.gh-issue-86463.jvFTI_.rst | 2 + 4 files changed, 60 insertions(+), 5 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2024-10-23-20-05-54.gh-issue-86463.jvFTI_.rst diff --git a/Doc/library/argparse.rst b/Doc/library/argparse.rst index a4695547921faa..c5882c273717c0 100644 --- a/Doc/library/argparse.rst +++ b/Doc/library/argparse.rst @@ -192,6 +192,12 @@ arguments it contains. The default message can be overridden with the The ``%(prog)s`` format specifier is available to fill in the program name in your usage messages. +When a custom usage message is specified for the main parser, you may also want to +consider passing the ``prog`` argument to :meth:`~ArgumentParser.add_subparsers` +or the ``prog`` and the ``usage`` arguments to +:meth:`~_SubParsersAction.add_parser`, to ensure consistent command prefixes and +usage information across subparsers. + .. _description: @@ -1810,6 +1816,10 @@ Sub-commands .. versionchanged:: 3.7 New *required* keyword-only parameter. + .. versionchanged:: 3.14 + Subparser's *prog* is no longer affected by a custom usage message in + the main parser. 
+ FileType objects ^^^^^^^^^^^^^^^^ diff --git a/Lib/argparse.py b/Lib/argparse.py index 5ecfdca17175e3..f5a7342c2fc355 100644 --- a/Lib/argparse.py +++ b/Lib/argparse.py @@ -1889,7 +1889,7 @@ def add_subparsers(self, **kwargs): formatter = self._get_formatter() positionals = self._get_positional_actions() groups = self._mutually_exclusive_groups - formatter.add_usage(self.usage, positionals, groups, '') + formatter.add_usage(None, positionals, groups, '') kwargs['prog'] = formatter.format_help().strip() # create the parsers action and add it to the positionals list diff --git a/Lib/test/test_argparse.py b/Lib/test/test_argparse.py index 358cfb1c56aae4..69243fde5f0f98 100644 --- a/Lib/test/test_argparse.py +++ b/Lib/test/test_argparse.py @@ -2409,16 +2409,17 @@ def assertArgumentParserError(self, *args, **kwargs): self.assertRaises(ArgumentParserError, *args, **kwargs) def _get_parser(self, subparser_help=False, prefix_chars=None, - aliases=False): + aliases=False, usage=None): # create a parser with a subparsers argument if prefix_chars: parser = ErrorRaisingArgumentParser( - prog='PROG', description='main description', prefix_chars=prefix_chars) + prog='PROG', description='main description', usage=usage, + prefix_chars=prefix_chars) parser.add_argument( prefix_chars[0] * 2 + 'foo', action='store_true', help='foo help') else: parser = ErrorRaisingArgumentParser( - prog='PROG', description='main description') + prog='PROG', description='main description', usage=usage) parser.add_argument( '--foo', action='store_true', help='foo help') parser.add_argument( @@ -2455,7 +2456,8 @@ def _get_parser(self, subparser_help=False, prefix_chars=None, parser2.add_argument('z', type=complex, nargs='*', help='z help') # add third sub-parser - parser3_kwargs = dict(description='3 description') + parser3_kwargs = dict(description='3 description', + usage='PROG --foo bar 3 t ...') if subparser_help: parser3_kwargs['help'] = '3 help' parser3 = subparsers.add_parser('3', **parser3_kwargs) @@ -2477,6 +2479,47 @@ def test_parse_args_failures(self): args = args_str.split() self.assertArgumentParserError(self.parser.parse_args, args) + def test_parse_args_failures_details(self): + for args_str, usage_str, error_str in [ + ('', + 'usage: PROG [-h] [--foo] bar {1,2,3} ...', + 'PROG: error: the following arguments are required: bar'), + ('0.5 1 -y', + 'usage: PROG bar 1 [-h] [-w W] {a,b,c}', + 'PROG bar 1: error: the following arguments are required: x'), + ('0.5 3', + 'usage: PROG --foo bar 3 t ...', + 'PROG bar 3: error: the following arguments are required: t'), + ]: + with self.subTest(args_str): + args = args_str.split() + with self.assertRaises(ArgumentParserError) as cm: + self.parser.parse_args(args) + self.assertEqual(cm.exception.args[0], 'SystemExit') + self.assertEqual(cm.exception.args[2], f'{usage_str}\n{error_str}\n') + + def test_parse_args_failures_details_custom_usage(self): + parser = self._get_parser(usage='PROG [--foo] bar 1 [-w W] {a,b,c}\n' + ' PROG --foo bar 3 t ...') + for args_str, usage_str, error_str in [ + ('', + 'usage: PROG [--foo] bar 1 [-w W] {a,b,c}\n' + ' PROG --foo bar 3 t ...', + 'PROG: error: the following arguments are required: bar'), + ('0.5 1 -y', + 'usage: PROG bar 1 [-h] [-w W] {a,b,c}', + 'PROG bar 1: error: the following arguments are required: x'), + ('0.5 3', + 'usage: PROG --foo bar 3 t ...', + 'PROG bar 3: error: the following arguments are required: t'), + ]: + with self.subTest(args_str): + args = args_str.split() + with self.assertRaises(ArgumentParserError) as 
cm: + parser.parse_args(args) + self.assertEqual(cm.exception.args[0], 'SystemExit') + self.assertEqual(cm.exception.args[2], f'{usage_str}\n{error_str}\n') + def test_parse_args(self): # check some non-failure cases: self.assertEqual( diff --git a/Misc/NEWS.d/next/Library/2024-10-23-20-05-54.gh-issue-86463.jvFTI_.rst b/Misc/NEWS.d/next/Library/2024-10-23-20-05-54.gh-issue-86463.jvFTI_.rst new file mode 100644 index 00000000000000..9ac155770e2254 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2024-10-23-20-05-54.gh-issue-86463.jvFTI_.rst @@ -0,0 +1,2 @@ +The ``usage`` parameter of :class:`argparse.ArgumentParser` no longer +affects the default value of the ``prog`` parameter in subparsers. From 615abb99a4538520f380ab26a42f1506e08ffd09 Mon Sep 17 00:00:00 2001 From: Sergey Muraviov Date: Fri, 22 Nov 2024 18:30:01 +0300 Subject: [PATCH 6/6] Fix broken XML in Visual Studio project file (GH-127142) --- PCbuild/_freeze_module.vcxproj.filters | 2 ++ 1 file changed, 2 insertions(+) diff --git a/PCbuild/_freeze_module.vcxproj.filters b/PCbuild/_freeze_module.vcxproj.filters index 09a5f4d30ef490..3842f52e514bb4 100644 --- a/PCbuild/_freeze_module.vcxproj.filters +++ b/PCbuild/_freeze_module.vcxproj.filters @@ -239,6 +239,8 @@ Source Files + Source Files + Source Files
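The argparse change in PATCH 5 (gh-86463) is easiest to see with a small parser. The sketch below uses invented names and a made-up custom usage string and relies only on the documented argparse API; the prog value given in the final comment is approximate.

    # Minimal sketch of the gh-86463 behaviour change; names are illustrative.
    import argparse

    parser = argparse.ArgumentParser(
        prog='PROG',
        usage='PROG [--foo] bar {1,2} ...',   # custom usage on the main parser
    )
    parser.add_argument('--foo', action='store_true')
    parser.add_argument('bar', type=float)
    sub = parser.add_subparsers()
    one = sub.add_parser('1')

    # Before the patch: one.prog began with the full custom usage string above.
    # After the patch: the custom usage is ignored when deriving the default,
    # so one.prog is built from the main parser's prog and its positionals,
    # roughly 'PROG bar 1'.
    print(one.prog)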