From 94dcb58d1bbcc4592660f4f13d0d3aa8f19f9810 Mon Sep 17 00:00:00 2001 From: Marc Herbert Date: Tue, 28 May 2024 18:18:14 +0000 Subject: [PATCH 1/3] manifest: add new local constant "git_filenames_encoding" Cosmetic fix, no functional change. A little less duplication and hardcoding. listdir_at() had an optional "encoding" parameter but it was never used before now, always defaulting to 'utf-8'. Signed-off-by: Marc Herbert --- src/west/manifest.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/src/west/manifest.py b/src/west/manifest.py index e28faded..13daccee 100644 --- a/src/west/manifest.py +++ b/src/west/manifest.py @@ -272,7 +272,9 @@ def _manifest_content_at(project: 'Project', path: PathType, # enough! raise OSError(errno.ENOENT, os.strerror(errno.ENOENT), path) - ptype = out.decode('utf-8').split()[1] + # TODO: does git always output utf-8? Test git config 'core.precomposeunicode' etc. + git_filenames_encoding = 'utf-8' + ptype = out.decode(git_filenames_encoding).split()[1] if ptype == 'blob': # Importing a file: just return its content. @@ -283,7 +285,8 @@ def _manifest_content_at(project: 'Project', path: PathType, # Use a PurePosixPath because that's the form git seems to # store internally, even on Windows. pathobj = PurePosixPath(path) - for f in filter(_is_yml, project.listdir_at(path, rev=rev)): + for f in filter(_is_yml, project.listdir_at(path, rev=rev, + encoding=git_filenames_encoding)): ret.append(project.read_at(pathobj / f, rev=rev).decode('utf-8')) return ret else: From 645e7a3275f3cc204a02558f3e0b17cda2fa0747 Mon Sep 17 00:00:00 2001 From: Marc Herbert Date: Tue, 28 May 2024 18:22:16 +0000 Subject: [PATCH 2/3] manifest: add new Manifest.encoding constant = 'utf-8' No functional change: just a lot less duplication & hardcoding. This can help testing and supporting other encodings. It does not hurt in any case. Signed-off-by: Marc Herbert --- src/west/app/project.py | 2 +- src/west/manifest.py | 17 ++++++++++------- 2 files changed, 11 insertions(+), 8 deletions(-) diff --git a/src/west/app/project.py b/src/west/app/project.py index a046b65c..ed7d230c 100644 --- a/src/west/app/project.py +++ b/src/west/app/project.py @@ -1049,7 +1049,7 @@ def update_importer(self, project, path): self.updated.add(project.name) try: - return _manifest_content_at(project, path) + return _manifest_content_at(project, path, Manifest.encoding) except FileNotFoundError: # FIXME we need each project to have back-pointers # to the manifest file where it was defined, so we can diff --git a/src/west/manifest.py b/src/west/manifest.py index 13daccee..4cabef51 100644 --- a/src/west/manifest.py +++ b/src/west/manifest.py @@ -243,7 +243,7 @@ def _west_commands_merge(wc1: List[str], wc2: List[str]) -> List[str]: def _default_importer(project: 'Project', file: str) -> NoReturn: raise ManifestImportFailed(project, file) -def _manifest_content_at(project: 'Project', path: PathType, +def _manifest_content_at(project: 'Project', path: PathType, mf_encoding: str, rev: str = QUAL_MANIFEST_REV_BRANCH) \ -> ImportedContentType: # Get a list of manifest data from project at path @@ -278,7 +278,7 @@ def _manifest_content_at(project: 'Project', path: PathType, if ptype == 'blob': # Importing a file: just return its content. - return project.read_at(path, rev=rev).decode('utf-8') + return project.read_at(path, rev=rev).decode(mf_encoding) elif ptype == 'tree': # Importing a tree: return the content of the YAML files inside it. ret = [] @@ -287,7 +287,7 @@ def _manifest_content_at(project: 'Project', path: PathType, pathobj = PurePosixPath(path) for f in filter(_is_yml, project.listdir_at(path, rev=rev, encoding=git_filenames_encoding)): - ret.append(project.read_at(pathobj / f, rev=rev).decode('utf-8')) + ret.append(project.read_at(pathobj / f, rev=rev).decode(mf_encoding)) return ret else: raise MalformedManifest(f"can't decipher project {project.name} " @@ -1187,6 +1187,9 @@ class Manifest: '''The parsed contents of a west manifest file. ''' + # TODO: make this a new west config: 'manifest.encoding' + encoding: str = 'utf-8' + @staticmethod def from_topdir(topdir: Optional[PathType] = None, config: Optional[Configuration] = None, @@ -1905,7 +1908,7 @@ def get_option(option, default=None): current_relpath = manifest_path / manifest_file current_abspath = topdir_abspath / current_relpath try: - current_data = current_abspath.read_text(encoding='utf-8') + current_data = current_abspath.read_text(encoding=Manifest.encoding) except FileNotFoundError: raise MalformedConfig( f'file not found: manifest file {current_abspath} ' @@ -2204,7 +2207,7 @@ def _import_pathobj_from_self(self, pathobj_abs: Path, child_ctx = self._ctx._replace( current_abspath=pathobj_abs, current_relpath=pathobj, - current_data=pathobj_abs.read_text(encoding='utf-8') + current_data=pathobj_abs.read_text(encoding=Manifest.encoding) ) try: Manifest(topdir=self.topdir, internal_import_ctx=child_ctx) @@ -2242,7 +2245,7 @@ def _import_map_from_self(self, imp: Dict) -> None: path_prefix=path_prefix, current_abspath=import_abs, current_relpath=pathobj / import_abs.name, - current_data=import_abs.read_text(encoding='utf-8') + current_data=import_abs.read_text(encoding=Manifest.encoding) ) try: Manifest(topdir=self.topdir, internal_import_ctx=child_ctx) @@ -2575,7 +2578,7 @@ def _import_content_from_project(self, project: Project, if not (self._ctx.import_flags & ImportFlag.FORCE_PROJECTS) and \ project.is_cloned(): try: - content = _manifest_content_at(project, path) + content = _manifest_content_at(project, path, Manifest.encoding) except MalformedManifest as mm: self._malformed(mm.args[0]) except FileNotFoundError: From 672631972feb2825f4161fc8025ac5ba3b6d26bd Mon Sep 17 00:00:00 2001 From: Marc Herbert Date: Tue, 28 May 2024 18:23:48 +0000 Subject: [PATCH 3/3] project: use new Manifest.encoding constant (utf-8) to read manifest Fixes issue reported in PR #710 where most places are hardcoded to 'utf-8' while this one is (Windows) locale-dependent. In the future, we may want to make this more flexible but the most urgent fix is consistency: with this commit, manifest decoding should be hardcoded to 'utf-8' everywhere. Signed-off-by: Marc Herbert --- src/west/app/project.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/west/app/project.py b/src/west/app/project.py index ed7d230c..0ffe1e5a 100644 --- a/src/west/app/project.py +++ b/src/west/app/project.py @@ -321,7 +321,7 @@ def bootstrap(self, args) -> Path: # Parse the manifest to get "self: path:", if it declares one. # Otherwise, use the URL. Ignore imports -- all we really # want to know is if there's a "self: path:" or not. - manifest = Manifest.from_data(temp_manifest.read_text(), + manifest = Manifest.from_data(temp_manifest.read_text(encoding=Manifest.encoding), import_flags=ImportFlag.IGNORE) if manifest.yaml_path: manifest_path = manifest.yaml_path