Commit

FIX: Support UTF-8 encoding for JSON files (#1357)
* WIP: add ensure_ascii flag to _write_json

* Revert "WIP: add ensure_ascii flag to _write_json"

This reverts commit 4c47679.

* Don't force ASCII encoding in _write_json

* TST: Add a test

TIL that json.loads will always decode Unicode escapes. So to test that Unicode was properly encoded when writing to disk, I had to read the raw text on disk without going through the json module.
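
A minimal sketch of what that means in practice (the file name here is illustrative, not from the commit): json.loads decodes \u escapes either way, so only reading the raw text reveals whether the file stored escaped ASCII or real UTF-8.

import json

data = {"Authors": ["MNE Ł."]}

# Default json.dumps escapes non-ASCII characters ...
escaped = json.dumps(data)                  # '{"Authors": ["MNE \u0141."]}'
# ... while ensure_ascii=False keeps them as UTF-8 text.
raw = json.dumps(data, ensure_ascii=False)  # '{"Authors": ["MNE Ł."]}'

# json.loads decodes the escape either way, so both round-trip identically ...
assert json.loads(escaped) == json.loads(raw) == data

# ... which is why the test reads the raw text from disk instead of using json.load.
with open("dataset_description.json", "w", encoding="utf-8") as fid:
    fid.write(raw)
with open("dataset_description.json", encoding="utf-8") as fid:
    assert "MNE Ł." in fid.read()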

* DOC: update changelog

* Commit Dan's suggestion

Instead of closing and re-opening the file, rewind the "playhead" to the start of the open file, then use fid.read() as usual
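
A minimal sketch of that pattern (illustrative file name, assuming the file already exists on disk):

import json

with open("dataset_description.json", encoding="utf-8") as fid:
    parsed = json.load(fid)  # first pass: parse the JSON as usual
    fid.seek(0)              # rewind to the start instead of closing and reopening
    raw_text = fid.read()    # second pass: read the raw text exactly as written on disk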

Co-authored-by: Daniel McCloy <dan@mccloy.info>

---------

Co-authored-by: Daniel McCloy <dan@mccloy.info>
Co-authored-by: Stefan Appelhoff <stefan.appelhoff@mailbox.org>
3 people authored Jan 1, 2025
1 parent 3492fa0 commit 3f59b0e
Showing 3 changed files with 12 additions and 3 deletions.
2 changes: 2 additions & 0 deletions doc/whats_new.rst
@@ -23,6 +23,7 @@ The following authors had contributed before. Thank you for sticking around!

* `Stefan Appelhoff`_
* `Daniel McCloy`_
* `Scott Huberty`_

Detailed list of changes
~~~~~~~~~~~~~~~~~~~~~~~~
@@ -47,6 +48,7 @@ Detailed list of changes
^^^^^^^^^^^^

- :func:`mne_bids.read_raw_bids` can optionally return an ``event_id`` dictionary suitable for use with :func:`mne.events_from_annotations`, and if a ``values`` column is present in ``events.tsv`` it will be used as the source of the integer event ID codes, by `Daniel McCloy`_ (:gh:`1349`)
- :func:`mne_bids.make_dataset_description` now correctly encodes the dataset description as UTF-8 on disk, by `Scott Huberty`_ (:gh:`1357`)

⚕️ Code health
^^^^^^^^^^^^^^
11 changes: 9 additions & 2 deletions mne_bids/tests/test_write.py
@@ -376,7 +376,7 @@ def test_make_dataset_description(tmp_path, monkeypatch):
make_dataset_description(
path=tmp_path,
name="tst2",
authors="MNE B., MNE P.",
authors="MNE B., MNE P., MNE Ł.",
funding="GSOC2019, GSOC2021",
references_and_links="https://doi.org/10.21105/joss.01896",
dataset_type="derivative",
@@ -386,7 +386,14 @@

with open(op.join(tmp_path, "dataset_description.json"), encoding="utf-8") as fid:
dataset_description_json = json.load(fid)
assert dataset_description_json["Authors"] == ["MNE B.", "MNE P."]
assert dataset_description_json["Authors"] == ["MNE B.", "MNE P.", "MNE Ł."]
# If the text on disk is unicode, json.load will convert it. So let's test that
# the text was encoded correctly on disk.
fid.seek(0)
# don't use json.load here, as it would decode any \u escapes and hide how the text was stored
dataset_description_string = fid.read()
# Check that U+0141 was correctly encoded as Ł on disk
assert "MNE Ł." in dataset_description_string

# Check we raise warnings and errors where appropriate
with pytest.raises(
2 changes: 1 addition & 1 deletion mne_bids/utils.py
@@ -233,7 +233,7 @@ def _write_json(fname, dictionary, overwrite=False):
f'"{fname}" already exists. Please set overwrite to True.'
)

json_output = json.dumps(dictionary, indent=4)
json_output = json.dumps(dictionary, indent=4, ensure_ascii=False)
with open(fname, "w", encoding="utf-8") as fid:
fid.write(json_output)
fid.write("\n")
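
End to end, the fix means a call like the following (the dataset path is a hypothetical example) writes "MNE Ł." into dataset_description.json as UTF-8 text rather than as a \u0141 escape:

from pathlib import Path
from mne_bids import make_dataset_description

bids_root = Path("my_bids_root")  # hypothetical dataset root
bids_root.mkdir(exist_ok=True)

make_dataset_description(
    path=bids_root,
    name="example",
    authors="MNE B., MNE P., MNE Ł.",
    overwrite=True,
)

# The raw file now contains the literal character instead of a \u escape.
text = (bids_root / "dataset_description.json").read_text(encoding="utf-8")
assert "MNE Ł." in text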
