From 8a9430e634711742b2e7ba32b6e2608e9e47e3a0 Mon Sep 17 00:00:00 2001 From: Alexander R Craven <12934018+alexcraven@users.noreply.github.com> Date: Mon, 11 Nov 2024 16:27:34 +0100 Subject: [PATCH] GE P-file reader: adaptive character encoding (#156) * GE P-file reader: adaptive character encoding `ge_read_pfile` and `ge_pfile` assumed utf-8 encoding in character strings within the p-file; this does not appear to be standard across systems. Suggested patch attempts a few likely encoding candidates, before falling back on a permissive ascii encoding. * Fix lint errors in updated GE reader * Added non-English character tests to test_ge_pfile Corresponding test data https://github.com/user-attachments/files/17702724/GE_character_encoding_test_data.zip expected under spec2nii_test_data/ge/pFiles/PRESS/MR30.1 * Update submodule for new test data. * Fix directory structure. --------- Co-authored-by: wtclarke --- spec2nii/GE/ge_pfile.py | 36 +++++++++++------- spec2nii/GE/ge_read_pfile.py | 36 ++++++++++++++---- tests/spec2nii_test_data | 2 +- tests/test_ge_pfile.py | 72 ++++++++++++++++++++++++++++++++++++ 4 files changed, 125 insertions(+), 21 deletions(-) diff --git a/spec2nii/GE/ge_pfile.py b/spec2nii/GE/ge_pfile.py index 098976f..4af6bd4 100644 --- a/spec2nii/GE/ge_pfile.py +++ b/spec2nii/GE/ge_pfile.py @@ -89,12 +89,15 @@ def _process_svs_pfile(pfile): :return: List of NIFTI MRS data objects :return: List of file name suffixes """ - psd = pfile.hdr.rhi_psdname.decode('utf-8').lower() - proto = pfile.hdr.rhs_se_desc.decode('utf-8').lower() + + assert pfile.encoding is not None # encoding should have been set in ge_read_pfile get_mapper + + psd = pfile.hdr.rhi_psdname.decode(pfile.encoding, errors='replace').lower() + proto = pfile.hdr.rhs_se_desc.decode(pfile.encoding, errors='replace').lower() if psd == 'hbcd' and "press" in proto: print('\nPSD was: ', psd) print('Proto is: ', proto) - psd = pfile.hdr.rhs_se_desc.decode('utf-8').lower() + psd = 
pfile.hdr.rhs_se_desc.decode(pfile.encoding, errors='replace').lower() print('PSD updated to: ', psd) # MM: Some 'gaba' psd strings contain full path names, so truncate to the end of the path @@ -429,7 +432,10 @@ def _process_mrsi_pfile(pfile): :return: List of NIFTI MRS data objects :return: List of file name suffixes """ - psd = pfile.hdr.rhi_psdname.decode('utf-8').lower() + + assert pfile.encoding is not None # encoding should have been set in ge_read_pfile get_mapper + + psd = pfile.hdr.rhi_psdname.decode(pfile.encoding, errors='replace').lower() known_formats = ('probe-p', 'probe-sl', 'slaser_cni', 'presscsi') if psd not in known_formats: @@ -573,37 +579,41 @@ def _populate_metadata(pfile, water_suppressed=True, data_dimensions=None): # 'Manufacturer' meta.set_standard_def('Manufacturer', 'GE') # 'ManufacturersModelName' - meta.set_standard_def('ManufacturersModelName', hdr.rhe_ex_sysid.decode('utf-8')) + meta.set_standard_def('ManufacturersModelName', hdr.rhe_ex_sysid.decode(pfile.encoding, errors='replace')) # 'DeviceSerialNumber' - meta.set_standard_def('DeviceSerialNumber', hdr.rhe_uniq_sys_id.decode('utf-8')) + meta.set_standard_def('DeviceSerialNumber', hdr.rhe_uniq_sys_id.decode(pfile.encoding, errors='replace')) # 'SoftwareVersions' - meta.set_standard_def('SoftwareVersions', hdr.rhe_ex_verscre.decode('utf-8')) + meta.set_standard_def('SoftwareVersions', hdr.rhe_ex_verscre.decode(pfile.encoding, errors='replace')) # 'InstitutionName' - meta.set_standard_def('InstitutionName', hdr.rhe_hospname.decode('utf-8')) + meta.set_standard_def('InstitutionName', hdr.rhe_hospname.decode(pfile.encoding, errors='replace')) # 'InstitutionAddress' # Not known # 'TxCoil' # Not Known # 'RxCoil' - meta.set_user_def(key='ReceiveCoilName', value=hdr.rhi_cname.decode('utf-8'), doc='Rx coil name.') + meta.set_user_def( + key="ReceiveCoilName", + value=hdr.rhi_cname.decode(pfile.encoding, errors="replace"), + doc="Rx coil name.", + ) # # 5.3 Sequence information # 
'SequenceName' - meta.set_standard_def('SequenceName', hdr.rhi_psdname.decode('utf-8')) + meta.set_standard_def('SequenceName', hdr.rhi_psdname.decode(pfile.encoding, errors='replace')) # 'ProtocolName' - meta.set_standard_def('ProtocolName', hdr.rhs_se_desc.decode('utf-8')) + meta.set_standard_def('ProtocolName', hdr.rhs_se_desc.decode(pfile.encoding, errors='replace')) # # 5.4 Sequence information # 'PatientPosition' # Not known # 'PatientName' - meta.set_standard_def('PatientName', hdr.rhe_patname.decode('utf-8')) + meta.set_standard_def('PatientName', hdr.rhe_patname.decode(pfile.encoding, errors='replace')) # 'PatientID' # Not known # 'PatientWeight' # Not known # 'PatientDoB' - meta.set_standard_def('PatientDoB', hdr.rhe_dateofbirth.decode('utf-8')) + meta.set_standard_def('PatientDoB', hdr.rhe_dateofbirth.decode(pfile.encoding, errors='replace')) # 'PatientSex' if hdr.rhe_patsex == 1: sex_str = 'M' diff --git a/spec2nii/GE/ge_read_pfile.py b/spec2nii/GE/ge_read_pfile.py index d48a084..b45b94a 100644 --- a/spec2nii/GE/ge_read_pfile.py +++ b/spec2nii/GE/ge_read_pfile.py @@ -124,6 +124,7 @@ def __init__(self, fname): self.hdr = None self.map = None self.endian = 'little' # def for version >= 11 + self.encoding = None self.read_header() @@ -176,10 +177,31 @@ def get_mapper(self): if self.hdr is None: return None - psd = self.hdr.rhi_psdname.decode('utf-8').lower() - proto = self.hdr.rhs_se_desc.decode('utf-8').lower() - if psd == 'hbcd' and "press" in proto: - psd = self.hdr.rhs_se_desc.decode('utf-8').lower() + # ARC 20241105 : utf-8 codec is not standard across systems; here, we try a + # couple of likely candidates, falling back on permissive ascii + + for encoding, errors in [ + ("utf-8", "strict"), + ("ISO-8859-1", "strict"), + ("ascii", "replace"), + ]: + try: + psd = self.hdr.rhi_psdname.decode(encoding, errors).lower() + proto = self.hdr.rhs_se_desc.decode(encoding, errors).lower() + + # the following is unused in this context, but can inform codec 
selection + _ = self.hdr.rhe_patname.decode(encoding, errors) + + if psd == "hbcd" and "press" in proto: + psd = self.hdr.rhs_se_desc.decode(encoding, errors).lower() + except UnicodeDecodeError: + psd = "" + proto = "" + continue + self.encoding = encoding + break + + assert self.encoding is not None # final codec should have succeeded # MM: Some 'gaba' psd strings contain full path names, so truncate to the end of the path if psd.endswith('gaba'): @@ -645,7 +667,7 @@ def get_dcos(self): dcos[0][0] = (self.hdr.rhi_trhc_R - self.hdr.rhi_tlhc_R) dcos[0][1] = (self.hdr.rhi_trhc_A - self.hdr.rhi_tlhc_A) - dcos[0][2] = (self.hdr.rhi_trhc_S - self.hdr.rhi_tlhc_S) + dcos[0][2] = (self.hdr.rhi_trhc_S - self.hdr.rhi_tlhc_S) dcosLengthX = np.sqrt(dcos[0][0] * dcos[0][0] + dcos[0][1] * dcos[0][1] @@ -657,7 +679,7 @@ def get_dcos(self): dcos[1][0] = (self.hdr.rhi_brhc_R - self.hdr.rhi_trhc_R) dcos[1][1] = (self.hdr.rhi_brhc_A - self.hdr.rhi_trhc_A) - dcos[1][2] = (self.hdr.rhi_brhc_S - self.hdr.rhi_trhc_S) + dcos[1][2] = (self.hdr.rhi_brhc_S - self.hdr.rhi_trhc_S) dcosLengthY = np.sqrt(dcos[1][0] * dcos[1][0] + dcos[1][1] * dcos[1][1] @@ -986,7 +1008,7 @@ def read_data(self): numTimePts = self.get_num_time_points numSpecPts = self.hdr.rhr_rh_frame_size numFreqPts = numSpecPts - numComponents = 2 + numComponents = 2 dataWordSize = self.hdr.rhr_rh_point_size numBytesInVol = self.get_num_kspace_points * numSpecPts * numComponents * dataWordSize diff --git a/tests/spec2nii_test_data b/tests/spec2nii_test_data index 1594c26..088e8f1 160000 --- a/tests/spec2nii_test_data +++ b/tests/spec2nii_test_data @@ -1 +1 @@ -Subproject commit 1594c2625a53a877670f9dd0492c0e1b6f3471d5 +Subproject commit 088e8f1646f839ad2c75eacfdd2a8d6ae7707ade diff --git a/tests/test_ge_pfile.py b/tests/test_ge_pfile.py index 4489409..c0c22bd 100644 --- a/tests/test_ge_pfile.py +++ b/tests/test_ge_pfile.py @@ -35,6 +35,10 @@ # HBCD / ISTHMUS datasets hbcd2_path = ge_path / 'pFiles' / 'hbcd' / 'P31744.7' +# 
Test set from Bergen (MR30.1, non-English characters in header text) +bergen_press_301 = ge_path / 'pFiles' / 'PRESS' / 'MR30.1' / 'P30101.7' +bergen_press_301_non_english = ge_path / 'pFiles' / 'PRESS' / 'MR30.1' / 'P30104.7' + def test_svs(tmp_path): @@ -355,3 +359,71 @@ def test_hbcd_isthmus(tmp_path): img = NIFTI_MRS(tmp_path / 'hbcd_short_te.nii.gz') assert img.shape == (1, 1, 1, 2048, 32, 8) assert img.dim_tags == ['DIM_DYN', 'DIM_COIL', None] + + +def test_svs_bergen_301(tmp_path): + + subprocess.check_call(['spec2nii', 'ge', + '-f', 'svs', + '-o', tmp_path, + '-j', + str(bergen_press_301)]) + + img, hdr_ext = read_nifti_mrs_with_hdr(tmp_path / 'svs.nii.gz') + img_ref, hdr_ext_ref = read_nifti_mrs_with_hdr(tmp_path / 'svs_ref.nii.gz') + + assert img.shape == (1, 1, 1, 4096, 48, 2) + assert np.iscomplexobj(img.dataobj) + assert 1 / img.header['pixdim'][4] == 5000.0 + assert hdr_ext['WaterSuppressed'] + + assert img_ref.shape == (1, 1, 1, 4096, 48, 2) + assert np.iscomplexobj(img_ref.dataobj) + assert 1 / img_ref.header['pixdim'][4] == 5000.0 + assert not hdr_ext_ref['WaterSuppressed'] + + assert hdr_ext['dim_5'] == 'DIM_COIL' + assert hdr_ext['dim_6'] == 'DIM_DYN' + assert np.isclose(127.7, hdr_ext['SpectrometerFrequency'][0], atol=1E-1) + assert hdr_ext['ResonantNucleus'][0] == '1H' + + assert np.isclose(hdr_ext['EchoTime'], 0.03) + assert np.isclose(hdr_ext['RepetitionTime'], 2.0) + + assert hdr_ext['PatientName'] == 'fantom' + assert hdr_ext['SequenceName'] == 'PROBE-P' + assert hdr_ext['ProtocolName'] == 'PROBE-P' + + +def test_svs_bergen_301_non_english(tmp_path): + + subprocess.check_call(['spec2nii', 'ge', + '-f', 'svs', + '-o', tmp_path, + '-j', + str(bergen_press_301_non_english)]) + + img, hdr_ext = read_nifti_mrs_with_hdr(tmp_path / 'svs.nii.gz') + img_ref, hdr_ext_ref = read_nifti_mrs_with_hdr(tmp_path / 'svs_ref.nii.gz') + + assert img.shape == (1, 1, 1, 4096, 48, 2) + assert np.iscomplexobj(img.dataobj) + assert 1 / img.header['pixdim'][4] == 
5000.0 + assert hdr_ext['WaterSuppressed'] + + assert img_ref.shape == (1, 1, 1, 4096, 48, 2) + assert np.iscomplexobj(img_ref.dataobj) + assert 1 / img_ref.header['pixdim'][4] == 5000.0 + assert not hdr_ext_ref['WaterSuppressed'] + + assert hdr_ext['dim_5'] == 'DIM_COIL' + assert hdr_ext['dim_6'] == 'DIM_DYN' + assert np.isclose(127.7, hdr_ext['SpectrometerFrequency'][0], atol=1E-1) + assert hdr_ext['ResonantNucleus'][0] == '1H' + + assert np.isclose(hdr_ext['EchoTime'], 0.03) + assert np.isclose(hdr_ext['RepetitionTime'], 2.0) + + assert hdr_ext['PatientName'] == 'fantom^prøve' + assert hdr_ext['SequenceName'] == 'PROBE-P' + assert hdr_ext['ProtocolName'] == 'PROBE-P åøæäöÅØÆÄÖ'