From bdf8334ad7616a9cd33c6ca5e8410ebc85b4728d Mon Sep 17 00:00:00 2001 From: Alexander R Craven <12934018+alexcraven@users.noreply.github.com> Date: Tue, 5 Nov 2024 14:20:43 +0100 Subject: [PATCH 1/5] GE P-file reader: adaptive character encoding `ge_read_pfile` and `ge_pfile` assumed utf-8 encoding in character strings within the p-file; this does not appear to be standard across systems. Suggested patch attempts a few likely encoding candidates, before falling back on a permissive ascii encoding. --- spec2nii/GE/ge_pfile.py | 32 +++++++++++++++++++------------- spec2nii/GE/ge_read_pfile.py | 30 ++++++++++++++++++++++++++---- 2 files changed, 45 insertions(+), 17 deletions(-) diff --git a/spec2nii/GE/ge_pfile.py b/spec2nii/GE/ge_pfile.py index 098976f..4de2aa4 100644 --- a/spec2nii/GE/ge_pfile.py +++ b/spec2nii/GE/ge_pfile.py @@ -89,12 +89,15 @@ def _process_svs_pfile(pfile): :return: List of NIFTI MRS data objects :return: List of file name suffixes """ - psd = pfile.hdr.rhi_psdname.decode('utf-8').lower() - proto = pfile.hdr.rhs_se_desc.decode('utf-8').lower() + + assert(pfile.encoding is not None) # encoding should have been set in ge_read_pfile get_mapper + + psd = pfile.hdr.rhi_psdname.decode(pfile.encoding, errors='replace').lower() + proto = pfile.hdr.rhs_se_desc.decode(pfile.encoding, errors='replace').lower() if psd == 'hbcd' and "press" in proto: print('\nPSD was: ', psd) print('Proto is: ', proto) - psd = pfile.hdr.rhs_se_desc.decode('utf-8').lower() + psd = pfile.hdr.rhs_se_desc.decode(pfile.encoding, errors='replace').lower() print('PSD updated to: ', psd) # MM: Some 'gaba' psd strings contain full path names, so truncate to the end of the path @@ -429,7 +432,10 @@ def _process_mrsi_pfile(pfile): :return: List of NIFTI MRS data objects :return: List of file name suffixes """ - psd = pfile.hdr.rhi_psdname.decode('utf-8').lower() + + assert(pfile.encoding is not None) # encoding should have been set in ge_read_pfile get_mapper + + psd = pfile.hdr.rhi_psdname.decode(pfile.encoding, errors='replace').lower() known_formats = ('probe-p', 'probe-sl', 'slaser_cni', 'presscsi') if psd not in known_formats: @@ -573,37 +579,37 @@ def _populate_metadata(pfile, water_suppressed=True, data_dimensions=None): # 'Manufacturer' meta.set_standard_def('Manufacturer', 'GE') # 'ManufacturersModelName' - meta.set_standard_def('ManufacturersModelName', hdr.rhe_ex_sysid.decode('utf-8')) + meta.set_standard_def('ManufacturersModelName', hdr.rhe_ex_sysid.decode(pfile.encoding, errors='replace')) # 'DeviceSerialNumber' - meta.set_standard_def('DeviceSerialNumber', hdr.rhe_uniq_sys_id.decode('utf-8')) + meta.set_standard_def('DeviceSerialNumber', hdr.rhe_uniq_sys_id.decode(pfile.encoding, errors='replace')) # 'SoftwareVersions' - meta.set_standard_def('SoftwareVersions', hdr.rhe_ex_verscre.decode('utf-8')) + meta.set_standard_def('SoftwareVersions', hdr.rhe_ex_verscre.decode(pfile.encoding, errors='replace')) # 'InstitutionName' - meta.set_standard_def('InstitutionName', hdr.rhe_hospname.decode('utf-8')) + meta.set_standard_def('InstitutionName', hdr.rhe_hospname.decode(pfile.encoding, errors='replace')) # 'InstitutionAddress' # Not known # 'TxCoil' # Not Known # 'RxCoil' - meta.set_user_def(key='ReceiveCoilName', value=hdr.rhi_cname.decode('utf-8'), doc='Rx coil name.') + meta.set_user_def(key='ReceiveCoilName', value=hdr.rhi_cname.decode(pfile.encoding, errors='replace'), doc='Rx coil name.') # # 5.3 Sequence information # 'SequenceName' - meta.set_standard_def('SequenceName', hdr.rhi_psdname.decode('utf-8')) + meta.set_standard_def('SequenceName', hdr.rhi_psdname.decode(pfile.encoding, errors='replace')) # 'ProtocolName' - meta.set_standard_def('ProtocolName', hdr.rhs_se_desc.decode('utf-8')) + meta.set_standard_def('ProtocolName', hdr.rhs_se_desc.decode(pfile.encoding, errors='replace')) # # 5.4 Sequence information # 'PatientPosition' # Not known # 'PatientName' - meta.set_standard_def('PatientName', hdr.rhe_patname.decode('utf-8')) + meta.set_standard_def('PatientName', hdr.rhe_patname.decode(pfile.encoding, errors='replace')) # 'PatientID' # Not known # 'PatientWeight' # Not known # 'PatientDoB' - meta.set_standard_def('PatientDoB', hdr.rhe_dateofbirth.decode('utf-8')) + meta.set_standard_def('PatientDoB', hdr.rhe_dateofbirth.decode(pfile.encoding, errors='replace')) # 'PatientSex' if hdr.rhe_patsex == 1: sex_str = 'M' diff --git a/spec2nii/GE/ge_read_pfile.py b/spec2nii/GE/ge_read_pfile.py index d48a084..caf6811 100644 --- a/spec2nii/GE/ge_read_pfile.py +++ b/spec2nii/GE/ge_read_pfile.py @@ -124,6 +124,7 @@ def __init__(self, fname): self.hdr = None self.map = None self.endian = 'little' # def for version >= 11 + self.encoding = None self.read_header() @@ -176,10 +177,31 @@ def get_mapper(self): if self.hdr is None: return None - psd = self.hdr.rhi_psdname.decode('utf-8').lower() - proto = self.hdr.rhs_se_desc.decode('utf-8').lower() - if psd == 'hbcd' and "press" in proto: - psd = self.hdr.rhs_se_desc.decode('utf-8').lower() + # ARC 20241105 : utf-8 codec is not standard across systems; here, we try a + # couple of likely candidates, falling back on permissive ascii + + for encoding, errors in [ + ("utf-8", "strict"), + ("ISO-8859-1", "strict"), + ("ascii", "replace"), + ]: + try: + psd = self.hdr.rhi_psdname.decode(encoding, errors).lower() + proto = self.hdr.rhs_se_desc.decode(encoding, errors).lower() + + # the following is unused in this context, but can inform codec selection + _ = self.hdr.rhe_patname.decode(encoding, errors) + + if psd == "hbcd" and "press" in proto: + psd = self.hdr.rhs_se_desc.decode(encoding, errors).lower() + except UnicodeDecodeError as err: + psd = "" + proto = "" + continue + self.encoding = encoding + break + + assert(self.encoding is not None) # final codec must should have succeeded # MM: Some 'gaba' psd strings contain full path names, so truncate to the end of the path if psd.endswith('gaba'): From 97455bc3786c5ae017ff2904d91a7e86d0d89ea6 Mon Sep 17 00:00:00 2001 From: Alexander R Craven <12934018+alexcraven@users.noreply.github.com> Date: Tue, 5 Nov 2024 14:39:23 +0100 Subject: [PATCH 2/5] Fix lint errors in updated GE reader --- spec2nii/GE/ge_pfile.py | 10 +++++++--- spec2nii/GE/ge_read_pfile.py | 10 +++++----- 2 files changed, 12 insertions(+), 8 deletions(-) diff --git a/spec2nii/GE/ge_pfile.py b/spec2nii/GE/ge_pfile.py index 4de2aa4..4af6bd4 100644 --- a/spec2nii/GE/ge_pfile.py +++ b/spec2nii/GE/ge_pfile.py @@ -90,7 +90,7 @@ def _process_svs_pfile(pfile): :return: List of file name suffixes """ - assert(pfile.encoding is not None) # encoding should have been set in ge_read_pfile get_mapper + assert pfile.encoding is not None # encoding should have been set in ge_read_pfile get_mapper psd = pfile.hdr.rhi_psdname.decode(pfile.encoding, errors='replace').lower() proto = pfile.hdr.rhs_se_desc.decode(pfile.encoding, errors='replace').lower() @@ -433,7 +433,7 @@ def _process_mrsi_pfile(pfile): :return: List of file name suffixes """ - assert(pfile.encoding is not None) # encoding should have been set in ge_read_pfile get_mapper + assert pfile.encoding is not None # encoding should have been set in ge_read_pfile get_mapper psd = pfile.hdr.rhi_psdname.decode(pfile.encoding, errors='replace').lower() @@ -591,7 +591,11 @@ def _populate_metadata(pfile, water_suppressed=True, data_dimensions=None): # 'TxCoil' # Not Known # 'RxCoil' - meta.set_user_def(key='ReceiveCoilName', value=hdr.rhi_cname.decode(pfile.encoding, errors='replace'), doc='Rx coil name.') + meta.set_user_def( + key="ReceiveCoilName", + value=hdr.rhi_cname.decode(pfile.encoding, errors="replace"), + doc="Rx coil name.", + ) # # 5.3 Sequence information # 'SequenceName' diff --git a/spec2nii/GE/ge_read_pfile.py b/spec2nii/GE/ge_read_pfile.py index caf6811..b45b94a 100644 --- a/spec2nii/GE/ge_read_pfile.py +++ b/spec2nii/GE/ge_read_pfile.py @@ -194,14 +194,14 @@ def get_mapper(self): if psd == "hbcd" and "press" in proto: psd = self.hdr.rhs_se_desc.decode(encoding, errors).lower() - except UnicodeDecodeError as err: + except UnicodeDecodeError: psd = "" proto = "" continue self.encoding = encoding break - assert(self.encoding is not None) # final codec must should have succeeded + assert self.encoding is not None # final codec must should have succeeded # MM: Some 'gaba' psd strings contain full path names, so truncate to the end of the path if psd.endswith('gaba'): @@ -667,7 +667,7 @@ def get_dcos(self): dcos[0][0] = (self.hdr.rhi_trhc_R - self.hdr.rhi_tlhc_R) dcos[0][1] = (self.hdr.rhi_trhc_A - self.hdr.rhi_tlhc_A) - dcos[0][2] = (self.hdr.rhi_trhc_S - self.hdr.rhi_tlhc_S) + dcos[0][2] = (self.hdr.rhi_trhc_S - self.hdr.rhi_tlhc_S) dcosLengthX = np.sqrt(dcos[0][0] * dcos[0][0] + dcos[0][1] * dcos[0][1] @@ -679,7 +679,7 @@ def get_dcos(self): dcos[1][0] = (self.hdr.rhi_brhc_R - self.hdr.rhi_trhc_R) dcos[1][1] = (self.hdr.rhi_brhc_A - self.hdr.rhi_trhc_A) - dcos[1][2] = (self.hdr.rhi_brhc_S - self.hdr.rhi_trhc_S) + dcos[1][2] = (self.hdr.rhi_brhc_S - self.hdr.rhi_trhc_S) dcosLengthY = np.sqrt(dcos[1][0] * dcos[1][0] + dcos[1][1] * dcos[1][1] @@ -1008,7 +1008,7 @@ def read_data(self): numTimePts = self.get_num_time_points numSpecPts = self.hdr.rhr_rh_frame_size numFreqPts = numSpecPts - numComponents = 2 + numComponents = 2 dataWordSize = self.hdr.rhr_rh_point_size numBytesInVol = self.get_num_kspace_points * numSpecPts * numComponents * dataWordSize From d5b5b97eb2847cee0ecd90cf8c7b3173baddd14f Mon Sep 17 00:00:00 2001 From: Alexander R Craven <12934018+alexcraven@users.noreply.github.com> Date: Mon, 11 Nov 2024 14:58:47 +0100 Subject: [PATCH 3/5] Added non-English character tests to test_ge_pfile Corresponding test data https://github.com/user-attachments/files/17702724/GE_character_encoding_test_data.zip expected under spec2nii_test_data/ge/pFiles/PRESS/MR30.1 --- tests/test_ge_pfile.py | 92 +++++++++++++++++++++++++++++++++++++----- 1 file changed, 82 insertions(+), 10 deletions(-) diff --git a/tests/test_ge_pfile.py b/tests/test_ge_pfile.py index 4489409..dcaba00 100644 --- a/tests/test_ge_pfile.py +++ b/tests/test_ge_pfile.py @@ -35,8 +35,12 @@ # HBCD / ISTHMUS datasets hbcd2_path = ge_path / 'pFiles' / 'hbcd' / 'P31744.7' +# Test set from Bergen (MR30.1, non-English characters in header text) +bergen_press_301 = ge_path / 'pFiles' / 'PRESS' / 'MR30.1' / 'P30101.7' +bergen_press_301_non_english = ge_path / 'pFiles' / 'PRESS' / 'MR30.1' / 'P30104.7' -def test_svs(tmp_path): + +def testsvs(tmp_path): subprocess.check_call(['spec2nii', 'ge', '-f', 'svs', @@ -68,7 +72,7 @@ def test_svs(tmp_path): assert np.isclose(hdr_ext['RepetitionTime'], 2.0) -def test_mrsi(tmp_path): +def testmrsi(tmp_path): subprocess.check_call(['spec2nii', 'ge', '-f', 'mrsi', @@ -90,7 +94,7 @@ def test_mrsi(tmp_path): assert hdr_ext['OriginalFile'][0] == mrsi_path.name -def test_mpress(tmp_path): +def testmpress(tmp_path): subprocess.check_call(['spec2nii', 'ge', '-f', 'mpress', @@ -123,7 +127,7 @@ def test_mpress(tmp_path): assert np.isclose(hdr_ext['RepetitionTime'], 2.0) -def test_mm_hercules(tmp_path): +def testmm_hercules(tmp_path): for path in mm_herc: subprocess.run([ 'spec2nii', 'ge', @@ -160,7 +164,7 @@ def test_mm_hercules(tmp_path): assert hdr_ext_ref['dim_7'] == 'DIM_EDIT' -def test_mm_hermes(tmp_path): +def testmm_hermes(tmp_path): for path in mm_hermes: subprocess.run([ 'spec2nii', 'ge', @@ -196,7 +200,7 @@ def test_mm_hermes(tmp_path): assert hdr_ext_ref['dim_7'] == 'DIM_EDIT' -def test_mm_mega(tmp_path): +def testmm_mega(tmp_path): for path in mm_mega.rglob('*.7'): subprocess.run([ 'spec2nii', 'ge', @@ -238,7 +242,7 @@ def test_mm_mega(tmp_path): assert hdr_ext_ref['dim_7'] == 'DIM_EDIT' -def test_mm_press(tmp_path): +def testmm_press(tmp_path): for path in mm_press.rglob('*.7'): subprocess.run([ 'spec2nii', 'ge', @@ -273,7 +277,7 @@ def test_mm_press(tmp_path): assert hdr_ext_ref['dim_6'] == 'DIM_DYN' -def test_mm_press_noid(tmp_path): +def testmm_press_noid(tmp_path): subprocess.run([ 'spec2nii', 'ge', '-f', mm_press_noid.stem, @@ -300,7 +304,7 @@ def test_mm_press_noid(tmp_path): assert hdr_ext_ref['dim_5'] == 'DIM_COIL' -def test_mm_slaser(tmp_path): +def testmm_slaser(tmp_path): subprocess.run([ 'spec2nii', 'ge', '-f', mm_slaser.stem, @@ -327,7 +331,7 @@ def test_mm_slaser(tmp_path): assert hdr_ext_ref['dim_5'] == 'DIM_COIL' -def test_hbcd_isthmus(tmp_path): +def testhbcd_isthmus(tmp_path): subprocess.run([ 'spec2nii', 'ge', '-f', 'hbcd', @@ -355,3 +359,71 @@ def test_hbcd_isthmus(tmp_path): img = NIFTI_MRS(tmp_path / 'hbcd_short_te.nii.gz') assert img.shape == (1, 1, 1, 2048, 32, 8) assert img.dim_tags == ['DIM_DYN', 'DIM_COIL', None] + + +def testsvs_bergen_301(tmp_path): + + subprocess.check_call(['spec2nii', 'ge', + '-f', 'svs', + '-o', tmp_path, + '-j', + str(bergen_press_301)]) + + img, hdr_ext = read_nifti_mrs_with_hdr(tmp_path / 'svs.nii.gz') + img_ref, hdr_ext_ref = read_nifti_mrs_with_hdr(tmp_path / 'svs_ref.nii.gz') + + assert img.shape == (1, 1, 1, 4096, 48, 2) + assert np.iscomplexobj(img.dataobj) + assert 1 / img.header['pixdim'][4] == 5000.0 + assert hdr_ext['WaterSuppressed'] + + assert img_ref.shape == (1, 1, 1, 4096, 48, 2) + assert np.iscomplexobj(img_ref.dataobj) + assert 1 / img_ref.header['pixdim'][4] == 5000.0 + assert not hdr_ext_ref['WaterSuppressed'] + + assert hdr_ext['dim_5'] == 'DIM_COIL' + assert hdr_ext['dim_6'] == 'DIM_DYN' + assert np.isclose(127.7, hdr_ext['SpectrometerFrequency'][0], atol=1E-1) + assert hdr_ext['ResonantNucleus'][0] == '1H' + + assert np.isclose(hdr_ext['EchoTime'], 0.03) + assert np.isclose(hdr_ext['RepetitionTime'], 2.0) + + assert hdr_ext['PatientName'] == 'fantom' + assert hdr_ext['SequenceName'] == 'PROBE-P' + assert hdr_ext['ProtocolName'] == 'PROBE-P' + + +def testsvs_bergen_301_non_english(tmp_path): + + subprocess.check_call(['spec2nii', 'ge', + '-f', 'svs', + '-o', tmp_path, + '-j', + str(bergen_press_301_non_english)]) + + img, hdr_ext = read_nifti_mrs_with_hdr(tmp_path / 'svs.nii.gz') + img_ref, hdr_ext_ref = read_nifti_mrs_with_hdr(tmp_path / 'svs_ref.nii.gz') + + assert img.shape == (1, 1, 1, 4096, 48, 2) + assert np.iscomplexobj(img.dataobj) + assert 1 / img.header['pixdim'][4] == 5000.0 + assert hdr_ext['WaterSuppressed'] + + assert img_ref.shape == (1, 1, 1, 4096, 48, 2) + assert np.iscomplexobj(img_ref.dataobj) + assert 1 / img_ref.header['pixdim'][4] == 5000.0 + assert not hdr_ext_ref['WaterSuppressed'] + + assert hdr_ext['dim_5'] == 'DIM_COIL' + assert hdr_ext['dim_6'] == 'DIM_DYN' + assert np.isclose(127.7, hdr_ext['SpectrometerFrequency'][0], atol=1E-1) + assert hdr_ext['ResonantNucleus'][0] == '1H' + + assert np.isclose(hdr_ext['EchoTime'], 0.03) + assert np.isclose(hdr_ext['RepetitionTime'], 2.0) + + assert hdr_ext['PatientName'] == 'fantom^prøve' + assert hdr_ext['SequenceName'] == 'PROBE-P' + assert hdr_ext['ProtocolName'] == 'PROBE-P åøæäöÅØÆÄÖ' From 391cfc544db1a6d71d3a94966f8035878d5a5c39 Mon Sep 17 00:00:00 2001 From: wtclarke Date: Mon, 11 Nov 2024 14:33:16 +0000 Subject: [PATCH 4/5] Update submodule for new test data. --- tests/spec2nii_test_data | 2 +- tests/test_ge_pfile.py | 24 ++++++++++++------------ 2 files changed, 13 insertions(+), 13 deletions(-) diff --git a/tests/spec2nii_test_data b/tests/spec2nii_test_data index 1594c26..287fb54 160000 --- a/tests/spec2nii_test_data +++ b/tests/spec2nii_test_data @@ -1 +1 @@ -Subproject commit 1594c2625a53a877670f9dd0492c0e1b6f3471d5 +Subproject commit 287fb5415fbffb4bd11cee3925ba85b6f280f1d3 diff --git a/tests/test_ge_pfile.py b/tests/test_ge_pfile.py index dcaba00..c0c22bd 100644 --- a/tests/test_ge_pfile.py +++ b/tests/test_ge_pfile.py @@ -40,7 +40,7 @@ bergen_press_301_non_english = ge_path / 'pFiles' / 'PRESS' / 'MR30.1' / 'P30104.7' -def testsvs(tmp_path): +def test_svs(tmp_path): subprocess.check_call(['spec2nii', 'ge', '-f', 'svs', @@ -72,7 +72,7 @@ def testsvs(tmp_path): assert np.isclose(hdr_ext['RepetitionTime'], 2.0) -def testmrsi(tmp_path): +def test_mrsi(tmp_path): subprocess.check_call(['spec2nii', 'ge', '-f', 'mrsi', @@ -94,7 +94,7 @@ def testmrsi(tmp_path): assert hdr_ext['OriginalFile'][0] == mrsi_path.name -def testmpress(tmp_path): +def test_mpress(tmp_path): subprocess.check_call(['spec2nii', 'ge', '-f', 'mpress', @@ -127,7 +127,7 @@ def testmpress(tmp_path): assert np.isclose(hdr_ext['RepetitionTime'], 2.0) -def testmm_hercules(tmp_path): +def test_mm_hercules(tmp_path): for path in mm_herc: subprocess.run([ 'spec2nii', 'ge', @@ -164,7 +164,7 @@ def testmm_hercules(tmp_path): assert hdr_ext_ref['dim_7'] == 'DIM_EDIT' -def testmm_hermes(tmp_path): +def test_mm_hermes(tmp_path): for path in mm_hermes: subprocess.run([ 'spec2nii', 'ge', @@ -200,7 +200,7 @@ def testmm_hermes(tmp_path): assert hdr_ext_ref['dim_7'] == 'DIM_EDIT' -def testmm_mega(tmp_path): +def test_mm_mega(tmp_path): for path in mm_mega.rglob('*.7'): subprocess.run([ 'spec2nii', 'ge', @@ -242,7 +242,7 @@ def testmm_mega(tmp_path): assert hdr_ext_ref['dim_7'] == 'DIM_EDIT' -def testmm_press(tmp_path): +def test_mm_press(tmp_path): for path in mm_press.rglob('*.7'): subprocess.run([ 'spec2nii', 'ge', @@ -277,7 +277,7 @@ def testmm_press(tmp_path): assert hdr_ext_ref['dim_6'] == 'DIM_DYN' -def testmm_press_noid(tmp_path): +def test_mm_press_noid(tmp_path): subprocess.run([ 'spec2nii', 'ge', '-f', mm_press_noid.stem, @@ -304,7 +304,7 @@ def testmm_press_noid(tmp_path): assert hdr_ext_ref['dim_5'] == 'DIM_COIL' -def testmm_slaser(tmp_path): +def test_mm_slaser(tmp_path): subprocess.run([ 'spec2nii', 'ge', '-f', mm_slaser.stem, @@ -331,7 +331,7 @@ def testmm_slaser(tmp_path): assert hdr_ext_ref['dim_5'] == 'DIM_COIL' -def testhbcd_isthmus(tmp_path): +def test_hbcd_isthmus(tmp_path): subprocess.run([ 'spec2nii', 'ge', '-f', 'hbcd', @@ -361,7 +361,7 @@ def testhbcd_isthmus(tmp_path): assert img.dim_tags == ['DIM_DYN', 'DIM_COIL', None] -def testsvs_bergen_301(tmp_path): +def test_svs_bergen_301(tmp_path): subprocess.check_call(['spec2nii', 'ge', '-f', 'svs', @@ -395,7 +395,7 @@ def testsvs_bergen_301(tmp_path): assert hdr_ext['ProtocolName'] == 'PROBE-P' -def testsvs_bergen_301_non_english(tmp_path): +def test_svs_bergen_301_non_english(tmp_path): subprocess.check_call(['spec2nii', 'ge', '-f', 'svs', From 494169b1a306b74dd7f8fe336216edcc21000a90 Mon Sep 17 00:00:00 2001 From: wtclarke Date: Mon, 11 Nov 2024 14:48:25 +0000 Subject: [PATCH 5/5] Fix directorys tructure. --- tests/spec2nii_test_data | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/spec2nii_test_data b/tests/spec2nii_test_data index 287fb54..088e8f1 160000 --- a/tests/spec2nii_test_data +++ b/tests/spec2nii_test_data @@ -1 +1 @@ -Subproject commit 287fb5415fbffb4bd11cee3925ba85b6f280f1d3 +Subproject commit 088e8f1646f839ad2c75eacfdd2a8d6ae7707ade