Skip to content

Commit

Permalink
GE P-file reader: adaptive character encoding (#156)
Browse files Browse the repository at this point in the history
* GE P-file reader: adaptive character encoding

`ge_read_pfile` and `ge_pfile` assumed utf-8 encoding in character strings within the p-file; this does not appear to be standard across systems. Suggested patch attempts a few likely encoding candidates, before falling back on a permissive ascii encoding.

* Fix lint errors in updated GE reader

* Added non-English character tests to test_ge_pfile

The corresponding test data (https://github.com/user-attachments/files/17702724/GE_character_encoding_test_data.zip) is expected under spec2nii_test_data/ge/pFiles/PRESS/MR30.1

* Update submodule for new test data.

* Fix directory structure.

---------

Co-authored-by: wtclarke <william.clarke@ndcn.ox.ac.uk>
  • Loading branch information
alexcraven and wtclarke authored Nov 11, 2024
1 parent 3ba4b3c commit 8a9430e
Show file tree
Hide file tree
Showing 4 changed files with 125 additions and 21 deletions.
36 changes: 23 additions & 13 deletions spec2nii/GE/ge_pfile.py
Original file line number Diff line number Diff line change
Expand Up @@ -89,12 +89,15 @@ def _process_svs_pfile(pfile):
:return: List of NIFTI MRS data objects
:return: List of file name suffixes
"""
psd = pfile.hdr.rhi_psdname.decode('utf-8').lower()
proto = pfile.hdr.rhs_se_desc.decode('utf-8').lower()

assert pfile.encoding is not None # encoding should have been set in ge_read_pfile get_mapper

psd = pfile.hdr.rhi_psdname.decode(pfile.encoding, errors='replace').lower()
proto = pfile.hdr.rhs_se_desc.decode(pfile.encoding, errors='replace').lower()
if psd == 'hbcd' and "press" in proto:
print('\nPSD was: ', psd)
print('Proto is: ', proto)
psd = pfile.hdr.rhs_se_desc.decode('utf-8').lower()
psd = pfile.hdr.rhs_se_desc.decode(pfile.encoding, errors='replace').lower()
print('PSD updated to: ', psd)

# MM: Some 'gaba' psd strings contain full path names, so truncate to the end of the path
Expand Down Expand Up @@ -429,7 +432,10 @@ def _process_mrsi_pfile(pfile):
:return: List of NIFTI MRS data objects
:return: List of file name suffixes
"""
psd = pfile.hdr.rhi_psdname.decode('utf-8').lower()

assert pfile.encoding is not None # encoding should have been set in ge_read_pfile get_mapper

psd = pfile.hdr.rhi_psdname.decode(pfile.encoding, errors='replace').lower()

known_formats = ('probe-p', 'probe-sl', 'slaser_cni', 'presscsi')
if psd not in known_formats:
Expand Down Expand Up @@ -573,37 +579,41 @@ def _populate_metadata(pfile, water_suppressed=True, data_dimensions=None):
# 'Manufacturer'
meta.set_standard_def('Manufacturer', 'GE')
# 'ManufacturersModelName'
meta.set_standard_def('ManufacturersModelName', hdr.rhe_ex_sysid.decode('utf-8'))
meta.set_standard_def('ManufacturersModelName', hdr.rhe_ex_sysid.decode(pfile.encoding, errors='replace'))
# 'DeviceSerialNumber'
meta.set_standard_def('DeviceSerialNumber', hdr.rhe_uniq_sys_id.decode('utf-8'))
meta.set_standard_def('DeviceSerialNumber', hdr.rhe_uniq_sys_id.decode(pfile.encoding, errors='replace'))
# 'SoftwareVersions'
meta.set_standard_def('SoftwareVersions', hdr.rhe_ex_verscre.decode('utf-8'))
meta.set_standard_def('SoftwareVersions', hdr.rhe_ex_verscre.decode(pfile.encoding, errors='replace'))
# 'InstitutionName'
meta.set_standard_def('InstitutionName', hdr.rhe_hospname.decode('utf-8'))
meta.set_standard_def('InstitutionName', hdr.rhe_hospname.decode(pfile.encoding, errors='replace'))
# 'InstitutionAddress'
# Not known
# 'TxCoil'
# Not Known
# 'RxCoil'
meta.set_user_def(key='ReceiveCoilName', value=hdr.rhi_cname.decode('utf-8'), doc='Rx coil name.')
meta.set_user_def(
key="ReceiveCoilName",
value=hdr.rhi_cname.decode(pfile.encoding, errors="replace"),
doc="Rx coil name.",
)

# # 5.3 Sequence information
# 'SequenceName'
meta.set_standard_def('SequenceName', hdr.rhi_psdname.decode('utf-8'))
meta.set_standard_def('SequenceName', hdr.rhi_psdname.decode(pfile.encoding, errors='replace'))
# 'ProtocolName'
meta.set_standard_def('ProtocolName', hdr.rhs_se_desc.decode('utf-8'))
meta.set_standard_def('ProtocolName', hdr.rhs_se_desc.decode(pfile.encoding, errors='replace'))

# # 5.4 Sequence information
# 'PatientPosition'
# Not known
# 'PatientName'
meta.set_standard_def('PatientName', hdr.rhe_patname.decode('utf-8'))
meta.set_standard_def('PatientName', hdr.rhe_patname.decode(pfile.encoding, errors='replace'))
# 'PatientID'
# Not known
# 'PatientWeight'
# Not known
# 'PatientDoB'
meta.set_standard_def('PatientDoB', hdr.rhe_dateofbirth.decode('utf-8'))
meta.set_standard_def('PatientDoB', hdr.rhe_dateofbirth.decode(pfile.encoding, errors='replace'))
# 'PatientSex'
if hdr.rhe_patsex == 1:
sex_str = 'M'
Expand Down
36 changes: 29 additions & 7 deletions spec2nii/GE/ge_read_pfile.py
Original file line number Diff line number Diff line change
Expand Up @@ -124,6 +124,7 @@ def __init__(self, fname):
self.hdr = None
self.map = None
self.endian = 'little' # def for version >= 11
self.encoding = None

self.read_header()

Expand Down Expand Up @@ -176,10 +177,31 @@ def get_mapper(self):
if self.hdr is None:
return None

psd = self.hdr.rhi_psdname.decode('utf-8').lower()
proto = self.hdr.rhs_se_desc.decode('utf-8').lower()
if psd == 'hbcd' and "press" in proto:
psd = self.hdr.rhs_se_desc.decode('utf-8').lower()
# ARC 20241105 : utf-8 codec is not standard across systems; here, we try a
# couple of likely candidates, falling back on permissive ascii

for encoding, errors in [
("utf-8", "strict"),
("ISO-8859-1", "strict"),
("ascii", "replace"),
]:
try:
psd = self.hdr.rhi_psdname.decode(encoding, errors).lower()
proto = self.hdr.rhs_se_desc.decode(encoding, errors).lower()

# the following is unused in this context, but can inform codec selection
_ = self.hdr.rhe_patname.decode(encoding, errors)

if psd == "hbcd" and "press" in proto:
psd = self.hdr.rhs_se_desc.decode(encoding, errors).lower()
except UnicodeDecodeError:
psd = ""
proto = ""
continue
self.encoding = encoding
break

assert self.encoding is not None # the final (permissive) codec should always have succeeded

# MM: Some 'gaba' psd strings contain full path names, so truncate to the end of the path
if psd.endswith('gaba'):
Expand Down Expand Up @@ -645,7 +667,7 @@ def get_dcos(self):

dcos[0][0] = (self.hdr.rhi_trhc_R - self.hdr.rhi_tlhc_R)
dcos[0][1] = (self.hdr.rhi_trhc_A - self.hdr.rhi_tlhc_A)
dcos[0][2] = (self.hdr.rhi_trhc_S - self.hdr.rhi_tlhc_S)
dcos[0][2] = (self.hdr.rhi_trhc_S - self.hdr.rhi_tlhc_S)

dcosLengthX = np.sqrt(dcos[0][0] * dcos[0][0]
+ dcos[0][1] * dcos[0][1]
Expand All @@ -657,7 +679,7 @@ def get_dcos(self):

dcos[1][0] = (self.hdr.rhi_brhc_R - self.hdr.rhi_trhc_R)
dcos[1][1] = (self.hdr.rhi_brhc_A - self.hdr.rhi_trhc_A)
dcos[1][2] = (self.hdr.rhi_brhc_S - self.hdr.rhi_trhc_S)
dcos[1][2] = (self.hdr.rhi_brhc_S - self.hdr.rhi_trhc_S)

dcosLengthY = np.sqrt(dcos[1][0] * dcos[1][0]
+ dcos[1][1] * dcos[1][1]
Expand Down Expand Up @@ -986,7 +1008,7 @@ def read_data(self):
numTimePts = self.get_num_time_points
numSpecPts = self.hdr.rhr_rh_frame_size
numFreqPts = numSpecPts
numComponents = 2
numComponents = 2
dataWordSize = self.hdr.rhr_rh_point_size

numBytesInVol = self.get_num_kspace_points * numSpecPts * numComponents * dataWordSize
Expand Down
2 changes: 1 addition & 1 deletion tests/spec2nii_test_data
Submodule spec2nii_test_data updated from 1594c2 to 088e8f
72 changes: 72 additions & 0 deletions tests/test_ge_pfile.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,10 @@
# HBCD / ISTHMUS datasets
hbcd2_path = ge_path / 'pFiles' / 'hbcd' / 'P31744.7'

# Test set from Bergen (MR30.1, non-English characters in header text)
bergen_press_301 = ge_path / 'pFiles' / 'PRESS' / 'MR30.1' / 'P30101.7'
bergen_press_301_non_english = ge_path / 'pFiles' / 'PRESS' / 'MR30.1' / 'P30104.7'


def test_svs(tmp_path):

Expand Down Expand Up @@ -355,3 +359,71 @@ def test_hbcd_isthmus(tmp_path):
img = NIFTI_MRS(tmp_path / 'hbcd_short_te.nii.gz')
assert img.shape == (1, 1, 1, 2048, 32, 8)
assert img.dim_tags == ['DIM_DYN', 'DIM_COIL', None]


def test_svs_bergen_301(tmp_path):
    """Convert the Bergen MR30.1 PRESS p-file and check the outputs.

    Runs the ``spec2nii ge`` command line tool on ``bergen_press_301`` and
    then verifies both generated files (``svs.nii.gz`` and
    ``svs_ref.nii.gz``): data shape, complex dtype, the reciprocal of
    ``pixdim[4]``, the ``WaterSuppressed`` flag, dimension tags, and selected
    header-extension fields, including the decoded text fields.

    :param tmp_path: pytest-provided temporary output directory
    """
    # Pass every argument as str so the command line is built uniformly.
    subprocess.check_call(['spec2nii', 'ge',
                           '-f', 'svs',
                           '-o', str(tmp_path),
                           '-j',
                           str(bergen_press_301)])

    img, hdr_ext = read_nifti_mrs_with_hdr(tmp_path / 'svs.nii.gz')
    img_ref, hdr_ext_ref = read_nifti_mrs_with_hdr(tmp_path / 'svs_ref.nii.gz')

    # Main output: flagged as water suppressed.
    assert img.shape == (1, 1, 1, 4096, 48, 2)
    assert np.iscomplexobj(img.dataobj)
    # Tolerant comparison: the value is the reciprocal of a stored float, so
    # exact equality would depend on rounding details.
    assert np.isclose(1 / img.header['pixdim'][4], 5000.0)
    assert hdr_ext['WaterSuppressed']

    # Reference output: same geometry, flagged as not water suppressed.
    assert img_ref.shape == (1, 1, 1, 4096, 48, 2)
    assert np.iscomplexobj(img_ref.dataobj)
    assert np.isclose(1 / img_ref.header['pixdim'][4], 5000.0)
    assert not hdr_ext_ref['WaterSuppressed']

    assert hdr_ext['dim_5'] == 'DIM_COIL'
    assert hdr_ext['dim_6'] == 'DIM_DYN'
    assert np.isclose(127.7, hdr_ext['SpectrometerFrequency'][0], atol=1E-1)
    assert hdr_ext['ResonantNucleus'][0] == '1H'

    assert np.isclose(hdr_ext['EchoTime'], 0.03)
    assert np.isclose(hdr_ext['RepetitionTime'], 2.0)

    # Header text fields of this file contain only plain ASCII characters.
    assert hdr_ext['PatientName'] == 'fantom'
    assert hdr_ext['SequenceName'] == 'PROBE-P'
    assert hdr_ext['ProtocolName'] == 'PROBE-P'


def test_svs_bergen_301_non_english(tmp_path):
    """Convert the Bergen MR30.1 PRESS p-file with non-English header text.

    Runs the ``spec2nii ge`` command line tool on
    ``bergen_press_301_non_english`` and then verifies both generated files
    (``svs.nii.gz`` and ``svs_ref.nii.gz``): data shape, complex dtype, the
    reciprocal of ``pixdim[4]``, the ``WaterSuppressed`` flag, dimension tags,
    and selected header-extension fields. The final assertions check that the
    non-ASCII characters in the patient and protocol names are decoded
    correctly.

    :param tmp_path: pytest-provided temporary output directory
    """
    # Pass every argument as str so the command line is built uniformly.
    subprocess.check_call(['spec2nii', 'ge',
                           '-f', 'svs',
                           '-o', str(tmp_path),
                           '-j',
                           str(bergen_press_301_non_english)])

    img, hdr_ext = read_nifti_mrs_with_hdr(tmp_path / 'svs.nii.gz')
    img_ref, hdr_ext_ref = read_nifti_mrs_with_hdr(tmp_path / 'svs_ref.nii.gz')

    # Main output: flagged as water suppressed.
    assert img.shape == (1, 1, 1, 4096, 48, 2)
    assert np.iscomplexobj(img.dataobj)
    # Tolerant comparison: the value is the reciprocal of a stored float, so
    # exact equality would depend on rounding details.
    assert np.isclose(1 / img.header['pixdim'][4], 5000.0)
    assert hdr_ext['WaterSuppressed']

    # Reference output: same geometry, flagged as not water suppressed.
    assert img_ref.shape == (1, 1, 1, 4096, 48, 2)
    assert np.iscomplexobj(img_ref.dataobj)
    assert np.isclose(1 / img_ref.header['pixdim'][4], 5000.0)
    assert not hdr_ext_ref['WaterSuppressed']

    assert hdr_ext['dim_5'] == 'DIM_COIL'
    assert hdr_ext['dim_6'] == 'DIM_DYN'
    assert np.isclose(127.7, hdr_ext['SpectrometerFrequency'][0], atol=1E-1)
    assert hdr_ext['ResonantNucleus'][0] == '1H'

    assert np.isclose(hdr_ext['EchoTime'], 0.03)
    assert np.isclose(hdr_ext['RepetitionTime'], 2.0)

    # Non-ASCII characters must survive the header decoding unchanged.
    assert hdr_ext['PatientName'] == 'fantom^prøve'
    assert hdr_ext['SequenceName'] == 'PROBE-P'
    assert hdr_ext['ProtocolName'] == 'PROBE-P åøæäöÅØÆÄÖ'

0 comments on commit 8a9430e

Please sign in to comment.