Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

GE P-file reader: adaptive character encoding #156

Merged
merged 5 commits into from
Nov 11, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
36 changes: 23 additions & 13 deletions spec2nii/GE/ge_pfile.py
Original file line number Diff line number Diff line change
Expand Up @@ -89,12 +89,15 @@ def _process_svs_pfile(pfile):
:return: List of NIFTI MRS data objects
:return: List of file name suffixes
"""
psd = pfile.hdr.rhi_psdname.decode('utf-8').lower()
proto = pfile.hdr.rhs_se_desc.decode('utf-8').lower()

assert pfile.encoding is not None # encoding should have been set in ge_read_pfile get_mapper

psd = pfile.hdr.rhi_psdname.decode(pfile.encoding, errors='replace').lower()
proto = pfile.hdr.rhs_se_desc.decode(pfile.encoding, errors='replace').lower()
if psd == 'hbcd' and "press" in proto:
print('\nPSD was: ', psd)
print('Proto is: ', proto)
psd = pfile.hdr.rhs_se_desc.decode('utf-8').lower()
psd = pfile.hdr.rhs_se_desc.decode(pfile.encoding, errors='replace').lower()
print('PSD updated to: ', psd)

# MM: Some 'gaba' psd strings contain full path names, so truncate to the end of the path
Expand Down Expand Up @@ -429,7 +432,10 @@ def _process_mrsi_pfile(pfile):
:return: List of NIFTI MRS data objects
:return: List of file name suffixes
"""
psd = pfile.hdr.rhi_psdname.decode('utf-8').lower()

assert pfile.encoding is not None # encoding should have been set in ge_read_pfile get_mapper

psd = pfile.hdr.rhi_psdname.decode(pfile.encoding, errors='replace').lower()

known_formats = ('probe-p', 'probe-sl', 'slaser_cni', 'presscsi')
if psd not in known_formats:
Expand Down Expand Up @@ -573,37 +579,41 @@ def _populate_metadata(pfile, water_suppressed=True, data_dimensions=None):
# 'Manufacturer'
meta.set_standard_def('Manufacturer', 'GE')
# 'ManufacturersModelName'
meta.set_standard_def('ManufacturersModelName', hdr.rhe_ex_sysid.decode('utf-8'))
meta.set_standard_def('ManufacturersModelName', hdr.rhe_ex_sysid.decode(pfile.encoding, errors='replace'))
# 'DeviceSerialNumber'
meta.set_standard_def('DeviceSerialNumber', hdr.rhe_uniq_sys_id.decode('utf-8'))
meta.set_standard_def('DeviceSerialNumber', hdr.rhe_uniq_sys_id.decode(pfile.encoding, errors='replace'))
# 'SoftwareVersions'
meta.set_standard_def('SoftwareVersions', hdr.rhe_ex_verscre.decode('utf-8'))
meta.set_standard_def('SoftwareVersions', hdr.rhe_ex_verscre.decode(pfile.encoding, errors='replace'))
# 'InstitutionName'
meta.set_standard_def('InstitutionName', hdr.rhe_hospname.decode('utf-8'))
meta.set_standard_def('InstitutionName', hdr.rhe_hospname.decode(pfile.encoding, errors='replace'))
# 'InstitutionAddress'
# Not known
# 'TxCoil'
# Not Known
# 'RxCoil'
meta.set_user_def(key='ReceiveCoilName', value=hdr.rhi_cname.decode('utf-8'), doc='Rx coil name.')
meta.set_user_def(
key="ReceiveCoilName",
value=hdr.rhi_cname.decode(pfile.encoding, errors="replace"),
doc="Rx coil name.",
)

# # 5.3 Sequence information
# 'SequenceName'
meta.set_standard_def('SequenceName', hdr.rhi_psdname.decode('utf-8'))
meta.set_standard_def('SequenceName', hdr.rhi_psdname.decode(pfile.encoding, errors='replace'))
# 'ProtocolName'
meta.set_standard_def('ProtocolName', hdr.rhs_se_desc.decode('utf-8'))
meta.set_standard_def('ProtocolName', hdr.rhs_se_desc.decode(pfile.encoding, errors='replace'))

# # 5.4 Subject information
# 'PatientPosition'
# Not known
# 'PatientName'
meta.set_standard_def('PatientName', hdr.rhe_patname.decode('utf-8'))
meta.set_standard_def('PatientName', hdr.rhe_patname.decode(pfile.encoding, errors='replace'))
# 'PatientID'
# Not known
# 'PatientWeight'
# Not known
# 'PatientDoB'
meta.set_standard_def('PatientDoB', hdr.rhe_dateofbirth.decode('utf-8'))
meta.set_standard_def('PatientDoB', hdr.rhe_dateofbirth.decode(pfile.encoding, errors='replace'))
# 'PatientSex'
if hdr.rhe_patsex == 1:
sex_str = 'M'
Expand Down
36 changes: 29 additions & 7 deletions spec2nii/GE/ge_read_pfile.py
Original file line number Diff line number Diff line change
Expand Up @@ -124,6 +124,7 @@ def __init__(self, fname):
self.hdr = None
self.map = None
self.endian = 'little' # def for version >= 11
self.encoding = None

self.read_header()

Expand Down Expand Up @@ -176,10 +177,31 @@ def get_mapper(self):
if self.hdr is None:
return None

psd = self.hdr.rhi_psdname.decode('utf-8').lower()
proto = self.hdr.rhs_se_desc.decode('utf-8').lower()
if psd == 'hbcd' and "press" in proto:
psd = self.hdr.rhs_se_desc.decode('utf-8').lower()
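# ARC 20241105 : header text is not utf-8 encoded on every system; here, we try a
# couple of likely candidates in order, falling back on permissive ascii.
# NOTE(review): ISO-8859-1 maps every byte value, so a strict decode with it should
# never raise UnicodeDecodeError; the ascii fallback looks unreachable — confirm intent.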

for encoding, errors in [
("utf-8", "strict"),
("ISO-8859-1", "strict"),
("ascii", "replace"),
]:
try:
psd = self.hdr.rhi_psdname.decode(encoding, errors).lower()
proto = self.hdr.rhs_se_desc.decode(encoding, errors).lower()

# the following is unused in this context, but can inform codec selection
_ = self.hdr.rhe_patname.decode(encoding, errors)

if psd == "hbcd" and "press" in proto:
psd = self.hdr.rhs_se_desc.decode(encoding, errors).lower()
except UnicodeDecodeError:
psd = ""
proto = ""
continue
self.encoding = encoding
break

assert self.encoding is not None # the final (permissive) codec must have succeeded

# MM: Some 'gaba' psd strings contain full path names, so truncate to the end of the path
if psd.endswith('gaba'):
Expand Down Expand Up @@ -645,7 +667,7 @@ def get_dcos(self):

dcos[0][0] = (self.hdr.rhi_trhc_R - self.hdr.rhi_tlhc_R)
dcos[0][1] = (self.hdr.rhi_trhc_A - self.hdr.rhi_tlhc_A)
dcos[0][2] = (self.hdr.rhi_trhc_S - self.hdr.rhi_tlhc_S)
dcos[0][2] = (self.hdr.rhi_trhc_S - self.hdr.rhi_tlhc_S)

dcosLengthX = np.sqrt(dcos[0][0] * dcos[0][0]
+ dcos[0][1] * dcos[0][1]
Expand All @@ -657,7 +679,7 @@ def get_dcos(self):

dcos[1][0] = (self.hdr.rhi_brhc_R - self.hdr.rhi_trhc_R)
dcos[1][1] = (self.hdr.rhi_brhc_A - self.hdr.rhi_trhc_A)
dcos[1][2] = (self.hdr.rhi_brhc_S - self.hdr.rhi_trhc_S)
dcos[1][2] = (self.hdr.rhi_brhc_S - self.hdr.rhi_trhc_S)

dcosLengthY = np.sqrt(dcos[1][0] * dcos[1][0]
+ dcos[1][1] * dcos[1][1]
Expand Down Expand Up @@ -986,7 +1008,7 @@ def read_data(self):
numTimePts = self.get_num_time_points
numSpecPts = self.hdr.rhr_rh_frame_size
numFreqPts = numSpecPts
numComponents = 2
numComponents = 2
dataWordSize = self.hdr.rhr_rh_point_size

numBytesInVol = self.get_num_kspace_points * numSpecPts * numComponents * dataWordSize
Expand Down
2 changes: 1 addition & 1 deletion tests/spec2nii_test_data
Submodule spec2nii_test_data updated from 1594c2 to 088e8f
72 changes: 72 additions & 0 deletions tests/test_ge_pfile.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,10 @@
# HBCD / ISTHMUS datasets
hbcd2_path = ge_path / 'pFiles' / 'hbcd' / 'P31744.7'

# Test set from Bergen (MR30.1, non-English characters in header text)
bergen_press_301 = ge_path / 'pFiles' / 'PRESS' / 'MR30.1' / 'P30101.7'
bergen_press_301_non_english = ge_path / 'pFiles' / 'PRESS' / 'MR30.1' / 'P30104.7'


def test_svs(tmp_path):

Expand Down Expand Up @@ -355,3 +359,71 @@ def test_hbcd_isthmus(tmp_path):
img = NIFTI_MRS(tmp_path / 'hbcd_short_te.nii.gz')
assert img.shape == (1, 1, 1, 2048, 32, 8)
assert img.dim_tags == ['DIM_DYN', 'DIM_COIL', None]


def test_svs_bergen_301(tmp_path):
    """Convert the ``bergen_press_301`` P-file with ``spec2nii ge`` and check the output.

    Runs the command-line converter into ``tmp_path``, then reads back
    ``svs.nii.gz`` and ``svs_ref.nii.gz`` and verifies data shape, sampling,
    dimension tags and the text metadata stored in the header extension.
    """
    command = [
        'spec2nii', 'ge',
        '-f', 'svs',
        '-o', tmp_path,
        '-j',
        str(bergen_press_301),
    ]
    subprocess.check_call(command)

    main_img, main_hdr = read_nifti_mrs_with_hdr(tmp_path / 'svs.nii.gz')
    ref_img, ref_hdr = read_nifti_mrs_with_hdr(tmp_path / 'svs_ref.nii.gz')

    # Main output: shape, complex data, dwell time and suppression flag
    assert main_img.shape == (1, 1, 1, 4096, 48, 2)
    assert np.iscomplexobj(main_img.dataobj)
    assert 1 / main_img.header['pixdim'][4] == 5000.0
    assert main_hdr['WaterSuppressed']

    # Reference output: same layout, but flagged as not water suppressed
    assert ref_img.shape == (1, 1, 1, 4096, 48, 2)
    assert np.iscomplexobj(ref_img.dataobj)
    assert 1 / ref_img.header['pixdim'][4] == 5000.0
    assert not ref_hdr['WaterSuppressed']

    # Dimension tags and nucleus / frequency information
    assert main_hdr['dim_5'] == 'DIM_COIL'
    assert main_hdr['dim_6'] == 'DIM_DYN'
    assert np.isclose(127.7, main_hdr['SpectrometerFrequency'][0], atol=1E-1)
    assert main_hdr['ResonantNucleus'][0] == '1H'

    # Sequence timings
    assert np.isclose(main_hdr['EchoTime'], 0.03)
    assert np.isclose(main_hdr['RepetitionTime'], 2.0)

    # Text fields decoded from the P-file header
    assert main_hdr['PatientName'] == 'fantom'
    assert main_hdr['SequenceName'] == 'PROBE-P'
    assert main_hdr['ProtocolName'] == 'PROBE-P'


def test_svs_bergen_301_non_english(tmp_path):
    """Convert the ``bergen_press_301_non_english`` P-file and check the output.

    Same checks as the plain MR30.1 test, except that the expected patient
    and protocol names contain non-ASCII characters, so this also exercises
    decoding of the header text.
    """
    command = [
        'spec2nii', 'ge',
        '-f', 'svs',
        '-o', tmp_path,
        '-j',
        str(bergen_press_301_non_english),
    ]
    subprocess.check_call(command)

    main_img, main_hdr = read_nifti_mrs_with_hdr(tmp_path / 'svs.nii.gz')
    ref_img, ref_hdr = read_nifti_mrs_with_hdr(tmp_path / 'svs_ref.nii.gz')

    # Main output: shape, complex data, dwell time and suppression flag
    assert main_img.shape == (1, 1, 1, 4096, 48, 2)
    assert np.iscomplexobj(main_img.dataobj)
    assert 1 / main_img.header['pixdim'][4] == 5000.0
    assert main_hdr['WaterSuppressed']

    # Reference output: same layout, but flagged as not water suppressed
    assert ref_img.shape == (1, 1, 1, 4096, 48, 2)
    assert np.iscomplexobj(ref_img.dataobj)
    assert 1 / ref_img.header['pixdim'][4] == 5000.0
    assert not ref_hdr['WaterSuppressed']

    # Dimension tags and nucleus / frequency information
    assert main_hdr['dim_5'] == 'DIM_COIL'
    assert main_hdr['dim_6'] == 'DIM_DYN'
    assert np.isclose(127.7, main_hdr['SpectrometerFrequency'][0], atol=1E-1)
    assert main_hdr['ResonantNucleus'][0] == '1H'

    # Sequence timings
    assert np.isclose(main_hdr['EchoTime'], 0.03)
    assert np.isclose(main_hdr['RepetitionTime'], 2.0)

    # Text fields containing non-English characters must be decoded intact
    assert main_hdr['PatientName'] == 'fantom^prøve'
    assert main_hdr['SequenceName'] == 'PROBE-P'
    assert main_hdr['ProtocolName'] == 'PROBE-P åøæäöÅØÆÄÖ'
Loading