Skip to content

Commit

Permalink
Documentation + bug fix
Browse files Browse the repository at this point in the history
  • Loading branch information
ddooley committed Sep 5, 2024
1 parent 1858054 commit 654f11d
Show file tree
Hide file tree
Showing 2 changed files with 71 additions and 44 deletions.
35 changes: 27 additions & 8 deletions script/dh-validate.py
Original file line number Diff line number Diff line change
Expand Up @@ -419,16 +419,35 @@ def getLinkMLTransform(SCHEMA, template, row_data):
if slot['multivalued'] == True:
output_val = [x.strip() for x in re.split(DELIMITERS, output_val)];

# If key isn't in snake_case, then convert it to that, since
# linkml-validate insists on that:

# ISSUE: () preserved rather than eliminated
# ISSUE: diagnostic pcr Ct value 1 not transformed to diagnostic_pcr_ct_value_1
# ISSUE: geo_loc_name_(state_province_territory) not transformed to geo_loc_name_(state_province_territory)
# ISSUE: NML submitted specimen type not transformed to nml_submitted_specimen_type
# ISSUE: specimen collector sample ID not transformed to specimen_collector_sample_id

# This relabeling of key helps
key = re.sub("[-]","",re.sub("[ /]","_", key)); #LinkML doesn't convert to lowercase
# ISSUE:


# For validation, LinkML will transform both schema and slot labels into
# what it considers to be standardized names, so we have to anticipate what
# the new slot label will be via search and replace. Convert keys to
# **snake_case** since linkml-validate insists on that. However:
# - Forward slashes and parentheses are preserved though this is
# nonstandard, so:
# "geo_loc name (state/province/territory)"
# is changed to
# "geo_loc_name_(state/province/territory)"
# - Case is preserved though that is non-standard. So
# "specimen collector sample ID"
# is changed to
# "specimen_collector_sample_ID"
#
# - Validating caps CamelCase Enums is hard, e.g. if an Enum is named
# "geo_loc_name (state/province/territory) menu"
# LinkML will automatically rename this to
# "GeoLocName(state/province/territory)Menu"
# However, it doesn't update the name in slot range expressions!
# Hence these must be renamed in source schema.

# The GeoLocName(state/province/territory)Menu
key = re.sub("[-]","",re.sub("[ ]","_", key)); # Accepts ()/ in field name.
data[key] = output_val;

return data;
Expand Down
80 changes: 44 additions & 36 deletions script/tabular_to_schema.py
Original file line number Diff line number Diff line change
Expand Up @@ -149,19 +149,37 @@ def set_range(slot, slot_range, slot_range_2):
def set_min_max(slot, slot_minimum_value, slot_maximum_value):
    """Record the minimum/maximum bounds of a slot.

    Each bound is a string. An empty string means "no bound" and is skipped.
    A bound that parses as an integer is stored as ``int``; otherwise one
    that parses as a decimal is stored as ``float``. Anything else cannot be
    expressed as a numeric bound, so it is recorded as a note in
    ``slot['todos']`` ('>=' + value for a minimum, '<=' + value for a
    maximum).

    Args:
        slot: mapping describing the slot; modified in place.
        slot_minimum_value: textual minimum bound, '' if none.
        slot_maximum_value: textual maximum bound, '' if none.
    """
    if slot_minimum_value > '':
        if isInteger(slot_minimum_value):
            slot['minimum_value'] = int(slot_minimum_value)
        elif isDecimal(slot_minimum_value):
            slot['minimum_value'] = float(slot_minimum_value)
        else:
            # Non-numeric minimum: this starts a fresh todos list, replacing
            # any list already stored under 'todos'.
            slot['todos'] = ['>=' + slot_minimum_value]

    if slot_maximum_value > '':
        if isInteger(slot_maximum_value):
            slot['maximum_value'] = int(slot_maximum_value)
        elif isDecimal(slot_maximum_value):
            slot['maximum_value'] = float(slot_maximum_value)
        else:
            # 'todos' only exists if the minimum branch (or a caller) created
            # it, so look it up with .get() rather than indexing, which would
            # raise KeyError for a slot that has no todos yet.
            todos = slot.get('todos') or []
            todos.append('<=' + slot_maximum_value)
            slot['todos'] = todos

def isDecimal(x):
    """Return True if ``x`` parses as a finite floating-point number.

    ``float()`` also accepts the spellings "nan", "inf" and "infinity"
    (any case, optionally signed). Those are not usable as numeric bounds,
    so they are reported as non-decimal instead of being passed through.

    Args:
        x: value to test, normally a string.

    Returns:
        True when ``float(x)`` succeeds and the result is finite, else False.
    """
    import math  # local import keeps this helper self-contained

    try:
        return math.isfinite(float(x))
    except ValueError:
        return False

def isInteger(x):
    """Return True when ``int(x)`` accepts ``x``, False on ValueError."""
    try:
        int(x)
        return True
    except ValueError:
        # Not an integer literal (e.g. "4.2", "", "abc").
        return False

def set_classes(schema_slot_path, schema, locale_schemas, export_format, warnings):

Expand Down Expand Up @@ -356,9 +374,8 @@ def set_enums(enum_path, schema, locale_schemas, export_format, warnings):
reader = csv.DictReader(tsvfile, dialect='excel-tab');

enumerations = schema['enums'];

name = ''; # running title for chunks of enumeration rows
menu_path = [];
name = ''; # running name for chunks of enumeration rows
choice_path = [];
enum = {};

for row in reader:
Expand All @@ -368,21 +385,18 @@ def set_enums(enum_path, schema, locale_schemas, export_format, warnings):
row[field] = row[field].strip();

# Each enumeration begins with a row that provides the name of the enum.
if row.get('title','') > '':
# subsequent rows may not have a name.
if row.get('name','') > '' or row.get('title','') > '':

# Process default language title

name = row.get('name');
title = row.get('title');
name = row.get('name','');
if name == '': name = title;
print ("name:", name)

description = row.get('description','');
if not name: # For enumerations that don't have separate name field
name = title;
if not (name in enumerations):
enum = {
'name': name,
'title': title,
'description': description,
'permissible_values': {}
};
enumerations[name] = enum;
Expand All @@ -393,58 +407,52 @@ def set_enums(enum_path, schema, locale_schemas, export_format, warnings):
for lcode in locale_schemas.keys():
locale_schema = locale_schemas[lcode];
locale_schema['enums'][name] = {
'name': name, # default (usu. english) name acts as key
'name': name, # Acts as key
'permissible_values': {}
};

# Provide translation title if available for this menu.
locale_title = row.get('title_' + lcode, '');
if locale_title > '':
locale_schema['enums'][name]['title'] = row.get('title_' + lcode, title);

# Provide translation description if available for this menu.
locale_description = row.get('description_' + lcode, '');
if locale_description > '':
locale_schema['enums'][name]['description'] = locale_description;

locale_schema['enums'][name]['title'] = locale_title;

# If there is a title (or name) of an enum at play
if name and name > '':
if name > '':
# Text is label of a particular menu choice
# Loop scans through columns until it gets a value
for depth in range(1,6):
menu_x = 'menu_' + str(depth);
choice_value = row.get(menu_x);
choice_text = row.get(menu_x);
# Here there is a menu item to process
if choice_value > '':
del menu_path[depth-1:] # Menu path always points to parent
if choice_text > '':
del choice_path[depth-1:] # Menu path always points to parent

description = row.get('description','');
meaning = row.get('meaning','');

choice = {'text' : choice_value}
choice = {'text' : choice_text}
if description > '': choice['description'] = description;
if meaning > '': choice['meaning'] = meaning;

# Export mappings can be established for any enumeration items too.
set_mappings(choice, row, export_format);

# IMPLEMENTS FLAT LIST WITH IS_A HIERARCHY
if len(menu_path) > 0:
choice['is_a'] = menu_path[-1]; # Last item in path
if len(choice_path) > 0:
choice['is_a'] = choice_path[-1]; # Last item in path

enum['permissible_values'][choice_value] = choice
menu_path.append(choice_value)
enum['permissible_values'][choice_text] = choice;
choice_path.append(choice_text);

for lcode in locale_schemas.keys():
translation = row.get(menu_x + '_' + lcode, '');
if translation > '':
if translation > '' and translation != choice['text']:

local_choice = copy.deepcopy(choice);
del local_choice['text']; # in language variant files this isn't needed.
local_choice['title'] = translation;
local_choice = {'title': translation}
description = row.get(description + '_' + lcode, '');
if description:
local_choice['description': description];

locale_schemas[lcode]['enums'][name]['permissible_values'][choice_value] = local_choice;
locale_schemas[lcode]['enums'][name]['permissible_values'][choice_text] = local_choice;

break;

Expand Down

0 comments on commit 654f11d

Please sign in to comment.