diff --git a/README.md b/README.md index 5f62da4..e55aa45 100644 --- a/README.md +++ b/README.md @@ -10,9 +10,11 @@ Contact the StudyFinder team at studyfinder@umn.edu if you: - Have any questions about StudyFinder, or - Want to learn more about updates or enhancements of the tool. -## Upgrade notes for 2.1 +## Upgrade notes for 2.2 +The built-in clinicaltrials.gov connector has been transitioned fully to the clinicaltrials.gov V2 API. This includes two breaking changes in the private API for the ctgov connector. -The main page carousel/video feature was an accessibility and usability issue, and has been replaced with a three-wide panel of "featured studies". These can be configured in the admin panel, where the carousel configuration formerly was. +1. In `Connectors::Ctgov#load(start_date,end_date)` the start and end dates must now be in ISO format YYYY-MM-DD (the old format was MM/DD/YYYY). Any custom tasks that directly call this method should be updated. +2. `Connectors::Ctgov#load(start_date,end_date)` now calls `Connectors::Ctgov#process` itself to recurse through the V2 API's paged results. Formerly, `load` and `process` had to be called separately in that order. Remove any direct calls to `process` in order to avoid a redundant re-processing of the last "page" of data from the API. ## Development diff --git a/app/views/studies/_clinicaltrialsgov_button.html.erb b/app/views/studies/_clinicaltrialsgov_button.html.erb index 4364f52..1ede9df 100644 --- a/app/views/studies/_clinicaltrialsgov_button.html.erb +++ b/app/views/studies/_clinicaltrialsgov_button.html.erb @@ -1,5 +1,5 @@ <% if Trial.is_nct_number?(study.nct_id) %> - + See this study on ClinicalTrials.gov diff --git a/docker-compose.yml b/docker-compose.yml index 78c21c6..b23d841 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -1,4 +1,3 @@ -version: '3' services: elasticsearch: image: elasticsearch:8.10.2 diff --git a/lib/connectors/ctgov.rb b/lib/connectors/ctgov.rb index 292ee40..d314069 100644 --- a/lib/connectors/ctgov.rb +++ b/lib/connectors/ctgov.rb @@ -5,77 +5,98 @@ class Ctgov def initialize @system_info = SystemInfo.current - @parser_id = Parser.find_by({ klass: 'Parsers::Ctgov'}).id if @system_info.nil? raise "There is no system info associated. Please run the seeds file, or add the info in the system administration section." end - end - - def load(start_date=nil, end_date=nil) - start_load_time = Time.now - url = "https://clinicaltrials.gov/ct2/results/download_studies?locn=#{ERB::Util.url_encode(@system_info.search_term)}" + @parser_id = Parser.find_by({ klass: 'Parsers::Ctgov'}).id + @location = @system_info.search_term + @page_token = nil + @payload = nil + @start_date = 'MIN' + @end_date = 'MAX' + @start_load_time = nil + @total_count = nil + @count = 0 + end - if !start_date.nil? and !end_date.nil? - puts "Loading clinicaltrials.gov results for #{@system_info.search_term} ... from #{start_date} to #{end_date}" - url = url + "&lup_s=#{ERB::Util.url_encode(start_date)}&lup_e=#{ERB::Util.url_encode(end_date)}" - else - puts "Loading all clinicaltrials.gov results for #{@system_info.search_term} ..." + def study_filters + q = { + 'query.locn' => "AREA[LocationFacility]#{@location} AND AREA[LocationStatus]RECRUITING", + 'query.term' => "AREA[LastUpdatePostDate]RANGE[#{@start_date},#{@end_date}]", + countTotal: true, + pageSize: 100, + format: "json" + } + # API only wants a pageToken arg at all if we are actually asking for one. + if !@page_token.blank? + q[:pageToken] = @page_token end - puts "Search URL: #{url}" - # @zipfile = Tempfile.new('file') - # @zipfile.binmode + return q + end - dirname = "#{Rails.root}/tmp/" - unless File.directory?(dirname) - FileUtils.mkdir_p(dirname) - end + def studies_page + response = HTTParty.get( + "https://clinicaltrials.gov/api/v2/studies", + query: self.study_filters + ) + @payload = JSON.parse(response.body || "{}") + @total_count ||= @payload.dig('totalCount') + puts "Retrieved page (#{@page_token})" + end - FileUtils.rm_rf("#{dirname}search_result.zip") - File.open("#{dirname}search_result.zip", "w+") do |f| - f.write(HTTParty.get(url).body) - end - # @zipfile.write(HTTParty.get(url).body) - # @zipfile.close + def load(start_date="MIN", end_date="MAX") + puts "Adding/Updating trials in the database. If it is a full reload it's going to be awhile... Maybe get some coffee? :)" + @start_date = start_date + @end_date = end_date + @start_load_time ||= Time.now - puts "Extracting trials from zip file" - extract() - end_load_time = Time.now + self.studies_page - puts "Time elapsed #{(end_load_time - start_load_time)} seconds" - end + # Process the studies we just received, and ... + self.process + # ... recurse if there's another page. - def extract - start_load_time = Time.now - extract_zip() - end_load_time = Time.now + if @payload.dig("nextPageToken") + @page_token = @payload.dig("nextPageToken") + else + @page_token = nil + end - puts "Zip time elapsed: #{(end_load_time - start_load_time)}" - return true + if @page_token.blank? + puts "clinicaltrials.gov load COMPLETE." + else + puts "Now we'll load page #{@payload.dig("nextPageToken")}}" + @payload = nil + self.load(@start_date,@end_date) + end end def process - start_load_time = Time.now - count = 0 - puts "Adding/Updating trials in the database. If it is a full reload it's going to be awhile... Maybe get some coffee? :)" - - Dir.glob("#{Rails.root}/tmp/trials/*.xml") do |file| - p = Parsers::Ctgov.new( file.gsub("#{Rails.root}/tmp/trials/", "").gsub(".xml", ""), @parser_id) - p.load(file) + page_start_load_time = Time.now + page_count = 0 + puts "Processing page (#{@page_token})" + + @payload.dig('studies').each do |study| + @id = study.dig('protocolSection', 'identificationModule', 'nctId') + p = Parsers::Ctgov.new(@id, @parser_id, study) + puts "Processing: #{@id} (#{@count + 1} of #{@total_count})" p.process - count = count + 1 + page_count = page_count + 1 + @count = @count + 1 end - end_load_time = Time.now + page_end_load_time = Time.now - puts "Logging update to updaters table. Processed #{count} records." + puts "Logging update to updaters table." Updater.create({ parser_id: @parser_id, - num_updated: count + num_updated: page_count }) - puts "Process time elapsed: #{(end_load_time - start_load_time)} seconds" + puts "Page time elapsed: #{(page_end_load_time - page_start_load_time)} seconds for #{page_count} records." + puts "Total process elapsed: #{(page_end_load_time - @start_load_time)} seconds for #{@count} records." return true end @@ -86,8 +107,9 @@ def clear TrialLocation.delete_all TrialKeyword.delete_all Location.delete_all - Trial.delete_all + TrialSubgroup.delete_all TrialCondition.delete_all + Trial.delete_all end def site_nct_ids @@ -103,7 +125,6 @@ def cleanup_stray_trials end def nct_ids_for_location(location, page_token = nil) - csc = 'M Health Fairview Clinics and Surgery Center' ids = [] q = { 'query.locn' => "SEARCH[Location](AREA[LocationFacility]#{location} AND AREA[LocationStatus]RECRUITING)", diff --git a/lib/parsers/ctgov.rb b/lib/parsers/ctgov.rb index 279e32d..ad68d81 100644 --- a/lib/parsers/ctgov.rb +++ b/lib/parsers/ctgov.rb @@ -4,40 +4,27 @@ module Parsers class Ctgov - @@simple_fields = [ - 'brief_title', - 'official_title', - 'acronym', - 'phase', - 'verification_date', - 'lastchanged_date', - 'firstreceived_date', - 'brief_summary', - 'detailed_description' - ] + @@simple_fields = { + brief_title: ['protocolSection', 'identificationModule', 'briefTitle'], + official_title: ['protocolSection', 'identificationModule', 'officialTitle'], + acronym: ['protocolSection', 'identificationModule', 'acronym'], + phase: ['protocolSection', 'designModule', 'phases', 0], + verification_date: ['protocolSection', 'statusModule', 'statusVerifiedDate'], + lastchanged_date: ['protocolSection', 'statusModule', 'lastUpdateSubmitDate'], + firstreceived_date: ['protocolSection', 'statusModule', 'studyFirstSubmitDate'], + brief_summary: ['protocolSection', 'descriptionModule', 'briefSummary'], + detailed_description: ['protocolSection', 'descriptionModule', 'detailedDescription'] + } # overwriting the built-in initialize method - def initialize(id, parser_id=nil) + def initialize(id, parser_id=nil, data) @id = id - @url = url @parser_id = parser_id - end - - def url - "https://clinicaltrials.gov/show/" + @id + "?displayxml=true" - end - - def load(path=nil) - path = url if path.nil? - @contents ||= Hash.from_xml( Nokogiri::XML( open(path) ).xpath('clinical_study').to_s )['clinical_study'] + @data = data end def contents - @contents - end - - def set_contents_from_xml(xml) - @contents = Hash.from_xml( Nokogiri::XML( xml ).xpath('clinical_study').to_s )['clinical_study'] + @data end def location_search_term @@ -45,12 +32,12 @@ def location_search_term end def locations - Array([contents.dig("location")]).flatten.compact + @data.dig('protocolSection', 'contactsLocationsModule', 'locations') end def location locations.filter do |location| - location.dig("facility", "name").to_s.try(:downcase) == location_search_term.try(:downcase) + /#{Regexp.escape(location_search_term)}/i.match?(location.dig("facility")) end.first || {} end @@ -59,7 +46,7 @@ def location_status end def overall_status - contents.dig("overall_status") + @data.dig('protocolSection', 'statusModule', 'overallStatus') end def calculated_status @@ -74,14 +61,14 @@ def preview trial end - def process() + def process trial = Trial.find_or_initialize_by(system_id: @id) trial.system_id = @id # i think this is just overwriting system_id from the line above # Trial does not exist yet, setup defaults if trial.id.nil? - if @contents.has_key?('overall_status') and @contents['overall_status'] == 'Recruiting' + if !overall_status.blank? and overall_status.downcase == 'recruiting' trial.visible = true # By default recruiting trials are visible unless otherwise specified. else trial.visible = false @@ -97,12 +84,12 @@ def process() retrieve_simple_fields(trial) begin - trial.added_on = Date.parse(@contents.dig("study_first_posted")) || Date.today + trial.added_on = Date.parse(@data.dig('protocolSection', 'statusModule', 'studyFirstSubmitDate')) || Date.today rescue ArgumentError, TypeError => e trial.added_on = Date.today end - if @contents.has_key?('eligibility') + if @data.dig('protocolSection', 'eligibilityModule') process_eligibility(trial) end @@ -112,26 +99,26 @@ def process() trial.save end - if @contents.has_key?('conditional_browse') || @contents.has_key?('intervention_browse') + if @data.dig('derivedSection', 'conditionBrowseModule','meshes') || @data.dig('derivedSection', 'interventionBrowseModule','meshes') process_mesh_term(trial) end trial.updated_at = DateTime.now # Set updated date, even if the trial has not changed. # Save associations. - if @contents.has_key?('condition') + if @data.dig('protocolSection', 'conditionsModule','conditions') process_conditions(trial.id) end - if @contents.has_key?('intervention') + if @data.dig('protocolSection', 'armsInterventionsModule','interventions') process_interventions(trial.id) end - if @contents.has_key?('location') + if @data.dig('protocolSection', 'contactsLocationsModule','locations') process_locations(trial.id) end - if @contents.has_key?('keyword') + if @data.dig('protocolSection', 'conditionsModule','keywords') process_keywords(trial.id) end @@ -142,108 +129,129 @@ def process() def process_contacts(trial) # Overall official - if @contents.has_key?('overall_official') - if @contents['overall_official'].instance_of?(Array) - overall_offical = @contents['overall_official'][0] - else - overall_offical = @contents['overall_official'] - end + if @data.dig('protocolSection', 'contactsLocationsModule','overallOfficials') + overall_offical = @data.dig('protocolSection', 'contactsLocationsModule','overallOfficials',0) + + trial.official_last_name = overall_offical['name'] + trial.official_role = overall_offical['role'] + trial.official_affiliation = overall_offical['affiliation'] + end - trial.official_last_name = overall_offical['last_name'] if overall_offical.has_key?('last_name') - trial.official_role = overall_offical['role'] if overall_offical.has_key?('role') - trial.official_affiliation = overall_offical['affiliation'] if overall_offical.has_key?('affiliation') + # V2 api no longer has "contact" and "backup contact". + # Use the first two of any location contacts and then central contacts (having an email address), + # in order, as "primary" and "backup". + location_contacts = central_contacts = [] + if !location.dig('contacts').blank? + location_contacts = location.dig('contacts').filter do |c| + !c.dig("email").blank? || !c.dig("phone").blank? + end end - # Primary Contact - if @contents.has_key?('overall_contact') - trial.contact_last_name = @contents['overall_contact']['last_name'] if @contents['overall_contact'].has_key?('last_name') - trial.contact_phone = @contents['overall_contact']['phone'] if @contents['overall_contact'].has_key?('phone') - trial.contact_email = @contents['overall_contact']['email'] if @contents['overall_contact'].has_key?('email') + if !@data.dig('protocolSection', 'contactsLocationsModule','centralContacts').blank? + central_contacts = @data.dig('protocolSection', 'contactsLocationsModule','centralContacts').filter do |c| + !c.dig("email").blank? + end end - # Backup Contact - if @contents.has_key?('overall_contact_backup') - trial.contact_backup_last_name = @contents['overall_contact_backup']['last_name'] if @contents['overall_contact_backup'].has_key?('last_name') - trial.contact_backup_phone = @contents['overall_contact_backup']['phone'] if @contents['overall_contact_backup'].has_key?('phone') - trial.contact_backup_email = @contents['overall_contact_backup']['email'] if @contents['overall_contact_backup'].has_key?('email') + all_contacts = location_contacts + central_contacts + if !all_contacts.blank? + c_0 = all_contacts.first + c_1 = all_contacts.second + + if !c_0.blank? + trial.contact_last_name = c_0["name"] + trial.contact_phone = c_0["phone"] + trial.contact_email =c_0["email"] + end + + if !c_1.blank? + trial.contact_backup_last_name = c_1["name"] + trial.contact_backup_phone = c_1["phone"] + trial.contact_backup_email =c_1["email"] + end end end def process_eligibility(trial) - if @contents['eligibility'].has_key?('gender') - trial.gender = @contents['eligibility']['gender'] + if @data.dig('protocolSection', 'eligibilityModule','sex') + trial.gender = @data.dig('protocolSection', 'eligibilityModule','sex') end - if @contents['eligibility'].has_key?('minimum_age') - trial.minimum_age = @contents['eligibility']['minimum_age'].gsub(' Years', '').gsub(' Year', '') unless @contents['eligibility']['minimum_age'].nil? - trial.minimum_age = nil if trial.minimum_age == 'N/A' - trial.min_age_unit = @contents['eligibility']['minimum_age'] + if @data.dig('protocolSection', 'eligibilityModule','minimumAge') + min_age = @data.dig('protocolSection', 'eligibilityModule','minimumAge') + if min_age.blank? || min_age == "N/A" + trial.minimum_age = nil + trial.min_age_unit = nil + return + end + + trial.minimum_age = min_age.gsub(/ year(?:s)?/i, '') unless min_age.nil? + trial.min_age_unit = min_age if !trial.min_age_unit.nil? and (trial.min_age_unit.include? 'Month' or trial.min_age_unit.include? 'Months') - trial.minimum_age = (trial.minimum_age.gsub(' Months', '').gsub(' Month', '').to_f / 12).round(2) + trial.minimum_age = (trial.minimum_age.gsub(/\D/, '').to_f / 12).round(2) end if !trial.min_age_unit.nil? and (trial.min_age_unit.include? 'Week' or trial.min_age_unit.include? 'Weeks') - trial.minimum_age = (trial.minimum_age.gsub(' Weeks', '').gsub(' Week', '').to_f * 0.0191781).round(2) + trial.minimum_age = (trial.minimum_age.gsub(/\D/, '').to_f * 0.0191781).round(2) end if !trial.min_age_unit.nil? and (trial.min_age_unit.include? 'Day' or trial.min_age_unit.include? 'Days') - trial.minimum_age = (trial.minimum_age.gsub(' Days', '').gsub(' Day', '').to_f * 0.002739728571424657).round(2) + trial.minimum_age = (trial.minimum_age.gsub(/\D/, '').to_f * 0.002739728571424657).round(2) end end - if @contents['eligibility'].has_key?('maximum_age') - trial.maximum_age = @contents['eligibility']['maximum_age'].gsub(' Years', '').gsub(' Year', '') unless @contents['eligibility']['maximum_age'].nil? - trial.maximum_age = nil if trial.maximum_age == 'N/A' - trial.max_age_unit = @contents['eligibility']['maximum_age'] + if @data.dig('protocolSection', 'eligibilityModule','maximumAge') + max_age = @data.dig('protocolSection', 'eligibilityModule','maximumAge') + if max_age.blank? || max_age == "N/A" + trial.maximum_age = nil + trial.max_age_unit = nil + return + end + + trial.maximum_age = max_age.gsub(/ year(?:s)?/i, '') unless max_age.nil? + trial.max_age_unit = max_age if !trial.max_age_unit.nil? and (trial.max_age_unit.include? 'Month' or trial.max_age_unit.include? 'Months') - trial.maximum_age = (trial.maximum_age.gsub(' Months', '').gsub(' Month', '').to_f / 12).round(2) + trial.maximum_age = (trial.maximum_age.gsub(/\D/, '').to_f / 12).round(2) end if !trial.max_age_unit.nil? and (trial.max_age_unit.include? 'Week' or trial.max_age_unit.include? 'Weeks') - trial.maximum_age = (trial.maximum_age.gsub(' Weeks', '').gsub(' Week', '').to_f * 0.0191781).round(2) + trial.maximum_age = (trial.maximum_age.gsub(/\D/, '').to_f * 0.0191781).round(2) end if !trial.max_age_unit.nil? and (trial.max_age_unit.include? 'Day' or trial.max_age_unit.include? 'Days') - trial.maximum_age = (trial.maximum_age.gsub(' Days', '').gsub(' Day', '').to_f * 0.002739728571424657).round(2) + trial.maximum_age = (trial.maximum_age.gsub(/\D/, '').to_f * 0.002739728571424657).round(2) end end - if @contents['eligibility'].has_key?('healthy_volunteers') - if @contents['eligibility']['healthy_volunteers'] == 'Accepts Healthy Volunteers' + if @data.dig('protocolSection', 'eligibilityModule','healthyVolunteers') == true trial.healthy_volunteers_imported = true - else + elsif @data.dig('protocolSection', 'eligibilityModule','healthyVolunteers') == false trial.healthy_volunteers_imported = false - end end - if @contents['eligibility'].has_key?('criteria') && @contents['eligibility']['criteria'].has_key?('textblock') - trial.eligibility_criteria = @contents['eligibility']['criteria']['textblock'] + if @data.dig('protocolSection', 'eligibilityModule','eligibilityCriteria') + trial.eligibility_criteria = @data.dig('protocolSection', 'eligibilityModule','eligibilityCriteria') end end def process_mesh_term(trial) - if (!@contents['conditional_browse'].nil? && @contents['conditional_browse'].has_key?('mesh_term')) || - (!@contents['intervention_browse'].nil? && @contents['intervention_browse'].has_key?('mesh_term')) - TrialMeshTerm.where(trial_id: trial.id).delete_all - end - if !@contents['conditional_browse'].nil? && @contents['conditional_browse'].has_key?('mesh_term') + TrialMeshTerm.where(trial_id: trial.id).delete_all + if @data.dig('derivedSection', 'conditionBrowseModule','meshes') process_condition_browse(trial) end - if !@contents['intervention_browse'].nil? && @contents['intervention_browse'].has_key?('mesh_term') + if @data.dig('derivedSection', 'interventionBrowseModule','meshes') process_intervention_browse(trial) end end def process_condition_browse(trial) - mesh_term = @contents['conditional_browse']['mesh_term'] - mesh_term = [mesh_term] unless mesh_term.instance_of?(Array) - + mesh_term = @data.dig('derivedSection', 'conditionBrowseModule','meshes').map { |e| e['term'] } mesh_term.each do |mesh| test = TrialMeshTerm.create({ trial_id: trial.id, @@ -254,9 +262,7 @@ def process_condition_browse(trial) end def process_intervention_browse(trial) - mesh_term = @contents['intervention_browse']['mesh_term'] - mesh_term = [mesh_term] unless mesh_term.instance_of?(Array) - + mesh_term = @data.dig('derivedSection', 'interventionBrowseModule','meshes').map { |e| e['term'] } mesh_term.each do |mesh| test = TrialMeshTerm.create({ trial_id: trial.id, @@ -267,7 +273,7 @@ def process_intervention_browse(trial) end def process_conditions(id) - conditions = @contents['condition'] + conditions = @data.dig('protocolSection', 'conditionsModule','conditions') conditions = [conditions] unless conditions.instance_of?(Array) TrialCondition.where(trial_id: id).delete_all @@ -278,7 +284,7 @@ def process_conditions(id) condition.save end - TrialCondition.create({ + tc = TrialCondition.create({ trial_id: id, condition_id: condition.id }) @@ -286,75 +292,67 @@ def process_conditions(id) end def process_interventions(id) - interventions = @contents['intervention'] - interventions = [interventions] unless interventions.instance_of?(Array) - + interventions = @data.dig('protocolSection', 'armsInterventionsModule','interventions') TrialIntervention.where(trial_id: id).delete_all interventions.each do |i| TrialIntervention.create({ trial_id: id, - intervention_type: i['intervention_type'], - intervention: i['intervention_name'], + intervention_type: i['type'], + intervention: i['name'], description: i['description'] }) end end def process_locations(id) - locations = @contents['location'] + locations = self.locations locations = [locations] unless locations.instance_of?(Array) TrialLocation.where(trial_id: id).delete_all - locations.each do |l| - facility = l['facility'] if l.has_key?('facility') - location = Location.find_or_initialize_by(location: facility['name']) - - if facility.has_key?('address') - address = facility['address'] - location.city = address['city'] if address.has_key?('city') - location.state = address['state'] if address.has_key?('state') - location.zip = address['zip'] if address.has_key?('zip') - location.country = address['country'] if address.has_key?('country') - end - - location.save - - trial_location_hash = { - trial_id: id, - location_id: location.id - } - - if l.has_key?('status') - trial_location_hash['status'] = l['status'] - end - - if l.has_key?('contact') - contact = l['contact'] - trial_location_hash['last_name'] = contact['last_name'] if contact.has_key?('last_name') - trial_location_hash['phone'] = contact['phone'] if contact.has_key?('phone') - trial_location_hash['email'] = contact['email'] if contact.has_key?('email') - end - - if l.has_key?('contact_backup') - contact_backup = l['contact_backup'] - trial_location_hash['backup_last_name'] = contact_backup['last_name'] if contact_backup.has_key?('last_name') - trial_location_hash['backup_phone'] = contact_backup['phone'] if contact_backup.has_key?('phone') - trial_location_hash['backup_email'] = contact_backup['email'] if contact_backup.has_key?('email') - end + facility = l.dig('facility') + if !facility.blank? # We key off the facility name, so we can't really do anything if it doesn't exist. + location = Location.find_or_initialize_by(location: facility) + + location.city = l.dig('city') + location.state = l.dig('state') + location.zip = l.dig('zip') + location.country = l.dig('country') + + location.save + + tl = TrialLocation.new(trial_id: id, location_id: location.id, status: l.dig('status')) + + if !l.dig('contacts').blank? + location_contacts = l.dig('contacts').filter do |c| + !c.dig("email").blank? + end + + c_0 = location_contacts.first + c_1 = location_contacts.second + + if !c_0.blank? + tl.last_name = c_0["name"] + tl.phone = c_0["phone"] + tl.email =c_0["email"] + end + + if !c_1.blank? + tl.backup_last_name = c_1["name"] + tl.backup_phone = c_1["phone"] + tl.backup_email =c_1["email"] + end + end - if l.has_key?('status') - trial_location_hash['status'] = l['status'] + tl.save end - - TrialLocation.create(trial_location_hash) end end def process_keywords(id) - keywords = @contents['keyword'] + keywords = @data.dig('protocolSection', 'conditionsModule','keywords') keywords = [keywords] unless keywords.instance_of?(Array) TrialKeyword.where(trial_id: id).delete_all @@ -366,24 +364,29 @@ def process_keywords(id) }) end + # TODO: Should we include the intervention "otherNames" as keywords? + # interventions = @data.dig('protocolSection', 'armsInterventionsModule','interventions').map { |e| e['otherNames'] }.flatten + # interventions.each do |i| + # TrialKeyword.create({ + # trial_id: id, + # keyword: i + # }) + # end + end def retrieve_simple_fields(trial) previous_status = trial.overall_status # Look at simple fields and update where appropriate. - @@simple_fields.each do |f| - if @contents.has_key?(f) - if f == 'brief_summary' || f == 'detailed_description' - trial[f] = @contents[f]['textblock'] - else - trial[f] = @contents[f] - end + @@simple_fields.each do |k,v| + if @data.dig(*v) + trial[k] = @data.dig(*v) end end trial.overall_status = calculated_status - trial.recruiting = (calculated_status == 'Recruiting') + trial.recruiting = (calculated_status.downcase == 'recruiting') trial.visible = trial.recruiting end diff --git a/lib/tasks/ctgov.rake b/lib/tasks/ctgov.rake index a180b26..c5467f6 100644 --- a/lib/tasks/ctgov.rake +++ b/lib/tasks/ctgov.rake @@ -16,8 +16,7 @@ namespace :studyfinder do puts "Processing ClinicalTrials.gov data" connector = Connectors::Ctgov.new - connector.load((Date.today - args[:days_previous].to_i).strftime('%m/%d/%Y') , Date.today.strftime('%m/%d/%Y') ) - connector.process + connector.load((Date.today - args[:days_previous].to_i).strftime('%Y-%m-%d') , Date.today.strftime('%Y-%m-%d') ) puts "Reindexing all trials into elasticsearch" Trial.import force: true @@ -28,7 +27,6 @@ namespace :studyfinder do connector = Connectors::Ctgov.new connector.load - connector.process puts "Reindexing all trials into elasticsearch" Trial.import force: true @@ -54,7 +52,6 @@ namespace :studyfinder do connector = Connectors::Ctgov.new connector.clear connector.load - connector.process puts "Reindexing all trials into elasticsearch" Trial.import force: true diff --git a/spec/parsers/ctgov_spec.rb b/spec/parsers/ctgov_spec.rb index 9499735..92c2de7 100644 --- a/spec/parsers/ctgov_spec.rb +++ b/spec/parsers/ctgov_spec.rb @@ -3,522 +3,635 @@ describe Parsers::Ctgov do - before do + before(:each) do + system_info = create(:system_info, initials: 'TSTU', search_term: "Test University") + @api_data = { + "protocolSection" => { + "identificationModule" => { + "nctId" => "NCT999999", + "orgStudyIdInfo" => { + "id" => "2024-STUDY" + }, + "organization" => { + "fullName" => "Spacely Sprockets", "class" => "INDUSTRY" + }, + "briefTitle" => + "This is the brief title", + "officialTitle" => + "This is the longer, official title", + "acronym" => "ACRO" + }, + "statusModule" => { + "statusVerifiedDate" => "2024-06", + "overallStatus" => "RECRUITING", + "expandedAccessInfo" => {"hasExpandedAccess" => false}, + "startDateStruct" => {"date" => "2023-10-30", "type" => "ACTUAL"}, + "primaryCompletionDateStruct" => {"date" => "2027-11", "type" => "ESTIMATED"}, + "completionDateStruct" => {"date" => "2027-11", "type" => "ESTIMATED"}, + "studyFirstSubmitDate" => "2023-07-05", + "studyFirstSubmitQcDate" => "2023-07-05", + "studyFirstPostDateStruct" => {"date" => "2023-07-13", "type" => "ACTUAL"}, + "lastUpdateSubmitDate" => "2024-06-28", + "lastUpdatePostDateStruct" => {"date" => "2024-07-01", "type" => "ACTUAL"} + }, + "sponsorCollaboratorsModule" => { + "responsibleParty" => {"type" => "SPONSOR"}, "leadSponsor" => {"name" => "Spacely Sprockets", "class" => "INDUSTRY"} + }, + "oversightModule" => { + "oversightHasDmc" => true, "isFdaRegulatedDrug" => true, "isFdaRegulatedDevice" => true + }, + "descriptionModule" => { + "briefSummary" => + "This summary of the study is brief.", + "detailedDescription" => + "This detailed description of the study is longer. This detailed description of the study is longer. This detailed description of the study is longer. This detailed description of the study is longer." + }, + "conditionsModule" => { + "conditions" => ["Condition 1", "Condition 2"], + "keywords" => ["Test Keyword 1", "Test Keyword 2", "Test Keyword 3"] + }, + "designModule" => { + "studyType" => "INTERVENTIONAL", + "phases" => ["PHASE3"], + "designInfo" => { + "allocation" => "RANDOMIZED", + "interventionModel" => "PARALLEL", + "primaryPurpose" => "TREATMENT", + "maskingInfo" => + {"masking" => "QUADRUPLE", "whoMasked" => ["PARTICIPANT", "CARE_PROVIDER", "INVESTIGATOR", "OUTCOMES_ASSESSOR"]} + }, + "enrollmentInfo" => {"count" => 200, "type" => "ESTIMATED"} + }, + "armsInterventionsModule" => { + "armGroups" => + [{"label" => "Placebo", + "type" => "PLACEBO_COMPARATOR", + "description" => "A placebo arm.", + "interventionNames" => ["Drug: Placebo", "Device: Some Device"]}, + {"label" => "Real Drug Label", + "type" => "EXPERIMENTAL", + "description" => + "Drug arm description", + "interventionNames" => ["Drug: Real Drug", "Device: Some Device"]}], + "interventions" => + [{"type" => "DRUG", "name" => "Placebo", "description" => "Placebo intervention description", "armGroupLabels" => ["Placebo"]}, + {"type" => "DRUG", + "name" => "Real Drug", + "description" => "Drug intervention description", + "armGroupLabels" => ["Real Drug Label"], + "otherNames" => ["Drug Brand Name", "Drug Brand Name 2"]}, + {"type" => "DEVICE", + "name" => "Some Device", + "description" => "Device intervention description", + "armGroupLabels" => ["Real Drug Label", "Placebo"]}] + }, + "outcomesModule" => { + "primaryOutcomes" => + [{"measure" => "Measure 1", + "description" => + "Measure 1 description.", + "timeFrame" => "Baseline to Week 52"}], + "secondaryOutcomes" => + [{"measure" => "Measure 2", + "description" => + "Measure 2 description.", + "timeFrame" => "Baseline to Week 52"}, + {"measure" => "Measure 3", + "description" => + "Measure 3 description.", + "timeFrame" => "Baseline to Week 52"}] + }, + "eligibilityModule" => { + "eligibilityCriteria" => "These are eligibility criteria.", + "healthyVolunteers" => false, + "sex" => "ALL", + "minimumAge" => "18 Years", + "stdAges" => ["ADULT", "OLDER_ADULT"] + }, + "contactsLocationsModule" => { + "centralContacts" => + [{"name" => "Spacely Sprockets Contact", + "role" => "CONTACT", + "phone" => "555-555-5555", + "email" => "clinicaltrials@spacelysprockets.com"}], + "overallOfficials"=> + [{"name"=>"Person One, PhD", + "affiliation"=>"The Major Medical Center", + "role"=>"PRINCIPAL_INVESTIGATOR"}], + "locations" => + [ + {"facility" => "Facility 1 Name", + "status" => "RECRUITING", + "city" => "Chicago", + "state" => "Illinois", + "zip" => "60193", + "country" => "United States", + "contacts" => + [{"name" => "Facility 1 Contact 1 Name", "role" => "CONTACT", "phone" => "555-555-5556", "email" => "someone@facility1.zzz"}, + {"name" => "Facility 1 Contact 2 Name", "role" => "PRINCIPAL_INVESTIGATOR"}], + "geoPoint" => {"lat" => 33.52066, "lon" => -86.80249}}, + {"facility" => "Facility 2 Name", + "status" => "RECRUITING", + "city" => "Phoenix", + "state" => "Arizona", + "zip" => "85013", + "country" => "United States", + "contacts" => + [{"name" => "Facility 2 Contact 1 Name", + "role" => "CONTACT", + "phone" => "602-555-5555", + "email" => "someone@facility2.zzz"}, + {"name" => "Facility 2 Contact 2 Name", "role" => "PRINCIPAL_INVESTIGATOR"}], + "geoPoint" => {"lat" => 33.44838, "lon" => -112.07404}} + ] + } + }, + "derivedSection" => { + "miscInfoModule" => {"versionHolder" => "2024-07-12"}, + "conditionBrowseModule" => { + "meshes" => + [{"id" => "D000008171", "term" => "Lung Diseases"}, + {"id" => "D000011658", "term" => "Pulmonary Fibrosis"}, + {"id" => "D000017563", "term" => "Lung Diseases, Interstitial"}, + {"id" => "D000005355", "term" => "Fibrosis"}], + "ancestors" => + [{"id" => "D000010335", "term" => "Pathologic Processes"}, {"id" => "D000012140", "term" => "Respiratory Tract Diseases"}], + "browseLeaves" => + [{"id" => "M11168", "name" => "Lung Diseases", "asFound" => "Lung Disease", "relevance" => "HIGH"}, + {"id" => "M27137", "name" => "Respiratory Aspiration", "relevance" => "LOW"}, + {"id" => "M19813", "name" => "Lung Diseases, Interstitial", "asFound" => "Interstitial Lung Disease", "relevance" => "HIGH"}, + {"id" => "M8485", "name" => "Fibrosis", "asFound" => "Fibrosis", "relevance" => "HIGH"}, + {"id" => "M14512", "name" => "Pulmonary Fibrosis", "asFound" => "Pulmonary Fibrosis", "relevance" => "HIGH"}, + {"id" => "M14977", "name" => "Respiratory Tract Diseases", "relevance" => "LOW"}], + "browseBranches" => + [{"abbrev" => "BC08", "name" => "Respiratory Tract (Lung and Bronchial) Diseases"}, + {"abbrev" => "All", "name" => "All Conditions"}, + {"abbrev" => "BC23", "name" => "Symptoms and General Pathology"}] + }, + "interventionBrowseModule" => { + "meshes" => [{"id" => "C000427248", "term" => "Treprostinil"}], + "ancestors" => [{"id" => "D000000959", "term" => "Antihypertensive Agents"}], + "browseLeaves" => + [{"id" => "M21860", "name" => "Pharmaceutical Solutions", "relevance" => "LOW"}, + {"id" => "M255601", "name" => "Treprostinil", "asFound" => "Operator", "relevance" => "HIGH"}, + {"id" => "M4277", "name" => "Antihypertensive Agents", "relevance" => "LOW"}], + "browseBranches" => + [{"abbrev" => "PhSol", "name" => "Pharmaceutical Solutions"}, + {"abbrev" => "All", "name" => "All Drugs and Chemicals"}, + {"abbrev" => "AnAg", "name" => "Antihypertensive Agents"}] + } + } + } end - describe "#parse" do + describe "#contents" do + it "returns a structure equivalent to @api_data" do + p = Parsers::Ctgov.new( 'NCT999999', 1, @api_data) + expect(p.contents).to eq(@api_data) + end + end + + describe "#location_search_term" do + it "returns the correct term" do + p = Parsers::Ctgov.new( 'NCT999999', 1, @api_data) + expect(p.location_search_term).to eq("Test University") + end + end + + describe "#location" do + it "returns the correct location block for an exact match" do + p = Parsers::Ctgov.new( 'NCT999999', 1, @api_data) + allow(p).to receive(:location_search_term) { "Facility 1 Name" } + expect(p.location["facility"]).to eq("Facility 1 Name") + expect(p.location["city"]).to eq("Chicago") + expect(p.location["state"]).to eq("Illinois") + end + + it "returns the correct location block for a substring match" do + @api_data["protocolSection"]["contactsLocationsModule"]["locations"] = [ + {"facility"=>"University of Minnesota/Cancer Center","status"=>"Some status","city"=>"Minneapolis", + "state"=>"Minnesota","zip"=>"55455","country"=>"United States" + } + ] + p = Parsers::Ctgov.new( 'NCT999999', 1, @api_data) + + allow(p).to receive(:location_search_term) { "University of Minnesota" } + expect(p.location["facility"]).to eq("University of Minnesota/Cancer Center") + expect(p.location["city"]).to eq("Minneapolis") + expect(p.location["state"]).to eq("Minnesota") + end + end + + describe "#preview" do + it "returns the correct preview block" do + p = Parsers::Ctgov.new( 'NCT999999', 1, @api_data) + expect(p.preview.system_id).to eq("NCT999999") + expect(p.preview.brief_title).to eq("This is the brief title") + expect(p.preview.official_title).to eq("This is the longer, official title") + expect(p.preview.acronym).to eq("ACRO") + expect(p.preview.phase).to eq("PHASE3") + expect(p.preview.overall_status).to eq("RECRUITING") + expect(p.preview.verification_date).to eq("2024-06") + expect(p.preview.brief_summary).to eq("This summary of the study is brief.") + expect(p.preview.detailed_description).to eq("This detailed description of the study is longer. This detailed description of the study is longer. This detailed description of the study is longer. This detailed description of the study is longer.") + expect(p.preview.visible).to eq(true) + expect(p.preview.lastchanged_date).to eq('2024-06-28') + expect(p.preview.firstreceived_date).to eq('2023-07-05') + expect(p.preview.min_age_unit).to eq(nil) + expect(p.preview.maximum_age).to eq(nil) + end + end + + describe "#process_contacts" do + context "if there is one location contact with an email, and one central contact" do + it "returns those in order as the primary and backup" do + p = Parsers::Ctgov.new( 'NCT999999', 1, @api_data) + allow(p).to receive(:location_search_term) { "Facility 1 Name" } + t = Trial.new + p.process_contacts(t) + expect(t.contact_last_name).to eq('Facility 1 Contact 1 Name') + expect(t.contact_backup_last_name).to eq('Spacely Sprockets Contact') + end + end + context "if there are two location contacts with an email" do + it "returns those in order as the primary and backup" do + @api_data["protocolSection"]["contactsLocationsModule"]["locations"][0]["contacts"][1]["email"] = "someone2@facility1.zzz" + p = Parsers::Ctgov.new( 'NCT999999', 1, @api_data) + allow(p).to receive(:location_search_term) { "Facility 1 Name" } + t = Trial.new + p.process_contacts(t) + expect(t.contact_last_name).to eq('Facility 1 Contact 1 Name') + expect(t.contact_backup_last_name).to eq('Facility 1 Contact 2 Name') + end + end + end + + describe "#process_eligibility" do + it "parses sex correctly" do + @api_data["protocolSection"]["eligibilityModule"]["sex"] = "ALL" + p = Parsers::Ctgov.new( 'NCT999999', 1, @api_data) + t = Trial.new + p.process_eligibility(t) + expect(t.gender).to eq("ALL") + end + + it "parses age ranges correctly when weeks are involved" do + @api_data["protocolSection"]["eligibilityModule"]["minimumAge"] = nil + @api_data["protocolSection"]["eligibilityModule"]["maximumAge"] = "37 Weeks" + p = Parsers::Ctgov.new( 'NCT999999', 1, @api_data) + t = Trial.new + p.process_eligibility(t) + expect(t.minimum_age).to eq(nil) + expect(t.maximum_age).to eq('0.71') + end + + it "parses age ranges correctly when years are involved" do + @api_data["protocolSection"]["eligibilityModule"]["minimumAge"] = "37 Days" + @api_data["protocolSection"]["eligibilityModule"]["maximumAge"] = "2 Years" + p = Parsers::Ctgov.new( 'NCT999999', 1, @api_data) + t = Trial.new + p.process_eligibility(t) + expect(t.minimum_age).to eq('0.1') + expect(t.min_age_unit).to eq('37 Days') + expect(t.maximum_age).to eq('2') + expect(t.max_age_unit).to eq('2 Years') + end + + it "parses age ranges correctly when months are involved" do + @api_data["protocolSection"]["eligibilityModule"]["minimumAge"] = "13 Months" + @api_data["protocolSection"]["eligibilityModule"]["maximumAge"] = "300 Months" + p = Parsers::Ctgov.new( 'NCT999999', 1, @api_data) + t = Trial.new + p.process_eligibility(t) + expect(t.minimum_age).to eq('1.08') + expect(t.min_age_unit).to eq('13 Months') + expect(t.maximum_age).to eq('25.0') + expect(t.max_age_unit).to eq('300 Months') + end + + it "parses healthy volunteers correctly" do + @api_data["protocolSection"]["eligibilityModule"]["healthyVolunteers"] = true + p = Parsers::Ctgov.new( 'NCT999999', 1, @api_data) + t = Trial.new + p.process_eligibility(t) + expect(t.healthy_volunteers_imported).to eq(true) + + @api_data["protocolSection"]["eligibilityModule"]["healthyVolunteers"] = false + p = Parsers::Ctgov.new( 'NCT999999', 1, @api_data) + t = Trial.new + p.process_eligibility(t) + expect(t.healthy_volunteers_imported).to eq(false) + end + + it "parses eligibilityCriteria correctly" do + @api_data["protocolSection"]["eligibilityModule"]["eligibilityCriteria"] = "Test Criteria 1 through 10." + p = Parsers::Ctgov.new( 'NCT999999', 1, @api_data) + t = Trial.new + p.process_eligibility(t) + expect(t.eligibility_criteria).to eq("Test Criteria 1 through 10.") + end + end + + describe "#process_locations" do + context "when there is a location present with one contact" do + it "creates a location record and a trial location with one contact" do + @api_data["protocolSection"]["contactsLocationsModule"]["locations"] = [ + {"facility"=>"University of Minnesota","status"=>"Some status","city"=>"Minneapolis", + "state"=>"Minnesota","zip"=>"55455","country"=>"United States", + "contacts" => + [{"name" => "UMN Contact 1 Name", "role" => "CONTACT", "phone" => "555-555-5556", "email" => "someone@umn.edu"}, + {"name" => "UMN Contact 2 Name", "role" => "PRINCIPAL_INVESTIGATOR"}] + } + ] + p = Parsers::Ctgov.new( 'NCT999999', 1, @api_data) + t = create(:trial) + p.process_locations(t.id) + tl = TrialLocation.first + l = tl.location + expect(l.location).to eq("University of Minnesota") + expect(l.city).to eq("Minneapolis") + expect(l.state).to eq("Minnesota") + expect(l.zip).to eq("55455") + expect(l.country).to eq("United States") + + expect(tl.status).to eq("Some status") + expect(tl.last_name).to eq("UMN Contact 1 Name") + expect(tl.phone).to eq("555-555-5556") + expect(tl.email).to eq("someone@umn.edu") + expect(tl.backup_last_name).to eq(nil) + expect(tl.backup_phone).to eq(nil) + expect(tl.backup_email).to eq(nil) + end + it "creates a location record and a trial location with one contact" do + @api_data["protocolSection"]["contactsLocationsModule"]["locations"] = [ + {"facility"=>"University of Minnesota","status"=>"Some status","city"=>"Minneapolis", + "state"=>"Minnesota","zip"=>"55455","country"=>"United States", + "contacts" => + [{"name" => "UMN Contact 1 Name", "role" => "CONTACT", "phone" => "555-555-5556", "email" => "someone@umn.edu"}, + {"name" => "UMN Contact 2 Name", "role" => "CONTACT", "phone" => "555-555-5557", "email" => "another@umn.edu"}, + {"name" => "UMN Contact 3 Name", "role" => "PRINCIPAL_INVESTIGATOR"}] + } + ] + p = Parsers::Ctgov.new( 'NCT999999', 1, @api_data) + t = create(:trial) + p.process_locations(t.id) + tl = TrialLocation.first + l = tl.location + expect(l.location).to eq("University of Minnesota") + expect(l.city).to eq("Minneapolis") + expect(l.state).to eq("Minnesota") + expect(l.zip).to eq("55455") + expect(l.country).to eq("United States") + + expect(tl.status).to eq("Some status") + expect(tl.last_name).to eq("UMN Contact 1 Name") + expect(tl.phone).to eq("555-555-5556") + expect(tl.email).to eq("someone@umn.edu") + expect(tl.backup_last_name).to eq("UMN Contact 2 Name") + expect(tl.backup_phone).to eq("555-555-5557") + expect(tl.backup_email).to eq("another@umn.edu") + end + end + end + describe "#parse" do it "parses status and sets visibility when Recruiting" do - url = 'https://clinicaltrials.gov/show/NCT01678638?displayxml=true' - p = Parsers::Ctgov.new( 'NCT01678638', 1) - p.set_contents_from_xml(" - - Test Study - Recruiting - - ") + p = Parsers::Ctgov.new( 'NCT999999', 1, @api_data) p.process - trial = Trial.find_by(system_id: 'NCT01678638') + trial = Trial.find_by(system_id: 'NCT999999') expect(trial.visible).to eq(true) end it "parses status and sets visibility when not Recruiting" do - url = 'https://clinicaltrials.gov/show/NCT01678638?displayxml=true' - p = Parsers::Ctgov.new( 'NCT01678638', 1) - p.set_contents_from_xml(" - - Test Study - Not yet Recruiting - - ") + @api_data["protocolSection"]["statusModule"]["overallStatus"] = "Not yet recruiting" + p = Parsers::Ctgov.new( 'NCT999999', 1, @api_data) p.process - trial = Trial.find_by(system_id: 'NCT01678638') + trial = Trial.find_by(system_id: 'NCT999999') expect(trial.visible).to eq(false) end - it "parses status and sets visibility from Recruiting to not Recruiting" do - url = 'https://clinicaltrials.gov/show/NCT01678638?displayxml=true' - p = Parsers::Ctgov.new( 'NCT01678638', 1) - p.set_contents_from_xml(" - - Test Study - Recruiting - - ") + it "parses status change from recruiting to non and sets visibility from true to false" do + p = Parsers::Ctgov.new( 'NCT999999', 1, @api_data) p.process - trial = Trial.find_by(system_id: 'NCT01678638') + trial = Trial.find_by(system_id: 'NCT999999') expect(trial.visible).to eq(true) - p2 = Parsers::Ctgov.new( 'NCT01678638', 1) - p2.set_contents_from_xml(" - - Test Study - Not yet Recruiting - - ") + @api_data["protocolSection"]["statusModule"]["overallStatus"] = "Not yet recruiting" + p2 = Parsers::Ctgov.new( 'NCT999999', 1, @api_data) p2.process - trial = Trial.find_by(system_id: 'NCT01678638') + trial.reload expect(trial.visible).to eq(false) end - it "parses status and sets visibility from not Recruiting to Recruiting" do - url = 'https://clinicaltrials.gov/show/NCT01678638?displayxml=true' - p = Parsers::Ctgov.new( 'NCT01678638', 1) - p.set_contents_from_xml(" - - Test Study - Not yet Recruiting - - ") + it "parses status and sets visibility from false to true" do + @api_data["protocolSection"]["statusModule"]["overallStatus"] = "Not yet recruiting" + p = Parsers::Ctgov.new( 'NCT999999', 1, @api_data) p.process - trial = Trial.find_by(system_id: 'NCT01678638') + trial = Trial.find_by(system_id: 'NCT999999') expect(trial.visible).to eq(false) - p2 = Parsers::Ctgov.new( 'NCT01678638', 1) - p2.set_contents_from_xml(" - - Test Study - Recruiting - - ") + @api_data["protocolSection"]["statusModule"]["overallStatus"] = "RECRUITING" + p2 = Parsers::Ctgov.new( 'NCT999999', 1, @api_data) p2.process - trial2 = Trial.find_by(system_id: 'NCT01678638') - expect(trial2.visible).to eq(true) + trial.reload + expect(trial.visible).to eq(true) end - it "sets visibility correctly regardless of current value" do - trial = create(:trial, system_id: "NCT123", overall_status: "Completed", visible: true) - p = Parsers::Ctgov.new("NCT123", 1) - - p.set_contents_from_xml(" - - Completed - - ") + it "sets visibility correctly regardless of current value" do + trial = create(:trial, system_id: "NCT999999", overall_status: "Completed", visible: true) + expect(trial.visible).to eq(true) + @api_data["protocolSection"]["statusModule"]["overallStatus"] = "COMPLETED" + p = Parsers::Ctgov.new( 'NCT999999', 1, @api_data) p.process trial.reload expect(trial.visible).to eq(false) end - it "parses age ranges correctly when weeks are involved" do - url = 'https://clinicaltrials.gov/show/NCT01678638?displayxml=true' - p = Parsers::Ctgov.new( 'NCT01678638', 1) - p.set_contents_from_xml(" - - Test Study - - - - - Both - N/A - 37 Weeks - No - - - ") - p.process - - trial = Trial.find_by(system_id: 'NCT01678638') - expect(trial.minimum_age).to eq(nil) - expect(trial.maximum_age).to eq('0.71') - end - - it "parses age ranges correctly when years are involved" do - url = 'https://clinicaltrials.gov/show/NCT01678638?displayxml=true' - p = Parsers::Ctgov.new( 'NCT01678638', 1) - p.set_contents_from_xml(" - - Test Study - - - - - Both - 37 Days - 2 Years - No - - - ") - p.process - - trial = Trial.find_by(system_id: 'NCT01678638') - expect(trial.minimum_age).to eq('0.1') - expect(trial.min_age_unit).to eq('37 Days') - expect(trial.maximum_age).to eq('2') - expect(trial.max_age_unit).to eq('2 Years') - end - - it "parses age ranges correctly when months are involved" do - url = 'https://clinicaltrials.gov/show/NCT01678638?displayxml=true' - p = Parsers::Ctgov.new( 'NCT01678638', 1) - p.set_contents_from_xml(" - - Test Study - - - - - Both - 13 Months - 300 Months - No - - - ") - p.process - - trial = Trial.find_by(system_id: 'NCT01678638') - expect(trial.minimum_age).to eq('1.08') - expect(trial.min_age_unit).to eq('13 Months') - expect(trial.maximum_age).to eq('25.0') - expect(trial.max_age_unit).to eq('300 Months') - end - it "parses conditions when there is only one" do - url = 'https://clinicaltrials.gov/show/NCT01678638?displayxml=true' - p = Parsers::Ctgov.new( 'NCT01678638', 1) - p.set_contents_from_xml(" - - Test Study - - - - - Both - 13 Months - 300 Months - No - - Test Condition - - ") + @api_data["protocolSection"]["conditionsModule"]["conditions"] = "Test Condition" + p = Parsers::Ctgov.new( 'NCT999999', 1, @api_data) p.process - trial = Trial.find_by(system_id: 'NCT01678638') + trial = Trial.find_by(system_id: 'NCT999999') expect(trial.conditions.first.condition).to eq('Test Condition') end it "parses conditions when there are many" do - url = 'https://clinicaltrials.gov/show/NCT01678638?displayxml=true' - p = Parsers::Ctgov.new( 'NCT01678638', 1) - p.set_contents_from_xml(" - - Test Study - - - - - Both - 13 Months - 300 Months - No - - Test Condition 1 - Test Condition 2 - Test Condition 3 - Test Condition 4 - - ") + @api_data["protocolSection"]["conditionsModule"]["conditions"] = ["Test Condition 1", "Test Condition 3", "Test Condition 3", "Test Condition 4"] + p = Parsers::Ctgov.new( 'NCT999999', 1, @api_data) p.process - trial = Trial.find_by(system_id: 'NCT01678638') + trial = Trial.find_by(system_id: 'NCT999999') expect(trial.conditions.size).to eq(4) expect(trial.conditions.first.condition).to eq('Test Condition 1') expect(trial.conditions.last.condition).to eq('Test Condition 4') end it "parses intervention when there is only one" do - url = 'https://clinicaltrials.gov/show/NCT01678638?displayxml=true' - p = Parsers::Ctgov.new( 'NCT01678638', 1) - p.set_contents_from_xml(" - - Test Study - - Intervention Type - Intervention Name - - - ") + @api_data["protocolSection"]["armsInterventionsModule"]["interventions"] = [ + {"type" => "DRUG", + "name" => "Real Drug", + "description" => "Drug intervention description", + "armGroupLabels" => ["Real Drug Label"], + "otherNames" => ["Drug Brand Name", "Drug Brand Name 2"] + } + ] + + p = Parsers::Ctgov.new( 'NCT999999', 1, @api_data) p.process - trial = Trial.find_by(system_id: 'NCT01678638') - expect(trial.interventions).to eq('Intervention Type: Intervention Name') + trial = Trial.find_by(system_id: 'NCT999999') + expect(trial.interventions).to eq('DRUG: Real Drug') end - it "parses intervention when there are many" do - url = 'https://clinicaltrials.gov/show/NCT01678638?displayxml=true' - p = Parsers::Ctgov.new( 'NCT01678638', 1) - p.set_contents_from_xml(" - - Test Study - - Intervention Type 1 - Intervention Name 1 - - - Intervention Type 2 - Intervention Name 2 - - - Intervention Type 3 - Intervention Name 3 - - - ") + it "parses interventions when there are many" do + p = Parsers::Ctgov.new( 'NCT999999', 1, @api_data) p.process - trial = Trial.find_by(system_id: 'NCT01678638') + trial = Trial.find_by(system_id: 'NCT999999') interventions = trial.interventions.split("; ") expect(interventions.size).to eq(3) - expect(interventions.first).to eq('Intervention Type 1: Intervention Name 1') - expect(interventions.second).to eq('Intervention Type 2: Intervention Name 2') - expect(interventions.third).to eq('Intervention Type 3: Intervention Name 3') + expect(interventions.first).to eq('DRUG: Placebo') + expect(interventions.second).to eq('DRUG: Real Drug') + expect(interventions.third).to eq('DEVICE: Some Device') end it "parses keyword when there is only one" do - url = 'https://clinicaltrials.gov/show/NCT01678638?displayxml=true' - p = Parsers::Ctgov.new( 'NCT01678638', 1) - p.set_contents_from_xml(" - - Test Study - Test Keyword - - ") + @api_data["protocolSection"]["conditionsModule"]["keywords"] = ["Test Keyword"] + p = Parsers::Ctgov.new( 'NCT999999', 1, @api_data) p.process - trial = Trial.find_by(system_id: 'NCT01678638') + trial = Trial.find_by(system_id: 'NCT999999') expect(trial.keywords).to eq("Test Keyword") end it "parses keyword when there are many" do - url = 'https://clinicaltrials.gov/show/NCT01678638?displayxml=true' - p = Parsers::Ctgov.new( 'NCT01678638', 1) - p.set_contents_from_xml(" - - Test Study - Test Keyword 1 - Test Keyword 2 - Test Keyword 3 - Test Keyword 4 - Test Keyword 5 - - ") + p = Parsers::Ctgov.new( 'NCT999999', 1, @api_data) p.process - trial = Trial.find_by(system_id: 'NCT01678638') + trial = Trial.find_by(system_id: 'NCT999999') keywords = trial.keywords.split("; ") - expect(keywords.size).to eq(5) + expect(keywords.size).to eq(3) expect(keywords.first).to eq("Test Keyword 1") expect(keywords.second).to eq("Test Keyword 2") - expect(keywords.last).to eq("Test Keyword 5") + expect(keywords.last).to eq("Test Keyword 3") end it "parses conditional browse mesh term when there is one" do - url = 'https://clinicaltrials.gov/show/NCT01678638?displayxml=true' - p = Parsers::Ctgov.new( 'NCT01678638', 1) - p.set_contents_from_xml(" - - Test Study - - Test Conditional Mesh Term - - - ") + @api_data["derivedSection"]["conditionBrowseModule"]["meshes"] = [{"id"=>"D001","term"=>"Test Conditional Mesh Term"}] + @api_data["derivedSection"]["interventionBrowseModule"]["meshes"] = [] + p = Parsers::Ctgov.new( 'NCT999999', 1, @api_data) p.process - trial = Trial.find_by(system_id: 'NCT01678638') + trial = Trial.find_by(system_id: 'NCT999999') expect(trial.mesh_terms).to eq("Conditional: Test Conditional Mesh Term") end it "parses conditional browse mesh term when there are many" do - url = 'https://clinicaltrials.gov/show/NCT01678638?displayxml=true' - p = Parsers::Ctgov.new( 'NCT01678638', 1) - p.set_contents_from_xml(" - - Test Study - - Test Conditional Mesh Term 1 - Test Conditional Mesh Term 2 - Test Conditional Mesh Term 3 - - - ") + @api_data["derivedSection"]["conditionBrowseModule"]["meshes"] = [{"id"=>"D001","term"=>"Test Conditional Mesh Term 1"},{"id"=>"D002","term"=>"Test Conditional Mesh Term 2"}] + @api_data["derivedSection"]["interventionBrowseModule"]["meshes"] = [] + p = Parsers::Ctgov.new( 'NCT999999', 1, @api_data) p.process - trial = Trial.find_by(system_id: 'NCT01678638') + trial = Trial.find_by(system_id: 'NCT999999') mesh_term = trial.mesh_terms.split("; ") - expect(mesh_term.size).to eq(3) + expect(mesh_term.size).to eq(2) expect(mesh_term.first).to eq("Conditional: Test Conditional Mesh Term 1") - expect(mesh_term.second).to eq("Conditional: Test Conditional Mesh Term 2") - expect(mesh_term.last).to eq("Conditional: Test Conditional Mesh Term 3") + expect(mesh_term.last).to eq("Conditional: Test Conditional Mesh Term 2") end it "parses intervention browse mesh term when there is one" do - url = 'https://clinicaltrials.gov/show/NCT01678638?displayxml=true' - p = Parsers::Ctgov.new( 'NCT01678638', 1) - p.set_contents_from_xml(" - - Test Study - - Test Intervention Mesh Term - - - ") + @api_data["derivedSection"]["conditionBrowseModule"]["meshes"] = [] + @api_data["derivedSection"]["interventionBrowseModule"]["meshes"] = [{"id"=>"D001","term"=>"Test Intervention Mesh Term"}] + p = Parsers::Ctgov.new( 'NCT999999', 1, @api_data) p.process - trial = Trial.find_by(system_id: 'NCT01678638') + trial = Trial.find_by(system_id: 'NCT999999') expect(trial.mesh_terms).to eq("Intervention: Test Intervention Mesh Term") end it "parses intervention browse mesh term when there are many" do - url = 'https://clinicaltrials.gov/show/NCT01678638?displayxml=true' - p = Parsers::Ctgov.new( 'NCT01678638', 1) - p.set_contents_from_xml(" - - Test Study - - Test Intervention Mesh Term 1 - Test Intervention Mesh Term 2 - Test Intervention Mesh Term 3 - - - ") + @api_data["derivedSection"]["conditionBrowseModule"]["meshes"] = [] + @api_data["derivedSection"]["interventionBrowseModule"]["meshes"] = [{"id"=>"D001","term"=>"Test Intervention Mesh Term 1"},{"id"=>"D002","term"=>"Test Intervention Mesh Term 2"}] + p = Parsers::Ctgov.new( 'NCT999999', 1, @api_data) p.process - trial = Trial.find_by(system_id: 'NCT01678638') + trial = Trial.find_by(system_id: 'NCT999999') mesh_term = trial.mesh_terms.split("; ") - expect(mesh_term.size).to eq(3) + expect(mesh_term.size).to eq(2) expect(mesh_term.first).to eq("Intervention: Test Intervention Mesh Term 1") - expect(mesh_term.second).to eq("Intervention: Test Intervention Mesh Term 2") - expect(mesh_term.last).to eq("Intervention: Test Intervention Mesh Term 3") + expect(mesh_term.last).to eq("Intervention: Test Intervention Mesh Term 2") end - it "parses conditional and intervention browse mesh term" do - url = 'https://clinicaltrials.gov/show/NCT01678638?displayxml=true' - p = Parsers::Ctgov.new( 'NCT01678638', 1) - p.set_contents_from_xml(" - - Test Study - - Test Conditional Mesh Term 1 - Test Conditional Mesh Term 2 - Test Conditional Mesh Term 3 - - - Test Intervention Mesh Term 1 - Test Intervention Mesh Term 2 - Test Intervention Mesh Term 3 - - - ") + @api_data["derivedSection"]["conditionBrowseModule"]["meshes"] = [{"id"=>"D001","term"=>"Test Conditional Mesh Term 1"},{"id"=>"D002","term"=>"Test Conditional Mesh Term 2"}] + @api_data["derivedSection"]["interventionBrowseModule"]["meshes"] = [{"id"=>"D001","term"=>"Test Intervention Mesh Term 1"}] + p = Parsers::Ctgov.new( 'NCT999999', 1, @api_data) p.process - trial = Trial.find_by(system_id: 'NCT01678638') + trial = Trial.find_by(system_id: 'NCT999999') mesh_term = trial.mesh_terms.split("; ") - expect(mesh_term.size).to eq(6) + expect(mesh_term.size).to eq(3) expect(mesh_term.first).to eq("Conditional: Test Conditional Mesh Term 1") expect(mesh_term.second).to eq("Conditional: Test Conditional Mesh Term 2") - expect(mesh_term.third).to eq("Conditional: Test Conditional Mesh Term 3") - expect(mesh_term.fourth).to eq("Intervention: Test Intervention Mesh Term 1") - expect(mesh_term.fifth).to eq("Intervention: Test Intervention Mesh Term 2") - expect(mesh_term.last).to eq("Intervention: Test Intervention Mesh Term 3") + expect(mesh_term.third).to eq("Intervention: Test Intervention Mesh Term 1") end end it "#overall_status" do - p = Parsers::Ctgov.new("NCT123") - p.set_contents_from_xml(" - - Recruiting - - ") - - expect(p.overall_status).to eq("Recruiting") + @api_data["protocolSection"]["statusModule"]["overallStatus"] = "RECRUITING" + p = Parsers::Ctgov.new("NCT123", 1, @api_data) + expect(p.overall_status).to eq("RECRUITING") end it "#location_status with one location" do - p = Parsers::Ctgov.new("NCT123") - p.set_contents_from_xml(" - - - - University of Minnesota - - Some status - - - ") + @api_data["protocolSection"]["contactsLocationsModule"]["locations"] = [{"facility"=>"University of Minnesota","status"=>"Some status"}] + p = Parsers::Ctgov.new("NCT123", 1, @api_data) allow(p).to receive(:location_search_term) { "University of Minnesota" } expect(p.location_status).to eq("Some status") end it "#location_status with multiple locations" do - p = Parsers::Ctgov.new("NCT123") - p.set_contents_from_xml(" - - - - Somewhere else - - Another status - - - - University of Minnesota - - Some status - - - ") + @api_data["protocolSection"]["contactsLocationsModule"]["locations"] = [ + {"facility"=>"University of Minnesota","status"=>"Some status"}, + {"facility"=>"University of Wisconsin","status"=>"A different status"}, + ] + p = Parsers::Ctgov.new("NCT123", 1, @api_data) allow(p).to receive(:location_search_term) { "University of Minnesota" } expect(p.location_status).to eq("Some status") end it "#calculated_status" do - p = Parsers::Ctgov.new("NCT123") - p.set_contents_from_xml(" - - Not this one - - - University of Minnesota - - This one - - - ") + @api_data["protocolSection"]["statusModule"]["overallStatus"] = "RECRUITING" + @api_data["protocolSection"]["contactsLocationsModule"]["locations"] = [ + {"facility"=>"University of Minnesota","status"=>"This one"}, + {"facility"=>"University of Wisconsin","status"=>"A different status"}, + ] + p = Parsers::Ctgov.new("NCT123", 1, @api_data) allow(p).to receive(:location_search_term) { "University of Minnesota" } - + expect(p.calculated_status).to eq("This one") end it "#calculated_status with no location status" do - p = Parsers::Ctgov.new("NCT123") - p.set_contents_from_xml(" - - Some status - - - University of Minnesota - - - - ") + @api_data["protocolSection"]["statusModule"]["overallStatus"] = "RECRUITING" + @api_data["protocolSection"]["contactsLocationsModule"]["locations"] = [ + {"facility"=>"University of Minnesota"}, + {"facility"=>"University of Wisconsin"}, + ] + p = Parsers::Ctgov.new("NCT123", 1, @api_data) allow(p).to receive(:location_search_term) { "University of Minnesota" } - expect(p.calculated_status).to eq("Some status") + expect(p.calculated_status).to eq("RECRUITING") end end