diff --git a/README.md b/README.md
index 5f62da4..e55aa45 100644
--- a/README.md
+++ b/README.md
@@ -10,9 +10,11 @@ Contact the StudyFinder team at studyfinder@umn.edu if you:
- Have any questions about StudyFinder, or
- Want to learn more about updates or enhancements of the tool.
-## Upgrade notes for 2.1
+## Upgrade notes for 2.2
+The built-in clinicaltrials.gov connector has been transitioned fully to the clinicaltrials.gov V2 API. This includes two breaking changes in the private API for the ctgov connector.
-The main page carousel/video feature was an accessibility and usability issue, and has been replaced with a three-wide panel of "featured studies". These can be configured in the admin panel, where the carousel configuration formerly was.
+1. In `Connectors::Ctgov#load(start_date,end_date)` the start and end dates must now be in ISO format YYYY-MM-DD (the old format was MM/DD/YYYY). Any custom tasks that directly call this method should be updated.
+2. `Connectors::Ctgov#load(start_date,end_date)` now calls `Connectors::Ctgov#process` itself for every page of the V2 API's paged results. Formerly, `load` and `process` had to be called separately in that order. Remove any direct calls to `process` in order to avoid a redundant re-processing of the last "page" of data from the API.
## Development
diff --git a/app/views/studies/_clinicaltrialsgov_button.html.erb b/app/views/studies/_clinicaltrialsgov_button.html.erb
index 4364f52..1ede9df 100644
--- a/app/views/studies/_clinicaltrialsgov_button.html.erb
+++ b/app/views/studies/_clinicaltrialsgov_button.html.erb
@@ -1,5 +1,5 @@
<% if Trial.is_nct_number?(study.nct_id) %>
-
+
See this study on ClinicalTrials.gov
diff --git a/docker-compose.yml b/docker-compose.yml
index 78c21c6..b23d841 100644
--- a/docker-compose.yml
+++ b/docker-compose.yml
@@ -1,4 +1,3 @@
-version: '3'
services:
elasticsearch:
image: elasticsearch:8.10.2
diff --git a/lib/connectors/ctgov.rb b/lib/connectors/ctgov.rb
index 292ee40..d314069 100644
--- a/lib/connectors/ctgov.rb
+++ b/lib/connectors/ctgov.rb
@@ -5,77 +5,98 @@ class Ctgov
def initialize
@system_info = SystemInfo.current
- @parser_id = Parser.find_by({ klass: 'Parsers::Ctgov'}).id
if @system_info.nil?
raise "There is no system info associated. Please run the seeds file, or add the info in the system administration section."
end
- end
-
- def load(start_date=nil, end_date=nil)
- start_load_time = Time.now
- url = "https://clinicaltrials.gov/ct2/results/download_studies?locn=#{ERB::Util.url_encode(@system_info.search_term)}"
+ @parser_id = Parser.find_by({ klass: 'Parsers::Ctgov'}).id
+ @location = @system_info.search_term
+ @page_token = nil
+ @payload = nil
+ @start_date = 'MIN'
+ @end_date = 'MAX'
+ @start_load_time = nil
+ @total_count = nil
+ @count = 0
+ end
- if !start_date.nil? and !end_date.nil?
- puts "Loading clinicaltrials.gov results for #{@system_info.search_term} ... from #{start_date} to #{end_date}"
- url = url + "&lup_s=#{ERB::Util.url_encode(start_date)}&lup_e=#{ERB::Util.url_encode(end_date)}"
- else
- puts "Loading all clinicaltrials.gov results for #{@system_info.search_term} ..."
+ def study_filters
+ q = {
+ 'query.locn' => "AREA[LocationFacility]#{@location} AND AREA[LocationStatus]RECRUITING",
+ 'query.term' => "AREA[LastUpdatePostDate]RANGE[#{@start_date},#{@end_date}]",
+ countTotal: true,
+ pageSize: 100,
+ format: "json"
+ }
+ # API only wants a pageToken arg at all if we are actually asking for one.
+ if !@page_token.blank?
+ q[:pageToken] = @page_token
end
- puts "Search URL: #{url}"
- # @zipfile = Tempfile.new('file')
- # @zipfile.binmode
+ return q
+ end
- dirname = "#{Rails.root}/tmp/"
- unless File.directory?(dirname)
- FileUtils.mkdir_p(dirname)
- end
+ def studies_page
+ response = HTTParty.get(
+ "https://clinicaltrials.gov/api/v2/studies",
+ query: self.study_filters
+ )
+ @payload = JSON.parse(response.body || "{}")
+ @total_count ||= @payload.dig('totalCount')
+ puts "Retrieved page (#{@page_token})"
+ end
- FileUtils.rm_rf("#{dirname}search_result.zip")
- File.open("#{dirname}search_result.zip", "w+") do |f|
- f.write(HTTParty.get(url).body)
- end
- # @zipfile.write(HTTParty.get(url).body)
- # @zipfile.close
+ def load(start_date="MIN", end_date="MAX")
+ puts "Adding/Updating trials in the database. If it is a full reload it's going to be awhile... Maybe get some coffee? :)"
+ @start_date = start_date
+ @end_date = end_date
+ @start_load_time ||= Time.now
- puts "Extracting trials from zip file"
- extract()
- end_load_time = Time.now
+ self.studies_page
- puts "Time elapsed #{(end_load_time - start_load_time)} seconds"
- end
+ # Process the studies we just received, and ...
+ self.process
+ # ... recurse if there's another page.
- def extract
- start_load_time = Time.now
- extract_zip()
- end_load_time = Time.now
+ if @payload.dig("nextPageToken")
+ @page_token = @payload.dig("nextPageToken")
+ else
+ @page_token = nil
+ end
- puts "Zip time elapsed: #{(end_load_time - start_load_time)}"
- return true
+ if @page_token.blank?
+ puts "clinicaltrials.gov load COMPLETE."
+ else
+ puts "Now we'll load page #{@payload.dig("nextPageToken")}}"
+ @payload = nil
+ self.load(@start_date,@end_date)
+ end
end
def process
- start_load_time = Time.now
- count = 0
- puts "Adding/Updating trials in the database. If it is a full reload it's going to be awhile... Maybe get some coffee? :)"
-
- Dir.glob("#{Rails.root}/tmp/trials/*.xml") do |file|
- p = Parsers::Ctgov.new( file.gsub("#{Rails.root}/tmp/trials/", "").gsub(".xml", ""), @parser_id)
- p.load(file)
+ page_start_load_time = Time.now
+ page_count = 0
+ puts "Processing page (#{@page_token})"
+
+ @payload.dig('studies').each do |study|
+ @id = study.dig('protocolSection', 'identificationModule', 'nctId')
+ p = Parsers::Ctgov.new(@id, @parser_id, study)
+ puts "Processing: #{@id} (#{@count + 1} of #{@total_count})"
p.process
- count = count + 1
+ page_count = page_count + 1
+ @count = @count + 1
end
- end_load_time = Time.now
+ page_end_load_time = Time.now
- puts "Logging update to updaters table. Processed #{count} records."
+ puts "Logging update to updaters table."
Updater.create({
parser_id: @parser_id,
- num_updated: count
+ num_updated: page_count
})
- puts "Process time elapsed: #{(end_load_time - start_load_time)} seconds"
+ puts "Page time elapsed: #{(page_end_load_time - page_start_load_time)} seconds for #{page_count} records."
+ puts "Total process elapsed: #{(page_end_load_time - @start_load_time)} seconds for #{@count} records."
return true
end
@@ -86,8 +107,9 @@ def clear
TrialLocation.delete_all
TrialKeyword.delete_all
Location.delete_all
- Trial.delete_all
+ TrialSubgroup.delete_all
TrialCondition.delete_all
+ Trial.delete_all
end
def site_nct_ids
@@ -103,7 +125,6 @@ def cleanup_stray_trials
end
def nct_ids_for_location(location, page_token = nil)
- csc = 'M Health Fairview Clinics and Surgery Center'
ids = []
q = {
'query.locn' => "SEARCH[Location](AREA[LocationFacility]#{location} AND AREA[LocationStatus]RECRUITING)",
diff --git a/lib/parsers/ctgov.rb b/lib/parsers/ctgov.rb
index 279e32d..ad68d81 100644
--- a/lib/parsers/ctgov.rb
+++ b/lib/parsers/ctgov.rb
@@ -4,40 +4,27 @@
module Parsers
class Ctgov
- @@simple_fields = [
- 'brief_title',
- 'official_title',
- 'acronym',
- 'phase',
- 'verification_date',
- 'lastchanged_date',
- 'firstreceived_date',
- 'brief_summary',
- 'detailed_description'
- ]
+ @@simple_fields = {
+ brief_title: ['protocolSection', 'identificationModule', 'briefTitle'],
+ official_title: ['protocolSection', 'identificationModule', 'officialTitle'],
+ acronym: ['protocolSection', 'identificationModule', 'acronym'],
+ phase: ['protocolSection', 'designModule', 'phases', 0],
+ verification_date: ['protocolSection', 'statusModule', 'statusVerifiedDate'],
+ lastchanged_date: ['protocolSection', 'statusModule', 'lastUpdateSubmitDate'],
+ firstreceived_date: ['protocolSection', 'statusModule', 'studyFirstSubmitDate'],
+ brief_summary: ['protocolSection', 'descriptionModule', 'briefSummary'],
+ detailed_description: ['protocolSection', 'descriptionModule', 'detailedDescription']
+ }
# overwriting the built-in initialize method
- def initialize(id, parser_id=nil)
+ def initialize(id, parser_id=nil, data)
@id = id
- @url = url
@parser_id = parser_id
- end
-
- def url
- "https://clinicaltrials.gov/show/" + @id + "?displayxml=true"
- end
-
- def load(path=nil)
- path = url if path.nil?
- @contents ||= Hash.from_xml( Nokogiri::XML( open(path) ).xpath('clinical_study').to_s )['clinical_study']
+ @data = data
end
def contents
- @contents
- end
-
- def set_contents_from_xml(xml)
- @contents = Hash.from_xml( Nokogiri::XML( xml ).xpath('clinical_study').to_s )['clinical_study']
+ @data
end
def location_search_term
@@ -45,12 +32,12 @@ def location_search_term
end
def locations
- Array([contents.dig("location")]).flatten.compact
+ # Always return an Array, as the pre-V2 implementation did: dig yields nil
+ # for a study without locations, and #location calls .filter on this value.
+ Array(@data.dig('protocolSection', 'contactsLocationsModule', 'locations'))
end
def location
locations.filter do |location|
- location.dig("facility", "name").to_s.try(:downcase) == location_search_term.try(:downcase)
+ /#{Regexp.escape(location_search_term)}/i.match?(location.dig("facility"))
end.first || {}
end
@@ -59,7 +46,7 @@ def location_status
end
def overall_status
- contents.dig("overall_status")
+ @data.dig('protocolSection', 'statusModule', 'overallStatus')
end
def calculated_status
@@ -74,14 +61,14 @@ def preview
trial
end
- def process()
+ def process
trial = Trial.find_or_initialize_by(system_id: @id)
trial.system_id = @id # i think this is just overwriting system_id from the line above
# Trial does not exist yet, setup defaults
if trial.id.nil?
- if @contents.has_key?('overall_status') and @contents['overall_status'] == 'Recruiting'
+ if !overall_status.blank? and overall_status.downcase == 'recruiting'
trial.visible = true # By default recruiting trials are visible unless otherwise specified.
else
trial.visible = false
@@ -97,12 +84,12 @@ def process()
retrieve_simple_fields(trial)
begin
- trial.added_on = Date.parse(@contents.dig("study_first_posted")) || Date.today
+ trial.added_on = Date.parse(@data.dig('protocolSection', 'statusModule', 'studyFirstSubmitDate')) || Date.today
rescue ArgumentError, TypeError => e
trial.added_on = Date.today
end
- if @contents.has_key?('eligibility')
+ if @data.dig('protocolSection', 'eligibilityModule')
process_eligibility(trial)
end
@@ -112,26 +99,26 @@ def process()
trial.save
end
- if @contents.has_key?('conditional_browse') || @contents.has_key?('intervention_browse')
+ if @data.dig('derivedSection', 'conditionBrowseModule','meshes') || @data.dig('derivedSection', 'interventionBrowseModule','meshes')
process_mesh_term(trial)
end
trial.updated_at = DateTime.now # Set updated date, even if the trial has not changed.
# Save associations.
- if @contents.has_key?('condition')
+ if @data.dig('protocolSection', 'conditionsModule','conditions')
process_conditions(trial.id)
end
- if @contents.has_key?('intervention')
+ if @data.dig('protocolSection', 'armsInterventionsModule','interventions')
process_interventions(trial.id)
end
- if @contents.has_key?('location')
+ if @data.dig('protocolSection', 'contactsLocationsModule','locations')
process_locations(trial.id)
end
- if @contents.has_key?('keyword')
+ if @data.dig('protocolSection', 'conditionsModule','keywords')
process_keywords(trial.id)
end
@@ -142,108 +129,129 @@ def process()
def process_contacts(trial)
# Overall official
- if @contents.has_key?('overall_official')
- if @contents['overall_official'].instance_of?(Array)
- overall_offical = @contents['overall_official'][0]
- else
- overall_offical = @contents['overall_official']
- end
+ if @data.dig('protocolSection', 'contactsLocationsModule','overallOfficials')
+ overall_offical = @data.dig('protocolSection', 'contactsLocationsModule','overallOfficials',0)
+
+ trial.official_last_name = overall_offical['name']
+ trial.official_role = overall_offical['role']
+ trial.official_affiliation = overall_offical['affiliation']
+ end
- trial.official_last_name = overall_offical['last_name'] if overall_offical.has_key?('last_name')
- trial.official_role = overall_offical['role'] if overall_offical.has_key?('role')
- trial.official_affiliation = overall_offical['affiliation'] if overall_offical.has_key?('affiliation')
+ # V2 api no longer has "contact" and "backup contact".
+ # Use the first two of any location contacts and then central contacts (having an email address),
+ # in order, as "primary" and "backup".
+ location_contacts = central_contacts = []
+ if !location.dig('contacts').blank?
+ location_contacts = location.dig('contacts').filter do |c|
+ !c.dig("email").blank? || !c.dig("phone").blank?
+ end
end
- # Primary Contact
- if @contents.has_key?('overall_contact')
- trial.contact_last_name = @contents['overall_contact']['last_name'] if @contents['overall_contact'].has_key?('last_name')
- trial.contact_phone = @contents['overall_contact']['phone'] if @contents['overall_contact'].has_key?('phone')
- trial.contact_email = @contents['overall_contact']['email'] if @contents['overall_contact'].has_key?('email')
+ if !@data.dig('protocolSection', 'contactsLocationsModule','centralContacts').blank?
+ central_contacts = @data.dig('protocolSection', 'contactsLocationsModule','centralContacts').filter do |c|
+ !c.dig("email").blank?
+ end
end
- # Backup Contact
- if @contents.has_key?('overall_contact_backup')
- trial.contact_backup_last_name = @contents['overall_contact_backup']['last_name'] if @contents['overall_contact_backup'].has_key?('last_name')
- trial.contact_backup_phone = @contents['overall_contact_backup']['phone'] if @contents['overall_contact_backup'].has_key?('phone')
- trial.contact_backup_email = @contents['overall_contact_backup']['email'] if @contents['overall_contact_backup'].has_key?('email')
+ all_contacts = location_contacts + central_contacts
+ if !all_contacts.blank?
+ c_0 = all_contacts.first
+ c_1 = all_contacts.second
+
+ if !c_0.blank?
+ trial.contact_last_name = c_0["name"]
+ trial.contact_phone = c_0["phone"]
+ trial.contact_email =c_0["email"]
+ end
+
+ if !c_1.blank?
+ trial.contact_backup_last_name = c_1["name"]
+ trial.contact_backup_phone = c_1["phone"]
+ trial.contact_backup_email =c_1["email"]
+ end
end
end
def process_eligibility(trial)
- if @contents['eligibility'].has_key?('gender')
- trial.gender = @contents['eligibility']['gender']
+ if @data.dig('protocolSection', 'eligibilityModule','sex')
+ trial.gender = @data.dig('protocolSection', 'eligibilityModule','sex')
end
- if @contents['eligibility'].has_key?('minimum_age')
- trial.minimum_age = @contents['eligibility']['minimum_age'].gsub(' Years', '').gsub(' Year', '') unless @contents['eligibility']['minimum_age'].nil?
- trial.minimum_age = nil if trial.minimum_age == 'N/A'
- trial.min_age_unit = @contents['eligibility']['minimum_age']
+ if @data.dig('protocolSection', 'eligibilityModule','minimumAge')
+ min_age = @data.dig('protocolSection', 'eligibilityModule','minimumAge')
+ if min_age.blank? || min_age == "N/A"
+ trial.minimum_age = nil
+ trial.min_age_unit = nil
+ return
+ end
+
+ trial.minimum_age = min_age.gsub(/ year(?:s)?/i, '') unless min_age.nil?
+ trial.min_age_unit = min_age
if !trial.min_age_unit.nil? and (trial.min_age_unit.include? 'Month' or trial.min_age_unit.include? 'Months')
- trial.minimum_age = (trial.minimum_age.gsub(' Months', '').gsub(' Month', '').to_f / 12).round(2)
+ trial.minimum_age = (trial.minimum_age.gsub(/\D/, '').to_f / 12).round(2)
end
if !trial.min_age_unit.nil? and (trial.min_age_unit.include? 'Week' or trial.min_age_unit.include? 'Weeks')
- trial.minimum_age = (trial.minimum_age.gsub(' Weeks', '').gsub(' Week', '').to_f * 0.0191781).round(2)
+ trial.minimum_age = (trial.minimum_age.gsub(/\D/, '').to_f * 0.0191781).round(2)
end
if !trial.min_age_unit.nil? and (trial.min_age_unit.include? 'Day' or trial.min_age_unit.include? 'Days')
- trial.minimum_age = (trial.minimum_age.gsub(' Days', '').gsub(' Day', '').to_f * 0.002739728571424657).round(2)
+ trial.minimum_age = (trial.minimum_age.gsub(/\D/, '').to_f * 0.002739728571424657).round(2)
end
end
- if @contents['eligibility'].has_key?('maximum_age')
- trial.maximum_age = @contents['eligibility']['maximum_age'].gsub(' Years', '').gsub(' Year', '') unless @contents['eligibility']['maximum_age'].nil?
- trial.maximum_age = nil if trial.maximum_age == 'N/A'
- trial.max_age_unit = @contents['eligibility']['maximum_age']
+ if @data.dig('protocolSection', 'eligibilityModule','maximumAge')
+ max_age = @data.dig('protocolSection', 'eligibilityModule','maximumAge')
+ if max_age.blank? || max_age == "N/A"
+ trial.maximum_age = nil
+ trial.max_age_unit = nil
+ return
+ end
+
+ trial.maximum_age = max_age.gsub(/ year(?:s)?/i, '') unless max_age.nil?
+ trial.max_age_unit = max_age
if !trial.max_age_unit.nil? and (trial.max_age_unit.include? 'Month' or trial.max_age_unit.include? 'Months')
- trial.maximum_age = (trial.maximum_age.gsub(' Months', '').gsub(' Month', '').to_f / 12).round(2)
+ trial.maximum_age = (trial.maximum_age.gsub(/\D/, '').to_f / 12).round(2)
end
if !trial.max_age_unit.nil? and (trial.max_age_unit.include? 'Week' or trial.max_age_unit.include? 'Weeks')
- trial.maximum_age = (trial.maximum_age.gsub(' Weeks', '').gsub(' Week', '').to_f * 0.0191781).round(2)
+ trial.maximum_age = (trial.maximum_age.gsub(/\D/, '').to_f * 0.0191781).round(2)
end
if !trial.max_age_unit.nil? and (trial.max_age_unit.include? 'Day' or trial.max_age_unit.include? 'Days')
- trial.maximum_age = (trial.maximum_age.gsub(' Days', '').gsub(' Day', '').to_f * 0.002739728571424657).round(2)
+ trial.maximum_age = (trial.maximum_age.gsub(/\D/, '').to_f * 0.002739728571424657).round(2)
end
end
- if @contents['eligibility'].has_key?('healthy_volunteers')
- if @contents['eligibility']['healthy_volunteers'] == 'Accepts Healthy Volunteers'
+ if @data.dig('protocolSection', 'eligibilityModule','healthyVolunteers') == true
trial.healthy_volunteers_imported = true
- else
+ elsif @data.dig('protocolSection', 'eligibilityModule','healthyVolunteers') == false
trial.healthy_volunteers_imported = false
- end
end
- if @contents['eligibility'].has_key?('criteria') && @contents['eligibility']['criteria'].has_key?('textblock')
- trial.eligibility_criteria = @contents['eligibility']['criteria']['textblock']
+ if @data.dig('protocolSection', 'eligibilityModule','eligibilityCriteria')
+ trial.eligibility_criteria = @data.dig('protocolSection', 'eligibilityModule','eligibilityCriteria')
end
end
def process_mesh_term(trial)
- if (!@contents['conditional_browse'].nil? && @contents['conditional_browse'].has_key?('mesh_term')) ||
- (!@contents['intervention_browse'].nil? && @contents['intervention_browse'].has_key?('mesh_term'))
- TrialMeshTerm.where(trial_id: trial.id).delete_all
- end
- if !@contents['conditional_browse'].nil? && @contents['conditional_browse'].has_key?('mesh_term')
+ TrialMeshTerm.where(trial_id: trial.id).delete_all
+ if @data.dig('derivedSection', 'conditionBrowseModule','meshes')
process_condition_browse(trial)
end
- if !@contents['intervention_browse'].nil? && @contents['intervention_browse'].has_key?('mesh_term')
+ if @data.dig('derivedSection', 'interventionBrowseModule','meshes')
process_intervention_browse(trial)
end
end
def process_condition_browse(trial)
- mesh_term = @contents['conditional_browse']['mesh_term']
- mesh_term = [mesh_term] unless mesh_term.instance_of?(Array)
-
+ mesh_term = @data.dig('derivedSection', 'conditionBrowseModule','meshes').map { |e| e['term'] }
mesh_term.each do |mesh|
test = TrialMeshTerm.create({
trial_id: trial.id,
@@ -254,9 +262,7 @@ def process_condition_browse(trial)
end
def process_intervention_browse(trial)
- mesh_term = @contents['intervention_browse']['mesh_term']
- mesh_term = [mesh_term] unless mesh_term.instance_of?(Array)
-
+ mesh_term = @data.dig('derivedSection', 'interventionBrowseModule','meshes').map { |e| e['term'] }
mesh_term.each do |mesh|
test = TrialMeshTerm.create({
trial_id: trial.id,
@@ -267,7 +273,7 @@ def process_intervention_browse(trial)
end
def process_conditions(id)
- conditions = @contents['condition']
+ conditions = @data.dig('protocolSection', 'conditionsModule','conditions')
conditions = [conditions] unless conditions.instance_of?(Array)
TrialCondition.where(trial_id: id).delete_all
@@ -278,7 +284,7 @@ def process_conditions(id)
condition.save
end
- TrialCondition.create({
+ tc = TrialCondition.create({
trial_id: id,
condition_id: condition.id
})
@@ -286,75 +292,67 @@ def process_conditions(id)
end
def process_interventions(id)
- interventions = @contents['intervention']
- interventions = [interventions] unless interventions.instance_of?(Array)
-
+ interventions = @data.dig('protocolSection', 'armsInterventionsModule','interventions')
TrialIntervention.where(trial_id: id).delete_all
interventions.each do |i|
TrialIntervention.create({
trial_id: id,
- intervention_type: i['intervention_type'],
- intervention: i['intervention_name'],
+ intervention_type: i['type'],
+ intervention: i['name'],
description: i['description']
})
end
end
def process_locations(id)
- locations = @contents['location']
+ locations = self.locations
locations = [locations] unless locations.instance_of?(Array)
TrialLocation.where(trial_id: id).delete_all
-
locations.each do |l|
- facility = l['facility'] if l.has_key?('facility')
- location = Location.find_or_initialize_by(location: facility['name'])
-
- if facility.has_key?('address')
- address = facility['address']
- location.city = address['city'] if address.has_key?('city')
- location.state = address['state'] if address.has_key?('state')
- location.zip = address['zip'] if address.has_key?('zip')
- location.country = address['country'] if address.has_key?('country')
- end
-
- location.save
-
- trial_location_hash = {
- trial_id: id,
- location_id: location.id
- }
-
- if l.has_key?('status')
- trial_location_hash['status'] = l['status']
- end
-
- if l.has_key?('contact')
- contact = l['contact']
- trial_location_hash['last_name'] = contact['last_name'] if contact.has_key?('last_name')
- trial_location_hash['phone'] = contact['phone'] if contact.has_key?('phone')
- trial_location_hash['email'] = contact['email'] if contact.has_key?('email')
- end
-
- if l.has_key?('contact_backup')
- contact_backup = l['contact_backup']
- trial_location_hash['backup_last_name'] = contact_backup['last_name'] if contact_backup.has_key?('last_name')
- trial_location_hash['backup_phone'] = contact_backup['phone'] if contact_backup.has_key?('phone')
- trial_location_hash['backup_email'] = contact_backup['email'] if contact_backup.has_key?('email')
- end
+ facility = l.dig('facility')
+ if !facility.blank? # We key off the facility name, so we can't really do anything if it doesn't exist.
+ location = Location.find_or_initialize_by(location: facility)
+
+ location.city = l.dig('city')
+ location.state = l.dig('state')
+ location.zip = l.dig('zip')
+ location.country = l.dig('country')
+
+ location.save
+
+ tl = TrialLocation.new(trial_id: id, location_id: location.id, status: l.dig('status'))
+
+ if !l.dig('contacts').blank?
+ location_contacts = l.dig('contacts').filter do |c|
+ !c.dig("email").blank?
+ end
+
+ c_0 = location_contacts.first
+ c_1 = location_contacts.second
+
+ if !c_0.blank?
+ tl.last_name = c_0["name"]
+ tl.phone = c_0["phone"]
+ tl.email =c_0["email"]
+ end
+
+ if !c_1.blank?
+ tl.backup_last_name = c_1["name"]
+ tl.backup_phone = c_1["phone"]
+ tl.backup_email =c_1["email"]
+ end
+ end
- if l.has_key?('status')
- trial_location_hash['status'] = l['status']
+ tl.save
end
-
- TrialLocation.create(trial_location_hash)
end
end
def process_keywords(id)
- keywords = @contents['keyword']
+ keywords = @data.dig('protocolSection', 'conditionsModule','keywords')
keywords = [keywords] unless keywords.instance_of?(Array)
TrialKeyword.where(trial_id: id).delete_all
@@ -366,24 +364,29 @@ def process_keywords(id)
})
end
+ # TODO: Should we include the intervention "otherNames" as keywords?
+ # interventions = @data.dig('protocolSection', 'armsInterventionsModule','interventions').map { |e| e['otherNames'] }.flatten
+ # interventions.each do |i|
+ # TrialKeyword.create({
+ # trial_id: id,
+ # keyword: i
+ # })
+ # end
+
end
def retrieve_simple_fields(trial)
previous_status = trial.overall_status
# Look at simple fields and update where appropriate.
- @@simple_fields.each do |f|
- if @contents.has_key?(f)
- if f == 'brief_summary' || f == 'detailed_description'
- trial[f] = @contents[f]['textblock']
- else
- trial[f] = @contents[f]
- end
+ @@simple_fields.each do |k,v|
+ if @data.dig(*v)
+ trial[k] = @data.dig(*v)
end
end
trial.overall_status = calculated_status
- trial.recruiting = (calculated_status == 'Recruiting')
+ trial.recruiting = (calculated_status.downcase == 'recruiting')
trial.visible = trial.recruiting
end
diff --git a/lib/tasks/ctgov.rake b/lib/tasks/ctgov.rake
index a180b26..c5467f6 100644
--- a/lib/tasks/ctgov.rake
+++ b/lib/tasks/ctgov.rake
@@ -16,8 +16,7 @@ namespace :studyfinder do
puts "Processing ClinicalTrials.gov data"
connector = Connectors::Ctgov.new
- connector.load((Date.today - args[:days_previous].to_i).strftime('%m/%d/%Y') , Date.today.strftime('%m/%d/%Y') )
- connector.process
+ connector.load((Date.today - args[:days_previous].to_i).strftime('%Y-%m-%d') , Date.today.strftime('%Y-%m-%d') )
puts "Reindexing all trials into elasticsearch"
Trial.import force: true
@@ -28,7 +27,6 @@ namespace :studyfinder do
connector = Connectors::Ctgov.new
connector.load
- connector.process
puts "Reindexing all trials into elasticsearch"
Trial.import force: true
@@ -54,7 +52,6 @@ namespace :studyfinder do
connector = Connectors::Ctgov.new
connector.clear
connector.load
- connector.process
puts "Reindexing all trials into elasticsearch"
Trial.import force: true
diff --git a/spec/parsers/ctgov_spec.rb b/spec/parsers/ctgov_spec.rb
index 9499735..92c2de7 100644
--- a/spec/parsers/ctgov_spec.rb
+++ b/spec/parsers/ctgov_spec.rb
@@ -3,522 +3,635 @@
describe Parsers::Ctgov do
- before do
+ before(:each) do
+ system_info = create(:system_info, initials: 'TSTU', search_term: "Test University")
+ @api_data = {
+ "protocolSection" => {
+ "identificationModule" => {
+ "nctId" => "NCT999999",
+ "orgStudyIdInfo" => {
+ "id" => "2024-STUDY"
+ },
+ "organization" => {
+ "fullName" => "Spacely Sprockets", "class" => "INDUSTRY"
+ },
+ "briefTitle" =>
+ "This is the brief title",
+ "officialTitle" =>
+ "This is the longer, official title",
+ "acronym" => "ACRO"
+ },
+ "statusModule" => {
+ "statusVerifiedDate" => "2024-06",
+ "overallStatus" => "RECRUITING",
+ "expandedAccessInfo" => {"hasExpandedAccess" => false},
+ "startDateStruct" => {"date" => "2023-10-30", "type" => "ACTUAL"},
+ "primaryCompletionDateStruct" => {"date" => "2027-11", "type" => "ESTIMATED"},
+ "completionDateStruct" => {"date" => "2027-11", "type" => "ESTIMATED"},
+ "studyFirstSubmitDate" => "2023-07-05",
+ "studyFirstSubmitQcDate" => "2023-07-05",
+ "studyFirstPostDateStruct" => {"date" => "2023-07-13", "type" => "ACTUAL"},
+ "lastUpdateSubmitDate" => "2024-06-28",
+ "lastUpdatePostDateStruct" => {"date" => "2024-07-01", "type" => "ACTUAL"}
+ },
+ "sponsorCollaboratorsModule" => {
+ "responsibleParty" => {"type" => "SPONSOR"}, "leadSponsor" => {"name" => "Spacely Sprockets", "class" => "INDUSTRY"}
+ },
+ "oversightModule" => {
+ "oversightHasDmc" => true, "isFdaRegulatedDrug" => true, "isFdaRegulatedDevice" => true
+ },
+ "descriptionModule" => {
+ "briefSummary" =>
+ "This summary of the study is brief.",
+ "detailedDescription" =>
+ "This detailed description of the study is longer. This detailed description of the study is longer. This detailed description of the study is longer. This detailed description of the study is longer."
+ },
+ "conditionsModule" => {
+ "conditions" => ["Condition 1", "Condition 2"],
+ "keywords" => ["Test Keyword 1", "Test Keyword 2", "Test Keyword 3"]
+ },
+ "designModule" => {
+ "studyType" => "INTERVENTIONAL",
+ "phases" => ["PHASE3"],
+ "designInfo" => {
+ "allocation" => "RANDOMIZED",
+ "interventionModel" => "PARALLEL",
+ "primaryPurpose" => "TREATMENT",
+ "maskingInfo" =>
+ {"masking" => "QUADRUPLE", "whoMasked" => ["PARTICIPANT", "CARE_PROVIDER", "INVESTIGATOR", "OUTCOMES_ASSESSOR"]}
+ },
+ "enrollmentInfo" => {"count" => 200, "type" => "ESTIMATED"}
+ },
+ "armsInterventionsModule" => {
+ "armGroups" =>
+ [{"label" => "Placebo",
+ "type" => "PLACEBO_COMPARATOR",
+ "description" => "A placebo arm.",
+ "interventionNames" => ["Drug: Placebo", "Device: Some Device"]},
+ {"label" => "Real Drug Label",
+ "type" => "EXPERIMENTAL",
+ "description" =>
+ "Drug arm description",
+ "interventionNames" => ["Drug: Real Drug", "Device: Some Device"]}],
+ "interventions" =>
+ [{"type" => "DRUG", "name" => "Placebo", "description" => "Placebo intervention description", "armGroupLabels" => ["Placebo"]},
+ {"type" => "DRUG",
+ "name" => "Real Drug",
+ "description" => "Drug intervention description",
+ "armGroupLabels" => ["Real Drug Label"],
+ "otherNames" => ["Drug Brand Name", "Drug Brand Name 2"]},
+ {"type" => "DEVICE",
+ "name" => "Some Device",
+ "description" => "Device intervention description",
+ "armGroupLabels" => ["Real Drug Label", "Placebo"]}]
+ },
+ "outcomesModule" => {
+ "primaryOutcomes" =>
+ [{"measure" => "Measure 1",
+ "description" =>
+ "Measure 1 description.",
+ "timeFrame" => "Baseline to Week 52"}],
+ "secondaryOutcomes" =>
+ [{"measure" => "Measure 2",
+ "description" =>
+ "Measure 2 description.",
+ "timeFrame" => "Baseline to Week 52"},
+ {"measure" => "Measure 3",
+ "description" =>
+ "Measure 3 description.",
+ "timeFrame" => "Baseline to Week 52"}]
+ },
+ "eligibilityModule" => {
+ "eligibilityCriteria" => "These are eligibility criteria.",
+ "healthyVolunteers" => false,
+ "sex" => "ALL",
+ "minimumAge" => "18 Years",
+ "stdAges" => ["ADULT", "OLDER_ADULT"]
+ },
+ "contactsLocationsModule" => {
+ "centralContacts" =>
+ [{"name" => "Spacely Sprockets Contact",
+ "role" => "CONTACT",
+ "phone" => "555-555-5555",
+ "email" => "clinicaltrials@spacelysprockets.com"}],
+ "overallOfficials"=>
+ [{"name"=>"Person One, PhD",
+ "affiliation"=>"The Major Medical Center",
+ "role"=>"PRINCIPAL_INVESTIGATOR"}],
+ "locations" =>
+ [
+ {"facility" => "Facility 1 Name",
+ "status" => "RECRUITING",
+ "city" => "Chicago",
+ "state" => "Illinois",
+ "zip" => "60193",
+ "country" => "United States",
+ "contacts" =>
+ [{"name" => "Facility 1 Contact 1 Name", "role" => "CONTACT", "phone" => "555-555-5556", "email" => "someone@facility1.zzz"},
+ {"name" => "Facility 1 Contact 2 Name", "role" => "PRINCIPAL_INVESTIGATOR"}],
+ "geoPoint" => {"lat" => 33.52066, "lon" => -86.80249}},
+ {"facility" => "Facility 2 Name",
+ "status" => "RECRUITING",
+ "city" => "Phoenix",
+ "state" => "Arizona",
+ "zip" => "85013",
+ "country" => "United States",
+ "contacts" =>
+ [{"name" => "Facility 2 Contact 1 Name",
+ "role" => "CONTACT",
+ "phone" => "602-555-5555",
+ "email" => "someone@facility2.zzz"},
+ {"name" => "Facility 2 Contact 2 Name", "role" => "PRINCIPAL_INVESTIGATOR"}],
+ "geoPoint" => {"lat" => 33.44838, "lon" => -112.07404}}
+ ]
+ }
+ },
+ "derivedSection" => {
+ "miscInfoModule" => {"versionHolder" => "2024-07-12"},
+ "conditionBrowseModule" => {
+ "meshes" =>
+ [{"id" => "D000008171", "term" => "Lung Diseases"},
+ {"id" => "D000011658", "term" => "Pulmonary Fibrosis"},
+ {"id" => "D000017563", "term" => "Lung Diseases, Interstitial"},
+ {"id" => "D000005355", "term" => "Fibrosis"}],
+ "ancestors" =>
+ [{"id" => "D000010335", "term" => "Pathologic Processes"}, {"id" => "D000012140", "term" => "Respiratory Tract Diseases"}],
+ "browseLeaves" =>
+ [{"id" => "M11168", "name" => "Lung Diseases", "asFound" => "Lung Disease", "relevance" => "HIGH"},
+ {"id" => "M27137", "name" => "Respiratory Aspiration", "relevance" => "LOW"},
+ {"id" => "M19813", "name" => "Lung Diseases, Interstitial", "asFound" => "Interstitial Lung Disease", "relevance" => "HIGH"},
+ {"id" => "M8485", "name" => "Fibrosis", "asFound" => "Fibrosis", "relevance" => "HIGH"},
+ {"id" => "M14512", "name" => "Pulmonary Fibrosis", "asFound" => "Pulmonary Fibrosis", "relevance" => "HIGH"},
+ {"id" => "M14977", "name" => "Respiratory Tract Diseases", "relevance" => "LOW"}],
+ "browseBranches" =>
+ [{"abbrev" => "BC08", "name" => "Respiratory Tract (Lung and Bronchial) Diseases"},
+ {"abbrev" => "All", "name" => "All Conditions"},
+ {"abbrev" => "BC23", "name" => "Symptoms and General Pathology"}]
+ },
+ "interventionBrowseModule" => {
+ "meshes" => [{"id" => "C000427248", "term" => "Treprostinil"}],
+ "ancestors" => [{"id" => "D000000959", "term" => "Antihypertensive Agents"}],
+ "browseLeaves" =>
+ [{"id" => "M21860", "name" => "Pharmaceutical Solutions", "relevance" => "LOW"},
+ {"id" => "M255601", "name" => "Treprostinil", "asFound" => "Operator", "relevance" => "HIGH"},
+ {"id" => "M4277", "name" => "Antihypertensive Agents", "relevance" => "LOW"}],
+ "browseBranches" =>
+ [{"abbrev" => "PhSol", "name" => "Pharmaceutical Solutions"},
+ {"abbrev" => "All", "name" => "All Drugs and Chemicals"},
+ {"abbrev" => "AnAg", "name" => "Antihypertensive Agents"}]
+ }
+ }
+ }
end
- describe "#parse" do
+ describe "#contents" do
+ it "returns a structure equivalent to @api_data" do
+ p = Parsers::Ctgov.new( 'NCT999999', 1, @api_data)
+ expect(p.contents).to eq(@api_data)
+ end
+ end
+
+ describe "#location_search_term" do
+ it "returns the correct term" do
+ p = Parsers::Ctgov.new( 'NCT999999', 1, @api_data)
+ expect(p.location_search_term).to eq("Test University")
+ end
+ end
+
+ describe "#location" do
+ it "returns the correct location block for an exact match" do
+ p = Parsers::Ctgov.new( 'NCT999999', 1, @api_data)
+ allow(p).to receive(:location_search_term) { "Facility 1 Name" }
+ expect(p.location["facility"]).to eq("Facility 1 Name")
+ expect(p.location["city"]).to eq("Chicago")
+ expect(p.location["state"]).to eq("Illinois")
+ end
+
+ it "returns the correct location block for a substring match" do
+ @api_data["protocolSection"]["contactsLocationsModule"]["locations"] = [
+ {"facility"=>"University of Minnesota/Cancer Center","status"=>"Some status","city"=>"Minneapolis",
+ "state"=>"Minnesota","zip"=>"55455","country"=>"United States"
+ }
+ ]
+ p = Parsers::Ctgov.new( 'NCT999999', 1, @api_data)
+
+ allow(p).to receive(:location_search_term) { "University of Minnesota" }
+ expect(p.location["facility"]).to eq("University of Minnesota/Cancer Center")
+ expect(p.location["city"]).to eq("Minneapolis")
+ expect(p.location["state"]).to eq("Minnesota")
+ end
+ end
+
+ describe "#preview" do
+ it "returns the correct preview block" do
+ p = Parsers::Ctgov.new( 'NCT999999', 1, @api_data)
+ expect(p.preview.system_id).to eq("NCT999999")
+ expect(p.preview.brief_title).to eq("This is the brief title")
+ expect(p.preview.official_title).to eq("This is the longer, official title")
+ expect(p.preview.acronym).to eq("ACRO")
+ expect(p.preview.phase).to eq("PHASE3")
+ expect(p.preview.overall_status).to eq("RECRUITING")
+ expect(p.preview.verification_date).to eq("2024-06")
+ expect(p.preview.brief_summary).to eq("This summary of the study is brief.")
+ expect(p.preview.detailed_description).to eq("This detailed description of the study is longer. This detailed description of the study is longer. This detailed description of the study is longer. This detailed description of the study is longer.")
+ expect(p.preview.visible).to eq(true)
+ expect(p.preview.lastchanged_date).to eq('2024-06-28')
+ expect(p.preview.firstreceived_date).to eq('2023-07-05')
+ expect(p.preview.min_age_unit).to eq(nil)
+ expect(p.preview.maximum_age).to eq(nil)
+ end
+ end
+
+ describe "#process_contacts" do
+ context "if there is one location contact with an email, and one central contact" do
+ it "returns those in order as the primary and backup" do
+ p = Parsers::Ctgov.new( 'NCT999999', 1, @api_data)
+ allow(p).to receive(:location_search_term) { "Facility 1 Name" }
+ t = Trial.new
+ p.process_contacts(t)
+ expect(t.contact_last_name).to eq('Facility 1 Contact 1 Name')
+ expect(t.contact_backup_last_name).to eq('Spacely Sprockets Contact')
+ end
+ end
+ context "if there are two location contacts with an email" do
+ it "returns those in order as the primary and backup" do
+ @api_data["protocolSection"]["contactsLocationsModule"]["locations"][0]["contacts"][1]["email"] = "someone2@facility1.zzz"
+ p = Parsers::Ctgov.new( 'NCT999999', 1, @api_data)
+ allow(p).to receive(:location_search_term) { "Facility 1 Name" }
+ t = Trial.new
+ p.process_contacts(t)
+ expect(t.contact_last_name).to eq('Facility 1 Contact 1 Name')
+ expect(t.contact_backup_last_name).to eq('Facility 1 Contact 2 Name')
+ end
+ end
+ end
+
+ describe "#process_eligibility" do
+ it "parses sex correctly" do
+ @api_data["protocolSection"]["eligibilityModule"]["sex"] = "ALL"
+ p = Parsers::Ctgov.new( 'NCT999999', 1, @api_data)
+ t = Trial.new
+ p.process_eligibility(t)
+ expect(t.gender).to eq("ALL")
+ end
+
+ it "parses age ranges correctly when weeks are involved" do
+ @api_data["protocolSection"]["eligibilityModule"]["minimumAge"] = nil
+ @api_data["protocolSection"]["eligibilityModule"]["maximumAge"] = "37 Weeks"
+ p = Parsers::Ctgov.new( 'NCT999999', 1, @api_data)
+ t = Trial.new
+ p.process_eligibility(t)
+ expect(t.minimum_age).to eq(nil)
+ expect(t.maximum_age).to eq('0.71')
+ end
+
+ it "parses age ranges correctly when years are involved" do
+ @api_data["protocolSection"]["eligibilityModule"]["minimumAge"] = "37 Days"
+ @api_data["protocolSection"]["eligibilityModule"]["maximumAge"] = "2 Years"
+ p = Parsers::Ctgov.new( 'NCT999999', 1, @api_data)
+ t = Trial.new
+ p.process_eligibility(t)
+ expect(t.minimum_age).to eq('0.1')
+ expect(t.min_age_unit).to eq('37 Days')
+ expect(t.maximum_age).to eq('2')
+ expect(t.max_age_unit).to eq('2 Years')
+ end
+
+ it "parses age ranges correctly when months are involved" do
+ @api_data["protocolSection"]["eligibilityModule"]["minimumAge"] = "13 Months"
+ @api_data["protocolSection"]["eligibilityModule"]["maximumAge"] = "300 Months"
+ p = Parsers::Ctgov.new( 'NCT999999', 1, @api_data)
+ t = Trial.new
+ p.process_eligibility(t)
+ expect(t.minimum_age).to eq('1.08')
+ expect(t.min_age_unit).to eq('13 Months')
+ expect(t.maximum_age).to eq('25.0')
+ expect(t.max_age_unit).to eq('300 Months')
+ end
+
+ it "parses healthy volunteers correctly" do
+ @api_data["protocolSection"]["eligibilityModule"]["healthyVolunteers"] = true
+ p = Parsers::Ctgov.new( 'NCT999999', 1, @api_data)
+ t = Trial.new
+ p.process_eligibility(t)
+ expect(t.healthy_volunteers_imported).to eq(true)
+
+ @api_data["protocolSection"]["eligibilityModule"]["healthyVolunteers"] = false
+ p = Parsers::Ctgov.new( 'NCT999999', 1, @api_data)
+ t = Trial.new
+ p.process_eligibility(t)
+ expect(t.healthy_volunteers_imported).to eq(false)
+ end
+
+ it "parses eligibilityCriteria correctly" do
+ @api_data["protocolSection"]["eligibilityModule"]["eligibilityCriteria"] = "Test Criteria 1 through 10."
+ p = Parsers::Ctgov.new( 'NCT999999', 1, @api_data)
+ t = Trial.new
+ p.process_eligibility(t)
+ expect(t.eligibility_criteria).to eq("Test Criteria 1 through 10.")
+ end
+ end
+
+ describe "#process_locations" do
+ context "when there is a location present with one contact" do
+ it "creates a location record and a trial location with one contact" do
+ @api_data["protocolSection"]["contactsLocationsModule"]["locations"] = [
+ {"facility"=>"University of Minnesota","status"=>"Some status","city"=>"Minneapolis",
+ "state"=>"Minnesota","zip"=>"55455","country"=>"United States",
+ "contacts" =>
+ [{"name" => "UMN Contact 1 Name", "role" => "CONTACT", "phone" => "555-555-5556", "email" => "someone@umn.edu"},
+ {"name" => "UMN Contact 2 Name", "role" => "PRINCIPAL_INVESTIGATOR"}]
+ }
+ ]
+ p = Parsers::Ctgov.new( 'NCT999999', 1, @api_data)
+ t = create(:trial)
+ p.process_locations(t.id)
+ tl = TrialLocation.first
+ l = tl.location
+ expect(l.location).to eq("University of Minnesota")
+ expect(l.city).to eq("Minneapolis")
+ expect(l.state).to eq("Minnesota")
+ expect(l.zip).to eq("55455")
+ expect(l.country).to eq("United States")
+
+ expect(tl.status).to eq("Some status")
+ expect(tl.last_name).to eq("UMN Contact 1 Name")
+ expect(tl.phone).to eq("555-555-5556")
+ expect(tl.email).to eq("someone@umn.edu")
+ expect(tl.backup_last_name).to eq(nil)
+ expect(tl.backup_phone).to eq(nil)
+ expect(tl.backup_email).to eq(nil)
+ end
+ it "creates a location record and a trial location with two contacts" do
+ @api_data["protocolSection"]["contactsLocationsModule"]["locations"] = [
+ {"facility"=>"University of Minnesota","status"=>"Some status","city"=>"Minneapolis",
+ "state"=>"Minnesota","zip"=>"55455","country"=>"United States",
+ "contacts" =>
+ [{"name" => "UMN Contact 1 Name", "role" => "CONTACT", "phone" => "555-555-5556", "email" => "someone@umn.edu"},
+ {"name" => "UMN Contact 2 Name", "role" => "CONTACT", "phone" => "555-555-5557", "email" => "another@umn.edu"},
+ {"name" => "UMN Contact 3 Name", "role" => "PRINCIPAL_INVESTIGATOR"}]
+ }
+ ]
+ p = Parsers::Ctgov.new( 'NCT999999', 1, @api_data)
+ t = create(:trial)
+ p.process_locations(t.id)
+ tl = TrialLocation.first
+ l = tl.location
+ expect(l.location).to eq("University of Minnesota")
+ expect(l.city).to eq("Minneapolis")
+ expect(l.state).to eq("Minnesota")
+ expect(l.zip).to eq("55455")
+ expect(l.country).to eq("United States")
+
+ expect(tl.status).to eq("Some status")
+ expect(tl.last_name).to eq("UMN Contact 1 Name")
+ expect(tl.phone).to eq("555-555-5556")
+ expect(tl.email).to eq("someone@umn.edu")
+ expect(tl.backup_last_name).to eq("UMN Contact 2 Name")
+ expect(tl.backup_phone).to eq("555-555-5557")
+ expect(tl.backup_email).to eq("another@umn.edu")
+ end
+ end
+ end
+ describe "#parse" do
it "parses status and sets visibility when Recruiting" do
- url = 'https://clinicaltrials.gov/show/NCT01678638?displayxml=true'
- p = Parsers::Ctgov.new( 'NCT01678638', 1)
- p.set_contents_from_xml("
-
- Test Study
- Recruiting
-
- ")
+ p = Parsers::Ctgov.new( 'NCT999999', 1, @api_data)
p.process
- trial = Trial.find_by(system_id: 'NCT01678638')
+ trial = Trial.find_by(system_id: 'NCT999999')
expect(trial.visible).to eq(true)
end
it "parses status and sets visibility when not Recruiting" do
- url = 'https://clinicaltrials.gov/show/NCT01678638?displayxml=true'
- p = Parsers::Ctgov.new( 'NCT01678638', 1)
- p.set_contents_from_xml("
-
- Test Study
- Not yet Recruiting
-
- ")
+ @api_data["protocolSection"]["statusModule"]["overallStatus"] = "Not yet recruiting"
+ p = Parsers::Ctgov.new( 'NCT999999', 1, @api_data)
p.process
- trial = Trial.find_by(system_id: 'NCT01678638')
+ trial = Trial.find_by(system_id: 'NCT999999')
expect(trial.visible).to eq(false)
end
- it "parses status and sets visibility from Recruiting to not Recruiting" do
- url = 'https://clinicaltrials.gov/show/NCT01678638?displayxml=true'
- p = Parsers::Ctgov.new( 'NCT01678638', 1)
- p.set_contents_from_xml("
-
- Test Study
- Recruiting
-
- ")
+ it "parses status change from recruiting to non and sets visibility from true to false" do
+ p = Parsers::Ctgov.new( 'NCT999999', 1, @api_data)
p.process
- trial = Trial.find_by(system_id: 'NCT01678638')
+ trial = Trial.find_by(system_id: 'NCT999999')
expect(trial.visible).to eq(true)
- p2 = Parsers::Ctgov.new( 'NCT01678638', 1)
- p2.set_contents_from_xml("
-
- Test Study
- Not yet Recruiting
-
- ")
+ @api_data["protocolSection"]["statusModule"]["overallStatus"] = "Not yet recruiting"
+ p2 = Parsers::Ctgov.new( 'NCT999999', 1, @api_data)
p2.process
- trial = Trial.find_by(system_id: 'NCT01678638')
+ trial.reload
expect(trial.visible).to eq(false)
end
- it "parses status and sets visibility from not Recruiting to Recruiting" do
- url = 'https://clinicaltrials.gov/show/NCT01678638?displayxml=true'
- p = Parsers::Ctgov.new( 'NCT01678638', 1)
- p.set_contents_from_xml("
-
- Test Study
- Not yet Recruiting
-
- ")
+ it "parses status and sets visibility from false to true" do
+ @api_data["protocolSection"]["statusModule"]["overallStatus"] = "Not yet recruiting"
+ p = Parsers::Ctgov.new( 'NCT999999', 1, @api_data)
p.process
- trial = Trial.find_by(system_id: 'NCT01678638')
+ trial = Trial.find_by(system_id: 'NCT999999')
expect(trial.visible).to eq(false)
- p2 = Parsers::Ctgov.new( 'NCT01678638', 1)
- p2.set_contents_from_xml("
-
- Test Study
- Recruiting
-
- ")
+ @api_data["protocolSection"]["statusModule"]["overallStatus"] = "RECRUITING"
+ p2 = Parsers::Ctgov.new( 'NCT999999', 1, @api_data)
p2.process
- trial2 = Trial.find_by(system_id: 'NCT01678638')
- expect(trial2.visible).to eq(true)
+ trial.reload
+ expect(trial.visible).to eq(true)
end
- it "sets visibility correctly regardless of current value" do
- trial = create(:trial, system_id: "NCT123", overall_status: "Completed", visible: true)
- p = Parsers::Ctgov.new("NCT123", 1)
-
- p.set_contents_from_xml("
-
- Completed
-
- ")
+ it "sets visibility correctly regardless of current value" do
+ trial = create(:trial, system_id: "NCT999999", overall_status: "Completed", visible: true)
+ expect(trial.visible).to eq(true)
+ @api_data["protocolSection"]["statusModule"]["overallStatus"] = "COMPLETED"
+ p = Parsers::Ctgov.new( 'NCT999999', 1, @api_data)
p.process
trial.reload
expect(trial.visible).to eq(false)
end
- it "parses age ranges correctly when weeks are involved" do
- url = 'https://clinicaltrials.gov/show/NCT01678638?displayxml=true'
- p = Parsers::Ctgov.new( 'NCT01678638', 1)
- p.set_contents_from_xml("
-
- Test Study
-
-
-
-
- Both
- N/A
- 37 Weeks
- No
-
-
- ")
- p.process
-
- trial = Trial.find_by(system_id: 'NCT01678638')
- expect(trial.minimum_age).to eq(nil)
- expect(trial.maximum_age).to eq('0.71')
- end
-
- it "parses age ranges correctly when years are involved" do
- url = 'https://clinicaltrials.gov/show/NCT01678638?displayxml=true'
- p = Parsers::Ctgov.new( 'NCT01678638', 1)
- p.set_contents_from_xml("
-
- Test Study
-
-
-
-
- Both
- 37 Days
- 2 Years
- No
-
-
- ")
- p.process
-
- trial = Trial.find_by(system_id: 'NCT01678638')
- expect(trial.minimum_age).to eq('0.1')
- expect(trial.min_age_unit).to eq('37 Days')
- expect(trial.maximum_age).to eq('2')
- expect(trial.max_age_unit).to eq('2 Years')
- end
-
- it "parses age ranges correctly when months are involved" do
- url = 'https://clinicaltrials.gov/show/NCT01678638?displayxml=true'
- p = Parsers::Ctgov.new( 'NCT01678638', 1)
- p.set_contents_from_xml("
-
- Test Study
-
-
-
-
- Both
- 13 Months
- 300 Months
- No
-
-
- ")
- p.process
-
- trial = Trial.find_by(system_id: 'NCT01678638')
- expect(trial.minimum_age).to eq('1.08')
- expect(trial.min_age_unit).to eq('13 Months')
- expect(trial.maximum_age).to eq('25.0')
- expect(trial.max_age_unit).to eq('300 Months')
- end
-
it "parses conditions when there is only one" do
- url = 'https://clinicaltrials.gov/show/NCT01678638?displayxml=true'
- p = Parsers::Ctgov.new( 'NCT01678638', 1)
- p.set_contents_from_xml("
-
- Test Study
-
-
-
-
- Both
- 13 Months
- 300 Months
- No
-
- Test Condition
-
- ")
+ @api_data["protocolSection"]["conditionsModule"]["conditions"] = "Test Condition"
+ p = Parsers::Ctgov.new( 'NCT999999', 1, @api_data)
p.process
- trial = Trial.find_by(system_id: 'NCT01678638')
+ trial = Trial.find_by(system_id: 'NCT999999')
expect(trial.conditions.first.condition).to eq('Test Condition')
end
it "parses conditions when there are many" do
- url = 'https://clinicaltrials.gov/show/NCT01678638?displayxml=true'
- p = Parsers::Ctgov.new( 'NCT01678638', 1)
- p.set_contents_from_xml("
-
- Test Study
-
-
-
-
- Both
- 13 Months
- 300 Months
- No
-
- Test Condition 1
- Test Condition 2
- Test Condition 3
- Test Condition 4
-
- ")
+ @api_data["protocolSection"]["conditionsModule"]["conditions"] = ["Test Condition 1", "Test Condition 2", "Test Condition 3", "Test Condition 4"]
+ p = Parsers::Ctgov.new( 'NCT999999', 1, @api_data)
p.process
- trial = Trial.find_by(system_id: 'NCT01678638')
+ trial = Trial.find_by(system_id: 'NCT999999')
expect(trial.conditions.size).to eq(4)
expect(trial.conditions.first.condition).to eq('Test Condition 1')
expect(trial.conditions.last.condition).to eq('Test Condition 4')
end
it "parses intervention when there is only one" do
- url = 'https://clinicaltrials.gov/show/NCT01678638?displayxml=true'
- p = Parsers::Ctgov.new( 'NCT01678638', 1)
- p.set_contents_from_xml("
-
- Test Study
-
- Intervention Type
- Intervention Name
-
-
- ")
+ @api_data["protocolSection"]["armsInterventionsModule"]["interventions"] = [
+ {"type" => "DRUG",
+ "name" => "Real Drug",
+ "description" => "Drug intervention description",
+ "armGroupLabels" => ["Real Drug Label"],
+ "otherNames" => ["Drug Brand Name", "Drug Brand Name 2"]
+ }
+ ]
+
+ p = Parsers::Ctgov.new( 'NCT999999', 1, @api_data)
p.process
- trial = Trial.find_by(system_id: 'NCT01678638')
- expect(trial.interventions).to eq('Intervention Type: Intervention Name')
+ trial = Trial.find_by(system_id: 'NCT999999')
+ expect(trial.interventions).to eq('DRUG: Real Drug')
end
- it "parses intervention when there are many" do
- url = 'https://clinicaltrials.gov/show/NCT01678638?displayxml=true'
- p = Parsers::Ctgov.new( 'NCT01678638', 1)
- p.set_contents_from_xml("
-
- Test Study
-
- Intervention Type 1
- Intervention Name 1
-
-
- Intervention Type 2
- Intervention Name 2
-
-
- Intervention Type 3
- Intervention Name 3
-
-
- ")
+ it "parses interventions when there are many" do
+ p = Parsers::Ctgov.new( 'NCT999999', 1, @api_data)
p.process
- trial = Trial.find_by(system_id: 'NCT01678638')
+ trial = Trial.find_by(system_id: 'NCT999999')
interventions = trial.interventions.split("; ")
expect(interventions.size).to eq(3)
- expect(interventions.first).to eq('Intervention Type 1: Intervention Name 1')
- expect(interventions.second).to eq('Intervention Type 2: Intervention Name 2')
- expect(interventions.third).to eq('Intervention Type 3: Intervention Name 3')
+ expect(interventions.first).to eq('DRUG: Placebo')
+ expect(interventions.second).to eq('DRUG: Real Drug')
+ expect(interventions.third).to eq('DEVICE: Some Device')
end
it "parses keyword when there is only one" do
- url = 'https://clinicaltrials.gov/show/NCT01678638?displayxml=true'
- p = Parsers::Ctgov.new( 'NCT01678638', 1)
- p.set_contents_from_xml("
-
- Test Study
- Test Keyword
-
- ")
+ @api_data["protocolSection"]["conditionsModule"]["keywords"] = ["Test Keyword"]
+ p = Parsers::Ctgov.new( 'NCT999999', 1, @api_data)
p.process
- trial = Trial.find_by(system_id: 'NCT01678638')
+ trial = Trial.find_by(system_id: 'NCT999999')
expect(trial.keywords).to eq("Test Keyword")
end
it "parses keyword when there are many" do
- url = 'https://clinicaltrials.gov/show/NCT01678638?displayxml=true'
- p = Parsers::Ctgov.new( 'NCT01678638', 1)
- p.set_contents_from_xml("
-
- Test Study
- Test Keyword 1
- Test Keyword 2
- Test Keyword 3
- Test Keyword 4
- Test Keyword 5
-
- ")
+ p = Parsers::Ctgov.new( 'NCT999999', 1, @api_data)
p.process
- trial = Trial.find_by(system_id: 'NCT01678638')
+ trial = Trial.find_by(system_id: 'NCT999999')
keywords = trial.keywords.split("; ")
- expect(keywords.size).to eq(5)
+ expect(keywords.size).to eq(3)
expect(keywords.first).to eq("Test Keyword 1")
expect(keywords.second).to eq("Test Keyword 2")
- expect(keywords.last).to eq("Test Keyword 5")
+ expect(keywords.last).to eq("Test Keyword 3")
end
it "parses conditional browse mesh term when there is one" do
- url = 'https://clinicaltrials.gov/show/NCT01678638?displayxml=true'
- p = Parsers::Ctgov.new( 'NCT01678638', 1)
- p.set_contents_from_xml("
-
- Test Study
-
- Test Conditional Mesh Term
-
-
- ")
+ @api_data["derivedSection"]["conditionBrowseModule"]["meshes"] = [{"id"=>"D001","term"=>"Test Conditional Mesh Term"}]
+ @api_data["derivedSection"]["interventionBrowseModule"]["meshes"] = []
+ p = Parsers::Ctgov.new( 'NCT999999', 1, @api_data)
p.process
- trial = Trial.find_by(system_id: 'NCT01678638')
+ trial = Trial.find_by(system_id: 'NCT999999')
expect(trial.mesh_terms).to eq("Conditional: Test Conditional Mesh Term")
end
it "parses conditional browse mesh term when there are many" do
- url = 'https://clinicaltrials.gov/show/NCT01678638?displayxml=true'
- p = Parsers::Ctgov.new( 'NCT01678638', 1)
- p.set_contents_from_xml("
-
- Test Study
-
- Test Conditional Mesh Term 1
- Test Conditional Mesh Term 2
- Test Conditional Mesh Term 3
-
-
- ")
+ @api_data["derivedSection"]["conditionBrowseModule"]["meshes"] = [{"id"=>"D001","term"=>"Test Conditional Mesh Term 1"},{"id"=>"D002","term"=>"Test Conditional Mesh Term 2"}]
+ @api_data["derivedSection"]["interventionBrowseModule"]["meshes"] = []
+ p = Parsers::Ctgov.new( 'NCT999999', 1, @api_data)
p.process
- trial = Trial.find_by(system_id: 'NCT01678638')
+ trial = Trial.find_by(system_id: 'NCT999999')
mesh_term = trial.mesh_terms.split("; ")
- expect(mesh_term.size).to eq(3)
+ expect(mesh_term.size).to eq(2)
expect(mesh_term.first).to eq("Conditional: Test Conditional Mesh Term 1")
- expect(mesh_term.second).to eq("Conditional: Test Conditional Mesh Term 2")
- expect(mesh_term.last).to eq("Conditional: Test Conditional Mesh Term 3")
+ expect(mesh_term.last).to eq("Conditional: Test Conditional Mesh Term 2")
end
it "parses intervention browse mesh term when there is one" do
- url = 'https://clinicaltrials.gov/show/NCT01678638?displayxml=true'
- p = Parsers::Ctgov.new( 'NCT01678638', 1)
- p.set_contents_from_xml("
-
- Test Study
-
- Test Intervention Mesh Term
-
-
- ")
+ @api_data["derivedSection"]["conditionBrowseModule"]["meshes"] = []
+ @api_data["derivedSection"]["interventionBrowseModule"]["meshes"] = [{"id"=>"D001","term"=>"Test Intervention Mesh Term"}]
+ p = Parsers::Ctgov.new( 'NCT999999', 1, @api_data)
p.process
- trial = Trial.find_by(system_id: 'NCT01678638')
+ trial = Trial.find_by(system_id: 'NCT999999')
expect(trial.mesh_terms).to eq("Intervention: Test Intervention Mesh Term")
end
it "parses intervention browse mesh term when there are many" do
- url = 'https://clinicaltrials.gov/show/NCT01678638?displayxml=true'
- p = Parsers::Ctgov.new( 'NCT01678638', 1)
- p.set_contents_from_xml("
-
- Test Study
-
- Test Intervention Mesh Term 1
- Test Intervention Mesh Term 2
- Test Intervention Mesh Term 3
-
-
- ")
+ @api_data["derivedSection"]["conditionBrowseModule"]["meshes"] = []
+ @api_data["derivedSection"]["interventionBrowseModule"]["meshes"] = [{"id"=>"D001","term"=>"Test Intervention Mesh Term 1"},{"id"=>"D002","term"=>"Test Intervention Mesh Term 2"}]
+ p = Parsers::Ctgov.new( 'NCT999999', 1, @api_data)
p.process
- trial = Trial.find_by(system_id: 'NCT01678638')
+ trial = Trial.find_by(system_id: 'NCT999999')
mesh_term = trial.mesh_terms.split("; ")
- expect(mesh_term.size).to eq(3)
+ expect(mesh_term.size).to eq(2)
expect(mesh_term.first).to eq("Intervention: Test Intervention Mesh Term 1")
- expect(mesh_term.second).to eq("Intervention: Test Intervention Mesh Term 2")
- expect(mesh_term.last).to eq("Intervention: Test Intervention Mesh Term 3")
+ expect(mesh_term.last).to eq("Intervention: Test Intervention Mesh Term 2")
end
-
it "parses conditional and intervention browse mesh term" do
- url = 'https://clinicaltrials.gov/show/NCT01678638?displayxml=true'
- p = Parsers::Ctgov.new( 'NCT01678638', 1)
- p.set_contents_from_xml("
-
- Test Study
-
- Test Conditional Mesh Term 1
- Test Conditional Mesh Term 2
- Test Conditional Mesh Term 3
-
-
- Test Intervention Mesh Term 1
- Test Intervention Mesh Term 2
- Test Intervention Mesh Term 3
-
-
- ")
+ @api_data["derivedSection"]["conditionBrowseModule"]["meshes"] = [{"id"=>"D001","term"=>"Test Conditional Mesh Term 1"},{"id"=>"D002","term"=>"Test Conditional Mesh Term 2"}]
+ @api_data["derivedSection"]["interventionBrowseModule"]["meshes"] = [{"id"=>"D001","term"=>"Test Intervention Mesh Term 1"}]
+ p = Parsers::Ctgov.new( 'NCT999999', 1, @api_data)
p.process
- trial = Trial.find_by(system_id: 'NCT01678638')
+ trial = Trial.find_by(system_id: 'NCT999999')
mesh_term = trial.mesh_terms.split("; ")
- expect(mesh_term.size).to eq(6)
+ expect(mesh_term.size).to eq(3)
expect(mesh_term.first).to eq("Conditional: Test Conditional Mesh Term 1")
expect(mesh_term.second).to eq("Conditional: Test Conditional Mesh Term 2")
- expect(mesh_term.third).to eq("Conditional: Test Conditional Mesh Term 3")
- expect(mesh_term.fourth).to eq("Intervention: Test Intervention Mesh Term 1")
- expect(mesh_term.fifth).to eq("Intervention: Test Intervention Mesh Term 2")
- expect(mesh_term.last).to eq("Intervention: Test Intervention Mesh Term 3")
+ expect(mesh_term.third).to eq("Intervention: Test Intervention Mesh Term 1")
end
end
it "#overall_status" do
- p = Parsers::Ctgov.new("NCT123")
- p.set_contents_from_xml("
-
- Recruiting
-
- ")
-
- expect(p.overall_status).to eq("Recruiting")
+ @api_data["protocolSection"]["statusModule"]["overallStatus"] = "RECRUITING"
+ p = Parsers::Ctgov.new("NCT123", 1, @api_data)
+ expect(p.overall_status).to eq("RECRUITING")
end
it "#location_status with one location" do
- p = Parsers::Ctgov.new("NCT123")
- p.set_contents_from_xml("
-
-
-
- University of Minnesota
-
- Some status
-
-
- ")
+ @api_data["protocolSection"]["contactsLocationsModule"]["locations"] = [{"facility"=>"University of Minnesota","status"=>"Some status"}]
+ p = Parsers::Ctgov.new("NCT123", 1, @api_data)
allow(p).to receive(:location_search_term) { "University of Minnesota" }
expect(p.location_status).to eq("Some status")
end
it "#location_status with multiple locations" do
- p = Parsers::Ctgov.new("NCT123")
- p.set_contents_from_xml("
-
-
-
- Somewhere else
-
- Another status
-
-
-
- University of Minnesota
-
- Some status
-
-
- ")
+ @api_data["protocolSection"]["contactsLocationsModule"]["locations"] = [
+ {"facility"=>"University of Minnesota","status"=>"Some status"},
+ {"facility"=>"University of Wisconsin","status"=>"A different status"},
+ ]
+ p = Parsers::Ctgov.new("NCT123", 1, @api_data)
allow(p).to receive(:location_search_term) { "University of Minnesota" }
expect(p.location_status).to eq("Some status")
end
it "#calculated_status" do
- p = Parsers::Ctgov.new("NCT123")
- p.set_contents_from_xml("
-
- Not this one
-
-
- University of Minnesota
-
- This one
-
-
- ")
+ @api_data["protocolSection"]["statusModule"]["overallStatus"] = "RECRUITING"
+ @api_data["protocolSection"]["contactsLocationsModule"]["locations"] = [
+ {"facility"=>"University of Minnesota","status"=>"This one"},
+ {"facility"=>"University of Wisconsin","status"=>"A different status"},
+ ]
+ p = Parsers::Ctgov.new("NCT123", 1, @api_data)
allow(p).to receive(:location_search_term) { "University of Minnesota" }
-
+
expect(p.calculated_status).to eq("This one")
end
it "#calculated_status with no location status" do
- p = Parsers::Ctgov.new("NCT123")
- p.set_contents_from_xml("
-
- Some status
-
-
- University of Minnesota
-
-
-
- ")
+ @api_data["protocolSection"]["statusModule"]["overallStatus"] = "RECRUITING"
+ @api_data["protocolSection"]["contactsLocationsModule"]["locations"] = [
+ {"facility"=>"University of Minnesota"},
+ {"facility"=>"University of Wisconsin"},
+ ]
+ p = Parsers::Ctgov.new("NCT123", 1, @api_data)
allow(p).to receive(:location_search_term) { "University of Minnesota" }
- expect(p.calculated_status).to eq("Some status")
+ expect(p.calculated_status).to eq("RECRUITING")
end
end