Skip to content

Commit

Permalink
Address expiry of studies from clinicaltrials.gov
Browse files Browse the repository at this point in the history
Added a task to un-publish trials that are no longer in the list of recruiting trials for the location that is being used to pull from clinicaltrials.gov.

- Refactored ctgov connector nct_ids_for_location method to use current ct.gov API. Also uses recursion without passing arrays of IDs into a new method invocation for memory benefits.
- In same method, include filter for only recruiting studies at location.
- Allow cleanup_stray_trials and stray_trials to accept an argument of an array of IDs that should be considered "still recruiting"; this can be leveraged for testing bc there isn't a great way to mock ct.gov API responses given the current state of coupling in the connector.
- Tests for these updates.
  • Loading branch information
machinehum committed Dec 19, 2023
1 parent 89aeea5 commit 6e9a639
Show file tree
Hide file tree
Showing 5 changed files with 119 additions and 36 deletions.
93 changes: 57 additions & 36 deletions lib/connectors/ctgov.rb
Original file line number Diff line number Diff line change
Expand Up @@ -90,55 +90,76 @@ def clear
TrialCondition.delete_all
end

def stray_trials
Trial.where.not(system_id: nct_ids_for_location(@system_info.search_term))
def site_nct_ids
nct_ids_for_location(SystemInfo.search_term)
end

def cleanup_stray_trials
stray_trials.destroy_all
def stray_trials(nct_ids = nil)
nct_ids ||= site_nct_ids
Trial.where(parser_id: @parser_id).where.not(nct_id: nct_ids)
end

private

def extract_zip
dirname = "#{Rails.root}/tmp/"
unless File.directory?(dirname)
FileUtils.mkdir_p(dirname)
end

unless File.directory?("#{dirname}trials/")
FileUtils.mkdir_p("#{dirname}trials/")
end
def cleanup_stray_trials(nct_ids = nil)
nct_ids ||= site_nct_ids
stray_trials(nct_ids).update(visible: false)
end

FileUtils.rm_rf(Dir.glob("#{dirname}trials/*"))
Zip::File.open("#{dirname}search_result.zip") do |file|
file.each do |entry|
file.extract(entry, "#{dirname}trials/#{entry.name}")
end
end
def nct_ids_for_location(location, page_token = nil)
csc = 'M Health Fairview Clinics and Surgery Center'
ids = []
q = {
'query.locn' => "SEARCH[Location](AREA[LocationFacility]#{location} AND AREA[LocationStatus]RECRUITING)",
fields: "NCTId",
countTotal: true,
pageSize: 1000,
format: "json"
}

# API only wants a pageToken arg at all if we are actually asking for one.
if !page_token.blank?
q[:pageToken] = page_token
end

def nct_ids_for_location(location, start = 1, endd = 1000, ids = [])
response = HTTParty.get(
"https://classic.clinicaltrials.gov/api/query/study_fields",
query: {
expr: "SEARCH[Location](AREA[LocationFacility]#{location})",
fields: "NCTId",
min_rnk: start,
max_rnk: endd,
fmt: "json"
}
"https://clinicaltrials.gov/api/v2/studies",
query: q
)
payload = JSON.parse(response.body || "{}")

response_ids = Array(JSON.parse(response.body || "{}").dig("StudyFieldsResponse").dig("StudyFields")).map do |result|
Array(result.dig("NCTId")).first
response_ids = Array(payload.dig("studies")).map do |result|
result.dig("protocolSection").dig("identificationModule").dig("nctId")
end

if response_ids.empty?
ids
else
nct_ids_for_location(location, endd + 1, endd + 1000, ids + response_ids)
# Add the ids we just received, and ...
ids.push(*response_ids)

# ... recurse if there's another page.
if payload.dig("nextPageToken")
ids.push(*(nct_ids_for_location(location, payload.dig("nextPageToken"))))
end

return ids
end

private

def extract_zip
dirname = "#{Rails.root}/tmp/"
unless File.directory?(dirname)
FileUtils.mkdir_p(dirname)
end

unless File.directory?("#{dirname}trials/")
FileUtils.mkdir_p("#{dirname}trials/")
end

FileUtils.rm_rf(Dir.glob("#{dirname}trials/*"))
Zip::File.open("#{dirname}search_result.zip") do |file|
file.each do |entry|
file.extract(entry, "#{dirname}trials/#{entry.name}")
end
end
end

end
end
8 changes: 8 additions & 0 deletions lib/tasks/ctgov.rake
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,14 @@ namespace :studyfinder do
Trial.import force: true
end

task cleanup_strays: :environment do |t, args|
puts "Cleaning up stray trials"
connector = Connectors::Ctgov.new
trials = connector.cleanup_stray_trials
puts "Have un-published (system_ids): "
puts trials.map{ |e| " #{e.system_id}\n" }
end

# ==============================================================================================
# studyfinder:ctgov:reload_all
# Note: Dangerous business here!! This will delete and reload data from every
Expand Down
46 changes: 46 additions & 0 deletions spec/connectors/ctgov_spec.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
require 'rails_helper'
require 'connectors/ctgov'

describe Connectors::Ctgov do
context "cleanup_stray_trials" do
it "hides trials that are no longer actively recruiting at the given location(s)" do
parser = create(:parser)
system_info = create(:system_info, initials: 'TSTU')
ctgov = Connectors::Ctgov.new
will_hide = create(:trial, parser: parser)
wont_hide = create_list(:trial, 5, parser: parser)
remaining_ids = wont_hide.map { |e| e.nct_id }

expect(will_hide.visible).to be_truthy
expect(wont_hide.first.visible).to be_truthy

strays = ctgov.stray_trials(remaining_ids)
expect(strays.map { |e| e.nct_id }).to include(will_hide.nct_id)

ctgov.cleanup_stray_trials(remaining_ids)
will_hide.reload
expect(will_hide.visible).to be_falsey
expect(wont_hide.first.visible).to be_truthy
end

it "does not hide trials from a different parser" do
parser = create(:parser)
parser2 = create(:parser, name: 'foobar', klass: 'Parsers::Foobar')
system_info = create(:system_info, initials: 'TSTU')
ctgov = Connectors::Ctgov.new
will_hide = create(:trial, parser: parser)
wont_hide = create_list(:trial, 5, parser: parser)
wont_hide_2 = create(:trial, parser: parser2)
remaining_ids = wont_hide.map { |e| e.nct_id }

expect(will_hide.visible).to be_truthy
expect(wont_hide_2.visible).to be_truthy

ctgov.cleanup_stray_trials(remaining_ids)
will_hide.reload

expect(will_hide.visible).to be_falsey
expect(wont_hide_2.visible).to be_truthy
end
end
end
6 changes: 6 additions & 0 deletions spec/factories/parser.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
FactoryBot.define do
factory :parser do
name { 'clinicaltrials.gov' }
klass { 'Parsers::Ctgov' }
end
end
2 changes: 2 additions & 0 deletions spec/factories/trial.rb
Original file line number Diff line number Diff line change
@@ -1,7 +1,9 @@
FactoryBot.define do
sequence(:system_id, 10000) { |n| "STUDY#{n}" }
sequence(:nct_id, 10000) { |n| "NCT#{n}" }
factory :trial do
system_id { generate(:system_id) }
nct_id { generate(:nct_id) }
brief_title { Faker::Lorem.sentence }
approved { true }
visible { true }
Expand Down

0 comments on commit 6e9a639

Please sign in to comment.