-
Notifications
You must be signed in to change notification settings - Fork 5
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Address expiry of studies from clinicaltrials.gov #157
Changes from all commits
6e9a639
f19beea
5057661
e792a2e
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -90,55 +90,74 @@ def clear | |
TrialCondition.delete_all | ||
end | ||
|
||
def site_nct_ids | ||
nct_ids_for_location(SystemInfo.search_term) | ||
end | ||
|
||
def stray_trials | ||
Trial.where.not(system_id: nct_ids_for_location(@system_info.search_term)) | ||
Trial.where(parser_id: @parser_id).where.not(nct_id: self.site_nct_ids) | ||
end | ||
|
||
def cleanup_stray_trials | ||
stray_trials.destroy_all | ||
stray_trials.update_all(visible: false) | ||
end | ||
|
||
private | ||
def nct_ids_for_location(location, page_token = nil) | ||
csc = 'M Health Fairview Clinics and Surgery Center' | ||
ids = [] | ||
q = { | ||
'query.locn' => "SEARCH[Location](AREA[LocationFacility]#{location} AND AREA[LocationStatus]RECRUITING)", | ||
fields: "NCTId", | ||
countTotal: true, | ||
pageSize: 1000, | ||
format: "json" | ||
} | ||
|
||
# API only wants a pageToken arg at all if we are actually asking for one. | ||
if !page_token.blank? | ||
q[:pageToken] = page_token | ||
end | ||
|
||
def extract_zip | ||
dirname = "#{Rails.root}/tmp/" | ||
unless File.directory?(dirname) | ||
FileUtils.mkdir_p(dirname) | ||
end | ||
response = HTTParty.get( | ||
"https://clinicaltrials.gov/api/v2/studies", | ||
query: q | ||
) | ||
payload = JSON.parse(response.body || "{}") | ||
|
||
unless File.directory?("#{dirname}trials/") | ||
FileUtils.mkdir_p("#{dirname}trials/") | ||
end | ||
response_ids = Array(payload.dig("studies")).map do |result| | ||
result.dig("protocolSection").dig("identificationModule").dig("nctId") | ||
end | ||
|
||
FileUtils.rm_rf(Dir.glob("#{dirname}trials/*")) | ||
Zip::File.open("#{dirname}search_result.zip") do |file| | ||
file.each do |entry| | ||
file.extract(entry, "#{dirname}trials/#{entry.name}") | ||
end | ||
end | ||
# Add the ids we just received, and ... | ||
ids.push(*response_ids) | ||
|
||
# ... recurse if there's another page. | ||
if payload.dig("nextPageToken") | ||
ids.push(*(nct_ids_for_location(location, payload.dig("nextPageToken")))) | ||
end | ||
|
||
def nct_ids_for_location(location, start = 1, endd = 1000, ids = []) | ||
response = HTTParty.get( | ||
"https://classic.clinicaltrials.gov/api/query/study_fields", | ||
query: { | ||
expr: "SEARCH[Location](AREA[LocationFacility]#{location})", | ||
fields: "NCTId", | ||
min_rnk: start, | ||
max_rnk: endd, | ||
fmt: "json" | ||
} | ||
) | ||
return ids | ||
end | ||
|
||
private | ||
|
||
def extract_zip | ||
dirname = "#{Rails.root}/tmp/" | ||
unless File.directory?(dirname) | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more.
|
||
FileUtils.mkdir_p(dirname) | ||
end | ||
|
||
response_ids = Array(JSON.parse(response.body || "{}").dig("StudyFieldsResponse").dig("StudyFields")).map do |result| | ||
Array(result.dig("NCTId")).first | ||
unless File.directory?("#{dirname}trials/") | ||
FileUtils.mkdir_p("#{dirname}trials/") | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. There's |
||
end | ||
|
||
if response_ids.empty? | ||
ids | ||
else | ||
nct_ids_for_location(location, endd + 1, endd + 1000, ids + response_ids) | ||
FileUtils.rm_rf(Dir.glob("#{dirname}trials/*")) | ||
Zip::File.open("#{dirname}search_result.zip") do |file| | ||
file.each do |entry| | ||
file.extract(entry, "#{dirname}trials/#{entry.name}") | ||
end | ||
end | ||
end | ||
|
||
end | ||
end |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -34,6 +34,14 @@ namespace :studyfinder do | |
Trial.import force: true | ||
end | ||
|
||
task cleanup_strays: :environment do |t, args| | ||
puts "Cleaning up stray trials" | ||
connector = Connectors::Ctgov.new | ||
trials = connector.cleanup_stray_trials | ||
puts "Have un-published (system_ids): " | ||
puts trials.map{ |e| " #{e.system_id}\n" } | ||
Comment on lines +40 to +42
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more.
There's only a single validation I saw for There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Valid point. Updated to use |
||
end | ||
|
||
# ============================================================================================== | ||
# studyfinder:ctgov:reload_all | ||
# Note: Dangerous business here!! This will delete and reload data from every | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,50 @@ | ||
require 'rails_helper' | ||
require 'connectors/ctgov' | ||
|
||
describe Connectors::Ctgov do | ||
context "cleanup_stray_trials" do | ||
it "hides trials that are no longer actively recruiting at the given location(s)" do | ||
parser = create(:parser) | ||
system_info = create(:system_info, initials: 'TSTU') | ||
ctgov = Connectors::Ctgov.new | ||
will_hide = create(:trial, parser: parser) | ||
wont_hide = create_list(:trial, 5, parser: parser) | ||
remaining_ids = wont_hide.map { |e| e.nct_id } | ||
|
||
expect(will_hide.visible).to be_truthy | ||
expect(wont_hide.first.visible).to be_truthy | ||
|
||
allow(ctgov).to receive(:site_nct_ids).and_return(remaining_ids) | ||
|
||
strays = ctgov.stray_trials | ||
expect(strays.map { |e| e.nct_id }).to include(will_hide.nct_id) | ||
|
||
ctgov.cleanup_stray_trials | ||
will_hide.reload | ||
expect(will_hide.visible).to be_falsey | ||
expect(wont_hide.first.visible).to be_truthy | ||
end | ||
|
||
it "does not hide trials from a different parser" do | ||
parser = create(:parser) | ||
parser2 = create(:parser, name: 'foobar', klass: 'Parsers::Foobar') | ||
system_info = create(:system_info, initials: 'TSTU') | ||
ctgov = Connectors::Ctgov.new | ||
will_hide = create(:trial, parser: parser) | ||
wont_hide = create_list(:trial, 5, parser: parser) | ||
wont_hide_2 = create(:trial, parser: parser2) | ||
remaining_ids = wont_hide.map { |e| e.nct_id } | ||
|
||
expect(will_hide.visible).to be_truthy | ||
expect(wont_hide_2.visible).to be_truthy | ||
|
||
allow(ctgov).to receive(:site_nct_ids).and_return(remaining_ids) | ||
|
||
ctgov.cleanup_stray_trials | ||
will_hide.reload | ||
|
||
expect(will_hide.visible).to be_falsey | ||
expect(wont_hide_2.visible).to be_truthy | ||
end | ||
end | ||
end |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,6 @@ | ||
FactoryBot.define do | ||
factory :parser do | ||
name { 'clinicaltrials.gov' } | ||
klass { 'Parsers::Ctgov' } | ||
end | ||
end |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Not using `File.join` and friends here is screaming to my eyeballs, but I don't think it's strictly necessary anymore (i.e. forward slashes work on both *nix and Windows now).