-
Notifications
You must be signed in to change notification settings - Fork 5
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Address expiry of studies from clinicaltrials.gov #157
Changes from 2 commits
6e9a639
f19beea
5057661
e792a2e
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -90,55 +90,76 @@ def clear | |
TrialCondition.delete_all | ||
end | ||
|
||
def stray_trials | ||
Trial.where.not(system_id: nct_ids_for_location(@system_info.search_term)) | ||
def site_nct_ids | ||
nct_ids_for_location(SystemInfo.search_term) | ||
end | ||
|
||
def cleanup_stray_trials | ||
stray_trials.destroy_all | ||
def stray_trials(nct_ids = nil) | ||
nct_ids ||= site_nct_ids | ||
Trial.where(parser_id: @parser_id).where.not(nct_id: nct_ids) | ||
end | ||
|
||
private | ||
|
||
def extract_zip | ||
dirname = "#{Rails.root}/tmp/" | ||
unless File.directory?(dirname) | ||
FileUtils.mkdir_p(dirname) | ||
end | ||
|
||
unless File.directory?("#{dirname}trials/") | ||
FileUtils.mkdir_p("#{dirname}trials/") | ||
end | ||
def cleanup_stray_trials(nct_ids = nil) | ||
nct_ids ||= site_nct_ids | ||
stray_trials(nct_ids).update!(visible: false) | ||
end | ||
|
||
FileUtils.rm_rf(Dir.glob("#{dirname}trials/*")) | ||
Zip::File.open("#{dirname}search_result.zip") do |file| | ||
file.each do |entry| | ||
file.extract(entry, "#{dirname}trials/#{entry.name}") | ||
end | ||
end | ||
def nct_ids_for_location(location, page_token = nil) | ||
csc = 'M Health Fairview Clinics and Surgery Center' | ||
ids = [] | ||
q = { | ||
'query.locn' => "SEARCH[Location](AREA[LocationFacility]#{location} AND AREA[LocationStatus]RECRUITING)", | ||
fields: "NCTId", | ||
countTotal: true, | ||
pageSize: 1000, | ||
format: "json" | ||
} | ||
|
||
# API only wants a pageToken arg at all if we are actually asking for one. | ||
if !page_token.blank? | ||
q[:pageToken] = page_token | ||
end | ||
|
||
def nct_ids_for_location(location, start = 1, endd = 1000, ids = []) | ||
response = HTTParty.get( | ||
"https://classic.clinicaltrials.gov/api/query/study_fields", | ||
query: { | ||
expr: "SEARCH[Location](AREA[LocationFacility]#{location})", | ||
fields: "NCTId", | ||
min_rnk: start, | ||
max_rnk: endd, | ||
fmt: "json" | ||
} | ||
"https://clinicaltrials.gov/api/v2/studies", | ||
query: q | ||
) | ||
payload = JSON.parse(response.body || "{}") | ||
|
||
response_ids = Array(JSON.parse(response.body || "{}").dig("StudyFieldsResponse").dig("StudyFields")).map do |result| | ||
Array(result.dig("NCTId")).first | ||
response_ids = Array(payload.dig("studies")).map do |result| | ||
result.dig("protocolSection").dig("identificationModule").dig("nctId") | ||
end | ||
|
||
if response_ids.empty? | ||
ids | ||
else | ||
nct_ids_for_location(location, endd + 1, endd + 1000, ids + response_ids) | ||
# Add the ids we just received, and ... | ||
ids.push(*response_ids) | ||
|
||
# ... recurse if there's another page. | ||
if payload.dig("nextPageToken") | ||
ids.push(*(nct_ids_for_location(location, payload.dig("nextPageToken")))) | ||
end | ||
|
||
return ids | ||
end | ||
|
||
private | ||
|
||
def extract_zip | ||
dirname = "#{Rails.root}/tmp/" | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Not using |
||
unless File.directory?(dirname) | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more.
|
||
FileUtils.mkdir_p(dirname) | ||
end | ||
|
||
unless File.directory?("#{dirname}trials/") | ||
FileUtils.mkdir_p("#{dirname}trials/") | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. There's |
||
end | ||
|
||
FileUtils.rm_rf(Dir.glob("#{dirname}trials/*")) | ||
Zip::File.open("#{dirname}search_result.zip") do |file| | ||
file.each do |entry| | ||
file.extract(entry, "#{dirname}trials/#{entry.name}") | ||
end | ||
end | ||
end | ||
|
||
end | ||
end |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -34,6 +34,14 @@ namespace :studyfinder do | |
Trial.import force: true | ||
end | ||
|
||
task cleanup_strays: :environment do |t, args| | ||
puts "Cleaning up stray trials" | ||
connector = Connectors::Ctgov.new | ||
trials = connector.cleanup_stray_trials | ||
puts "Have un-published (system_ids): " | ||
puts trials.map{ |e| " #{e.system_id}\n" } | ||
Comment on lines
+40
to
+42
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
There's only a single validation I saw for There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Valid point. Updated to use |
||
end | ||
|
||
# ============================================================================================== | ||
# studyfinder:ctgov:reload_all | ||
# Note: Dangerous business here!! This will delete and reload data from every | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,46 @@ | ||
require 'rails_helper' | ||
require 'connectors/ctgov' | ||
|
||
describe Connectors::Ctgov do | ||
context "cleanup_stray_trials" do | ||
it "hides trials that are no longer actively recruiting at the given location(s)" do | ||
parser = create(:parser) | ||
system_info = create(:system_info, initials: 'TSTU') | ||
ctgov = Connectors::Ctgov.new | ||
will_hide = create(:trial, parser: parser) | ||
wont_hide = create_list(:trial, 5, parser: parser) | ||
remaining_ids = wont_hide.map { |e| e.nct_id } | ||
|
||
expect(will_hide.visible).to be_truthy | ||
expect(wont_hide.first.visible).to be_truthy | ||
|
||
strays = ctgov.stray_trials(remaining_ids) | ||
expect(strays.map { |e| e.nct_id }).to include(will_hide.nct_id) | ||
|
||
ctgov.cleanup_stray_trials(remaining_ids) | ||
will_hide.reload | ||
expect(will_hide.visible).to be_falsey | ||
expect(wont_hide.first.visible).to be_truthy | ||
end | ||
|
||
it "does not hide trials from a different parser" do | ||
parser = create(:parser) | ||
parser2 = create(:parser, name: 'foobar', klass: 'Parsers::Foobar') | ||
system_info = create(:system_info, initials: 'TSTU') | ||
ctgov = Connectors::Ctgov.new | ||
will_hide = create(:trial, parser: parser) | ||
wont_hide = create_list(:trial, 5, parser: parser) | ||
wont_hide_2 = create(:trial, parser: parser2) | ||
remaining_ids = wont_hide.map { |e| e.nct_id } | ||
|
||
expect(will_hide.visible).to be_truthy | ||
expect(wont_hide_2.visible).to be_truthy | ||
|
||
ctgov.cleanup_stray_trials(remaining_ids) | ||
will_hide.reload | ||
|
||
expect(will_hide.visible).to be_falsey | ||
expect(wont_hide_2.visible).to be_truthy | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. If you need to reload There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I don't believe so, because trials in |
||
end | ||
end | ||
end |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,6 @@ | ||
FactoryBot.define do | ||
factory :parser do | ||
name { 'clinicaltrials.gov' } | ||
klass { 'Parsers::Ctgov' } | ||
end | ||
end |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Wonky indentation