-
Notifications
You must be signed in to change notification settings - Fork 1
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Add workers to download and import ONSPD data
- Loading branch information
Showing
5 changed files
with
87 additions
and
1 deletion.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,12 @@ | ||
require "aws-sdk-s3" | ||
|
||
# Common base class for the ONS workers: mixes in Sidekiq::Worker, sets the
# shared Sidekiq options, and exposes the S3 bucket name and an S3 client.
class OnsBaseWorker
  include Sidekiq::Worker

  # Jobs run on the :queue_ons queue.
  # NOTE(review): `lock` / `lock_timeout` look like job-uniqueness options
  # supplied by a Sidekiq extension - confirm which gem interprets them.
  sidekiq_options(queue: :queue_ons, lock: :until_executed, lock_timeout: nil)

  # Bucket name is built from GOVUK_ENVIRONMENT_NAME once, when the class is
  # loaded. An unset variable interpolates as an empty string.
  BUCKET_NAME = "govuk-#{ENV['GOVUK_ENVIRONMENT_NAME']}-locations-api-import-csvs".freeze

  # Returns the S3 client for this worker instance, building it on first use
  # and reusing the same object afterwards.
  #
  # @return [Aws::S3::Client]
  def s3_client
    return @s3_client if @s3_client

    @s3_client = Aws::S3::Client.new(region: "eu-west-1")
  end
end
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,42 @@ | ||
require "open-uri" | ||
require "zip" | ||
|
||
# Downloads an ONSPD zip archive from a URL, then uploads every CSV found
# under Data/multi_csv/ in that archive to the S3 bucket named by BUCKET_NAME.
class OnsDownloadWorker < OnsBaseWorker
  # Example URL: https://www.arcgis.com/sharing/rest/content/items/a2f8c9c5778a452bbf640d98c166657c/data
  # retrieved by visiting https://geoportal.statistics.gov.uk/search?collection=Dataset&sort=-created&tags=all(PRD_ONSPD)
  # clicking on the first search result and then copying the link from the download button.

  # Matches archive entries such as "Data/multi_csv/ONSPD_<a>_UK_<b>.csv".
  # Capture 1 and capture 2 become the two path segments of the S3 key.
  # The dot before "csv" is escaped so only a literal ".csv" suffix matches.
  DATAFILE_REGEX = %r{\AData/multi_csv/ONSPD_(.*)_UK_(.*)\.csv\z}

  # Downloads the archive at +url+ to a temporary file and uploads each
  # matching CSV entry to S3. Download and unzip errors propagate to the
  # caller; errors for an individual entry are reported and skipped.
  #
  # @param url [String] location of the ONSPD zip archive
  def perform(url)
    # 1. Download File
    # Prefix/suffix form avoids a path separator in the temp file basename.
    temp_zip_file = Tempfile.new(%w[ONSPD .zip])

    # Block form closes the remote stream as soon as the copy finishes.
    URI.parse(url).open do |download|
      IO.copy_stream(download, temp_zip_file.path)
    end

    # 2. Unzip File/Data/multi_csv, and post to S3 bucket
    Zip::File.open(temp_zip_file.path) do |zip_file|
      zip_file.each do |entry|
        file_details = entry.name.match(DATAFILE_REGEX)
        next unless file_details

        upload_entry(entry, file_details)
      end
    end
  ensure
    # Always close and delete the downloaded archive, even when an error
    # is raised part-way through.
    temp_zip_file&.close!
  end

private

  # Uploads one zip entry to S3. Any failure is reported and swallowed so
  # that the remaining entries in the archive are still processed.
  #
  # @param entry [Zip::Entry] the archive entry to upload
  # @param file_details [MatchData] result of matching DATAFILE_REGEX
  #   against the entry name
  def upload_entry(entry, file_details)
    s3_key_name = "ons/#{file_details[1]}/#{file_details[2]}.csv"
    # Block form closes the entry's input stream after reading.
    content = entry.get_input_stream(&:read)

    _response = s3_client.put_object(
      bucket: BUCKET_NAME,
      key: s3_key_name,
      body: content,
    )
    # TODO: check response.etag (if false, upload failed somehow?)
    # TODO: Kick off OnsImportWorker for the file
    # OnsImportWorker.perform_async(s3_key_name)
    puts "Added #{entry.name} to S3 bucket as #{s3_key_name}"
  rescue StandardError => e
    puts "Error extracting and uploading object #{e.message}"
  end
end
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,31 @@ | ||
# Fetches one ONSPD CSV from S3 and creates a Postcode record for every row
# whose normalised postcode does not already have one.
class OnsImportWorker < OnsBaseWorker
  # Downloads the object at +s3_key_name+ from BUCKET_NAME into a temporary
  # file and imports it row by row. Any StandardError raised along the way is
  # reported with puts and swallowed, as before.
  #
  # @param s3_key_name [String] key of the CSV object in the bucket
  def perform(s3_key_name)
    # Prefix/suffix form avoids a path separator in the temp file basename.
    temp_csv_file = Tempfile.new(%w[ONSPD .csv])

    s3_client.get_object(
      response_target: temp_csv_file.path,
      bucket: BUCKET_NAME,
      key: s3_key_name,
    )

    CSV.foreach(temp_csv_file.path, headers: true) do |row|
      postcode = PostcodeHelper.normalise(row["pcds"])
      # Existence check instead of counting every match; rows whose postcode
      # is already stored are skipped.
      next if Postcode.where(postcode:).exists?

      Postcode.create(postcode:, results: [ons_result(row)])
    end
  rescue StandardError => e
    puts "Error getting object: #{e.message}"
  ensure
    # Always close and delete the downloaded CSV, on success or failure.
    temp_csv_file&.close!
  end

private

  # Builds the "ONS" result hash stored against a postcode from one CSV row.
  # TYPE is "S" when the row's usertype column is the string "0", else "L".
  #
  # @param row [CSV::Row] a row of the ONSPD CSV
  # @return [Hash]
  def ons_result(row)
    {
      "ONS" => {
        "AVG_LNG" => row["long"],
        "AVG_LAT" => row["lat"],
        "TYPE" => row["usertype"] == "0" ? "S" : "L",
        "DOTERM" => row["doterm"],
      },
    }
  end
end