Skip to content

Commit

Permalink
Merge pull request #1220 from sul-dlss/use-correct-part-audit-class
Browse files Browse the repository at this point in the history
Use correct part audit class
  • Loading branch information
jmartin-sul authored Nov 12, 2019
2 parents b718721 + bf42388 commit 15511b9
Show file tree
Hide file tree
Showing 10 changed files with 99 additions and 36 deletions.
2 changes: 1 addition & 1 deletion app/jobs/part_replication_audit_job.rb
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ def perform(complete_moab, zip_endpoint)
results = new_results(complete_moab)
complete_moab.zipped_moab_versions.where(zip_endpoint: zip_endpoint).each do |zmv|
next unless check_child_zip_part_attributes(zmv, results)
PreservationCatalog::S3::Audit.check_aws_replicated_zipped_moab_version(zmv, results)
zip_endpoint.audit_class.check_replicated_zipped_moab_version(zmv, results)
end
results.report_results(logger)
end
Expand Down
4 changes: 2 additions & 2 deletions app/lib/preservation_catalog/ibm/audit.rb
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

module PreservationCatalog
module Ibm
# Methods for auditing checking the state of a ZippedMoabVersion on an IBM S3 compatible endpoint. Requires AWS credentials are
# Methods for auditing/checking the state of a ZippedMoabVersion on an IBM S3-compatible endpoint. Requires IBM credentials to be
# available in the environment. At the time of this comment, ONLY running queue workers will have proper creds loaded.
class Audit
delegate :bucket, :bucket_name, to: ::PreservationCatalog::Ibm
Expand All @@ -17,7 +17,7 @@ def initialize(zmv, results)
end

# convenience method for instantiating the audit class and running the check in one call
def self.check_ibm_replicated_zipped_moab_version(zmv, results)
def self.check_replicated_zipped_moab_version(zmv, results)
new(zmv, results).check_ibm_replicated_zipped_moab_version
end

Expand Down
2 changes: 1 addition & 1 deletion app/lib/preservation_catalog/s3/audit.rb
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ def initialize(zmv, results)
end

# convenience method for instantiating the audit class and running the check in one call
def self.check_aws_replicated_zipped_moab_version(zmv, results)
def self.check_replicated_zipped_moab_version(zmv, results)
new(zmv, results).check_aws_replicated_zipped_moab_version
end

Expand Down
13 changes: 13 additions & 0 deletions app/models/zip_endpoint.rb
Original file line number Diff line number Diff line change
Expand Up @@ -57,4 +57,17 @@ def self.seed_from_config(preservation_policies)
end
end
end

# Resolves the audit class configured for this endpoint from its audit_class setting string.
#
# @return [Class] the constant named by the audit_class setting
# @raise [RuntimeError] when no audit class is configured for this endpoint, or when the
#   configured string cannot be resolved to a constant (NameError is translated into a
#   RuntimeError that points at the setting)
def audit_class
  configured_name = audit_class_setting
  if configured_name
    configured_name.constantize
  else
    raise "No audit class configured for #{endpoint_name}"
  end
rescue NameError
  # the setting is present but does not name a loadable constant
  raise "Failed to return audit class based on setting for #{endpoint_name}. Check setting string for accuracy."
end

private

# Looks up (and memoizes, once truthy) the audit_class string configured for this endpoint
# under Settings.zip_endpoints.
#
# @return [String, nil] the configured audit class name; nil when Settings.zip_endpoints has
#   no entry for endpoint_name or the entry has no audit_class
def audit_class_setting
  return @audit_class_setting if @audit_class_setting

  endpoint_config = Settings.zip_endpoints[endpoint_name]
  @audit_class_setting = endpoint_config&.audit_class
end
end
1 change: 1 addition & 0 deletions config/settings/development.yml
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ zip_endpoints:
endpoint_node: 'localhost'
storage_location: 'bucket_name'
delivery_class: 'S3WestDeliveryJob'
audit_class: 'PreservationCatalog::S3::Audit'
workflow_services_url: 'https://sul-lyberservices-test.stanford.edu/workflow/'
resque_dashboard_hostnames:
- 'localhost'
2 changes: 2 additions & 0 deletions config/settings/test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,9 @@ zip_endpoints:
endpoint_node: 'localhost'
storage_location: 'bucket_name'
delivery_class: 'S3WestDeliveryJob'
audit_class: 'PreservationCatalog::S3::Audit'
ibm_us_south:
endpoint_node: 'https://s3.us-south.cloud-object-storage.appdomain.cloud'
storage_location: 'storage_location'
delivery_class: 'IbmSouthDeliveryJob'
audit_class: 'PreservationCatalog::Ibm::Audit'
27 changes: 23 additions & 4 deletions spec/jobs/part_replication_audit_job_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,9 @@
describe PartReplicationAuditJob, type: :job do
let(:cm) { create(:complete_moab, version: 2) }
let(:job) { described_class.new(cm, endpoint) }
let(:endpoint) { cm.zipped_moab_versions.first.zip_endpoint }
let(:endpoints) { cm.zipped_moab_versions.map(&:zip_endpoint).uniq }
let(:endpoint) { endpoints.first }
let(:endpoint2) { endpoints.second }
let(:logger) { instance_double(Logger) }

before do
Expand All @@ -29,7 +31,11 @@
describe '#perform' do
let(:zmv1) { cm.zipped_moab_versions.where(zip_endpoint: endpoint).first }
let(:zmv2) { cm.zipped_moab_versions.where(zip_endpoint: endpoint).second }
let(:zmv3) { cm.zipped_moab_versions.where(zip_endpoint: endpoint2).first }
let(:zmv4) { cm.zipped_moab_versions.where(zip_endpoint: endpoint2).second }
let(:results) { job.send(:new_results, cm) }
let(:audit_class) { endpoint.audit_class }
let(:audit_class2) { endpoint2.audit_class }

it 'only checks parts for one endpoint' do
other_ep = create(:zip_endpoint)
Expand All @@ -40,14 +46,27 @@
job.perform(cm, endpoint)
end

it 'builds results from sub-checks' do
it 'builds results from sub-checks, only for the given endpoint' do
allow(job).to receive(:new_results).with(cm).and_return(results)
allow(job).to receive(:check_child_zip_part_attributes).with(zmv2, AuditResults)
expect(job).to receive(:check_child_zip_part_attributes).with(zmv1, AuditResults).and_return(true)
expect(PreservationCatalog::S3::Audit).to receive(:check_aws_replicated_zipped_moab_version).with(zmv1, AuditResults)
expect(PreservationCatalog::S3::Audit).not_to receive(:check_aws_replicated_zipped_moab_version).with(zmv2, AuditResults)
expect(audit_class).to receive(:check_replicated_zipped_moab_version).with(zmv1, AuditResults)
expect(audit_class).not_to receive(:check_replicated_zipped_moab_version).with(zmv2, AuditResults)
expect(audit_class2).not_to receive(:check_replicated_zipped_moab_version)
expect(results).to receive(:report_results)
job.perform(cm, endpoint)
end

# Runs the job against endpoint2 and verifies that only endpoint2's zipped moab versions
# (zmv3, zmv4) are passed to endpoint2's audit class.
it 'checks the other endpoint when requested' do
allow(job).to receive(:new_results).with(cm).and_return(results)
# let the attribute pre-check pass for both endpoint2 versions so the audit call is reached
allow(job).to receive(:check_child_zip_part_attributes).with(zmv3, results).and_return(true)
allow(job).to receive(:check_child_zip_part_attributes).with(zmv4, results).and_return(true)
# the first endpoint's audit class must not be invoked at all
expect(audit_class).not_to receive(:check_replicated_zipped_moab_version)
# zmv1/zmv2 belong to the first endpoint, so endpoint2's audit class must not see them
expect(audit_class2).not_to receive(:check_replicated_zipped_moab_version).with(zmv1, AuditResults)
expect(audit_class2).not_to receive(:check_replicated_zipped_moab_version).with(zmv2, AuditResults)
expect(audit_class2).to receive(:check_replicated_zipped_moab_version).with(zmv3, results)
expect(audit_class2).to receive(:check_replicated_zipped_moab_version).with(zmv4, results)
job.perform(cm, endpoint2)
end
end
end
28 changes: 14 additions & 14 deletions spec/lib/preservation_catalog/ibm/audit_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@

expect(bucket).to receive(:object).with(ok_part.s3_key).and_return(s3_obj)
expect(s3_obj).to receive(:metadata).and_return('checksum_md5' => ok_part.md5)
expect { described_class.check_ibm_replicated_zipped_moab_version(zmv, results) }
expect { described_class.check_replicated_zipped_moab_version(zmv, results) }
.to change { ok_part.reload.last_existence_check }
.from(nil)
.and change { ok_part.reload.last_checksum_validation }
Expand All @@ -67,7 +67,7 @@
end

it 'logs the missing parts and sets status to not_found' do
described_class.check_ibm_replicated_zipped_moab_version(zmv, results)
described_class.check_replicated_zipped_moab_version(zmv, results)
zmv.zip_parts.each do |part|
msg = "replicated part not found on #{endpoint_name}: #{part.s3_key} was not found on #{bucket_name}"
expect(results.result_array).to include(
Expand Down Expand Up @@ -102,17 +102,17 @@
end

it "doesn't log checksum mismatches" do
described_class.check_ibm_replicated_zipped_moab_version(zmv, results)
described_class.check_replicated_zipped_moab_version(zmv, results)
expect(results.result_array).not_to include(a_hash_including(AuditResults::ZIP_PART_CHECKSUM_MISMATCH))
end

it "doesn't log not found errors" do
described_class.check_ibm_replicated_zipped_moab_version(zmv, results)
described_class.check_replicated_zipped_moab_version(zmv, results)
expect(results.result_array).not_to include(a_hash_including(AuditResults::ZIP_PART_NOT_FOUND))
end

it 'updates existence check timestamps' do
expect { described_class.check_ibm_replicated_zipped_moab_version(zmv, results) }
expect { described_class.check_replicated_zipped_moab_version(zmv, results) }
.to change { zmv.zip_parts.first.reload.last_existence_check }
.from(nil)
.and change { zmv.zip_parts.second.reload.last_existence_check }
Expand All @@ -122,7 +122,7 @@
end

it 'updates checksum validation timestamps' do
expect { described_class.check_ibm_replicated_zipped_moab_version(zmv, results) }
expect { described_class.check_replicated_zipped_moab_version(zmv, results) }
.to change { zmv.zip_parts.first.reload.last_checksum_validation }
.from(nil)
.and change { zmv.zip_parts.second.reload.last_checksum_validation }
Expand All @@ -138,7 +138,7 @@
end

it 'logs the mismatches' do
described_class.check_ibm_replicated_zipped_moab_version(zmv, results)
described_class.check_replicated_zipped_moab_version(zmv, results)
zmv.zip_parts.where(suffix: ['.zip', '.z01']).each do |part|
msg = "replicated md5 mismatch on #{endpoint_name}: #{part.s3_key} catalog md5 (#{part.md5})"\
" doesn't match the replicated md5 (#{non_matching_md5}) on #{bucket_name}"
Expand All @@ -147,7 +147,7 @@
end

it 'updates existence check timestamps' do
expect { described_class.check_ibm_replicated_zipped_moab_version(zmv, results) }
expect { described_class.check_replicated_zipped_moab_version(zmv, results) }
.to change { zmv.zip_parts.first.reload.last_existence_check }
.from(nil)
.and change { zmv.zip_parts.second.reload.last_existence_check }
Expand All @@ -157,7 +157,7 @@
end

it 'updates validation timestamps' do
expect { described_class.check_ibm_replicated_zipped_moab_version(zmv, results) }
expect { described_class.check_replicated_zipped_moab_version(zmv, results) }
.to change { zmv.zip_parts.first.reload.last_checksum_validation }
.from(nil)
.and change { zmv.zip_parts.second.reload.last_checksum_validation }
Expand All @@ -167,7 +167,7 @@
end

it 'updates status to replicated_checksum_mismatch' do
expect { described_class.check_ibm_replicated_zipped_moab_version(zmv, results) }
expect { described_class.check_replicated_zipped_moab_version(zmv, results) }
.to change { zmv.zip_parts.first.reload.status }
.to('replicated_checksum_mismatch')
.and change { zmv.zip_parts.second.reload.status }
Expand Down Expand Up @@ -210,15 +210,15 @@
end

it 'logs the missing parts' do
described_class.check_ibm_replicated_zipped_moab_version(zmv, results)
described_class.check_replicated_zipped_moab_version(zmv, results)
[zmv.zip_parts.first, zmv.zip_parts.fourth].each do |part|
msg = "replicated part not found on #{endpoint_name}: #{part.s3_key} was not found on #{bucket_name}"
expect(results.result_array).to include(a_hash_including(AuditResults::ZIP_PART_NOT_FOUND => msg))
end
end

it "doesn't log checksum mismatches" do
described_class.check_ibm_replicated_zipped_moab_version(zmv, results)
described_class.check_replicated_zipped_moab_version(zmv, results)
expect(results.result_array).not_to include(a_hash_including(AuditResults::ZIP_PART_CHECKSUM_MISMATCH))
end
end
Expand All @@ -229,7 +229,7 @@
end

it 'logs the missing parts' do
described_class.check_ibm_replicated_zipped_moab_version(zmv, results)
described_class.check_replicated_zipped_moab_version(zmv, results)
[zmv.zip_parts.first, zmv.zip_parts.fourth].each do |part|
msg = "replicated part not found on #{endpoint_name}: #{part.s3_key} was not found on #{bucket_name}"
expect(results.result_array).to include(a_hash_including(AuditResults::ZIP_PART_NOT_FOUND => msg))
Expand All @@ -240,7 +240,7 @@
part = zmv.zip_parts.second
msg = "replicated md5 mismatch on #{endpoint_name}: #{part.s3_key} catalog md5 (#{part.md5}) "\
"doesn't match the replicated md5 (#{non_matching_md5}) on #{bucket_name}"
described_class.check_ibm_replicated_zipped_moab_version(zmv, results)
described_class.check_replicated_zipped_moab_version(zmv, results)
expect(results.result_array).to include(a_hash_including(AuditResults::ZIP_PART_CHECKSUM_MISMATCH => msg))
end
end
Expand Down
28 changes: 14 additions & 14 deletions spec/lib/preservation_catalog/s3/audit_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@

expect(bucket).to receive(:object).with(ok_part.s3_key).and_return(s3_obj)
expect(s3_obj).to receive(:metadata).and_return('checksum_md5' => ok_part.md5)
expect { described_class.check_aws_replicated_zipped_moab_version(zmv, results) }
expect { described_class.check_replicated_zipped_moab_version(zmv, results) }
.to change { ok_part.reload.last_existence_check }.from(nil)
.and change { ok_part.reload.last_checksum_validation }.from(nil)
end
Expand All @@ -65,7 +65,7 @@
end

it 'logs the missing parts and sets status to not_found' do
described_class.check_aws_replicated_zipped_moab_version(zmv, results)
described_class.check_replicated_zipped_moab_version(zmv, results)
zmv.zip_parts.each do |part|
msg = "replicated part not found on #{endpoint_name}: #{part.s3_key} was not found on #{bucket_name}"
expect(results.result_array).to include(
Expand Down Expand Up @@ -100,24 +100,24 @@
end

it "doesn't log checksum mismatches" do
described_class.check_aws_replicated_zipped_moab_version(zmv, results)
described_class.check_replicated_zipped_moab_version(zmv, results)
expect(results.result_array).not_to include(a_hash_including(AuditResults::ZIP_PART_CHECKSUM_MISMATCH))
end

it "doesn't log not found errors" do
described_class.check_aws_replicated_zipped_moab_version(zmv, results)
described_class.check_replicated_zipped_moab_version(zmv, results)
expect(results.result_array).not_to include(a_hash_including(AuditResults::ZIP_PART_NOT_FOUND))
end

it 'updates existence check timestamps' do
expect { described_class.check_aws_replicated_zipped_moab_version(zmv, results) }
expect { described_class.check_replicated_zipped_moab_version(zmv, results) }
.to change { zmv.zip_parts.first.reload.last_existence_check }.from(nil)
.and change { zmv.zip_parts.second.reload.last_existence_check }.from(nil)
.and change { zmv.zip_parts.third.reload.last_existence_check }.from(nil)
end

it 'updates checksum validation timestamps' do
expect { described_class.check_aws_replicated_zipped_moab_version(zmv, results) }
expect { described_class.check_replicated_zipped_moab_version(zmv, results) }
.to change { zmv.zip_parts.first.reload.last_checksum_validation }.from(nil)
.and change { zmv.zip_parts.second.reload.last_checksum_validation }.from(nil)
.and change { zmv.zip_parts.third.reload.last_checksum_validation }.from(nil)
Expand All @@ -130,7 +130,7 @@
end

it 'logs the mismatches' do
described_class.check_aws_replicated_zipped_moab_version(zmv, results)
described_class.check_replicated_zipped_moab_version(zmv, results)
zmv.zip_parts.where(suffix: ['.zip', '.z01']).each do |part|
msg = "replicated md5 mismatch on #{endpoint_name}: #{part.s3_key} catalog md5 (#{part.md5})"\
" doesn't match the replicated md5 (#{non_matching_md5}) on #{bucket_name}"
Expand All @@ -139,21 +139,21 @@
end

it 'updates existence check timestamps' do
expect { described_class.check_aws_replicated_zipped_moab_version(zmv, results) }
expect { described_class.check_replicated_zipped_moab_version(zmv, results) }
.to change { zmv.zip_parts.first.reload.last_existence_check }.from(nil)
.and change { zmv.zip_parts.second.reload.last_existence_check }.from(nil)
.and change { zmv.zip_parts.third.reload.last_existence_check }.from(nil)
end

it 'updates validation timestamps' do
expect { described_class.check_aws_replicated_zipped_moab_version(zmv, results) }
expect { described_class.check_replicated_zipped_moab_version(zmv, results) }
.to change { zmv.zip_parts.first.reload.last_checksum_validation }.from(nil)
.and change { zmv.zip_parts.second.reload.last_checksum_validation }.from(nil)
.and change { zmv.zip_parts.third.reload.last_checksum_validation }.from(nil)
end

it 'updates status to replicated_checksum_mismatch' do
expect { described_class.check_aws_replicated_zipped_moab_version(zmv, results) }
expect { described_class.check_replicated_zipped_moab_version(zmv, results) }
.to change { zmv.zip_parts.first.reload.status }
.to('replicated_checksum_mismatch')
.and change { zmv.zip_parts.second.reload.status }
Expand Down Expand Up @@ -196,15 +196,15 @@
end

it 'logs the missing parts' do
described_class.check_aws_replicated_zipped_moab_version(zmv, results)
described_class.check_replicated_zipped_moab_version(zmv, results)
[zmv.zip_parts.first, zmv.zip_parts.fourth].each do |part|
msg = "replicated part not found on #{endpoint_name}: #{part.s3_key} was not found on #{bucket_name}"
expect(results.result_array).to include(a_hash_including(AuditResults::ZIP_PART_NOT_FOUND => msg))
end
end

it "doesn't log checksum mismatches" do
described_class.check_aws_replicated_zipped_moab_version(zmv, results)
described_class.check_replicated_zipped_moab_version(zmv, results)
expect(results.result_array).not_to include(a_hash_including(AuditResults::ZIP_PART_CHECKSUM_MISMATCH))
end
end
Expand All @@ -215,7 +215,7 @@
end

it 'logs the missing parts' do
described_class.check_aws_replicated_zipped_moab_version(zmv, results)
described_class.check_replicated_zipped_moab_version(zmv, results)
[zmv.zip_parts.first, zmv.zip_parts.fourth].each do |part|
msg = "replicated part not found on #{endpoint_name}: #{part.s3_key} was not found on #{bucket_name}"
expect(results.result_array).to include(a_hash_including(AuditResults::ZIP_PART_NOT_FOUND => msg))
Expand All @@ -226,7 +226,7 @@
part = zmv.zip_parts.second
msg = "replicated md5 mismatch on #{endpoint_name}: #{part.s3_key} catalog md5 (#{part.md5}) "\
"doesn't match the replicated md5 (#{non_matching_md5}) on #{bucket_name}"
described_class.check_aws_replicated_zipped_moab_version(zmv, results)
described_class.check_replicated_zipped_moab_version(zmv, results)
expect(results.result_array).to include(a_hash_including(AuditResults::ZIP_PART_CHECKSUM_MISMATCH => msg))
end
end
Expand Down
28 changes: 28 additions & 0 deletions spec/models/zip_endpoint_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,34 @@
it { is_expected.to validate_presence_of(:endpoint_name) }
it { is_expected.to validate_presence_of(:delivery_class) }

# Examples for #audit_class: the configured class is returned, and both failure modes
# (no setting, setting that names a missing constant) raise with a descriptive message.
describe '#audit_class' do
# NOTE(review): relies on 'mock_archive1' and 'ibm_us_south' records already existing,
# presumably seeded from the test settings -- confirm against the spec setup.
it 'returns the right audit class when one is configured' do
expect(described_class.find_by(endpoint_name: 'mock_archive1').audit_class).to be(PreservationCatalog::S3::Audit)
expect(described_class.find_by(endpoint_name: 'ibm_us_south').audit_class).to be(PreservationCatalog::Ibm::Audit)
end

# zip_endpoint is defined outside this block; presumably its endpoint_name has no
# audit_class entry in Settings.zip_endpoints -- verify against the enclosing spec.
it 'raises a helpful error when no audit class is configured' do
expect { zip_endpoint.audit_class }.to raise_error("No audit class configured for #{zip_endpoint.endpoint_name}")
end

it 'raises a helpful error when a non-existent audit class is configured' do
ep_name = zip_endpoint.endpoint_name
# build a settings entry for this endpoint whose audit_class names a constant that is
# expected not to exist
zip_endpoints_setting = Config::Options.new(
"#{ep_name}":
Config::Options.new(
endpoint_node: 'endpoint_node',
storage_location: 'storage_location',
delivery_class: 'S3WestDeliveryJob',
audit_class: 'PreservationCatalog::Hal::Audit'
)
)

# swap the stubbed settings in for the lookup performed by #audit_class
allow(Settings).to receive(:zip_endpoints).and_return(zip_endpoints_setting)
msg = "Failed to return audit class based on setting for #{ep_name}. Check setting string for accuracy."
expect { zip_endpoint.audit_class }.to raise_error(msg)
end
end

describe '.seed_from_config' do
it 'creates an endpoints entry for each zip endpoint' do
Settings.zip_endpoints.each do |endpoint_name, endpoint_config|
Expand Down

0 comments on commit 15511b9

Please sign in to comment.