Skip to content

Commit

Permalink
Export files attached to exported works (#207)
Browse files Browse the repository at this point in the history
Updates the exporter to include links to the files attached to each
exported work.

Files are exported in the same order in which they are attached to the work.

In Hyrax, the file we want is the `:original_file` attached
to each FileSet.  The Hyrax downloads controller abstracts this so we
can just provide the `:id` of the desired FileSet.
  • Loading branch information
mark-dce authored Sep 6, 2022
1 parent b4c81f8 commit fc88bf2
Show file tree
Hide file tree
Showing 4 changed files with 63 additions and 11 deletions.
25 changes: 16 additions & 9 deletions app/lib/tenejo/csv_exporter.rb
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,8 @@

module Tenejo
class CsvExporter
# Attributes that are dropped from the export header even when the importer
# lists them among the attributes it copies.
EXCLUDE_FROM_EXPORT = [:date_modified, :label, :arkivo_checksum, :state].freeze

# Column headers of the exported CSV, in output order: the fixed leading
# columns first, then every collection and work attribute the importer copies,
# de-duplicated and with the excluded attributes removed.
HEADER_ROW = (([:identifier, :error, :object_type, :visibility, :parent, :title, :file_url] \
  + Tenejo::CsvImporter.collection_attributes_to_copy.keys \
  + Tenejo::CsvImporter.work_attributes_to_copy.keys).uniq \
  - EXCLUDE_FROM_EXPORT).freeze
Expand All @@ -28,14 +28,11 @@ def run
# Build the whole export as one CSV string.
#
# The first line is always HEADER_ROW. When the export lists no identifiers a
# single error row is written; otherwise every identifier is handed to
# find_and_export, which appends its own rows.
#
# @return [String] the generated CSV document
def generate_csv
  CSV.generate(encoding: 'UTF-8', write_headers: true) do |csv|
    csv << HEADER_ROW
    # Report the empty request exactly once, keyed by the current column names.
    csv << CSV::Row.new([:identifier, :error], ["missing", "No identifiers provided"]) if @export.identifiers.empty?
    @export.identifiers.each do |id|
      find_and_export(id, csv)
    end
  end
end

Expand All @@ -53,7 +50,7 @@ def export_name
# @param csv[CSV] a CSV IO object to append the metadata to
def find_and_export(id, csv)
  # Look the object up by its indexed primary identifier; when the lookup
  # returns more than one match, the last one is used.
  obj = ActiveFedora::Base.where(primary_identifier_ssi: id).last
  # Report an unmatched identifier once, as an error row inside the export.
  csv << CSV::Row.new([:identifier, :error], [id, "No match for identifier"]) unless obj
  # Top-level lookups have no parent, hence the nil parent id.
  serialize_with_descendants(obj, nil, csv)
end

Expand All @@ -72,13 +69,23 @@ def serialize_children(csv, obj)
parent_id = obj.primary_identifier
obj.try(:child_collections)&.map { |child| serialize_with_descendants(child, parent_id, csv) }
obj.try(:child_works)&.map { |child| serialize_with_descendants(child, parent_id, csv) }
obj.try(:ordered_file_sets)&.map { |child| serialize_with_descendants(child, parent_id, csv) }
end

# Build the download link for an exported file.
#
# Only FileSets have a download link; the Hyrax download route is addressed
# by the FileSet's id.
#
# @param obj [Object] the object being exported
# @return [String, nil] the download URL, or nil when obj is not a FileSet
def download_url(obj)
  Hyrax::Engine.routes.url_helpers.download_url(obj.id) if obj.is_a?(FileSet)
end

# Serialize one object into a single CSV row keyed by HEADER_ROW.
#
# @param obj [Object, nil] the object to export
# @param parent_id [String, nil] identifier of the containing object, if any
# @return [CSV::Row, nil] the populated row, or nil when obj is nil
def serialize(obj, parent_id = nil)
  return unless obj
  # Expose the parent id as an attribute so the generic lookup below can read it.
  obj.define_singleton_method(:parent) { parent_id }
  values = HEADER_ROW.map { |attr| pack_field(obj.try(attr)) }
  row = CSV::Row.new(HEADER_ROW, values)
  # The columns below override whatever the generic attribute lookup produced.
  row[:parent] = parent_id
  # Fall back to the object's id when no primary identifier is set.
  row[:identifier] = obj.primary_identifier || obj.id
  row[:file_url] = download_url(obj)
  # FileSets are reported as "File" in the object_type column.
  row[:object_type] = obj.class.to_s.gsub('FileSet', 'File')
  row
end

# Handle multi-value fields regardless of underlying class
Expand Down
2 changes: 2 additions & 0 deletions config/initializers/hyrax.rb
Original file line number Diff line number Diff line change
Expand Up @@ -309,6 +309,8 @@
require './app/lib/hydra/file_characterization/characterizers/fits_servlet'
Date::DATE_FORMATS[:standard] = "%m/%d/%Y"

Hyrax::Engine.routes.default_url_options = Rails.application.routes.default_url_options

Qa::Authorities::Local.register_subauthority('subjects', 'Qa::Authorities::Local::TableBasedAuthority')
Qa::Authorities::Local.register_subauthority('languages', 'Qa::Authorities::Local::TableBasedAuthority')
Qa::Authorities::Local.register_subauthority('genres', 'Qa::Authorities::Local::TableBasedAuthority')
36 changes: 34 additions & 2 deletions spec/lib/tenejo/csv_exporter_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -64,13 +64,13 @@
expect(rows[0]['identifier']).to eq 'COL001'
expect(rows[0]['error']).to be_nil
expect(rows[0]['title']).to include 'Test collection'
expect(rows[0]['object type']).to eq "Collection"
expect(rows[0]['object_type']).to eq "Collection"

# Work WRK001
expect(rows[1]['identifier']).to eq 'WRK001'
expect(rows[1]['error']).to be_nil
expect(rows[1]['title']).to include 'Test work'
expect(rows[1]['object type']).to eq "Work"
expect(rows[1]['object_type']).to eq "Work"
end

it 'includes children', :aggregate_failures do
Expand All @@ -84,10 +84,34 @@

expect(rows[0]['parent']).to be_blank
expect(rows[0]['identifier']).to eq 'COL001'
expect(rows[0]['object_type']).to eq 'Collection'
expect(rows[1]['parent']).to eq 'COL001'
expect(rows[1]['identifier']).to eq 'WRK001'
expect(rows[1]['object_type']).to eq 'Work'
expect(rows[2]['parent']).to eq 'WRK001'
expect(rows[2]['identifier']).to eq 'WRK002'
expect(rows[2]['object_type']).to eq 'Work'
end

it 'includes files', :aggregate_failures do
file001 = FileSet.new(primary_identifier: 'FIL001', id: 'placeholder')
file002 = FileSet.new(id: 'auto-generated')
allow(ActiveFedora::Base).to receive(:where).and_return([work001])
allow(work001).to receive(:ordered_file_sets).and_return([file001, file002])

export.identifiers = ['WRK001']
csv_string = described_class.new(export).generate_csv
rows = CSV.parse(csv_string, headers: true)

expect(rows[0]['object_type']).to eq 'Work'
expect(rows[0]['identifier']).to eq 'WRK001'
expect(rows[0]['file_url']).to be_blank
expect(rows[1]['object_type']).to eq 'File'
expect(rows[1]['identifier']).to eq 'FIL001'
expect(rows[1]['file_url']).to eq 'http://localhost:3000/downloads/placeholder'
expect(rows[2]['object_type']).to eq 'File'
expect(rows[2]['identifier']).to eq 'auto-generated'
expect(rows[2]['file_url']).to eq 'http://localhost:3000/downloads/auto-generated'
end
end

Expand Down Expand Up @@ -131,6 +155,10 @@
expect(serialized[:parent]).to eq 'PARENT_ID'
end

it "returns the row type" do
expect(serialized[:object_type]).to eq 'Work'
end

it "handles all the fields" do
# Just check for one value deep in the array
# Otherwise we need to check the whole exact string and the test becomes fragile
Expand All @@ -148,5 +176,9 @@
it "handles embedded commas" do
expect(serialized[:creator]).to eq 'Anon., 16th Century'
end

it "defaults to private(restricted) visibility" do
expect(serialized[:visibility]).to eq 'restricted'
end
end
end
11 changes: 11 additions & 0 deletions spec/rails_helper.rb
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
require_relative 'support/controller_macros'
# Add additional requires below this line. Rails is not loaded until this point!
require 'capybara/rails'
require 'noid/rails/rspec'

# use this in specs to avoid actually using a working virus scanner during tests (very slow)
Hyrax.config.virus_scanner = Hyrax::VirusScanner
Expand Down Expand Up @@ -46,6 +47,16 @@
config.include Devise::Test::IntegrationHelpers, type: :system
config.include Devise::Test::IntegrationHelpers, type: :request

# Avoid rollback of id minter-state to avoid errors like
# `Ldp::Conflict, "Can't call create on an existing resource"`
# see testing notes at https://github.com/samvera/noid-rails#overriding-default-behavior
include Noid::Rails::RSpec
config.before(:suite) { disable_production_minter! }
config.after(:suite) { enable_production_minter! }

# Clean out ActiveFedora
config.before(:suite) { ActiveFedora::Cleaner.clean! }

config.before do |_example|
class_double(Tenejo::VirusScanner)
allow(Tenejo::VirusScanner).to receive(:infected?).and_return(false)
Expand Down

0 comments on commit fc88bf2

Please sign in to comment.