Skip to content

Commit

Permalink
Merge pull request #5719 from avalonmediasystem/transform_srt
Browse files Browse the repository at this point in the history
Convert SRT to VTT captions for consumption into Video.js
  • Loading branch information
masaball authored Mar 11, 2024
2 parents 3e818f6 + 4ab5fb0 commit 9900b02
Show file tree
Hide file tree
Showing 8 changed files with 102 additions and 12 deletions.
2 changes: 1 addition & 1 deletion app/controllers/master_files_controller.rb
Original file line number Diff line number Diff line change
Expand Up @@ -286,7 +286,7 @@ def caption_manifest
@caption_url = if caption_id == 'master_file_caption'
captions_master_file_path
else
master_file_supplemental_file_path(master_file_id: @master_file.id, id: caption_id)
captions_master_file_supplemental_file_path(master_file_id: @master_file.id, id: caption_id)
end
end

Expand Down
28 changes: 21 additions & 7 deletions app/controllers/supplemental_files_controller.rb
Original file line number Diff line number Diff line change
Expand Up @@ -58,12 +58,7 @@ def create
end

def show
# TODO: Use a master file presenter which reads from solr instead of loading the masterfile from fedora
# FIXME: authorize supplemental file directly (needs supplemental file to have reference to masterfile)
raise Avalon::NotFound, "Supplemental file: #{params[:id]} not found" unless SupplementalFile.exists? params[:id].to_s

@supplemental_file = SupplementalFile.find(params[:id])
raise Avalon::NotFound, "Supplemental file: #{@supplemental_file.id} not found" unless @object.supplemental_files.any? { |f| f.id == @supplemental_file.id }
find_supplemental_file

# Redirect or proxy the content
if Settings.supplemental_files.proxy
Expand Down Expand Up @@ -107,6 +102,15 @@ def destroy
end
end

# Deliver the requested supplemental file as a WebVTT caption download.
# Files whose content type is 'text/srt' are run through
# SupplementalFile.convert_from_srt; anything else is sent as downloaded.
def captions
  find_supplemental_file

  raw_captions = @supplemental_file.file.download
  vtt_body =
    if @supplemental_file.file.content_type == 'text/srt'
      SupplementalFile.convert_from_srt(raw_captions)
    else
      raw_captions
    end

  # The response is always labelled text/vtt, whatever the stored content type.
  send_data(vtt_body,
            filename: @supplemental_file.file.filename.to_s,
            type: 'text/vtt',
            disposition: 'attachment')
end

private

def set_object
Expand All @@ -118,6 +122,16 @@ def supplemental_file_params
params.fetch(:supplemental_file, {}).permit(:label, :language, :file, tags: [])
end

# Load the supplemental file named by params[:id] into @supplemental_file.
# Raises Avalon::NotFound when no such record exists, or when the record is
# not among @object.supplemental_files.
def find_supplemental_file
  # TODO: Use a master file presenter which reads from solr instead of loading the masterfile from fedora
  # FIXME: authorize supplemental file directly (needs supplemental file to have reference to masterfile)
  requested_id = params[:id]
  unless SupplementalFile.exists?(requested_id.to_s)
    raise Avalon::NotFound, "Supplemental file: #{requested_id} not found"
  end

  @supplemental_file = SupplementalFile.find(requested_id)
  attached = @object.supplemental_files.any? { |candidate| candidate.id == @supplemental_file.id }
  raise Avalon::NotFound, "Supplemental file: #{@supplemental_file.id} not found" unless attached
end


def handle_error(message:, status:)
if request.format == :json
render json: { errors: message }, status: status
Expand Down Expand Up @@ -158,7 +172,7 @@ def object_supplemental_file_path
end

# Check the current action against @object via authorize!.
# The read-style actions (:show and :captions) require :show on the object;
# every other action requires :edit.
def authorize_object
  action = [:show, :captions].include?(action_name.to_sym) ? :show : :edit
  authorize! action, @object, message: "You do not have sufficient privileges to #{action_name} this supplemental file"
end
end
2 changes: 1 addition & 1 deletion app/models/concerns/iiif_supplemental_file_behavior.rb
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@ def object_supplemental_file_url(object, supplemental_file)

def determine_rendering_type(mime)
case mime
when 'application/pdf', 'application/msword', 'application/vnd.oasis.opendocument.text', 'application/vnd.openxmlformats-officedocument.wordprocessingml.document', 'text/html', 'text/plain', 'text/vtt'
when 'application/pdf', 'application/msword', 'application/vnd.oasis.opendocument.text', 'application/vnd.openxmlformats-officedocument.wordprocessingml.document', 'text/html', 'text/plain', 'text/srt', 'text/vtt'
'Text'
when /image\/.+/
'Image'
Expand Down
16 changes: 16 additions & 0 deletions app/models/supplemental_file.rb
Original file line number Diff line number Diff line change
Expand Up @@ -45,4 +45,20 @@ def caption?
# Whether this file carries the 'machine_generated' tag.
# @return [Boolean] true when `tags` includes 'machine_generated'
def machine_generated?
tags.include?('machine_generated')
end

# Adapted from https://github.com/opencoconut/webvtt-ruby/blob/e07d59220260fce33ba5a0c3b355e3ae88b99457/lib/webvtt/parser.rb#L11-L30
# Convert SRT caption text into WebVTT caption text.
#
# Only cue timing lines (lines containing '-->') are rewritten, so cue
# numbers and caption text pass through untouched.
#
# @param srt [String] contents of an SRT caption file
# @return [String] the captions prefixed with a "WEBVTT" header, with
#   surrounding whitespace stripped
def self.convert_from_srt(srt)
  # Normalize line endings first so every later step sees "\n"-terminated lines.
  normalized = srt.gsub("\r\n", "\n")

  converted = normalized.each_line.map do |line|
    next line unless line.include?('-->')

    # Zero-pad single-digit fields in malformed timestamps such as
    # '00:1:00,000' or '0:01:00,000' so both become '00:01:00,000'.
    # Lookarounds leave the delimiters unconsumed, which lets adjacent
    # short fields ('0:1:00,000') all be padded in one pass.
    padded = line.gsub(/(?<!\d)(\d)(?=[:,])/, '0\1')
    # VTT uses '.' as its decimal separator, SRT uses ','.
    padded.gsub(/(\d{2}:\d{2}:\d{2}),(\d{3})/, '\1.\2')
  end.join

  "WEBVTT\n\n#{converted}".strip
end
end
2 changes: 1 addition & 1 deletion config/routes.rb
Original file line number Diff line number Diff line change
Expand Up @@ -166,7 +166,7 @@
# Supplemental Files
resources :supplemental_files, except: [:new, :index, :edit] do
  member do
    # No redirect here: the captions member route is dispatched to the
    # controller's captions action.
    get 'captions'
    # Transcripts redirect to the supplemental file's own URL.
    get 'transcripts', :to => redirect('/master_files/%{master_file_id}/supplemental_files/%{id}')
  end
end
Expand Down
51 changes: 51 additions & 0 deletions spec/controllers/supplemental_files_controller_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -17,4 +17,55 @@
# Controller specs for SupplementalFilesController: the shared nested-controller
# examples for both parent types, plus the captions endpoint under a MasterFile.
RSpec.describe SupplementalFilesController, type: :controller do
it_behaves_like "a nested controller for", MasterFile
it_behaves_like "a nested controller for", MediaObject

describe 'captions endpoint for MasterFile' do
# Default fixture: a supplemental file built with the caption-file and caption-tag traits.
let(:supplemental_file) { FactoryBot.create(:supplemental_file, :with_caption_file, :with_caption_tag) }
# This should return the minimal set of values that should be in the session
# in order to pass any filters (e.g. authentication) defined in
# SupplementalFilesController. Be sure to keep this updated too.
let(:valid_session) { {} }


# Both an anonymous request and a logged-in user without permissions are expected to get 401.
describe 'security' do
let(:master_file) { FactoryBot.create(:master_file, :with_media_object, supplemental_files: [supplemental_file]) }

context 'with unauthenticated user' do
it 'should return 401' do
expect(get :captions, params: { master_file_id: master_file.id, id: supplemental_file.id }).to have_http_status(401)
end
end
context 'with end-user without permissions' do
before do
login_as :user
end
it 'should return 401' do
expect(get :captions, params: { master_file_id: master_file.id, id: supplemental_file.id }).to have_http_status(401)
end
end
end

describe "GET #captions" do
# NOTE(review): presumably :fully_searchable_media_object is readable without login — confirm against the factory.
let(:public_media_object) { FactoryBot.create(:fully_searchable_media_object) }
let(:master_file) { FactoryBot.create(:master_file, media_object: public_media_object, supplemental_files: [supplemental_file]) }
before { allow(Settings.supplemental_files).to receive(:proxy).and_return(true) }

# With the default caption fixture the body must equal the stored file exactly.
it "returns the caption file content" do
get :captions, params: { master_file_id: master_file.id, id: supplemental_file.id }, session: valid_session
expect(response).to have_http_status(200)
expect(response.header["Content-Type"]).to eq 'text/vtt'
expect(response.body).to eq supplemental_file.file.download
end

context 'with SRT caption' do
# Override the fixture with an SRT-backed file; the body must equal the converted fixture text.
let(:supplemental_file) { FactoryBot.create(:supplemental_file, :with_caption_tag, :with_caption_srt_file) }
let(:file) { Rails.root.join('spec', 'fixtures', 'captions.srt')}
it 'returns the caption file content in VTT format' do
get :captions, params: { master_file_id: master_file.id, id: supplemental_file.id }, session: valid_session
expect(response).to have_http_status(200)
expect(response.header["Content-Type"]).to eq 'text/vtt'
expect(response.body).to eq SupplementalFile.convert_from_srt(File.read(file))
end
end
end
end
end
8 changes: 8 additions & 0 deletions spec/models/supplemental_file_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -90,4 +90,12 @@
expect(subject.reload.language).to eq "ger"
end
end

# Pins the SRT-to-VTT conversion on a single cue: the "WEBVTT" header is
# prepended, the ',' millisecond separator becomes '.', and the trailing
# newline is stripped.
describe '.convert_from_srt' do
let(:input) { "1\n00:00:03,498 --> 00:00:05,000\n- Example Captions\n" }
let(:output) { "WEBVTT\n\n1\n00:00:03.498 --> 00:00:05.000\n- Example Captions" }
it 'converts SRT format captions into VTT captions' do
expect(SupplementalFile.convert_from_srt(input)).to eq output
end
end
end
5 changes: 3 additions & 2 deletions spec/routing/supplemental_files_routing_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -28,11 +28,12 @@
it "routes to #update" do
expect(:put => "/master_files/abc1234/supplemental_files/edf567").to route_to("supplemental_files#update", master_file_id: 'abc1234', id: 'edf567')
end
it "routes to #captions" do
expect(:get => "/master_files/abc1234/supplemental_files/edf567/captions").to route_to("supplemental_files#captions", master_file_id: 'abc1234', id: 'edf567')
end
# Redirects are not testable from the routing spec out of the box.
# Forcing the tests to `type: :request` to keep routing tests in one place.
it "redirects to supplemental_files#show", type: :request do
  # Only the transcripts member route is checked for a redirect; the captions
  # route has its own "routes to #captions" example.
  get "/master_files/abc1234/supplemental_files/edf567/transcripts"
  expect(response).to redirect_to("/master_files/abc1234/supplemental_files/edf567")
end
Expand Down

0 comments on commit 9900b02

Please sign in to comment.