From 41282a8f1e40c9b9519253db362fb9b5a50bdb1e Mon Sep 17 00:00:00 2001 From: Ben Pennell Date: Tue, 19 Nov 2024 14:27:48 -0500 Subject: [PATCH] HYC-1988 - Handle soffice pptx (#1132) * Attempt to kill soffice if it times out * Add custom retry behavior for create derivs job, have it skill SofficeTimeoutErrors immediately * Use redlock to prevent multiple soffices from running at once. Remove retry handler since it wasn't getting called * Explicitly add redlock dependency. Require redlock * Add tests for encode method to verify timeout and kill works. Reduce the number of retries to 3, to avoid bogging the system down * Use pgroup, capture more info and reproduce more of the behavior from original timeout method * Wait for lock to become available rather than giving up immediately. Move lock closer to execution * Reduce the amount of new code by using the existing execute functionality. Fix locking retries --- Gemfile | 1 + Gemfile.lock | 1 + .../processors/document_override.rb | 52 +++++++++++- config/initializers/hyrax.rb | 2 +- config/sidekiq.yml | 2 +- .../derivatives/processors/document_spec.rb | 80 +++++++++++++++++++ 6 files changed, 134 insertions(+), 4 deletions(-) diff --git a/Gemfile b/Gemfile index 997df08c3..babacfae5 100644 --- a/Gemfile +++ b/Gemfile @@ -56,6 +56,7 @@ gem 'rails', '~> 6.0' gem 'rdf', git: 'https://github.com/ruby-rdf/rdf.git', branch: '3.2.11-patch' # Use Redis adapter to run Action Cable in production gem 'redis', '~> 4.5.0' +gem 'redlock' # version determined by hyrax gem 'riiif', '~> 2.5.0' gem 'roo', '~>2.9.0' gem 'rsolr', '~> 2.5.0' diff --git a/Gemfile.lock b/Gemfile.lock index 5b2b85675..bf1cd1c48 100644 --- a/Gemfile.lock +++ b/Gemfile.lock @@ -1130,6 +1130,7 @@ DEPENDENCIES rails (~> 6.0) rdf! redis (~> 4.5.0) + redlock riiif (~> 2.5.0) roo (~> 2.9.0) rsolr (~> 2.5.0) diff --git a/app/overrides/lib/hydra/derivatives/processors/document_override.rb b/app/overrides/lib/hydra/derivatives/processors/document_override.rb index 7211eb4d9..e0e1ee5f3 100644 --- a/app/overrides/lib/hydra/derivatives/processors/document_override.rb +++ b/app/overrides/lib/hydra/derivatives/processors/document_override.rb @@ -1,8 +1,56 @@ # frozen_string_literal: true # [hyc-override] https://github.com/samvera/hydra-derivatives/blob/v3.8.0/lib/hydra/derivatives/processors/document.rb +require 'redlock' + +class SofficeTimeoutError < Hydra::Derivatives::TimeoutError; end + Hydra::Derivatives::Processors::Document.class_eval do - # TODO: soffice can only run one command at a time. Right now we manage this by only running one - # background job at a time; however, if we want to up concurrency we'll need to deal with this + # [hyc-override] Use Redlock to manage soffice process lock + LOCK_KEY = 'soffice:document_conversion' + LOCK_TIMEOUT = 6 * 60 * 1000 # Lock timeout should be longer than the job timeout + JOB_TIMEOUT_SECONDS = 5 * 60 + LOCK_MANAGER = Redlock::Client.new([Redis.current]) + + # [hyc-override] Adding in a graceful termination before hard kill, reap the process after kill + def self.execute_with_timeout(timeout, command, context) + Timeout.timeout(timeout) do + execute_without_timeout(command, context) + end + rescue Timeout::Error + pid = context[:pid] + if pid + Rails.logger.warn("Terminating soffice process #{pid} after #{timeout} seconds") + Process.kill('TERM', pid) # Attempt a graceful termination + sleep 5 # Give it a few seconds to exit + if system("ps -p #{pid}") + Rails.logger.warn("Killing soffice process #{pid} after graceful termination failed") + Process.kill('KILL', pid) + end + # Harvest the defunct process so it doesn't linger forever + Process.wait(pid) + end + raise SofficeTimeoutError, "Unable to execute command \"#{command}\"\nThe command took longer than #{timeout} seconds to execute" + end + + def self.encode(path, format, outdir, timeout = JOB_TIMEOUT_SECONDS) + # [hyc-override] Use Redlock to manage soffice process lock, since only one soffice process can run at a time + begin + LOCK_MANAGER.lock(LOCK_KEY, LOCK_TIMEOUT) do |locked| + if locked + Rails.logger.debug("Acquired lock for document conversion of #{path}") + command = "#{Hydra::Derivatives.libreoffice_path} --invisible --headless --convert-to #{format} --outdir #{outdir} #{Shellwords.escape(path)}" + # [hyc-override] Use execute_with_timeout directly + execute_with_timeout(timeout, command, {}) + else + sleep(0.5) + raise Redlock::LockError, "Failed to acquire lock for document conversion of #{path}" + end + end + Rails.logger.debug("Released soffice lock for #{path}") + rescue Redlock::LockError + retry + end + end # Converts the document to the format specified in the directives hash. # TODO: file_suffix and options are passed from ShellBasedProcessor.process but are not needed. diff --git a/config/initializers/hyrax.rb b/config/initializers/hyrax.rb index b1be942a2..b377ee701 100755 --- a/config/initializers/hyrax.rb +++ b/config/initializers/hyrax.rb @@ -194,7 +194,7 @@ config.fits_path = ENV['FITS_LOCATION'] # Path to the file derivatives creation tool - # config.libreoffice_path = "soffice" + config.libreoffice_path = ENV['LIBREOFFICE_PATH'] || 'soffice' # Option to enable/disable full text extraction from PDFs # Default is true, set to false to disable full text extraction diff --git a/config/sidekiq.yml b/config/sidekiq.yml index 7c0ab61d8..8d337d7fd 100755 --- a/config/sidekiq.yml +++ b/config/sidekiq.yml @@ -1,7 +1,7 @@ --- # In production this is overridden by a template in the vagrant-rails project :concurrency: 6 -:max_retries: 5 +:max_retries: 3 :queues: - default - derivatives diff --git a/spec/lib/hydra/derivatives/processors/document_spec.rb b/spec/lib/hydra/derivatives/processors/document_spec.rb index 7c5a75b12..c8742af43 100644 --- a/spec/lib/hydra/derivatives/processors/document_spec.rb +++ b/spec/lib/hydra/derivatives/processors/document_spec.rb @@ -4,6 +4,7 @@ RSpec.describe Hydra::Derivatives::Processors::Document do subject { described_class.new(source_path, directives) } + PID = 991234 let(:source_path) { File.join(fixture_path, 'test.doc') } let(:output_service) { Hyrax::PersistDerivatives } @@ -29,4 +30,83 @@ end end end + + describe '.encode' do + let(:path) { '/path/to/document.pptx' } + let(:format) { 'pdf' } + let(:outdir) { '/output/dir' } + let(:timeout) { 5 } + + before do + # Mock the Hydra::Derivatives.libreoffice_path + allow(Hydra::Derivatives).to receive(:libreoffice_path).and_return('/fake/path/to/soffice') + end + + context 'when the process completes successfully' do + it 'runs the command and completes without timeout' do + # Mock Process.spawn and Process.wait to simulate successful execution + allow(Hydra::Derivatives::Processors::Document).to receive(:execute_without_timeout).and_return(true) + + described_class.encode(path, format, outdir, timeout) + + # Verify that the process was executed + expect(Hydra::Derivatives::Processors::Document).to have_received(:execute_without_timeout) + end + end + + context 'when the process times out' do + it 'kills the process after a timeout' do + # Mock Process.kill to simulate killing the process + allow(Process).to receive(:kill) + allow(Process).to receive(:wait).with(PID) + allow(Hydra::Derivatives::Processors::Document).to receive(:execute_without_timeout).and_wrap_original do |original_method, command, context| + # Simulate setting the PID in the context hash + context[:pid] = PID + # Raise the Timeout::Error + sleep 2 + end + allow(Hydra::Derivatives::Processors::Document).to receive(:system).with("ps -p #{PID}").and_return(true) + + expect do + described_class.encode(path, format, outdir, 0.5) + end.to raise_error(SofficeTimeoutError, /Unable to execute command.*/) + + # Verify that the process was spawned + expect(Process).to have_received(:kill).with('TERM', PID) # Attempted graceful termination + expect(Process).to have_received(:kill).with('KILL', PID) # Force kill if necessary + # Verify that process was reaped after being killed + expect(Process).to have_received(:wait).with(PID) + end + end + + context 'when another job already has lock' do + let(:lock_manager) { instance_double(Redlock::Client) } + let(:lock_key) { 'soffice:document_conversion' } + + before do + stub_const('LOCK_MANAGER', lock_manager) + allow(described_class).to receive(:execute_with_timeout) + end + + it 'it waits to acquire the lock and then runs the command' do + # Set up the locking block to indicate its locked twice, and then unlocked on the third attempt + lock_attempts = 0 + allow(lock_manager).to receive(:lock).with(lock_key, anything) do |_key, _timeout, &block| + lock_attempts += 1 + if lock_attempts == 3 + block.call({ validity: 3000, resource: lock_key, value: 'lock_value' }) + else + block.call(false) + end + end + + described_class.encode(path, format, outdir, timeout) + + # Verify lock was called multiple times + expect(lock_manager).to have_received(:lock).exactly(3).times + # Verify that the process was executed after those retries + expect(Hydra::Derivatives::Processors::Document).to have_received(:execute_with_timeout) + end + end + end end