From fd809737956faac7281a35b88865fae8cd9b85ed Mon Sep 17 00:00:00 2001 From: Beth Skurrie Date: Wed, 2 Dec 2020 16:57:55 +1100 Subject: [PATCH] feat: allow overwritten data deletion to be configured with extra options --- lib/pact_broker/db/delete_overwritten_data.rb | 82 +++++++++++++------ lib/pact_broker/tasks/clean_task.rb | 12 ++- .../tasks/delete_overwritten_data_task.rb | 30 +++++-- .../db/delete_overwritten_data_spec.rb | 82 ++++++++++++++++--- 4 files changed, 158 insertions(+), 48 deletions(-) diff --git a/lib/pact_broker/db/delete_overwritten_data.rb b/lib/pact_broker/db/delete_overwritten_data.rb index 1f57b2526..3632990c2 100644 --- a/lib/pact_broker/db/delete_overwritten_data.rb +++ b/lib/pact_broker/db/delete_overwritten_data.rb @@ -11,65 +11,93 @@ def self.call database_connection, options = {} def initialize database_connection, options = {} @db = database_connection @options = options - @before = options[:before] || DateTime.now + @cut_off_date = options[:max_age] ? (DateTime.now - options[:max_age]) : DateTime.now + @limit = options[:limit] || 1000 end def call + require 'pact_broker/pacts/pact_publication' + require 'pact_broker/domain/verification' + deleted_counts = {} kept_counts = {} - deleted_counts.merge!(delete_overwritten_pact_publications) deleted_counts.merge!(delete_overwritten_verifications) deleted_counts.merge!(delete_orphan_pact_versions) + deleted_counts.merge!(delete_webhook_data) kept_counts[:pact_publications] = db[:pact_publications].count kept_counts[:verification_results] = db[:verifications].count kept_counts[:pact_versions] = db[:pact_versions].count + kept_counts[:triggered_webhooks] = db[:triggered_webhooks].count - - { deleted: deleted_counts, kept: kept_counts } + if dry_run? + to_keep = deleted_counts.keys.each_with_object({}) do | table_name, new_counts | + new_counts[table_name] = kept_counts[table_name] - deleted_counts[table_name] + end + { toDelete: deleted_counts, toKeep: to_keep } + else + { deleted: deleted_counts, kept: kept_counts } + end end private - attr_reader :db, :options, :before + attr_reader :db, :options, :cut_off_date, :limit + + def dry_run? + options[:dry_run] + end + + def delete_webhook_data + ids_to_keep = db[:latest_triggered_webhooks].select(:id) + resolved_ids_to_delete = db[:triggered_webhooks] + .where(id: ids_to_keep) + .invert + .where(Sequel.lit('created_at < ?', cut_off_date)) + .limit(limit) + .collect{ |row| row[:id] } - def delete_webhook_data(triggered_webhook_ids) - db[:webhook_executions].where(triggered_webhook_id: triggered_webhook_ids).delete - db[:triggered_webhooks].where(id: triggered_webhook_ids).delete + PactBroker::Webhooks::TriggeredWebhook.where(id: resolved_ids_to_delete).delete unless dry_run? + { triggered_webhooks: resolved_ids_to_delete.count } end def delete_orphan_pact_versions referenced_pact_version_ids = db[:pact_publications].select(:pact_version_id).union(db[:verifications].select(:pact_version_id)) - pact_version_ids_to_delete = db[:pact_versions].where(id: referenced_pact_version_ids).invert - deleted_counts = { pact_versions: pact_version_ids_to_delete.count } - pact_version_ids_to_delete.delete - deleted_counts + pact_version_ids_to_delete = db[:pact_versions].where(id: referenced_pact_version_ids).invert.order(:id).limit(limit).collect{ |row| row[:id] } + db[:pact_versions].where(id: pact_version_ids_to_delete).delete unless dry_run? + { pact_versions: pact_version_ids_to_delete.count } end def delete_overwritten_pact_publications - pact_publication_ids_to_delete = db[:pact_publications] - .select(:id) - .where(id: db[:latest_pact_publication_ids_for_consumer_versions].select(:pact_publication_id)) + ids_to_keep = db[:latest_pact_publication_ids_for_consumer_versions].select(:pact_publication_id) + + resolved_ids_to_delete = db[:pact_publications] + .where(id: ids_to_keep) .invert - .where(Sequel.lit('created_at < ?', before)) + .where(Sequel.lit('created_at < ?', cut_off_date)) + .order(:id) + .limit(limit) + .collect{ |row| row[:id] } - deleted_counts = { pact_publications: pact_publication_ids_to_delete.count } - delete_webhook_data(db[:triggered_webhooks].where(pact_publication_id: pact_publication_ids_to_delete).select(:id)) - pact_publication_ids_to_delete.delete - deleted_counts + PactBroker::Pacts::PactPublication.where(id: resolved_ids_to_delete).delete unless dry_run? + + { pact_publications: resolved_ids_to_delete.count } end def delete_overwritten_verifications - verification_ids = db[:verifications].select(:id) - .where(id: db[:latest_verification_id_for_pact_version_and_provider_version].select(:verification_id)) + ids_to_keep = db[:latest_verification_id_for_pact_version_and_provider_version].select(:verification_id) + resolved_ids_to_delete = db[:verifications] + .where(id: ids_to_keep) .invert - .where(Sequel.lit('created_at < ?', before)) - deleted_counts = { verification_results: verification_ids.count } - delete_webhook_data(db[:triggered_webhooks].where(verification_id: verification_ids).select(:id)) - verification_ids.delete - deleted_counts + .where(Sequel.lit('created_at < ?', cut_off_date)) + .order(:id) + .limit(limit) + .collect{ |row| row[:id] } + + PactBroker::Domain::Verification.where(id: resolved_ids_to_delete).delete unless dry_run? + { verification_results: resolved_ids_to_delete.count } end end end diff --git a/lib/pact_broker/tasks/clean_task.rb b/lib/pact_broker/tasks/clean_task.rb index 04f161887..02148b71d 100644 --- a/lib/pact_broker/tasks/clean_task.rb +++ b/lib/pact_broker/tasks/clean_task.rb @@ -38,15 +38,21 @@ def rake_task &block raise PactBroker::Error.new("You must specify the version_deletion_limit") unless version_deletion_limit + prefix = dry_run ? "[DRY RUN] " : "" + if keep_version_selectors.nil? || keep_version_selectors.empty? raise PactBroker::Error.new("You must specify which versions to keep") else - output "Deleting oldest #{version_deletion_limit} versions, keeping versions that match the configured selectors", keep_version_selectors + output "#{prefix}Deleting oldest #{version_deletion_limit} versions, keeping versions that match the configured selectors", keep_version_selectors end start_time = Time.now - results = PactBroker::DB::CleanIncremental.call( - database_connection, keep: keep_version_selectors, limit: version_deletion_limit, logger: logger, dry_run: dry_run) + results = PactBroker::DB::CleanIncremental.call(database_connection, + keep: keep_version_selectors, + limit: version_deletion_limit, + logger: logger, + dry_run: dry_run + ) end_time = Time.now elapsed_seconds = (end_time - start_time).to_i output "Results (#{elapsed_seconds} seconds)", results diff --git a/lib/pact_broker/tasks/delete_overwritten_data_task.rb b/lib/pact_broker/tasks/delete_overwritten_data_task.rb index 374404d6a..b8f1554fb 100644 --- a/lib/pact_broker/tasks/delete_overwritten_data_task.rb +++ b/lib/pact_broker/tasks/delete_overwritten_data_task.rb @@ -2,9 +2,13 @@ module PactBroker module DB class DeleteOverwrittenDataTask < ::Rake::TaskLib attr_accessor :database_connection - attr_accessor :age_in_days + attr_accessor :max_age + attr_accessor :logger + attr_accessor :deletion_limit + attr_accessor :dry_run def initialize &block + @max_age = 7 rake_task &block end @@ -19,15 +23,27 @@ def rake_task &block instance_eval(&block) options = {} - if age_in_days - options[:before] = (Date.today - age_in_days.to_i).to_datetime - $stdout.puts "Deleting overwritten pact publications and verifications older than #{age_in_days} days" + prefix = dry_run ? "[DRY RUN] " : "" + + if max_age + options[:max_age] = max_age + output "#{prefix}Deleting overwritten pact publications and verifications older than #{max_age} days" else - $stdout.puts "Deleting overwritten pact publications and verifications" + output "#{prefix}Deleting overwritten pact publications and verifications" end - report = PactBroker::DB::DeleteOverwrittenData.call(database_connection, options) - $stdout.puts report.to_yaml + options[:limit] = deletion_limit if deletion_limit + options[:dry_run] = dry_run + + start_time = Time.now + results = PactBroker::DB::DeleteOverwrittenData.call(database_connection, options) + end_time = Time.now + elapsed_seconds = (end_time - start_time).to_i + output "Results (#{elapsed_seconds} seconds)", results + end + + def output string, payload = {} + logger ? logger.info(string, payload: payload) : puts("#{string} #{payload.to_json}") end end end diff --git a/spec/lib/pact_broker/db/delete_overwritten_data_spec.rb b/spec/lib/pact_broker/db/delete_overwritten_data_spec.rb index b128be40c..fa5a5eff0 100644 --- a/spec/lib/pact_broker/db/delete_overwritten_data_spec.rb +++ b/spec/lib/pact_broker/db/delete_overwritten_data_spec.rb @@ -2,11 +2,14 @@ module PactBroker module DB - describe DeleteOverwrittenData, pending: !!DB.mysql? do + describe DeleteOverwrittenData, skip: !!DB.mysql? do describe ".call" do let(:db) { PactBroker::DB.connection } - subject { DeleteOverwrittenData.call(db, before: before_date) } - let(:before_date) { nil } + let(:max_age) { nil } + let(:dry_run) { nil } + let(:limit) { nil } + + subject { DeleteOverwrittenData.call(db, max_age: max_age, limit: limit, dry_run: dry_run) } context "when a pact is overwritten" do let!(:pact_to_delete) { td.create_everything_for_an_integration.and_return(:pact) } @@ -25,6 +28,14 @@ module DB expect(subject[:deleted][:pact_publications]).to eq 1 expect(subject[:kept][:pact_publications]).to eq 1 end + + context "when dry_run is true" do + let(:dry_run) { true } + + it "does not delete anything" do + expect { subject }.to_not change{ db[:pact_publications].count } + end + end end context "when a pact has multiple verifications" do @@ -53,10 +64,12 @@ module DB context "when a pact version is orphaned" do before do td.create_pact_with_verification.comment("this one will still have the verification, so can't be deleted") - .revise_pact.comment("this one can be deleted") - .revise_pact.comment("this one will still have a pact publication, so can't be deleted") + .create_pact_version_without_publication.comment("will be deleted") + .create_pact_version_without_publication.comment("will be kept because of limit") end + let(:limit) { 1 } + it "is deleted" do expect { subject }.to change{ db[:pact_versions].count }.by(-1) end @@ -65,36 +78,83 @@ module DB expect(subject[:deleted][:pact_versions]).to eq 1 expect(subject[:kept][:pact_versions]).to eq 2 end + + context "when dry_run is true" do + let(:dry_run) { true } + + it "does not delete anything" do + expect { subject }.to_not change{ db[:pact_versions].count } + end + end end - context "when the pact publication is created after the before date" do + context "when the pact publication is younger than the max age" do before do - td.set_now(before_date + 1) + td.set_now(DateTime.now - 3) .create_pact_with_hierarchy .revise_pact end - let(:before_date) { DateTime.new(2010, 2, 5) } + let(:max_age) { 4 } it "doesn't delete the data" do expect { subject }.to_not change { db[:pact_publications].count } end end - context "when the verification is created after the before date" do + context "when the verification is younger than the max age" do before do - td.set_now(before_date + 1) + td.set_now(DateTime.now - 3) .create_pact_with_hierarchy .create_verification(provider_version: "1", success: false) .create_verification(provider_version: "1", success: true, number: 2) end - let(:before_date) { DateTime.new(2010, 2, 5) } + let(:max_age) { 4 } it "doesn't delete the data" do expect { subject }.to_not change { db[:verifications].count } end end + + context "when there are triggered webhooks and executions" do + before do + td.create_pact_with_hierarchy("Foo", "1", "Bar") + .create_webhook + .create_triggered_webhook + .create_webhook_execution + .create_triggered_webhook.comment("latest") + .create_webhook_execution + .create_pact_with_hierarchy("Foo1", "1", "Bar1") + .create_webhook + .create_triggered_webhook + .create_webhook_execution + .create_triggered_webhook.comment("latest") + .create_webhook_execution + end + + let(:limit) { 3 } + + it "deletes all but the latest triggered webhooks, considering the limit" do + expect { subject }.to change { PactBroker::Webhooks::TriggeredWebhook.count }.by(-2) + end + + context "when dry_run is true" do + let(:dry_run) { true } + + it "does not delete anything" do + expect { subject }.to_not change{ PactBroker::Webhooks::TriggeredWebhook.count } + end + end + + context "when all the records are younger than the max age" do + let(:max_age) { 1 } + + it "doesn't delete anything" do + expect { subject }.to_not change { PactBroker::Webhooks::TriggeredWebhook.count } + end + end + end end end end