Skip to content

Commit

Permalink
feat: allow overwritten data deletion to be configured with extra opt…
Browse files Browse the repository at this point in the history
…ions
  • Loading branch information
bethesque committed Dec 2, 2020
1 parent acee8ce commit fd80973
Show file tree
Hide file tree
Showing 4 changed files with 158 additions and 48 deletions.
82 changes: 55 additions & 27 deletions lib/pact_broker/db/delete_overwritten_data.rb
Original file line number Diff line number Diff line change
Expand Up @@ -11,65 +11,93 @@ def self.call database_connection, options = {}
def initialize database_connection, options = {}
@db = database_connection
@options = options
@before = options[:before] || DateTime.now
@cut_off_date = options[:max_age] ? (DateTime.now - options[:max_age]) : DateTime.now
@limit = options[:limit] || 1000
end

def call
require 'pact_broker/pacts/pact_publication'
require 'pact_broker/domain/verification'

deleted_counts = {}
kept_counts = {}


deleted_counts.merge!(delete_overwritten_pact_publications)
deleted_counts.merge!(delete_overwritten_verifications)
deleted_counts.merge!(delete_orphan_pact_versions)
deleted_counts.merge!(delete_webhook_data)

kept_counts[:pact_publications] = db[:pact_publications].count
kept_counts[:verification_results] = db[:verifications].count
kept_counts[:pact_versions] = db[:pact_versions].count
kept_counts[:triggered_webhooks] = db[:triggered_webhooks].count


{ deleted: deleted_counts, kept: kept_counts }
if dry_run?
to_keep = deleted_counts.keys.each_with_object({}) do | table_name, new_counts |
new_counts[table_name] = kept_counts[table_name] - deleted_counts[table_name]
end
{ toDelete: deleted_counts, toKeep: to_keep }
else
{ deleted: deleted_counts, kept: kept_counts }
end
end

private

attr_reader :db, :options, :before
attr_reader :db, :options, :cut_off_date, :limit

def dry_run?
options[:dry_run]
end

def delete_webhook_data
ids_to_keep = db[:latest_triggered_webhooks].select(:id)
resolved_ids_to_delete = db[:triggered_webhooks]
.where(id: ids_to_keep)
.invert
.where(Sequel.lit('created_at < ?', cut_off_date))
.limit(limit)
.collect{ |row| row[:id] }

def delete_webhook_data(triggered_webhook_ids)
db[:webhook_executions].where(triggered_webhook_id: triggered_webhook_ids).delete
db[:triggered_webhooks].where(id: triggered_webhook_ids).delete
PactBroker::Webhooks::TriggeredWebhook.where(id: resolved_ids_to_delete).delete unless dry_run?
{ triggered_webhooks: resolved_ids_to_delete.count }
end

def delete_orphan_pact_versions
referenced_pact_version_ids = db[:pact_publications].select(:pact_version_id).union(db[:verifications].select(:pact_version_id))
pact_version_ids_to_delete = db[:pact_versions].where(id: referenced_pact_version_ids).invert
deleted_counts = { pact_versions: pact_version_ids_to_delete.count }
pact_version_ids_to_delete.delete
deleted_counts
pact_version_ids_to_delete = db[:pact_versions].where(id: referenced_pact_version_ids).invert.order(:id).limit(limit).collect{ |row| row[:id] }
db[:pact_versions].where(id: pact_version_ids_to_delete).delete unless dry_run?
{ pact_versions: pact_version_ids_to_delete.count }
end

def delete_overwritten_pact_publications
pact_publication_ids_to_delete = db[:pact_publications]
.select(:id)
.where(id: db[:latest_pact_publication_ids_for_consumer_versions].select(:pact_publication_id))
ids_to_keep = db[:latest_pact_publication_ids_for_consumer_versions].select(:pact_publication_id)

resolved_ids_to_delete = db[:pact_publications]
.where(id: ids_to_keep)
.invert
.where(Sequel.lit('created_at < ?', before))
.where(Sequel.lit('created_at < ?', cut_off_date))
.order(:id)
.limit(limit)
.collect{ |row| row[:id] }

deleted_counts = { pact_publications: pact_publication_ids_to_delete.count }
delete_webhook_data(db[:triggered_webhooks].where(pact_publication_id: pact_publication_ids_to_delete).select(:id))
pact_publication_ids_to_delete.delete
deleted_counts
PactBroker::Pacts::PactPublication.where(id: resolved_ids_to_delete).delete unless dry_run?

{ pact_publications: resolved_ids_to_delete.count }
end

def delete_overwritten_verifications
verification_ids = db[:verifications].select(:id)
.where(id: db[:latest_verification_id_for_pact_version_and_provider_version].select(:verification_id))
ids_to_keep = db[:latest_verification_id_for_pact_version_and_provider_version].select(:verification_id)
resolved_ids_to_delete = db[:verifications]
.where(id: ids_to_keep)
.invert
.where(Sequel.lit('created_at < ?', before))
deleted_counts = { verification_results: verification_ids.count }
delete_webhook_data(db[:triggered_webhooks].where(verification_id: verification_ids).select(:id))
verification_ids.delete
deleted_counts
.where(Sequel.lit('created_at < ?', cut_off_date))
.order(:id)
.limit(limit)
.collect{ |row| row[:id] }

PactBroker::Domain::Verification.where(id: resolved_ids_to_delete).delete unless dry_run?
{ verification_results: resolved_ids_to_delete.count }
end
end
end
Expand Down
12 changes: 9 additions & 3 deletions lib/pact_broker/tasks/clean_task.rb
Original file line number Diff line number Diff line change
Expand Up @@ -38,15 +38,21 @@ def rake_task &block

raise PactBroker::Error.new("You must specify the version_deletion_limit") unless version_deletion_limit

prefix = dry_run ? "[DRY RUN] " : ""

if keep_version_selectors.nil? || keep_version_selectors.empty?
raise PactBroker::Error.new("You must specify which versions to keep")
else
output "Deleting oldest #{version_deletion_limit} versions, keeping versions that match the configured selectors", keep_version_selectors
output "#{prefix}Deleting oldest #{version_deletion_limit} versions, keeping versions that match the configured selectors", keep_version_selectors
end

start_time = Time.now
results = PactBroker::DB::CleanIncremental.call(
database_connection, keep: keep_version_selectors, limit: version_deletion_limit, logger: logger, dry_run: dry_run)
results = PactBroker::DB::CleanIncremental.call(database_connection,
keep: keep_version_selectors,
limit: version_deletion_limit,
logger: logger,
dry_run: dry_run
)
end_time = Time.now
elapsed_seconds = (end_time - start_time).to_i
output "Results (#{elapsed_seconds} seconds)", results
Expand Down
30 changes: 23 additions & 7 deletions lib/pact_broker/tasks/delete_overwritten_data_task.rb
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,13 @@ module PactBroker
module DB
class DeleteOverwrittenDataTask < ::Rake::TaskLib
attr_accessor :database_connection
attr_accessor :age_in_days
attr_accessor :max_age
attr_accessor :logger
attr_accessor :deletion_limit
attr_accessor :dry_run

def initialize &block
@max_age = 7
rake_task &block
end

Expand All @@ -19,15 +23,27 @@ def rake_task &block
instance_eval(&block)
options = {}

if age_in_days
options[:before] = (Date.today - age_in_days.to_i).to_datetime
$stdout.puts "Deleting overwritten pact publications and verifications older than #{age_in_days} days"
prefix = dry_run ? "[DRY RUN] " : ""

if max_age
options[:max_age] = max_age
output "#{prefix}Deleting overwritten pact publications and verifications older than #{max_age} days"
else
$stdout.puts "Deleting overwritten pact publications and verifications"
output "#{prefix}Deleting overwritten pact publications and verifications"
end

report = PactBroker::DB::DeleteOverwrittenData.call(database_connection, options)
$stdout.puts report.to_yaml
options[:limit] = deletion_limit if deletion_limit
options[:dry_run] = dry_run

start_time = Time.now
results = PactBroker::DB::DeleteOverwrittenData.call(database_connection, options)
end_time = Time.now
elapsed_seconds = (end_time - start_time).to_i
output "Results (#{elapsed_seconds} seconds)", results
end

def output string, payload = {}
logger ? logger.info(string, payload: payload) : puts("#{string} #{payload.to_json}")
end
end
end
Expand Down
82 changes: 71 additions & 11 deletions spec/lib/pact_broker/db/delete_overwritten_data_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -2,11 +2,14 @@

module PactBroker
module DB
describe DeleteOverwrittenData, pending: !!DB.mysql? do
describe DeleteOverwrittenData, skip: !!DB.mysql? do
describe ".call" do
let(:db) { PactBroker::DB.connection }
subject { DeleteOverwrittenData.call(db, before: before_date) }
let(:before_date) { nil }
let(:max_age) { nil }
let(:dry_run) { nil }
let(:limit) { nil }

subject { DeleteOverwrittenData.call(db, max_age: max_age, limit: limit, dry_run: dry_run) }

context "when a pact is overwritten" do
let!(:pact_to_delete) { td.create_everything_for_an_integration.and_return(:pact) }
Expand All @@ -25,6 +28,14 @@ module DB
expect(subject[:deleted][:pact_publications]).to eq 1
expect(subject[:kept][:pact_publications]).to eq 1
end

context "when dry_run is true" do
let(:dry_run) { true }

it "does not delete anything" do
expect { subject }.to_not change{ db[:pact_publications].count }
end
end
end

context "when a pact has multiple verifications" do
Expand Down Expand Up @@ -53,10 +64,12 @@ module DB
context "when a pact version is orphaned" do
before do
td.create_pact_with_verification.comment("this one will still have the verification, so can't be deleted")
.revise_pact.comment("this one can be deleted")
.revise_pact.comment("this one will still have a pact publication, so can't be deleted")
.create_pact_version_without_publication.comment("will be deleted")
.create_pact_version_without_publication.comment("will be kept because of limit")
end

let(:limit) { 1 }

it "is deleted" do
expect { subject }.to change{ db[:pact_versions].count }.by(-1)
end
Expand All @@ -65,36 +78,83 @@ module DB
expect(subject[:deleted][:pact_versions]).to eq 1
expect(subject[:kept][:pact_versions]).to eq 2
end

context "when dry_run is true" do
let(:dry_run) { true }

it "does not delete anything" do
expect { subject }.to_not change{ db[:pact_versions].count }
end
end
end

context "when the pact publication is created after the before date" do
context "when the pact publication is younger than the max age" do
before do
td.set_now(before_date + 1)
td.set_now(DateTime.now - 3)
.create_pact_with_hierarchy
.revise_pact
end

let(:before_date) { DateTime.new(2010, 2, 5) }
let(:max_age) { 4 }

it "doesn't delete the data" do
expect { subject }.to_not change { db[:pact_publications].count }
end
end

context "when the verification is created after the before date" do
context "when the verification is younger than the max age" do
before do
td.set_now(before_date + 1)
td.set_now(DateTime.now - 3)
.create_pact_with_hierarchy
.create_verification(provider_version: "1", success: false)
.create_verification(provider_version: "1", success: true, number: 2)
end

let(:before_date) { DateTime.new(2010, 2, 5) }
let(:max_age) { 4 }

it "doesn't delete the data" do
expect { subject }.to_not change { db[:verifications].count }
end
end

context "when there are triggered webhooks and executions" do
before do
td.create_pact_with_hierarchy("Foo", "1", "Bar")
.create_webhook
.create_triggered_webhook
.create_webhook_execution
.create_triggered_webhook.comment("latest")
.create_webhook_execution
.create_pact_with_hierarchy("Foo1", "1", "Bar1")
.create_webhook
.create_triggered_webhook
.create_webhook_execution
.create_triggered_webhook.comment("latest")
.create_webhook_execution
end

let(:limit) { 3 }

it "deletes all but the latest triggered webhooks, considering the limit" do
expect { subject }.to change { PactBroker::Webhooks::TriggeredWebhook.count }.by(-2)
end

context "when dry_run is true" do
let(:dry_run) { true }

it "does not delete anything" do
expect { subject }.to_not change{ PactBroker::Webhooks::TriggeredWebhook.count }
end
end

context "when all the records are younger than the max age" do
let(:max_age) { 1 }

it "doesn't delete anything" do
expect { subject }.to_not change { PactBroker::Webhooks::TriggeredWebhook.count }
end
end
end
end
end
end
Expand Down

0 comments on commit fd80973

Please sign in to comment.