From 0204262f57fa7af4a0f9fbc7a5a741df067986a9 Mon Sep 17 00:00:00 2001 From: Alex Osborne Date: Wed, 3 Jul 2024 15:56:14 +0900 Subject: [PATCH] Add incrementWarcStatsForCrawlSeries variant that doesn't use subquery We keep hitting deadlocks on this particular UPDATE statement. I'm uncertain whether they're caused by the subquery, but since we already have the crawl series ID, simplifying the update seems nice anyway. --- ui/src/bamboo/crawl/Crawls.java | 4 +++- ui/src/bamboo/crawl/WarcsDAO.java | 3 +++ ui/test/bamboo/crawl/WarcsTest.java | 5 +++++ 3 files changed, 11 insertions(+), 1 deletion(-) diff --git a/ui/src/bamboo/crawl/Crawls.java b/ui/src/bamboo/crawl/Crawls.java index ff45475..aa7430f 100644 --- a/ui/src/bamboo/crawl/Crawls.java +++ b/ui/src/bamboo/crawl/Crawls.java @@ -89,7 +89,9 @@ private long create(Crawl metadata, List warcs, List artifacts) tx.warcs().batchInsertWarcsWithoutRollup(crawlId, warcs.iterator()); int warcFilesDelta = warcs.size(); tx.warcs().incrementWarcStatsForCrawlInternal(crawlId, warcFilesDelta, totalBytes); - tx.warcs().incrementWarcStatsForCrawlSeriesByCrawlId(crawlId, warcFilesDelta, totalBytes); + if (metadata.getCrawlSeriesId() != null) { + tx.warcs().incrementWarcStatsForCrawlSeries(metadata.getCrawlSeriesId(), warcFilesDelta, totalBytes); + } tx.batchInsertArtifacts(crawlId, artifacts.iterator()); return crawlId; }); diff --git a/ui/src/bamboo/crawl/WarcsDAO.java b/ui/src/bamboo/crawl/WarcsDAO.java index 8020faf..55fd7c4 100644 --- a/ui/src/bamboo/crawl/WarcsDAO.java +++ b/ui/src/bamboo/crawl/WarcsDAO.java @@ -75,6 +75,9 @@ public Statistics map(ResultSet r, StatementContext ctx) throws SQLException { @SqlUpdate("UPDATE crawl_series SET warc_files = warc_files + :warc_files, warc_size = warc_size + :warc_size WHERE id = (SELECT crawl_series_id FROM crawl WHERE crawl.id = :crawl_id)") void incrementWarcStatsForCrawlSeriesByCrawlId(@Bind("crawl_id") long crawlId, @Bind("warc_files") int warcFilesDelta, @Bind("warc_size") long 
warcSizeDelta); + @SqlUpdate("UPDATE crawl_series SET warc_files = warc_files + :warc_files, warc_size = warc_size + :warc_size WHERE id = :crawl_series_id") + void incrementWarcStatsForCrawlSeries(@Bind("crawl_series_id") long crawlSeriesId, @Bind("warc_files") int warcFilesDelta, @Bind("warc_size") long warcSizeDelta); + @SqlUpdate("UPDATE crawl SET warc_files = warc_files + :warc_files, warc_size = warc_size + :warc_size WHERE id = :crawlId") void incrementWarcStatsForCrawlInternal(@Bind("crawlId") long crawlId, @Bind("warc_files") int warcFilesDelta, @Bind("warc_size") long warcSizeDelta); diff --git a/ui/test/bamboo/crawl/WarcsTest.java b/ui/test/bamboo/crawl/WarcsTest.java index 7943662..edc5e53 100644 --- a/ui/test/bamboo/crawl/WarcsTest.java +++ b/ui/test/bamboo/crawl/WarcsTest.java @@ -29,8 +29,13 @@ public void testUpdateRecordStats() throws IOException { Path testFile = tmp.newFile("test.warc.gz").toPath(); + Series series = new Series(); + series.setName("test series"); + long seriesId = serieses.create(series); + Crawl crawl = new Crawl(); crawl.setName("test crawl"); + crawl.setCrawlSeriesId(seriesId); long crawlId = crawls.createInPlace(crawl, Arrays.asList(testFile)); long warcId = warcs.findByCrawlId(crawlId).get(0).getId();