Skip to content

Commit

Permalink
Add incrementWarcStatsForCrawlSeries variant that doesn't use subquery
Browse files Browse the repository at this point in the history
We keep hitting deadlocks on this particular UPDATE statement. I'm uncertain if they're caused by the subquery, but since we already have the crawl series ID, simplifying the update seems nice anyway.
  • Loading branch information
ato committed Jul 3, 2024
1 parent ac2edf7 commit 0204262
Show file tree
Hide file tree
Showing 3 changed files with 11 additions and 1 deletion.
4 changes: 3 additions & 1 deletion ui/src/bamboo/crawl/Crawls.java
Original file line number Diff line number Diff line change
Expand Up @@ -89,7 +89,9 @@ private long create(Crawl metadata, List<Warc> warcs, List<Artifact> artifacts)
tx.warcs().batchInsertWarcsWithoutRollup(crawlId, warcs.iterator());
int warcFilesDelta = warcs.size();
tx.warcs().incrementWarcStatsForCrawlInternal(crawlId, warcFilesDelta, totalBytes);
tx.warcs().incrementWarcStatsForCrawlSeriesByCrawlId(crawlId, warcFilesDelta, totalBytes);
if (metadata.getCrawlSeriesId() != null) {
tx.warcs().incrementWarcStatsForCrawlSeries(metadata.getCrawlSeriesId(), warcFilesDelta, totalBytes);
}
tx.batchInsertArtifacts(crawlId, artifacts.iterator());
return crawlId;
});
Expand Down
3 changes: 3 additions & 0 deletions ui/src/bamboo/crawl/WarcsDAO.java
Original file line number Diff line number Diff line change
Expand Up @@ -75,6 +75,9 @@ public Statistics map(ResultSet r, StatementContext ctx) throws SQLException {
/**
 * Adds the given deltas to the {@code warc_files} and {@code warc_size} columns of the
 * {@code crawl_series} row that the specified crawl belongs to.
 *
 * <p>The series is looked up inside the statement itself, via a subquery that reads
 * {@code crawl_series_id} from the {@code crawl} row with the given id. When the caller
 * already knows the series id, {@code incrementWarcStatsForCrawlSeries} issues the same
 * update without the subquery.
 *
 * <p>NOTE(review): when the crawl has no series the subquery presumably yields NULL and no
 * row is updated -- confirm against the target database.
 *
 * @param crawlId        id of the crawl whose owning series should be updated
 * @param warcFilesDelta amount added to the series' {@code warc_files} column
 * @param warcSizeDelta  amount added to the series' {@code warc_size} column
 */
@SqlUpdate("UPDATE crawl_series SET warc_files = warc_files + :warc_files, warc_size = warc_size + :warc_size WHERE id = (SELECT crawl_series_id FROM crawl WHERE crawl.id = :crawl_id)")
void incrementWarcStatsForCrawlSeriesByCrawlId(@Bind("crawl_id") long crawlId, @Bind("warc_files") int warcFilesDelta, @Bind("warc_size") long warcSizeDelta);

/**
 * Adds the given deltas to the {@code warc_files} and {@code warc_size} columns of the
 * {@code crawl_series} row with the given id.
 *
 * <p>The row is addressed directly by its primary id; no other table is read by this
 * statement.
 *
 * @param crawlSeriesId  id of the {@code crawl_series} row to update
 * @param warcFilesDelta amount added to the series' {@code warc_files} column
 * @param warcSizeDelta  amount added to the series' {@code warc_size} column
 */
@SqlUpdate("UPDATE crawl_series SET warc_files = warc_files + :warc_files, warc_size = warc_size + :warc_size WHERE id = :crawl_series_id")
void incrementWarcStatsForCrawlSeries(@Bind("crawl_series_id") long crawlSeriesId, @Bind("warc_files") int warcFilesDelta, @Bind("warc_size") long warcSizeDelta);

/**
 * Adds the given deltas to the {@code warc_files} and {@code warc_size} columns of the
 * {@code crawl} row with the given id.
 *
 * <p>This statement only touches the {@code crawl} table; the matching
 * {@code crawl_series} totals are updated by a separate statement.
 * NOTE(review): the "Internal" suffix presumably signals that callers are responsible for
 * issuing that series update themselves -- confirm.
 *
 * @param crawlId        id of the {@code crawl} row to update
 * @param warcFilesDelta amount added to the crawl's {@code warc_files} column
 * @param warcSizeDelta  amount added to the crawl's {@code warc_size} column
 */
@SqlUpdate("UPDATE crawl SET warc_files = warc_files + :warc_files, warc_size = warc_size + :warc_size WHERE id = :crawlId")
void incrementWarcStatsForCrawlInternal(@Bind("crawlId") long crawlId, @Bind("warc_files") int warcFilesDelta, @Bind("warc_size") long warcSizeDelta);

Expand Down
5 changes: 5 additions & 0 deletions ui/test/bamboo/crawl/WarcsTest.java
Original file line number Diff line number Diff line change
Expand Up @@ -29,8 +29,13 @@ public void testUpdateRecordStats() throws IOException {

Path testFile = tmp.newFile("test.warc.gz").toPath();

Series series = new Series();
series.setName("test series");
long seriesId = serieses.create(series);

Crawl crawl = new Crawl();
crawl.setName("test crawl");
crawl.setCrawlSeriesId(seriesId);
long crawlId = crawls.createInPlace(crawl, Arrays.asList(testFile));
long warcId = warcs.findByCrawlId(crawlId).get(0).getId();

Expand Down

0 comments on commit 0204262

Please sign in to comment.