Skip to content

Commit

Permalink
Data migration and history retention for 2.0.0 (louislam#5075)
Browse files Browse the repository at this point in the history
  • Loading branch information
louislam authored Oct 26, 2024
1 parent 2470451 commit 4d779cf
Show file tree
Hide file tree
Showing 6 changed files with 294 additions and 59 deletions.
24 changes: 24 additions & 0 deletions extra/reset-migrate-aggregate-table-state.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
const { R } = require("redbean-node");
const Database = require("../server/database");
const args = require("args-parser")(process.argv);
const { Settings } = require("../server/settings");

const main = async () => {
console.log("Connecting the database");
Database.initDataDir(args);
await Database.connect(false, false, true);

console.log("Deleting all data from aggregate tables");
await R.exec("DELETE FROM stat_minutely");
await R.exec("DELETE FROM stat_hourly");
await R.exec("DELETE FROM stat_daily");

console.log("Resetting the aggregate table state");
await Settings.set("migrateAggregateTableState", "");

await Database.close();
console.log("Done");
};

main();

3 changes: 2 additions & 1 deletion package.json
Original file line number Diff line number Diff line change
Expand Up @@ -68,7 +68,8 @@
"sort-contributors": "node extra/sort-contributors.js",
"quick-run-nightly": "docker run --rm --env NODE_ENV=development -p 3001:3001 louislam/uptime-kuma:nightly2",
"start-dev-container": "cd docker && docker-compose -f docker-compose-dev.yml up --force-recreate",
"rebase-pr-to-1.23.X": "node extra/rebase-pr.js 1.23.X"
"rebase-pr-to-1.23.X": "node extra/rebase-pr.js 1.23.X",
"reset-migrate-aggregate-table-state": "node extra/reset-migrate-aggregate-table-state.js"
},
"dependencies": {
"@grpc/grpc-js": "~1.8.22",
Expand Down
163 changes: 163 additions & 0 deletions server/database.js
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,9 @@ const knex = require("knex");
const path = require("path");
const { EmbeddedMariaDB } = require("./embedded-mariadb");
const mysql = require("mysql2/promise");
const { Settings } = require("./settings");
const { UptimeCalculator } = require("./uptime-calculator");
const dayjs = require("dayjs");

/**
* Database & App Data Folder
Expand Down Expand Up @@ -391,9 +394,23 @@ class Database {
// https://knexjs.org/guide/migrations.html
// https://gist.github.com/NigelEarle/70db130cc040cc2868555b29a0278261
try {
// Disable foreign key check for SQLite
// Known issue of knex: https://github.com/drizzle-team/drizzle-orm/issues/1813
if (Database.dbConfig.type === "sqlite") {
await R.exec("PRAGMA foreign_keys = OFF");
}

await R.knex.migrate.latest({
directory: Database.knexMigrationsPath,
});

// Enable foreign key check for SQLite
if (Database.dbConfig.type === "sqlite") {
await R.exec("PRAGMA foreign_keys = ON");
}

await this.migrateAggregateTable();

} catch (e) {
// Allow missing patch files for downgrade or testing pr.
if (e.message.includes("the following files are missing:")) {
Expand Down Expand Up @@ -711,6 +728,152 @@ class Database {
}
}

/**
* Migrate the old data in the heartbeat table to the new format (stat_daily, stat_hourly, stat_minutely)
* It should be run once while upgrading V1 to V2
*
* Normally, it should be in transaction, but UptimeCalculator wasn't designed to be in transaction before that.
* I don't want to heavily modify the UptimeCalculator, so it is not in transaction.
* Run `npm run reset-migrate-aggregate-table-state` to reset, in case the migration is interrupted.
* @returns {Promise<void>}
*/
static async migrateAggregateTable() {
log.debug("db", "Enter Migrate Aggregate Table function");

// Add a setting for 2.0.0-dev users to skip this migration
if (process.env.SET_MIGRATE_AGGREGATE_TABLE_TO_TRUE === "1") {
log.warn("db", "SET_MIGRATE_AGGREGATE_TABLE_TO_TRUE is set to 1, skipping aggregate table migration forever (for 2.0.0-dev users)");
await Settings.set("migrateAggregateTableState", "migrated");
}

let migrateState = await Settings.get("migrateAggregateTableState");

// Skip if already migrated
// If it is migrating, it possibly means the migration was interrupted, or the migration is in progress
if (migrateState === "migrated") {
log.debug("db", "Migrated aggregate table already, skip");
return;
} else if (migrateState === "migrating") {
log.warn("db", "Aggregate table migration is already in progress, or it was interrupted");
throw new Error("Aggregate table migration is already in progress");
}

await Settings.set("migrateAggregateTableState", "migrating");

log.info("db", "Migrating Aggregate Table");

log.info("db", "Getting list of unique monitors");

// Get a list of unique monitors from the heartbeat table, using raw sql
let monitors = await R.getAll(`
SELECT DISTINCT monitor_id
FROM heartbeat
ORDER BY monitor_id ASC
`);

// Stop if stat_* tables are not empty
for (let table of [ "stat_minutely", "stat_hourly", "stat_daily" ]) {
let countResult = await R.getRow(`SELECT COUNT(*) AS count FROM ${table}`);
let count = countResult.count;
if (count > 0) {
log.warn("db", `Aggregate table ${table} is not empty, migration will not be started (Maybe you were using 2.0.0-dev?)`);
return;
}
}

let progressPercent = 0;
let part = 100 / monitors.length;
let i = 1;
for (let monitor of monitors) {
// Get a list of unique dates from the heartbeat table, using raw sql
let dates = await R.getAll(`
SELECT DISTINCT DATE(time) AS date
FROM heartbeat
WHERE monitor_id = ?
ORDER BY date ASC
`, [
monitor.monitor_id
]);

for (let date of dates) {
// New Uptime Calculator
let calculator = new UptimeCalculator();
calculator.monitorID = monitor.monitor_id;
calculator.setMigrationMode(true);

// Get all the heartbeats for this monitor and date
let heartbeats = await R.getAll(`
SELECT status, ping, time
FROM heartbeat
WHERE monitor_id = ?
AND DATE(time) = ?
ORDER BY time ASC
`, [ monitor.monitor_id, date.date ]);

if (heartbeats.length > 0) {
log.info("db", `[DON'T STOP] Migrating monitor data ${monitor.monitor_id} - ${date.date} [${progressPercent.toFixed(2)}%][${i}/${monitors.length}]`);
}

for (let heartbeat of heartbeats) {
await calculator.update(heartbeat.status, parseFloat(heartbeat.ping), dayjs(heartbeat.time));
}

progressPercent += (Math.round(part / dates.length * 100) / 100);

// Lazy to fix the floating point issue, it is acceptable since it is just a progress bar
if (progressPercent > 100) {
progressPercent = 100;
}
}

i++;
}

await Database.clearHeartbeatData(true);

await Settings.set("migrateAggregateTableState", "migrated");

if (monitors.length > 0) {
log.info("db", "Aggregate Table Migration Completed");
} else {
log.info("db", "No data to migrate");
}
}

/**
* Remove all non-important heartbeats from heartbeat table, keep last 24-hour or {KEEP_LAST_ROWS} rows for each monitor
* @param {boolean} detailedLog Log detailed information
* @returns {Promise<void>}
*/
static async clearHeartbeatData(detailedLog = false) {
let monitors = await R.getAll("SELECT id FROM monitor");
const sqlHourOffset = Database.sqlHourOffset();

for (let monitor of monitors) {
if (detailedLog) {
log.info("db", "Deleting non-important heartbeats for monitor " + monitor.id);
}
await R.exec(`
DELETE FROM heartbeat
WHERE monitor_id = ?
AND important = 0
AND time < ${sqlHourOffset}
AND id NOT IN (
SELECT id
FROM heartbeat
WHERE monitor_id = ?
ORDER BY time DESC
LIMIT ?
)
`, [
monitor.id,
-24,
monitor.id,
100,
]);
}
}

}

module.exports = Database;
34 changes: 21 additions & 13 deletions server/jobs/clear-old-data.js
Original file line number Diff line number Diff line change
@@ -1,21 +1,22 @@
const { R } = require("redbean-node");
const { log } = require("../../src/util");
const { setSetting, setting } = require("../util-server");
const Database = require("../database");
const { Settings } = require("../settings");
const dayjs = require("dayjs");

const DEFAULT_KEEP_PERIOD = 180;
const DEFAULT_KEEP_PERIOD = 365;

/**
* Clears old data from the heartbeat table of the database.
* Clears old data from the heartbeat table and the stat_daily of the database.
* @returns {Promise<void>} A promise that resolves when the data has been cleared.
*/

const clearOldData = async () => {
let period = await setting("keepDataPeriodDays");
await Database.clearHeartbeatData();
let period = await Settings.get("keepDataPeriodDays");

// Set Default Period
if (period == null) {
await setSetting("keepDataPeriodDays", DEFAULT_KEEP_PERIOD, "general");
await Settings.set("keepDataPeriodDays", DEFAULT_KEEP_PERIOD, "general");
period = DEFAULT_KEEP_PERIOD;
}

Expand All @@ -25,23 +26,28 @@ const clearOldData = async () => {
parsedPeriod = parseInt(period);
} catch (_) {
log.warn("clearOldData", "Failed to parse setting, resetting to default..");
await setSetting("keepDataPeriodDays", DEFAULT_KEEP_PERIOD, "general");
await Settings.set("keepDataPeriodDays", DEFAULT_KEEP_PERIOD, "general");
parsedPeriod = DEFAULT_KEEP_PERIOD;
}

if (parsedPeriod < 1) {
log.info("clearOldData", `Data deletion has been disabled as period is less than 1. Period is ${parsedPeriod} days.`);
} else {

log.debug("clearOldData", `Clearing Data older than ${parsedPeriod} days...`);

const sqlHourOffset = Database.sqlHourOffset();

try {
await R.exec(
"DELETE FROM heartbeat WHERE time < " + sqlHourOffset,
[ parsedPeriod * -24 ]
);
// Heartbeat
await R.exec("DELETE FROM heartbeat WHERE time < " + sqlHourOffset, [
parsedPeriod * -24,
]);

let timestamp = dayjs().subtract(parsedPeriod, "day").utc().startOf("day").unix();

// stat_daily
await R.exec("DELETE FROM stat_daily WHERE timestamp < ? ", [
timestamp,
]);

if (Database.dbConfig.type === "sqlite") {
await R.exec("PRAGMA optimize;");
Expand All @@ -50,6 +56,8 @@ const clearOldData = async () => {
log.error("clearOldData", `Failed to clear old data: ${e.message}`);
}
}

log.debug("clearOldData", "Data cleared.");
};

module.exports = {
Expand Down
16 changes: 11 additions & 5 deletions server/server.js
Original file line number Diff line number Diff line change
Expand Up @@ -1604,18 +1604,20 @@ let needSetup = false;

await server.start();

server.httpServer.listen(port, hostname, () => {
server.httpServer.listen(port, hostname, async () => {
if (hostname) {
log.info("server", `Listening on ${hostname}:${port}`);
} else {
log.info("server", `Listening on ${port}`);
}
startMonitors();
await startMonitors();

// Put this here. Start background jobs after the db and server is ready to prevent clear up during db migration.
await initBackgroundJobs();

checkVersion.startInterval();
});

await initBackgroundJobs();

// Start cloudflared at the end if configured
await cloudflaredAutoStart(cloudflaredToken);

Expand Down Expand Up @@ -1809,7 +1811,11 @@ async function startMonitors() {
}

for (let monitor of list) {
await monitor.start(io);
try {
await monitor.start(io);
} catch (e) {
log.error("monitor", e);
}
// Give some delays, so all monitors won't make request at the same moment when just start the server.
await sleep(getRandomInt(300, 1000));
}
Expand Down
Loading

0 comments on commit 4d779cf

Please sign in to comment.