#!/usr/bin/perl
# --hostid: use this hostid, not the one from `hostid`
# This is a shell (as in snails, not tcsh) for a backup program
# with these properties:
#
# - encrypts filenames, not just file content
# - saves files based on sha1 (duplicate files stored just once)
# - can backup a single directory, but keeps track of what's been backed up globally
# - versioning
# - creates backup "chunks" of a given size (eg, to fit on a DVD), and remembers what it's backed up (see the make_chunks sketch below)
# - avoids expensive operations (such as sha1, or even a find without an mtime test) when possible
# Tried this in sqlite3 once, but I sense postgresql/mysql or flat files
# might be better
# bc-filename-encrypt.pl will become a part of this, most likely
# TODO: sort/uniq can cleanup dupes when needed
# TODO: keep plaintext and encrypted versions of each "table"
# TODO: splitting into multiple files won't kill us?
# NOTE: assumes if a file's timestamp hasn't changed, neither has the
# content (fails for special case of wikipediafs, alas)
# conceptually, this program does two things:
# converts machine/file/time to sha1sum
# converts sha1sum to backed-up-location
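# Below is a hedged sketch (not a settled design) of those two mappings,
# using tab-separated flat files as the comments above suggest; the names
# filedata.txt/backupdata.txt and the record layouts (matching the schema
# POD at the bottom) are assumptions for illustration only.
sub file_to_sha1 {
  my($host, $path, $mtime) = @_;
  open(my $fh, "<", "filedata.txt") || return undef;
  while (<$fh>) {
    chomp;
    # assumed record: hostid path mtime sha1sum size
    my($h, $p, $m, $sha, $size) = split(/\t/);
    if ($h eq $host && $p eq $path && $m == $mtime) {close($fh); return $sha;}
  }
  close($fh);
  return undef;
}

sub sha1_to_location {
  my($sha1) = @_;
  open(my $fh, "<", "backupdata.txt") || return undef;
  while (<$fh>) {
    chomp;
    # assumed record: sha1sum where-stored
    my($s, $where) = split(/\t/);
    if ($s eq $sha1) {close($fh); return $where;}
  }
  close($fh);
  return undef;
}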
require "bclib.pl";
# determine hostid (yes, it's global); chomp the newline from `hostid` output
$hostid = $globopts{hostid} || `hostid`;
chomp($hostid);
unless ($hostid && $hostid ne "0") {die("hostid is 0 or doesn't exist");}
backdir("/home/barrycarter/MP3");
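# Hedged sketch of the "chunks of a given size" property listed above:
# greedy first-fit packing of [sha1sum, size] pairs into chunks of at
# most $chunksize bytes; illustrative only, and make_chunks is a
# hypothetical name, not something bclib.pl provides.
sub make_chunks {
  my($chunksize, @files) = @_;  # each element: [sha1sum, size-in-bytes]
  my(@chunks);
  my($cur, $cursize) = ([], 0);
  for my $f (@files) {
    # start a new chunk if this file would overflow the current one
    if (@$cur && $cursize + $f->[1] > $chunksize) {
      push(@chunks, $cur);
      ($cur, $cursize) = ([], 0);
    }
    push(@$cur, $f);
    $cursize += $f->[1];
  }
  push(@chunks, $cur) if (@$cur);
  return @chunks;
}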
# "backup" a directory
sub backdir {
my($dir) = @_;
# TODO: indicate that dir is being backed up now
# TODO: only find files more recent than the last backup
# TODO: (or parent dirs? but could be probs w symlinks)
# TODO: caching only while testing (null separator is cleaner)
# TODO: allow sorting (so "most important" files definitely backed up)
# we need the timestamp to check for dupes; we also grab size "JFF" (just for fun) since it's easy
# size and mtime print before each NUL-terminated path, so every \0-separated record is "size mtime path"
my($out, $err, $res)=cache_command("find $dir -type f -printf '%s %T\@ ' -print0", "age=3600");
# list of "size mtime path" records
my(@files) = split(/\0/, $out);
debug("FILES",@files);
}
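# Hedged sketch of the NOTE above: trust an unchanged mtime and reuse
# the stored sha1sum, hashing contents only when the timestamp differs.
# Digest::SHA is core Perl; file_to_sha1() is the sketch lookup above.
use Digest::SHA;
sub sha1_if_changed {
  my($path, $mtime) = @_;
  # cheap path: a matching hostid/path/mtime row means we already know the sha1
  my($cached) = file_to_sha1($hostid, $path, $mtime);
  if (defined($cached)) {return $cached;}
  # expensive path: actually hash the file contents
  return Digest::SHA->new(1)->addfile($path)->hexdigest();
}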
=item schema

Data we store (whether in an SQL table or flat file); note that 'size'
below is redundant:

 filedata: hostid path mtime sha1sum size
 backupdata: sha1sum where-stored
 symlinks: hostid path mtime target (merge w/ filedata?)
 timestamps: dir when-last-backed-up

 CREATE TABLE filedata (
  hostid TEXT,
  path TEXT,
  mtime FLOAT,
  sha1sum TEXT,
  size INT
 );

=cut
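# Hedged sketch of writing one filedata row as a tab-separated line in
# the flat-file variant of the schema above, plus the sort/uniq dupe
# cleanup from the TODO near the top; filedata.txt is an assumed name.
sub record_filedata {
  my($host, $path, $mtime, $sha, $size) = @_;
  open(my $fh, ">>", "filedata.txt") || die("can't append to filedata.txt: $!");
  print $fh join("\t", $host, $path, $mtime, $sha, $size), "\n";
  close($fh);
}

# drop exact-duplicate rows by rewriting the table through sort -u
sub dedupe_filedata {
  system("sort -u filedata.txt > filedata.txt.new && mv filedata.txt.new filedata.txt");
}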