-
Notifications
You must be signed in to change notification settings - Fork 2
/
md5dups
executable file
·45 lines (42 loc) · 1.12 KB
/
md5dups
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
#!/bin/bash
# (c) 2020 Leif Sawyer
# License: GPL 3.0 (see https://github.com/akhepcat/)
# Permanent home: https://github.com/akhepcat/Miscellaneous/
# Direct download: https://raw.githubusercontent.com/akhepcat/Miscellaneous/master/md5dups
#
# reads a list of MD5sums and filenames and outputs only the duplicates
#
# you could feed it with something like this, which hashes only files larger than one byte that have no additional hard links
# find Pictures/ -type f -size +1c -links 1 -print0 | xargs -0 -i md5sum "{}" > allmd5s
# and then
# md5dups < allmd5s
#
# or make a bash alias:
# alias findups='find . -type f -size +1c -links 1 -print0 | xargs -0 -i md5sum "{}" | md5dups | sort -f'
#
#
# Each md5sum line is "<32-char hash><2-char separator><path>".  The line is
# cut by character position rather than split on whitespace, so paths that
# contain spaces survive intact.  substr() is used instead of the gawk-only
# FIELDWIDTHS variable so that any POSIX awk works and long paths are never
# truncated.
#
# Input:  md5sum-style lines on stdin; empty lines are ignored.
# Output: one line per path whose hash occurs on more than one input line,
#         written as the hash, one space, then the remainder of the input
#         line (separator included).  Lines are grouped by hash; hashes and
#         paths both appear in the order they were first seen.
#
awk '
# Skip empty lines.  This is a string comparison on purpose: comparing the
# hash against a number would drop hashes that happen to look numeric.
$0 == "" { next }

{
    md5 = substr($0, 1, 32)
    fname = substr($0, 33)

    # Remember each hash once, in first-seen order, and count its lines.
    # NOTE: every input line is counted, so a path listed twice with the
    # same hash is reported as a duplicate of itself.
    if (!(md5 in hashes))
        hash_order[++nhashes] = md5
    hashes[md5]++

    # Remember each path once; a repeated path keeps the last hash seen.
    if (!(fname in files))
        name_order[++nnames] = fname
    files[fname] = md5
}

END {
    # Single pass over the paths: bucket the ones whose hash is shared.
    for (i = 1; i <= nnames; i++) {
        fname = name_order[i]
        md5 = files[fname]
        if (hashes[md5] > 1)
            group[md5, ++group_size[md5]] = fname
    }

    # Emit the buckets hash by hash; hashes without a bucket print nothing.
    for (i = 1; i <= nhashes; i++) {
        md5 = hash_order[i]
        for (j = 1; j <= group_size[md5]; j++)
            print md5 " " group[md5, j]
    }
}
'
# end