-
Notifications
You must be signed in to change notification settings - Fork 3
/
dedupe.py
72 lines (62 loc) · 2.47 KB
/
dedupe.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
from zlib import crc32
import pathlib, os.path
import unityparser, uuid, yaml
def toTuple(d):
    """Recursively turn nested dicts and lists into hashable tuples.

    A dict becomes a tuple of ``(key, converted_value)`` pairs in the dict's
    iteration order, a list becomes a tuple of its converted elements, and
    any other value is returned unchanged.
    """
    if isinstance(d, dict):
        pairs = []
        for key, value in d.items():
            pairs.append((key, toTuple(value)))
        return tuple(pairs)
    if isinstance(d, list):
        return tuple(map(toTuple, d))
    return d
def toDict(d):
    """Recursively convert dict subclasses into plain ``dict`` objects.

    Dicts (including subclasses) are rebuilt as built-in ``dict`` with their
    values converted the same way.  Lists are rebuilt as plain lists with
    every element converted, so dict subclasses nested inside a list are
    normalised too.  Any other value is returned unchanged.
    """
    if isinstance(d, dict):
        return {k: toDict(v) for k, v in d.items()}
    if isinstance(d, list):
        # Without this branch, dict subclasses stored inside lists would be
        # passed through untouched.
        return [toDict(v) for v in d]
    return d
# Merge content-identical Unity assets under the current working directory.
#
# Each pass groups the candidate files by a CRC32 of their content, ignoring
# the first line of .shader files (the shader name) and the m_Name lines of
# .mat/.asset files.  A group is merged only when all of its .meta files are
# identical apart from the GUID: one copy is kept under a combined name in
# the common parent directory, it receives a newly generated GUID, and every
# occurrence of the old GUIDs in the project's non-.meta files is rewritten
# to the new one.  Files rewritten this way are re-checked on the next pass.
#
# NOTE(review): files are treated as duplicates on a CRC32 match alone; their
# bytes are never compared, so a checksum collision would merge distinct files.
toCheck = set(pathlib.Path.cwd().rglob("*.*"))
while toCheck:
    # Group candidate files by the checksum of their normalised content.
    files = {}
    for path in toCheck:
        # is_file() skips directories and also paths that were renamed or
        # deleted by a merge after they had been queued for re-checking.
        if not path.is_file():
            continue
        if path.suffix == ".meta":
            continue
        data = path.read_bytes()
        if path.suffix == ".shader":
            # Skip the shader name (first line).  A file without any newline
            # is hashed whole instead of being cut down to its last byte.
            firstNewline = data.find(b'\n')
            if firstNewline != -1:
                data = data[firstNewline:]
        elif path.suffix in (".mat", ".asset"):
            data = b'\n'.join(
                line for line in data.split(b'\n')
                if not line.startswith(b' m_Name: ')
            )
        files.setdefault(crc32(data), set()).add(path)

    toCheck = set()
    for dupes in files.values():
        if len(dupes) == 1:
            continue

        metas = [
            unityparser.UnityDocument.load_yaml(f.with_suffix(f.suffix + ".meta")).entry
            for f in dupes
        ]
        guids = {m['guid'] for m in metas}
        for m in metas:
            del m['guid']
        origMeta = toDict(metas[0])
        # Only merge when the .meta files agree on everything except the GUID.
        metas = {toTuple(m) for m in metas}
        if len(metas) != 1:
            continue

        # Sorted so the combined name does not depend on set iteration order.
        name = (
            ','.join(sorted({f.stem for f in dupes}))[:50]
            + ','.join(sorted({f.suffix for f in dupes}))
        )
        commonDir = pathlib.Path(os.path.commonpath(dupes))
        newPath = commonDir / name
        print("Moving", ", ".join(str(d.relative_to(commonDir)) for d in dupes), "to", newPath)

        for d in dupes:
            d.with_suffix(d.suffix + ".meta").unlink()

        # If the target path is itself one of the duplicates, keep that copy
        # in place; renaming another copy over it and then unlinking the rest
        # would delete the merged file.
        keeper = newPath if newPath in dupes else dupes.pop()
        dupes.discard(keeper)
        for d in dupes:
            d.unlink()
        if keeper != newPath:
            keeper.rename(newPath)

        newGuid = uuid.uuid4().hex
        origMeta['guid'] = newGuid
        with open(newPath.with_suffix(newPath.suffix + ".meta"), 'w') as metaFile:
            yaml.dump(origMeta, metaFile)

        # Rewrite references to the old GUIDs throughout the project.
        newGuid = newGuid.encode()
        guids = {guid.encode() for guid in guids}
        for f in pathlib.Path.cwd().rglob("*.*"):
            # Test the file being visited, not the leftover `path` variable
            # from the hashing loop above.
            if f.is_dir():
                continue
            if f.suffix == ".meta":
                continue
            txt = f.read_bytes()
            repl = txt
            for guid in guids:
                repl = repl.replace(guid, newGuid)
            if repl != txt:
                print("Updating", f)
                f.write_bytes(repl)
                # Its content changed, so it may now duplicate another file.
                toCheck.add(f)