-
Notifications
You must be signed in to change notification settings - Fork 0
/
update_structural.py
executable file
·51 lines (39 loc) · 1.95 KB
/
update_structural.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
import re
import pandas
# Load the structural manifest and order it by druid, then by sequence within
# each druid.
df = pandas.read_csv('structural.csv')
df = df.sort_values(['druid', 'sequence'])

# this one didn't convert so it can be removed from the file_manifest
df = df[df['druid'] != 'hc941fm6529']

# there are two files marked as 3d which aren't .obj files; demote only those.
# The mask has to be "3d AND NOT .obj": an XOR would additionally match every
# .obj file whose resource type is something other than 3d and rewrite it to
# 'file'. na=False treats a missing filename as "not .obj" so the mask stays
# purely boolean and can be negated.
df.loc[
    (df['resource_type'] == '3d') & ~df['filename'].str.endswith('.obj', na=False),
    'resource_type',
] = 'file'

# this one zip had a different resource type from all the rest of the zips
df.loc[df['filename'] == 'bc769sr4504.zip', 'resource_type'] = 'file'

# get a copy of all the existing .obj 3d objects so we can modify and add them
# back as glb files
glbs = df[df['resource_type'] == '3d'].copy()

# now we can update all the original 3d objects to be type file
df.loc[df['resource_type'] == '3d', 'resource_type'] = 'file'

# turn the copied rows into glb rows: swap the trailing .obj extension in the
# filename and label, and set the glTF binary mimetype. Bracket assignment is
# used so the value always lands in a column (attribute assignment would only
# set a plain Python attribute if the column did not already exist).
glbs['filename'] = glbs['filename'].str.replace(r'\.obj$', '.glb', regex=True)
glbs['file_label'] = glbs['file_label'].str.replace(r'\.obj$', '.glb', regex=True)
glbs['mimetype'] = 'model/gltf-binary'
def update_sequences(row, manifest=None):
    """Give a new glb row its own sequence number and resource label.

    Written to be used as ``glbs.apply(update_sequences, axis=1)``.

    Args:
        row: one glb row; its ``druid`` and ``resource_label`` entries are
            read, and ``sequence`` and ``resource_label`` are overwritten.
        manifest: table with ``druid`` and ``sequence`` columns, used to find
            the highest sequence among the rows sharing this row's druid.
            Defaults to the module-level ``df``.

    Returns:
        The same row, with ``sequence`` set to one more than the highest
        sequence for its druid in ``manifest`` and ``resource_label``
        rewritten: 'Object 1' becomes 'Object 2', '3d 1' becomes '3d 2', and
        for a label starting with 'File <n>' the number is replaced by the
        new sequence.

    Raises:
        ValueError: if the resource label matches none of the known forms.
    """
    if manifest is None:
        manifest = df

    # NOTE(review): every glb row of the same druid receives the same value
    # here, because the maximum is always taken from the unmodified table.
    # Presumably each druid has a single 3d object -- confirm against the data.
    same_druid = manifest['druid'] == row['druid']
    next_sequence = manifest.loc[same_druid, 'sequence'].max() + 1
    row['sequence'] = next_sequence

    label = row['resource_label']
    if label == 'Object 1':
        row['resource_label'] = 'Object 2'
    elif label == '3d 1':
        row['resource_label'] = '3d 2'
    elif m := re.match(r'File (\d+)', label):
        # Replace only the matched number; str.replace() would also rewrite
        # any other occurrence of the same digits elsewhere in the label.
        row['resource_label'] = (
            label[:m.start(1)] + str(next_sequence) + label[m.end(1):]
        )
    else:
        raise ValueError(f"unknown 3d resource label: {row}")
    return row
# Run update_sequences over the glb table one row at a time. This has to
# finish before df is reassigned below, because update_sequences looks up
# per-druid sequence values in df.
glbs = glbs.apply(update_sequences, axis='columns')

# Append the glb rows to the original table and re-sort by druid and sequence,
# so that each glb row appears alongside the other files of its druid.
df = pandas.concat([df, glbs]).sort_values(by=['druid', 'sequence'])

# Export the finished manifest, leaving the DataFrame index out of the file.
df.to_csv('output/file_manifest.csv', index=False)