-
Notifications
You must be signed in to change notification settings - Fork 0
/
laion-tarify.py
50 lines (47 loc) · 1.47 KB
/
laion-tarify.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
import argparse
from make_tar_utils import tardir, packup
import os
# Command-line options, one row per flag: (flag, value type, default, help).
# Rows are registered in the order listed, which is also the order in which
# they appear in the generated --help output.
_CLI_OPTIONS = (
    (
        "--input",
        str,
        "CMU-MOSI/data",
        "input folder, expecting subdirectory like train, valid or test",
    ),
    (
        "--output",
        str,
        "tar/cmu-mosi/",
        "output, generating tar files at output/dataclass/filename_{}.tar",
    ),
    (
        "--filename",
        str,
        "",
        "the filename of the tar, generating tar files at output/dataclass/filename_{}.tar",
    ),
    (
        "--dataclass",
        str,
        "all",
        "train or test or valid or all",
    ),
    (
        "--num_element",
        int,
        2048,
        "pairs of (audio, text) to be included in a single tar",
    ),
    (
        "--start_idx",
        int,
        0,
        "start index of the tar",
    ),
)

parser = argparse.ArgumentParser()
for _flag, _value_type, _default, _help_text in _CLI_OPTIONS:
    parser.add_argument(_flag, type=_value_type, default=_default, help=_help_text)

# NOTE: parsing happens at module level (outside the __main__ guard), so
# sys.argv is read as soon as this file is loaded.
args = parser.parse_args()
if __name__ == "__main__":
    # Script entry point: pick the packing mode from --dataclass.
    #   "all"   -> call packup once for each of train / valid / test
    #   "none"  -> call tardir on --input / --output directly
    #   other   -> call packup once, using the given value as the split name
    if args.dataclass == "all":
        for split in ("train", "valid", "test"):
            packup(args.input, args.output, args.filename, split, args.num_element)
    elif args.dataclass == "none":
        # tardir is handed the output folder as-is, so make sure it exists.
        # --filename is not used in this mode.
        os.makedirs(args.output, exist_ok=True)
        tardir(
            args.input,
            args.output,
            args.num_element,
            # Forward the user's --start_idx; it was previously hard-coded
            # to 0, which made the flag a no-op. The default is still 0.
            start_idx=args.start_idx,
            delete_file=False,
        )
    else:
        # Any other --dataclass value is passed through as the split name.
        # NOTE(review): packup is not given --start_idx; whether it accepts
        # a start index is not visible from this file -- confirm in
        # make_tar_utils before wiring it through.
        packup(args.input, args.output, args.filename, args.dataclass, args.num_element)