Skip to content

Commit

Permalink
Update dev deps (#283)
Browse files Browse the repository at this point in the history
  • Loading branch information
rom1504 authored Jan 21, 2024
1 parent bd0088b commit 527245c
Show file tree
Hide file tree
Showing 22 changed files with 31 additions and 98 deletions.
63 changes: 0 additions & 63 deletions .pylintrc
Original file line number Diff line number Diff line change
Expand Up @@ -7,9 +7,6 @@
# pygtk.require().
#init-hook=

# Profiled execution.
profile=no

# Add files or directories to the blacklist. They should be base names, not
# paths.
ignore=CVS
Expand Down Expand Up @@ -41,22 +38,13 @@ enable=indexing-exception,old-raise-syntax
disable=design,similarities,no-self-use,attribute-defined-outside-init,locally-disabled,star-args,pointless-except,bad-option-value,global-statement,fixme,suppressed-message,useless-suppression,locally-enabled,no-member,no-name-in-module,import-error,unsubscriptable-object,unbalanced-tuple-unpacking,undefined-variable,not-context-manager,no-else-return,wrong-import-order,unnecessary-pass,logging-fstring-interpolation,logging-format-interpolation,C0330


# Set the cache size for astng objects.
cache-size=500


[REPORTS]

# Set the output format. Available formats are text, parseable, colorized, msvs
# (visual studio) and html. You can also give a reporter class, eg
# mypackage.mymodule.MyReporterClass.
output-format=text

# Put messages in a separate file for each module / package specified on the
# command line instead of printing them on stdout. Reports (if any) will be
# written in a file name "pylint_global.[txt|html]".
files-output=no

# Tells whether to display a full report or only the messages
reports=no

Expand All @@ -67,10 +55,6 @@ reports=no
# (RP0004).
evaluation=10.0 - ((float(5 * error + warning + refactor + convention) / statement) * 10)

# Add a comment according to your evaluation note. This is used by the global
# evaluation report (RP0004).
comment=no

# Template used to display messages. This is a python new-style format string
# used to format the message information. See doc for all details
#msg-template=
Expand All @@ -86,10 +70,6 @@ ignore-mixin-members=yes
# (useful for classes with attributes dynamically set).
ignored-classes=SQLObject

# When zope mode is activated, add a predefined set of Zope acquired attributes
# to generated-members.
zope=no

# List of members which are set dynamically and missed by pylint inference
# system, and so shouldn't trigger E0201 when accessed. Python regular
# expressions are accepted.
Expand All @@ -116,17 +96,6 @@ additional-builtins=

[BASIC]

# Required attributes for module, separated by a comma
required-attributes=

# List of builtins function names that should not be used, separated by a comma
bad-functions=apply,input,reduce


# Disable the report(s) with the given id(s).
# All non-Google reports are disabled by default.
disable-report=R0001,R0002,R0003,R0004,R0101,R0102,R0201,R0202,R0220,R0401,R0402,R0701,R0801,R0901,R0902,R0903,R0904,R0911,R0912,R0913,R0914,R0915,R0921,R0922,R0923

# Regular expression which should only match correct module names
module-rgx=(([a-z_][a-z0-9_]*)|([A-Z][a-zA-Z0-9]+))$

Expand Down Expand Up @@ -196,9 +165,6 @@ ignore-long-lines=(?x)
# else.
single-line-if-stmt=y

# List of optional constructs for which whitespace checking is disabled
no-space-check=

# Maximum number of lines in a module
max-module-lines=99999

Expand Down Expand Up @@ -250,10 +216,6 @@ extension-pkg-whitelist=_jsonnet

[CLASSES]

# List of interface methods to ignore, separated by a comma. This is used for
# instance to not check methods defines in Zope's Interface base class.
ignore-iface-methods=isImplementedBy,deferred,extends,names,namesAndDescriptions,queryDescriptionFor,getBases,getDescriptionFor,getDoc,getName,getTaggedValue,getTaggedValueTags,isEqualOrExtendedBy,setTaggedValue,isImplementedByInstancesOf,adaptWith,is_implemented_by

# List of method names used to declare (i.e. assign) instance attributes.
defining-attr-methods=__init__,__new__,setUp

Expand Down Expand Up @@ -298,31 +260,6 @@ min-public-methods=2
max-public-methods=20


[EXCEPTIONS]

# Exceptions that will emit a warning when being caught. Defaults to
# "Exception"
overgeneral-exceptions=Exception,StandardError,BaseException


[AST]

# Maximum line length for lambdas
short-func-length=1

# List of module members that should be marked as deprecated.
# All of the string functions are listed in 4.1.4 Deprecated string functions
# in the Python 2.4 docs.
deprecated-members=string.atof,string.atoi,string.atol,string.capitalize,string.expandtabs,string.find,string.rfind,string.index,string.rindex,string.count,string.lower,string.split,string.rsplit,string.splitfields,string.join,string.joinfields,string.lstrip,string.rstrip,string.strip,string.swapcase,string.translate,string.upper,string.ljust,string.rjust,string.center,string.zfill,string.replace,sys.exitfunc


[DOCSTRING]

# List of exceptions that do not need to be mentioned in the Raises section of
# a docstring.
ignore-exceptions=AssertionError,NotImplementedError,StopIteration,TypeError



[TOKENS]

Expand Down
2 changes: 1 addition & 1 deletion Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ install-dev: ## [Local development] Install test requirements
lint: ## [Local development] Run mypy, pylint and black
python -m mypy video2dataset
python -m pylint video2dataset
python -m black --check -l 120 video2dataset
python -m black --check -l 120 .

black: ## [Local development] Auto-format python code using black
python -m black -l 120 .
Expand Down
2 changes: 2 additions & 0 deletions benchmark/benchmark_dataloader.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,8 @@

# Benchmark videos are the WebVid validation split (5000 videos)
SHARDS = "examples/dataset/{00000..00004}.tar"


def benchmark_train_dl(num_frames, num_workers, bs=1, num_threads=4, resize_size=None, crop_size=None):
from argparse import Namespace
from webdataset import WebLoader
Expand Down
1 change: 1 addition & 0 deletions benchmark/benchmark_subsamplers.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
from video2dataset import subsamplers
from video2dataset.dataloader import get_video_dataset


# Add this function to gather system information
def gather_system_info():
cpu_count = os.cpu_count()
Expand Down
12 changes: 6 additions & 6 deletions requirements-test.txt
Original file line number Diff line number Diff line change
@@ -1,11 +1,11 @@
black==22.3.0
mypy==1.6.0
black==23.12.1
mypy==1.8.0
pylint==3.0.3
pytest-cov==4.1.0
pytest-xdist==3.5.0
pytest==7.4.4
types-requests==2.31.0.20240106
types-PyYAML==6.0.12.12
pylint==2.13.4
pytest-cov==3.0.0
pytest-xdist==2.5.0
pytest==7.0.1
types-requests
webvtt-py
tensorflow
Expand Down
1 change: 0 additions & 1 deletion tests/test_data_writers.py
Original file line number Diff line number Diff line change
Expand Up @@ -72,7 +72,6 @@ def test_writer(modalities, writer_type, tmp_path):
writer.close()

if writer_type != "dummy":

df = pd.read_parquet(output_folder + "/00000.parquet")

expected_columns = [
Expand Down
3 changes: 0 additions & 3 deletions tests/test_dataloaders.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,6 @@

@pytest.mark.parametrize("batch_size", [1, 4])
def test_return_all(batch_size):

decoder_kwargs = {"n_frames": 10, "fps": None, "num_threads": 1}

dset = get_video_dataset(
Expand Down Expand Up @@ -47,7 +46,6 @@ def test_return_all(batch_size):

@pytest.mark.parametrize("batch_size", [1, 2])
def test_default(batch_size):

decoder_kwargs = {"n_frames": 10, "fps": None, "num_threads": 1}

dset = get_video_dataset(
Expand Down Expand Up @@ -80,7 +78,6 @@ def test_default(batch_size):
[(1, ["0000008_00001", "0000030_00005", "0000038_00003"]), (2, ["0000008_00001", "0000030_00005"])],
)
def test_drop_last(batch_size, expected_keys):

decoder_kwargs = {"n_frames": 10, "fps": None, "num_threads": 1}

dset = get_video_dataset(
Expand Down
2 changes: 1 addition & 1 deletion tests/test_subsamplers.py
Original file line number Diff line number Diff line change
Expand Up @@ -125,7 +125,7 @@ def test_resolution_subsampler_video_size(size, resize_mode):
assert w_vid == size


@pytest.mark.parametrize("height,width,resize_mode", [(-1,128, ["scale"]), (1620,1620, ["scale", "crop", "pad"])])
@pytest.mark.parametrize("height,width,resize_mode", [(-1, 128, ["scale"]), (1620, 1620, ["scale", "crop", "pad"])])
def test_resolution_subsampler_height_and_width(height, width, resize_mode):
current_folder = os.path.dirname(__file__)
    # video length - 2:02, 1080x1920
Expand Down
3 changes: 1 addition & 2 deletions video2dataset/data_reader.py
Original file line number Diff line number Diff line change
Expand Up @@ -77,11 +77,10 @@ def get_yt_meta(url, yt_metadata_args: dict) -> dict:
info_dict, sub_dict = None, None

with yt_dlp.YoutubeDL(yt_metadata_args) as yt:

info_dict = yt.extract_info(url, download=False)
if write_subs:
sub_url = info_dict["requested_subtitles"][yt_metadata_args["subtitleslangs"][0]]["url"]
res = requests.get(sub_url)
res = requests.get(sub_url, timeout=10)
sub = io.TextIOWrapper(io.BytesIO(res.content)).read()
sub_dict = sub_to_dict(sub)

Expand Down
4 changes: 3 additions & 1 deletion video2dataset/dataloader/audio_decode.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,9 @@ def __call__(self, key, data):
if pad_start < 0:
waveform = waveform[:, : self.max_length * self.sample_rate]
if pad_start > 0:
waveform = F.pad(waveform, (0, self.max_length * self.sample_rate - waveform.shape[1]), "constant")
waveform = F.pad( # pylint: disable=not-callable
waveform, (0, self.max_length * self.sample_rate - waveform.shape[1]), "constant"
)
pad_masks[:pad_start] = 1.0

additional_info["audio_pad_masks"] = pad_masks
Expand Down
1 change: 0 additions & 1 deletion video2dataset/dataloader/custom_wds.py
Original file line number Diff line number Diff line change
Expand Up @@ -371,7 +371,6 @@ def refill_prefix(self, prefix):

def __iter__(self):
while self.it < self.__len__():

# sample prefix with corresponding probs
prefix_id = np.random.choice(len(self.ps), 1, p=list(self.ps.values())).item()
prefix = list(self.ps.keys())[prefix_id]
Expand Down
1 change: 0 additions & 1 deletion video2dataset/dataloader/dataloader.py
Original file line number Diff line number Diff line change
Expand Up @@ -67,7 +67,6 @@ def get_video_dataset(
return_always: bool = False,
handler=wds.reraise_exception,
):

"""
Generates a webdataset given the specified parameters.
Parameters:
Expand Down
1 change: 0 additions & 1 deletion video2dataset/dataloader/transform.py
Original file line number Diff line number Diff line change
Expand Up @@ -125,7 +125,6 @@ def __call__(self, data):
reference = self._get_reference_frame(resize_size, h, w)

for frame in frames:

if resize_size is not None:
frame = cv2.resize(
frame,
Expand Down
2 changes: 1 addition & 1 deletion video2dataset/dataloader/video_decode.py
Original file line number Diff line number Diff line change
Expand Up @@ -115,7 +115,7 @@ def get_frames(self, reader, n_frames, stride, **kwargs): # pylint: disable=arg
# can just output first_pad_index or a mask or something
pad_start = len(frames)
if self.pad_frames and frames.shape[0] < self.n_frames:
frames = F.pad(frames, (0, 0) * 3 + (0, self.n_frames - frames.shape[0]))
frames = F.pad(frames, (0, 0) * 3 + (0, self.n_frames - frames.shape[0])) # pylint: disable=not-callable

return frames, frame_start, pad_start

Expand Down
5 changes: 2 additions & 3 deletions video2dataset/distributor.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,7 @@ def multiprocessing_distributor(processes_count, worker, input_sharder, _, max_s

def run(gen):
failed_shards = []
for (status, row) in tqdm(process_pool.imap_unordered(worker, gen)):
for status, row in tqdm(process_pool.imap_unordered(worker, gen)):
if status is False:
failed_shards.append(row)
return failed_shards
Expand Down Expand Up @@ -76,7 +76,7 @@ def run(gen):
failed_shards = []
for batch in batcher(gen, subjob_size):
rdd = spark.sparkContext.parallelize(batch, len(batch))
for (status, row) in rdd.map(worker).collect():
for status, row in rdd.map(worker).collect():
if status is False:
failed_shards.append(row)
return failed_shards
Expand Down Expand Up @@ -199,7 +199,6 @@ def __init__(
print(f"Wrote sbatch to {self.sbatch_path}")

def _make_sbatch(self):

nodelist = ("#SBATCH --nodelist " + self.nodelist) if self.nodelist is not None else ""
exclude = ("#SBATCH --exclude " + self.exclude) if self.exclude is not None else ""
account = ("#SBATCH --account " + self.account) if self.account is not None else ""
Expand Down
4 changes: 2 additions & 2 deletions video2dataset/input_sharder.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,7 @@ def __init__(
if fs.isdir(url_path):
self.input_files = sorted(fs.glob(url_path + "/*." + input_format))
if len(self.input_files) == 0:
raise Exception(f"No file found at path {url_path} with extension {input_format}")
raise ValueError(f"No file found at path {url_path} with extension {input_format}")
else:
self.input_files = [url_path]

Expand Down Expand Up @@ -142,7 +142,7 @@ def write_shard(t):
else:
raise e
# can't reach here
raise Exception("Failed to write to file.")
raise ValueError("Failed to write to file.")

for i in range(10):
shards = []
Expand Down
2 changes: 1 addition & 1 deletion video2dataset/output_sharder.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ def __init__(self, shard_list, input_format, done_shards, sampler=lambda x: x) -
if "s3://" in shard_list:
self.shard_list = ["s3://" + s for s in self.shard_list]
if len(self.shard_list) == 0:
raise Exception(f"No file found at path {url_path} with extension {input_format}")
raise ValueError(f"No file found at path {url_path} with extension {input_format}")
else:
self.shard_list = list(braceexpand.braceexpand(shard_list))

Expand Down
1 change: 1 addition & 0 deletions video2dataset/subsamplers/ffprobe_subsampler.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@

from .subsampler import Subsampler


# TODO: figure out why this is so slow (12 samples/s)
class FFProbeSubsampler(Subsampler):
"""
Expand Down
1 change: 0 additions & 1 deletion video2dataset/subsamplers/optical_flow_subsampler.py
Original file line number Diff line number Diff line change
Expand Up @@ -85,7 +85,6 @@ class RAFTDetector:
"""

def __init__(self, args, downsample_size=None):

self.device = args.get("device", "cuda")
self.downsample_size = downsample_size

Expand Down
2 changes: 1 addition & 1 deletion video2dataset/subsamplers/resolution_subsampler.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ def __init__(
encode_format: str = "mp4",
):
if video_size > 0 and (height > 0 or width > 0):
raise Exception("Either set video_size, or set height and/or width")
raise ValueError("Either set video_size, or set height and/or width")
self.resize_mode = resize_mode
self.height = height if video_size < 0 else video_size
self.width = width if video_size < 0 else video_size
Expand Down
8 changes: 4 additions & 4 deletions video2dataset/workers/download_worker.py
Original file line number Diff line number Diff line change
Expand Up @@ -193,7 +193,7 @@ def data_generator():
print(error_message)
if "[youtube]" in error_message: # video-specific error, remove videoID
error_message = "ERROR: [youtube]:" + error_message.split(":")[-1]
raise Exception("failed_to_download")
raise ValueError("failed_to_download")

for stream in streams.values():
bytes_downloaded += len(stream)
Expand All @@ -203,7 +203,7 @@ def data_generator():
if self.ffprobe_subsampler is not None:
streams, meta, error_message = self.ffprobe_subsampler(streams, meta)
if error_message is not None:
raise Exception("failed_to_subsample")
raise ValueError("failed_to_subsample")

if self.config["storage"]["captions_are_subtitles"]: # create clips
subtitles = meta["yt_meta_dict"]["subtitles"]
Expand All @@ -212,7 +212,7 @@ def data_generator():
streams, cuts, error_message = self.cut_detector(streams)

if error_message is not None:
raise Exception("failed_to_subsample")
raise ValueError("failed_to_subsample")

meta["cuts"] = cuts

Expand All @@ -239,7 +239,7 @@ def data_generator():

if error_message is not None:
meta["clips"] = []
raise Exception("failed_to_subsample")
raise ValueError("failed_to_subsample")

successes += 1
status = "success"
Expand Down
Loading

0 comments on commit 527245c

Please sign in to comment.