Short description
Generating the speech_commands dataset fails with a JSONDecodeError raised while pydub decodes one of the WAV files (full trace below).
Environment information
tensorflow-datasets/tfds-nightly version: 4.9.4
tensorflow/tf-nightly version: 2.16.1
Reproduction instructions
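(Not spelled out in the original report; the following is a minimal sketch of a build that exercises the failing code path, assuming the dataset is prepared through the standard tfds.load API.)

import tensorflow_datasets as tfds

# Downloading and preparing speech_commands runs the WAV decoding path
# (pydub -> ffprobe) that fails in the trace below.
ds = tfds.load('speech_commands', split='train')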
Link to logs
File ~/opt/anaconda3/envs/tensorflow_latest/lib/python3.10/site-packages/tensorflow_datasets/core/dataset_builder.py:1584, in GeneratorBasedBuilder._download_and_prepare(self, dl_manager, download_config)
1572 for split_name, generator in utils.tqdm(
1573 split_generators.items(),
1574 desc="Generating splits...",
1575 unit=" splits",
1576 leave=False,
1577 ):
1578 filename_template = naming.ShardedFileTemplate(
1579 split=split_name,
1580 dataset_name=self.name,
1581 data_dir=self.data_path,
1582 filetype_suffix=path_suffix,
1583 )
-> 1584 future = split_builder.submit_split_generation(
1585 split_name=split_name,
1586 generator=generator,
1587 filename_template=filename_template,
1588 disable_shuffling=self.info.disable_shuffling,
1589 )
1590 split_info_futures.append(future)
1592 # Process the result of the beam pipeline.
File ~/opt/anaconda3/envs/tensorflow_latest/lib/python3.10/site-packages/tensorflow_datasets/core/split_builder.py:341, in SplitBuilder.submit_split_generation(self, split_name, generator, filename_template, disable_shuffling)
338 # Depending on the type of generator, we use the corresponding
339 # `_build_from_xyz` method.
340 if isinstance(generator, collections.abc.Iterable):
--> 341 return self._build_from_generator(**build_kwargs)
342 else:  # Otherwise, beam required
343 unknown_generator_type = TypeError(
344 f'Invalid split generator value for split `{split_name}`. '
345 'Expected generator or apache_beam object. Got: '
346 f'{type(generator)}'
347 )
File ~/opt/anaconda3/envs/tensorflow_latest/lib/python3.10/site-packages/tensorflow_datasets/core/split_builder.py:406, in SplitBuilder._build_from_generator(self, split_name, generator, filename_template, disable_shuffling)
396 serialized_info = self._features.get_serialized_info()
397 writer = writer_lib.Writer(
398 serializer=example_serializer.ExampleSerializer(serialized_info),
399 filename_template=filename_template,
(...)
404 shard_config=self._shard_config,
405 )
--> 406 for key, example in utils.tqdm(
407 generator,
408 desc=f'Generating {split_name} examples...',
409 unit=' examples',
410 total=total_num_examples,
411 leave=False,
412 mininterval=1.0,
413 ):
414 try:
415 example = self._features.encode_example(example)
File ~/opt/anaconda3/envs/tensorflow_latest/lib/python3.10/site-packages/tqdm/notebook.py:249, in tqdm_notebook.__iter__(self)
247 try:
248 it = super(tqdm_notebook, self).__iter__()
--> 249 for obj in it:
250 # return super(tqdm...) will not catch exception
251 yield obj
252 # NB: except ... [ as ...] breaks IPython async KeyboardInterrupt
File ~/opt/anaconda3/envs/tensorflow_latest/lib/python3.10/site-packages/tqdm/std.py:1182, in tqdm.__iter__(self)
1179 time = self._time
1181 try:
-> 1182 for obj in iterable:
1183 yield obj
1184 # Update and possibly print the progressbar.
1185 # Note: does not call self.update(1) for speed optimisation.
File ~/opt/anaconda3/envs/tensorflow_latest/lib/python3.10/site-packages/tensorflow_datasets/datasets/speech_commands/speech_commands_dataset_builder.py:138, in Builder._generate_examples(self, archive, file_list)
134 else:
135 try:
136 example = {
137 'audio': np.array(
--> 138 lazy_imports_lib.lazy_imports.pydub.AudioSegment.from_file(
139 file_obj, format='wav'
140 ).get_array_of_samples()
141 ),
142 'label': label,
143 }
144 yield example_id, example
145 except (
146 lazy_imports_lib.lazy_imports.pydub.exceptions.CouldntDecodeError
147 ):
File ~/opt/anaconda3/envs/tensorflow_latest/lib/python3.10/site-packages/pydub/audio_segment.py:728, in AudioSegment.from_file(cls, file, format, codec, parameters, start_second, duration, **kwargs)
726 info = None
727 else:
--> 728 info = mediainfo_json(orig_file, read_ahead_limit=read_ahead_limit)
729 if info:
730 audio_streams = [x for x in info['streams']
731 if x['codec_type'] == 'audio']
File ~/opt/anaconda3/envs/tensorflow_latest/lib/python3.10/site-packages/pydub/utils.py:279, in mediainfo_json(filepath, read_ahead_limit)
276 output = output.decode("utf-8", 'ignore')
277 stderr = stderr.decode("utf-8", 'ignore')
--> 279 info = json.loads(output)
281 if not info:
282 # If ffprobe didn't give any information, just return it
283 # (for example, because the file doesn't exist)
284 return info
File ~/opt/anaconda3/envs/tensorflow_latest/lib/python3.10/json/__init__.py:346, in loads(s, cls, object_hook, parse_float, parse_int, parse_constant, object_pairs_hook, **kw)
341 s = s.decode(detect_encoding(s), 'surrogatepass')
343 if (cls is None and object_hook is None and
344 parse_int is None and parse_float is None and
345 parse_constant is None and object_pairs_hook is None and not kw):
--> 346 return _default_decoder.decode(s)
347 if cls is None:
348 cls = JSONDecoder
File ~/opt/anaconda3/envs/tensorflow_latest/lib/python3.10/json/decoder.py:337, in JSONDecoder.decode(self, s, _w)
332 def decode(self, s, _w=WHITESPACE.match):
333 """Return the Python representation of
s
(astr
instance334 containing a JSON document).
335
336 """
--> 337 obj, end = self.raw_decode(s, idx=_w(s, 0).end())
338 end = _w(s, end).end()
339 if end != len(s):
File ~/opt/anaconda3/envs/tensorflow_latest/lib/python3.10/json/decoder.py:355, in JSONDecoder.raw_decode(self, s, idx)
353 obj, end = self.scan_once(s, idx)
354 except StopIteration as err:
--> 355 raise JSONDecodeError("Expecting value", s, err.value) from None
356 return obj, end
JSONDecodeError: Expecting value: line 1 column 1 (char 0)
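(Not part of the original report; a minimal sketch of why this message appears: pydub's mediainfo_json() json-decodes the stdout of an ffprobe call, and json.loads on an empty string raises exactly this error. Whether a missing or broken ffmpeg/ffprobe installation is the cause here is only an assumption.)

import json
import shutil

# json.loads('') raises "Expecting value: line 1 column 1 (char 0)",
# the same message as at the bottom of the trace.
try:
    json.loads('')
except json.JSONDecodeError as err:
    print(err)

# pydub shells out to ffprobe in mediainfo_json(); None here would mean
# it is not on PATH (hypothetical diagnostic, not from the report).
print('ffprobe:', shutil.which('ffprobe'))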
Expected behavior
No error during the generation of the speech_commands dataset.