diff --git a/README.md b/README.md index 6b10669a..01c44455 100644 --- a/README.md +++ b/README.md @@ -236,7 +236,7 @@ Unless specified otherwise in this section the fixture files are MIT licensed an ### WAV - c_11k16bitpcm.wav and c_8kmp316.wav are from [Wikipedia WAV](https://en.wikipedia.org/wiki/WAV#Comparison_of_coding_schemes), retrieved January 7, 2018 - c_39064__alienbomb__atmo-truck.wav is from [freesound](https://freesound.org/people/alienbomb/sounds/39064/) and is CC0 licensed -- c_M1F1-Alaw-AFsp.wav and d_6_Channel_ID.wav are from a [McGill Engineering site](http://www-mmsp.ece.mcgill.ca/Documents/AudioFormats/WAVE/Samples.html) +- c_M1F1-Alaw-AFsp.wav and invalid_d_6_Channel_ID.wav are from a [McGill Engineering site](http://www-mmsp.ece.mcgill.ca/Documents/AudioFormats/WAVE/Samples.html) ### WEBP - With the exception of extended-animation.webp, which was obtained from Wikimedia Commons and is Creative Commons diff --git a/lib/parsers/mp3_parser.rb b/lib/parsers/mp3_parser.rb index 7bd5e753..8e16f928 100644 --- a/lib/parsers/mp3_parser.rb +++ b/lib/parsers/mp3_parser.rb @@ -76,6 +76,11 @@ def call(raw_io) io.seek(0) return if TIFF_HEADER_BYTES.include?(safe_read(io, 4)) + # Prevention against parsing WAV files. + io.seek(0) + wav_chunk_id, _wav_size, wav_riff_type = safe_read(io, 12).unpack('a4la4') + return if wav_chunk_id == 'RIFF' || wav_riff_type == 'WAVE' + # Read all the ID3 tags (or at least attempt to) io.seek(0) id3v1 = ID3Extraction.attempt_id3_v1_extraction(io) @@ -90,6 +95,7 @@ def call(raw_io) io.seek(ignore_bytes_at_head) + maybe_xing_header, initial_frames = parse_mpeg_frames(io) return if initial_frames.empty? @@ -315,5 +321,5 @@ def with_id3tag_local_configs end end - FormatParser.register_parser new, natures: :audio, formats: :mp3, priority: 99 + FormatParser.register_parser new, natures: :audio, formats: :mp3, priority: 101 end diff --git a/spec/fixtures/FLAC/sample_rate_0.flac b/spec/fixtures/FLAC/invalid_sample_rate_0.flac similarity index 100% rename from spec/fixtures/FLAC/sample_rate_0.flac rename to spec/fixtures/FLAC/invalid_sample_rate_0.flac diff --git a/spec/fixtures/PDF/PDF 2.0 with offset start.pdf b/spec/fixtures/PDF/invalid PDF 2.0 with offset start.pdf similarity index 100% rename from spec/fixtures/PDF/PDF 2.0 with offset start.pdf rename to spec/fixtures/PDF/invalid PDF 2.0 with offset start.pdf diff --git a/spec/fixtures/PDF/exceed_PDF_read_limit.pdf b/spec/fixtures/PDF/invalid_exceed_PDF_read_limit.pdf similarity index 100% rename from spec/fixtures/PDF/exceed_PDF_read_limit.pdf rename to spec/fixtures/PDF/invalid_exceed_PDF_read_limit.pdf diff --git a/spec/fixtures/PDF/not_a.pdf b/spec/fixtures/PDF/invalid_not_a.pdf similarity index 100% rename from spec/fixtures/PDF/not_a.pdf rename to spec/fixtures/PDF/invalid_not_a.pdf diff --git a/spec/fixtures/WAV/c_SCAM_MIC_SOL001_RUN001.wav b/spec/fixtures/WAV/c_SCAM_MIC_SOL001_RUN001.wav new file mode 100644 index 00000000..94f11925 Binary files /dev/null and b/spec/fixtures/WAV/c_SCAM_MIC_SOL001_RUN001.wav differ diff --git a/spec/fixtures/WAV/d_6_Channel_ID.wav b/spec/fixtures/WAV/invalid_d_6_Channel_ID.wav similarity index 100% rename from spec/fixtures/WAV/d_6_Channel_ID.wav rename to spec/fixtures/WAV/invalid_d_6_Channel_ID.wav diff --git a/spec/fixtures/WEBP/unrecognised-variant.webp b/spec/fixtures/WEBP/invalid-unrecognised-variant.webp similarity index 100% rename from spec/fixtures/WEBP/unrecognised-variant.webp rename to spec/fixtures/WEBP/invalid-unrecognised-variant.webp diff --git a/spec/format_parser_spec.rb b/spec/format_parser_spec.rb index 46c4d875..bc794a34 100644 --- a/spec/format_parser_spec.rb +++ b/spec/format_parser_spec.rb @@ -189,12 +189,24 @@ 'FormatParser::CR3Parser', 'FormatParser::DPXParser', 'FormatParser::FLACParser', - 'FormatParser::MP3Parser', 'FormatParser::OggParser', 'FormatParser::TIFFParser', - 'FormatParser::WAVParser' + 'FormatParser::WAVParser', + 'FormatParser::MP3Parser' ]) end + + it 'ensures that MP3 parser is the last one among all' do + parsers = FormatParser.parsers_for( + [:audio, :image, :document, :text, :video, :archive], + [:aac, :aiff, :arw, :bmp, :cr2, :cr3, :dpx, :fdx, :flac, :gif, :heif, :heic, + :jpg, :json, :m3u, :mov, :mp3, :mp4, :m4a, :m4b, :m4p, :m4r, :m4v, :mpg, + :mpeg, :nef, :ogg, :pdf, :png, :psd, :rw2, :tif, :wav, :webp, :zip] + ) + + parser_class_names = parsers.map { |parser| parser.class.name } + expect(parser_class_names.last).to eq 'FormatParser::MP3Parser' + end end describe '.register_parser and .deregister_parser' do diff --git a/spec/parsers/flac_parser_spec.rb b/spec/parsers/flac_parser_spec.rb index 608007fa..c06790f1 100644 --- a/spec/parsers/flac_parser_spec.rb +++ b/spec/parsers/flac_parser_spec.rb @@ -55,7 +55,7 @@ end it 'raises an error when sample rate is 0' do - fpath = fixtures_dir + 'FLAC/sample_rate_0.flac' + fpath = fixtures_dir + 'FLAC/invalid_sample_rate_0.flac' expect { subject.call(File.open(fpath, 'rb')) diff --git a/spec/parsers/mp3_parser_spec.rb b/spec/parsers/mp3_parser_spec.rb index 62214918..9343b74a 100644 --- a/spec/parsers/mp3_parser_spec.rb +++ b/spec/parsers/mp3_parser_spec.rb @@ -36,6 +36,12 @@ expect(parsed).to be_nil end + it 'does not misdetect a WAV' do + fpath = fixtures_dir + '/WAV/c_SCAM_MIC_SOL001_RUN001.wav' + parsed = subject.call(File.open(fpath, 'rb')) + expect(parsed).to be_nil + end + describe 'title/artist/album attributes' do let(:parsed) { subject.call(File.open(fpath, 'rb')) } diff --git a/spec/parsers/pdf_parser_spec.rb b/spec/parsers/pdf_parser_spec.rb index 8d748474..47b6fe37 100644 --- a/spec/parsers/pdf_parser_spec.rb +++ b/spec/parsers/pdf_parser_spec.rb @@ -46,17 +46,17 @@ def parse_pdf(pdf_filename) describe 'broken PDF files should not parse' do it 'PDF with missing version header' do - parsed_pdf = parse_pdf 'not_a.pdf' + parsed_pdf = parse_pdf 'invalid_not_a.pdf' expect(parsed_pdf).to be_nil end it 'PDF 2.0 with offset start' do - parsed_pdf = parse_pdf 'PDF 2.0 with offset start.pdf' + parsed_pdf = parse_pdf 'invalid PDF 2.0 with offset start.pdf' expect(parsed_pdf).to be_nil end it 'exceeds the PDF read limit' do - parsed_pdf = parse_pdf 'exceed_PDF_read_limit.pdf' + parsed_pdf = parse_pdf 'invalid_exceed_PDF_read_limit.pdf' expect(parsed_pdf).to be_nil end end diff --git a/spec/parsers/wav_parser_spec.rb b/spec/parsers/wav_parser_spec.rb index c3dae466..e81c2b0b 100644 --- a/spec/parsers/wav_parser_spec.rb +++ b/spec/parsers/wav_parser_spec.rb @@ -48,7 +48,7 @@ it "cannot parse file with audio format different from 1 and no 'fact' chunk" do expect { - subject.call(File.open(__dir__ + '/../fixtures/WAV/d_6_Channel_ID.wav', 'rb')) + subject.call(File.open(__dir__ + '/../fixtures/WAV/invalid_d_6_Channel_ID.wav', 'rb')) }.to raise_error(FormatParser::IOUtils::InvalidRead) end end diff --git a/spec/parsers/webp_parser_spec.rb b/spec/parsers/webp_parser_spec.rb index 51ce9503..b9fc7980 100644 --- a/spec/parsers/webp_parser_spec.rb +++ b/spec/parsers/webp_parser_spec.rb @@ -7,7 +7,7 @@ end it 'does not parse files with an unrecognised variant' do - result = subject.call(File.open(fixtures_dir + 'WEBP/unrecognised-variant.webp', 'rb')) + result = subject.call(File.open(fixtures_dir + 'WEBP/invalid-unrecognised-variant.webp', 'rb')) expect(result).to be_nil end diff --git a/spec/remote_fetching_spec.rb b/spec/remote_fetching_spec.rb index 0f359970..59b96caa 100644 --- a/spec/remote_fetching_spec.rb +++ b/spec/remote_fetching_spec.rb @@ -104,6 +104,58 @@ expect(file_information.format).to eq(:png) end + describe 'correctly parses WAV files without falling back to another filetype' do + ['c_8kmp316.wav', 'c_SCAM_MIC_SOL001_RUN001.wav'].each do |filename| + it "parses WAV file #{filename}" do + remote_url = 'http://localhost:9399/WAV/' + filename + file_information = FormatParser.parse_http(remote_url) + expect(file_information).not_to be_nil + expect(file_information.format).to eq(:wav) + end + end + end + + describe "correctly parses files over HTTP without filename hint" do + nature_fixture_dirs = { + :document => ['PDF'], + :audio => ['AAC', 'AIFF', 'FLAC', 'MP3', 'WAV'], + :video => ['MOV', 'MP4'], + :image => ['ARW', 'CR2', 'CR3', 'GIF', 'JPG', 'NEF', 'PNG', 'PSD', 'RW2', 'TIF', 'WEBP'] + } + nature_fixture_dirs.each { |nature, dirs| + dirs.each do |file_type_dir| + Dir.glob(fixtures_dir + "/#{file_type_dir}/*.*").each do |file_path| + file_name = File.basename(file_path) + next if file_name.include? "invalid" + + expected_format = file_type_dir.downcase.to_sym + if file_type_dir == 'HEIF' + expected_format = File.extname(file_name).delete('.').downcase.to_sym + end + + it "parses #{file_type_dir} file: #{file_name}" do + url = "http://localhost:9399/#{file_type_dir}/#{file_name}?some_param=test".gsub(' ', '%20') + file_information = FormatParser.parse_http(url) + expect(file_information).not_to be_nil + expect(file_information.nature).to eq(nature) + expect(file_information.format == expected_format).to be_truthy + end + + end + end + } + + # Dir.glob(fixtures_dir + "/HEIF/*.*").each do |file_path| + # file_name = File.basename(file_path) + # next if file_name.include? "invalid" + # + # expected_format = file_type_dir.downcase.to_sym + # it "parses #{file_type_dir} file: #{file_name}" do + # parseFile(file_type_dir, file_name, nature, expected_format) + # end + # end + end + describe 'when parsing remote fixtures' do Dir.glob(fixtures_dir + '/**/*.*').sort.each do |fixture_path| filename = File.basename(fixture_path)