Skip to content

Commit

Permalink
Guess file extension from mime type
Browse files Browse the repository at this point in the history
Add fallback logic to guess a file extension from known mime types.
  • Loading branch information
michaelweiser committed Aug 31, 2018
1 parent a225cd1 commit d7233d3
Show file tree
Hide file tree
Showing 2 changed files with 38 additions and 5 deletions.
15 changes: 14 additions & 1 deletion peekaboo/sample.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,8 @@
CuckooAnalysisFailedException
from peekaboo.toolbox.sampletools import ConnectionMap, next_job_hash
from peekaboo.toolbox.files import guess_mime_type_from_file_contents, \
guess_mime_type_from_filename
guess_mime_type_from_filename, \
guess_file_extension_from_mime_type
from peekaboo.toolbox.ms_office import has_office_macros
import peekaboo.ruleset as ruleset

Expand Down Expand Up @@ -279,6 +280,18 @@ def file_extension(self):

# extension or the empty string if none found
file_ext = os.path.splitext(filename)[1][1:]

# as a fallback try to guess file extension from mime type
if not file_ext:
for mt in self.mimetypes:
mtfe = guess_file_extension_from_mime_type(mt)
if mtfe:
# strip leading dot
file_ext = mtfe[1:]
break

# file_ext will still be the empty string if the fallback couldn't guess any
# extension either
self.set_attr('file_extension', file_ext)
return file_ext

Expand Down
28 changes: 24 additions & 4 deletions peekaboo/toolbox/files.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,14 +43,34 @@ def guess_mime_type_from_file_contents(file_path):
return mt


def register_additional_mime_types():
    """ Register mime types the mimetypes module does not know by default.

    Safe to call repeatedly: mimetypes.add_type() initialises the module on
    first use and re-adding an already known mapping changes nothing. """
    # Do not take the module's inited flag as an indicator that our types are
    # registered: it is also set when any other code initialised the module
    # (e.g. by calling mimetypes.init() or mimetypes.guess_type()), in which
    # case the additional types would never get registered at all.
    mimetypes.add_type('application/javascript', '.jse')


def guess_mime_type_from_filename(file_path):
    """ Guess the type of a file based on its filename or URL.

    Returns the mime type as a string or None if it cannot be guessed. """
    # registration of additional types (and the implied initialisation of the
    # mimetypes module) is left to the helper instead of being duplicated here
    register_additional_mime_types()

    # guess_type() returns a (type, encoding) tuple, only the type is wanted
    mt = mimetypes.guess_type(file_path)[0]
    if not mt:
        return None

    return mt


def guess_file_extension_from_mime_type(mt):
    """ Map a mime type to a file extension, including the leading dot.

    The mimetypes module may know several extensions for a single mime type.
    Which of them is returned is not guaranteed to be the most widely used or
    canonical one. Returns None if no extension is known for the type. """
    register_additional_mime_types()
    extension = mimetypes.guess_extension(mt)
    return extension if extension else None

0 comments on commit d7233d3

Please sign in to comment.