From 69decd910a6877019c31e1dbba93484bd50c149e Mon Sep 17 00:00:00 2001 From: Michael Weiser Date: Fri, 31 Aug 2018 14:18:29 +0100 Subject: [PATCH] Make mime type detection more linear Reorder mime type detection logic to avoid redoing work whose result won't be used. Makes it clear that we detect only once and then use the attribute's value. The logic of merging in newly detected types hinted at by a comment has long been gone. Remove that comment. --- peekaboo/sample.py | 60 ++++++++++++++++++++++------------------------ 1 file changed, 29 insertions(+), 31 deletions(-) diff --git a/peekaboo/sample.py b/peekaboo/sample.py index 4393c9d1..cadf9ee5 100644 --- a/peekaboo/sample.py +++ b/peekaboo/sample.py @@ -284,15 +284,29 @@ def file_extension(self): @property def mimetypes(self): - """ - Can not be cached (hard to determine if known/complete). + if self.has_attr('mimetypes'): + return self.get_attr('mimetypes') - determine mime on original p[0-9]* file - later result will be "inode/symlink" - """ mime_types = set() - smime = { + # get MIME type from meta info + declared_mt = None + if self.has_attr('meta_info_type_declared'): + declared_mt = self.get_attr('meta_info_type_declared') + if declared_mt is not None: + logger.debug('Sample declared as "%s"' % declared_mt) + mime_types.add(declared_mt) + + declared_filename = self.__filename + if self.has_attr('meta_info_name_declared'): + declared_filename = self.get_attr('meta_info_name_declared') + + # check if the sample is an S/MIME signature (smime.p7s) + # If so, don't overwrite the MIME type since we do not want to analyse + # S/MIME signatures. + # FIXME: This is oddly specific for this generic routine. Should it be + # some sort of callback or plugin? + leave_alone_types = { 'p7s': [ 'application/pkcs7-signature', 'application/x-pkcs7-signature', @@ -301,22 +315,14 @@ def mimetypes(self): ] } - # get MIME type from meta info - try: - declared_mt = self.get_attr('meta_info_type_declared') - if declared_mt is not None: - logger.debug('Sample declared as "%s"' % declared_mt) - mime_types.add(declared_mt) - except Exception as e: - logger.exception(e) - declared_mt = None - logger.error('Cannot get MIME type from meta info.') - - try: - declared_filename = self.get_attr('meta_info_name_declared') - except KeyError: - declared_filename = self.__filename + if declared_filename == 'smime.p7s' and declared_mt in leave_alone_types['p7s']: + logger.info('S/MIME signature detected. Using declared MIME type over detected ones.') + mime_types = set([declared_mt]) + self.set_attr('mimetypes', mime_types) + return mime_types + # determine mime on original p[0-9]* file + # result of __submit_path would be "inode/symlink" content_based_mime_type = guess_mime_type_from_file_contents(self.__path) if content_based_mime_type is not None: mime_types.add(content_based_mime_type) @@ -326,16 +332,8 @@ def mimetypes(self): mime_types.add(name_based_mime_type) logger.debug('Determined MIME Types: %s' % mime_types) - # check if the sample is an S/MIME signature (smime.p7s) - # If so, don't overwrite the MIME type since we do not want to analyse S/MIME signatures. - if declared_filename == 'smime.p7s' and declared_mt in smime['p7s']: - logger.info('S/MIME signature detected. Using declared MIME type over detected ones.') - mime_types = set([declared_mt]) - - if not self.has_attr('mimetypes'): - self.set_attr('mimetypes', mime_types) - - return self.get_attr('mimetypes') + self.set_attr('mimetypes', mime_types) + return mime_types @property def job_id(self):