From 26ce5f82fc1f410b9f1ac4300c00a43220b1b45d Mon Sep 17 00:00:00 2001 From: Fedele Mantuano Date: Sun, 9 Oct 2016 22:02:26 +0200 Subject: [PATCH 1/2] Added reload conf for tokenizer bolt (only flag filters). Minor fixes --- src/bolts/abstracts.py | 11 +++-------- src/bolts/attachments.py | 2 +- src/bolts/output_elasticsearch.py | 2 +- src/bolts/output_redis.py | 2 +- src/bolts/phishing.py | 2 +- src/bolts/tokenizer.py | 12 ++++++++++-- 6 files changed, 17 insertions(+), 14 deletions(-) diff --git a/src/bolts/abstracts.py b/src/bolts/abstracts.py index 9bf1db6..1c61bf7 100644 --- a/src/bolts/abstracts.py +++ b/src/bolts/abstracts.py @@ -41,9 +41,7 @@ def _conf_loader(self): if not self.conf_file: raise ImproperlyConfigured( "Bolts configuration path NOT set for '{}'".format( - self.component_name - ) - ) + self.component_name)) self.log("Reloading configuration for bolt") self._bolts_conf = load_config(self.conf_file) self._conf = self.bolts_conf[self.component_name] @@ -99,7 +97,7 @@ def _load_whitelist(self): def process_tick(self, freq): """Every freq seconds you reload the whitelist """ - super(AbstractUrlsHandlerBolt, self)._conf_loader() + super(AbstractUrlsHandlerBolt, self).process_tick(freq) self._load_whitelist() def _extract_urls(self, text, conv_to_str=True): @@ -120,9 +118,6 @@ def _extract_urls(self, text, conv_to_str=True): with_urls = True if conv_to_str: - urls = json.dumps( - urls, - ensure_ascii=False - ) + urls = json.dumps(urls, ensure_ascii=False) return with_urls, urls diff --git a/src/bolts/attachments.py b/src/bolts/attachments.py index 0065b1c..1b0a759 100644 --- a/src/bolts/attachments.py +++ b/src/bolts/attachments.py @@ -73,7 +73,7 @@ def _load_lists(self): def process_tick(self, freq): """Every freq seconds you reload the keywords. """ - super(Attachments, self)._conf_loader() + super(Attachments, self).process_tick(freq) self._load_settings() def process(self, tup): diff --git a/src/bolts/output_elasticsearch.py b/src/bolts/output_elasticsearch.py index d126c58..1957b0e 100644 --- a/src/bolts/output_elasticsearch.py +++ b/src/bolts/output_elasticsearch.py @@ -140,7 +140,7 @@ def process(self, tup): def process_tick(self, freq): """Every freq seconds flush messages. """ - super(OutputElasticsearch, self)._conf_loader() + super(OutputElasticsearch, self).process_tick(freq) if self._mails or self._attachments: self.log("Flush mail in Elasticsearch after tick") diff --git a/src/bolts/output_redis.py b/src/bolts/output_redis.py index 62066d9..71298a0 100644 --- a/src/bolts/output_redis.py +++ b/src/bolts/output_redis.py @@ -66,7 +66,7 @@ def process(self, tup): def process_tick(self, freq): """Every freq seconds flush messages. """ - super(OutputRedis, self)._conf_loader() + super(OutputRedis, self).process_tick(freq) if self._mails: self.log("Flush mail in Redis server after tick") self.flush() diff --git a/src/bolts/phishing.py b/src/bolts/phishing.py index 3d97262..ce476ad 100644 --- a/src/bolts/phishing.py +++ b/src/bolts/phishing.py @@ -190,7 +190,7 @@ def _search_phishing(self, greedy_data): def process_tick(self, freq): """Every freq seconds you reload the keywords. """ - super(Phishing, self)._conf_loader() + super(Phishing, self).process_tick(freq) self._load_lists() def process(self, tup): diff --git a/src/bolts/tokenizer.py b/src/bolts/tokenizer.py index 1dfe3b9..fbbe8b6 100644 --- a/src/bolts/tokenizer.py +++ b/src/bolts/tokenizer.py @@ -45,11 +45,14 @@ def initialize(self, stormconf, context): super(Tokenizer, self).initialize(stormconf, context) self._parser = MailParser() - self._filter_mails_enabled = self.conf["filter_mails"] - self._filter_attachments_enabled = self.conf["filter_attachments"] self._mails_analyzed = deque(maxlen=self.conf["maxlen_mails"]) self._attachments_analyzed = deque( maxlen=self.conf["maxlen_attachments"]) + self._load_filters() + + def _load_filters(self): + self._filter_mails_enabled = self.conf["filter_mails"] + self._filter_attachments_enabled = self.conf["filter_attachments"] @property def filter_mails_enabled(self): @@ -141,6 +144,11 @@ def _make_mail(self, tup): return sha256_rand, raw_mail, mail + def process_tick(self, freq): + """Every freq seconds you reload configuration. """ + super(Tokenizer, self).process_tick(freq) + self._load_filters() + def process(self, tup): try: sha256_rand, raw_mail, mail = self._make_mail(tup) From 7a4f03e56ec2cbe4106f6879506d7085c268aee2 Mon Sep 17 00:00:00 2001 From: Fedele Mantuano Date: Fri, 14 Oct 2016 22:21:19 +0200 Subject: [PATCH 2/2] Replace hard coding input bolts with context. Minor fix. --- src/bolts/abstracts.py | 7 +++---- src/bolts/json_maker.py | 8 +------- src/bolts/phishing.py | 6 +----- 3 files changed, 5 insertions(+), 16 deletions(-) diff --git a/src/bolts/abstracts.py b/src/bolts/abstracts.py index 1c61bf7..1edec1e 100644 --- a/src/bolts/abstracts.py +++ b/src/bolts/abstracts.py @@ -109,10 +109,9 @@ def _extract_urls(self, text, conv_to_str=True): urls = self.extractor.urls_obj domains = urls.keys() - if self._whitelist: - for d in domains: - if d.lower() in self._whitelist: - urls.pop(d) + for d in domains: + if d.lower() in self._whitelist: + urls.pop(d) if urls: with_urls = True diff --git a/src/bolts/json_maker.py b/src/bolts/json_maker.py index 4225c39..8add50e 100644 --- a/src/bolts/json_maker.py +++ b/src/bolts/json_maker.py @@ -24,13 +24,7 @@ class JsonMaker(Bolt): def initialize(self, stormconf, context): self.mails = {} - self.input_bolts = set([ - "tokenizer", - "phishing", - "attachments", - "forms", - "urls-handler-body", - "urls-handler-attachments"]) + self.input_bolts = set(context['source->stream->grouping'].keys()) # Phishing bitmap self._phishing_bitmap = PhishingBitMap() diff --git a/src/bolts/phishing.py b/src/bolts/phishing.py index ce476ad..0606b96 100644 --- a/src/bolts/phishing.py +++ b/src/bolts/phishing.py @@ -30,11 +30,7 @@ def initialize(self, stormconf, context): super(Phishing, self).initialize(stormconf, context) # Input bolts for Phishing bolt - self.input_bolts = set([ - "tokenizer", - "attachments", - "urls-handler-body", - "urls-handler-attachments"]) + self.input_bolts = set(context['source->stream->grouping'].keys()) # All mails self.mails = {}