From 66a3631b4dba101f4359370e21c0fd4e766a6805 Mon Sep 17 00:00:00 2001 From: Guilherme Bacellar Moralez Date: Tue, 22 Aug 2023 16:03:10 -0300 Subject: [PATCH] Added tests to telegram_export_text_generator.py + updated on Documentation Signed-off-by: Guilherme Bacellar Moralez --- README.md | 14 ++ .../telegram_export_text_generator.py | 2 +- .../test_telegram_export_text_generator.py | 187 ++++++++++++++++++ tox.ini | 2 +- 4 files changed, 203 insertions(+), 2 deletions(-) create mode 100644 tests/modules/telegram_report_generator/test_telegram_export_text_generator.py diff --git a/README.md b/README.md index 27bf392..038aabf 100644 --- a/README.md +++ b/README.md @@ -123,6 +123,7 @@ TEx listen --config CONFIGURATION_FILE_PATH --group_id 1234,5678 * **group_id** > Optional - If present, Download the Messages only from Specified Groups ID's ### Download Messages (Download since first message for each group) +Scrape Messages from Telegram Server ```bash TEx download_messages --config CONFIGURATION_FILE_PATH --group_id 1234,5678 ``` @@ -132,6 +133,7 @@ TEx download_messages --config CONFIGURATION_FILE_PATH --group_id 1234,5678 * **group_id** > Optional - If present, Download the Messages only from Specified Groups ID's ### Generate Report +Generate HTML Report ```bash TEx report --config CONFIGURATION_FILE_PATH --report_folder REPORT_FOLDER_PATH --group_id * --around_messages NUM --order_desc --limit_days 3 --filter FILTER_EXPRESSION_1,FILTER_EXPRESSION_2,FILTER_EXPRESSION_N ``` @@ -145,6 +147,7 @@ TEx report --config CONFIGURATION_FILE_PATH --report_folder REPORT_FOLDER_PATH - * **suppress_repeating_messages** > Optional - If present, suppress all repeating messages in the same report ### Export Downloaded Files +Export Downloaded Files by MimeType ```bash TEx export_file --config CONFIGURATION_FILE_PATH -report_folder REPORT_FOLDER_PATH --group_id * --filter * --limit_days 3 --mime_type text/plain ``` @@ -155,6 +158,17 @@ TEx export_file --config 
CONFIGURATION_FILE_PATH -report_folder REPORT_FOLDER_PA * **limit_days** > Optional - Number of Days of past to filter the Messages * **mime_type** > Optional - File MIME Type. Ex: application/vnd.android.package-archive +### Export Texts +Export Messages (Texts) that match a Regex +```bash +TEx export_text --config CONFIGURATION_FILE_PATH --order_desc --limit_days 3 --regex REGEX --report_folder REPORT_FOLDER_PATH --group_id * +``` + * **config** > Required - Created Configuration File Path + * **report_folder** > Optional - Defines the Report Files Folder + * **group_id** > Optional - If present, Export the Messages only from the Specified Group IDs + * **limit_days** > Optional - Number of Past Days used to filter the Messages + * **regex** > Required - Regex to find the messages. + * Ex: Export Links from Messages (.*http://.*),(.*https://.*) ## License diff --git a/TEx/modules/telegram_report_generator/telegram_export_text_generator.py b/TEx/modules/telegram_report_generator/telegram_export_text_generator.py index de67d1e..2ff0089 100644 --- a/TEx/modules/telegram_report_generator/telegram_export_text_generator.py +++ b/TEx/modules/telegram_report_generator/telegram_export_text_generator.py @@ -50,7 +50,7 @@ async def run(self, config: ConfigParser, args: Dict, data: Dict) -> None: # Load Groups from DB db_groups: List[TelegramGroupOrmEntity] = TelegramGroupDatabaseManager.get_all_by_phone_number( - args['target_phone_number']) + config['CONFIGURATION']['phone_number']) logger.info(f'\t\tFound {len(db_groups)} Groups') # Map to Facade Entities diff --git a/tests/modules/telegram_report_generator/test_telegram_export_text_generator.py b/tests/modules/telegram_report_generator/test_telegram_export_text_generator.py new file mode 100644 index 0000000..b8cdc23 --- /dev/null +++ b/tests/modules/telegram_report_generator/test_telegram_export_text_generator.py @@ -0,0 +1,187 @@ +"""Telegram Report - Export Texts Generator.""" + +import asyncio +import datetime +import 
os.path
+import shutil
+import unittest
+from configparser import ConfigParser
+from typing import Dict
+from unittest import mock
+
+import pytz
+from sqlalchemy import select, insert, delete
+from telethon.tl.functions.messages import GetDialogsRequest
+
+from TEx.core.dir_manager import DirectoryManagerUtils
+from TEx.database.db_initializer import DbInitializer
+from TEx.database.db_manager import DbManager
+from TEx.database.telegram_group_database import TelegramGroupDatabaseManager, TelegramMediaDatabaseManager, \
+    TelegramMessageDatabaseManager
+from TEx.models.database.telegram_db_model import (
+    TelegramGroupOrmEntity,
+    TelegramMediaOrmEntity, TelegramMessageOrmEntity, TelegramUserOrmEntity,
+)
+from TEx.modules.execution_configuration_handler import ExecutionConfigurationHandler
+from TEx.modules.telegram_messages_scrapper import TelegramGroupMessageScrapper
+from TEx.modules.telegram_report_generator.telegram_export_text_generator import TelegramExportTextGenerator
+from TEx.modules.telegram_report_generator.telegram_html_report_generator import TelegramReportGenerator
+from tests.modules.common import TestsCommon
+from tests.modules.mockups_groups_mockup_data import base_groups_mockup_data, base_messages_mockup_data, \
+    base_users_mockup_data
+
+
+class TelegramExportTextGeneratorTest(unittest.TestCase):
+    """Tests for TelegramExportTextGenerator.run against two groups (ids 2 and 3) seeded in setUp."""
+
+    def setUp(self) -> None:
+        """Read the config, drop stale exports, reset the DB tables and seed groups 2 and 3 with 4 messages each."""
+        self.config = ConfigParser()
+        self.config.read('../../config.ini')
+
+        DirectoryManagerUtils.ensure_dir_struct('_data')
+        DirectoryManagerUtils.ensure_dir_struct('_data/resources')
+        DirectoryManagerUtils.ensure_dir_struct('_data/media')
+        DirectoryManagerUtils.ensure_dir_struct('_data/media/2')
+
+        # Remove exports left by an earlier test or run so the existence checks only see this test's output
+        for stale_name in ('result_UN-b_2.txt', 'result_UN-c_3.txt'):
+            stale_path = os.path.join('_report', stale_name)
+            if os.path.exists(stale_path):
+                os.remove(stale_path)
+
+        DbInitializer.init(data_path='_data/')
+
+        # Reset SQLite Tables (messages, groups, media)
+        DbManager.SESSIONS['data'].execute(delete(TelegramMessageOrmEntity))
+        DbManager.SESSIONS['data'].execute(delete(TelegramGroupOrmEntity))
+        DbManager.SESSIONS['data'].execute(delete(TelegramMediaOrmEntity))
+        DbManager.SESSIONS['data'].commit()
+
+        # Add Group 2 - With Previous Messages
+        TelegramGroupDatabaseManager.insert_or_update({
+            'id': 2, 'constructor_id': 'B', 'access_hash': 'BBBBBB',
+            'fake': False, 'gigagroup': False, 'has_geo': False,
+            'participants_count': 2, 'restricted': False,
+            'scam': False, 'group_username': 'UN-b',
+            'verified': False, 'title': 'UT-02', 'source': '5526986587745'
+        })
+        TelegramMessageDatabaseManager.insert({
+            'id': 55, 'group_id': 2, 'date_time': datetime.datetime.now(tz=pytz.utc),
+            'message': 'Message 1', 'raw': 'Raw Message 1', 'from_id': None, 'from_type': None,
+            'to_id': None, 'media_id': None
+        })
+        TelegramMessageDatabaseManager.insert({
+            'id': 56, 'group_id': 2, 'date_time': datetime.datetime.now(tz=pytz.utc),
+            'message': 'Message 2', 'raw': 'Raw Message 2', 'from_id': None, 'from_type': None,
+            'to_id': None, 'media_id': None
+        })
+        TelegramMessageDatabaseManager.insert({
+            'id': 57, 'group_id': 2, 'date_time': datetime.datetime.now(tz=pytz.utc),
+            'message': 'Message 2', 'raw': 'Raw Message 3', 'from_id': None, 'from_type': None,
+            'to_id': None, 'media_id': None
+        })
+        TelegramMessageDatabaseManager.insert({
+            'id': 58, 'group_id': 2, 'date_time': datetime.datetime.now(tz=pytz.utc),
+            'message': 'Message 3', 'raw': 'Raw Message 4 - http://www.url.domain.com', 'from_id': None, 'from_type': None,
+            'to_id': None, 'media_id': None
+        })
+
+        # Add Group 3 - With Previous Messages
+        TelegramGroupDatabaseManager.insert_or_update({
+            'id': 3, 'constructor_id': 'B', 'access_hash': 'BBBBBB',
+            'fake': False, 'gigagroup': False, 'has_geo': False,
+            'participants_count': 2, 'restricted': False,
+            'scam': False, 'group_username': 'UN-c',
+            'verified': False, 'title': 'UT-03', 'source': '5526986587745'
+        })
+        TelegramMessageDatabaseManager.insert({
+            'id': 60, 'group_id': 3, 'date_time': datetime.datetime.now(tz=pytz.utc),
+            'message': 'Message 7', 'raw': 'Raw Message 1', 'from_id': None, 'from_type': None,
+            'to_id': None, 'media_id': None
+        })
+        TelegramMessageDatabaseManager.insert({
+            'id': 61, 'group_id': 3, 'date_time': datetime.datetime.now(tz=pytz.utc),
+            'message': 'Message 8', 'raw': 'Raw Message 2', 'from_id': None, 'from_type': None,
+            'to_id': None, 'media_id': None
+        })
+        TelegramMessageDatabaseManager.insert({
+            'id': 62, 'group_id': 3, 'date_time': datetime.datetime.now(tz=pytz.utc),
+            'message': 'Message 9', 'raw': 'Raw Message 3', 'from_id': None, 'from_type': None,
+            'to_id': None, 'media_id': None
+        })
+        TelegramMessageDatabaseManager.insert({
+            'id': 63, 'group_id': 3, 'date_time': datetime.datetime.now(tz=pytz.utc),
+            'message': 'Message 10', 'raw': 'Raw Message 4 - http://www.url.domain.com/2', 'from_id': None,
+            'from_type': None, 'to_id': None, 'media_id': None
+        })
+
+        DbManager.SESSIONS['data'].commit()
+
+    def tearDown(self) -> None:
+        """Close the 'data' database session."""
+        DbManager.SESSIONS['data'].close()
+
+    def test_run_generate_report_all(self):
+        """Run the export with group_id '*' and expect a result file for both seeded groups."""
+
+        # Call Test Target Method
+        target: TelegramExportTextGenerator = TelegramExportTextGenerator()
+        args: Dict = {
+            'export_text': True,
+            'config': 'unittest_configfile.config',
+            'report_folder': '_report',
+            'group_id': '*',
+            'order_desc': True,
+            'filter': 'Message',
+            'limit_days': 30,
+            'regex': '(.*http://.*),(.*https://.*)'
+        }
+        data: Dict = {}
+        TestsCommon.execute_basic_pipeline_steps_for_initialization(config=self.config, args=args, data=data)
+
+        with self.assertLogs():
+            loop = asyncio.get_event_loop()
+            loop.run_until_complete(
+                target.run(
+                    config=self.config,
+                    args=args,
+                    data=data
+                )
+            )
+
+        # Check Output Files
+        self.assertTrue(os.path.exists(os.path.join('_report', 'result_UN-b_2.txt')))
+        self.assertTrue(os.path.exists(os.path.join('_report', 'result_UN-c_3.txt')))
+
+    def test_run_generate_report_filtered(self):
+        """Run the export with group_id '2' and expect a result file for group 2 only."""
+
+        # Call Test Target Method
+        target: TelegramExportTextGenerator = TelegramExportTextGenerator()
+        args: Dict = {
+            'export_text': True,
+            'config': 'unittest_configfile.config',
+            'report_folder': '_report',
+            'group_id': '2',
+            'order_desc': True,
+            'filter': 'Message',
+            'limit_days': 30,
+            'regex': '(.*http://.*),(.*https://.*)'
+        }
+        data: Dict = {}
+        TestsCommon.execute_basic_pipeline_steps_for_initialization(config=self.config, args=args, data=data)
+
+        with self.assertLogs():
+            loop = asyncio.get_event_loop()
+            loop.run_until_complete(
+                target.run(
+                    config=self.config,
+                    args=args,
+                    data=data
+                )
+            )
+
+        # Check Output Files
+        self.assertTrue(os.path.exists(os.path.join('_report', 'result_UN-b_2.txt')))
+        self.assertFalse(os.path.exists(os.path.join('_report', 'result_UN-c_3.txt')))
\ No newline at end of file
diff --git a/tox.ini b/tox.ini index 20d94f1..d7f641b 100644 --- a/tox.ini +++ b/tox.ini @@ -40,7 +40,7 @@ commands = poetry run coverage erase poetry run coverage run --source='../TEx' -m pytest . {posargs} --color=yes poetry run coverage report --rcfile=../coverage.rc - poetry run coverage html --rcfile=../coverage.rc --fail-under=78 + poetry run coverage html --rcfile=../coverage.rc --fail-under=85 [testenv]